QLatin1StringView: add toUtf8()

A more efficient way of transcoding from Latin-1 to UTF-8 without
going through UTF-16 (QString).

[ChangeLog][QtCore][QLatin1StringView] Added toUtf8(), which can convert
without going through QString first.

Change-Id: I937f9536a473b990ba05fffd3670234055d0e46c
Reviewed-by: Ivan Solovev <ivan.solovev@qt.io>
This commit is contained in:
Thiago Macieira 2024-10-02 09:00:29 -07:00
parent 6c8368226e
commit 846b84ff30
4 changed files with 53 additions and 0 deletions

View File

@ -58,6 +58,7 @@ public:
#endif // !Q_L1S_VIEW_IS_PRIMARY
// Converts the viewed Latin-1 data to a QString. Only declared here; the
// body is provided elsewhere.
inline QString toString() const;
// Returns the viewed data encoded as UTF-8. All the work is delegated to
// QtPrivate::convertToUtf8(), which receives this view by value.
QByteArray toUtf8() const { return QtPrivate::convertToUtf8(*this); }
// Returns the stored data pointer (m_data) unchanged.
constexpr const char *latin1() const noexcept { return m_data; }
// Returns the stored length (m_size).
constexpr qsizetype size() const noexcept { return m_size; }

View File

@ -270,6 +270,18 @@
\code
return QString(*this);
\endcode
\sa toUtf8()
*/
/*!
\fn QByteArray QLatin1StringView::toUtf8() const
\since 6.9
Returns a UTF-8 representation of the string as a QByteArray. This function
is more efficient than converting first to QString.
\sa toString(), QString::toUtf8()
*/
/*! \fn const char *QLatin1StringView::latin1() const

View File

@ -5712,6 +5712,7 @@ QByteArray QtPrivate::convertToUtf8(QLatin1StringView string)
if (Q_UNLIKELY(string.isNull()))
return QByteArray();
// create a QByteArray with the worst case scenario size
QByteArray ba(string.size() * 2, Qt::Uninitialized);
const qsizetype sz = QUtf8::convertFromLatin1(ba.data(), string) - ba.data();
ba.truncate(sz);

View File

@ -33,6 +33,8 @@ private Q_SLOTS:
void count();
void indexOf_data();
void indexOf();
void toUtf8_data();
void toUtf8();
};
void tst_QLatin1StringView::constExpr()
@ -512,6 +514,43 @@ void tst_QLatin1StringView::indexOf()
QCOMPARE(haystack.indexOf(needle, from, Qt::CaseInsensitive), (qsizetype)indexCaseInsensitive);
}
void tst_QLatin1StringView::toUtf8_data()
{
QTest::addColumn<QByteArray>("input");
QTest::newRow("null") << QByteArray();
QTest::newRow("empty") << QByteArray("");
for (int i = 0; i < 256; ++i) {
char c = i;
QTest::addRow("char-0x%02x", i) << QByteArray(1, c);
}
QByteArray ba = "abcd";
for (int i = 0; i < 6; ++i) {
QTest::addRow("ascii-%d", int(ba.size())) << ba;
ba += ba;
QTest::addRow("ascii-%d", int(ba.size()) - 1) << ba.left(ba.size() - 1);
}
ba = "\xe0""abcdef\xff";
for (int i = 0; i < 6; ++i) {
QTest::addRow("nonascii-%d", int(ba.size())) << ba;
ba += ba;
QTest::addRow("nonascii-%d", int(ba.size()) - 1) << ba.left(ba.size() - 1);
}
}
void tst_QLatin1StringView::toUtf8()
{
QFETCH(QByteArray, input);
QLatin1StringView sv(input);
QByteArray expected = sv.toString().toUtf8();
QByteArray result = sv.toUtf8();
QCOMPARE(result.isNull(), sv.isNull());
QCOMPARE(result, expected);
}
QTEST_APPLESS_MAIN(tst_QLatin1StringView)
#include "tst_qlatin1stringview.moc"