Change QString::fromLatin1 and friends to use QByteArrayView

Also adjust the QString constructor from QByteArray so that embedded
\0 characters no longer terminate the conversion; they are converted
like any other character.

[ChangeLog][QtCore][QString] Constructing a QString from a QByteArray
no longer stops at embedded '\0' (null) characters, as it did in
Qt 5; all characters in the byte array are now converted.

Change-Id: I1f6bfefe76dfa9072b165903fec7aa4af1abd882
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Lars Knoll 2020-07-30 17:14:34 +02:00 committed by Mårten Nordheim
parent c09df7b57c
commit 6abdbb65e5
4 changed files with 90 additions and 70 deletions

View File

@ -5133,30 +5133,36 @@ QList<uint> QtPrivate::convertToUcs4(QStringView string)
return qt_convert_to_ucs4(string); return qt_convert_to_ucs4(string);
} }
QString::DataPointer QString::fromLatin1_helper(const char *str, qsizetype size) /*!
\fn QString QString::fromLatin1(QByteArrayView str)
\overload
\since 6.0
Returns a QString initialized with the Latin-1 string \a str.
*/
QString QString::fromLatin1(QByteArrayView ba)
{ {
DataPointer d; DataPointer d;
if (!str) { if (!ba.data()) {
// nothing to do // nothing to do
} else if (size == 0 || (!*str && size < 0)) { } else if (ba.size() == 0) {
d = DataPointer::fromRawData(&_empty, 0); d = DataPointer::fromRawData(&_empty, 0);
} else { } else {
if (size < 0) d = DataPointer(Data::allocate(ba.size()), ba.size());
size = qstrlen(str); d.data()[ba.size()] = '\0';
d = DataPointer(Data::allocate(size), size);
d.data()[size] = '\0';
char16_t *dst = d.data(); char16_t *dst = d.data();
qt_from_latin1(dst, str, size_t(size));
qt_from_latin1(dst, ba.data(), size_t(ba.size()));
} }
return d; return QString(std::move(d));
} }
/*! \fn QString QString::fromLatin1(const char *str, qsizetype size) /*!
\fn QString QString::fromLatin1(const char *str, qsizetype size)
Returns a QString initialized with the first \a size characters Returns a QString initialized with the first \a size characters
of the Latin-1 string \a str. of the Latin-1 string \a str.
If \a size is -1 (default), it is taken to be strlen(\a If \a size is \c{-1}, \c{strlen(str)} is used instead.
str).
\sa toLatin1(), fromUtf8(), fromLocal8Bit() \sa toLatin1(), fromUtf8(), fromLocal8Bit()
*/ */
@ -5169,12 +5175,12 @@ QString::DataPointer QString::fromLatin1_helper(const char *str, qsizetype size)
Returns a QString initialized with the Latin-1 string \a str. Returns a QString initialized with the Latin-1 string \a str.
*/ */
/*! \fn QString QString::fromLocal8Bit(const char *str, qsizetype size) /*!
\fn QString QString::fromLocal8Bit(const char *str, qsizetype size)
Returns a QString initialized with the first \a size characters Returns a QString initialized with the first \a size characters
of the 8-bit string \a str. of the 8-bit string \a str.
If \a size is -1 (default), it is taken to be strlen(\a If \a size is \c{-1}, \c{strlen(str)} is used instead.
str).
On Unix systems this is equivalent to fromUtf8(), on Windows the system's On Unix systems this is equivalent to fromUtf8(), on Windows the system's
current code page is being used. current code page is being used.
@ -5189,24 +5195,29 @@ QString::DataPointer QString::fromLatin1_helper(const char *str, qsizetype size)
Returns a QString initialized with the 8-bit string \a str. Returns a QString initialized with the 8-bit string \a str.
*/ */
QString QString::fromLocal8Bit_helper(const char *str, qsizetype size)
/*!
\fn QString QString::fromLocal8Bit(QByteArrayView str)
\overload
\since 6.0
Returns a QString initialized with the 8-bit string \a str.
*/
QString QString::fromLocal8Bit(QByteArrayView ba)
{ {
if (!str) if (ba.isNull())
return QString(); return QString();
if (size < 0) if (ba.isEmpty())
size = qstrlen(str);
if (size == 0)
return QString(DataPointer::fromRawData(&_empty, 0)); return QString(DataPointer::fromRawData(&_empty, 0));
QStringDecoder toUtf16(QStringDecoder::System, QStringDecoder::Flag::Stateless); QStringDecoder toUtf16(QStringDecoder::System, QStringDecoder::Flag::Stateless);
return toUtf16(str, size); return toUtf16(ba.data(), ba.size());
} }
/*! \fn QString QString::fromUtf8(const char *str, qsizetype size) /*! \fn QString QString::fromUtf8(const char *str, qsizetype size)
Returns a QString initialized with the first \a size bytes Returns a QString initialized with the first \a size bytes
of the UTF-8 string \a str. of the UTF-8 string \a str.
If \a size is -1 (default), it is taken to be strlen(\a If \a size is \c{-1}, \c{strlen(str)} is used instead.
str).
UTF-8 is a Unicode codec and can represent all characters in a Unicode UTF-8 is a Unicode codec and can represent all characters in a Unicode
string like QString. However, invalid sequences are possible with UTF-8 string like QString. However, invalid sequences are possible with UTF-8
@ -5238,13 +5249,21 @@ QString QString::fromLocal8Bit_helper(const char *str, qsizetype size)
Returns a QString initialized with the UTF-8 string \a str. Returns a QString initialized with the UTF-8 string \a str.
*/ */
QString QString::fromUtf8_helper(const char *str, qsizetype size)
{
if (!str)
return QString();
Q_ASSERT(size != -1); /*!
return QUtf8::convertToUnicode(str, size); \fn QString QString::fromUtf8(QByteArrayView str)
\overload
\since 6.0
Returns a QString initialized with the UTF-8 string \a str.
*/
QString QString::fromUtf8(QByteArrayView ba)
{
if (ba.isNull())
return QString();
if (ba.isEmpty())
return QString(DataPointer::fromRawData(&_empty, 0));
return QUtf8::convertToUnicode(ba.data(), ba.size());
} }
/*! /*!

View File

@ -669,29 +669,32 @@ public:
[[nodiscard]] QList<uint> toUcs4() const; [[nodiscard]] QList<uint> toUcs4() const;
// note - these are all inline so we can benefit from strlen() compile time optimizations // note - these are all inline so we can benefit from strlen() compile time optimizations
static inline QString fromLatin1(const char *str, qsizetype size = -1) static QString fromLatin1(QByteArrayView ba);
Q_WEAK_OVERLOAD
static inline QString fromLatin1(const QByteArray &ba) { return fromLatin1(QByteArrayView(ba)); }
static inline QString fromLatin1(const char *str, qsizetype size)
{ {
return QString(fromLatin1_helper(str, (str && size == -1) ? qsizetype(strlen(str)) : size)); return fromLatin1(QByteArrayView(str, !str || size < 0 ? qstrlen(str) : size));
} }
static inline QString fromUtf8(const char *str, qsizetype size = -1) static QString fromUtf8(QByteArrayView utf8);
Q_WEAK_OVERLOAD
static inline QString fromUtf8(const QByteArray &ba) { return fromUtf8(QByteArrayView(ba)); }
static inline QString fromUtf8(const char *utf8, qsizetype size)
{ {
return fromUtf8_helper(str, (str && size == -1) ? qsizetype(strlen(str)) : size); return fromUtf8(QByteArrayView(utf8, !utf8 || size < 0 ? qstrlen(utf8) : size));
} }
#ifdef __cpp_char8_t #ifdef __cpp_char8_t
Q_WEAK_OVERLOAD Q_WEAK_OVERLOAD
static inline QString fromUtf8(const char8_t *str, qsizetype size = -1) static inline QString fromUtf8(const char8_t *str, qsizetype size)
{ return fromUtf8(reinterpret_cast<const char *>(str), int(size)); } { return fromUtf8(reinterpret_cast<const char *>(str), int(size)); }
#endif #endif
static inline QString fromLocal8Bit(const char *str, qsizetype size = -1) static QString fromLocal8Bit(QByteArrayView ba);
Q_WEAK_OVERLOAD
static inline QString fromLocal8Bit(const QByteArray &ba) { return fromLocal8Bit(QByteArrayView(ba)); }
static inline QString fromLocal8Bit(const char *str, qsizetype size)
{ {
return fromLocal8Bit_helper(str, (str && size == -1) ? qsizetype(strlen(str)) : size); return fromLocal8Bit(QByteArrayView(str, !str || size < 0 ? qstrlen(str) : size));
} }
static inline QString fromLatin1(const QByteArray &str)
{ return str.isNull() ? QString() : fromLatin1(str.data(), qstrnlen(str.constData(), str.size())); }
static inline QString fromUtf8(const QByteArray &str)
{ return str.isNull() ? QString() : fromUtf8(str.data(), qstrnlen(str.constData(), str.size())); }
static inline QString fromLocal8Bit(const QByteArray &str)
{ return str.isNull() ? QString() : fromLocal8Bit(str.data(), qstrnlen(str.constData(), str.size())); }
static QString fromUtf16(const char16_t *, qsizetype size = -1); static QString fromUtf16(const char16_t *, qsizetype size = -1);
static QString fromUcs4(const char32_t *, qsizetype size = -1); static QString fromUcs4(const char32_t *, qsizetype size = -1);
static QString fromRawData(const QChar *, qsizetype size); static QString fromRawData(const QChar *, qsizetype size);
@ -964,9 +967,6 @@ private:
static QString trimmed_helper(QString &str); static QString trimmed_helper(QString &str);
static QString simplified_helper(const QString &str); static QString simplified_helper(const QString &str);
static QString simplified_helper(QString &str); static QString simplified_helper(QString &str);
static DataPointer fromLatin1_helper(const char *str, qsizetype size = -1);
static QString fromUtf8_helper(const char *str, qsizetype size);
static QString fromLocal8Bit_helper(const char *, qsizetype size);
static QByteArray toLatin1_helper(const QString &); static QByteArray toLatin1_helper(const QString &);
static QByteArray toLatin1_helper_inplace(QString &); static QByteArray toLatin1_helper_inplace(QString &);
static QByteArray toUtf8_helper(const QString &); static QByteArray toUtf8_helper(const QString &);
@ -1057,8 +1057,8 @@ QString QAnyStringView::toString() const
// //
// QString inline members // QString inline members
// //
inline QString::QString(QLatin1String aLatin1) : d(fromLatin1_helper(aLatin1.latin1(), aLatin1.size())) inline QString::QString(QLatin1String latin1)
{ } { *this = QString::fromLatin1(latin1.data(), latin1.size()); }
inline const QChar QString::at(qsizetype i) const inline const QChar QString::at(qsizetype i) const
{ Q_ASSERT(size_t(i) < size_t(size())); return QChar(d.data()[i]); } { Q_ASSERT(size_t(i) < size_t(size())); return QChar(d.data()[i]); }
inline const QChar QString::operator[](qsizetype i) const inline const QChar QString::operator[](qsizetype i) const
@ -1304,9 +1304,9 @@ QT_ASCII_CAST_WARN inline bool QLatin1String::operator>=(const QByteArray &s) co
{ return QString::fromUtf8(s) <= *this; } { return QString::fromUtf8(s) <= *this; }
QT_ASCII_CAST_WARN inline bool QString::operator==(const QByteArray &s) const QT_ASCII_CAST_WARN inline bool QString::operator==(const QByteArray &s) const
{ return QString::compare_helper(constData(), size(), s.constData(), qstrnlen(s.constData(), s.size())) == 0; } { return QString::compare_helper(constData(), size(), s.constData(), s.size()) == 0; }
QT_ASCII_CAST_WARN inline bool QString::operator!=(const QByteArray &s) const QT_ASCII_CAST_WARN inline bool QString::operator!=(const QByteArray &s) const
{ return QString::compare_helper(constData(), size(), s.constData(), qstrnlen(s.constData(), s.size())) != 0; } { return QString::compare_helper(constData(), size(), s.constData(), s.size()) != 0; }
QT_ASCII_CAST_WARN inline bool QString::operator<(const QByteArray &s) const QT_ASCII_CAST_WARN inline bool QString::operator<(const QByteArray &s) const
{ return QString::compare_helper(constData(), size(), s.constData(), s.size()) < 0; } { return QString::compare_helper(constData(), size(), s.constData(), s.size()) < 0; }
QT_ASCII_CAST_WARN inline bool QString::operator>(const QByteArray &s) const QT_ASCII_CAST_WARN inline bool QString::operator>(const QByteArray &s) const

View File

@ -245,7 +245,7 @@ static QString deviceModelIdentifier()
char value[size]; char value[size];
sysctlbyname(key, &value, &size, NULL, 0); sysctlbyname(key, &value, &size, NULL, 0);
return QString::fromLatin1(value); return QString::fromLatin1(QByteArrayView(value, qsizetype(size)));
#endif #endif
} }

View File

@ -1102,7 +1102,7 @@ void tst_QString::constructorQByteArray_data()
ba1[5] = 'e'; ba1[5] = 'e';
ba1[6] = 'f'; ba1[6] = 'f';
QTest::newRow( "2" ) << ba1 << QString("abc"); QTest::newRow( "2" ) << ba1 << QString::fromUtf16(u"abc\0def", 7);
QTest::newRow( "3" ) << QByteArray::fromRawData("abcd", 3) << QString("abc"); QTest::newRow( "3" ) << QByteArray::fromRawData("abcd", 3) << QString("abc");
QTest::newRow( "4" ) << QByteArray("\xc3\xa9") << QString("\xc3\xa9"); QTest::newRow( "4" ) << QByteArray("\xc3\xa9") << QString("\xc3\xa9");
@ -1115,23 +1115,28 @@ void tst_QString::constructorQByteArray()
QFETCH(QByteArray, src); QFETCH(QByteArray, src);
QFETCH(QString, expected); QFETCH(QString, expected);
QString str1(src);
QCOMPARE(str1.length(), expected.length());
QCOMPARE( str1, expected );
QString strBA(src); QString strBA(src);
QCOMPARE( strBA, expected ); QCOMPARE( strBA, expected );
// test operator= too // test operator= too
if (src.constData()[src.length()] == '\0') {
str1.clear();
str1 = src.constData();
QCOMPARE( str1, expected );
}
strBA.clear(); strBA.clear();
strBA = src; strBA = src;
QCOMPARE( strBA, expected ); QCOMPARE( strBA, expected );
// test constructor/operator=(const char *)
if (src.constData()[src.length()] == '\0') {
qsizetype zero = expected.indexOf(QLatin1Char('\0'));
if (zero < 0)
zero = expected.length();
QString str1(src.constData());
QCOMPARE(str1.length(), zero);
QCOMPARE(str1, expected.left(zero));
str1.clear();
str1 = src.constData();
QCOMPARE(str1, expected.left(zero));
}
} }
void tst_QString::STL() void tst_QString::STL()
@ -2522,7 +2527,7 @@ void tst_QString::append_bytearray_special_cases()
} }
QFETCH( QByteArray, ba ); QFETCH( QByteArray, ba );
if (ba.constData()[ba.length()] == '\0') { if (!ba.contains('\0') && ba.constData()[ba.length()] == '\0') {
QFETCH( QString, str ); QFETCH( QString, str );
str.append(ba.constData()); str.append(ba.constData());
@ -2571,7 +2576,7 @@ void tst_QString::operator_pluseq_bytearray_special_cases()
} }
QFETCH( QByteArray, ba ); QFETCH( QByteArray, ba );
if (ba.constData()[ba.length()] == '\0') { if (!ba.contains('\0') && ba.constData()[ba.length()] == '\0') {
QFETCH( QString, str ); QFETCH( QString, str );
str += ba.constData(); str += ba.constData();
@ -2592,7 +2597,7 @@ void tst_QString::operator_eqeq_bytearray()
QVERIFY(expected == src); QVERIFY(expected == src);
QVERIFY(!(expected != src)); QVERIFY(!(expected != src));
if (src.constData()[src.length()] == '\0') { if (!src.contains('\0') && src.constData()[src.length()] == '\0') {
QVERIFY(expected == src.constData()); QVERIFY(expected == src.constData());
QVERIFY(!(expected != src.constData())); QVERIFY(!(expected != src.constData()));
} }
@ -2653,7 +2658,7 @@ void tst_QString::prepend_bytearray_special_cases_data()
// byte array with only a 0 // byte array with only a 0
ba.resize( 1 ); ba.resize( 1 );
ba[0] = 0; ba[0] = 0;
QTest::newRow( "emptyString" ) << QString("foobar ") << ba << QString("foobar "); QTest::newRow( "emptyString" ) << QString("foobar ") << ba << QStringView::fromArray(u"\0foobar ").chopped(1).toString();
// empty byte array // empty byte array
ba.resize( 0 ); ba.resize( 0 );
@ -2685,7 +2690,7 @@ void tst_QString::prepend_bytearray_special_cases()
} }
QFETCH( QByteArray, ba ); QFETCH( QByteArray, ba );
if (ba.constData()[ba.length()] == '\0') { if (!ba.contains('\0') && ba.constData()[ba.length()] == '\0') {
QFETCH( QString, str ); QFETCH( QString, str );
str.prepend(ba.constData()); str.prepend(ba.constData());
@ -4136,9 +4141,7 @@ void tst_QString::fromUtf8_data()
QTest::newRow("null-1") << QByteArray() << QString() << -1; QTest::newRow("null-1") << QByteArray() << QString() << -1;
QTest::newRow("null0") << QByteArray() << QString() << 0; QTest::newRow("null0") << QByteArray() << QString() << 0;
QTest::newRow("null5") << QByteArray() << QString() << 5;
QTest::newRow("empty-1") << QByteArray("\0abcd", 5) << QString() << -1; QTest::newRow("empty-1") << QByteArray("\0abcd", 5) << QString() << -1;
QTest::newRow("empty0") << QByteArray() << QString() << 0;
QTest::newRow("empty5") << QByteArray("\0abcd", 5) << QString::fromLatin1("\0abcd", 5) << 5; QTest::newRow("empty5") << QByteArray("\0abcd", 5) << QString::fromLatin1("\0abcd", 5) << 5;
QTest::newRow("other-1") << QByteArray("ab\0cd", 5) << QString::fromLatin1("ab") << -1; QTest::newRow("other-1") << QByteArray("ab\0cd", 5) << QString::fromLatin1("ab") << -1;
QTest::newRow("other5") << QByteArray("ab\0cd", 5) << QString::fromLatin1("ab\0cd", 5) << 5; QTest::newRow("other5") << QByteArray("ab\0cd", 5) << QString::fromLatin1("ab\0cd", 5) << 5;
@ -4474,8 +4477,6 @@ void tst_QString::fromLatin1()
a = QString::fromLatin1(0, 0); a = QString::fromLatin1(0, 0);
QVERIFY(a.isNull()); QVERIFY(a.isNull());
a = QString::fromLatin1(0, 5);
QVERIFY(a.isNull());
a = QString::fromLatin1("\0abcd", 0); a = QString::fromLatin1("\0abcd", 0);
QVERIFY(!a.isNull()); QVERIFY(!a.isNull());
QVERIFY(a.isEmpty()); QVERIFY(a.isEmpty());