QCborValue: fix sorting of UTF8-to-UTF16 strings

We must compare the raw string length in UTF-8, not the UTF-16 one.

[ChangeLog][QtCore][QCborValue] Fixed a bug that caused certain
non-US-ASCII string comparisons to produce results not in line with the
CBOR specifications.

Pick-to: 6.6 6.5
Change-Id: I5f663c2f9f4149af84fefffd17c05d1c0f1bbc3a
Reviewed-by: Ivan Solovev <ivan.solovev@qt.io>
Reviewed-by: Ahmad Samir <a.samirh78@gmail.com>
(cherry picked from commit 394788c68efacdec2676988b4b4ff207b20557f2)
This commit is contained in:
Thiago Macieira 2024-03-26 09:23:53 -07:00
parent ba37ed8e8d
commit 2cd1cd1541
2 changed files with 44 additions and 6 deletions

View File

@ -1189,16 +1189,18 @@ static int compareElementRecursive(const QCborContainerPrivate *c1, const Elemen
if (!(e1.flags & Element::StringIsAscii) || !(e2.flags & Element::StringIsAscii)) {
// Case 2: one of them is UTF-8 and the other is UTF-16, so lengths
// are NOT comparable. We need to convert to UTF-16 first...
// are NOT comparable. We need to convert to UTF-8 first...
// (we can't use QUtf8::compareUtf8 because we need to compare lengths)
auto string = [](const Element &e, const ByteData *b) {
return e.flags & Element::StringIsUtf16 ? b->asQStringRaw() : b->toUtf8String();
auto string = [](const Element &e, const ByteData *b) -> QByteArray {
if (e.flags & Element::StringIsUtf16)
return b->asStringView().toUtf8();
return b->asByteArrayView(); // actually a QByteArray::fromRaw
};
QString s1 = string(e1, b1);
QString s2 = string(e2, b2);
QByteArray s1 = string(e1, b1);
QByteArray s2 = string(e2, b2);
if (s1.size() == s2.size())
return s1.compare(s2);
return memcmp(s1.constData(), s2.constData(), s1.size());
return s1.size() < s2.size() ? -1 : 1;
}

View File

@ -17,6 +17,8 @@ Q_DECLARE_METATYPE(QCborKnownTags)
Q_DECLARE_METATYPE(QCborValue)
Q_DECLARE_METATYPE(QCborValue::EncodingOptions)
using namespace Qt::StringLiterals;
class tst_QCborValue : public QObject
{
Q_OBJECT
@ -1874,6 +1876,40 @@ void tst_QCborValue::sorting()
// which shows all doubles sorted after integrals
CHECK_ORDER(vint2, vdouble1);
QVERIFY(vint2.toInteger() > vdouble1.toDouble());
// Add some non-US-ASCII strings. In the current implementation, QCborValue
// can store a string as either US-ASCII, UTF-8, or UTF-16, so let's exercise
// those comparisons.
// we don't have a QUtf8StringView constructor, so work around it
auto utf8string = [](QByteArray str) {
Q_ASSERT(str.size() < 24);
str.prepend(char(QCborValue::String) + str.size());
return QCborValue::fromCbor(str);
};
// 5 code units in UTF-8
QCborValue vs4_utf16(u"Mørk"_s);
QCborValue vs4_utf8 = utf8string("Mørk");
QCOMPARE(vs4_utf8, vs4_utf8);
QCOMPARE(vs4_utf16, vs4_utf16);
QCOMPARE(vs4_utf16, vs4_utf8);
// 5 code units in UTF-16
QCborValue vs5_utf16(u"Først"_s);
QCborValue vs5_utf8 = utf8string("Først");
QCOMPARE(vs5_utf8, vs5_utf8);
QCOMPARE(vs5_utf16, vs5_utf16);
QCOMPARE(vs5_utf16, vs5_utf8);
// sorted by UTF-8 length first, so "Mørk" < "World" < "Først" (!!)
CHECK_ORDER(vs4_utf8, vs3);
CHECK_ORDER(vs4_utf16, vs3);
CHECK_ORDER(vs3, vs5_utf8);
CHECK_ORDER(vs3, vs5_utf16);
CHECK_ORDER(vs4_utf8, vs5_utf8);
CHECK_ORDER(vs4_utf8, vs5_utf16);
CHECK_ORDER(vs4_utf16, vs5_utf8);
CHECK_ORDER(vs4_utf16, vs5_utf16);
#undef CHECK_ORDER
}