From 81d8bf1c543ed7a2d05737021b7f202b44e1c49b Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Tue, 9 Apr 2024 20:39:21 -0700 Subject: [PATCH] CBOR: fix the UTF8-and-UTF16 detection This isn't a bug, it just removes dead code. The code to compare US-ASCII to UTF-16 was never engaged because this conditional was wrong and effectively catching everything. Fortunately, because that comparison code is now wrong with the unit tests added to tst_QCborValue in the past few commits. Change-Id: If1bf59ecbe014b569ba1fffd17c4ce184948e646 Reviewed-by: Ivan Solovev (cherry picked from commit af9f3c5da282a7eff9456b2133a91f06aa1c1a85) Reviewed-by: Thiago Macieira --- src/corelib/serialization/qcborvalue.cpp | 52 ++++++++---------------- 1 file changed, 17 insertions(+), 35 deletions(-) diff --git a/src/corelib/serialization/qcborvalue.cpp b/src/corelib/serialization/qcborvalue.cpp index 15b5a1af4c1..f72bdc71a27 100644 --- a/src/corelib/serialization/qcborvalue.cpp +++ b/src/corelib/serialization/qcborvalue.cpp @@ -1219,51 +1219,33 @@ static int compareElementRecursive(const QCborContainerPrivate *c1, const Elemen Q_ASSERT(b2); // Officially with CBOR, we sort first the string with the shortest - // UTF-8 length. The length of an ASCII string is the same as its UTF-8 - // and UTF-16 ones, but the UTF-8 length of a string is bigger than the - // UTF-16 equivalent. Combinations are: - // 1) UTF-16 and UTF-16 - // 2) UTF-16 and UTF-8 <=== this is the problem case - // 3) UTF-16 and US-ASCII - // 4) UTF-8 and UTF-8 - // 5) UTF-8 and US-ASCII - // 6) US-ASCII and US-ASCII - if ((e1.flags & Element::StringIsUtf16) && (e2.flags & Element::StringIsUtf16)) { - // Case 1: both UTF-16 + // UTF-8 length. Since US-ASCII is just a subset of UTF-8, its length + // is the UTF-8 length. But the UTF-16 length may not be directly + // comparable. + if ((e1.flags & Element::StringIsUtf16) && (e2.flags & Element::StringIsUtf16)) return compareStringsInUtf8(b1->asStringView(), b2->asStringView()); - } if (!(e1.flags & Element::StringIsUtf16) && !(e2.flags & Element::StringIsUtf16)) { - // Cases 4, 5 and 6: neither is UTF-16, so lengths are comparable too + // Neither is UTF-16, so lengths are comparable too // (this case includes byte arrays too) if (len1 == len2) return memcmp(b1->byte(), b2->byte(), size_t(len1)); return len1 < len2 ? -1 : 1; } - if (!(e1.flags & Element::StringIsAscii) || !(e2.flags & Element::StringIsAscii)) { - // Case 2: one of them is UTF-8 and the other is UTF-16, so lengths - // are NOT comparable. We need to convert to UTF-8 first... - // (we can't use QUtf8::compareUtf8 because we need to compare lengths) - auto string = [](const Element &e, const ByteData *b) -> QByteArray { - if (e.flags & Element::StringIsUtf16) - return b->asStringView().toUtf8(); - return b->asByteArrayView(); // actually a QByteArray::fromRaw - }; + // Only one is UTF-16 + // (we can't use QUtf8::compareUtf8 because we need to compare lengths) + auto string = [](const Element &e, const ByteData *b) -> QByteArray { + if (e.flags & Element::StringIsUtf16) + return b->asStringView().toUtf8(); + return b->asByteArrayView(); // actually a QByteArray::fromRaw + }; - QByteArray s1 = string(e1, b1); - QByteArray s2 = string(e2, b2); - if (s1.size() == s2.size()) - return memcmp(s1.constData(), s2.constData(), s1.size()); - return s1.size() < s2.size() ? -1 : 1; - } - - // Case 3 (UTF-16 and US-ASCII) remains, so lengths are comparable again - if (len1 != len2) - return len1 < len2 ? -1 : 1; - if (e1.flags & Element::StringIsUtf16) - return QtPrivate::compareStrings(b1->asStringView(), b2->asLatin1()); - return QtPrivate::compareStrings(b1->asLatin1(), b2->asStringView()); + QByteArray s1 = string(e1, b1); + QByteArray s2 = string(e2, b2); + if (s1.size() == s2.size()) + return memcmp(s1.constData(), s2.constData(), s1.size()); + return s1.size() < s2.size() ? -1 : 1; } return compareElementNoData(e1, e2);