From 81d8bf1c543ed7a2d05737021b7f202b44e1c49b Mon Sep 17 00:00:00 2001
From: Thiago Macieira <thiago.macieira@intel.com>
Date: Tue, 9 Apr 2024 20:39:21 -0700
Subject: [PATCH] CBOR: fix the UTF8-and-UTF16 detection

This isn't a bug, it just removes dead code. The code to compare
US-ASCII to UTF-16 was never engaged because this conditional was wrong
and effectively catching everything. Fortunately, because that
comparison code is now wrong with the unit tests added to tst_QCborValue
in the past few commits.

Change-Id: If1bf59ecbe014b569ba1fffd17c4ce184948e646
Reviewed-by: Ivan Solovev <ivan.solovev@qt.io>
(cherry picked from commit af9f3c5da282a7eff9456b2133a91f06aa1c1a85)
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
---
 src/corelib/serialization/qcborvalue.cpp | 52 ++++++++----------------
 1 file changed, 17 insertions(+), 35 deletions(-)

diff --git a/src/corelib/serialization/qcborvalue.cpp b/src/corelib/serialization/qcborvalue.cpp
index 15b5a1af4c1..f72bdc71a27 100644
--- a/src/corelib/serialization/qcborvalue.cpp
+++ b/src/corelib/serialization/qcborvalue.cpp
@@ -1219,51 +1219,33 @@ static int compareElementRecursive(const QCborContainerPrivate *c1, const Elemen
         Q_ASSERT(b2);
 
         // Officially with CBOR, we sort first the string with the shortest
-        // UTF-8 length. The length of an ASCII string is the same as its UTF-8
-        // and UTF-16 ones, but the UTF-8 length of a string is bigger than the
-        // UTF-16 equivalent. Combinations are:
-        //  1) UTF-16 and UTF-16
-        //  2) UTF-16 and UTF-8  <=== this is the problem case
-        //  3) UTF-16 and US-ASCII
-        //  4) UTF-8 and UTF-8
-        //  5) UTF-8 and US-ASCII
-        //  6) US-ASCII and US-ASCII
-        if ((e1.flags & Element::StringIsUtf16) && (e2.flags & Element::StringIsUtf16)) {
-            // Case 1: both UTF-16
+        // UTF-8 length. Since US-ASCII is just a subset of UTF-8, its length
+        // is the UTF-8 length. But the UTF-16 length may not be directly
+        // comparable.
+        if ((e1.flags & Element::StringIsUtf16) && (e2.flags & Element::StringIsUtf16))
             return compareStringsInUtf8(b1->asStringView(), b2->asStringView());
-        }
 
         if (!(e1.flags & Element::StringIsUtf16) && !(e2.flags & Element::StringIsUtf16)) {
-            // Cases 4, 5 and 6: neither is UTF-16, so lengths are comparable too
+            // Neither is UTF-16, so lengths are comparable too
             // (this case includes byte arrays too)
             if (len1 == len2)
                 return memcmp(b1->byte(), b2->byte(), size_t(len1));
             return len1 < len2 ? -1 : 1;
         }
 
-        if (!(e1.flags & Element::StringIsAscii) || !(e2.flags & Element::StringIsAscii)) {
-            // Case 2: one of them is UTF-8 and the other is UTF-16, so lengths
-            // are NOT comparable. We need to convert to UTF-8 first...
-            // (we can't use QUtf8::compareUtf8 because we need to compare lengths)
-            auto string = [](const Element &e, const ByteData *b) -> QByteArray {
-                if (e.flags & Element::StringIsUtf16)
-                    return b->asStringView().toUtf8();
-                return b->asByteArrayView();    // actually a QByteArray::fromRaw
-            };
+        // Only one is UTF-16
+        // (we can't use QUtf8::compareUtf8 because we need to compare lengths)
+        auto string = [](const Element &e, const ByteData *b) -> QByteArray {
+            if (e.flags & Element::StringIsUtf16)
+                return b->asStringView().toUtf8();
+            return b->asByteArrayView();    // actually a QByteArray::fromRaw
+        };
 
-            QByteArray s1 = string(e1, b1);
-            QByteArray s2 = string(e2, b2);
-            if (s1.size() == s2.size())
-                return memcmp(s1.constData(), s2.constData(), s1.size());
-            return s1.size() < s2.size() ? -1 : 1;
-        }
-
-        // Case 3 (UTF-16 and US-ASCII) remains, so lengths are comparable again
-        if (len1 != len2)
-            return len1 < len2 ? -1 : 1;
-        if (e1.flags & Element::StringIsUtf16)
-            return QtPrivate::compareStrings(b1->asStringView(), b2->asLatin1());
-        return QtPrivate::compareStrings(b1->asLatin1(), b2->asStringView());
+        QByteArray s1 = string(e1, b1);
+        QByteArray s2 = string(e2, b2);
+        if (s1.size() == s2.size())
+            return memcmp(s1.constData(), s2.constData(), s1.size());
+        return s1.size() < s2.size() ? -1 : 1;
     }
 
     return compareElementNoData(e1, e2);