diff --git a/src/corelib/text/qchar.cpp b/src/corelib/text/qchar.cpp index 52b1a1cbece..4d156502ef0 100644 --- a/src/corelib/text/qchar.cpp +++ b/src/corelib/text/qchar.cpp @@ -1553,6 +1553,7 @@ static auto fullConvertCase(char32_t uc, QUnicodeTables::Case which) noexcept auto data() const { return chars; } auto size() const { return sz; } } result; + Q_ASSERT(uc <= QChar::LastValidCodePoint); auto pp = result.chars; diff --git a/src/corelib/text/qchar.h b/src/corelib/text/qchar.h index c3fe6d31072..fd9a4728c8d 100644 --- a/src/corelib/text/qchar.h +++ b/src/corelib/text/qchar.h @@ -504,11 +504,11 @@ public: } static constexpr inline bool isHighSurrogate(char32_t ucs4) noexcept { - return ((ucs4 & 0xfffffc00) == 0xd800); + return (ucs4 & 0xfffffc00) == 0xd800; // 0xd800 + up to 1023 (0x3ff) } static constexpr inline bool isLowSurrogate(char32_t ucs4) noexcept { - return ((ucs4 & 0xfffffc00) == 0xdc00); + return (ucs4 & 0xfffffc00) == 0xdc00; // 0xdc00 + up to 1023 (0x3ff) } static constexpr inline bool isSurrogate(char32_t ucs4) noexcept { @@ -520,6 +520,8 @@ public: } static constexpr inline char32_t surrogateToUcs4(char16_t high, char16_t low) noexcept { + // 0x010000 through 0x10ffff, provided params are actual high, low surrogates. + // 0x010000 + ((high - 0xd800) << 10) + (low - 0xdc00), optimized: return (char32_t(high)<<10) + low - 0x35fdc00; } static constexpr inline char32_t surrogateToUcs4(QChar high, QChar low) noexcept diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp index 68b7011f158..068247adb48 100644 --- a/src/corelib/text/qstring.cpp +++ b/src/corelib/text/qstring.cpp @@ -4755,7 +4755,7 @@ bool QString::isUpper() const QStringIterator it(*this); while (it.hasNext()) { - const char32_t uc = it.nextUnchecked(); + const char32_t uc = it.next(); if (qGetProp(uc)->cases[QUnicodeTables::UpperCase].diff) return false; } @@ -4781,7 +4781,7 @@ bool QString::isLower() const QStringIterator it(*this); while (it.hasNext()) { - const char32_t uc = it.nextUnchecked(); + const char32_t uc = it.next(); if (qGetProp(uc)->cases[QUnicodeTables::LowerCase].diff) return false; } @@ -6143,7 +6143,7 @@ static QString detachAndConvertCase(T &str, QStringIterator it, QUnicodeTables:: QChar *pp = s.begin() + it.index(); // will detach if necessary do { - const auto folded = fullConvertCase(it.nextUnchecked(), which); + const auto folded = fullConvertCase(it.next(), which); if (Q_UNLIKELY(folded.size() > 1)) { if (folded.chars[0] == *pp && folded.size() == 2) { // special case: only second actually changed (e.g. surrogate pairs), @@ -6183,9 +6183,9 @@ static QString convertCase(T &str, QUnicodeTables::Case which) QStringIterator it(p, e); while (it.hasNext()) { - const char32_t uc = it.nextUnchecked(); + const char32_t uc = it.next(); if (qGetProp(uc)->cases[which].diff) { - it.recedeUnchecked(); + it.recede(); return detachAndConvertCase(str, it, which); } } diff --git a/src/corelib/text/qunicodetables.cpp b/src/corelib/text/qunicodetables.cpp index 9c6aa477f86..0809d338a58 100644 --- a/src/corelib/text/qunicodetables.cpp +++ b/src/corelib/text/qunicodetables.cpp @@ -9592,6 +9592,7 @@ static const Properties uc_properties[] = { Q_DECL_CONST_FUNCTION static inline const Properties *qGetProp(char32_t ucs4) noexcept { + Q_ASSERT(ucs4 <= QChar::LastValidCodePoint); if (ucs4 < 0x11000) return uc_properties + uc_property_trie[uc_property_trie[ucs4 >> 5] + (ucs4 & 0x1f)]; diff --git a/tests/auto/corelib/text/qstring/tst_qstring.cpp b/tests/auto/corelib/text/qstring/tst_qstring.cpp index 126bc089036..ad59025dfee 100644 --- a/tests/auto/corelib/text/qstring/tst_qstring.cpp +++ b/tests/auto/corelib/text/qstring/tst_qstring.cpp @@ -458,6 +458,8 @@ private slots: void simplified_data(); void simplified(); void trimmed(); + void unicodeTableAccess_data(); + void unicodeTableAccess(); void toUpper(); void toLower(); void isLower_isUpper_data(); @@ -1949,6 +1951,32 @@ void tst_QString::rightJustified() QCOMPARE(a, QLatin1String("ABC")); } +void tst_QString::unicodeTableAccess_data() +{ + QTest::addColumn("invalid"); + + const auto join = [](char16_t high, char16_t low) { + const QChar pair[2] = { high, low }; + return QString(pair, 2); + }; + // Least high surrogate for which an invalid successor produces an error: + QTest::newRow("least-high") << join(0xdbf8, 0xfc00); + // Least successor that, after a high surrogate, produces invalid: + QTest::newRow("least-follow") << join(0xdbff, 0xe000); +} + +void tst_QString::unicodeTableAccess() +{ + // QString processing must not access unicode tables out of bounds. + QFETCH(QString, invalid); + // Exercise methods, to see if any assertions trigger: + const auto upper = invalid.toUpper(); + const auto lower = invalid.toLower(); + const auto folded = invalid.toCaseFolded(); + // Fatuous test, just to use those. + QVERIFY(upper == invalid || lower == invalid || folded == invalid || lower != upper); +} + void tst_QString::toUpper() { QCOMPARE( QString().toUpper(), QString() ); diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp index 167f632e373..df806eff0b6 100644 --- a/util/unicode/main.cpp +++ b/util/unicode/main.cpp @@ -2501,6 +2501,7 @@ static QByteArray createPropertyInfo() out += "Q_DECL_CONST_FUNCTION static inline const Properties *qGetProp(char32_t ucs4) noexcept\n" "{\n" + " Q_ASSERT(ucs4 <= QChar::LastValidCodePoint);\n" " if (ucs4 < 0x" + QByteArray::number(BMP_END, 16) + ")\n" " return uc_properties + uc_property_trie[uc_property_trie[ucs4 >> " + QByteArray::number(BMP_SHIFT) + "] + (ucs4 & 0x"