// Patch overview (preamble only; everything from the first "diff --git" on is unchanged).
//
// src/corelib/io/qurlidna.cpp
//   * mapDomainName(): a character that passes isValidInNormalizedAsciiName() is
//     appended and the loop continues; one that does not is no longer rejected
//     with `return {}` but clears allAscii and falls through to the
//     QUnicodeTables::idnaStatus() switch.
//   * mapDomainName(): U+1E9E is appended as "ss" when
//     QUrl::AceTransitionalProcessing is set.
//   * mapDomainName(): IdnaStatus::Disallowed now shares the IdnaStatus::Valid
//     branch (the code point is appended) instead of returning an empty string.
//     NOTE(review): presumably rejection is left to the later label checks --
//     confirm against the full file.
//   * DomainValidityChecker: new member `ignoreBidiErrors`, set from a
//     constructor argument that defaults to false.
//   * DomainValidityChecker::checkLabel(): a label with '-' at positions 2 and 3
//     now yields `ignoreBidiErrors && label.startsWith(u"xn") &&
//     validateAsciiLabel(label)` instead of an unconditional false, and the
//     QChar::direction() switch is skipped when ignoreBidiErrors is set.
//   * convertToAscii(): idx and labelLength are spelled qsizetype; the
//     empty-label rejection is moved after the end-of-input break.
//   * checkUnicodeName() (new): walks the '.'-separated labels, skips empty
//     ones, and returns false as soon as DomainValidityChecker(true).checkLabel()
//     rejects a label.
//   * qt_ACE_do(): returns an empty string when !mappedToAscii and
//     checkUnicodeName(normalized, options) fails.
//
// tests/auto/corelib/io/qurluts46/BLACKLIST
//   * File deleted; it held a single entry, for [idnaTestV2] (QTBUG-121529).
//
// tests/auto/corelib/io/qurluts46/tst_qurluts46.cpp
//   * The error-code comment no longer lists A3 and P1; fatalErrors is reduced
//     to "A4_2".
//   * idnaTestV2(): the dashesOk helper is removed. The result must equal the
//     expected ASCII form when both toUnicodeOk and the toAscii flag are set,
//     may be empty or equal when only the toAscii flag is set, and must be
//     empty otherwise.
//
// NOTE(review): `static_cast(uc)` and `QSet fatalErrors` look like they lost
// their template arguments, and the original line breaks and indentation are
// collapsed -- verify against the repository before applying this patch.
diff --git a/src/corelib/io/qurlidna.cpp b/src/corelib/io/qurlidna.cpp index b03749fae24..a2a81c76051 100644 --- a/src/corelib/io/qurlidna.cpp +++ b/src/corelib/io/qurlidna.cpp @@ -423,13 +423,19 @@ static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions optio if (uc >= U'A' && uc <= U'Z') uc |= 0x20; // lower-case it - if (!isValidInNormalizedAsciiName(uc)) - return {}; + if (isValidInNormalizedAsciiName(uc)) { + result.append(static_cast(uc)); + continue; + } + } - result.append(static_cast(uc)); + allAscii = false; + + // Capital sharp S is a special case since UTR #46 revision 31 (Unicode 15.1) + if (uc == 0x1E9E && options.testFlag(QUrl::AceTransitionalProcessing)) { + result.append(u"ss"_s); continue; } - allAscii = false; QUnicodeTables::IdnaStatus status = QUnicodeTables::idnaStatus(uc); @@ -442,14 +448,13 @@ static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions optio case QUnicodeTables::IdnaStatus::Ignored: continue; case QUnicodeTables::IdnaStatus::Valid: + case QUnicodeTables::IdnaStatus::Disallowed: for (auto c : QChar::fromUcs4(uc)) result.append(c); break; case QUnicodeTables::IdnaStatus::Mapped: result.append(QUnicodeTables::idnaMapping(uc)); break; - case QUnicodeTables::IdnaStatus::Disallowed: - return {}; default: Q_UNREACHABLE(); } @@ -483,12 +488,13 @@ class DomainValidityChecker { bool domainNameIsBidi = false; bool hadBidiErrors = false; + bool ignoreBidiErrors; static constexpr char32_t ZWNJ = U'\u200C'; static constexpr char32_t ZWJ = U'\u200D'; public: - DomainValidityChecker() { } + DomainValidityChecker(bool ignoreBidiErrors = false) : ignoreBidiErrors(ignoreBidiErrors) { } bool checkLabel(const QString &label, QUrl::AceProcessingOptions options); private: @@ -714,7 +720,7 @@ bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessing // because non-BMP characters are unlikely to be used for specifying // future extensions. 
if (label[2] == u'-' && label[3] == u'-') - return false; + return ignoreBidiErrors && label.startsWith(u"xn") && validateAsciiLabel(label); } if (label.startsWith(u'-') || label.endsWith(u'-')) @@ -736,7 +742,7 @@ bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessing for (;;) { hasJoiners = hasJoiners || c == ZWNJ || c == ZWJ; - if (!domainNameIsBidi) { + if (!ignoreBidiErrors && !domainNameIsBidi) { switch (QChar::direction(c)) { case QChar::DirR: case QChar::DirAL: @@ -784,17 +790,12 @@ static QString convertToAscii(QStringView normalizedDomain, AceLeadingDot dot) QString aceResult; while (true) { - auto idx = normalizedDomain.indexOf(u'.', lastIdx); + qsizetype idx = normalizedDomain.indexOf(u'.', lastIdx); if (idx == -1) idx = normalizedDomain.size(); - const auto labelLength = idx - lastIdx; - if (labelLength == 0) { - if (idx == normalizedDomain.size()) - break; - if (dot == ForbidLeadingDot || idx > 0) - return {}; // two delimiters in a row -- empty label not allowed - } else { + const qsizetype labelLength = idx - lastIdx; + if (labelLength) { const auto label = normalizedDomain.sliced(lastIdx, labelLength); aceForm.clear(); qt_punycodeEncoder(label, &aceForm); @@ -807,6 +808,9 @@ static QString convertToAscii(QStringView normalizedDomain, AceLeadingDot dot) if (idx == normalizedDomain.size()) break; + if (labelLength == 0 && (dot == ForbidLeadingDot || idx > 0)) + return {}; // two delimiters in a row -- empty label not allowed + lastIdx = idx + 1; aceResult += u'.'; } @@ -886,6 +890,33 @@ static QString convertToUnicode(const QString &asciiDomain, QUrl::AceProcessingO return result; } +static bool checkUnicodeName(const QString &domainName, QUrl::AceProcessingOptions options) +{ + qsizetype lastIdx = 0; + + DomainValidityChecker checker(true); + + while (true) { + qsizetype idx = domainName.indexOf(u'.', lastIdx); + if (idx == -1) + idx = domainName.size(); + + const qsizetype labelLength = idx - lastIdx; + if (labelLength) { 
+ const auto label = domainName.sliced(lastIdx, labelLength); + + if (!checker.checkLabel(label, options)) + return false; + } + + if (idx == domainName.size()) + break; + + lastIdx = idx + 1; + } + return true; +} + QString qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot, QUrl::AceProcessingOptions options) { @@ -900,6 +931,9 @@ QString qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot, if (normalized.isEmpty()) return {}; + if (!mappedToAscii && !checkUnicodeName(normalized, options)) + return {}; + bool needsConversionToUnicode; const QString aceResult = mappedToAscii ? normalized : convertToAscii(normalized, dot); if (aceResult.isEmpty() || !checkAsciiDomainName(aceResult, dot, &needsConversionToUnicode)) diff --git a/tests/auto/corelib/io/qurluts46/BLACKLIST b/tests/auto/corelib/io/qurluts46/BLACKLIST deleted file mode 100644 index 5f10f84eeb1..00000000000 --- a/tests/auto/corelib/io/qurluts46/BLACKLIST +++ /dev/null @@ -1,3 +0,0 @@ -# QTBUG-121529: Tests need to be updated to Unicode 15.1 -[idnaTestV2] -* diff --git a/tests/auto/corelib/io/qurluts46/tst_qurluts46.cpp b/tests/auto/corelib/io/qurluts46/tst_qurluts46.cpp index 2bee530eac6..d163ed19bf7 100644 --- a/tests/auto/corelib/io/qurluts46/tst_qurluts46.cpp +++ b/tests/auto/corelib/io/qurluts46/tst_qurluts46.cpp @@ -16,11 +16,11 @@ private Q_SLOTS: void idnaTestV2(); private: - // All error codes: - // A3, A4_1, A4_2, + // All error codes in UTR #46 revision 31 (Unicode 15.1): + // A4_1, A4_2, // B1, B2, B3, B4, B5, B6, // C1, C2, - // P1, P4, + // P4, // V1, V2, V3, V5, V6, // X4_2 // @@ -28,7 +28,9 @@ private: static const QSet fatalErrors; }; -const QSet tst_QUrlUts46::fatalErrors = { "A3", "A4_2", "P1", "X4_2" }; +const QSet tst_QUrlUts46::fatalErrors = { + "A4_2", // Empty ASCII label +}; /** * Replace \uXXXX escapes in test case fields. 
@@ -124,22 +126,19 @@ void tst_QUrlUts46::idnaTestV2() QFETCH(QString, toAsciiT); QFETCH(bool, toAsciiTOk); - auto dashesOk = [](const QString &domain) { - const auto labels = domain.split(u'.'); - return std::all_of(labels.begin(), labels.end(), [](const QString &label) { - return label.isEmpty() || !(label.startsWith(u'-') || label.endsWith(u'-')); - }); - }; - QString toAceN = QUrl::toAce(source); - if (toAsciiNOk && dashesOk(toAsciiN)) + if (toUnicodeOk && toAsciiNOk) QCOMPARE(toAceN, toAsciiN); + else if (toAsciiNOk) + QVERIFY(toAceN.isEmpty() || toAceN == toAsciiN); else QCOMPARE(toAceN, QString()); QString toAceT = QUrl::toAce(source, QUrl::AceTransitionalProcessing); - if (toAsciiTOk && dashesOk(toAsciiT)) + if (toUnicodeOk && toAsciiTOk) QCOMPARE(toAceT, toAsciiT); + else if (toAsciiTOk) + QVERIFY(toAceT.isEmpty() || toAceT == toAsciiT); else QCOMPARE(toAceT, QString());