diff --git a/src/corelib/text/qlocale.cpp b/src/corelib/text/qlocale.cpp index 404c736a0ef..f318ec8316a 100644 --- a/src/corelib/text/qlocale.cpp +++ b/src/corelib/text/qlocale.cpp @@ -5031,7 +5031,7 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const // first. (Known issue, QTBUG-104930, on some macOS versions when in // locale en_DE.) Our translation system might have a translation for a // locale the platform doesn't believe in. - const QString name = bcp47Name(separator); + const QString name = QString::fromLatin1(d->m_data->id().name(sep)); // Raw name if (!name.isEmpty() && language() != C && !uiLanguages.contains(name)) { // That uses contains(name) as a cheap pre-test, but there may be an // entry that matches this on purging likely subtags. @@ -5052,8 +5052,8 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const } for (qsizetype i = localeIds.size(); i-- > 0; ) { - QLocaleId id = localeIds.at(i); - qsizetype j; + const QLocaleId id = localeIds.at(i); + Q_ASSERT(id.language_id); if (id.language_id == C) { if (!uiLanguages.contains(u"C"_s)) uiLanguages.append(u"C"_s); @@ -5061,10 +5061,13 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const continue; } + qsizetype j; const QByteArray prior = id.name(sep); + bool faithful = true; // prior matches uiLanguages.at(j - 1) if (isSystem && i < uiLanguages.size()) { // Adding likely-adjusted forms to system locale's list. - Q_ASSERT(uiLanguages.at(i) == QLatin1StringView(prior) + faithful = uiLanguages.at(i) == QLatin1StringView(prior); + Q_ASSERT(faithful // A legacy code may get mapped to an ID with a different name: || QLocaleId::fromName(uiLanguages.at(i)).name(sep) == prior); // Insert just after the entry we're supplementing: @@ -5077,67 +5080,41 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const } const QLocaleId max = id.withLikelySubtagsAdded(); - const QLocaleId min = max.withLikelySubtagsRemoved(); + Q_ASSERT(max.language_id); + Q_ASSERT(max.language_id == id.language_id); + // We can't say the same for script or territory, though. - // Include minimal version (last) unless it's what our locale is derived from: - if (const QByteArray name = min.name(sep); name != prior) - uiLanguages.insert(j, QString::fromLatin1(name)); - else if (!isSystem && min == id) - --j; // Put more specific forms *before* minimal entry. - - // Include various stripped-down versions when likely-equivalent and distinct: - if (id.script_id) { - if (const ushort land = id.territory_id) { - // Keep script, omit territory: - id.territory_id = 0; - if (id != min && id.withLikelySubtagsAdded() == max) { - if (const QByteArray name = id.name(sep); name != prior) - uiLanguages.insert(j, QString::fromLatin1(name)); - } - id.territory_id = land; - } - // Omit script (keep territory if present): - id.script_id = 0; - // Belongs before script-without-territory, even if it duplicates min: - if (id.withLikelySubtagsAdded() == max) { - if (const QByteArray name = id.name(sep); name != prior) + // We have various candidates to consider. + const auto addIfEquivalent = [&j, &uiLanguages, max, sep, prior, faithful](QLocaleId cid) { + if (cid.withLikelySubtagsAdded() == max) { + if (const QByteArray name = cid.name(sep); name != prior) uiLanguages.insert(j, QString::fromLatin1(name)); + else if (faithful) // Later candidates are more specific, so go before. + --j; } - } else { - id.script_id = max.script_id; - if (const ushort land = id.territory_id) { - // Supply script and omit territory: - id.territory_id = 0; - if (id != min && id.withLikelySubtagsAdded() == max) { - if (const QByteArray name = id.name(sep); name != prior) - uiLanguages.insert(j, QString::fromLatin1(name)); - } - id.territory_id = land; - } - // Supply script (keep territory, if present): - if (id != max && id.withLikelySubtagsAdded() == max) { - if (const QByteArray name = id.name(sep); name != prior) - uiLanguages.insert(j, QString::fromLatin1(name)); - } - // Restore to clear: - id.script_id = 0; - } - - if (!id.territory_id) { - // Supply territory, omit script: - Q_ASSERT(!min.territory_id); - Q_ASSERT(!id.script_id); // because we just cleared it. - id.territory_id = max.territory_id; - if (id != max && id.withLikelySubtagsAdded() == max) { - if (const QByteArray name = id.name(sep); name != prior) - uiLanguages.insert(j, QString::fromLatin1(name)); - } - } - - // Include version with all likely sub-tags (first) if distinct from the rest: - if (max != min && max != id) { - if (const QByteArray name = max.name(sep); name != prior) - uiLanguages.insert(j, QString::fromLatin1(name)); + }; + // language + addIfEquivalent({ max.language_id, 0, 0 }); + // language-script + if (max.script_id) + addIfEquivalent({ max.language_id, max.script_id, 0 }); + if (id.script_id && id.script_id != max.script_id) + addIfEquivalent({ id.language_id, id.script_id, 0 }); + // language-territory + if (max.territory_id) + addIfEquivalent({ max.language_id, 0, max.territory_id }); + if (id.territory_id && id.territory_id != max.territory_id) + addIfEquivalent({ id.language_id, 0, id.territory_id }); + // full + if (max.territory_id && max.script_id) + addIfEquivalent(max); + if (max.territory_id && id.script_id && id.script_id != max.script_id) + addIfEquivalent({ id.language_id, id.script_id, max.territory_id }); + if (max.script_id && id.territory_id && id.territory_id != max.territory_id) + addIfEquivalent({ id.language_id, max.script_id, id.territory_id }); + if (id.territory_id && id.territory_id != max.territory_id + && id.script_id && id.script_id != max.script_id) { + addIfEquivalent(id); } } @@ -5158,30 +5135,59 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const return name.startsWith(stem) && (name.size() == stem.size() || name.at(stem.size()) == cut); }; - for (qsizetype i = 0; i < uiLanguages.size(); ++i) { + // As we now forward-traverse the list, we need to keep track of the + // positions just after (a) the block of things added above that are + // equivalent to the current entry and (b) the block of truncations (if any) + // added just after this block. All truncations of entries in (a) belong at + // the end of (b); once i advances to the end of (a) it must jump to just + // after (b). The more specific entries in (a) may well have truncations + // that can also arise from less specific ones later in (a); for the + // purposes of determining whether such truncations go at the end of (b) or + // the end of the list, we thus need to ignore these matches. + qsizetype afterEquivs = 0; + qsizetype afterTruncs = 0; + // From here onwards, we only have the truncations we're adding, whose + // truncations should all have been included already. + // If advancing i brings us to the end of block (a), jump to the end of (b): + for (qsizetype i = 0; i < uiLanguages.size(); ++i >= afterEquivs && (i = afterTruncs)) { const QString entry = uiLanguages.at(i); + const QLocaleId max = QLocaleId::fromName(entry).withLikelySubtagsAdded(); + // Keep track of our two blocks: + if (i >= afterEquivs) { + Q_ASSERT(i >= afterTruncs); // i.e. we just skipped past the end of a block + afterEquivs = i + 1; + // Advance past equivalents of entry: + while (afterEquivs < uiLanguages.size() + && QLocaleId::fromName(uiLanguages.at(afterEquivs)) + .withLikelySubtagsAdded() == max) { + ++afterEquivs; + } + // We'll add any truncations starting there: + afterTruncs = afterEquivs; + } if (hasPrefix(entry, u"C") || hasPrefix(entry, u"und")) continue; - const ushort script = QLocaleId::fromName(entry).withLikelySubtagsAdded().script_id; qsizetype stopAt = uiLanguages.size(); QString prefix = entry; qsizetype at = 0; /* By default we append but if no later entry has this as a prefix and the locale it implies would use the same script as entry, put it - after entry instead. Thus [en-NL, nl-NL, en-GB] will append en but - [en-NL, en-GB, nl-NL] will put it before nl-NL, for example. We - require a script match so we don't pick translations that the user - cannot read, despite knowing the language. (Ideally that would be - a constraint the caller can opt into / out of. See QTBUG-112765.) + after the block of consecutive equivalents of which entry is a part + instead. Thus [en-NL, nl-NL, en-GB] will append en but [en-NL, en-GB, + nl-NL] will put it before nl-NL, for example. We require a script + match so we don't pick translations that the user cannot read, + despite knowing the language. (Ideally that would be a constraint the + caller can opt into / out of. See QTBUG-112765.) */ - bool justAfter = QLocaleId::fromName(prefix).withLikelySubtagsAdded().script_id == script; + bool justAfter + = QLocaleId::fromName(prefix).withLikelySubtagsAdded().script_id == max.script_id; while ((at = prefix.lastIndexOf(cut)) > 0) { prefix = prefix.first(at); // Don't test with hasSeen() as we might defer adding to later, when // we'll need known to see the later entry's offering of this prefix // as a new entry. bool found = known.contains(prefix); - for (qsizetype j = i + 1; !found && j < stopAt; ++j) { + for (qsizetype j = afterTruncs; !found && j < stopAt; ++j) { QString later = uiLanguages.at(j); if (!later.startsWith(prefix)) { const QByteArray laterFull = @@ -5218,7 +5224,7 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const // Now we're committed to adding it, get it into known: (void) known.hasSeen(prefix); if (justAfter) { - uiLanguages.insert(++i, prefix); + uiLanguages.insert(afterTruncs++, prefix); ++stopAt; // All later entries have moved one step later. } else { uiLanguages.append(prefix); diff --git a/tests/auto/corelib/text/qlocale/tst_qlocale.cpp b/tests/auto/corelib/text/qlocale/tst_qlocale.cpp index 97d011ce3cf..f71b76790ef 100644 --- a/tests/auto/corelib/text/qlocale/tst_qlocale.cpp +++ b/tests/auto/corelib/text/qlocale/tst_qlocale.cpp @@ -3712,13 +3712,17 @@ void tst_QLocale::uiLanguages_data() << QLocale(QLocale::Chinese, QLocale::SimplifiedHanScript, QLocale::China) << QStringList{u"zh-Hans-CN"_s, u"zh-CN"_s, u"zh-Hans"_s, u"zh"_s}; - // GB has no native Punjabi locales, so is eliminated by likely subtag rules: QTest::newRow("pa_IN") << QLocale("pa_IN") << QStringList{u"pa-Guru-IN"_s, u"pa-IN"_s, u"pa-Guru"_s, u"pa"_s}; - QTest::newRow("pa_GB") - << QLocale("pa_GB") << QStringList{u"pa-Guru-IN"_s, u"pa-IN"_s, u"pa-Guru"_s, u"pa"_s}; + QTest::newRow("pa_Guru") + << QLocale("pa_Guru") << QStringList{u"pa-Guru-IN"_s, u"pa-IN"_s, u"pa-Guru"_s, u"pa"_s}; QTest::newRow("pa_PK") << QLocale("pa_PK") << QStringList{u"pa-Arab-PK"_s, u"pa-PK"_s, u"pa-Arab"_s, u"pa"_s}; + QTest::newRow("pa_Arab") + << QLocale("pa_Arab") << QStringList{u"pa-Arab-PK"_s, u"pa-PK"_s, u"pa-Arab"_s, u"pa"_s}; + // GB has no native Punjabi locales, so GB is eliminated by likely subtag rules: + QTest::newRow("pa_GB") + << QLocale("pa_GB") << QStringList{u"pa-Guru-IN"_s, u"pa-IN"_s, u"pa-Guru"_s, u"pa"_s}; QTest::newRow("pa_Arab_GB") << QLocale("pa_Arab_GB") << QStringList{u"pa-Arab-PK"_s, u"pa-PK"_s, u"pa-Arab"_s, u"pa"_s}; @@ -4151,67 +4155,70 @@ void tst_QLocale::mySystemLocale_data() QTest::addRow("empty") << u"no-US"_s << QLocale::NorwegianBokmal - << QStringList{u"nb-US"_s, u"nb-Latn-US"_s, + << QStringList{u"nb-Latn-US"_s, u"nb-US"_s, u"nb-Latn-NO"_s, u"nb-NO"_s, u"nb-Latn"_s, u"nb"_s}; QTest::addRow("no") // QTBUG-131127 << u"no"_s << QLocale::NorwegianBokmal << QStringList{u"no"_s, u"nb-Latn-NO"_s, u"nb-NO"_s, u"nb-Latn"_s, - u"en-US"_s, u"en-Latn-US"_s, u"en-Latn"_s, u"en"_s, + u"en-Latn-US"_s, u"en-US"_s, u"en-Latn"_s, u"en"_s, u"nb"_s}; QTest::addRow("en-Latn") // Android crash << u"en-Latn"_s << QLocale::English - << QStringList{u"en-Latn"_s, u"en-Latn-US"_s, u"en-US"_s, u"en"_s, - u"en-NO"_s, u"en-Latn-NO"_s}; + << QStringList{u"en-Latn-US"_s, u"en-US"_s, u"en-Latn"_s, u"en"_s, + u"en-Latn-NO"_s, u"en-NO"_s}; QTest::addRow("anglo-dutch") // QTBUG-131894 << u"en-NL"_s << QLocale::English - << QStringList{u"en-NL"_s, u"en-Latn-NL"_s, + << QStringList{u"en-Latn-NL"_s, u"en-NL"_s, // No later en-Latn-* or en-* in the list, so include truncations now: u"en-Latn"_s, u"en"_s, - u"nl-NL"_s, u"nl-Latn-NL"_s, u"nl-Latn"_s, u"nl"_s}; + u"nl-Latn-NL"_s, u"nl-NL"_s, u"nl-Latn"_s, u"nl"_s}; QTest::addRow("anglo-dutch-GB") << u"en-NL-GB"_s << QLocale::English - << QStringList{u"en-NL"_s, u"en-Latn-NL"_s, - u"nl-NL"_s, u"nl-Latn-NL"_s, u"nl-Latn"_s, u"nl"_s, - u"en-GB"_s, u"en-Latn-GB"_s, u"en-Latn"_s, u"en"_s}; + << QStringList{u"en-Latn-NL"_s, u"en-NL"_s, + u"nl-Latn-NL"_s, u"nl-NL"_s, u"nl-Latn"_s, u"nl"_s, + u"en-Latn-GB"_s, u"en-GB"_s, u"en-Latn"_s, u"en"_s}; QTest::addRow("catalan") << u"ca"_s << QLocale::Catalan - << QStringList{u"ca"_s, u"ca-Latn-ES"_s, u"ca-ES"_s, u"ca-Latn"_s}; + << QStringList{u"ca-Latn-ES"_s, u"ca-ES"_s, u"ca-Latn"_s, u"ca"_s}; QTest::addRow("catalan-spain") << u"ca-ES"_s << QLocale::Catalan - << QStringList{u"ca-ES"_s, u"ca-Latn-ES"_s, u"ca-Latn"_s, u"ca"_s}; + << QStringList{u"ca-Latn-ES"_s, u"ca-ES"_s, u"ca-Latn"_s, u"ca"_s}; QTest::addRow("catalan-latin") << u"ca-Latn"_s << QLocale::Catalan - << QStringList{u"ca-Latn"_s, u"ca-Latn-ES"_s, u"ca-ES"_s, u"ca"_s}; + << QStringList{u"ca-Latn-ES"_s, u"ca-ES"_s, u"ca-Latn"_s, u"ca"_s}; QTest::addRow("ukrainian") << u"uk"_s << QLocale::Ukrainian - << QStringList{u"uk"_s, u"uk-Cyrl-UA"_s, u"uk-UA"_s, u"uk-Cyrl"_s}; + << QStringList{u"uk-Cyrl-UA"_s, u"uk-UA"_s, u"uk-Cyrl"_s, u"uk"_s}; QTest::addRow("english-germany") << u"en-DE"_s << QLocale::English // First two were missed out before fix to QTBUG-104930: - << QStringList{u"en-DE"_s, u"en-Latn-DE"_s, - u"en-GB"_s, u"en-Latn-GB"_s, - u"de-DE"_s, u"de-Latn-DE"_s, u"de-Latn"_s, u"de"_s, + << QStringList{u"en-Latn-DE"_s, u"en-DE"_s, + u"en-Latn-GB"_s, u"en-GB"_s, + u"de-Latn-DE"_s, u"de-DE"_s, u"de-Latn"_s, u"de"_s, // Fallbacks implied by those: u"en-Latn"_s, u"en"_s}; QTest::addRow("german") << u"de"_s << QLocale::German - << QStringList{u"de"_s, u"de-Latn-DE"_s, u"de-DE"_s, u"de-Latn"_s}; + << QStringList{u"de-Latn-DE"_s, u"de-DE"_s, u"de-Latn"_s, u"de"_s}; QTest::addRow("german-britain") << u"de-GB"_s << QLocale::German - << QStringList{u"de-GB"_s, u"de-Latn-GB"_s, u"de-Latn"_s, u"de"_s}; + << QStringList{u"de-Latn-GB"_s, u"de-GB"_s, u"de-Latn"_s, u"de"_s}; QTest::addRow("chinese-min") << u"zh"_s << QLocale::Chinese - << QStringList{u"zh"_s, u"zh-Hans-CN"_s, u"zh-CN"_s, u"zh-Hans"_s}; + << QStringList{u"zh-Hans-CN"_s, u"zh-CN"_s, u"zh-Hans"_s, u"zh"_s}; QTest::addRow("chinese-full") << u"zh-Hans-CN"_s << QLocale::Chinese << QStringList{u"zh-Hans-CN"_s, u"zh-CN"_s, u"zh-Hans"_s, u"zh"_s}; QTest::addRow("chinese-taiwan") << u"zh-TW"_s << QLocale::Chinese - << QStringList{u"zh-TW"_s, u"zh-Hant-TW"_s, u"zh-Hant"_s, u"zh"_s}; + << QStringList{u"zh-Hant-TW"_s, u"zh-TW"_s, u"zh-Hant"_s, u"zh"_s}; + QTest::addRow("chinese-trad") + << u"zh-Hant"_s << QLocale::Chinese + << QStringList{u"zh-Hant-TW"_s, u"zh-TW"_s, u"zh-Hant"_s, u"zh"_s}; // For C, it should preserve what the system gave us but only add "C", never anything more: QTest::addRow("C") << u"C"_s << QLocale::C << QStringList{u"C"_s}; @@ -4227,22 +4234,22 @@ void tst_QLocale::mySystemLocale_data() QTest::newRow("en-Dsrt-GB") << u"en-Dsrt-GB"_s << QLocale::English << QStringList{u"en-Dsrt-GB"_s, u"en-Dsrt"_s, - u"en-GB"_s, u"en-Latn-GB"_s, u"en-Latn"_s, u"en"_s}; + u"en-Latn-GB"_s, u"en-GB"_s, u"en-Latn"_s, u"en"_s}; QTest::newRow("en-mixed") << u"en-FO"_s << QLocale::English - << QStringList{u"en-FO"_s, u"en-Latn-FO"_s, u"en-DK"_s, u"en-Latn-DK"_s, - u"en-GB"_s, u"en-Latn-GB"_s, - u"fo-FO"_s, u"fo-Latn-FO"_s, u"fo-Latn"_s, u"fo"_s, - u"da-FO"_s, u"da-Latn-FO"_s, - u"da-DK"_s, u"da-Latn-DK"_s, u"da-Latn"_s, u"da"_s, + << QStringList{u"en-Latn-FO"_s, u"en-FO"_s, u"en-Latn-DK"_s, u"en-DK"_s, + u"en-Latn-GB"_s, u"en-GB"_s, + u"fo-Latn-FO"_s, u"fo-FO"_s, u"fo-Latn"_s, u"fo"_s, + u"da-Latn-FO"_s, u"da-FO"_s, + u"da-Latn-DK"_s, u"da-DK"_s, u"da-Latn"_s, u"da"_s, // Fallbacks implied by those: u"en-Latn"_s, u"en"_s}; QTest::newRow("polylingual-CA") << u"de-CA"_s << QLocale::German - << QStringList{u"de-CA"_s, u"de-Latn-CA"_s, u"en-CA"_s, u"en-Latn-CA"_s, - u"fr-CA"_s, u"fr-Latn-CA"_s, u"de-AT"_s, u"de-Latn-AT"_s, - u"en-GB"_s, u"en-Latn-GB"_s, - u"fr-FR"_s, u"fr-Latn-FR"_s, u"fr-Latn"_s, u"fr"_s, + << QStringList{u"de-Latn-CA"_s, u"de-CA"_s, u"en-Latn-CA"_s, u"en-CA"_s, + u"fr-Latn-CA"_s, u"fr-CA"_s, u"de-Latn-AT"_s, u"de-AT"_s, + u"en-Latn-GB"_s, u"en-GB"_s, + u"fr-Latn-FR"_s, u"fr-FR"_s, u"fr-Latn"_s, u"fr"_s, // Fallbacks: u"de-Latn"_s, u"de"_s, u"en-Latn"_s, u"en"_s};