diff --git a/src/corelib/text/qlocale.cpp b/src/corelib/text/qlocale.cpp index eff083b3d94..57c5d8fa1f2 100644 --- a/src/corelib/text/qlocale.cpp +++ b/src/corelib/text/qlocale.cpp @@ -26,6 +26,7 @@ QT_WARNING_DISABLE_GCC("-Wfree-nonheap-object") // false positive tracking #include "qcalendar.h" #include "qdatastream.h" #include "qdebug.h" +#include "private/qduplicatetracker_p.h" #include "qhashfunctions.h" #include "qstring.h" #include "qlocale.h" @@ -4884,7 +4885,7 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const // first. (Known issue, QTBUG-104930, on some macOS versions when in // locale en_DE.) Our translation system might have a translation for a // locale the platform doesn't believe in. - const QString name = bcp47Name(separator); + const QString name = QString::fromLatin1(d->m_data->id().name(sep)); // Raw name if (!name.isEmpty() && language() != C && !uiLanguages.contains(name)) { // That uses contains(name) as a cheap pre-test, but there may be an // entry that matches this on purging likely subtags. @@ -4902,6 +4903,7 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const { localeIds.append(d->m_data->id()); } + for (qsizetype i = localeIds.size(); i-- > 0; ) { QLocaleId id = localeIds.at(i); if (id.language_id == C) { @@ -4913,10 +4915,13 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const } qsizetype j; QByteArray prior; + bool faithful = true; // prior == id.name(sep) if (isSystem && i < uiLanguages.size()) { // Adding likely-adjusted forms to system locale's list. // Name the locale is derived from: prior = uiLanguages.at(i).toLatin1(); + // When we come to insert max, we do so before prior only if it matches prior. + faithful = prior == id.name(sep); // Insert just after the entry we're supplementing: j = i + 1; } else { @@ -4932,22 +4937,51 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const // Include minimal version (last) unless it's what our locale is derived from: if (auto name = min.name(sep); name != prior) uiLanguages.insert(j, QString::fromLatin1(name)); - else if (!isSystem) - --j; // bcp47Name() matches min(): put more specific forms *before* it. + else if (faithful) + --j; // List entry matches min(): put more specific forms *before* it. + // Include various stripped-down versions when likely-equivalent and distinct: if (id.script_id) { - // Include scriptless version if likely-equivalent and distinct: + if (const ushort land = id.territory_id) { + // Keep script, omit territory: + id.territory_id = 0; + if (id != min && id.withLikelySubtagsAdded() == max) { + if (const QByteArray name = id.name(sep); name != prior) + uiLanguages.insert(j, QString::fromLatin1(name)); + } + id.territory_id = land; + } + // Omit script (keep territory if present): id.script_id = 0; - if (id != min && id.withLikelySubtagsAdded() == max) { - if (auto name = id.name(sep); name != prior) + // Belongs before script-without-territory, even if it duplicates min: + if (id.withLikelySubtagsAdded() == max) { + if (const QByteArray name = id.name(sep); name != prior) uiLanguages.insert(j, QString::fromLatin1(name)); } + } else { + id.script_id = max.script_id; + if (const ushort land = id.territory_id) { + // Supply script and omit territory: + id.territory_id = 0; + if (id != min && id.withLikelySubtagsAdded() == max) { + if (const QByteArray name = id.name(sep); name != prior) + uiLanguages.insert(j, QString::fromLatin1(name)); + } + id.territory_id = land; + } + // Supply script (keep territory, if present): + if (id != max && id.withLikelySubtagsAdded() == max) { + if (const QByteArray name = id.name(sep); name != prior) + uiLanguages.insert(j, QString::fromLatin1(name)); + } + // Restore to clear: + id.script_id = 0; } if (!id.territory_id) { + // Supply territory, omit script: Q_ASSERT(!min.territory_id); Q_ASSERT(!id.script_id); // because we just cleared it. - // Include version with territory if it likely-equivalent and distinct: id.territory_id = max.territory_id; if (id != max && id.withLikelySubtagsAdded() == max) { if (auto name = id.name(sep); name != prior) @@ -4958,9 +4992,19 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const // Include version with all likely sub-tags (first) if distinct from the rest: if (max != min && max != id) { if (auto name = max.name(sep); name != prior) - uiLanguages.insert(j, QString::fromLatin1(name)); + uiLanguages.insert(faithful ? i : j, QString::fromLatin1(name)); } } + + // Second pass: deduplicate. + QDuplicateTracker known(uiLanguages.size()); + for (qsizetype i = 0; i < uiLanguages.size();) { + if (known.hasSeen(uiLanguages.at(i))) + uiLanguages.remove(i); + else + ++i; + } + return uiLanguages; } diff --git a/tests/auto/corelib/text/qlocale/tst_qlocale.cpp b/tests/auto/corelib/text/qlocale/tst_qlocale.cpp index dae96188591..e9c6188581b 100644 --- a/tests/auto/corelib/text/qlocale/tst_qlocale.cpp +++ b/tests/auto/corelib/text/qlocale/tst_qlocale.cpp @@ -3689,12 +3689,10 @@ void tst_QLocale::uiLanguages_data() QTest::newRow("C") << QLocale::c() << QStringList{QString("C")}; QTest::newRow("en_US") - << QLocale("en_US") - << QStringList{QString("en-Latn-US"), QString("en-US"), QString("en")}; - + << QLocale("en_US") << QStringList{u"en-Latn-US"_s, u"en-US"_s, u"en-Latn"_s, u"en"_s}; QTest::newRow("en_Latn_US") << QLocale("en_Latn_US") // Specifying the default script makes no difference - << QStringList{QString("en-Latn-US"), QString("en-US"), QString("en")}; + << QStringList{u"en-Latn-US"_s, u"en-US"_s, u"en-Latn"_s, u"en"_s}; QTest::newRow("en_GB") << QLocale("en_GB") @@ -3705,19 +3703,28 @@ void tst_QLocale::uiLanguages_data() << QStringList{QString("en-Dsrt-US"), QString("en-Dsrt")}; QTest::newRow("ru_RU") - << QLocale("ru_RU") - << QStringList{QString("ru-Cyrl-RU"), QString("ru-RU"), QString("ru")}; + << QLocale("ru_RU") << QStringList{u"ru-Cyrl-RU"_s, u"ru-RU"_s, u"ru-Cyrl"_s, u"ru"_s}; QTest::newRow("zh_Hant") << QLocale("zh_Hant") - << QStringList{QString("zh-Hant-TW"), QString("zh-TW")}; + << QStringList{u"zh-Hant-TW"_s, u"zh-TW"_s, u"zh-Hant"_s}; QTest::newRow("zh_TW") << QLocale("zh_TW") - << QStringList{u"zh-Hant-TW"_s, u"zh-TW"_s}; + << QStringList{u"zh-Hant-TW"_s, u"zh-TW"_s, u"zh-Hant"_s}; QTest::newRow("zh_Hans_CN") << QLocale(QLocale::Chinese, QLocale::SimplifiedHanScript, QLocale::China) - << QStringList{QString("zh-Hans-CN"), QString("zh-CN"), QString("zh")}; + << QStringList{u"zh-Hans-CN"_s, u"zh-CN"_s, u"zh-Hans"_s, u"zh"_s}; + + QTest::newRow("pa_IN") + << QLocale("pa_IN") << QStringList{u"pa-Guru-IN"_s, u"pa-IN"_s, u"pa-Guru"_s, u"pa"_s}; + QTest::newRow("pa_PK") + << QLocale("pa_PK") << QStringList{u"pa-Arab-PK"_s, u"pa-PK"_s, u"pa-Arab"_s}; + // GB has no native Punjabi locales, so is eliminated by likely subtag rules: + QTest::newRow("pa_GB") + << QLocale("pa_GB") << QStringList{u"pa-Guru-IN"_s, u"pa-IN"_s, u"pa-Guru"_s, u"pa"_s}; + QTest::newRow("pa_Arab_GB") + << QLocale("pa_Arab_GB") << QStringList{u"pa-Arab-PK"_s, u"pa-PK"_s, u"pa-Arab"_s}; // We presently map und (or any other unrecognized language) to C, ignoring // what a sub-tag lookup would surely find us. @@ -4081,8 +4088,28 @@ public: { switch (type) { case UILanguages: + if (m_name == u"en-Latn") + return QVariant(QStringList{u"en-NO"_s}); if (m_name == u"en-DE") // QTBUG-104930: simulate macOS's list not including m_name. return QVariant(QStringList{QStringLiteral("en-GB"), QStringLiteral("de-DE")}); + if (m_name == u"en-Dsrt-GB") + return QVariant(QStringList{u"en-Dsrt-GB"_s, u"en-GB"_s}); + if (m_name == u"en-FO") { // Nominally Faroe Islands, used for en-mixed test + return QVariant(QStringList{u"en-DK"_s, u"en-GB"_s, u"fo-FO"_s, + u"da-FO"_s, u"da-DK"_s}); + } + if (m_name == u"en-NL") // Anglophone in Netherlands: + return QVariant(QStringList{u"en-NL"_s, u"nl-NL"_s}); + if (m_name == u"en-NL-GB") // Netherlander at work for a GB-ish employer: + return QVariant(QStringList{u"en-NL"_s, u"nl-NL"_s, u"en-GB"_s}); + if (m_name == u"de-CA") { // Imagine a 2nd generation Canadian of de-AT ancestry ... + return QVariant(QStringList{u"en-CA"_s, u"fr-CA"_s, u"de-AT"_s, + u"en-GB"_s, u"fr-FR"_s}); + } + if (m_name == u"no") // QTBUG-131127 + return QVariant(QStringList{u"no"_s, u"en-US"_s, u"nb"_s}); + if (m_name == u"no-US") // Empty query result: + return QVariant(QStringList{}); return QVariant(QStringList{m_name}); case LanguageId: return m_id.language_id; @@ -4115,40 +4142,65 @@ void tst_QLocale::mySystemLocale_data() QTest::addColumn("language"); QTest::addColumn("uiLanguages"); + QTest::addRow("empty") + << u"no-US"_s << QLocale::NorwegianBokmal + << QStringList{u"nb-Latn-US"_s, u"nb-US"_s, + u"nb-Latn-NO"_s, u"nb-NO"_s, u"nb-Latn"_s, u"nb"_s}; + QTest::addRow("no") // QTBUG-131127 + << u"no"_s << QLocale::NorwegianBokmal + << QStringList{u"no"_s, u"nb-Latn-NO"_s, u"nb-NO"_s, u"nb-Latn"_s, u"nb"_s, + u"en-Latn-US"_s, u"en-US"_s, u"en-Latn"_s, u"en"_s }; + QTest::addRow("en-Latn") // Android crash + << u"en-Latn"_s << QLocale::English + << QStringList{u"en-Latn-US"_s, u"en-Latn"_s, u"en-US"_s, u"en"_s, + u"en-Latn-NO"_s, u"en-NO"_s}; + + QTest::addRow("anglo-dutch") // QTBUG-131894 + << u"en-NL"_s << QLocale::English + << QStringList{u"en-Latn-NL"_s, u"en-NL"_s, + u"nl-Latn-NL"_s, u"nl-NL"_s, u"nl-Latn"_s, u"nl"_s}; + QTest::addRow("anglo-dutch-GB") + << u"en-NL-GB"_s << QLocale::English + << QStringList{u"en-Latn-NL"_s, u"en-NL"_s, + u"nl-Latn-NL"_s, u"nl-NL"_s, u"nl-Latn"_s, u"nl"_s, + u"en-Latn-GB"_s, u"en-GB"_s}; + QTest::addRow("catalan") << QString("ca") << QLocale::Catalan - << QStringList{QStringLiteral("ca"), QStringLiteral("ca-Latn-ES"), QStringLiteral("ca-ES")}; + << QStringList{u"ca-Latn-ES"_s, u"ca-ES"_s, u"ca-Latn"_s, u"ca"_s}; QTest::addRow("catalan-spain") - << QString("ca-ES") << QLocale::Catalan - << QStringList{QStringLiteral("ca-ES"), QStringLiteral("ca-Latn-ES"), QStringLiteral("ca")}; + << u"ca-ES"_s << QLocale::Catalan + << QStringList{u"ca-Latn-ES"_s, u"ca-ES"_s, u"ca-Latn"_s, u"ca"_s}; QTest::addRow("catalan-latin") << QString("ca-Latn") << QLocale::Catalan - << QStringList{QStringLiteral("ca-Latn"), QStringLiteral("ca-Latn-ES"), + << QStringList{QStringLiteral("ca-Latn-ES"), QStringLiteral("ca-Latn"), QStringLiteral("ca-ES"), QStringLiteral("ca")}; QTest::addRow("ukrainian") << QString("uk") << QLocale::Ukrainian - << QStringList{QStringLiteral("uk"), QStringLiteral("uk-Cyrl-UA"), QStringLiteral("uk-UA")}; + << QStringList{u"uk-Cyrl-UA"_s, u"uk-UA"_s, u"uk-Cyrl"_s, u"uk"_s}; QTest::addRow("english-germany") << QString("en-DE") << QLocale::English // First two were missed out before fix to QTBUG-104930: - << QStringList{QStringLiteral("en-DE"), QStringLiteral("en-Latn-DE"), - QStringLiteral("en-GB"), QStringLiteral("en-Latn-GB"), - QStringLiteral("de-DE"), QStringLiteral("de-Latn-DE"), QStringLiteral("de")}; + << QStringList{u"en-Latn-DE"_s, u"en-DE"_s, + u"en-Latn-GB"_s, u"en-GB"_s, + u"de-Latn-DE"_s, u"de-DE"_s, u"de-Latn"_s, u"de"_s}; + QTest::addRow("german") << QString("de") << QLocale::German - << QStringList{QStringLiteral("de"), QStringLiteral("de-Latn-DE"), QStringLiteral("de-DE")}; + << QStringList{u"de-Latn-DE"_s, u"de-DE"_s, u"de-Latn"_s, u"de"_s}; QTest::addRow("german-britain") << QString("de-GB") << QLocale::German - << QStringList{QStringLiteral("de-GB"), QStringLiteral("de-Latn-GB")}; + << QStringList{u"de-Latn-GB"_s, u"de-GB"_s}; QTest::addRow("chinese-min") << QString("zh") << QLocale::Chinese - << QStringList{QStringLiteral("zh"), QStringLiteral("zh-Hans-CN"), QStringLiteral("zh-CN")}; + << QStringList{u"zh-Hans-CN"_s, u"zh-CN"_s, u"zh-Hans"_s, u"zh"_s}; QTest::addRow("chinese-full") - << QString("zh-Hans-CN") << QLocale::Chinese - << QStringList{QStringLiteral("zh-Hans-CN"), QStringLiteral("zh-CN"), QStringLiteral("zh")}; + << u"zh-Hans-CN"_s << QLocale::Chinese + << QStringList{u"zh-Hans-CN"_s, u"zh-CN"_s, u"zh-Hans"_s, u"zh"_s}; QTest::addRow("chinese-taiwan") << u"zh-TW"_s << QLocale::Chinese - << QStringList{u"zh-TW"_s, u"zh-Hant-TW"_s}; + // Not ideal: want zh-TW before zh-Hant, but zh-TW is minimal so last + << QStringList{u"zh-Hant-TW"_s, u"zh-Hant"_s, u"zh-TW"_s}; // For C, it should preserve what the system gave us but only add "C", never anything more: QTest::addRow("C") << QString("C") << QLocale::C << QStringList{QStringLiteral("C")}; @@ -4168,8 +4220,24 @@ void tst_QLocale::mySystemLocale_data() << QString("C-CN") << QLocale::C << QStringList{QStringLiteral("C-CN"), QStringLiteral("C")}; QTest::addRow("C-Hans-CN") - << QString("C-Hans-CN") << QLocale::C - << QStringList{QStringLiteral("C-Hans-CN"), QStringLiteral("C")}; + << u"C-Hans-CN"_s << QLocale::C << QStringList{u"C-Hans-CN"_s, u"C"_s}; + + QTest::newRow("en-Dsrt-GB") + << u"en-Dsrt-GB"_s << QLocale::English + << QStringList{u"en-Dsrt-GB"_s, u"en-Latn-GB"_s, u"en-GB"_s}; + QTest::newRow("en-mixed") + << u"en-FO"_s << QLocale::English + << QStringList{u"en-Latn-FO"_s, u"en-FO"_s, u"en-Latn-DK"_s, u"en-DK"_s, + u"en-Latn-GB"_s, u"en-GB"_s, + u"fo-Latn-FO"_s, u"fo-FO"_s, u"fo-Latn"_s, u"fo"_s, + u"da-Latn-FO"_s, u"da-FO"_s, + u"da-Latn-DK"_s, u"da-DK"_s, u"da-Latn"_s, u"da"_s}; + QTest::newRow("polylingual-CA") + << u"de-CA"_s << QLocale::German + << QStringList{u"de-Latn-CA"_s, u"de-CA"_s, u"en-Latn-CA"_s, u"en-CA"_s, + u"fr-Latn-CA"_s, u"fr-CA"_s, u"de-Latn-AT"_s, u"de-AT"_s, + u"en-Latn-GB"_s, u"en-GB"_s, + u"fr-Latn-FR"_s, u"fr-FR"_s, u"fr-Latn"_s, u"fr"_s}; QTest::newRow("und-US") << QString("und-US") << QLocale::C