From 15189742c89fcde334b3c4f11f09487c7611f1e1 Mon Sep 17 00:00:00 2001 From: Edward Welbourne Date: Mon, 2 Dec 2024 14:53:41 +0100 Subject: [PATCH] QLocale: fix likely subtags to include und -> en_Latn_US The lack of this was hidden by other rules (redundant with it) until CLDR v45, but v46 prunes the redundant rules, breaking this. So include the missing rule and tweak the code that assumed likely sub-tag rules preserved language, since this one doesn't. Rework the tail of withLikelySubtagsAdded() to correctly use this rule, now that we have it. (The prior comment about there being no match-all was wrong: CLDR did have it, but our data skipped it.) Amended one test affected by it (when system locale wasn't en_US). On picking to 6.8, uiLanguages() needed some coaxing to avoid duplicate C locale entries in tests of qualified C locale. Task-number: QTBUG-130877 Change-Id: I2a415b67af4bc8aa6a766bcc1e349ee5bda9f174 Reviewed-by: Mate Barany (cherry picked from commit 303863170c3ea7d1ee1b7188f507ad432ed3f860) --- src/corelib/text/qlocale.cpp | 31 +++++++++++-------- src/corelib/text/qlocale_data_p.h | 5 +-- .../auto/corelib/text/qlocale/tst_qlocale.cpp | 2 +- util/locale_database/cldr.py | 2 -- util/locale_database/qlocalexml.py | 4 ++- 5 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/corelib/text/qlocale.cpp b/src/corelib/text/qlocale.cpp index d7fb3a8d6ba..4f5b5452648 100644 --- a/src/corelib/text/qlocale.cpp +++ b/src/corelib/text/qlocale.cpp @@ -374,15 +374,17 @@ QLocaleId QLocaleId::withLikelySubtagsAdded() const noexcept return value; } } - if (matchesAll()) { // Skipped all of the above. - // CLDR has no match-all at v37, but might get one some day ... - pairs = std::lower_bound(pairs, afterPairs, sought); - if (pairs < afterPairs) { - // All other keys are < match-all. - Q_ASSERT(pairs + 1 == afterPairs); - Q_ASSERT(pairs->key.matchesAll()); - return pairs->value; - } + // Finally, fall back to the match-all rule (if there is one): + pairs = afterPairs - 1; // All other keys are < match-all. + if (pairs->key.matchesAll()) { + QLocaleId value = pairs->value; + if (language_id) + value.language_id = language_id; + if (territory_id) + value.territory_id = territory_id; + if (script_id) + value.script_id = script_id; + return value; } return *this; } @@ -4899,6 +4901,13 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const } for (qsizetype i = localeIds.size(); i-- > 0; ) { QLocaleId id = localeIds.at(i); + if (id.language_id == C) { + // Attempt no likely sub-tag amendments to C: + const QString name = QString::fromLatin1(id.name(sep)); + if (!uiLanguages.contains(name)) + uiLanguages.append(name); + continue; + } qsizetype j; QByteArray prior; if (isSystem && i < uiLanguages.size()) { @@ -4907,10 +4916,6 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const prior = uiLanguages.at(i).toLatin1(); // Insert just after the entry we're supplementing: j = i + 1; - } else if (id.language_id == C) { - // Attempt no likely sub-tag amendments to C: - uiLanguages.append(QString::fromLatin1(id.name(sep))); - continue; } else { // Plain locale or empty system uiLanguages; just append. prior = id.name(sep); diff --git a/src/corelib/text/qlocale_data_p.h b/src/corelib/text/qlocale_data_p.h index 24d022c89b2..a61240beca5 100644 --- a/src/corelib/text/qlocale_data_p.h +++ b/src/corelib/text/qlocale_data_p.h @@ -80,7 +80,7 @@ struct LanguageCodeEntry { // GENERATED PART STARTS HERE /* - This part of the file was generated on 2024-05-31 from the + This part of the file was generated on 2024-12-02 from the Common Locale Data Repository v45 http://www.unicode.org/cldr/ @@ -1051,7 +1051,8 @@ static inline constexpr QLocaleId likely_subtags[] = { { 0, 138, 0 }, { 302, 138, 227 }, // und_Ugar -> uga_Ugar_SY { 0, 139, 0 }, { 308, 139, 134 }, // und_Vaii -> vai_Vaii_LR { 0, 141, 0 }, { 255, 141, 50 }, // und_Yiii -> ii_Yiii_CN - { 0, 142, 0 }, { 339, 142, 161 } // und_Rohg -> rhg_Rohg_MM + { 0, 142, 0 }, { 339, 142, 161 }, // und_Rohg -> rhg_Rohg_MM + { 0, 0, 0 }, { 75, 66, 248 } // und -> en_Latn_US }; static inline constexpr quint16 locale_index[] = { diff --git a/tests/auto/corelib/text/qlocale/tst_qlocale.cpp b/tests/auto/corelib/text/qlocale/tst_qlocale.cpp index cf46889b39f..a358ed1343c 100644 --- a/tests/auto/corelib/text/qlocale/tst_qlocale.cpp +++ b/tests/auto/corelib/text/qlocale/tst_qlocale.cpp @@ -419,7 +419,7 @@ void tst_QLocale::defaulted_ctor() QCOMPARE(l.territory(), exp_country); \ } while (false) - TEST_CTOR(AnyLanguage, AnyTerritory, default_lang, default_country); + TEST_CTOR(AnyLanguage, AnyTerritory, QLocale::English, QLocale::UnitedStates); TEST_CTOR(C, AnyTerritory, QLocale::C, QLocale::AnyTerritory); TEST_CTOR(Aymara, AnyTerritory, default_lang, default_country); TEST_CTOR(Aymara, France, default_lang, default_country); diff --git a/util/locale_database/cldr.py b/util/locale_database/cldr.py index 5bb1407b1ee..d4c9133d985 100644 --- a/util/locale_database/cldr.py +++ b/util/locale_database/cldr.py @@ -59,8 +59,6 @@ class CldrReader (object): else: self.grumble(f'Skipping likelySubtag "{got}" -> "{use}" ({e})\n') continue - if all(code.startswith('Any') and code[3].isupper() for code in have[:-1]): - continue give = (give[0], # Substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags diff --git a/util/locale_database/qlocalexml.py b/util/locale_database/qlocalexml.py index 9ad87855ca1..d4c13c921ff 100644 --- a/util/locale_database/qlocalexml.py +++ b/util/locale_database/qlocalexml.py @@ -151,7 +151,9 @@ class QLocaleXmlReader (object): sub-tags mapping says language's default locale uses the given script and territory.""" for have, give in self.__likely: - if have[1:] == ('AnyScript', 'AnyTerritory') and give[2] != 'AnyTerritory': + if (have[0] != 'AnyLanguage' + and have[1:] == ('AnyScript', 'AnyTerritory') + and give[2] != 'AnyTerritory'): assert have[0] == give[0], (have, give) yield ((self.__langByName[give[0]][0], self.__textByName[give[1]][0]),