QLocale: fix likely subtags to include und -> en_Latn_US

The lack of this rule was hidden by other rules (redundant with it) up
to and including CLDR v45, but v46 prunes the redundant rules, exposing
the omission. So include the missing rule and tweak the code that
assumed likely sub-tag rules preserve language, since this one doesn't.
Rework the tail of withLikelySubtagsAdded() to correctly use this rule,
now that we have it. (The prior comment about there being no match-all
was wrong: CLDR did have it, but our data skipped it.) Amend the one
test that was affected by it (when the system locale wasn't en_US).

On cherry-picking to 6.8, uiLanguages() also needed some coaxing to
avoid duplicate C locale entries in tests of a qualified C locale.

Task-number: QTBUG-130877
Change-Id: I2a415b67af4bc8aa6a766bcc1e349ee5bda9f174
Reviewed-by: Mate Barany <mate.barany@qt.io>
(cherry picked from commit 303863170c3ea7d1ee1b7188f507ad432ed3f860)
This commit is contained in:
Edward Welbourne 2024-12-02 14:53:41 +01:00
parent 92e454c000
commit 15189742c8
5 changed files with 25 additions and 19 deletions

View File

@ -374,15 +374,17 @@ QLocaleId QLocaleId::withLikelySubtagsAdded() const noexcept
return value; return value;
} }
} }
if (matchesAll()) { // Skipped all of the above. // Finally, fall back to the match-all rule (if there is one):
// CLDR has no match-all at v37, but might get one some day ... pairs = afterPairs - 1; // All other keys are < match-all.
pairs = std::lower_bound(pairs, afterPairs, sought); if (pairs->key.matchesAll()) {
if (pairs < afterPairs) { QLocaleId value = pairs->value;
// All other keys are < match-all. if (language_id)
Q_ASSERT(pairs + 1 == afterPairs); value.language_id = language_id;
Q_ASSERT(pairs->key.matchesAll()); if (territory_id)
return pairs->value; value.territory_id = territory_id;
} if (script_id)
value.script_id = script_id;
return value;
} }
return *this; return *this;
} }
@ -4899,6 +4901,13 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
} }
for (qsizetype i = localeIds.size(); i-- > 0; ) { for (qsizetype i = localeIds.size(); i-- > 0; ) {
QLocaleId id = localeIds.at(i); QLocaleId id = localeIds.at(i);
if (id.language_id == C) {
// Attempt no likely sub-tag amendments to C:
const QString name = QString::fromLatin1(id.name(sep));
if (!uiLanguages.contains(name))
uiLanguages.append(name);
continue;
}
qsizetype j; qsizetype j;
QByteArray prior; QByteArray prior;
if (isSystem && i < uiLanguages.size()) { if (isSystem && i < uiLanguages.size()) {
@ -4907,10 +4916,6 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
prior = uiLanguages.at(i).toLatin1(); prior = uiLanguages.at(i).toLatin1();
// Insert just after the entry we're supplementing: // Insert just after the entry we're supplementing:
j = i + 1; j = i + 1;
} else if (id.language_id == C) {
// Attempt no likely sub-tag amendments to C:
uiLanguages.append(QString::fromLatin1(id.name(sep)));
continue;
} else { } else {
// Plain locale or empty system uiLanguages; just append. // Plain locale or empty system uiLanguages; just append.
prior = id.name(sep); prior = id.name(sep);

View File

@ -80,7 +80,7 @@ struct LanguageCodeEntry {
// GENERATED PART STARTS HERE // GENERATED PART STARTS HERE
/* /*
This part of the file was generated on 2024-05-31 from the This part of the file was generated on 2024-12-02 from the
Common Locale Data Repository v45 Common Locale Data Repository v45
http://www.unicode.org/cldr/ http://www.unicode.org/cldr/
@ -1051,7 +1051,8 @@ static inline constexpr QLocaleId likely_subtags[] = {
{ 0, 138, 0 }, { 302, 138, 227 }, // und_Ugar -> uga_Ugar_SY { 0, 138, 0 }, { 302, 138, 227 }, // und_Ugar -> uga_Ugar_SY
{ 0, 139, 0 }, { 308, 139, 134 }, // und_Vaii -> vai_Vaii_LR { 0, 139, 0 }, { 308, 139, 134 }, // und_Vaii -> vai_Vaii_LR
{ 0, 141, 0 }, { 255, 141, 50 }, // und_Yiii -> ii_Yiii_CN { 0, 141, 0 }, { 255, 141, 50 }, // und_Yiii -> ii_Yiii_CN
{ 0, 142, 0 }, { 339, 142, 161 } // und_Rohg -> rhg_Rohg_MM { 0, 142, 0 }, { 339, 142, 161 }, // und_Rohg -> rhg_Rohg_MM
{ 0, 0, 0 }, { 75, 66, 248 } // und -> en_Latn_US
}; };
static inline constexpr quint16 locale_index[] = { static inline constexpr quint16 locale_index[] = {

View File

@ -419,7 +419,7 @@ void tst_QLocale::defaulted_ctor()
QCOMPARE(l.territory(), exp_country); \ QCOMPARE(l.territory(), exp_country); \
} while (false) } while (false)
TEST_CTOR(AnyLanguage, AnyTerritory, default_lang, default_country); TEST_CTOR(AnyLanguage, AnyTerritory, QLocale::English, QLocale::UnitedStates);
TEST_CTOR(C, AnyTerritory, QLocale::C, QLocale::AnyTerritory); TEST_CTOR(C, AnyTerritory, QLocale::C, QLocale::AnyTerritory);
TEST_CTOR(Aymara, AnyTerritory, default_lang, default_country); TEST_CTOR(Aymara, AnyTerritory, default_lang, default_country);
TEST_CTOR(Aymara, France, default_lang, default_country); TEST_CTOR(Aymara, France, default_lang, default_country);

View File

@ -59,8 +59,6 @@ class CldrReader (object):
else: else:
self.grumble(f'Skipping likelySubtag "{got}" -> "{use}" ({e})\n') self.grumble(f'Skipping likelySubtag "{got}" -> "{use}" ({e})\n')
continue continue
if all(code.startswith('Any') and code[3].isupper() for code in have[:-1]):
continue
give = (give[0], give = (give[0],
# Substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags # Substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags

View File

@ -151,7 +151,9 @@ class QLocaleXmlReader (object):
sub-tags mapping says language's default locale uses the given sub-tags mapping says language's default locale uses the given
script and territory.""" script and territory."""
for have, give in self.__likely: for have, give in self.__likely:
if have[1:] == ('AnyScript', 'AnyTerritory') and give[2] != 'AnyTerritory': if (have[0] != 'AnyLanguage'
and have[1:] == ('AnyScript', 'AnyTerritory')
and give[2] != 'AnyTerritory'):
assert have[0] == give[0], (have, give) assert have[0] == give[0], (have, give)
yield ((self.__langByName[give[0]][0], yield ((self.__langByName[give[0]][0],
self.__textByName[give[1]][0]), self.__textByName[give[1]][0]),