QLocale: fix likely subtags to include und -> en_Latn_US

The lack of this rule was hidden by other rules (redundant with it)
up to CLDR v45, but v46 prunes the redundant rules, exposing the gap.
So include the missing rule and tweak the code that assumed likely
sub-tag rules preserve language, since this one doesn't. Rework the
tail of withLikelySubtagsAdded() to correctly use this rule, now that
we have it. (The prior comment about there being no match-all was
wrong: CLDR did have it, but our data skipped it.) Amend one test
affected by it (when the system locale wasn't en_US).

On picking to 6.8, uiLanguages() needed some coaxing to avoid
duplicate C locale entries in tests of qualified C locale.

Task-number: QTBUG-130877
Change-Id: I2a415b67af4bc8aa6a766bcc1e349ee5bda9f174
Reviewed-by: Mate Barany <mate.barany@qt.io>
(cherry picked from commit 303863170c3ea7d1ee1b7188f507ad432ed3f860)
This commit is contained in:
Edward Welbourne 2024-12-02 14:53:41 +01:00
parent 92e454c000
commit 15189742c8
5 changed files with 25 additions and 19 deletions

View File

@ -374,15 +374,17 @@ QLocaleId QLocaleId::withLikelySubtagsAdded() const noexcept
return value;
}
}
if (matchesAll()) { // Skipped all of the above.
// CLDR has no match-all at v37, but might get one some day ...
pairs = std::lower_bound(pairs, afterPairs, sought);
if (pairs < afterPairs) {
// All other keys are < match-all.
Q_ASSERT(pairs + 1 == afterPairs);
Q_ASSERT(pairs->key.matchesAll());
return pairs->value;
}
// Finally, fall back to the match-all rule (if there is one):
pairs = afterPairs - 1; // All other keys are < match-all.
if (pairs->key.matchesAll()) {
QLocaleId value = pairs->value;
if (language_id)
value.language_id = language_id;
if (territory_id)
value.territory_id = territory_id;
if (script_id)
value.script_id = script_id;
return value;
}
return *this;
}
@ -4899,6 +4901,13 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
}
for (qsizetype i = localeIds.size(); i-- > 0; ) {
QLocaleId id = localeIds.at(i);
if (id.language_id == C) {
// Attempt no likely sub-tag amendments to C:
const QString name = QString::fromLatin1(id.name(sep));
if (!uiLanguages.contains(name))
uiLanguages.append(name);
continue;
}
qsizetype j;
QByteArray prior;
if (isSystem && i < uiLanguages.size()) {
@ -4907,10 +4916,6 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
prior = uiLanguages.at(i).toLatin1();
// Insert just after the entry we're supplementing:
j = i + 1;
} else if (id.language_id == C) {
// Attempt no likely sub-tag amendments to C:
uiLanguages.append(QString::fromLatin1(id.name(sep)));
continue;
} else {
// Plain locale or empty system uiLanguages; just append.
prior = id.name(sep);

View File

@ -80,7 +80,7 @@ struct LanguageCodeEntry {
// GENERATED PART STARTS HERE
/*
This part of the file was generated on 2024-05-31 from the
This part of the file was generated on 2024-12-02 from the
Common Locale Data Repository v45
http://www.unicode.org/cldr/
@ -1051,7 +1051,8 @@ static inline constexpr QLocaleId likely_subtags[] = {
{ 0, 138, 0 }, { 302, 138, 227 }, // und_Ugar -> uga_Ugar_SY
{ 0, 139, 0 }, { 308, 139, 134 }, // und_Vaii -> vai_Vaii_LR
{ 0, 141, 0 }, { 255, 141, 50 }, // und_Yiii -> ii_Yiii_CN
{ 0, 142, 0 }, { 339, 142, 161 } // und_Rohg -> rhg_Rohg_MM
{ 0, 142, 0 }, { 339, 142, 161 }, // und_Rohg -> rhg_Rohg_MM
{ 0, 0, 0 }, { 75, 66, 248 } // und -> en_Latn_US
};
static inline constexpr quint16 locale_index[] = {

View File

@ -419,7 +419,7 @@ void tst_QLocale::defaulted_ctor()
QCOMPARE(l.territory(), exp_country); \
} while (false)
TEST_CTOR(AnyLanguage, AnyTerritory, default_lang, default_country);
TEST_CTOR(AnyLanguage, AnyTerritory, QLocale::English, QLocale::UnitedStates);
TEST_CTOR(C, AnyTerritory, QLocale::C, QLocale::AnyTerritory);
TEST_CTOR(Aymara, AnyTerritory, default_lang, default_country);
TEST_CTOR(Aymara, France, default_lang, default_country);

View File

@ -59,8 +59,6 @@ class CldrReader (object):
else:
self.grumble(f'Skipping likelySubtag "{got}" -> "{use}" ({e})\n')
continue
if all(code.startswith('Any') and code[3].isupper() for code in have[:-1]):
continue
give = (give[0],
# Substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags

View File

@ -151,7 +151,9 @@ class QLocaleXmlReader (object):
sub-tags mapping says language's default locale uses the given
script and territory."""
for have, give in self.__likely:
if have[1:] == ('AnyScript', 'AnyTerritory') and give[2] != 'AnyTerritory':
if (have[0] != 'AnyLanguage'
and have[1:] == ('AnyScript', 'AnyTerritory')
and give[2] != 'AnyTerritory'):
assert have[0] == give[0], (have, give)
yield ((self.__langByName[give[0]][0],
self.__textByName[give[1]][0]),