Rework QLocale::uiLanguages()'s order

On back-porting commit 7f9ee43de783105d8de0a0b614751eec639131f8 to
6.8, I worked out how to move the maximal version of each locale ID to
the front, while keeping it after a shorter ID that can't be derived
from it. Forward-port a refactored (and more systematic) version of
those changes now to dev (and thus 6.9) so that we get reasonable
compatibility between versions (albeit with 6.8 still lacking
truncated forms, since its QTranslator takes care of those).

This, however, brought to light a problem in the subsequent addition
and insertion of truncations. Since non-equivalent truncations of the
maximal version may (and often do) use the same script, they match
equivalents that are now later than the maximal one, which caused all
truncations to be appended, whereas some should be inserted just after
the equivalents. Furthermore, had the truncations of the maximal one
been inserted, they'd have appeared just after it, before some of its
equivalents, when they belong after all of them. Rework the handling
of truncations to take account of the fact that entries come in blocks
of equivalent forms; any truncations inserted instead of appended
should be inserted after this block, not between its members.

Adapt tests to match. As a "drive-by", shuffle some Punjabi tests so
that the comment that applies to two of them is followed by those two.

Task-number: QTBUG-131894
Change-Id: I67ea21cf279e8a3b2e8413c27ac0fa5dfabdaca7
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
(cherry picked from commit b6122866962880251fa8a82856d1fb9db43900d6)
Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
This commit is contained in:
Edward Welbourne 2025-01-27 17:52:39 +01:00 committed by Qt Cherry-pick Bot
parent 6067b27071
commit 8c93587612
2 changed files with 118 additions and 105 deletions

View File

@ -5031,7 +5031,7 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
// first. (Known issue, QTBUG-104930, on some macOS versions when in
// locale en_DE.) Our translation system might have a translation for a
// locale the platform doesn't believe in.
const QString name = bcp47Name(separator);
const QString name = QString::fromLatin1(d->m_data->id().name(sep)); // Raw name
if (!name.isEmpty() && language() != C && !uiLanguages.contains(name)) {
// That uses contains(name) as a cheap pre-test, but there may be an
// entry that matches this on purging likely subtags.
@ -5052,8 +5052,8 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
}
for (qsizetype i = localeIds.size(); i-- > 0; ) {
QLocaleId id = localeIds.at(i);
qsizetype j;
const QLocaleId id = localeIds.at(i);
Q_ASSERT(id.language_id);
if (id.language_id == C) {
if (!uiLanguages.contains(u"C"_s))
uiLanguages.append(u"C"_s);
@ -5061,10 +5061,13 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
continue;
}
qsizetype j;
const QByteArray prior = id.name(sep);
bool faithful = true; // prior matches uiLanguages.at(j - 1)
if (isSystem && i < uiLanguages.size()) {
// Adding likely-adjusted forms to system locale's list.
Q_ASSERT(uiLanguages.at(i) == QLatin1StringView(prior)
faithful = uiLanguages.at(i) == QLatin1StringView(prior);
Q_ASSERT(faithful
// A legacy code may get mapped to an ID with a different name:
|| QLocaleId::fromName(uiLanguages.at(i)).name(sep) == prior);
// Insert just after the entry we're supplementing:
@ -5077,67 +5080,41 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
}
const QLocaleId max = id.withLikelySubtagsAdded();
const QLocaleId min = max.withLikelySubtagsRemoved();
Q_ASSERT(max.language_id);
Q_ASSERT(max.language_id == id.language_id);
// We can't say the same for script or territory, though.
// Include minimal version (last) unless it's what our locale is derived from:
if (const QByteArray name = min.name(sep); name != prior)
uiLanguages.insert(j, QString::fromLatin1(name));
else if (!isSystem && min == id)
--j; // Put more specific forms *before* minimal entry.
// Include various stripped-down versions when likely-equivalent and distinct:
if (id.script_id) {
if (const ushort land = id.territory_id) {
// Keep script, omit territory:
id.territory_id = 0;
if (id != min && id.withLikelySubtagsAdded() == max) {
if (const QByteArray name = id.name(sep); name != prior)
uiLanguages.insert(j, QString::fromLatin1(name));
}
id.territory_id = land;
}
// Omit script (keep territory if present):
id.script_id = 0;
// Belongs before script-without-territory, even if it duplicates min:
if (id.withLikelySubtagsAdded() == max) {
if (const QByteArray name = id.name(sep); name != prior)
// We have various candidates to consider.
const auto addIfEquivalent = [&j, &uiLanguages, max, sep, prior, faithful](QLocaleId cid) {
if (cid.withLikelySubtagsAdded() == max) {
if (const QByteArray name = cid.name(sep); name != prior)
uiLanguages.insert(j, QString::fromLatin1(name));
else if (faithful) // Later candidates are more specific, so go before.
--j;
}
} else {
id.script_id = max.script_id;
if (const ushort land = id.territory_id) {
// Supply script and omit territory:
id.territory_id = 0;
if (id != min && id.withLikelySubtagsAdded() == max) {
if (const QByteArray name = id.name(sep); name != prior)
uiLanguages.insert(j, QString::fromLatin1(name));
}
id.territory_id = land;
}
// Supply script (keep territory, if present):
if (id != max && id.withLikelySubtagsAdded() == max) {
if (const QByteArray name = id.name(sep); name != prior)
uiLanguages.insert(j, QString::fromLatin1(name));
}
// Restore to clear:
id.script_id = 0;
}
if (!id.territory_id) {
// Supply territory, omit script:
Q_ASSERT(!min.territory_id);
Q_ASSERT(!id.script_id); // because we just cleared it.
id.territory_id = max.territory_id;
if (id != max && id.withLikelySubtagsAdded() == max) {
if (const QByteArray name = id.name(sep); name != prior)
uiLanguages.insert(j, QString::fromLatin1(name));
}
}
// Include version with all likely sub-tags (first) if distinct from the rest:
if (max != min && max != id) {
if (const QByteArray name = max.name(sep); name != prior)
uiLanguages.insert(j, QString::fromLatin1(name));
};
// language
addIfEquivalent({ max.language_id, 0, 0 });
// language-script
if (max.script_id)
addIfEquivalent({ max.language_id, max.script_id, 0 });
if (id.script_id && id.script_id != max.script_id)
addIfEquivalent({ id.language_id, id.script_id, 0 });
// language-territory
if (max.territory_id)
addIfEquivalent({ max.language_id, 0, max.territory_id });
if (id.territory_id && id.territory_id != max.territory_id)
addIfEquivalent({ id.language_id, 0, id.territory_id });
// full
if (max.territory_id && max.script_id)
addIfEquivalent(max);
if (max.territory_id && id.script_id && id.script_id != max.script_id)
addIfEquivalent({ id.language_id, id.script_id, max.territory_id });
if (max.script_id && id.territory_id && id.territory_id != max.territory_id)
addIfEquivalent({ id.language_id, max.script_id, id.territory_id });
if (id.territory_id && id.territory_id != max.territory_id
&& id.script_id && id.script_id != max.script_id) {
addIfEquivalent(id);
}
}
@ -5158,30 +5135,59 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
return name.startsWith(stem)
&& (name.size() == stem.size() || name.at(stem.size()) == cut);
};
for (qsizetype i = 0; i < uiLanguages.size(); ++i) {
// As we now forward-traverse the list, we need to keep track of the
// positions just after (a) the block of things added above that are
// equivalent to the current entry and (b) the block of truncations (if any)
// added just after this block. All truncations of entries in (a) belong at
// the end of (b); once i advances to the end of (a) it must jump to just
// after (b). The more specific entries in (a) may well have truncations
// that can also arise from less specific ones later in (a); for the
// purposes of determining whether such truncations go at the end of (b) or
// the end of the list, we thus need to ignore these matches.
qsizetype afterEquivs = 0;
qsizetype afterTruncs = 0;
// From here onwards, we only have the truncations we're adding, whose
// truncations should all have been included already.
// If advancing i brings us to the end of block (a), jump to the end of (b):
for (qsizetype i = 0; i < uiLanguages.size(); ++i >= afterEquivs && (i = afterTruncs)) {
const QString entry = uiLanguages.at(i);
const QLocaleId max = QLocaleId::fromName(entry).withLikelySubtagsAdded();
// Keep track of our two blocks:
if (i >= afterEquivs) {
Q_ASSERT(i >= afterTruncs); // i.e. we just skipped past the end of a block
afterEquivs = i + 1;
// Advance past equivalents of entry:
while (afterEquivs < uiLanguages.size()
&& QLocaleId::fromName(uiLanguages.at(afterEquivs))
.withLikelySubtagsAdded() == max) {
++afterEquivs;
}
// We'll add any truncations starting there:
afterTruncs = afterEquivs;
}
if (hasPrefix(entry, u"C") || hasPrefix(entry, u"und"))
continue;
const ushort script = QLocaleId::fromName(entry).withLikelySubtagsAdded().script_id;
qsizetype stopAt = uiLanguages.size();
QString prefix = entry;
qsizetype at = 0;
/* By default we append but if no later entry has this as a prefix and
the locale it implies would use the same script as entry, put it
after entry instead. Thus [en-NL, nl-NL, en-GB] will append en but
[en-NL, en-GB, nl-NL] will put it before nl-NL, for example. We
require a script match so we don't pick translations that the user
cannot read, despite knowing the language. (Ideally that would be
a constraint the caller can opt into / out of. See QTBUG-112765.)
after the block of consecutive equivalents of which entry is a part
instead. Thus [en-NL, nl-NL, en-GB] will append en but [en-NL, en-GB,
nl-NL] will put it before nl-NL, for example. We require a script
match so we don't pick translations that the user cannot read,
despite knowing the language. (Ideally that would be a constraint the
caller can opt into / out of. See QTBUG-112765.)
*/
bool justAfter = QLocaleId::fromName(prefix).withLikelySubtagsAdded().script_id == script;
bool justAfter
= QLocaleId::fromName(prefix).withLikelySubtagsAdded().script_id == max.script_id;
while ((at = prefix.lastIndexOf(cut)) > 0) {
prefix = prefix.first(at);
// Don't test with hasSeen() as we might defer adding to later, when
// we'll need known to see the later entry's offering of this prefix
// as a new entry.
bool found = known.contains(prefix);
for (qsizetype j = i + 1; !found && j < stopAt; ++j) {
for (qsizetype j = afterTruncs; !found && j < stopAt; ++j) {
QString later = uiLanguages.at(j);
if (!later.startsWith(prefix)) {
const QByteArray laterFull =
@ -5218,7 +5224,7 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
// Now we're committed to adding it, get it into known:
(void) known.hasSeen(prefix);
if (justAfter) {
uiLanguages.insert(++i, prefix);
uiLanguages.insert(afterTruncs++, prefix);
++stopAt; // All later entries have moved one step later.
} else {
uiLanguages.append(prefix);

View File

@ -3712,13 +3712,17 @@ void tst_QLocale::uiLanguages_data()
<< QLocale(QLocale::Chinese, QLocale::SimplifiedHanScript, QLocale::China)
<< QStringList{u"zh-Hans-CN"_s, u"zh-CN"_s, u"zh-Hans"_s, u"zh"_s};
// GB has no native Punjabi locales, so is eliminated by likely subtag rules:
QTest::newRow("pa_IN")
<< QLocale("pa_IN") << QStringList{u"pa-Guru-IN"_s, u"pa-IN"_s, u"pa-Guru"_s, u"pa"_s};
QTest::newRow("pa_GB")
<< QLocale("pa_GB") << QStringList{u"pa-Guru-IN"_s, u"pa-IN"_s, u"pa-Guru"_s, u"pa"_s};
QTest::newRow("pa_Guru")
<< QLocale("pa_Guru") << QStringList{u"pa-Guru-IN"_s, u"pa-IN"_s, u"pa-Guru"_s, u"pa"_s};
QTest::newRow("pa_PK")
<< QLocale("pa_PK") << QStringList{u"pa-Arab-PK"_s, u"pa-PK"_s, u"pa-Arab"_s, u"pa"_s};
QTest::newRow("pa_Arab")
<< QLocale("pa_Arab") << QStringList{u"pa-Arab-PK"_s, u"pa-PK"_s, u"pa-Arab"_s, u"pa"_s};
// GB has no native Punjabi locales, so GB is eliminated by likely subtag rules:
QTest::newRow("pa_GB")
<< QLocale("pa_GB") << QStringList{u"pa-Guru-IN"_s, u"pa-IN"_s, u"pa-Guru"_s, u"pa"_s};
QTest::newRow("pa_Arab_GB")
<< QLocale("pa_Arab_GB") << QStringList{u"pa-Arab-PK"_s, u"pa-PK"_s, u"pa-Arab"_s, u"pa"_s};
@ -4151,67 +4155,70 @@ void tst_QLocale::mySystemLocale_data()
QTest::addRow("empty")
<< u"no-US"_s << QLocale::NorwegianBokmal
<< QStringList{u"nb-US"_s, u"nb-Latn-US"_s,
<< QStringList{u"nb-Latn-US"_s, u"nb-US"_s,
u"nb-Latn-NO"_s, u"nb-NO"_s, u"nb-Latn"_s, u"nb"_s};
QTest::addRow("no") // QTBUG-131127
<< u"no"_s << QLocale::NorwegianBokmal
<< QStringList{u"no"_s, u"nb-Latn-NO"_s, u"nb-NO"_s, u"nb-Latn"_s,
u"en-US"_s, u"en-Latn-US"_s, u"en-Latn"_s, u"en"_s,
u"en-Latn-US"_s, u"en-US"_s, u"en-Latn"_s, u"en"_s,
u"nb"_s};
QTest::addRow("en-Latn") // Android crash
<< u"en-Latn"_s << QLocale::English
<< QStringList{u"en-Latn"_s, u"en-Latn-US"_s, u"en-US"_s, u"en"_s,
u"en-NO"_s, u"en-Latn-NO"_s};
<< QStringList{u"en-Latn-US"_s, u"en-US"_s, u"en-Latn"_s, u"en"_s,
u"en-Latn-NO"_s, u"en-NO"_s};
QTest::addRow("anglo-dutch") // QTBUG-131894
<< u"en-NL"_s << QLocale::English
<< QStringList{u"en-NL"_s, u"en-Latn-NL"_s,
<< QStringList{u"en-Latn-NL"_s, u"en-NL"_s,
// No later en-Latn-* or en-* in the list, so include truncations now:
u"en-Latn"_s, u"en"_s,
u"nl-NL"_s, u"nl-Latn-NL"_s, u"nl-Latn"_s, u"nl"_s};
u"nl-Latn-NL"_s, u"nl-NL"_s, u"nl-Latn"_s, u"nl"_s};
QTest::addRow("anglo-dutch-GB")
<< u"en-NL-GB"_s << QLocale::English
<< QStringList{u"en-NL"_s, u"en-Latn-NL"_s,
u"nl-NL"_s, u"nl-Latn-NL"_s, u"nl-Latn"_s, u"nl"_s,
u"en-GB"_s, u"en-Latn-GB"_s, u"en-Latn"_s, u"en"_s};
<< QStringList{u"en-Latn-NL"_s, u"en-NL"_s,
u"nl-Latn-NL"_s, u"nl-NL"_s, u"nl-Latn"_s, u"nl"_s,
u"en-Latn-GB"_s, u"en-GB"_s, u"en-Latn"_s, u"en"_s};
QTest::addRow("catalan")
<< u"ca"_s << QLocale::Catalan
<< QStringList{u"ca"_s, u"ca-Latn-ES"_s, u"ca-ES"_s, u"ca-Latn"_s};
<< QStringList{u"ca-Latn-ES"_s, u"ca-ES"_s, u"ca-Latn"_s, u"ca"_s};
QTest::addRow("catalan-spain")
<< u"ca-ES"_s << QLocale::Catalan
<< QStringList{u"ca-ES"_s, u"ca-Latn-ES"_s, u"ca-Latn"_s, u"ca"_s};
<< QStringList{u"ca-Latn-ES"_s, u"ca-ES"_s, u"ca-Latn"_s, u"ca"_s};
QTest::addRow("catalan-latin")
<< u"ca-Latn"_s << QLocale::Catalan
<< QStringList{u"ca-Latn"_s, u"ca-Latn-ES"_s, u"ca-ES"_s, u"ca"_s};
<< QStringList{u"ca-Latn-ES"_s, u"ca-ES"_s, u"ca-Latn"_s, u"ca"_s};
QTest::addRow("ukrainian")
<< u"uk"_s << QLocale::Ukrainian
<< QStringList{u"uk"_s, u"uk-Cyrl-UA"_s, u"uk-UA"_s, u"uk-Cyrl"_s};
<< QStringList{u"uk-Cyrl-UA"_s, u"uk-UA"_s, u"uk-Cyrl"_s, u"uk"_s};
QTest::addRow("english-germany")
<< u"en-DE"_s << QLocale::English
// First two were missed out before fix to QTBUG-104930:
<< QStringList{u"en-DE"_s, u"en-Latn-DE"_s,
u"en-GB"_s, u"en-Latn-GB"_s,
u"de-DE"_s, u"de-Latn-DE"_s, u"de-Latn"_s, u"de"_s,
<< QStringList{u"en-Latn-DE"_s, u"en-DE"_s,
u"en-Latn-GB"_s, u"en-GB"_s,
u"de-Latn-DE"_s, u"de-DE"_s, u"de-Latn"_s, u"de"_s,
// Fallbacks implied by those:
u"en-Latn"_s, u"en"_s};
QTest::addRow("german")
<< u"de"_s << QLocale::German
<< QStringList{u"de"_s, u"de-Latn-DE"_s, u"de-DE"_s, u"de-Latn"_s};
<< QStringList{u"de-Latn-DE"_s, u"de-DE"_s, u"de-Latn"_s, u"de"_s};
QTest::addRow("german-britain")
<< u"de-GB"_s << QLocale::German
<< QStringList{u"de-GB"_s, u"de-Latn-GB"_s, u"de-Latn"_s, u"de"_s};
<< QStringList{u"de-Latn-GB"_s, u"de-GB"_s, u"de-Latn"_s, u"de"_s};
QTest::addRow("chinese-min")
<< u"zh"_s << QLocale::Chinese
<< QStringList{u"zh"_s, u"zh-Hans-CN"_s, u"zh-CN"_s, u"zh-Hans"_s};
<< QStringList{u"zh-Hans-CN"_s, u"zh-CN"_s, u"zh-Hans"_s, u"zh"_s};
QTest::addRow("chinese-full")
<< u"zh-Hans-CN"_s << QLocale::Chinese
<< QStringList{u"zh-Hans-CN"_s, u"zh-CN"_s, u"zh-Hans"_s, u"zh"_s};
QTest::addRow("chinese-taiwan")
<< u"zh-TW"_s << QLocale::Chinese
<< QStringList{u"zh-TW"_s, u"zh-Hant-TW"_s, u"zh-Hant"_s, u"zh"_s};
<< QStringList{u"zh-Hant-TW"_s, u"zh-TW"_s, u"zh-Hant"_s, u"zh"_s};
QTest::addRow("chinese-trad")
<< u"zh-Hant"_s << QLocale::Chinese
<< QStringList{u"zh-Hant-TW"_s, u"zh-TW"_s, u"zh-Hant"_s, u"zh"_s};
// For C, it should preserve what the system gave us but only add "C", never anything more:
QTest::addRow("C") << u"C"_s << QLocale::C << QStringList{u"C"_s};
@ -4227,22 +4234,22 @@ void tst_QLocale::mySystemLocale_data()
QTest::newRow("en-Dsrt-GB")
<< u"en-Dsrt-GB"_s << QLocale::English
<< QStringList{u"en-Dsrt-GB"_s, u"en-Dsrt"_s,
u"en-GB"_s, u"en-Latn-GB"_s, u"en-Latn"_s, u"en"_s};
u"en-Latn-GB"_s, u"en-GB"_s, u"en-Latn"_s, u"en"_s};
QTest::newRow("en-mixed")
<< u"en-FO"_s << QLocale::English
<< QStringList{u"en-FO"_s, u"en-Latn-FO"_s, u"en-DK"_s, u"en-Latn-DK"_s,
u"en-GB"_s, u"en-Latn-GB"_s,
u"fo-FO"_s, u"fo-Latn-FO"_s, u"fo-Latn"_s, u"fo"_s,
u"da-FO"_s, u"da-Latn-FO"_s,
u"da-DK"_s, u"da-Latn-DK"_s, u"da-Latn"_s, u"da"_s,
<< QStringList{u"en-Latn-FO"_s, u"en-FO"_s, u"en-Latn-DK"_s, u"en-DK"_s,
u"en-Latn-GB"_s, u"en-GB"_s,
u"fo-Latn-FO"_s, u"fo-FO"_s, u"fo-Latn"_s, u"fo"_s,
u"da-Latn-FO"_s, u"da-FO"_s,
u"da-Latn-DK"_s, u"da-DK"_s, u"da-Latn"_s, u"da"_s,
// Fallbacks implied by those:
u"en-Latn"_s, u"en"_s};
QTest::newRow("polylingual-CA")
<< u"de-CA"_s << QLocale::German
<< QStringList{u"de-CA"_s, u"de-Latn-CA"_s, u"en-CA"_s, u"en-Latn-CA"_s,
u"fr-CA"_s, u"fr-Latn-CA"_s, u"de-AT"_s, u"de-Latn-AT"_s,
u"en-GB"_s, u"en-Latn-GB"_s,
u"fr-FR"_s, u"fr-Latn-FR"_s, u"fr-Latn"_s, u"fr"_s,
<< QStringList{u"de-Latn-CA"_s, u"de-CA"_s, u"en-Latn-CA"_s, u"en-CA"_s,
u"fr-Latn-CA"_s, u"fr-CA"_s, u"de-Latn-AT"_s, u"de-AT"_s,
u"en-Latn-GB"_s, u"en-GB"_s,
u"fr-Latn-FR"_s, u"fr-FR"_s, u"fr-Latn"_s, u"fr"_s,
// Fallbacks:
u"de-Latn"_s, u"de"_s, u"en-Latn"_s, u"en"_s};