Amend QLocale::uiLanguages() order

Include each of an entry's truncations right after the entry if its
script (or, where absent, the one implied via likely subtag rules)
matches and there is no later entry that it should appear after. This
is a compromise between preferring strictly equivalent matches to
potentially incompatible ones and preferring candidates derived from
earlier entries in the original list. Defer to QLocaleSelector (see
QTBUG-112765) the addition of options to give the caller control over
how to make that compromise.

Pick-to: 6.9
Fixes: QTBUG-131894
Change-Id: I8750f4f40530c94638853388f47fcae1008db8d5
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Edward Welbourne 2024-12-13 13:31:57 +01:00
parent 7f9ee43de7
commit cbf49f735e
2 changed files with 37 additions and 7 deletions

View File

@ -5150,7 +5150,7 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
// Third pass: add truncations, when not already present.
// Cubic in list length, but hopefully that's at most a dozen or so.
const QLatin1Char cut(sep);
const auto hasPrefix = [cut](QStringView name, QStringView stem) {
const auto hasPrefix = [cut](auto name, QStringView stem) {
// A prefix only counts if it's either full or followed by a separator.
return name.startsWith(stem)
&& (name.size() == stem.size() || name.at(stem.size()) == cut);
@ -5159,9 +5159,19 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
const QString entry = uiLanguages.at(i);
if (hasPrefix(entry, u"C") || hasPrefix(entry, u"und"))
continue;
const qsizetype stopAt = uiLanguages.size();
const ushort script = QLocaleId::fromName(entry).withLikelySubtagsAdded().script_id;
qsizetype stopAt = uiLanguages.size();
QString prefix = entry;
qsizetype at = 0;
/* By default we append but if no later entry has this as a prefix and
the locale it implies would use the same script as entry, put it
after entry instead. Thus [en-NL, nl-NL, en-GB] will append en but
[en-NL, en-GB, nl-NL] will put it before nl-NL, for example. We
require a script match so we don't pick translations that the user
cannot read, despite knowing the language. (Ideally that would be
a constraint the caller can opt into / out of. See QTBUG-112765.)
*/
bool justAfter = QLocaleId::fromName(prefix).withLikelySubtagsAdded().script_id == script;
while ((at = prefix.lastIndexOf(cut)) > 0) {
prefix = prefix.first(at);
// Don't test with hasSeen() as we might defer adding to later, when
@ -5170,11 +5180,19 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
bool found = known.contains(prefix);
for (qsizetype j = i + 1; !found && j < stopAt; ++j) {
QString later = uiLanguages.at(j);
if (!later.startsWith(prefix))
if (!later.startsWith(prefix)) {
const QByteArray laterFull =
QLocaleId::fromName(later.replace(cut, u'-')
).withLikelySubtagsAdded().name(sep);
// When prefix matches a later entry's max, it belongs later.
if (hasPrefix(QLatin1StringView(laterFull), prefix))
justAfter = false;
continue;
}
// The duplicate tracker would already have spotted if equal:
Q_ASSERT(later.size() > prefix.size());
if (later.at(prefix.size()) == cut) {
justAfter = false;
// Prefix match. Shall produce the same prefix, but possibly
// after prefixes of other entries in the list. If later has
// a longer prefix not yet in the list, we want that before
@ -5196,7 +5214,12 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
break; // any further truncations of prefix would also be found.
// Now we're committed to adding it, get it into known:
(void) known.hasSeen(prefix);
uiLanguages.append(entry.first(prefix.size()));
if (justAfter) {
uiLanguages.insert(++i, prefix);
++stopAt; // All later entries have moved one step later.
} else {
uiLanguages.append(prefix);
}
}
}

View File

@ -4094,6 +4094,8 @@ public:
return QVariant(QStringList{u"en-DK"_s, u"en-GB"_s, u"fo-FO"_s,
u"da-FO"_s, u"da-DK"_s});
}
if (m_name == u"en-NL") // Anglophone in Netherlands:
return QVariant(QStringList{u"en-NL"_s, u"nl-NL"_s});
if (m_name == u"en-NL-GB") // Netherlander at work for a GB-ish employer:
return QVariant(QStringList{u"en-NL"_s, u"nl-NL"_s, u"en-GB"_s});
if (m_name == u"de-CA") { // Imagine a 2nd generation Canadian of de-AT ancestry ...
@ -4161,6 +4163,12 @@ void tst_QLocale::mySystemLocale_data()
<< QStringList{u"en-Latn"_s, u"en-Latn-US"_s, u"en-US"_s, u"en"_s,
u"en-NO"_s, u"en-Latn-NO"_s};
QTest::addRow("anglo-dutch") // QTBUG-131894
<< u"en-NL"_s << QLocale::English
<< QStringList{u"en-NL"_s, u"en-Latn-NL"_s,
// No later en-Latn-* or en-* in the list, so include truncations now:
u"en-Latn"_s, u"en"_s,
u"nl-NL"_s, u"nl-Latn-NL"_s, u"nl-Latn"_s, u"nl"_s};
QTest::addRow("anglo-dutch-GB")
<< u"en-NL-GB"_s << QLocale::English
<< QStringList{u"en-NL"_s, u"en-Latn-NL"_s,
@ -4218,9 +4226,8 @@ void tst_QLocale::mySystemLocale_data()
QTest::newRow("en-Dsrt-GB")
<< u"en-Dsrt-GB"_s << QLocale::English
<< QStringList{u"en-Dsrt-GB"_s, u"en-GB"_s, u"en-Latn-GB"_s,
// Fallbacks - plain "en" last, not between the others:
u"en-Dsrt"_s, u"en-Latn"_s, u"en"_s};
<< QStringList{u"en-Dsrt-GB"_s, u"en-Dsrt"_s,
u"en-GB"_s, u"en-Latn-GB"_s, u"en-Latn"_s, u"en"_s};
QTest::newRow("en-mixed")
<< u"en-FO"_s << QLocale::English
<< QStringList{u"en-FO"_s, u"en-Latn-FO"_s, u"en-DK"_s, u"en-Latn-DK"_s,