Include more pruned likely-equivalent entries in QLocale::uiLanguages

We were including, for each entry we got from CLDR or the system,
minimal and maximal additions from CLDR's likely-subtag rules, plus
versions with script omitted and possibly also territory added, when
likely-equivalent to the original. Include also the converse, with
territory omitted and possibly also script added when
likely-equivalent, so as to control order when these entries also show
up by truncation of others.

The pick to 6.8 was bound to get conflicts and is really a back-port
of reasonably future-compatible behavior to the 6.8 code-base. Results
from uiLanguages() itself are not the same as in 6.9 - truncations are
not included, because QTranslator takes care of those. This pick includes,
from commit cbf49f735e3cca85922a85d6548666816686db78 and earlier work
on 6.9, various test-cases adapted to this altered aim. It also fixes
some minor details that I'll now forward-port to dev and pick back to
6.9, that I worked out how to do while wrestling to make those
test-cases produce sensible results.

Task-number: QTBUG-131894
Change-Id: I363bfe31867be43807fe3b4942dafa186b8d2e94
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
(cherry picked from commit 7f9ee43de783105d8de0a0b614751eec639131f8)
(cherry picked from commit 45c9a35d359381419f529ba36a5381b1302bf0f4)
This commit is contained in:
Edward Welbourne 2025-01-09 19:03:02 +01:00
parent 5ee58a4f01
commit 1f55a339d2
2 changed files with 145 additions and 33 deletions

View File

@ -26,6 +26,7 @@ QT_WARNING_DISABLE_GCC("-Wfree-nonheap-object") // false positive tracking
#include "qcalendar.h" #include "qcalendar.h"
#include "qdatastream.h" #include "qdatastream.h"
#include "qdebug.h" #include "qdebug.h"
#include "private/qduplicatetracker_p.h"
#include "qhashfunctions.h" #include "qhashfunctions.h"
#include "qstring.h" #include "qstring.h"
#include "qlocale.h" #include "qlocale.h"
@ -4884,7 +4885,7 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
// first. (Known issue, QTBUG-104930, on some macOS versions when in // first. (Known issue, QTBUG-104930, on some macOS versions when in
// locale en_DE.) Our translation system might have a translation for a // locale en_DE.) Our translation system might have a translation for a
// locale the platform doesn't believe in. // locale the platform doesn't believe in.
const QString name = bcp47Name(separator); const QString name = QString::fromLatin1(d->m_data->id().name(sep)); // Raw name
if (!name.isEmpty() && language() != C && !uiLanguages.contains(name)) { if (!name.isEmpty() && language() != C && !uiLanguages.contains(name)) {
// That uses contains(name) as a cheap pre-test, but there may be an // That uses contains(name) as a cheap pre-test, but there may be an
// entry that matches this on purging likely subtags. // entry that matches this on purging likely subtags.
@ -4902,6 +4903,7 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
{ {
localeIds.append(d->m_data->id()); localeIds.append(d->m_data->id());
} }
for (qsizetype i = localeIds.size(); i-- > 0; ) { for (qsizetype i = localeIds.size(); i-- > 0; ) {
QLocaleId id = localeIds.at(i); QLocaleId id = localeIds.at(i);
if (id.language_id == C) { if (id.language_id == C) {
@ -4913,10 +4915,13 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
} }
qsizetype j; qsizetype j;
QByteArray prior; QByteArray prior;
bool faithful = true; // prior == id.name(sep)
if (isSystem && i < uiLanguages.size()) { if (isSystem && i < uiLanguages.size()) {
// Adding likely-adjusted forms to system locale's list. // Adding likely-adjusted forms to system locale's list.
// Name the locale is derived from: // Name the locale is derived from:
prior = uiLanguages.at(i).toLatin1(); prior = uiLanguages.at(i).toLatin1();
// When we come to insert max, we do so before prior only if it matches prior.
faithful = prior == id.name(sep);
// Insert just after the entry we're supplementing: // Insert just after the entry we're supplementing:
j = i + 1; j = i + 1;
} else { } else {
@ -4932,22 +4937,51 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
// Include minimal version (last) unless it's what our locale is derived from: // Include minimal version (last) unless it's what our locale is derived from:
if (auto name = min.name(sep); name != prior) if (auto name = min.name(sep); name != prior)
uiLanguages.insert(j, QString::fromLatin1(name)); uiLanguages.insert(j, QString::fromLatin1(name));
else if (!isSystem) else if (faithful)
--j; // bcp47Name() matches min(): put more specific forms *before* it. --j; // List entry matches min(): put more specific forms *before* it.
// Include various stripped-down versions when likely-equivalent and distinct:
if (id.script_id) { if (id.script_id) {
// Include scriptless version if likely-equivalent and distinct: if (const ushort land = id.territory_id) {
// Keep script, omit territory:
id.territory_id = 0;
if (id != min && id.withLikelySubtagsAdded() == max) {
if (const QByteArray name = id.name(sep); name != prior)
uiLanguages.insert(j, QString::fromLatin1(name));
}
id.territory_id = land;
}
// Omit script (keep territory if present):
id.script_id = 0; id.script_id = 0;
if (id != min && id.withLikelySubtagsAdded() == max) { // Belongs before script-without-territory, even if it duplicates min:
if (auto name = id.name(sep); name != prior) if (id.withLikelySubtagsAdded() == max) {
if (const QByteArray name = id.name(sep); name != prior)
uiLanguages.insert(j, QString::fromLatin1(name)); uiLanguages.insert(j, QString::fromLatin1(name));
} }
} else {
id.script_id = max.script_id;
if (const ushort land = id.territory_id) {
// Supply script and omit territory:
id.territory_id = 0;
if (id != min && id.withLikelySubtagsAdded() == max) {
if (const QByteArray name = id.name(sep); name != prior)
uiLanguages.insert(j, QString::fromLatin1(name));
}
id.territory_id = land;
}
// Supply script (keep territory, if present):
if (id != max && id.withLikelySubtagsAdded() == max) {
if (const QByteArray name = id.name(sep); name != prior)
uiLanguages.insert(j, QString::fromLatin1(name));
}
// Restore to clear:
id.script_id = 0;
} }
if (!id.territory_id) { if (!id.territory_id) {
// Supply territory, omit script:
Q_ASSERT(!min.territory_id); Q_ASSERT(!min.territory_id);
Q_ASSERT(!id.script_id); // because we just cleared it. Q_ASSERT(!id.script_id); // because we just cleared it.
// Include version with territory if it likely-equivalent and distinct:
id.territory_id = max.territory_id; id.territory_id = max.territory_id;
if (id != max && id.withLikelySubtagsAdded() == max) { if (id != max && id.withLikelySubtagsAdded() == max) {
if (auto name = id.name(sep); name != prior) if (auto name = id.name(sep); name != prior)
@ -4958,9 +4992,19 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
// Include version with all likely sub-tags (first) if distinct from the rest: // Include version with all likely sub-tags (first) if distinct from the rest:
if (max != min && max != id) { if (max != min && max != id) {
if (auto name = max.name(sep); name != prior) if (auto name = max.name(sep); name != prior)
uiLanguages.insert(j, QString::fromLatin1(name)); uiLanguages.insert(faithful ? i : j, QString::fromLatin1(name));
} }
} }
// Second pass: deduplicate.
QDuplicateTracker<QString> known(uiLanguages.size());
for (qsizetype i = 0; i < uiLanguages.size();) {
if (known.hasSeen(uiLanguages.at(i)))
uiLanguages.remove(i);
else
++i;
}
return uiLanguages; return uiLanguages;
} }

View File

@ -3689,12 +3689,10 @@ void tst_QLocale::uiLanguages_data()
QTest::newRow("C") << QLocale::c() << QStringList{QString("C")}; QTest::newRow("C") << QLocale::c() << QStringList{QString("C")};
QTest::newRow("en_US") QTest::newRow("en_US")
<< QLocale("en_US") << QLocale("en_US") << QStringList{u"en-Latn-US"_s, u"en-US"_s, u"en-Latn"_s, u"en"_s};
<< QStringList{QString("en-Latn-US"), QString("en-US"), QString("en")};
QTest::newRow("en_Latn_US") QTest::newRow("en_Latn_US")
<< QLocale("en_Latn_US") // Specifying the default script makes no difference << QLocale("en_Latn_US") // Specifying the default script makes no difference
<< QStringList{QString("en-Latn-US"), QString("en-US"), QString("en")}; << QStringList{u"en-Latn-US"_s, u"en-US"_s, u"en-Latn"_s, u"en"_s};
QTest::newRow("en_GB") QTest::newRow("en_GB")
<< QLocale("en_GB") << QLocale("en_GB")
@ -3705,19 +3703,28 @@ void tst_QLocale::uiLanguages_data()
<< QStringList{QString("en-Dsrt-US"), QString("en-Dsrt")}; << QStringList{QString("en-Dsrt-US"), QString("en-Dsrt")};
QTest::newRow("ru_RU") QTest::newRow("ru_RU")
<< QLocale("ru_RU") << QLocale("ru_RU") << QStringList{u"ru-Cyrl-RU"_s, u"ru-RU"_s, u"ru-Cyrl"_s, u"ru"_s};
<< QStringList{QString("ru-Cyrl-RU"), QString("ru-RU"), QString("ru")};
QTest::newRow("zh_Hant") QTest::newRow("zh_Hant")
<< QLocale("zh_Hant") << QLocale("zh_Hant")
<< QStringList{QString("zh-Hant-TW"), QString("zh-TW")}; << QStringList{u"zh-Hant-TW"_s, u"zh-TW"_s, u"zh-Hant"_s};
QTest::newRow("zh_TW") QTest::newRow("zh_TW")
<< QLocale("zh_TW") << QLocale("zh_TW")
<< QStringList{u"zh-Hant-TW"_s, u"zh-TW"_s}; << QStringList{u"zh-Hant-TW"_s, u"zh-TW"_s, u"zh-Hant"_s};
QTest::newRow("zh_Hans_CN") QTest::newRow("zh_Hans_CN")
<< QLocale(QLocale::Chinese, QLocale::SimplifiedHanScript, QLocale::China) << QLocale(QLocale::Chinese, QLocale::SimplifiedHanScript, QLocale::China)
<< QStringList{QString("zh-Hans-CN"), QString("zh-CN"), QString("zh")}; << QStringList{u"zh-Hans-CN"_s, u"zh-CN"_s, u"zh-Hans"_s, u"zh"_s};
QTest::newRow("pa_IN")
<< QLocale("pa_IN") << QStringList{u"pa-Guru-IN"_s, u"pa-IN"_s, u"pa-Guru"_s, u"pa"_s};
QTest::newRow("pa_PK")
<< QLocale("pa_PK") << QStringList{u"pa-Arab-PK"_s, u"pa-PK"_s, u"pa-Arab"_s};
// GB has no native Punjabi locales, so is eliminated by likely subtag rules:
QTest::newRow("pa_GB")
<< QLocale("pa_GB") << QStringList{u"pa-Guru-IN"_s, u"pa-IN"_s, u"pa-Guru"_s, u"pa"_s};
QTest::newRow("pa_Arab_GB")
<< QLocale("pa_Arab_GB") << QStringList{u"pa-Arab-PK"_s, u"pa-PK"_s, u"pa-Arab"_s};
// We presently map und (or any other unrecognized language) to C, ignoring // We presently map und (or any other unrecognized language) to C, ignoring
// what a sub-tag lookup would surely find us. // what a sub-tag lookup would surely find us.
@ -4081,8 +4088,28 @@ public:
{ {
switch (type) { switch (type) {
case UILanguages: case UILanguages:
if (m_name == u"en-Latn")
return QVariant(QStringList{u"en-NO"_s});
if (m_name == u"en-DE") // QTBUG-104930: simulate macOS's list not including m_name. if (m_name == u"en-DE") // QTBUG-104930: simulate macOS's list not including m_name.
return QVariant(QStringList{QStringLiteral("en-GB"), QStringLiteral("de-DE")}); return QVariant(QStringList{QStringLiteral("en-GB"), QStringLiteral("de-DE")});
if (m_name == u"en-Dsrt-GB")
return QVariant(QStringList{u"en-Dsrt-GB"_s, u"en-GB"_s});
if (m_name == u"en-FO") { // Nominally Faroe Islands, used for en-mixed test
return QVariant(QStringList{u"en-DK"_s, u"en-GB"_s, u"fo-FO"_s,
u"da-FO"_s, u"da-DK"_s});
}
if (m_name == u"en-NL") // Anglophone in Netherlands:
return QVariant(QStringList{u"en-NL"_s, u"nl-NL"_s});
if (m_name == u"en-NL-GB") // Netherlander at work for a GB-ish employer:
return QVariant(QStringList{u"en-NL"_s, u"nl-NL"_s, u"en-GB"_s});
if (m_name == u"de-CA") { // Imagine a 2nd generation Canadian of de-AT ancestry ...
return QVariant(QStringList{u"en-CA"_s, u"fr-CA"_s, u"de-AT"_s,
u"en-GB"_s, u"fr-FR"_s});
}
if (m_name == u"no") // QTBUG-131127
return QVariant(QStringList{u"no"_s, u"en-US"_s, u"nb"_s});
if (m_name == u"no-US") // Empty query result:
return QVariant(QStringList{});
return QVariant(QStringList{m_name}); return QVariant(QStringList{m_name});
case LanguageId: case LanguageId:
return m_id.language_id; return m_id.language_id;
@ -4115,40 +4142,65 @@ void tst_QLocale::mySystemLocale_data()
QTest::addColumn<QLocale::Language>("language"); QTest::addColumn<QLocale::Language>("language");
QTest::addColumn<QStringList>("uiLanguages"); QTest::addColumn<QStringList>("uiLanguages");
QTest::addRow("empty")
<< u"no-US"_s << QLocale::NorwegianBokmal
<< QStringList{u"nb-Latn-US"_s, u"nb-US"_s,
u"nb-Latn-NO"_s, u"nb-NO"_s, u"nb-Latn"_s, u"nb"_s};
QTest::addRow("no") // QTBUG-131127
<< u"no"_s << QLocale::NorwegianBokmal
<< QStringList{u"no"_s, u"nb-Latn-NO"_s, u"nb-NO"_s, u"nb-Latn"_s, u"nb"_s,
u"en-Latn-US"_s, u"en-US"_s, u"en-Latn"_s, u"en"_s };
QTest::addRow("en-Latn") // Android crash
<< u"en-Latn"_s << QLocale::English
<< QStringList{u"en-Latn-US"_s, u"en-Latn"_s, u"en-US"_s, u"en"_s,
u"en-Latn-NO"_s, u"en-NO"_s};
QTest::addRow("anglo-dutch") // QTBUG-131894
<< u"en-NL"_s << QLocale::English
<< QStringList{u"en-Latn-NL"_s, u"en-NL"_s,
u"nl-Latn-NL"_s, u"nl-NL"_s, u"nl-Latn"_s, u"nl"_s};
QTest::addRow("anglo-dutch-GB")
<< u"en-NL-GB"_s << QLocale::English
<< QStringList{u"en-Latn-NL"_s, u"en-NL"_s,
u"nl-Latn-NL"_s, u"nl-NL"_s, u"nl-Latn"_s, u"nl"_s,
u"en-Latn-GB"_s, u"en-GB"_s};
QTest::addRow("catalan") QTest::addRow("catalan")
<< QString("ca") << QLocale::Catalan << QString("ca") << QLocale::Catalan
<< QStringList{QStringLiteral("ca"), QStringLiteral("ca-Latn-ES"), QStringLiteral("ca-ES")}; << QStringList{u"ca-Latn-ES"_s, u"ca-ES"_s, u"ca-Latn"_s, u"ca"_s};
QTest::addRow("catalan-spain") QTest::addRow("catalan-spain")
<< QString("ca-ES") << QLocale::Catalan << u"ca-ES"_s << QLocale::Catalan
<< QStringList{QStringLiteral("ca-ES"), QStringLiteral("ca-Latn-ES"), QStringLiteral("ca")}; << QStringList{u"ca-Latn-ES"_s, u"ca-ES"_s, u"ca-Latn"_s, u"ca"_s};
QTest::addRow("catalan-latin") QTest::addRow("catalan-latin")
<< QString("ca-Latn") << QLocale::Catalan << QString("ca-Latn") << QLocale::Catalan
<< QStringList{QStringLiteral("ca-Latn"), QStringLiteral("ca-Latn-ES"), << QStringList{QStringLiteral("ca-Latn-ES"), QStringLiteral("ca-Latn"),
QStringLiteral("ca-ES"), QStringLiteral("ca")}; QStringLiteral("ca-ES"), QStringLiteral("ca")};
QTest::addRow("ukrainian") QTest::addRow("ukrainian")
<< QString("uk") << QLocale::Ukrainian << QString("uk") << QLocale::Ukrainian
<< QStringList{QStringLiteral("uk"), QStringLiteral("uk-Cyrl-UA"), QStringLiteral("uk-UA")}; << QStringList{u"uk-Cyrl-UA"_s, u"uk-UA"_s, u"uk-Cyrl"_s, u"uk"_s};
QTest::addRow("english-germany") QTest::addRow("english-germany")
<< QString("en-DE") << QLocale::English << QString("en-DE") << QLocale::English
// First two were missed out before fix to QTBUG-104930: // First two were missed out before fix to QTBUG-104930:
<< QStringList{QStringLiteral("en-DE"), QStringLiteral("en-Latn-DE"), << QStringList{u"en-Latn-DE"_s, u"en-DE"_s,
QStringLiteral("en-GB"), QStringLiteral("en-Latn-GB"), u"en-Latn-GB"_s, u"en-GB"_s,
QStringLiteral("de-DE"), QStringLiteral("de-Latn-DE"), QStringLiteral("de")}; u"de-Latn-DE"_s, u"de-DE"_s, u"de-Latn"_s, u"de"_s};
QTest::addRow("german") QTest::addRow("german")
<< QString("de") << QLocale::German << QString("de") << QLocale::German
<< QStringList{QStringLiteral("de"), QStringLiteral("de-Latn-DE"), QStringLiteral("de-DE")}; << QStringList{u"de-Latn-DE"_s, u"de-DE"_s, u"de-Latn"_s, u"de"_s};
QTest::addRow("german-britain") QTest::addRow("german-britain")
<< QString("de-GB") << QLocale::German << QString("de-GB") << QLocale::German
<< QStringList{QStringLiteral("de-GB"), QStringLiteral("de-Latn-GB")}; << QStringList{u"de-Latn-GB"_s, u"de-GB"_s};
QTest::addRow("chinese-min") QTest::addRow("chinese-min")
<< QString("zh") << QLocale::Chinese << QString("zh") << QLocale::Chinese
<< QStringList{QStringLiteral("zh"), QStringLiteral("zh-Hans-CN"), QStringLiteral("zh-CN")}; << QStringList{u"zh-Hans-CN"_s, u"zh-CN"_s, u"zh-Hans"_s, u"zh"_s};
QTest::addRow("chinese-full") QTest::addRow("chinese-full")
<< QString("zh-Hans-CN") << QLocale::Chinese << u"zh-Hans-CN"_s << QLocale::Chinese
<< QStringList{QStringLiteral("zh-Hans-CN"), QStringLiteral("zh-CN"), QStringLiteral("zh")}; << QStringList{u"zh-Hans-CN"_s, u"zh-CN"_s, u"zh-Hans"_s, u"zh"_s};
QTest::addRow("chinese-taiwan") QTest::addRow("chinese-taiwan")
<< u"zh-TW"_s << QLocale::Chinese << u"zh-TW"_s << QLocale::Chinese
<< QStringList{u"zh-TW"_s, u"zh-Hant-TW"_s}; // Not ideal: want zh-TW before zh-Hant, but zh-TW is minimal so last
<< QStringList{u"zh-Hant-TW"_s, u"zh-Hant"_s, u"zh-TW"_s};
// For C, it should preserve what the system gave us but only add "C", never anything more: // For C, it should preserve what the system gave us but only add "C", never anything more:
QTest::addRow("C") << QString("C") << QLocale::C << QStringList{QStringLiteral("C")}; QTest::addRow("C") << QString("C") << QLocale::C << QStringList{QStringLiteral("C")};
@ -4168,8 +4220,24 @@ void tst_QLocale::mySystemLocale_data()
<< QString("C-CN") << QLocale::C << QString("C-CN") << QLocale::C
<< QStringList{QStringLiteral("C-CN"), QStringLiteral("C")}; << QStringList{QStringLiteral("C-CN"), QStringLiteral("C")};
QTest::addRow("C-Hans-CN") QTest::addRow("C-Hans-CN")
<< QString("C-Hans-CN") << QLocale::C << u"C-Hans-CN"_s << QLocale::C << QStringList{u"C-Hans-CN"_s, u"C"_s};
<< QStringList{QStringLiteral("C-Hans-CN"), QStringLiteral("C")};
QTest::newRow("en-Dsrt-GB")
<< u"en-Dsrt-GB"_s << QLocale::English
<< QStringList{u"en-Dsrt-GB"_s, u"en-Latn-GB"_s, u"en-GB"_s};
QTest::newRow("en-mixed")
<< u"en-FO"_s << QLocale::English
<< QStringList{u"en-Latn-FO"_s, u"en-FO"_s, u"en-Latn-DK"_s, u"en-DK"_s,
u"en-Latn-GB"_s, u"en-GB"_s,
u"fo-Latn-FO"_s, u"fo-FO"_s, u"fo-Latn"_s, u"fo"_s,
u"da-Latn-FO"_s, u"da-FO"_s,
u"da-Latn-DK"_s, u"da-DK"_s, u"da-Latn"_s, u"da"_s};
QTest::newRow("polylingual-CA")
<< u"de-CA"_s << QLocale::German
<< QStringList{u"de-Latn-CA"_s, u"de-CA"_s, u"en-Latn-CA"_s, u"en-CA"_s,
u"fr-Latn-CA"_s, u"fr-CA"_s, u"de-Latn-AT"_s, u"de-AT"_s,
u"en-Latn-GB"_s, u"en-GB"_s,
u"fr-Latn-FR"_s, u"fr-FR"_s, u"fr-Latn"_s, u"fr"_s};
QTest::newRow("und-US") QTest::newRow("und-US")
<< QString("und-US") << QLocale::C << QString("und-US") << QLocale::C