Use likelySubtags to instantiate a locale id from its short form

...as described in http://www.unicode.org/reports/tr35/#Likely_Subtags.
This is much more effective than the current "guessing" algorithm
and makes it possible to instantiate a locale by the script or territory code only.

Change-Id: I674f8476e65b01c56960b6e83a1a346df0715274
Reviewed-by: Lars Knoll <lars.knoll@digia.com>
This commit is contained in:
Konstantin Ritt 2012-11-21 06:08:24 +02:00 committed by The Qt Project
parent e7c79face6
commit fe8962d3a5
7 changed files with 1521 additions and 1155 deletions

View File

@ -208,17 +208,94 @@ QString QLocalePrivate::countryCode() const
return code; return code;
} }
QString QLocalePrivate::bcp47Name() const // http://www.unicode.org/reports/tr35/#Likely_Subtags
// Replaces localeId with its likely-subtags expansion, if the table has one.
//
// Scans likely_subtags for an entry equal to localeId. On a match, localeId is
// overwritten with the entry that immediately follows it and true is returned.
// If nothing matches, localeId is left untouched and false is returned.
//
// NOTE(review): the opening brace line and the "if (m_data->...)" line below
// look like residue of the removed side of the diff fused with the added
// "### optimize with bsearch" remark - confirm against the real file.
static bool addLikelySubtags(QLocaleId &localeId)
{ {
if (m_data->m_language_id == QLocale::AnyLanguage) // ### optimize with bsearch
// number of QLocaleId entries in the table (not the number of pairs)
const int likely_subtags_count = sizeof(likely_subtags) / sizeof(likely_subtags[0]);
const QLocaleId *p = likely_subtags;
const QLocaleId *const e = p + likely_subtags_count;
// entries are consumed two at a time: p[0] is the key, p[1] its replacement
for ( ; p < e; p += 2) {
if (localeId == p[0]) {
localeId = p[1];
return true;
}
}
return false;
}
// Returns this id expanded through addLikelySubtags().
//
// Lookups are attempted in this order, stopping at the first hit:
//   1. language_script_region (the id exactly as it is)
//   2. language_script        (country cleared for the lookup)
//   3. language_region        (script cleared for the lookup)
//   4. language               (script and country cleared for the lookup)
// Any field that was cleared for a lookup is copied back from this id into
// the result, so explicitly given values are never overridden by that lookup.
// If no lookup succeeds, a copy of this id is returned unchanged.
QLocaleId QLocaleId::withLikelySubtagsAdded() const
{
    const bool hasScript = script_id != 0;
    const bool hasCountry = country_id != 0;

    // 1. language_script_region
    if (language_id != 0 || hasScript || hasCountry) {
        QLocaleId candidate = QLocaleId::fromIds(language_id, script_id, country_id);
        if (addLikelySubtags(candidate))
            return candidate;
    }

    // 2. language_script
    if (hasCountry) {
        QLocaleId candidate = QLocaleId::fromIds(language_id, script_id, 0);
        if (addLikelySubtags(candidate)) {
            candidate.country_id = country_id;
            return candidate;
        }
    }

    // 3. language_region
    if (hasScript) {
        QLocaleId candidate = QLocaleId::fromIds(language_id, 0, country_id);
        if (addLikelySubtags(candidate)) {
            candidate.script_id = script_id;
            return candidate;
        }
    }

    // 4. language
    if (hasScript && hasCountry) {
        QLocaleId candidate = QLocaleId::fromIds(language_id, 0, 0);
        if (addLikelySubtags(candidate)) {
            candidate.script_id = script_id;
            candidate.country_id = country_id;
            return candidate;
        }
    }

    return *this;
}
// Returns the shortest id that expands to the same result as this one.
//
// The reference is withLikelySubtagsAdded() applied to this id. Candidates
// are built from this id's own fields and tried in the order language,
// language_region, language_script; the first whose expansion equals the
// reference is returned. If none does, the reference itself is returned.
QLocaleId QLocaleId::withLikelySubtagsRemoved() const
{
    const QLocaleId expanded = withLikelySubtagsAdded();

    // language
    const QLocaleId languageOnly = QLocaleId::fromIds(language_id, 0, 0);
    if (languageOnly.withLikelySubtagsAdded() == expanded)
        return languageOnly;

    // language_region
    if (country_id != 0) {
        const QLocaleId languageRegion = QLocaleId::fromIds(language_id, 0, country_id);
        if (languageRegion.withLikelySubtagsAdded() == expanded)
            return languageRegion;
    }

    // language_script
    if (script_id != 0) {
        const QLocaleId languageScript = QLocaleId::fromIds(language_id, script_id, 0);
        if (languageScript.withLikelySubtagsAdded() == expanded)
            return languageScript;
    }

    return expanded;
}
QString QLocaleId::bcp47Name() const
{
if (language_id == QLocale::AnyLanguage)
return QString(); return QString();
if (m_data->m_language_id == QLocale::C) if (language_id == QLocale::C)
return QStringLiteral("C"); return QStringLiteral("C");
const unsigned char *lang = language_code_list + 3*(uint(m_data->m_language_id));
const unsigned char *lang = language_code_list + 3*uint(language_id);
const unsigned char *script = const unsigned char *script =
(m_data->m_script_id != QLocale::AnyScript ? script_code_list + 4*(uint(m_data->m_script_id)) : 0); (script_id != QLocale::AnyScript ? script_code_list + 4*uint(script_id) : 0);
const unsigned char *country = const unsigned char *country =
(m_data->m_country_id != QLocale::AnyCountry ? country_code_list + 3*(uint(m_data->m_country_id)) : 0); (country_id != QLocale::AnyCountry ? country_code_list + 3*uint(country_id) : 0);
char len = (lang[2] != 0 ? 3 : 2) + (script ? 4+1 : 0) + (country ? (country[2] != 0 ? 3 : 2)+1 : 0); char len = (lang[2] != 0 ? 3 : 2) + (script ? 4+1 : 0) + (country ? (country[2] != 0 ? 3 : 2)+1 : 0);
QString name(len, Qt::Uninitialized); QString name(len, Qt::Uninitialized);
QChar *uc = name.data(); QChar *uc = name.data();
@ -243,42 +320,59 @@ QString QLocalePrivate::bcp47Name() const
return name; return name;
} }
// Returns the name of this locale as produced by QLocaleId::bcp47Name() for
// the id obtained from withLikelySubtagsRemoved().
//
// Special cases: an empty string for QLocale::AnyLanguage and "C" for
// QLocale::C. Otherwise the language, script and country ids stored in
// m_data are passed through withLikelySubtagsRemoved() before formatting.
QString QLocalePrivate::bcp47Name() const
{
    QString result;
    if (m_data->m_language_id == QLocale::AnyLanguage) {
        // nothing to name; result stays a default-constructed QString
    } else if (m_data->m_language_id == QLocale::C) {
        result = QStringLiteral("C");
    } else {
        const QLocaleId fullId = QLocaleId::fromIds(m_data->m_language_id,
                                                    m_data->m_script_id,
                                                    m_data->m_country_id);
        result = fullId.withLikelySubtagsRemoved().bcp47Name();
    }
    return result;
}
const QLocaleData *QLocaleData::findLocaleData(QLocale::Language language, QLocale::Script script, QLocale::Country country) const QLocaleData *QLocaleData::findLocaleData(QLocale::Language language, QLocale::Script script, QLocale::Country country)
{ {
const unsigned language_id = language; QLocaleId localeId = QLocaleId::fromIds(language, script, country);
const unsigned script_id = script; localeId = localeId.withLikelySubtagsAdded();
const unsigned country_id = country;
uint idx = locale_index[language_id]; uint idx = locale_index[localeId.language_id];
const QLocaleData *data = locale_data + idx; const QLocaleData *data = locale_data + idx;
if (idx == 0) // default language has no associated country if (idx == 0) // default language has no associated country
return data; return data;
if (script == QLocale::AnyScript && country == QLocale::AnyCountry) Q_ASSERT(data->m_language_id == localeId.language_id);
return data;
Q_ASSERT(data->m_language_id == language_id); if (localeId.script_id != QLocale::AnyScript && localeId.country_id != QLocale::AnyCountry) {
if (country == QLocale::AnyCountry) {
while (data->m_language_id == language_id && data->m_script_id != script_id)
++data;
if (data->m_language_id == language_id && data->m_script_id == script_id)
return data;
} else if (script == QLocale::AnyScript) {
while (data->m_language_id == language_id) {
if (data->m_script_id == script_id && data->m_country_id == country_id)
return data;
++data;
}
} else {
// both script and country are explicitly specified // both script and country are explicitly specified
while (data->m_language_id == language_id) { do {
if (data->m_script_id == script_id && data->m_country_id == country_id) if (data->m_script_id == localeId.script_id && data->m_country_id == localeId.country_id)
return data; return data;
++data; ++data;
} while (data->m_language_id == localeId.language_id);
// no match; try again with default script
localeId.script_id = QLocale::AnyScript;
data = locale_data + idx;
} }
if (localeId.script_id == QLocale::AnyScript && localeId.country_id == QLocale::AnyCountry)
return data;
if (localeId.script_id == QLocale::AnyScript) {
do {
if (data->m_country_id == localeId.country_id)
return data;
++data;
} while (data->m_language_id == localeId.language_id);
} else if (localeId.country_id == QLocale::AnyCountry) {
do {
if (data->m_script_id == localeId.script_id)
return data;
++data;
} while (data->m_language_id == localeId.language_id);
} }
return locale_data + idx; return locale_data + idx;
@ -3227,7 +3321,7 @@ QString QLocale::toCurrencyString(double value, const QString &symbol) const
\since 4.8 \since 4.8
Returns an ordered list of locale names for translation purposes in Returns an ordered list of locale names for translation purposes in
preference order. preference order (like "en", "en-US", "en-Latn-US").
The return value represents locale names that the user expects to see the The return value represents locale names that the user expects to see the
UI translation in. UI translation in.
@ -3251,7 +3345,20 @@ QStringList QLocale::uiLanguages() const
} }
} }
#endif #endif
return QStringList(bcp47Name()); QLocaleId id = QLocaleId::fromIds(d->m_data->m_language_id, d->m_data->m_script_id, d->m_data->m_country_id);
const QLocaleId max = id.withLikelySubtagsAdded();
const QLocaleId min = max.withLikelySubtagsRemoved();
QStringList uiLanguages;
uiLanguages.append(min.bcp47Name());
if (id.script_id) {
id.script_id = 0;
if (id != min && id.withLikelySubtagsAdded() == max)
uiLanguages.append(id.bcp47Name());
}
if (max != min && max != id)
uiLanguages.append(max.bcp47Name());
return uiLanguages;
} }
/*! /*!

File diff suppressed because it is too large Load Diff

View File

@ -138,6 +138,28 @@ namespace QIcu {
#endif #endif
// Plain (language, script, country) id triple used for likely-subtags lookups.
// It deliberately has no constructors; build instances with fromIds().
struct QLocaleId
{
    // Factory used in place of a constructor.
    static inline QLocaleId fromIds(ushort language, ushort script, ushort country)
    {
        QLocaleId result;
        result.language_id = language;
        result.script_id = script;
        result.country_id = country;
        return result;
    }

    // Two ids are equal when all three fields match.
    inline bool operator==(QLocaleId other) const
    {
        return language_id == other.language_id
            && script_id == other.script_id
            && country_id == other.country_id;
    }
    inline bool operator!=(QLocaleId other) const
    {
        return language_id != other.language_id
            || script_id != other.script_id
            || country_id != other.country_id;
    }

    QLocaleId withLikelySubtagsAdded() const;
    QLocaleId withLikelySubtagsRemoved() const;

    QString bcp47Name() const;

    // Declaration order matters for brace-initialized instances of this struct.
    ushort language_id;
    ushort script_id;
    ushort country_id;
};
struct QLocaleData struct QLocaleData
{ {
public: public:

View File

@ -1,7 +1,7 @@
CONFIG += console testcase CONFIG += console testcase
CONFIG += parallel_test CONFIG += parallel_test
CONFIG -= app_bundle CONFIG -= app_bundle
QT = core testlib QT = core testlib core-private
embedded: QT += gui embedded: QT += gui
SOURCES = ../tst_qlocale.cpp SOURCES = ../tst_qlocale.cpp

View File

@ -120,6 +120,7 @@ private slots:
void toDateTime(); void toDateTime();
void negativeNumbers(); void negativeNumbers();
void numberOptions(); void numberOptions();
void testNames_data();
void testNames(); void testNames();
void dayName_data(); void dayName_data();
void dayName(); void dayName();
@ -364,16 +365,19 @@ void tst_QLocale::ctor()
+ "/" + QLocale::countryToString(l.country())).toLatin1().constData()); \ + "/" + QLocale::countryToString(l.country())).toLatin1().constData()); \
} }
TEST_CTOR("zh_CN", Chinese, AnyScript, China) TEST_CTOR("zh_CN", Chinese, SimplifiedHanScript, China)
TEST_CTOR("zh_Hans_CN", Chinese, SimplifiedHanScript, China) TEST_CTOR("zh_Hans_CN", Chinese, SimplifiedHanScript, China)
TEST_CTOR("zh_Hans", Chinese, SimplifiedHanScript, China) TEST_CTOR("zh_Hans", Chinese, SimplifiedHanScript, China)
TEST_CTOR("zh_Hant", Chinese, TraditionalHanScript, HongKong) TEST_CTOR("zh_Hant", Chinese, TraditionalHanScript, Taiwan)
TEST_CTOR("zh_Hans_MO", Chinese, SimplifiedHanScript, Macau) TEST_CTOR("zh_Hans_MO", Chinese, SimplifiedHanScript, Macau)
TEST_CTOR("zh_Hant_MO", Chinese, TraditionalHanScript, Macau) TEST_CTOR("zh_Hant_MO", Chinese, TraditionalHanScript, Macau)
TEST_CTOR("az_Latn_AZ", Azerbaijani, LatinScript, Azerbaijan) TEST_CTOR("az_Latn_AZ", Azerbaijani, LatinScript, Azerbaijan)
TEST_CTOR("ha_Arab_NG", Hausa, ArabicScript, Nigeria) TEST_CTOR("ha_Arab_NG", Hausa, ArabicScript, Nigeria)
TEST_CTOR("ha_Latn_NG", Hausa, LatinScript, Nigeria) TEST_CTOR("ha_Latn_NG", Hausa, LatinScript, Nigeria)
TEST_CTOR("ru", Russian, CyrillicScript, RussianFederation)
TEST_CTOR("ru_Cyrl", Russian, CyrillicScript, RussianFederation)
#undef TEST_CTOR #undef TEST_CTOR
} }
@ -1410,370 +1414,62 @@ void tst_QLocale::negativeNumbers()
QCOMPARE(i, -1000000); QCOMPARE(i, -1000000);
} }
struct LocaleListItem #include <private/qlocale_p.h>
#include <private/qlocale_data_p.h>
static const int locale_data_count = sizeof(locale_data)/sizeof(locale_data[0]);
void tst_QLocale::testNames_data()
{ {
int language; QTest::addColumn<int>("language");
int country; QTest::addColumn<int>("country");
};
// first two rows of locale_data[] in qlocale_data_p.h for (int i = 0; i < locale_data_count; ++i) {
static const LocaleListItem g_locale_list[] = { const QLocaleData &item = locale_data[i];
{ 1, 0}, // C/AnyCountry
{ 3, 69}, // Afan/Ethiopia
{ 3, 111}, // Afan/Kenya
{ 4, 59}, // Afar/Djibouti
{ 4, 67}, // Afar/Eritrea
{ 4, 69}, // Afar/Ethiopia
{ 5, 195}, // Afrikaans/SouthAfrica
{ 5, 148}, // Afrikaans/Namibia
{ 6, 2}, // Albanian/Albania
{ 7, 69}, // Amharic/Ethiopia
{ 8, 186}, // Arabic/SaudiArabia
{ 8, 3}, // Arabic/Algeria
{ 8, 17}, // Arabic/Bahrain
{ 8, 64}, // Arabic/Egypt
{ 8, 103}, // Arabic/Iraq
{ 8, 109}, // Arabic/Jordan
{ 8, 115}, // Arabic/Kuwait
{ 8, 119}, // Arabic/Lebanon
{ 8, 122}, // Arabic/LibyanArabJamahiriya
{ 8, 145}, // Arabic/Morocco
{ 8, 162}, // Arabic/Oman
{ 8, 175}, // Arabic/Qatar
{ 8, 201}, // Arabic/Sudan
{ 8, 207}, // Arabic/SyrianArabRepublic
{ 8, 216}, // Arabic/Tunisia
{ 8, 223}, // Arabic/UnitedArabEmirates
{ 8, 237}, // Arabic/Yemen
{ 9, 11}, // Armenian/Armenia
{ 10, 100}, // Assamese/India
{ 12, 15}, // Azerbaijani/Azerbaijan
{ 12, 102}, // Azerbaijani/Iran
{ 14, 197}, // Basque/Spain
{ 15, 18}, // Bengali/Bangladesh
{ 15, 100}, // Bengali/India
{ 16, 25}, // Bhutani/Bhutan
{ 19, 74}, // Breton/France
{ 20, 33}, // Bulgarian/Bulgaria
{ 21, 147}, // Burmese/Myanmar
{ 22, 20}, // Byelorussian/Belarus
{ 23, 36}, // Cambodian/Cambodia
{ 24, 197}, // Catalan/Spain
{ 25, 44}, // Chinese/China
{ 25, 97}, // Chinese/HongKong
{ 25, 126}, // Chinese/Macau
{ 25, 190}, // Chinese/Singapore
{ 25, 208}, // Chinese/Taiwan
{ 27, 54}, // Croatian/Croatia
{ 28, 57}, // Czech/CzechRepublic
{ 29, 58}, // Danish/Denmark
{ 30, 151}, // Dutch/Netherlands
{ 30, 21}, // Dutch/Belgium
{ 31, 225}, // English/UnitedStates
{ 31, 4}, // English/AmericanSamoa
{ 31, 13}, // English/Australia
{ 31, 21}, // English/Belgium
{ 31, 22}, // English/Belize
{ 31, 28}, // English/Botswana
{ 31, 38}, // English/Canada
{ 31, 89}, // English/Guam
{ 31, 97}, // English/HongKong
{ 31, 100}, // English/India
{ 31, 104}, // English/Ireland
{ 31, 107}, // English/Jamaica
{ 31, 133}, // English/Malta
{ 31, 134}, // English/MarshallIslands
{ 31, 137}, // English/Mauritius
{ 31, 148}, // English/Namibia
{ 31, 154}, // English/NewZealand
{ 31, 160}, // English/NorthernMarianaIslands
{ 31, 163}, // English/Pakistan
{ 31, 170}, // English/Philippines
{ 31, 190}, // English/Singapore
{ 31, 195}, // English/SouthAfrica
{ 31, 215}, // English/TrinidadAndTobago
{ 31, 224}, // English/UnitedKingdom
{ 31, 226}, // English/UnitedStatesMinorOutlyingIslands
{ 31, 234}, // English/USVirginIslands
{ 31, 240}, // English/Zimbabwe
{ 33, 68}, // Estonian/Estonia
{ 34, 71}, // Faroese/FaroeIslands
{ 36, 73}, // Finnish/Finland
{ 37, 74}, // French/France
{ 37, 21}, // French/Belgium
{ 37, 37}, // French/Cameroon
{ 37, 38}, // French/Canada
{ 37, 41}, // French/CentralAfricanRepublic
{ 37, 53}, // French/IvoryCoast
{ 37, 88}, // French/Guadeloupe
{ 37, 91}, // French/Guinea
{ 37, 125}, // French/Luxembourg
{ 37, 128}, // French/Madagascar
{ 37, 132}, // French/Mali
{ 37, 135}, // French/Martinique
{ 37, 142}, // French/Monaco
{ 37, 156}, // French/Niger
{ 37, 176}, // French/Reunion
{ 37, 187}, // French/Senegal
{ 37, 206}, // French/Switzerland
{ 37, 244}, // French/Saint Barthelemy
{ 37, 245}, // French/Saint Martin
{ 40, 197}, // Galician/Spain
{ 41, 81}, // Georgian/Georgia
{ 42, 82}, // German/Germany
{ 42, 14}, // German/Austria
{ 42, 21}, // German/Belgium
{ 42, 123}, // German/Liechtenstein
{ 42, 125}, // German/Luxembourg
{ 42, 206}, // German/Switzerland
{ 43, 85}, // Greek/Greece
{ 43, 56}, // Greek/Cyprus
{ 44, 86}, // Greenlandic/Greenland
{ 46, 100}, // Gujarati/India
{ 47, 83}, // Hausa/Ghana
{ 47, 156}, // Hausa/Niger
{ 47, 157}, // Hausa/Nigeria
{ 47, 201}, // Hausa/Sudan
{ 48, 105}, // Hebrew/Israel
{ 49, 100}, // Hindi/India
{ 50, 98}, // Hungarian/Hungary
{ 51, 99}, // Icelandic/Iceland
{ 52, 101}, // Indonesian/Indonesia
{ 57, 104}, // Irish/Ireland
{ 58, 106}, // Italian/Italy
{ 58, 206}, // Italian/Switzerland
{ 59, 108}, // Japanese/Japan
{ 61, 100}, // Kannada/India
{ 63, 110}, // Kazakh/Kazakhstan
{ 64, 179}, // Kinyarwanda/Rwanda
{ 65, 116}, // Kirghiz/Kyrgyzstan
{ 66, 114}, // Korean/RepublicOfKorea
{ 67, 102}, // Kurdish/Iran
{ 67, 103}, // Kurdish/Iraq
{ 67, 207}, // Kurdish/SyrianArabRepublic
{ 67, 217}, // Kurdish/Turkey
{ 69, 117}, // Laothian/Lao
{ 71, 118}, // Latvian/Latvia
{ 72, 49}, // Lingala/DemocraticRepublicOfCongo
{ 72, 50}, // Lingala/PeoplesRepublicOfCongo
{ 73, 124}, // Lithuanian/Lithuania
{ 74, 127}, // Macedonian/Macedonia
{ 75, 128}, // Malagasy/Madagascar
{ 76, 130}, // Malay/Malaysia
{ 76, 32}, // Malay/BruneiDarussalam
{ 77, 100}, // Malayalam/India
{ 78, 133}, // Maltese/Malta
{ 79, 154}, // Maori/NewZealand
{ 80, 100}, // Marathi/India
{ 82, 44}, // Mongolian/China
{ 82, 143}, // Mongolian/Mongolia
{ 84, 100}, // Nepali/India
{ 84, 150}, // Nepali/Nepal
{ 85, 161}, // Norwegian/Norway
{ 86, 74}, // Occitan/France
{ 87, 100}, // Oriya/India
{ 88, 1}, // Pashto/Afghanistan
{ 89, 102}, // Persian/Iran
{ 89, 1}, // Persian/Afghanistan
{ 90, 172}, // Polish/Poland
{ 91, 173}, // Portuguese/Portugal
{ 91, 30}, // Portuguese/Brazil
{ 91, 92}, // Portuguese/GuineaBissau
{ 91, 146}, // Portuguese/Mozambique
{ 92, 100}, // Punjabi/India
{ 92, 163}, // Punjabi/Pakistan
{ 94, 206}, // RhaetoRomance/Switzerland
{ 95, 141}, // Romanian/Moldova
{ 95, 177}, // Romanian/Romania
{ 96, 178}, // Russian/RussianFederation
{ 96, 141}, // Russian/Moldova
{ 96, 222}, // Russian/Ukraine
{ 98, 41}, // Sangho/CentralAfricanRepublic
{ 99, 100}, // Sanskrit/India
{ 100, 27}, // Serbian/BosniaAndHerzegowina
{ 100, 242}, // Serbian/Montenegro
{ 100, 243}, // Serbian/Serbia
{ 102, 120}, // Sesotho/Lesotho
{ 102, 195}, // Sesotho/SouthAfrica
{ 103, 195}, // Setswana/SouthAfrica
{ 104, 240}, // Shona/Zimbabwe
{ 106, 198}, // Singhalese/SriLanka
{ 107, 195}, // Siswati/SouthAfrica
{ 107, 204}, // Siswati/Swaziland
{ 108, 191}, // Slovak/Slovakia
{ 109, 192}, // Slovenian/Slovenia
{ 110, 194}, // Somali/Somalia
{ 110, 59}, // Somali/Djibouti
{ 110, 69}, // Somali/Ethiopia
{ 110, 111}, // Somali/Kenya
{ 111, 197}, // Spanish/Spain
{ 111, 10}, // Spanish/Argentina
{ 111, 26}, // Spanish/Bolivia
{ 111, 43}, // Spanish/Chile
{ 111, 47}, // Spanish/Colombia
{ 111, 52}, // Spanish/CostaRica
{ 111, 61}, // Spanish/DominicanRepublic
{ 111, 63}, // Spanish/Ecuador
{ 111, 65}, // Spanish/ElSalvador
{ 111, 66}, // Spanish/EquatorialGuinea
{ 111, 90}, // Spanish/Guatemala
{ 111, 96}, // Spanish/Honduras
{ 111, 139}, // Spanish/Mexico
{ 111, 155}, // Spanish/Nicaragua
{ 111, 166}, // Spanish/Panama
{ 111, 168}, // Spanish/Paraguay
{ 111, 169}, // Spanish/Peru
{ 111, 174}, // Spanish/PuertoRico
{ 111, 225}, // Spanish/UnitedStates
{ 111, 227}, // Spanish/Uruguay
{ 111, 231}, // Spanish/Venezuela
{ 113, 111}, // Swahili/Kenya
{ 113, 210}, // Swahili/Tanzania
{ 114, 205}, // Swedish/Sweden
{ 114, 73}, // Swedish/Finland
{ 116, 209}, // Tajik/Tajikistan
{ 117, 100}, // Tamil/India
{ 117, 198}, // Tamil/SriLanka
{ 118, 178}, // Tatar/RussianFederation
{ 119, 100}, // Telugu/India
{ 120, 211}, // Thai/Thailand
{ 121, 44}, // Tibetan/China
{ 121, 100}, // Tibetan/India
{ 122, 67}, // Tigrinya/Eritrea
{ 122, 69}, // Tigrinya/Ethiopia
{ 123, 214}, // Tonga/Tonga
{ 124, 195}, // Tsonga/SouthAfrica
{ 125, 217}, // Turkish/Turkey
{ 128, 44}, // Uigur/China
{ 129, 222}, // Ukrainian/Ukraine
{ 130, 100}, // Urdu/India
{ 130, 163}, // Urdu/Pakistan
{ 131, 228}, // Uzbek/Uzbekistan
{ 131, 1}, // Uzbek/Afghanistan
{ 132, 232}, // Vietnamese/VietNam
{ 134, 224}, // Welsh/UnitedKingdom
{ 135, 187}, // Wolof/Senegal
{ 136, 195}, // Xhosa/SouthAfrica
{ 138, 157}, // Yoruba/Nigeria
{ 140, 195}, // Zulu/SouthAfrica
{ 141, 161}, // Nynorsk/Norway
{ 142, 27}, // Bosnian/BosniaAndHerzegowina
{ 143, 131}, // Divehi/Maldives
{ 144, 224}, // Manx/UnitedKingdom
{ 145, 224}, // Cornish/UnitedKingdom
{ 146, 83}, // Akan/Ghana
{ 147, 100}, // Konkani/India
{ 148, 83}, // Ga/Ghana
{ 149, 157}, // Igbo/Nigeria
{ 150, 111}, // Kamba/Kenya
{ 151, 207}, // Syriac/SyrianArabRepublic
{ 152, 67}, // Blin/Eritrea
{ 153, 67}, // Geez/Eritrea
{ 153, 69}, // Geez/Ethiopia
{ 154, 53}, // Koro/IvoryCoast
{ 155, 69}, // Sidamo/Ethiopia
{ 156, 157}, // Atsam/Nigeria
{ 157, 67}, // Tigre/Eritrea
{ 158, 157}, // Jju/Nigeria
{ 159, 106}, // Friulian/Italy
{ 160, 195}, // Venda/SouthAfrica
{ 161, 83}, // Ewe/Ghana
{ 161, 212}, // Ewe/Togo
{ 162, 69}, // Walamo/Ethiopia
{ 163, 225}, // Hawaiian/UnitedStates
{ 164, 157}, // Tyap/Nigeria
{ 165, 129}, // Chewa/Malawi
{ 166, 170}, // Filipino/Philippines
{ 167, 206}, // Swiss German/Switzerland
{ 168, 44}, // Sichuan Yi/China
{ 169, 91}, // Kpelle/Guinea
{ 169, 121}, // Kpelle/Liberia
{ 170, 82}, // Low German/Germany
{ 171, 195}, // South Ndebele/SouthAfrica
{ 172, 195}, // Northern Sotho/SouthAfrica
{ 173, 73}, // Northern Sami/Finland
{ 173, 161}, // Northern Sami/Norway
{ 174, 208}, // Taroko/Taiwan
{ 175, 111}, // Gusii/Kenya
{ 176, 111}, // Taita/Kenya
{ 177, 187}, // Fulah/Senegal
{ 178, 111}, // Kikuyu/Kenya
{ 179, 111}, // Samburu/Kenya
{ 180, 146}, // Sena/Mozambique
{ 181, 240}, // North Ndebele/Zimbabwe
{ 182, 210}, // Rombo/Tanzania
{ 183, 145}, // Tachelhit/Morocco
{ 184, 3}, // Kabyle/Algeria
{ 185, 221}, // Nyankole/Uganda
{ 186, 210}, // Bena/Tanzania
{ 187, 210}, // Vunjo/Tanzania
{ 188, 132}, // Bambara/Mali
{ 189, 111}, // Embu/Kenya
{ 190, 225}, // Cherokee/UnitedStates
{ 191, 137}, // Morisyen/Mauritius
{ 192, 210}, // Makonde/Tanzania
{ 193, 210}, // Langi/Tanzania
{ 194, 221}, // Ganda/Uganda
{ 195, 239}, // Bemba/Zambia
{ 196, 39}, // Kabuverdianu/CapeVerde
{ 197, 111}, // Meru/Kenya
{ 198, 111}, // Kalenjin/Kenya
{ 199, 148}, // Nama/Namibia
{ 200, 210}, // Machame/Tanzania
{ 201, 82}, // Colognian/Germany
{ 202, 111}, // Masai/Kenya
{ 202, 210}, // Masai/Tanzania
{ 203, 221}, // Soga/Uganda
{ 204, 111}, // Luyia/Kenya
{ 205, 210}, // Asu/Tanzania
{ 206, 111}, // Teso/Kenya
{ 206, 221}, // Teso/Uganda
{ 207, 67}, // Saho/Eritrea
{ 208, 132}, // Koyra Chiini/Mali
{ 209, 210}, // Rwa/Tanzania
{ 210, 111}, // Luo/Kenya
{ 211, 221}, // Chiga/Uganda
{ 212, 145}, // Central Morocco Tamazight/Morocco
{ 213, 132}, // Koyraboro Senni/Mali
{ 214, 210} // Shambala/Tanzania
};
static const int g_locale_list_count = sizeof(g_locale_list)/sizeof(g_locale_list[0]);
const QString testName = QString::fromLatin1("data_%1 (%2/%3)").arg(i)
.arg(QLocale::languageToString((QLocale::Language)item.m_language_id))
.arg(QLocale::countryToString((QLocale::Country)item.m_country_id));
QTest::newRow(testName.toLatin1().constData()) << (int)item.m_language_id << (int)item.m_country_id;
}
}
void tst_QLocale::testNames() void tst_QLocale::testNames()
{ {
for (int i = 0; i < g_locale_list_count; ++i) { QFETCH(int, language);
const LocaleListItem &item = g_locale_list[i]; QFETCH(int, country);
QLocale l1((QLocale::Language)item.language, (QLocale::Country)item.country);
QCOMPARE((int)l1.language(), item.language); QLocale l1((QLocale::Language)language, (QLocale::Country)country);
QCOMPARE((int)l1.country(), item.country); if (language == QLocale::AnyLanguage && country == QLocale::AnyCountry)
language = QLocale::C;
QCOMPARE((int)l1.language(), language);
QCOMPARE((int)l1.country(), country);
QString name = l1.name(); QString name = l1.name();
QLocale l2(name); QLocale l2(name);
QCOMPARE((int)l2.language(), item.language); QCOMPARE((int)l2.language(), language);
QCOMPARE((int)l2.country(), item.country); QCOMPARE((int)l2.country(), country);
QCOMPARE(l2.name(), name); QCOMPARE(l2.name(), name);
QLocale l3(name + QLatin1String("@foo")); QLocale l3(name + QLatin1String("@foo"));
QCOMPARE((int)l3.language(), item.language); QCOMPARE((int)l3.language(), language);
QCOMPARE((int)l3.country(), item.country); QCOMPARE((int)l3.country(), country);
QCOMPARE(l3.name(), name); QCOMPARE(l3.name(), name);
QLocale l4(name + QLatin1String(".foo")); QLocale l4(name + QLatin1String(".foo"));
QCOMPARE((int)l4.language(), item.language); QCOMPARE((int)l4.language(), language);
QCOMPARE((int)l4.country(), item.country); QCOMPARE((int)l4.country(), country);
QCOMPARE(l4.name(), name); QCOMPARE(l4.name(), name);
if (item.language != QLocale::C) { if (language != QLocale::C) {
int idx = name.indexOf(QLatin1Char('_')); int idx = name.indexOf(QLatin1Char('_'));
QVERIFY(idx != -1); QVERIFY(idx != -1);
QString lang = name.left(idx); QString lang = name.left(idx);
QCOMPARE((int)QLocale(lang).language(), item.language); QCOMPARE((int)QLocale(lang).language(), language);
QCOMPARE((int)QLocale(lang + QLatin1String("@foo")).language(), item.language); QCOMPARE((int)QLocale(lang + QLatin1String("@foo")).language(), language);
QCOMPARE((int)QLocale(lang + QLatin1String(".foo")).language(), item.language); QCOMPARE((int)QLocale(lang + QLatin1String(".foo")).language(), language);
}
} }
} }
@ -2028,12 +1724,37 @@ void tst_QLocale::uiLanguages()
QCOMPARE(c.uiLanguages().at(0), QLatin1String("C")); QCOMPARE(c.uiLanguages().at(0), QLatin1String("C"));
const QLocale en_US("en_US"); const QLocale en_US("en_US");
QCOMPARE(en_US.uiLanguages().size(), 1); QCOMPARE(en_US.uiLanguages().size(), 3);
QCOMPARE(en_US.uiLanguages().at(0), QLatin1String("en-US")); QCOMPARE(en_US.uiLanguages().at(0), QLatin1String("en"));
QCOMPARE(en_US.uiLanguages().at(1), QLatin1String("en-US"));
QCOMPARE(en_US.uiLanguages().at(2), QLatin1String("en-Latn-US"));
const QLocale en_Latn_US("en_Latn_US");
QCOMPARE(en_Latn_US.uiLanguages().size(), 3);
QCOMPARE(en_Latn_US.uiLanguages().at(0), QLatin1String("en"));
QCOMPARE(en_Latn_US.uiLanguages().at(1), QLatin1String("en-US"));
QCOMPARE(en_Latn_US.uiLanguages().at(2), QLatin1String("en-Latn-US"));
const QLocale en_GB("en_GB");
QCOMPARE(en_GB.uiLanguages().size(), 2);
QCOMPARE(en_GB.uiLanguages().at(0), QLatin1String("en-GB"));
QCOMPARE(en_GB.uiLanguages().at(1), QLatin1String("en-Latn-GB"));
const QLocale en_Dsrt_US("en_Dsrt_US");
QCOMPARE(en_Dsrt_US.uiLanguages().size(), 2);
QCOMPARE(en_Dsrt_US.uiLanguages().at(0), QLatin1String("en-Dsrt"));
QCOMPARE(en_Dsrt_US.uiLanguages().at(1), QLatin1String("en-Dsrt-US"));
const QLocale ru_RU("ru_RU"); const QLocale ru_RU("ru_RU");
QCOMPARE(ru_RU.uiLanguages().size(), 1); QCOMPARE(ru_RU.uiLanguages().size(), 3);
QCOMPARE(ru_RU.uiLanguages().at(0), QLatin1String("ru-RU")); QCOMPARE(ru_RU.uiLanguages().at(0), QLatin1String("ru"));
QCOMPARE(ru_RU.uiLanguages().at(1), QLatin1String("ru-RU"));
QCOMPARE(ru_RU.uiLanguages().at(2), QLatin1String("ru-Cyrl-RU"));
const QLocale zh_Hant("zh_Hant");
QCOMPARE(zh_Hant.uiLanguages().size(), 2);
QCOMPARE(zh_Hant.uiLanguages().at(0), QLatin1String("zh-TW"));
QCOMPARE(zh_Hant.uiLanguages().at(1), QLatin1String("zh-Hant-TW"));
} }
void tst_QLocale::weekendDays() void tst_QLocale::weekendDays()

View File

@ -48,6 +48,7 @@ from xpathlite import DraftResolution
from dateconverter import convert_date from dateconverter import convert_date
import re import re
findAlias = xpathlite.findAlias
findEntry = xpathlite.findEntry findEntry = xpathlite.findEntry
findEntryInFile = xpathlite._findEntryInFile findEntryInFile = xpathlite._findEntryInFile
findTagsInFile = xpathlite.findTagsInFile findTagsInFile = xpathlite.findTagsInFile
@ -116,6 +117,12 @@ def generateLocaleInfo(path):
if not path.endswith(".xml"): if not path.endswith(".xml"):
return {} return {}
# skip legacy/compatibility ones
alias = findAlias(path)
if alias:
raise xpathlite.Error("alias to \"%s\"" % alias)
language_code = findEntryInFile(path, "identity/language", attribute="type")[0] language_code = findEntryInFile(path, "identity/language", attribute="type")[0]
if language_code == 'root': if language_code == 'root':
# just skip it # just skip it
@ -128,18 +135,16 @@ def generateLocaleInfo(path):
# ### actually there is only one locale with variant: en_US_POSIX # ### actually there is only one locale with variant: en_US_POSIX
# does anybody care about it at all? # does anybody care about it at all?
if variant_code: if variant_code:
return {} raise xpathlite.Error("we do not support variants (\"%s\")" % variant_code)
language_id = enumdata.languageCodeToId(language_code) language_id = enumdata.languageCodeToId(language_code)
if language_id <= 0: if language_id <= 0:
sys.stderr.write("unknown language code \"" + language_code + "\"\n") raise xpathlite.Error("unknown language code \"%s\"" % language_code)
return {}
language = enumdata.language_list[language_id][0] language = enumdata.language_list[language_id][0]
script_id = enumdata.scriptCodeToId(script_code) script_id = enumdata.scriptCodeToId(script_code)
if script_id == -1: if script_id == -1:
sys.stderr.write("unknown script code \"" + script_code + "\"\n") raise xpathlite.Error("unknown script code \"%s\"" % script_code)
return {}
script = enumdata.script_list[script_id][0] script = enumdata.script_list[script_id][0]
# we should handle fully qualified names with the territory # we should handle fully qualified names with the territory
@ -147,8 +152,7 @@ def generateLocaleInfo(path):
return {} return {}
country_id = enumdata.countryCodeToId(country_code) country_id = enumdata.countryCodeToId(country_code)
if country_id <= 0: if country_id <= 0:
sys.stderr.write("unknown country code \"" + country_code + "\"\n") raise xpathlite.Error("unknown country code \"%s\"" % country_code)
return {}
country = enumdata.country_list[country_id][0] country = enumdata.country_list[country_id][0]
# So we say we accept only those values that have "contributed" or # So we say we accept only those values that have "contributed" or
@ -557,10 +561,14 @@ cldr_files = os.listdir(cldr_dir)
locale_database = {} locale_database = {}
for file in cldr_files: for file in cldr_files:
try:
l = generateLocaleInfo(cldr_dir + "/" + file) l = generateLocaleInfo(cldr_dir + "/" + file)
if not l: if not l:
sys.stderr.write("skipping file \"" + file + "\"\n") sys.stderr.write("skipping file \"" + file + "\"\n")
continue continue
except xpathlite.Error as e:
sys.stderr.write("skipping file \"%s\" (%s)\n" % (file, str(e)))
continue
locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l
@ -611,16 +619,15 @@ def _parseLocale(l):
script = "AnyScript" script = "AnyScript"
country = "AnyCountry" country = "AnyCountry"
if l == "und": # we are treating unknown locale like C if l == "und":
return (None, None, None) raise xpathlite.Error("we are treating unknown locale like C")
items = l.split("_") items = l.split("_")
language_code = items[0] language_code = items[0]
if language_code != "und": if language_code != "und":
language_id = enumdata.languageCodeToId(language_code) language_id = enumdata.languageCodeToId(language_code)
if language_id == -1: if language_id == -1:
sys.stderr.write("unknown language code \"" + language_code + "\"\n") raise xpathlite.Error("unknown language code \"%s\"" % language_code)
return (None, None, None)
language = enumdata.language_list[language_id][0] language = enumdata.language_list[language_id][0]
if len(items) > 1: if len(items) > 1:
@ -631,16 +638,14 @@ def _parseLocale(l):
if len(script_code) == 4: if len(script_code) == 4:
script_id = enumdata.scriptCodeToId(script_code) script_id = enumdata.scriptCodeToId(script_code)
if script_id == -1: if script_id == -1:
sys.stderr.write("unknown script code \"" + script_code + "\"\n") raise xpathlite.Error("unknown script code \"%s\"" % script_code)
return (None, None, None)
script = enumdata.script_list[script_id][0] script = enumdata.script_list[script_id][0]
else: else:
country_code = script_code country_code = script_code
if country_code: if country_code:
country_id = enumdata.countryCodeToId(country_code) country_id = enumdata.countryCodeToId(country_code)
if country_id == -1: if country_id == -1:
sys.stderr.write("unknown country code \"" + country_code + "\"\n") raise xpathlite.Error("unknown country code \"%s\"" % country_code)
return (None, None, None)
country = enumdata.country_list[country_id][0] country = enumdata.country_list[country_id][0]
return (language, script, country) return (language, script, country)
@ -651,13 +656,15 @@ for ns in findTagsInFile(cldr_dir + "/../supplemental/likelySubtags.xml", "likel
for data in ns[1:][0]: # ns looks like this: [u'likelySubtag', [(u'from', u'aa'), (u'to', u'aa_Latn_ET')]] for data in ns[1:][0]: # ns looks like this: [u'likelySubtag', [(u'from', u'aa'), (u'to', u'aa_Latn_ET')]]
tmp[data[0]] = data[1] tmp[data[0]] = data[1]
try:
(from_language, from_script, from_country) = _parseLocale(tmp[u"from"]) (from_language, from_script, from_country) = _parseLocale(tmp[u"from"])
if not from_language: except xpathlite.Error as e:
sys.stderr.write("skipping likelySubtag " + tmp[u"from"] + " -> " + tmp[u"to"] + "\n") sys.stderr.write("skipping likelySubtag \"%s\" -> \"%s\" (%s)\n" % (tmp[u"from"], tmp[u"to"], str(e)))
continue continue
try:
(to_language, to_script, to_country) = _parseLocale(tmp[u"to"]) (to_language, to_script, to_country) = _parseLocale(tmp[u"to"])
if not to_language: except xpathlite.Error as e:
sys.stderr.write("skipping likelySubtag " + tmp[u"from"] + " -> " + tmp[u"to"] + "\n") sys.stderr.write("skipping likelySubtag \"%s\" -> \"%s\" (%s)\n" % (tmp[u"from"], tmp[u"to"], str(e)))
continue continue
# substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags # substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
if to_country == "AnyCountry" and from_country != to_country: if to_country == "AnyCountry" and from_country != to_country:

View File

@ -291,7 +291,7 @@ class Locale:
self.currencyFormat = eltText(firstChildElt(elt, "currencyFormat")) self.currencyFormat = eltText(firstChildElt(elt, "currencyFormat"))
self.currencyNegativeFormat = eltText(firstChildElt(elt, "currencyNegativeFormat")) self.currencyNegativeFormat = eltText(firstChildElt(elt, "currencyNegativeFormat"))
def loadLocaleMap(doc, language_map, script_map, country_map): def loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map):
result = {} result = {}
locale_list_elt = firstChildElt(doc.documentElement, "localeList") locale_list_elt = firstChildElt(doc.documentElement, "localeList")
@ -307,6 +307,28 @@ def loadLocaleMap(doc, language_map, script_map, country_map):
country_id = countryNameToId(locale.country, country_map) country_id = countryNameToId(locale.country, country_map)
if country_id == -1: if country_id == -1:
sys.stderr.write("Cannot find a country id for '%s'\n" % locale.country) sys.stderr.write("Cannot find a country id for '%s'\n" % locale.country)
if language_id != 1: # C
if country_id == 0:
sys.stderr.write("loadLocaleMap: No country id for '%s'\n" % locale.language)
if script_id == 0:
# find default script for a given language and country (see http://www.unicode.org/reports/tr35/#Likely_Subtags)
for key in likely_subtags_map.keys():
tmp = likely_subtags_map[key]
if tmp["from"][0] == locale.language and tmp["from"][1] == "AnyScript" and tmp["from"][2] == locale.country:
locale.script = tmp["to"][1]
script_id = scriptNameToId(locale.script, script_map)
break
if script_id == 0 and country_id != 0:
# try with no country
for key in likely_subtags_map.keys():
tmp = likely_subtags_map[key]
if tmp["from"][0] == locale.language and tmp["from"][1] == "AnyScript" and tmp["from"][2] == "AnyCountry":
locale.script = tmp["to"][1]
script_id = scriptNameToId(locale.script, script_map)
break
result[(language_id, script_id, country_id)] = locale result[(language_id, script_id, country_id)] = locale
locale_elt = nextSiblingElt(locale_elt, "locale") locale_elt = nextSiblingElt(locale_elt, "locale")
@ -321,13 +343,21 @@ def compareLocaleKeys(key1, key2):
l1 = compareLocaleKeys.locale_map[key1] l1 = compareLocaleKeys.locale_map[key1]
l2 = compareLocaleKeys.locale_map[key2] l2 = compareLocaleKeys.locale_map[key2]
if l1.language in compareLocaleKeys.default_map: if (l1.language, l1.script) in compareLocaleKeys.default_map.keys():
default = compareLocaleKeys.default_map[l1.language] default = compareLocaleKeys.default_map[(l1.language, l1.script)]
if l1.country == default and key1[1] == 0: if l1.country == default:
return -1 return -1
if l2.country == default and key2[1] == 0: if l2.country == default:
return 1 return 1
if key1[1] != key2[1]:
if (l2.language, l2.script) in compareLocaleKeys.default_map.keys():
default = compareLocaleKeys.default_map[(l2.language, l2.script)]
if l2.country == default:
return 1
if l1.country == default:
return -1
if key1[1] != key2[1]: if key1[1] != key2[1]:
return key1[1] - key2[1] return key1[1] - key2[1]
else: else:
@ -476,9 +506,9 @@ def main():
default_map = {} default_map = {}
for key in likely_subtags_map.keys(): for key in likely_subtags_map.keys():
tmp = likely_subtags_map[key] tmp = likely_subtags_map[key]
if tmp["from"][2] == "AnyCountry" and tmp["to"][2] != "AnyCountry" and tmp["from"][1] == "AnyScript": if tmp["from"][1] == "AnyScript" and tmp["from"][2] == "AnyCountry" and tmp["to"][2] != "AnyCountry":
default_map[tmp["to"][0]] = tmp["to"][2] default_map[(tmp["to"][0], tmp["to"][1])] = tmp["to"][2]
locale_map = loadLocaleMap(doc, language_map, script_map, country_map) locale_map = loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map)
dupes = findDupes(language_map, country_map) dupes = findDupes(language_map, country_map)
cldr_version = eltText(firstChildElt(doc.documentElement, "version")) cldr_version = eltText(firstChildElt(doc.documentElement, "version"))
@ -495,6 +525,57 @@ def main():
*/\n\n\n\ */\n\n\n\
" % (str(datetime.date.today()), cldr_version) ) " % (str(datetime.date.today()), cldr_version) )
# Likely subtags map
# Emits the C++ table "static const QLocaleId likely_subtags[]" as consecutive
# (from, to) pairs of { language, script, country } ids, one pair per line.
# Each line ends with a human-readable "// from -> to" comment built from the
# codes stored at index 1 of the language/script/country map entries.
data_temp_file.write("static const QLocaleId likely_subtags[] = {\n")
index = 0
for tmp in likely_subtags_map.values():
    from_language = languageNameToId(tmp["from"][0], language_map)
    from_script = scriptNameToId(tmp["from"][1], script_map)
    from_country = countryNameToId(tmp["from"][2], country_map)
    to_language = languageNameToId(tmp["to"][0], language_map)
    to_script = scriptNameToId(tmp["to"][1], script_map)
    to_country = countryNameToId(tmp["to"][2], country_map)

    # Build the "lang_Script_COUNTRY" label for both sides with the same code
    # path. An id of 0 means "any": the language then shows as "und", while
    # the script and country parts are simply left out.
    # (Previously the "to" side appended its "und" to the "from" label.)
    labels = []
    for (lang_id, scr_id, ctry_id) in ((from_language, from_script, from_country),
                                       (to_language, to_script, to_country)):
        if lang_id != 0:
            parts = [language_map[lang_id][1]]
        else:
            parts = ["und"]
        if scr_id != 0:
            parts.append(script_map[scr_id][1])
        if ctry_id != 0:
            parts.append(country_map[ctry_id][1])
        labels.append("_".join(parts))
    cmnt_from, cmnt_to = labels

    data_temp_file.write("    ")
    data_temp_file.write("{ %3d, %2d, %3d }, { %3d, %2d, %3d }" % (from_language, from_script, from_country, to_language, to_script, to_country))
    index += 1
    # Every pair except the last is followed by a comma; the last one gets a
    # space instead so the trailing comments stay aligned.
    if index != len(likely_subtags_map):
        data_temp_file.write(",")
    else:
        data_temp_file.write(" ")
    data_temp_file.write(" // %s -> %s\n" % (cmnt_from, cmnt_to))
data_temp_file.write("};\n")
data_temp_file.write("\n")
# Locale index # Locale index
data_temp_file.write("static const quint16 locale_index[] = {\n") data_temp_file.write("static const quint16 locale_index[] = {\n")
index = 0 index = 0