Use CLDR's names in QLocale::*ToString() for language, script, territory
Various comments need to continue using the enumdata.py names, as they associate data with particular enum members, but we can now correctly use the en.xml versions of their names when we report them, rather than the enum-friendly names we use in the code.

Since this means the data may now stray outside plain ASCII (it will be UTF-8-encoded), the code that formerly read this data via QLatin1StringView() must now use QString::fromUtf8() instead.

Fixes: QTBUG-94460
Change-Id: Id3b08875a46af58c0555c3e303b0e15a19441509
Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
parent
afd7d68244
commit
1ae24f8b50
@ -1568,7 +1568,7 @@ QString QLocale::languageToString(Language language)
|
||||
{
|
||||
if (language > QLocale::LastLanguage)
|
||||
return "Unknown"_L1;
|
||||
return QLatin1StringView(language_name_list + language_name_index[language]);
|
||||
return QString::fromUtf8(language_name_list + language_name_index[language]);
|
||||
}
|
||||
|
||||
/*!
|
||||
@ -1582,7 +1582,7 @@ QString QLocale::territoryToString(QLocale::Territory territory)
|
||||
{
|
||||
if (territory > QLocale::LastTerritory)
|
||||
return "Unknown"_L1;
|
||||
return QLatin1StringView(territory_name_list + territory_name_index[territory]);
|
||||
return QString::fromUtf8(territory_name_list + territory_name_index[territory]);
|
||||
}
|
||||
|
||||
#if QT_DEPRECATED_SINCE(6, 6)
|
||||
@ -1610,7 +1610,7 @@ QString QLocale::scriptToString(QLocale::Script script)
|
||||
{
|
||||
if (script > QLocale::LastScript)
|
||||
return "Unknown"_L1;
|
||||
return QLatin1StringView(script_name_list + script_name_index[script]);
|
||||
return QString::fromUtf8(script_name_list + script_name_index[script]);
|
||||
}
|
||||
|
||||
/*!
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -3382,9 +3382,20 @@ void tst_QLocale::languageToString_data()
|
||||
QTest::addColumn<QString>("name");
|
||||
|
||||
// Prone to change at CLDR updates.
|
||||
QTest::newRow("cu") << QLocale::Church << u"Church Slavic"_s;
|
||||
QTest::newRow("dyo") << QLocale::JolaFonyi << u"Jola-Fonyi"_s;
|
||||
QTest::newRow("ff") << QLocale::Fulah << u"Fula"_s;
|
||||
QTest::newRow("gd") << QLocale::Gaelic << u"Scottish Gaelic"_s;
|
||||
QTest::newRow("ht") << QLocale::Haitian << u"Haitian Creole"_s;
|
||||
QTest::newRow("lu") << QLocale::LubaKatanga << u"Luba-Katanga"_s;
|
||||
QTest::newRow("mgh") << QLocale::MakhuwaMeetto << u"Makhuwa-Meetto"_s;
|
||||
QTest::newRow("mgo") << QLocale::Meta << u"Meta\u02bc"_s;
|
||||
QTest::newRow("mi") << QLocale::Maori << u"M\u0101" "ori"_s;
|
||||
QTest::newRow("nb") << QLocale::NorwegianBokmal << u"Norwegian Bokm\u00e5" "l"_s;
|
||||
QTest::newRow("nqo") << QLocale::Nko << u"N\u2019" "Ko"_s;
|
||||
QTest::newRow("quc") << QLocale::Kiche << u"K\u02bc" "iche\u02bc"_s;
|
||||
QTest::newRow("sah") << QLocale::Sakha << u"Yakut"_s;
|
||||
QTest::newRow("vo") << QLocale::Volapuk << u"Volap\u00fc" "k"_s;
|
||||
}
|
||||
|
||||
void tst_QLocale::languageToString()
|
||||
@ -3401,9 +3412,15 @@ void tst_QLocale::scriptToString_data()
|
||||
QTest::addColumn<QString>("name");
|
||||
|
||||
// Prone to change at CLDR updates.
|
||||
QTest::newRow("Cans")
|
||||
<< QLocale::CanadianAboriginalScript << u"Unified Canadian Aboriginal Syllabics"_s;
|
||||
QTest::newRow("Dupl") << QLocale::DuployanScript << u"Duployan shorthand"_s;
|
||||
QTest::newRow("Egyp") << QLocale::EgyptianHieroglyphsScript << u"Egyptian hieroglyphs"_s;
|
||||
QTest::newRow("Nkoo") << QLocale::NkoScript << u"N\u2019" "Ko"_s;
|
||||
QTest::newRow("Phag") << QLocale::PhagsPaScript << u"Phags-pa"_s;
|
||||
QTest::newRow("Rohg") << QLocale::HanifiScript << u"Hanifi Rohingya"_s;
|
||||
QTest::newRow("Sgnw") << QLocale::SignWritingScript << u"SignWriting"_s;
|
||||
QTest::newRow("Xsux") << QLocale::CuneiformScript << u"Sumero-Akkadian Cuneiform"_s;
|
||||
}
|
||||
|
||||
void tst_QLocale::scriptToString()
|
||||
@ -3420,11 +3437,43 @@ void tst_QLocale::territoryToString_data()
|
||||
QTest::addColumn<QString>("name");
|
||||
// Prone to change at CLDR updates.
|
||||
|
||||
QTest::newRow("AX") << QLocale::AlandIslands << u"\u00c5" "land Islands"_s;
|
||||
QTest::newRow("AG") << QLocale::AntiguaAndBarbuda << u"Antigua & Barbuda"_s;
|
||||
QTest::newRow("BA") << QLocale::BosniaAndHerzegovina << u"Bosnia & Herzegovina"_s;
|
||||
QTest::newRow("BL") << QLocale::SaintBarthelemy << u"St. Barth\u00e9" "lemy"_s;
|
||||
QTest::newRow("CC") << QLocale::CocosIslands << u"Cocos (Keeling) Islands"_s;
|
||||
QTest::newRow("CD") << QLocale::CongoKinshasa << u"Congo - Kinshasa"_s;
|
||||
QTest::newRow("CG") << QLocale::CongoBrazzaville << u"Congo - Brazzaville"_s;
|
||||
QTest::newRow("CI") << QLocale::IvoryCoast << u"C\u00f4" "te d\u2019" "Ivoire"_s;
|
||||
QTest::newRow("CW") << QLocale::Curacao << u"Cura\u00e7" "ao"_s;
|
||||
QTest::newRow("EA") << QLocale::CeutaAndMelilla << u"Ceuta & Melilla"_s;
|
||||
QTest::newRow("GS")
|
||||
<< QLocale::SouthGeorgiaAndSouthSandwichIslands
|
||||
<< u"South Georgia & South Sandwich Islands"_s;
|
||||
QTest::newRow("GW") << QLocale::GuineaBissau << u"Guinea-Bissau"_s;
|
||||
QTest::newRow("HM") << QLocale::HeardAndMcDonaldIslands << u"Heard & McDonald Islands"_s;
|
||||
QTest::newRow("IM") << QLocale::IsleOfMan << u"Isle of Man"_s;
|
||||
QTest::newRow("KN") << QLocale::SaintKittsAndNevis << u"St. Kitts & Nevis"_s;
|
||||
QTest::newRow("LC") << QLocale::SaintLucia << u"St. Lucia"_s;
|
||||
QTest::newRow("MF") << QLocale::SaintMartin << u"St. Martin"_s;
|
||||
QTest::newRow("MK") << QLocale::Macedonia << u"North Macedonia"_s;
|
||||
QTest::newRow("MM") << QLocale::Myanmar << u"Myanmar (Burma)"_s;
|
||||
QTest::newRow("MO") << QLocale::Macao << u"Macao SAR China"_s;
|
||||
QTest::newRow("PM") << QLocale::SaintPierreAndMiquelon << u"St. Pierre & Miquelon"_s;
|
||||
QTest::newRow("PN") << QLocale::Pitcairn << u"Pitcairn Islands"_s;
|
||||
QTest::newRow("RE") << QLocale::Reunion << u"R\u00e9" "union"_s;
|
||||
QTest::newRow("SH") << QLocale::SaintHelena << u"St. Helena"_s;
|
||||
QTest::newRow("SJ") << QLocale::SvalbardAndJanMayen << u"Svalbard & Jan Mayen"_s;
|
||||
QTest::newRow("ST")
|
||||
<< QLocale::SaoTomeAndPrincipe << u"S\u00e3" "o Tom\u00e9" " & Pr\u00ed" "ncipe"_s;
|
||||
QTest::newRow("TA") << QLocale::TristanDaCunha << u"Tristan da Cunha"_s;
|
||||
QTest::newRow("TC") << QLocale::TurksAndCaicosIslands << u"Turks & Caicos Islands"_s;
|
||||
QTest::newRow("TR") << QLocale::Turkey << u"T\u00fc" "rkiye"_s;
|
||||
QTest::newRow("TT") << QLocale::TrinidadAndTobago << u"Trinidad & Tobago"_s;
|
||||
QTest::newRow("UM") << QLocale::UnitedStatesOutlyingIslands << u"U.S. Outlying Islands"_s;
|
||||
QTest::newRow("VC") << QLocale::SaintVincentAndGrenadines << u"St. Vincent & Grenadines"_s;
|
||||
QTest::newRow("VI") << QLocale::UnitedStatesVirginIslands << u"U.S. Virgin Islands"_s;
|
||||
QTest::newRow("WF") << QLocale::WallisAndFutuna << u"Wallis & Futuna"_s;
|
||||
QTest::newRow("001") << QLocale::World << u"world"_s;
|
||||
}
|
||||
|
||||
|
@ -254,6 +254,9 @@ class CldrAccess (object):
|
||||
inheritance, where relevant."""
|
||||
return LocaleScanner(name, self.__localeRoots(name), self.__rootLocale)
|
||||
|
||||
def englishNaming(self, tag): # see QLocaleXmlWriter.enumData()
|
||||
return self.__codeMap(tag).get
|
||||
|
||||
@property
|
||||
def fileLocales(self) -> Iterable[str]:
|
||||
"""Generator for locale IDs seen in file-names.
|
||||
|
@ -76,7 +76,7 @@ def main(out, err):
|
||||
writer = QLocaleXmlWriter(emit.write)
|
||||
|
||||
writer.version(reader.root.cldrVersion)
|
||||
writer.enumData()
|
||||
writer.enumData(reader.root.englishNaming)
|
||||
writer.likelySubTags(reader.likelySubTags())
|
||||
writer.locales(reader.readLocales(args.calendars), args.calendars)
|
||||
|
||||
|
@ -75,7 +75,7 @@ def names_clash(cldr, enum):
|
||||
cldr = cldr[:f].rstrip() + ' ' + cldr[t + 1:].lstrip()
|
||||
|
||||
# Various accented letters:
|
||||
remap = { 'å': 'a', 'ã': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ô': 'o', 'ü': 'u'}
|
||||
remap = { 'ã': 'a', 'å': 'a', 'ā': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ô': 'o', 'ü': 'u'}
|
||||
skip = '\u02bc' # Punctuation for which .isalpha() is true.
|
||||
# Let cldr match (ignoring non-letters and case) any substring as enum:
|
||||
if ''.join(enum.lower().split()) in ''.join(
|
||||
|
@ -100,16 +100,20 @@ def convertFormat(format):
|
||||
class QLocaleXmlReader (object):
|
||||
def __init__(self, filename):
|
||||
self.root = self.__parse(filename)
|
||||
# Lists of (id, name, code) triples:
|
||||
languages = tuple(self.__loadMap('language'))
|
||||
scripts = tuple(self.__loadMap('script'))
|
||||
territories = tuple(self.__loadMap('territory'))
|
||||
|
||||
from enumdata import language_map, script_map, territory_map
|
||||
# Lists of (id, enum name, code, en.xml name) tuples:
|
||||
languages = tuple(self.__loadMap('language', language_map))
|
||||
scripts = tuple(self.__loadMap('script', script_map))
|
||||
territories = tuple(self.__loadMap('territory', territory_map))
|
||||
self.__likely = tuple(self.__likelySubtagsMap())
|
||||
# Mappings {ID: (name, code)}
|
||||
|
||||
# Mappings {ID: (enum name, code, en.xml name)}
|
||||
self.languages = dict((v[0], v[1:]) for v in languages)
|
||||
self.scripts = dict((v[0], v[1:]) for v in scripts)
|
||||
self.territories = dict((v[0], v[1:]) for v in territories)
|
||||
# Private mappings {name: (ID, code)}
|
||||
|
||||
# Private mappings {enum name: (ID, code)}
|
||||
self.__langByName = dict((v[1], (v[0], v[2])) for v in languages)
|
||||
self.__textByName = dict((v[1], (v[0], v[2])) for v in scripts)
|
||||
self.__landByName = dict((v[1], (v[0], v[2])) for v in territories)
|
||||
@ -211,10 +215,11 @@ class QLocaleXmlReader (object):
|
||||
return name
|
||||
|
||||
# Implementation details:
|
||||
def __loadMap(self, category):
|
||||
def __loadMap(self, category, enum):
|
||||
kid = self.__firstChildText
|
||||
for element in self.__eachEltInGroup(self.root, f'{category}List', category):
|
||||
yield int(kid(element, 'id')), kid(element, 'name'), kid(element, 'code')
|
||||
key = int(kid(element, 'id'))
|
||||
yield key, enum[key][0], kid(element, 'code'), kid(element, 'name')
|
||||
|
||||
def __likelySubtagsMap(self):
|
||||
def triplet(element, keys=('language', 'script', 'territory'), kid = self.__firstChildText):
|
||||
@ -341,11 +346,21 @@ class QLocaleXmlWriter (object):
|
||||
self.__write('<localeDatabase>')
|
||||
|
||||
# Output of various sections, in their usual order:
|
||||
def enumData(self):
|
||||
def enumData(self, code2name):
|
||||
"""Output name/id/code tables for language, script and territory.
|
||||
|
||||
Parameter, code2name, is a function taking 'language',
|
||||
'script' or 'territory' and returning a lookup function that
|
||||
maps codes, of the relevant type, to their English names. This
|
||||
lookup function is passed a code and the name, both taken from
|
||||
enumdata.py, that QLocale uses, so the .get() of a dict will
|
||||
work. The English name from this lookup will be used by
|
||||
QLocale::*ToString() for the enum member whose name is based
|
||||
on the enumdata.py name passed as fallback to the lookup."""
|
||||
from enumdata import language_map, script_map, territory_map
|
||||
self.__enumTable('language', language_map)
|
||||
self.__enumTable('script', script_map)
|
||||
self.__enumTable('territory', territory_map)
|
||||
self.__enumTable('language', language_map, code2name)
|
||||
self.__enumTable('script', script_map, code2name)
|
||||
self.__enumTable('territory', territory_map, code2name)
|
||||
# Prepare to detect any unused codes (see __writeLocale(), close()):
|
||||
self.__languages = set(p[1] for p in language_map.values()
|
||||
if not p[1].isspace())
|
||||
@ -407,13 +422,18 @@ class QLocaleXmlWriter (object):
|
||||
def __complain(text):
|
||||
raise Error('Attempted to write data after closing :-(')
|
||||
|
||||
def __enumTable(self, tag, table):
|
||||
@staticmethod
|
||||
def __xmlSafe(text):
|
||||
return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
|
||||
|
||||
def __enumTable(self, tag, table, code2name):
|
||||
self.__openTag(f'{tag}List')
|
||||
for key, value in table.items():
|
||||
enname, safe = code2name(tag), self.__xmlSafe
|
||||
for key, (name, code) in table.items():
|
||||
self.__openTag(tag)
|
||||
self.inTag('name', value[0])
|
||||
self.inTag('name', safe(enname(code, name)))
|
||||
self.inTag('id', key)
|
||||
self.inTag('code', value[1])
|
||||
self.inTag('code', code)
|
||||
self.__closeTag(tag)
|
||||
self.__closeTag(f'{tag}List')
|
||||
|
||||
|
@ -20,7 +20,7 @@ from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from qlocalexml import QLocaleXmlReader
|
||||
from localetools import unicode2hex, wrap_list, Error, Transcriber, SourceFileEditor, qtbase_root
|
||||
from localetools import *
|
||||
from iso639_3 import LanguageCodeData
|
||||
|
||||
class LocaleKeySorter:
|
||||
@ -337,7 +337,11 @@ class LocaleDataWriter (LocaleSourceEditor):
|
||||
for key, value in book.items():
|
||||
if key == 0:
|
||||
continue
|
||||
out(f'"{value[0]}\\0"\n')
|
||||
enum, name = value[0], value[-1]
|
||||
if names_clash(name, enum):
|
||||
out(f'"{name}\\0" // {enum}\n')
|
||||
else:
|
||||
out(f'"{name}\\0"\n') # Automagically utf-8 encoded
|
||||
out(';\n\n')
|
||||
|
||||
out(f'static constexpr quint16 {form}_name_index[] = {{\n')
|
||||
@ -346,9 +350,8 @@ class LocaleDataWriter (LocaleSourceEditor):
|
||||
for key, value in book.items():
|
||||
if key == 0:
|
||||
continue
|
||||
name = value[0]
|
||||
out(f'{index:6d}, // {name}\n')
|
||||
index += len(name) + 1
|
||||
out(f'{index:6d}, // {value[0]}\n')
|
||||
index += len(value[-1].encode('utf-8')) + 1
|
||||
out('};\n\n')
|
||||
|
||||
@staticmethod
|
||||
|
Loading…
x
Reference in New Issue
Block a user