diff --git a/util/locale_database/cldr.py b/util/locale_database/cldr.py index 75d687dd11a..b518c873511 100644 --- a/util/locale_database/cldr.py +++ b/util/locale_database/cldr.py @@ -57,14 +57,14 @@ class CldrReader (object): else: self.grumble(f'Skipping likelySubtag "{got}" -> "{use}" ({e})\n') continue - if all(code.startswith('Any') and code[3].isupper() for code in have[:-1]): + if not any(have): continue give = (give[0], # Substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags - have[1] if give[1] == 'AnyScript' else give[1], - have[2] if give[2] == 'AnyTerritory' else give[2], - give[3]) # AnyVariant similarly ? + give[1] or have[1], + give[2] or have[2], + give[3] or have[3]) yield have, give @@ -205,7 +205,7 @@ class CldrReader (object): script, territory, variant = tags except ValueError: pass - return tuple(p[1] for p in self.root.codesToIdName(language, script, territory, variant)) + return tuple(p[0] for p in self.root.codesToIdName(language, script, territory, variant)) def __splitLocale(self, name): """Generate (language, script, territory, variant) from a locale name diff --git a/util/locale_database/qlocalexml.py b/util/locale_database/qlocalexml.py index 56b421a3735..d0f5ccbe01f 100644 --- a/util/locale_database/qlocalexml.py +++ b/util/locale_database/qlocalexml.py @@ -53,7 +53,7 @@ class QLocaleXmlReader (object): languages = tuple(self.__loadMap('language', language_map)) scripts = tuple(self.__loadMap('script', script_map)) territories = tuple(self.__loadMap('territory', territory_map)) - self.__likely = tuple(self.__likelySubtagsMap()) # in enum name form + self.__likely = tuple(self.__likelySubtagsMap()) # as enum numeric values # Mappings {ID: (enum name, code, en.xml name)} self.languages = {v[0]: v[1:] for v in languages} @@ -75,6 +75,13 @@ class QLocaleXmlReader (object): ) def loadLocaleMap(self, calendars, grumble = lambda text: None): + """Yields id-triplet and locale object for each locale read. + + The id-triplet gives the (language, script, territory) numeric + values for the QLocale enum members describing the + locale. Where the relevant enum value is zero (an Any* member + of the enum), likely subtag rules are used to fill in the + script or territory, if missing, in this triplet.""" kid = self.__firstChildText likely = dict(self.__likely) for elt in self.__eachEltInGroup(self.root, 'localeList', 'locale'): @@ -92,14 +99,14 @@ class QLocaleXmlReader (object): # http://www.unicode.org/reports/tr35/#Likely_Subtags try: try: - to = likely[(locale.language, 'AnyScript', locale.territory)] + to = likely[(language, 0, territory)] except KeyError: - to = likely[(locale.language, 'AnyScript', 'AnyTerritory')] + to = likely[(language, 0, 0)] except KeyError: pass else: - locale.script = to[1] - script = self.__textByName[locale.script][0] + script = to[1] + locale.script = self.scripts[script][0] yield (language, script, territory), locale @@ -146,12 +153,16 @@ class QLocaleXmlReader (object): return kl(tuple(x[0] for x in pair[0])) # Sort self.__likely to enable binary search in C++ code. - for have, give in sorted(((self.__fromNames(has), - self.__fromNames(got)) + for have, give in sorted(((self.__fromIds(has), + self.__fromIds(got)) for has, got in self.__likely), key = keyLikely): - yield ('_'.join(tag(have)), ids(have), - '_'.join(tag(give)), ids(give)) + try: + yield ('_'.join(tag(have)), ids(have), + '_'.join(tag(give)), ids(give)) + except TypeError as what: + what.args += (have, give) + raise def defaultMap(self): """Map language and script to their default territory by ID. @@ -160,11 +171,9 @@ class QLocaleXmlReader (object): sub-tags mapping says language's default locale uses the given script and territory.""" for have, give in self.__likely: - if have[1:] == ('AnyScript', 'AnyTerritory') and give[2] != 'AnyTerritory': + if have[1:] == (0, 0) and give[2]: assert have[0] == give[0], (have, give) - yield ((self.__langByName[give[0]][0], - self.__textByName[give[1]][0]), - self.__landByName[give[2]][0]) + yield (give[:2], give[2]) def enumify(self, name, suffix): """Stick together the parts of an enumdata.py name. @@ -207,14 +216,16 @@ class QLocaleXmlReader (object): key = int(key) yield key, enum[key][0], code, name - def __fromNames(self, names): + def __fromIds(self, ids): # Three (ID, code) pairs: - return self.__langByName[names[0]], self.__textByName[names[1]], self.__landByName[names[2]] + return ((ids[0], self.languages[ids[0]][1]), + (ids[1], self.scripts[ids[1]][1]), + (ids[2], self.territories[ids[2]][1])) # Likely subtag management: def __likelySubtagsMap(self): def triplet(element, keys=('language', 'script', 'territory'), kid = self.__firstChildText): - return tuple(kid(element, key) for key in keys) + return tuple(int(kid(element, key)) for key in keys) kid = self.__firstChildElt for elt in self.__eachEltInGroup(self.root, 'likelySubtags', 'likelySubtag'): diff --git a/util/locale_database/qlocalexml.rnc b/util/locale_database/qlocalexml.rnc index 7428d4b1e18..44129724ff2 100644 --- a/util/locale_database/qlocalexml.rnc +++ b/util/locale_database/qlocalexml.rnc @@ -34,9 +34,9 @@ LikelySubtag = element likelySubtag { } LocaleTriplet = ( - element language { text }, - element script { text }, - element territory { text } + element language { xsd:nonNegativeInteger }, + element script { xsd:nonNegativeInteger }, + element territory { xsd:nonNegativeInteger } ) # TODO: xsd patterns for IANA IDs and space-joined lists of them