QLocaleXML: Use enum values instead of names in likely subtag map

The existing naming lists provide the needed mapping, and this prepares
the way to move the language, script and territory into the 'from' and
'to' elements as attributes, saving some file size. It incidentally
pushes the mapping to enum values upstream and simplifies the
downstream processing.

Change-Id: I8f6d2615d52b14d46d1b795539c71f8afdc310ca
Reviewed-by: Dennis Oberst <dennis.oberst@qt.io>
This commit is contained in:
Edward Welbourne 2024-07-02 18:59:20 +02:00
parent 0a65ded704
commit bd5bb70b7c
3 changed files with 35 additions and 24 deletions

View File

@ -57,14 +57,14 @@ class CldrReader (object):
else: else:
self.grumble(f'Skipping likelySubtag "{got}" -> "{use}" ({e})\n') self.grumble(f'Skipping likelySubtag "{got}" -> "{use}" ({e})\n')
continue continue
if all(code.startswith('Any') and code[3].isupper() for code in have[:-1]): if not any(have):
continue continue
give = (give[0], give = (give[0],
# Substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags # Substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
have[1] if give[1] == 'AnyScript' else give[1], give[1] or have[1],
have[2] if give[2] == 'AnyTerritory' else give[2], give[2] or have[2],
give[3]) # AnyVariant similarly ? give[3] or have[3])
yield have, give yield have, give
@ -205,7 +205,7 @@ class CldrReader (object):
script, territory, variant = tags script, territory, variant = tags
except ValueError: except ValueError:
pass pass
return tuple(p[1] for p in self.root.codesToIdName(language, script, territory, variant)) return tuple(p[0] for p in self.root.codesToIdName(language, script, territory, variant))
def __splitLocale(self, name): def __splitLocale(self, name):
"""Generate (language, script, territory, variant) from a locale name """Generate (language, script, territory, variant) from a locale name

View File

@ -53,7 +53,7 @@ class QLocaleXmlReader (object):
languages = tuple(self.__loadMap('language', language_map)) languages = tuple(self.__loadMap('language', language_map))
scripts = tuple(self.__loadMap('script', script_map)) scripts = tuple(self.__loadMap('script', script_map))
territories = tuple(self.__loadMap('territory', territory_map)) territories = tuple(self.__loadMap('territory', territory_map))
self.__likely = tuple(self.__likelySubtagsMap()) # in enum name form self.__likely = tuple(self.__likelySubtagsMap()) # as enum numeric values
# Mappings {ID: (enum name, code, en.xml name)} # Mappings {ID: (enum name, code, en.xml name)}
self.languages = {v[0]: v[1:] for v in languages} self.languages = {v[0]: v[1:] for v in languages}
@ -75,6 +75,13 @@ class QLocaleXmlReader (object):
) )
def loadLocaleMap(self, calendars, grumble = lambda text: None): def loadLocaleMap(self, calendars, grumble = lambda text: None):
"""Yields id-triplet and locale object for each locale read.
The id-triplet gives the (language, script, territory) numeric
values for the QLocale enum members describing the
locale. Where the relevant enum value is zero (an Any* member
of the enum), likely subtag rules are used to fill in the
script or territory, if missing, in this triplet."""
kid = self.__firstChildText kid = self.__firstChildText
likely = dict(self.__likely) likely = dict(self.__likely)
for elt in self.__eachEltInGroup(self.root, 'localeList', 'locale'): for elt in self.__eachEltInGroup(self.root, 'localeList', 'locale'):
@ -92,14 +99,14 @@ class QLocaleXmlReader (object):
# http://www.unicode.org/reports/tr35/#Likely_Subtags # http://www.unicode.org/reports/tr35/#Likely_Subtags
try: try:
try: try:
to = likely[(locale.language, 'AnyScript', locale.territory)] to = likely[(language, 0, territory)]
except KeyError: except KeyError:
to = likely[(locale.language, 'AnyScript', 'AnyTerritory')] to = likely[(language, 0, 0)]
except KeyError: except KeyError:
pass pass
else: else:
locale.script = to[1] script = to[1]
script = self.__textByName[locale.script][0] locale.script = self.scripts[script][0]
yield (language, script, territory), locale yield (language, script, territory), locale
@ -146,12 +153,16 @@ class QLocaleXmlReader (object):
return kl(tuple(x[0] for x in pair[0])) return kl(tuple(x[0] for x in pair[0]))
# Sort self.__likely to enable binary search in C++ code. # Sort self.__likely to enable binary search in C++ code.
for have, give in sorted(((self.__fromNames(has), for have, give in sorted(((self.__fromIds(has),
self.__fromNames(got)) self.__fromIds(got))
for has, got in self.__likely), for has, got in self.__likely),
key = keyLikely): key = keyLikely):
yield ('_'.join(tag(have)), ids(have), try:
'_'.join(tag(give)), ids(give)) yield ('_'.join(tag(have)), ids(have),
'_'.join(tag(give)), ids(give))
except TypeError as what:
what.args += (have, give)
raise
def defaultMap(self): def defaultMap(self):
"""Map language and script to their default territory by ID. """Map language and script to their default territory by ID.
@ -160,11 +171,9 @@ class QLocaleXmlReader (object):
sub-tags mapping says language's default locale uses the given sub-tags mapping says language's default locale uses the given
script and territory.""" script and territory."""
for have, give in self.__likely: for have, give in self.__likely:
if have[1:] == ('AnyScript', 'AnyTerritory') and give[2] != 'AnyTerritory': if have[1:] == (0, 0) and give[2]:
assert have[0] == give[0], (have, give) assert have[0] == give[0], (have, give)
yield ((self.__langByName[give[0]][0], yield (give[:2], give[2])
self.__textByName[give[1]][0]),
self.__landByName[give[2]][0])
def enumify(self, name, suffix): def enumify(self, name, suffix):
"""Stick together the parts of an enumdata.py name. """Stick together the parts of an enumdata.py name.
@ -207,14 +216,16 @@ class QLocaleXmlReader (object):
key = int(key) key = int(key)
yield key, enum[key][0], code, name yield key, enum[key][0], code, name
def __fromNames(self, names): def __fromIds(self, ids):
# Three (ID, code) pairs: # Three (ID, code) pairs:
return self.__langByName[names[0]], self.__textByName[names[1]], self.__landByName[names[2]] return ((ids[0], self.languages[ids[0]][1]),
(ids[1], self.scripts[ids[1]][1]),
(ids[2], self.territories[ids[2]][1]))
# Likely subtag management: # Likely subtag management:
def __likelySubtagsMap(self): def __likelySubtagsMap(self):
def triplet(element, keys=('language', 'script', 'territory'), kid = self.__firstChildText): def triplet(element, keys=('language', 'script', 'territory'), kid = self.__firstChildText):
return tuple(kid(element, key) for key in keys) return tuple(int(kid(element, key)) for key in keys)
kid = self.__firstChildElt kid = self.__firstChildElt
for elt in self.__eachEltInGroup(self.root, 'likelySubtags', 'likelySubtag'): for elt in self.__eachEltInGroup(self.root, 'likelySubtags', 'likelySubtag'):

View File

@ -34,9 +34,9 @@ LikelySubtag = element likelySubtag {
} }
LocaleTriplet = ( LocaleTriplet = (
element language { text }, element language { xsd:nonNegativeInteger },
element script { text }, element script { xsd:nonNegativeInteger },
element territory { text } element territory { xsd:nonNegativeInteger }
) )
# TODO: xsd patterns for IANA IDs and space-joined lists of them # TODO: xsd patterns for IANA IDs and space-joined lists of them