Check our enumdata.py tables are consistent with CLDR
Compare the code->name mappings we're using to the ones CLDR's common/main/en.xml provides; report discrepancies. Tolerate tags missing from en.xml if they're known to the locale-inheritance machinery. Change-Id: Ibe96c18bf55984a35de3b3644f3586a9f30720b2 Reviewed-by: Cristian Maureira-Fredes <cristian.maureira-fredes@qt.io>
This commit is contained in:
parent
dcbc8f16d0
commit
d11bf5fc24
@ -1,3 +1,4 @@
|
|||||||
|
# -*- coding: utf-8; -*-
|
||||||
#############################################################################
|
#############################################################################
|
||||||
##
|
##
|
||||||
## Copyright (C) 2020 The Qt Company Ltd.
|
## Copyright (C) 2020 The Qt Company Ltd.
|
||||||
@ -58,6 +59,7 @@ class CldrReader (object):
|
|||||||
verbose output."""
|
verbose output."""
|
||||||
self.root = CldrAccess(root)
|
self.root = CldrAccess(root)
|
||||||
self.whitter, self.grumble = whitter, grumble
|
self.whitter, self.grumble = whitter, grumble
|
||||||
|
self.root.checkEnumData(grumble)
|
||||||
|
|
||||||
def likelySubTags(self):
|
def likelySubTags(self):
|
||||||
"""Generator for likely subtag information.
|
"""Generator for likely subtag information.
|
||||||
@ -372,6 +374,68 @@ class CldrAccess (object):
|
|||||||
raise Error('Unknown ' + ', '.join(parts),
|
raise Error('Unknown ' + ', '.join(parts),
|
||||||
language, script, country, variant)
|
language, script, country, variant)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def __checkEnum(given, proper, scraps,
|
||||||
|
remap = { u'å': 'a', u'ã': 'a', u'ç': 'c', u'é': 'e', u'í': 'i', u'ü': 'u'},
|
||||||
|
prefix = { 'St.': 'Saint', 'U.S.': 'United States' },
|
||||||
|
suffixes = ( 'Han', ),
|
||||||
|
skip = u'\u02bc'):
|
||||||
|
# Each is a { code: full name } mapping
|
||||||
|
for code, name in given.items():
|
||||||
|
try: right = proper[code]
|
||||||
|
except KeyError:
|
||||||
|
# No en.xml name for this code, but supplementalData's
|
||||||
|
# parentLocale may still believe in it:
|
||||||
|
if code not in scraps:
|
||||||
|
yield name, '[Found no CLDR name for code {}]'.format(code)
|
||||||
|
continue
|
||||||
|
if name == right: continue
|
||||||
|
ok = right.replace('&', 'And')
|
||||||
|
for k, v in prefix.items():
|
||||||
|
if ok.startswith(k + ' '):
|
||||||
|
ok = v + ok[len(k):]
|
||||||
|
while '(' in ok:
|
||||||
|
try: f, t = ok.index('('), ok.index(')')
|
||||||
|
except ValueError: break
|
||||||
|
ok = ok[:f].rstrip() + ' ' + ok[t:].lstrip()
|
||||||
|
if any(name == ok + ' ' + s for s in suffixes):
|
||||||
|
continue
|
||||||
|
if ''.join(ch for ch in name.lower() if not ch.isspace()) in ''.join(
|
||||||
|
remap.get(ch, ch) for ch in ok.lower() if ch.isalpha() and ch not in skip):
|
||||||
|
continue
|
||||||
|
yield name, ok
|
||||||
|
|
||||||
|
def checkEnumData(self, grumble):
|
||||||
|
scraps = set()
|
||||||
|
for k in self.__parentLocale.keys():
|
||||||
|
for f in k.split('_'):
|
||||||
|
scraps.add(f)
|
||||||
|
from enumdata import language_list, country_list, script_list
|
||||||
|
language = dict((v, k) for k, v in language_list.values() if not v.isspace())
|
||||||
|
country = dict((v, k) for k, v in country_list.values() if v != 'ZZ')
|
||||||
|
script = dict((v, k) for k, v in script_list.values() if v != 'Zzzz')
|
||||||
|
lang = dict(self.__checkEnum(language, self.__codeMap('language'), scraps))
|
||||||
|
land = dict(self.__checkEnum(country, self.__codeMap('country'), scraps))
|
||||||
|
text = dict(self.__checkEnum(script, self.__codeMap('script'), scraps))
|
||||||
|
if lang or land or text:
|
||||||
|
grumble("""\
|
||||||
|
Using names that don't match CLDR: consider updating the name(s) in
|
||||||
|
enumdata.py (keeping the old name as an alias):
|
||||||
|
""")
|
||||||
|
if lang:
|
||||||
|
grumble('Language:\n\t'
|
||||||
|
+ '\n\t'.join('{} -> {}'.format(k, v) for k, v in lang.items())
|
||||||
|
+ '\n')
|
||||||
|
if land:
|
||||||
|
grumble('Country:\n\t'
|
||||||
|
+ '\n\t'.join('{} -> {}'.format(k, v) for k, v in land.items())
|
||||||
|
+ '\n')
|
||||||
|
if text:
|
||||||
|
grumble('Script:\n\t'
|
||||||
|
+ '\n\t'.join('{} -> {}'.format(k, v) for k, v in text.items())
|
||||||
|
+ '\n')
|
||||||
|
grumble('\n')
|
||||||
|
|
||||||
def readWindowsTimeZones(self, lookup): # For use by cldr2qtimezone.py
|
def readWindowsTimeZones(self, lookup): # For use by cldr2qtimezone.py
|
||||||
"""Digest CLDR's MS-Win time-zone name mapping.
|
"""Digest CLDR's MS-Win time-zone name mapping.
|
||||||
|
|
||||||
@ -662,7 +726,8 @@ class CldrAccess (object):
|
|||||||
seen.add(key)
|
seen.add(key)
|
||||||
|
|
||||||
# CLDR uses inheritance between locales to save repetition:
|
# CLDR uses inheritance between locales to save repetition:
|
||||||
def __parentLocale(self, name, cache = {}):
|
@property
|
||||||
|
def __parentLocale(self, cache = {}):
|
||||||
# see http://www.unicode.org/reports/tr35/#Parent_Locales
|
# see http://www.unicode.org/reports/tr35/#Parent_Locales
|
||||||
if not cache:
|
if not cache:
|
||||||
for tag, attrs in self.__supplementalData.find('parentLocales'):
|
for tag, attrs in self.__supplementalData.find('parentLocales'):
|
||||||
@ -671,7 +736,7 @@ class CldrAccess (object):
|
|||||||
cache[child] = parent
|
cache[child] = parent
|
||||||
assert cache
|
assert cache
|
||||||
|
|
||||||
return cache[name]
|
return cache
|
||||||
|
|
||||||
def __localeAsDoc(self, name, aliasFor = None,
|
def __localeAsDoc(self, name, aliasFor = None,
|
||||||
joinPath = os.path.join, exists = os.path.isfile):
|
joinPath = os.path.join, exists = os.path.isfile):
|
||||||
@ -699,7 +764,7 @@ class CldrAccess (object):
|
|||||||
yield Node(doc, self.__unDistinguishedAttributes)
|
yield Node(doc, self.__unDistinguishedAttributes)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
name = self.__parentLocale(name)
|
name = self.__parentLocale[name]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
try:
|
try:
|
||||||
name, tail = name.rsplit('_', 1)
|
name, tail = name.rsplit('_', 1)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user