Rework CLDR parser to filter out unsupportable number systems

In the process, also have it only scan number systems once, caching
the result, rather than scanning all of them for each locale.  This
means we only see the new warning messages once, too.

Task-number: QTBUG-69324
Change-Id: Ia0695a0ba6159b50748a61e9949ad5bd07e4c4c3
Reviewed-by: Kai Koehne <kai.koehne@qt.io>
This commit is contained in:
Edward Welbourne 2018-07-06 14:06:11 +02:00
parent e000c60ab3
commit 042d41e23e

View File

@ -145,6 +145,28 @@ def generateLocaleInfo(path):
return _generateLocaleInfo(path, code('language'), code('script'),
code('territory'), code('variant'))
def _firstCodePoint(text):
    """Return the code point of the first character of non-empty text.

    On narrow (UCS-2) Python builds a character outside the Basic
    Multilingual Plane is stored as a surrogate pair, so ord(text[0])
    alone would only report the high surrogate; when text starts with
    such a pair, the two halves are combined into the real code point.
    """
    first = ord(text[0])
    if 0xD800 <= first <= 0xDBFF and len(text) > 1:
        second = ord(text[1])
        if 0xDC00 <= second <= 0xDFFF:
            return 0x10000 + ((first - 0xD800) << 10) + (second - 0xDC00)
    return first


def getNumberSystems(cache={}):
    """Cached look-up of number system information.

    Pass no arguments.  Returns a mapping from number system names to,
    for each system, a mapping of the attributes of its entry in
    supplemental/numberingSystems.xml, such as u'digits', u'type' and
    u'id' (u'digits' may be absent).

    Number systems whose zero digit does not fit in a single UTF-16
    code unit are left out of the result; a message naming each of
    them is written to sys.stderr while the file is scanned.

    The mutable default for cache is deliberate: it is what holds the
    result between calls, so that the file is only scanned (and the
    messages are only emitted) by the first call.
    """
    if not cache:
        path = os.path.join(cldr_dir, '..', 'supplemental',
                            'numberingSystems.xml')
        # Collect into a local mapping first: if the scan raises part-way
        # through, the shared cache must stay empty rather than be left
        # half-filled and then mistaken for complete by later calls.
        found = {}
        for ns in findTagsInFile(path, 'numberingSystems'):
            # ns has form: [u'numberingSystem', [(u'digits', u'0123456789'), (u'type', u'numeric'), (u'id', u'latn')]]
            entry = dict(ns[1])
            name = entry[u'id']
            digits = entry.get(u'digits')
            # A missing or empty digits attribute has no zero to check;
            # such entries are kept, as entries without digits always were.
            zero = _firstCodePoint(digits) if digits else 0
            if zero > 0xffff:
                # FIXME: make this redundant:
                # omit number system if zero doesn't fit in single-char16 UTF-16 :-(
                sys.stderr.write('skipping number system "%s" [can\'t represent its zero, U+%X, QTBUG-69324]\n'
                                 % (name, zero))
            else:
                found[name] = entry
        cache.update(found)
    return cache
def _generateLocaleInfo(path, language_code, script_code, country_code, variant_code=""):
if not path.endswith(".xml"):
return {}
@ -242,20 +264,9 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
result['list'] = get_number_in_system(path, "numbers/symbols/list", numbering_system)
result['percent'] = get_number_in_system(path, "numbers/symbols/percentSign", numbering_system)
try:
numbering_systems = {}
for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental',
'numberingSystems.xml'),
'numberingSystems'):
tmp = {}
id = ""
for data in ns[1:][0]: # ns looks like this: [u'numberingSystem', [(u'digits', u'0123456789'), (u'type', u'numeric'), (u'id', u'latn')]]
tmp[data[0]] = data[1]
if data[0] == u"id":
id = data[1]
numbering_systems[id] = tmp
result['zero'] = numbering_systems[numbering_system][u"digits"][0]
except e:
sys.stderr.write("Native zero detection problem:\n" + str(e) + "\n")
result['zero'] = getNumberSystems()[numbering_system][u"digits"][0]
except Exception as e:
sys.stderr.write("Native zero detection problem: %s\n" % repr(e))
result['zero'] = get_number_in_system(path, "numbers/symbols/nativeZeroDigit", numbering_system)
result['minus'] = get_number_in_system(path, "numbers/symbols/minusSign", numbering_system)
result['plus'] = get_number_in_system(path, "numbers/symbols/plusSign", numbering_system)