Move some shared code to a localetools module

The time-zone script was importing two functions from the locale data
generation script. Move them to a separate module, to which I'll
shortly add some more shared utilities. Cleaned up some imports in the
process.

Combined qlocalexml2cpp's and xpathlite's error classes into a new
Error class in the new module and made it a bit more like a proper
Python error class.

Task-number: QTBUG-81344
Change-Id: Idbe0139ba9aaa2f823b8f7216dee1d2539c18b75
Reviewed-by: Cristian Maureira-Fredes <cristian.maureira-fredes@qt.io>
This commit is contained in:
Edward Welbourne 2020-02-19 17:18:28 +01:00 committed by Edward Welbourne
parent 4d9f1a87de
commit c3dea1ffca
6 changed files with 105 additions and 75 deletions

View File

@ -58,14 +58,14 @@ import re
import textwrap import textwrap
import enumdata import enumdata
import xpathlite from localetools import Error
from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile, codeMapsFromFile, \
_findEntryInFile as findEntryInFile
from dateconverter import convert_date from dateconverter import convert_date
from qlocalexml import Locale, QLocaleXmlWriter from qlocalexml import Locale, QLocaleXmlWriter
# TODO: make calendars a command-line option # TODO: make calendars a command-line option
calendars = ['gregorian', 'persian', 'islamic'] # 'hebrew' calendars = ['gregorian', 'persian', 'islamic'] # 'hebrew'
findEntryInFile = xpathlite._findEntryInFile
def wrappedwarn(err, prefix, tokens): def wrappedwarn(err, prefix, tokens):
return err.write( return err.write(
'\n'.join(textwrap.wrap(prefix + ', '.join(tokens), '\n'.join(textwrap.wrap(prefix + ', '.join(tokens),
@ -116,19 +116,19 @@ def raiseUnknownCode(code, form, cache={}):
type of code to look up. Do not pass further parameters (the next type of code to look up. Do not pass further parameters (the next
will deprive you of the cache). will deprive you of the cache).
Raises xpathlite.Error with a suitable message, that includes the Raises localetools.Error with a suitable message, that includes
unknown code's full name if found. the unknown code's full name if found.
Relies on global cldr_dir being set before it's called; see tail Relies on global cldr_dir being set before it's called; see tail
of this file. of this file.
""" """
if not cache: if not cache:
cache.update(xpathlite.codeMapsFromFile(os.path.join(cldr_dir, 'en.xml'))) cache.update(codeMapsFromFile(os.path.join(cldr_dir, 'en.xml')))
name = cache[form].get(code) name = cache[form].get(code)
msg = 'unknown %s code "%s"' % (form, code) msg = 'unknown %s code "%s"' % (form, code)
if name: if name:
msg += ' - could use "%s"' % name msg += ' - could use "%s"' % name
raise xpathlite.Error(msg) raise Error(msg)
def parse_list_pattern_part_format(pattern): def parse_list_pattern_part_format(pattern):
# This is a very limited parsing of the format for list pattern part only. # This is a very limited parsing of the format for list pattern part only.
@ -182,7 +182,7 @@ def generateLocaleInfo(path):
# skip legacy/compatibility ones # skip legacy/compatibility ones
alias = findAlias(path) alias = findAlias(path)
if alias: if alias:
raise xpathlite.Error('alias to "%s"' % alias) raise Error('Alias to "{}"'.format(alias))
def code(tag): def code(tag):
return findEntryInFile(path, 'identity/' + tag, attribute="type")[0] return findEntryInFile(path, 'identity/' + tag, attribute="type")[0]
@ -224,7 +224,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
# ### actually there is only one locale with variant: en_US_POSIX # ### actually there is only one locale with variant: en_US_POSIX
# does anybody care about it at all? # does anybody care about it at all?
if variant_code: if variant_code:
raise xpathlite.Error('we do not support variants ("%s")' % variant_code) raise Error('We do not support variants ("{}")'.format(variant_code))
language_id = enumdata.languageCodeToId(language_code) language_id = enumdata.languageCodeToId(language_code)
if language_id <= 0: if language_id <= 0:
@ -283,23 +283,23 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
numbering_system = None numbering_system = None
try: try:
numbering_system = findEntry(path, "numbers/defaultNumberingSystem") numbering_system = findEntry(path, "numbers/defaultNumberingSystem")
except xpathlite.Error: except Error:
pass pass
def findEntryDef(path, xpath, value=''): def findEntryDef(path, xpath, value=''):
try: try:
return findEntry(path, xpath) return findEntry(path, xpath)
except xpathlite.Error: except Error:
return value return value
def get_number_in_system(path, xpath, numbering_system): def get_number_in_system(path, xpath, numbering_system):
if numbering_system: if numbering_system:
try: try:
return findEntry(path, xpath + "[numberSystem=" + numbering_system + "]") return findEntry(path, xpath + "[numberSystem=" + numbering_system + "]")
except xpathlite.Error: except Error:
# in CLDR 1.9 number system was refactored for numbers (but not for currency) # in CLDR 1.9 number system was refactored for numbers (but not for currency)
# so if previous findEntry doesn't work we should try this: # so if previous findEntry doesn't work we should try this:
try: try:
return findEntry(path, xpath.replace("/symbols/", "/symbols[numberSystem=" + numbering_system + "]/")) return findEntry(path, xpath.replace("/symbols/", "/symbols[numberSystem=" + numbering_system + "]/"))
except xpathlite.Error: except Error:
# fallback to default # fallback to default
pass pass
return findEntry(path, xpath) return findEntry(path, xpath)
@ -368,7 +368,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
for count in ('many', 'few', 'two', 'other', 'zero', 'one'): for count in ('many', 'few', 'two', 'other', 'zero', 'one'):
try: try:
ans = findEntry(path, stem + 'unitPattern[count=%s]' % count) ans = findEntry(path, stem + 'unitPattern[count=%s]' % count)
except xpathlite.Error: except Error:
continue continue
# TODO: exploit count-handling, instead of discarding placeholders # TODO: exploit count-handling, instead of discarding placeholders
@ -498,7 +498,7 @@ def _parseLocale(l):
country = "AnyCountry" country = "AnyCountry"
if l == "und": if l == "und":
raise xpathlite.Error("we are treating unknown locale like C") raise Error('We treat unknown locale like C')
parsed = splitLocale(l) parsed = splitLocale(l)
language_code = parsed.next() language_code = parsed.next()
@ -511,19 +511,19 @@ def _parseLocale(l):
if language_code != "und": if language_code != "und":
language_id = enumdata.languageCodeToId(language_code) language_id = enumdata.languageCodeToId(language_code)
if language_id == -1: if language_id == -1:
raise xpathlite.Error('unknown language code "%s"' % language_code) raise Error('Unknown language code "{}"'.format(language_code))
language = enumdata.language_list[language_id][0] language = enumdata.language_list[language_id][0]
if script_code: if script_code:
script_id = enumdata.scriptCodeToId(script_code) script_id = enumdata.scriptCodeToId(script_code)
if script_id == -1: if script_id == -1:
raise xpathlite.Error('unknown script code "%s"' % script_code) raise Error('Unknown script code "{}"'.format(script_code))
script = enumdata.script_list[script_id][0] script = enumdata.script_list[script_id][0]
if country_code: if country_code:
country_id = enumdata.countryCodeToId(country_code) country_id = enumdata.countryCodeToId(country_code)
if country_id == -1: if country_id == -1:
raise xpathlite.Error('unknown country code "%s"' % country_code) raise Error('Unknown country code "{}"'.format(country_code))
country = enumdata.country_list[country_id][0] country = enumdata.country_list[country_id][0]
return (language, script, country) return (language, script, country)
@ -538,11 +538,13 @@ def likelySubtags(root, err):
try: try:
from_language, from_script, from_country = _parseLocale(tmp[u"from"]) from_language, from_script, from_country = _parseLocale(tmp[u"from"])
to_language, to_script, to_country = _parseLocale(tmp[u"to"]) to_language, to_script, to_country = _parseLocale(tmp[u"to"])
except xpathlite.Error as e: except Error as e:
if tmp[u'to'].startswith(tmp[u'from']) and str(e) == 'unknown language code "%s"' % tmp[u'from']: if (tmp['to'].startswith(tmp['from'])
skips.append(tmp[u'to']) and e.message == 'Unknown language code "{}"'.format(tmp['from'])):
skips.append(tmp['to'])
else: else:
sys.stderr.write('skipping likelySubtag "%s" -> "%s" (%s)\n' % (tmp[u"from"], tmp[u"to"], str(e))) sys.stderr.write('skipping likelySubtag "{}" -> "{}" ({})\n'.format(
tmp[u"from"], tmp[u"to"], e.message))
continue continue
# substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags # substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
if to_country == "AnyCountry" and from_country != to_country: if to_country == "AnyCountry" and from_country != to_country:
@ -612,8 +614,8 @@ def main(args, out, err):
if not l: if not l:
skips.append(file) skips.append(file)
continue continue
except xpathlite.Error as e: except Error as e:
sys.stderr.write('skipping defaultContent locale "{}" ({})\n'.format(file, str(e))) sys.stderr.write('skipping defaultContent locale "{}" ({})\n'.format(file, e.message))
continue continue
locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l
@ -628,8 +630,8 @@ def main(args, out, err):
if not l: if not l:
skips.append(file) skips.append(file)
continue continue
except xpathlite.Error as e: except Error as e:
sys.stderr.write('skipping file "{}" ({})\n'.format(file, str(e))) sys.stderr.write('skipping file "{}" ({})\n'.format(file, e.message))
continue continue
locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l

View File

@ -54,20 +54,14 @@ The XML structure is as follows:
import os import os
import sys import sys
import re
import datetime import datetime
import tempfile import tempfile
import enumdata
import xpathlite
from xpathlite import DraftResolution
import re
import qlocalexml2cpp
findAlias = xpathlite.findAlias import enumdata
findEntry = xpathlite.findEntry from localetools import unicode2hex, wrap_list, Error
findEntryInFile = xpathlite._findEntryInFile from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile, \
findTagsInFile = xpathlite.findTagsInFile _findEntryInFile as findEntryInFile
unicode2hex = qlocalexml2cpp.unicode2hex
wrap_list = qlocalexml2cpp.wrap_list
class ByteArrayData: class ByteArrayData:
def __init__(self): def __init__(self):
@ -343,13 +337,13 @@ if mapTimezones:
else: else:
data['countryId'] = enumdata.countryCodeToId(data['countryCode']) data['countryId'] = enumdata.countryCodeToId(data['countryCode'])
if data['countryId'] < 0: if data['countryId'] < 0:
raise xpathlite.Error("Unknown Country Code \"%s\"" % data['countryCode']) raise Error('Unknown Country Code "{}"'.format(data['countryCode']))
data['country'] = enumdata.country_list[data['countryId']][0] data['country'] = enumdata.country_list[data['countryId']][0]
windowsIdDict[data['windowsKey'], data['countryId']] = data windowsIdDict[data['windowsKey'], data['countryId']] = data
if badZones: if badZones:
sys.stderr.write('\n\t'.join(["\nUnknown Windows ID, please add:"] + sorted(badZones)) sys.stderr.write('\n\t'.join(["\nUnknown Windows ID, please add:"] + sorted(badZones))
+ "\nto the windowIdList in cldr2qtimezone.py\n\n") + "\nto the windowIdList in cldr2qtimezone.py\n\n")
raise xpathlite.Error("Unknown Windows IDs") raise Error('Unknown Windows IDs')
print "Input file parsed, now writing data" print "Input file parsed, now writing data"

View File

@ -0,0 +1,65 @@
#############################################################################
##
## Copyright (C) 2020 The Qt Company Ltd.
## Contact: https://www.qt.io/licensing/
##
## This file is part of the test suite of the Qt Toolkit.
##
## $QT_BEGIN_LICENSE:GPL-EXCEPT$
## Commercial License Usage
## Licensees holding valid commercial Qt licenses may use this file in
## accordance with the commercial license agreement provided with the
## Software or, alternatively, in accordance with the terms contained in
## a written agreement between you and The Qt Company. For licensing terms
## and conditions see https://www.qt.io/terms-conditions. For further
## information use the contact form at https://www.qt.io/contact-us.
##
## GNU General Public License Usage
## Alternatively, this file may be used under the terms of the GNU
## General Public License version 3 as published by the Free Software
## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
## included in the packaging of this file. Please review the following
## information to ensure the GNU General Public License requirements will
## be met: https://www.gnu.org/licenses/gpl-3.0.html.
##
## $QT_END_LICENSE$
##
#############################################################################
"""Utilities shared among the CLDR extraction tools.
Functions:
unicode2hex() -- converts unicode text to UTF-16 code units in hex form.
wrap_list() -- map list to comma-separated string, 20 entries per line.
Classes:
Error -- A shared error class.
"""
try:
    _ErrorBase = StandardError  # Python 2: keep the original base class
except NameError:
    _ErrorBase = Exception  # Python 3 has no StandardError


class Error(_ErrorBase):
    """Shared error class for the CLDR extraction tools.

    The first constructor argument is the human-readable message. It is
    stored as the ``message`` attribute and is what ``str()`` returns.
    Any further positional arguments are handed on to the base exception
    class, so they appear in ``args`` after the message.
    """

    def __init__(self, msg, *args):
        super(Error, self).__init__(msg, *args)
        # Stored explicitly so that .message is always available,
        # whatever the base exception class itself provides.
        self.message = msg

    def __str__(self):
        return self.message
def unicode2hex(s):
    """Return the hex() strings of the 16-bit code units encoding s.

    Each character whose code point fits in 16 bits contributes one
    entry; any code point above 0xFFFF contributes two entries, the
    high and then the low half of a surrogate pair.
    """
    units = []
    for char in s:
        code = ord(char)
        if code <= 0xFFFF:
            units.append(hex(code))
            continue
        # Surrogate pair, using the same arithmetic as qchar.h
        units.extend((hex((code >> 10) + 0xd7c0),
                      hex(code % 0x400 + 0xdc00)))
    return units
def wrap_list(lst):
    """Join a list of strings into comma-separated text, 20 per line.

    Entries on the same line are separated by ", "; each line except
    the last ends with "," and a newline. An empty list gives "".
    """
    rows = (lst[start:start + 20] for start in range(0, len(lst), 20))
    return ",\n".join(", ".join(row) for row in rows)

View File

@ -39,7 +39,7 @@ Support:
from __future__ import print_function from __future__ import print_function
from xml.sax.saxutils import escape from xml.sax.saxutils import escape
from xpathlite import Error from localetools import Error
# Tools used by Locale: # Tools used by Locale:
def camel(seq): def camel(seq):

View File

@ -37,9 +37,10 @@ import os
import sys import sys
import tempfile import tempfile
import datetime import datetime
from enumdata import language_aliases, country_aliases, script_aliases
from qlocalexml import QLocaleXmlReader from qlocalexml import QLocaleXmlReader
from enumdata import language_aliases, country_aliases, script_aliases
from localetools import unicode2hex, wrap_list, Error
# TODO: Make calendars a command-line parameter # TODO: Make calendars a command-line parameter
# map { CLDR name: Qt file name } # map { CLDR name: Qt file name }
@ -59,19 +60,6 @@ generated_template = """
""" """
class Error:
def __init__(self, msg):
self.msg = msg
def __str__(self):
return self.msg
def wrap_list(lst):
def split(lst, size):
while lst:
head, lst = lst[:size], lst[size:]
yield head
return ",\n".join(", ".join(x) for x in split(lst, 20))
def fixedScriptName(name, dupes): def fixedScriptName(name, dupes):
# Don't .capitalize() as some names are already camel-case (see enumdata.py): # Don't .capitalize() as some names are already camel-case (see enumdata.py):
name = ''.join(word[0].upper() + word[1:] for word in name.split()) name = ''.join(word[0].upper() + word[1:] for word in name.split())
@ -127,21 +115,6 @@ def compareLocaleKeys(key1, key2):
return key1[1] - key2[1] return key1[1] - key2[1]
def unicode2hex(s):
lst = []
for x in s:
v = ord(x)
if v > 0xFFFF:
# make a surrogate pair
# copied from qchar.h
high = (v >> 10) + 0xd7c0
low = (v % 0x400 + 0xdc00)
lst.append(hex(high))
lst.append(hex(low))
else:
lst.append(hex(v))
return lst
class StringDataToken: class StringDataToken:
def __init__(self, index, length): def __init__(self, index, length):
if index > 0xFFFF or length > 0xFFFF: if index > 0xFFFF or length > 0xFFFF:

View File

@ -31,6 +31,8 @@ import sys
import os import os
import xml.dom.minidom import xml.dom.minidom
from localetools import Error
class DraftResolution: class DraftResolution:
# See http://www.unicode.org/cldr/process.html for description # See http://www.unicode.org/cldr/process.html for description
unconfirmed = 'unconfirmed' unconfirmed = 'unconfirmed'
@ -43,12 +45,6 @@ class DraftResolution:
def toInt(self): def toInt(self):
return DraftResolution._values[self.resolution] return DraftResolution._values[self.resolution]
class Error:
def __init__(self, msg):
self.msg = msg
def __str__(self):
return self.msg
doc_cache = {} doc_cache = {}
def parseDoc(file): def parseDoc(file):
if not doc_cache.has_key(file): if not doc_cache.has_key(file):