Move some shared code to a localetools module
The time-zone script was importing two functions from the locale data generation script. Move them to a separate module, to which I'll shortly add some more shared utilities. Cleaned up some imports in the process. Combined qlocalexml2cpp's and xpathlite's error classes into a new Error class in the new module and made it a bit more like a proper Python error class. Task-number: QTBUG-81344 Change-Id: Idbe0139ba9aaa2f823b8f7216dee1d2539c18b75 Reviewed-by: Cristian Maureira-Fredes <cristian.maureira-fredes@qt.io>
This commit is contained in:
parent
4d9f1a87de
commit
c3dea1ffca
@ -58,14 +58,14 @@ import re
|
|||||||
import textwrap
|
import textwrap
|
||||||
|
|
||||||
import enumdata
|
import enumdata
|
||||||
import xpathlite
|
from localetools import Error
|
||||||
from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile
|
from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile, codeMapsFromFile, \
|
||||||
|
_findEntryInFile as findEntryInFile
|
||||||
from dateconverter import convert_date
|
from dateconverter import convert_date
|
||||||
from qlocalexml import Locale, QLocaleXmlWriter
|
from qlocalexml import Locale, QLocaleXmlWriter
|
||||||
|
|
||||||
# TODO: make calendars a command-line option
|
# TODO: make calendars a command-line option
|
||||||
calendars = ['gregorian', 'persian', 'islamic'] # 'hebrew'
|
calendars = ['gregorian', 'persian', 'islamic'] # 'hebrew'
|
||||||
findEntryInFile = xpathlite._findEntryInFile
|
|
||||||
def wrappedwarn(err, prefix, tokens):
|
def wrappedwarn(err, prefix, tokens):
|
||||||
return err.write(
|
return err.write(
|
||||||
'\n'.join(textwrap.wrap(prefix + ', '.join(tokens),
|
'\n'.join(textwrap.wrap(prefix + ', '.join(tokens),
|
||||||
@ -116,19 +116,19 @@ def raiseUnknownCode(code, form, cache={}):
|
|||||||
type of code to look up. Do not pass further parameters (the next
|
type of code to look up. Do not pass further parameters (the next
|
||||||
will deprive you of the cache).
|
will deprive you of the cache).
|
||||||
|
|
||||||
Raises xpathlite.Error with a suitable message, that includes the
|
Raises localetools.Error with a suitable message, that includes
|
||||||
unknown code's full name if found.
|
the unknown code's full name if found.
|
||||||
|
|
||||||
Relies on global cldr_dir being set before it's called; see tail
|
Relies on global cldr_dir being set before it's called; see tail
|
||||||
of this file.
|
of this file.
|
||||||
"""
|
"""
|
||||||
if not cache:
|
if not cache:
|
||||||
cache.update(xpathlite.codeMapsFromFile(os.path.join(cldr_dir, 'en.xml')))
|
cache.update(codeMapsFromFile(os.path.join(cldr_dir, 'en.xml')))
|
||||||
name = cache[form].get(code)
|
name = cache[form].get(code)
|
||||||
msg = 'unknown %s code "%s"' % (form, code)
|
msg = 'unknown %s code "%s"' % (form, code)
|
||||||
if name:
|
if name:
|
||||||
msg += ' - could use "%s"' % name
|
msg += ' - could use "%s"' % name
|
||||||
raise xpathlite.Error(msg)
|
raise Error(msg)
|
||||||
|
|
||||||
def parse_list_pattern_part_format(pattern):
|
def parse_list_pattern_part_format(pattern):
|
||||||
# This is a very limited parsing of the format for list pattern part only.
|
# This is a very limited parsing of the format for list pattern part only.
|
||||||
@ -182,7 +182,7 @@ def generateLocaleInfo(path):
|
|||||||
# skip legacy/compatibility ones
|
# skip legacy/compatibility ones
|
||||||
alias = findAlias(path)
|
alias = findAlias(path)
|
||||||
if alias:
|
if alias:
|
||||||
raise xpathlite.Error('alias to "%s"' % alias)
|
raise Error('Alias to "{}"'.format(alias))
|
||||||
|
|
||||||
def code(tag):
|
def code(tag):
|
||||||
return findEntryInFile(path, 'identity/' + tag, attribute="type")[0]
|
return findEntryInFile(path, 'identity/' + tag, attribute="type")[0]
|
||||||
@ -224,7 +224,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
|
|||||||
# ### actually there is only one locale with variant: en_US_POSIX
|
# ### actually there is only one locale with variant: en_US_POSIX
|
||||||
# does anybody care about it at all?
|
# does anybody care about it at all?
|
||||||
if variant_code:
|
if variant_code:
|
||||||
raise xpathlite.Error('we do not support variants ("%s")' % variant_code)
|
raise Error('We do not support variants ("{}")'.format(variant_code))
|
||||||
|
|
||||||
language_id = enumdata.languageCodeToId(language_code)
|
language_id = enumdata.languageCodeToId(language_code)
|
||||||
if language_id <= 0:
|
if language_id <= 0:
|
||||||
@ -283,23 +283,23 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
|
|||||||
numbering_system = None
|
numbering_system = None
|
||||||
try:
|
try:
|
||||||
numbering_system = findEntry(path, "numbers/defaultNumberingSystem")
|
numbering_system = findEntry(path, "numbers/defaultNumberingSystem")
|
||||||
except xpathlite.Error:
|
except Error:
|
||||||
pass
|
pass
|
||||||
def findEntryDef(path, xpath, value=''):
|
def findEntryDef(path, xpath, value=''):
|
||||||
try:
|
try:
|
||||||
return findEntry(path, xpath)
|
return findEntry(path, xpath)
|
||||||
except xpathlite.Error:
|
except Error:
|
||||||
return value
|
return value
|
||||||
def get_number_in_system(path, xpath, numbering_system):
|
def get_number_in_system(path, xpath, numbering_system):
|
||||||
if numbering_system:
|
if numbering_system:
|
||||||
try:
|
try:
|
||||||
return findEntry(path, xpath + "[numberSystem=" + numbering_system + "]")
|
return findEntry(path, xpath + "[numberSystem=" + numbering_system + "]")
|
||||||
except xpathlite.Error:
|
except Error:
|
||||||
# in CLDR 1.9 number system was refactored for numbers (but not for currency)
|
# in CLDR 1.9 number system was refactored for numbers (but not for currency)
|
||||||
# so if previous findEntry doesn't work we should try this:
|
# so if previous findEntry doesn't work we should try this:
|
||||||
try:
|
try:
|
||||||
return findEntry(path, xpath.replace("/symbols/", "/symbols[numberSystem=" + numbering_system + "]/"))
|
return findEntry(path, xpath.replace("/symbols/", "/symbols[numberSystem=" + numbering_system + "]/"))
|
||||||
except xpathlite.Error:
|
except Error:
|
||||||
# fallback to default
|
# fallback to default
|
||||||
pass
|
pass
|
||||||
return findEntry(path, xpath)
|
return findEntry(path, xpath)
|
||||||
@ -368,7 +368,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
|
|||||||
for count in ('many', 'few', 'two', 'other', 'zero', 'one'):
|
for count in ('many', 'few', 'two', 'other', 'zero', 'one'):
|
||||||
try:
|
try:
|
||||||
ans = findEntry(path, stem + 'unitPattern[count=%s]' % count)
|
ans = findEntry(path, stem + 'unitPattern[count=%s]' % count)
|
||||||
except xpathlite.Error:
|
except Error:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# TODO: exploit count-handling, instead of discarding placeholders
|
# TODO: exploit count-handling, instead of discarding placeholders
|
||||||
@ -498,7 +498,7 @@ def _parseLocale(l):
|
|||||||
country = "AnyCountry"
|
country = "AnyCountry"
|
||||||
|
|
||||||
if l == "und":
|
if l == "und":
|
||||||
raise xpathlite.Error("we are treating unknown locale like C")
|
raise Error('We treat unknown locale like C')
|
||||||
|
|
||||||
parsed = splitLocale(l)
|
parsed = splitLocale(l)
|
||||||
language_code = parsed.next()
|
language_code = parsed.next()
|
||||||
@ -511,19 +511,19 @@ def _parseLocale(l):
|
|||||||
if language_code != "und":
|
if language_code != "und":
|
||||||
language_id = enumdata.languageCodeToId(language_code)
|
language_id = enumdata.languageCodeToId(language_code)
|
||||||
if language_id == -1:
|
if language_id == -1:
|
||||||
raise xpathlite.Error('unknown language code "%s"' % language_code)
|
raise Error('Unknown language code "{}"'.format(language_code))
|
||||||
language = enumdata.language_list[language_id][0]
|
language = enumdata.language_list[language_id][0]
|
||||||
|
|
||||||
if script_code:
|
if script_code:
|
||||||
script_id = enumdata.scriptCodeToId(script_code)
|
script_id = enumdata.scriptCodeToId(script_code)
|
||||||
if script_id == -1:
|
if script_id == -1:
|
||||||
raise xpathlite.Error('unknown script code "%s"' % script_code)
|
raise Error('Unknown script code "{}"'.format(script_code))
|
||||||
script = enumdata.script_list[script_id][0]
|
script = enumdata.script_list[script_id][0]
|
||||||
|
|
||||||
if country_code:
|
if country_code:
|
||||||
country_id = enumdata.countryCodeToId(country_code)
|
country_id = enumdata.countryCodeToId(country_code)
|
||||||
if country_id == -1:
|
if country_id == -1:
|
||||||
raise xpathlite.Error('unknown country code "%s"' % country_code)
|
raise Error('Unknown country code "{}"'.format(country_code))
|
||||||
country = enumdata.country_list[country_id][0]
|
country = enumdata.country_list[country_id][0]
|
||||||
|
|
||||||
return (language, script, country)
|
return (language, script, country)
|
||||||
@ -538,11 +538,13 @@ def likelySubtags(root, err):
|
|||||||
try:
|
try:
|
||||||
from_language, from_script, from_country = _parseLocale(tmp[u"from"])
|
from_language, from_script, from_country = _parseLocale(tmp[u"from"])
|
||||||
to_language, to_script, to_country = _parseLocale(tmp[u"to"])
|
to_language, to_script, to_country = _parseLocale(tmp[u"to"])
|
||||||
except xpathlite.Error as e:
|
except Error as e:
|
||||||
if tmp[u'to'].startswith(tmp[u'from']) and str(e) == 'unknown language code "%s"' % tmp[u'from']:
|
if (tmp['to'].startswith(tmp['from'])
|
||||||
skips.append(tmp[u'to'])
|
and e.message == 'Unknown language code "{}"'.format(tmp['from'])):
|
||||||
|
skips.append(tmp['to'])
|
||||||
else:
|
else:
|
||||||
sys.stderr.write('skipping likelySubtag "%s" -> "%s" (%s)\n' % (tmp[u"from"], tmp[u"to"], str(e)))
|
sys.stderr.write('skipping likelySubtag "{}" -> "{}" ({})\n'.format(
|
||||||
|
tmp[u"from"], tmp[u"to"], e.message))
|
||||||
continue
|
continue
|
||||||
# substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
|
# substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
|
||||||
if to_country == "AnyCountry" and from_country != to_country:
|
if to_country == "AnyCountry" and from_country != to_country:
|
||||||
@ -612,8 +614,8 @@ def main(args, out, err):
|
|||||||
if not l:
|
if not l:
|
||||||
skips.append(file)
|
skips.append(file)
|
||||||
continue
|
continue
|
||||||
except xpathlite.Error as e:
|
except Error as e:
|
||||||
sys.stderr.write('skipping defaultContent locale "{}" ({})\n'.format(file, str(e)))
|
sys.stderr.write('skipping defaultContent locale "{}" ({})\n'.format(file, e.message))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l
|
locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l
|
||||||
@ -628,8 +630,8 @@ def main(args, out, err):
|
|||||||
if not l:
|
if not l:
|
||||||
skips.append(file)
|
skips.append(file)
|
||||||
continue
|
continue
|
||||||
except xpathlite.Error as e:
|
except Error as e:
|
||||||
sys.stderr.write('skipping file "{}" ({})\n'.format(file, str(e)))
|
sys.stderr.write('skipping file "{}" ({})\n'.format(file, e.message))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l
|
locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l
|
||||||
|
@ -54,20 +54,14 @@ The XML structure is as follows:
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import re
|
||||||
import datetime
|
import datetime
|
||||||
import tempfile
|
import tempfile
|
||||||
import enumdata
|
|
||||||
import xpathlite
|
|
||||||
from xpathlite import DraftResolution
|
|
||||||
import re
|
|
||||||
import qlocalexml2cpp
|
|
||||||
|
|
||||||
findAlias = xpathlite.findAlias
|
import enumdata
|
||||||
findEntry = xpathlite.findEntry
|
from localetools import unicode2hex, wrap_list, Error
|
||||||
findEntryInFile = xpathlite._findEntryInFile
|
from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile, \
|
||||||
findTagsInFile = xpathlite.findTagsInFile
|
_findEntryInFile as findEntryInFile
|
||||||
unicode2hex = qlocalexml2cpp.unicode2hex
|
|
||||||
wrap_list = qlocalexml2cpp.wrap_list
|
|
||||||
|
|
||||||
class ByteArrayData:
|
class ByteArrayData:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@ -343,13 +337,13 @@ if mapTimezones:
|
|||||||
else:
|
else:
|
||||||
data['countryId'] = enumdata.countryCodeToId(data['countryCode'])
|
data['countryId'] = enumdata.countryCodeToId(data['countryCode'])
|
||||||
if data['countryId'] < 0:
|
if data['countryId'] < 0:
|
||||||
raise xpathlite.Error("Unknown Country Code \"%s\"" % data['countryCode'])
|
raise Error('Unknown Country Code "{}"'.format(data['countryCode']))
|
||||||
data['country'] = enumdata.country_list[data['countryId']][0]
|
data['country'] = enumdata.country_list[data['countryId']][0]
|
||||||
windowsIdDict[data['windowsKey'], data['countryId']] = data
|
windowsIdDict[data['windowsKey'], data['countryId']] = data
|
||||||
if badZones:
|
if badZones:
|
||||||
sys.stderr.write('\n\t'.join(["\nUnknown Windows ID, please add:"] + sorted(badZones))
|
sys.stderr.write('\n\t'.join(["\nUnknown Windows ID, please add:"] + sorted(badZones))
|
||||||
+ "\nto the windowIdList in cldr2qtimezone.py\n\n")
|
+ "\nto the windowIdList in cldr2qtimezone.py\n\n")
|
||||||
raise xpathlite.Error("Unknown Windows IDs")
|
raise Error('Unknown Windows IDs')
|
||||||
|
|
||||||
print "Input file parsed, now writing data"
|
print "Input file parsed, now writing data"
|
||||||
|
|
||||||
|
65
util/locale_database/localetools.py
Normal file
65
util/locale_database/localetools.py
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
#############################################################################
|
||||||
|
##
|
||||||
|
## Copyright (C) 2020 The Qt Company Ltd.
|
||||||
|
## Contact: https://www.qt.io/licensing/
|
||||||
|
##
|
||||||
|
## This file is part of the test suite of the Qt Toolkit.
|
||||||
|
##
|
||||||
|
## $QT_BEGIN_LICENSE:GPL-EXCEPT$
|
||||||
|
## Commercial License Usage
|
||||||
|
## Licensees holding valid commercial Qt licenses may use this file in
|
||||||
|
## accordance with the commercial license agreement provided with the
|
||||||
|
## Software or, alternatively, in accordance with the terms contained in
|
||||||
|
## a written agreement between you and The Qt Company. For licensing terms
|
||||||
|
## and conditions see https://www.qt.io/terms-conditions. For further
|
||||||
|
## information use the contact form at https://www.qt.io/contact-us.
|
||||||
|
##
|
||||||
|
## GNU General Public License Usage
|
||||||
|
## Alternatively, this file may be used under the terms of the GNU
|
||||||
|
## General Public License version 3 as published by the Free Software
|
||||||
|
## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
|
||||||
|
## included in the packaging of this file. Please review the following
|
||||||
|
## information to ensure the GNU General Public License requirements will
|
||||||
|
## be met: https://www.gnu.org/licenses/gpl-3.0.html.
|
||||||
|
##
|
||||||
|
## $QT_END_LICENSE$
|
||||||
|
##
|
||||||
|
#############################################################################
|
||||||
|
"""Utilities shared among the CLDR extraction tools.
|
||||||
|
Functions:
|
||||||
|
unicode2hex() -- converts unicode text to UTF-16 in hex form.
|
||||||
|
wrap_list() -- map list to comma-separated string, 20 entries per line.
|
||||||
|
|
||||||
|
Classes:
|
||||||
|
Error -- A shared error class.
|
||||||
|
"""
|
||||||
|
|
||||||
|
class Error (StandardError):
|
||||||
|
__upinit = StandardError.__init__
|
||||||
|
def __init__(self, msg, *args):
|
||||||
|
self.__upinit(msg, *args)
|
||||||
|
self.message = msg
|
||||||
|
def __str__(self):
|
||||||
|
return self.message
|
||||||
|
|
||||||
|
def unicode2hex(s):
|
||||||
|
lst = []
|
||||||
|
for x in s:
|
||||||
|
v = ord(x)
|
||||||
|
if v > 0xFFFF:
|
||||||
|
# make a surrogate pair
|
||||||
|
# copied from qchar.h
|
||||||
|
high = (v >> 10) + 0xd7c0
|
||||||
|
low = (v % 0x400 + 0xdc00)
|
||||||
|
lst.append(hex(high))
|
||||||
|
lst.append(hex(low))
|
||||||
|
else:
|
||||||
|
lst.append(hex(v))
|
||||||
|
return lst
|
||||||
|
|
||||||
|
def wrap_list(lst):
|
||||||
|
def split(lst, size):
|
||||||
|
while lst:
|
||||||
|
head, lst = lst[:size], lst[size:]
|
||||||
|
yield head
|
||||||
|
return ",\n".join(", ".join(x) for x in split(lst, 20))
|
@ -39,7 +39,7 @@ Support:
|
|||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
from xml.sax.saxutils import escape
|
from xml.sax.saxutils import escape
|
||||||
|
|
||||||
from xpathlite import Error
|
from localetools import Error
|
||||||
|
|
||||||
# Tools used by Locale:
|
# Tools used by Locale:
|
||||||
def camel(seq):
|
def camel(seq):
|
||||||
|
@ -37,9 +37,10 @@ import os
|
|||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import datetime
|
import datetime
|
||||||
from enumdata import language_aliases, country_aliases, script_aliases
|
|
||||||
|
|
||||||
from qlocalexml import QLocaleXmlReader
|
from qlocalexml import QLocaleXmlReader
|
||||||
|
from enumdata import language_aliases, country_aliases, script_aliases
|
||||||
|
from localetools import unicode2hex, wrap_list, Error
|
||||||
|
|
||||||
# TODO: Make calendars a command-line parameter
|
# TODO: Make calendars a command-line parameter
|
||||||
# map { CLDR name: Qt file name }
|
# map { CLDR name: Qt file name }
|
||||||
@ -59,19 +60,6 @@ generated_template = """
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
class Error:
|
|
||||||
def __init__(self, msg):
|
|
||||||
self.msg = msg
|
|
||||||
def __str__(self):
|
|
||||||
return self.msg
|
|
||||||
|
|
||||||
def wrap_list(lst):
|
|
||||||
def split(lst, size):
|
|
||||||
while lst:
|
|
||||||
head, lst = lst[:size], lst[size:]
|
|
||||||
yield head
|
|
||||||
return ",\n".join(", ".join(x) for x in split(lst, 20))
|
|
||||||
|
|
||||||
def fixedScriptName(name, dupes):
|
def fixedScriptName(name, dupes):
|
||||||
# Don't .capitalize() as some names are already camel-case (see enumdata.py):
|
# Don't .capitalize() as some names are already camel-case (see enumdata.py):
|
||||||
name = ''.join(word[0].upper() + word[1:] for word in name.split())
|
name = ''.join(word[0].upper() + word[1:] for word in name.split())
|
||||||
@ -127,21 +115,6 @@ def compareLocaleKeys(key1, key2):
|
|||||||
return key1[1] - key2[1]
|
return key1[1] - key2[1]
|
||||||
|
|
||||||
|
|
||||||
def unicode2hex(s):
|
|
||||||
lst = []
|
|
||||||
for x in s:
|
|
||||||
v = ord(x)
|
|
||||||
if v > 0xFFFF:
|
|
||||||
# make a surrogate pair
|
|
||||||
# copied from qchar.h
|
|
||||||
high = (v >> 10) + 0xd7c0
|
|
||||||
low = (v % 0x400 + 0xdc00)
|
|
||||||
lst.append(hex(high))
|
|
||||||
lst.append(hex(low))
|
|
||||||
else:
|
|
||||||
lst.append(hex(v))
|
|
||||||
return lst
|
|
||||||
|
|
||||||
class StringDataToken:
|
class StringDataToken:
|
||||||
def __init__(self, index, length):
|
def __init__(self, index, length):
|
||||||
if index > 0xFFFF or length > 0xFFFF:
|
if index > 0xFFFF or length > 0xFFFF:
|
||||||
|
@ -31,6 +31,8 @@ import sys
|
|||||||
import os
|
import os
|
||||||
import xml.dom.minidom
|
import xml.dom.minidom
|
||||||
|
|
||||||
|
from localetools import Error
|
||||||
|
|
||||||
class DraftResolution:
|
class DraftResolution:
|
||||||
# See http://www.unicode.org/cldr/process.html for description
|
# See http://www.unicode.org/cldr/process.html for description
|
||||||
unconfirmed = 'unconfirmed'
|
unconfirmed = 'unconfirmed'
|
||||||
@ -43,12 +45,6 @@ class DraftResolution:
|
|||||||
def toInt(self):
|
def toInt(self):
|
||||||
return DraftResolution._values[self.resolution]
|
return DraftResolution._values[self.resolution]
|
||||||
|
|
||||||
class Error:
|
|
||||||
def __init__(self, msg):
|
|
||||||
self.msg = msg
|
|
||||||
def __str__(self):
|
|
||||||
return self.msg
|
|
||||||
|
|
||||||
doc_cache = {}
|
doc_cache = {}
|
||||||
def parseDoc(file):
|
def parseDoc(file):
|
||||||
if not doc_cache.has_key(file):
|
if not doc_cache.has_key(file):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user