Move some shared code to a localetools module

The time-zone script was importing two functions from the locale data generation script. Move them to a separate module, to which I'll shortly add some more shared utilities. Cleaned up some imports in the process. Combined qlocalexml2cpp's and xpathlite's error classes into a new Error class in the new module and made it a bit more like a proper Python error class. Task-number: QTBUG-81344 Change-Id: Idbe0139ba9aaa2f823b8f7216dee1d2539c18b75 Reviewed-by: Cristian Maureira-Fredes <cristian.maureira-fredes@qt.io>
2020-02-19 17:18:28 +01:00 · 2020-02-19 17:18:28 +01:00 · c3dea1ffca
commit c3dea1ffca
parent 4d9f1a87de
6 changed files with 105 additions and 75 deletions
--- a/util/locale_database/cldr2qlocalexml.py
+++ b/util/locale_database/cldr2qlocalexml.py
@ -58,14 +58,14 @@ import re
 import textwrap
 import enumdata
-import xpathlite
+from localetools import Error
-from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile
+from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile, codeMapsFromFile, \
    _findEntryInFile as findEntryInFile
 from dateconverter import convert_date
 from qlocalexml import Locale, QLocaleXmlWriter
 # TODO: make calendars a command-line option
 calendars = ['gregorian', 'persian', 'islamic'] # 'hebrew'
 findEntryInFile = xpathlite._findEntryInFile
 def wrappedwarn(err, prefix, tokens):
    return err.write(
        '\n'.join(textwrap.wrap(prefix + ', '.join(tokens),
@ -116,19 +116,19 @@ def raiseUnknownCode(code, form, cache={}):
    type of code to look up.  Do not pass further parameters (the next
    will deprive you of the cache).
-    Raises xpathlite.Error with a suitable message, that includes the
+    Raises localetools.Error with a suitable message, that includes
-    unknown code's full name if found.
+    the unknown code's full name if found.
    Relies on global cldr_dir being set before it's called; see tail
    of this file.
    """
    if not cache:
-        cache.update(xpathlite.codeMapsFromFile(os.path.join(cldr_dir, 'en.xml')))
+        cache.update(codeMapsFromFile(os.path.join(cldr_dir, 'en.xml')))
    name = cache[form].get(code)
    msg = 'unknown %s code "%s"' % (form, code)
    if name:
        msg += ' - could use "%s"' % name
-    raise xpathlite.Error(msg)
+    raise Error(msg)
 def parse_list_pattern_part_format(pattern):
    # This is a very limited parsing of the format for list pattern part only.
@ -182,7 +182,7 @@ def generateLocaleInfo(path):
    # skip legacy/compatibility ones
    alias = findAlias(path)
    if alias:
-        raise xpathlite.Error('alias to "%s"' % alias)
+        raise Error('Alias to "{}"'.format(alias))
    def code(tag):
        return findEntryInFile(path, 'identity/' + tag, attribute="type")[0]
@ -224,7 +224,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
    # ### actually there is only one locale with variant: en_US_POSIX
    #     does anybody care about it at all?
    if variant_code:
-        raise xpathlite.Error('we do not support variants ("%s")' % variant_code)
+        raise Error('We do not support variants ("{}")'.format(variant_code))
    language_id = enumdata.languageCodeToId(language_code)
    if language_id <= 0:
@ -283,23 +283,23 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
    numbering_system = None
    try:
        numbering_system = findEntry(path, "numbers/defaultNumberingSystem")
-    except xpathlite.Error:
+    except Error:
        pass
    def findEntryDef(path, xpath, value=''):
        try:
            return findEntry(path, xpath)
-        except xpathlite.Error:
+        except Error:
            return value
    def get_number_in_system(path, xpath, numbering_system):
        if numbering_system:
            try:
                return findEntry(path, xpath + "[numberSystem=" + numbering_system + "]")
-            except xpathlite.Error:
+            except Error:
                # in CLDR 1.9 number system was refactored for numbers (but not for currency)
                # so if previous findEntry doesn't work we should try this:
                try:
                    return findEntry(path, xpath.replace("/symbols/", "/symbols[numberSystem=" + numbering_system + "]/"))
-                except xpathlite.Error:
+                except Error:
                    # fallback to default
                    pass
        return findEntry(path, xpath)
@ -368,7 +368,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
        for count in ('many', 'few', 'two', 'other', 'zero', 'one'):
            try:
                ans = findEntry(path, stem + 'unitPattern[count=%s]' % count)
-            except xpathlite.Error:
+            except Error:
                continue
            # TODO: exploit count-handling, instead of discarding placeholders
@ -498,7 +498,7 @@ def _parseLocale(l):
    country = "AnyCountry"
    if l == "und":
-        raise xpathlite.Error("we are treating unknown locale like C")
+        raise Error('We treat unknown locale like C')
    parsed = splitLocale(l)
    language_code = parsed.next()
@ -511,19 +511,19 @@ def _parseLocale(l):
    if language_code != "und":
        language_id = enumdata.languageCodeToId(language_code)
        if language_id == -1:
-            raise xpathlite.Error('unknown language code "%s"' % language_code)
+            raise Error('Unknown language code "{}"'.format(language_code))
        language = enumdata.language_list[language_id][0]
    if script_code:
        script_id = enumdata.scriptCodeToId(script_code)
        if script_id == -1:
-            raise xpathlite.Error('unknown script code "%s"' % script_code)
+            raise Error('Unknown script code "{}"'.format(script_code))
        script = enumdata.script_list[script_id][0]
    if country_code:
        country_id = enumdata.countryCodeToId(country_code)
        if country_id == -1:
-            raise xpathlite.Error('unknown country code "%s"' % country_code)
+            raise Error('Unknown country code "{}"'.format(country_code))
        country = enumdata.country_list[country_id][0]
    return (language, script, country)
@ -538,11 +538,13 @@ def likelySubtags(root, err):
        try:
            from_language, from_script, from_country = _parseLocale(tmp[u"from"])
            to_language, to_script, to_country = _parseLocale(tmp[u"to"])
-        except xpathlite.Error as e:
+        except Error as e:
-            if tmp[u'to'].startswith(tmp[u'from']) and str(e) == 'unknown language code "%s"' % tmp[u'from']:
+            if (tmp['to'].startswith(tmp['from'])
-                skips.append(tmp[u'to'])
+                and e.message == 'Unknown language code "{}"'.format(tmp['from'])):
                skips.append(tmp['to'])
            else:
-                sys.stderr.write('skipping likelySubtag "%s" -> "%s" (%s)\n' % (tmp[u"from"], tmp[u"to"], str(e)))
+                sys.stderr.write('skipping likelySubtag "{}" -> "{}" ({})\n'.format(
                        tmp[u"from"], tmp[u"to"], e.message))
            continue
        # substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
        if to_country == "AnyCountry" and from_country != to_country:
@ -612,8 +614,8 @@ def main(args, out, err):
            if not l:
                skips.append(file)
                continue
-        except xpathlite.Error as e:
+        except Error as e:
-            sys.stderr.write('skipping defaultContent locale "{}" ({})\n'.format(file, str(e)))
+            sys.stderr.write('skipping defaultContent locale "{}" ({})\n'.format(file, e.message))
            continue
        locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l
@ -628,8 +630,8 @@ def main(args, out, err):
            if not l:
                skips.append(file)
                continue
-        except xpathlite.Error as e:
+        except Error as e:
-            sys.stderr.write('skipping file "{}" ({})\n'.format(file, str(e)))
+            sys.stderr.write('skipping file "{}" ({})\n'.format(file, e.message))
            continue
        locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l
--- a/util/locale_database/cldr2qtimezone.py
+++ b/util/locale_database/cldr2qtimezone.py
@ -54,20 +54,14 @@ The XML structure is as follows:
 import os
 import sys
 import re
 import datetime
 import tempfile
 import enumdata
 import xpathlite
 from  xpathlite import DraftResolution
 import re
 import qlocalexml2cpp
-findAlias = xpathlite.findAlias
+import enumdata
-findEntry = xpathlite.findEntry
+from localetools import unicode2hex, wrap_list, Error
-findEntryInFile = xpathlite._findEntryInFile
+from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile, \
-findTagsInFile = xpathlite.findTagsInFile
+    _findEntryInFile as findEntryInFile
 unicode2hex = qlocalexml2cpp.unicode2hex
 wrap_list = qlocalexml2cpp.wrap_list
 class ByteArrayData:
    def __init__(self):
@ -343,13 +337,13 @@ if mapTimezones:
            else:
                data['countryId'] = enumdata.countryCodeToId(data['countryCode'])
                if data['countryId'] < 0:
-                    raise xpathlite.Error("Unknown Country Code \"%s\"" % data['countryCode'])
+                    raise Error('Unknown Country Code "{}"'.format(data['countryCode']))
                data['country'] = enumdata.country_list[data['countryId']][0]
                windowsIdDict[data['windowsKey'], data['countryId']] = data
    if badZones:
        sys.stderr.write('\n\t'.join(["\nUnknown Windows ID, please add:"] + sorted(badZones))
                         + "\nto the windowIdList in cldr2qtimezone.py\n\n")
-        raise xpathlite.Error("Unknown Windows IDs")
+        raise Error('Unknown Windows IDs')
 print "Input file parsed, now writing data"
--- a/util/locale_database/localetools.py
+++ b/util/locale_database/localetools.py
@ -0,0 +1,65 @@
 #############################################################################
 ##
 ## Copyright (C) 2020 The Qt Company Ltd.
 ## Contact: https://www.qt.io/licensing/
 ##
 ## This file is part of the test suite of the Qt Toolkit.
 ##
 ## $QT_BEGIN_LICENSE:GPL-EXCEPT$
 ## Commercial License Usage
 ## Licensees holding valid commercial Qt licenses may use this file in
 ## accordance with the commercial license agreement provided with the
 ## Software or, alternatively, in accordance with the terms contained in
 ## a written agreement between you and The Qt Company. For licensing terms
 ## and conditions see https://www.qt.io/terms-conditions. For further
 ## information use the contact form at https://www.qt.io/contact-us.
 ##
 ## GNU General Public License Usage
 ## Alternatively, this file may be used under the terms of the GNU
 ## General Public License version 3 as published by the Free Software
 ## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
 ## included in the packaging of this file. Please review the following
 ## information to ensure the GNU General Public License requirements will
 ## be met: https://www.gnu.org/licenses/gpl-3.0.html.
 ##
 ## $QT_END_LICENSE$
 ##
 #############################################################################
 """Utilities shared among the CLDR extraction tools.
 Functions:
  unicode2hex() -- converts unicode text to UTF-16 code units in hex form.
  wrap_list() -- map list to comma-separated string, 20 entries per line.
 Classes:
  Error -- A shared error class.
 """
 try:
    # Python 2: keep StandardError as the base so existing handlers still match.
    _ErrorBase = StandardError
 except NameError:
    # Python 3 removed StandardError; Exception is its nearest equivalent.
    _ErrorBase = Exception
 class Error (_ErrorBase):
    """Error reported by the locale-data tools.

    The text given as msg is kept as the .message attribute and is what
    str() returns; msg and any further arguments are handed on to the
    base-class constructor (so they appear in .args).
    """
    def __init__(self, msg, *args):
        _ErrorBase.__init__(self, msg, *args)
        # Set explicitly: the base class does not reliably provide .message.
        self.message = msg
    def __str__(self):
        return self.message
 def unicode2hex(s):
    """Return the code units of s as a list of hex() strings.

    Each character whose ordinal fits in 16 bits contributes one entry;
    any character above 0xFFFF contributes two, the high and low halves
    of its surrogate pair, in that order.
    """
    units = []
    for char in s:
        code = ord(char)
        if code <= 0xFFFF:
            units.append(hex(code))
            continue
        # Outside the 16-bit range: make a surrogate pair, using the
        # same arithmetic as qchar.h
        units.extend((hex((code >> 10) + 0xd7c0),
                      hex(code % 0x400 + 0xdc00)))
    return units
 def wrap_list(lst):
    """Join the strings in lst with commas, twenty entries per line.

    Entries on one line are separated by ", "; each line but the last
    ends in a comma followed by a newline.  An empty lst gives ''.
    """
    per_line = 20
    rows = []
    for start in range(0, len(lst), per_line):
        rows.append(", ".join(lst[start:start + per_line]))
    return ",\n".join(rows)
--- a/util/locale_database/qlocalexml.py
+++ b/util/locale_database/qlocalexml.py
@ -39,7 +39,7 @@ Support:
 from __future__ import print_function
 from xml.sax.saxutils import escape
-from xpathlite import Error
+from localetools import Error
 # Tools used by Locale:
 def camel(seq):
--- a/util/locale_database/qlocalexml2cpp.py
+++ b/util/locale_database/qlocalexml2cpp.py
@ -37,9 +37,10 @@ import os
 import sys
 import tempfile
 import datetime
 from enumdata import language_aliases, country_aliases, script_aliases
 from qlocalexml import QLocaleXmlReader
 from enumdata import language_aliases, country_aliases, script_aliases
 from localetools import unicode2hex, wrap_list, Error
 # TODO: Make calendars a command-line parameter
 # map { CLDR name: Qt file name }
@ -59,19 +60,6 @@ generated_template = """
 """
 class Error:
    """Minimal error object that carries a message.

    Declared without any base class; the message passed to the
    constructor is stored as .msg and returned by str().
    """
    def __init__(self, msg):
        self.msg = msg
    def __str__(self):
        return self.msg
 def wrap_list(lst):
    """Render lst as comma-separated text, at most 20 entries per line.

    Lines are joined by a comma and a newline; entries within a line
    by a comma and a space.
    """
    remaining, lines = lst, []
    while remaining:
        # Peel off the next (up to) twenty entries as one output line.
        lines.append(", ".join(remaining[:20]))
        remaining = remaining[20:]
    return ",\n".join(lines)
 def fixedScriptName(name, dupes):
    # Don't .capitalize() as some names are already camel-case (see enumdata.py):
    name = ''.join(word[0].upper() + word[1:] for word in name.split())
@ -127,21 +115,6 @@ def compareLocaleKeys(key1, key2):
    return key1[1] - key2[1]
 def unicode2hex(s):
    """Map each character of s to hex() text, splitting large ordinals.

    Ordinals above 0xFFFF are replaced by their surrogate pair (high
    half first), so such a character yields two entries in the result.
    """
    def code_units(text):
        for ch in text:
            point = ord(ch)
            if point > 0xFFFF:
                # surrogate pair, computed as in qchar.h
                yield (point >> 10) + 0xd7c0
                yield point % 0x400 + 0xdc00
            else:
                yield point
    return [hex(unit) for unit in code_units(s)]
 class StringDataToken:
    def __init__(self, index, length):
        if index > 0xFFFF or length > 0xFFFF:
--- a/util/locale_database/xpathlite.py
+++ b/util/locale_database/xpathlite.py
@ -31,6 +31,8 @@ import sys
 import os
 import xml.dom.minidom
 from localetools import Error
 class DraftResolution:
    # See http://www.unicode.org/cldr/process.html for description
    unconfirmed = 'unconfirmed'
@ -43,12 +45,6 @@ class DraftResolution:
    def toInt(self):
        return DraftResolution._values[self.resolution]
 class Error:
    """Plain message holder used as this module's error type.

    It has no base class; str() of an instance gives back the msg it
    was constructed with.
    """
    def __init__(self, msg):
        self.msg = msg
    def __str__(self):
        return self.msg
 doc_cache = {}
 def parseDoc(file):
    if not doc_cache.has_key(file):