diff --git a/util/locale_database/cldr.py b/util/locale_database/cldr.py index 45db816c5ef..450d868bfc6 100644 --- a/util/locale_database/cldr.py +++ b/util/locale_database/cldr.py @@ -10,11 +10,11 @@ The former should normally be all you need to access. See individual classes for further detail. """ -from typing import Iterable, TextIO +from typing import Callable, Iterable, Iterator, TextIO from xml.dom import minidom from weakref import WeakValueDictionary as CacheDict from pathlib import Path -from datetime import datetime +from datetime import datetime, timedelta from ldml import Error, Node, XmlScanner, Supplement, LocaleScanner from localetools import names_clash @@ -309,7 +309,7 @@ class CldrReader (object): # the cache. If a process were to instantiate this class with distinct # roots, each cache would be filled by the first to need it ! class CldrAccess (object): - def __init__(self, root: Path): + def __init__(self, root: Path) -> None: """Set up a master object for accessing CLDR data. Single parameter, root, is the file-system path to the root of @@ -317,20 +317,20 @@ class CldrAccess (object): contain dtd/, main/ and supplemental/ sub-directories.""" self.root = root - def xml(self, relative_path: str): + def xml(self, relative_path: str) -> XmlScanner: """Load a single XML file and return its root element as an XmlScanner. The path is interpreted relative to self.root""" return XmlScanner(Node(self.__xml(relative_path))) - def supplement(self, name): + def supplement(self, name: str) -> Supplement: """Loads supplemental data as a Supplement object. The name should be that of a file in common/supplemental/, without path. """ return Supplement(Node(self.__xml(f'common/supplemental/{name}'))) - def locale(self, name): + def locale(self, name: str) -> LocaleScanner: """Loads all data for a locale as a LocaleScanner object. The name should be a locale name; adding suffix '.xml' to it @@ -340,7 +340,7 @@ class CldrAccess (object): inheritance, where relevant.""" return LocaleScanner(name, self.__localeRoots(name), self.__rootLocale) - def englishNaming(self, tag): # see QLocaleXmlWriter.enumData() + def englishNaming(self, tag: str) -> Callable[[str], str]: # see QLocaleXmlWriter.enumData() return self.__codeMap(tag).get @property @@ -354,18 +354,18 @@ class CldrAccess (object): yield path.stem @property - def defaultContentLocales(self): + def defaultContentLocales(self) -> Iterator[str]: """Generator for the default content locales.""" for name, attrs in self.supplement('supplementalMetadata.xml').find('metadata/defaultContent'): try: - locales = attrs['locales'] + locales: str = attrs['locales'] except KeyError: pass else: for locale in locales.split(): yield locale - def likelySubTags(self): + def likelySubTags(self) -> Iterator[tuple[str, str]]: for ignore, attrs in self.supplement('likelySubtags.xml').find('likelySubtags'): yield attrs['from'], attrs['to'] @@ -380,7 +380,7 @@ class CldrAccess (object): except KeyError: raise Error(f'Unsupported number system: {system}') - def weekData(self, territory): + def weekData(self, territory: str) -> tuple[str, str, str]: """Data on the weekly cycle. Returns a triple (W, S, E) of en's short names for week-days; @@ -393,7 +393,7 @@ class CldrAccess (object): except KeyError: return self.__weekData['001'] - def currencyData(self, territory): + def currencyData(self, territory: str) -> tuple[str, int, int]: """Returns currency data for the given territory code. Return value is a tuple (ISO4217 code, digit count, rounding @@ -405,7 +405,9 @@ class CldrAccess (object): except KeyError: return '', 2, 1 - def codesToIdName(self, language, script, territory, variant = ''): + def codesToIdName(self, language: str, script: str, territory: str, variant: str = '' + ) -> tuple[tuple[int, str], tuple[int, str], + tuple[int, str], tuple[int, str]]: """Maps each code to the appropriate ID and name. Returns a 4-tuple of (ID, name) pairs corresponding to the @@ -417,7 +419,7 @@ class CldrAccess (object): Until we implement variant support (QTBUG-81051), the fourth member of the returned tuple is always 0 paired with a string that should not be used.""" - enum = self.__enumMap + enum: Callable[[str], dict[str, tuple[int, str]]] = self.__enumMap try: return (enum('language')[language], enum('script')[script], @@ -428,8 +430,9 @@ class CldrAccess (object): parts, values = [], [language, script, territory, variant] for index, key in enumerate(('language', 'script', 'territory', 'variant')): - naming, enums = self.__codeMap(key), enum(key) - value = values[index] + naming: dict[str, str] = self.__codeMap(key) + enums: dict[str, tuple[int, str]] = enum(key) + value: str = values[index] if value not in enums: text = f'{key} code {value}' name = naming.get(value) @@ -447,21 +450,22 @@ class CldrAccess (object): language, script, territory, variant) @staticmethod - def __checkEnum(given, proper, scraps): + def __checkEnum(given: dict[str, str], proper: dict[str, str], scraps: set[str] + ) -> Iterator[tuple[str, str]]: # Each is a { code: full name } mapping for code, name in given.items(): - try: right = proper[code] + try: right: str = proper[code] except KeyError: # No en.xml name for this code, but supplementalData's # parentLocale may still believe in it: if code not in scraps: yield name, f'[Found no CLDR name for code {code}]' continue - cleaned = names_clash(right, name) + cleaned: None | str = names_clash(right, name) if cleaned: yield name, cleaned - def checkEnumData(self, grumble): + def checkEnumData(self, grumble: Callable[[str], int]) -> None: scraps = set() for k in self.__parentLocale.keys(): for f in k.split('_'): @@ -492,7 +496,7 @@ enumdata.py (keeping the old name as an alias): + '\n') grumble('\n') - def bcp47Aliases(self): + def bcp47Aliases(self) -> tuple[dict[str, str], dict[str, str]]: """Reads the mapping from CLDR IDs to IANA IDs CLDR identifies timezones in various ways but its standard @@ -530,7 +534,8 @@ enumdata.py (keeping the old name as an alias): # If we ever need a mapping back to CLDR ID, we can make # (description, space-joined-list) the naming values. - alias, naming = {}, {} # { alias: iana }, { iana: description } + alias: dict[str, str] = {} # { alias: iana } + naming: dict[str, str] = {} # { iana: description } for item, attrs in root.find('keyword/key/type', exclude=('deprecated',)): assert 'description' in attrs, item assert 'alias' in attrs, item @@ -545,7 +550,8 @@ enumdata.py (keeping the old name as an alias): return alias, naming - def readWindowsTimeZones(self, alias): + def readWindowsTimeZones(self, alias: dict[str, str]) -> tuple[dict[str, str], + list[tuple[str, str, str]]]: """Digest CLDR's MS-Win time-zone name mapping. Single argument, alias, should be the first part of the pair @@ -582,7 +588,8 @@ enumdata.py (keeping the old name as an alias): mapZone element and the last is s, its cleaned-up list of IANA IDs.""" - defaults, windows = {}, [] + defaults: dict[str, str] = {} + windows: list[tuple[str, str, str]] = [] zones = self.supplement('windowsZones.xml') for name, attrs in zones.find('windowsZones/mapTimezones'): if name != 'mapZone': @@ -602,7 +609,10 @@ enumdata.py (keeping the old name as an alias): return defaults, windows - def readMetaZoneMap(self, alias): + def readMetaZoneMap(self, alias: dict[str, str] + ) -> tuple[dict[str, dict[str, str]], + dict[str, tuple[tuple[int, int, str], ...]], + dict[str, str]]: """Digests the metaZones supplemental data. Required argument, alias, should be the first of @@ -633,9 +643,9 @@ enumdata.py (keeping the old name as an alias): locale.""" metaZones = self.supplement('metaZones.xml') # Doesn't appear to use draft attribute # Map CLDR name to IANA name (or use CLDR name if unknown to alias): - zoneName = lambda n, g=alias.get: g(n, n) + zoneName: Callable[[str], str] = lambda n, g=alias.get: g(n, n) - metaMap = {} # { meta: { territory code: zoneId } } + metaMap: dict[str, dict[str, str]] = {} # { meta: { territory code: zoneId } } # Entry with territory 001 is "golden zone" for the metazone. for mapMeta in metaZones.findNodes('metaZones/mapTimezones'): attrs = mapMeta.attributes() @@ -646,13 +656,13 @@ enumdata.py (keeping the old name as an alias): raise Error('Version of metazone map type is not 2018e', attrs) for node in mapMeta.findAllChildren('mapZone'): - attrs = node.attributes() + attrs: dict[str, str] = node.attributes() try: meta, code, zone = attrs['other'], attrs['territory'], attrs['type'] except KeyError: continue - bok = metaMap.setdefault(meta, {}) + bok: dict[str, str] = metaMap.setdefault(meta, {}) assert code not in bok, (meta, code) bok[code] = zoneName(zone) # Territories not named in a metaMap entry fall back on the @@ -660,16 +670,16 @@ enumdata.py (keeping the old name as an alias): # entry: assert all('001' in bok for bok in metaMap.values()) - def scanUses(zone, check=metaMap): + def scanUses(zone: Node, check=metaMap) -> Iterator[tuple[str|None, str|None, str]]: for node in zone.findAllChildren('usesMetazone'): - attrs = node.attributes() - mzone = attrs['mzone'] + attrs: dict[str, str] = node.attributes() + mzone: str = attrs['mzone'] if mzone not in check: raise Error('Unknown metazone', mzone) # These are UTC date-times. yield attrs.get('from'), attrs.get('to'), mzone - def sortKey(triple): + def sortKey(triple: tuple[str|None, str|None, str]) -> str | None: start, stop, mzone = triple # The start = None entry should sort first; since its key # is its stop, which is likely the next entry's start, we @@ -680,11 +690,11 @@ enumdata.py (keeping the old name as an alias): # in the list, so the sorting is fatuous and the key # doesn't matter). - def timeRep(text, notime, epoch=datetime(1970, 1, 1, 0, 0)): + def timeRep(text: str, notime: bool, epoch=datetime(1970, 1, 1, 0, 0)) -> int: """Map a 'yyyy-MM-dd HH:mm' string to epoch minutes. If the HH:mm part is omitted, second parameter notime is true to - use the end of the day, false for the start. LDM specifies this + use the end of the day, false for the start. LDML specifies this reading of the pure-date values for start and stop attributes. If the HH:mm part is 24:00, the end of the day is also used; LDML specifies this but python's datetime.fromisoformat() doesn't like @@ -704,16 +714,20 @@ enumdata.py (keeping the old name as an alias): assert len(text) == 16, text # If it's given with HH:mm as 24:00, this throws: - diff = datetime.fromisoformat(text) - epoch + diff: timedelta = datetime.fromisoformat(text) - epoch except ValueError: diff = datetime.fromisoformat(text[:10]) - epoch diff += diff.__class__(days=1) assert diff.days >= 0 and diff.seconds >= 0, (diff, text) - assert diff.seconds % 60 == 0, (diff, text) - return diff.days * 1440 + int(diff.seconds / 60) + mins, secs = divmod(diff.seconds, 60) + assert secs == 0, (diff, text) + return diff.days * 1440 + mins - def mapTimes(triple, alpha=0, omega=(1<<32)-1, torep=timeRep): + def mapTimes(triple: tuple[str|None, str|None, str], + alpha: int = 0, omega: int = (1<<32) - 1, + torep: Callable[[str, bool, datetime], int] = timeRep + ) -> tuple[int, int, str]: start, stop, mzone = triple start = alpha if start is None else torep(start, False) stop = omega if stop is None else torep(stop, True) @@ -723,10 +737,11 @@ enumdata.py (keeping the old name as an alias): stop = omega return start, stop, mzone - zones = {} # { ianaId: ( (from, to, meta), ... ) } + # zones is { ianaId: ( (from, to, meta), ... ) } + zones: dict[str, tuple[tuple[int, int, str], ...]] = {} for metaInfo in metaZones.findNodes('metaZones/metazoneInfo'): for zone in metaInfo.findAllChildren('timezone'): - iana = zoneName(zone.dom.attributes['type'].value) + iana: str = zoneName(zone.dom.attributes['type'].value) story = tuple(sorted(scanUses(zone), key=sortKey)) # Only {first,last} entry can have None for {from,to}: assert not any(s[0] is None for s in story[1:]), (iana, story) @@ -743,7 +758,7 @@ enumdata.py (keeping the old name as an alias): for zone in bok.values()) for metaz, bok in metaMap.items()) - territorial = {} # { territory code: IANA ID } + territorial: dict[str, str] = {} # { territory code: IANA ID } for prime in metaZones.findNodes('primaryZones/primaryZone'): code = prime.attributes()['iso3166'] assert code not in territorial, code @@ -752,36 +767,36 @@ enumdata.py (keeping the old name as an alias): return metaMap, zones, territorial @property - def cldrVersion(self): + def cldrVersion(self) -> str: # Evaluate so as to ensure __cldrVersion is set: self.__unDistinguishedAttributes return self.__cldrVersion # Implementation details - def __xml(self, relative_path: str, cache = CacheDict(), read = minidom.parse): + def __xml(self, relPath: str, cache = CacheDict(), read = minidom.parse) -> minidom.Element: try: - doc = cache[relative_path] + doc: minidom.Element = cache[relPath] except KeyError: - cache[relative_path] = doc = read(str(self.root.joinpath(relative_path))).documentElement + cache[relPath] = doc = read(str(self.root.joinpath(relPath))).documentElement return doc def __open(self, relative_path: str) -> TextIO: return self.root.joinpath(relative_path).open() @property - def __rootLocale(self, cache = []): + def __rootLocale(self, cache: list[XmlScanner] = []) -> XmlScanner: if not cache: cache.append(self.xml('common/main/root.xml')) return cache[0] @property - def __supplementalData(self, cache = []): + def __supplementalData(self, cache: list[Supplement] = []) -> Supplement: if not cache: cache.append(self.supplement('supplementalData.xml')) return cache[0] @property - def __numberSystems(self, cache = {}): + def __numberSystems(self, cache: dict[str, dict[str, str]] = {}) -> dict[str, dict[str, str]]: if not cache: for ignore, attrs in self.supplement('numberingSystems.xml').find('numberingSystems'): cache[attrs['id']] = attrs @@ -789,20 +804,22 @@ enumdata.py (keeping the old name as an alias): return cache @property - def __weekData(self, cache = {}): + def __weekData(self, cache: dict[str, tuple[str, str, str]] = {} + ) -> dict[str, tuple[str, str, str]]: if not cache: + # firstDay, weStart and weEnd are all dict[str, str] firstDay, weStart, weEnd = self.__getWeekData() # Massage those into an easily-consulted form: # World defaults given for code '001': mon, sat, sun = firstDay['001'], weStart['001'], weEnd['001'] - lands = set(firstDay) | set(weStart) | set(weEnd) + lands: set[str] = set(firstDay) | set(weStart) | set(weEnd) cache.update((land, (firstDay.get(land, mon), weStart.get(land, sat), weEnd.get(land, sun))) for land in lands) assert cache return cache - def __getWeekData(self): + def __getWeekData(self) -> Iterator[dict[str, str]]: """Scan for data on the weekly cycle. Yields three mappings from locales to en's short names for @@ -811,12 +828,12 @@ enumdata.py (keeping the old name as an alias): gives the day on which the week starts, the second gives the day on which the week-end starts, the third gives the last day of the week-end.""" - source = self.__supplementalData + source: Supplement = self.__supplementalData for key in ('firstDay', 'weekendStart', 'weekendEnd'): - result = {} + result: dict[str, str] = {} for ignore, attrs in source.find(f'weekData/{key}'): assert ignore == key - day = attrs['day'] + day: str = attrs['day'] assert day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'), day if 'alt' in attrs: continue @@ -825,7 +842,8 @@ enumdata.py (keeping the old name as an alias): yield result @property - def __currencyData(self, cache = {}): + def __currencyData(self, cache: dict[str, tuple[str, int, int]] = {} + ) -> dict[str, tuple[str, int, int]]: if not cache: source = self.__supplementalData for elt in source.findNodes('currencyData/region'): @@ -850,15 +868,16 @@ enumdata.py (keeping the old name as an alias): if iso: for tag, data in source.find( f'currencyData/fractions/info[iso4217={iso}]'): - digits = data['digits'] - rounding = data['rounding'] + digits = int(data['digits']) + rounding = int(data['rounding']) cache[territory] = iso, digits, rounding assert cache return cache @property - def __unDistinguishedAttributes(self, cache = {}): + def __unDistinguishedAttributes(self, cache: dict[str, tuple[str, ...]] = {} + ) -> dict[str, tuple[str, ...]]: """Mapping from tag names to lists of attributes. LDML defines some attributes as 'distinguishing': if a node @@ -878,7 +897,7 @@ enumdata.py (keeping the old name as an alias): return cache - def __scanLdmlDtd(self): + def __scanLdmlDtd(self) -> Iterator[tuple[str, tuple[str, ...]]]: """Scan the LDML DTD, record CLDR version Yields (tag, attrs) pairs: on elements with a given tag, @@ -920,7 +939,8 @@ enumdata.py (keeping the old name as an alias): if tag and ignored: yield tag, tuple(ignored) - def __enumMap(self, key, cache = {}): + def __enumMap(self, key: str, cache: dict[str, dict[str, tuple[int, str]]] = {} + ) -> dict[str, tuple[int, str]]: if not cache: cache['variant'] = {'': (0, 'This should never be seen outside ldml.py')} # They're mappings from numeric value to pairs of full @@ -943,19 +963,19 @@ enumdata.py (keeping the old name as an alias): return cache[key] - def __codeMap(self, key, cache = {}, + def __codeMap(self, key: str, cache: dict[str, dict[str, str]] = {}, # Maps our name for it to CLDR's name: naming = {'language': 'languages', 'script': 'scripts', - 'territory': 'territories', 'variant': 'variants'}): + 'territory': 'territories', 'variant': 'variants'}) -> dict[str, str]: if not cache: - root = self.xml('common/main/en.xml').root.findUniqueChild('localeDisplayNames') + root: Node = self.xml('common/main/en.xml').root.findUniqueChild('localeDisplayNames') for dst, src in naming.items(): cache[dst] = dict(self.__codeMapScan(root.findUniqueChild(src))) assert cache return cache[key] - def __codeMapScan(self, node): + def __codeMapScan(self, node: Node) -> Iterator[tuple[str, str]]: """Get mapping from codes to element values. Passed in node is a , , or @@ -986,23 +1006,23 @@ enumdata.py (keeping the old name as an alias): # CLDR uses inheritance between locales to save repetition: @property - def __parentLocale(self, cache = {}): + def __parentLocale(self, cache: dict[str, str] = {}) -> dict[str, str]: # see http://www.unicode.org/reports/tr35/#Parent_Locales if not cache: for tag, attrs in self.__supplementalData.find('parentLocales', ('component',)): - parent = attrs.get('parent', '') + parent: str = attrs.get('parent', '') for child in attrs['locales'].split(): cache[child] = parent assert cache return cache - def __scanLocaleRoots(self, name: str): + def __scanLocaleRoots(self, name: str) -> Iterator[Node]: while name and name != 'root': path = f'common/main/{name}.xml' if self.root.joinpath(path).exists(): - elt = self.__xml(path) # which has no top-level alias children: + elt: minidom.Element = self.__xml(path) # which has no top-level alias children: assert not any(True for child in Node(elt).findAllChildren( 'alias', allDull=True) @@ -1019,11 +1039,11 @@ enumdata.py (keeping the old name as an alias): break class __Seq (list): pass # No weakref for tuple and list, but list sub-class is ok. - def __localeRoots(self, name, cache = CacheDict()): + def __localeRoots(self, name: str, cache = CacheDict()) -> __Seq: try: - chain = cache[name] + chain: CldrAccess.__Seq = cache[name] except KeyError: - cache[name] = chain = self.__Seq(self.__scanLocaleRoots(name)) + cache[name] = chain = CldrAccess.__Seq(self.__scanLocaleRoots(name)) return chain # Unpolute the namespace: we don't need to export these. diff --git a/util/locale_database/localetools.py b/util/locale_database/localetools.py index 88f152c910a..10937df5485 100644 --- a/util/locale_database/localetools.py +++ b/util/locale_database/localetools.py @@ -64,7 +64,7 @@ def wrap_list(lst, perline=20): yield head return ",\n".join(", ".join(x) for x in split(lst, perline)) -def names_clash(cldr, enum): +def names_clash(cldr: str, enum: str) -> None | str: """True if the reader might not recognize cldr as the name of enum First argument, cldr, is the name CLDR gives for some language,