From 35a0bfd1e3dbc3c07a5731ed58e7a1579e4405eb Mon Sep 17 00:00:00 2001 From: Mate Barany Date: Mon, 7 Oct 2024 16:38:39 +0200 Subject: [PATCH] Add type annotations to LocaleScanner Task-number: QTBUG-129566 Change-Id: I768fda6b5202ebabc8283ecedead9157653862be Reviewed-by: Edward Welbourne (cherry picked from commit b9e4f53b7e46636e886e41dde5693420dbfd81dc) --- util/locale_database/cldr.py | 2 +- util/locale_database/ldml.py | 104 +++++++++++++++++++---------------- 2 files changed, 57 insertions(+), 49 deletions(-) diff --git a/util/locale_database/cldr.py b/util/locale_database/cldr.py index 836e98dab61..ddf5074eba5 100644 --- a/util/locale_database/cldr.py +++ b/util/locale_database/cldr.py @@ -353,7 +353,7 @@ class CldrAccess (object): for ignore, attrs in self.supplement('likelySubtags.xml').find('likelySubtags'): yield attrs['from'], attrs['to'] - def numberSystem(self, system): + def numberSystem(self, system: str) -> dict[str, str]: """Get a description of a numbering system. Returns a mapping, with keys 'digits', 'type' and 'id'; the diff --git a/util/locale_database/ldml.py b/util/locale_database/ldml.py index 468311268bc..a0ced67bfd6 100644 --- a/util/locale_database/ldml.py +++ b/util/locale_database/ldml.py @@ -21,7 +21,7 @@ See individual classes for further detail. from __future__ import annotations from localetools import Error from dateconverter import convert_date -from typing import Any, Iterator +from typing import Any, Callable, Iterator from xml.dom import minidom # The github version of CLDR uses '↑↑↑' to indicate "inherit" @@ -204,17 +204,17 @@ class Supplement (XmlScanner): yield elt.nodeName, _attrsFromDom(elt) class LocaleScanner (object): - def __init__(self, name, nodes, root): + def __init__(self, name: str, nodes: list[Node], root: XmlScanner) -> None: """Set up to scan data for a specified locale. First parameter is the name of the locale; it will be used in - error messages. Second is a tuple of DOM root-nodes of files + error messages. Second is a sequence of DOM root-nodes of files with locale data, later ones serving as fall-backs for data missing in earlier ones. Third parameter is the root locale's DOM node.""" self.name, self.nodes, self.base = name, nodes, root - def find(self, xpath, default = None, draft = None): + def find(self, xpath: str, default: str|None = None, draft: int|None = None) -> str: """XPath search for the content of an element. Required argument, xpath, is the XPath to search for. Optional @@ -234,7 +234,7 @@ class LocaleScanner (object): raise return default - def tagCodes(self): + def tagCodes(self) -> Iterator[str]: """Yields four tag codes The tag codes are language, script, territory and variant; an @@ -244,7 +244,7 @@ class LocaleScanner (object): top-level element of this file has a non-empty source attribute; that attribute value is mentioned in the error's message.""" - root = self.nodes[0] + root: Node = self.nodes[0] for alias in root.findAllChildren('alias', allDull=True): try: source = alias.dom.attributes['source'].nodeValue @@ -253,7 +253,7 @@ class LocaleScanner (object): else: raise Error(f'Alias to {source}') - ids = root.findUniqueChild('identity') + ids: Node = root.findUniqueChild('identity') for code in ('language', 'script', 'territory', 'variant'): for node in ids.findAllChildren(code, allDull=True): try: @@ -265,7 +265,7 @@ class LocaleScanner (object): else: # No value for this code, use empty yield '' - def currencyData(self, isoCode): + def currencyData(self, isoCode: str) -> Iterator[tuple[str, str]]: """Fetches currency data for this locale. Single argument, isoCode, is the ISO currency code for the @@ -273,28 +273,29 @@ class LocaleScanner (object): includes some currency formats. """ if isoCode: - stem = f'numbers/currencies/currency[{isoCode}]/' - symbol = self.find(f'{stem}symbol', '') - name = self.__currencyDisplayName(stem) + stem: str = f'numbers/currencies/currency[{isoCode}]/' + symbol: str = self.find(f'{stem}symbol', '') + name: str = self.__currencyDisplayName(stem) else: symbol = name = '' yield 'currencySymbol', symbol yield 'currencyDisplayName', name - def numericData(self, lookup): + def numericData(self, lookup: Callable[[str], dict[str, str]] + ) -> Iterator[tuple[str, str]]: """Generate assorted numeric data for the locale. First argument, lookup, is a callable that maps a numbering system's name to certain data about the system, as a mapping; we expect this to have 'digits' as a key. """ - system = self.find('numbers/defaultNumberingSystem') - stem = f'numbers/symbols[numberSystem={system}]/' - decimal = self.find(f'{stem}decimal') - group = self.find(f'{stem}group') + system: str = self.find('numbers/defaultNumberingSystem') + stem: str = f'numbers/symbols[numberSystem={system}]/' + decimal: str = self.find(f'{stem}decimal') + group: str = self.find(f'{stem}group') if decimal == group: # mn_Mong_MN @v43 :-( - clean = Node.draftScore('approved') + clean: int = Node.draftScore('approved') decimal = self.find(f'{stem}decimal', draft=clean) group = self.find(f'{stem}group', draft=clean) assert decimal != group, (self.name, system, decimal) @@ -306,9 +307,9 @@ class LocaleScanner (object): yield 'exp', self.find(f'{stem}exponential') yield 'groupSizes', self.__numberGrouping(system) - digits = lookup(system)['digits'] + digits: str = lookup(system)['digits'] assert len(digits) == 10 - zero = digits[0] + zero: str = digits[0] # Qt's number-formatting code assumes digits are consecutive # (except Suzhou - see QTBUG-85409 - which shares its zero # with CLDR's very-non-contiguous hanidec): @@ -316,15 +317,15 @@ class LocaleScanner (object): for i, c in enumerate(digits[1:], 1)) yield 'zero', zero - plus = self.find(f'{stem}plusSign') - minus = self.find(f'{stem}minusSign') + plus: str = self.find(f'{stem}plusSign') + minus: str = self.find(f'{stem}minusSign') yield 'plus', plus yield 'minus', minus # Currency formatting: xpath = 'numbers/currencyFormats/currencyFormatLength/currencyFormat[accounting]/pattern' try: - money = self.find(xpath.replace('Formats/', + money: str = self.find(xpath.replace('Formats/', f'Formats[numberSystem={system}]/')) except Error: money = self.find(xpath) @@ -336,7 +337,7 @@ class LocaleScanner (object): neg = it yield 'currencyNegativeFormat', neg - def textPatternData(self): + def textPatternData(self) -> Iterator[tuple[str, str]]: for key in ('quotationStart', 'alternateQuotationEnd', 'quotationEnd', 'alternateQuotationStart'): yield key, self.find(f'delimiters/{key}') @@ -363,7 +364,8 @@ class LocaleScanner (object): convert_date(self.find( f'{stem}{key}Formats/{key}FormatLength[{pair[1]}]/{key}Format/pattern'))) - def endonyms(self, language, script, territory, variant): + def endonyms(self, language: str, script: str, territory: str, variant: str + ) -> Iterator[tuple[str, str]]: # TODO: take variant into account ? # TODO: QTBUG-47892, support query for all combinations for seq in ((language, script, territory), @@ -384,12 +386,12 @@ class LocaleScanner (object): yield ('territoryEndonym', self.find(f'localeDisplayNames/territories/territory[{territory}]', '')) - def unitData(self): + def unitData(self) -> Iterator[tuple[str, str]]: yield ('byte_unit', self.find('units/unitLength[long]/unit[digital-byte]/displayName', 'bytes')) - unit = self.__findUnit('', 'B') + unit: str | None = self.__findUnit('', 'B') cache = [] # Populated by the SI call, to give hints to the IEC call yield ('byte_si_quantified', ';'.join(self.__unitCount('', unit, cache))) @@ -398,8 +400,8 @@ class LocaleScanner (object): yield ('byte_iec_quantified', ';'.join(self.__unitCount('bi', 'iB', cache))) - def calendarNames(self, calendars): - namings = self.__nameForms + def calendarNames(self, calendars: list[str]) -> Iterator[tuple[str, str]]: + namings: tuple[tuple[str, str, str], ...] = self.__nameForms for cal in calendars: stem = f'dates/calendars/calendar[{cal}]/months/' for key, mode, size in namings: @@ -427,11 +429,15 @@ class LocaleScanner (object): ('narrow', 'format', 'narrow'), ) # Used for month and day names - def __find(self, xpath): - retries, foundNone = [ xpath.split('/') ], True + def __find(self, xpath: str) -> Iterator[Node]: + retries: list[list[str]] = [ xpath.split('/') ] + foundNone: bool = True while retries: - tags, elts, roots = retries.pop(), self.nodes, (self.base.root,) + tags: list[str] = retries.pop() + elts: tuple[Node, ...] = tuple(self.nodes) + roots: tuple[Node] = (self.base.root,) for selector in tags: + # tag is a str, attrs is a dict[str, str] tag, attrs = _parseXPath(selector) elts = tuple(_iterateEach(e.findAllChildren(tag, attrs) for e in elts)) if not elts: @@ -448,12 +454,13 @@ class LocaleScanner (object): # Process roots separately: otherwise the alias-processing # is excessive. for i, selector in enumerate(tags): + # tag is a str, attrs is a dict[str, str] tag, attrs = _parseXPath(selector) for alias in tuple(_iterateEach(r.findAllChildren('alias', allDull=True) for r in roots)): if alias.dom.attributes['source'].nodeValue == 'locale': - replace = alias.dom.attributes['path'].nodeValue.split('/') + replace: list[str] = alias.dom.attributes['path'].nodeValue.split('/') retries.append(self.__xpathJoin(tags[:i], replace, tags[i:])) roots = tuple(_iterateEach(r.findAllChildren(tag, attrs) for r in roots)) @@ -480,7 +487,7 @@ class LocaleScanner (object): raise Error(f'No {sought} in {self.name}') @staticmethod - def __skipInheritors(elts): + def __skipInheritors(elts: tuple[Node, ...]) -> Iterator[Node]: for elt in elts: try: if elt.dom.firstChild.nodeValue != INHERIT: @@ -488,7 +495,7 @@ class LocaleScanner (object): except (AttributeError, KeyError): yield elt - def __currencyDisplayName(self, stem): + def __currencyDisplayName(self, stem: str) -> str | None: try: return self.find(stem + 'displayName') except Error: @@ -500,7 +507,7 @@ class LocaleScanner (object): pass return '' - def __findUnit(self, keySuffix, quantify, fallback=''): + def __findUnit(self, keySuffix: str, quantify: str, fallback: str = '') -> str: # The displayName for a quantified unit in en.xml is kByte # (even for unitLength[narrow]) instead of kB (etc.), so # prefer any unitPattern provided, but prune its placeholder: @@ -508,7 +515,7 @@ class LocaleScanner (object): stem = f'units/unitLength[{size}{keySuffix}]/unit[digital-{quantify}byte]/' for count in ('many', 'few', 'two', 'other', 'zero', 'one'): try: - ans = self.find(f'{stem}unitPattern[count={count}]') + ans: str = self.find(f'{stem}unitPattern[count={count}]') except Error: continue @@ -527,10 +534,11 @@ class LocaleScanner (object): return fallback - def __unitCount(self, keySuffix, suffix, cache, + def __unitCount(self, keySuffix: str, suffix: str, cache: list[str], # Stop at exa/exbi: 16 exbi = 2^{64} < zetta = # 1000^7 < zebi = 2^{70}, the next quantifiers up: - siQuantifiers = ('kilo', 'mega', 'giga', 'tera', 'peta', 'exa')): + siQuantifiers: tuple[str, ...] = ('kilo', + 'mega', 'giga', 'tera', 'peta', 'exa')) -> Iterator[str]: """Work out the unit quantifiers. Unfortunately, the CLDR data only go up to terabytes and we @@ -556,7 +564,7 @@ class LocaleScanner (object): else: # first call tail = suffix = suffix or 'B' for q in siQuantifiers: - it = self.__findUnit(keySuffix, q) + it: str | None = self.__findUnit(keySuffix, q) # kB for kilobyte, in contrast with KiB for IEC: q = q[0] if q == 'kilo' else q[0].upper() if not it: @@ -567,7 +575,7 @@ class LocaleScanner (object): cache.append(rest) yield it - def __numberGrouping(self, system): + def __numberGrouping(self, system: str) -> tuple[int, int, int]: """Sizes of groups of digits within a number. Returns a triple (least, higher, top) for which: @@ -587,9 +595,9 @@ class LocaleScanner (object): elsewhere).""" top = int(self.find('numbers/minimumGroupingDigits')) assert top < 4, top # We store it in a 2-bit field - grouping = self.find(f'numbers/decimalFormats[numberSystem={system}]/' + grouping: str | None = self.find(f'numbers/decimalFormats[numberSystem={system}]/' 'decimalFormatLength/decimalFormat/pattern') - groups = grouping.split('.')[0].split(',')[-3:] + groups: list[str] = grouping.split('.')[0].split(',')[-3:] assert all(len(x) < 8 for x in groups[-2:]), grouping # we store them in 3-bit fields if len(groups) > 2: return len(groups[-1]), len(groups[-2]), top @@ -598,7 +606,7 @@ class LocaleScanner (object): return size, size, top @staticmethod - def __currencyFormats(patterns, plus, minus): + def __currencyFormats(patterns: str, plus: str, minus: str) -> Iterator[str]: for p in patterns.split(';'): p = p.replace('0', '#').replace(',', '').replace('.', '') try: @@ -619,17 +627,17 @@ class LocaleScanner (object): yield p @staticmethod - def __fromLdmlListPattern(pattern): + def __fromLdmlListPattern(pattern: str) -> str: # This is a very limited parsing of the format for list pattern part only. return pattern.replace('{0}', '%1').replace('{1}', '%2').replace('{2}', '%3') @staticmethod - def __fromLdmlPath(seq): # tool function for __xpathJoin() + def __fromLdmlPath(seq: list[str]) -> Iterator[str]: # tool function for __xpathJoin() """Convert LDML's [@name='value'] to our [name=value] form.""" for it in seq: # First dismember it: - attrs = it.split('[') - tag = attrs.pop(0) + attrs: list[str] = it.split('[') + tag: str = attrs.pop(0) if not attrs: # Short-cut the easy case: yield it continue @@ -646,7 +654,7 @@ class LocaleScanner (object): yield '['.join(attrs) @classmethod - def __xpathJoin(cls, head, insert, tail): + def __xpathJoin(cls, head: list[str], insert: list[str], tail: list[str]) -> list[str]: """Join three lists of XPath selectors. Each of head, insert and tail is a sequence of selectors but