Add type annotations to LocaleScanner

Task-number: QTBUG-129566
Change-Id: I768fda6b5202ebabc8283ecedead9157653862be
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
(cherry picked from commit b9e4f53b7e46636e886e41dde5693420dbfd81dc)
This commit is contained in:
Mate Barany 2024-10-07 16:38:39 +02:00
parent b56e959d60
commit 35a0bfd1e3
2 changed files with 57 additions and 49 deletions

View File

@ -353,7 +353,7 @@ class CldrAccess (object):
for ignore, attrs in self.supplement('likelySubtags.xml').find('likelySubtags'): for ignore, attrs in self.supplement('likelySubtags.xml').find('likelySubtags'):
yield attrs['from'], attrs['to'] yield attrs['from'], attrs['to']
def numberSystem(self, system): def numberSystem(self, system: str) -> dict[str, str]:
"""Get a description of a numbering system. """Get a description of a numbering system.
Returns a mapping, with keys 'digits', 'type' and 'id'; the Returns a mapping, with keys 'digits', 'type' and 'id'; the

View File

@ -21,7 +21,7 @@ See individual classes for further detail.
from __future__ import annotations from __future__ import annotations
from localetools import Error from localetools import Error
from dateconverter import convert_date from dateconverter import convert_date
from typing import Any, Iterator from typing import Any, Callable, Iterator
from xml.dom import minidom from xml.dom import minidom
# The github version of CLDR uses '↑↑↑' to indicate "inherit" # The github version of CLDR uses '↑↑↑' to indicate "inherit"
@ -204,17 +204,17 @@ class Supplement (XmlScanner):
yield elt.nodeName, _attrsFromDom(elt) yield elt.nodeName, _attrsFromDom(elt)
class LocaleScanner (object): class LocaleScanner (object):
def __init__(self, name, nodes, root): def __init__(self, name: str, nodes: list[Node], root: XmlScanner) -> None:
"""Set up to scan data for a specified locale. """Set up to scan data for a specified locale.
First parameter is the name of the locale; it will be used in First parameter is the name of the locale; it will be used in
error messages. Second is a tuple of DOM root-nodes of files error messages. Second is a sequence of DOM root-nodes of files
with locale data, later ones serving as fall-backs for data with locale data, later ones serving as fall-backs for data
missing in earlier ones. Third parameter is the root locale's missing in earlier ones. Third parameter is the root locale's
DOM node.""" DOM node."""
self.name, self.nodes, self.base = name, nodes, root self.name, self.nodes, self.base = name, nodes, root
def find(self, xpath, default = None, draft = None): def find(self, xpath: str, default: str|None = None, draft: int|None = None) -> str:
"""XPath search for the content of an element. """XPath search for the content of an element.
Required argument, xpath, is the XPath to search for. Optional Required argument, xpath, is the XPath to search for. Optional
@ -234,7 +234,7 @@ class LocaleScanner (object):
raise raise
return default return default
def tagCodes(self): def tagCodes(self) -> Iterator[str]:
"""Yields four tag codes """Yields four tag codes
The tag codes are language, script, territory and variant; an The tag codes are language, script, territory and variant; an
@ -244,7 +244,7 @@ class LocaleScanner (object):
top-level <alias> element of this file has a non-empty source top-level <alias> element of this file has a non-empty source
attribute; that attribute value is mentioned in the error's attribute; that attribute value is mentioned in the error's
message.""" message."""
root = self.nodes[0] root: Node = self.nodes[0]
for alias in root.findAllChildren('alias', allDull=True): for alias in root.findAllChildren('alias', allDull=True):
try: try:
source = alias.dom.attributes['source'].nodeValue source = alias.dom.attributes['source'].nodeValue
@ -253,7 +253,7 @@ class LocaleScanner (object):
else: else:
raise Error(f'Alias to {source}') raise Error(f'Alias to {source}')
ids = root.findUniqueChild('identity') ids: Node = root.findUniqueChild('identity')
for code in ('language', 'script', 'territory', 'variant'): for code in ('language', 'script', 'territory', 'variant'):
for node in ids.findAllChildren(code, allDull=True): for node in ids.findAllChildren(code, allDull=True):
try: try:
@ -265,7 +265,7 @@ class LocaleScanner (object):
else: # No value for this code, use empty else: # No value for this code, use empty
yield '' yield ''
def currencyData(self, isoCode): def currencyData(self, isoCode: str) -> Iterator[tuple[str, str]]:
"""Fetches currency data for this locale. """Fetches currency data for this locale.
Single argument, isoCode, is the ISO currency code for the Single argument, isoCode, is the ISO currency code for the
@ -273,28 +273,29 @@ class LocaleScanner (object):
includes some currency formats. includes some currency formats.
""" """
if isoCode: if isoCode:
stem = f'numbers/currencies/currency[{isoCode}]/' stem: str = f'numbers/currencies/currency[{isoCode}]/'
symbol = self.find(f'{stem}symbol', '') symbol: str = self.find(f'{stem}symbol', '')
name = self.__currencyDisplayName(stem) name: str = self.__currencyDisplayName(stem)
else: else:
symbol = name = '' symbol = name = ''
yield 'currencySymbol', symbol yield 'currencySymbol', symbol
yield 'currencyDisplayName', name yield 'currencyDisplayName', name
def numericData(self, lookup): def numericData(self, lookup: Callable[[str], dict[str, str]]
) -> Iterator[tuple[str, str]]:
"""Generate assorted numeric data for the locale. """Generate assorted numeric data for the locale.
First argument, lookup, is a callable that maps a numbering First argument, lookup, is a callable that maps a numbering
system's name to certain data about the system, as a mapping; system's name to certain data about the system, as a mapping;
we expect this to have 'digits' as a key. we expect this to have 'digits' as a key.
""" """
system = self.find('numbers/defaultNumberingSystem') system: str = self.find('numbers/defaultNumberingSystem')
stem = f'numbers/symbols[numberSystem={system}]/' stem: str = f'numbers/symbols[numberSystem={system}]/'
decimal = self.find(f'{stem}decimal') decimal: str = self.find(f'{stem}decimal')
group = self.find(f'{stem}group') group: str = self.find(f'{stem}group')
if decimal == group: if decimal == group:
# mn_Mong_MN @v43 :-( # mn_Mong_MN @v43 :-(
clean = Node.draftScore('approved') clean: int = Node.draftScore('approved')
decimal = self.find(f'{stem}decimal', draft=clean) decimal = self.find(f'{stem}decimal', draft=clean)
group = self.find(f'{stem}group', draft=clean) group = self.find(f'{stem}group', draft=clean)
assert decimal != group, (self.name, system, decimal) assert decimal != group, (self.name, system, decimal)
@ -306,9 +307,9 @@ class LocaleScanner (object):
yield 'exp', self.find(f'{stem}exponential') yield 'exp', self.find(f'{stem}exponential')
yield 'groupSizes', self.__numberGrouping(system) yield 'groupSizes', self.__numberGrouping(system)
digits = lookup(system)['digits'] digits: str = lookup(system)['digits']
assert len(digits) == 10 assert len(digits) == 10
zero = digits[0] zero: str = digits[0]
# Qt's number-formatting code assumes digits are consecutive # Qt's number-formatting code assumes digits are consecutive
# (except Suzhou - see QTBUG-85409 - which shares its zero # (except Suzhou - see QTBUG-85409 - which shares its zero
# with CLDR's very-non-contiguous hanidec): # with CLDR's very-non-contiguous hanidec):
@ -316,15 +317,15 @@ class LocaleScanner (object):
for i, c in enumerate(digits[1:], 1)) for i, c in enumerate(digits[1:], 1))
yield 'zero', zero yield 'zero', zero
plus = self.find(f'{stem}plusSign') plus: str = self.find(f'{stem}plusSign')
minus = self.find(f'{stem}minusSign') minus: str = self.find(f'{stem}minusSign')
yield 'plus', plus yield 'plus', plus
yield 'minus', minus yield 'minus', minus
# Currency formatting: # Currency formatting:
xpath = 'numbers/currencyFormats/currencyFormatLength/currencyFormat[accounting]/pattern' xpath = 'numbers/currencyFormats/currencyFormatLength/currencyFormat[accounting]/pattern'
try: try:
money = self.find(xpath.replace('Formats/', money: str = self.find(xpath.replace('Formats/',
f'Formats[numberSystem={system}]/')) f'Formats[numberSystem={system}]/'))
except Error: except Error:
money = self.find(xpath) money = self.find(xpath)
@ -336,7 +337,7 @@ class LocaleScanner (object):
neg = it neg = it
yield 'currencyNegativeFormat', neg yield 'currencyNegativeFormat', neg
def textPatternData(self): def textPatternData(self) -> Iterator[tuple[str, str]]:
for key in ('quotationStart', 'alternateQuotationEnd', for key in ('quotationStart', 'alternateQuotationEnd',
'quotationEnd', 'alternateQuotationStart'): 'quotationEnd', 'alternateQuotationStart'):
yield key, self.find(f'delimiters/{key}') yield key, self.find(f'delimiters/{key}')
@ -363,7 +364,8 @@ class LocaleScanner (object):
convert_date(self.find( convert_date(self.find(
f'{stem}{key}Formats/{key}FormatLength[{pair[1]}]/{key}Format/pattern'))) f'{stem}{key}Formats/{key}FormatLength[{pair[1]}]/{key}Format/pattern')))
def endonyms(self, language, script, territory, variant): def endonyms(self, language: str, script: str, territory: str, variant: str
) -> Iterator[tuple[str, str]]:
# TODO: take variant into account ? # TODO: take variant into account ?
# TODO: QTBUG-47892, support query for all combinations # TODO: QTBUG-47892, support query for all combinations
for seq in ((language, script, territory), for seq in ((language, script, territory),
@ -384,12 +386,12 @@ class LocaleScanner (object):
yield ('territoryEndonym', yield ('territoryEndonym',
self.find(f'localeDisplayNames/territories/territory[{territory}]', '')) self.find(f'localeDisplayNames/territories/territory[{territory}]', ''))
def unitData(self): def unitData(self) -> Iterator[tuple[str, str]]:
yield ('byte_unit', yield ('byte_unit',
self.find('units/unitLength[long]/unit[digital-byte]/displayName', self.find('units/unitLength[long]/unit[digital-byte]/displayName',
'bytes')) 'bytes'))
unit = self.__findUnit('', 'B') unit: str | None = self.__findUnit('', 'B')
cache = [] # Populated by the SI call, to give hints to the IEC call cache = [] # Populated by the SI call, to give hints to the IEC call
yield ('byte_si_quantified', yield ('byte_si_quantified',
';'.join(self.__unitCount('', unit, cache))) ';'.join(self.__unitCount('', unit, cache)))
@ -398,8 +400,8 @@ class LocaleScanner (object):
yield ('byte_iec_quantified', yield ('byte_iec_quantified',
';'.join(self.__unitCount('bi', 'iB', cache))) ';'.join(self.__unitCount('bi', 'iB', cache)))
def calendarNames(self, calendars): def calendarNames(self, calendars: list[str]) -> Iterator[tuple[str, str]]:
namings = self.__nameForms namings: tuple[tuple[str, str, str], ...] = self.__nameForms
for cal in calendars: for cal in calendars:
stem = f'dates/calendars/calendar[{cal}]/months/' stem = f'dates/calendars/calendar[{cal}]/months/'
for key, mode, size in namings: for key, mode, size in namings:
@ -427,11 +429,15 @@ class LocaleScanner (object):
('narrow', 'format', 'narrow'), ('narrow', 'format', 'narrow'),
) # Used for month and day names ) # Used for month and day names
def __find(self, xpath): def __find(self, xpath: str) -> Iterator[Node]:
retries, foundNone = [ xpath.split('/') ], True retries: list[list[str]] = [ xpath.split('/') ]
foundNone: bool = True
while retries: while retries:
tags, elts, roots = retries.pop(), self.nodes, (self.base.root,) tags: list[str] = retries.pop()
elts: tuple[Node, ...] = tuple(self.nodes)
roots: tuple[Node] = (self.base.root,)
for selector in tags: for selector in tags:
# tag is a str, attrs is a dict[str, str]
tag, attrs = _parseXPath(selector) tag, attrs = _parseXPath(selector)
elts = tuple(_iterateEach(e.findAllChildren(tag, attrs) for e in elts)) elts = tuple(_iterateEach(e.findAllChildren(tag, attrs) for e in elts))
if not elts: if not elts:
@ -448,12 +454,13 @@ class LocaleScanner (object):
# Process roots separately: otherwise the alias-processing # Process roots separately: otherwise the alias-processing
# is excessive. # is excessive.
for i, selector in enumerate(tags): for i, selector in enumerate(tags):
# tag is a str, attrs is a dict[str, str]
tag, attrs = _parseXPath(selector) tag, attrs = _parseXPath(selector)
for alias in tuple(_iterateEach(r.findAllChildren('alias', allDull=True) for alias in tuple(_iterateEach(r.findAllChildren('alias', allDull=True)
for r in roots)): for r in roots)):
if alias.dom.attributes['source'].nodeValue == 'locale': if alias.dom.attributes['source'].nodeValue == 'locale':
replace = alias.dom.attributes['path'].nodeValue.split('/') replace: list[str] = alias.dom.attributes['path'].nodeValue.split('/')
retries.append(self.__xpathJoin(tags[:i], replace, tags[i:])) retries.append(self.__xpathJoin(tags[:i], replace, tags[i:]))
roots = tuple(_iterateEach(r.findAllChildren(tag, attrs) for r in roots)) roots = tuple(_iterateEach(r.findAllChildren(tag, attrs) for r in roots))
@ -480,7 +487,7 @@ class LocaleScanner (object):
raise Error(f'No {sought} in {self.name}') raise Error(f'No {sought} in {self.name}')
@staticmethod @staticmethod
def __skipInheritors(elts): def __skipInheritors(elts: tuple[Node, ...]) -> Iterator[Node]:
for elt in elts: for elt in elts:
try: try:
if elt.dom.firstChild.nodeValue != INHERIT: if elt.dom.firstChild.nodeValue != INHERIT:
@ -488,7 +495,7 @@ class LocaleScanner (object):
except (AttributeError, KeyError): except (AttributeError, KeyError):
yield elt yield elt
def __currencyDisplayName(self, stem): def __currencyDisplayName(self, stem: str) -> str | None:
try: try:
return self.find(stem + 'displayName') return self.find(stem + 'displayName')
except Error: except Error:
@ -500,7 +507,7 @@ class LocaleScanner (object):
pass pass
return '' return ''
def __findUnit(self, keySuffix, quantify, fallback=''): def __findUnit(self, keySuffix: str, quantify: str, fallback: str = '') -> str:
# The displayName for a quantified unit in en.xml is kByte # The displayName for a quantified unit in en.xml is kByte
# (even for unitLength[narrow]) instead of kB (etc.), so # (even for unitLength[narrow]) instead of kB (etc.), so
# prefer any unitPattern provided, but prune its placeholder: # prefer any unitPattern provided, but prune its placeholder:
@ -508,7 +515,7 @@ class LocaleScanner (object):
stem = f'units/unitLength[{size}{keySuffix}]/unit[digital-{quantify}byte]/' stem = f'units/unitLength[{size}{keySuffix}]/unit[digital-{quantify}byte]/'
for count in ('many', 'few', 'two', 'other', 'zero', 'one'): for count in ('many', 'few', 'two', 'other', 'zero', 'one'):
try: try:
ans = self.find(f'{stem}unitPattern[count={count}]') ans: str = self.find(f'{stem}unitPattern[count={count}]')
except Error: except Error:
continue continue
@ -527,10 +534,11 @@ class LocaleScanner (object):
return fallback return fallback
def __unitCount(self, keySuffix, suffix, cache, def __unitCount(self, keySuffix: str, suffix: str, cache: list[str],
# Stop at exa/exbi: 16 exbi = 2^{64} < zetta = # Stop at exa/exbi: 16 exbi = 2^{64} < zetta =
# 1000^7 < zebi = 2^{70}, the next quantifiers up: # 1000^7 < zebi = 2^{70}, the next quantifiers up:
siQuantifiers = ('kilo', 'mega', 'giga', 'tera', 'peta', 'exa')): siQuantifiers: tuple[str, ...] = ('kilo',
'mega', 'giga', 'tera', 'peta', 'exa')) -> Iterator[str]:
"""Work out the unit quantifiers. """Work out the unit quantifiers.
Unfortunately, the CLDR data only go up to terabytes and we Unfortunately, the CLDR data only go up to terabytes and we
@ -556,7 +564,7 @@ class LocaleScanner (object):
else: # first call else: # first call
tail = suffix = suffix or 'B' tail = suffix = suffix or 'B'
for q in siQuantifiers: for q in siQuantifiers:
it = self.__findUnit(keySuffix, q) it: str | None = self.__findUnit(keySuffix, q)
# kB for kilobyte, in contrast with KiB for IEC: # kB for kilobyte, in contrast with KiB for IEC:
q = q[0] if q == 'kilo' else q[0].upper() q = q[0] if q == 'kilo' else q[0].upper()
if not it: if not it:
@ -567,7 +575,7 @@ class LocaleScanner (object):
cache.append(rest) cache.append(rest)
yield it yield it
def __numberGrouping(self, system): def __numberGrouping(self, system: str) -> tuple[int, int, int]:
"""Sizes of groups of digits within a number. """Sizes of groups of digits within a number.
Returns a triple (least, higher, top) for which: Returns a triple (least, higher, top) for which:
@ -587,9 +595,9 @@ class LocaleScanner (object):
elsewhere).""" elsewhere)."""
top = int(self.find('numbers/minimumGroupingDigits')) top = int(self.find('numbers/minimumGroupingDigits'))
assert top < 4, top # We store it in a 2-bit field assert top < 4, top # We store it in a 2-bit field
grouping = self.find(f'numbers/decimalFormats[numberSystem={system}]/' grouping: str | None = self.find(f'numbers/decimalFormats[numberSystem={system}]/'
'decimalFormatLength/decimalFormat/pattern') 'decimalFormatLength/decimalFormat/pattern')
groups = grouping.split('.')[0].split(',')[-3:] groups: list[str] = grouping.split('.')[0].split(',')[-3:]
assert all(len(x) < 8 for x in groups[-2:]), grouping # we store them in 3-bit fields assert all(len(x) < 8 for x in groups[-2:]), grouping # we store them in 3-bit fields
if len(groups) > 2: if len(groups) > 2:
return len(groups[-1]), len(groups[-2]), top return len(groups[-1]), len(groups[-2]), top
@ -598,7 +606,7 @@ class LocaleScanner (object):
return size, size, top return size, size, top
@staticmethod @staticmethod
def __currencyFormats(patterns, plus, minus): def __currencyFormats(patterns: str, plus: str, minus: str) -> Iterator[str]:
for p in patterns.split(';'): for p in patterns.split(';'):
p = p.replace('0', '#').replace(',', '').replace('.', '') p = p.replace('0', '#').replace(',', '').replace('.', '')
try: try:
@ -619,17 +627,17 @@ class LocaleScanner (object):
yield p yield p
@staticmethod @staticmethod
def __fromLdmlListPattern(pattern): def __fromLdmlListPattern(pattern: str) -> str:
# This is a very limited parsing of the format for list pattern part only. # This is a very limited parsing of the format for list pattern part only.
return pattern.replace('{0}', '%1').replace('{1}', '%2').replace('{2}', '%3') return pattern.replace('{0}', '%1').replace('{1}', '%2').replace('{2}', '%3')
@staticmethod @staticmethod
def __fromLdmlPath(seq): # tool function for __xpathJoin() def __fromLdmlPath(seq: list[str]) -> Iterator[str]: # tool function for __xpathJoin()
"""Convert LDML's [@name='value'] to our [name=value] form.""" """Convert LDML's [@name='value'] to our [name=value] form."""
for it in seq: for it in seq:
# First dismember it: # First dismember it:
attrs = it.split('[') attrs: list[str] = it.split('[')
tag = attrs.pop(0) tag: str = attrs.pop(0)
if not attrs: # Short-cut the easy case: if not attrs: # Short-cut the easy case:
yield it yield it
continue continue
@ -646,7 +654,7 @@ class LocaleScanner (object):
yield '['.join(attrs) yield '['.join(attrs)
@classmethod @classmethod
def __xpathJoin(cls, head, insert, tail): def __xpathJoin(cls, head: list[str], insert: list[str], tail: list[str]) -> list[str]:
"""Join three lists of XPath selectors. """Join three lists of XPath selectors.
Each of head, insert and tail is a sequence of selectors but Each of head, insert and tail is a sequence of selectors but