Add type annotations to LocaleScanner

Task-number: QTBUG-129566
Change-Id: I768fda6b5202ebabc8283ecedead9157653862be
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
(cherry picked from commit b9e4f53b7e46636e886e41dde5693420dbfd81dc)
This commit is contained in:
Mate Barany 2024-10-07 16:38:39 +02:00
parent b56e959d60
commit 35a0bfd1e3
2 changed files with 57 additions and 49 deletions

View File

@@ -353,7 +353,7 @@ class CldrAccess (object):
for ignore, attrs in self.supplement('likelySubtags.xml').find('likelySubtags'):
yield attrs['from'], attrs['to']
def numberSystem(self, system):
def numberSystem(self, system: str) -> dict[str, str]:
"""Get a description of a numbering system.
Returns a mapping, with keys 'digits', 'type' and 'id'; the

View File

@@ -21,7 +21,7 @@ See individual classes for further detail.
from __future__ import annotations
from localetools import Error
from dateconverter import convert_date
from typing import Any, Iterator
from typing import Any, Callable, Iterator
from xml.dom import minidom
# The github version of CLDR uses '↑↑↑' to indicate "inherit"
@@ -204,17 +204,17 @@ class Supplement (XmlScanner):
yield elt.nodeName, _attrsFromDom(elt)
class LocaleScanner (object):
def __init__(self, name, nodes, root):
def __init__(self, name: str, nodes: list[Node], root: XmlScanner) -> None:
"""Set up to scan data for a specified locale.
First parameter is the name of the locale; it will be used in
error messages. Second is a tuple of DOM root-nodes of files
error messages. Second is a sequence of DOM root-nodes of files
with locale data, later ones serving as fall-backs for data
missing in earlier ones. Third parameter is the root locale's
DOM node."""
self.name, self.nodes, self.base = name, nodes, root
def find(self, xpath, default = None, draft = None):
def find(self, xpath: str, default: str|None = None, draft: int|None = None) -> str:
"""XPath search for the content of an element.
Required argument, xpath, is the XPath to search for. Optional
@@ -234,7 +234,7 @@ class LocaleScanner (object):
raise
return default
def tagCodes(self):
def tagCodes(self) -> Iterator[str]:
"""Yields four tag codes
The tag codes are language, script, territory and variant; an
@@ -244,7 +244,7 @@ class LocaleScanner (object):
top-level <alias> element of this file has a non-empty source
attribute; that attribute value is mentioned in the error's
message."""
root = self.nodes[0]
root: Node = self.nodes[0]
for alias in root.findAllChildren('alias', allDull=True):
try:
source = alias.dom.attributes['source'].nodeValue
@@ -253,7 +253,7 @@ class LocaleScanner (object):
else:
raise Error(f'Alias to {source}')
ids = root.findUniqueChild('identity')
ids: Node = root.findUniqueChild('identity')
for code in ('language', 'script', 'territory', 'variant'):
for node in ids.findAllChildren(code, allDull=True):
try:
@@ -265,7 +265,7 @@ class LocaleScanner (object):
else: # No value for this code, use empty
yield ''
def currencyData(self, isoCode):
def currencyData(self, isoCode: str) -> Iterator[tuple[str, str]]:
"""Fetches currency data for this locale.
Single argument, isoCode, is the ISO currency code for the
@@ -273,28 +273,29 @@ class LocaleScanner (object):
includes some currency formats.
"""
if isoCode:
stem = f'numbers/currencies/currency[{isoCode}]/'
symbol = self.find(f'{stem}symbol', '')
name = self.__currencyDisplayName(stem)
stem: str = f'numbers/currencies/currency[{isoCode}]/'
symbol: str = self.find(f'{stem}symbol', '')
name: str = self.__currencyDisplayName(stem)
else:
symbol = name = ''
yield 'currencySymbol', symbol
yield 'currencyDisplayName', name
def numericData(self, lookup):
def numericData(self, lookup: Callable[[str], dict[str, str]]
) -> Iterator[tuple[str, str]]:
"""Generate assorted numeric data for the locale.
First argument, lookup, is a callable that maps a numbering
system's name to certain data about the system, as a mapping;
we expect this to have 'digits' as a key.
"""
system = self.find('numbers/defaultNumberingSystem')
stem = f'numbers/symbols[numberSystem={system}]/'
decimal = self.find(f'{stem}decimal')
group = self.find(f'{stem}group')
system: str = self.find('numbers/defaultNumberingSystem')
stem: str = f'numbers/symbols[numberSystem={system}]/'
decimal: str = self.find(f'{stem}decimal')
group: str = self.find(f'{stem}group')
if decimal == group:
# mn_Mong_MN @v43 :-(
clean = Node.draftScore('approved')
clean: int = Node.draftScore('approved')
decimal = self.find(f'{stem}decimal', draft=clean)
group = self.find(f'{stem}group', draft=clean)
assert decimal != group, (self.name, system, decimal)
@@ -306,9 +307,9 @@ class LocaleScanner (object):
yield 'exp', self.find(f'{stem}exponential')
yield 'groupSizes', self.__numberGrouping(system)
digits = lookup(system)['digits']
digits: str = lookup(system)['digits']
assert len(digits) == 10
zero = digits[0]
zero: str = digits[0]
# Qt's number-formatting code assumes digits are consecutive
# (except Suzhou - see QTBUG-85409 - which shares its zero
# with CLDR's very-non-contiguous hanidec):
@@ -316,15 +317,15 @@ class LocaleScanner (object):
for i, c in enumerate(digits[1:], 1))
yield 'zero', zero
plus = self.find(f'{stem}plusSign')
minus = self.find(f'{stem}minusSign')
plus: str = self.find(f'{stem}plusSign')
minus: str = self.find(f'{stem}minusSign')
yield 'plus', plus
yield 'minus', minus
# Currency formatting:
xpath = 'numbers/currencyFormats/currencyFormatLength/currencyFormat[accounting]/pattern'
try:
money = self.find(xpath.replace('Formats/',
money: str = self.find(xpath.replace('Formats/',
f'Formats[numberSystem={system}]/'))
except Error:
money = self.find(xpath)
@@ -336,7 +337,7 @@ class LocaleScanner (object):
neg = it
yield 'currencyNegativeFormat', neg
def textPatternData(self):
def textPatternData(self) -> Iterator[tuple[str, str]]:
for key in ('quotationStart', 'alternateQuotationEnd',
'quotationEnd', 'alternateQuotationStart'):
yield key, self.find(f'delimiters/{key}')
@@ -363,7 +364,8 @@ class LocaleScanner (object):
convert_date(self.find(
f'{stem}{key}Formats/{key}FormatLength[{pair[1]}]/{key}Format/pattern')))
def endonyms(self, language, script, territory, variant):
def endonyms(self, language: str, script: str, territory: str, variant: str
) -> Iterator[tuple[str, str]]:
# TODO: take variant into account ?
# TODO: QTBUG-47892, support query for all combinations
for seq in ((language, script, territory),
@@ -384,12 +386,12 @@ class LocaleScanner (object):
yield ('territoryEndonym',
self.find(f'localeDisplayNames/territories/territory[{territory}]', ''))
def unitData(self):
def unitData(self) -> Iterator[tuple[str, str]]:
yield ('byte_unit',
self.find('units/unitLength[long]/unit[digital-byte]/displayName',
'bytes'))
unit = self.__findUnit('', 'B')
unit: str | None = self.__findUnit('', 'B')
cache = [] # Populated by the SI call, to give hints to the IEC call
yield ('byte_si_quantified',
';'.join(self.__unitCount('', unit, cache)))
@@ -398,8 +400,8 @@ class LocaleScanner (object):
yield ('byte_iec_quantified',
';'.join(self.__unitCount('bi', 'iB', cache)))
def calendarNames(self, calendars):
namings = self.__nameForms
def calendarNames(self, calendars: list[str]) -> Iterator[tuple[str, str]]:
namings: tuple[tuple[str, str, str], ...] = self.__nameForms
for cal in calendars:
stem = f'dates/calendars/calendar[{cal}]/months/'
for key, mode, size in namings:
@@ -427,11 +429,15 @@ class LocaleScanner (object):
('narrow', 'format', 'narrow'),
) # Used for month and day names
def __find(self, xpath):
retries, foundNone = [ xpath.split('/') ], True
def __find(self, xpath: str) -> Iterator[Node]:
retries: list[list[str]] = [ xpath.split('/') ]
foundNone: bool = True
while retries:
tags, elts, roots = retries.pop(), self.nodes, (self.base.root,)
tags: list[str] = retries.pop()
elts: tuple[Node, ...] = tuple(self.nodes)
roots: tuple[Node] = (self.base.root,)
for selector in tags:
# tag is a str, attrs is a dict[str, str]
tag, attrs = _parseXPath(selector)
elts = tuple(_iterateEach(e.findAllChildren(tag, attrs) for e in elts))
if not elts:
@@ -448,12 +454,13 @@ class LocaleScanner (object):
# Process roots separately: otherwise the alias-processing
# is excessive.
for i, selector in enumerate(tags):
# tag is a str, attrs is a dict[str, str]
tag, attrs = _parseXPath(selector)
for alias in tuple(_iterateEach(r.findAllChildren('alias', allDull=True)
for r in roots)):
if alias.dom.attributes['source'].nodeValue == 'locale':
replace = alias.dom.attributes['path'].nodeValue.split('/')
replace: list[str] = alias.dom.attributes['path'].nodeValue.split('/')
retries.append(self.__xpathJoin(tags[:i], replace, tags[i:]))
roots = tuple(_iterateEach(r.findAllChildren(tag, attrs) for r in roots))
@@ -480,7 +487,7 @@ class LocaleScanner (object):
raise Error(f'No {sought} in {self.name}')
@staticmethod
def __skipInheritors(elts):
def __skipInheritors(elts: tuple[Node, ...]) -> Iterator[Node]:
for elt in elts:
try:
if elt.dom.firstChild.nodeValue != INHERIT:
@@ -488,7 +495,7 @@ class LocaleScanner (object):
except (AttributeError, KeyError):
yield elt
def __currencyDisplayName(self, stem):
def __currencyDisplayName(self, stem: str) -> str | None:
try:
return self.find(stem + 'displayName')
except Error:
@@ -500,7 +507,7 @@ class LocaleScanner (object):
pass
return ''
def __findUnit(self, keySuffix, quantify, fallback=''):
def __findUnit(self, keySuffix: str, quantify: str, fallback: str = '') -> str:
# The displayName for a quantified unit in en.xml is kByte
# (even for unitLength[narrow]) instead of kB (etc.), so
# prefer any unitPattern provided, but prune its placeholder:
@@ -508,7 +515,7 @@ class LocaleScanner (object):
stem = f'units/unitLength[{size}{keySuffix}]/unit[digital-{quantify}byte]/'
for count in ('many', 'few', 'two', 'other', 'zero', 'one'):
try:
ans = self.find(f'{stem}unitPattern[count={count}]')
ans: str = self.find(f'{stem}unitPattern[count={count}]')
except Error:
continue
@@ -527,10 +534,11 @@ class LocaleScanner (object):
return fallback
def __unitCount(self, keySuffix, suffix, cache,
def __unitCount(self, keySuffix: str, suffix: str, cache: list[str],
# Stop at exa/exbi: 16 exbi = 2^{64} < zetta =
# 1000^7 < zebi = 2^{70}, the next quantifiers up:
siQuantifiers = ('kilo', 'mega', 'giga', 'tera', 'peta', 'exa')):
siQuantifiers: tuple[str, ...] = ('kilo',
'mega', 'giga', 'tera', 'peta', 'exa')) -> Iterator[str]:
"""Work out the unit quantifiers.
Unfortunately, the CLDR data only go up to terabytes and we
@@ -556,7 +564,7 @@ class LocaleScanner (object):
else: # first call
tail = suffix = suffix or 'B'
for q in siQuantifiers:
it = self.__findUnit(keySuffix, q)
it: str | None = self.__findUnit(keySuffix, q)
# kB for kilobyte, in contrast with KiB for IEC:
q = q[0] if q == 'kilo' else q[0].upper()
if not it:
@@ -567,7 +575,7 @@ class LocaleScanner (object):
cache.append(rest)
yield it
def __numberGrouping(self, system):
def __numberGrouping(self, system: str) -> tuple[int, int, int]:
"""Sizes of groups of digits within a number.
Returns a triple (least, higher, top) for which:
@@ -587,9 +595,9 @@ class LocaleScanner (object):
elsewhere)."""
top = int(self.find('numbers/minimumGroupingDigits'))
assert top < 4, top # We store it in a 2-bit field
grouping = self.find(f'numbers/decimalFormats[numberSystem={system}]/'
grouping: str | None = self.find(f'numbers/decimalFormats[numberSystem={system}]/'
'decimalFormatLength/decimalFormat/pattern')
groups = grouping.split('.')[0].split(',')[-3:]
groups: list[str] = grouping.split('.')[0].split(',')[-3:]
assert all(len(x) < 8 for x in groups[-2:]), grouping # we store them in 3-bit fields
if len(groups) > 2:
return len(groups[-1]), len(groups[-2]), top
@@ -598,7 +606,7 @@ class LocaleScanner (object):
return size, size, top
@staticmethod
def __currencyFormats(patterns, plus, minus):
def __currencyFormats(patterns: str, plus: str, minus: str) -> Iterator[str]:
for p in patterns.split(';'):
p = p.replace('0', '#').replace(',', '').replace('.', '')
try:
@@ -619,17 +627,17 @@ class LocaleScanner (object):
yield p
@staticmethod
def __fromLdmlListPattern(pattern):
def __fromLdmlListPattern(pattern: str) -> str:
# This is a very limited parsing of the format for list pattern part only.
return pattern.replace('{0}', '%1').replace('{1}', '%2').replace('{2}', '%3')
@staticmethod
def __fromLdmlPath(seq): # tool function for __xpathJoin()
def __fromLdmlPath(seq: list[str]) -> Iterator[str]: # tool function for __xpathJoin()
"""Convert LDML's [@name='value'] to our [name=value] form."""
for it in seq:
# First dismember it:
attrs = it.split('[')
tag = attrs.pop(0)
attrs: list[str] = it.split('[')
tag: str = attrs.pop(0)
if not attrs: # Short-cut the easy case:
yield it
continue
@@ -646,7 +654,7 @@ class LocaleScanner (object):
yield '['.join(attrs)
@classmethod
def __xpathJoin(cls, head, insert, tail):
def __xpathJoin(cls, head: list[str], insert: list[str], tail: list[str]) -> list[str]:
"""Join three lists of XPath selectors.
Each of head, insert and tail is a sequence of selectors but