Add type annotations to ByteArrayData, StringDataToken and StringData

Add some type hints to unicode2hex as well, since it is used by
ByteArrayData and StringData.

Task-number: QTBUG-128634
Pick-to: 6.8
Change-Id: I86b7ce8567483bf8a4d4db78c9585652526cb90d
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
This commit is contained in:
Mate Barany 2024-09-16 16:31:58 +02:00
parent 5be7b99a16
commit c1ffb6e751
2 changed files with 34 additions and 33 deletions

View File

@ -42,15 +42,15 @@ def qtVersion(root = qtbase_root, pfx = 'set(QT_REPO_MODULE_VERSION '):
raise Error(f'Failed to find {pfx}...) line in {root.joinpath(".cmake.conf")}') raise Error(f'Failed to find {pfx}...) line in {root.joinpath(".cmake.conf")}')
qtVersion = qtVersion() qtVersion = qtVersion()
def unicode2hex(s): def unicode2hex(s: str) -> list[str]:
lst = [] lst: list[str] = []
for x in s: for x in s:
v = ord(x) v: int = ord(x)
if v > 0xFFFF: if v > 0xFFFF:
# make a surrogate pair # make a surrogate pair
# copied from qchar.h # copied from qchar.h
high = (v >> 10) + 0xd7c0 high: int = (v >> 10) + 0xd7c0
low = (v % 0x400 + 0xdc00) low: int = (v % 0x400 + 0xdc00)
lst.append(hex(high)) lst.append(hex(high))
lst.append(hex(low)) lst.append(hex(low))
else: else:

View File

@ -17,7 +17,7 @@ The ISO 639-3 data file can be downloaded from the SIL website:
import datetime import datetime
import argparse import argparse
from pathlib import Path from pathlib import Path
from typing import Iterator, Optional from typing import Callable, Iterator, Optional
from qlocalexml import QLocaleXmlReader from qlocalexml import QLocaleXmlReader
from localetools import * from localetools import *
@ -66,13 +66,14 @@ class LocaleKeySorter:
class ByteArrayData: class ByteArrayData:
# Only for use with ASCII data, e.g. IANA IDs. # Only for use with ASCII data, e.g. IANA IDs.
def __init__(self): def __init__(self) -> None:
self.data, self.hash = [], {} self.data: list[str] = []
self.hash: dict[str, int] = {}
def lookup(self, s): def lookup(self, s: str) -> int:
return self.append(s, False) return self.append(s, False)
def append(self, s, create = True): def append(self, s: str, create: bool = True) -> int:
assert s.isascii(), s assert s.isascii(), s
s += '\0' s += '\0'
if s in self.hash: if s in self.hash:
@ -80,21 +81,21 @@ class ByteArrayData:
if not create: if not create:
raise Error(f'Entry "{s[:-1]}" missing from reused table !') raise Error(f'Entry "{s[:-1]}" missing from reused table !')
index = len(self.data) index: int = len(self.data)
if index > 0xffff: if index > 0xffff:
raise Error(f'Index ({index}) outside the uint16 range !') raise Error(f'Index ({index}) outside the uint16 range !')
self.hash[s] = index self.hash[s] = index
self.data += unicode2hex(s) self.data += unicode2hex(s)
return index return index
def write(self, out, name): def write(self, out: Callable[[str], int], name: str) -> None:
out(f'\nstatic constexpr char {name}[] = {{\n') out(f'\nstatic constexpr char {name}[] = {{\n')
out(wrap_list(self.data, 16)) # 16 == 100 // len('0xhh, ') out(wrap_list(self.data, 16)) # 16 == 100 // len('0xhh, ')
# All data is ASCII, so only two-digit hex is ever needed. # All data is ASCII, so only two-digit hex is ever needed.
out('\n};\n') out('\n};\n')
class StringDataToken: class StringDataToken:
def __init__(self, index, length, lenbits, indbits): def __init__(self, index: int, length: int, lenbits: int, indbits: int) -> None:
if index >= (1 << indbits): if index >= (1 << indbits):
raise ValueError(f'Start-index ({index}) exceeds the {indbits}-bit range!') raise ValueError(f'Start-index ({index}) exceeds the {indbits}-bit range!')
if length >= (1 << lenbits): if length >= (1 << lenbits):
@ -106,27 +107,27 @@ class StringDataToken:
# Would tables benefit from pre-population, one script at a time ? # Would tables benefit from pre-population, one script at a time ?
# That might improve the chances of match-ups in store. # That might improve the chances of match-ups in store.
class StringData: class StringData:
def __init__(self, name, lenbits = 8, indbits = 16): def __init__(self, name: str, lenbits: int = 8, indbits: int = 16) -> None:
self.data = [] self.data: list[str] = []
self.hash = {} self.hash: dict[str, StringDataToken] = {}
self.name = name self.name = name
self.text = '' # Used in quick-search for matches in data self.text = '' # Used in quick-search for matches in data
self.__bits = lenbits, indbits self.__bits: tuple[int, int] = lenbits, indbits
def end(self): def end(self) -> StringDataToken:
return StringDataToken(len(self.data), 0, *self.__bits) return StringDataToken(len(self.data), 0, *self.__bits)
def append(self, s): def append(self, s: str) -> StringDataToken:
try: try:
token = self.hash[s] token: StringDataToken = self.hash[s]
except KeyError: except KeyError:
token = self.__store(s) token: StringDataToken = self.__store(s)
self.hash[s] = token self.hash[s] = token
return token return token
# The longMetaZoneName table grows to c. 0xe061c bytes, making the # The longMetaZoneName table grows to c. 0xe061c bytes, making the
# searching here rather expensive. # searching here rather expensive.
def __store(self, s): def __store(self, s: str) -> StringDataToken:
"""Add string s to known data. """Add string s to known data.
Seeks to avoid duplication, where possible. Seeks to avoid duplication, where possible.
@ -134,9 +135,9 @@ class StringData:
""" """
if not s: if not s:
return StringDataToken(0, 0, *self.__bits) return StringDataToken(0, 0, *self.__bits)
ucs2 = unicode2hex(s) ucs2: list[str] = unicode2hex(s)
try: try:
index = self.text.index(s) - 1 index: int = self.text.index(s) - 1
matched = 0 matched = 0
while matched < len(ucs2): while matched < len(ucs2):
index, matched = self.data.index(ucs2[0], index + 1), 1 index, matched = self.data.index(ucs2[0], index + 1), 1
@ -156,17 +157,17 @@ class StringData:
e.args += (self.name, s) e.args += (self.name, s)
raise raise
def write(self, fd): def write(self, out: Callable[[str], int]) -> None:
indbits = self.__bits[1] indbits: int = self.__bits[1]
if len(self.data) >= (1 << indbits): if len(self.data) >= (1 << indbits):
raise ValueError(f'Data is too big ({len(self.data)}) ' raise ValueError(f'Data is too big ({len(self.data)}) '
f'for {indbits}-bit index to its end!', f'for {indbits}-bit index to its end!',
self.name) self.name)
fd.write(f"\nstatic constexpr char16_t {self.name}[] = {{\n") out(f"\nstatic constexpr char16_t {self.name}[] = {{\n")
fd.write(wrap_list(self.data, 12)) # 12 == 100 // len('0xhhhh, ') out(wrap_list(self.data, 12)) # 12 == 100 // len('0xhhhh, ')
fd.write("\n};\n") out("\n};\n")
def currencyIsoCodeData(s): def currencyIsoCodeData(s: str) -> str:
if s: if s:
return '{' + ",".join(str(ord(x)) for x in s) + '}' return '{' + ",".join(str(ord(x)) for x in s) + '}'
return "{0,0,0}" return "{0,0,0}"
@ -583,7 +584,7 @@ class LocaleZoneDataWriter (LocaleSourceEditor):
self.__longZoneNameTable, self.__longZoneNameTable,
self.__shortMetaZoneNameTable, self.__shortMetaZoneNameTable,
self.__longMetaZoneNameTable): self.__longMetaZoneNameTable):
data.write(self.writer) data.write(self.writer.write)
self.writer.write('\n') self.writer.write('\n')
@ -765,7 +766,7 @@ class LocaleDataWriter (LocaleSourceEditor):
byte_unit_data, am_data, pm_data, currency_symbol_data, byte_unit_data, am_data, pm_data, currency_symbol_data,
currency_display_name_data, currency_format_data, currency_display_name_data, currency_format_data,
endonyms_data): endonyms_data):
data.write(self.writer) data.write(self.writer.write)
@staticmethod @staticmethod
def __writeNameData(out, book, form): def __writeNameData(out, book, form):
@ -883,7 +884,7 @@ class CalendarDataWriter (LocaleSourceEditor):
self.writer.write(self.formatCalendar(*( (0,) * (3 + 6 * 2) )) self.writer.write(self.formatCalendar(*( (0,) * (3 + 6 * 2) ))
+ '// trailing zeros\n') + '// trailing zeros\n')
self.writer.write('};\n') self.writer.write('};\n')
months_data.write(self.writer) months_data.write(self.writer.write)
class TestLocaleWriter (LocaleSourceEditor): class TestLocaleWriter (LocaleSourceEditor):