Add type annotations to ByteArrayData, StringDataToken and StringData

Add some type hints to unicode2hex as well, since it is used by
ByteArrayData.

Task-number: QTBUG-128634
Change-Id: I86b7ce8567483bf8a4d4db78c9585652526cb90d
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
(cherry picked from commit c1ffb6e7515621a953b5ebf19c384b27df785d73)
This commit is contained in:
Mate Barany 2024-09-16 16:31:58 +02:00
parent 41eccd318a
commit 0d06c01611
2 changed files with 31 additions and 30 deletions

View File

@ -26,15 +26,15 @@ class Error (Exception):
def __str__(self):
return self.message
def unicode2hex(s):
lst = []
def unicode2hex(s: str) -> list[str]:
lst: list[str] = []
for x in s:
v = ord(x)
v: int = ord(x)
if v > 0xFFFF:
# make a surrogate pair
# copied from qchar.h
high = (v >> 10) + 0xd7c0
low = (v % 0x400 + 0xdc00)
high: int = (v >> 10) + 0xd7c0
low: int = (v % 0x400 + 0xdc00)
lst.append(hex(high))
lst.append(hex(low))
else:

View File

@ -17,7 +17,7 @@ The ISO 639-3 data file can be downloaded from the SIL website:
import datetime
import argparse
from pathlib import Path
from typing import Iterator, Optional
from typing import Callable, Iterator, Optional
from qlocalexml import QLocaleXmlReader
from localetools import *
@ -66,30 +66,31 @@ class LocaleKeySorter:
class ByteArrayData:
# Only for use with ASCII data, e.g. IANA IDs.
def __init__(self):
self.data, self.hash = [], {}
def __init__(self) -> None:
self.data: list[str] = []
self.hash: dict[str, int] = {}
def append(self, s):
def append(self, s: str) -> int:
assert s.isascii(), s
s += '\0'
if s in self.hash:
return self.hash[s]
index = len(self.data)
index: int = len(self.data)
if index > 0xffff:
raise Error(f'Index ({index}) outside the uint16 range !')
self.hash[s] = index
self.data += unicode2hex(s)
return index
def write(self, out, name):
def write(self, out: Callable[[str], int], name: str) -> None:
out(f'\nstatic constexpr char {name}[] = {{\n')
out(wrap_list(self.data, 16)) # 16 == 100 // len('0xhh, ')
# All data is ASCII, so only two-digit hex is ever needed.
out('\n};\n')
class StringDataToken:
def __init__(self, index, length, lenbits, indbits):
def __init__(self, index: int, length: int, lenbits: int, indbits: int) -> None:
if index >= (1 << indbits):
raise ValueError(f'Start-index ({index}) exceeds the {indbits}-bit range!')
if length >= (1 << lenbits):
@ -99,22 +100,22 @@ class StringDataToken:
self.length = length
class StringData:
def __init__(self, name, lenbits = 8, indbits = 16):
self.data = []
self.hash = {}
def __init__(self, name: str, lenbits: int = 8, indbits: int = 16) -> None:
self.data: list[str] = []
self.hash: dict[str, StringDataToken] = {}
self.name = name
self.text = '' # Used in quick-search for matches in data
self.__bits = lenbits, indbits
self.__bits: tuple[int, int] = lenbits, indbits
def append(self, s):
def append(self, s: str) -> StringDataToken:
try:
token = self.hash[s]
token: StringDataToken = self.hash[s]
except KeyError:
token = self.__store(s)
token: StringDataToken = self.__store(s)
self.hash[s] = token
return token
def __store(self, s):
def __store(self, s: str) -> StringDataToken:
"""Add string s to known data.
Seeks to avoid duplication, where possible.
@ -122,9 +123,9 @@ class StringData:
"""
if not s:
return StringDataToken(0, 0, *self.__bits)
ucs2 = unicode2hex(s)
ucs2: list[str] = unicode2hex(s)
try:
index = self.text.index(s) - 1
index: int = self.text.index(s) - 1
matched = 0
while matched < len(ucs2):
index, matched = self.data.index(ucs2[0], index + 1), 1
@ -144,17 +145,17 @@ class StringData:
e.args += (self.name, s)
raise
def write(self, fd):
indbits = self.__bits[1]
def write(self, out: Callable[[str], int]) -> None:
indbits: int = self.__bits[1]
if len(self.data) >= (1 << indbits):
raise ValueError(f'Data is too big ({len(self.data)}) '
f'for {indbits}-bit index to its end!',
self.name)
fd.write(f"\nstatic constexpr char16_t {self.name}[] = {{\n")
fd.write(wrap_list(self.data, 12)) # 12 == 100 // len('0xhhhh, ')
fd.write("\n};\n")
out(f"\nstatic constexpr char16_t {self.name}[] = {{\n")
out(wrap_list(self.data, 12)) # 12 == 100 // len('0xhhhh, ')
out("\n};\n")
def currencyIsoCodeData(s):
def currencyIsoCodeData(s: str) -> str:
if s:
return '{' + ",".join(str(ord(x)) for x in s) + '}'
return "{0,0,0}"
@ -458,7 +459,7 @@ class LocaleDataWriter (LocaleSourceEditor):
byte_unit_data, am_data, pm_data, currency_symbol_data,
currency_display_name_data, currency_format_data,
endonyms_data):
data.write(self.writer)
data.write(self.writer.write)
@staticmethod
def __writeNameData(out, book, form):
@ -585,7 +586,7 @@ class CalendarDataWriter (LocaleSourceEditor):
self.writer.write(self.formatCalendar(*( (0,) * (3 + 6 * 2) ))
+ '// trailing zeros\n')
self.writer.write('};\n')
months_data.write(self.writer)
months_data.write(self.writer.write)
class TestLocaleWriter (LocaleSourceEditor):