Add type annotations to ByteArrayData, StringDataToken and StringData

Add some type hints to unicode2hex as well, since it is used by
ByteArrayData.

Task-number: QTBUG-128634
Change-Id: I86b7ce8567483bf8a4d4db78c9585652526cb90d
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
(cherry picked from commit c1ffb6e7515621a953b5ebf19c384b27df785d73)
This commit is contained in:
Mate Barany 2024-09-16 16:31:58 +02:00
parent 41eccd318a
commit 0d06c01611
2 changed files with 31 additions and 30 deletions

View File

@ -26,15 +26,15 @@ class Error (Exception):
def __str__(self): def __str__(self):
return self.message return self.message
def unicode2hex(s): def unicode2hex(s: str) -> list[str]:
lst = [] lst: list[str] = []
for x in s: for x in s:
v = ord(x) v: int = ord(x)
if v > 0xFFFF: if v > 0xFFFF:
# make a surrogate pair # make a surrogate pair
# copied from qchar.h # copied from qchar.h
high = (v >> 10) + 0xd7c0 high: int = (v >> 10) + 0xd7c0
low = (v % 0x400 + 0xdc00) low: int = (v % 0x400 + 0xdc00)
lst.append(hex(high)) lst.append(hex(high))
lst.append(hex(low)) lst.append(hex(low))
else: else:

View File

@ -17,7 +17,7 @@ The ISO 639-3 data file can be downloaded from the SIL website:
import datetime import datetime
import argparse import argparse
from pathlib import Path from pathlib import Path
from typing import Iterator, Optional from typing import Callable, Iterator, Optional
from qlocalexml import QLocaleXmlReader from qlocalexml import QLocaleXmlReader
from localetools import * from localetools import *
@ -66,30 +66,31 @@ class LocaleKeySorter:
class ByteArrayData: class ByteArrayData:
# Only for use with ASCII data, e.g. IANA IDs. # Only for use with ASCII data, e.g. IANA IDs.
def __init__(self): def __init__(self) -> None:
self.data, self.hash = [], {} self.data: list[str] = []
self.hash: dict[str, int] = {}
def append(self, s): def append(self, s: str) -> int:
assert s.isascii(), s assert s.isascii(), s
s += '\0' s += '\0'
if s in self.hash: if s in self.hash:
return self.hash[s] return self.hash[s]
index = len(self.data) index: int = len(self.data)
if index > 0xffff: if index > 0xffff:
raise Error(f'Index ({index}) outside the uint16 range !') raise Error(f'Index ({index}) outside the uint16 range !')
self.hash[s] = index self.hash[s] = index
self.data += unicode2hex(s) self.data += unicode2hex(s)
return index return index
def write(self, out, name): def write(self, out: Callable[[str], int], name: str) -> None:
out(f'\nstatic constexpr char {name}[] = {{\n') out(f'\nstatic constexpr char {name}[] = {{\n')
out(wrap_list(self.data, 16)) # 16 == 100 // len('0xhh, ') out(wrap_list(self.data, 16)) # 16 == 100 // len('0xhh, ')
# All data is ASCII, so only two-digit hex is ever needed. # All data is ASCII, so only two-digit hex is ever needed.
out('\n};\n') out('\n};\n')
class StringDataToken: class StringDataToken:
def __init__(self, index, length, lenbits, indbits): def __init__(self, index: int, length: int, lenbits: int, indbits: int) -> None:
if index >= (1 << indbits): if index >= (1 << indbits):
raise ValueError(f'Start-index ({index}) exceeds the {indbits}-bit range!') raise ValueError(f'Start-index ({index}) exceeds the {indbits}-bit range!')
if length >= (1 << lenbits): if length >= (1 << lenbits):
@ -99,22 +100,22 @@ class StringDataToken:
self.length = length self.length = length
class StringData: class StringData:
def __init__(self, name, lenbits = 8, indbits = 16): def __init__(self, name: str, lenbits: int = 8, indbits: int = 16) -> None:
self.data = [] self.data: list[str] = []
self.hash = {} self.hash: dict[str, StringDataToken] = {}
self.name = name self.name = name
self.text = '' # Used in quick-search for matches in data self.text = '' # Used in quick-search for matches in data
self.__bits = lenbits, indbits self.__bits: tuple[int, int] = lenbits, indbits
def append(self, s): def append(self, s: str) -> StringDataToken:
try: try:
token = self.hash[s] token: StringDataToken = self.hash[s]
except KeyError: except KeyError:
token = self.__store(s) token: StringDataToken = self.__store(s)
self.hash[s] = token self.hash[s] = token
return token return token
def __store(self, s): def __store(self, s: str) -> StringDataToken:
"""Add string s to known data. """Add string s to known data.
Seeks to avoid duplication, where possible. Seeks to avoid duplication, where possible.
@ -122,9 +123,9 @@ class StringData:
""" """
if not s: if not s:
return StringDataToken(0, 0, *self.__bits) return StringDataToken(0, 0, *self.__bits)
ucs2 = unicode2hex(s) ucs2: list[str] = unicode2hex(s)
try: try:
index = self.text.index(s) - 1 index: int = self.text.index(s) - 1
matched = 0 matched = 0
while matched < len(ucs2): while matched < len(ucs2):
index, matched = self.data.index(ucs2[0], index + 1), 1 index, matched = self.data.index(ucs2[0], index + 1), 1
@ -144,17 +145,17 @@ class StringData:
e.args += (self.name, s) e.args += (self.name, s)
raise raise
def write(self, fd): def write(self, out: Callable[[str], int]) -> None:
indbits = self.__bits[1] indbits: int = self.__bits[1]
if len(self.data) >= (1 << indbits): if len(self.data) >= (1 << indbits):
raise ValueError(f'Data is too big ({len(self.data)}) ' raise ValueError(f'Data is too big ({len(self.data)}) '
f'for {indbits}-bit index to its end!', f'for {indbits}-bit index to its end!',
self.name) self.name)
fd.write(f"\nstatic constexpr char16_t {self.name}[] = {{\n") out(f"\nstatic constexpr char16_t {self.name}[] = {{\n")
fd.write(wrap_list(self.data, 12)) # 12 == 100 // len('0xhhhh, ') out(wrap_list(self.data, 12)) # 12 == 100 // len('0xhhhh, ')
fd.write("\n};\n") out("\n};\n")
def currencyIsoCodeData(s): def currencyIsoCodeData(s: str) -> str:
if s: if s:
return '{' + ",".join(str(ord(x)) for x in s) + '}' return '{' + ",".join(str(ord(x)) for x in s) + '}'
return "{0,0,0}" return "{0,0,0}"
@ -458,7 +459,7 @@ class LocaleDataWriter (LocaleSourceEditor):
byte_unit_data, am_data, pm_data, currency_symbol_data, byte_unit_data, am_data, pm_data, currency_symbol_data,
currency_display_name_data, currency_format_data, currency_display_name_data, currency_format_data,
endonyms_data): endonyms_data):
data.write(self.writer) data.write(self.writer.write)
@staticmethod @staticmethod
def __writeNameData(out, book, form): def __writeNameData(out, book, form):
@ -585,7 +586,7 @@ class CalendarDataWriter (LocaleSourceEditor):
self.writer.write(self.formatCalendar(*( (0,) * (3 + 6 * 2) )) self.writer.write(self.formatCalendar(*( (0,) * (3 + 6 * 2) ))
+ '// trailing zeros\n') + '// trailing zeros\n')
self.writer.write('};\n') self.writer.write('};\n')
months_data.write(self.writer) months_data.write(self.writer.write)
class TestLocaleWriter (LocaleSourceEditor): class TestLocaleWriter (LocaleSourceEditor):