Move sorting of likely subtag table upstream to QLocaleXmlReader

This means LocaleDataWriter.likelySubtags() now only gets an iterable,
so it doesn't know when it's on the last item and can't skip the comma
after it, but a trailing comma seems to be acceptable in modern C++.

Change-Id: I9d3bb9af3bb46b28b7a2529e27ab72a72c358503
Reviewed-by: Mate Barany <mate.barany@qt.io>
This commit is contained in:
Edward Welbourne 2024-05-07 15:19:25 +02:00
parent fd786667ab
commit 23dc3b9e08
3 changed files with 38 additions and 24 deletions

View File

@ -78,7 +78,7 @@ struct LanguageCodeEntry {
// GENERATED PART STARTS HERE
/*
This part of the file was generated on 2024-04-04 from the
This part of the file was generated on 2024-05-07 from the
Common Locale Data Repository v44.1
http://www.unicode.org/cldr/
@ -1048,7 +1048,7 @@ static constexpr QLocaleId likely_subtags[] = {
{ 0, 138, 0 }, { 302, 138, 227 }, // und_Ugar -> uga_Ugar_SY
{ 0, 139, 0 }, { 308, 139, 134 }, // und_Vaii -> vai_Vaii_LR
{ 0, 141, 0 }, { 255, 141, 50 }, // und_Yiii -> ii_Yiii_CN
{ 0, 142, 0 }, { 339, 142, 161 } // und_Rohg -> rhg_Rohg_MM
{ 0, 142, 0 }, { 339, 142, 161 }, // und_Rohg -> rhg_Rohg_MM
};
static constexpr quint16 locale_index[] = {

View File

@ -53,7 +53,7 @@ class QLocaleXmlReader (object):
languages = tuple(self.__loadMap('language', language_map))
scripts = tuple(self.__loadMap('script', script_map))
territories = tuple(self.__loadMap('territory', territory_map))
self.__likely = tuple(self.__likelySubtagsMap())
self.__likely = tuple(self.__likelySubtagsMap()) # in enum name form
# Mappings {ID: (enum name, code, en.xml name)}
self.languages = {v[0]: v[1:] for v in languages}
@ -137,9 +137,19 @@ class QLocaleXmlReader (object):
def ids(t):
    """Return a tuple holding item 0 of each entry of t, in order.

    Presumably each entry is an (ID, name) pair, as the docstring of the
    neighbouring keyLikely() describes, making the result a tuple of
    IDs; only indexing by 0 is actually relied on here."""
    firsts = [entry[0] for entry in t]
    return tuple(firsts)
for pair in self.__likely:
have = self.__fromNames(pair[0])
give = self.__fromNames(pair[1])
def keyLikely(pair, kl=self.__keyLikely):
    """Sort key for one likely-subtag mapping, taken from its first half.

    We're passed a pair (h, g) of triplets (lang, script, territory) of
    pairs (ID, name). Only h matters for ordering: the ID of each of
    its entries is extracted and the resulting triplet of IDs is handed
    to __keyLikely(), bound as the default of kl when this helper is
    defined."""
    have = pair[0]
    return kl(tuple(entry[0] for entry in have))
# Sort self.__likely to enable binary search in C++ code.
for have, give in sorted(((self.__fromNames(has),
self.__fromNames(got))
for has, got in self.__likely),
key = keyLikely):
yield ('_'.join(tag(have)), ids(have),
'_'.join(tag(give)), ids(give))
@ -197,6 +207,11 @@ class QLocaleXmlReader (object):
key = int(key)
yield key, enum[key][0], code, name
def __fromNames(self, names):
    """Look up a (language, script, territory) triplet of names.

    Each of the first three entries of names is used as key into the
    matching by-name table of this reader. The three values found are
    returned in the same order; they are three (ID, code) pairs."""
    # Look-ups stay in language, script, territory order, so the first
    # missing name is the one whose error gets reported.
    language = self.__langByName[names[0]]
    script = self.__textByName[names[1]]
    territory = self.__landByName[names[2]]
    return language, script, territory
# Likely subtag management:
def __likelySubtagsMap(self):
def triplet(element, keys=('language', 'script', 'territory'), kid = self.__firstChildText):
return tuple(kid(element, key) for key in keys)
@ -205,8 +220,21 @@ class QLocaleXmlReader (object):
for elt in self.__eachEltInGroup(self.root, 'likelySubtags', 'likelySubtag'):
yield triplet(kid(elt, "from")), triplet(kid(elt, "to"))
def __fromNames(self, names):
return self.__langByName[names[0]], self.__textByName[names[1]], self.__landByName[names[2]]
@staticmethod
def __keyLikely(key, huge=0x10000):
    """Sort order key for a likely subtag key

    The key is a (lang, script, region) triplet of IDs, but the result
    is ordered as (lang, region, script), with 0 sorting after every
    non-zero value in each position. This ensures that, when several
    mappings partially match a requested locale, the one we should
    prefer to use appears first.

    Each 0 is replaced by huge, whose default, 0x10000, works because
    all IDs are unsigned short, so less than 2^16."""
    # Put huge in place of each zero, so that zero sorts last:
    ranked = [tag if tag else huge for tag in key]
    language, script, territory = ranked[0], ranked[1], ranked[2]
    # Territory takes precedence over script in the sort order:
    return language, territory, script
# DOM access:
from xml.dom import minidom

View File

@ -268,25 +268,11 @@ class TimeZoneDataWriter (LocaleSourceEditor):
class LocaleDataWriter (LocaleSourceEditor):
def likelySubtags(self, likely):
# First sort likely, so that we can use binary search in C++
# code. Although the entries are (lang, script, region), sort
# as (lang, region, script) and sort 0 after all non-zero
# values. This ensures that, when several mappings partially
# match a requested locale, the one we should prefer to use
# appears first.
huge = 0x10000 # > any ushort; all tag values are ushort
def keyLikely(entry):
have = entry[1] # Numeric id triple
return have[0] or huge, have[2] or huge, have[1] or huge # language, region, script
likely = sorted(likely, key=keyLikely)
i = 0
# Sort order of likely is taken care of upstream.
self.writer.write('static constexpr QLocaleId likely_subtags[] = {\n')
for had, have, got, give in likely:
i += 1
self.writer.write(' {{ {:3d}, {:3d}, {:3d} }}'.format(*have))
self.writer.write(', {{ {:3d}, {:3d}, {:3d} }}'.format(*give))
self.writer.write(' ' if i == len(likely) else ',')
self.writer.write(', {{ {:3d}, {:3d}, {:3d} }},'.format(*give))
self.writer.write(f' // {had} -> {got}\n')
self.writer.write('};\n\n')