diff --git a/src/corelib/text/qlocale_data_p.h b/src/corelib/text/qlocale_data_p.h index 6175398dd99..e2fa89aca5b 100644 --- a/src/corelib/text/qlocale_data_p.h +++ b/src/corelib/text/qlocale_data_p.h @@ -78,7 +78,7 @@ struct LanguageCodeEntry { // GENERATED PART STARTS HERE /* - This part of the file was generated on 2024-04-04 from the + This part of the file was generated on 2024-05-07 from the Common Locale Data Repository v44.1 http://www.unicode.org/cldr/ @@ -1048,7 +1048,7 @@ static constexpr QLocaleId likely_subtags[] = { { 0, 138, 0 }, { 302, 138, 227 }, // und_Ugar -> uga_Ugar_SY { 0, 139, 0 }, { 308, 139, 134 }, // und_Vaii -> vai_Vaii_LR { 0, 141, 0 }, { 255, 141, 50 }, // und_Yiii -> ii_Yiii_CN - { 0, 142, 0 }, { 339, 142, 161 } // und_Rohg -> rhg_Rohg_MM + { 0, 142, 0 }, { 339, 142, 161 }, // und_Rohg -> rhg_Rohg_MM }; static constexpr quint16 locale_index[] = { diff --git a/util/locale_database/qlocalexml.py b/util/locale_database/qlocalexml.py index ea202a45c82..ee18f63583b 100644 --- a/util/locale_database/qlocalexml.py +++ b/util/locale_database/qlocalexml.py @@ -53,7 +53,7 @@ class QLocaleXmlReader (object): languages = tuple(self.__loadMap('language', language_map)) scripts = tuple(self.__loadMap('script', script_map)) territories = tuple(self.__loadMap('territory', territory_map)) - self.__likely = tuple(self.__likelySubtagsMap()) + self.__likely = tuple(self.__likelySubtagsMap()) # in enum name form # Mappings {ID: (enum name, code, en.xml name)} self.languages = {v[0]: v[1:] for v in languages} @@ -137,9 +137,19 @@ class QLocaleXmlReader (object): def ids(t): return tuple(x[0] for x in t) - for pair in self.__likely: - have = self.__fromNames(pair[0]) - give = self.__fromNames(pair[1]) + def keyLikely(pair, kl=self.__keyLikely): + """Sort by IDs from first entry in pair + + We're passed a pair (h, g) of triplets (lang, script, territory) of + pairs (ID, name); we extract the ID from each entry in the first + triplet, then hand that triplet of IDs off to __keyLikely().""" + return kl(tuple(x[0] for x in pair[0])) + + # Sort self.__likely to enable binary search in C++ code. + for have, give in sorted(((self.__fromNames(has), + self.__fromNames(got)) + for has, got in self.__likely), + key = keyLikely): yield ('_'.join(tag(have)), ids(have), '_'.join(tag(give)), ids(give)) @@ -197,6 +207,11 @@ class QLocaleXmlReader (object): key = int(key) yield key, enum[key][0], code, name + def __fromNames(self, names): + # Three (ID, code) pairs: + return self.__langByName[names[0]], self.__textByName[names[1]], self.__landByName[names[2]] + + # Likely subtag management: def __likelySubtagsMap(self): def triplet(element, keys=('language', 'script', 'territory'), kid = self.__firstChildText): return tuple(kid(element, key) for key in keys) @@ -205,8 +220,21 @@ class QLocaleXmlReader (object): for elt in self.__eachEltInGroup(self.root, 'likelySubtags', 'likelySubtag'): yield triplet(kid(elt, "from")), triplet(kid(elt, "to")) - def __fromNames(self, names): - return self.__langByName[names[0]], self.__textByName[names[1]], self.__landByName[names[2]] + @staticmethod + def __keyLikely(key, huge=0x10000): + """Sort order key for a likely subtag key + + Although the entries are (lang, script, region), sort by (lang, region, + script) and sort 0 after all non-zero values, in each position. This + ensures that, when several mappings partially match a requested locale, + the one we should prefer to use appears first. + + We use 0x10000 as replacement for 0, as all IDs are unsigned short, so + less than 2^16.""" + # Map zero to huge: + have = tuple(x or huge for x in key) + # Use language, territory, script for sort order: + return have[0], have[2], have[1] # DOM access: from xml.dom import minidom diff --git a/util/locale_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py index dd015896725..8d03e6d72ad 100755 --- a/util/locale_database/qlocalexml2cpp.py +++ b/util/locale_database/qlocalexml2cpp.py @@ -268,25 +268,11 @@ class TimeZoneDataWriter (LocaleSourceEditor): class LocaleDataWriter (LocaleSourceEditor): def likelySubtags(self, likely): - # First sort likely, so that we can use binary search in C++ - # code. Although the entries are (lang, script, region), sort - # as (lang, region, script) and sort 0 after all non-zero - # values. This ensures that, when several mappings partially - # match a requested locale, the one we should prefer to use - # appears first. - huge = 0x10000 # > any ushort; all tag values are ushort - def keyLikely(entry): - have = entry[1] # Numeric id triple - return have[0] or huge, have[2] or huge, have[1] or huge # language, region, script - likely = sorted(likely, key=keyLikely) - - i = 0 + # Sort order of likely is taken care of upstream. self.writer.write('static constexpr QLocaleId likely_subtags[] = {\n') for had, have, got, give in likely: - i += 1 self.writer.write(' {{ {:3d}, {:3d}, {:3d} }}'.format(*have)) - self.writer.write(', {{ {:3d}, {:3d}, {:3d} }}'.format(*give)) - self.writer.write(' ' if i == len(likely) else ',') + self.writer.write(', {{ {:3d}, {:3d}, {:3d} }},'.format(*give)) self.writer.write(f' // {had} -> {got}\n') self.writer.write('};\n\n')