Prune timezone L10n data in preparation for writing it to C++ files
The data is very big but much of it is inherited by zones from those that they map to via likely-subtag reduction, so omit the data where it coincides with the result of such an inheritance; this shall complicate the reading of the data, but saves dramatically on its size, reducing it to "only" c. 2 MiB. Task-number: QTBUG-115158 Change-Id: I53ff13e29f1f73a551d73d75773373bb90673c8e Reviewed-by: Mate Barany <mate.barany@qt.io>
This commit is contained in:
parent
6b36e5de76
commit
cf48fc4bc8
@ -115,6 +115,68 @@ class QLocaleXmlReader (object):
|
||||
|
||||
yield (language, script, territory), locale
|
||||
|
||||
def pruneZoneNaming(self, locmap, report=lambda *x: None):
|
||||
"""Deduplicate zoneNaming and metaNaming mapings.
|
||||
|
||||
Where one locale would fall back to another via likely subtag
|
||||
fallbacks, skip any entries in the former's zoneNaming and metaNaming
|
||||
where it agrees with the latter.
|
||||
|
||||
This prunes over half of the (locale, zone) table and nearly two
|
||||
thirds of the (locale, meta) table."""
|
||||
likely = tuple((has, got) for have, has, give, got in self.likelyMap())
|
||||
def fallbacks(key):
|
||||
# Should match QtTimeZoneLocale::fallbackLocalesFor() in qlocale.cpp
|
||||
tried, head = { key }, 2
|
||||
while head > 0:
|
||||
# Retain [:head] of key but use 0 (i.e. Any) for the rest:
|
||||
it = self.__fillLikely(key[:head] + (0,) * (3 - head), likely)
|
||||
if it not in tried:
|
||||
tried.add(it)
|
||||
if it in locmap:
|
||||
yield locmap[it]
|
||||
head -= 1
|
||||
|
||||
# TODO: fix case of later fallbacks lacking a short name for a
|
||||
# metazone, where earlier ones with a short name all agree. Maybe do
|
||||
# similar for long names, and for zones as well as meta.
|
||||
# For a metazone, the territories in its map to IANA ID, combined with
|
||||
# the language and script of a locale that lacks names, give locales to
|
||||
# consult that might fall back to it, and to which to pay particular
|
||||
# attention.
|
||||
|
||||
zonePrior = metaPrior = 0
|
||||
zoneCount = metaCount = locCount = 0
|
||||
for key, loc in locmap.items():
|
||||
zonePrior += len(loc.zoneNaming)
|
||||
metaPrior += len(loc.metaNaming)
|
||||
# Omit zoneNaming and metaNaming entries that match those
|
||||
# of their likely sub-tag fallbacks.
|
||||
filtered = False
|
||||
for alt in fallbacks(key):
|
||||
filtered = True
|
||||
# Collect keys to purge before purging, so as not to
|
||||
# modify mappings while iterating them.
|
||||
purge = [zone for zone, data in loc.zoneNaming.items()
|
||||
if (zone in alt.zoneNaming
|
||||
and data == alt.zoneNaming[zone])]
|
||||
zoneCount += len(purge)
|
||||
for zone in purge:
|
||||
del loc.zoneNaming[zone]
|
||||
|
||||
purge = [meta for meta, data in loc.metaNaming.items()
|
||||
if (meta in alt.metaNaming
|
||||
and data == alt.metaNaming[meta])]
|
||||
metaCount += len(purge)
|
||||
for meta in purge:
|
||||
del loc.metaNaming[meta]
|
||||
if filtered:
|
||||
locCount += 1
|
||||
|
||||
report(f'Pruned duplicates: {zoneCount} (of {zonePrior}) zone '
|
||||
f'and {metaCount} (of {metaPrior}) metazone '
|
||||
f'entries from {locCount} (of {len(locmap)}) locales.\n')
|
||||
|
||||
def aliasToIana(self):
|
||||
def attr(elt, key):
|
||||
return elt.attributes[key].nodeValue
|
||||
@ -282,6 +344,80 @@ class QLocaleXmlReader (object):
|
||||
# Use language, territory, script for sort order:
|
||||
return have[0], have[2], have[1]
|
||||
|
||||
@classmethod
|
||||
def __lowerLikely(cls, key, likely):
|
||||
"""Lower-bound index for key in the likely subtag table
|
||||
|
||||
Equivalent to the std::lower_bound() calls in
|
||||
QLocaleId::withLikelySubtagsAdded()."""
|
||||
lo, hi = 0, len(likely)
|
||||
key = cls.__keyLikely(key)
|
||||
while lo + 1 < hi:
|
||||
mid, rem = divmod(lo + hi, 2)
|
||||
has = cls.__keyLikely(likely[mid][0])
|
||||
if has < key:
|
||||
lo = mid
|
||||
elif has > key:
|
||||
hi = mid
|
||||
else:
|
||||
return mid
|
||||
return hi
|
||||
|
||||
@classmethod
|
||||
def __fillLikely(cls, key, likely):
|
||||
"""Equivalent to QLocaleId::withLikelySubtagsAdded()
|
||||
|
||||
Takes one (language, script, territory) triple, key, of QLocale enum
|
||||
numeric values and returns another that fills in any zero entries based
|
||||
on the likely subtag data supplied as likely."""
|
||||
lang, script, land = key
|
||||
if lang and likely:
|
||||
likely = likely[cls.__lowerLikely(key, likely):]
|
||||
for entry in likely:
|
||||
vox, txt, ter = entry[0]
|
||||
if vox != lang:
|
||||
break
|
||||
if land and ter != land:
|
||||
continue
|
||||
if script and txt != script:
|
||||
continue
|
||||
|
||||
vox, txt, ter = entry[1]
|
||||
return vox, txt or script, ter or land
|
||||
|
||||
if land and likely:
|
||||
likely = likely[cls.__lowerLikely((0, script, land), likely):]
|
||||
for entry in likely:
|
||||
vox, txt, ter = entry[0]
|
||||
assert not vox, (key, entry)
|
||||
if ter != land:
|
||||
break
|
||||
if txt != script:
|
||||
continue
|
||||
|
||||
vox, txt, ter = entry[1]
|
||||
return lang or vox, txt or script, ter
|
||||
|
||||
if script and likely:
|
||||
likely = likely[cls.__lowerLikely((0, script, 0), likely):]
|
||||
for entry in likely:
|
||||
vox, txt, ter = entry[0]
|
||||
assert not (vox or ter), (key, entry)
|
||||
if txt != script:
|
||||
break
|
||||
|
||||
vox, txt, ter = entry[1]
|
||||
return lang or vox, txt, land or ter
|
||||
|
||||
if not any(key) and likely:
|
||||
likely = likely[cls.__lowerLikely(key, likely):]
|
||||
if likely:
|
||||
assert len(likely) == 1
|
||||
assert likely[0][0] == key
|
||||
return likely[0][1]
|
||||
|
||||
return key
|
||||
|
||||
# DOM access:
|
||||
from xml.dom import minidom
|
||||
@staticmethod
|
||||
|
@ -740,6 +740,7 @@ def main(argv, out, err):
|
||||
parser.add_argument('-q', '--quiet', help='less output',
|
||||
dest='verbose', action='store_const', const=-1)
|
||||
args = parser.parse_args(argv[1:])
|
||||
mutter = (lambda *x: None) if args.verbose < 0 else out.write
|
||||
|
||||
qlocalexml = args.input_file
|
||||
qtsrcdir = Path(args.qtbase_path)
|
||||
@ -752,6 +753,7 @@ def main(argv, out, err):
|
||||
|
||||
reader = QLocaleXmlReader(qlocalexml)
|
||||
locale_map = dict(reader.loadLocaleMap(calendars, err.write))
|
||||
reader.pruneZoneNaming(locale_map, mutter)
|
||||
locale_keys = sorted(locale_map.keys(), key=LocaleKeySorter(reader.defaultMap()))
|
||||
|
||||
code_data = LanguageCodeData(args.iso_path)
|
||||
|
Loading…
x
Reference in New Issue
Block a user