Include timezone L10n data in QLocaleXML files

This makes the XML file bigger by a factor of roughly 8, at about 30
MB. Code to read the new data out of it shall follow in a later
commit.

Task-number: QTBUG-115158
Change-Id: I7b9b6abe88be2457fa6cf0e8d7b6a68845136770
Reviewed-by: Mate Barany <mate.barany@qt.io>
This commit is contained in:
Edward Welbourne 2024-08-16 11:17:44 +02:00
parent dc71864d4a
commit 8b456bbd9a
4 changed files with 77 additions and 9 deletions

View File

@ -37,6 +37,8 @@ class CldrReader (object):
self.root = CldrAccess(root)
self.whitter, self.grumble = whitter, grumble
self.root.checkEnumData(grumble)
# TODO: can we do anything but ignore with the namings here ?
self.__bcp47Alias, ignore = self.root.bcp47Aliases()
def likelySubTags(self):
"""Generator for likely subtag information.
@ -105,7 +107,7 @@ class CldrReader (object):
that are not mentioned in enumdata.territory_map, on any
Windows IDs given in zonedata.windowsIdList that are no longer
covered by the CLDR data."""
alias, ignored = self.root.bcp47Aliases()
alias = self.__bcp47Alias
defaults, winIds = self.root.readWindowsTimeZones(alias)
metamap, zones, territorial = self.root.readMetaZoneMap(alias)
@ -296,6 +298,9 @@ class CldrReader (object):
locale.update(scan.unitData()) # byte, kB, MB, GB, ..., KiB, MiB, GiB, ...
locale.update(scan.calendarNames(calendars)) # Names of days and months
# Naming of timezones:
locale.update(scan.timeZoneNames(self.__bcp47Alias))
return locale
# Note: various caches assume this class is a singleton, so the

View File

@ -441,7 +441,7 @@ class LocaleScanner (object):
See CldrAccess.readMetaZoneMap() for the locale-independent
data that stitches these pieces together."""
stem, formats = 'dates/timeZoneNames', {}
# '+HH:mm;-HH:mm'
# '+HH:mm;-HH:mm' (and :ss is also supported, but nowhere used in v45).
# Sometimes has single-digit hours
hours = self.find(f'{stem}/hourFormat').split(';')
assert all('H' in f and 'm' in f for f in hours), (hours, self.name)

View File

@ -505,14 +505,16 @@ class QLocaleXmlWriter (object):
tuple of numeric IDs that corresponds to en_US (needed to
provide fallbacks for the C locale)."""
def writeLocale(locale, cal = calendars, this = self):
this.__openTag('locale')
this.__writeLocale(locale, cal)
this.__writeLocaleZones(locale)
this.__closeTag('locale')
self.__openTag('localeList')
self.__openTag('locale')
self.__writeLocale(Locale.C(locales[en_US]), calendars)
self.__closeTag('locale')
writeLocale(Locale.C(locales[en_US]))
for key in sorted(locales.keys()):
self.__openTag('locale')
self.__writeLocale(locales[key], calendars)
self.__closeTag('locale')
writeLocale(locales[key])
self.__closeTag('localeList')
def inTag(self, tag, text, **attrs):
@ -602,6 +604,35 @@ class QLocaleXmlWriter (object):
self.__scripts.discard(locale.script_code)
self.__territories.discard(locale.territory_code)
def __writeLocaleZones(self, locale):
    """Write the timezone-related child elements of one locale.

    Emits, in this order, the zoneForms element for the locale's
    regionZoneFormats, then its zoneNaming and metaZoneNaming
    groups. The helpers called here skip their output when the
    relevant data is missing."""
    self.__writeZoneForms('regionZoneFormats', locale.regionZoneFormats)
    # The two naming groups share a layout; each element is named
    # after the locale attribute that supplies its data.
    for group in ('zoneNaming', 'metaZoneNaming'):
        self.__writeZoneNaming(group, getattr(locale, group))
def __writeZoneNaming(self, group, naming):
    """Write one group of per-zone names.

    The group parameter is the tag name of the enclosing element.
    The naming parameter maps each zone's name to a mapping that
    may have 'exemplarCity', 'short' and 'long' entries. Nothing
    is written if naming is empty. Zones are written in sorted
    order of their names."""
    if naming:
        self.__openTag(group)
        for zone in sorted(naming):
            entry = naming[zone]
            self.__openTag('zoneNames', name=zone)
            if 'exemplarCity' in entry:
                self.inTag('exemplar', entry['exemplarCity'])
            # Short forms, when present, precede long ones.
            for width in ('short', 'long'):
                if width in entry:
                    self.__writeZoneForms(width, entry[width])
            self.__closeTag('zoneNames')
        self.__closeTag(group)
def __writeZoneForms(self, group, forms):
    """Write a zoneForms element, with group as its name attribute.

    The forms parameter is indexed at 0, 1 and 2 for the generic,
    standard and daylight-saving forms, respectively. Nothing is
    written when every entry of forms is None; otherwise, each
    form that is true is written as a child element."""
    # NOTE(review): the early return tests for None but the loop
    # tests truthiness, so forms that are all empty (yet not None)
    # would produce an element with no children - confirm that the
    # reader of this data copes with that.
    if not any(form is not None for form in forms):
        return

    tags = ('generic', 'standard', 'daylightSaving')
    self.__openTag('zoneForms', name=group)
    for index, tag in enumerate(tags):
        text = forms[index]
        if text:
            self.safeInTag(tag, text)
    self.__closeTag('zoneForms')
def __openTag(self, tag, **attrs):
if attrs:
text = ' '.join(f'{k}="{v}"' for k, v in attrs.items())
@ -721,12 +752,18 @@ class Locale (object):
'longTimeFormat', 'shortTimeFormat',
'currencyIsoCode', 'currencySymbol', 'currencyDisplayName',
'currencyFormat', 'currencyNegativeFormat',
'positiveOffsetFormat', 'negativeOffsetFormat',
'gmtOffsetFormat', 'fallbackZoneFormat',
) + tuple(self.propsMonthDay('days')) + tuple(
'_'.join((k, cal))
for k in self.propsMonthDay('months')
for cal in calendars):
write(key, escape(get(key)))
# The regionZoneFormats, zoneNaming and metaZoneNaming members
# are handled by QLocaleXmlWriter.__writeLocaleZones(). Their
# elements hold sub-elements.
write('groupSizes', ';'.join(str(x) for x in get('groupSizes')))
for key in ('currencyDigits', 'currencyRounding'):
write(key, get(key))

View File

@ -86,6 +86,19 @@ Digit = xsd:string { pattern = "\d" }
Punctuation = xsd:string { pattern = "\p{P}" }
GroupSizes = xsd:string { pattern = "\d;\d;\d" }
# The generic, standard and daylight-saving forms of a name or format;
# each child is optional, as the writer omits any form that is empty.
ZoneForms = element zoneForms {
attribute name { text }, # 'regionZoneFormats', 'short' or 'long'
element generic { text }?,
element standard { text }?,
element daylightSaving { text }?
}
# Naming data for one zone or metazone: an optional exemplar city,
# followed by the zoneForms for whichever of its short and long names
# are available.
ZoneNames = element zoneNames {
attribute name { text }, # IANA ID of zone, or CLDR metazone name
element exemplar { text }?, # metaZoneNaming omits exemplar
ZoneForms* # for name in {'short', 'long'}
}
Locale = element locale {
element language { text },
element script { text },
@ -125,6 +138,14 @@ Locale = element locale {
element currencyDisplayName { text },
element currencyFormat { text },
element currencyNegativeFormat { text },
# Timezone (and metazone) format data
element positiveOffsetFormat { text },
element negativeOffsetFormat { text },
element gmtOffsetFormat { text },
element fallbackZoneFormat { text },
# Day names
element longDays { text },
element standaloneLongDays { text },
element shortDays { text },
@ -154,5 +175,10 @@ Locale = element locale {
element groupSizes { GroupSizes },
element currencyDigits { xsd:nonNegativeInteger },
element currencyRounding { xsd:nonNegativeInteger }
element currencyRounding { xsd:nonNegativeInteger },
# Timezone (and metazone) naming data
ZoneForms?, # name is 'regionZoneFormats'
element zoneNaming { ZoneNames+ }?,
element metaZoneNaming { ZoneNames+ }?
}