Include timezone L10n data in QLocaleXML files

This makes the XML file bigger by a factor of roughly 8, at about 30
MB. Code to read the new data out of it shall follow in a later
commit.

Task-number: QTBUG-115158
Change-Id: I7b9b6abe88be2457fa6cf0e8d7b6a68845136770
Reviewed-by: Mate Barany <mate.barany@qt.io>
This commit is contained in:
Edward Welbourne 2024-08-16 11:17:44 +02:00
parent dc71864d4a
commit 8b456bbd9a
4 changed files with 77 additions and 9 deletions

View File

@ -37,6 +37,8 @@ class CldrReader (object):
self.root = CldrAccess(root) self.root = CldrAccess(root)
self.whitter, self.grumble = whitter, grumble self.whitter, self.grumble = whitter, grumble
self.root.checkEnumData(grumble) self.root.checkEnumData(grumble)
# TODO: can we do anything but ignore with the namings here ?
self.__bcp47Alias, ignore = self.root.bcp47Aliases()
def likelySubTags(self): def likelySubTags(self):
"""Generator for likely subtag information. """Generator for likely subtag information.
@ -105,7 +107,7 @@ class CldrReader (object):
that are not mentioned in enumdata.territory_map, on any that are not mentioned in enumdata.territory_map, on any
Windows IDs given in zonedata.windowsIdList that are no longer Windows IDs given in zonedata.windowsIdList that are no longer
covered by the CLDR data.""" covered by the CLDR data."""
alias, ignored = self.root.bcp47Aliases() alias = self.__bcp47Alias
defaults, winIds = self.root.readWindowsTimeZones(alias) defaults, winIds = self.root.readWindowsTimeZones(alias)
metamap, zones, territorial = self.root.readMetaZoneMap(alias) metamap, zones, territorial = self.root.readMetaZoneMap(alias)
@ -296,6 +298,9 @@ class CldrReader (object):
locale.update(scan.unitData()) # byte, kB, MB, GB, ..., KiB, MiB, GiB, ... locale.update(scan.unitData()) # byte, kB, MB, GB, ..., KiB, MiB, GiB, ...
locale.update(scan.calendarNames(calendars)) # Names of days and months locale.update(scan.calendarNames(calendars)) # Names of days and months
# Naming of timezones:
locale.update(scan.timeZoneNames(self.__bcp47Alias))
return locale return locale
# Note: various caches assume this class is a singleton, so the # Note: various caches assume this class is a singleton, so the

View File

@ -441,7 +441,7 @@ class LocaleScanner (object):
See CldrAccess.readMetaZoneMap() for the locale-independent See CldrAccess.readMetaZoneMap() for the locale-independent
data that stitches these pieces together.""" data that stitches these pieces together."""
stem, formats = 'dates/timeZoneNames', {} stem, formats = 'dates/timeZoneNames', {}
# '+HH:mm;-HH:mm' # '+HH:mm;-HH:mm' (and :ss is also supported, but nowhere used in v45).
# Sometimes has single-digit hours # Sometimes has single-digit hours
hours = self.find(f'{stem}/hourFormat').split(';') hours = self.find(f'{stem}/hourFormat').split(';')
assert all('H' in f and 'm' in f for f in hours), (hours, self.name) assert all('H' in f and 'm' in f for f in hours), (hours, self.name)

View File

@ -505,14 +505,16 @@ class QLocaleXmlWriter (object):
tuple of numeric IDs that corresponds to en_US (needed to tuple of numeric IDs that corresponds to en_US (needed to
provide fallbacks for the C locale).""" provide fallbacks for the C locale)."""
def writeLocale(locale, cal = calendars, this = self):
this.__openTag('locale')
this.__writeLocale(locale, cal)
this.__writeLocaleZones(locale)
this.__closeTag('locale')
self.__openTag('localeList') self.__openTag('localeList')
self.__openTag('locale') writeLocale(Locale.C(locales[en_US]))
self.__writeLocale(Locale.C(locales[en_US]), calendars)
self.__closeTag('locale')
for key in sorted(locales.keys()): for key in sorted(locales.keys()):
self.__openTag('locale') writeLocale(locales[key])
self.__writeLocale(locales[key], calendars)
self.__closeTag('locale')
self.__closeTag('localeList') self.__closeTag('localeList')
def inTag(self, tag, text, **attrs): def inTag(self, tag, text, **attrs):
@ -602,6 +604,35 @@ class QLocaleXmlWriter (object):
self.__scripts.discard(locale.script_code) self.__scripts.discard(locale.script_code)
self.__territories.discard(locale.territory_code) self.__territories.discard(locale.territory_code)
def __writeLocaleZones(self, locale):
    """Emit the timezone naming data of one locale.

    Writes the locale's regionZoneFormats as a zoneForms element,
    then its zoneNaming and metaZoneNaming groups, in that order."""
    self.__writeZoneForms('regionZoneFormats', locale.regionZoneFormats)
    for group in ('zoneNaming', 'metaZoneNaming'):
        # Each group's element shares its name with the Locale
        # attribute that holds its data.
        self.__writeZoneNaming(group, getattr(locale, group))
def __writeZoneNaming(self, group, naming):
    """Write one group of per-zone naming data.

    The naming is a mapping from zone name to a mapping that may
    have an 'exemplarCity' entry and 'short' and 'long' entries, the
    latter two being passed on to __writeZoneForms(). Zones are
    written in sorted order of name, each in a zoneNames element,
    all wrapped in an element named by group. An empty naming
    produces no output at all."""
    if not naming:
        return

    self.__openTag(group)
    for zone in sorted(naming):
        names = naming[zone]
        self.__openTag('zoneNames', name=zone)
        if 'exemplarCity' in names:
            self.inTag('exemplar', names['exemplarCity'])
        # Only write the widths this zone actually has data for:
        present = [width for width in ('short', 'long') if width in names]
        for width in present:
            self.__writeZoneForms(width, names[width])
        self.__closeTag('zoneNames')
    self.__closeTag(group)
def __writeZoneForms(self, group, forms):
    """Write a zoneForms element holding the given forms of a name.

    The group is used as the element's name attribute. The forms are
    indexed in the order (generic, standard, daylightSaving); each
    entry with content gets a child element of that name, written
    via safeInTag(). When no entry has content, or forms is itself
    missing, nothing is written, so the output never contains a
    zoneForms element without children."""
    # Test for content, not just for None: the loop below skips every
    # empty entry, so forms with only empty strings would otherwise
    # produce an empty element.
    if not forms or not any(forms):
        return
    self.__openTag('zoneForms', name=group)
    for i, tag in enumerate(('generic', 'standard', 'daylightSaving')):
        if forms[i]:
            self.safeInTag(tag, forms[i])
    self.__closeTag('zoneForms')
def __openTag(self, tag, **attrs): def __openTag(self, tag, **attrs):
if attrs: if attrs:
text = ' '.join(f'{k}="{v}"' for k, v in attrs.items()) text = ' '.join(f'{k}="{v}"' for k, v in attrs.items())
@ -721,12 +752,18 @@ class Locale (object):
'longTimeFormat', 'shortTimeFormat', 'longTimeFormat', 'shortTimeFormat',
'currencyIsoCode', 'currencySymbol', 'currencyDisplayName', 'currencyIsoCode', 'currencySymbol', 'currencyDisplayName',
'currencyFormat', 'currencyNegativeFormat', 'currencyFormat', 'currencyNegativeFormat',
'positiveOffsetFormat', 'negativeOffsetFormat',
'gmtOffsetFormat', 'fallbackZoneFormat',
) + tuple(self.propsMonthDay('days')) + tuple( ) + tuple(self.propsMonthDay('days')) + tuple(
'_'.join((k, cal)) '_'.join((k, cal))
for k in self.propsMonthDay('months') for k in self.propsMonthDay('months')
for cal in calendars): for cal in calendars):
write(key, escape(get(key))) write(key, escape(get(key)))
# The regionZoneFormats, zoneNaming and metaZoneNaming members
# are handled by QLocaleXmlWriter.__writeLocaleZones(). Their
# elements hold sub-elements.
write('groupSizes', ';'.join(str(x) for x in get('groupSizes'))) write('groupSizes', ';'.join(str(x) for x in get('groupSizes')))
for key in ('currencyDigits', 'currencyRounding'): for key in ('currencyDigits', 'currencyRounding'):
write(key, get(key)) write(key, get(key))

View File

@ -86,6 +86,19 @@ Digit = xsd:string { pattern = "\d" }
Punctuation = xsd:string { pattern = "\p{P}" } Punctuation = xsd:string { pattern = "\p{P}" }
GroupSizes = xsd:string { pattern = "\d;\d;\d" } GroupSizes = xsd:string { pattern = "\d;\d;\d" }
# The forms of a zone-related name; each form is optional.
ZoneForms = element zoneForms {
attribute name { text }, # 'regionZoneFormats', 'short' or 'long'
element generic { text }?,
element standard { text }?,
element daylightSaving { text }?
}
# The naming data for one zone or metazone: an optional exemplar
# and any number of zoneForms elements.
ZoneNames = element zoneNames {
attribute name { text }, # IANA ID of zone, or CLDR metazone name
element exemplar { text }?, # metaZoneNaming omits exemplar
ZoneForms* # for name in {'short', 'long'}
}
Locale = element locale { Locale = element locale {
element language { text }, element language { text },
element script { text }, element script { text },
@ -125,6 +138,14 @@ Locale = element locale {
element currencyDisplayName { text }, element currencyDisplayName { text },
element currencyFormat { text }, element currencyFormat { text },
element currencyNegativeFormat { text }, element currencyNegativeFormat { text },
# Timezone (and metazone) format data
element positiveOffsetFormat { text },
element negativeOffsetFormat { text },
element gmtOffsetFormat { text },
element fallbackZoneFormat { text },
# Day names
element longDays { text }, element longDays { text },
element standaloneLongDays { text }, element standaloneLongDays { text },
element shortDays { text }, element shortDays { text },
@ -154,5 +175,10 @@ Locale = element locale {
element groupSizes { GroupSizes }, element groupSizes { GroupSizes },
element currencyDigits { xsd:nonNegativeInteger }, element currencyDigits { xsd:nonNegativeInteger },
element currencyRounding { xsd:nonNegativeInteger } element currencyRounding { xsd:nonNegativeInteger },
# Timezone (and metazone) naming data
ZoneForms?, # name is 'regionZoneFormats'
element zoneNaming { ZoneNames+ }?,
element metaZoneNaming { ZoneNames+ }?
} }