From 8b456bbd9ab23d39b0c151e2bc81b195aceacc28 Mon Sep 17 00:00:00 2001 From: Edward Welbourne Date: Fri, 16 Aug 2024 11:17:44 +0200 Subject: [PATCH] Include timezone L10n data in QLocaleXML files This makes the XML file bigger by a factor of roughly 8, at about 30 MB. Code to read the new data out of it shall follow in a later commit. Task-number: QTBUG-115158 Change-Id: I7b9b6abe88be2457fa6cf0e8d7b6a68845136770 Reviewed-by: Mate Barany --- util/locale_database/cldr.py | 7 ++++- util/locale_database/ldml.py | 2 +- util/locale_database/qlocalexml.py | 49 +++++++++++++++++++++++++---- util/locale_database/qlocalexml.rnc | 28 ++++++++++++++++- 4 files changed, 77 insertions(+), 9 deletions(-) diff --git a/util/locale_database/cldr.py b/util/locale_database/cldr.py index 387e8146569..6a125731459 100644 --- a/util/locale_database/cldr.py +++ b/util/locale_database/cldr.py @@ -37,6 +37,8 @@ class CldrReader (object): self.root = CldrAccess(root) self.whitter, self.grumble = whitter, grumble self.root.checkEnumData(grumble) + # TODO: can we do anything but ignore with the namings here ? + self.__bcp47Alias, ignore = self.root.bcp47Aliases() def likelySubTags(self): """Generator for likely subtag information. @@ -105,7 +107,7 @@ class CldrReader (object): that are not mentioned in enumdata.territory_map, on any Windows IDs given in zonedata.windowsIdList that are no longer covered by the CLDR data.""" - alias, ignored = self.root.bcp47Aliases() + alias = self.__bcp47Alias defaults, winIds = self.root.readWindowsTimeZones(alias) metamap, zones, territorial = self.root.readMetaZoneMap(alias) @@ -296,6 +298,9 @@ class CldrReader (object): locale.update(scan.unitData()) # byte, kB, MB, GB, ..., KiB, MiB, GiB, ... 
locale.update(scan.calendarNames(calendars)) # Names of days and months + # Naming of timezones: + locale.update(scan.timeZoneNames(self.__bcp47Alias)) + return locale # Note: various caches assume this class is a singleton, so the diff --git a/util/locale_database/ldml.py b/util/locale_database/ldml.py index af8de38689b..48d031e8ee8 100644 --- a/util/locale_database/ldml.py +++ b/util/locale_database/ldml.py @@ -441,7 +441,7 @@ class LocaleScanner (object): See CldrAccess.readMetaZoneMap() for the locale-independent data that stitches these pieces together.""" stem, formats = 'dates/timeZoneNames', {} - # '+HH:mm;-HH:mm' + # '+HH:mm;-HH:mm' (and :ss is also supported, but nowhere used in v45). # Sometimes has single-digit hours hours = self.find(f'{stem}/hourFormat').split(';') assert all('H' in f and 'm' in f for f in hours), (hours, self.name) diff --git a/util/locale_database/qlocalexml.py b/util/locale_database/qlocalexml.py index d2415a7bad3..97dc4f71346 100644 --- a/util/locale_database/qlocalexml.py +++ b/util/locale_database/qlocalexml.py @@ -505,14 +505,16 @@ class QLocaleXmlWriter (object): tuple of numeric IDs that corresponds to en_US (needed to provide fallbacks for the C locale).""" + def writeLocale(locale, cal = calendars, this = self): + this.__openTag('locale') + this.__writeLocale(locale, cal) + this.__writeLocaleZones(locale) + this.__closeTag('locale') + self.__openTag('localeList') - self.__openTag('locale') - self.__writeLocale(Locale.C(locales[en_US]), calendars) - self.__closeTag('locale') + writeLocale(Locale.C(locales[en_US])) for key in sorted(locales.keys()): - self.__openTag('locale') - self.__writeLocale(locales[key], calendars) - self.__closeTag('locale') + writeLocale(locales[key]) self.__closeTag('localeList') def inTag(self, tag, text, **attrs): @@ -602,6 +604,35 @@ class QLocaleXmlWriter (object): self.__scripts.discard(locale.script_code) self.__territories.discard(locale.territory_code) + def __writeLocaleZones(self, locale): 
+ self.__writeZoneForms('regionZoneFormats', locale.regionZoneFormats) + self.__writeZoneNaming('zoneNaming', locale.zoneNaming) + self.__writeZoneNaming('metaZoneNaming', locale.metaZoneNaming) + + def __writeZoneNaming(self, group, naming): + if not naming: + return + self.__openTag(group) + for iana in sorted(naming.keys()): + data = naming[iana] + self.__openTag('zoneNames', name=iana) + if 'exemplarCity' in data: + self.inTag('exemplar', data['exemplarCity']) + for form in ('short', 'long'): + if form in data: + self.__writeZoneForms(form, data[form]) + self.__closeTag('zoneNames') + self.__closeTag(group) + + def __writeZoneForms(self, group, forms): + if all(x is None for x in forms): + return + self.__openTag('zoneForms', name=group) + for i, tag in enumerate(('generic', 'standard', 'daylightSaving')): + if forms[i]: + self.safeInTag(tag, forms[i]) + self.__closeTag('zoneForms') + def __openTag(self, tag, **attrs): if attrs: text = ' '.join(f'{k}="{v}"' for k, v in attrs.items()) @@ -721,12 +752,18 @@ class Locale (object): 'longTimeFormat', 'shortTimeFormat', 'currencyIsoCode', 'currencySymbol', 'currencyDisplayName', 'currencyFormat', 'currencyNegativeFormat', + 'positiveOffsetFormat', 'negativeOffsetFormat', + 'gmtOffsetFormat', 'fallbackZoneFormat', ) + tuple(self.propsMonthDay('days')) + tuple( '_'.join((k, cal)) for k in self.propsMonthDay('months') for cal in calendars): write(key, escape(get(key))) + # The regionZoneFormats, zoneNaming and metaZoneNaming members + # are handled by QLocaleXmlWriter.__writeLocaleZones(). Their + # elements hold sub-elements. 
+ write('groupSizes', ';'.join(str(x) for x in get('groupSizes'))) for key in ('currencyDigits', 'currencyRounding'): write(key, get(key)) diff --git a/util/locale_database/qlocalexml.rnc b/util/locale_database/qlocalexml.rnc index fd38b62c302..5ed59c47b58 100644 --- a/util/locale_database/qlocalexml.rnc +++ b/util/locale_database/qlocalexml.rnc @@ -86,6 +86,19 @@ Digit = xsd:string { pattern = "\d" } Punctuation = xsd:string { pattern = "\p{P}" } GroupSizes = xsd:string { pattern = "\d;\d;\d" } +ZoneForms = element zoneForms { + attribute name { text }, # 'regionZoneFormats', 'short' or 'long' + element generic { text }?, + element standard { text }?, + element daylightSaving { text }? +} + +ZoneNames = element zoneNames { + attribute name { text }, # IANA ID of zone, or CLDR metazone name + element exemplar { text }?, # metaZoneNaming omits exemplar + ZoneForms* # for name in {'short', 'long'} +} + Locale = element locale { element language { text }, element script { text }, @@ -125,6 +138,14 @@ Locale = element locale { element currencyDisplayName { text }, element currencyFormat { text }, element currencyNegativeFormat { text }, + + # Timezone (and metazone) format data + element positiveOffsetFormat { text }, + element negativeOffsetFormat { text }, + element gmtOffsetFormat { text }, + element fallbackZoneFormat { text }, + + # Day names element longDays { text }, element standaloneLongDays { text }, element shortDays { text }, @@ -154,5 +175,10 @@ Locale = element locale { element groupSizes { GroupSizes }, element currencyDigits { xsd:nonNegativeInteger }, - element currencyRounding { xsd:nonNegativeInteger } + element currencyRounding { xsd:nonNegativeInteger }, + + # Timezone (and metazone) naming data + ZoneForms?, # name is 'regionZoneFormats' + element zoneNaming { ZoneNames+ }?, + element metaZoneNaming { ZoneNames+ }? }