Derive C locale data from en_US, overriding minor details

The qlocalexml.py Locale.C() had to replicate a whole lot of data that
isn't really relevant to how C differs from en_US and every addition
to what we support required further additions to it. So pass the en_US
Locale object to the pseudoconstructor so that C can inherit from it
and only override the parts where we care about the difference.

Hand-code shortening for short Jalali month names, to match Soroush's
original contribution, and include the narrow forms in the hard-coded
data to keep the generated data unchanged (for now). Note some of the
departures from CLDR; we may want to drop these overrides later.

In the process, convert the mapping from keys to locales to
consistently use IDs for all members of the key, instead of using the
(empty) code value for (as yet unused) variant; it now gets ID 0 and
is consistent with returns from codesToIdNames(). This makes life
easier for the code that now has to construct an en_US key.

Task-number: QTBUG-115158
Change-Id: I3d7acb6a4059daec1bba341fcf015c39c7a6803b
Reviewed-by: Kai Köhne <kai.koehne@qt.io>
This commit is contained in:
Edward Welbourne 2024-04-26 12:27:10 +02:00
parent 5641b17e2f
commit 0c809fc3b5
3 changed files with 50 additions and 91 deletions

View File

@ -146,7 +146,7 @@ class CldrReader (object):
return alias, defaults, winIds
def readLocales(self, calendars = ('gregorian',)):
return {(k.language_id, k.script_id, k.territory_id, k.variant_code): k
return {(k.language_id, k.script_id, k.territory_id, k.variant_id): k
for k in self.__allLocales(calendars)}
def __allLocales(self, calendars):
@ -264,7 +264,7 @@ class CldrReader (object):
language = names[0], language_code = language, language_id = ids[0],
script = names[1], script_code = script, script_id = ids[1],
territory = names[2], territory_code = territory, territory_id = ids[2],
variant_code = variant)
variant_code = variant, variant_id = ids[3])
firstDay, weStart, weEnd = self.root.weekData(territory)
assert all(day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun')

View File

@ -91,7 +91,8 @@ def main(argv, out, err):
writer.enumData(reader.root.englishNaming)
writer.likelySubTags(reader.likelySubTags())
writer.zoneData(*reader.zoneData()) # Locale-independent zone data.
writer.locales(reader.readLocales(args.calendars), args.calendars)
en_US = tuple(id for id, name in reader.root.codesToIdName('en', '', 'US'))
writer.locales(reader.readLocales(args.calendars), args.calendars, en_US)
writer.close(err.write)
return 0

View File

@ -372,10 +372,19 @@ class QLocaleXmlWriter (object):
self.__closeTag('msZoneIana')
self.__closeTag('windowsZone')
def locales(self, locales, calendars):
def locales(self, locales, calendars, en_US):
"""Write the data for each locale.
First argument, locales, is the mapping whose values are the
Locale objects, with each key being the matching tuple of
numeric IDs for language, script, territory and variant.
Second argument is a tuple of calendar names. Third is the
tuple of numeric IDs that corresponds to en_US (needed to
provide fallbacks for the C locale)."""
self.__openTag('localeList')
self.__openTag('locale')
self.__writeLocale(Locale.C(calendars), calendars)
self.__writeLocale(Locale.C(locales[en_US]), calendars)
self.__closeTag('locale')
for key in sorted(locales.keys()):
self.__openTag('locale')
@ -575,97 +584,46 @@ class Locale (object):
for key in ('currencyDigits', 'currencyRounding'):
write(key, get(key))
# Tools used by __monthNames:
def fullName(i, name): return name
def firstThree(i, name): return name[:3]
def initial(i, name): return name[:1]
def number(i, name): return str(i + 1)
def islamicShort(i, name):
if not name: return name
if name == 'Shawwal': return 'Shaw.'
words = name.split()
if words[0].startswith('Dhu'):
words[0] = words[0][:7] + '.'
elif len(words[0]) > 3:
words[0] = words[0][:3] + '.'
return ' '.join(words)
@staticmethod
def __monthNames(calendars,
known={ # Map calendar to (names, extractors...):
# TODO: do we even need these ? CLDR's root.xml seems to
# have them, complete with yeartype="leap" handling for
# Hebrew's extra.
'gregorian': (('January', 'February', 'March', 'April', 'May', 'June', 'July',
'August', 'September', 'October', 'November', 'December'),
# Extractor pairs, (plain, standalone)
(fullName, fullName), # long
(firstThree, firstThree), # short
(number, initial)), # narrow
'persian': (('Farvardin', 'Ordibehesht', 'Khordad', 'Tir', 'Mordad',
'Shahrivar', 'Mehr', 'Aban', 'Azar', 'Dey', 'Bahman', 'Esfand'),
(fullName, fullName),
(firstThree, firstThree),
(number, initial)),
'islamic': (('Muharram', 'Safar', 'Rabiʻ I', 'Rabiʻ II', 'Jumada I',
'Jumada II', 'Rajab', 'Shaʻban', 'Ramadan', 'Shawwal',
'Dhuʻl-Qiʻdah', 'Dhuʻl-Hijjah'),
(fullName, fullName),
(islamicShort, islamicShort),
(number, number)),
'hebrew': (('Tishri', 'Heshvan', 'Kislev', 'Tevet', 'Shevat', 'Adar I',
'Adar', 'Nisan', 'Iyar', 'Sivan', 'Tamuz', 'Av'),
(fullName, fullName),
(fullName, fullName),
(number, number)),
},
sizes=('long', 'short', 'narrow')):
for cal in calendars:
try:
data = known[cal]
except KeyError as e: # Need to add an entry to known, above.
e.args += ('Unsupported calendar:', cal)
raise
names, get = data[0], data[1:]
for n, size in enumerate(sizes):
yield ('_'.join((camelCase((size, 'months')), cal)),
';'.join(get[n][0](i, x) for i, x in enumerate(names)))
yield ('_'.join((camelCase(('standalone', size, 'months')), cal)),
';'.join(get[n][1](i, x) for i, x in enumerate(names)))
del fullName, firstThree, initial, number, islamicShort
@classmethod
def C(cls, calendars=('gregorian',),
days = ('Sunday', 'Monday', 'Tuesday', 'Wednesday',
'Thursday', 'Friday', 'Saturday'),
quantifiers=('k', 'M', 'G', 'T', 'P', 'E')):
"""Returns an object representing the C locale."""
return cls(cls.__monthNames(calendars),
def C(cls, en_US):
"""Returns an object representing the C locale.
Required argument, en_US, is the corresponding object for the
en_US locale (or the en_US_POSIX one if we ever support
variants). The C locale inherits from this, overriding what it
may need to."""
base = en_US.__dict__.copy()
# Soroush's original contribution shortened Jalali month names
# - contrary to CLDR, which doesn't abbreviate these in
# root.xml or en.xml, although some locales do, e.g. fr_CA.
# For compatibility with that,
for k in ('shortMonths_persian', 'standaloneShortMonths_persian'):
base[k] = ';'.join(x[:3] for x in base[k].split(';'))
return cls(base,
language='C', language_code='0', languageEndonym='',
script='AnyScript', script_code='0',
territory='AnyTerritory', territory_code='0', territoryEndonym='',
groupSizes=(3, 3, 1),
decimal='.', group=',', list=';', percent='%',
zero='0', minus='-', plus='+', exp='e',
# CLDR has non-ASCII versions of these:
quotationStart='"', quotationEnd='"',
alternateQuotationStart='\'', alternateQuotationEnd='\'',
listPatternPartStart='%1, %2',
listPatternPartMiddle='%1, %2',
listPatternPartEnd='%1, %2',
listPatternPartTwo='%1, %2',
byte_unit='bytes',
byte_si_quantified=';'.join(q + 'B' for q in quantifiers),
byte_iec_quantified=';'.join(q.upper() + 'iB' for q in quantifiers),
am='AM', pm='PM', firstDayOfWeek='mon',
weekendStart='sat', weekendEnd='sun',
alternateQuotationStart="'", alternateQuotationEnd="'",
# CLDR gives 'dddd, MMMM d, yyyy', 'M/d/yy', 'h:mm:ssAp tttt',
# 'h:mmAp' with non-breaking space before Ap.
longDateFormat='dddd, d MMMM yyyy', shortDateFormat='d MMM yyyy',
longTimeFormat='HH:mm:ss t', shortTimeFormat='HH:mm:ss',
longDays=';'.join(days),
shortDays=';'.join(d[:3] for d in days),
narrowDays='7;1;2;3;4;5;6',
standaloneLongDays=';'.join(days),
standaloneShortDays=';'.join(d[:3] for d in days),
standaloneNarrowDays=';'.join(d[:1] for d in days),
currencyIsoCode='', currencySymbol='',
currencyDisplayName='',
# CLDR has US-$ and US-style formats:
currencyIsoCode='', currencySymbol='', currencyDisplayName='',
currencyDigits=2, currencyRounding=1,
currencyFormat='%1%2', currencyNegativeFormat='')
currencyFormat='%1%2', currencyNegativeFormat='',
# We may want to fall back to CLDR for some of these:
firstDayOfWeek='mon', # CLDR has 'sun'
exp='e', # CLDR has 'E'
listPatternPartEnd='%1, %2', # CLDR has '%1, and %2'
listPatternPartTwo='%1, %2', # CLDR has '%1 and %2'
narrowDays='7;1;2;3;4;5;6', # CLDR has letters
narrowMonths_gregorian='1;2;3;4;5;6;7;8;9;10;11;12', # CLDR has letters
standaloneNarrowMonths_persian='F;O;K;T;M;S;M;A;A;D;B;E', # CLDR has digits
# Keep these explicit, despite matching CLDR:
decimal='.', group=',', percent='%',
zero='0', minus='-', plus='+',
am='AM', pm='PM', weekendStart='sat', weekendEnd='sun')