Tweak lookup of en.xml names for languages, scripts and territories

Prefer stand-alone versions of the names when available. This removes
the need for a Han-specific kludge in the check for discrepancies
between our enum names and the en.xml names. It causes no change to
the generated locale data.

Change-Id: I162f3107d6ffc1f8b893b206e0b78b61cf7254f6
Reviewed-by: Ievgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io>
(cherry picked from commit 40b063cd745136c1c0be4c9903955218ef647a02)
Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
This commit is contained in:
Edward Welbourne 2023-08-01 12:23:49 +02:00 committed by Qt Cherry-pick Bot
parent a1ec66e7a3
commit 5221fbc4b0

View File

@ -356,7 +356,6 @@ class CldrAccess (object):
def __checkEnum(given, proper, scraps, def __checkEnum(given, proper, scraps,
remap = { 'å': 'a', 'ã': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ü': 'u'}, remap = { 'å': 'a', 'ã': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ü': 'u'},
prefix = { 'St.': 'Saint', 'U.S.': 'United States' }, prefix = { 'St.': 'Saint', 'U.S.': 'United States' },
suffixes = ( 'Han', ),
skip = '\u02bc'): skip = '\u02bc'):
# Each is a { code: full name } mapping # Each is a { code: full name } mapping
for code, name in given.items(): for code, name in given.items():
@ -376,8 +375,6 @@ class CldrAccess (object):
try: f, t = ok.index('('), ok.index(')') try: f, t = ok.index('('), ok.index(')')
except ValueError: break except ValueError: break
ok = ok[:f].rstrip() + ' ' + ok[t:].lstrip() ok = ok[:f].rstrip() + ' ' + ok[t:].lstrip()
if any(name == ok + ' ' + s for s in suffixes):
continue
if ''.join(ch for ch in name.lower() if not ch.isspace()) in ''.join( if ''.join(ch for ch in name.lower() if not ch.isspace()) in ''.join(
remap.get(ch, ch) for ch in ok.lower() if ch.isalpha() and ch not in skip): remap.get(ch, ch) for ch in ok.lower() if ch.isalpha() and ch not in skip):
continue continue
@ -699,7 +696,13 @@ enumdata.py (keeping the old name as an alias):
except (KeyError, ValueError, TypeError): except (KeyError, ValueError, TypeError):
pass pass
else: else:
if key not in seen or 'alt' not in elt.attributes: # Prefer stand-alone forms of names when present, ignore other
# alt="..." entries. For example, Traditional and Simplified
# Han omit "Han" in the plain form, but include it for
# stand-alone. As the stand-alone version appears later, it
# over-writes the plain one.
if (key not in seen or 'alt' not in elt.attributes
or elt.attributes['alt'].nodeValue == 'stand-alone'):
yield key, value yield key, value
seen.add(key) seen.add(key)