Tweak lookup of en.xml names for languages, scripts and territories

Prefer stand-alone versions of the names when available. This removes
the need for a Han-specific kludge in the check for discrepancies
between our enum names and the en.xml names. It causes no change to
the generated locale data.

Change-Id: I162f3107d6ffc1f8b893b206e0b78b61cf7254f6
Reviewed-by: Ievgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io>
(cherry picked from commit 40b063cd745136c1c0be4c9903955218ef647a02)
Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
This commit is contained in:
Edward Welbourne 2023-08-01 12:23:49 +02:00 committed by Qt Cherry-pick Bot
parent a1ec66e7a3
commit 5221fbc4b0

View File

@ -356,7 +356,6 @@ class CldrAccess (object):
def __checkEnum(given, proper, scraps,
remap = { 'å': 'a', 'ã': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ü': 'u'},
prefix = { 'St.': 'Saint', 'U.S.': 'United States' },
suffixes = ( 'Han', ),
skip = '\u02bc'):
# Each is a { code: full name } mapping
for code, name in given.items():
@ -376,8 +375,6 @@ class CldrAccess (object):
try: f, t = ok.index('('), ok.index(')')
except ValueError: break
ok = ok[:f].rstrip() + ' ' + ok[t:].lstrip()
if any(name == ok + ' ' + s for s in suffixes):
continue
if ''.join(ch for ch in name.lower() if not ch.isspace()) in ''.join(
remap.get(ch, ch) for ch in ok.lower() if ch.isalpha() and ch not in skip):
continue
@ -699,7 +696,13 @@ enumdata.py (keeping the old name as an alias):
except (KeyError, ValueError, TypeError):
pass
else:
if key not in seen or 'alt' not in elt.attributes:
# Prefer stand-alone forms of names when present, ignore other
# alt="..." entries. For example, Traditional and Simplified
# Han omit "Han" in the plain form, but include it for
# stand-alone. As the stand-alone version appears later, it
# over-writes the plain one.
if (key not in seen or 'alt' not in elt.attributes
or elt.attributes['alt'].nodeValue == 'stand-alone'):
yield key, value
seen.add(key)