Purge some archaic complications from CLDR parsing

Apparently there used to be a mechanism whereby an alias element at
the top level of an LDML document could name a parent locale in its
source attribute. That is long gone: for at least a decade, alias
elements have only ever appeared in root.xml, with source="locale"
and a path that starts ../ (so is a relative XPath).

Ditch some complications (that I transcribed faithfully five-ish years
ago when transforming the scripts), replacing them with assertions
that check what's now documented in the LDML spec and confirmed by my
own grep-checks in the CLDR data. This incidentally made one prior
(weaker) check redundant, so I've now removed that from the look-up
for the tags that identify a locale. That look-up is only ever
performed after the DOM root nodes it uses have come through the scan
of locale roots that now does the stronger check.

Makes no difference to generated data.

Change-Id: I811ffbef5f5ecb69183d68fa8bda57281f2a579d
Reviewed-by: Mate Barany <mate.barany@qt.io>
This commit is contained in:
Edward Welbourne 2024-09-11 17:14:33 +02:00
parent d466b6deaf
commit f6afd98e7e
2 changed files with 16 additions and 38 deletions

View File

@ -997,29 +997,17 @@ enumdata.py (keeping the old name as an alias):
return cache
def __localeAsDoc(self, name: str, aliasFor = None):
def __scanLocaleRoots(self, name: str):
while name and name != 'root':
path = f'common/main/{name}.xml'
if self.root.joinpath(path).exists():
elt = self.__xml(path)
for child in Node(elt).findAllChildren('alias'):
try:
alias = child.dom.attributes['source'].nodeValue
except (KeyError, AttributeError):
pass
else:
return self.__localeAsDoc(alias, aliasFor or name)
# No alias child with a source:
return elt
if aliasFor:
raise Error(f'Fatal error: found an alias "{aliasFor}" -> "{name}", '
'but found no file for the alias')
def __scanLocaleRoots(self, name):
while name and name != 'root':
doc = self.__localeAsDoc(name)
if doc is not None:
yield Node(doc, self.__unDistinguishedAttributes)
elt = self.__xml(path) # which has no top-level alias children:
assert not any(True
for child in Node(elt).findAllChildren(
'alias', allDull=True)
), (f"Locale {name} "
"has an archaic top-level alias element")
yield Node(elt, self.__unDistinguishedAttributes)
try:
name = self.__parentLocale[name]

View File

@ -233,19 +233,8 @@ class LocaleScanner (object):
The tag codes are language, script, territory and variant; an
empty value for any of them indicates that no value was
provided. The values are obtained from the primary file's
top-level <identity> element. An Error is raised if any
top-level <alias> element of this file has a non-empty source
attribute; that attribute value is mentioned in the error's
message."""
top-level <identity> element."""
root = self.nodes[0]
for alias in root.findAllChildren('alias', allDull=True):
try:
source = alias.dom.attributes['source'].nodeValue
except (KeyError, AttributeError):
pass
else:
raise Error(f'Alias to {source}')
ids = root.findUniqueChild('identity')
for code in ('language', 'script', 'territory', 'variant'):
for node in ids.findAllChildren(code, allDull=True):
@ -529,13 +518,14 @@ class LocaleScanner (object):
yield elt
# Process roots separately: otherwise the alias-processing
# is excessive.
# is excessive (and alias only ever shows up in root.xml,
# always with source="locale").
for i, selector in enumerate(tags):
tag, attrs = _parseXPath(selector)
for alias in tuple(_iterateEach(r.findAllChildren('alias', allDull=True)
for r in roots)):
if alias.dom.attributes['source'].nodeValue == 'locale':
assert alias.dom.attributes['source'].nodeValue == 'locale', alias
replace = alias.dom.attributes['path'].nodeValue.split('/')
retries.append(self.__xpathJoin(tags[:i], replace, tags[i:]))