From f6afd98e7e7f5fce1a498a2736e8b964e4d9a9bd Mon Sep 17 00:00:00 2001
From: Edward Welbourne
Date: Wed, 11 Sep 2024 17:14:33 +0200
Subject: [PATCH] Purge some archaic complications from CLDR parsing

Apparently there used to be a mechanism where an alias element in a
top-level LDML element could serve to provide a parent locale as its
source attribute. That is long gone and, since at least a decade ago,
alias elements only ever appear in root.xml, with source="locale" and
a path that starts ../ (so is a relative XPath).

Ditch some complications (that I transcribed faithfully five-ish years
ago when transforming the scripts), replacing them with assertions
that check what's now documented in the LDML spec and confirmed by my
own grep-checks in the CLDR data.

This incidentally made one prior (weaker) check redundant, so I've now
removed that from the look-up for the tags that identify a locale.
That look-up is only ever performed after the DOM root nodes it uses
have come through the scan of locale roots that now does the stronger
check.

Makes no difference to generated data.

Change-Id: I811ffbef5f5ecb69183d68fa8bda57281f2a579d Reviewed-by: Mate Barany --- util/locale_database/cldr.py | 32 ++++++++++---------------------- util/locale_database/ldml.py | 22 ++++++---------------- 2 files changed, 16 insertions(+), 38 deletions(-) diff --git a/util/locale_database/cldr.py b/util/locale_database/cldr.py index 2436e5dbfcb..1692b872d03 100644 --- a/util/locale_database/cldr.py +++ b/util/locale_database/cldr.py @@ -997,29 +997,17 @@ enumdata.py (keeping the old name as an alias): return cache - def __localeAsDoc(self, name: str, aliasFor = None): - path = f'common/main/{name}.xml' - if self.root.joinpath(path).exists(): - elt = self.__xml(path) - for child in Node(elt).findAllChildren('alias'): - try: - alias = child.dom.attributes['source'].nodeValue - except (KeyError, AttributeError): - pass - else: - return self.__localeAsDoc(alias, aliasFor or name) - # No alias child with a source: - return elt - - if aliasFor: - raise Error(f'Fatal error: found an alias "{aliasFor}" -> "{name}", ' - 'but found no file for the alias') - - def __scanLocaleRoots(self, name): + def __scanLocaleRoots(self, name: str): while name and name != 'root': - doc = self.__localeAsDoc(name) - if doc is not None: - yield Node(doc, self.__unDistinguishedAttributes) + path = f'common/main/{name}.xml' + if self.root.joinpath(path).exists(): + elt = self.__xml(path) # which has no top-level alias children: + assert not any(True + for child in Node(elt).findAllChildren( + 'alias', allDull=True) + ), (f"Locale {name} " + "has an archaic top-level alias element") + yield Node(elt, self.__unDistinguishedAttributes) try: name = self.__parentLocale[name] diff --git a/util/locale_database/ldml.py b/util/locale_database/ldml.py index 48d031e8ee8..50a538b8cf1 100644 --- a/util/locale_database/ldml.py +++ b/util/locale_database/ldml.py @@ -233,19 +233,8 @@ class LocaleScanner (object): The tag codes are language, script, territory and variant; an empty value for any of them 
indicates that no value was provided. The values are obtained from the primary file's - top-level element. An Error is raised if any - top-level element of this file has a non-empty source - attribute; that attribute value is mentioned in the error's - message.""" + top-level element.""" root = self.nodes[0] - for alias in root.findAllChildren('alias', allDull=True): - try: - source = alias.dom.attributes['source'].nodeValue - except (KeyError, AttributeError): - pass - else: - raise Error(f'Alias to {source}') - ids = root.findUniqueChild('identity') for code in ('language', 'script', 'territory', 'variant'): for node in ids.findAllChildren(code, allDull=True): @@ -529,15 +518,16 @@ class LocaleScanner (object): yield elt # Process roots separately: otherwise the alias-processing - # is excessive. + # is excessive (and alias only ever shows up in root.xml, + # always with source="locale"). for i, selector in enumerate(tags): tag, attrs = _parseXPath(selector) for alias in tuple(_iterateEach(r.findAllChildren('alias', allDull=True) for r in roots)): - if alias.dom.attributes['source'].nodeValue == 'locale': - replace = alias.dom.attributes['path'].nodeValue.split('/') - retries.append(self.__xpathJoin(tags[:i], replace, tags[i:])) + assert alias.dom.attributes['source'].nodeValue == 'locale', alias + replace = alias.dom.attributes['path'].nodeValue.split('/') + retries.append(self.__xpathJoin(tags[:i], replace, tags[i:])) roots = tuple(_iterateEach(r.findAllChildren(tag, attrs, allDull=allDull)