Check all matches for each XPath when searching

Previously, if we found one element with required attributes, we would
search into it and ignore any later elements also with those required
attributes. This meant that, if the first didn't contain the child
elements we were looking for, we'd fail to find what we sought, if it
was in a later matching element (e.g. with some ignored attributes).
We would then go on to look for a match in a later file, where there
might have been a match we should have found in the earlier file.

Check all matches, rather than only the first match in each file.  Do
the search in each file "in parallel" to save reparsing the XPath.
This clears the search code of rather hard-to-follow break/else
handling in loops; and currently makes no change to the generated
data.

Change-Id: I86b010e65b9a1fc1b79e5fdd45a5aeff1ed5d5d5
Reviewed-by: Cristian Maureira-Fredes <cristian.maureira-fredes@qt.io>
This commit is contained in:
Edward Welbourne 2020-03-16 12:31:38 +01:00
parent 89bd12b9ad
commit 963931550d

View File

@ -204,66 +204,25 @@ class LocaleScanner (object):
def __init__(self, name, nodes, root): def __init__(self, name, nodes, root):
self.name, self.nodes, self.base = name, nodes, root self.name, self.nodes, self.base = name, nodes, root
def find(self, xpath, draft = None): def find(self, xpath, default = None, draft = None):
tags = xpath.split('/') """XPath search for the content of an element.
while True:
replace = None
for elt in self.nodes:
for selector in tags:
tag, attrs = _parseXPath(selector)
for elt in elt.findAllChildren(tag, attrs):
if draft is None or elt.draft <= draft:
break # and process the next selector
else:
break # no child, try next elt in self.nodes
else:
# processed all selectors
try:
return elt.dom.firstChild.nodeValue
except (AttributeError, KeyError):
pass # move on to next elt in self.nodes
# No match in self.nodes; check root Required argument, xpath, is the XPath to search for. Optional
elt = self.base.root second argument is a default value to use, if no such node is
for i, selector in enumerate(tags): found. Optional third argument is a draft score (see
tag, attrs = _parseXPath(selector) Node.draftScore() for details); if given, leaf elements with
for alias in elt.findAllChildren('alias', allDull = True): higher draft scores are ignored."""
if alias.dom.attributes['source'].nodeValue == 'locale':
replace = alias.dom.attributes['path'].nodeValue.split('/')
tags = self.__xpathJoin(tags[:i], replace, tags[i:])
break
else:
for elt in elt.findAllChildren(tag, attrs):
if draft is None or elt.draft <= draft:
break # and process the next selector
else:
break
if replace:
break
else:
# processed all selectors
try: try:
for elt in self.__find(xpath):
try:
if draft is None or elt.draft <= draft:
return elt.dom.firstChild.nodeValue return elt.dom.firstChild.nodeValue
except (AttributeError, KeyError): except (AttributeError, KeyError):
# No match
pass pass
if not replace: except Error as e:
break if default is None:
raise
sought = '/'.join(tags) return default
if sought != xpath:
sought += ' (for {})'.format(xpath)
raise Error('No {} in {}'.format(sought, self.name))
def findOr(self, xpath, fallback = ''):
    """Look up xpath, substituting fallback when the search fails.

    Delegates to find(xpath). If that raises Error, the fallback
    (an empty string unless the caller supplies one) is returned
    instead of letting the Error propagate."""
    try:
        result = self.find(xpath)
    except Error:
        # find() signals "no such data" by raising; callers of
        # findOr() want a value back in that case.
        result = fallback
    return result
def tagCodes(self): def tagCodes(self):
"""Yields four tag codes """Yields four tag codes
@ -305,9 +264,9 @@ class LocaleScanner (object):
""" """
if isoCode: if isoCode:
stem = 'numbers/currencies/currency[{}]/'.format(isoCode) stem = 'numbers/currencies/currency[{}]/'.format(isoCode)
symbol = self.findOr(stem + 'symbol') symbol = self.find(stem + 'symbol', '')
name = ';'.join( name = ';'.join(
self.findOr(stem + 'displayName' + tail) self.find(stem + 'displayName' + tail, '')
for tail in ('',) + tuple( for tail in ('',) + tuple(
'[count={}]'.format(x) for x in ('zero', 'one', 'two', 'few', 'many', 'other') '[count={}]'.format(x) for x in ('zero', 'one', 'two', 'few', 'many', 'other')
)) + ';' )) + ';'
@ -409,12 +368,12 @@ class LocaleScanner (object):
yield 'languageEndonym', '' yield 'languageEndonym', ''
yield ('countryEndonym', yield ('countryEndonym',
self.findOr('localeDisplayNames/territories/territory[{}]' self.find('localeDisplayNames/territories/territory[{}]'
.format(country))) .format(country), ''))
def unitData(self): def unitData(self):
yield ('byte_unit', yield ('byte_unit',
self.findOr('units/unitLength[long]/unit[digital-byte]/displayName', self.find('units/unitLength[long]/unit[digital-byte]/displayName',
'bytes')) 'bytes'))
unit = self.__findUnit('', 'B') unit = self.__findUnit('', 'B')
@ -455,6 +414,51 @@ class LocaleScanner (object):
('narrow', 'format', 'narrow'), ('narrow', 'format', 'narrow'),
) # Used for month and day names ) # Used for month and day names
def __find(self, xpath):
    """Generate every element matching xpath, then raise Error.

    The xpath is split on '/' into selectors. Each candidate path
    is first walked down from self.nodes, keeping all matching
    children at each step (not just the first), and every element
    reached at the end of the path is yielded. The same path is
    then walked down from self.base.root; at each step there, any
    'alias' child whose source attribute is 'locale' contributes a
    rewritten path (built by __xpathJoin from the tags before this
    step, the alias's path and the tags from this step on), which
    is queued for a later pass.

    This generator never finishes normally: once all queued paths
    have been processed it raises Error, so callers that want every
    match must be ready to catch that after consuming the elements.
    Error is also raised, with a more specific message, when the
    walk from the root runs out of matches and no alias path is
    queued to fall back on."""
    def describe(path):
        # Human-readable form of the path actually searched; when
        # alias substitution has changed it, mention the original.
        shown = '/'.join(path)
        if shown != xpath:
            shown += ' (for {})'.format(xpath)
        return shown

    pending = [ xpath.split('/') ]
    while pending:
        tags = pending.pop()
        found = self.nodes
        tops = (self.base.root,)

        for selector in tags:
            tag, attrs = _parseXPath(selector)
            found = tuple(_iterateEach(node.findAllChildren(tag, attrs)
                                       for node in found))
            if not found:
                break
        # A break above leaves found empty, so this only yields
        # when every selector in tags was matched.
        # Possibly filter found to prefer the least drafty ?
        for elt in found:
            yield elt

        # The root is handled on its own: doing the alias-processing
        # for every node would be excessive.
        for depth, selector in enumerate(tags):
            tag, attrs = _parseXPath(selector)

            aliases = tuple(_iterateEach(top.findAllChildren('alias', allDull=True)
                                         for top in tops))
            for alias in aliases:
                if alias.dom.attributes['source'].nodeValue != 'locale':
                    continue
                replace = alias.dom.attributes['path'].nodeValue.split('/')
                pending.append(self.__xpathJoin(tags[:depth], replace, tags[depth:]))

            tops = tuple(_iterateEach(top.findAllChildren(tag, attrs)
                                      for top in tops))
            if tops:
                continue
            if pending:
                # Let the outer loop fall back on an alias path.
                break
            raise Error('All lack child {} for {} in {}'.format(
                    selector, describe(tags), self.name))
        # As above: tops is empty after a break, so elements are
        # only yielded when the whole path matched under the root.
        for elt in tops:
            yield elt

    raise Error('No {} in {}'.format(describe(tags), self.name))
def __findUnit(self, keySuffix, quantify, fallback=''): def __findUnit(self, keySuffix, quantify, fallback=''):
# The displayName for a quantified unit in en.xml is kByte # The displayName for a quantified unit in en.xml is kByte
# (even for unitLength[narrow]) instead of kB (etc.), so # (even for unitLength[narrow]) instead of kB (etc.), so