Check all matches for each XPath when searching
Previously, if we found one element with required attributes, we would search into it and ignore any later elements also with those required attributes. This meant that, if the first didn't contain the child elements we were looking for, we'd fail to find what we sought, if it was in a later matching element (e.g. with some ignored attributes). We would then go on to look for a match in a later file, where there might have been a match we should have found in the earlier file. Check all matches, rather than only the first match in each file. Do the search in each file "in parallel" to save reparsing the XPath. This clears the search code of rather hard-to-follow break/else handling in loops; and currently makes no change to the generated data. Change-Id: I86b010e65b9a1fc1b79e5fdd45a5aeff1ed5d5d5 Reviewed-by: Cristian Maureira-Fredes <cristian.maureira-fredes@qt.io>
This commit is contained in:
parent
89bd12b9ad
commit
963931550d
@ -204,66 +204,25 @@ class LocaleScanner (object):
|
||||
def __init__(self, name, nodes, root):
|
||||
self.name, self.nodes, self.base = name, nodes, root
|
||||
|
||||
def find(self, xpath, draft = None):
|
||||
tags = xpath.split('/')
|
||||
while True:
|
||||
replace = None
|
||||
for elt in self.nodes:
|
||||
for selector in tags:
|
||||
tag, attrs = _parseXPath(selector)
|
||||
for elt in elt.findAllChildren(tag, attrs):
|
||||
if draft is None or elt.draft <= draft:
|
||||
break # and process the next selector
|
||||
else:
|
||||
break # no child, try next elt in self.nodes
|
||||
else:
|
||||
# processed all selectors
|
||||
try:
|
||||
return elt.dom.firstChild.nodeValue
|
||||
except (AttributeError, KeyError):
|
||||
pass # move on to next elt in self.nodes
|
||||
def find(self, xpath, default = None, draft = None):
|
||||
"""XPath search for the content of an element.
|
||||
|
||||
# No match in self.nodes; check root
|
||||
elt = self.base.root
|
||||
for i, selector in enumerate(tags):
|
||||
tag, attrs = _parseXPath(selector)
|
||||
for alias in elt.findAllChildren('alias', allDull = True):
|
||||
if alias.dom.attributes['source'].nodeValue == 'locale':
|
||||
replace = alias.dom.attributes['path'].nodeValue.split('/')
|
||||
tags = self.__xpathJoin(tags[:i], replace, tags[i:])
|
||||
break
|
||||
else:
|
||||
for elt in elt.findAllChildren(tag, attrs):
|
||||
if draft is None or elt.draft <= draft:
|
||||
break # and process the next selector
|
||||
else:
|
||||
break
|
||||
if replace:
|
||||
break
|
||||
else:
|
||||
# processed all selectors
|
||||
try:
|
||||
return elt.dom.firstChild.nodeValue
|
||||
except (AttributeError, KeyError):
|
||||
# No match
|
||||
pass
|
||||
if not replace:
|
||||
break
|
||||
|
||||
sought = '/'.join(tags)
|
||||
if sought != xpath:
|
||||
sought += ' (for {})'.format(xpath)
|
||||
raise Error('No {} in {}'.format(sought, self.name))
|
||||
|
||||
def findOr(self, xpath, fallback = ''):
|
||||
"""Use a fall-back value if we don't find data.
|
||||
|
||||
Like find, but takes a fall-back value to return instead of
|
||||
raising Error on failure."""
|
||||
Required argument, xpath, is the XPath to search for. Optional
|
||||
second argument is a default value to use, if no such node is
|
||||
found. Optional third argument is a draft score (see
|
||||
Node.draftScore() for details); if given, leaf elements with
|
||||
higher draft scores are ignored."""
|
||||
try:
|
||||
return self.find(xpath)
|
||||
except Error:
|
||||
return fallback
|
||||
for elt in self.__find(xpath):
|
||||
try:
|
||||
if draft is None or elt.draft <= draft:
|
||||
return elt.dom.firstChild.nodeValue
|
||||
except (AttributeError, KeyError):
|
||||
pass
|
||||
except Error as e:
|
||||
if default is None:
|
||||
raise
|
||||
return default
|
||||
|
||||
def tagCodes(self):
|
||||
"""Yields four tag codes
|
||||
@ -305,9 +264,9 @@ class LocaleScanner (object):
|
||||
"""
|
||||
if isoCode:
|
||||
stem = 'numbers/currencies/currency[{}]/'.format(isoCode)
|
||||
symbol = self.findOr(stem + 'symbol')
|
||||
symbol = self.find(stem + 'symbol', '')
|
||||
name = ';'.join(
|
||||
self.findOr(stem + 'displayName' + tail)
|
||||
self.find(stem + 'displayName' + tail, '')
|
||||
for tail in ('',) + tuple(
|
||||
'[count={}]'.format(x) for x in ('zero', 'one', 'two', 'few', 'many', 'other')
|
||||
)) + ';'
|
||||
@ -409,13 +368,13 @@ class LocaleScanner (object):
|
||||
yield 'languageEndonym', ''
|
||||
|
||||
yield ('countryEndonym',
|
||||
self.findOr('localeDisplayNames/territories/territory[{}]'
|
||||
.format(country)))
|
||||
self.find('localeDisplayNames/territories/territory[{}]'
|
||||
.format(country), ''))
|
||||
|
||||
def unitData(self):
|
||||
yield ('byte_unit',
|
||||
self.findOr('units/unitLength[long]/unit[digital-byte]/displayName',
|
||||
'bytes'))
|
||||
self.find('units/unitLength[long]/unit[digital-byte]/displayName',
|
||||
'bytes'))
|
||||
|
||||
unit = self.__findUnit('', 'B')
|
||||
cache = [] # Populated by the SI call, to give hints to the IEC call
|
||||
@ -455,6 +414,51 @@ class LocaleScanner (object):
|
||||
('narrow', 'format', 'narrow'),
|
||||
) # Used for month and day names
|
||||
|
||||
def __find(self, xpath):
|
||||
retries = [ xpath.split('/') ]
|
||||
while retries:
|
||||
tags, elts, roots = retries.pop(), self.nodes, (self.base.root,)
|
||||
for selector in tags:
|
||||
tag, attrs = _parseXPath(selector)
|
||||
elts = tuple(_iterateEach(e.findAllChildren(tag, attrs) for e in elts))
|
||||
if not elts:
|
||||
break
|
||||
|
||||
else: # Found matching elements
|
||||
# Possibly filter elts to prefer the least drafty ?
|
||||
for elt in elts:
|
||||
yield elt
|
||||
|
||||
# Process roots separately: otherwise the alias-processing
|
||||
# is excessive.
|
||||
for i, selector in enumerate(tags):
|
||||
tag, attrs = _parseXPath(selector)
|
||||
|
||||
for alias in tuple(_iterateEach(r.findAllChildren('alias', allDull=True)
|
||||
for r in roots)):
|
||||
if alias.dom.attributes['source'].nodeValue == 'locale':
|
||||
replace = alias.dom.attributes['path'].nodeValue.split('/')
|
||||
retries.append(self.__xpathJoin(tags[:i], replace, tags[i:]))
|
||||
|
||||
roots = tuple(_iterateEach(r.findAllChildren(tag, attrs) for r in roots))
|
||||
if not roots:
|
||||
if retries: # Let outer loop fall back on an alias path:
|
||||
break
|
||||
sought = '/'.join(tags)
|
||||
if sought != xpath:
|
||||
sought += ' (for {})'.format(xpath)
|
||||
raise Error('All lack child {} for {} in {}'.format(
|
||||
selector, sought, self.name))
|
||||
|
||||
else: # Found matching elements
|
||||
for elt in roots:
|
||||
yield elt
|
||||
|
||||
sought = '/'.join(tags)
|
||||
if sought != xpath:
|
||||
sought += ' (for {})'.format(xpath)
|
||||
raise Error('No {} in {}'.format(sought, self.name))
|
||||
|
||||
def __findUnit(self, keySuffix, quantify, fallback=''):
|
||||
# The displayName for a quantified unit in en.xml is kByte
|
||||
# (even for unitLength[narrow]) instead of kB (etc.), so
|
||||
|
Loading…
x
Reference in New Issue
Block a user