Check all matches for each XPath when searching
Previously, if we found one element with the required attributes, we would search into it and ignore any later elements that also had those required attributes. This meant that, if the first match didn't contain the child elements we were looking for, we'd fail to find what we sought even when it was present in a later matching element (e.g. one with some ignored attributes). We would then go on to look for a match in a later file, even though there might have been a match we should have found in the earlier file. Check all matches, rather than only the first match in each file. Do the search in each file "in parallel" to save reparsing the XPath. This clears the search code of rather hard-to-follow break/else handling in loops, and currently makes no change to the generated data. Change-Id: I86b010e65b9a1fc1b79e5fdd45a5aeff1ed5d5d5 Reviewed-by: Cristian Maureira-Fredes <cristian.maureira-fredes@qt.io>
This commit is contained in:
parent
89bd12b9ad
commit
963931550d
@ -204,66 +204,25 @@ class LocaleScanner (object):
|
|||||||
def __init__(self, name, nodes, root):
|
def __init__(self, name, nodes, root):
|
||||||
self.name, self.nodes, self.base = name, nodes, root
|
self.name, self.nodes, self.base = name, nodes, root
|
||||||
|
|
||||||
def find(self, xpath, draft = None):
|
def find(self, xpath, default = None, draft = None):
|
||||||
tags = xpath.split('/')
|
"""XPath search for the content of an element.
|
||||||
while True:
|
|
||||||
replace = None
|
|
||||||
for elt in self.nodes:
|
|
||||||
for selector in tags:
|
|
||||||
tag, attrs = _parseXPath(selector)
|
|
||||||
for elt in elt.findAllChildren(tag, attrs):
|
|
||||||
if draft is None or elt.draft <= draft:
|
|
||||||
break # and process the next selector
|
|
||||||
else:
|
|
||||||
break # no child, try next elt in self.nodes
|
|
||||||
else:
|
|
||||||
# processed all selectors
|
|
||||||
try:
|
|
||||||
return elt.dom.firstChild.nodeValue
|
|
||||||
except (AttributeError, KeyError):
|
|
||||||
pass # move on to next elt in self.nodes
|
|
||||||
|
|
||||||
# No match in self.nodes; check root
|
Required argument, xpath, is the XPath to search for. Optional
|
||||||
elt = self.base.root
|
second argument is a default value to use, if no such node is
|
||||||
for i, selector in enumerate(tags):
|
found. Optional third argument is a draft score (see
|
||||||
tag, attrs = _parseXPath(selector)
|
Node.draftScore() for details); if given, leaf elements with
|
||||||
for alias in elt.findAllChildren('alias', allDull = True):
|
higher draft scores are ignored."""
|
||||||
if alias.dom.attributes['source'].nodeValue == 'locale':
|
|
||||||
replace = alias.dom.attributes['path'].nodeValue.split('/')
|
|
||||||
tags = self.__xpathJoin(tags[:i], replace, tags[i:])
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
for elt in elt.findAllChildren(tag, attrs):
|
|
||||||
if draft is None or elt.draft <= draft:
|
|
||||||
break # and process the next selector
|
|
||||||
else:
|
|
||||||
break
|
|
||||||
if replace:
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
# processed all selectors
|
|
||||||
try:
|
|
||||||
return elt.dom.firstChild.nodeValue
|
|
||||||
except (AttributeError, KeyError):
|
|
||||||
# No match
|
|
||||||
pass
|
|
||||||
if not replace:
|
|
||||||
break
|
|
||||||
|
|
||||||
sought = '/'.join(tags)
|
|
||||||
if sought != xpath:
|
|
||||||
sought += ' (for {})'.format(xpath)
|
|
||||||
raise Error('No {} in {}'.format(sought, self.name))
|
|
||||||
|
|
||||||
def findOr(self, xpath, fallback = ''):
|
|
||||||
"""Use a fall-back value if we don't find data.
|
|
||||||
|
|
||||||
Like find, but takes a fall-back value to return instead of
|
|
||||||
raising Error on failure."""
|
|
||||||
try:
|
try:
|
||||||
return self.find(xpath)
|
for elt in self.__find(xpath):
|
||||||
except Error:
|
try:
|
||||||
return fallback
|
if draft is None or elt.draft <= draft:
|
||||||
|
return elt.dom.firstChild.nodeValue
|
||||||
|
except (AttributeError, KeyError):
|
||||||
|
pass
|
||||||
|
except Error as e:
|
||||||
|
if default is None:
|
||||||
|
raise
|
||||||
|
return default
|
||||||
|
|
||||||
def tagCodes(self):
|
def tagCodes(self):
|
||||||
"""Yields four tag codes
|
"""Yields four tag codes
|
||||||
@ -305,9 +264,9 @@ class LocaleScanner (object):
|
|||||||
"""
|
"""
|
||||||
if isoCode:
|
if isoCode:
|
||||||
stem = 'numbers/currencies/currency[{}]/'.format(isoCode)
|
stem = 'numbers/currencies/currency[{}]/'.format(isoCode)
|
||||||
symbol = self.findOr(stem + 'symbol')
|
symbol = self.find(stem + 'symbol', '')
|
||||||
name = ';'.join(
|
name = ';'.join(
|
||||||
self.findOr(stem + 'displayName' + tail)
|
self.find(stem + 'displayName' + tail, '')
|
||||||
for tail in ('',) + tuple(
|
for tail in ('',) + tuple(
|
||||||
'[count={}]'.format(x) for x in ('zero', 'one', 'two', 'few', 'many', 'other')
|
'[count={}]'.format(x) for x in ('zero', 'one', 'two', 'few', 'many', 'other')
|
||||||
)) + ';'
|
)) + ';'
|
||||||
@ -409,13 +368,13 @@ class LocaleScanner (object):
|
|||||||
yield 'languageEndonym', ''
|
yield 'languageEndonym', ''
|
||||||
|
|
||||||
yield ('countryEndonym',
|
yield ('countryEndonym',
|
||||||
self.findOr('localeDisplayNames/territories/territory[{}]'
|
self.find('localeDisplayNames/territories/territory[{}]'
|
||||||
.format(country)))
|
.format(country), ''))
|
||||||
|
|
||||||
def unitData(self):
|
def unitData(self):
|
||||||
yield ('byte_unit',
|
yield ('byte_unit',
|
||||||
self.findOr('units/unitLength[long]/unit[digital-byte]/displayName',
|
self.find('units/unitLength[long]/unit[digital-byte]/displayName',
|
||||||
'bytes'))
|
'bytes'))
|
||||||
|
|
||||||
unit = self.__findUnit('', 'B')
|
unit = self.__findUnit('', 'B')
|
||||||
cache = [] # Populated by the SI call, to give hints to the IEC call
|
cache = [] # Populated by the SI call, to give hints to the IEC call
|
||||||
@ -455,6 +414,51 @@ class LocaleScanner (object):
|
|||||||
('narrow', 'format', 'narrow'),
|
('narrow', 'format', 'narrow'),
|
||||||
) # Used for month and day names
|
) # Used for month and day names
|
||||||
|
|
||||||
|
def __find(self, xpath):
    """Generator over every element matching an XPath.

    Required argument, xpath, is a '/'-joined sequence of selectors,
    each of which is parsed by _parseXPath() into a tag and
    attributes.

    For each candidate path, first yields all matches found by
    descending from self.nodes, then all matches found by
    descending from self.base.root. While descending from the
    root, any 'alias' child whose source attribute is 'locale'
    queues a further candidate path, built by self.__xpathJoin()
    from the tags consumed so far, the alias's path and the
    remaining tags; queued paths are tried on later passes.

    This generator never finishes quietly: once every candidate
    path has been tried it raises Error, so a caller that wants a
    result must stop iterating when it has one. It also raises
    Error as soon as the root search runs out of matching
    children with no alias path left to fall back on."""

    def describe(path):
        # Name the path searched for, mentioning the original xpath
        # when alias substitution has made the two differ.
        sought = '/'.join(path)
        if sought != xpath:
            sought += ' (for {})'.format(xpath)
        return sought

    retries = [xpath.split('/')]
    while retries:
        tags, elts, roots = retries.pop(), self.nodes, (self.base.root,)

        # Narrow elts one selector at a time, keeping every match
        # at each level rather than only the first.
        for selector in tags:
            tag, attrs = _parseXPath(selector)
            elts = tuple(_iterateEach(e.findAllChildren(tag, attrs) for e in elts))
            if not elts:
                break

        else: # Found matching elements
            # Possibly filter elts to prefer the least drafty ?
            for elt in elts:
                yield elt

        # Process roots separately: otherwise the alias-processing
        # is excessive.
        for i, selector in enumerate(tags):
            tag, attrs = _parseXPath(selector)

            # Only iterated once, so no need to materialise the aliases.
            for alias in _iterateEach(r.findAllChildren('alias', allDull=True)
                                      for r in roots):
                if alias.dom.attributes['source'].nodeValue == 'locale':
                    replace = alias.dom.attributes['path'].nodeValue.split('/')
                    retries.append(self.__xpathJoin(tags[:i], replace, tags[i:]))

            roots = tuple(_iterateEach(r.findAllChildren(tag, attrs) for r in roots))
            if not roots:
                if retries: # Let outer loop fall back on an alias path:
                    break
                raise Error('All lack child {} for {} in {}'.format(
                        selector, describe(tags), self.name))

        else: # Found matching elements
            for elt in roots:
                yield elt

    # All candidate paths exhausted; tags is the last one tried.
    raise Error('No {} in {}'.format(describe(tags), self.name))
|
||||||
|
|
||||||
def __findUnit(self, keySuffix, quantify, fallback=''):
|
def __findUnit(self, keySuffix, quantify, fallback=''):
|
||||||
# The displayName for a quantified unit in en.xml is kByte
|
# The displayName for a quantified unit in en.xml is kByte
|
||||||
# (even for unitLength[narrow]) instead of kB (etc.), so
|
# (even for unitLength[narrow]) instead of kB (etc.), so
|
||||||
|
Loading…
x
Reference in New Issue
Block a user