tools: refactor js2c.py for maximal Python3 compatibility

* add explicit `--target` argument to enable succinct gyp declaration
* simplify js2c semantics

PR-URL: https://github.com/nodejs/node/pull/25518
Reviewed-By: Christian Clauss <cclauss@me.com>
This commit is contained in:
Refael Ackermann 2019-01-14 18:43:16 -05:00
parent a49ab0f89e
commit bfbc035033
2 changed files with 166 additions and 125 deletions

View File

@ -859,6 +859,8 @@
'action_name': 'node_js2c', 'action_name': 'node_js2c',
'process_outputs_as_sources': 1, 'process_outputs_as_sources': 1,
'inputs': [ 'inputs': [
# Put the code first so it's a dependency and can be used for invocation.
'tools/js2c.py',
'<@(library_files)', '<@(library_files)',
'config.gypi', 'config.gypi',
'tools/js2c_macros/check_macros.py' 'tools/js2c_macros/check_macros.py'
@ -878,9 +880,8 @@
}] }]
], ],
'action': [ 'action': [
'python', 'tools/js2c.py', 'python', '<@(_inputs)',
'<@(_outputs)', '--target', '<@(_outputs)',
'<@(_inputs)',
], ],
}, },
], ],

View File

@ -27,37 +27,36 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# This is a utility for converting JavaScript source code into C-style """
# char arrays. It is used for embedded JavaScript code in the V8 This is a utility for converting JavaScript source code into uint16_t[],
# library. that are used for embedding JavaScript code into the Node.js binary.
"""
import argparse
import os import os
import re import re
import sys import functools
import codecs
def ToCArray(elements, step=10):
slices = (elements[i:i+step] for i in range(0, len(elements), step))
slices = map(lambda s: ','.join(str(x) for x in s), slices)
return ',\n'.join(slices)
def ReadFile(filename): def ReadFile(filename):
file = open(filename, "rt") if is_verbose:
try: print(filename)
lines = file.read() with codecs.open(filename, "r", "utf-8") as f:
finally: lines = f.read()
file.close() return lines
return lines
def ReadLines(filename): def ReadMacroFiles(filenames):
"""
:rtype: List(str)
"""
result = [] result = []
for line in open(filename, "rt"): for filename in filenames:
if '#' in line: with open(filename, "rt") as f:
line = line[:line.index('#')] # strip python-like comments and whitespace padding
line = line.strip() lines = [line.split('#')[0].strip() for line in f]
if len(line) > 0: # filter empty lines
result.append(line) result.extend(filter(bool, lines))
return result return result
@ -70,6 +69,7 @@ def ExpandConstants(lines, constants):
def ExpandMacros(lines, macros): def ExpandMacros(lines, macros):
def expander(s): def expander(s):
return ExpandMacros(s, macros) return ExpandMacros(s, macros)
for name, macro in macros.items(): for name, macro in macros.items():
name_pattern = re.compile("\\b%s\\(" % name) name_pattern = re.compile("\\b%s\\(" % name)
pattern_match = name_pattern.search(lines, 0) pattern_match = name_pattern.search(lines, 0)
@ -82,13 +82,15 @@ def ExpandMacros(lines, macros):
last_match = end last_match = end
arg_index = [0] # Wrap state into array, to work around Python "scoping" arg_index = [0] # Wrap state into array, to work around Python "scoping"
mapping = {} mapping = {}
def add_arg(str):
def add_arg(s):
# Remember to expand recursively in the arguments # Remember to expand recursively in the arguments
if arg_index[0] >= len(macro.args): if arg_index[0] >= len(macro.args):
return return
replacement = expander(str.strip()) replacement = expander(s.strip())
mapping[macro.args[arg_index[0]]] = replacement mapping[macro.args[arg_index[0]]] = replacement
arg_index[0] += 1 arg_index[0] += 1
while end < len(lines) and height > 0: while end < len(lines) and height > 0:
# We don't count commas at higher nesting levels. # We don't count commas at higher nesting levels.
if lines[end] == ',' and height == 1: if lines[end] == ',' and height == 1:
@ -100,10 +102,11 @@ def ExpandMacros(lines, macros):
height = height - 1 height = height - 1
end = end + 1 end = end + 1
# Remember to add the last match. # Remember to add the last match.
add_arg(lines[last_match:end-1]) add_arg(lines[last_match:end - 1])
if arg_index[0] < len(macro.args) -1: if arg_index[0] < len(macro.args) - 1:
lineno = lines.count(os.linesep, 0, start) + 1 lineno = lines.count(os.linesep, 0, start) + 1
raise Exception('line %s: Too few arguments for macro "%s"' % (lineno, name)) raise Exception(
'line %s: Too few arguments for macro "%s"' % (lineno, name))
result = macro.expand(mapping) result = macro.expand(mapping)
# Replace the occurrence of the macro with the expansion # Replace the occurrence of the macro with the expansion
lines = lines[:start] + result + lines[end:] lines = lines[:start] + result + lines[end:]
@ -115,33 +118,37 @@ class TextMacro:
def __init__(self, args, body): def __init__(self, args, body):
self.args = args self.args = args
self.body = body self.body = body
def expand(self, mapping): def expand(self, mapping):
result = self.body result = self.body
for key, value in mapping.items(): for key, value in mapping.items():
result = result.replace(key, value) result = result.replace(key, value)
return result return result
class PythonMacro: class PythonMacro:
def __init__(self, args, fun): def __init__(self, args, fun):
self.args = args self.args = args
self.fun = fun self.fun = fun
def expand(self, mapping): def expand(self, mapping):
args = [] args = []
for arg in self.args: for arg in self.args:
args.append(mapping[arg]) args.append(mapping[arg])
return str(self.fun(*args)) return str(self.fun(*args))
CONST_PATTERN = re.compile('^const\s+([a-zA-Z0-9_]+)\s*=\s*([^;]*);$') CONST_PATTERN = re.compile('^const\s+([a-zA-Z0-9_]+)\s*=\s*([^;]*);$')
MACRO_PATTERN = re.compile('^macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$') MACRO_PATTERN = re.compile('^macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')
PYTHON_MACRO_PATTERN = re.compile('^python\s+macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$') PYTHON_MACRO_PATTERN = re.compile('^python\s+macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')
def ReadMacros(lines):
constants = { } def ReadMacros(macro_files):
macros = { } lines = ReadMacroFiles(macro_files)
constants = {}
macros = {}
for line in lines: for line in lines:
hash = line.find('#') line = line.split('#')[0].strip()
if hash != -1: line = line[:hash]
line = line.strip()
if len(line) == 0: if len(line) == 0:
continue continue
const_match = CONST_PATTERN.match(line) const_match = CONST_PATTERN.match(line)
@ -153,20 +160,20 @@ def ReadMacros(lines):
macro_match = MACRO_PATTERN.match(line) macro_match = MACRO_PATTERN.match(line)
if macro_match: if macro_match:
name = macro_match.group(1) name = macro_match.group(1)
args = [s.strip() for s in macro_match.group(2).split(',')] args = [p.strip() for p in macro_match.group(2).split(',')]
body = macro_match.group(3).strip() body = macro_match.group(3).strip()
macros[name] = TextMacro(args, body) macros[name] = TextMacro(args, body)
else: else:
python_match = PYTHON_MACRO_PATTERN.match(line) python_match = PYTHON_MACRO_PATTERN.match(line)
if python_match: if python_match:
name = python_match.group(1) name = python_match.group(1)
args = [s.strip() for s in python_match.group(2).split(',')] args = [p.strip() for p in macro_match.group(2).split(',')]
body = python_match.group(3).strip() body = python_match.group(3).strip()
fun = eval("lambda " + ",".join(args) + ': ' + body) fun = eval("lambda " + ",".join(args) + ': ' + body)
macros[name] = PythonMacro(args, fun) macros[name] = PythonMacro(args, fun)
else: else:
raise Exception("Illegal line: " + line) raise Exception("Illegal line: " + line)
return (constants, macros) return constants, macros
TEMPLATE = """ TEMPLATE = """
@ -177,14 +184,14 @@ namespace node {{
namespace native_module {{ namespace native_module {{
{definitions} {0}
void NativeModuleLoader::LoadJavaScriptSource() {{ void NativeModuleLoader::LoadJavaScriptSource() {{
{initializers} {1}
}} }}
UnionBytes NativeModuleLoader::GetConfig() {{ UnionBytes NativeModuleLoader::GetConfig() {{
return UnionBytes(config_raw, arraysize(config_raw)); // config.gypi return UnionBytes(config_raw, {2}); // config.gypi
}} }}
}} // namespace native_module }} // namespace native_module
@ -192,104 +199,137 @@ UnionBytes NativeModuleLoader::GetConfig() {{
}} // namespace node }} // namespace node
""" """
ONE_BYTE_STRING = """
static const uint8_t {var}[] = {{ {data} }};
"""
TWO_BYTE_STRING = """ TWO_BYTE_STRING = """
static const uint16_t {var}[] = {{ {data} }}; static const uint16_t {0}[] = {{
{1}
}};
""" """
INITIALIZER = 'source_.emplace("{0}", UnionBytes{{{1}, {2}}});'
INITIALIZER = """ CONFIG_GYPI_ID = 'config_raw'
source_.emplace(
"{module}",
UnionBytes({var}, arraysize({var}))
);
"""
def JS2C(source, target): SLUGGER_RE =re.compile('[.\-/]')
modules = []
consts = {}
macros = {}
macro_lines = []
for s in source: is_verbose = False
if (os.path.split(str(s))[1]).endswith('macros.py'):
macro_lines.extend(ReadLines(str(s)))
else:
modules.append(s)
def GetDefinition(var, source, step=30):
  """
  Build the array definition for `source` under the variable name `var`.

  The text is encoded as UTF-16LE and every 16-bit unit is written as a
  decimal number; the numbers are substituted into TWO_BYTE_STRING.

  :type var: str
  :type source: str
  :type step: int
  :rtype: tuple
  :return: (definition text, number of 16-bit units)
  """
  utf16_bytes = bytearray(source, 'utf-16le')
  # Pair every low byte with the high byte that follows it (little endian).
  code_points = [
      low + (high * 256)
      for low, high in zip(utf16_bytes[0::2], utf16_bytes[1::2])
  ]
  # Pad each number to 3 characters so the columns line up when debugging.
  padded = ['%3s' % point for point in code_points]
  # Emit at most `step` numbers per output line.
  rows = []
  for offset in range(0, len(padded), step):
    rows.append(','.join(padded[offset:offset + step]))
  array_content = ',\n'.join(rows)
  return TWO_BYTE_STRING.format(var, array_content), len(code_points)
def AddModule(filename, consts, macros, definitions, initializers):
  """
  Process one source file and record its generated snippets.

  The file is read, constants and macros are expanded, and the resulting
  array definition and `INITIALIZER` line are appended to `definitions`
  and `initializers` respectively (both lists are modified in place).

  :type filename: str
  :type definitions: list
  :type initializers: list
  """
  expanded = ExpandMacros(
      ExpandConstants(ReadFile(filename), consts), macros)
  module_id = NormalizeFileName(filename)
  # The variable name is the module id with '.', '-' and '/' slugged to '_'.
  array_name = SLUGGER_RE.sub('_', module_id) + '_raw'
  array_def, length = GetDefinition(array_name, expanded)
  init_line = INITIALIZER.format(module_id, array_name, length)
  definitions.append(array_def)
  initializers.append(init_line)
def NormalizeFileName(filename):
  """
  Turn a source file path into its '/'-separated module id.

  * Paths under `deps` are prefixed with `internal`.
  * For any other path the leading directory (e.g. `lib`) is dropped.
  * A path with no directory component keeps its name instead of being
    reduced to an empty id.
  * The file extension is removed.

  :type filename: str
  :rtype: str
  """
  # Some platforms accept a second separator (`/` on Windows); fold it into
  # the primary one so such paths are split as well.
  if os.path.altsep:
    filename = filename.replace(os.path.altsep, os.path.sep)
  split = filename.split(os.path.sep)
  if split[0] == 'deps':
    split = ['internal'] + split
  elif len(split) > 1:  # `lib/**/*.js` so drop the 'lib' part
    split = split[1:]
  # else: bare file name, there is no leading directory to drop.
  filename = '/'.join(split)
  return os.path.splitext(filename)[0]
def JS2C(source_files, target):
# Process input from all *macro.py files # Process input from all *macro.py files
(consts, macros) = ReadMacros(macro_lines) consts, macros = ReadMacros(source_files['.py'])
# Build source code lines # Build source code lines
definitions = [] definitions = []
initializers = [] initializers = []
def GetDefinition(var, source): for filename in source_files['.js']:
# Treat non-ASCII as UTF-8 and convert it to UTF-16. AddModule(filename, consts, macros, definitions, initializers)
if any(ord(c) > 127 for c in source):
source = map(ord, source.decode('utf-8').encode('utf-16be'))
source = [source[i] * 256 + source[i+1] for i in range(0, len(source), 2)]
source = ToCArray(source)
return TWO_BYTE_STRING.format(var=var, data=source)
else:
source = ToCArray(map(ord, source), step=20)
return ONE_BYTE_STRING.format(var=var, data=source)
def AddModule(module, source): config_def, config_size = handle_config_gypi(source_files['config.gypi'])
var = '%s_raw' % (module.replace('-', '_').replace('/', '_')) definitions.append(config_def)
definition = GetDefinition(var, source)
initializer = INITIALIZER.format(module=module,
var=var)
definitions.append(definition)
initializers.append(initializer)
for name in modules:
lines = ReadFile(str(name))
lines = ExpandConstants(lines, consts)
lines = ExpandMacros(lines, macros)
# On Windows, "./foo.bar" in the .gyp file is passed as "foo.bar"
# so don't assume there is always a slash in the file path.
if '/' in name or '\\' in name:
split = re.split('/|\\\\', name)
if split[0] == 'deps':
split = ['internal'] + split
else:
split = split[1:]
name = '/'.join(split)
# if its a gypi file we're going to want it as json
# later on anyway, so get it out of the way now
if name.endswith('.gypi'):
# Currently only config.gypi is allowed
assert name == 'config.gypi'
lines = re.sub(r'\'true\'', 'true', lines)
lines = re.sub(r'\'false\'', 'false', lines)
lines = re.sub(r'#.*?\n', '', lines)
lines = re.sub(r'\'', '"', lines)
definition = GetDefinition('config_raw', lines)
definitions.append(definition)
else:
AddModule(name.split('.', 1)[0], lines)
# Emit result # Emit result
output = open(str(target[0]), "w") definitions = ''.join(definitions)
output.write( initializers = '\n '.join(initializers)
TEMPLATE.format(definitions=''.join(definitions), out = TEMPLATE.format(definitions, initializers, config_size)
initializers=''.join(initializers))) write_if_chaged(out, target)
output.close()
def handle_config_gypi(config_filename):
  """
  Read the gypi config file and build its array definition.

  The file content is passed through `jsonify` first, since it is going to
  be wanted as JSON later on anyway.

  :type config_filename: str
  :rtype: tuple
  :return: whatever `GetDefinition` yields for CONFIG_GYPI_ID, as a pair
  """
  as_json = jsonify(ReadFile(config_filename))
  definition, size = GetDefinition(CONFIG_GYPI_ID, as_json)
  return definition, size
def jsonify(config):
  """
  Apply the textual rewrites that turn gypi-style text into JSON-style text.

  :type config: str
  :rtype: str
  """
  # 1. strip comments: everything from a `#` up to and including the newline
  without_comments = re.sub(r'#.*?\n', '', config)
  # 2. normalize string literals from ' into "
  double_quoted = without_comments.replace('\'', '"')
  # 3. turn pseudo-boolean strings into Booleans
  for quoted, literal in (('"true"', 'true'), ('"false"', 'false')):
    double_quoted = double_quoted.replace(quoted, literal)
  return double_quoted
def write_if_chaged(content, target):
  """
  Write `content` to the file `target` unless it already holds that text.

  A missing file is treated as if it held the empty string, so empty
  `content` does not create the file.

  :type content: str
  :type target: str
  """
  previous = ''
  if os.path.exists(target):
    with open(target, 'rt') as existing:
      previous = existing.read()
  # Only touch the file when the text actually differs.
  if previous != content:
    with open(target, "wt") as output:
      output.write(content)
def SourceFileByExt(files_by_ext, filename):
  """
  Reducer step: file `filename` under its extension in `files_by_ext`.

  The dict is updated in place and also returned, so the function can be
  used as the callable of `functools.reduce`.

  :type files_by_ext: dict
  :type filename: str
  :rtype: dict
  """
  _, extension = os.path.splitext(filename)
  if extension not in files_by_ext:
    files_by_ext[extension] = []
  files_by_ext[extension].append(filename)
  return files_by_ext
def main(): def main():
natives = sys.argv[1] parser = argparse.ArgumentParser(
source_files = sys.argv[2:] description='Convert code files into `uint16_t[]`s',
if source_files[-2] == '-t': fromfile_prefix_chars='@'
global TEMPLATE )
TEMPLATE = source_files[-1] parser.add_argument('--target', help='output file')
source_files = source_files[:-2] parser.add_argument('--verbose', action='store_true', help='output file')
JS2C(source_files, [natives]) parser.add_argument('sources', nargs='*', help='input files')
options = parser.parse_args()
global is_verbose
is_verbose = options.verbose
source_files = functools.reduce(SourceFileByExt, options.sources, {})
# Should have exactly 3 types: `.js`, `.py`, and `.gypi`
assert len(source_files) == 3
# Currently config.gypi is the only `.gypi` file allowed
assert source_files['.gypi'] == ['config.gypi']
source_files['config.gypi'] = source_files.pop('.gypi')[0]
JS2C(source_files, options.target)
if __name__ == "__main__": if __name__ == "__main__":
main() main()