src: support UTF-8 in compiled-in JS source files
Detect when source files in lib/ contain non-ASCII characters. Decode such files as UTF-8 and store them as UTF-16 in the binary so they can be used as external string resources without non-ASCII characters getting mangled.
Fixes: https://github.com/nodejs/node/issues/10673
PR-URL: https://github.com/nodejs/node/pull/11129
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: James M Snell <jasnell@gmail.com>
This commit is contained in:
parent
f2023d7b6f
commit
c30fc8d495
8
node.gyp
8
node.gyp
@ -146,7 +146,7 @@
|
|||||||
'src',
|
'src',
|
||||||
'tools/msvs/genfiles',
|
'tools/msvs/genfiles',
|
||||||
'deps/uv/src/ares',
|
'deps/uv/src/ares',
|
||||||
'<(SHARED_INTERMEDIATE_DIR)', # for node_natives.h
|
'<(SHARED_INTERMEDIATE_DIR)',
|
||||||
],
|
],
|
||||||
|
|
||||||
'sources': [
|
'sources': [
|
||||||
@ -167,7 +167,6 @@
|
|||||||
'src/node_debug_options.cc',
|
'src/node_debug_options.cc',
|
||||||
'src/node_file.cc',
|
'src/node_file.cc',
|
||||||
'src/node_http_parser.cc',
|
'src/node_http_parser.cc',
|
||||||
'src/node_javascript.cc',
|
|
||||||
'src/node_main.cc',
|
'src/node_main.cc',
|
||||||
'src/node_os.cc',
|
'src/node_os.cc',
|
||||||
'src/node_revert.cc',
|
'src/node_revert.cc',
|
||||||
@ -243,11 +242,11 @@
|
|||||||
'deps/http_parser/http_parser.h',
|
'deps/http_parser/http_parser.h',
|
||||||
'deps/v8/include/v8.h',
|
'deps/v8/include/v8.h',
|
||||||
'deps/v8/include/v8-debug.h',
|
'deps/v8/include/v8-debug.h',
|
||||||
'<(SHARED_INTERMEDIATE_DIR)/node_natives.h',
|
|
||||||
# javascript files to make for an even more pleasant IDE experience
|
# javascript files to make for an even more pleasant IDE experience
|
||||||
'<@(library_files)',
|
'<@(library_files)',
|
||||||
# node.gyp is added to the project by default.
|
# node.gyp is added to the project by default.
|
||||||
'common.gypi',
|
'common.gypi',
|
||||||
|
'<(SHARED_INTERMEDIATE_DIR)/node_javascript.cc',
|
||||||
],
|
],
|
||||||
|
|
||||||
'defines': [
|
'defines': [
|
||||||
@ -720,12 +719,13 @@
|
|||||||
'actions': [
|
'actions': [
|
||||||
{
|
{
|
||||||
'action_name': 'node_js2c',
|
'action_name': 'node_js2c',
|
||||||
|
'process_outputs_as_sources': 1,
|
||||||
'inputs': [
|
'inputs': [
|
||||||
'<@(library_files)',
|
'<@(library_files)',
|
||||||
'./config.gypi',
|
'./config.gypi',
|
||||||
],
|
],
|
||||||
'outputs': [
|
'outputs': [
|
||||||
'<(SHARED_INTERMEDIATE_DIR)/node_natives.h',
|
'<(SHARED_INTERMEDIATE_DIR)/node_javascript.cc',
|
||||||
],
|
],
|
||||||
'conditions': [
|
'conditions': [
|
||||||
[ 'node_use_dtrace=="false" and node_use_etw=="false"', {
|
[ 'node_use_dtrace=="false" and node_use_etw=="false"', {
|
||||||
|
@ -1,51 +0,0 @@
|
|||||||
#include "node.h"
|
|
||||||
#include "node_natives.h"
|
|
||||||
#include "v8.h"
|
|
||||||
#include "env.h"
|
|
||||||
#include "env-inl.h"
|
|
||||||
|
|
||||||
namespace node {
|
|
||||||
|
|
||||||
using v8::Local;
|
|
||||||
using v8::NewStringType;
|
|
||||||
using v8::Object;
|
|
||||||
using v8::String;
|
|
||||||
|
|
||||||
// id##_data is defined in node_natives.h.
|
|
||||||
#define V(id) \
|
|
||||||
static struct : public String::ExternalOneByteStringResource { \
|
|
||||||
const char* data() const override { \
|
|
||||||
return reinterpret_cast<const char*>(id##_data); \
|
|
||||||
} \
|
|
||||||
size_t length() const override { return sizeof(id##_data); } \
|
|
||||||
void Dispose() override { /* Default calls `delete this`. */ } \
|
|
||||||
} id##_external_data;
|
|
||||||
NODE_NATIVES_MAP(V)
|
|
||||||
#undef V
|
|
||||||
|
|
||||||
Local<String> MainSource(Environment* env) {
|
|
||||||
auto maybe_string =
|
|
||||||
String::NewExternalOneByte(
|
|
||||||
env->isolate(),
|
|
||||||
&internal_bootstrap_node_external_data);
|
|
||||||
return maybe_string.ToLocalChecked();
|
|
||||||
}
|
|
||||||
|
|
||||||
void DefineJavaScript(Environment* env, Local<Object> target) {
|
|
||||||
auto context = env->context();
|
|
||||||
#define V(id) \
|
|
||||||
do { \
|
|
||||||
auto key = \
|
|
||||||
String::NewFromOneByte( \
|
|
||||||
env->isolate(), id##_name, NewStringType::kNormal, \
|
|
||||||
sizeof(id##_name)).ToLocalChecked(); \
|
|
||||||
auto value = \
|
|
||||||
String::NewExternalOneByte( \
|
|
||||||
env->isolate(), &id##_external_data).ToLocalChecked(); \
|
|
||||||
CHECK(target->Set(context, key, value).FromJust()); \
|
|
||||||
} while (0);
|
|
||||||
NODE_NATIVES_MAP(V)
|
|
||||||
#undef V
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace node
|
|
125
tools/js2c.py
125
tools/js2c.py
@ -37,13 +37,16 @@ import sys
|
|||||||
import string
|
import string
|
||||||
|
|
||||||
|
|
||||||
def ToCString(contents):
|
def ToCArray(elements, step=10):
|
||||||
step = 20
|
slices = (elements[i:i+step] for i in xrange(0, len(elements), step))
|
||||||
slices = (contents[i:i+step] for i in xrange(0, len(contents), step))
|
slices = map(lambda s: ','.join(str(x) for x in s), slices)
|
||||||
slices = map(lambda s: ','.join(str(ord(c)) for c in s), slices)
|
|
||||||
return ',\n'.join(slices)
|
return ',\n'.join(slices)
|
||||||
|
|
||||||
|
|
||||||
|
def ToCString(contents):
|
||||||
|
return ToCArray(map(ord, contents), step=20)
|
||||||
|
|
||||||
|
|
||||||
def ReadFile(filename):
|
def ReadFile(filename):
|
||||||
file = open(filename, "rt")
|
file = open(filename, "rt")
|
||||||
try:
|
try:
|
||||||
@ -161,34 +164,72 @@ def ReadMacros(lines):
|
|||||||
return (constants, macros)
|
return (constants, macros)
|
||||||
|
|
||||||
|
|
||||||
HEADER_TEMPLATE = """\
|
TEMPLATE = """
|
||||||
#ifndef NODE_NATIVES_H_
|
#include "node.h"
|
||||||
#define NODE_NATIVES_H_
|
#include "node_javascript.h"
|
||||||
|
#include "v8.h"
|
||||||
#include <stdint.h>
|
#include "env.h"
|
||||||
|
#include "env-inl.h"
|
||||||
#define NODE_NATIVES_MAP(V) \\
|
|
||||||
{node_natives_map}
|
|
||||||
|
|
||||||
namespace node {{
|
namespace node {{
|
||||||
{sources}
|
|
||||||
|
{definitions}
|
||||||
|
|
||||||
|
v8::Local<v8::String> MainSource(Environment* env) {{
|
||||||
|
return internal_bootstrap_node_value.ToStringChecked(env->isolate());
|
||||||
|
}}
|
||||||
|
|
||||||
|
void DefineJavaScript(Environment* env, v8::Local<v8::Object> target) {{
|
||||||
|
{initializers}
|
||||||
|
}}
|
||||||
|
|
||||||
}} // namespace node
|
}} // namespace node
|
||||||
|
"""
|
||||||
|
|
||||||
#endif // NODE_NATIVES_H_
|
ONE_BYTE_STRING = """
|
||||||
|
static const uint8_t raw_{var}[] = {{ {data} }};
|
||||||
|
static struct : public v8::String::ExternalOneByteStringResource {{
|
||||||
|
const char* data() const override {{
|
||||||
|
return reinterpret_cast<const char*>(raw_{var});
|
||||||
|
}}
|
||||||
|
size_t length() const override {{ return arraysize(raw_{var}); }}
|
||||||
|
void Dispose() override {{ /* Default calls `delete this`. */ }}
|
||||||
|
v8::Local<v8::String> ToStringChecked(v8::Isolate* isolate) {{
|
||||||
|
return v8::String::NewExternalOneByte(isolate, this).ToLocalChecked();
|
||||||
|
}}
|
||||||
|
}} {var};
|
||||||
|
"""
|
||||||
|
|
||||||
|
TWO_BYTE_STRING = """
|
||||||
|
static const uint16_t raw_{var}[] = {{ {data} }};
|
||||||
|
static struct : public v8::String::ExternalStringResource {{
|
||||||
|
const uint16_t* data() const override {{ return raw_{var}; }}
|
||||||
|
size_t length() const override {{ return arraysize(raw_{var}); }}
|
||||||
|
void Dispose() override {{ /* Default calls `delete this`. */ }}
|
||||||
|
v8::Local<v8::String> ToStringChecked(v8::Isolate* isolate) {{
|
||||||
|
return v8::String::NewExternalTwoByte(isolate, this).ToLocalChecked();
|
||||||
|
}}
|
||||||
|
}} {var};
|
||||||
|
"""
|
||||||
|
|
||||||
|
INITIALIZER = """\
|
||||||
|
CHECK(target->Set(env->context(),
|
||||||
|
{key}.ToStringChecked(env->isolate()),
|
||||||
|
{value}.ToStringChecked(env->isolate())).FromJust());
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
NODE_NATIVES_MAP = """\
|
def Render(var, data):
|
||||||
V({escaped_id}) \\
|
# Treat non-ASCII as UTF-8 and convert it to UTF-16.
|
||||||
"""
|
if any(ord(c) > 127 for c in data):
|
||||||
|
template = TWO_BYTE_STRING
|
||||||
|
data = map(ord, data.decode('utf-8').encode('utf-16be'))
|
||||||
SOURCES = """\
|
data = [data[i] * 256 + data[i+1] for i in xrange(0, len(data), 2)]
|
||||||
static const uint8_t {escaped_id}_name[] = {{
|
data = ToCArray(data)
|
||||||
{name}}};
|
else:
|
||||||
static const uint8_t {escaped_id}_data[] = {{
|
template = ONE_BYTE_STRING
|
||||||
{data}}};
|
data = ToCString(data)
|
||||||
"""
|
return template.format(var=var, data=data)
|
||||||
|
|
||||||
|
|
||||||
def JS2C(source, target):
|
def JS2C(source, target):
|
||||||
@ -207,36 +248,32 @@ def JS2C(source, target):
|
|||||||
(consts, macros) = ReadMacros(macro_lines)
|
(consts, macros) = ReadMacros(macro_lines)
|
||||||
|
|
||||||
# Build source code lines
|
# Build source code lines
|
||||||
node_natives_map = []
|
definitions = []
|
||||||
sources = []
|
initializers = []
|
||||||
|
|
||||||
for s in modules:
|
for name in modules:
|
||||||
lines = ReadFile(str(s))
|
lines = ReadFile(str(name))
|
||||||
lines = ExpandConstants(lines, consts)
|
lines = ExpandConstants(lines, consts)
|
||||||
lines = ExpandMacros(lines, macros)
|
lines = ExpandMacros(lines, macros)
|
||||||
data = ToCString(lines)
|
|
||||||
|
|
||||||
# On Windows, "./foo.bar" in the .gyp file is passed as "foo.bar"
|
# On Windows, "./foo.bar" in the .gyp file is passed as "foo.bar"
|
||||||
# so don't assume there is always a slash in the file path.
|
# so don't assume there is always a slash in the file path.
|
||||||
if '/' in s or '\\' in s:
|
if '/' in name or '\\' in name:
|
||||||
id = '/'.join(re.split('/|\\\\', s)[1:])
|
name = '/'.join(re.split('/|\\\\', name)[1:])
|
||||||
else:
|
|
||||||
id = s
|
|
||||||
|
|
||||||
if '.' in id:
|
name = name.split('.', 1)[0]
|
||||||
id = id.split('.', 1)[0]
|
var = name.replace('-', '_').replace('/', '_')
|
||||||
|
key = '%s_key' % var
|
||||||
|
value = '%s_value' % var
|
||||||
|
|
||||||
name = ToCString(id)
|
definitions.append(Render(key, name))
|
||||||
escaped_id = id.replace('-', '_').replace('/', '_')
|
definitions.append(Render(value, lines))
|
||||||
node_natives_map.append(NODE_NATIVES_MAP.format(**locals()))
|
initializers.append(INITIALIZER.format(key=key, value=value))
|
||||||
sources.append(SOURCES.format(**locals()))
|
|
||||||
|
|
||||||
node_natives_map = ''.join(node_natives_map)
|
|
||||||
sources = ''.join(sources)
|
|
||||||
|
|
||||||
# Emit result
|
# Emit result
|
||||||
output = open(str(target[0]), "w")
|
output = open(str(target[0]), "w")
|
||||||
output.write(HEADER_TEMPLATE.format(**locals()))
|
output.write(TEMPLATE.format(definitions=''.join(definitions),
|
||||||
|
initializers=''.join(initializers)))
|
||||||
output.close()
|
output.close()
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
Loading…
x
Reference in New Issue
Block a user