net: use icu's punycode implementation
ICU has a built-in punycode implementation. Use it instead of the JavaScript implementation because it is much faster. PR-URL: https://github.com/nodejs/node/pull/7355 Reviewed-By: Trevor Norris <trev.norris@gmail.com> Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
This commit is contained in:
parent
3d6a01ed3e
commit
7de59ef925
75
benchmark/net/punycode.js
Normal file
75
benchmark/net/punycode.js
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
'use strict';
|
||||||
|
|
||||||
|
const common = require('../common.js');
|
||||||
|
const icu = process.binding('icu');
|
||||||
|
const punycode = require('punycode');
|
||||||
|
|
||||||
|
// Benchmark matrix: every hostname in `val` is run once per `method`,
// with `n` round-trip conversions per run.
const bench = common.createBenchmark(main, {
  method: ['punycode', 'icu'],
  n: [1024],
  val: [
    'افغانستا.icom.museum',
    'الجزائر.icom.museum',
    'österreich.icom.museum',
    'বাংলাদেশ.icom.museum',
    'беларусь.icom.museum',
    'belgië.icom.museum',
    'българия.icom.museum',
    'تشادر.icom.museum',
    '中国.icom.museum',
    'القمر.icom.museum',
    'κυπρος.icom.museum',
    'českárepublika.icom.museum',
    'مصر.icom.museum',
    'ελλάδα.icom.museum',
    'magyarország.icom.museum',
    'ísland.icom.museum',
    'भारत.icom.museum',
    'ايران.icom.museum',
    'éire.icom.museum',
    'איקו״ם.ישראל.museum',
    '日本.icom.museum',
    'الأردن.icom.museum'
  ]
});
|
||||||
|
|
||||||
|
// One round trip through the JavaScript punycode module:
// encode `val` to ASCII, then decode it back. The result is discarded.
function usingPunycode(val) {
  const encoded = punycode.toASCII(val);
  punycode.toUnicode(encoded);
}
|
||||||
|
|
||||||
|
// One round trip through the `icu` binding:
// encode `val` to ASCII, then decode it back. The result is discarded.
function usingICU(val) {
  const encoded = icu.toASCII(val);
  icu.toUnicode(encoded);
}
|
||||||
|
|
||||||
|
// Times `n` calls of usingPunycode(val). The optimization request is made
// before the timer starts so it is not part of the measured interval.
function runPunycode(n, val) {
  common.v8ForceOptimization(usingPunycode, val);
  bench.start();
  let remaining = n;
  while (remaining > 0) {
    usingPunycode(val);
    remaining--;
  }
  bench.end(n);
}
|
||||||
|
|
||||||
|
// Times `n` calls of usingICU(val). The optimization request is made
// before the timer starts so it is not part of the measured interval.
function runICU(n, val) {
  common.v8ForceOptimization(usingICU, val);
  bench.start();
  let remaining = n;
  while (remaining > 0) {
    usingICU(val);
    remaining--;
  }
  bench.end(n);
}
|
||||||
|
|
||||||
|
// Benchmark entry point: coerces `conf.n` to a number and dispatches to the
// runner selected by `conf.method`. Throws on any other method name.
function main(conf) {
  const iterations = +conf.n;
  const hostname = conf.val;
  const method = conf.method;

  if (method === 'punycode') {
    runPunycode(iterations, hostname);
    return;
  }
  if (method === 'icu') {
    runICU(iterations, hostname);
    return;
  }
  throw new Error('Unexpected method');
}
|
12
lib/url.js
12
lib/url.js
@ -1,6 +1,14 @@
|
|||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const punycode = require('punycode');
|
function importPunycode() {
|
||||||
|
try {
|
||||||
|
return process.binding('icu');
|
||||||
|
} catch (e) {
|
||||||
|
return require('punycode');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const { toASCII } = importPunycode();
|
||||||
|
|
||||||
exports.parse = urlParse;
|
exports.parse = urlParse;
|
||||||
exports.resolve = urlResolve;
|
exports.resolve = urlResolve;
|
||||||
@ -309,7 +317,7 @@ Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) {
|
|||||||
// It only converts parts of the domain name that
|
// It only converts parts of the domain name that
|
||||||
// have non-ASCII characters, i.e. it doesn't matter if
|
// have non-ASCII characters, i.e. it doesn't matter if
|
||||||
// you call it with a domain that already is ASCII-only.
|
// you call it with a domain that already is ASCII-only.
|
||||||
this.hostname = punycode.toASCII(this.hostname);
|
this.hostname = toASCII(this.hostname);
|
||||||
}
|
}
|
||||||
|
|
||||||
var p = this.port ? ':' + this.port : '';
|
var p = this.port ? ':' + this.port : '';
|
||||||
|
132
src/node_i18n.cc
132
src/node_i18n.cc
@ -23,8 +23,16 @@
|
|||||||
|
|
||||||
#if defined(NODE_HAVE_I18N_SUPPORT)
|
#if defined(NODE_HAVE_I18N_SUPPORT)
|
||||||
|
|
||||||
|
#include "node.h"
|
||||||
|
#include "env.h"
|
||||||
|
#include "env-inl.h"
|
||||||
|
#include "util.h"
|
||||||
|
#include "util-inl.h"
|
||||||
|
#include "v8.h"
|
||||||
|
|
||||||
#include <unicode/putil.h>
|
#include <unicode/putil.h>
|
||||||
#include <unicode/udata.h>
|
#include <unicode/udata.h>
|
||||||
|
#include <unicode/uidna.h>
|
||||||
|
|
||||||
#ifdef NODE_HAVE_SMALL_ICU
|
#ifdef NODE_HAVE_SMALL_ICU
|
||||||
/* if this is defined, we have a 'secondary' entry point.
|
/* if this is defined, we have a 'secondary' entry point.
|
||||||
@ -43,6 +51,13 @@ extern "C" const char U_DATA_API SMALL_ICUDATA_ENTRY_POINT[];
|
|||||||
|
|
||||||
namespace node {
|
namespace node {
|
||||||
|
|
||||||
|
using v8::Context;
|
||||||
|
using v8::FunctionCallbackInfo;
|
||||||
|
using v8::Local;
|
||||||
|
using v8::Object;
|
||||||
|
using v8::String;
|
||||||
|
using v8::Value;
|
||||||
|
|
||||||
bool flag_icu_data_dir = false;
|
bool flag_icu_data_dir = false;
|
||||||
|
|
||||||
namespace i18n {
|
namespace i18n {
|
||||||
@ -64,7 +79,124 @@ bool InitializeICUDirectory(const char* icu_data_path) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Converts the UTF-8 name in `input` (`length` bytes) to its Unicode form
// with ICU's UTS #46 API, writing the result into `buf`.
// Returns the length reported by ICU, or -1 if opening the UIDNA instance
// or the conversion itself reports a failure status.
static int32_t ToUnicode(MaybeStackBuffer<char>* buf,
                         const char* input,
                         size_t length) {
  UErrorCode status = U_ZERO_ERROR;
  const uint32_t options = UIDNA_DEFAULT | UIDNA_NONTRANSITIONAL_TO_UNICODE;
  UIDNA* uidna = uidna_openUTS46(options, &status);
  if (U_FAILURE(status))
    return -1;

  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
  auto convert = [&]() {
    return uidna_nameToUnicodeUTF8(uidna,
                                   input, length,
                                   **buf, buf->length(),
                                   &info,
                                   &status);
  };

  int32_t len = convert();
  if (status == U_BUFFER_OVERFLOW_ERROR) {
    // The buffer was too small: grow it to the size ICU asked for and
    // run the conversion once more.
    status = U_ZERO_ERROR;
    buf->AllocateSufficientStorage(len);
    len = convert();
  }

  uidna_close(uidna);
  return U_FAILURE(status) ? -1 : len;
}
|
||||||
|
|
||||||
|
// Converts the UTF-8 name in `input` (`length` bytes) to its ASCII form
// with ICU's UTS #46 API, writing the result into `buf`.
// Returns the length reported by ICU, or -1 if opening the UIDNA instance
// or the conversion itself reports a failure status.
static int32_t ToASCII(MaybeStackBuffer<char>* buf,
                       const char* input,
                       size_t length) {
  UErrorCode status = U_ZERO_ERROR;
  const uint32_t options = UIDNA_DEFAULT | UIDNA_NONTRANSITIONAL_TO_ASCII;
  UIDNA* uidna = uidna_openUTS46(options, &status);
  if (U_FAILURE(status))
    return -1;

  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
  auto convert = [&]() {
    return uidna_nameToASCII_UTF8(uidna,
                                  input, length,
                                  **buf, buf->length(),
                                  &info,
                                  &status);
  };

  int32_t len = convert();
  if (status == U_BUFFER_OVERFLOW_ERROR) {
    // The buffer was too small: grow it to the size ICU asked for and
    // run the conversion once more.
    status = U_ZERO_ERROR;
    buf->AllocateSufficientStorage(len);
    len = convert();
  }

  uidna_close(uidna);
  return U_FAILURE(status) ? -1 : len;
}
|
||||||
|
|
||||||
|
static void ToUnicode(const FunctionCallbackInfo<Value>& args) {
|
||||||
|
Environment* env = Environment::GetCurrent(args);
|
||||||
|
CHECK_GE(args.Length(), 1);
|
||||||
|
CHECK(args[0]->IsString());
|
||||||
|
Utf8Value val(env->isolate(), args[0]);
|
||||||
|
MaybeStackBuffer<char> buf;
|
||||||
|
int32_t len = ToUnicode(&buf, *val, val.length());
|
||||||
|
|
||||||
|
if (len < 0) {
|
||||||
|
return env->ThrowError("Cannot convert name to Unicode");
|
||||||
|
}
|
||||||
|
|
||||||
|
args.GetReturnValue().Set(
|
||||||
|
String::NewFromUtf8(env->isolate(),
|
||||||
|
*buf,
|
||||||
|
v8::NewStringType::kNormal,
|
||||||
|
len).ToLocalChecked());
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ToASCII(const FunctionCallbackInfo<Value>& args) {
|
||||||
|
Environment* env = Environment::GetCurrent(args);
|
||||||
|
CHECK_GE(args.Length(), 1);
|
||||||
|
CHECK(args[0]->IsString());
|
||||||
|
Utf8Value val(env->isolate(), args[0]);
|
||||||
|
MaybeStackBuffer<char> buf;
|
||||||
|
int32_t len = ToASCII(&buf, *val, val.length());
|
||||||
|
|
||||||
|
if (len < 0) {
|
||||||
|
return env->ThrowError("Cannot convert name to ASCII");
|
||||||
|
}
|
||||||
|
|
||||||
|
args.GetReturnValue().Set(
|
||||||
|
String::NewFromUtf8(env->isolate(),
|
||||||
|
*buf,
|
||||||
|
v8::NewStringType::kNormal,
|
||||||
|
len).ToLocalChecked());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Module initializer for the `icu` binding: attaches the `toUnicode` and
// `toASCII` methods to `target`. `unused` and `priv` are not read.
void Init(Local<Object> target,
          Local<Value> unused,
          Local<Context> context,
          void* priv) {
  Environment* const environment = Environment::GetCurrent(context);
  environment->SetMethod(target, "toUnicode", ToUnicode);
  environment->SetMethod(target, "toASCII", ToASCII);
}
|
||||||
|
|
||||||
} // namespace i18n
|
} // namespace i18n
|
||||||
} // namespace node
|
} // namespace node
|
||||||
|
|
||||||
|
NODE_MODULE_CONTEXT_AWARE_BUILTIN(icu, node::i18n::Init)
|
||||||
|
|
||||||
#endif // NODE_HAVE_I18N_SUPPORT
|
#endif // NODE_HAVE_I18N_SUPPORT
|
||||||
|
72
test/parallel/test-icu-punycode.js
Normal file
72
test/parallel/test-icu-punycode.js
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
'use strict';
|
||||||
|
|
||||||
|
const common = require('../common');
|
||||||
|
const icu = getPunycode();
|
||||||
|
const assert = require('assert');
|
||||||
|
|
||||||
|
// Returns the `icu` binding, or undefined when process.binding('icu') throws.
function getPunycode() {
  let binding;
  try {
    binding = process.binding('icu');
  } catch (err) {
    binding = undefined;
  }
  return binding;
}
|
||||||
|
|
||||||
|
if (!icu) {
|
||||||
|
common.skip('icu punycode tests because ICU is not present.');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Credit for list: http://www.i18nguy.com/markup/idna-examples.html
|
||||||
|
const tests = [
|
||||||
|
'افغانستا.icom.museum',
|
||||||
|
'الجزائر.icom.museum',
|
||||||
|
'österreich.icom.museum',
|
||||||
|
'বাংলাদেশ.icom.museum',
|
||||||
|
'беларусь.icom.museum',
|
||||||
|
'belgië.icom.museum',
|
||||||
|
'българия.icom.museum',
|
||||||
|
'تشادر.icom.museum',
|
||||||
|
'中国.icom.museum',
|
||||||
|
'القمر.icom.museum',
|
||||||
|
'κυπρος.icom.museum',
|
||||||
|
'českárepublika.icom.museum',
|
||||||
|
'مصر.icom.museum',
|
||||||
|
'ελλάδα.icom.museum',
|
||||||
|
'magyarország.icom.museum',
|
||||||
|
'ísland.icom.museum',
|
||||||
|
'भारत.icom.museum',
|
||||||
|
'ايران.icom.museum',
|
||||||
|
'éire.icom.museum',
|
||||||
|
'איקו״ם.ישראל.museum',
|
||||||
|
'日本.icom.museum',
|
||||||
|
'الأردن.icom.museum',
|
||||||
|
'қазақстан.icom.museum',
|
||||||
|
'한국.icom.museum',
|
||||||
|
'кыргызстан.icom.museum',
|
||||||
|
'ລາວ.icom.museum',
|
||||||
|
'لبنان.icom.museum',
|
||||||
|
'македонија.icom.museum',
|
||||||
|
'موريتانيا.icom.museum',
|
||||||
|
'méxico.icom.museum',
|
||||||
|
'монголулс.icom.museum',
|
||||||
|
'المغرب.icom.museum',
|
||||||
|
'नेपाल.icom.museum',
|
||||||
|
'عمان.icom.museum',
|
||||||
|
'قطر.icom.museum',
|
||||||
|
'românia.icom.museum',
|
||||||
|
'россия.иком.museum',
|
||||||
|
'србијаицрнагора.иком.museum',
|
||||||
|
'இலங்கை.icom.museum',
|
||||||
|
'españa.icom.museum',
|
||||||
|
'ไทย.icom.museum',
|
||||||
|
'تونس.icom.museum',
|
||||||
|
'türkiye.icom.museum',
|
||||||
|
'украина.icom.museum',
|
||||||
|
'việtnam.icom.museum'
|
||||||
|
];
|
||||||
|
|
||||||
|
// Testing the roundtrip: encoding a hostname to ASCII and decoding it again
// must give back the original string. The converted value is passed as
// `actual` and the fixture as `expected`, so that a failure message labels
// the two values correctly.
tests.forEach((hostname) => {
  assert.strictEqual(icu.toUnicode(icu.toASCII(hostname)), hostname);
});
|
@ -37,8 +37,7 @@
|
|||||||
'defines': [
|
'defines': [
|
||||||
# ICU cannot swap the initial data without this.
|
# ICU cannot swap the initial data without this.
|
||||||
# http://bugs.icu-project.org/trac/ticket/11046
|
# http://bugs.icu-project.org/trac/ticket/11046
|
||||||
'UCONFIG_NO_LEGACY_CONVERSION=1',
|
'UCONFIG_NO_LEGACY_CONVERSION=1'
|
||||||
'UCONFIG_NO_IDNA=1',
|
|
||||||
],
|
],
|
||||||
}],
|
}],
|
||||||
],
|
],
|
||||||
@ -428,9 +427,6 @@
|
|||||||
#'<(icu_path)/source/common/ubidi_props_data.h',
|
#'<(icu_path)/source/common/ubidi_props_data.h',
|
||||||
# and the callers
|
# and the callers
|
||||||
'<(icu_path)/source/common/ushape.cpp',
|
'<(icu_path)/source/common/ushape.cpp',
|
||||||
'<(icu_path)/source/common/usprep.cpp',
|
|
||||||
'<(icu_path)/source/common/uts46.cpp',
|
|
||||||
'<(icu_path)/source/common/uidna.cpp',
|
|
||||||
]}],
|
]}],
|
||||||
[ 'icu_ver_major == 57', { 'sources!': [
|
[ 'icu_ver_major == 57', { 'sources!': [
|
||||||
# work around http://bugs.icu-project.org/trac/ticket/12451
|
# work around http://bugs.icu-project.org/trac/ticket/12451
|
||||||
@ -447,9 +443,6 @@
|
|||||||
#'<(icu_path)/source/common/ubidi_props_data.h',
|
#'<(icu_path)/source/common/ubidi_props_data.h',
|
||||||
# and the callers
|
# and the callers
|
||||||
'<(icu_path)/source/common/ushape.cpp',
|
'<(icu_path)/source/common/ushape.cpp',
|
||||||
'<(icu_path)/source/common/usprep.cpp',
|
|
||||||
'<(icu_path)/source/common/uts46.cpp',
|
|
||||||
'<(icu_path)/source/common/uidna.cpp',
|
|
||||||
]}],
|
]}],
|
||||||
[ 'OS == "solaris"', { 'defines': [
|
[ 'OS == "solaris"', { 'defines': [
|
||||||
'_XOPEN_SOURCE_EXTENDED=0',
|
'_XOPEN_SOURCE_EXTENDED=0',
|
||||||
|
@ -24,7 +24,7 @@
|
|||||||
"region": "none",
|
"region": "none",
|
||||||
"zone": "locales",
|
"zone": "locales",
|
||||||
"converters": "none",
|
"converters": "none",
|
||||||
"stringprep": "none",
|
"stringprep": "locales",
|
||||||
"translit": "none",
|
"translit": "none",
|
||||||
"brkfiles": "none",
|
"brkfiles": "none",
|
||||||
"brkdict": "none",
|
"brkdict": "none",
|
||||||
@ -34,7 +34,6 @@
|
|||||||
"remove": [
|
"remove": [
|
||||||
"cnvalias.icu",
|
"cnvalias.icu",
|
||||||
"postalCodeData.res",
|
"postalCodeData.res",
|
||||||
"uts46.nrm",
|
|
||||||
"genderList.res",
|
"genderList.res",
|
||||||
"brkitr/root.res",
|
"brkitr/root.res",
|
||||||
"unames.icu"
|
"unames.icu"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user