url: update IDNA error conditions
This commit contains three separate changes: - Always return a string from ToUnicode no matter if an error occurred. - Disable CheckHyphens boolean flag. This flag will soon be enabled in the URL Standard, but is implemented manually by selectively ignoring certain errors. - Enable CheckBidi boolean flag per URL Standard update. This allows domain names with hyphens at 3 and 4th position, as well as those with leading and trailing hyphens. They are technically invalid, but seen in the wild. Tests are updated and simplified accordingly. PR-URL: https://github.com/nodejs/node/pull/12966 Fixes: https://github.com/nodejs/node/issues/12965 Refs: https://github.com/whatwg/url/pull/309 Reviewed-By: Daijiro Wachi <daijiro.wachi@gmail.com> Reviewed-By: Timothy Gu <timothygu99@gmail.com>
This commit is contained in:
parent
841bb4c61f
commit
06a617aa21
@ -436,11 +436,9 @@ bool InitializeICUDirectory(const std::string& path) {
|
|||||||
|
|
||||||
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
|
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
|
||||||
const char* input,
|
const char* input,
|
||||||
size_t length,
|
size_t length) {
|
||||||
bool lenient) {
|
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
uint32_t options = UIDNA_DEFAULT;
|
uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE;
|
||||||
options |= UIDNA_NONTRANSITIONAL_TO_UNICODE;
|
|
||||||
UIDNA* uidna = uidna_openUTS46(options, &status);
|
UIDNA* uidna = uidna_openUTS46(options, &status);
|
||||||
if (U_FAILURE(status))
|
if (U_FAILURE(status))
|
||||||
return -1;
|
return -1;
|
||||||
@ -462,14 +460,10 @@ int32_t ToUnicode(MaybeStackBuffer<char>* buf,
|
|||||||
&status);
|
&status);
|
||||||
}
|
}
|
||||||
|
|
||||||
// UTS #46's ToUnicode operation applies no validation of domain name length
|
// info.errors is ignored as UTS #46 ToUnicode always produces a Unicode
|
||||||
// (nor a flag requesting it to do so, like VerifyDnsLength for ToASCII). For
|
// string, regardless of whether an error occurred.
|
||||||
// that reason, unlike ToASCII below, ICU4C correctly accepts long domain
|
|
||||||
// names. However, ICU4C still sets the EMPTY_LABEL error in contrary to UTS
|
|
||||||
// #46. Therefore, explicitly filters out that error here.
|
|
||||||
info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
|
|
||||||
|
|
||||||
if (U_FAILURE(status) || (!lenient && info.errors != 0)) {
|
if (U_FAILURE(status)) {
|
||||||
len = -1;
|
len = -1;
|
||||||
buf->SetLength(0);
|
buf->SetLength(0);
|
||||||
} else {
|
} else {
|
||||||
@ -485,8 +479,7 @@ int32_t ToASCII(MaybeStackBuffer<char>* buf,
|
|||||||
size_t length,
|
size_t length,
|
||||||
bool lenient) {
|
bool lenient) {
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
uint32_t options = UIDNA_DEFAULT;
|
uint32_t options = UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_CHECK_BIDI;
|
||||||
options |= UIDNA_NONTRANSITIONAL_TO_ASCII;
|
|
||||||
UIDNA* uidna = uidna_openUTS46(options, &status);
|
UIDNA* uidna = uidna_openUTS46(options, &status);
|
||||||
if (U_FAILURE(status))
|
if (U_FAILURE(status))
|
||||||
return -1;
|
return -1;
|
||||||
@ -518,6 +511,21 @@ int32_t ToASCII(MaybeStackBuffer<char>* buf,
|
|||||||
info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
|
info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
|
||||||
info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
|
info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
|
||||||
|
|
||||||
|
// These error conditions are mandated unconditionally by UTS #46 version
|
||||||
|
// 9.0.0 (rev. 17), but were found to be incompatible with actual domain
|
||||||
|
// names in the wild. As such, in the current UTS #46 draft (rev. 18) these
|
||||||
|
// checks are made optional depending on the CheckHyphens flag, which will be
|
||||||
|
// disabled in WHATWG URL's "domain to ASCII" algorithm soon.
|
||||||
|
// Refs:
|
||||||
|
// - https://github.com/whatwg/url/issues/53
|
||||||
|
// - https://github.com/whatwg/url/pull/309
|
||||||
|
// - http://www.unicode.org/review/pri317/
|
||||||
|
// - http://www.unicode.org/reports/tr46/tr46-18.html
|
||||||
|
// - https://www.icann.org/news/announcement-2000-01-07-en
|
||||||
|
info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
|
||||||
|
info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
|
||||||
|
info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
|
||||||
|
|
||||||
if (U_FAILURE(status) || (!lenient && info.errors != 0)) {
|
if (U_FAILURE(status) || (!lenient && info.errors != 0)) {
|
||||||
len = -1;
|
len = -1;
|
||||||
buf->SetLength(0);
|
buf->SetLength(0);
|
||||||
@ -534,11 +542,9 @@ static void ToUnicode(const FunctionCallbackInfo<Value>& args) {
|
|||||||
CHECK_GE(args.Length(), 1);
|
CHECK_GE(args.Length(), 1);
|
||||||
CHECK(args[0]->IsString());
|
CHECK(args[0]->IsString());
|
||||||
Utf8Value val(env->isolate(), args[0]);
|
Utf8Value val(env->isolate(), args[0]);
|
||||||
// optional arg
|
|
||||||
bool lenient = args[1]->BooleanValue(env->context()).FromJust();
|
|
||||||
|
|
||||||
MaybeStackBuffer<char> buf;
|
MaybeStackBuffer<char> buf;
|
||||||
int32_t len = ToUnicode(&buf, *val, val.length(), lenient);
|
int32_t len = ToUnicode(&buf, *val, val.length());
|
||||||
|
|
||||||
if (len < 0) {
|
if (len < 0) {
|
||||||
return env->ThrowError("Cannot convert name to Unicode");
|
return env->ThrowError("Cannot convert name to Unicode");
|
||||||
|
@ -43,8 +43,7 @@ int32_t ToASCII(MaybeStackBuffer<char>* buf,
|
|||||||
bool lenient = false);
|
bool lenient = false);
|
||||||
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
|
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
|
||||||
const char* input,
|
const char* input,
|
||||||
size_t length,
|
size_t length);
|
||||||
bool lenient = false);
|
|
||||||
|
|
||||||
} // namespace i18n
|
} // namespace i18n
|
||||||
} // namespace node
|
} // namespace node
|
||||||
|
37
test/fixtures/url-idna.js
vendored
37
test/fixtures/url-idna.js
vendored
@ -191,22 +191,33 @@ module.exports = {
|
|||||||
{
|
{
|
||||||
ascii: `${`${'a'.repeat(64)}.`.repeat(4)}com`,
|
ascii: `${`${'a'.repeat(64)}.`.repeat(4)}com`,
|
||||||
unicode: `${`${'a'.repeat(64)}.`.repeat(4)}com`
|
unicode: `${`${'a'.repeat(64)}.`.repeat(4)}com`
|
||||||
|
},
|
||||||
|
// URLs with hyphen
|
||||||
|
{
|
||||||
|
ascii: 'r4---sn-a5mlrn7s.gevideo.com',
|
||||||
|
unicode: 'r4---sn-a5mlrn7s.gevideo.com'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ascii: '-sn-a5mlrn7s.gevideo.com',
|
||||||
|
unicode: '-sn-a5mlrn7s.gevideo.com'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ascii: 'sn-a5mlrn7s-.gevideo.com',
|
||||||
|
unicode: 'sn-a5mlrn7s-.gevideo.com'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ascii: '-sn-a5mlrn7s-.gevideo.com',
|
||||||
|
unicode: '-sn-a5mlrn7s-.gevideo.com'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ascii: '-sn--a5mlrn7s-.gevideo.com',
|
||||||
|
unicode: '-sn--a5mlrn7s-.gevideo.com'
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
invalid: [
|
invalid: [
|
||||||
// invalid character
|
// invalid character
|
||||||
{
|
'\ufffd.com',
|
||||||
url: '\ufffd.com',
|
// invalid bi-directional character
|
||||||
mode: 'ascii'
|
'تشادرlatin.icom.museum'
|
||||||
},
|
|
||||||
{
|
|
||||||
url: '\ufffd.com',
|
|
||||||
mode: 'unicode'
|
|
||||||
},
|
|
||||||
// invalid Punycode
|
|
||||||
{
|
|
||||||
url: 'xn---abc.com',
|
|
||||||
mode: 'unicode'
|
|
||||||
}
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
@ -23,19 +23,13 @@ const tests = require('../fixtures/url-idna.js');
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
const errorRe = {
|
for (const [i, url] of tests.invalid.entries()) {
|
||||||
ascii: /^Error: Cannot convert name to ASCII$/,
|
assert.throws(() => icu.toASCII(url),
|
||||||
unicode: /^Error: Cannot convert name to Unicode$/
|
/^Error: Cannot convert name to ASCII$/,
|
||||||
};
|
`ToASCII invalid case ${i + 1}`);
|
||||||
const convertFunc = {
|
assert.doesNotThrow(() => icu.toASCII(url, true),
|
||||||
ascii: icu.toASCII,
|
`ToASCII invalid case ${i + 1} in lenient mode`);
|
||||||
unicode: icu.toUnicode
|
assert.doesNotThrow(() => icu.toUnicode(url),
|
||||||
};
|
`ToUnicode invalid case ${i + 1}`);
|
||||||
|
|
||||||
for (const [i, { url, mode }] of tests.invalid.entries()) {
|
|
||||||
assert.throws(() => convertFunc[mode](url), errorRe[mode],
|
|
||||||
`Invalid case ${i + 1}`);
|
|
||||||
assert.doesNotThrow(() => convertFunc[mode](url, true),
|
|
||||||
`Invalid case ${i + 1} in lenient mode`);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8,7 +8,8 @@ const domainToASCII = url.domainToASCII;
|
|||||||
const domainToUnicode = url.domainToUnicode;
|
const domainToUnicode = url.domainToUnicode;
|
||||||
|
|
||||||
const domainWithASCII = [
|
const domainWithASCII = [
|
||||||
['ıídيٴ', 'xn--d-iga7ro0q9f'],
|
['ıíd', 'xn--d-iga7r'],
|
||||||
|
['يٴ', 'xn--mhb8f'],
|
||||||
['www.ϧƽəʐ.com', 'www.xn--cja62apfr6c.com'],
|
['www.ϧƽəʐ.com', 'www.xn--cja62apfr6c.com'],
|
||||||
['новини.com', 'xn--b1amarcd.com'],
|
['новини.com', 'xn--b1amarcd.com'],
|
||||||
['名がドメイン.com', 'xn--v8jxj3d1dzdz08w.com'],
|
['名がドメイン.com', 'xn--v8jxj3d1dzdz08w.com'],
|
||||||
|
@ -35,11 +35,8 @@ const tests = require('../fixtures/url-idna.js');
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
const convertFunc = {
|
for (const [i, url] of tests.invalid.entries()) {
|
||||||
ascii: domainToASCII,
|
assert.strictEqual(domainToASCII(url), '', `Invalid case ${i + 1}`);
|
||||||
unicode: domainToUnicode
|
assert.strictEqual(domainToUnicode(url), '', `Invalid case ${i + 1}`);
|
||||||
};
|
}
|
||||||
|
|
||||||
for (const [i, { url, mode }] of tests.invalid.entries())
|
|
||||||
assert.strictEqual(convertFunc[mode](url), '', `Invalid case ${i + 1}`);
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user