url: improve spec compliance of WHATWG URL

This patch contains the following changes:

url: make IPv4 parser more spec compliant

* Return int64_t from ParseNumber to prevent overflow for valid big numbers
* Don't throw when there are more than 4 parts (the input cannot be an
  IPv4 address)
* Correctly interpret the address and don't always throw when there are
  numbers > 255 (only the last part may exceed 255, as long as it still
  fits in the remaining bytes of the address)

Ref: https://url.spec.whatwg.org/#concept-ipv4-parser
Fixes: https://github.com/nodejs/node/issues/10306

url: percent encode fragment to follow spec change

Ref: https://github.com/whatwg/url/issues/150
Ref: 373dbedbbf

url: fix URL#search setter

The check for empty string must be done before removing the leading '?'.

Ref: https://url.spec.whatwg.org/#dom-url-search

url: set port to null if an empty string is given

This is to follow a spec change.

Ref: https://github.com/whatwg/url/pull/113

url: fix parsing of paths with Windows drive letter

test: update WHATWG URL test fixtures

PR-URL: https://github.com/nodejs/node/pull/10317
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com>
This commit is contained in:
Michaël Zasso 2016-12-17 13:49:05 +01:00
parent ee9df357ff
commit db18dd8356
4 changed files with 429 additions and 51 deletions

View File

@ -444,8 +444,7 @@ Object.defineProperties(URL.prototype, {
return; return;
port = String(port); port = String(port);
if (port === '') { if (port === '') {
// Currently, if port number is empty, left unchanged. ctx.port = undefined;
// TODO(jasnell): This might be changing in the spec
return; return;
} }
binding.parse(port, binding.kPort, null, ctx, binding.parse(port, binding.kPort, null, ctx,
@ -478,13 +477,13 @@ Object.defineProperties(URL.prototype, {
set(search) { set(search) {
const ctx = this[context]; const ctx = this[context];
search = String(search); search = String(search);
if (search[0] === '?') search = search.slice(1);
if (!search) { if (!search) {
ctx.query = null; ctx.query = null;
ctx.flags &= ~binding.URL_FLAGS_HAS_QUERY; ctx.flags &= ~binding.URL_FLAGS_HAS_QUERY;
this[searchParams][searchParams] = {}; this[searchParams][searchParams] = {};
return; return;
} }
if (search[0] === '?') search = search.slice(1);
ctx.query = ''; ctx.query = '';
binding.parse(search, binding.kQuery, null, ctx, binding.parse(search, binding.kQuery, null, ctx,
onParseSearchComplete.bind(this)); onParseSearchComplete.bind(this));

View File

@ -261,7 +261,7 @@ namespace url {
return type; return type;
} }
static inline int ParseNumber(const char* start, const char* end) { static inline int64_t ParseNumber(const char* start, const char* end) {
unsigned R = 10; unsigned R = 10;
if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') { if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
start += 2; start += 2;
@ -293,7 +293,7 @@ namespace url {
} }
p++; p++;
} }
return strtol(start, NULL, R); return strtoll(start, NULL, R);
} }
static url_host_type ParseIPv4Host(url_host* host, static url_host_type ParseIPv4Host(url_host* host,
@ -305,7 +305,8 @@ namespace url {
const char* end = pointer + length; const char* end = pointer + length;
int parts = 0; int parts = 0;
uint32_t val = 0; uint32_t val = 0;
unsigned numbers[4]; uint64_t numbers[4];
int tooBigNumbers = 0;
if (length == 0) if (length == 0)
goto end; goto end;
@ -313,20 +314,16 @@ namespace url {
const char ch = pointer < end ? pointer[0] : kEOL; const char ch = pointer < end ? pointer[0] : kEOL;
const int remaining = end - pointer - 1; const int remaining = end - pointer - 1;
if (ch == '.' || ch == kEOL) { if (ch == '.' || ch == kEOL) {
if (++parts > 4 || pointer - mark == 0) if (++parts > 4)
break;
int n = ParseNumber(mark, pointer);
if (n < 0) {
type = HOST_TYPE_DOMAIN;
goto end; goto end;
} if (pointer - mark == 0)
if (pointer - mark == 10) {
numbers[parts - 1] = n;
break; break;
} int64_t n = ParseNumber(mark, pointer);
if (n < 0)
goto end;
if (n > 255) { if (n > 255) {
type = HOST_TYPE_FAILED; tooBigNumbers++;
goto end;
} }
numbers[parts - 1] = n; numbers[parts - 1] = n;
mark = pointer + 1; mark = pointer + 1;
@ -335,15 +332,24 @@ namespace url {
} }
pointer++; pointer++;
} }
CHECK_GT(parts, 0);
// If any but the last item in numbers is greater than 255, return failure.
// If the last item in numbers is greater than or equal to
// 256^(5 - the number of items in numbers), return failure.
if (tooBigNumbers > 1 ||
(tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
type = HOST_TYPE_FAILED;
goto end;
}
type = HOST_TYPE_IPV4; type = HOST_TYPE_IPV4;
if (parts > 0) {
val = numbers[parts - 1]; val = numbers[parts - 1];
for (int n = 0; n < parts - 1; n++) { for (int n = 0; n < parts - 1; n++) {
double b = 3 - n; double b = 3 - n;
val += numbers[n] * pow(256, b); val += numbers[n] * pow(256, b);
} }
}
host->value.ipv4 = val; host->value.ipv4 = val;
end: end:
@ -618,6 +624,13 @@ namespace url {
} }
} }
// Drops the last entry of url->path.
//
// Nothing is removed when the path is already empty, or when the URL's
// scheme is "file:" and its path consists of exactly one entry that
// NORMALIZED_WINDOWS_DRIVE_LETTER accepts, so that single entry is kept
// in place instead of being popped.
static inline void ShortenUrlPath(struct url_data* url) {
  if (!url->path.empty()) {
    // Evaluated left to right with short-circuiting: the macro only sees
    // path[0] when the path has exactly one entry and the scheme is "file:".
    const bool lone_drive_letter =
        url->path.size() == 1 &&
        url->scheme == "file:" &&
        NORMALIZED_WINDOWS_DRIVE_LETTER(url->path[0]);
    if (!lone_drive_letter) {
      url->path.pop_back();
    }
  }
}
static void Parse(Environment* env, static void Parse(Environment* env,
Local<Value> recv, Local<Value> recv,
const char* input, const char* input,
@ -895,8 +908,7 @@ namespace url {
if (DOES_HAVE_PATH(base)) { if (DOES_HAVE_PATH(base)) {
SET_HAVE_PATH() SET_HAVE_PATH()
url.path = base.path; url.path = base.path;
if (!url.path.empty()) ShortenUrlPath(&url);
url.path.pop_back();
} }
url.port = base.port; url.port = base.port;
state = kPath; state = kPath;
@ -1112,8 +1124,7 @@ namespace url {
SET_HAVE_PATH() SET_HAVE_PATH()
url.path = base.path; url.path = base.path;
} }
if (!url.path.empty()) ShortenUrlPath(&url);
url.path.pop_back();
} }
state = kPath; state = kPath;
continue; continue;
@ -1172,8 +1183,7 @@ namespace url {
special_back_slash || special_back_slash ||
(!state_override && (ch == '?' || ch == '#'))) { (!state_override && (ch == '?' || ch == '#'))) {
if (IsDoubleDotSegment(buffer)) { if (IsDoubleDotSegment(buffer)) {
if (!url.path.empty()) ShortenUrlPath(&url);
url.path.pop_back();
if (ch != '/' && !special_back_slash) { if (ch != '/' && !special_back_slash) {
SET_HAVE_PATH() SET_HAVE_PATH()
url.path.push_back(""); url.path.push_back("");
@ -1247,7 +1257,7 @@ namespace url {
case 0: case 0:
break; break;
default: default:
buffer += ch; AppendOrEscape(&buffer, ch, SimpleEncodeSet);
} }
break; break;
default: default:

View File

@ -347,7 +347,7 @@
} }
}, },
{ {
"comment": "Port number is unchanges if empty in the new value. Note: this may change, see https://github.com/whatwg/url/pull/113", "comment": "Port number is unchanged if not specified",
"href": "http://example.net:8080", "href": "http://example.net:8080",
"new_value": "example.com:", "new_value": "example.com:",
"expected": { "expected": {
@ -358,7 +358,6 @@
} }
}, },
{ {
"comment": "The empty host is not valid for special schemes", "comment": "The empty host is not valid for special schemes",
"href": "http://example.net", "href": "http://example.net",
"new_value": "", "new_value": "",
@ -763,14 +762,14 @@
} }
}, },
{ {
"comment": "Port number is unchanged if empty in the new value. Note: this may change, see https://github.com/whatwg/url/pull/113", "comment": "Port number is removed if empty is the new value",
"href": "http://example.net:8080", "href": "http://example.net:8080",
"new_value": "", "new_value": "",
"expected": { "expected": {
"href": "http://example.net:8080/", "href": "http://example.net/",
"host": "example.net:8080", "host": "example.net",
"hostname": "example.net", "hostname": "example.net",
"port": "8080" "port": ""
} }
}, },
{ {
@ -975,6 +974,15 @@
"href": "http://example.net/..%c3%89t%C3%A9", "href": "http://example.net/..%c3%89t%C3%A9",
"pathname": "/..%c3%89t%C3%A9" "pathname": "/..%c3%89t%C3%A9"
} }
},
{
"comment": "? needs to be encoded",
"href": "http://example.net",
"new_value": "?",
"expected": {
"href": "http://example.net/%3F",
"pathname": "/%3F"
}
} }
], ],
"search": [ "search": [
@ -1011,7 +1019,6 @@
} }
}, },
{ {
"skip": "we do not pass this, but we do match chromes behavior",
"href": "https://example.net?lang=en-US#nav", "href": "https://example.net?lang=en-US#nav",
"new_value": "?", "new_value": "?",
"expected": { "expected": {
@ -1096,7 +1103,6 @@
} }
}, },
{ {
"skip": "we do not pass this, but we do match chromes behavior",
"href": "https://example.net?lang=en-US#nav", "href": "https://example.net?lang=en-US#nav",
"new_value": "#", "new_value": "#",
"expected": { "expected": {
@ -1113,12 +1119,12 @@
} }
}, },
{ {
"comment": "No percent-encoding at all (!); nuls, tabs, and newlines are removed", "comment": "Simple percent-encoding; nuls, tabs, and newlines are removed",
"href": "a:/", "href": "a:/",
"new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
"expected": { "expected": {
"href": "a:/#\u0001\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", "href": "a:/#%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9",
"hash": "#\u0001\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé" "hash": "#%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9"
} }
}, },
{ {
@ -1126,8 +1132,8 @@
"href": "http://example.net", "href": "http://example.net",
"new_value": "%c3%89té", "new_value": "%c3%89té",
"expected": { "expected": {
"href": "http://example.net/#%c3%89té", "href": "http://example.net/#%c3%89t%C3%A9",
"hash": "#%c3%89té" "hash": "#%c3%89t%C3%A9"
} }
} }
] ]

View File

@ -842,6 +842,36 @@
"search": "", "search": "",
"hash": "" "hash": ""
}, },
{
"input": "http://[::127.0.0.1]",
"base": "http://example.org/foo/bar",
"href": "http://[::7f00:1]/",
"origin": "http://[::7f00:1]",
"protocol": "http:",
"username": "",
"password": "",
"host": "[::7f00:1]",
"hostname": "[::7f00:1]",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://[0:0:0:0:0:0:13.1.68.3]",
"base": "http://example.org/foo/bar",
"href": "http://[::d01:4403]/",
"origin": "http://[::d01:4403]",
"protocol": "http:",
"username": "",
"password": "",
"host": "[::d01:4403]",
"hostname": "[::d01:4403]",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{ {
"input": "http://[2001::1]:80", "input": "http://[2001::1]:80",
"base": "http://example.org/foo/bar", "base": "http://example.org/foo/bar",
@ -1264,7 +1294,7 @@
{ {
"input": "#β", "input": "#β",
"base": "http://example.org/foo/bar", "base": "http://example.org/foo/bar",
"href": "http://example.org/foo/bar#β", "href": "http://example.org/foo/bar#%CE%B2",
"origin": "http://example.org", "origin": "http://example.org",
"protocol": "http:", "protocol": "http:",
"username": "", "username": "",
@ -1274,7 +1304,7 @@
"port": "", "port": "",
"pathname": "/foo/bar", "pathname": "/foo/bar",
"search": "", "search": "",
"hash": "#β" "hash": "#%CE%B2"
}, },
{ {
"input": "data:text/html,test#test", "input": "data:text/html,test#test",
@ -1291,6 +1321,21 @@
"search": "", "search": "",
"hash": "#test" "hash": "#test"
}, },
{
"input": "tel:1234567890",
"base": "http://example.org/foo/bar",
"href": "tel:1234567890",
"origin": "null",
"protocol": "tel:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "1234567890",
"search": "",
"hash": ""
},
"# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/file.html", "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/file.html",
{ {
"input": "file:c:\\foo\\bar.html", "input": "file:c:\\foo\\bar.html",
@ -2118,7 +2163,7 @@
{ {
"input": "http://www.google.com/foo?bar=baz# »", "input": "http://www.google.com/foo?bar=baz# »",
"base": "about:blank", "base": "about:blank",
"href": "http://www.google.com/foo?bar=baz# »", "href": "http://www.google.com/foo?bar=baz# %C2%BB",
"origin": "http://www.google.com", "origin": "http://www.google.com",
"protocol": "http:", "protocol": "http:",
"username": "", "username": "",
@ -2128,12 +2173,12 @@
"port": "", "port": "",
"pathname": "/foo", "pathname": "/foo",
"search": "?bar=baz", "search": "?bar=baz",
"hash": "# »" "hash": "# %C2%BB"
}, },
{ {
"input": "data:test# »", "input": "data:test# »",
"base": "about:blank", "base": "about:blank",
"href": "data:test# »", "href": "data:test# %C2%BB",
"origin": "null", "origin": "null",
"protocol": "data:", "protocol": "data:",
"username": "", "username": "",
@ -2143,7 +2188,7 @@
"port": "", "port": "",
"pathname": "test", "pathname": "test",
"search": "", "search": "",
"hash": "# »" "hash": "# %C2%BB"
}, },
{ {
"input": "http://[www.google.com]/", "input": "http://[www.google.com]/",
@ -4165,6 +4210,22 @@
"search": "", "search": "",
"hash": "" "hash": ""
}, },
"# unknown scheme with path looking like a password",
{
"input": "sc::a@example.net",
"base": "about:blank",
"href": "sc::a@example.net",
"origin": "null",
"protocol": "sc:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": ":a@example.net",
"search": "",
"hash": ""
},
"# tests from jsdom/whatwg-url designed for code coverage", "# tests from jsdom/whatwg-url designed for code coverage",
{ {
"input": "http://127.0.0.1:10100/relative_import.html", "input": "http://127.0.0.1:10100/relative_import.html",
@ -4226,5 +4287,307 @@
"pathname": "/path", "pathname": "/path",
"search": "?query", "search": "?query",
"hash": "#frag" "hash": "#frag"
},
"# Stringification of URL.searchParams",
{
"input": "?a=b&c=d",
"base": "http://example.org/foo/bar",
"href": "http://example.org/foo/bar?a=b&c=d",
"origin": "http://example.org",
"protocol": "http:",
"username": "",
"password": "",
"host": "example.org",
"hostname": "example.org",
"port": "",
"pathname": "/foo/bar",
"search": "?a=b&c=d",
"searchParams": "a=b&c=d",
"hash": ""
},
{
"input": "??a=b&c=d",
"base": "http://example.org/foo/bar",
"href": "http://example.org/foo/bar??a=b&c=d",
"origin": "http://example.org",
"protocol": "http:",
"username": "",
"password": "",
"host": "example.org",
"hostname": "example.org",
"port": "",
"pathname": "/foo/bar",
"search": "??a=b&c=d",
"searchParams": "%3Fa=b&c=d",
"hash": ""
},
"# Scheme only",
{
"input": "http:",
"base": "http://example.org/foo/bar",
"href": "http://example.org/foo/bar",
"origin": "http://example.org",
"protocol": "http:",
"username": "",
"password": "",
"host": "example.org",
"hostname": "example.org",
"port": "",
"pathname": "/foo/bar",
"search": "",
"searchParams": "",
"hash": ""
},
{
"input": "http:",
"base": "https://example.org/foo/bar",
"failure": true
},
{
"input": "sc:",
"base": "https://example.org/foo/bar",
"href": "sc:",
"origin": "null",
"protocol": "sc:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "",
"search": "",
"searchParams": "",
"hash": ""
},
"# Percent encoding of fragments",
{
"input": "http://foo.bar/baz?qux#foo\bbar",
"base": "about:blank",
"href": "http://foo.bar/baz?qux#foo%08bar",
"origin": "http://foo.bar",
"protocol": "http:",
"username": "",
"password": "",
"host": "foo.bar",
"hostname": "foo.bar",
"port": "",
"pathname": "/baz",
"search": "?qux",
"searchParams": "",
"hash": "#foo%08bar"
},
"# IPv4 parsing (via https://github.com/nodejs/node/pull/10317)",
{
"input": "http://192.168.257",
"base": "http://other.com/",
"href": "http://192.168.1.1/",
"origin": "http://192.168.1.1",
"protocol": "http:",
"username": "",
"password": "",
"host": "192.168.1.1",
"hostname": "192.168.1.1",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://192.168.257.com",
"base": "http://other.com/",
"href": "http://192.168.257.com/",
"origin": "http://192.168.257.com",
"protocol": "http:",
"username": "",
"password": "",
"host": "192.168.257.com",
"hostname": "192.168.257.com",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://256",
"base": "http://other.com/",
"href": "http://0.0.1.0/",
"origin": "http://0.0.1.0",
"protocol": "http:",
"username": "",
"password": "",
"host": "0.0.1.0",
"hostname": "0.0.1.0",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://256.com",
"base": "http://other.com/",
"href": "http://256.com/",
"origin": "http://256.com",
"protocol": "http:",
"username": "",
"password": "",
"host": "256.com",
"hostname": "256.com",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://999999999",
"base": "http://other.com/",
"href": "http://59.154.201.255/",
"origin": "http://59.154.201.255",
"protocol": "http:",
"username": "",
"password": "",
"host": "59.154.201.255",
"hostname": "59.154.201.255",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://999999999.com",
"base": "http://other.com/",
"href": "http://999999999.com/",
"origin": "http://999999999.com",
"protocol": "http:",
"username": "",
"password": "",
"host": "999999999.com",
"hostname": "999999999.com",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://10000000000",
"base": "http://other.com/",
"failure": true
},
{
"input": "http://10000000000.com",
"base": "http://other.com/",
"href": "http://10000000000.com/",
"origin": "http://10000000000.com",
"protocol": "http:",
"username": "",
"password": "",
"host": "10000000000.com",
"hostname": "10000000000.com",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://4294967295",
"base": "http://other.com/",
"href": "http://255.255.255.255/",
"origin": "http://255.255.255.255",
"protocol": "http:",
"username": "",
"password": "",
"host": "255.255.255.255",
"hostname": "255.255.255.255",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://4294967296",
"base": "http://other.com/",
"failure": true
},
{
"input": "http://0xffffffff",
"base": "http://other.com/",
"href": "http://255.255.255.255/",
"origin": "http://255.255.255.255",
"protocol": "http:",
"username": "",
"password": "",
"host": "255.255.255.255",
"hostname": "255.255.255.255",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://0xffffffff1",
"base": "http://other.com/",
"failure": true
},
{
"input": "http://256.256.256.256",
"base": "http://other.com/",
"failure": true
},
{
"input": "http://256.256.256.256.256",
"base": "http://other.com/",
"href": "http://256.256.256.256.256/",
"origin": "http://256.256.256.256.256",
"protocol": "http:",
"username": "",
"password": "",
"host": "256.256.256.256.256",
"hostname": "256.256.256.256.256",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
"# file URLs relative to other file URLs (via https://github.com/jsdom/whatwg-url/pull/60)",
{
"input": "pix/submit.gif",
"base": "file:///C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/anchor.html",
"href": "file:///C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/pix/submit.gif",
"protocol": "file:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "/C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/pix/submit.gif",
"search": "",
"hash": ""
},
{
"input": "..",
"base": "file:///C:/",
"href": "file:///C:/",
"protocol": "file:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "/C:/",
"search": "",
"hash": ""
},
{
"input": "..",
"base": "file:///",
"href": "file:///",
"protocol": "file:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
} }
] ]