url: improve spec compliance of WHATWG URL

This patch contains the following changes:

url: make IPv4 parser more spec compliant

* Return int64_t from ParseNumber to prevent overflow for valid big numbers
* Don't throw when there are more than 4 parts (it cannot be an IP
address)
* Correctly interpret the address and don't always throw when there are
numbers > 255

Ref: https://url.spec.whatwg.org/#concept-ipv4-parser
Fixes: https://github.com/nodejs/node/issues/10306

url: percent encode fragment to follow spec change

Ref: https://github.com/whatwg/url/issues/150
Ref: 373dbedbbf

url: fix URL#search setter

The check for empty string must be done before removing the leading '?'.

Ref: https://url.spec.whatwg.org/#dom-url-search

url: set port to null if an empty string is given

This is to follow a spec change.

Ref: https://github.com/whatwg/url/pull/113

url: fix parsing of paths with Windows drive letter

test: update WHATWG URL test fixtures

PR-URL: https://github.com/nodejs/node/pull/10317
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com>
This commit is contained in:
Michaël Zasso 2016-12-17 13:49:05 +01:00
parent ee9df357ff
commit db18dd8356
4 changed files with 429 additions and 51 deletions

View File

@ -444,8 +444,7 @@ Object.defineProperties(URL.prototype, {
return;
port = String(port);
if (port === '') {
// Currently, if port number is empty, left unchanged.
// TODO(jasnell): This might be changing in the spec
ctx.port = undefined;
return;
}
binding.parse(port, binding.kPort, null, ctx,
@ -478,13 +477,13 @@ Object.defineProperties(URL.prototype, {
set(search) {
const ctx = this[context];
search = String(search);
if (search[0] === '?') search = search.slice(1);
if (!search) {
ctx.query = null;
ctx.flags &= ~binding.URL_FLAGS_HAS_QUERY;
this[searchParams][searchParams] = {};
return;
}
if (search[0] === '?') search = search.slice(1);
ctx.query = '';
binding.parse(search, binding.kQuery, null, ctx,
onParseSearchComplete.bind(this));

View File

@ -261,7 +261,7 @@ namespace url {
return type;
}
static inline int ParseNumber(const char* start, const char* end) {
static inline int64_t ParseNumber(const char* start, const char* end) {
unsigned R = 10;
if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
start += 2;
@ -293,7 +293,7 @@ namespace url {
}
p++;
}
return strtol(start, NULL, R);
return strtoll(start, NULL, R);
}
static url_host_type ParseIPv4Host(url_host* host,
@ -305,7 +305,8 @@ namespace url {
const char* end = pointer + length;
int parts = 0;
uint32_t val = 0;
unsigned numbers[4];
uint64_t numbers[4];
int tooBigNumbers = 0;
if (length == 0)
goto end;
@ -313,20 +314,16 @@ namespace url {
const char ch = pointer < end ? pointer[0] : kEOL;
const int remaining = end - pointer - 1;
if (ch == '.' || ch == kEOL) {
if (++parts > 4 || pointer - mark == 0)
break;
int n = ParseNumber(mark, pointer);
if (n < 0) {
type = HOST_TYPE_DOMAIN;
if (++parts > 4)
goto end;
}
if (pointer - mark == 10) {
numbers[parts - 1] = n;
if (pointer - mark == 0)
break;
}
int64_t n = ParseNumber(mark, pointer);
if (n < 0)
goto end;
if (n > 255) {
type = HOST_TYPE_FAILED;
goto end;
tooBigNumbers++;
}
numbers[parts - 1] = n;
mark = pointer + 1;
@ -335,14 +332,23 @@ namespace url {
}
pointer++;
}
CHECK_GT(parts, 0);
// If any but the last item in numbers is greater than 255, return failure.
// If the last item in numbers is greater than or equal to
// 256^(5 - the number of items in numbers), return failure.
if (tooBigNumbers > 1 ||
(tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
type = HOST_TYPE_FAILED;
goto end;
}
type = HOST_TYPE_IPV4;
if (parts > 0) {
val = numbers[parts - 1];
for (int n = 0; n < parts - 1; n++) {
double b = 3-n;
val += numbers[n] * pow(256, b);
}
val = numbers[parts - 1];
for (int n = 0; n < parts - 1; n++) {
double b = 3 - n;
val += numbers[n] * pow(256, b);
}
host->value.ipv4 = val;
@ -618,6 +624,13 @@ namespace url {
}
}
static inline void ShortenUrlPath(struct url_data* url) {
if (url->path.empty()) return;
if (url->path.size() == 1 && url->scheme == "file:" &&
NORMALIZED_WINDOWS_DRIVE_LETTER(url->path[0])) return;
url->path.pop_back();
}
static void Parse(Environment* env,
Local<Value> recv,
const char* input,
@ -895,8 +908,7 @@ namespace url {
if (DOES_HAVE_PATH(base)) {
SET_HAVE_PATH()
url.path = base.path;
if (!url.path.empty())
url.path.pop_back();
ShortenUrlPath(&url);
}
url.port = base.port;
state = kPath;
@ -1112,8 +1124,7 @@ namespace url {
SET_HAVE_PATH()
url.path = base.path;
}
if (!url.path.empty())
url.path.pop_back();
ShortenUrlPath(&url);
}
state = kPath;
continue;
@ -1172,8 +1183,7 @@ namespace url {
special_back_slash ||
(!state_override && (ch == '?' || ch == '#'))) {
if (IsDoubleDotSegment(buffer)) {
if (!url.path.empty())
url.path.pop_back();
ShortenUrlPath(&url);
if (ch != '/' && !special_back_slash) {
SET_HAVE_PATH()
url.path.push_back("");
@ -1247,7 +1257,7 @@ namespace url {
case 0:
break;
default:
buffer += ch;
AppendOrEscape(&buffer, ch, SimpleEncodeSet);
}
break;
default:

View File

@ -347,7 +347,7 @@
}
},
{
"comment": "Port number is unchanges if empty in the new value. Note: this may change, see https://github.com/whatwg/url/pull/113",
"comment": "Port number is unchanged if not specified",
"href": "http://example.net:8080",
"new_value": "example.com:",
"expected": {
@ -358,7 +358,6 @@
}
},
{
"comment": "The empty host is not valid for special schemes",
"href": "http://example.net",
"new_value": "",
@ -763,14 +762,14 @@
}
},
{
"comment": "Port number is unchanged if empty in the new value. Note: this may change, see https://github.com/whatwg/url/pull/113",
"comment": "Port number is removed if empty is the new value",
"href": "http://example.net:8080",
"new_value": "",
"expected": {
"href": "http://example.net:8080/",
"host": "example.net:8080",
"href": "http://example.net/",
"host": "example.net",
"hostname": "example.net",
"port": "8080"
"port": ""
}
},
{
@ -975,6 +974,15 @@
"href": "http://example.net/..%c3%89t%C3%A9",
"pathname": "/..%c3%89t%C3%A9"
}
},
{
"comment": "? needs to be encoded",
"href": "http://example.net",
"new_value": "?",
"expected": {
"href": "http://example.net/%3F",
"pathname": "/%3F"
}
}
],
"search": [
@ -1011,7 +1019,6 @@
}
},
{
"skip": "we do not pass this, but we do match chromes behavior",
"href": "https://example.net?lang=en-US#nav",
"new_value": "?",
"expected": {
@ -1096,7 +1103,6 @@
}
},
{
"skip": "we do not pass this, but we do match chromes behavior",
"href": "https://example.net?lang=en-US#nav",
"new_value": "#",
"expected": {
@ -1113,12 +1119,12 @@
}
},
{
"comment": "No percent-encoding at all (!); nuls, tabs, and newlines are removed",
"comment": "Simple percent-encoding; nuls, tabs, and newlines are removed",
"href": "a:/",
"new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
"expected": {
"href": "a:/#\u0001\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
"hash": "#\u0001\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé"
"href": "a:/#%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9",
"hash": "#%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9"
}
},
{
@ -1126,9 +1132,9 @@
"href": "http://example.net",
"new_value": "%c3%89té",
"expected": {
"href": "http://example.net/#%c3%89té",
"hash": "#%c3%89té"
"href": "http://example.net/#%c3%89t%C3%A9",
"hash": "#%c3%89t%C3%A9"
}
}
]
}
}

View File

@ -842,6 +842,36 @@
"search": "",
"hash": ""
},
{
"input": "http://[::127.0.0.1]",
"base": "http://example.org/foo/bar",
"href": "http://[::7f00:1]/",
"origin": "http://[::7f00:1]",
"protocol": "http:",
"username": "",
"password": "",
"host": "[::7f00:1]",
"hostname": "[::7f00:1]",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://[0:0:0:0:0:0:13.1.68.3]",
"base": "http://example.org/foo/bar",
"href": "http://[::d01:4403]/",
"origin": "http://[::d01:4403]",
"protocol": "http:",
"username": "",
"password": "",
"host": "[::d01:4403]",
"hostname": "[::d01:4403]",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://[2001::1]:80",
"base": "http://example.org/foo/bar",
@ -1264,7 +1294,7 @@
{
"input": "#β",
"base": "http://example.org/foo/bar",
"href": "http://example.org/foo/bar#β",
"href": "http://example.org/foo/bar#%CE%B2",
"origin": "http://example.org",
"protocol": "http:",
"username": "",
@ -1274,7 +1304,7 @@
"port": "",
"pathname": "/foo/bar",
"search": "",
"hash": "#β"
"hash": "#%CE%B2"
},
{
"input": "data:text/html,test#test",
@ -1291,6 +1321,21 @@
"search": "",
"hash": "#test"
},
{
"input": "tel:1234567890",
"base": "http://example.org/foo/bar",
"href": "tel:1234567890",
"origin": "null",
"protocol": "tel:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "1234567890",
"search": "",
"hash": ""
},
"# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/file.html",
{
"input": "file:c:\\foo\\bar.html",
@ -2118,7 +2163,7 @@
{
"input": "http://www.google.com/foo?bar=baz# »",
"base": "about:blank",
"href": "http://www.google.com/foo?bar=baz# »",
"href": "http://www.google.com/foo?bar=baz# %C2%BB",
"origin": "http://www.google.com",
"protocol": "http:",
"username": "",
@ -2128,12 +2173,12 @@
"port": "",
"pathname": "/foo",
"search": "?bar=baz",
"hash": "# »"
"hash": "# %C2%BB"
},
{
"input": "data:test# »",
"base": "about:blank",
"href": "data:test# »",
"href": "data:test# %C2%BB",
"origin": "null",
"protocol": "data:",
"username": "",
@ -2143,7 +2188,7 @@
"port": "",
"pathname": "test",
"search": "",
"hash": "# »"
"hash": "# %C2%BB"
},
{
"input": "http://[www.google.com]/",
@ -4165,6 +4210,22 @@
"search": "",
"hash": ""
},
"# unknown scheme with path looking like a password",
{
"input": "sc::a@example.net",
"base": "about:blank",
"href": "sc::a@example.net",
"origin": "null",
"protocol": "sc:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": ":a@example.net",
"search": "",
"hash": ""
},
"# tests from jsdom/whatwg-url designed for code coverage",
{
"input": "http://127.0.0.1:10100/relative_import.html",
@ -4226,5 +4287,307 @@
"pathname": "/path",
"search": "?query",
"hash": "#frag"
},
"# Stringification of URL.searchParams",
{
"input": "?a=b&c=d",
"base": "http://example.org/foo/bar",
"href": "http://example.org/foo/bar?a=b&c=d",
"origin": "http://example.org",
"protocol": "http:",
"username": "",
"password": "",
"host": "example.org",
"hostname": "example.org",
"port": "",
"pathname": "/foo/bar",
"search": "?a=b&c=d",
"searchParams": "a=b&c=d",
"hash": ""
},
{
"input": "??a=b&c=d",
"base": "http://example.org/foo/bar",
"href": "http://example.org/foo/bar??a=b&c=d",
"origin": "http://example.org",
"protocol": "http:",
"username": "",
"password": "",
"host": "example.org",
"hostname": "example.org",
"port": "",
"pathname": "/foo/bar",
"search": "??a=b&c=d",
"searchParams": "%3Fa=b&c=d",
"hash": ""
},
"# Scheme only",
{
"input": "http:",
"base": "http://example.org/foo/bar",
"href": "http://example.org/foo/bar",
"origin": "http://example.org",
"protocol": "http:",
"username": "",
"password": "",
"host": "example.org",
"hostname": "example.org",
"port": "",
"pathname": "/foo/bar",
"search": "",
"searchParams": "",
"hash": ""
},
{
"input": "http:",
"base": "https://example.org/foo/bar",
"failure": true
},
{
"input": "sc:",
"base": "https://example.org/foo/bar",
"href": "sc:",
"origin": "null",
"protocol": "sc:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "",
"search": "",
"searchParams": "",
"hash": ""
},
"# Percent encoding of fragments",
{
"input": "http://foo.bar/baz?qux#foo\bbar",
"base": "about:blank",
"href": "http://foo.bar/baz?qux#foo%08bar",
"origin": "http://foo.bar",
"protocol": "http:",
"username": "",
"password": "",
"host": "foo.bar",
"hostname": "foo.bar",
"port": "",
"pathname": "/baz",
"search": "?qux",
"searchParams": "",
"hash": "#foo%08bar"
},
"# IPv4 parsing (via https://github.com/nodejs/node/pull/10317)",
{
"input": "http://192.168.257",
"base": "http://other.com/",
"href": "http://192.168.1.1/",
"origin": "http://192.168.1.1",
"protocol": "http:",
"username": "",
"password": "",
"host": "192.168.1.1",
"hostname": "192.168.1.1",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://192.168.257.com",
"base": "http://other.com/",
"href": "http://192.168.257.com/",
"origin": "http://192.168.257.com",
"protocol": "http:",
"username": "",
"password": "",
"host": "192.168.257.com",
"hostname": "192.168.257.com",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://256",
"base": "http://other.com/",
"href": "http://0.0.1.0/",
"origin": "http://0.0.1.0",
"protocol": "http:",
"username": "",
"password": "",
"host": "0.0.1.0",
"hostname": "0.0.1.0",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://256.com",
"base": "http://other.com/",
"href": "http://256.com/",
"origin": "http://256.com",
"protocol": "http:",
"username": "",
"password": "",
"host": "256.com",
"hostname": "256.com",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://999999999",
"base": "http://other.com/",
"href": "http://59.154.201.255/",
"origin": "http://59.154.201.255",
"protocol": "http:",
"username": "",
"password": "",
"host": "59.154.201.255",
"hostname": "59.154.201.255",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://999999999.com",
"base": "http://other.com/",
"href": "http://999999999.com/",
"origin": "http://999999999.com",
"protocol": "http:",
"username": "",
"password": "",
"host": "999999999.com",
"hostname": "999999999.com",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://10000000000",
"base": "http://other.com/",
"failure": true
},
{
"input": "http://10000000000.com",
"base": "http://other.com/",
"href": "http://10000000000.com/",
"origin": "http://10000000000.com",
"protocol": "http:",
"username": "",
"password": "",
"host": "10000000000.com",
"hostname": "10000000000.com",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://4294967295",
"base": "http://other.com/",
"href": "http://255.255.255.255/",
"origin": "http://255.255.255.255",
"protocol": "http:",
"username": "",
"password": "",
"host": "255.255.255.255",
"hostname": "255.255.255.255",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://4294967296",
"base": "http://other.com/",
"failure": true
},
{
"input": "http://0xffffffff",
"base": "http://other.com/",
"href": "http://255.255.255.255/",
"origin": "http://255.255.255.255",
"protocol": "http:",
"username": "",
"password": "",
"host": "255.255.255.255",
"hostname": "255.255.255.255",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://0xffffffff1",
"base": "http://other.com/",
"failure": true
},
{
"input": "http://256.256.256.256",
"base": "http://other.com/",
"failure": true
},
{
"input": "http://256.256.256.256.256",
"base": "http://other.com/",
"href": "http://256.256.256.256.256/",
"origin": "http://256.256.256.256.256",
"protocol": "http:",
"username": "",
"password": "",
"host": "256.256.256.256.256",
"hostname": "256.256.256.256.256",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
"# file URLs relative to other file URLs (via https://github.com/jsdom/whatwg-url/pull/60)",
{
"input": "pix/submit.gif",
"base": "file:///C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/anchor.html",
"href": "file:///C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/pix/submit.gif",
"protocol": "file:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "/C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/pix/submit.gif",
"search": "",
"hash": ""
},
{
"input": "..",
"base": "file:///C:/",
"href": "file:///C:/",
"protocol": "file:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "/C:/",
"search": "",
"hash": ""
},
{
"input": "..",
"base": "file:///",
"href": "file:///",
"protocol": "file:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
}
]
]