url: added url fragment lookup table
Percent-encoded additional characters in fragment state with new FRAGMENT_ENCODE_SET lookup table. The fragment percent-encode set includes the C0 control percent-encode set and code points U+0020, U+0022, U+003C, U+003E, and U+0060. PR-URL: https://github.com/nodejs/node/pull/17627 Fixes: https://github.com/nodejs/node/issues/17540 Reviewed-By: Timothy Gu <timothygu99@gmail.com> Reviewed-By: Daijiro Wachi <daijiro.wachi@gmail.com> Reviewed-By: Ruben Bridgewater <ruben@bridgewater.de> Reviewed-By: James M Snell <jasnell@gmail.com>
This commit is contained in:
parent
203ce5f9e0
commit
897f457f5d
@ -1107,12 +1107,15 @@ forward slash (`/`) character is encoded as `%2F`.
|
||||
The [WHATWG URL Standard][] uses a more selective and fine grained approach to
|
||||
selecting encoded characters than that used by the Legacy API.
|
||||
|
||||
The WHATWG algorithm defines three "percent-encode sets" that describe ranges
|
||||
The WHATWG algorithm defines four "percent-encode sets" that describe ranges
|
||||
of characters that must be percent-encoded:
|
||||
|
||||
* The *C0 control percent-encode set* includes code points in range U+0000 to
|
||||
U+001F (inclusive) and all code points greater than U+007E.
|
||||
|
||||
* The *fragment percent-encode set* includes the *C0 control percent-encode set*
|
||||
and code points U+0020, U+0022, U+003C, U+003E, and U+0060.
|
||||
|
||||
* The *path percent-encode set* includes the *C0 control percent-encode set*
|
||||
and code points U+0020, U+0022, U+0023, U+003C, U+003E, U+003F, U+0060,
|
||||
U+007B, and U+007D.
|
||||
@ -1123,9 +1126,9 @@ of characters that must be percent-encoded:
|
||||
|
||||
The *userinfo percent-encode set* is used exclusively for usernames and
|
||||
passwords encoded within the URL. The *path percent-encode set* is used for the
|
||||
path of most URLs. The *C0 control percent-encode set* is used for all
|
||||
other cases, including URL fragments in particular, but also host and path
|
||||
under certain specific conditions.
|
||||
path of most URLs. The *fragment percent-encode set* is used for URL fragments.
|
||||
The *C0 control percent-encode set* is used for host and path under certain
|
||||
specific conditions, in addition to all other cases.
|
||||
|
||||
When non-ASCII characters appear within a hostname, the hostname is encoded
|
||||
using the [Punycode][] algorithm. Note, however, that a hostname *may* contain
|
||||
|
@ -325,6 +325,74 @@ const uint8_t C0_CONTROL_ENCODE_SET[32] = {
|
||||
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
|
||||
};
|
||||
|
||||
// Bitmap for the WHATWG "fragment percent-encode set": the C0 control
// percent-encode set (U+0000 through U+001F and everything above U+007E)
// plus U+0020 (space), U+0022 ("), U+003C (<), U+003E (>) and U+0060 (`).
//
// Layout: 32 bytes x 8 bits, i.e. one bit for every byte value 0x00-0xFF.
// Each row below is a single byte covering the eight values listed in the
// comment above it; the value in column k is represented by bit (1 << k).
// A set bit marks the value as a member of the set. The table is kept in
// the same OR-of-bits form as the neighbouring *_ENCODE_SET tables so the
// rows can be compared against each other column by column.
const uint8_t FRAGMENT_ENCODE_SET[32] = {
  // 00     01     02     03     04     05     06     07
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 08     09     0A     0B     0C     0D     0E     0F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 10     11     12     13     14     15     16     17
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 18     19     1A     1B     1C     1D     1E     1F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 20     21     22     23     24     25     26     27
    0x01 | 0x00 | 0x04 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,  // space, "
  // 28     29     2A     2B     2C     2D     2E     2F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 30     31     32     33     34     35     36     37
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 38     39     3A     3B     3C     3D     3E     3F
    0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00,  // <, >
  // 40     41     42     43     44     45     46     47
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 48     49     4A     4B     4C     4D     4E     4F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 50     51     52     53     54     55     56     57
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 58     59     5A     5B     5C     5D     5E     5F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 60     61     62     63     64     65     66     67
    0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,  // `
  // 68     69     6A     6B     6C     6D     6E     6F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 70     71     72     73     74     75     76     77
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 78     79     7A     7B     7C     7D     7E     7F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80,  // DEL (> U+007E)
  // 80     81     82     83     84     85     86     87
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 88     89     8A     8B     8C     8D     8E     8F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 90     91     92     93     94     95     96     97
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 98     99     9A     9B     9C     9D     9E     9F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // A0     A1     A2     A3     A4     A5     A6     A7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // A8     A9     AA     AB     AC     AD     AE     AF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // B0     B1     B2     B3     B4     B5     B6     B7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // B8     B9     BA     BB     BC     BD     BE     BF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // C0     C1     C2     C3     C4     C5     C6     C7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // C8     C9     CA     CB     CC     CD     CE     CF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // D0     D1     D2     D3     D4     D5     D6     D7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // D8     D9     DA     DB     DC     DD     DE     DF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // E0     E1     E2     E3     E4     E5     E6     E7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // E8     E9     EA     EB     EC     ED     EE     EF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // F0     F1     F2     F3     F4     F5     F6     F7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // F8     F9     FA     FB     FC     FD     FE     FF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
};
|
||||
|
||||
|
||||
const uint8_t PATH_ENCODE_SET[32] = {
|
||||
// 00 01 02 03 04 05 06 07
|
||||
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
|
||||
@ -1889,7 +1957,7 @@ void URL::Parse(const char* input,
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
AppendOrEscape(&buffer, ch, C0_CONTROL_ENCODE_SET);
|
||||
AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
46
test/fixtures/url-setter-tests.js
vendored
46
test/fixtures/url-setter-tests.js
vendored
@ -2,7 +2,7 @@
|
||||
|
||||
/* The following tests are copied from WPT. Modifications to them should be
|
||||
upstreamed first. Refs:
|
||||
https://github.com/w3c/web-platform-tests/blob/b30abaecf4/url/setters_tests.json
|
||||
https://github.com/w3c/web-platform-tests/blob/ed4bb727ed/url/setters_tests.json
|
||||
License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html
|
||||
*/
|
||||
module.exports =
|
||||
@ -1793,13 +1793,53 @@ module.exports =
|
||||
"hash": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"href": "http://example.net",
|
||||
"new_value": "#foo bar",
|
||||
"expected": {
|
||||
"href": "http://example.net/#foo%20bar",
|
||||
"hash": "#foo%20bar"
|
||||
}
|
||||
},
|
||||
{
|
||||
"href": "http://example.net",
|
||||
"new_value": "#foo\"bar",
|
||||
"expected": {
|
||||
"href": "http://example.net/#foo%22bar",
|
||||
"hash": "#foo%22bar"
|
||||
}
|
||||
},
|
||||
{
|
||||
"href": "http://example.net",
|
||||
"new_value": "#foo<bar",
|
||||
"expected": {
|
||||
"href": "http://example.net/#foo%3Cbar",
|
||||
"hash": "#foo%3Cbar"
|
||||
}
|
||||
},
|
||||
{
|
||||
"href": "http://example.net",
|
||||
"new_value": "#foo>bar",
|
||||
"expected": {
|
||||
"href": "http://example.net/#foo%3Ebar",
|
||||
"hash": "#foo%3Ebar"
|
||||
}
|
||||
},
|
||||
{
|
||||
"href": "http://example.net",
|
||||
"new_value": "#foo`bar",
|
||||
"expected": {
|
||||
"href": "http://example.net/#foo%60bar",
|
||||
"hash": "#foo%60bar"
|
||||
}
|
||||
},
|
||||
{
|
||||
"comment": "Simple percent-encoding; nuls, tabs, and newlines are removed",
|
||||
"href": "a:/",
|
||||
"new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
|
||||
"expected": {
|
||||
"href": "a:/#%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9",
|
||||
"hash": "#%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9"
|
||||
"href": "a:/#%01%1F%20!%22#$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_%60az{|}~%7F%C2%80%C2%81%C3%89%C3%A9",
|
||||
"hash": "#%01%1F%20!%22#$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_%60az{|}~%7F%C2%80%C2%81%C3%89%C3%A9"
|
||||
}
|
||||
},
|
||||
{
|
||||
|
82
test/fixtures/url-tests.js
vendored
82
test/fixtures/url-tests.js
vendored
@ -2,7 +2,7 @@
|
||||
|
||||
/* The following tests are copied from WPT. Modifications to them should be
|
||||
upstreamed first. Refs:
|
||||
https://github.com/w3c/web-platform-tests/blob/11757f1/url/urltestdata.json
|
||||
https://github.com/w3c/web-platform-tests/blob/ed4bb727ed/url/urltestdata.json
|
||||
License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html
|
||||
*/
|
||||
module.exports =
|
||||
@ -161,7 +161,7 @@ module.exports =
|
||||
{
|
||||
"input": "http://f:21/ b ? d # e ",
|
||||
"base": "http://example.org/foo/bar",
|
||||
"href": "http://f:21/%20b%20?%20d%20# e",
|
||||
"href": "http://f:21/%20b%20?%20d%20#%20e",
|
||||
"origin": "http://f:21",
|
||||
"protocol": "http:",
|
||||
"username": "",
|
||||
@ -171,12 +171,12 @@ module.exports =
|
||||
"port": "21",
|
||||
"pathname": "/%20b%20",
|
||||
"search": "?%20d%20",
|
||||
"hash": "# e"
|
||||
"hash": "#%20e"
|
||||
},
|
||||
{
|
||||
"input": "lolscheme:x x#x x",
|
||||
"base": "about:blank",
|
||||
"href": "lolscheme:x x#x x",
|
||||
"href": "lolscheme:x x#x%20x",
|
||||
"protocol": "lolscheme:",
|
||||
"username": "",
|
||||
"password": "",
|
||||
@ -185,7 +185,7 @@ module.exports =
|
||||
"port": "",
|
||||
"pathname": "x x",
|
||||
"search": "",
|
||||
"hash": "#x x"
|
||||
"hash": "#x%20x"
|
||||
},
|
||||
{
|
||||
"input": "http://f:/c",
|
||||
@ -2268,7 +2268,7 @@ module.exports =
|
||||
{
|
||||
"input": "http://www.google.com/foo?bar=baz# »",
|
||||
"base": "about:blank",
|
||||
"href": "http://www.google.com/foo?bar=baz# %C2%BB",
|
||||
"href": "http://www.google.com/foo?bar=baz#%20%C2%BB",
|
||||
"origin": "http://www.google.com",
|
||||
"protocol": "http:",
|
||||
"username": "",
|
||||
@ -2278,12 +2278,12 @@ module.exports =
|
||||
"port": "",
|
||||
"pathname": "/foo",
|
||||
"search": "?bar=baz",
|
||||
"hash": "# %C2%BB"
|
||||
"hash": "#%20%C2%BB"
|
||||
},
|
||||
{
|
||||
"input": "data:test# »",
|
||||
"base": "about:blank",
|
||||
"href": "data:test# %C2%BB",
|
||||
"href": "data:test#%20%C2%BB",
|
||||
"origin": "null",
|
||||
"protocol": "data:",
|
||||
"username": "",
|
||||
@ -2293,7 +2293,7 @@ module.exports =
|
||||
"port": "",
|
||||
"pathname": "test",
|
||||
"search": "",
|
||||
"hash": "# %C2%BB"
|
||||
"hash": "#%20%C2%BB"
|
||||
},
|
||||
{
|
||||
"input": "http://www.google.com",
|
||||
@ -4795,6 +4795,70 @@ module.exports =
|
||||
"searchParams": "qux=",
|
||||
"hash": "#foo%08bar"
|
||||
},
|
||||
{
|
||||
"input": "http://foo.bar/baz?qux#foo\"bar",
|
||||
"base": "about:blank",
|
||||
"href": "http://foo.bar/baz?qux#foo%22bar",
|
||||
"origin": "http://foo.bar",
|
||||
"protocol": "http:",
|
||||
"username": "",
|
||||
"password": "",
|
||||
"host": "foo.bar",
|
||||
"hostname": "foo.bar",
|
||||
"port": "",
|
||||
"pathname": "/baz",
|
||||
"search": "?qux",
|
||||
"searchParams": "qux=",
|
||||
"hash": "#foo%22bar"
|
||||
},
|
||||
{
|
||||
"input": "http://foo.bar/baz?qux#foo<bar",
|
||||
"base": "about:blank",
|
||||
"href": "http://foo.bar/baz?qux#foo%3Cbar",
|
||||
"origin": "http://foo.bar",
|
||||
"protocol": "http:",
|
||||
"username": "",
|
||||
"password": "",
|
||||
"host": "foo.bar",
|
||||
"hostname": "foo.bar",
|
||||
"port": "",
|
||||
"pathname": "/baz",
|
||||
"search": "?qux",
|
||||
"searchParams": "qux=",
|
||||
"hash": "#foo%3Cbar"
|
||||
},
|
||||
{
|
||||
"input": "http://foo.bar/baz?qux#foo>bar",
|
||||
"base": "about:blank",
|
||||
"href": "http://foo.bar/baz?qux#foo%3Ebar",
|
||||
"origin": "http://foo.bar",
|
||||
"protocol": "http:",
|
||||
"username": "",
|
||||
"password": "",
|
||||
"host": "foo.bar",
|
||||
"hostname": "foo.bar",
|
||||
"port": "",
|
||||
"pathname": "/baz",
|
||||
"search": "?qux",
|
||||
"searchParams": "qux=",
|
||||
"hash": "#foo%3Ebar"
|
||||
},
|
||||
{
|
||||
"input": "http://foo.bar/baz?qux#foo`bar",
|
||||
"base": "about:blank",
|
||||
"href": "http://foo.bar/baz?qux#foo%60bar",
|
||||
"origin": "http://foo.bar",
|
||||
"protocol": "http:",
|
||||
"username": "",
|
||||
"password": "",
|
||||
"host": "foo.bar",
|
||||
"hostname": "foo.bar",
|
||||
"port": "",
|
||||
"pathname": "/baz",
|
||||
"search": "?qux",
|
||||
"searchParams": "qux=",
|
||||
"hash": "#foo%60bar"
|
||||
},
|
||||
"# IPv4 parsing (via https://github.com/nodejs/node/pull/10317)",
|
||||
{
|
||||
"input": "http://192.168.257",
|
||||
|
Loading…
x
Reference in New Issue
Block a user