url: fix C0 control and whitespace handling
PR-URL: https://github.com/nodejs/node/pull/12846
Fixes: https://github.com/nodejs/node/issues/12825
Refs: https://github.com/w3c/web-platform-tests/pull/5792
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com>
This commit is contained in:
parent
525497596a
commit
841bb4c61f
@ -133,6 +133,9 @@ enum url_error_cb_args {
|
||||
// https://infra.spec.whatwg.org/#ascii-tab-or-newline
|
||||
// Matches exactly three characters: line feed, carriage return, and tab.
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\n' || ch == '\r' || ch == '\t'))
|
||||
|
||||
// https://infra.spec.whatwg.org/#c0-control-or-space
|
||||
// Matches every character from NUL ('\0') up to and including space (' ').
CHAR_TEST(8, IsC0ControlOrSpace, ('\0' <= ch && ch <= ' '))
|
||||
|
||||
// https://infra.spec.whatwg.org/#ascii-digit
|
||||
// Matches the ten characters '0' through '9', inclusive.
CHAR_TEST(8, IsASCIIDigit, ('0' <= ch && ch <= '9'))
|
||||
|
||||
@ -1134,15 +1137,45 @@ static inline void ShortenUrlPath(struct url_data* url) {
|
||||
}
|
||||
|
||||
void URL::Parse(const char* input,
|
||||
const size_t len,
|
||||
size_t len,
|
||||
enum url_parse_state state_override,
|
||||
struct url_data* url,
|
||||
bool has_url,
|
||||
const struct url_data* base,
|
||||
bool has_base) {
|
||||
const char* p = input;
|
||||
const char* end = input + len;
|
||||
|
||||
if (!has_url) {
|
||||
for (const char* ptr = p; ptr < end; ptr++) {
|
||||
if (IsC0ControlOrSpace(*ptr))
|
||||
p++;
|
||||
else
|
||||
break;
|
||||
}
|
||||
for (const char* ptr = end - 1; ptr >= p; ptr--) {
|
||||
if (IsC0ControlOrSpace(*ptr))
|
||||
end--;
|
||||
else
|
||||
break;
|
||||
}
|
||||
len = end - p;
|
||||
}
|
||||
|
||||
std::string whitespace_stripped;
|
||||
whitespace_stripped.reserve(len);
|
||||
for (const char* ptr = p; ptr < end; ptr++)
|
||||
if (!IsASCIITabOrNewline(*ptr))
|
||||
whitespace_stripped += *ptr;
|
||||
|
||||
input = whitespace_stripped.c_str();
|
||||
len = whitespace_stripped.size();
|
||||
p = input;
|
||||
end = input + len;
|
||||
|
||||
bool atflag = false;
|
||||
bool sbflag = false;
|
||||
bool uflag = false;
|
||||
int wskip = 0;
|
||||
|
||||
std::string buffer;
|
||||
url->scheme.reserve(len);
|
||||
@ -1159,9 +1192,6 @@ void URL::Parse(const char* input,
|
||||
enum url_parse_state state = has_state_override ? state_override :
|
||||
kSchemeStart;
|
||||
|
||||
const char* p = input;
|
||||
const char* end = input + len;
|
||||
|
||||
if (state < kSchemeStart || state > kFragment) {
|
||||
url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
|
||||
return;
|
||||
@ -1171,18 +1201,6 @@ void URL::Parse(const char* input,
|
||||
const char ch = p < end ? p[0] : kEOL;
|
||||
const size_t remaining = end == p ? 0 : (end - p - 1);
|
||||
|
||||
if (IsASCIITabOrNewline(ch)) {
|
||||
if (state == kAuthority) {
|
||||
// It's necessary to keep track of how much whitespace
|
||||
// is being ignored when in kAuthority state because of
|
||||
// how the buffer is managed. TODO: See if there's a better
|
||||
// way
|
||||
wskip++;
|
||||
}
|
||||
p++;
|
||||
continue;
|
||||
}
|
||||
|
||||
bool special = (url->flags & URL_FLAGS_SPECIAL);
|
||||
bool cannot_be_base;
|
||||
const bool special_back_slash = (special && ch == '\\');
|
||||
@ -1500,7 +1518,7 @@ void URL::Parse(const char* input,
|
||||
url->flags |= URL_FLAGS_FAILED;
|
||||
return;
|
||||
}
|
||||
p -= buffer.size() + 1 + wskip;
|
||||
p -= buffer.size() + 1;
|
||||
buffer.clear();
|
||||
state = kHost;
|
||||
} else {
|
||||
@ -1892,16 +1910,17 @@ static void Parse(Environment* env,
|
||||
HandleScope handle_scope(isolate);
|
||||
Context::Scope context_scope(context);
|
||||
|
||||
const bool has_context = context_obj->IsObject();
|
||||
const bool has_base = base_obj->IsObject();
|
||||
|
||||
struct url_data base;
|
||||
struct url_data url;
|
||||
if (context_obj->IsObject())
|
||||
if (has_context)
|
||||
HarvestContext(env, &url, context_obj.As<Object>());
|
||||
if (has_base)
|
||||
HarvestBase(env, &base, base_obj.As<Object>());
|
||||
|
||||
URL::Parse(input, len, state_override, &url, &base, has_base);
|
||||
URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
|
||||
if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
|
||||
((state_override != kUnknownState) &&
|
||||
(url.flags & URL_FLAGS_TERMINATED)))
|
||||
|
@ -81,30 +81,35 @@ struct url_data {
|
||||
class URL {
|
||||
public:
|
||||
static void Parse(const char* input,
|
||||
const size_t len,
|
||||
size_t len,
|
||||
enum url_parse_state state_override,
|
||||
struct url_data* url,
|
||||
bool has_url,
|
||||
const struct url_data* base,
|
||||
bool has_base);
|
||||
|
||||
URL(const char* input, const size_t len) {
|
||||
Parse(input, len, kUnknownState, &context_, nullptr, false);
|
||||
Parse(input, len, kUnknownState, &context_, false, nullptr, false);
|
||||
}
|
||||
|
||||
URL(const char* input, const size_t len, const URL* base) {
|
||||
if (base != nullptr)
|
||||
Parse(input, len, kUnknownState, &context_, &(base->context_), true);
|
||||
Parse(input, len, kUnknownState,
|
||||
&context_, false,
|
||||
&(base->context_), true);
|
||||
else
|
||||
Parse(input, len, kUnknownState, &context_, nullptr, false);
|
||||
Parse(input, len, kUnknownState, &context_, false, nullptr, false);
|
||||
}
|
||||
|
||||
URL(const char* input, const size_t len,
|
||||
const char* base, const size_t baselen) {
|
||||
if (base != nullptr && baselen > 0) {
|
||||
URL _base(base, baselen);
|
||||
Parse(input, len, kUnknownState, &context_, &(_base.context_), true);
|
||||
Parse(input, len, kUnknownState,
|
||||
&context_, false,
|
||||
&(_base.context_), true);
|
||||
} else {
|
||||
Parse(input, len, kUnknownState, &context_, nullptr, false);
|
||||
Parse(input, len, kUnknownState, &context_, false, nullptr, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
32
test/fixtures/url-tests.js
vendored
32
test/fixtures/url-tests.js
vendored
@ -1,7 +1,7 @@
|
||||
'use strict';
|
||||
|
||||
/* WPT Refs:
|
||||
https://github.com/w3c/web-platform-tests/blob/28541bb/url/urltestdata.json
|
||||
https://github.com/w3c/web-platform-tests/blob/0f26c418a5/url/urltestdata.json
|
||||
License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html
|
||||
*/
|
||||
module.exports =
|
||||
@ -3566,6 +3566,22 @@ module.exports =
|
||||
"search": "",
|
||||
"hash": ""
|
||||
},
|
||||
"Leading and trailing C0 control or space",
|
||||
{
|
||||
"input": "\u0000\u001b\u0004\u0012 http://example.com/\u001f \u000d ",
|
||||
"base": "about:blank",
|
||||
"href": "http://example.com/",
|
||||
"origin": "http://example.com",
|
||||
"protocol": "http:",
|
||||
"username": "",
|
||||
"password": "",
|
||||
"host": "example.com",
|
||||
"hostname": "example.com",
|
||||
"port": "",
|
||||
"pathname": "/",
|
||||
"search": "",
|
||||
"hash": ""
|
||||
},
|
||||
"Ideographic full stop (full-width period for Chinese, etc.) should be treated as a dot. U+3002 is mapped to U+002E (dot)",
|
||||
{
|
||||
"input": "http://www.foo。bar.com",
|
||||
@ -5487,6 +5503,20 @@ module.exports =
|
||||
"search": "",
|
||||
"hash": ""
|
||||
},
|
||||
{
|
||||
"input": "C|\n/",
|
||||
"base": "file://host/dir/file",
|
||||
"href": "file:///C:/",
|
||||
"protocol": "file:",
|
||||
"username": "",
|
||||
"password": "",
|
||||
"host": "",
|
||||
"hostname": "",
|
||||
"port": "",
|
||||
"pathname": "/C:/",
|
||||
"search": "",
|
||||
"hash": ""
|
||||
},
|
||||
{
|
||||
"input": "C|\\",
|
||||
"base": "file://host/dir/file",
|
||||
|
Loading…
x
Reference in New Issue
Block a user