url: use SafeSet to filter known special protocols

Avoids a maintenance hazard when reviewers assume that
`hostlessProtocol` and `slashedProtocol` are disjoint.

The following may be counter-intuitive:

```js
// These objects seem to have no keys in common
const hostlessProtocol = { 'javascript': true };
const slashedProtocol = { 'http': true };
// A reasonable reviewer may assumes bothTrue is never truthy
function bothTrue(lowerProto) {
  return hostlessProtocol[lowerProto] && slashedProtocol[lowerProto];
}
// But
console.log(Boolean(bothTrue('constructor')));  // true
```

This change uses SafeSet instead of plain-old objects.

----

Rejected alternative:

We could have used object with a `null` prototype as lookup tables
so that `lowerProto` is never treated as a key into `Object.prototype`.

```js
const hostlessProtocol = { __proto__: null, 'javascript': true };
const slashedProtocol = { __proto__: null, 'http': true };

function bothTrue(lowerProto) {
  return hostlessProtocol[lowerProto] && slashedProtocol[lowerProto];
}

console.log(Boolean(bothTrue('constructor')));  // false
```

PR-URL: https://github.com/nodejs/node/pull/24703
Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
This commit is contained in:
Mike Samuel 2018-11-28 13:06:46 -05:00 committed by Rich Trott
parent 76faccccbb
commit 0d2311820d

View File

@ -26,6 +26,8 @@ const { toASCII } = process.binding('config').hasIntl ?
const { hexTable } = require('internal/querystring'); const { hexTable } = require('internal/querystring');
const { SafeSet } = require('internal/safe_globals');
const { const {
ERR_INVALID_ARG_TYPE ERR_INVALID_ARG_TYPE
} = require('internal/errors').codes; } = require('internal/errors').codes;
@ -76,28 +78,28 @@ const simplePathPattern = /^(\/\/?(?!\/)[^?\s]*)(\?[^\s]*)?$/;
const hostnameMaxLen = 255; const hostnameMaxLen = 255;
// protocols that can allow "unsafe" and "unwise" chars. // protocols that can allow "unsafe" and "unwise" chars.
const unsafeProtocol = { const unsafeProtocol = new SafeSet([
'javascript': true, 'javascript',
'javascript:': true 'javascript:'
}; ]);
// protocols that never have a hostname. // protocols that never have a hostname.
const hostlessProtocol = { const hostlessProtocol = new SafeSet([
'javascript': true, 'javascript',
'javascript:': true 'javascript:'
}; ]);
// protocols that always contain a // bit. // protocols that always contain a // bit.
const slashedProtocol = { const slashedProtocol = new SafeSet([
'http': true, 'http',
'http:': true, 'http:',
'https': true, 'https',
'https:': true, 'https:',
'ftp': true, 'ftp',
'ftp:': true, 'ftp:',
'gopher': true, 'gopher',
'gopher:': true, 'gopher:',
'file': true, 'file',
'file:': true 'file:'
}; ]);
const { const {
CHAR_SPACE, CHAR_SPACE,
CHAR_TAB, CHAR_TAB,
@ -267,14 +269,14 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
if (slashesDenoteHost || proto || hostPattern.test(rest)) { if (slashesDenoteHost || proto || hostPattern.test(rest)) {
var slashes = rest.charCodeAt(0) === CHAR_FORWARD_SLASH && var slashes = rest.charCodeAt(0) === CHAR_FORWARD_SLASH &&
rest.charCodeAt(1) === CHAR_FORWARD_SLASH; rest.charCodeAt(1) === CHAR_FORWARD_SLASH;
if (slashes && !(proto && hostlessProtocol[lowerProto])) { if (slashes && !(proto && hostlessProtocol.has(lowerProto))) {
rest = rest.slice(2); rest = rest.slice(2);
this.slashes = true; this.slashes = true;
} }
} }
if (!hostlessProtocol[lowerProto] && if (!hostlessProtocol.has(lowerProto) &&
(slashes || (proto && !slashedProtocol[proto]))) { (slashes || (proto && !slashedProtocol.has(proto)))) {
// there's a hostname. // there's a hostname.
// the first instance of /, ?, ;, or # ends the host. // the first instance of /, ?, ;, or # ends the host.
@ -400,7 +402,7 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
// now rest is set to the post-host stuff. // now rest is set to the post-host stuff.
// chop off any delim chars. // chop off any delim chars.
if (!unsafeProtocol[lowerProto]) { if (!unsafeProtocol.has(lowerProto)) {
// First, make 100% sure that any "autoEscape" chars get // First, make 100% sure that any "autoEscape" chars get
// escaped, even if encodeURIComponent doesn't think they // escaped, even if encodeURIComponent doesn't think they
// need to be. // need to be.
@ -447,7 +449,7 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
} else if (firstIdx > 0) { } else if (firstIdx > 0) {
this.pathname = rest.slice(0, firstIdx); this.pathname = rest.slice(0, firstIdx);
} }
if (slashedProtocol[lowerProto] && if (slashedProtocol.has(lowerProto) &&
this.hostname && !this.pathname) { this.hostname && !this.pathname) {
this.pathname = '/'; this.pathname = '/';
} }
@ -629,7 +631,7 @@ Url.prototype.format = function format() {
// only the slashedProtocols get the //. Not mailto:, xmpp:, etc. // only the slashedProtocols get the //. Not mailto:, xmpp:, etc.
// unless they had them to begin with. // unless they had them to begin with.
if (this.slashes || slashedProtocol[protocol]) { if (this.slashes || slashedProtocol.has(protocol)) {
if (this.slashes || host) { if (this.slashes || host) {
if (pathname && pathname.charCodeAt(0) !== CHAR_FORWARD_SLASH) if (pathname && pathname.charCodeAt(0) !== CHAR_FORWARD_SLASH)
pathname = '/' + pathname; pathname = '/' + pathname;
@ -701,7 +703,7 @@ Url.prototype.resolveObject = function resolveObject(relative) {
} }
// urlParse appends trailing / to urls like http://www.example.com // urlParse appends trailing / to urls like http://www.example.com
if (slashedProtocol[result.protocol] && if (slashedProtocol.has(result.protocol) &&
result.hostname && !result.pathname) { result.hostname && !result.pathname) {
result.path = result.pathname = '/'; result.path = result.pathname = '/';
} }
@ -719,7 +721,7 @@ Url.prototype.resolveObject = function resolveObject(relative) {
// if it is file:, then the host is dropped, // if it is file:, then the host is dropped,
// because that's known to be hostless. // because that's known to be hostless.
// anything else is assumed to be absolute. // anything else is assumed to be absolute.
if (!slashedProtocol[relative.protocol]) { if (!slashedProtocol.has(relative.protocol)) {
var keys = Object.keys(relative); var keys = Object.keys(relative);
for (var v = 0; v < keys.length; v++) { for (var v = 0; v < keys.length; v++) {
var k = keys[v]; var k = keys[v];
@ -732,7 +734,7 @@ Url.prototype.resolveObject = function resolveObject(relative) {
result.protocol = relative.protocol; result.protocol = relative.protocol;
if (!relative.host && if (!relative.host &&
!/^file:?$/.test(relative.protocol) && !/^file:?$/.test(relative.protocol) &&
!hostlessProtocol[relative.protocol]) { !hostlessProtocol.has(relative.protocol)) {
const relPath = (relative.pathname || '').split('/'); const relPath = (relative.pathname || '').split('/');
while (relPath.length && !(relative.host = relPath.shift())); while (relPath.length && !(relative.host = relPath.shift()));
if (!relative.host) relative.host = ''; if (!relative.host) relative.host = '';
@ -769,7 +771,8 @@ Url.prototype.resolveObject = function resolveObject(relative) {
var removeAllDots = mustEndAbs; var removeAllDots = mustEndAbs;
var srcPath = result.pathname && result.pathname.split('/') || []; var srcPath = result.pathname && result.pathname.split('/') || [];
var relPath = relative.pathname && relative.pathname.split('/') || []; var relPath = relative.pathname && relative.pathname.split('/') || [];
var noLeadingSlashes = result.protocol && !slashedProtocol[result.protocol]; var noLeadingSlashes = result.protocol &&
!slashedProtocol.has(result.protocol);
// if the url is a non-slashed url, then relative // if the url is a non-slashed url, then relative
// links like ../.. should be able // links like ../.. should be able