url: reduce duplicated code in autoEscapeStr

PR-URL: https://github.com/nodejs/node/pull/18613
Reviewed-By: Ruben Bridgewater <ruben@bridgewater.de>
This commit is contained in:
Weijia Wang 2018-02-07 11:22:51 +08:00
parent 8476815f01
commit 3cef3e61d6
2 changed files with 47 additions and 87 deletions

View File

@ -0,0 +1,22 @@
'use strict';
const common = require('../common.js');
const url = require('url');
// Sample URLs fed to url.parse(), keyed by benchmark `type`.
const inputs = {
// Contains no character that url.parse() auto-escapes.
normal: 'http://foo.com/bar',
// Contains '{', '}', '^' and '`', all of which are auto-escaped.
escaped: 'https://foo.bar/{}^`/abcd'
};
// Benchmark configuration: `type` takes each key of `inputs`, `n` is the
// number of url.parse() calls timed by main().
// NOTE(review): presumably createBenchmark invokes main once per
// (type, n) combination -- confirm against ../common.js.
const bench = common.createBenchmark(main, {
type: Object.keys(inputs),
n: [1e7]
});
/**
 * Benchmark body: times `n` back-to-back `url.parse()` calls on one sample URL.
 *
 * @param {object} conf Benchmark configuration.
 * @param {string} conf.type Key into `inputs` selecting the URL to parse.
 * @param {number} conf.n Number of `url.parse()` calls to time.
 */
function main({ type, n }) {
  // An unknown `type` falls back to parsing the empty string.
  const sample = inputs[type] || '';

  bench.start();
  let done = 0;
  while (done < n) {
    url.parse(sample);
    done += 1;
  }
  // Report the number of operations performed in the timed section.
  bench.end(n);
}

View File

@ -439,6 +439,24 @@ function validateHostname(self, rest, hostname) {
}
}
// Lookup table used for auto-escaping: escapedCodes[charCode] is the
// percent-encoded replacement for that character, or '' when the character
// is left untouched. The table covers char codes 0 through 125; any higher
// code indexes past the end and yields undefined, which is falsy as well.
// A dense Array is used because (per the original note) it is faster than
// an Object/Map lookup.
const escapedCodes = (() => {
  // [char code, replacement] pairs. Must stay sorted by ascending char code.
  const escapes = [
    [9, '%09'],    // '\t'
    [10, '%0A'],   // '\n'
    [13, '%0D'],   // '\r'
    [32, '%20'],   // ' '
    [34, '%22'],   // '"'
    [39, '%27'],   // '\''
    [60, '%3C'],   // '<'
    [62, '%3E'],   // '>'
    [92, '%5C'],   // '\\'
    [94, '%5E'],   // '^'
    [96, '%60'],   // '`'
    [123, '%7B'],  // '{'
    [124, '%7C'],  // '|'
    [125, '%7D']   // '}'
  ];

  const table = [];
  for (const [code, replacement] of escapes) {
    // Pad the gap since the previous escape with empty strings so that the
    // replacement lands exactly at index `code`.
    while (table.length < code)
      table.push('');
    table.push(replacement);
  }
  return table;
})();
// Automatically escape all delimiters and unwise characters from RFC 2396.
// Also escape single quotes in case of an XSS attack.
// Return the escaped string.
@ -446,94 +464,14 @@ function autoEscapeStr(rest) {
var escaped = '';
var lastEscapedPos = 0;
for (var i = 0; i < rest.length; ++i) {
// Manual switching is faster than using a Map/Object.
// `escaped` contains substring up to the last escaped character.
switch (rest.charCodeAt(i)) {
case 9: // '\t'
// Concat if there are ordinary characters in the middle.
if (i > lastEscapedPos)
escaped += rest.slice(lastEscapedPos, i);
escaped += '%09';
lastEscapedPos = i + 1;
break;
case 10: // '\n'
if (i > lastEscapedPos)
escaped += rest.slice(lastEscapedPos, i);
escaped += '%0A';
lastEscapedPos = i + 1;
break;
case 13: // '\r'
if (i > lastEscapedPos)
escaped += rest.slice(lastEscapedPos, i);
escaped += '%0D';
lastEscapedPos = i + 1;
break;
case 32: // ' '
if (i > lastEscapedPos)
escaped += rest.slice(lastEscapedPos, i);
escaped += '%20';
lastEscapedPos = i + 1;
break;
case 34: // '"'
if (i > lastEscapedPos)
escaped += rest.slice(lastEscapedPos, i);
escaped += '%22';
lastEscapedPos = i + 1;
break;
case 39: // '\''
if (i > lastEscapedPos)
escaped += rest.slice(lastEscapedPos, i);
escaped += '%27';
lastEscapedPos = i + 1;
break;
case 60: // '<'
if (i > lastEscapedPos)
escaped += rest.slice(lastEscapedPos, i);
escaped += '%3C';
lastEscapedPos = i + 1;
break;
case 62: // '>'
if (i > lastEscapedPos)
escaped += rest.slice(lastEscapedPos, i);
escaped += '%3E';
lastEscapedPos = i + 1;
break;
case 92: // '\\'
if (i > lastEscapedPos)
escaped += rest.slice(lastEscapedPos, i);
escaped += '%5C';
lastEscapedPos = i + 1;
break;
case 94: // '^'
if (i > lastEscapedPos)
escaped += rest.slice(lastEscapedPos, i);
escaped += '%5E';
lastEscapedPos = i + 1;
break;
case 96: // '`'
if (i > lastEscapedPos)
escaped += rest.slice(lastEscapedPos, i);
escaped += '%60';
lastEscapedPos = i + 1;
break;
case 123: // '{'
if (i > lastEscapedPos)
escaped += rest.slice(lastEscapedPos, i);
escaped += '%7B';
lastEscapedPos = i + 1;
break;
case 124: // '|'
if (i > lastEscapedPos)
escaped += rest.slice(lastEscapedPos, i);
escaped += '%7C';
lastEscapedPos = i + 1;
break;
case 125: // '}'
if (i > lastEscapedPos)
escaped += rest.slice(lastEscapedPos, i);
escaped += '%7D';
lastEscapedPos = i + 1;
break;
var escapedChar = escapedCodes[rest.charCodeAt(i)];
if (escapedChar) {
// Concat if there are ordinary characters in the middle.
if (i > lastEscapedPos)
escaped += rest.slice(lastEscapedPos, i);
escaped += escapedChar;
lastEscapedPos = i + 1;
}
}
if (lastEscapedPos === 0) // Nothing has been escaped.