test,url: improve escaping in url.parse

- rename variables in autoEscapeStr so they are easier to understand
- comment the escaping algorithm
- increase coverage for autoEscapeStr

PR-URL: https://github.com/nodejs/node/pull/10083
Reviewed-By: Anna Henningsen <anna@addaleax.net>
This commit is contained in:
joyeecheung 2016-12-01 19:11:43 -06:00 committed by Anna Henningsen
parent 72b1f79a64
commit 4422c35c5e
No known key found for this signature in database
GPG Key ID: D8B9F5AEAE84E4CF
2 changed files with 85 additions and 65 deletions

View File

@ -437,105 +437,111 @@ function validateHostname(self, rest, hostname) {
} }
} }
// Automatically escape all delimiters and unwise characters from RFC 2396.
// Also escape single quotes in case of an XSS attack.
// Return undefined if the string doesn't need escaping,
// otherwise return the escaped string.
function autoEscapeStr(rest) { function autoEscapeStr(rest) {
var newRest = ''; var escaped = '';
var lastPos = 0; var lastEscapedPos = 0;
for (var i = 0; i < rest.length; ++i) { for (var i = 0; i < rest.length; ++i) {
// Automatically escape all delimiters and unwise characters from RFC 2396 // Manual switching is faster than using a Map/Object.
// Also escape single quotes in case of an XSS attack // `escaped` contains substring up to the last escaped character.
switch (rest.charCodeAt(i)) { switch (rest.charCodeAt(i)) {
case 9: // '\t' case 9: // '\t'
if (i - lastPos > 0) // Concat if there are ordinary characters in the middle.
newRest += rest.slice(lastPos, i); if (i > lastEscapedPos)
newRest += '%09'; escaped += rest.slice(lastEscapedPos, i);
lastPos = i + 1; escaped += '%09';
lastEscapedPos = i + 1;
break; break;
case 10: // '\n' case 10: // '\n'
if (i - lastPos > 0) if (i > lastEscapedPos)
newRest += rest.slice(lastPos, i); escaped += rest.slice(lastEscapedPos, i);
newRest += '%0A'; escaped += '%0A';
lastPos = i + 1; lastEscapedPos = i + 1;
break; break;
case 13: // '\r' case 13: // '\r'
if (i - lastPos > 0) if (i > lastEscapedPos)
newRest += rest.slice(lastPos, i); escaped += rest.slice(lastEscapedPos, i);
newRest += '%0D'; escaped += '%0D';
lastPos = i + 1; lastEscapedPos = i + 1;
break; break;
case 32: // ' ' case 32: // ' '
if (i - lastPos > 0) if (i > lastEscapedPos)
newRest += rest.slice(lastPos, i); escaped += rest.slice(lastEscapedPos, i);
newRest += '%20'; escaped += '%20';
lastPos = i + 1; lastEscapedPos = i + 1;
break; break;
case 34: // '"' case 34: // '"'
if (i - lastPos > 0) if (i > lastEscapedPos)
newRest += rest.slice(lastPos, i); escaped += rest.slice(lastEscapedPos, i);
newRest += '%22'; escaped += '%22';
lastPos = i + 1; lastEscapedPos = i + 1;
break; break;
case 39: // '\'' case 39: // '\''
if (i - lastPos > 0) if (i > lastEscapedPos)
newRest += rest.slice(lastPos, i); escaped += rest.slice(lastEscapedPos, i);
newRest += '%27'; escaped += '%27';
lastPos = i + 1; lastEscapedPos = i + 1;
break; break;
case 60: // '<' case 60: // '<'
if (i - lastPos > 0) if (i > lastEscapedPos)
newRest += rest.slice(lastPos, i); escaped += rest.slice(lastEscapedPos, i);
newRest += '%3C'; escaped += '%3C';
lastPos = i + 1; lastEscapedPos = i + 1;
break; break;
case 62: // '>' case 62: // '>'
if (i - lastPos > 0) if (i > lastEscapedPos)
newRest += rest.slice(lastPos, i); escaped += rest.slice(lastEscapedPos, i);
newRest += '%3E'; escaped += '%3E';
lastPos = i + 1; lastEscapedPos = i + 1;
break; break;
case 92: // '\\' case 92: // '\\'
if (i - lastPos > 0) if (i > lastEscapedPos)
newRest += rest.slice(lastPos, i); escaped += rest.slice(lastEscapedPos, i);
newRest += '%5C'; escaped += '%5C';
lastPos = i + 1; lastEscapedPos = i + 1;
break; break;
case 94: // '^' case 94: // '^'
if (i - lastPos > 0) if (i > lastEscapedPos)
newRest += rest.slice(lastPos, i); escaped += rest.slice(lastEscapedPos, i);
newRest += '%5E'; escaped += '%5E';
lastPos = i + 1; lastEscapedPos = i + 1;
break; break;
case 96: // '`' case 96: // '`'
if (i - lastPos > 0) if (i > lastEscapedPos)
newRest += rest.slice(lastPos, i); escaped += rest.slice(lastEscapedPos, i);
newRest += '%60'; escaped += '%60';
lastPos = i + 1; lastEscapedPos = i + 1;
break; break;
case 123: // '{' case 123: // '{'
if (i - lastPos > 0) if (i > lastEscapedPos)
newRest += rest.slice(lastPos, i); escaped += rest.slice(lastEscapedPos, i);
newRest += '%7B'; escaped += '%7B';
lastPos = i + 1; lastEscapedPos = i + 1;
break; break;
case 124: // '|' case 124: // '|'
if (i - lastPos > 0) if (i > lastEscapedPos)
newRest += rest.slice(lastPos, i); escaped += rest.slice(lastEscapedPos, i);
newRest += '%7C'; escaped += '%7C';
lastPos = i + 1; lastEscapedPos = i + 1;
break; break;
case 125: // '}' case 125: // '}'
if (i - lastPos > 0) if (i > lastEscapedPos)
newRest += rest.slice(lastPos, i); escaped += rest.slice(lastEscapedPos, i);
newRest += '%7D'; escaped += '%7D';
lastPos = i + 1; lastEscapedPos = i + 1;
break; break;
} }
} }
if (lastPos === 0) if (lastEscapedPos === 0) // Nothing has been escaped.
return; return;
if (lastPos < rest.length) // There are ordinary characters at the end.
return newRest + rest.slice(lastPos); if (lastEscapedPos < rest.length)
else return escaped + rest.slice(lastEscapedPos);
return newRest; else // The last character is escaped.
return escaped;
} }
// format a parsed object into a url string // format a parsed object into a url string

View File

@ -834,6 +834,20 @@ var parseTests = {
query: '@c' query: '@c'
}, },
'http://a.b/\tbc\ndr\ref g"hq\'j<kl>?mn\\op^q=r`99{st|uv}wz': {
protocol: 'http:',
slashes: true,
host: 'a.b',
port: null,
hostname: 'a.b',
hash: null,
pathname: '/%09bc%0Adr%0Def%20g%22hq%27j%3Ckl%3E',
path: '/%09bc%0Adr%0Def%20g%22hq%27j%3Ckl%3E?mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz',
search: '?mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz',
query: 'mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz',
href: 'http://a.b/%09bc%0Adr%0Def%20g%22hq%27j%3Ckl%3E?mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz'
},
'http://a\r" \t\n<\'b:b@c\r\nd/e?f': { 'http://a\r" \t\n<\'b:b@c\r\nd/e?f': {
protocol: 'http:', protocol: 'http:',
slashes: true, slashes: true,