querystring: improve unescapeBuffer() performance

Before this, V8 would deoptimize when an out-of-bounds `inIndex` was
passed to charCodeAt(). charCodeAt() returns NaN in such cases, so we
now check the bounds explicitly and emulate that behavior directly.

Also, calls to charCodeAt() on constant strings have been replaced
by the raw character codes, and the parser state is now stored as an
integer instead of a string. Both of these changes provide a slight
performance increase.

PR-URL: https://github.com/nodejs/node/pull/5012
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Roman Reiss <me@silverwind.io>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
This commit is contained in:
Brian White 2016-01-31 14:55:34 -05:00
parent a2a69a2b63
commit c8e650dee7

View File

@ -6,29 +6,25 @@ const QueryString = exports;
const Buffer = require('buffer').Buffer; const Buffer = require('buffer').Buffer;
/**
 * Returns the UTF-16 code unit of the first character of a string.
 *
 * @param {string} c - String whose first character is inspected.
 * @returns {number} Code unit at index 0, or NaN when `c` is empty.
 */
function charCode(c) {
  const firstUnit = c.charCodeAt(0);
  return firstUnit;
}
// a safe fast alternative to decodeURIComponent // a safe fast alternative to decodeURIComponent
QueryString.unescapeBuffer = function(s, decodeSpaces) { QueryString.unescapeBuffer = function(s, decodeSpaces) {
var out = new Buffer(s.length); var out = new Buffer(s.length);
var state = 'CHAR'; // states: CHAR, HEX0, HEX1 var state = 0;
var n, m, hexchar; var n, m, hexchar;
for (var inIndex = 0, outIndex = 0; inIndex <= s.length; inIndex++) { for (var inIndex = 0, outIndex = 0; inIndex <= s.length; inIndex++) {
var c = s.charCodeAt(inIndex); var c = inIndex < s.length ? s.charCodeAt(inIndex) : NaN;
switch (state) { switch (state) {
case 'CHAR': case 0: // Any character
switch (c) { switch (c) {
case charCode('%'): case 37: // '%'
n = 0; n = 0;
m = 0; m = 0;
state = 'HEX0'; state = 1;
break; break;
case charCode('+'): case 43: // '+'
if (decodeSpaces) c = charCode(' '); if (decodeSpaces)
c = 32; // ' '
// falls through // falls through
default: default:
out[outIndex++] = c; out[outIndex++] = c;
@ -36,33 +32,33 @@ QueryString.unescapeBuffer = function(s, decodeSpaces) {
} }
break; break;
case 'HEX0': case 1: // First hex digit
state = 'HEX1';
hexchar = c; hexchar = c;
if (charCode('0') <= c && c <= charCode('9')) { if (c >= 48/*0*/ && c <= 57/*9*/) {
n = c - charCode('0'); n = c - 48/*0*/;
} else if (charCode('a') <= c && c <= charCode('f')) { } else if (c >= 65/*A*/ && c <= 70/*F*/) {
n = c - charCode('a') + 10; n = c - 65/*A*/ + 10;
} else if (charCode('A') <= c && c <= charCode('F')) { } else if (c >= 97/*a*/ && c <= 102/*f*/) {
n = c - charCode('A') + 10; n = c - 97/*a*/ + 10;
} else { } else {
out[outIndex++] = charCode('%'); out[outIndex++] = 37/*%*/;
out[outIndex++] = c; out[outIndex++] = c;
state = 'CHAR'; state = 0;
break; break;
} }
state = 2;
break; break;
case 'HEX1': case 2: // Second hex digit
state = 'CHAR'; state = 0;
if (charCode('0') <= c && c <= charCode('9')) { if (c >= 48/*0*/ && c <= 57/*9*/) {
m = c - charCode('0'); m = c - 48/*0*/;
} else if (charCode('a') <= c && c <= charCode('f')) { } else if (c >= 65/*A*/ && c <= 70/*F*/) {
m = c - charCode('a') + 10; m = c - 65/*A*/ + 10;
} else if (charCode('A') <= c && c <= charCode('F')) { } else if (c >= 97/*a*/ && c <= 102/*f*/) {
m = c - charCode('A') + 10; m = c - 97/*a*/ + 10;
} else { } else {
out[outIndex++] = charCode('%'); out[outIndex++] = 37/*%*/;
out[outIndex++] = hexchar; out[outIndex++] = hexchar;
out[outIndex++] = c; out[outIndex++] = c;
break; break;