querystring: improve parse() performance
This commit improves parse() performance by ~20-200% with the various querystring-parse benchmarks. Some optimization strategies used in this commit include: * Combining multiple searches (for '&', '=', and '+') on the same string into a single loop * Avoiding string.split() * Minimizing creation of temporary strings * Avoiding string decoding if no encoded bytes were found and the default string decoder is being used PR-URL: https://github.com/nodejs/node/pull/5012 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Roman Reiss <me@silverwind.io> Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
This commit is contained in:
parent
90451a67ca
commit
a2a69a2b63
@ -3,7 +3,13 @@ var querystring = require('querystring');
|
||||
var v8 = require('v8');
|
||||
|
||||
var bench = common.createBenchmark(main, {
|
||||
type: ['noencode', 'encodemany', 'encodelast', 'multivalue'],
|
||||
type: ['noencode',
|
||||
'multicharsep',
|
||||
'encodemany',
|
||||
'encodelast',
|
||||
'multivalue',
|
||||
'multivaluemany',
|
||||
'manypairs'],
|
||||
n: [1e6],
|
||||
});
|
||||
|
||||
@ -13,22 +19,38 @@ function main(conf) {
|
||||
|
||||
var inputs = {
|
||||
noencode: 'foo=bar&baz=quux&xyzzy=thud',
|
||||
multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud',
|
||||
encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d',
|
||||
encodelast: 'foo=bar&baz=quux&xyzzy=thu%64',
|
||||
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz'
|
||||
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
|
||||
multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' +
|
||||
'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz',
|
||||
manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z'
|
||||
};
|
||||
var input = inputs[type];
|
||||
|
||||
// Force-optimize querystring.parse() so that the benchmark doesn't get
|
||||
// disrupted by the optimizer kicking in halfway through.
|
||||
for (var name in inputs)
|
||||
querystring.parse(inputs[name]);
|
||||
|
||||
v8.setFlagsFromString('--allow_natives_syntax');
|
||||
eval('%OptimizeFunctionOnNextCall(querystring.parse)');
|
||||
|
||||
bench.start();
|
||||
for (var i = 0; i < n; i += 1)
|
||||
if (type !== 'multicharsep') {
|
||||
querystring.parse(input);
|
||||
bench.end(n);
|
||||
eval('%OptimizeFunctionOnNextCall(querystring.parse)');
|
||||
querystring.parse(input);
|
||||
} else {
|
||||
querystring.parse(input, '&&&&&&&&&&');
|
||||
eval('%OptimizeFunctionOnNextCall(querystring.parse)');
|
||||
querystring.parse(input, '&&&&&&&&&&');
|
||||
}
|
||||
|
||||
if (type !== 'multicharsep') {
|
||||
bench.start();
|
||||
for (var i = 0; i < n; i += 1)
|
||||
querystring.parse(input);
|
||||
bench.end(n);
|
||||
} else {
|
||||
bench.start();
|
||||
for (var i = 0; i < n; i += 1)
|
||||
querystring.parse(input, '&&&&&&&&&&');
|
||||
bench.end(n);
|
||||
}
|
||||
}
|
||||
|
@ -78,13 +78,14 @@ QueryString.unescapeBuffer = function(s, decodeSpaces) {
|
||||
};
|
||||
|
||||
|
||||
QueryString.unescape = function(s, decodeSpaces) {
|
||||
function qsUnescape(s, decodeSpaces) {
|
||||
try {
|
||||
return decodeURIComponent(s);
|
||||
} catch (e) {
|
||||
return QueryString.unescapeBuffer(s, decodeSpaces).toString();
|
||||
}
|
||||
};
|
||||
}
|
||||
QueryString.unescape = qsUnescape;
|
||||
|
||||
|
||||
var hexTable = new Array(256);
|
||||
@ -198,63 +199,183 @@ QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) {
|
||||
return '';
|
||||
};
|
||||
|
||||
// Parse a key=val string.
|
||||
// Parse a key/val string.
|
||||
QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
|
||||
sep = sep || '&';
|
||||
eq = eq || '=';
|
||||
const eqLen = eq.length;
|
||||
var obj = {};
|
||||
|
||||
const obj = {};
|
||||
|
||||
if (typeof qs !== 'string' || qs.length === 0) {
|
||||
return obj;
|
||||
}
|
||||
|
||||
if (typeof sep !== 'string')
|
||||
sep += '';
|
||||
|
||||
const eqLen = eq.length;
|
||||
const sepLen = sep.length;
|
||||
|
||||
var maxKeys = 1000;
|
||||
if (options && typeof options.maxKeys === 'number') {
|
||||
maxKeys = options.maxKeys;
|
||||
}
|
||||
|
||||
// maxKeys <= 0 means that we should not limit keys count
|
||||
if (maxKeys > 0 && isFinite(maxKeys)) {
|
||||
qs = qs.split(sep, maxKeys);
|
||||
} else {
|
||||
qs = qs.split(sep);
|
||||
}
|
||||
|
||||
var len = qs.length;
|
||||
var pairs = Infinity;
|
||||
if (maxKeys > 0)
|
||||
pairs = maxKeys;
|
||||
|
||||
var decode = QueryString.unescape;
|
||||
if (options && typeof options.decodeURIComponent === 'function') {
|
||||
decode = options.decodeURIComponent;
|
||||
}
|
||||
const customDecode = (decode !== qsUnescape);
|
||||
|
||||
var keys = [];
|
||||
for (var i = 0; i < len; ++i) {
|
||||
// replacePlus() is used instead of a regexp because it is ~15-30% faster
|
||||
// with v8 4.7
|
||||
const x = replacePlus(qs[i]);
|
||||
const idx = x.indexOf(eq);
|
||||
var k, v;
|
||||
const keys = [];
|
||||
var lastPos = 0;
|
||||
var sepIdx = 0;
|
||||
var eqIdx = 0;
|
||||
var key = '';
|
||||
var value = '';
|
||||
var keyEncoded = customDecode;
|
||||
var valEncoded = customDecode;
|
||||
var encodeCheck = 0;
|
||||
for (var i = 0; i < qs.length; ++i) {
|
||||
const code = qs.charCodeAt(i);
|
||||
|
||||
if (idx >= 0) {
|
||||
k = decodeStr(x.substring(0, idx), decode);
|
||||
v = decodeStr(x.substring(idx + eqLen), decode);
|
||||
// Try matching key/value pair separator (e.g. '&')
|
||||
if (code === sep.charCodeAt(sepIdx)) {
|
||||
if (++sepIdx === sepLen) {
|
||||
// Key/value pair separator match!
|
||||
const end = i - sepIdx + 1;
|
||||
if (eqIdx < eqLen) {
|
||||
// If we didn't find the key/value separator, treat the substring as
|
||||
// part of the key instead of the value
|
||||
if (lastPos < end)
|
||||
key += qs.slice(lastPos, end);
|
||||
} else if (lastPos < end)
|
||||
value += qs.slice(lastPos, end);
|
||||
if (keyEncoded)
|
||||
key = decodeStr(key, decode);
|
||||
if (valEncoded)
|
||||
value = decodeStr(value, decode);
|
||||
// Use a key array lookup instead of using hasOwnProperty(), which is
|
||||
// slower
|
||||
if (keys.indexOf(key) === -1) {
|
||||
obj[key] = value;
|
||||
keys[keys.length] = key;
|
||||
} else {
|
||||
const curValue = obj[key];
|
||||
// `instanceof Array` is used instead of Array.isArray() because it
|
||||
// is ~15-20% faster with v8 4.7 and is safe to use because we are
|
||||
// using it with values being created within this function
|
||||
if (curValue instanceof Array)
|
||||
curValue[curValue.length] = value;
|
||||
else
|
||||
obj[key] = [curValue, value];
|
||||
}
|
||||
if (--pairs === 0)
|
||||
break;
|
||||
keyEncoded = valEncoded = customDecode;
|
||||
encodeCheck = 0;
|
||||
key = value = '';
|
||||
lastPos = i + 1;
|
||||
sepIdx = eqIdx = 0;
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
k = decodeStr(x, decode);
|
||||
v = '';
|
||||
sepIdx = 0;
|
||||
if (!valEncoded) {
|
||||
// Try to match an (valid) encoded byte (once) to minimize unnecessary
|
||||
// calls to string decoding functions
|
||||
if (code === 37/*%*/) {
|
||||
encodeCheck = 1;
|
||||
} else if (encodeCheck > 0 &&
|
||||
((code >= 48/*0*/ && code <= 57/*9*/) ||
|
||||
(code >= 65/*A*/ && code <= 70/*Z*/) ||
|
||||
(code >= 97/*a*/ && code <= 102/*z*/))) {
|
||||
if (++encodeCheck === 3)
|
||||
valEncoded = true;
|
||||
} else {
|
||||
encodeCheck = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Use a key array lookup instead of using hasOwnProperty(), which is slower
|
||||
if (keys.indexOf(k) === -1) {
|
||||
obj[k] = v;
|
||||
keys.push(k);
|
||||
} else if (obj[k] instanceof Array) {
|
||||
// `instanceof Array` is used instead of Array.isArray() because it is
|
||||
// ~15-20% faster with v8 4.7 and is safe to use because we are using it
|
||||
// with values being created within this function
|
||||
obj[k].push(v);
|
||||
// Try matching key/value separator (e.g. '=') if we haven't already
|
||||
if (eqIdx < eqLen) {
|
||||
if (code === eq.charCodeAt(eqIdx)) {
|
||||
if (++eqIdx === eqLen) {
|
||||
// Key/value separator match!
|
||||
const end = i - eqIdx + 1;
|
||||
if (lastPos < end)
|
||||
key += qs.slice(lastPos, end);
|
||||
encodeCheck = 0;
|
||||
lastPos = i + 1;
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
eqIdx = 0;
|
||||
if (!keyEncoded) {
|
||||
// Try to match an (valid) encoded byte once to minimize unnecessary
|
||||
// calls to string decoding functions
|
||||
if (code === 37/*%*/) {
|
||||
encodeCheck = 1;
|
||||
} else if (encodeCheck > 0 &&
|
||||
((code >= 48/*0*/ && code <= 57/*9*/) ||
|
||||
(code >= 65/*A*/ && code <= 70/*Z*/) ||
|
||||
(code >= 97/*a*/ && code <= 102/*z*/))) {
|
||||
if (++encodeCheck === 3)
|
||||
keyEncoded = true;
|
||||
} else {
|
||||
encodeCheck = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (code === 43/*+*/) {
|
||||
if (eqIdx < eqLen) {
|
||||
if (i - lastPos > 0)
|
||||
key += qs.slice(lastPos, i);
|
||||
key += '%20';
|
||||
keyEncoded = true;
|
||||
} else {
|
||||
if (i - lastPos > 0)
|
||||
value += qs.slice(lastPos, i);
|
||||
value += '%20';
|
||||
valEncoded = true;
|
||||
}
|
||||
lastPos = i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Check if we have leftover key or value data
|
||||
if (pairs > 0 && (lastPos < qs.length || eqIdx > 0)) {
|
||||
if (lastPos < qs.length) {
|
||||
if (eqIdx < eqLen)
|
||||
key += qs.slice(lastPos);
|
||||
else if (sepIdx < sepLen)
|
||||
value += qs.slice(lastPos);
|
||||
}
|
||||
if (keyEncoded)
|
||||
key = decodeStr(key, decode);
|
||||
if (valEncoded)
|
||||
value = decodeStr(value, decode);
|
||||
// Use a key array lookup instead of using hasOwnProperty(), which is
|
||||
// slower
|
||||
if (keys.indexOf(key) === -1) {
|
||||
obj[key] = value;
|
||||
keys[keys.length] = key;
|
||||
} else {
|
||||
obj[k] = [obj[k], v];
|
||||
const curValue = obj[key];
|
||||
// `instanceof Array` is used instead of Array.isArray() because it
|
||||
// is ~15-20% faster with v8 4.7 and is safe to use because we are
|
||||
// using it with values being created within this function
|
||||
if (curValue instanceof Array)
|
||||
curValue[curValue.length] = value;
|
||||
else
|
||||
obj[key] = [curValue, value];
|
||||
}
|
||||
}
|
||||
|
||||
@ -262,23 +383,6 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
|
||||
};
|
||||
|
||||
|
||||
function replacePlus(str) {
|
||||
var ret = '';
|
||||
var start = 0;
|
||||
var i = -1;
|
||||
while ((i = str.indexOf('+', i + 1)) !== -1) {
|
||||
ret += str.slice(start, i);
|
||||
ret += '%20';
|
||||
start = i + 1;
|
||||
}
|
||||
if (start === 0)
|
||||
return str;
|
||||
if (start < str.length)
|
||||
ret += str.slice(start);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
// v8 does not optimize functions with try-catch blocks, so we isolate them here
|
||||
// to minimize the damage
|
||||
function decodeStr(s, decoder) {
|
||||
|
@ -248,3 +248,6 @@ qs.unescape = function(str) {
|
||||
};
|
||||
assert.deepEqual(qs.parse('foo=bor'), {f__: 'b_r'});
|
||||
qs.unescape = prevUnescape;
|
||||
|
||||
// test separator and "equals" parsing order
|
||||
assert.deepEqual(qs.parse('foo&bar', '&', '&'), { foo: '', bar: '' });
|
||||
|
Loading…
x
Reference in New Issue
Block a user