querystring: improve parse() performance
This commit improves parse() performance by ~20-200% across the various
querystring-parse benchmarks.

Some optimization strategies used in this commit include:

* Combining multiple searches (for '&', '=', and '+') on the same string
  into a single loop
* Avoiding string.split()
* Minimizing creation of temporary strings
* Avoiding string decoding if no encoded bytes were found and the default
  string decoder is being used

PR-URL: https://github.com/nodejs/node/pull/5012
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Roman Reiss <me@silverwind.io>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
This commit is contained in:
parent
90451a67ca
commit
a2a69a2b63
@ -3,7 +3,13 @@ var querystring = require('querystring');
|
|||||||
var v8 = require('v8');
|
var v8 = require('v8');
|
||||||
|
|
||||||
var bench = common.createBenchmark(main, {
|
var bench = common.createBenchmark(main, {
|
||||||
type: ['noencode', 'encodemany', 'encodelast', 'multivalue'],
|
type: ['noencode',
|
||||||
|
'multicharsep',
|
||||||
|
'encodemany',
|
||||||
|
'encodelast',
|
||||||
|
'multivalue',
|
||||||
|
'multivaluemany',
|
||||||
|
'manypairs'],
|
||||||
n: [1e6],
|
n: [1e6],
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -13,22 +19,38 @@ function main(conf) {
|
|||||||
|
|
||||||
var inputs = {
|
var inputs = {
|
||||||
noencode: 'foo=bar&baz=quux&xyzzy=thud',
|
noencode: 'foo=bar&baz=quux&xyzzy=thud',
|
||||||
|
multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud',
|
||||||
encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d',
|
encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d',
|
||||||
encodelast: 'foo=bar&baz=quux&xyzzy=thu%64',
|
encodelast: 'foo=bar&baz=quux&xyzzy=thu%64',
|
||||||
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz'
|
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
|
||||||
|
multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' +
|
||||||
|
'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz',
|
||||||
|
manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z'
|
||||||
};
|
};
|
||||||
var input = inputs[type];
|
var input = inputs[type];
|
||||||
|
|
||||||
// Force-optimize querystring.parse() so that the benchmark doesn't get
|
// Force-optimize querystring.parse() so that the benchmark doesn't get
|
||||||
// disrupted by the optimizer kicking in halfway through.
|
// disrupted by the optimizer kicking in halfway through.
|
||||||
for (var name in inputs)
|
|
||||||
querystring.parse(inputs[name]);
|
|
||||||
|
|
||||||
v8.setFlagsFromString('--allow_natives_syntax');
|
v8.setFlagsFromString('--allow_natives_syntax');
|
||||||
|
if (type !== 'multicharsep') {
|
||||||
|
querystring.parse(input);
|
||||||
eval('%OptimizeFunctionOnNextCall(querystring.parse)');
|
eval('%OptimizeFunctionOnNextCall(querystring.parse)');
|
||||||
|
querystring.parse(input);
|
||||||
|
} else {
|
||||||
|
querystring.parse(input, '&&&&&&&&&&');
|
||||||
|
eval('%OptimizeFunctionOnNextCall(querystring.parse)');
|
||||||
|
querystring.parse(input, '&&&&&&&&&&');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (type !== 'multicharsep') {
|
||||||
bench.start();
|
bench.start();
|
||||||
for (var i = 0; i < n; i += 1)
|
for (var i = 0; i < n; i += 1)
|
||||||
querystring.parse(input);
|
querystring.parse(input);
|
||||||
bench.end(n);
|
bench.end(n);
|
||||||
|
} else {
|
||||||
|
bench.start();
|
||||||
|
for (var i = 0; i < n; i += 1)
|
||||||
|
querystring.parse(input, '&&&&&&&&&&');
|
||||||
|
bench.end(n);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -78,13 +78,14 @@ QueryString.unescapeBuffer = function(s, decodeSpaces) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
QueryString.unescape = function(s, decodeSpaces) {
|
function qsUnescape(s, decodeSpaces) {
|
||||||
try {
|
try {
|
||||||
return decodeURIComponent(s);
|
return decodeURIComponent(s);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
return QueryString.unescapeBuffer(s, decodeSpaces).toString();
|
return QueryString.unescapeBuffer(s, decodeSpaces).toString();
|
||||||
}
|
}
|
||||||
};
|
}
|
||||||
|
QueryString.unescape = qsUnescape;
|
||||||
|
|
||||||
|
|
||||||
var hexTable = new Array(256);
|
var hexTable = new Array(256);
|
||||||
@ -198,63 +199,183 @@ QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) {
|
|||||||
return '';
|
return '';
|
||||||
};
|
};
|
||||||
|
|
||||||
// Parse a key=val string.
|
// Parse a key/val string.
|
||||||
QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
|
QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
|
||||||
sep = sep || '&';
|
sep = sep || '&';
|
||||||
eq = eq || '=';
|
eq = eq || '=';
|
||||||
const eqLen = eq.length;
|
|
||||||
var obj = {};
|
const obj = {};
|
||||||
|
|
||||||
if (typeof qs !== 'string' || qs.length === 0) {
|
if (typeof qs !== 'string' || qs.length === 0) {
|
||||||
return obj;
|
return obj;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (typeof sep !== 'string')
|
||||||
|
sep += '';
|
||||||
|
|
||||||
|
const eqLen = eq.length;
|
||||||
|
const sepLen = sep.length;
|
||||||
|
|
||||||
var maxKeys = 1000;
|
var maxKeys = 1000;
|
||||||
if (options && typeof options.maxKeys === 'number') {
|
if (options && typeof options.maxKeys === 'number') {
|
||||||
maxKeys = options.maxKeys;
|
maxKeys = options.maxKeys;
|
||||||
}
|
}
|
||||||
|
|
||||||
// maxKeys <= 0 means that we should not limit keys count
|
var pairs = Infinity;
|
||||||
if (maxKeys > 0 && isFinite(maxKeys)) {
|
if (maxKeys > 0)
|
||||||
qs = qs.split(sep, maxKeys);
|
pairs = maxKeys;
|
||||||
} else {
|
|
||||||
qs = qs.split(sep);
|
|
||||||
}
|
|
||||||
|
|
||||||
var len = qs.length;
|
|
||||||
|
|
||||||
var decode = QueryString.unescape;
|
var decode = QueryString.unescape;
|
||||||
if (options && typeof options.decodeURIComponent === 'function') {
|
if (options && typeof options.decodeURIComponent === 'function') {
|
||||||
decode = options.decodeURIComponent;
|
decode = options.decodeURIComponent;
|
||||||
}
|
}
|
||||||
|
const customDecode = (decode !== qsUnescape);
|
||||||
|
|
||||||
var keys = [];
|
const keys = [];
|
||||||
for (var i = 0; i < len; ++i) {
|
var lastPos = 0;
|
||||||
// replacePlus() is used instead of a regexp because it is ~15-30% faster
|
var sepIdx = 0;
|
||||||
// with v8 4.7
|
var eqIdx = 0;
|
||||||
const x = replacePlus(qs[i]);
|
var key = '';
|
||||||
const idx = x.indexOf(eq);
|
var value = '';
|
||||||
var k, v;
|
var keyEncoded = customDecode;
|
||||||
|
var valEncoded = customDecode;
|
||||||
|
var encodeCheck = 0;
|
||||||
|
for (var i = 0; i < qs.length; ++i) {
|
||||||
|
const code = qs.charCodeAt(i);
|
||||||
|
|
||||||
if (idx >= 0) {
|
// Try matching key/value pair separator (e.g. '&')
|
||||||
k = decodeStr(x.substring(0, idx), decode);
|
if (code === sep.charCodeAt(sepIdx)) {
|
||||||
v = decodeStr(x.substring(idx + eqLen), decode);
|
if (++sepIdx === sepLen) {
|
||||||
|
// Key/value pair separator match!
|
||||||
|
const end = i - sepIdx + 1;
|
||||||
|
if (eqIdx < eqLen) {
|
||||||
|
// If we didn't find the key/value separator, treat the substring as
|
||||||
|
// part of the key instead of the value
|
||||||
|
if (lastPos < end)
|
||||||
|
key += qs.slice(lastPos, end);
|
||||||
|
} else if (lastPos < end)
|
||||||
|
value += qs.slice(lastPos, end);
|
||||||
|
if (keyEncoded)
|
||||||
|
key = decodeStr(key, decode);
|
||||||
|
if (valEncoded)
|
||||||
|
value = decodeStr(value, decode);
|
||||||
|
// Use a key array lookup instead of using hasOwnProperty(), which is
|
||||||
|
// slower
|
||||||
|
if (keys.indexOf(key) === -1) {
|
||||||
|
obj[key] = value;
|
||||||
|
keys[keys.length] = key;
|
||||||
} else {
|
} else {
|
||||||
k = decodeStr(x, decode);
|
const curValue = obj[key];
|
||||||
v = '';
|
// `instanceof Array` is used instead of Array.isArray() because it
|
||||||
|
// is ~15-20% faster with v8 4.7 and is safe to use because we are
|
||||||
|
// using it with values being created within this function
|
||||||
|
if (curValue instanceof Array)
|
||||||
|
curValue[curValue.length] = value;
|
||||||
|
else
|
||||||
|
obj[key] = [curValue, value];
|
||||||
|
}
|
||||||
|
if (--pairs === 0)
|
||||||
|
break;
|
||||||
|
keyEncoded = valEncoded = customDecode;
|
||||||
|
encodeCheck = 0;
|
||||||
|
key = value = '';
|
||||||
|
lastPos = i + 1;
|
||||||
|
sepIdx = eqIdx = 0;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
sepIdx = 0;
|
||||||
|
if (!valEncoded) {
|
||||||
|
// Try to match a (valid) encoded byte (once) to minimize unnecessary
|
||||||
|
// calls to string decoding functions
|
||||||
|
if (code === 37/*%*/) {
|
||||||
|
encodeCheck = 1;
|
||||||
|
} else if (encodeCheck > 0 &&
|
||||||
|
((code >= 48/*0*/ && code <= 57/*9*/) ||
|
||||||
|
(code >= 65/*A*/ && code <= 70/*Z*/) ||
|
||||||
|
(code >= 97/*a*/ && code <= 102/*z*/))) {
|
||||||
|
if (++encodeCheck === 3)
|
||||||
|
valEncoded = true;
|
||||||
|
} else {
|
||||||
|
encodeCheck = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use a key array lookup instead of using hasOwnProperty(), which is slower
|
// Try matching key/value separator (e.g. '=') if we haven't already
|
||||||
if (keys.indexOf(k) === -1) {
|
if (eqIdx < eqLen) {
|
||||||
obj[k] = v;
|
if (code === eq.charCodeAt(eqIdx)) {
|
||||||
keys.push(k);
|
if (++eqIdx === eqLen) {
|
||||||
} else if (obj[k] instanceof Array) {
|
// Key/value separator match!
|
||||||
// `instanceof Array` is used instead of Array.isArray() because it is
|
const end = i - eqIdx + 1;
|
||||||
// ~15-20% faster with v8 4.7 and is safe to use because we are using it
|
if (lastPos < end)
|
||||||
// with values being created within this function
|
key += qs.slice(lastPos, end);
|
||||||
obj[k].push(v);
|
encodeCheck = 0;
|
||||||
|
lastPos = i + 1;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
} else {
|
} else {
|
||||||
obj[k] = [obj[k], v];
|
eqIdx = 0;
|
||||||
|
if (!keyEncoded) {
|
||||||
|
// Try to match a (valid) encoded byte once to minimize unnecessary
|
||||||
|
// calls to string decoding functions
|
||||||
|
if (code === 37/*%*/) {
|
||||||
|
encodeCheck = 1;
|
||||||
|
} else if (encodeCheck > 0 &&
|
||||||
|
((code >= 48/*0*/ && code <= 57/*9*/) ||
|
||||||
|
(code >= 65/*A*/ && code <= 70/*Z*/) ||
|
||||||
|
(code >= 97/*a*/ && code <= 102/*z*/))) {
|
||||||
|
if (++encodeCheck === 3)
|
||||||
|
keyEncoded = true;
|
||||||
|
} else {
|
||||||
|
encodeCheck = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (code === 43/*+*/) {
|
||||||
|
if (eqIdx < eqLen) {
|
||||||
|
if (i - lastPos > 0)
|
||||||
|
key += qs.slice(lastPos, i);
|
||||||
|
key += '%20';
|
||||||
|
keyEncoded = true;
|
||||||
|
} else {
|
||||||
|
if (i - lastPos > 0)
|
||||||
|
value += qs.slice(lastPos, i);
|
||||||
|
value += '%20';
|
||||||
|
valEncoded = true;
|
||||||
|
}
|
||||||
|
lastPos = i + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if we have leftover key or value data
|
||||||
|
if (pairs > 0 && (lastPos < qs.length || eqIdx > 0)) {
|
||||||
|
if (lastPos < qs.length) {
|
||||||
|
if (eqIdx < eqLen)
|
||||||
|
key += qs.slice(lastPos);
|
||||||
|
else if (sepIdx < sepLen)
|
||||||
|
value += qs.slice(lastPos);
|
||||||
|
}
|
||||||
|
if (keyEncoded)
|
||||||
|
key = decodeStr(key, decode);
|
||||||
|
if (valEncoded)
|
||||||
|
value = decodeStr(value, decode);
|
||||||
|
// Use a key array lookup instead of using hasOwnProperty(), which is
|
||||||
|
// slower
|
||||||
|
if (keys.indexOf(key) === -1) {
|
||||||
|
obj[key] = value;
|
||||||
|
keys[keys.length] = key;
|
||||||
|
} else {
|
||||||
|
const curValue = obj[key];
|
||||||
|
// `instanceof Array` is used instead of Array.isArray() because it
|
||||||
|
// is ~15-20% faster with v8 4.7 and is safe to use because we are
|
||||||
|
// using it with values being created within this function
|
||||||
|
if (curValue instanceof Array)
|
||||||
|
curValue[curValue.length] = value;
|
||||||
|
else
|
||||||
|
obj[key] = [curValue, value];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -262,23 +383,6 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
function replacePlus(str) {
|
|
||||||
var ret = '';
|
|
||||||
var start = 0;
|
|
||||||
var i = -1;
|
|
||||||
while ((i = str.indexOf('+', i + 1)) !== -1) {
|
|
||||||
ret += str.slice(start, i);
|
|
||||||
ret += '%20';
|
|
||||||
start = i + 1;
|
|
||||||
}
|
|
||||||
if (start === 0)
|
|
||||||
return str;
|
|
||||||
if (start < str.length)
|
|
||||||
ret += str.slice(start);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// v8 does not optimize functions with try-catch blocks, so we isolate them here
|
// v8 does not optimize functions with try-catch blocks, so we isolate them here
|
||||||
// to minimize the damage
|
// to minimize the damage
|
||||||
function decodeStr(s, decoder) {
|
function decodeStr(s, decoder) {
|
||||||
|
@ -248,3 +248,6 @@ qs.unescape = function(str) {
|
|||||||
};
|
};
|
||||||
assert.deepEqual(qs.parse('foo=bor'), {f__: 'b_r'});
|
assert.deepEqual(qs.parse('foo=bor'), {f__: 'b_r'});
|
||||||
qs.unescape = prevUnescape;
|
qs.unescape = prevUnescape;
|
||||||
|
|
||||||
|
// test separator and "equals" parsing order
|
||||||
|
assert.deepEqual(qs.parse('foo&bar', '&', '&'), { foo: '', bar: '' });
|
||||||
|
Loading…
x
Reference in New Issue
Block a user