querystring: improve parse() performance

This commit improves parse() performance by ~20-200% across the various
querystring-parse benchmarks.

Some optimization strategies used in this commit include:

* Combining multiple searches (for '&', '=', and '+') on the same string
  into a single loop
* Avoiding string.split()
* Minimizing creation of temporary strings
* Avoiding string decoding if no encoded bytes were found and the default
  string decoder is being used

PR-URL: https://github.com/nodejs/node/pull/5012
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Roman Reiss <me@silverwind.io>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
2016-01-31 13:54:18 -05:00 · 2016-01-31 13:54:18 -05:00 · a2a69a2b63
commit a2a69a2b63
parent 90451a67ca
3 changed files with 191 additions and 62 deletions
--- a/benchmark/querystring/querystring-parse.js
+++ b/benchmark/querystring/querystring-parse.js
@ -3,7 +3,13 @@ var querystring = require('querystring');
 var v8 = require('v8');
 var bench = common.createBenchmark(main, {
-  type: ['noencode', 'encodemany', 'encodelast', 'multivalue'],
+  type: ['noencode',
         'multicharsep',
         'encodemany',
         'encodelast',
         'multivalue',
         'multivaluemany',
         'manypairs'],
  n: [1e6],
 });
@ -13,22 +19,38 @@ function main(conf) {
  var inputs = {
    noencode: 'foo=bar&baz=quux&xyzzy=thud',
    multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud',
    encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d',
    encodelast: 'foo=bar&baz=quux&xyzzy=thu%64',
-    multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz'
+    multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
    multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' +
                    'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz',
    manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z'
  };
  var input = inputs[type];
  // Force-optimize querystring.parse() so that the benchmark doesn't get
  // disrupted by the optimizer kicking in halfway through.
  for (var name in inputs)
    querystring.parse(inputs[name]);
  v8.setFlagsFromString('--allow_natives_syntax');
  if (type !== 'multicharsep') {
    querystring.parse(input);
    eval('%OptimizeFunctionOnNextCall(querystring.parse)');
    querystring.parse(input);
  } else {
    querystring.parse(input, '&&&&&&&&&&');
    eval('%OptimizeFunctionOnNextCall(querystring.parse)');
    querystring.parse(input, '&&&&&&&&&&');
  }
  if (type !== 'multicharsep') {
    bench.start();
    for (var i = 0; i < n; i += 1)
      querystring.parse(input);
    bench.end(n);
  } else {
    bench.start();
    for (var i = 0; i < n; i += 1)
      querystring.parse(input, '&&&&&&&&&&');
    bench.end(n);
  }
 }
--- a/lib/querystring.js
+++ b/lib/querystring.js
@ -78,13 +78,14 @@ QueryString.unescapeBuffer = function(s, decodeSpaces) {
 };
-QueryString.unescape = function(s, decodeSpaces) {
+function qsUnescape(s, decodeSpaces) {
  try {
    return decodeURIComponent(s);
  } catch (e) {
    return QueryString.unescapeBuffer(s, decodeSpaces).toString();
  }
-};
+}
 QueryString.unescape = qsUnescape;
 var hexTable = new Array(256);
@ -198,63 +199,183 @@ QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) {
  return '';
 };
-// Parse a key=val string.
+// Parse a key/val string.
 QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
  sep = sep || '&';
  eq = eq || '=';
-  const eqLen = eq.length;
+
-  var obj = {};
+  const obj = {};
  if (typeof qs !== 'string' || qs.length === 0) {
    return obj;
  }
  if (typeof sep !== 'string')
    sep += '';
  const eqLen = eq.length;
  const sepLen = sep.length;
  var maxKeys = 1000;
  if (options && typeof options.maxKeys === 'number') {
    maxKeys = options.maxKeys;
  }
-  // maxKeys <= 0 means that we should not limit keys count
+  var pairs = Infinity;
-  if (maxKeys > 0 && isFinite(maxKeys)) {
+  if (maxKeys > 0)
-    qs = qs.split(sep, maxKeys);
+    pairs = maxKeys;
  } else {
    qs = qs.split(sep);
  }
  var len = qs.length;
  var decode = QueryString.unescape;
  if (options && typeof options.decodeURIComponent === 'function') {
    decode = options.decodeURIComponent;
  }
  const customDecode = (decode !== qsUnescape);
-  var keys = [];
+  const keys = [];
-  for (var i = 0; i < len; ++i) {
+  var lastPos = 0;
-    // replacePlus() is used instead of a regexp because it is ~15-30% faster
+  var sepIdx = 0;
-    // with v8 4.7
+  var eqIdx = 0;
-    const x = replacePlus(qs[i]);
+  var key = '';
-    const idx = x.indexOf(eq);
+  var value = '';
-    var k, v;
+  var keyEncoded = customDecode;
  var valEncoded = customDecode;
  var encodeCheck = 0;
  for (var i = 0; i < qs.length; ++i) {
    const code = qs.charCodeAt(i);
-    if (idx >= 0) {
+    // Try matching key/value pair separator (e.g. '&')
-      k = decodeStr(x.substring(0, idx), decode);
+    if (code === sep.charCodeAt(sepIdx)) {
-      v = decodeStr(x.substring(idx + eqLen), decode);
+      if (++sepIdx === sepLen) {
        // Key/value pair separator match!
        const end = i - sepIdx + 1;
        if (eqIdx < eqLen) {
          // If we didn't find the key/value separator, treat the substring as
          // part of the key instead of the value
          if (lastPos < end)
            key += qs.slice(lastPos, end);
        } else if (lastPos < end)
          value += qs.slice(lastPos, end);
        if (keyEncoded)
          key = decodeStr(key, decode);
        if (valEncoded)
          value = decodeStr(value, decode);
        // Use a key array lookup instead of using hasOwnProperty(), which is
        // slower
        if (keys.indexOf(key) === -1) {
          obj[key] = value;
          keys[keys.length] = key;
        } else {
-      k = decodeStr(x, decode);
+          const curValue = obj[key];
-      v = '';
+          // `instanceof Array` is used instead of Array.isArray() because it
          // is ~15-20% faster with v8 4.7 and is safe to use because we are
          // using it with values being created within this function
          if (curValue instanceof Array)
            curValue[curValue.length] = value;
          else
            obj[key] = [curValue, value];
        }
        if (--pairs === 0)
          break;
        keyEncoded = valEncoded = customDecode;
        encodeCheck = 0;
        key = value = '';
        lastPos = i + 1;
        sepIdx = eqIdx = 0;
      }
      continue;
    } else {
      sepIdx = 0;
      if (!valEncoded) {
        // Try to match a (valid) percent-encoded byte, i.e. '%' followed by
        // two hex digits, so that unnecessary calls to the string decoding
        // function can be skipped. encodeCheck tracks progress: '%' sets it
        // to 1, each hex digit that follows increments it, and reaching 3
        // sets valEncoded. Any other character resets the counter to 0.
        if (code === 37/*%*/) {
          encodeCheck = 1;
        } else if (encodeCheck > 0 &&
                   ((code >= 48/*0*/ && code <= 57/*9*/) ||
                    (code >= 65/*A*/ && code <= 70/*F*/) ||
                    (code >= 97/*a*/ && code <= 102/*f*/))) {
          if (++encodeCheck === 3)
            valEncoded = true;
        } else {
          encodeCheck = 0;
        }
      }
    }
-    // Use a key array lookup instead of using hasOwnProperty(), which is slower
+    // Try matching key/value separator (e.g. '=') if we haven't already
-    if (keys.indexOf(k) === -1) {
+    if (eqIdx < eqLen) {
-      obj[k] = v;
+      if (code === eq.charCodeAt(eqIdx)) {
-      keys.push(k);
+        if (++eqIdx === eqLen) {
-    } else if (obj[k] instanceof Array) {
+          // Key/value separator match!
-      // `instanceof Array` is used instead of Array.isArray() because it is
+          const end = i - eqIdx + 1;
-      // ~15-20% faster with v8 4.7 and is safe to use because we are using it
+          if (lastPos < end)
-      // with values being created within this function
+            key += qs.slice(lastPos, end);
-      obj[k].push(v);
+          encodeCheck = 0;
          lastPos = i + 1;
        }
        continue;
      } else {
-      obj[k] = [obj[k], v];
+        eqIdx = 0;
        if (!keyEncoded) {
          // Try to match a (valid) percent-encoded byte, i.e. '%' followed by
          // two hex digits, so that unnecessary calls to the string decoding
          // function can be skipped. encodeCheck tracks progress: '%' sets
          // it to 1, each hex digit that follows increments it, and reaching
          // 3 sets keyEncoded. Any other character resets the counter to 0.
          if (code === 37/*%*/) {
            encodeCheck = 1;
          } else if (encodeCheck > 0 &&
                     ((code >= 48/*0*/ && code <= 57/*9*/) ||
                      (code >= 65/*A*/ && code <= 70/*F*/) ||
                      (code >= 97/*a*/ && code <= 102/*f*/))) {
            if (++encodeCheck === 3)
              keyEncoded = true;
          } else {
            encodeCheck = 0;
          }
        }
      }
    }
    if (code === 43/*+*/) {
      if (eqIdx < eqLen) {
        if (i - lastPos > 0)
          key += qs.slice(lastPos, i);
        key += '%20';
        keyEncoded = true;
      } else {
        if (i - lastPos > 0)
          value += qs.slice(lastPos, i);
        value += '%20';
        valEncoded = true;
      }
      lastPos = i + 1;
    }
  }
  // Check if we have leftover key or value data
  if (pairs > 0 && (lastPos < qs.length || eqIdx > 0)) {
    if (lastPos < qs.length) {
      if (eqIdx < eqLen)
        key += qs.slice(lastPos);
      else if (sepIdx < sepLen)
        value += qs.slice(lastPos);
    }
    if (keyEncoded)
      key = decodeStr(key, decode);
    if (valEncoded)
      value = decodeStr(value, decode);
    // Use a key array lookup instead of using hasOwnProperty(), which is
    // slower
    if (keys.indexOf(key) === -1) {
      obj[key] = value;
      keys[keys.length] = key;
    } else {
      const curValue = obj[key];
      // `instanceof Array` is used instead of Array.isArray() because it
      // is ~15-20% faster with v8 4.7 and is safe to use because we are
      // using it with values being created within this function
      if (curValue instanceof Array)
        curValue[curValue.length] = value;
      else
        obj[key] = [curValue, value];
    }
  }
@ -262,23 +383,6 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
 };
 // Returns `str` with every '+' replaced by '%20'. When `str` contains no
 // '+', the input itself is returned and no new string is built.
 function replacePlus(str) {
  var plusAt = str.indexOf('+');
  // Fast path: nothing to replace, so hand back the very same value
  if (plusAt === -1)
    return str;
  var out = '';
  var copyFrom = 0;
  do {
    // Copy the run of characters before this '+', then its replacement
    out += str.slice(copyFrom, plusAt) + '%20';
    copyFrom = plusAt + 1;
    plusAt = str.indexOf('+', copyFrom);
  } while (plusAt !== -1);
  // Append whatever follows the last '+', if anything
  if (copyFrom < str.length)
    out += str.slice(copyFrom);
  return out;
 }
 // v8 does not optimize functions with try-catch blocks, so we isolate them here
 // to minimize the damage
 function decodeStr(s, decoder) {
--- a/test/parallel/test-querystring.js
+++ b/test/parallel/test-querystring.js
@ -248,3 +248,6 @@ qs.unescape = function(str) {
 };
 assert.deepEqual(qs.parse('foo=bor'), {f__: 'b_r'});
 qs.unescape = prevUnescape;
 // test separator and "equals" parsing order
 assert.deepEqual(qs.parse('foo&bar', '&', '&'), { foo: '', bar: '' });