benchmark: pre-generate data set for URL benchmarks

This patch:

- Introduces `common.bakeUrlData`, which can be used to pre-generate
  the data set for the URL benchmarks to loop through instead of
  looping over a constant.
- Adds the option to use WPT data in benchmarks for better diversity
  in the input
- Adds the option to benchmark URL parsing with base URLs (WHATWG only)
- Moves the data in `benchmark/fixtures/url-inputs.js` to
  `benchmark/common.js`

PR-URL: https://github.com/nodejs/node/pull/24302
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
This commit is contained in:
Joyee Cheung 2018-11-11 16:44:19 +08:00
parent 5f25dd1e2e
commit a365bb9cb1
No known key found for this signature in database
GPG Key ID: 92B78A53C8303B8D
12 changed files with 201 additions and 138 deletions

View File

@ -254,3 +254,95 @@ exports.binding = function(bindingName) {
return process.binding(bindingName);
}
};
// Sample URLs used as inputs by the URL benchmarks, keyed by a short type
// name. The keys are enumerated elsewhere (Object.keys(urls)), so their
// order is part of the data.
const urls = {
  // Deliberately long: explicit port, deep path and a lengthy fragment.
  long: [
    'http://nodejs.org:89/docs/latest/api/foo/bar/qua/13949281/0f28b/',
    '/5d49/b3020/url.html#test?payload1=true&payload2=false&test=1',
    '&benchmark=3&foo=38.38.011.293&bar=1234834910480&test=19299&3992&',
    'key=f5c65e1e98fe07e648249ad41e1cfdb0'
  ].join(''),

  short: 'https://nodejs.org/en/blog/',

  // Non-ASCII host name.
  idn: 'http://你好你好.在线',

  // Credentials embedded in the authority.
  auth: 'https://user:pass@example.com/path?search=1',

  // Non-HTTP schemes.
  file: 'file:///foo/bar/test/node.js',
  ws: 'ws://localhost:9229/f46db715-70df-43ad-a359-7f9949f39868',
  javascript: 'javascript:alert("node is awesome");',

  // Percent-encoded host name.
  percent: 'https://%E4%BD%A0/foo',

  // Path containing `.` and `..` segments.
  dot: 'https://example.org/./a/../b/./c'
};
exports.urls = urls;
// Sample query strings used as inputs by the query string / URLSearchParams
// benchmarks, keyed by a short type name. Key order is part of the data
// because benchmarks enumerate the keys.
const searchParams = {
  // Plain pairs, nothing to decode.
  noencode: 'foo=bar&baz=quux&xyzzy=thud',

  // Runs of repeated separators between pairs.
  multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud',

  // `%` signs that are not followed by a valid hex escape.
  encodefake: 'foo=%©ar&baz=%A©uux&xyzzy=%©ud',

  // Percent escapes in keys and values.
  encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d',

  // A single percent escape at the very end.
  encodelast: 'foo=bar&baz=quux&xyzzy=thu%64',

  // Repeated keys.
  multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
  multivaluemany: [
    'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&',
    'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz'
  ].join(''),

  // Many keys without values, and separators only.
  manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z',
  manyblankpairs: '&&&&&&&&&&&&&&&&&&&&&&&&',

  // `+` used in place of spaces.
  altspaces: 'foo+bar=baz+quux&xyzzy+thud=quuy+quuz&abc=def+ghi'
};
exports.searchParams = searchParams;
function getUrlData(withBase) {
const data = require('../test/fixtures/wpt/url/resources/urltestdata.json');
const result = [];
for (const item of data) {
if (item.failure || !item.input) continue;
if (withBase) {
result.push([item.input, item.base]);
} else if (item.base !== 'about:blank') {
result.push(item.base);
}
}
return result;
}
exports.urlDataTypes = Object.keys(urls).concat(['wpt']);
/**
 * Generate an array of data for URL benchmarks to use.
 * The size of the resulting data set is the original data size * 2 ** `e`.
 * The 'wpt' type contains about 400 data points when `withBase` is true,
 * and 200 data points when `withBase` is false.
 * Other types contain 200 data points with or without base.
 *
 * Both `type` and `e` are validated before any data is generated, so an
 * invalid call fails without loading or allocating the data set.
 *
 * @param {string} type Type of the data, 'wpt' or a key of `urls`
 * @param {number} e The repetition of the data, as exponent of 2
 * @param {boolean} withBase Whether to include a base URL
 * @param {boolean} asUrl Whether to return the results as URL objects
 * @return {string[] | string[][] | URL[]}
 * @throws {Error} If `type` is neither 'wpt' nor an own key of `urls`,
 *   or if `e` is not a number (NaN included).
 */
function bakeUrlData(type, e = 0, withBase = false, asUrl = false) {
  const isWpt = type === 'wpt';
  // Only own keys of `urls` name a data type. A bare `urls[type]` lookup
  // would also accept inherited members such as 'constructor' or 'toString'
  // and bake a data set full of functions.
  const isNamedType =
    Object.prototype.hasOwnProperty.call(urls, type) && Boolean(urls[type]);
  if (!isWpt && !isNamedType) {
    throw new Error(`Unknown url data type ${type}`);
  }
  // NaN (e.g. from `+undefined`) passes a plain typeof check and would
  // silently disable the repetition, so reject it explicitly.
  if (typeof e !== 'number' || Number.isNaN(e)) {
    throw new Error(`e must be a number, received ${e}`);
  }

  let result;
  if (isWpt) {
    result = getUrlData(withBase);
  } else {
    const input = urls[type];
    const item = withBase ? [input, 'about:blank'] : input;
    // Roughly the size of WPT URL test data
    result = new Array(200).fill(item);
  }

  // Double the data set `e` times.
  for (let i = 0; i < e; ++i) {
    result = result.concat(result);
  }

  if (asUrl) {
    if (withBase) {
      result = result.map(([input, base]) => new URL(input, base));
    } else {
      result = result.map((input) => new URL(input));
    }
  }
  return result;
}
exports.bakeUrlData = bakeUrlData;

View File

@ -1,30 +0,0 @@
'use strict';
exports.urls = {
long: 'http://nodejs.org:89/docs/latest/api/foo/bar/qua/13949281/0f28b/' +
'/5d49/b3020/url.html#test?payload1=true&payload2=false&test=1' +
'&benchmark=3&foo=38.38.011.293&bar=1234834910480&test=19299&3992&' +
'key=f5c65e1e98fe07e648249ad41e1cfdb0',
short: 'https://nodejs.org/en/blog/',
idn: 'http://你好你好.在线',
auth: 'https://user:pass@example.com/path?search=1',
file: 'file:///foo/bar/test/node.js',
ws: 'ws://localhost:9229/f46db715-70df-43ad-a359-7f9949f39868',
javascript: 'javascript:alert("node is awesome");',
percent: 'https://%E4%BD%A0/foo',
dot: 'https://example.org/./a/../b/./c'
};
exports.searchParams = {
noencode: 'foo=bar&baz=quux&xyzzy=thud',
multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud',
encodefake: 'foo=%©ar&baz=%A©uux&xyzzy=%©ud',
encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d',
encodelast: 'foo=bar&baz=quux&xyzzy=thu%64',
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' +
'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz',
manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z',
manyblankpairs: '&&&&&&&&&&&&&&&&&&&&&&&&',
altspaces: 'foo+bar=baz+quux&xyzzy+thud=quuy+quuz&abc=def+ghi'
};

View File

@ -1,7 +1,7 @@
'use strict';
const common = require('../common.js');
const querystring = require('querystring');
const inputs = require('../fixtures/url-inputs.js').searchParams;
const inputs = common.searchParams;
const bench = common.createBenchmark(main, {
type: Object.keys(inputs),

View File

@ -3,20 +3,15 @@ const common = require('../common.js');
const url = require('url');
const URL = url.URL;
const assert = require('assert');
const inputs = require('../fixtures/url-inputs.js').urls;
const bench = common.createBenchmark(main, {
type: Object.keys(inputs),
type: common.urlDataTypes,
method: ['legacy', 'whatwg'],
n: [1e5]
e: [1]
});
// At the time of writing, when using a passed property name to index
// the object, Crankshaft would generate a LoadKeyedGeneric even when it
// remains a constant in the function, so here we must use the literal
// instead to get a LoadNamedField.
function useLegacy(n, input) {
const obj = url.parse(input);
function useLegacy(data) {
const obj = url.parse(data[0]);
const noDead = {
protocol: obj.protocol,
auth: obj.auth,
@ -27,10 +22,12 @@ function useLegacy(n, input) {
search: obj.search,
hash: obj.hash
};
const len = data.length;
// It's necessary to assign the values to an object
// to avoid loop invariant code motion.
bench.start();
for (var i = 0; i < n; i += 1) {
for (var i = 0; i < len; i++) {
const obj = data[i];
noDead.protocol = obj.protocol;
noDead.auth = obj.auth;
noDead.host = obj.host;
@ -40,12 +37,12 @@ function useLegacy(n, input) {
noDead.search = obj.search;
noDead.hash = obj.hash;
}
bench.end(n);
bench.end(len);
return noDead;
}
function useWHATWG(n, input) {
const obj = new URL(input);
function useWHATWG(data) {
const obj = new URL(data[0]);
const noDead = {
protocol: obj.protocol,
auth: `${obj.username}:${obj.password}`,
@ -56,8 +53,10 @@ function useWHATWG(n, input) {
search: obj.search,
hash: obj.hash
};
const len = data.length;
bench.start();
for (var i = 0; i < n; i += 1) {
for (var i = 0; i < len; i++) {
const obj = data[i];
noDead.protocol = obj.protocol;
noDead.auth = `${obj.username}:${obj.password}`;
noDead.host = obj.host;
@ -67,23 +66,22 @@ function useWHATWG(n, input) {
noDead.search = obj.search;
noDead.hash = obj.hash;
}
bench.end(n);
bench.end(len);
return noDead;
}
function main({ type, n, method }) {
const input = inputs[type];
if (!input) {
throw new Error(`Unknown input type "${type}"`);
}
function main({ type, method, e }) {
e = +e;
var data;
var noDead; // Avoid dead code elimination.
switch (method) {
case 'legacy':
noDead = useLegacy(n, input);
data = common.bakeUrlData(type, e, false, false);
noDead = useLegacy(data.map((i) => url.parse(i)));
break;
case 'whatwg':
noDead = useWHATWG(n, input);
data = common.bakeUrlData(type, e, false, true);
noDead = useWHATWG(data);
break;
default:
throw new Error(`Unknown method "${method}"`);

View File

@ -3,47 +3,61 @@ const common = require('../common.js');
const url = require('url');
const URL = url.URL;
const assert = require('assert');
const inputs = require('../fixtures/url-inputs.js').urls;
const bench = common.createBenchmark(main, {
type: Object.keys(inputs),
method: ['legacy', 'whatwg'],
n: [1e5]
withBase: ['true', 'false'],
type: common.urlDataTypes,
e: [1],
method: ['legacy', 'whatwg']
});
function useLegacy(n, input) {
var noDead = url.parse(input);
function useLegacy(data) {
const len = data.length;
var result = url.parse(data[0]); // avoid dead code elimination
bench.start();
for (var i = 0; i < n; i += 1) {
noDead = url.parse(input);
for (var i = 0; i < len; ++i) {
result = url.parse(data[i]);
}
bench.end(n);
return noDead;
bench.end(len);
return result;
}
function useWHATWG(n, input) {
var noDead = new URL(input);
function useWHATWGWithBase(data) {
const len = data.length;
var result = new URL(data[0][0], data[0][1]); // avoid dead code elimination
bench.start();
for (var i = 0; i < n; i += 1) {
noDead = new URL(input);
for (var i = 0; i < len; ++i) {
const item = data[i];
result = new URL(item[0], item[1]);
}
bench.end(n);
return noDead;
bench.end(len);
return result;
}
function main({ type, n, method }) {
const input = inputs[type];
if (!input) {
throw new Error(`Unknown input type "${type}"`);
function useWHATWGWithoutBase(data) {
const len = data.length;
var result = new URL(data[0]); // avoid dead code elimination
bench.start();
for (var i = 0; i < len; ++i) {
result = new URL(data[i]);
}
bench.end(len);
return result;
}
function main({ e, method, type, withBase }) {
e = +e;
withBase = withBase === 'true';
var noDead; // Avoid dead code elimination.
var data;
switch (method) {
case 'legacy':
noDead = useLegacy(n, input);
data = common.bakeUrlData(type, e, false, false);
noDead = useLegacy(data);
break;
case 'whatwg':
noDead = useWHATWG(n, input);
data = common.bakeUrlData(type, e, withBase, false);
noDead = withBase ? useWHATWGWithBase(data) : useWHATWGWithoutBase(data);
break;
default:
throw new Error(`Unknown method ${method}`);

View File

@ -2,7 +2,7 @@
const common = require('../common.js');
const { URLSearchParams } = require('url');
const querystring = require('querystring');
const searchParams = require('../fixtures/url-inputs.js').searchParams;
const searchParams = common.searchParams;
const bench = common.createBenchmark(main, {
searchParam: Object.keys(searchParams),

View File

@ -2,7 +2,7 @@
const common = require('../common.js');
const { URLSearchParams } = require('url');
const querystring = require('querystring');
const searchParams = require('../fixtures/url-inputs.js').searchParams;
const searchParams = common.searchParams;
const bench = common.createBenchmark(main, {
searchParam: Object.keys(searchParams),

View File

@ -3,49 +3,48 @@ const common = require('../common.js');
const url = require('url');
const URL = url.URL;
const assert = require('assert');
const inputs = require('../fixtures/url-inputs.js').urls;
const bench = common.createBenchmark(main, {
type: Object.keys(inputs),
type: common.urlDataTypes,
method: ['legacy', 'whatwg'],
n: [1e5]
e: [1]
});
function useLegacy(n, input, prop) {
const obj = url.parse(input);
function useLegacy(data) {
const obj = url.parse(data[0]);
const len = data.length;
var noDead = url.format(obj);
bench.start();
for (var i = 0; i < n; i += 1) {
noDead = url.format(obj);
for (var i = 0; i < len; i++) {
noDead = data[i].toString();
}
bench.end(n);
bench.end(len);
return noDead;
}
function useWHATWG(n, input, prop) {
const obj = new URL(input);
function useWHATWG(data) {
const obj = new URL(data[0]);
const len = data.length;
var noDead = obj.toString();
bench.start();
for (var i = 0; i < n; i += 1) {
noDead = obj.toString();
for (var i = 0; i < len; i++) {
noDead = data[i].toString();
}
bench.end(n);
bench.end(len);
return noDead;
}
function main({ type, n, method }) {
const input = inputs[type];
if (!input) {
throw new Error(`Unknown input type "${type}"`);
}
function main({ type, e, method }) {
e = +e;
const data = common.bakeUrlData(type, e, false, false);
var noDead; // Avoid dead code elimination.
switch (method) {
case 'legacy':
noDead = useLegacy(n, input);
noDead = useLegacy(data);
break;
case 'whatwg':
noDead = useWHATWG(n, input);
noDead = useWHATWG(data);
break;
default:
throw new Error(`Unknown method ${method}`);

View File

@ -1,7 +1,7 @@
'use strict';
const common = require('../common.js');
const url = require('url');
const hrefs = require('../fixtures/url-inputs.js').urls;
const hrefs = common.urls;
hrefs.noscheme = 'some.ran/dom/url.thing?oh=yes#whoo';
const paths = {

View File

@ -3,6 +3,7 @@ const common = require('../common.js');
const URLSearchParams = require('url').URLSearchParams;
const inputs = {
wpt: 'wpt', // to work around tests
empty: '',
sorted: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z',
almostsorted: 'a&b&c&d&e&f&g&i&h&j&k&l&m&n&o&p&q&r&s&t&u&w&v&x&y&z',

View File

@ -1,55 +1,42 @@
'use strict';
const common = require('../common.js');
const URL = require('url').URL;
const inputs = require('../fixtures/url-inputs.js').urls;
const bench = common.createBenchmark(main, {
input: Object.keys(inputs),
withBase: ['true', 'false'],
type: ['wpt'], // Too many combinations - just use WPT by default
e: [1],
prop: ['href', 'origin', 'protocol',
'username', 'password', 'host', 'hostname', 'port',
'pathname', 'search', 'searchParams', 'hash'],
n: [3e5]
'pathname', 'search', 'searchParams', 'hash']
});
function setAndGet(n, url, prop, alternative) {
const old = url[prop];
function setAndGet(data, prop) {
const len = data.length;
var result = data[0][prop];
bench.start();
for (var i = 0; i < n; i += 1) {
url[prop] = n % 2 === 0 ? alternative : old; // set
url[prop]; // get
for (var i = 0; i < len; ++i) {
result = data[i][prop];
data[i][prop] = result;
}
bench.end(n);
bench.end(len);
return result;
}
function get(n, url, prop) {
function get(data, prop) {
const len = data.length;
var result = data[0][prop];
bench.start();
for (var i = 0; i < n; i += 1) {
url[prop]; // get
for (var i = 0; i < len; ++i) {
result = data[i][prop]; // get
}
bench.end(n);
bench.end(len);
return result;
}
const alternatives = {
href: 'http://user:pass@foo.bar.com:21/aaa/zzz?l=25#test',
protocol: 'https:',
username: 'user2',
password: 'pass2',
host: 'foo.bar.net:22',
hostname: 'foo.bar.org',
port: '23',
pathname: '/aaa/bbb',
search: '?k=99',
hash: '#abcd'
};
function getAlternative(prop) {
return alternatives[prop];
}
function main({ n, input, prop }) {
const value = inputs[input];
const url = new URL(value);
function main({ e, type, prop, withBase }) {
e = +e;
withBase = withBase === 'true';
const data = common.bakeUrlData(type, e, withBase, true);
switch (prop) {
case 'protocol':
case 'username':
@ -61,11 +48,11 @@ function main({ n, input, prop }) {
case 'search':
case 'hash':
case 'href':
setAndGet(n, url, prop, getAlternative(prop));
setAndGet(data, prop);
break;
case 'origin':
case 'searchParams':
get(n, url, prop);
get(data, prop);
break;
default:
throw new Error('Unknown prop');

View File

@ -7,6 +7,7 @@ const runBenchmark = require('../common/benchmark');
runBenchmark('url',
[
'method=legacy',
'e=0',
'loopMethod=forEach',
'accessMethod=get',
'type=short',
@ -18,6 +19,7 @@ runBenchmark('url',
'to=ascii',
'prop=href',
'n=1',
'param=one'
'param=one',
'withBase=false'
],
{ NODEJS_BENCHMARK_ZERO_ALLOWED: 1 });