benchmark: add script for creating scatter plot

Previously this was a tool in `plot.R`. It is now a more complete tool
which executes the benchmarks many times and creates a scatter plot.

PR-URL: https://github.com/nodejs/node/pull/7094
Reviewed-By: Trevor Norris <trev.norris@gmail.com>
Reviewed-By: Jeremiah Senkpiel <fishrock123@rocketmail.com>
Reviewed-By: Brian White <mscdex@mscdex.net>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
This commit is contained in:
Andreas Madsen 2016-02-07 18:01:39 +01:00
parent 855009af7f
commit 0c0f34e2fe
2 changed files with 151 additions and 0 deletions

78
benchmark/scatter.R Normal file
View File

@ -0,0 +1,78 @@
#!/usr/bin/env Rscript
library(ggplot2);
library(plyr);
# Work out the directory this script lives in from Rscript's `--file=`
# argument, then load the shared command-line helper `_cli.R` from there.
# NOTE(review): `args.options` used below is presumably defined by `_cli.R`.
args = commandArgs(trailingOnly = FALSE);
dirname = dirname(sub("--file=", "", args[grep("--file", args)]));
source(paste0(dirname, '/_cli.R'), chdir=TRUE);

usage.text = "usage: cat file.csv | Rscript scatter.R [variable=value ...]
--xaxis variable variable name to use as xaxis (required)
--category variable variable name to use as colored category (required)
--plot filename save plot to filename
--log use a log-2 scale for xaxis in the plot";

# --xaxis and --category are both mandatory.
if (is.null(args.options$xaxis) || is.null(args.options$category)) {
  stop(usage.text);
}
# --plot must carry a filename; a value of TRUE (presumably a bare flag with
# no filename) is rejected.
if (!is.null(args.options$plot) && args.options$plot == TRUE) {
  stop(usage.text);
}

# Pull the validated options into plain variables used by the rest of the
# script.
x.axis.name = args.options$xaxis;
category.name = args.options$category;
plot.filename = args.options$plot;
use.log2 = !is.null(args.options$log);
# Read the benchmark samples as CSV from stdin.
dat = data.frame(read.csv(file('stdin'), strip.white=TRUE));

# Every column that is neither a measurement/identifier column nor one of the
# two grouping variables is a candidate for being aggregated over.
column.names = names(dat);
not.aggregated = c('rate', 'time', 'filename', x.axis.name, category.name);
aggregate = column.names[!(column.names %in% not.aggregated)];

# A column holding exactly one distinct value does not vary, so it is not
# reported as aggregated.
is.varying = vapply(
  aggregate,
  function(key) length(unique(dat[[key]])) != 1,
  logical(1)
);
aggregate = aggregate[is.varying];

# Report the aggregated variables, one per line, followed by a blank line
# when there is at least one.
cat(sprintf('aggregating variable: %s\n', aggregate), sep='');
if (length(aggregate) > 0) {
  cat('\n');
}
# Summarise one (x-axis value, category) group of samples into a one-row
# data frame holding the mean rate and the half-width of its 95 % confidence
# interval (Student's t, n - 1 degrees of freedom).
summarize.group = function(group) {
  samples = group$rate;
  n = length(samples);

  # standard error of the mean
  standard.error = sqrt(var(samples) / n);

  data.frame(
    rate = mean(samples),
    confidence.interval = standard.error * qt(0.975, n - 1)
  );
};

# One row of statistics per combination of x-axis value and category.
stats = ddply(dat, c(x.axis.name, category.name), summarize.group);

print(stats, row.names=FALSE);
# When --plot was given, render the statistics as a scatter plot with error
# bars and save it to the requested file.
if (!is.null(plot.filename)) {
  # The per-group mean is stored in the `rate` column of `stats` (there is no
  # `mean` column), so `rate` is what must be mapped to the y aesthetic and
  # used as the centre of the error bars.
  p = ggplot(stats, aes_string(x=x.axis.name, y='rate', colour=category.name));
  if (use.log2) {
    p = p + scale_x_continuous(trans='log2');
  }
  p = p + geom_errorbar(
    aes(ymin=rate-confidence.interval, ymax=rate+confidence.interval),
    width=.1
  );
  p = p + geom_point();
  p = p + ylab("rate of operations (higher is better)");
  # Title is the first cell of the input data; presumably the benchmark
  # filename, since scatter.js emits "filename" as the first CSV column.
  p = p + ggtitle(dat[1, 1]);
  ggsave(plot.filename, p);
}

73
benchmark/scatter.js Normal file
View File

@ -0,0 +1,73 @@
'use strict';
const fork = require('child_process').fork;
const path = require('path');
const CLI = require('./_cli.js');
//
// Parse arguments
//
const cli = CLI(`usage: ./node scatter.js [options] [--] <filename>
Run the benchmark script <filename> many times and output the rate (ops/s)
together with the benchmark variables as a csv.
--runs 30 number of samples
--set variable=value set benchmark variable (can be repeated)
`, {
arrayArgs: ['set']
});
if (cli.items.length !== 1) {
cli.abort(cli.usage);
return;
}
// Create queue from the benchmarks list such both node versions are tested
// `runs` amount of times each.
const filepath = path.resolve(cli.items[0]);
const name = filepath.slice(__dirname.length + 1);
const runs = cli.optional.runs ? parseInt(cli.optional.runs, 10) : 30;
let printHeader = true;
/**
 * Encode a single value as a CSV field.
 *
 * Numbers are written bare. Any other value is converted to a string and
 * wrapped in double quotes, with embedded double quotes doubled.
 *
 * @param {*} value - the value to encode
 * @returns {string} the CSV representation of `value`
 */
function csvEncodeValue(value) {
  if (typeof value === 'number') {
    return value.toString();
  }
  // Coerce first: booleans, null and other non-string values have no
  // `.replace` method and would otherwise throw a TypeError.
  return '"' + String(value).replace(/"/g, '""') + '"';
}
// Run the benchmark `runs` times, one child process at a time: the next run
// is only started after the previous child has closed.
(function runOnce(iteration) {
  const benchmark = fork(path.resolve(__dirname, filepath), cli.optional.set);

  // Each message from the child becomes one CSV data row.
  benchmark.on('message', (data) => {
    const keys = Object.keys(data.conf);

    // The header line is written once, ahead of the very first data row.
    if (printHeader) {
      const headerCells = keys.map(csvEncodeValue).join(', ');
      console.log(`"filename", ${headerCells}, "rate", "time"`);
      printHeader = false;
    }

    const rowCells = keys
      .map((key) => csvEncodeValue(data.conf[key]))
      .join(', ');
    console.log(`"${name}", ${rowCells}, ${data.rate}, ${data.time}`);
  });

  benchmark.once('close', (code) => {
    // A non-zero exit code from the child ends this process with that code.
    if (code) {
      process.exit(code);
      return;
    }

    // Otherwise start the next run, if any remain.
    const next = iteration + 1;
    if (next < runs) {
      runOnce(next);
    }
  });
})(0);