benchmark: ignore significance when using --runs 1
Because the standard deviation can't be calculated when there is only one observation, the R scripts raise an error. However, it may still be useful to run them for non-statistical purposes. This changes the behaviour so that when there is only one observation, the values that depend on the standard deviation become Not Applicable (NA).

Fixes: https://github.com/nodejs/node/issues/8288
PR-URL: https://github.com/nodejs/node/pull/8299
Reviewed-By: Anna Henningsen <anna@addaleax.net>
commit d3834a1fa3 (parent 6f9157fbab)
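To illustrate the failure mode (a minimal sketch, not part of the patch): with --runs 1 there is a single rate observation per binary, so neither a standard deviation nor a t-test can be computed.

  rate = c(1000);                          # a single observation, as produced by --runs 1
  sd(rate);                                # NA: no spread can be estimated from one value
  subdat = data.frame(binary = c("old", "new"), rate = c(1000, 1100));
  # t.test(rate ~ binary, data=subdat);    # errors: not enough observations per group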
@@ -33,30 +33,39 @@ if (!is.null(plot.filename)) {
 
 # Print a table with results
 statistics = ddply(dat, "name", function(subdat) {
-  # Perform a statistics test to see of there actually is a difference in
-  # performace.
-  w = t.test(rate ~ binary, data=subdat);
+  old.rate = subset(subdat, binary == "old")$rate;
+  new.rate = subset(subdat, binary == "new")$rate;
 
   # Calculate improvement for the "new" binary compared with the "old" binary
-  new_mu = mean(subset(subdat, binary == "new")$rate);
-  old_mu = mean(subset(subdat, binary == "old")$rate);
-  improvement = sprintf("%.2f %%", ((new_mu - old_mu) / old_mu * 100));
+  old.mu = mean(old.rate);
+  new.mu = mean(new.rate);
+  improvement = sprintf("%.2f %%", ((new.mu - old.mu) / old.mu * 100));
 
+  p.value = NA;
+  significant = 'NA';
+  # Check if there is enough data to calculate the p-value
+  if (length(old.rate) > 1 && length(new.rate) > 1) {
+    # Perform a statistics test to see if there actually is a difference in
+    # performance.
+    w = t.test(rate ~ binary, data=subdat);
+    p.value = w$p.value;
+
   # Add user friendly stars to the table. There should be at least one star
   # before you can say that there is an improvement.
   significant = '';
-  if (w$p.value < 0.001) {
+  if (p.value < 0.001) {
     significant = '***';
-  } else if (w$p.value < 0.01) {
+  } else if (p.value < 0.01) {
     significant = '**';
-  } else if (w$p.value < 0.05) {
+  } else if (p.value < 0.05) {
     significant = '*';
   }
+  }
 
   r = list(
     improvement = improvement,
     significant = significant,
-    p.value = w$p.value
+    p.value = p.value
   );
   return(data.frame(r));
 });
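As a hedged sketch of what the guarded branch above changes for a single run (example values, not from the patch): the improvement is still derived from the means, while the significance columns fall back to their NA defaults.

  old.rate = c(1000);
  new.rate = c(1100);
  sprintf("%.2f %%", ((mean(new.rate) - mean(old.rate)) / mean(old.rate) * 100));  # "10.00 %"
  # length(old.rate) > 1 is FALSE, so p.value stays NA and significant stays 'NA'.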
@@ -51,13 +51,17 @@ if (length(aggregate) > 0) {
 stats = ddply(dat, c(x.axis.name, category.name), function(subdat) {
   rate = subdat$rate;
 
-  # calculate standard error of the mean
+  # calculate confidence interval of the mean
+  ci = NA;
+  if (length(rate) > 1) {
   se = sqrt(var(rate)/length(rate));
+    ci = se * qt(0.975, length(rate) - 1)
+  }
 
   # calculate mean and 95 % confidence interval
   r = list(
     rate = mean(rate),
-    confidence.interval = se * qt(0.975, length(rate) - 1)
+    confidence.interval = ci
   );
 
   return(data.frame(r));
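For reference, a small worked sketch of the interval computed in the branch above (example rates assumed): the half-width of the 95 % confidence interval is the standard error of the mean scaled by the Student-t quantile.

  rate = c(1000, 1020, 980, 1010);
  se = sqrt(var(rate)/length(rate));        # standard error of the mean
  ci = se * qt(0.975, length(rate) - 1);    # half-width of the 95 % confidence interval
  # With a single run, var(rate) is NA and qt(0.975, 0) is NaN, hence the length(rate) > 1 guard.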
@@ -66,11 +70,14 @@ stats = ddply(dat, c(x.axis.name, category.name), function(subdat) {
 print(stats, row.names=F);
 
 if (!is.null(plot.filename)) {
-  p = ggplot(stats, aes_string(x=x.axis.name, y='mean', colour=category.name));
+  p = ggplot(stats, aes_string(x=x.axis.name, y='rate', colour=category.name));
   if (use.log2) {
     p = p + scale_x_continuous(trans='log2');
   }
-  p = p + geom_errorbar(aes(ymin=mean-confidence.interval, ymax=mean+confidence.interval), width=.1);
+  p = p + geom_errorbar(
+    aes(ymin=rate-confidence.interval, ymax=rate+confidence.interval),
+    width=.1, na.rm=TRUE
+  );
   p = p + geom_point();
   p = p + ylab("rate of operations (higher is better)");
   p = p + ggtitle(dat[1, 1]);
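A brief sketch (hypothetical data) of why na.rm=TRUE is passed above: rows whose confidence.interval is NA, e.g. from a single run, would otherwise trigger "removed rows containing missing values" warnings from geom_errorbar; with na.rm=TRUE they are dropped silently while the points are still plotted.

  library(ggplot2);
  stats = data.frame(x = c(1, 2), rate = c(1000, 1100), confidence.interval = c(NA, 25));
  p = ggplot(stats, aes(x=x, y=rate));
  p = p + geom_errorbar(
    aes(ymin=rate-confidence.interval, ymax=rate+confidence.interval),
    width=.1, na.rm=TRUE
  );
  p = p + geom_point();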