benchmark: ignore significance when using --runs 1
Because the standard deviation can't be calculated when there is only one observation, the R scripts raise an error. However, it may still be useful to run them for non-statistical purposes. This changes the behaviour so that when there is only one observation, the values that depend on the standard deviation become Not Applicable (NA).

Fixes: https://github.com/nodejs/node/issues/8288
PR-URL: https://github.com/nodejs/node/pull/8299
Reviewed-By: Anna Henningsen <anna@addaleax.net>
commit d3834a1fa3 (parent 6f9157fbab)
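To illustrate the failure mode (a minimal sketch, not part of the patch): with --runs 1 there is a single rate observation per binary, so neither a standard deviation nor a t-test can be computed.

  rate = c(1000);                          # a single observation, as produced by --runs 1
  sd(rate);                                # NA: no spread can be estimated from one value
  subdat = data.frame(binary = c("old", "new"), rate = c(1000, 1100));
  # t.test(rate ~ binary, data=subdat);    # errors: not enough observations per group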
@@ -33,30 +33,39 @@ if (!is.null(plot.filename)) {
 
 # Print a table with results
 statistics = ddply(dat, "name", function(subdat) {
-  # Perform a statistics test to see of there actually is a difference in
-  # performace.
-  w = t.test(rate ~ binary, data=subdat);
+  old.rate = subset(subdat, binary == "old")$rate;
+  new.rate = subset(subdat, binary == "new")$rate;
 
   # Calculate improvement for the "new" binary compared with the "old" binary
-  new_mu = mean(subset(subdat, binary == "new")$rate);
-  old_mu = mean(subset(subdat, binary == "old")$rate);
-  improvement = sprintf("%.2f %%", ((new_mu - old_mu) / old_mu * 100));
+  old.mu = mean(old.rate);
+  new.mu = mean(new.rate);
+  improvement = sprintf("%.2f %%", ((new.mu - old.mu) / old.mu * 100));
 
+  p.value = NA;
+  significant = 'NA';
+  # Check if there is enough data to calculate the p-value
+  if (length(old.rate) > 1 && length(new.rate) > 1) {
+    # Perform a statistics test to see if there actually is a difference in
+    # performance.
+    w = t.test(rate ~ binary, data=subdat);
+    p.value = w$p.value;
+
   # Add user friendly stars to the table. There should be at least one star
   # before you can say that there is an improvement.
   significant = '';
-  if (w$p.value < 0.001) {
+  if (p.value < 0.001) {
     significant = '***';
-  } else if (w$p.value < 0.01) {
+  } else if (p.value < 0.01) {
     significant = '**';
-  } else if (w$p.value < 0.05) {
+  } else if (p.value < 0.05) {
     significant = '*';
   }
+  }
 
   r = list(
     improvement = improvement,
     significant = significant,
-    p.value = w$p.value
+    p.value = p.value
   );
   return(data.frame(r));
 });
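As a hedged sketch of what the guarded branch above changes for a single run (example values, not from the patch): the improvement is still derived from the means, while the significance columns fall back to their NA defaults.

  old.rate = c(1000);
  new.rate = c(1100);
  sprintf("%.2f %%", ((mean(new.rate) - mean(old.rate)) / mean(old.rate) * 100));  # "10.00 %"
  # length(old.rate) > 1 is FALSE, so p.value stays NA and significant stays 'NA'.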
@@ -51,13 +51,17 @@ if (length(aggregate) > 0) {
 stats = ddply(dat, c(x.axis.name, category.name), function(subdat) {
   rate = subdat$rate;
 
-  # calculate standard error of the mean
+  # calculate confidence interval of the mean
+  ci = NA;
+  if (length(rate) > 1) {
   se = sqrt(var(rate)/length(rate));
+    ci = se * qt(0.975, length(rate) - 1)
+  }
 
   # calculate mean and 95 % confidence interval
   r = list(
     rate = mean(rate),
-    confidence.interval = se * qt(0.975, length(rate) - 1)
+    confidence.interval = ci
   );
 
   return(data.frame(r));
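For reference, a small worked sketch of the interval computed in the branch above (example rates assumed): the half-width of the 95 % confidence interval is the standard error of the mean scaled by the Student-t quantile.

  rate = c(1000, 1020, 980, 1010);
  se = sqrt(var(rate)/length(rate));        # standard error of the mean
  ci = se * qt(0.975, length(rate) - 1);    # half-width of the 95 % confidence interval
  # With a single run, var(rate) is NA and qt(0.975, 0) is NaN, hence the length(rate) > 1 guard.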
@@ -66,11 +70,14 @@ stats = ddply(dat, c(x.axis.name, category.name), function(subdat) {
 print(stats, row.names=F);
 
 if (!is.null(plot.filename)) {
-  p = ggplot(stats, aes_string(x=x.axis.name, y='mean', colour=category.name));
+  p = ggplot(stats, aes_string(x=x.axis.name, y='rate', colour=category.name));
   if (use.log2) {
     p = p + scale_x_continuous(trans='log2');
   }
-  p = p + geom_errorbar(aes(ymin=mean-confidence.interval, ymax=mean+confidence.interval), width=.1);
+  p = p + geom_errorbar(
+    aes(ymin=rate-confidence.interval, ymax=rate+confidence.interval),
+    width=.1, na.rm=TRUE
+  );
   p = p + geom_point();
   p = p + ylab("rate of operations (higher is better)");
   p = p + ggtitle(dat[1, 1]);
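A brief sketch (hypothetical data) of why na.rm=TRUE is passed above: rows whose confidence.interval is NA, e.g. from a single run, would otherwise trigger "removed rows containing missing values" warnings from geom_errorbar; with na.rm=TRUE they are dropped silently while the points are still plotted.

  library(ggplot2);
  stats = data.frame(x = c(1, 2), rate = c(1000, 1100), confidence.interval = c(NA, 25));
  p = ggplot(stats, aes(x=x, y=rate));
  p = p + geom_errorbar(
    aes(ymin=rate-confidence.interval, ymax=rate+confidence.interval),
    width=.1, na.rm=TRUE
  );
  p = p + geom_point();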