diff --git a/benchmark/scatter.R b/benchmark/scatter.R
new file mode 100644
index 00000000000000..7b98611482dfb2
--- /dev/null
+++ b/benchmark/scatter.R
@@ -0,0 +1,96 @@
+#!/usr/bin/env Rscript
+#
+# Reads benchmark samples as csv from stdin, prints the mean rate and its
+# 95 % confidence interval for each (xaxis, category) combination and, when
+# --plot is given, saves a scatter plot of those statistics.
+#
+library(ggplot2);
+library(plyr);
+
+# get __dirname and load ./_cli.R
+args = commandArgs(trailingOnly = F);
+dirname = dirname(sub("--file=", "", args[grep("--file", args)]));
+source(paste0(dirname, '/_cli.R'), chdir=T);
+
+# --xaxis and --category are required. A --plot value of TRUE is rejected too;
+# presumably that is what _cli.R stores when the filename is missing.
+if (is.null(args.options$xaxis) || is.null(args.options$category) ||
+    (!is.null(args.options$plot) && args.options$plot == TRUE)) {
+  stop("usage: cat file.csv | Rscript scatter.R [variable=value ...]
+  --xaxis variable variable name to use as xaxis (required)
+  --category variable variable name to use as colored category (required)
+  --plot filename save plot to filename
+  --log use a log-2 scale for xaxis in the plot");
+}
+
+plot.filename = args.options$plot;
+
+# parse options
+x.axis.name = args.options$xaxis;
+category.name = args.options$category;
+use.log2 = !is.null(args.options$log);
+
+# parse data
+dat = read.csv(file('stdin'), strip.white=TRUE);
+dat = data.frame(dat);
+
+# List of aggregated variables
+aggregate = names(dat);
+aggregate = aggregate[
+  ! aggregate %in% c('rate', 'time', 'filename', x.axis.name, category.name)
+];
+# Variables that don't change aren't aggregated
+for (aggregate.key in aggregate) {
+  if (length(unique(dat[[aggregate.key]])) == 1) {
+    aggregate = aggregate[aggregate != aggregate.key];
+  }
+}
+
+# Print out aggregated variables
+for (aggregate.variable in aggregate) {
+  cat(sprintf('aggregating variable: %s\n', aggregate.variable));
+}
+if (length(aggregate) > 0) {
+  cat('\n');
+}
+
+# Calculate statistics
+stats = ddply(dat, c(x.axis.name, category.name), function(subdat) {
+  rate = subdat$rate;
+
+  # calculate the 95 % confidence interval from the standard error of the
+  # mean; it is undefined (NA) when the group has a single sample
+  ci = NA;
+  if (length(rate) > 1) {
+    se = sqrt(var(rate)/length(rate));
+    ci = se * qt(0.975, length(rate) - 1);
+  }
+
+  # calculate mean and 95 % confidence interval
+  r = list(
+    rate = mean(rate),
+    confidence.interval = ci
+  );
+
+  return(data.frame(r));
+});
+
+print(stats, row.names=F);
+
+if (!is.null(plot.filename)) {
+  # ddply above stores the mean in the `rate` column, so the aesthetics
+  # refer to `rate`
+  p = ggplot(stats, aes_string(x=x.axis.name, y='rate', colour=category.name));
+  if (use.log2) {
+    p = p + scale_x_continuous(trans='log2');
+  }
+  # na.rm drops the undefined intervals of single-sample groups silently
+  p = p + geom_errorbar(
+    aes(ymin=rate-confidence.interval, ymax=rate+confidence.interval),
+    width=.1, na.rm=TRUE
+  );
+  p = p + geom_point();
+  p = p + ylab("rate of operations (higher is better)");
+  p = p + ggtitle(dat[1, 1]);
+  ggsave(plot.filename, p);
+}
diff --git a/benchmark/scatter.js b/benchmark/scatter.js
new file mode 100644
index 00000000000000..3003616b58eee4
--- /dev/null
+++ b/benchmark/scatter.js
@@ -0,0 +1,85 @@
+'use strict';
+
+// Runs a single benchmark script `runs` times, one child process after the
+// other, and prints every reported result as a csv row on stdout.
+
+const fork = require('child_process').fork;
+const path = require('path');
+const CLI = require('./_cli.js');
+
+//
+// Parse arguments
+//
+const cli = CLI(`usage: ./node scatter.js [options] [--] <filename>
+  Run the benchmark script many times and output the rate (ops/s)
+  together with the benchmark variables as a csv.
+
+  --runs 30 number of samples
+  --set variable=value set benchmark variable (can be repeated)
+`, {
+  arrayArgs: ['set']
+});
+
+// Exactly one benchmark file must be given
+if (cli.items.length !== 1) {
+  cli.abort(cli.usage);
+  return;
+}
+
+// The benchmark is identified in the csv by its path relative to this
+// directory (assumes the file is located below it) and is executed `runs`
+// times (30 unless --runs is given).
+const filepath = path.resolve(cli.items[0]);
+const name = filepath.slice(__dirname.length + 1);
+const runs = cli.optional.runs ? parseInt(cli.optional.runs, 10) : 30;
+
+let printHeader = true;
+
+// Encodes a value as a csv field: numbers are printed as they are, every
+// other value is converted to a string and quoted, with embedded double
+// quotes doubled.
+function csvEncodeValue(value) {
+  if (typeof value === 'number') {
+    return value.toString();
+  } else {
+    // String() keeps non-string values (e.g. booleans) from throwing
+    return '"' + String(value).replace(/"/g, '""') + '"';
+  }
+}
+
+// Runs the benchmark once and starts run `i + 1` when the child has closed,
+// so that the samples are taken sequentially.
+(function recursive(i) {
+  // `filepath` is already absolute; the --set values become the child's argv
+  const child = fork(filepath, cli.optional.set);
+
+  child.on('message', function(data) {
+    // print csv header
+    if (printHeader) {
+      const confHeader = Object.keys(data.conf)
+        .map(csvEncodeValue)
+        .join(', ');
+      console.log(`"filename", ${confHeader}, "rate", "time"`);
+      printHeader = false;
+    }
+
+    // print data row
+    const confData = Object.keys(data.conf)
+      .map((key) => csvEncodeValue(data.conf[key]))
+      .join(', ');
+
+    console.log(`"${name}", ${confData}, ${data.rate}, ${data.time}`);
+  });
+
+  child.once('close', function(code) {
+    // Stop at the first failing run and propagate its exit code
+    if (code) {
+      process.exit(code);
+      return;
+    }
+
+    // If there are more runs left execute the next
+    if (i + 1 < runs) {
+      recursive(i + 1);
+    }
+  });
+})(0);