feat: add statistical convergence and outlier detection (#26)
* Fix a few spellings

* Simplify the creation of benchmark fn

* Fix the error when undefined was shown in the logs

* Show the pending tasks

* Add some benchmark error cases to test manually

* Refactor the convergence logic

* Remove unused comment

* Add CV convergence logic

* Add cli option for convergence

* Add unit tests for the math utilities

* Add cli option for outlier cleanup average

* Add more comments

* Add doc comments for convergence
nazarhussain authored Jan 28, 2025
1 parent 1f36c5c commit 7efbd6f
Showing 14 changed files with 691 additions and 127 deletions.
2 changes: 1 addition & 1 deletion package.json
@@ -33,7 +33,7 @@
"test:unit": "vitest run test/unit/**/*.test.ts",
"lint": "eslint --color src/ test/",
"prepublishOnly": "yarn build",
"benchmark": "node --loader ts-node/esm ./src/cli/cli.ts 'test/perf/**/*.test.ts'",
"benchmark": "node --loader ts-node/esm ./src/cli/cli.ts 'test/perf/**/@(!(errors)).test.ts'",
"writeDocs": "node --loader ts-node/esm scripts/writeOptionsMd.ts"
},
"devDependencies": {
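The tightened benchmark glob keeps the manually-run error fixtures added in this commit (see "Add some benchmark error cases to test manually" above) out of the default run: @(!(errors)) is an extglob group matching any basename except errors. Illustrative paths (not taken from the diff):

test/perf/iteration.test.ts → matched by yarn benchmark
test/perf/errors.test.ts → excluded; run manually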
59 changes: 25 additions & 34 deletions src/benchmark/benchmarkFn.ts
@@ -5,47 +5,40 @@ import {createChainable} from "@vitest/runner/utils";
import {store} from "./globalState.js";
import {BenchApi, BenchmarkOpts, BenchmarkRunOptsWithFn, PartialBy} from "../types.js";
import {runBenchFn} from "./runBenchmarkFn.js";
import {optionsDefault} from "../cli/options.js";
import {getBenchmarkOptionsWithDefaults} from "./options.js";

export const bench: BenchApi = createBenchmarkFunction(function <T, T2>(
this: Record<"skip" | "only", boolean | undefined>,
idOrOpts: string | PartialBy<BenchmarkRunOptsWithFn<T, T2>, "fn">,
fn?: (arg: T) => void | Promise<void>
) {
const {fn: benchTask, ...opts} = coerceToOptsObj(idOrOpts, fn);
const {fn: benchTask, before, beforeEach, ...opts} = coerceToOptsObj(idOrOpts, fn);
const currentSuite = getCurrentSuite();

const globalOptions = store.getGlobalOptions() ?? {};
const parentOptions = store.getOptions(getCurrentSuite()) ?? {};
const options = {...globalOptions, ...parentOptions, ...opts};
const {timeoutBench, maxMs, minMs} = options;

let timeout = timeoutBench ?? optionsDefault.timeoutBench;
if (maxMs && maxMs > timeout) {
timeout = maxMs * 1.5;
}

if (minMs && minMs > timeout) {
timeout = minMs * 1.5;
}
const parentOptions = store.getOptions(currentSuite) ?? {};
const options = getBenchmarkOptionsWithDefaults({...globalOptions, ...parentOptions, ...opts});

async function handler(): Promise<void> {
// Ensure bench id is unique
if (store.getResult(opts.id) && !opts.skip) {
throw Error(`test titles must be unique, duplicated: '${opts.id}'`);
}

// Persist full results if requested. dir is created in `beforeAll`
const benchmarkResultsCsvDir = process.env.BENCHMARK_RESULTS_CSV_DIR;
const persistRunsNs = Boolean(benchmarkResultsCsvDir);

const {result, runsNs} = await runBenchFn({...options, fn: benchTask}, persistRunsNs);
const {result, runsNs} = await runBenchFn<T, T2>({
...options,
fn: benchTask,
before,
beforeEach,
} as BenchmarkRunOptsWithFn<T, T2>);

// Store result for:
// - to persist benchmark data later
// - to render with the custom reporter
store.setResult(opts.id, result);

// Persist full results if requested. dir is created in `beforeAll`
const benchmarkResultsCsvDir = process.env.BENCHMARK_RESULTS_CSV_DIR;
if (benchmarkResultsCsvDir) {
fs.mkdirSync(benchmarkResultsCsvDir, {recursive: true});
const filename = `${result.id}.csv`;
@@ -59,27 +52,25 @@ export const bench: BenchApi = createBenchmarkFunction(function <T, T2>(
only: opts.only ?? this.only,
sequential: true,
concurrent: false,
timeout,
timeout: options.timeoutBench,
meta: {
"chainsafe/benchmark": true,
},
});

const {id: _, ...optionsWithoutId} = opts;
setFn(task, handler);
store.setOptions(task, optionsWithoutId);

task.onFinished = [
() => {
store.removeOptions(task);
},
() => {
// Clear up the assigned handler to clean the memory
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-expect-error
setFn(task, null);
},
];
store.setOptions(task, opts);

const cleanup = (): void => {
store.removeOptions(task);
// Clean up the assigned handler to free the memory
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-expect-error
setFn(task, null);
};

task.onFailed = [cleanup];
task.onFinished = [cleanup];
});

function createBenchmarkFunction(
40 changes: 40 additions & 0 deletions src/benchmark/options.ts
@@ -0,0 +1,40 @@
import {BenchmarkOpts} from "../types.js";

export const defaultBenchmarkOptions: Required<BenchmarkOpts> = {
minRuns: 1,
maxRuns: Infinity,
minMs: 100,
maxMs: Infinity,
maxWarmUpRuns: 1000,
maxWarmUpMs: 500,
convergeFactor: 0.5 / 100, // 0.5%
runsFactor: 1,
yieldEventLoopAfterEach: false,
timeoutBench: 10_000,
noThreshold: false,
triggerGC: false,
setupFiles: [],
skip: false,
only: false,
threshold: 2,
convergence: "linear",
averageCalculation: "simple",
};

export function getBenchmarkOptionsWithDefaults(opts: BenchmarkOpts): Required<BenchmarkOpts> {
const options = Object.assign({}, defaultBenchmarkOptions, opts);

if (options.noThreshold) {
options.threshold = Infinity;
}

if (options.maxMs && options.maxMs > options.timeoutBench) {
options.timeoutBench = options.maxMs * 1.5;
}

if (options.minMs && options.minMs > options.timeoutBench) {
options.timeoutBench = options.minMs * 1.5;
}

return options;
}
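A minimal usage sketch of the merge logic above (the import path is assumed; the resulting values follow directly from the defaults and adjustments shown):

import {getBenchmarkOptionsWithDefaults} from "./options.js";

// maxMs (30s) exceeds the default timeoutBench (10s), so the timeout is stretched to 1.5 * maxMs
const opts = getBenchmarkOptionsWithDefaults({maxMs: 30_000, noThreshold: true});
console.log(opts.timeoutBench); // 45000
console.log(opts.threshold); // Infinity, since noThreshold disables regression checks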
20 changes: 13 additions & 7 deletions src/benchmark/reporter.ts
@@ -3,7 +3,7 @@ import {color, consoleLog, symbols} from "../utils/output.js";
import {store} from "./globalState.js";
import {Benchmark, BenchmarkOpts, BenchmarkResult} from "../types.js";
import {formatResultRow} from "./format.js";
import {optionsDefault} from "../cli/options.js";
import {defaultBenchmarkOptions} from "./options.js";

export class BenchmarkReporter {
indents = 0;
@@ -16,7 +16,7 @@ export class BenchmarkReporter {

constructor({prevBench, benchmarkOpts}: {prevBench: Benchmark | null; benchmarkOpts: BenchmarkOpts}) {
this.prevResults = new Map<string, BenchmarkResult>();
this.threshold = benchmarkOpts.threshold ?? optionsDefault.threshold;
this.threshold = benchmarkOpts.threshold ?? defaultBenchmarkOptions.threshold;

if (prevBench) {
for (const bench of prevBench.results) {
@@ -25,9 +25,14 @@
}
}

// eslint-disable-next-line @typescript-eslint/no-unused-vars
onTestStarted(_task: Task): void {
// this.log(task.name, "started");
onTestStarted(task: Task): void {
if (task.mode === "skip") {
this.skipped++;
consoleLog(`${this.indent()}${color("pending", " - %s")}`, task.name);
} else if (task.mode === "todo") {
this.skipped++;
consoleLog(`${this.indent()}${color("pending", " - %s")}`, task.name);
}
}

onTestFinished(task: Task): void {
@@ -46,8 +51,9 @@
}
case "fail": {
this.failed++;
consoleLog(this.indent() + color("fail", " %d) %s"), ++this.failed, task.name);
consoleLog(task.result?.errors);
const fmt = this.indent() + color("fail", " " + symbols.err) + color("fail", " %s");
consoleLog(fmt, task.name);
consoleLog(task.result?.errors?.map((e) => e.stackStr).join("\n"));
break;
}
case "pass": {
138 changes: 66 additions & 72 deletions src/benchmark/runBenchmarkFn.ts
@@ -1,4 +1,12 @@
import {BenchmarkResult, BenchmarkOpts} from "../types.js";
import {calcSum, filterOutliers, OutlierSensitivity} from "../utils/math.js";
import {getBenchmarkOptionsWithDefaults} from "./options.js";
import {createCVConvergenceCriteria, createLinearConvergenceCriteria} from "./termination.js";

const convergenceCriteria = {
["linear"]: createLinearConvergenceCriteria,
["cv"]: createCVConvergenceCriteria,
};

export type BenchmarkRunOpts = BenchmarkOpts & {
id: string;
@@ -12,49 +20,54 @@ export type BenchmarkRunOptsWithFn<T, T2> = BenchmarkOpts & {
};

export async function runBenchFn<T, T2>(
opts: BenchmarkRunOptsWithFn<T, T2>,
persistRunsNs?: boolean
opts: BenchmarkRunOptsWithFn<T, T2>
): Promise<{result: BenchmarkResult; runsNs: bigint[]}> {
const minRuns = opts.minRuns || 1;
const maxRuns = opts.maxRuns || Infinity;
const maxMs = opts.maxMs || Infinity;
const minMs = opts.minMs || 100;
const maxWarmUpMs = opts.maxWarmUpMs !== undefined ? opts.maxWarmUpMs : 500;
const maxWarmUpRuns = opts.maxWarmUpRuns !== undefined ? opts.maxWarmUpRuns : 1000;
// Ratio of maxMs that the warmup is allow to take from ellapsedMs
const {id, before, beforeEach, fn, ...rest} = opts;
const benchOptions = getBenchmarkOptionsWithDefaults(rest);
const {maxMs, maxRuns, maxWarmUpMs, maxWarmUpRuns, runsFactor, threshold, convergence, averageCalculation} =
benchOptions;

if (maxWarmUpMs >= maxMs) {
throw new Error(`Warmup time must be lower than max run time. maxWarmUpMs: ${maxWarmUpMs}, maxMs: ${maxMs}`);
}

if (maxWarmUpRuns >= maxRuns) {
throw new Error(`Warmup runs must be lower than max runs. maxWarmUpRuns: ${maxWarmUpRuns}, maxRuns: ${maxRuns}`);
}

if (averageCalculation !== "simple" && averageCalculation !== "clean-outliers") {
throw new Error(`Average calculation logic is not defined. ${averageCalculation}`);
}

if (convergence !== "linear" && convergence !== "cv") {
throw new Error(`Unknown convergence value ${convergence}`);
}

// Ratio of maxMs that the warmup is allowed to take from elapsedMs
const maxWarmUpRatio = 0.5;
const convergeFactor = opts.convergeFactor || 0.5 / 100; // 0.5%
const runsFactor = opts.runsFactor || 1;
const maxWarmUpNs = BigInt(maxWarmUpMs) * BigInt(1e6);
const sampleEveryMs = 100;
const maxWarmUpNs = BigInt(benchOptions.maxWarmUpMs) * BigInt(1e6);

const runsNs: bigint[] = [];
const startRunMs = Date.now();

const shouldTerminate = convergenceCriteria[convergence](startRunMs, benchOptions);

let runIdx = 0;
let totalNs = BigInt(0);

let totalWarmUpNs = BigInt(0);
let totalWarmUpRuns = 0;
let prevAvg0 = 0;
let prevAvg1 = 0;
let lastConvergenceSample = startRunMs;
let isWarmUp = maxWarmUpNs > 0 && maxWarmUpRuns > 0;
let isWarmUpPhase = maxWarmUpNs > 0 && maxWarmUpRuns > 0;

const inputAll = opts.before ? await opts.before() : (undefined as unknown as T2);
const inputAll = before ? await before() : (undefined as unknown as T2);

while (true) {
const ellapsedMs = Date.now() - startRunMs;
const mustStop = ellapsedMs >= maxMs || runIdx >= maxRuns;
const mayStop = ellapsedMs > minMs && runIdx > minRuns;
// Exceeds limits, must stop now
if (mustStop) {
break;
}
const elapsedMs = Date.now() - startRunMs;

const input = opts.beforeEach ? await opts.beforeEach(inputAll, runIdx) : (undefined as unknown as T);
const input = beforeEach ? await beforeEach(inputAll, runIdx) : (undefined as unknown as T);

const startNs = process.hrtime.bigint();
await opts.fn(input);
await fn(input);
const endNs = process.hrtime.bigint();

const runNs = endNs - startNs;
@@ -64,54 +77,26 @@
await new Promise((r) => setTimeout(r, 0));
}

if (isWarmUp) {
if (isWarmUpPhase) {
// Warm-up, do not count towards results
totalWarmUpRuns += 1;
totalWarmUpNs += runNs;

// On any warm-up finish condition, mark isWarmUp = true to prevent having to check them again
if (totalWarmUpNs >= maxWarmUpNs || totalWarmUpRuns >= maxWarmUpRuns || ellapsedMs / maxMs >= maxWarmUpRatio) {
isWarmUp = false;
}
} else {
// Persist results
runIdx += 1;
totalNs += runNs;
// If the caller wants the exact times of all runs, persist them
if (persistRunsNs) runsNs.push(runNs);

// When is a good time to stop a benchmark? A naive answer is after N miliseconds or M runs.
// This code aims to stop the benchmark when the average fn run time has converged at a value
// within a given convergence factor. To prevent doing expensive math to often for fast fn,
// it only takes samples every `sampleEveryMs`. It stores two past values to be able to compute
// a very rough linear and quadratic convergence.
if (Date.now() - lastConvergenceSample > sampleEveryMs) {
lastConvergenceSample = Date.now();
const avg = Number(totalNs / BigInt(runIdx));

// Compute convergence (1st order + 2nd order)
const a = prevAvg0;
const b = prevAvg1;
const c = avg;

// Only do convergence math if it may stop
if (mayStop) {
// Aprox linear convergence
const convergence1 = Math.abs(c - a);
// Aprox quadratic convergence
const convergence2 = Math.abs(b - (a + c) / 2);
// Take the greater of both to enfore linear and quadratic are below convergeFactor
const convergence = Math.max(convergence1, convergence2) / a;

// Okay to stop + has converged, stop now
if (convergence < convergeFactor) {
break;
}
}

prevAvg0 = prevAvg1;
prevAvg1 = avg;
if (totalWarmUpNs >= maxWarmUpNs || totalWarmUpRuns >= maxWarmUpRuns || elapsedMs / maxMs >= maxWarmUpRatio) {
isWarmUpPhase = false;
}

continue;
}

// Persist results
runIdx += 1;
totalNs += runNs;
runsNs.push(runNs);

if (shouldTerminate(runIdx, totalNs, runsNs)) {
break;
}
}

@@ -135,15 +120,24 @@ either the before(), beforeEach() or fn() functions are too slow.
}
}

const averageNs = Number(totalNs / BigInt(runIdx)) / runsFactor;
let averageNs!: number;

if (averageCalculation === "simple") {
averageNs = Number(totalNs / BigInt(runIdx)) / runsFactor;
}

if (averageCalculation === "clean-outliers") {
const cleanData = filterOutliers(runsNs, false, OutlierSensitivity.Mild);
averageNs = Number(calcSum(cleanData) / BigInt(cleanData.length)) / runsFactor;
}

return {
result: {
id: opts.id,
id: id,
averageNs,
runsDone: runIdx,
totalMs: Date.now() - startRunMs,
threshold: opts.noThreshold === true ? Infinity : opts.threshold,
threshold,
},
runsNs,
};
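The convergence criterion factories referenced above (createLinearConvergenceCriteria, createCVConvergenceCriteria) live in src/benchmark/termination.ts, which this view does not load. As a sketch of the shape the run loop expects, and of what a coefficient-of-variation check could look like under stated assumptions (not the actual implementation):

import {BenchmarkOpts} from "../types.js";

type ShouldTerminate = (runIdx: number, totalNs: bigint, runsNs: bigint[]) => boolean;

// Sketch only: stop once stddev/mean of the recorded runs drops below convergeFactor,
// while still honoring the hard minRuns/maxRuns and minMs/maxMs bounds.
function createCVConvergenceCriteriaSketch(startRunMs: number, opts: Required<BenchmarkOpts>): ShouldTerminate {
  return (runIdx, _totalNs, runsNs) => {
    const elapsedMs = Date.now() - startRunMs;
    if (elapsedMs >= opts.maxMs || runIdx >= opts.maxRuns) return true; // hard limit reached
    if (elapsedMs < opts.minMs || runIdx < opts.minRuns) return false; // too early to judge
    const samples = runsNs.map(Number);
    const mean = samples.reduce((a, b) => a + b, 0) / samples.length;
    const sd = Math.sqrt(samples.reduce((a, b) => a + (b - mean) ** 2, 0) / samples.length);
    return sd / mean < opts.convergeFactor;
  };
}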
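filterOutliers, calcSum and OutlierSensitivity come from src/utils/math.js, also not shown here. Read conventionally, OutlierSensitivity.Mild suggests the standard 1.5×IQR fence; a hedged reconstruction of the clean-outliers averaging helpers, under that assumption (the real implementation may differ):

// Assumed semantics: keep samples inside [Q1 - k*IQR, Q3 + k*IQR]; the boolean
// flag is taken to mean "input is already sorted".
const MILD_FENCE = 1.5;

function filterOutliersSketch(data: bigint[], sorted: boolean, k = MILD_FENCE): bigint[] {
  const s = sorted ? data : [...data].sort((a, b) => (a < b ? -1 : a > b ? 1 : 0));
  const q = (p: number): number => Number(s[Math.floor((s.length - 1) * p)]);
  const [q1, q3] = [q(0.25), q(0.75)];
  const iqr = q3 - q1;
  return s.filter((x) => Number(x) >= q1 - k * iqr && Number(x) <= q3 + k * iqr);
}

function calcSumSketch(data: bigint[]): bigint {
  return data.reduce((a, b) => a + b, BigInt(0));
}

With helpers of this shape, the clean-outliers branch above averages only the retained samples, so a handful of GC pauses or scheduler hiccups cannot skew averageNs.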
