feat: add statistical convergence and outlier detection (#26)
* Fix a few spellings

* Simplify the creation of benchmark fn

* Fix the error when undefined was shown in the logs

* Show the pending tasks

* Add some benchmark error cases to test manually

* Refactor the convergence logic

* Remove unused comment

* Add CV convergence logic

* Add cli option for convergence

* Add unit tests for the math utilities

* Add cli option for outlier cleanup average

* Add more comments

* Add doc comments for convergence
nazarhussain authored Jan 28, 2025
1 parent 1f36c5c commit 7efbd6f
Showing 14 changed files with 691 additions and 127 deletions.
2 changes: 1 addition & 1 deletion package.json
@@ -33,7 +33,7 @@
"test:unit": "vitest run test/unit/**/*.test.ts",
"lint": "eslint --color src/ test/",
"prepublishOnly": "yarn build",
"benchmark": "node --loader ts-node/esm ./src/cli/cli.ts 'test/perf/**/*.test.ts'",
"benchmark": "node --loader ts-node/esm ./src/cli/cli.ts 'test/perf/**/@(!(errors)).test.ts'",
"writeDocs": "node --loader ts-node/esm scripts/writeOptionsMd.ts"
},
"devDependencies": {
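The tightened benchmark glob keeps the manually-run error fixtures added in this commit (see "Add some benchmark error cases to test manually" above) out of the default run: @(!(errors)) is an extglob group matching any basename except errors. Illustrative paths (not taken from the diff):

test/perf/iteration.test.ts → matched by yarn benchmark
test/perf/errors.test.ts → excluded; run manually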
59 changes: 25 additions & 34 deletions src/benchmark/benchmarkFn.ts
@@ -5,47 +5,40 @@ import {createChainable} from "@vitest/runner/utils";
import {store} from "./globalState.js";
import {BenchApi, BenchmarkOpts, BenchmarkRunOptsWithFn, PartialBy} from "../types.js";
import {runBenchFn} from "./runBenchmarkFn.js";
import {optionsDefault} from "../cli/options.js";
import {getBenchmarkOptionsWithDefaults} from "./options.js";

export const bench: BenchApi = createBenchmarkFunction(function <T, T2>(
this: Record<"skip" | "only", boolean | undefined>,
idOrOpts: string | PartialBy<BenchmarkRunOptsWithFn<T, T2>, "fn">,
fn?: (arg: T) => void | Promise<void>
) {
const {fn: benchTask, ...opts} = coerceToOptsObj(idOrOpts, fn);
const {fn: benchTask, before, beforeEach, ...opts} = coerceToOptsObj(idOrOpts, fn);
const currentSuite = getCurrentSuite();

const globalOptions = store.getGlobalOptions() ?? {};
const parentOptions = store.getOptions(getCurrentSuite()) ?? {};
const options = {...globalOptions, ...parentOptions, ...opts};
const {timeoutBench, maxMs, minMs} = options;

let timeout = timeoutBench ?? optionsDefault.timeoutBench;
if (maxMs && maxMs > timeout) {
timeout = maxMs * 1.5;
}

if (minMs && minMs > timeout) {
timeout = minMs * 1.5;
}
const parentOptions = store.getOptions(currentSuite) ?? {};
const options = getBenchmarkOptionsWithDefaults({...globalOptions, ...parentOptions, ...opts});

async function handler(): Promise<void> {
// Ensure bench id is unique
if (store.getResult(opts.id) && !opts.skip) {
throw Error(`test titles must be unique, duplicated: '${opts.id}'`);
}

// Persist full results if requested. dir is created in `beforeAll`
const benchmarkResultsCsvDir = process.env.BENCHMARK_RESULTS_CSV_DIR;
const persistRunsNs = Boolean(benchmarkResultsCsvDir);

const {result, runsNs} = await runBenchFn({...options, fn: benchTask}, persistRunsNs);
const {result, runsNs} = await runBenchFn<T, T2>({
...options,
fn: benchTask,
before,
beforeEach,
} as BenchmarkRunOptsWithFn<T, T2>);

// Store result for:
// - to persist benchmark data later
// - to render with the custom reporter
store.setResult(opts.id, result);

// Persist full results if requested. dir is created in `beforeAll`
const benchmarkResultsCsvDir = process.env.BENCHMARK_RESULTS_CSV_DIR;
if (benchmarkResultsCsvDir) {
fs.mkdirSync(benchmarkResultsCsvDir, {recursive: true});
const filename = `${result.id}.csv`;
@@ -59,27 +52,25 @@ export const bench: BenchApi = createBenchmarkFunction(function <T, T2>(
only: opts.only ?? this.only,
sequential: true,
concurrent: false,
timeout,
timeout: options.timeoutBench,
meta: {
"chainsafe/benchmark": true,
},
});

const {id: _, ...optionsWithoutId} = opts;
setFn(task, handler);
store.setOptions(task, optionsWithoutId);

task.onFinished = [
() => {
store.removeOptions(task);
},
() => {
// Clear up the assigned handler to clean the memory
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-expect-error
setFn(task, null);
},
];
store.setOptions(task, opts);

const cleanup = (): void => {
store.removeOptions(task);
// Clean up the assigned handler to free the memory
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-expect-error
setFn(task, null);
};

task.onFailed = [cleanup];
task.onFinished = [cleanup];
});

function createBenchmarkFunction(
40 changes: 40 additions & 0 deletions src/benchmark/options.ts
@@ -0,0 +1,40 @@
import {BenchmarkOpts} from "../types.js";

export const defaultBenchmarkOptions: Required<BenchmarkOpts> = {
minRuns: 1,
maxRuns: Infinity,
minMs: 100,
maxMs: Infinity,
maxWarmUpRuns: 1000,
maxWarmUpMs: 500,
convergeFactor: 0.5 / 100, // 0.5%
runsFactor: 1,
yieldEventLoopAfterEach: false,
timeoutBench: 10_000,
noThreshold: false,
triggerGC: false,
setupFiles: [],
skip: false,
only: false,
threshold: 2,
convergence: "linear",
averageCalculation: "simple",
};

export function getBenchmarkOptionsWithDefaults(opts: BenchmarkOpts): Required<BenchmarkOpts> {
const options = Object.assign({}, defaultBenchmarkOptions, opts);

if (options.noThreshold) {
options.threshold = Infinity;
}

if (options.maxMs && options.maxMs > options.timeoutBench) {
options.timeoutBench = options.maxMs * 1.5;
}

if (options.minMs && options.minMs > options.timeoutBench) {
options.timeoutBench = options.minMs * 1.5;
}

return options;
}
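A minimal usage sketch of the merge logic above (the import path is assumed; the resulting values follow directly from the defaults and adjustments shown):

import {getBenchmarkOptionsWithDefaults} from "./options.js";

// maxMs (30s) exceeds the default timeoutBench (10s), so the timeout is stretched to 1.5 * maxMs
const opts = getBenchmarkOptionsWithDefaults({maxMs: 30_000, noThreshold: true});
console.log(opts.timeoutBench); // 45000
console.log(opts.threshold); // Infinity, since noThreshold disables regression checks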
20 changes: 13 additions & 7 deletions src/benchmark/reporter.ts
@@ -3,7 +3,7 @@ import {color, consoleLog, symbols} from "../utils/output.js";
import {store} from "./globalState.js";
import {Benchmark, BenchmarkOpts, BenchmarkResult} from "../types.js";
import {formatResultRow} from "./format.js";
import {optionsDefault} from "../cli/options.js";
import {defaultBenchmarkOptions} from "./options.js";

export class BenchmarkReporter {
indents = 0;
@@ -16,7 +16,7 @@ export class BenchmarkReporter {

constructor({prevBench, benchmarkOpts}: {prevBench: Benchmark | null; benchmarkOpts: BenchmarkOpts}) {
this.prevResults = new Map<string, BenchmarkResult>();
this.threshold = benchmarkOpts.threshold ?? optionsDefault.threshold;
this.threshold = benchmarkOpts.threshold ?? defaultBenchmarkOptions.threshold;

if (prevBench) {
for (const bench of prevBench.results) {
@@ -25,9 +25,14 @@
}
}

// eslint-disable-next-line @typescript-eslint/no-unused-vars
onTestStarted(_task: Task): void {
// this.log(task.name, "started");
onTestStarted(task: Task): void {
if (task.mode === "skip") {
this.skipped++;
consoleLog(`${this.indent()}${color("pending", " - %s")}`, task.name);
} else if (task.mode === "todo") {
this.skipped++;
consoleLog(`${this.indent()}${color("pending", " - %s")}`, task.name);
}
}

onTestFinished(task: Task): void {
@@ -46,8 +51,9 @@
}
case "fail": {
this.failed++;
consoleLog(this.indent() + color("fail", " %d) %s"), ++this.failed, task.name);
consoleLog(task.result?.errors);
const fmt = this.indent() + color("fail", " " + symbols.err) + color("fail", " %s");
consoleLog(fmt, task.name);
consoleLog(task.result?.errors?.map((e) => e.stackStr).join("\n"));
break;
}
case "pass": {
138 changes: 66 additions & 72 deletions src/benchmark/runBenchmarkFn.ts
@@ -1,4 +1,12 @@
import {BenchmarkResult, BenchmarkOpts} from "../types.js";
import {calcSum, filterOutliers, OutlierSensitivity} from "../utils/math.js";
import {getBenchmarkOptionsWithDefaults} from "./options.js";
import {createCVConvergenceCriteria, createLinearConvergenceCriteria} from "./termination.js";

const convergenceCriteria = {
["linear"]: createLinearConvergenceCriteria,
["cv"]: createCVConvergenceCriteria,
};

export type BenchmarkRunOpts = BenchmarkOpts & {
id: string;
@@ -12,49 +20,54 @@ export type BenchmarkRunOptsWithFn<T, T2> = BenchmarkOpts & {
};

export async function runBenchFn<T, T2>(
opts: BenchmarkRunOptsWithFn<T, T2>,
persistRunsNs?: boolean
opts: BenchmarkRunOptsWithFn<T, T2>
): Promise<{result: BenchmarkResult; runsNs: bigint[]}> {
const minRuns = opts.minRuns || 1;
const maxRuns = opts.maxRuns || Infinity;
const maxMs = opts.maxMs || Infinity;
const minMs = opts.minMs || 100;
const maxWarmUpMs = opts.maxWarmUpMs !== undefined ? opts.maxWarmUpMs : 500;
const maxWarmUpRuns = opts.maxWarmUpRuns !== undefined ? opts.maxWarmUpRuns : 1000;
// Ratio of maxMs that the warmup is allow to take from ellapsedMs
const {id, before, beforeEach, fn, ...rest} = opts;
const benchOptions = getBenchmarkOptionsWithDefaults(rest);
const {maxMs, maxRuns, maxWarmUpMs, maxWarmUpRuns, runsFactor, threshold, convergence, averageCalculation} =
benchOptions;

if (maxWarmUpMs >= maxMs) {
throw new Error(`Warmup time must be lower than max run time. maxWarmUpMs: ${maxWarmUpMs}, maxMs: ${maxMs}`);
}

if (maxWarmUpRuns >= maxRuns) {
throw new Error(`Warmup runs must be lower than max runs. maxWarmUpRuns: ${maxWarmUpRuns}, maxRuns: ${maxRuns}`);
}

if (averageCalculation !== "simple" && averageCalculation !== "clean-outliers") {
throw new Error(`Average calculation logic is not defined. ${averageCalculation}`);
}

if (convergence !== "linear" && convergence !== "cv") {
throw new Error(`Unknown convergence value ${convergence}`);
}

// Ratio of maxMs that the warmup is allowed to take from elapsedMs
const maxWarmUpRatio = 0.5;
const convergeFactor = opts.convergeFactor || 0.5 / 100; // 0.5%
const runsFactor = opts.runsFactor || 1;
const maxWarmUpNs = BigInt(maxWarmUpMs) * BigInt(1e6);
const sampleEveryMs = 100;
const maxWarmUpNs = BigInt(benchOptions.maxWarmUpMs) * BigInt(1e6);

const runsNs: bigint[] = [];
const startRunMs = Date.now();

const shouldTerminate = convergenceCriteria[convergence](startRunMs, benchOptions);

let runIdx = 0;
let totalNs = BigInt(0);

let totalWarmUpNs = BigInt(0);
let totalWarmUpRuns = 0;
let prevAvg0 = 0;
let prevAvg1 = 0;
let lastConvergenceSample = startRunMs;
let isWarmUp = maxWarmUpNs > 0 && maxWarmUpRuns > 0;
let isWarmUpPhase = maxWarmUpNs > 0 && maxWarmUpRuns > 0;

const inputAll = opts.before ? await opts.before() : (undefined as unknown as T2);
const inputAll = before ? await before() : (undefined as unknown as T2);

while (true) {
const ellapsedMs = Date.now() - startRunMs;
const mustStop = ellapsedMs >= maxMs || runIdx >= maxRuns;
const mayStop = ellapsedMs > minMs && runIdx > minRuns;
// Exceeds limits, must stop now
if (mustStop) {
break;
}
const elapsedMs = Date.now() - startRunMs;

const input = opts.beforeEach ? await opts.beforeEach(inputAll, runIdx) : (undefined as unknown as T);
const input = beforeEach ? await beforeEach(inputAll, runIdx) : (undefined as unknown as T);

const startNs = process.hrtime.bigint();
await opts.fn(input);
await fn(input);
const endNs = process.hrtime.bigint();

const runNs = endNs - startNs;
@@ -64,54 +77,26 @@
await new Promise((r) => setTimeout(r, 0));
}

if (isWarmUp) {
if (isWarmUpPhase) {
// Warm-up, do not count towards results
totalWarmUpRuns += 1;
totalWarmUpNs += runNs;

// On any warm-up finish condition, mark isWarmUp = true to prevent having to check them again
if (totalWarmUpNs >= maxWarmUpNs || totalWarmUpRuns >= maxWarmUpRuns || ellapsedMs / maxMs >= maxWarmUpRatio) {
isWarmUp = false;
}
} else {
// Persist results
runIdx += 1;
totalNs += runNs;
// If the caller wants the exact times of all runs, persist them
if (persistRunsNs) runsNs.push(runNs);

// When is a good time to stop a benchmark? A naive answer is after N miliseconds or M runs.
// This code aims to stop the benchmark when the average fn run time has converged at a value
// within a given convergence factor. To prevent doing expensive math to often for fast fn,
// it only takes samples every `sampleEveryMs`. It stores two past values to be able to compute
// a very rough linear and quadratic convergence.
if (Date.now() - lastConvergenceSample > sampleEveryMs) {
lastConvergenceSample = Date.now();
const avg = Number(totalNs / BigInt(runIdx));

// Compute convergence (1st order + 2nd order)
const a = prevAvg0;
const b = prevAvg1;
const c = avg;

// Only do convergence math if it may stop
if (mayStop) {
// Aprox linear convergence
const convergence1 = Math.abs(c - a);
// Aprox quadratic convergence
const convergence2 = Math.abs(b - (a + c) / 2);
// Take the greater of both to enfore linear and quadratic are below convergeFactor
const convergence = Math.max(convergence1, convergence2) / a;

// Okay to stop + has converged, stop now
if (convergence < convergeFactor) {
break;
}
}

prevAvg0 = prevAvg1;
prevAvg1 = avg;
if (totalWarmUpNs >= maxWarmUpNs || totalWarmUpRuns >= maxWarmUpRuns || elapsedMs / maxMs >= maxWarmUpRatio) {
isWarmUpPhase = false;
}

continue;
}

// Persist results
runIdx += 1;
totalNs += runNs;
runsNs.push(runNs);

if (shouldTerminate(runIdx, totalNs, runsNs)) {
break;
}
}

@@ -135,15 +120,24 @@ either the before(), beforeEach() or fn() functions are too slow.
}
}

const averageNs = Number(totalNs / BigInt(runIdx)) / runsFactor;
let averageNs!: number;

if (averageCalculation === "simple") {
averageNs = Number(totalNs / BigInt(runIdx)) / runsFactor;
}

if (averageCalculation === "clean-outliers") {
const cleanData = filterOutliers(runsNs, false, OutlierSensitivity.Mild);
averageNs = Number(calcSum(cleanData) / BigInt(cleanData.length)) / runsFactor;
}

return {
result: {
id: opts.id,
id: id,
averageNs,
runsDone: runIdx,
totalMs: Date.now() - startRunMs,
threshold: opts.noThreshold === true ? Infinity : opts.threshold,
threshold,
},
runsNs,
};
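The convergence criterion factories referenced above (createLinearConvergenceCriteria, createCVConvergenceCriteria) live in src/benchmark/termination.ts, which this view does not load. As a sketch of the shape the run loop expects, and of what a coefficient-of-variation check could look like under stated assumptions (not the actual implementation):

import {BenchmarkOpts} from "../types.js";

type ShouldTerminate = (runIdx: number, totalNs: bigint, runsNs: bigint[]) => boolean;

// Sketch only: stop once stddev/mean of the recorded runs drops below convergeFactor,
// while still honoring the hard minRuns/maxRuns and minMs/maxMs bounds.
function createCVConvergenceCriteriaSketch(startRunMs: number, opts: Required<BenchmarkOpts>): ShouldTerminate {
  return (runIdx, _totalNs, runsNs) => {
    const elapsedMs = Date.now() - startRunMs;
    if (elapsedMs >= opts.maxMs || runIdx >= opts.maxRuns) return true; // hard limit reached
    if (elapsedMs < opts.minMs || runIdx < opts.minRuns) return false; // too early to judge
    const samples = runsNs.map(Number);
    const mean = samples.reduce((a, b) => a + b, 0) / samples.length;
    const sd = Math.sqrt(samples.reduce((a, b) => a + (b - mean) ** 2, 0) / samples.length);
    return sd / mean < opts.convergeFactor;
  };
}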
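filterOutliers, calcSum and OutlierSensitivity come from src/utils/math.js, also not shown here. Read conventionally, OutlierSensitivity.Mild suggests the standard 1.5×IQR fence; a hedged reconstruction of the clean-outliers averaging helpers, under that assumption (the real implementation may differ):

// Assumed semantics: keep samples inside [Q1 - k*IQR, Q3 + k*IQR]; the boolean
// flag is taken to mean "input is already sorted".
const MILD_FENCE = 1.5;

function filterOutliersSketch(data: bigint[], sorted: boolean, k = MILD_FENCE): bigint[] {
  const s = sorted ? data : [...data].sort((a, b) => (a < b ? -1 : a > b ? 1 : 0));
  const q = (p: number): number => Number(s[Math.floor((s.length - 1) * p)]);
  const [q1, q3] = [q(0.25), q(0.75)];
  const iqr = q3 - q1;
  return s.filter((x) => Number(x) >= q1 - k * iqr && Number(x) <= q3 + k * iqr);
}

function calcSumSketch(data: bigint[]): bigint {
  return data.reduce((a, b) => a + b, BigInt(0));
}

With helpers of this shape, the clean-outliers branch above averages only the retained samples, so a handful of GC pauses or scheduler hiccups cannot skew averageNs.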
