builtins/math: impl sumPrecise

test262: 55.76% (+0.01) | 🧪 50259 | 🤠 28026 (+6) | ❌ 7293 | 💀 13720 (-6) | 🏗️ 32 | 💥 177 | ⏰ 11 | 📝 1000
CanadaHonk · Jan 6, 2025 · 784776b · 784776b
1 parent 83c0f21
commit 784776b
Show file tree

Hide file tree

Showing 7 changed files with 464 additions and 355 deletions.
diff --git a/compiler/builtins.js b/compiler/builtins.js
@@ -1025,5 +1025,24 @@ export const BuiltinFuncs = function() {
     table: true
   };
 
+  this.__Porffor_number_getExponent = {
+    params: [ Valtype.f64 ],
+    returns: [ Valtype.i32 ],
+    returnType: TYPES.number,
+    wasm: [
+      // extract exponent bits from f64 with bit manipulation
+      [ Opcodes.local_get, 0 ],
+      [ Opcodes.i64_reinterpret_f64 ],
+      number(52, Valtype.i64),
+      [ Opcodes.i64_shr_u ],
+      number(0x7FF, Valtype.i64),
+      [ Opcodes.i64_and ],
+      [ Opcodes.i32_wrap_i64 ],
+      number(1023, Valtype.i32),
+      [ Opcodes.i32_sub ]
+    ]
+  };
+
+
   PrecompiledBuiltins.BuiltinFuncs.call(this);
 };
diff --git a/compiler/builtins/math.ts b/compiler/builtins/math.ts
@@ -264,7 +264,7 @@ export const __Math_expm1 = (x: number): number => {
     return x;
   }
 
-  // use exp(x) - 1 for large x (perf)
+  // opt: use exp(x) - 1 for large x
   if (Math.abs(x) > 1e-5) return Math.exp(x) - 1;
 
   // Taylor series
@@ -285,7 +285,7 @@ export const __Math_log1p = (x: number): number => {
   if (x == -1) return -Infinity; // log(0) = -inf
   if (!Number.isFinite(x)) return x;
 
-  // use exp(x) - 1 for large x (perf)
+  // opt: use exp(x) - 1 for large x
   if (Math.abs(x) > 1e-5) return Math.log(1 + x);
 
   // Taylor series
@@ -459,4 +459,83 @@ export const __Math_atan2 = (y: number, x: number): number => {
 
   if (y >= 0) return Math.atan(ratio) + Math.PI;
   return Math.atan(ratio) - Math.PI;
+};
+
+export const __Math_sumPrecise = (values: any[]): number => {
+  // based on "Fast exact summation using small and large superaccumulators" by Radford M. Neal
+  // https://arxiv.org/abs/1505.05571
+  // accuracy is top priority, it is fine for this to be slow(er)
+
+  // small superaccumulator uses 67 chunks: (1 << (11 - 5)) + 3
+  //   11 is the number of exponent bits in IEEE-754 double precision
+  //   5 is the number of low-order exponent bits stored per chunk
+  const SMALL_SLOTS: number = 67;
+  const SMALL_MIN: number = -970;
+  const small: Float64Array = new Float64Array(SMALL_SLOTS);
+
+  // large superaccumulator uses 4096 chunks: 1 << (11 + 1)
+  //   11 is the number of exponent bits in IEEE-754 double precision
+  const LARGE_SLOTS: number = 4096;
+  const LARGE_MIN: number = -1074;
+  const large: Float64Array = new Float64Array(LARGE_SLOTS);
+
+  for (const _ of values) {
+    if (Porffor.rawType(_) != Porffor.TYPES.number) throw new TypeError('Math.sumPrecise must have only numbers in values');
+
+    const v: number = _;
+    if (v == 0) continue;
+
+    const exp: number = Porffor.number.getExponent(v);
+
+    // check if value fits in small superaccumulator
+    if (exp >= SMALL_MIN && exp < SMALL_MIN + SMALL_SLOTS) {
+      // map the exponent to an array index (-970 -> 0, -969 -> 1, etc)
+      const slot: number = exp - SMALL_MIN;
+      let y: number = v;
+
+      // cascade up through slots, similar to carrying digits in decimal
+      // but operating in binary and handling floating point carefully
+      for (let i: number = slot; i < SMALL_SLOTS - 1; i++) {
+        const sum: number = small[i] + y;
+        y = sum;
+
+        // a number fits in slot i if its magnitude is less than 2^(i+SMALL_MIN+1)
+        const slotLimit: number = Math.pow(2, i + SMALL_MIN + 1);
+        if (y >= -slotLimit && y < slotLimit) {
+          small[i] = y;
+          y = 0;
+          break;
+        }
+
+        // doesn't fit, clear this slot and continue cascading
+        small[i] = 0;
+      }
+
+      // if we still have a non-zero value after cascading through small,
+      // it needs to go into the large superaccumulator
+      if (y != 0) {
+        large[Porffor.number.getExponent(y) - LARGE_MIN] += y;
+      }
+    } else {
+      // exponent is outside small superaccumulator range,
+      // put it directly in the large superaccumulator
+      large[Porffor.number.getExponent(v) - LARGE_MIN] += v;
+    }
+  }
+
+  // combine results from both superaccumulators,
+  // process from highest to lowest to maintain precision
+  // todo: handle -0 (see test262 test)
+  let sum: number = -0;
+  for (let i: number = LARGE_SLOTS - 1; i >= 0; i--) {
+    sum += large[i];
+  }
+
+  for (let i: number = SMALL_SLOTS - 1; i >= 0; i--) {
+    sum += small[i];
+  }
+
+  // todo: free large and small
+
+  return sum;
 };
diff --git a/compiler/builtins/porffor.d.ts b/compiler/builtins/porffor.d.ts
@@ -89,6 +89,10 @@ type PorfforGlobal = {
     appendPadNum(str: bytestring, num: number, len: number): i32;
   }
 
+  number: {
+    getExponent(v: f64): i32;
+  }
+
   print(x: any): i32;
 
   randomByte(): i32;