Skip to content

Commit

Permalink
choice randomization: better approximation of JR behaviour, fixes #49 (
Browse files Browse the repository at this point in the history
…#241)

Choice randomization: better approximation of JR behaviour, fixes #49

Co-authored-by: eyelidlessness <eyelidlessness@users.noreply.github.com>
  • Loading branch information
brontolosone and eyelidlessness authored Jan 24, 2025
1 parent 3ba7579 commit 0700362
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 21 deletions.
7 changes: 7 additions & 0 deletions .changeset/strange-brooms-rush.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"@getodk/xpath": patch
---

Choice list order randomization seed handling: better correspondence with JavaRosa behaviour,
including the addition of derivation of seeds from non-numeric inputs.
Previously, entering a non-integer in a form field seed input would result in an exception being thrown.
42 changes: 40 additions & 2 deletions packages/xpath/src/functions/xforms/node-set.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { SHA256 } from 'crypto-js';

import type { XPathNode } from '../../adapter/interface/XPathNode.ts';
import type { XPathDOMProvider } from '../../adapter/xpathDOMProvider.ts';
import { LocationPathEvaluation } from '../../evaluations/LocationPathEvaluation.ts';
Expand Down Expand Up @@ -384,8 +386,44 @@ export const randomize = new NodeSetFunction(

const nodeResults = Array.from(results.values());
const nodes = nodeResults.map(({ value }) => value);
const seed = seedExpression?.evaluate(context).toNumber();

return seededRandomize(nodes, seed);
if (seedExpression === undefined) return seededRandomize(nodes);

const seed = seedExpression.evaluate(context);
const asNumber = seed.toNumber(); // TODO: There are some peculiarities to address: https://github.com/getodk/web-forms/issues/240
let finalSeed: number | bigint | undefined;
if (Number.isNaN(asNumber)) {
// Specific behaviors for when a seed value is not interpretable as numeric.
// We still want to derive a seed in those cases, see https://github.com/getodk/javarosa/issues/800
const seedString = seed.toString();
if (seedString === '') {
finalSeed = 0; // special case: JR behaviour
} else {
// any other string, we'll convert to a number via a digest function
finalSeed = toBigIntHash(seedString);
}
} else {
finalSeed = asNumber;
}
return seededRandomize(nodes, finalSeed);
}
);

const toBigIntHash = (text: string): bigint => {
/**
Hash text with sha256, and interpret the first 64 bits of output
(the first and second int32s ("words") of CryptoJS digest output)
as an int64 (in JS represented in a BigInt).
Thus the entropy of the hash is reduced to 64 bits, which
for some applications is sufficient.
The underlying representations are big-endian regardless of the endianness
of the machine this runs on, as is the equivalent JavaRosa implementation.
({@link https://github.com/getodk/javarosa/blob/ab0e8f4da6ad8180ac7ede5bc939f3f261c16edf/src/main/java/org/javarosa/xpath/expr/XPathFuncExpr.java#L718-L726 | see here}).
*/
const buffer = new ArrayBuffer(8);
const dataview = new DataView(buffer);
SHA256(text)
.words.slice(0, 2)
.forEach((val, ix) => dataview.setInt32(ix * Int32Array.BYTES_PER_ELEMENT, val));
return dataview.getBigInt64(0);
};
45 changes: 36 additions & 9 deletions packages/xpath/src/lib/collections/sort.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,18 @@ class UnseededPseudoRandomNumberGenerator implements PseudoRandomNumberGenerator
}
}

class SeededPseudoRandomNumberGenerator implements PseudoRandomNumberGenerator {
class ParkMillerPRNG implements PseudoRandomNumberGenerator {
protected seed: number;

constructor(seed: Int) {
let initialSeed = seed % SEED_MODULO_OPERAND;

constructor(seed: Int | bigint) {
let initialSeed: number;
if (typeof seed === 'bigint') {
// the result of the modulo operation is always smaller than Number.MAX_SAFE_INTEGER,
// thus it's safe to convert to a Number.
initialSeed = Number(BigInt(seed) % BigInt(SEED_MODULO_OPERAND));
} else {
initialSeed = seed % SEED_MODULO_OPERAND;
}
if (initialSeed <= 0) {
initialSeed += MAX_INT_32 - 1;
}
Expand All @@ -38,17 +44,38 @@ class SeededPseudoRandomNumberGenerator implements PseudoRandomNumberGenerator {
}
}

const isInt = (value: number): value is Int => value % 1 === 0;
class JavaRosaPRNG extends ParkMillerPRNG {
// Per issue #49 (https://github.com/getodk/web-forms/issues/49) this is intended to be "bug-or-feature-compatible"
// with JavaRosa's implementation; org.javarosa.core.model.ItemsetBinding.resolveRandomSeed takes the .longValue() of
// the double produced by randomSeedPathExpr.eval() — see https://github.com/getodk/javarosa/blob/6ce13527c/src/main/java/org/javarosa/core/model/ItemsetBinding.java#L311:L317 .
// That results in a 0L when the double is NaN, which happens (for instance) when there
// is a string that does not look like a number (which is a problem in itself, as any non-numeric
// looking string will then result in the same seed of 0 — see https://github.com/getodk/javarosa/issues/800).
// We'll emulate Java's Double -> Long conversion here (for NaN and some other double values)
// so that we produce the same randomization as JR.

constructor(seed: Int | bigint) {
let finalSeed: number | bigint;
// In Java, a NaN double's .longValue is 0
if (Number.isNaN(seed)) finalSeed = 0;
// In Java, an Infinity double's .longValue() is 2**63 -1, which is larger than Number.MAX_SAFE_INTEGER, thus we'll need a BigInt.
else if (seed === Infinity) finalSeed = 2n ** 63n - 1n;
// Analogous with the above conversion, but for -Infinity
else if (seed === -Infinity) finalSeed = -(2n ** 63n);
// A Java Double's .longValue drops the fractional part.
else if (typeof seed === 'number' && !Number.isInteger(seed)) finalSeed = Math.trunc(seed);
else finalSeed = seed;
super(finalSeed);
}
}

export const seededRandomize = <T>(values: readonly T[], seed?: number): T[] => {
export const seededRandomize = <T>(values: readonly T[], seed?: number | bigint): T[] => {
let generator: PseudoRandomNumberGenerator;

if (seed == null) {
generator = new UnseededPseudoRandomNumberGenerator();
} else if (!isInt(seed)) {
throw 'todo not an int';
} else {
generator = new SeededPseudoRandomNumberGenerator(seed);
generator = new JavaRosaPRNG(seed);
}

const { length } = values;
Expand Down
31 changes: 21 additions & 10 deletions packages/xpath/test/xforms/randomize.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ describe('randomize()', () => {
});

const SELECTOR = '//xhtml:div[@id="FunctionRandomize"]/xhtml:div';
const MIRROR = 'mirror';
const MIRROR_HASH_VALUE = 5989458117437254;
const MIRROR_HASH_SORT_ORDER = 'ACBEDF';

describe('shuffles nodesets', () => {
beforeEach(() => {
Expand All @@ -44,7 +47,10 @@ describe('randomize()', () => {
<p>3</p>
<p>4</p>
</div>
</body>
<div id="testFunctionNodeset3">
<p>${MIRROR}</p>
</div>
</body>
</html>`,
{ namespaceResolver }
);
Expand Down Expand Up @@ -74,8 +80,15 @@ describe('randomize()', () => {
{ seed: 1, expected: 'BFEACD' },
{ seed: 11111111, expected: 'ACDBFE' },
{ seed: 'int(1)', expected: 'BFEACD' },
{ seed: 1.1, expected: 'BFEACD' },
{ seed: 0, expected: 'CBEAFD' },
{ seed: NaN, expected: 'CBEAFD' },
{ seed: Infinity, expected: 'CBEAFD' },
{ seed: -Infinity, expected: 'CFBEAD' },
{ seed: 'floor(1.1)', expected: 'BFEACD' },
{ seed: '//xhtml:div[@id="testFunctionNodeset2"]/xhtml:p', expected: 'BFEACD' },
{ seed: MIRROR_HASH_VALUE, expected: MIRROR_HASH_SORT_ORDER },
{ seed: '//xhtml:div[@id="testFunctionNodeset3"]/xhtml:p', expected: MIRROR_HASH_SORT_ORDER },
].forEach(({ seed, expected }) => {
it(`with a seed: ${seed}`, () => {
const expression = `randomize(${SELECTOR}, ${seed})`;
Expand All @@ -88,15 +101,13 @@ describe('randomize()', () => {
});
});

[
{ expression: 'randomize()' },
{ expression: `randomize(${SELECTOR}, 'a')` },
{ expression: `randomize(${SELECTOR}, 1, 2)` },
].forEach(({ expression }) => {
it.fails(`${expression} with invalid args, throws an error`, () => {
testContext.evaluate(expression);
});
});
[{ expression: 'randomize()' }, { expression: `randomize(${SELECTOR}, 1, 2)` }].forEach(
({ expression }) => {
it.fails(`${expression} with invalid argument count, throws an error`, () => {
testContext.evaluate(expression);
});
}
);

it('randomizes nodes', () => {
testContext = createXFormsTestContext(`
Expand Down

0 comments on commit 0700362

Please sign in to comment.