From 5410239245b4a6fe8c1976f8aa33c970923f9f40 Mon Sep 17 00:00:00 2001 From: ST-DDT Date: Wed, 4 Oct 2023 18:13:00 +0200 Subject: [PATCH] feat: support custom randomizer (#2284) --- docs/.vitepress/api-pages.ts | 1 + docs/.vitepress/config.ts | 4 + docs/guide/randomizer.md | 116 ++++++++++++++++++ scripts/apidoc/fakerClass.ts | 46 ++++--- scripts/apidoc/generate.ts | 3 +- src/faker.ts | 35 +++++- src/index.ts | 1 + .../{mersenne/twister.ts => mersenne.ts} | 28 ++++- src/internal/mersenne/mersenne.ts | 45 ------- src/modules/number/index.ts | 12 +- src/randomizer.ts | 65 ++++++++++ src/simple-faker.ts | 31 ++++- test/all_functional.spec.ts | 2 +- test/faker.spec.ts | 16 +++ test/mersenne.spec.ts | 14 +-- 15 files changed, 331 insertions(+), 88 deletions(-) create mode 100644 docs/guide/randomizer.md rename src/internal/{mersenne/twister.ts => mersenne.ts} (94%) delete mode 100644 src/internal/mersenne/mersenne.ts create mode 100644 src/randomizer.ts diff --git a/docs/.vitepress/api-pages.ts b/docs/.vitepress/api-pages.ts index ae820604eaf..dad86954967 100644 --- a/docs/.vitepress/api-pages.ts +++ b/docs/.vitepress/api-pages.ts @@ -30,5 +30,6 @@ export const apiPages = [ { text: 'System', link: '/api/system.html' }, { text: 'Vehicle', link: '/api/vehicle.html' }, { text: 'Word', link: '/api/word.html' }, + { text: 'Randomizer', link: '/api/randomizer.html' }, { text: 'Utilities', link: '/api/utils.html' }, ]; diff --git a/docs/.vitepress/config.ts b/docs/.vitepress/config.ts index f43896de465..12fea107d34 100644 --- a/docs/.vitepress/config.ts +++ b/docs/.vitepress/config.ts @@ -200,6 +200,10 @@ const config = defineConfig({ text: 'Frameworks', link: '/guide/frameworks', }, + { + text: 'Randomizer', + link: '/guide/randomizer', + }, { text: 'Upgrading to v8', link: '/guide/upgrading', diff --git a/docs/guide/randomizer.md b/docs/guide/randomizer.md new file mode 100644 index 00000000000..e03152a74ad --- /dev/null +++ b/docs/guide/randomizer.md @@ -0,0 +1,116 @@ +# 
Randomizer + +The [`Randomizer`](/api/randomizer) interface allows you to use a custom randomness source within Faker. + +::: warning Important +Faker's default `Randomizer` is sufficient in most cases. +Change this only if you want to use it to achieve a specific goal, such as sharing the same random generator with other instances/tools. +::: + +There are two connected use cases we have considered where this might be needed: + +1. Re-Use of the same `Randomizer` within multiple `Faker` instances. +2. The use of a random number generator from a third-party library. + +## Using `Randomizer`s + +A `Randomizer` has to be set during construction of the instance: + +```ts +import { Faker, Randomizer } from '@faker-js/faker'; + +const customFaker = new Faker({ + locale: ..., + randomizer: ..., +}); +``` + +The following methods take a `Randomizer` as argument: + +- [new SimpleFaker(...)](/api/simpleFaker#constructor) +- [new Faker(...)](/api/faker#constructor) + +## Re-Using a `Randomizer` + +Sometimes it might be required to generate values in two different locales. +E.g. a Chinese person might have an English identity to simplify the communication with foreigners. +While this could also be achieved with two independent `Faker` instances like this: + +```ts +import { fakerEN, fakerZH_TW } from '@faker-js/faker'; + +fakerZH_TW.seed(5); +fakerEN.seed(5); + +const firstName = fakerZH_TW.person.firstName(); // 炫明 +const alias = fakerEN.person.firstName(); // Arthur +``` + +There might be issues regarding reproducibility when only one of them is seeded. + +By sharing a `Randomizer` between the two instances, you avoid this issue, because seeding it affects all instances simultaneously. + +::: tip Note +This gets more important if the seeding happens at a different location than the data generation (e.g. due to nesting). 
+::: + +```ts +import { en, Faker, Randomizer, zh_TW } from '@faker-js/faker'; + +const randomizer: Randomizer = ...; + +const customFakerEN = new Faker({ + locale: en, + randomizer, +}); + +const customFakerZH_TW = new Faker({ + locale: [zh_TW, en], + randomizer, +}); + +randomizer.seed(5); +// customFakerEN.seed(5); // Redundant +// customFakerZH_TW.seed(5); // Redundant + +const firstName = customFakerZH_TW.person.firstName(); // 炫明 +const alias = customFakerEN.person.firstName(); // John (different from before, because it is now the second call) +``` + +This is also relevant when trying to use faker's random number generator in third-party libraries. +E.g. some libraries that can generate `string`s from a `RegExp` can be customized with a custom random number generator as well, +and since they will be used in the same context it makes sense to rely on the same randomness source to ensure the values are reproducible. + +## Third-Party `Randomizer`s + +Sometimes you might want to use a custom/third-party random number generator. +This can be achieved by implementing your own `Randomizer` and passing it to [supported methods](#using-randomizers). + +::: tip Note +Faker does not ship `Randomizer`s for third-party libraries and does not provide support for bridging the gap between libraries. +The following examples show how the interface can be implemented, but they are not tested for correctness. +Feel free to submit more `Randomizer` examples for other popular packages. 
+::: + +### Pure-Rand + +The following is an example for a [pure-rand](https://github.com/dubzzz/pure-rand) based `Randomizer`: + +```ts +import { Faker, Randomizer, SimpleFaker } from '@faker-js/faker'; +import { RandomGenerator, xoroshiro128plus } from 'pure-rand'; + +export function generatePureRandRandomizer( + seed: number | number[] = Date.now() ^ (Math.random() * 0x100000000), + factory: (seed: number) => RandomGenerator = xoroshiro128plus +): Randomizer { + const self = { + next: () => (self.generator.unsafeNext() >>> 0) / 0x100000000, + seed: (seed: number | number[]) => { + self.generator = factory(typeof seed === 'number' ? seed : seed[0]); + }, + } as Randomizer & { generator: RandomGenerator }; + self.seed(seed); + return self; +} +``` diff --git a/scripts/apidoc/fakerClass.ts b/scripts/apidoc/fakerClass.ts index d7206cf8302..ea190cc7e7c 100644 --- a/scripts/apidoc/fakerClass.ts +++ b/scripts/apidoc/fakerClass.ts @@ -21,23 +21,33 @@ export async function processFakerClasses( return Promise.all(fakerClasses.map(processClass)); } +export async function processFakerRandomizer( + project: ProjectReflection +): Promise { + const randomizerClass = project + .getChildrenByKind(ReflectionKind.Interface) + .find((clazz) => clazz.name === 'Randomizer'); + + return processClass(randomizerClass); +} + async function processClass( - fakerClass: DeclarationReflection + clazz: DeclarationReflection ): Promise { - const { name } = fakerClass; - const moduleFieldName = extractModuleFieldName(fakerClass); + const { name } = clazz; + const moduleFieldName = extractModuleFieldName(clazz); console.log(`Processing ${name} class`); - const { comment, deprecated, examples } = analyzeModule(fakerClass); + const { comment, deprecated, examples } = analyzeModule(clazz); const methods: Method[] = []; - console.debug(`- constructor`); - methods.push(await processConstructor(fakerClass)); + if (hasConstructor(clazz)) { + console.debug(`- constructor`); + methods.push(await 
processConstructor(clazz)); + } - methods.push( - ...(await processModuleMethods(fakerClass, `${moduleFieldName}.`)) - ); + methods.push(...(await processModuleMethods(clazz, `${moduleFieldName}.`))); return writeApiDocsModule( name, @@ -49,20 +59,20 @@ async function processClass( ); } +function hasConstructor(clazz: DeclarationReflection): boolean { + return clazz + .getChildrenByKind(ReflectionKind.Constructor) + .some((constructor) => constructor.signatures.length > 0); +} + async function processConstructor( - fakerClass: DeclarationReflection + clazz: DeclarationReflection ): Promise { - const constructor = fakerClass.getChildrenByKind( - ReflectionKind.Constructor - )[0]; + const constructor = clazz.getChildrenByKind(ReflectionKind.Constructor)[0]; const signature = selectApiSignature(constructor); - const method = await analyzeSignature( - signature, - '', - `new ${fakerClass.name}` - ); + const method = await analyzeSignature(signature, '', `new ${clazz.name}`); return { ...method, diff --git a/scripts/apidoc/generate.ts b/scripts/apidoc/generate.ts index d1fefe8f7f5..f047cc496ba 100644 --- a/scripts/apidoc/generate.ts +++ b/scripts/apidoc/generate.ts @@ -5,7 +5,7 @@ import { writeApiSearchIndex, writeSourceBaseUrl, } from './apiDocsWriter'; -import { processFakerClasses } from './fakerClass'; +import { processFakerClasses, processFakerRandomizer } from './fakerClass'; import { processFakerUtilities } from './fakerUtilities'; import { processModules } from './moduleMethods'; import { loadProject } from './typedoc'; @@ -27,6 +27,7 @@ export async function generate(): Promise { ...(await processModules(project)).sort((a, b) => a.text.localeCompare(b.text) ), + await processFakerRandomizer(project), processFakerUtilities(project), ]); await writeApiPagesIndex(pages.map(({ text, link }) => ({ text, link }))); diff --git a/src/faker.ts b/src/faker.ts index 43811f056de..e6b5889af36 100644 --- a/src/faker.ts +++ b/src/faker.ts @@ -28,6 +28,7 @@ import { 
ScienceModule } from './modules/science'; import { SystemModule } from './modules/system'; import { VehicleModule } from './modules/vehicle'; import { WordModule } from './modules/word'; +import type { Randomizer } from './randomizer'; import { SimpleFaker } from './simple-faker'; import { mergeLocales } from './utils/merge-locales'; @@ -123,6 +124,10 @@ export class Faker extends SimpleFaker { * * @param options The options to use. * @param options.locale The locale data to use. + * @param options.randomizer The Randomizer to use. + * Specify this only if you want to use it to achieve a specific goal, + * such as sharing the same random generator with other instances/tools. + * Defaults to faker's Mersenne Twister based pseudo random number generator. * * @example * import { Faker, es } from '@faker-js/faker'; @@ -144,6 +149,15 @@ export class Faker extends SimpleFaker { * @see mergeLocales */ locale: LocaleDefinition | LocaleDefinition[]; + + /** + * The Randomizer to use. + * Specify this only if you want to use it to achieve a specific goal, + * such as sharing the same random generator with other instances/tools. + * + * @default generateMersenne32Randomizer() + */ + randomizer?: Randomizer; }); /** * Creates a new instance of Faker. @@ -180,6 +194,10 @@ export class Faker extends SimpleFaker { * @param options.locale The locale data to use or the name of the main locale. * @param options.locales The locale data to use. * @param options.localeFallback The name of the fallback locale to use. + * @param options.randomizer The Randomizer to use. + * Specify this only if you want to use it to achieve a specific goal, + * such as sharing the same random generator with other instances/tools. + * Defaults to faker's Mersenne Twister based pseudo random number generator. 
* * @example * import { Faker, es } from '@faker-js/faker'; @@ -203,6 +221,15 @@ export class Faker extends SimpleFaker { * @see mergeLocales */ locale: LocaleDefinition | LocaleDefinition[]; + + /** + * The Randomizer to use. + * Specify this only if you want to use it to achieve a specific goal, + * such as sharing the same random generator with other instances/tools. + * + * @default generateMersenne32Randomizer() + */ + randomizer?: Randomizer; } | { /** @@ -231,14 +258,18 @@ export class Faker extends SimpleFaker { ); constructor( options: - | { locale: LocaleDefinition | LocaleDefinition[] } + | { + locale: LocaleDefinition | LocaleDefinition[]; + randomizer?: Randomizer; + } | { locales: Record; locale?: string; localeFallback?: string; + randomizer?: Randomizer; } ) { - super(); + super({ randomizer: options.randomizer }); const { locales } = options as { locales: Record; diff --git a/src/index.ts b/src/index.ts index 000541b7dea..8adcd3843c7 100644 --- a/src/index.ts +++ b/src/index.ts @@ -128,5 +128,6 @@ export type { StringModule } from './modules/string'; export type { SystemModule } from './modules/system'; export type { VehicleModule } from './modules/vehicle'; export type { WordModule } from './modules/word'; +export type { Randomizer } from './randomizer'; export { SimpleFaker, simpleFaker } from './simple-faker'; export { mergeLocales } from './utils/merge-locales'; diff --git a/src/internal/mersenne/twister.ts b/src/internal/mersenne.ts similarity index 94% rename from src/internal/mersenne/twister.ts rename to src/internal/mersenne.ts index d25e3acf583..d97ec716b88 100644 --- a/src/internal/mersenne/twister.ts +++ b/src/internal/mersenne.ts @@ -1,3 +1,5 @@ +import type { Randomizer } from '../randomizer'; + /** * Copyright (c) 2022-2023 Faker * @@ -71,7 +73,7 @@ * * @internal */ -export default class MersenneTwister19937 { +class MersenneTwister19937 { private readonly N = 624; private readonly M = 397; private readonly MATRIX_A = 0x9908b0df; // 
constant vector a @@ -323,3 +325,27 @@ export default class MersenneTwister19937 { } // These real versions are due to Isaku Wada, 2002/01/09 } + +/** + * Generates a MersenneTwister19937 randomizer with 32 bits of precision. + * + * @internal + */ +export function generateMersenne32Randomizer(): Randomizer { + const twister = new MersenneTwister19937(); + + twister.initGenrand(Math.ceil(Math.random() * Number.MAX_SAFE_INTEGER)); + + return { + next(): number { + return twister.genrandReal2(); + }, + seed(seed: number | number[]): void { + if (typeof seed === 'number') { + twister.initGenrand(seed); + } else if (Array.isArray(seed)) { + twister.initByArray(seed, seed.length); + } + }, + }; +} diff --git a/src/internal/mersenne/mersenne.ts b/src/internal/mersenne/mersenne.ts deleted file mode 100644 index c823af7e2d2..00000000000 --- a/src/internal/mersenne/mersenne.ts +++ /dev/null @@ -1,45 +0,0 @@ -import Twister from './twister'; - -/** - * Generate seed based random numbers. - * - * @internal - */ -export interface Mersenne { - /** - * Generates a random float between `[0, 1)`. - * This method is called `next` so that it could be used as an [iterator](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Iteration_protocols#the_iterator_protocol) - */ - next(): number; - - /** - * Sets the seed to use. - * - * @param seed The seed to use. - */ - seed(seed: number | number[]): void; -} - -/** - * Generate seed based random numbers. 
- * - * @internal - */ -export default function mersenne(): Mersenne { - const twister = new Twister(); - - twister.initGenrand(Math.ceil(Math.random() * Number.MAX_SAFE_INTEGER)); - - return { - next(): number { - return twister.genrandReal2(); - }, - seed(seed: number | number[]): void { - if (typeof seed === 'number') { - twister.initGenrand(seed); - } else if (Array.isArray(seed)) { - twister.initByArray(seed, seed.length); - } - }, - }; -} diff --git a/src/modules/number/index.ts b/src/modules/number/index.ts index a2e64724d53..9216df7a583 100644 --- a/src/modules/number/index.ts +++ b/src/modules/number/index.ts @@ -1,7 +1,6 @@ import type { SimpleFaker } from '../..'; import { FakerError } from '../../errors/faker-error'; import { bindThisToMemberFunctions } from '../../internal/bind-this-to-member-functions'; -import type { Mersenne } from '../../internal/mersenne/mersenne'; /** * Module to generate numbers of any kind. @@ -83,10 +82,9 @@ export class NumberModule { throw new FakerError(`Max ${max} should be greater than min ${min}.`); } - const mersenne: Mersenne = - // @ts-expect-error: access private member field - this.faker._mersenne; - const real = mersenne.next(); + // @ts-expect-error: access private member field + const randomizer = this.faker._randomizer; + const real = randomizer.next(); return Math.floor(real * (effectiveMax + 1 - effectiveMin) + effectiveMin); } @@ -160,8 +158,8 @@ export class NumberModule { } // @ts-expect-error: access private member field - const mersenne: Mersenne = this.faker._mersenne; - const real = mersenne.next(); + const randomizer = this.faker._randomizer; + const real = randomizer.next(); return real * (max - min) + min; } diff --git a/src/randomizer.ts b/src/randomizer.ts new file mode 100644 index 00000000000..3264ec1da82 --- /dev/null +++ b/src/randomizer.ts @@ -0,0 +1,65 @@ +/** + * Interface for a random number generator. 
+ * + * **Note:** Normally there is no need to implement this interface directly, + * unless you want to achieve a specific goal with it. + * + * This interface enables you to use random generators from third party libraries such as [pure-rand](https://github.com/dubzzz/pure-rand). + * + * Instances are expected to be ready for use before being passed to any Faker constructor, + * this includes being `seed()`ed with either a random or fixed value. + * + * For more information please refer to the [documentation](/guide/randomizer). + * + * @example + * import { Faker, Randomizer, SimpleFaker } from '@faker-js/faker'; + * import { RandomGenerator, xoroshiro128plus } from 'pure-rand'; + * + * function generatePureRandRandomizer( + * seed: number | number[] = Date.now() ^ (Math.random() * 0x100000000), + * factory: (seed: number) => RandomGenerator = xoroshiro128plus + * ): Randomizer { + * const self = { + * next: () => (self.generator.unsafeNext() >>> 0) / 0x100000000, + * seed: (seed: number | number[]) => { + * self.generator = factory(typeof seed === 'number' ? seed : seed[0]); + * }, + * } as Randomizer & { generator: RandomGenerator }; + * self.seed(seed); + * return self; + * } + * + * const randomizer = generatePureRandRandomizer(); + * + * const simpleFaker = new SimpleFaker({ randomizer }); + * + * const faker = new Faker({ + * locale: ..., + * randomizer, + * }); + */ +export interface Randomizer { + /** + * Generates a random float between 0 (inclusive) and 1 (exclusive). + * + * @example + * randomizer.next() // 0.3404027920160495 + * randomizer.next() // 0.929890375900335 + * randomizer.next() // 0.5866362918861691 + */ + next(): number; + + /** + * Sets the seed to use. + * + * @param seed The seed to use. 
+ * + * @example + * // Random seeds + * randomizer.seed(Date.now() ^ (Math.random() * 0x100000000)); + * // Fixed seeds (for reproducibility) + * randomizer.seed(42); + * randomizer.seed([42, 13.37]); + */ + seed(seed: number | number[]): void; +} diff --git a/src/simple-faker.ts b/src/simple-faker.ts index c4ecb8dfc38..49fc3f1c26a 100644 --- a/src/simple-faker.ts +++ b/src/simple-faker.ts @@ -1,10 +1,10 @@ -import type { Mersenne } from './internal/mersenne/mersenne'; -import mersenne from './internal/mersenne/mersenne'; +import { generateMersenne32Randomizer } from './internal/mersenne'; import { DatatypeModule } from './modules/datatype'; import { SimpleDateModule } from './modules/date'; import { SimpleHelpersModule } from './modules/helpers'; import { NumberModule } from './modules/number'; import { StringModule } from './modules/string'; +import type { Randomizer } from './randomizer'; /** * This is a simplified Faker class that doesn't need any localized data to generate its output. @@ -77,7 +77,7 @@ export class SimpleFaker { } /** @internal */ - private readonly _mersenne: Mersenne = mersenne(); + private readonly _randomizer: Randomizer = generateMersenne32Randomizer(); readonly datatype: DatatypeModule = new DatatypeModule(this); readonly date: SimpleDateModule = new SimpleDateModule(this); @@ -89,9 +89,28 @@ export class SimpleFaker { * Creates a new instance of SimpleFaker. * * In nearly any case you should use the prebuilt `simpleFaker` instances instead of the constructor. + * + * @param options The options to use. + * @param options.randomizer The Randomizer to use. + * Specify this only if you want to use it to achieve a specific goal, + * such as sharing the same random generator with other instances/tools. + * Defaults to faker's Mersenne Twister based pseudo random number generator. */ - constructor() { - // This empty constructor just exists for VitePress docs + constructor( + options: { + /** + * The Randomizer to use. 
+ * Specify this only if you want to use it to achieve a specific goal, + * such as sharing the same random generator with other instances/tools. + * + * @default generateMersenne32Randomizer() + */ + randomizer?: Randomizer; + } = {} + ) { + const { randomizer = generateMersenne32Randomizer() } = options; + + this._randomizer = randomizer; } /** @@ -211,7 +230,7 @@ export class SimpleFaker { seed( seed: number | number[] = Math.ceil(Math.random() * Number.MAX_SAFE_INTEGER) ): number | number[] { - this._mersenne.seed(seed); + this._randomizer.seed(seed); return seed; } diff --git a/test/all_functional.spec.ts b/test/all_functional.spec.ts index c63592d2069..65c9386d14b 100644 --- a/test/all_functional.spec.ts +++ b/test/all_functional.spec.ts @@ -6,7 +6,7 @@ const IGNORED_MODULES = [ 'rawDefinitions', 'definitions', 'helpers', - '_mersenne', + '_randomizer', '_defaultRefDate', ]; diff --git a/test/faker.spec.ts b/test/faker.spec.ts index 7a305c94788..2ff5aab9468 100644 --- a/test/faker.spec.ts +++ b/test/faker.spec.ts @@ -68,6 +68,22 @@ describe('faker', () => { }); }); + describe('randomizer', () => { + it('should be possible to provide a custom Randomizer', () => { + const customFaker = new Faker({ + locale: {}, + randomizer: { + next: () => 0, + seed: () => void 0, + }, + }); + + expect(customFaker.number.int()).toBe(0); + expect(customFaker.number.int()).toBe(0); + expect(customFaker.number.int()).toBe(0); + }); + }); + // This is only here for coverage // The actual test is in mersenne.spec.ts describe('seed()', () => { diff --git a/test/mersenne.spec.ts b/test/mersenne.spec.ts index 11b174f4705..3f40da2c446 100644 --- a/test/mersenne.spec.ts +++ b/test/mersenne.spec.ts @@ -1,23 +1,23 @@ import { beforeAll, beforeEach, describe, expect, it } from 'vitest'; -import type { Mersenne } from '../src/internal/mersenne/mersenne'; -import mersenneFn from '../src/internal/mersenne/mersenne'; +import { generateMersenne32Randomizer } from '../src/internal/mersenne'; 
+import type { Randomizer } from '../src/randomizer'; import { seededRuns } from './support/seededRuns'; import { times } from './support/times'; const NON_SEEDED_BASED_RUN = 25; describe('mersenne twister', () => { - const mersenne: Mersenne = mersenneFn(); + const randomizer: Randomizer = generateMersenne32Randomizer(); describe.each( [...seededRuns, ...seededRuns.map((v) => [v, 1, 2])].map((v) => [v]) )('seed: %j', (seed) => { beforeEach(() => { - mersenne.seed(seed); + randomizer.seed(seed); }); it('should return deterministic value for next()', () => { - const actual = mersenne.next(); + const actual = randomizer.next(); expect(actual).toMatchSnapshot(); }); @@ -35,12 +35,12 @@ describe('mersenne twister', () => { ]) )('random seeded tests %j', (seed) => { beforeAll(() => { - mersenne.seed(seed); + randomizer.seed(seed); }); describe('next', () => { it('should return random number from interval [0, 1)', () => { - const actual = mersenne.next(); + const actual = randomizer.next(); expect(actual).toBeGreaterThanOrEqual(0); expect(actual).toBeLessThan(1);