diff --git a/.github/ISSUE_TEMPLATE/package--ethereumjs-binarytree.md b/.github/ISSUE_TEMPLATE/package--ethereumjs-binarytree.md new file mode 100644 index 0000000000..403c1f82a3 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/package--ethereumjs-binarytree.md @@ -0,0 +1,7 @@ +--- +name: 'Package: @ethereumjs/binarytree' +about: Create issue for @ethereumjs/binarytree package +title: '' +labels: 'package: binarytree' +assignees: '' +--- diff --git a/.github/workflows/binarytree-build.yml b/.github/workflows/binarytree-build.yml new file mode 100644 index 0000000000..1204e998ec --- /dev/null +++ b/.github/workflows/binarytree-build.yml @@ -0,0 +1,56 @@ +name: binarytree +on: + workflow_call: + inputs: + dep-cache-key: + required: false + type: string + workflow_dispatch: + inputs: + dep-cache-key: + required: false + default: 'none' + submodule-cache-key: + required: false + default: 'none' + +env: + cwd: ${{github.workspace}}/packages/binarytree + +defaults: + run: + working-directory: packages/binarytree + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}-binarytree + cancel-in-progress: true + +jobs: + test-binarytree: + runs-on: ubuntu-latest + + steps: + # We clone the repo and submodules if triggered from work-flow dispatch + - if: inputs.submodule-cache-key == 'none' + uses: actions/checkout@v4 + + # We restore the code/deps from cache if triggered from workflow_call (i.e. 
have valid cache key) + - if: inputs.dep-cache-key != 'none' + uses: actions/cache/restore@v4 + id: dep-cache + with: + path: ${{github.workspace}} + key: ${{ inputs.dep-cache-key }} + + - name: Use Node.js 20 + uses: actions/setup-node@v4 + with: + node-version: 20 + cache: 'npm' + + - name: Install Dependencies (if not restored from cache) + if: steps.dep-cache.outputs.cache-hit != 'true' + run: npm ci + working-directory: ${{ github.workspace }} + + - run: npm run test diff --git a/README.md b/README.md index c93033cfd2..4afb25a2cc 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ Below you can find a list of the packages included in this repository. | package | npm | issues | tests | coverage | | ------------------------------------------------ | --------------------------------------------------------------- | ----------------------------------------------------------------------------- | -------------------------------------------------------------------------- | --------------------------------------------------------------------------- | +| [@ethereumjs/binarytree][binarytree-package] | [![NPM Package][binarytree-npm-badge]][binarytree-npm-link] | [![Binarytree Issues][binarytree-issues-badge]][binarytree-issues-link] | [![Actions Status][binarytree-actions-badge]][binarytree-actions-link] | [![Code Coverage][binarytree-coverage-badge]][binarytree-coverage-link] | [@ethereumjs/block][block-package] | [![NPM Package][block-npm-badge]][block-npm-link] | [![Block Issues][block-issues-badge]][block-issues-link] | [![Actions Status][block-actions-badge]][block-actions-link] | [![Code Coverage][block-coverage-badge]][block-coverage-link] | | [@ethereumjs/blockchain][blockchain-package] | [![NPM Package][blockchain-npm-badge]][blockchain-npm-link] | [![Blockchain Issues][blockchain-issues-badge]][blockchain-issues-link] | [![Actions Status][blockchain-actions-badge]][blockchain-actions-link] | [![Code Coverage][blockchain-coverage-badge]][blockchain-coverage-link] 
| | [@ethereumjs/client][client-package] | [![NPM Package][client-npm-badge]][client-npm-link] | [![Client Issues][client-issues-badge]][client-issues-link] | [![Actions Status][client-actions-badge]][client-actions-link] | [![Code Coverage][client-coverage-badge]][client-coverage-link] | diff --git a/config/cspell-md.json b/config/cspell-md.json index 34aee1aeb7..816a794cf0 100644 --- a/config/cspell-md.json +++ b/config/cspell-md.json @@ -4,6 +4,7 @@ "/0x[0-9A-Fa-f]+/" ], "words": [ + "binarytree", "trienode", "t8ntool", "calldatasize", @@ -330,4 +331,4 @@ "ethportal", "bytevector" ] -} \ No newline at end of file +} diff --git a/config/cspell-ts.json b/config/cspell-ts.json index db1ff025b0..24e49eb3b7 100644 --- a/config/cspell-ts.json +++ b/config/cspell-ts.json @@ -24,6 +24,8 @@ } ], "words": [ + "binarytree", + "merkelize", "kaust", "EEST", "paulmillr", diff --git a/package-lock.json b/package-lock.json index 06ce79cc83..58079c7b3b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1699,6 +1699,10 @@ "node": "^12.22.0 || ^14.17.0 || >=16.0.0" } }, + "node_modules/@ethereumjs/binarytree": { + "resolved": "packages/binarytree", + "link": true + }, "node_modules/@ethereumjs/block": { "resolved": "packages/block", "link": true @@ -16996,6 +17000,34 @@ "node": "^12.22.0 || ^14.17.0 || >=16.0.0" } }, + "packages/binarytree": { + "name": "@ethereumjs/binarytree", + "version": "0.0.1", + "license": "MIT", + "dependencies": { + "@ethereumjs/rlp": "^6.0.0-alpha.1", + "@ethereumjs/util": "^10.0.0-alpha.1", + "@noble/hashes": "^1.7.1", + "debug": "^4.3.4", + "ethereum-cryptography": "^3.1.0", + "lru-cache": "10.1.0" + }, + "engines": { + "node": ">=18" + } + }, + "packages/binarytree/node_modules/@noble/hashes": { + "version": "1.7.1", + "resolved": "https://registry.npmjs.org/@noble/hashes/-/hashes-1.7.1.tgz", + "integrity": "sha512-B8XBPsn4vT/KJAGqDzbwztd+6Yte3P4V7iafm24bxgDe/mlRuK6xmWPuCNrKt2vDafZ8MfJLlchDG/vYafQEjQ==", + "license": "MIT", + "engines": { + 
"node": "^14.21.3 || >=16" + }, + "funding": { + "url": "https://paulmillr.com/funding/" + } + }, "packages/block": { "name": "@ethereumjs/block", "version": "6.0.0-alpha.1", diff --git a/packages/binarytree/.c8rc.json b/packages/binarytree/.c8rc.json new file mode 100644 index 0000000000..52eb43c23b --- /dev/null +++ b/packages/binarytree/.c8rc.json @@ -0,0 +1,4 @@ +{ + "extends": "../../config/.c8rc.json", + "include": ["src/**/*.ts"] +} diff --git a/packages/binarytree/.eslintrc.cjs b/packages/binarytree/.eslintrc.cjs new file mode 100644 index 0000000000..9b9a33ad5c --- /dev/null +++ b/packages/binarytree/.eslintrc.cjs @@ -0,0 +1,14 @@ +module.exports = { + extends: '../../config/eslint.cjs', + parserOptions: { + project: ['./tsconfig.lint.json'], + }, + overrides: [ + { + files: ['benchmarks/*.ts', 'examples/*.ts'], + rules: { + 'no-console': 'off', + }, + }, + ], +} diff --git a/packages/binarytree/.gitignore b/packages/binarytree/.gitignore new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/packages/binarytree/.gitignore @@ -0,0 +1 @@ + diff --git a/packages/binarytree/.npmignore b/packages/binarytree/.npmignore new file mode 100644 index 0000000000..55c65cf8bd --- /dev/null +++ b/packages/binarytree/.npmignore @@ -0,0 +1,2 @@ +test/ +src/ \ No newline at end of file diff --git a/packages/binarytree/CHANGELOG.md b/packages/binarytree/CHANGELOG.md new file mode 100644 index 0000000000..50ad35a22a --- /dev/null +++ b/packages/binarytree/CHANGELOG.md @@ -0,0 +1,11 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) +(modification: no type change headlines) and this project adheres to +[Semantic Versioning](http://semver.org/spec/v2.0.0.html). 
+ +## 0.0.1 - 2025-01-30 + +- Initial development release diff --git a/packages/binarytree/LICENSE b/packages/binarytree/LICENSE new file mode 100644 index 0000000000..3a54fe57f0 --- /dev/null +++ b/packages/binarytree/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2025 EthereumJS + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ diff --git a/packages/binarytree/README.md b/packages/binarytree/README.md new file mode 100644 index 0000000000..46c5fcaec2 --- /dev/null +++ b/packages/binarytree/README.md @@ -0,0 +1,40 @@ +# @ethereumjs/binarytree + +[![NPM Package][binarytree-npm-badge]][binarytree-npm-link] +[![GitHub Issues][binarytree-issues-badge]][binarytree-issues-link] +[![Actions Status][binarytree-actions-badge]][binarytree-actions-link] +[![Code Coverage][binarytree-coverage-badge]][binarytree-coverage-link] +[![Discord][discord-badge]][discord-link] + +| Implementation of Binary Trees as specified in [EIP-7864](https://eips.ethereum.org/EIPS/eip-7864) | +| --------------------------------------------------------------------------------------------------------------------------------------------------- | + +> Binary Trees are a novel cryptographic data structure proposed for use in Ethereum to optimize state storage and proof verification. Unlike Merkle Patricia Tries, Binary Trees use a strict binary branching structure, allowing smaller proof sizes. By reducing storage overhead and improving proof efficiency, Binary Trees aim to enhance Ethereum’s scalability while maintaining robust security guarantees. + +This package is currently in early alpha and is a work in progress. It is not intended for use in production environments, but rather for research and development purposes. Any help in improving the package is very much welcome. + +## Installation + +To obtain the latest version, simply install the project using `npm`: + +```shell +npm install @ethereumjs/binarytree +``` +## EthereumJS + +See our organizational [documentation](https://ethereumjs.readthedocs.io) for an introduction to `EthereumJS` as well as information on current standards and best practices. If you want to join for work or carry out improvements on the libraries, please review our [contribution guidelines](https://ethereumjs.readthedocs.io/en/latest/contributing.html) first. 
+ +## License + +[MIT](https://opensource.org/licenses/MIT) + +[discord-badge]: https://img.shields.io/static/v1?logo=discord&label=discord&message=Join&color=blue +[discord-link]: https://discord.gg/TNwARpR +[binarytree-npm-badge]: https://img.shields.io/npm/v/@ethereumjs/binarytree.svg +[binarytree-npm-link]: https://www.npmjs.com/package/@ethereumjs/binarytree +[binarytree-issues-badge]: https://img.shields.io/github/issues/ethereumjs/ethereumjs-monorepo/package:%20binarytree?label=issues +[binarytree-issues-link]: https://github.com/ethereumjs/ethereumjs-monorepo/issues?q=is%3Aopen+is%3Aissue+label%3A"package%3A+binarytree" +[binarytree-actions-badge]: https://github.com/ethereumjs/ethereumjs-monorepo/workflows/binarytree/badge.svg +[binarytree-actions-link]: https://github.com/ethereumjs/ethereumjs-monorepo/actions?query=workflow%3A%22binarytree%22 +[binarytree-coverage-badge]: https://codecov.io/gh/ethereumjs/ethereumjs-monorepo/branch/master/graph/badge.svg?flag=binarytree +[binarytree-coverage-link]: https://codecov.io/gh/ethereumjs/ethereumjs-monorepo/tree/master/packages/binarytree diff --git a/packages/binarytree/package.json b/packages/binarytree/package.json new file mode 100644 index 0000000000..7ece62e850 --- /dev/null +++ b/packages/binarytree/package.json @@ -0,0 +1,59 @@ +{ + "name": "@ethereumjs/binarytree", + "version": "0.0.1", + "description": "Implementation of binary trees as used in Ethereum.", + "keywords": ["binary", "tree", "trie", "ethereum"], + "homepage": "https://github.com/ethereumjs/ethereumjs-monorepo/tree/master/packages/binarytree#readme", + "bugs": { + "url": "https://github.com/ethereumjs/ethereumjs-monorepo/issues?q=is%3Aissue+label%3A%22package%3A+binarytree%22" + }, + "repository": { + "type": "git", + "url": "https://github.com/ethereumjs/ethereumjs-monorepo.git" + }, + "license": "MIT", + "author": "EthereumJS Team", + "contributors": [ + { + "name": "Gabriel Rocheleau", + "url": "https://github.com/gabrocheleau" + } + ], 
+ "type": "module", + "sideEffects": false, + "main": "dist/cjs/index.js", + "module": "dist/esm/index.js", + "exports": { + ".": { + "import": "./dist/esm/index.js", + "require": "./dist/cjs/index.js" + } + }, + "files": ["dist", "src"], + "scripts": { + "biome": "npx @biomejs/biome check", + "biome:fix": "npx @biomejs/biome check --write", + "build": "../../config/cli/ts-build.sh", + "clean": "../../config/cli/clean-package.sh", + "coverage": "DEBUG=ethjs npx vitest run -c ../../config/vitest.config.coverage.mts", + "docs:build": "typedoc --options typedoc.cjs", + "lint": "npm run biome && eslint --config .eslintrc.cjs . --ext .js,.ts", + "lint:fix": "npm run biome:fix && eslint --fix --config .eslintrc.cjs . --ext .js,.ts", + "prepublishOnly": "../../config/cli/prepublish.sh", + "test": "npm run test:node", + "test:node": "npx vitest run", + "test:browser": "npx vitest run --config=../../config/vitest.config.browser.mts", + "tsc": "../../config/cli/ts-compile.sh" + }, + "dependencies": { + "@ethereumjs/rlp": "^6.0.0-alpha.1", + "@ethereumjs/util": "^10.0.0-alpha.1", + "@noble/hashes": "^1.7.1", + "debug": "^4.3.4", + "ethereum-cryptography": "^3.1.0", + "lru-cache": "10.1.0" + }, + "engines": { + "node": ">=18" + } +} diff --git a/packages/binarytree/src/binaryTree.ts b/packages/binarytree/src/binaryTree.ts new file mode 100644 index 0000000000..ceb148003b --- /dev/null +++ b/packages/binarytree/src/binaryTree.ts @@ -0,0 +1,750 @@ +import { + Lock, + bitsToBytes, + bytesToBits, + bytesToHex, + concatBytes, + equalsBits, + equalsBytes, + matchingBitsLength, + setLengthRight, +} from '@ethereumjs/util' +import debug from 'debug' + +import { CheckpointDB } from './db/index.js' +import { InternalBinaryNode } from './node/internalNode.js' +import { StemBinaryNode } from './node/stemNode.js' +import { decodeBinaryNode, isInternalBinaryNode, isStemBinaryNode } from './node/util.js' +import { type BinaryTreeOpts, ROOT_DB_KEY } from './types.js' + +import type { 
BinaryNode } from './node/types.js' +import type { PutBatch } from '@ethereumjs/util' +import type { Debugger } from 'debug' + +interface Path { + node: BinaryNode | null + remaining: number[] + stack: Array<[BinaryNode, number[]]> +} + +/** + * The basic binary tree interface, use with `import { BinaryTree } from '@ethereumjs/binarytree'`. + */ +export class BinaryTree { + /** The options for instantiating the binary tree */ + protected _opts: BinaryTreeOpts + + /** The root for an empty tree */ + EMPTY_TREE_ROOT: Uint8Array + + protected _db!: CheckpointDB + protected _hashLen: number + protected _lock = new Lock() + protected _root: Uint8Array + + protected DEBUG: boolean + protected _debug: Debugger = debug('binarytree:#') + protected debug: (...args: any) => void + /** + * Creates a new binary tree. + * @param opts Options for instantiating the binary tree + * + * Note: in most cases, the static {@link createBinaryTree} constructor should be used. It uses the same API but provides sensible defaults + */ + constructor(opts: BinaryTreeOpts) { + this._opts = opts + + if (opts.db instanceof CheckpointDB) { + throw new Error('Cannot pass in an instance of CheckpointDB') + } + this._db = new CheckpointDB({ db: opts.db, cacheSize: opts.cacheSize }) + + this.EMPTY_TREE_ROOT = new Uint8Array(32) + this._hashLen = 32 + this._root = this.EMPTY_TREE_ROOT + + if (opts?.root) { + this.root(opts.root as any) + } + + this.DEBUG = + typeof window === 'undefined' ? (process?.env?.DEBUG?.includes('ethjs') ?? false) : false + this.debug = this.DEBUG + ? 
(message: string, namespaces: string[] = []) => { + let log = this._debug + for (const name of namespaces) { + log = log.extend(name) + } + log(message) + } + : (..._: any) => {} + + this.DEBUG && + this.debug(`Trie created: + || Root: ${bytesToHex(this._root)} + || Persistent: ${this._opts.useRootPersistence} + || CacheSize: ${this._opts.cacheSize} + || ----------------`) + } + + /** + * Gets and/or Sets the current root of the `tree` + */ + root(value?: Uint8Array | null): Uint8Array { + if (value !== undefined) { + if (value === null) { + value = this.EMPTY_TREE_ROOT + } + + if (value.length !== this._hashLen) { + throw new Error(`Invalid root length. Roots are ${this._hashLen} bytes`) + } + + this._root = value + } + + return this._root + } + + /** + * Checks if a given root exists. + */ + async checkRoot(root: Uint8Array): Promise { + try { + const value = await this._db.get(root) + return value !== undefined + } catch (error: any) { + if (error.message === 'Missing node in DB') { + return equalsBytes(root, this.EMPTY_TREE_ROOT) + } else { + throw error + } + } + } + + /** + * Gets values at a given binary tree `stem` and set of suffixes + * @param stem - the stem of the stem node where we're seeking values + * @param suffixes - an array of suffixes corresponding to the values desired + * @returns A Promise that resolves to an array of `Uint8Array`s or `null` depending on if values were found. + * If the stem is not found, will return an empty array. + */ + async get(stem: Uint8Array, suffixes: number[]): Promise<(Uint8Array | null)[]> { + if (stem.length !== 31) throw new Error(`expected stem with length 31; got ${stem.length}`) + this.DEBUG && this.debug(`Stem: ${bytesToHex(stem)}; Suffix: ${suffixes}`, ['get']) + const stemPath = await this.findPath(stem) + if (stemPath.node instanceof StemBinaryNode) { + // The retrieved stem node contains an array of 256 possible values. 
+ // We read all the suffixes to get the desired values + const values = [] + for (const suffix of suffixes) { + const value = stemPath.node.getValue(suffix) + this.DEBUG && + this.debug(`Suffix: ${suffix}; Value: ${value === null ? 'null' : bytesToHex(value)}`, [ + 'get', + ]) + values.push(value) + } + return values + } + + return [] + } + + /** + * Stores a given `value` at the given `key` or performs a deletion if `value` is null. + * @param stem - the stem (must be 31 bytes) to store the value at. + * @param suffixes - array of suffixes at which to store individual values. + * @param values - the value(s) to store (or null for deletion). + * @returns A Promise that resolves once the value is stored. + */ + async put(stem: Uint8Array, suffixes: number[], values: (Uint8Array | null)[]): Promise { + if (stem.length !== 31) throw new Error(`expected stem with length 31, got ${stem.length}`) + if (values.length > 0 && values.length !== suffixes.length) + throw new Error( + `expected number of values (${values.length}) to equal number of suffixes (${suffixes.length})`, + ) + + this.DEBUG && this.debug(`Stem: ${bytesToHex(stem)}`, ['put']) + const putStack: [Uint8Array, BinaryNode | null][] = [] // A stack of updated nodes starting with the stem node being updated/created to be saved to the DB + + // If the tree is empty, initialize it. + if (equalsBytes(this.root(), this.EMPTY_TREE_ROOT)) { + await this._createInitialNode(stem, suffixes, values) + return + } + + // Find the path to the node (or the nearest node) for the given stem. + const foundPath = await this.findPath(stem) + + // We should always at least get the root node back + if (foundPath.stack.length === 0) throw new Error(`Root node not found in trie`) + + // Step 1) Create or update the stem node + let stemNode: StemBinaryNode + let newStem = false + // If we found a stem node with the same stem, we'll update it. 
+ if ( + foundPath.node && + isStemBinaryNode(foundPath.node) && + equalsBytes(foundPath.node.stem, stem) + ) { + stemNode = foundPath.node + } else { + // Otherwise, we'll create a new stem node. + newStem = true + stemNode = StemBinaryNode.create(stem) + this.DEBUG && this.debug(`Creating new stem node for stem: ${bytesToHex(stem)}`, ['put']) + } + + // Update the values in the stem node. + for (let i = 0; i < suffixes.length; i++) { + const suffix = suffixes[i] + const value = values[i] + stemNode.setValue(suffix, value) + this.DEBUG && + this.debug( + `Setting value for suffix: ${suffix} to value: ${value instanceof Uint8Array ? bytesToHex(value) : value} at stem node with stem: ${bytesToHex(stem)}`, + ['put'], + ) + } + + // If all values are null then we treat this as a deletion. + if (stemNode.values.every((val) => val === null)) { + if (foundPath.node !== null) { + this.DEBUG && this.debug(`Deleting stem node for stem: ${bytesToHex(stem)}`, ['put']) + putStack.push([this.merkelize(stemNode), null]) + } else { + return // nothing to delete + } + } else { + // Otherwise, we add the new or updated stemNode to the putStack + putStack.push([this.merkelize(stemNode), stemNode]) + } + + // Get the bit representation of the stem. + const stemBits = bytesToBits(stemNode.stem) + // We keep a reference to the current "parent" node path as we update up the tree. + let lastUpdatedParentPath: number[] = [] + + // Step 2: Add any needed new internal nodes if inserting a new stem. + if (foundPath.stack.length > 1 && newStem) { + // Pop the nearest node on the path. + const [nearestNode, nearestNodePath] = foundPath.stack.pop()! + const parentPath = foundPath.stack[foundPath.stack.length - 1]?.[1] ?? [] + this.DEBUG && this.debug(`Adding necessary internal nodes.`, ['put']) + // Update the parent branch if necessary. 
+ // If an update was necessary, updateBranch returns a stack of internal nodes + // that connect the new stem node to the previous parent inner node + const updated = this.updateBranch(stemNode, nearestNode, nearestNodePath, parentPath) + if (updated !== undefined) { + for (const update of updated) { + putStack.push([this.merkelize(update.node), update.node]) + lastUpdatedParentPath = update.parentPath + } + } + } + + // Step 3: Update remaining parent node hashes + while (foundPath.stack.length > 1) { + const [node, path] = foundPath.stack.pop()! + if (isInternalBinaryNode(node)) { + // Set child pointer to the last internal node in the putStack (last updated internal node) + node.setChild(lastUpdatedParentPath[lastUpdatedParentPath.length - 1], { + hash: putStack[putStack.length - 1][0], // Reuse hash already computed above + path: lastUpdatedParentPath, + }) + putStack.push([this.merkelize(node), node]) // Update node hash and add to putStack + lastUpdatedParentPath = path + this.DEBUG && + this.debug(`Updated parent internal node hash for path ${path.join(',')}`, ['put']) + } else { + throw new Error(`Expected internal node at path ${path.join(',')}, got ${node}`) + } + } + + // Step 4: Update the root node. + let rootNode = foundPath.stack.pop()![0] // The root node. + const childReference = putStack[putStack.length - 1][1] + + if (isStemBinaryNode(rootNode)) { + // If the root is a stem node but its stem differs from the one we're updating, + // then we need to split the root. Per the spec, when two stems share a common prefix, + // we create one internal node per bit in that common prefix, and then at the first + // divergence, an internal node that points to both stem nodes. + if (!equalsBytes(rootNode.stem, stem)) { + this.DEBUG && this.debug(`Root stem differs from new stem. 
Splitting root.`, ['put']) + const rootBits = bytesToBits(rootNode.stem) + const commonPrefixLength = matchingBitsLength(rootBits, stemBits) + // Create the split node at the divergence bit. + const splitNode = InternalBinaryNode.create() + const branchForNew = stemBits[commonPrefixLength] + const branchForExisting = rootBits[commonPrefixLength] + splitNode.setChild(branchForNew, { + hash: this.merkelize(stemNode), + path: stemBits, + }) + splitNode.setChild(branchForExisting, { + hash: this.merkelize(rootNode), + path: rootBits, + }) + + let newRoot = splitNode + + // If there is a common prefix (i.e. commonPrefixLength > 0), we build a chain + // of internal nodes representing that prefix. + for (let depth = commonPrefixLength - 1; depth >= 0; depth--) { + this.DEBUG && this.debug(`Creating internal node at depth ${depth}`, ['put']) + putStack.push([this.merkelize(newRoot), newRoot]) + const parent = InternalBinaryNode.create() + // At each level, the branch is determined by the bit of the new stem at position i. + parent.setChild(stemBits[depth], { + hash: this.merkelize(newRoot), + path: stemBits.slice(0, depth + 1), + }) + newRoot = parent + } + // Now newRoot is an internal node chain that represents the entire common prefix, + // ending in a split node that distinguishes the two different stems. + rootNode = newRoot + } + } else { + // For an internal root node, we assign the last update child reference to the root. + if (childReference !== null) { + rootNode.setChild( + stemBits[0], + childReference !== null + ? { + hash: this.merkelize(childReference), + path: isStemBinaryNode(childReference) ? stemBits : lastUpdatedParentPath, + } + : null, + ) + } + } + + this.root(this.merkelize(rootNode)) + putStack.push([this._root, rootNode]) + this.DEBUG && this.debug(`Updated root hash to ${bytesToHex(this._root)}`, ['put']) + await this.saveStack(putStack) + } + + /** + * Helper method for updating or creating the parent internal node for a given stem node. 
+ * If the nearest node is a stem node with a different stem, a new internal node is created + * to branch at the first differing bit. + * If the nearest node is an internal node, its child reference is updated. + * + * @param stemNode - The child stem node that will be referenced by the new/updated internal node. + * @param nearestNode - The nearest node to the new stem node. + * @param pathToNode - The path (in bits) to `nearestNode` as known from the trie. + * @returns An array of nodes and their partial paths from the new stem node to the branch parent node + * or `undefined` if no changes were made. + */ + updateBranch( + stemNode: StemBinaryNode, + nearestNode: BinaryNode, + pathToNode: number[], + pathToParent: number[], + ): { node: BinaryNode; parentPath: number[] }[] | undefined { + const stemBits = bytesToBits(stemNode.stem) + if (isStemBinaryNode(nearestNode)) { + // For two different stems, find the first differing bit. + const nearestNodeStemBits = bytesToBits(nearestNode.stem) + const diffIndex = matchingBitsLength(stemBits, nearestNodeStemBits) + const parentDiffIndex = matchingBitsLength(pathToNode, pathToParent) + + const newInternal = InternalBinaryNode.create() + // Set the child pointer for the new stem node using the bit at diffIndex. + newInternal.setChild(stemBits[diffIndex], { + hash: this.merkelize(stemNode), + path: stemBits, + }) + + // Set the child pointer for the existing stem node. + newInternal.setChild(nearestNodeStemBits[diffIndex], { + hash: this.merkelize(nearestNode), + path: nearestNodeStemBits, + }) + const putStack = [{ node: newInternal, parentPath: stemBits.slice(0, diffIndex) }] + + let parent = newInternal + for (let depth = diffIndex - 1; depth > parentDiffIndex; depth--) { + this.DEBUG && this.debug(`Creating internal node at depth ${depth}`, ['put']) + const newParent = InternalBinaryNode.create() + // At each level, the branch is determined by the bit of the new stem at position i. 
+ newParent.setChild(stemBits[depth], { + hash: this.merkelize(parent), + path: stemBits.slice(0, depth + 1), + }) + putStack.push({ node: newParent, parentPath: stemBits.slice(0, depth) }) + parent = newParent + } + + // Return the stack of new internal nodes that connect the new stem node to the previous parent inner node + return putStack + } else if (isInternalBinaryNode(nearestNode)) { + // For an internal node, determine the branch index using the parent's known path length. + const branchIndex = stemBits[pathToNode.length] + nearestNode.setChild(branchIndex, { + hash: this.merkelize(stemNode), + path: stemBits, + }) + return [{ node: nearestNode, parentPath: pathToNode }] + } + return undefined + } + + /** + * Tries to find a path to the node for the given key. + * It returns a `Path` object containing: + * - `node`: the found node (if any), + * - `stack`: an array of tuples [node, path] representing the nodes encountered, + * - `remaining`: the bits of the key that were not matched. + * + * @param keyInBytes - the search key as a byte array. + * @returns A Promise that resolves to a Path object. + */ + async findPath(keyInBytes: Uint8Array): Promise { + const keyInBits = bytesToBits(keyInBytes) + this.DEBUG && this.debug(`Searching for key: ${bytesToHex(keyInBytes)}`, ['find_path']) + const result: Path = { + node: null, + stack: [], + remaining: keyInBits, + } + + // If tree is empty, return empty path. + if (equalsBytes(this.root(), this.EMPTY_TREE_ROOT)) return result + + // Get the root node. + let rawNode = await this._db.get(this.root()) + if (rawNode === undefined) throw new Error('root node should exist') + const rootNode = decodeBinaryNode(rawNode) + + this.DEBUG && this.debug(`Starting with Root Node: [${bytesToHex(this.root())}]`, ['find_path']) + // Treat the root as being at an empty path. + result.stack.push([rootNode, []]) + + // If the root node is a stem node, we're done. 
+ if (isStemBinaryNode(rootNode)) { + this.DEBUG && this.debug(`Found stem node at root.`, ['find_path']) + if (equalsBytes(keyInBytes, rootNode.stem)) { + result.node = rootNode + result.remaining = [] + } + return result + } + + // The root is an internal node. Determine the branch to follow using the first bit of the key + let childNode = rootNode.getChild(keyInBits[0]) + + // If no child exists on that branch, return what we have. + if (childNode === null) { + this.DEBUG && this.debug(`Partial Path ${keyInBits[0]} - found no child.`, ['find_path']) + return result + } + let finished = false + while (!finished) { + if (childNode === null) break + + // Look up child node by its node hash. + rawNode = await this._db.get(childNode.hash) + if (rawNode === undefined) throw new Error(`missing node at ${childNode.path}`) + const decodedNode = decodeBinaryNode(rawNode) + + // Determine how many bits match between keyInBits and the stored path in childNode. + const matchingKeyLength = matchingBitsLength(keyInBits, childNode.path) + + // If we have an exact match (i.e. the stored path equals a prefix of the key) + // and either the key is fully consumed or we have reached a stem node, we stop. + if ( + matchingKeyLength === childNode.path.length && + (matchingKeyLength === keyInBits.length || isStemBinaryNode(decodedNode)) + ) { + finished = true + if ( + matchingKeyLength === keyInBits.length && + equalsBits(keyInBits, childNode.path) === true + ) { + this.DEBUG && + this.debug( + `Path ${bytesToHex(keyInBytes)} - found full path to node ${bytesToHex( + this.merkelize(decodedNode), + )}.`, + ['find_path'], + ) + result.node = decodedNode + result.remaining = [] + return result + } + // Otherwise, record the unmatched tail of the key. + result.remaining = keyInBits.slice(matchingKeyLength) + result.stack.push([decodedNode, childNode.path]) + return result + } + // Otherwise, push this internal node and continue. 
+ result.stack.push([decodedNode, keyInBits.slice(0, matchingKeyLength)]) + this.DEBUG && + this.debug( + `Partial Path ${keyInBits.slice(0, matchingKeyLength)} - found next node in path ${bytesToHex( + this.merkelize(decodedNode), + )}.`, + ['find_path'], + ) + + // If the decoded node is not internal, then we cannot traverse further. + if (!isInternalBinaryNode(decodedNode)) { + result.remaining = keyInBits.slice(matchingKeyLength) + finished = true + break + } + // The next branch is determined by the next bit after the matched prefix. + const childIndex = keyInBits[matchingKeyLength] + childNode = decodedNode.getChild(childIndex) + if (childNode === null) { + result.remaining = keyInBits.slice(matchingKeyLength) + finished = true + } + } + this.DEBUG && + this.debug( + `Found partial path ${bytesToHex(bitsToBytes(keyInBits.slice(256 - result.remaining.length)))} but sought node is not present in trie.`, + ['find_path'], + ) + return result + } + + /** + * Deletes a given `key` from the tree. + * @param stem - the stem of the stem node to delete from + * @param suffixes - the suffixes to delete + * @returns A Promise that resolves once the key is deleted. + */ + async del(stem: Uint8Array, suffixes: number[]): Promise { + this.DEBUG && this.debug(`Stem: ${bytesToHex(stem)}; Suffix(es): ${suffixes}`, ['del']) + await this.put(stem, suffixes, new Array(suffixes.length).fill(null)) + } + + /** + * Create empty root node for initializing an empty tree. + */ + async createRootNode(): Promise { + const rootNode = null + this.DEBUG && this.debug(`No root node. Creating new root node`, ['initialize']) + this.root(this.merkelize(rootNode)) + await this.saveStack([[this.root(), rootNode]]) + return + } + + /** + * Creates the initial node from an empty tree. 
+ * @private + */ + protected async _createInitialNode( + stem: Uint8Array, + indexes: number[], + values: (Uint8Array | null)[], + ): Promise { + const initialNode = StemBinaryNode.create(stem) + for (let i = 0; i < indexes.length; i++) { + initialNode.setValue(indexes[i], values[i]) + } + this.root(this.merkelize(initialNode)) + await this._db.put(this.root(), initialNode.serialize()) + await this.persistRoot() + } + + /** + * Saves a stack of nodes to the database. + * + * @param putStack - an array of tuples of keys (the partial path of the node in the trie) and nodes (BinaryNodes) + */ + + async saveStack(putStack: [Uint8Array, BinaryNode | null][]): Promise { + const opStack = putStack.map(([key, node]) => { + return { + type: node !== null ? 'put' : 'del', + key, + value: node !== null ? node.serialize() : null, + } as PutBatch + }) + await this._db.batch(opStack) + } + + /** + * Saves the nodes from a proof into the tree. + * @param proof + */ + async fromProof(_proof: any): Promise { + throw new Error('Not implemented') + } + + /** + * Creates a proof from a tree and key that can be verified using {@link BinaryTree.verifyBinaryProof}. + * @param key + */ + async createBinaryProof(_key: Uint8Array): Promise { + throw new Error('Not implemented') + } + + /** + * Verifies a proof. + * @param rootHash + * @param key + * @param proof + * @throws If proof is found to be invalid. + * @returns The value from the key, or null if valid proof of non-existence. + */ + async verifyBinaryProof( + _rootHash: Uint8Array, + _key: Uint8Array, + _proof: any, + ): Promise { + throw new Error('Not implemented') + } + + /** + * The `data` event is given an `Object` that has two properties; the `key` and the `value`. Both should be Uint8Arrays. 
+ * @return Returns a [stream](https://nodejs.org/dist/latest-v12.x/docs/api/stream.html#stream_class_stream_readable) of the contents of the `tree` + */ + createReadStream(): any { + throw new Error('Not implemented') + } + + /** + * Returns a copy of the underlying tree. + * + * Note on db: the copy will create a reference to the + * same underlying database. + * + * Note on cache: for memory reasons a copy will not + * recreate a new LRU cache but initialize with cache + * being deactivated. + * + * @param includeCheckpoints - If true and during a checkpoint, the copy will contain the checkpointing metadata and will use the same scratch as underlying db. + */ + shallowCopy(includeCheckpoints = true): BinaryTree { + const tree = new BinaryTree({ + ...this._opts, + db: this._db.db.shallowCopy(), + root: this.root(), + cacheSize: 0, + }) + if (includeCheckpoints && this.hasCheckpoints()) { + tree._db.setCheckpoints(this._db.checkpoints) + } + return tree + } + + /** + * Persists the root hash in the underlying database + */ + async persistRoot() { + if (this._opts.useRootPersistence === true) { + await this._db.put(ROOT_DB_KEY, this.root()) + } + } + + /** + * Is the tree during a checkpoint phase? + */ + hasCheckpoints() { + return this._db.hasCheckpoints() + } + + /** + * Creates a checkpoint that can later be reverted to or committed. + * After this is called, all changes can be reverted until `commit` is called. + */ + checkpoint() { + this._db.checkpoint(this.root()) + } + + /** + * Commits a checkpoint to disk, if current checkpoint is not nested. + * If nested, only sets the parent checkpoint as current checkpoint. 
+   * @throws If not during a checkpoint phase
+   */
+  async commit(): Promise<void> {
+    if (!this.hasCheckpoints()) {
+      throw new Error('trying to commit when not checkpointed')
+    }
+
+    await this._lock.acquire()
+    try {
+      await this._db.commit()
+      await this.persistRoot()
+    } finally {
+      // Release in `finally` so a failed commit/persist cannot leave the tree lock held.
+      this._lock.release()
+    }
+  }
+
+  /**
+   * Reverts the tree to the state it was at when `checkpoint` was first called.
+   * If during a nested checkpoint, sets root to most recent checkpoint, and sets
+   * parent checkpoint as current.
+   * @throws If not during a checkpoint phase
+   */
+  async revert(): Promise<void> {
+    if (!this.hasCheckpoints()) {
+      throw new Error('trying to revert when not checkpointed')
+    }
+
+    await this._lock.acquire()
+    try {
+      // `_db.revert()` pops the latest checkpoint and returns the root recorded with it.
+      this.root(await this._db.revert())
+      await this.persistRoot()
+    } finally {
+      // Release in `finally` so a failed revert/persist cannot leave the tree lock held.
+      this._lock.release()
+    }
+  }
+
+  /**
+   * Flushes all checkpoints, restoring the initial checkpoint state.
+   */
+  flushCheckpoints() {
+    this._db.checkpoints = []
+  }
+
+  /**
+   * Hashes `msg` with the hash function configured in the tree options.
+   * @param msg - 32 or 64 bytes of data, or `null`
+   * @returns 32 zero bytes if `msg` is `null` or 64 zero bytes, otherwise the hash of `msg`
+   * @throws If `msg` is neither 32 nor 64 bytes long
+   */
+  protected hash(msg: Uint8Array | null): Uint8Array {
+    // As per spec, if value is null or a 64-byte array of 0s, hash(msg) is a 32-byte array of 0s
+    if (msg === null || (msg.length === 64 && msg.every((byte) => byte === 0))) {
+      return new Uint8Array(32)
+    }
+
+    if (msg.length !== 32 && msg.length !== 64) {
+      throw new Error('Data must be 32 or 64 bytes')
+    }
+
+    return Uint8Array.from(this._opts.hashFunction.call(undefined, msg))
+  }
+
+  /**
+   * Computes the hash of a node.
+   * - `null` hashes to 32 zero bytes.
+   * - An internal node hashes the concatenation of its two child hashes (a missing child contributes `hash(null)`).
+   * - A stem node hashes each value, combines the hashes pairwise until one remains, and hashes that together with the stem.
+   * @param node - the node to hash, or `null`
+   * @returns the node hash
+   */
+  protected merkelize(node: BinaryNode | null): Uint8Array {
+    if (node === null) {
+      return new Uint8Array(32)
+    }
+
+    if (isInternalBinaryNode(node)) {
+      const [leftChild, rightChild] = node.children
+
+      return this.hash(
+        concatBytes(
+          leftChild === null ? this.hash(null) : leftChild.hash,
+          rightChild === null ? this.hash(null) : rightChild.hash,
+        ),
+      )
+    }
+
+    // Otherwise, it's a stem node.
+    // Map each value in node.values through the hash function.
+    let currentLayerHashes = node.values.map((value) => this.hash(value))
+
+    // While there is more than one hash at the current layer, combine them pairwise.
+ while (currentLayerHashes.length > 1) { + const newLayerHashes = [] + for (let i = 0; i < currentLayerHashes.length; i += 2) { + newLayerHashes.push( + this.hash(concatBytes(currentLayerHashes[i], currentLayerHashes[i + 1])), + ) + } + currentLayerHashes = newLayerHashes + } + + // Return the hash of the concatenation of node.stem appended with 00 and the final level hash. + return this.hash(concatBytes(setLengthRight(node.stem, 32), currentLayerHashes[0])) + } +} diff --git a/packages/binarytree/src/constructors.ts b/packages/binarytree/src/constructors.ts new file mode 100644 index 0000000000..6b9a9827f5 --- /dev/null +++ b/packages/binarytree/src/constructors.ts @@ -0,0 +1,50 @@ +import { + KeyEncoding, + MapDB, + ValueEncoding, + bytesToHex, + unprefixedHexToBytes, +} from '@ethereumjs/util' +import { blake3 } from '@noble/hashes/blake3' + +import { BinaryTree } from './binaryTree.js' +import { ROOT_DB_KEY } from './types.js' + +import type { BinaryTreeOpts } from './types.js' + +export async function createBinaryTree(opts?: Partial) { + const key = bytesToHex(ROOT_DB_KEY) + + // Provide sensible default options + const parsedOptions = { + ...opts, + db: opts?.db ?? new MapDB(), + useRootPersistence: opts?.useRootPersistence ?? false, + cacheSize: opts?.cacheSize ?? 0, + hashFunction: opts?.hashFunction ?? 
blake3, + } + + if (parsedOptions.useRootPersistence === true) { + if (parsedOptions.root === undefined) { + const root = await parsedOptions.db.get(key, { + keyEncoding: KeyEncoding.Bytes, + valueEncoding: ValueEncoding.Bytes, + }) + if (typeof root === 'string') { + parsedOptions.root = unprefixedHexToBytes(root) + } else { + parsedOptions.root = root + } + } else { + await parsedOptions.db.put(key, parsedOptions.root, { + keyEncoding: KeyEncoding.Bytes, + valueEncoding: ValueEncoding.Bytes, + }) + } + } + + const tree = new BinaryTree(parsedOptions) + // If the root node does not exist, initialize the empty root node + if (parsedOptions.root === undefined) await tree.createRootNode() + return tree +} diff --git a/packages/binarytree/src/db/checkpoint.ts b/packages/binarytree/src/db/checkpoint.ts new file mode 100644 index 0000000000..27eca08562 --- /dev/null +++ b/packages/binarytree/src/db/checkpoint.ts @@ -0,0 +1,294 @@ +import { + KeyEncoding, + ValueEncoding, + bytesToUnprefixedHex, + unprefixedHexToBytes, +} from '@ethereumjs/util' +import { LRUCache } from 'lru-cache' + +import type { Checkpoint, CheckpointDBOpts } from '../types.js' +import type { BatchDBOp, DB, EncodingOpts } from '@ethereumjs/util' + +/** + * DB is a thin wrapper around the underlying levelup db, + * which validates inputs and sets encoding type. + */ +export class CheckpointDB implements DB { + public checkpoints: Checkpoint[] + public db: DB + public readonly cacheSize: number + private readonly valueEncoding: ValueEncoding + + // Starting with lru-cache v8 undefined and null are not allowed any more + // as cache values. At the same time our design works well, since undefined + // indicates for us that we know that the value is not present in the + // underlying trie database as well (so it carries real value). + // + // Solution here seems therefore adequate, other solutions would rather + // be some not so clean workaround. 
+ // + // (note that @ts-ignore doesn't work since stripped on declaration (.d.ts) files) + protected _cache?: LRUCache + // protected _cache?: LRUCache + + _stats = { + cache: { + reads: 0, + hits: 0, + writes: 0, + }, + db: { + reads: 0, + hits: 0, + writes: 0, + }, + } + + /** + * Initialize a DB instance. + */ + constructor(opts: CheckpointDBOpts) { + this.db = opts.db + this.cacheSize = opts.cacheSize ?? 0 + this.valueEncoding = opts.valueEncoding ?? ValueEncoding.String + // Roots of trie at the moment of checkpoint + this.checkpoints = [] + + if (this.cacheSize > 0) { + this._cache = new LRUCache({ + max: this.cacheSize, + updateAgeOnGet: true, + }) + } + } + + /** + * Flush the checkpoints and use the given checkpoints instead. + * @param {Checkpoint[]} checkpoints + */ + setCheckpoints(checkpoints: Checkpoint[]) { + this.checkpoints = [] + + for (let i = 0; i < checkpoints.length; i++) { + this.checkpoints.push({ + root: checkpoints[i].root, + keyValueMap: new Map(checkpoints[i].keyValueMap), + }) + } + } + + /** + * Is the DB during a checkpoint phase? + */ + hasCheckpoints() { + return this.checkpoints.length > 0 + } + + /** + * Adds a new checkpoint to the stack + * @param root + */ + checkpoint(root: Uint8Array) { + this.checkpoints.push({ keyValueMap: new Map(), root }) + } + + /** + * Commits the latest checkpoint + */ + async commit() { + const { keyValueMap } = this.checkpoints.pop()! 
+ if (!this.hasCheckpoints()) { + // This was the final checkpoint, we should now commit and flush everything to disk + const batchOp: BatchDBOp[] = [] + for (const [key, value] of keyValueMap.entries()) { + if (value === undefined) { + batchOp.push({ + type: 'del', + key: unprefixedHexToBytes(key), + }) + } else { + batchOp.push({ + type: 'put', + key: unprefixedHexToBytes(key), + value, + }) + } + } + await this.batch(batchOp) + } else { + // dump everything into the current (higher level) diff cache + const currentKeyValueMap = this.checkpoints[this.checkpoints.length - 1].keyValueMap + for (const [key, value] of keyValueMap.entries()) { + currentKeyValueMap.set(key, value) + } + } + } + + /** + * Reverts the latest checkpoint + */ + async revert() { + const { root } = this.checkpoints.pop()! + return root + } + + /** + * @inheritDoc + */ + async get(key: Uint8Array): Promise { + const keyHex = bytesToUnprefixedHex(key) + if (this._cache !== undefined) { + const value = this._cache.get(keyHex) + this._stats.cache.reads += 1 + if (value !== undefined) { + this._stats.cache.hits += 1 + return value + } + } + + // Lookup the value in our diff cache. We return the latest checkpointed value (which should be the value on disk) + for (let index = this.checkpoints.length - 1; index >= 0; index--) { + if (this.checkpoints[index].keyValueMap.has(keyHex)) { + return this.checkpoints[index].keyValueMap.get(keyHex) + } + } + // Nothing has been found in diff cache, look up from disk + const value = await this.db.get(keyHex, { + keyEncoding: KeyEncoding.String, + valueEncoding: this.valueEncoding, + }) + this._stats.db.reads += 1 + if (value !== undefined) { + this._stats.db.hits += 1 + } + const returnValue = + value !== undefined + ? value instanceof Uint8Array + ? 
value + : unprefixedHexToBytes(value) + : undefined + this._cache?.set(keyHex, returnValue) + if (this.hasCheckpoints()) { + // Since we are a checkpoint, put this value in diff cache, + // so future `get` calls will not look the key up again from disk. + this.checkpoints[this.checkpoints.length - 1].keyValueMap.set(keyHex, returnValue) + } + + return returnValue + } + + /** + * @inheritDoc + */ + async put(key: Uint8Array, value: Uint8Array): Promise { + const keyHex = bytesToUnprefixedHex(key) + if (this.hasCheckpoints()) { + // put value in diff cache + this.checkpoints[this.checkpoints.length - 1].keyValueMap.set(keyHex, value) + } else { + const valuePut = + this.valueEncoding === ValueEncoding.Bytes ? value : bytesToUnprefixedHex(value) + await this.db.put(keyHex, valuePut, { + keyEncoding: KeyEncoding.String, + valueEncoding: this.valueEncoding, + }) + this._stats.db.writes += 1 + + if (this._cache !== undefined) { + this._cache.set(keyHex, value) + this._stats.cache.writes += 1 + } + } + } + + /** + * @inheritDoc + */ + async del(key: Uint8Array): Promise { + const keyHex = bytesToUnprefixedHex(key) + if (this.hasCheckpoints()) { + // delete the value in the current diff cache + this.checkpoints[this.checkpoints.length - 1].keyValueMap.set(keyHex, undefined) + } else { + // delete the value on disk + await this.db.del(keyHex, { + keyEncoding: KeyEncoding.String, + }) + this._stats.db.writes += 1 + + if (this._cache !== undefined) { + this._cache.set(keyHex, undefined) + this._stats.cache.writes += 1 + } + } + } + + /** + * @inheritDoc + */ + async batch(opStack: BatchDBOp[]): Promise { + if (this.hasCheckpoints()) { + for (const op of opStack) { + if (op.type === 'put') { + await this.put(op.key, op.value) + } else if (op.type === 'del') { + await this.del(op.key) + } + } + } else { + const convertedOps = opStack.map((op) => { + const convertedOp: { + key: string + value: Uint8Array | string | undefined + type: 'put' | 'del' + opts?: EncodingOpts + } = { + 
key: bytesToUnprefixedHex(op.key), + value: op.type === 'put' ? op.value : undefined, + type: op.type, + opts: { ...op.opts, ...{ valueEncoding: this.valueEncoding } }, + } + this._stats.db.writes += 1 + if (op.type === 'put' && this.valueEncoding === ValueEncoding.String) { + convertedOp.value = bytesToUnprefixedHex(convertedOp.value) + } + return convertedOp + }) + await this.db.batch(convertedOps) + } + } + + stats(reset = true) { + const stats = { ...this._stats, size: this._cache?.size ?? 0 } + if (reset) { + this._stats = { + cache: { + reads: 0, + hits: 0, + writes: 0, + }, + db: { + reads: 0, + hits: 0, + writes: 0, + }, + } + } + return stats + } + + /** + * @inheritDoc + */ + shallowCopy(): CheckpointDB { + return new CheckpointDB({ + db: this.db, + cacheSize: this.cacheSize, + valueEncoding: this.valueEncoding, + }) + } + + open() { + return Promise.resolve() + } +} diff --git a/packages/binarytree/src/db/index.ts b/packages/binarytree/src/db/index.ts new file mode 100644 index 0000000000..63e8f6b033 --- /dev/null +++ b/packages/binarytree/src/db/index.ts @@ -0,0 +1 @@ +export * from './checkpoint.js' diff --git a/packages/binarytree/src/index.ts b/packages/binarytree/src/index.ts new file mode 100644 index 0000000000..07c7d7a45c --- /dev/null +++ b/packages/binarytree/src/index.ts @@ -0,0 +1,5 @@ +export * from './binaryTree.js' +export * from './constructors.js' +export * from './db/index.js' +export * from './node/index.js' +export * from './types.js' diff --git a/packages/binarytree/src/node/index.ts b/packages/binarytree/src/node/index.ts new file mode 100644 index 0000000000..559dac4d90 --- /dev/null +++ b/packages/binarytree/src/node/index.ts @@ -0,0 +1,3 @@ +export * from './internalNode.js' +export * from './types.js' +export * from './util.js' diff --git a/packages/binarytree/src/node/internalNode.ts b/packages/binarytree/src/node/internalNode.ts new file mode 100644 index 0000000000..fe46741f7d --- /dev/null +++ 
b/packages/binarytree/src/node/internalNode.ts @@ -0,0 +1,112 @@ +import { RLP } from '@ethereumjs/rlp' +import { bitsToBytes, bytesToBits } from '@ethereumjs/util' + +import { BinaryNodeType } from './types.js' + +import type { BinaryNodeOptions, ChildBinaryNode } from './types.js' + +export class InternalBinaryNode { + public children: Array + + public type = BinaryNodeType.Internal + + constructor(options: BinaryNodeOptions[BinaryNodeType.Internal]) { + this.children = options.children ?? Array(2).fill(null) + } + + static fromRawNode(rawNode: Uint8Array[]): InternalBinaryNode { + const nodeType = rawNode[0][0] + if (nodeType !== BinaryNodeType.Internal) { + throw new Error('Invalid node type') + } + + // The length of the rawNode should be the # of children * 2 (for hash and path) + 1 for the node type + + if (rawNode.length !== 2 * 2 + 1) { + throw new Error('Invalid node length') + } + const [, leftChildHash, rightChildHash, leftChildRawPath, rightChildRawPath] = rawNode + + const decodeChild = (hash: Uint8Array, rawPath: Uint8Array): ChildBinaryNode | null => { + if (hash.length === 0) return null + const decoded = RLP.decode(rawPath) + + if (!Array.isArray(decoded) || decoded.length !== 2) { + throw new Error('Invalid RLP encoding for child path') + } + + const [encodedLength, encodedPath] = decoded as Uint8Array[] + + if (encodedLength.length !== 1) { + throw new Error('Invalid path length encoding') + } + + const pathLength = encodedLength[0] + const path = bytesToBits(encodedPath, pathLength) + + return { hash, path } + } + + const children = [ + decodeChild(leftChildHash, leftChildRawPath), + decodeChild(rightChildHash, rightChildRawPath), + ] + + return new InternalBinaryNode({ children }) + } + + /** + * Generates a new Internal node + * @param children the children nodes + * @returns a new Internal node + */ + static create(children?: (ChildBinaryNode | null)[]): InternalBinaryNode { + if (children !== undefined && children.length !== 2) { + throw 
new Error('Internal node must have 2 children') + } + return new InternalBinaryNode({ children }) + } + + getChild(index: number): ChildBinaryNode | null { + return this.children[index] + } + + setChild(index: number, child: ChildBinaryNode | null): void { + this.children[index] = child + } + + /** + * @returns the RLP serialized node + */ + serialize(): Uint8Array { + return RLP.encode(this.raw()) + } + + /** + * Returns the raw serialized representation of this internal node as an array of Uint8Arrays. + * + * The returned array contains: + * 1. A single-byte Uint8Array indicating the node type (BinaryNodeType.Internal). + * 2. For each child (left then right): + * - The child’s hash, or an empty Uint8Array if the child is null. + * 3. For each child (left then right): + * - An RLP-encoded tuple [pathLength, packedPathBytes] where: + * - `pathLength` is a one-byte Uint8Array representing the number of meaningful bits in the child’s path. + * - `packedPathBytes` is the packed byte representation of the child's bit path (as produced by `bitsToBytes`). + * + * @returns {Uint8Array[]} An array of Uint8Arrays representing the node's serialized internal data. + * @dev When decoding, the stored child path (an RLP-encoded tuple) must be converted back into the original bit array. + */ + + raw(): Uint8Array[] { + return [ + new Uint8Array([BinaryNodeType.Internal]), + ...this.children.map((child) => (child !== null ? child.hash : new Uint8Array())), + ...this.children.map((child) => + child !== null + ? 
RLP.encode([new Uint8Array([child.path.length]), bitsToBytes(child.path)]) + : new Uint8Array(), + ), + ] + } +} diff --git a/packages/binarytree/src/node/stemNode.ts b/packages/binarytree/src/node/stemNode.ts new file mode 100644 index 0000000000..a8a23918d1 --- /dev/null +++ b/packages/binarytree/src/node/stemNode.ts @@ -0,0 +1,86 @@ +import { RLP } from '@ethereumjs/rlp' + +import { BinaryNodeType, NODE_WIDTH } from './types.js' + +import type { BinaryNodeOptions } from './types.js' + +export class StemBinaryNode { + public stem: Uint8Array + public values: (Uint8Array | null)[] // Array of 256 possible values represented as 32 byte Uint8Arrays + + public type = BinaryNodeType.Stem + + constructor(options: BinaryNodeOptions[BinaryNodeType.Stem]) { + this.stem = options.stem + this.values = options.values ?? new Array(256).fill(null) + } + + static fromRawNode(rawNode: Uint8Array[]): StemBinaryNode { + const nodeType = rawNode[0][0] + if (nodeType !== BinaryNodeType.Stem) { + throw new Error('Invalid node type') + } + + // The length of the rawNode should be the # of values (node width) + 2 for the node type and the stem + if (rawNode.length !== NODE_WIDTH + 2) { + throw new Error('Invalid node length') + } + + const stem = rawNode[1] + const rawValues = rawNode.slice(2, rawNode.length) + const values = rawValues.map((el) => (el.length === 0 ? 
null : el)) + + return new StemBinaryNode({ stem, values }) + } + + /** + * Generates a new Stem node + * @param stem the 31 byte stem corresponding to where the stem node is located in the tree + * @returns a new Stem node + */ + static create(stem: Uint8Array): StemBinaryNode { + return new StemBinaryNode({ stem }) + } + + // Retrieve the value at the provided index from the values array + getValue(index: number): Uint8Array | null { + return this.values[index] + } + + // Store `value` (or `null`) at the provided index in the values array + setValue(index: number, value: Uint8Array | null): void { + this.values[index] = value + } + + /** + * @returns the RLP serialized node + */ + serialize(): Uint8Array { + return RLP.encode(this.raw()) + } + + /** + * Returns the raw serialized representation of the node as an array of Uint8Arrays. + * The returned array is constructed as follows: + * - The first element is a Uint8Array containing a single byte that represents the node type, + * - The second element is the node's `stem` property. + * - The remaining elements are derived from the node's `values` array: + * - For each value, if it is `null`, it is converted to an empty Uint8Array. + * - Otherwise, the value is included as-is. + * + * @returns {Uint8Array[]} An array of Uint8Arrays representing the node's raw data. 
+ */ + raw(): Uint8Array[] { + return [ + new Uint8Array([BinaryNodeType.Stem]), + this.stem, + ...this.values.map((val) => { + switch (val) { + case null: + return new Uint8Array() + default: + return val + } + }), + ] + } +} diff --git a/packages/binarytree/src/node/types.ts b/packages/binarytree/src/node/types.ts new file mode 100644 index 0000000000..8a234e2507 --- /dev/null +++ b/packages/binarytree/src/node/types.ts @@ -0,0 +1,40 @@ +import type { InternalBinaryNode } from './internalNode.js' +import type { StemBinaryNode } from './stemNode.js' + +export enum BinaryNodeType { + Internal, + Stem, +} + +export interface TypedBinaryNode { + [BinaryNodeType.Internal]: InternalBinaryNode + [BinaryNodeType.Stem]: StemBinaryNode +} + +export type BinaryNode = TypedBinaryNode[BinaryNodeType] + +/** + * @dev A child node in a binary tree internal node. + * @param hash The hash of the child node. + * @param path The path to the child node, in bits. + * */ +export type ChildBinaryNode = { + hash: Uint8Array + path: number[] +} + +interface InternalBinaryNodeOptions { + children?: (ChildBinaryNode | null)[] +} + +interface StemBinaryNodeOptions { + stem: Uint8Array + values?: (Uint8Array | null)[] +} + +export interface BinaryNodeOptions { + [BinaryNodeType.Internal]: InternalBinaryNodeOptions + [BinaryNodeType.Stem]: StemBinaryNodeOptions +} + +export const NODE_WIDTH = 256 diff --git a/packages/binarytree/src/node/util.ts b/packages/binarytree/src/node/util.ts new file mode 100644 index 0000000000..20b5e6ec05 --- /dev/null +++ b/packages/binarytree/src/node/util.ts @@ -0,0 +1,37 @@ +import { RLP } from '@ethereumjs/rlp' + +import { InternalBinaryNode } from './internalNode.js' +import { StemBinaryNode } from './stemNode.js' +import { type BinaryNode, BinaryNodeType } from './types.js' + +export function decodeRawBinaryNode(raw: Uint8Array[]): BinaryNode { + const nodeType = raw[0][0] + switch (nodeType) { + case BinaryNodeType.Internal: + return 
InternalBinaryNode.fromRawNode(raw) + case BinaryNodeType.Stem: + return StemBinaryNode.fromRawNode(raw) + default: + throw new Error('Invalid node type') + } +} + +export function decodeBinaryNode(raw: Uint8Array) { + const decoded = RLP.decode(Uint8Array.from(raw)) as Uint8Array[] + if (!Array.isArray(decoded)) { + throw new Error('Invalid node') + } + return decodeRawBinaryNode(decoded) +} + +export function isRawBinaryNode(node: Uint8Array | Uint8Array[]): node is Uint8Array[] { + return Array.isArray(node) && !(node instanceof Uint8Array) +} + +export function isInternalBinaryNode(node: BinaryNode): node is InternalBinaryNode { + return node.type === BinaryNodeType.Internal +} + +export function isStemBinaryNode(node: BinaryNode): node is StemBinaryNode { + return node.type === BinaryNodeType.Stem +} diff --git a/packages/binarytree/src/types.ts b/packages/binarytree/src/types.ts new file mode 100644 index 0000000000..5590f369b6 --- /dev/null +++ b/packages/binarytree/src/types.ts @@ -0,0 +1,58 @@ +import { utf8ToBytes } from '@ethereumjs/util' + +import type { DB, ValueEncoding } from '@ethereumjs/util' + +export interface BinaryTreeOpts { + /** + * A database instance. + */ + db: DB + + /** + * A `Uint8Array` for the root of a previously stored tree + */ + root?: Uint8Array + + /** + * Store the root inside the database after every `write` operation + */ + useRootPersistence: boolean + + /** + * LRU cache for tree nodes to allow for faster node retrieval. + * + * Default: 0 (deactivated) + */ + cacheSize: number + + /** + * Hash function used for hashing the tree nodes. + */ + hashFunction: (msg: Uint8Array) => Uint8Array +} + +export interface CheckpointDBOpts { + /** + * A database instance. + */ + db: DB + + /** + * ValueEncoding of the database (the values which are `put`/`get` in the db are of this type). 
Defaults to `string` + */ + valueEncoding?: ValueEncoding + + /** + * Cache size (default: 0) + */ + cacheSize?: number +} + +export type Checkpoint = { + // We cannot use a Uint8Array => Uint8Array map directly. If you create two Uint8Arrays with the same internal value, + // then when setting a value on the Map, it actually creates two indices. + keyValueMap: Map + root: Uint8Array +} + +export const ROOT_DB_KEY = utf8ToBytes('__root__') diff --git a/packages/binarytree/src/util.ts b/packages/binarytree/src/util.ts new file mode 100644 index 0000000000..b9e02311c3 --- /dev/null +++ b/packages/binarytree/src/util.ts @@ -0,0 +1,80 @@ +import { type PrefixedHexString, bytesToHex, concatBytes, equalsBytes } from '@ethereumjs/util' + +import { InternalBinaryNode } from './node/internalNode.js' +import { StemBinaryNode } from './node/stemNode.js' +import { decodeBinaryNode } from './node/util.js' + +import type { BinaryTree } from './binaryTree.js' +import type { ChildBinaryNode } from './node/types.js' + +/** + * Recursively walks down the tree from a given starting node and returns all the leaf values + * @param tree - The binary tree + * @param startingNode - The starting node + * @returns An array of key-value pairs containing the tree keys and associated values + */ +export const dumpLeafValues = async ( + tree: BinaryTree, + startingNode: Uint8Array, +): Promise<[PrefixedHexString, PrefixedHexString][] | undefined> => { + if (equalsBytes(startingNode, tree.EMPTY_TREE_ROOT) === true) return + // Retrieve starting node from DB + const rawNode = await tree['_db'].get(startingNode) + if (rawNode === undefined) return + const node = decodeBinaryNode(rawNode) + const entries: [PrefixedHexString, PrefixedHexString][] = [] + if (node instanceof StemBinaryNode) { + for (const [idx, val] of node.values.entries()) { + if (val !== null) { + entries.push([bytesToHex(concatBytes(node.stem, Uint8Array.from([idx]))), bytesToHex(val)]) + } + } + return entries + } else { + const 
childPaths = node.children + .filter((value) => value !== null) + .map((value) => dumpLeafValues(tree, value!.hash)) + + const res = (await Promise.all(childPaths)).filter((val) => val !== undefined) + return res.flat(1) as [PrefixedHexString, PrefixedHexString][] + } +} +/** + * Recursively walks down the tree from a given starting node and returns all the node paths and hashes + * @param tree - The binary tree + * @param startingNode - The starting node + * @returns An array of key-value pairs containing the tree paths and associated hashes + */ +export const dumpNodeHashes = async ( + tree: BinaryTree, + startingNode: Uint8Array, +): Promise<[string, PrefixedHexString][] | undefined> => { + let entries: [string, PrefixedHexString][] = [] + // Retrieve starting node from DB + const rawNode = await tree['_db'].get(startingNode) + if (rawNode === undefined) return + + const node = decodeBinaryNode(rawNode) + // If current node is root, push '0x' for path and node hash for commitment + equalsBytes(startingNode, tree.root()) && entries.push(['0x', bytesToHex(startingNode)]) + if (node instanceof InternalBinaryNode) { + const children = node.children.filter((value) => value !== null) as ChildBinaryNode[] + + // Push non-null children paths and hashes + for (const child of children) { + entries.push([child.path.join(''), bytesToHex(child.hash)]) + } + + // Recursively call dumpNodeHashes on each child node + const childPaths = ( + await Promise.all(children.map((value) => dumpNodeHashes(tree, value.hash))) + ) + .filter((val) => val !== undefined) + .flat(1) + + // Add all child paths and hashes to entries + entries = [...entries, ...childPaths] as [string, PrefixedHexString][] + } + + return entries +} diff --git a/packages/binarytree/test/binarytree.spec.ts b/packages/binarytree/test/binarytree.spec.ts new file mode 100644 index 0000000000..744ec1d614 --- /dev/null +++ b/packages/binarytree/test/binarytree.spec.ts @@ -0,0 +1,554 @@ +import { bytesToHex, equalsBytes, 
hexToBytes } from '@ethereumjs/util' +import { blake3 } from '@noble/hashes/blake3' +import { assert, describe, expect, it } from 'vitest' + +import { createBinaryTree } from '../src/index.js' +import { dumpLeafValues, dumpNodeHashes } from '../src/util.js' + +describe('insert', () => { + it('should not destroy a previous root', async () => { + const tree = await createBinaryTree({ useRootPersistence: true }) + await tree.put( + hexToBytes('0x318dea512b6f3237a2d4763cf49bf26de3b617fb0cabe38a97807a5549df4d'), + [0], + [hexToBytes('0x0100000000000000000000000000000000000000000000000000000000000000')], + ) + const root = tree.root() + + const tree2 = await createBinaryTree({ + db: tree['_db'].db, + useRootPersistence: true, + root, + }) + assert.deepEqual(tree2.root(), root) + + const tree3 = tree.shallowCopy() + assert.deepEqual(tree3.root(), root) + }) + it('should set and check root', async () => { + const tree = await createBinaryTree() + await tree.put( + hexToBytes('0x318dea512b6f3237a2d4763cf49bf26de3b617fb0cabe38a97807a5549df4d'), + [0], + [hexToBytes('0x0100000000000000000000000000000000000000000000000000000000000000')], + ) + const root = tree.root() + + const tree2 = await createBinaryTree({ + db: tree['_db'].db, + }) + + tree2.root(root) + assert.deepEqual(tree2.root(), root) + + const rootExists = await tree2.checkRoot(root) + assert.isTrue(rootExists) + + const invalidRootExists = await tree2.checkRoot(Uint8Array.from([1, 2, 3, 4])) + assert.isFalse(invalidRootExists) + + tree2.root(null) + assert.deepEqual(tree2.root(), tree2.EMPTY_TREE_ROOT) + + try { + tree2.root(Uint8Array.from([1, 2, 3])) + assert.fail('Should have thrown an error') + } catch (e: any) { + assert.equal(e.message, `Invalid root length. 
Roots are ${tree['_hashLen']} bytes`) + } + + // TODO: Determine if this is the correct behavior + // const nullRootExists = await tree2.checkRoot(tree2.EMPTY_TREE_ROOT) + // assert.isTrue(nullRootExists) + }) + + it('should put, retrieve and compute the correct state root', async () => { + const tree = await createBinaryTree() + + const key = hexToBytes(`0x${'00'.repeat(32)}`) + const value = hexToBytes(`0x${'01'.repeat(32)}`) + + // Derive the stem from the key by by taking the first 31 bytes. + const stem = key.slice(0, 31) + + // Extract the index from the last byte of the key. + const index = key[31] + + await tree.put(stem, [index], [value]) + const [retrievedValue] = await tree.get(stem, [index]) + assert.exists(retrievedValue, 'Retrieved value should exist') + assert.isTrue( + equalsBytes(retrievedValue!, value), + 'Retrieved value should match inserted value', + ) + + // Assert that the computed state root matches the expected hash. + assert.equal( + bytesToHex(tree.root()), + '0x694545468677064fd833cddc8455762fe6b21c6cabe2fc172529e0f573181cd5', + ) + }) + + it('should correctly compute state root and retrieve values for two entries differing in the first bit', async () => { + const tree = await createBinaryTree() + + // First entry: key is 32 bytes of 0, value is 32 bytes of 1. + const key1 = hexToBytes(`0x${'00'.repeat(32)}`) + const value1 = hexToBytes(`0x${'01'.repeat(32)}`) + + // Second entry: key is 0x80 followed by 31 bytes of 0, value is 32 bytes of 2. + const key2 = hexToBytes(`0x${'80' + '00'.repeat(31)}`) + const value2 = hexToBytes(`0x${'02'.repeat(32)}`) + + const stem1 = key1.slice(0, 31) + const index1 = key1[31] + const stem2 = key2.slice(0, 31) + const index2 = key2[31] + + // Insert both entries into the tree. 
+ await tree.put(stem1, [index1], [value1]) + await tree.put(stem2, [index2], [value2]) + + const [retrievedValue1] = await tree.get(stem1, [index1]) + const [retrievedValue2] = await tree.get(stem2, [index2]) + + // Retrieved values should exist + assert.exists(retrievedValue1, 'Value for key1 should exist') + assert.exists(retrievedValue2, 'Value for key2 should exist') + + // Check that the computed state root matches the expected hash. + assert.equal( + bytesToHex(tree.root()), + '0x85fc622076752a6fcda2c886c18058d639066a83473d9684704b5a29455ed2ed', + ) + }) + + it('should handle one stem with colocated values', async () => { + const tree = await createBinaryTree() + + const stem = hexToBytes(`0x${'00'.repeat(31)}`) + const suffixes = [0x03, 0x04, 0x09, 0xff] + const values = [ + hexToBytes(`0x${'01'.repeat(32)}`), + hexToBytes(`0x${'02'.repeat(32)}`), + hexToBytes(`0x${'03'.repeat(32)}`), + hexToBytes(`0x${'04'.repeat(32)}`), + ] + + await tree.put(stem, suffixes, values) + + for (let i = 0; i < suffixes.length; i++) { + const [retrievedValue] = await tree.get(stem, [suffixes[i]]) + assert.exists(retrievedValue, `Value at suffix ${suffixes[i]} should exist`) + assert.isTrue( + equalsBytes(retrievedValue!, values[i]), + `Value at suffix ${suffixes[i]} should match inserted value`, + ) + } + }) + + it('should handle two stems with colocated values', async () => { + const tree = await createBinaryTree() + + // Stem 1: 0...0 + const stem1 = hexToBytes(`0x${'00'.repeat(31)}`) + const suffixes1 = [0x03, 0x04] + const values1 = [hexToBytes(`0x${'01'.repeat(32)}`), hexToBytes(`0x${'02'.repeat(32)}`)] + + // Stem 2: 10...0 + const stem2 = hexToBytes(`0x${'80'.repeat(31)}`) + const suffixes2 = [0x03, 0x04] + const values2 = [hexToBytes(`0x${'01'.repeat(32)}`), hexToBytes(`0x${'02'.repeat(32)}`)] + + await tree.put(stem1, suffixes1, values1) + await tree.put(stem2, suffixes2, values2) + + for (let i = 0; i < suffixes1.length; i++) { + const [retrievedValue1] = await 
tree.get(stem1, [suffixes1[i]]) + const [retrievedValue2] = await tree.get(stem2, [suffixes2[i]]) + assert.exists(retrievedValue1, `Value at suffix ${suffixes1[i]} should exist`) + assert.exists(retrievedValue2, `Value at suffix ${suffixes2[i]} should exist`) + assert.isTrue( + equalsBytes(retrievedValue1!, values1[i]), + `Value at suffix ${suffixes1[i]} should match inserted value`, + ) + assert.isTrue( + equalsBytes(retrievedValue2!, values2[i]), + `Value at suffix ${suffixes2[i]} should match inserted value`, + ) + } + }) + it('should handle two keys that match in the first 42 bits', async () => { + const tree = await createBinaryTree() + + // Two keys with the same prefix of 42 bits + const key1 = hexToBytes(`0x${'00'.repeat(5)}${'C0'.repeat(27)}`) + const key2 = hexToBytes(`0x${'00'.repeat(5)}E0${'00'.repeat(26)}`) + + const value1 = hexToBytes(`0x${'01'.repeat(32)}`) + const value2 = hexToBytes(`0x${'02'.repeat(32)}`) + + const stem1 = key1.slice(0, 31) + const index1 = key1[31] + const stem2 = key2.slice(0, 31) + const index2 = key2[31] + + await tree.put(stem1, [index1], [value1]) + await tree.put(stem2, [index2], [value2]) + + const [retrievedValue1] = await tree.get(stem1, [index1]) + const [retrievedValue2] = await tree.get(stem2, [index2]) + + assert.exists(retrievedValue1, 'Value for key1 should exist') + assert.exists(retrievedValue2, 'Value for key2 should exist') + assert.isTrue( + equalsBytes(retrievedValue1!, value1), + 'Value for key1 should match inserted value', + ) + assert.isTrue( + equalsBytes(retrievedValue2!, value2), + 'Value for key2 should match inserted value', + ) + }) + + it('should handle three keys, and compute a consistent root regardless of insert ordering', async () => { + const tree1 = await createBinaryTree() + + const key1 = hexToBytes(`0x${'C0'.repeat(32)}`) + const key2 = hexToBytes(`0xE0${'00'.repeat(31)}`) + const key3 = hexToBytes(`0x00${'01'.repeat(31)}`) + + const value1 = hexToBytes(`0x${'01'.repeat(32)}`) + const 
value2 = hexToBytes(`0x${'02'.repeat(32)}`) + const value3 = hexToBytes(`0x${'03'.repeat(32)}`) + + const stem1 = key1.slice(0, 31) + const index1 = key1[31] + const stem2 = key2.slice(0, 31) + const index2 = key2[31] + const stem3 = key3.slice(0, 31) + const index3 = key3[31] + + await tree1.put(stem1, [index1], [value1]) + await tree1.put(stem2, [index2], [value2]) + await tree1.put(stem3, [index3], [value3]) + + const [retrievedValue1] = await tree1.get(stem1, [index1]) + const [retrievedValue2] = await tree1.get(stem2, [index2]) + const [retrievedValue3] = await tree1.get(stem3, [index3]) + + assert.exists(retrievedValue1, 'Value for key1 should exist') + assert.exists(retrievedValue2, 'Value for key2 should exist') + assert.exists(retrievedValue3, 'Value for key3 should exist') + assert.isTrue( + equalsBytes(retrievedValue1!, value1), + 'Value for key1 should match inserted value', + ) + assert.isTrue( + equalsBytes(retrievedValue2!, value2), + 'Value for key2 should match inserted value', + ) + assert.isTrue( + equalsBytes(retrievedValue3!, value3), + 'Value for key3 should match inserted value', + ) + + // We should end up with the same tree root regardless of the order of the put operations + const tree2 = await createBinaryTree() + await tree2.put(stem3, [index3], [value3]) + await tree2.put(stem1, [index1], [value1]) + await tree2.put(stem2, [index2], [value2]) + + assert.isTrue(equalsBytes(tree1.root(), tree2.root())) + }) + + it('should handle three keys, when all three have partial match', async () => { + const tree1 = await createBinaryTree() + + const key1 = hexToBytes(`0x${'C0'.repeat(32)}`) + const key2 = hexToBytes(`0xE0${'00'.repeat(31)}`) + const key3 = hexToBytes(`0xE0${'01'.repeat(31)}`) + + const value1 = hexToBytes(`0x${'01'.repeat(32)}`) + const value2 = hexToBytes(`0x${'02'.repeat(32)}`) + const value3 = hexToBytes(`0x${'03'.repeat(32)}`) + + const stem1 = key1.slice(0, 31) + const index1 = key1[31] + const stem2 = key2.slice(0, 31) + 
const index2 = key2[31] + const stem3 = key3.slice(0, 31) + const index3 = key3[31] + + await tree1.put(stem1, [index1], [value1]) + await tree1.put(stem2, [index2], [value2]) + await tree1.put(stem3, [index3], [value3]) + const [retrievedValue1] = await tree1.get(stem1, [index1]) + const [retrievedValue2] = await tree1.get(stem2, [index2]) + const [retrievedValue3] = await tree1.get(stem3, [index3]) + + assert.exists(retrievedValue1, 'Value for key1 should exist') + assert.exists(retrievedValue2, 'Value for key2 should exist') + assert.exists(retrievedValue3, 'Value for key3 should exist') + assert.isTrue( + equalsBytes(retrievedValue1!, value1), + 'Value for key1 should match inserted value', + ) + assert.isTrue( + equalsBytes(retrievedValue2!, value2), + 'Value for key2 should match inserted value', + ) + assert.isTrue( + equalsBytes(retrievedValue3!, value3), + 'Value for key3 should match inserted value', + ) + }) + + it('should handle checkpointing and reverting', async () => { + const tree1 = await createBinaryTree() + + const key1 = hexToBytes(`0x${'C0'.repeat(32)}`) + const key2 = hexToBytes(`0xE0${'00'.repeat(31)}`) + const key3 = hexToBytes(`0xE0${'01'.repeat(31)}`) + + const value1 = hexToBytes(`0x${'01'.repeat(32)}`) + const value2 = hexToBytes(`0x${'02'.repeat(32)}`) + const value3 = hexToBytes(`0x${'03'.repeat(32)}`) + + const stem1 = key1.slice(0, 31) + const index1 = key1[31] + const stem2 = key2.slice(0, 31) + const index2 = key2[31] + const stem3 = key3.slice(0, 31) + const index3 = key3[31] + + await tree1.put(stem1, [index1], [value1]) + tree1.checkpoint() + assert.isTrue(tree1.hasCheckpoints()) + const root = tree1.root() + + await tree1.put(stem2, [index2], [value2]) + await tree1.put(stem3, [index3], [value3]) + + const root2 = tree1.root() + assert.notDeepEqual(root, root2) + + await tree1.revert() + assert.isFalse(tree1.hasCheckpoints()) + assert.deepEqual(tree1.root(), root) + + await tree1.put(stem2, [index2], [value2]) + 
tree1.checkpoint() + + const root3 = tree1.root() + assert.deepEqual(root3, tree1.root()) + + assert.isTrue(tree1.hasCheckpoints()) + + await tree1.commit() + assert.isFalse(tree1.hasCheckpoints()) + const root4 = tree1.root() + + assert.notDeepEqual(root4, root) + + try { + await tree1.revert() + assert.fail('Should have thrown an error') + } catch (e: any) { + assert.equal(e.message, 'trying to revert when not checkpointed') + } + + await tree1.put(stem3, [index3], [value3]) + + tree1.checkpoint() + assert.isTrue(tree1.hasCheckpoints()) + + tree1.flushCheckpoints() + assert.isFalse(tree1.hasCheckpoints()) + + try { + await tree1.commit() + assert.fail('Should have thrown an error') + } catch (e: any) { + assert.equal(e.message, 'trying to commit when not checkpointed') + } + }) + + it('should handle 100 similar key/value pairs hashed with blake3', async () => { + const tree1 = await createBinaryTree() + + // Create an array of 100 deterministic key/value pairs by hashing sequential keys. + const keyValuePairs = [] + for (let i = 0; i < 100; i++) { + const key = new Uint8Array(32).fill(0) + key[31] = i // vary the last byte to differentiate keys + + const hashedKey = blake3(key) + + // Create a value also based on i (filled with 0x01 and ending with i) + const value = new Uint8Array(32).fill(1) + value[31] = i + + keyValuePairs.push({ originalKey: key, hashedKey, value }) + } + + // Insert each key/value pair into the tree. + // The hashedKey is split into a 31-byte stem and a 1-byte index. + for (const { hashedKey, value } of keyValuePairs) { + const stem = hashedKey.slice(0, 31) + const index = hashedKey[31] + await tree1.put(stem, [index], [value]) + } + + // Retrieve and verify each key/value pair from the tree.
+ for (const { originalKey, hashedKey, value } of keyValuePairs) { + const stem = hashedKey.slice(0, 31) + const index = hashedKey[31] + const [retrievedValue] = await tree1.get(stem, [index]) + assert.exists( + retrievedValue, + `Value for key ${bytesToHex(hashedKey)} | unhashed: ${bytesToHex(originalKey)} should exist`, + ) + assert.isTrue( + equalsBytes(retrievedValue!, value), + `Value for key ${bytesToHex(hashedKey)} | unhashed: ${bytesToHex(originalKey)} should match the inserted value`, + ) + } + // Create a second tree and insert the same key/value pairs in reverse order + const tree2 = await createBinaryTree() + + // Insert in reverse order + for (let i = keyValuePairs.length - 1; i >= 0; i--) { + const { hashedKey, value } = keyValuePairs[i] + const stem = hashedKey.slice(0, 31) + const index = hashedKey[31] + await tree2.put(stem, [index], [value]) + } + + // Verify the roots match despite different insertion orders + assert.deepEqual( + tree1.root(), + tree2.root(), + 'Tree roots should match regardless of insertion order', + ) + + // Insert one more value into both trees, under the stem of the unhashed key (only hashed-key stems were inserted above), and verify the roots still match + await tree1.put( + keyValuePairs[0].originalKey.slice(0, 31), + [6], + [hexToBytes(`0x${'06'.repeat(32)}`)], + ) + await tree2.put( + keyValuePairs[0].originalKey.slice(0, 31), + [6], + [hexToBytes(`0x${'06'.repeat(32)}`)], + ) + assert.deepEqual(tree1.root(), tree2.root()) + }) + it('should dump leaf values and node hashes', async () => { + const tree1 = await createBinaryTree() + + // Create an array of 100 deterministic key/value pairs by hashing sequential keys.
+ const keyValuePairs = [] + for (let i = 0; i < 100; i++) { + const key = new Uint8Array(32).fill(0) + key[31] = i // vary the last byte to differentiate keys + + const hashedKey = blake3(key) + + // Create a value also based on i (filled with 0x01 and ending with i) + const value = new Uint8Array(32).fill(1) + value[31] = i + + keyValuePairs.push({ originalKey: key, hashedKey, value }) + } + + // Insert each key/value pair into the tree. + // The hashedKey is split into a 31-byte stem and a 1-byte index. + for (const { hashedKey, value } of keyValuePairs) { + const stem = hashedKey.slice(0, 31) + const index = hashedKey[31] + await tree1.put(stem, [index], [value]) + } + + const leafValues = await dumpLeafValues(tree1, tree1.root()) + assert.exists(leafValues) + assert.equal(leafValues!.length, 100) + + const expectedValues = keyValuePairs.map(({ value }) => bytesToHex(value)).sort() + const actualValues = leafValues!.map(([_, value]) => value).sort() + assert.deepEqual(actualValues, expectedValues) + + const expectedKeys = keyValuePairs.map(({ hashedKey }) => bytesToHex(hashedKey)).sort() + const actualKeys = leafValues!.map(([key]) => key).sort() + assert.deepEqual(actualKeys, expectedKeys) + + const nodeHashes = await dumpNodeHashes(tree1, tree1.root()) + assert.exists(nodeHashes) + expect(nodeHashes!.length).toBeGreaterThan(100) + assert.equal(nodeHashes![0][1], bytesToHex(tree1.root())) + }) + + it('should update value when inserting a duplicate key', async () => { + const tree = await createBinaryTree() + + const key = hexToBytes(`0x${'01'.repeat(32)}`) + const value1 = hexToBytes(`0x${'01'.repeat(32)}`) + const value2 = hexToBytes(`0x${'02'.repeat(32)}`) + + const stem = key.slice(0, 31) + const index = key[31] + + await tree.put(stem, [index], [value1]) + await tree.put(stem, [index], [value2]) + + const [retrievedValue] = await tree.get(stem, [index]) + + assert.exists(retrievedValue, 'Retrieved value should exist') + assert.isTrue( +
equalsBytes(retrievedValue!, value2), + 'Retrieved value should match the updated value', + ) + }) + + // Not sure if this test targets expected behavior (since we can't delete values?) + it.skip('should recover previous root when adding and then deleting a value', async () => { + const tree = await createBinaryTree() + + const key1 = hexToBytes(`0x${'01'.repeat(32)}`) + const key2 = hexToBytes(`0x${'02'.repeat(32)}`) + const value1 = hexToBytes(`0x${'01'.repeat(32)}`) + const value2 = hexToBytes(`0x${'02'.repeat(32)}`) + + const stem1 = key1.slice(0, 31) + const index1 = key1[31] + const stem2 = key2.slice(0, 31) + const index2 = key2[31] + + await tree.put(stem1, [index1], [value1]) + + const initialRoot = tree.root() + + await tree.put(stem2, [index2], [value2]) + + const updatedRoot = tree.root() + assert.isFalse( + equalsBytes(initialRoot, updatedRoot), + 'Updated root should not match initial root', + ) + + await tree.del(stem2, [index2]) + + const recoveredRoot = tree.root() + + const [retrievedValue1] = await tree.get(stem1, [index1]) + const [retrievedValue2] = await tree.get(stem2, [index2]) + + assert.exists(retrievedValue1, 'Retrieved value should exist') + assert.notExists(retrievedValue2, 'Deleted value should not exist') + assert.isTrue( + equalsBytes(initialRoot, recoveredRoot), + 'Recovered root should match initial root', + ) + }) +}) diff --git a/packages/binarytree/test/node/internalNode.spec.ts b/packages/binarytree/test/node/internalNode.spec.ts new file mode 100644 index 0000000000..76af3ff043 --- /dev/null +++ b/packages/binarytree/test/node/internalNode.spec.ts @@ -0,0 +1,47 @@ +import { equalsBytes, hexToBytes } from '@ethereumjs/util' +import { assert, describe, it } from 'vitest' + +import { BinaryNodeType, InternalBinaryNode, decodeBinaryNode } from '../../src/index.js' +import {} from '../../src/types.js' + +describe('InternalBinaryNode', () => { + it('should round-trip encode and decode an internal node', () => { + // Create dummy child
pointers: + const leftCanonicalChild = { + hash: hexToBytes('0x' + '11'.repeat(32)), + path: [0, 1, 1, 0, 1, 0], + } + const rightCanonicalChild = { + hash: hexToBytes('0x' + '22'.repeat(32)), + path: [1, 1, 0, 0], + } + const node = InternalBinaryNode.create([leftCanonicalChild, rightCanonicalChild]) + const serialized = node.serialize() + const decoded = decodeBinaryNode(serialized) + + // Verify the type + assert.equal(decoded.type, BinaryNodeType.Internal) + const [leftRecoveredChild, rightRecoveredChild] = (decoded as InternalBinaryNode).children + assert.exists(leftRecoveredChild, 'Left child should exist') + assert.exists(rightRecoveredChild, 'Right child should exist') + assert.isTrue( + equalsBytes(leftRecoveredChild!.hash, leftCanonicalChild.hash), + 'Left child hash should round-trip', + ) + + assert.deepEqual( + leftRecoveredChild!.path, + leftCanonicalChild.path, + 'Left child path should round-trip', + ) + assert.isTrue( + equalsBytes(rightRecoveredChild!.hash, rightCanonicalChild.hash), + 'Right child hash should round-trip', + ) + assert.deepEqual( + rightRecoveredChild!.path, + rightCanonicalChild.path, + 'Right child path should round-trip', + ) + }) +}) diff --git a/packages/binarytree/test/node/stemNode.spec.ts b/packages/binarytree/test/node/stemNode.spec.ts new file mode 100644 index 0000000000..42fe1f824d --- /dev/null +++ b/packages/binarytree/test/node/stemNode.spec.ts @@ -0,0 +1,68 @@ +import { equalsBytes, hexToBytes } from '@ethereumjs/util' +import { assert, describe, it } from 'vitest' + +import { BinaryNodeType, decodeBinaryNode } from '../../src/index.js' +import { StemBinaryNode } from '../../src/node/stemNode.js' + +describe('StemBinaryNode', () => { + it('should round-trip encode and decode a stem node', () => { + // Create a 31-byte stem (for example, all 0x01 bytes) + const stem = hexToBytes('0x' + '01'.repeat(31)) + + // Create an array of 256 possible values (initially all null) + const values: (Uint8Array | null)[] = new 
Array(256).fill(null) + // Set a few non-null values at specific indices + const value3 = hexToBytes('0x' + '02'.repeat(32)) + const value100 = hexToBytes('0x' + '03'.repeat(32)) + const value255 = hexToBytes('0x' + '04'.repeat(32)) + values[3] = value3 + values[100] = value100 + values[255] = value255 + + // Create the stem node with the given stem and values array. + const node = new StemBinaryNode({ stem, values }) + + // Serialize the node. + const serialized = node.serialize() + + // Decode the node using the provided decodeBinaryNode helper. + const decoded = decodeBinaryNode(serialized) + // Verify that the decoded node is of type Stem. + assert.equal(decoded.type, BinaryNodeType.Stem, 'Node type should be Stem') + + // Cast the decoded node to StemBinaryNode. + const recovered = decoded as StemBinaryNode + + // Verify that the stem round-trips. + assert.isTrue(equalsBytes(recovered.stem, stem), 'Stem should round-trip correctly') + + // Verify that the values array has the correct length. + assert.equal(recovered.values.length, 256, 'Values array should have 256 elements') + + // Check that the non-null values round-trip. + assert.exists(recovered.values[3], 'Value at index 3 should exist') + assert.isTrue( + equalsBytes(recovered.values[3]!, value3), + 'Value at index 3 should round-trip correctly', + ) + + assert.exists(recovered.values[100], 'Value at index 100 should exist') + assert.isTrue( + equalsBytes(recovered.values[100]!, value100), + 'Value at index 100 should round-trip correctly', + ) + + assert.exists(recovered.values[255], 'Value at index 255 should exist') + assert.isTrue( + equalsBytes(recovered.values[255]!, value255), + 'Value at index 255 should round-trip correctly', + ) + + // Verify that all other indexes remain null. 
+ for (let i = 0; i < 256; i++) { + if (i !== 3 && i !== 100 && i !== 255) { + assert.isNull(recovered.values[i], `Value at index ${i} should be null`) + } + } + }) +}) diff --git a/packages/binarytree/tsconfig.benchmarks.json b/packages/binarytree/tsconfig.benchmarks.json new file mode 100644 index 0000000000..87f297c0ec --- /dev/null +++ b/packages/binarytree/tsconfig.benchmarks.json @@ -0,0 +1,4 @@ +{ + "extends": "../../config/tsconfig.prod.cjs.json", + "include": ["benchmarks/*.ts"] +} diff --git a/packages/binarytree/tsconfig.json b/packages/binarytree/tsconfig.json new file mode 100644 index 0000000000..03ee66c13b --- /dev/null +++ b/packages/binarytree/tsconfig.json @@ -0,0 +1,7 @@ +{ + "extends": "../../config/tsconfig.json", + "compilerOptions": { + "outDir": "./dist" + }, + "include": ["src/**/*.ts", "test/**/*.spec.ts"] +} diff --git a/packages/binarytree/tsconfig.lint.json b/packages/binarytree/tsconfig.lint.json new file mode 100644 index 0000000000..3698f4f0be --- /dev/null +++ b/packages/binarytree/tsconfig.lint.json @@ -0,0 +1,3 @@ +{ + "extends": "../../config/tsconfig.lint.json" +} diff --git a/packages/binarytree/tsconfig.prod.cjs.json b/packages/binarytree/tsconfig.prod.cjs.json new file mode 100644 index 0000000000..dc2aca1da0 --- /dev/null +++ b/packages/binarytree/tsconfig.prod.cjs.json @@ -0,0 +1,10 @@ +{ + "extends": "../../config/tsconfig.prod.cjs.json", + "compilerOptions": { + "rootDir": "src", + "outDir": "dist/cjs", + "composite": true + }, + "include": ["src/**/*.ts"], + "references": [{ "path": "../util/tsconfig.prod.cjs.json" }] +} diff --git a/packages/binarytree/tsconfig.prod.esm.json b/packages/binarytree/tsconfig.prod.esm.json new file mode 100644 index 0000000000..a2bc268c50 --- /dev/null +++ b/packages/binarytree/tsconfig.prod.esm.json @@ -0,0 +1,10 @@ +{ + "extends": "../../config/tsconfig.prod.esm.json", + "compilerOptions": { + "rootDir": "src", + "outDir": "dist/esm", + "composite": true + }, + "include": ["src/**/*.ts"], 
+ "references": [{ "path": "../util/tsconfig.prod.esm.json" }] +} diff --git a/packages/binarytree/typedoc.cjs b/packages/binarytree/typedoc.cjs new file mode 100644 index 0000000000..701fee055f --- /dev/null +++ b/packages/binarytree/typedoc.cjs @@ -0,0 +1,6 @@ +module.exports = { + extends: '../../config/typedoc.cjs', + entryPoints: ['src'], + out: 'docs', + exclude: ['test/**/*.ts'], +} diff --git a/packages/util/src/bytes.ts b/packages/util/src/bytes.ts index 4fa767f98d..9bb589fd43 100644 --- a/packages/util/src/bytes.ts +++ b/packages/util/src/bytes.ts @@ -494,10 +494,45 @@ export function hexToBigInt(input: PrefixedHexString): bigint { return bytesToBigInt(hexToBytes(isHexString(input) ? input : `0x${input}`)) } +/** + * Converts a Uint8Array of bytes into an array of bits. + * @param {Uint8Array} bytes - The input byte array. + * @param {number} [bitLength] - The number of bits to extract from the input bytes, starting at the most significant bit of the first byte. Defaults to `bytes.length * 8`. + * @returns {number[]} An array of bits (each 0 or 1) corresponding to the input bytes. + */ +export function bytesToBits(bytes: Uint8Array, bitLength?: number): number[] { + const bits: number[] = [] + + for (let i = 0; i < (bitLength ?? bytes.length * 8); i++) { + const byteIndex = Math.floor(i / 8) + const bitIndex = 7 - (i % 8) + bits.push((bytes[byteIndex] >> bitIndex) & 1) + } + + return bits +} + +/** + * Converts an array of bits into a Uint8Array. + * The input bits are grouped into sets of 8, with the first bit in each group being the most significant. + * @param {number[]} bits - The input array of bits (each should be 0 or 1). If its length is not a multiple of 8, the last byte is padded with trailing zero bits. + * @returns {Uint8Array} A Uint8Array constructed from the input bits.
+ */ +export function bitsToBytes(bits: number[]): Uint8Array { + const numBytes = Math.ceil(bits.length / 8) // Round up so a trailing partial group of bits still gets its own byte + const byteData = new Uint8Array(numBytes) + + for (let i = 0; i < bits.length; i++) { + const byteIndex = Math.floor(i / 8) + const bitIndex = 7 - (i % 8) + byteData[byteIndex] |= bits[i] << bitIndex + } + + return byteData +} + /** * Compares two byte arrays and returns the count of consecutively matching items from the start. - * - * @function * @param {Uint8Array} bytes1 - The first Uint8Array to compare. * @param {Uint8Array} bytes2 - The second Uint8Array to compare. * @returns {number} The count of consecutively matching items from the start. @@ -516,3 +551,43 @@ export function matchingBytesLength(bytes1: Uint8Array, bytes2: Uint8Array): num } return count } + +/** + * Compares two arrays of bits (0 or 1) and returns the count of consecutively matching bits from the start. + * @param {number[]} bits1 - The first array of bits (one array element per bit). + * @param {number[]} bits2 - The second array of bits (one array element per bit). + * @returns {number} The count of consecutively matching bits from the start (at most the length of the shorter array). + */ +export function matchingBitsLength(bits1: number[], bits2: number[]): number { + let count = 0 + const minLength = Math.min(bits1.length, bits2.length) + for (let i = 0; i < minLength; i++) { + if (bits1[i] === bits2[i]) { + count++ + } else { + return count + } + } + return count +} + +/** + * Checks whether two arrays of bits are equal. + * + * Two arrays are considered equal if they have the same length and each corresponding element is identical. + * + * @param {number[]} bits1 - The first bits array. + * @param {number[]} bits2 - The second bits array. + * @returns {boolean} True if the arrays are equal; otherwise, false.
+ */ +export function equalsBits(bits1: number[], bits2: number[]): boolean { + if (bits1.length !== bits2.length) { + return false + } + for (let i = 0; i < bits1.length; i++) { + if (bits1[i] !== bits2[i]) { + return false + } + } + return true +} diff --git a/packages/util/test/bytes.spec.ts b/packages/util/test/bytes.spec.ts index 43cd04de3e..56831f1fbb 100644 --- a/packages/util/test/bytes.spec.ts +++ b/packages/util/test/bytes.spec.ts @@ -6,10 +6,13 @@ import { bigIntToBytes, bigIntToHex, bigIntToUnpaddedBytes, + bitsToBytes, bytesToBigInt, + bytesToBits, bytesToHex, bytesToInt, createAddressFromString, + equalsBits, equalsBytes, fromSigned, hexToBytes, @@ -17,6 +20,7 @@ import { intToHex, intToUnpaddedBytes, isZeroAddress, + matchingBitsLength, matchingBytesLength, setLengthLeft, setLengthRight, @@ -518,3 +522,354 @@ describe('matchingBytesLength', () => { assert.equal(matchingBytesLength(bytes1, bytes2), 1000000) }) }) + +describe('matchingBitsLength', () => { + it('should return 0 when both arrays are empty', () => { + const bits1: number[] = [] + const bits2: number[] = [] + assert.equal(matchingBitsLength(bits1, bits2), 0) + }) + + it('should return 0 when one of the arrays is empty', () => { + const bits1: number[] = [1, 0, 1, 1, 0, 0, 1, 1] // Example bits + const bits2: number[] = [] + assert.equal(matchingBitsLength(bits1, bits2), 0) + }) + + it('should return 0 when arrays have no matching bits in the first byte', () => { + // 0xff = 11111111, 0x7f = 01111111: first bit mismatches. + const bits1: number[] = [1, 1, 1, 1, 1, 1, 1, 1] // 0xff + const bits2: number[] = [0, 1, 1, 1, 1, 1, 1, 1] // 0x7f + assert.equal(matchingBitsLength(bits1, bits2), 0) + }) + + it('should return correct count for partially matching bits in the first byte', () => { + // 0xff = 11111111, 0xf0 = 11110000: + // The first four bits match, then the 5th bit mismatches. 
+ const bits1: number[] = [1, 1, 1, 1, 1, 1, 1, 1] // 0xff + const bits2: number[] = [1, 1, 1, 1, 0, 0, 0, 0] // 0xf0 + assert.equal(matchingBitsLength(bits1, bits2), 4) + }) + + it('should handle arrays with matching bits across multiple bytes', () => { + // First byte: 0xaa (10101010) matches exactly. + // Second byte: 0xff (11111111) vs 0xf0 (11110000) match for the first 4 bits. + // Total matching bits: 8 (from the first byte) + 4 = 12. + const bits1: number[] = [ + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 0, // 0xaa + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, // 0xff + ] + const bits2: number[] = [ + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 0, // 0xaa + 1, + 1, + 1, + 1, + 0, + 0, + 0, + 0, // 0xf0 + ] + assert.equal(matchingBitsLength(bits1, bits2), 12) + }) + + it('should handle arrays with same elements but different lengths', () => { + // The first three bytes match exactly (3 * 8 = 24 bits), even if the second array is longer. + const bits1: number[] = [ + 0, + 0, + 0, + 1, + 0, + 0, + 1, + 0, // 0x12 + 0, + 0, + 1, + 1, + 0, + 1, + 0, + 0, // 0x34 + 0, + 1, + 0, + 1, + 0, + 1, + 1, + 0, // 0x56 + ] + const bits2: number[] = [ + 0, + 0, + 0, + 1, + 0, + 0, + 1, + 0, // 0x12 + 0, + 0, + 1, + 1, + 0, + 1, + 0, + 0, // 0x34 + 0, + 1, + 0, + 1, + 0, + 1, + 1, + 0, // 0x56 + 0, + 1, + 1, + 1, + 1, + 0, + 0, + 0, // 0x78 (extra) + ] + assert.equal(matchingBitsLength(bits1, bits2), 24) + }) + + it('should handle arrays with matching bits at the start then mismatch mid-byte', () => { + // First two bytes match fully (16 bits). + // Third byte: 0x56 = 01010110, 0x00 = 00000000. + // Bit-by-bit in the third byte: the most significant bit (bit 7) is 0 in both, then bit 6: 1 vs 0 (mismatch). + // Total matching bits: 16 (first two bytes) + 1 (first bit of third byte) = 17. 
+ const bits1: number[] = [ + 0, + 0, + 0, + 1, + 0, + 0, + 1, + 0, // 0x12 + 0, + 0, + 1, + 1, + 0, + 1, + 0, + 0, // 0x34 + 0, + 1, + 0, + 1, + 0, + 1, + 1, + 0, // 0x56 + ] + const bits2: number[] = [ + 0, + 0, + 0, + 1, + 0, + 0, + 1, + 0, // 0x12 + 0, + 0, + 1, + 1, + 0, + 1, + 0, + 0, // 0x34 + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x00 + ] + assert.equal(matchingBitsLength(bits1, bits2), 17) + }) + + it('should handle arrays with a large number of elements', () => { + const length = 100000 * 8 // Convert to bits + const arr1: number[] = [] + const arr2: number[] = [] + for (let i = 0; i < length; i++) { + arr1.push(i % 2) // Alternating 0,1 pattern + arr2.push(i % 2) // Same pattern + } + // Each bit matches exactly. + assert.equal(matchingBitsLength(arr1, arr2), length) + }) +}) + +describe('bytesToBits', () => { + it('should return an empty array for an empty Uint8Array', () => { + const input = new Uint8Array([]) + const expected: number[] = [] + assert.deepEqual(bytesToBits(input), expected) + }) + + it('should correctly convert 0x00 to eight 0 bits', () => { + const input = new Uint8Array([0x00]) + const expected = [0, 0, 0, 0, 0, 0, 0, 0] + assert.deepEqual(bytesToBits(input), expected) + }) + + it('should correctly convert 0xFF to eight 1 bits', () => { + const input = new Uint8Array([0xff]) + const expected = [1, 1, 1, 1, 1, 1, 1, 1] + assert.deepEqual(bytesToBits(input), expected) + }) + + it('should correctly convert 0x80 (10000000) to bits', () => { + const input = new Uint8Array([0x80]) + const expected = [1, 0, 0, 0, 0, 0, 0, 0] + assert.deepEqual(bytesToBits(input), expected) + }) + + it('should correctly convert multiple bytes to bits', () => { + // 0xAA = 10101010 and 0x55 = 01010101 + const input = new Uint8Array([0xaa, 0x55]) + const expected = [ + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 0, // for 0xAA + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 1, // for 0x55 + ] + assert.deepEqual(bytesToBits(input), expected) + }) +}) + +describe('bitsToBytes', 
() => { + it('should return an empty Uint8Array for an empty bits array', () => { + const input: number[] = [] + const expected = new Uint8Array([]) + assert.deepEqual(bitsToBytes(input), expected) + }) + + it('should correctly convert eight 0 bits to 0x00', () => { + const input = [0, 0, 0, 0, 0, 0, 0, 0] + const expected = new Uint8Array([0x00]) + assert.deepEqual(bitsToBytes(input), expected) + }) + + it('should correctly convert eight 1 bits to 0xFF', () => { + const input = [1, 1, 1, 1, 1, 1, 1, 1] + const expected = new Uint8Array([0xff]) + assert.deepEqual(bitsToBytes(input), expected) + }) + + it('should correctly convert bits representing 0x80 to a byte', () => { + const input = [1, 0, 0, 0, 0, 0, 0, 0] + const expected = new Uint8Array([0x80]) + assert.deepEqual(bitsToBytes(input), expected) + }) + + it('should correctly convert multiple groups of 8 bits to bytes', () => { + // 0xAA = 10101010 and 0x55 = 01010101 + const input = [ + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 0, // for 0xAA + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 1, // for 0x55 + ] + const expected = new Uint8Array([0xaa, 0x55]) + assert.deepEqual(bitsToBytes(input), expected) + }) + + it('should correctly convert a non-multiple-of-8 bit array, padding the last byte', () => { + // 110 (3 bits) should be stored as 0b11000000 in a single byte (0xC0) + const input = [1, 1, 0] + const expected = new Uint8Array([0b11000000]) // Expect padding with zeros + assert.deepEqual(bitsToBytes(input), expected) + }) +}) + +describe('Round-trip conversion from bytes to bits', () => { + it('should convert bytes to bits and back to the original bytes', () => { + const original = new Uint8Array([0x12, 0x34, 0x56, 0x78]) + const bits = bytesToBits(original) + const result = bitsToBytes(bits) + assert.deepEqual(result, original) + }) +}) + +describe('equalsBits', () => { + it('should return true for two empty bit arrays', () => { + const bits1: number[] = [] + const bits2: number[] = [] + assert.isTrue(equalsBits(bits1, 
bits2)) + }) + + it('should return true for two identical bit arrays', () => { + const bits1 = [1, 0, 1, 1, 0, 1] + const bits2 = [1, 0, 1, 1, 0, 1] + assert.isTrue(equalsBits(bits1, bits2)) + }) + + it('should return false for arrays with different lengths', () => { + const bits1 = [1, 0, 1] + const bits2 = [1, 0, 1, 0] + assert.isFalse(equalsBits(bits1, bits2)) + }) + + it('should return false if the arrays differ in at least one element', () => { + const bits1 = [1, 0, 1, 1] + const bits2 = [1, 1, 1, 1] + assert.isFalse(equalsBits(bits1, bits2)) + }) +})