Skip to content

Commit

Permalink
feat!: add CARv2 read support (data only, ignoring index)
Browse files Browse the repository at this point in the history
BREAKING because `CarDecoder#header()` now returns a
`Promise<CarHeader|CarV2Header>` instead of just a `Promise<CarHeader>`. This
may break TypeScript users who touch the header and make strong type
assumptions.
  • Loading branch information
rvagg committed Mar 4, 2022
1 parent 6eb1cd2 commit 99cd346
Show file tree
Hide file tree
Showing 10 changed files with 251 additions and 26 deletions.
12 changes: 12 additions & 0 deletions examples/test-examples.js
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,18 @@ Blocks:
{"blockLength":47,"blockOffset":572,"cid":{"/":"QmdwjhxpxzcMsR3qUuj7vUL8pbA7MgR3GAxWi2GLHjsKCT"},"length":82,"offset":537}
{"blockLength":4,"blockOffset":656,"cid":{"/":"bafkreidbxzk2ryxwwtqxem4l3xyyjvw35yu4tcct4cqeqxwo47zhxgxqwq"},"length":41,"offset":619}
{"blockLength":18,"blockOffset":697,"cid":{"/":"bafyreidj5idub6mapiupjwjsyyxhyhedxycv4vihfsicm2vt46o7morwlm"},"length":55,"offset":660}
`)
console.log('\u001b[32m✔\u001b[39m [example] dump-index ../test/go.carv2')
})
}).then(async () => {
await runExample('dump-index', ['../test/go.carv2']).then(({ stdout, stderr }) => {
assert.strictEqual(stderr, '')
assert.strictEqual(stdout,
`{"blockLength":47,"blockOffset":143,"cid":{"/":"QmfEoLyB5NndqeKieExd1rtJzTduQUPEV8TwAYcUiy3H5Z"},"length":82,"offset":108}
{"blockLength":99,"blockOffset":226,"cid":{"/":"QmczfirA7VEH7YVvKPTPoU69XM3qY4DC39nnTsWd4K3SkM"},"length":135,"offset":190}
{"blockLength":54,"blockOffset":360,"cid":{"/":"Qmcpz2FHJD7VAhg1fxFXdYJKePtkx1BsHuCrAgWVnaHMTE"},"length":89,"offset":325}
{"blockLength":4,"blockOffset":451,"cid":{"/":"bafkreifuosuzujyf4i6psbneqtwg2fhplc2wxptc5euspa2gn3bwhnihfu"},"length":41,"offset":414}
{"blockLength":7,"blockOffset":492,"cid":{"/":"bafkreifc4hca3inognou377hfhvu2xfchn2ltzi7yu27jkaeujqqqdbjju"},"length":44,"offset":455}
`)
console.log('\u001b[32m✔\u001b[39m [example] dump-index ../test/go.carv2')
})
Expand Down
19 changes: 17 additions & 2 deletions lib/coding.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,25 @@ export interface IteratorChannel<T> {
iterator: AsyncIterator<T>
}

export type CarHeader = { version: number, roots: CID[] }
/**
 * Header of a CARv1: the literal version `1` together with the list of root
 * CIDs. `version` is a literal type so that it can discriminate this shape
 * from a header whose `version` is `2`.
 */
export interface CarHeader {
version: 1,
roots: CID[]
}

/**
 * The fixed-size portion of a CARv2 header, as produced by `readV2Header()` in
 * lib/decoder.js: two 64-bit "characteristics" words followed by three 64-bit
 * fields that the decoder narrows to `number`.
 */
export interface CarV2FixedHeader {
// two unsigned 64-bit words, kept as bigint so no bits are lost
characteristics: [bigint, bigint],
// labelled "v1 offset" in the decoder, which compares it against the reader position
dataOffset: number,
// labelled "v1 size" in the decoder, which uses it to bound how much data is read
dataSize: number,
// labelled "index offset" in the decoder
// NOTE(review): decoded but not otherwise used by the code visible here (index is ignored)
indexOffset: number
}

/**
 * Header reported for a CARv2: the fixed CARv2 header fields plus the literal
 * version `2` and the root CIDs. `readHeader()` in lib/decoder.js builds this
 * by merging the fixed CARv2 header into the header of the inner CARv1
 * payload, so `roots` originates from that inner header.
 */
export interface CarV2Header extends CarV2FixedHeader {
version: 2,
roots: CID[],
}

export interface CarDecoder {
header(): Promise<CarHeader>
header(): Promise<CarHeader|CarV2Header>

blocks(): AsyncGenerator<Block>

Expand Down
110 changes: 97 additions & 13 deletions lib/decoder.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import { CarHeader as headerValidator } from './header-validator.js'
* @typedef {import('../api').BlockIndex} BlockIndex
* @typedef {import('./coding').BytesReader} BytesReader
* @typedef {import('./coding').CarHeader} CarHeader
* @typedef {import('./coding').CarV2Header} CarV2Header
* @typedef {import('./coding').CarV2FixedHeader} CarV2FixedHeader
* @typedef {import('./coding').CarDecoder} CarDecoder
*/

Expand All @@ -19,6 +21,8 @@ const CIDV0_BYTES = {
DAG_PB: 0x70
}

// Byte length of the fixed CARv2 header, expressed as the sum of its fields.
const V2_HEADER_LENGTH = [
  16, // characteristics
  8, // v1 offset
  8, // v1 size
  8 // index offset
].reduce((total, fieldLength) => total + fieldLength, 0)

/**
* @param {BytesReader} reader
* @returns {Promise<number>}
Expand All @@ -34,10 +38,34 @@ async function readVarint (reader) {

/**
* @param {BytesReader} reader
* @param {number} [strictVersion=1] a strict version to expect in the header, or -1 to skip version checking entirely
* @returns {Promise<CarHeader>}
* @returns {Promise<CarV2FixedHeader>}
*/
async function readV2Header (reader) {
  // Decodes the fixed-size CARv2 header: asks the reader for exactly
  // V2_HEADER_LENGTH bytes, interprets them as five little-endian unsigned
  // 64-bit integers, then advances the reader past those bytes.
  /** @type {Uint8Array} */
  const raw = await reader.exactly(V2_HEADER_LENGTH)
  // honour byteOffset/byteLength so a view into a larger buffer decodes correctly
  const view = new DataView(raw.buffer, raw.byteOffset, raw.byteLength)
  /**
   * @param {number} at byte position of the field within the header
   * @returns {bigint}
   */
  const u64 = (at) => view.getBigUint64(at, true)

  /** @type {[bigint, bigint]} */
  const characteristics = [u64(0), u64(8)]
  // the remaining fields are narrowed from bigint to number
  const dataOffset = Number(u64(16))
  const dataSize = Number(u64(24))
  const indexOffset = Number(u64(32))

  reader.seek(V2_HEADER_LENGTH)
  return { version: 2, characteristics, dataOffset, dataSize, indexOffset }
}

/**
* @param {BytesReader} reader
* @param {number} [strictVersion]
* @returns {Promise<CarHeader|CarV2Header>}
*/
export async function readHeader (reader, strictVersion = 1) {
export async function readHeader (reader, strictVersion) {
const length = await readVarint(reader)
if (length === 0) {
throw new Error('Invalid CAR header (zero length)')
Expand All @@ -48,17 +76,23 @@ export async function readHeader (reader, strictVersion = 1) {
if (!headerValidator(block)) {
throw new Error('Invalid CAR header format')
}
if (strictVersion !== -1) {
if (block.version !== strictVersion) {
throw new Error(`Invalid CAR version: ${block.version}`)
}
if ((block.version !== 1 && block.version !== 2) || (strictVersion !== undefined && block.version !== strictVersion)) {
throw new Error(`Invalid CAR version: ${block.version}`)
}
if (!Array.isArray(block.roots)) {
// we've made 'roots' optional in the schema so we can do the version check
// before rejecting the block as invalid if there is no version
// we've made 'roots' optional in the schema so we can do the version check
// before rejecting the block as invalid if there is no version
const hasRoots = Array.isArray(block.roots)
if ((block.version === 1 && !hasRoots) || (block.version === 2 && hasRoots)) {
throw new Error('Invalid CAR header format')
}
return block
if (block.version === 1) {
return block
}
// version 2
const v2Header = await readV2Header(reader)
reader.seek(reader.pos - v2Header.dataOffset)
const v1Header = await readHeader(reader, 1)
return Object.assign(v1Header, v2Header)
/* c8 ignore next 2 */
// Node.js 12 c8 bug
}
Expand Down Expand Up @@ -125,7 +159,7 @@ export async function readBlockHead (reader) {
}
length += (reader.pos - start)
const cid = await readCid(reader)
const blockLength = length - (reader.pos - start) // subtract CID length
const blockLength = length - Number(reader.pos - start) // subtract CID length

return { cid, length, blockLength }
/* c8 ignore next 2 */
Expand Down Expand Up @@ -164,7 +198,14 @@ async function readBlockIndex (reader) {
* @returns {CarDecoder}
*/
export function createDecoder (reader) {
const headerPromise = readHeader(reader)
const headerPromise = (async () => {
const header = await readHeader(reader)
if (header.version === 2) {
const v1length = reader.pos - header.dataOffset
reader = limitReader(reader, header.dataSize - v1length)
}
return header
})()

return {
header: () => headerPromise,
Expand Down Expand Up @@ -316,3 +357,46 @@ export function asyncIterableReader (asyncIterable) {

return chunkReader(readChunk)
}

/**
 * Wraps a reader so that no more than `byteLimit` bytes can be obtained
 * through the wrapper. Consumption is tracked by the lengths passed to
 * `seek()`; `pos` is passed straight through from the wrapped reader and is
 * therefore NOT rebased to the start of the limited region.
 *
 * @param {BytesReader} reader the reader to wrap
 * @param {number} byteLimit maximum number of bytes that may be consumed
 * @returns {BytesReader}
 */
export function limitReader (reader, byteLimit) {
  // running total of the lengths handed to seek() on this wrapper
  let bytesRead = 0

  /** @type {BytesReader} */
  return {
    /**
     * Returns at most `length` bytes, truncated so the result never extends
     * past the limit; empty once the limit has been reached or passed.
     */
    async upTo (length) {
      let bytes = await reader.upTo(length)
      if (bytes.length + bytesRead > byteLimit) {
        // Clamp at zero: a negative `end` makes subarray() count back from the
        // end of the chunk, which would hand out bytes beyond the limit if
        // seek() has already moved past it.
        bytes = bytes.subarray(0, Math.max(0, byteLimit - bytesRead))
      }
      return bytes
      /* c8 ignore next 2 */
      // Node.js 12 c8 bug
    },

    /**
     * Returns exactly `length` bytes from the wrapped reader.
     * @throws {Error} 'Unexpected end of data' if the bytes would cross the limit
     */
    async exactly (length) {
      const bytes = await reader.exactly(length)
      if (bytes.length + bytesRead > byteLimit) {
        throw new Error('Unexpected end of data')
      }
      return bytes
      /* c8 ignore next 2 */
      // Node.js 12 c8 bug
    },

    /** Records `length` bytes as consumed and advances the wrapped reader. */
    seek (length) {
      bytesRead += length
      reader.seek(length)
    },

    /** Position as reported by the wrapped reader. */
    get pos () {
      return reader.pos
    }
  }
}
2 changes: 1 addition & 1 deletion lib/reader-browser.js
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ export class CarReader {
* @param {BytesReader} reader
* @returns {Promise<CarReader>}
*/
async function decodeReaderComplete (reader) {
export async function decodeReaderComplete (reader) {
const decoder = createDecoder(reader)
const { version, roots } = await decoder.header()
const blocks = []
Expand Down
2 changes: 1 addition & 1 deletion lib/writer-browser.js
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ export class CarWriter {
const reader = bytesReader(bytes)
await readHeader(reader)
const newHeader = createHeader(roots)
if (reader.pos !== newHeader.length) {
if (Number(reader.pos) !== newHeader.length) {
throw new Error(`updateRoots() can only overwrite a header of the same length (old header is ${reader.pos} bytes, new header is ${newHeader.length} bytes)`)
}
bytes.set(newHeader, 0)
Expand Down
52 changes: 49 additions & 3 deletions test/common.js
Original file line number Diff line number Diff line change
Expand Up @@ -132,14 +132,13 @@ function makeIterable (data, chunkSize) {
}

const carBytes = bytes.fromHex('63a265726f6f747382d82a58250001711220f88bc853804cf294fe417e4fa83028689fcdb1b1592c5102e1474dbc200fab8bd82a5825000171122069ea0740f9807a28f4d932c62e7c1c83be055e55072c90266ab3e79df63a365b6776657273696f6e01280155122061be55a8e2f6b4e172338bddf184d6dbee29c98853e0a0485ecee7f27b9af0b461616161280155122081cc5b17018674b401b42f35ba07bb79e211239c23bffe658da1577e3e646877626262622801551220b6fbd675f98e2abd22d4ed29fdc83150fedc48597e92dd1a7a24381d44a2745163636363511220e7dc486e97e6ebe5cdabab3e392bdad128b6e09acc94bb4e2aa2af7b986d24d0122d0a240155122061be55a8e2f6b4e172338bddf184d6dbee29c98853e0a0485ecee7f27b9af0b4120363617418048001122079a982de3c9907953d4d323cee1d0fb1ed8f45f8ef02870c0cb9e09246bd530a122d0a240155122081cc5b17018674b401b42f35ba07bb79e211239c23bffe658da1577e3e6468771203646f671804122d0a221220e7dc486e97e6ebe5cdabab3e392bdad128b6e09acc94bb4e2aa2af7b986d24d01205666972737418338301122002acecc5de2438ea4126a3010ecb1f8a599c8eff22fff1a1dcffe999b27fd3de122e0a2401551220b6fbd675f98e2abd22d4ed29fdc83150fedc48597e92dd1a7a24381d44a274511204626561721804122f0a22122079a982de3c9907953d4d323cee1d0fb1ed8f45f8ef02870c0cb9e09246bd530a12067365636f6e641895015b01711220f88bc853804cf294fe417e4fa83028689fcdb1b1592c5102e1474dbc200fab8ba2646c696e6bd82a582300122002acecc5de2438ea4126a3010ecb1f8a599c8eff22fff1a1dcffe999b27fd3de646e616d6564626c6970360171122069ea0740f9807a28f4d932c62e7c1c83be055e55072c90266ab3e79df63a365ba2646c696e6bf6646e616d65656c696d626f')

// go.car is written as a graph, not by the allBlocks ordering here, so ordering is slightly out
const goCarBytes = bytes.fromHex('63a265726f6f747382d82a58250001711220f88bc853804cf294fe417e4fa83028689fcdb1b1592c5102e1474dbc200fab8bd82a5825000171122069ea0740f9807a28f4d932c62e7c1c83be055e55072c90266ab3e79df63a365b6776657273696f6e015b01711220f88bc853804cf294fe417e4fa83028689fcdb1b1592c5102e1474dbc200fab8ba2646c696e6bd82a582300122002acecc5de2438ea4126a3010ecb1f8a599c8eff22fff1a1dcffe999b27fd3de646e616d6564626c69708301122002acecc5de2438ea4126a3010ecb1f8a599c8eff22fff1a1dcffe999b27fd3de122e0a2401551220b6fbd675f98e2abd22d4ed29fdc83150fedc48597e92dd1a7a24381d44a274511204626561721804122f0a22122079a982de3c9907953d4d323cee1d0fb1ed8f45f8ef02870c0cb9e09246bd530a12067365636f6e641895012801551220b6fbd675f98e2abd22d4ed29fdc83150fedc48597e92dd1a7a24381d44a27451636363638001122079a982de3c9907953d4d323cee1d0fb1ed8f45f8ef02870c0cb9e09246bd530a122d0a240155122081cc5b17018674b401b42f35ba07bb79e211239c23bffe658da1577e3e6468771203646f671804122d0a221220e7dc486e97e6ebe5cdabab3e392bdad128b6e09acc94bb4e2aa2af7b986d24d0120566697273741833280155122081cc5b17018674b401b42f35ba07bb79e211239c23bffe658da1577e3e64687762626262511220e7dc486e97e6ebe5cdabab3e392bdad128b6e09acc94bb4e2aa2af7b986d24d0122d0a240155122061be55a8e2f6b4e172338bddf184d6dbee29c98853e0a0485ecee7f27b9af0b412036361741804280155122061be55a8e2f6b4e172338bddf184d6dbee29c98853e0a0485ecee7f27b9af0b461616161360171122069ea0740f9807a28f4d932c62e7c1c83be055e55072c90266ab3e79df63a365ba2646c696e6bf6646e616d65656c696d626f')

const goCarRoots = [
CID.parse('bafyreihyrpefhacm6kkp4ql6j6udakdit7g3dmkzfriqfykhjw6cad5lrm'),
CID.parse('bafyreidj5idub6mapiupjwjsyyxhyhedxycv4vihfsicm2vt46o7morwlm')
]

const goCarIndex = [
{ cid: CID.parse('bafyreihyrpefhacm6kkp4ql6j6udakdit7g3dmkzfriqfykhjw6cad5lrm'), offset: 100, length: 92, blockOffset: 137, blockLength: 55 },
{ cid: CID.parse('QmNX6Tffavsya4xgBi2VJQnSuqy9GsxongxZZ9uZBqp16d'), offset: 192, length: 133, blockOffset: 228, blockLength: 97 },
Expand All @@ -151,6 +150,49 @@ const goCarIndex = [
{ cid: CID.parse('bafyreidj5idub6mapiupjwjsyyxhyhedxycv4vihfsicm2vt46o7morwlm'), offset: 660, length: 55, blockOffset: 697, blockLength: 18 }
]

const goCarV2Bytes = bytes.fromHex('0aa16776657273696f6e02000000000000000000000000000000003300000000000000c001000000000000f30100000000000038a265726f6f747381d82a5823001220fb16f5083412ef1371d031ed4aa239903d84efdadf1ba3cd678e6475b1a232f86776657273696f6e01511220fb16f5083412ef1371d031ed4aa239903d84efdadf1ba3cd678e6475b1a232f8122d0a221220d9c0d5376d26f1931f7ad52d7acc00fc1090d2edb0808bf61eeb0a152826f6261204f09f8da418a40185011220d9c0d5376d26f1931f7ad52d7acc00fc1090d2edb0808bf61eeb0a152826f62612310a221220d745b7757f5b4593eeab7820306c7bc64eb496a7410a0d07df7a34ffec4b97f1120962617272656c657965183a122e0a2401551220a2e1c40da1ae335d4dffe729eb4d5ca23b74b9e51fc535f4a804a261080c294d1204f09f90a11807581220d745b7757f5b4593eeab7820306c7bc64eb496a7410a0d07df7a34ffec4b97f112340a2401551220b474a99a2705e23cf905a484ec6d14ef58b56bbe62e9292783466ec363b5072d120a666973686d6f6e67657218042801551220b474a99a2705e23cf905a484ec6d14ef58b56bbe62e9292783466ec363b5072d666973682b01551220a2e1c40da1ae335d4dffe729eb4d5ca23b74b9e51fc535f4a804a261080c294d6c6f62737465720100000028000000c800000000000000a2e1c40da1ae335d4dffe729eb4d5ca23b74b9e51fc535f4a804a261080c294d9401000000000000b474a99a2705e23cf905a484ec6d14ef58b56bbe62e9292783466ec363b5072d6b01000000000000d745b7757f5b4593eeab7820306c7bc64eb496a7410a0d07df7a34ffec4b97f11201000000000000d9c0d5376d26f1931f7ad52d7acc00fc1090d2edb0808bf61eeb0a152826f6268b00000000000000fb16f5083412ef1371d031ed4aa239903d84efdadf1ba3cd678e6475b1a232f83900000000000000')
// Expected root CID for the CARv2 fixture (`goCarV2Bytes`); it is also the CID
// of the first entry in `goCarV2Index` below.
const goCarV2Roots = [CID.parse('QmfEoLyB5NndqeKieExd1rtJzTduQUPEV8TwAYcUiy3H5Z')]
// Expected index entries for the CARv2 fixture, in order. The entries are
// contiguous: each `offset + length` equals the next entry's `offset`, and
// each `blockOffset + blockLength` equals that same entry's `offset + length`.
const goCarV2Index = [
{ blockLength: 47, blockOffset: 143, cid: CID.parse('QmfEoLyB5NndqeKieExd1rtJzTduQUPEV8TwAYcUiy3H5Z'), length: 82, offset: 108 },
{ blockLength: 99, blockOffset: 226, cid: CID.parse('QmczfirA7VEH7YVvKPTPoU69XM3qY4DC39nnTsWd4K3SkM'), length: 135, offset: 190 },
{ blockLength: 54, blockOffset: 360, cid: CID.parse('Qmcpz2FHJD7VAhg1fxFXdYJKePtkx1BsHuCrAgWVnaHMTE'), length: 89, offset: 325 },
{ blockLength: 4, blockOffset: 451, cid: CID.parse('bafkreifuosuzujyf4i6psbneqtwg2fhplc2wxptc5euspa2gn3bwhnihfu'), length: 41, offset: 414 },
{ blockLength: 7, blockOffset: 492, cid: CID.parse('bafkreifc4hca3inognou377hfhvu2xfchn2ltzi7yu27jkaeujqqqdbjju'), length: 44, offset: 455 }
]
// Expected contents of the blocks in the CARv2 fixture, keyed by CID string
// (the five keys match the CIDs listed in `goCarV2Index`).
// NOTE(review): entries with `Links` look like decoded link nodes and the
// plain string values like raw block payloads — confirm against the tests
// that consume this export.
/** @type {{[k in string]: any}} */
const goCarV2Contents = {
QmfEoLyB5NndqeKieExd1rtJzTduQUPEV8TwAYcUiy3H5Z: {
Links: [{
Hash: CID.parse('QmczfirA7VEH7YVvKPTPoU69XM3qY4DC39nnTsWd4K3SkM'),
Name: '🍤',
Tsize: 164
}]
},
QmczfirA7VEH7YVvKPTPoU69XM3qY4DC39nnTsWd4K3SkM: {
Links: [
{
Hash: CID.parse('Qmcpz2FHJD7VAhg1fxFXdYJKePtkx1BsHuCrAgWVnaHMTE'),
Name: 'barreleye',
Tsize: 58
},
{
Hash: CID.parse('bafkreifc4hca3inognou377hfhvu2xfchn2ltzi7yu27jkaeujqqqdbjju'),
Name: '🐡',
Tsize: 7
}
]
},
Qmcpz2FHJD7VAhg1fxFXdYJKePtkx1BsHuCrAgWVnaHMTE: {
Links: [{
Hash: CID.parse('bafkreifuosuzujyf4i6psbneqtwg2fhplc2wxptc5euspa2gn3bwhnihfu'),
Name: 'fishmonger',
Tsize: 4
}]
},
bafkreifuosuzujyf4i6psbneqtwg2fhplc2wxptc5euspa2gn3bwhnihfu: 'fish',
bafkreifc4hca3inognou377hfhvu2xfchn2ltzi7yu27jkaeujqqqdbjju: 'lobster'
}

export {
toBlock,
assert,
Expand All @@ -160,5 +202,9 @@ export {
carBytes,
goCarBytes,
goCarRoots,
goCarIndex
goCarIndex,
goCarV2Bytes,
goCarV2Roots,
goCarV2Index,
goCarV2Contents
}
Binary file added test/go.carv2
Binary file not shown.
8 changes: 4 additions & 4 deletions test/test-errors.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ describe('Misc errors', () => {

it('bad version', async () => {
// quick sanity check that makeHeader() works properly!
const buf2 = bytes.fromHex('0aa16776657273696f6e02')
// {version:2} - fixed string, likely to be used by CARv2 to escape header parsing rules
assert.strictEqual(bytes.toHex(makeHeader({ version: 2 })), '0aa16776657273696f6e02')
await assert.isRejected(CarReader.fromBytes(buf2), Error, 'Invalid CAR version: 2')
const buf2 = bytes.fromHex('0aa16776657273696f6e03')
assert.strictEqual(bytes.toHex(makeHeader({ version: 3 })), '0aa16776657273696f6e03')
// {version:3}
await assert.isRejected(CarReader.fromBytes(buf2), Error, 'Invalid CAR version: 3')
})

describe('bad header', async () => {
Expand Down
25 changes: 24 additions & 1 deletion test/test-indexer.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
/* eslint-env mocha */

import { CarIndexer } from '@ipld/car/indexer'
import { goCarBytes, goCarIndex, makeIterable, assert } from './common.js'
import {
goCarBytes,
goCarIndex,
goCarV2Bytes,
goCarV2Roots,
goCarV2Index,
makeIterable,
assert
} from './common.js'
import { verifyRoots } from './verify-store-reader.js'

describe('CarIndexer fromBytes()', () => {
Expand All @@ -18,6 +26,21 @@ describe('CarIndexer fromBytes()', () => {
assert.deepStrictEqual(indexData, goCarIndex)
})

// Indexing the CARv2 fixture: expects a single root matching the fixture's
// root, a reported version of 2, and index entries equal to `goCarV2Index`.
it('v2 complete', async () => {
const indexer = await CarIndexer.fromBytes(goCarV2Bytes)
const roots = await indexer.getRoots()
assert.strictEqual(roots.length, 1)
assert(goCarV2Roots[0].equals(roots[0]))
assert.strictEqual(indexer.version, 2)

// drain the indexer so the full sequence of entries can be compared at once
const indexData = []
for await (const index of indexer) {
indexData.push(index)
}

assert.deepStrictEqual(indexData, goCarV2Index)
})

it('bad argument', async () => {
for (const arg of [true, false, null, undefined, 'string', 100, { obj: 'nope' }]) {
// @ts-ignore
Expand Down
Loading

0 comments on commit 99cd346

Please sign in to comment.