From 8cd2555d901d7e684a9a5cc2516e5a91edd58621 Mon Sep 17 00:00:00 2001 From: Benjamin Goering <171782+gobengo@users.noreply.github.com> Date: Wed, 29 Nov 2023 11:18:32 -0800 Subject: [PATCH] feat: upload-client uploadDirectory, by default, sorts the provided files by file name to help the user call us in a way that is deterministic and minimizes cost (#1173) Motivation: * https://github.com/web3-storage/w3up/issues/1172 * "Stabilize file ordering in uploadDirectory" in https://github.com/web3-storage/secrets/issues/28 * this stabilizes it by sorting the files param unless an option is passed telling us not to --- .../filecoin-api/test/events/aggregator.js | 2 +- packages/upload-client/src/index.js | 10 +- packages/upload-client/src/sharding.js | 35 +++++ packages/upload-client/src/types.ts | 5 +- packages/upload-client/test/index.test.js | 141 +++++++++++++++++- 5 files changed, 187 insertions(+), 6 deletions(-) diff --git a/packages/filecoin-api/test/events/aggregator.js b/packages/filecoin-api/test/events/aggregator.js index c15844c59..4e092cb03 100644 --- a/packages/filecoin-api/test/events/aggregator.js +++ b/packages/filecoin-api/test/events/aggregator.js @@ -738,7 +738,7 @@ export const test = { minPieceInsertedAt: new Date().toISOString(), } const putAggregateRes = await context.aggregateStore.put( - aggregateRecord + aggregateRecord, ) assert.ok(putAggregateRes.ok) diff --git a/packages/upload-client/src/index.js b/packages/upload-client/src/index.js index 3336ee0fd..6c6233d64 100644 --- a/packages/upload-client/src/index.js +++ b/packages/upload-client/src/index.js @@ -6,7 +6,7 @@ import * as Store from './store.js' import * as Upload from './upload.js' import * as UnixFS from './unixfs.js' import * as CAR from './car.js' -import { ShardingStream } from './sharding.js' +import { ShardingStream, defaultFileComparator } from './sharding.js' export { Store, Upload, UnixFS, CAR } export * from './sharding.js' @@ -63,13 +63,17 @@ export async function uploadFile(conf, file, options = {}) { * has the capability to perform the action. * * The issuer needs the `store/add` and `upload/add` delegated capability. - * @param {import('./types.js').FileLike[]} files File data. + * @param {import('./types.js').FileLike[]} files Files that should be in the directory. + * To ensure determinism in the IPLD encoding, files are automatically sorted by `file.name`. + * To retain the order of the files as passed in the array, set `customOrder` option to `true`. * @param {import('./types.js').UploadDirectoryOptions} [options] */ export async function uploadDirectory(conf, files, options = {}) { + const { customOrder = false } = options + const entries = customOrder ? files : [...files].sort(defaultFileComparator) return await uploadBlockStream( conf, - UnixFS.createDirectoryEncoderStream(files, options), + UnixFS.createDirectoryEncoderStream(entries, options), options ) } diff --git a/packages/upload-client/src/sharding.js b/packages/upload-client/src/sharding.js index b69bdc4e0..9070326fd 100644 --- a/packages/upload-client/src/sharding.js +++ b/packages/upload-client/src/sharding.js @@ -1,5 +1,9 @@ import { blockEncodingLength, encode, headerEncodingLength } from './car.js' +/** + * @typedef {import('./types.js').FileLike} FileLike + */ + // https://observablehq.com/@gozala/w3up-shard-size const SHARD_SIZE = 133_169_152 @@ -84,3 +88,34 @@ export class ShardingStream extends TransformStream { }) } } + +/** + * Default comparator for FileLikes. Sorts by file name in ascending order. + * + * @param {FileLike} a + * @param {FileLike} b + * @param {(file: FileLike) => string} getComparedValue - given a file being sorted, return the value by which its order should be determined, if it is different than the file object itself (e.g. file.name) + */ +export const defaultFileComparator = ( + a, + b, + getComparedValue = (file) => file.name +) => { + return ascending(a, b, getComparedValue) +} + +/** + * a comparator for sorting in ascending order. Use with Sorted or Array#sort. + * + * @template T + * @param {T} a + * @param {T} b + * @param {(i: T) => any} getComparedValue - given an item being sorted, return the value by which it should be sorted, if it is different than the item + */ +function ascending(a, b, getComparedValue) { + const ask = getComparedValue(a) + const bsk = getComparedValue(b) + if (ask === bsk) return 0 + else if (ask < bsk) return -1 + return 1 +} diff --git a/packages/upload-client/src/types.ts b/packages/upload-client/src/types.ts index b764c58a3..65886f9c6 100644 --- a/packages/upload-client/src/types.ts +++ b/packages/upload-client/src/types.ts @@ -258,7 +258,10 @@ export interface UploadOptions export interface UploadDirectoryOptions extends UploadOptions, UnixFSDirectoryEncoderOptions, - UploadProgressTrackable {} + UploadProgressTrackable { + /** whether the directory files have already been ordered in a custom way. indicates that the upload must not use a different order than the one provided. */ + customOrder?: boolean +} export interface BlobLike { /** diff --git a/packages/upload-client/test/index.test.js b/packages/upload-client/test/index.test.js index a5ffb6055..11627871c 100644 --- a/packages/upload-client/test/index.test.js +++ b/packages/upload-client/test/index.test.js @@ -6,7 +6,12 @@ import * as CAR from '@ucanto/transport/car' import * as Signer from '@ucanto/principal/ed25519' import * as StoreCapabilities from '@web3-storage/capabilities/store' import * as UploadCapabilities from '@web3-storage/capabilities/upload' -import { uploadFile, uploadDirectory, uploadCAR } from '../src/index.js' +import { + uploadFile, + uploadDirectory, + uploadCAR, + defaultFileComparator, +} from '../src/index.js' import { serviceSigner } from './fixtures.js' import { randomBlock, randomBytes } from './helpers/random.js' import { toCAR } from './helpers/car.js' @@ -368,6 +373,140 @@ describe('uploadDirectory', () => { assert.equal(carCIDs.length, 2) }) + + it('sorts files unless options.customOrder', async () => { + const space = await Signer.generate() + const agent = await Signer.generate() // The "user" that will ask the service to accept the upload + const proofs = await Promise.all([ + StoreCapabilities.add.delegate({ + issuer: space, + audience: agent, + with: space.did(), + expiration: Infinity, + }), + UploadCapabilities.add.delegate({ + issuer: space, + audience: agent, + with: space.did(), + expiration: Infinity, + }), + ]) + function createSimpleMockUploadServer() { + /** + * @type {Array>>} + */ + const invocations = [] + const service = mockService({ + store: { + add: provide(StoreCapabilities.add, (invocation) => { + invocations.push(invocation) + return { + ok: { + status: 'upload', + headers: { 'x-test': 'true' }, + url: 'http://localhost:9200', + with: invocation.capability.with, + link: /** @type {import('../src/types.js').CARLink} */ ( + invocation.capability.nb.link + ), + }, + } + }), + }, + upload: { + add: provide(UploadCapabilities.add, (invocation) => { + invocations.push(invocation) + const { capability } = invocation + if (!capability.nb) throw new Error('nb must be present') + return { ok: capability.nb } + }), + }, + }) + const server = Server.create({ + id: serviceSigner, + service, + codec: CAR.inbound, + validateAuthorization, + }) + const connection = Client.connect({ + id: serviceSigner, + codec: CAR.outbound, + channel: server, + }) + return { invocations, service, server, connection } + } + + const unsortedFiles = [ + new File([await randomBytes(32)], '/b.txt'), + new File([await randomBytes(32)], '/b.txt'), + new File([await randomBytes(32)], 'c.txt'), + new File([await randomBytes(32)], 'a.txt'), + ] + + const uploadServiceForUnordered = createSimpleMockUploadServer() + // uploading unsorted files should work because they should be sorted by `uploadDirectory` + const uploadedDirUnsorted = await uploadDirectory( + { issuer: agent, with: space.did(), proofs, audience: serviceSigner }, + unsortedFiles, + { connection: uploadServiceForUnordered.connection } + ) + + const uploadServiceForOrdered = createSimpleMockUploadServer() + // uploading sorted files should also work + const uploadedDirSorted = await uploadDirectory( + { issuer: agent, with: space.did(), proofs, audience: serviceSigner }, + [...unsortedFiles].sort(defaultFileComparator), + { connection: uploadServiceForOrdered.connection } + ) + + // upload/add roots should be the same. + assert.equal( + uploadedDirUnsorted.toString(), + uploadedDirSorted.toString(), + 'CID of upload/add root is same regardless of whether files param is sorted or unsorted' + ) + + // We also need to make sure the underlying shards are the same. + const shardsForUnordered = uploadServiceForUnordered.invocations + .flatMap((i) => + i.capability.can === 'upload/add' ? i.capability.nb.shards ?? [] : [] + ) + .map((cid) => cid.toString()) + const shardsForOrdered = uploadServiceForOrdered.invocations + .flatMap((i) => + i.capability.can === 'upload/add' ? i.capability.nb.shards ?? [] : [] + ) + .map((cid) => cid.toString()) + assert.deepEqual( + shardsForUnordered, + shardsForOrdered, + 'upload/add .nb.shards is identical regardless of ordering of files passed to uploadDirectory' + ) + + // but if options.customOrder is truthy, the caller is indicating + // they have customized the order of files, so `uploadDirectory` will not sort them + const uploadServiceForCustomOrder = createSimpleMockUploadServer() + const uploadedDirCustomOrder = await uploadDirectory( + { issuer: agent, with: space.did(), proofs, audience: serviceSigner }, + [...unsortedFiles], + { connection: uploadServiceForCustomOrder.connection, customOrder: true } + ) + const shardsForCustomOrder = uploadServiceForCustomOrder.invocations + .flatMap((i) => + i.capability.can === 'upload/add' ? i.capability.nb.shards ?? [] : [] + ) + .map((cid) => cid.toString()) + assert.notDeepEqual( + shardsForCustomOrder, + shardsForOrdered, + 'should not produce sorted shards for customOrder files' + ) + // upload/add roots will also be different + assert.notEqual( + uploadedDirCustomOrder.toString(), + shardsForOrdered.toString() + ) + }) }) describe('uploadCAR', () => {