Skip to content

Commit

Permalink
feat(wrangler) add asset manifest in dev and use in ASW (#6525)
Browse files Browse the repository at this point in the history
* simplify asset manifest building func in deploy

* create asset manifest in dev

* use asset manifest in ASW

* use a reverse map in fake kv service

* fix tests

* review feedback

* add content type to reverse map

* address pr feedback
  • Loading branch information
emily-shen authored Aug 22, 2024
1 parent f5bde66 commit c7a980c
Show file tree
Hide file tree
Showing 13 changed files with 355 additions and 83 deletions.
3 changes: 3 additions & 0 deletions packages/miniflare/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
"@types/estree": "^1.0.0",
"@types/glob-to-regexp": "^0.4.1",
"@types/http-cache-semantics": "^4.0.1",
"@types/mime": "^3.0.4",
"@types/node": "20.8.3",
"@types/rimraf": "^4.0.5",
"@types/stoppable": "^1.1.1",
Expand All @@ -85,6 +86,8 @@
"expect-type": "^0.15.0",
"http-cache-semantics": "^4.1.0",
"kleur": "^4.1.5",
"mime": "^3.0.0",
"pretty-bytes": "^6.0.0",
"rimraf": "^5.0.1",
"source-map": "^0.6.1",
"which": "^2.0.2"
Expand Down
206 changes: 193 additions & 13 deletions packages/miniflare/src/plugins/kv/assets.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
import crypto from "crypto";
import fs from "fs/promises";
import path from "path";
import { getType } from "mime";
import { KVOptionsSchema } from "miniflare";
import prettyBytes from "pretty-bytes";
import SCRIPT_KV_ASSETS from "worker:kv/assets";
import { z } from "zod";
import { Service, Worker_Binding } from "../../runtime";
Expand All @@ -20,15 +25,12 @@ export function isWorkersWithAssets(

const SERVICE_NAMESPACE_ASSET = `${KV_PLUGIN_NAME}:asset`;

/**
 * Builds the placeholder asset manifest.
 *
 * The manifest is 20 bytes long and all zeroes except for the first byte,
 * which is set to 1 to signal to the Asset Server Worker that it is running
 * in a local dev "context".
 */
export function buildAssetsManifest(): Uint8Array {
  const manifestBytes = new Uint8Array(new ArrayBuffer(20)); // [0, 0, ..., 0]
  manifestBytes[0] = 1; // [1, 0, ..., 0]
  return manifestBytes;
}
/**
 * Builds the binary asset manifest for the assets directory `dir`:
 * the directory is walked, the resulting entries are sorted, and the
 * sorted entries are binary encoded.
 */
export const buildAssetsManifest = async (dir: string) =>
  encodeManifest(sortManifest(await walk(dir)));

export async function getAssetsBindings(
options: AssetsOptions
Expand All @@ -38,8 +40,7 @@ export async function getAssetsBindings(
options?.assetsManifestBindingName
);

const assetsManifest = buildAssetsManifest();

const assetsManifest = await buildAssetsManifest(options.assetsPath);
return [
{
// this is the binding to the KV namespace that the assets are in.
Expand All @@ -58,7 +59,7 @@ export async function getAssetsBindings(
export async function getAssetsNodeBindings(
options: AssetsOptions
): Promise<Record<string, unknown>> {
const assetsManifest = buildAssetsManifest();
const assetsManifest = buildAssetsManifest(options.assetsPath);
const assetsBindings = getAssetsBindingsNames(
options?.assetsKVBindingName,
options?.assetsManifestBindingName
Expand All @@ -70,7 +71,11 @@ export async function getAssetsNodeBindings(
};
}

export function getAssetsServices(options: AssetsOptions): Service[] {
export async function getAssetsServices(
options: AssetsOptions
): Promise<Service[]> {
const assetsReverseMap = await createReverseMap(options.assetsPath);

const storageServiceName = `${SERVICE_NAMESPACE_ASSET}:storage`;
const storageService: Service = {
name: storageServiceName,
Expand All @@ -92,8 +97,183 @@ export function getAssetsServices(options: AssetsOptions): Service[] {
name: SharedBindings.MAYBE_SERVICE_BLOBS,
service: { name: storageServiceName },
},
{
name: "__STATIC_ASSETS_REVERSE_MAP",
json: assetsReverseMap,
},
],
},
};
return [storageService, namespaceService];
}

// ASSET MANIFEST
//
// 1. Traverse the asset directory to create an asset manifest.
// (In prod the manifest contains a pathHash and a contentHash. The
// contentHash is used for uploading and as the keys for the KV namespace
// where the assets are stored. Uploading is irrelevant in dev, so for
// performance reasons, the pathHash is reused for the "contentHash".)
//
// 2. Sort and binary encode the asset manifest
// This is available to asset service worker as a binding.

// Limits checked while walking the assets directory.
const MAX_ASSET_COUNT = 20_000;
const MAX_ASSET_SIZE = 25 * 2 ** 20; // 25 MiB

// Number of bytes that precede the first entry in the encoded manifest.
const MANIFEST_HEADER_SIZE = 20;

// Sizes (in bytes) of the parts that make up a single manifest entry.
const PATH_HASH_SIZE = 16;
const CONTENT_HASH_SIZE = 16;
const TAIL_RESERVED_SIZE = 8;

// Offsets of the hashes, relative to the start of an entry.
const PATH_HASH_OFFSET = 0;
const CONTENT_HASH_OFFSET = PATH_HASH_OFFSET + PATH_HASH_SIZE;

// Total size (in bytes) of a single manifest entry.
const ENTRY_SIZE = CONTENT_HASH_OFFSET + CONTENT_HASH_SIZE + TAIL_RESERVED_SIZE;

/**
 * Recursively reads `dir` and returns one path hash per asset file found.
 *
 * Every entry is processed concurrently. Directories are skipped; for each
 * remaining file the path relative to `dir` is URL-encoded and hashed.
 * The returned hashes are in no particular order.
 *
 * @param dir the assets directory to traverse
 * @returns the (unsorted) path hashes of all assets in `dir`
 * @throws Error if more than MAX_ASSET_COUNT assets are found, or if any
 *   asset is larger than MAX_ASSET_SIZE
 */
const walk = async (dir: string): Promise<Uint8Array[]> => {
  const files = await fs.readdir(dir, { recursive: true });
  const manifest: Uint8Array[] = [];
  let counter = 0;
  await Promise.all(
    files.map(async (file) => {
      const filepath = path.join(dir, file);
      const relativeFilepath = path.relative(dir, filepath);
      const filestat = await fs.stat(filepath);

      // TODO: decide whether to follow symbolic links
      // NOTE(review): `fs.stat` resolves symbolic links, so `isSymbolicLink()`
      // presumably never returns true here (that would need `lstat`) and links
      // are currently followed — confirm the intended behaviour.
      if (filestat.isSymbolicLink() || filestat.isDirectory()) {
        return;
      }

      if (counter >= MAX_ASSET_COUNT) {
        throw new Error(
          `Maximum number of assets exceeded.\n` +
            `Cloudflare Workers supports up to ${MAX_ASSET_COUNT.toLocaleString()} assets in a version. We found ${counter.toLocaleString()} files in the specified assets directory "${dir}".\n` +
            `Ensure your assets directory contains a maximum of ${MAX_ASSET_COUNT.toLocaleString()} files, and that you have specified your assets directory correctly.`
        );
      }

      if (filestat.size > MAX_ASSET_SIZE) {
        throw new Error(
          `Asset too large.\n` +
            `Cloudflare Workers supports assets with sizes of up to ${prettyBytes(
              MAX_ASSET_SIZE,
              {
                binary: true,
              }
            )}. We found a file ${filepath} with a size of ${prettyBytes(
              filestat.size,
              {
                binary: true,
              }
            )}.\n` +
            `Ensure all assets in your assets directory "${dir}" conform with the Workers maximum size requirement.`
        );
      }

      // Count the asset *before* awaiting the hash. All callbacks run
      // concurrently, so incrementing only after the await would let any
      // number of them pass the limit check above while their hashes are
      // still pending, and the limit would not be enforced.
      counter++;
      manifest.push(await hashPath(encodeFilePath(relativeFilepath)));
    })
  );
  return manifest;
};

/**
 * Hashes `path` with SHA-256 and returns a view over the first
 * PATH_HASH_SIZE bytes of the digest.
 */
const hashPath = async (path: string) => {
  const pathBytes = new TextEncoder().encode(path);
  const digest = await crypto.subtle.digest("SHA-256", pathBytes.buffer);
  // keep only the leading bytes of the digest
  return new Uint8Array(digest).subarray(0, PATH_HASH_SIZE);
};

/**
 * Converts a platform-specific relative file path into a URL path:
 * the path is split on the platform separator, every segment is
 * URI-component encoded, and the segments are re-joined with "/" behind a
 * leading "/".
 */
const encodeFilePath = (filePath: string) => {
  const urlSegments: string[] = [];
  for (const part of filePath.split(path.sep)) {
    urlSegments.push(encodeURIComponent(part));
  }
  return `/${urlSegments.join("/")}`;
};

// Orders the manifest entries ascending by path hash.
// The given array is sorted in place and then returned.
const sortManifest = (manifest: Uint8Array[]) => {
  manifest.sort((left, right) => comparisonFn(left, right));
  return manifest;
};

/**
 * Comparator used to sort manifest entries in ascending order.
 *
 * Shorter arrays sort before longer ones; arrays of equal length are
 * compared byte by byte.
 *
 * @returns -1 if `a` sorts before `b`, 1 if it sorts after, and 0 if both
 *   hold the same bytes (a comparator must report equal elements as 0 to be
 *   consistent, otherwise `cmp(a, b)` and `cmp(b, a)` would both claim
 *   "greater than").
 */
const comparisonFn = (a: Uint8Array, b: Uint8Array) => {
  // Entries are expected to all have the same length, so this should not
  // normally be hit; it is kept as a safeguard.
  if (a.length !== b.length) {
    return a.length < b.length ? -1 : 1;
  }
  for (const [i, v] of a.entries()) {
    if (v !== b[i]) {
      return v < b[i] ? -1 : 1;
    }
  }
  return 0;
};

/**
 * Binary encodes the manifest.
 *
 * The output starts with MANIFEST_HEADER_SIZE bytes that are left zeroed,
 * followed by one ENTRY_SIZE-byte slot per manifest entry. Within each slot
 * the entry is written twice: once as the path hash and once as the content
 * hash (in dev the content hash happens to be the same as the path hash).
 */
const encodeManifest = (manifest: Uint8Array[]) => {
  const encoded = new Uint8Array(
    MANIFEST_HEADER_SIZE + ENTRY_SIZE * manifest.length
  );
  manifest.forEach((pathHash, index) => {
    const slotStart = MANIFEST_HEADER_SIZE + index * ENTRY_SIZE;
    // path hash (NB: PATH_HASH_OFFSET = 0)
    encoded.set(pathHash, slotStart + PATH_HASH_OFFSET);
    // content hash, which reuses the path hash
    encoded.set(pathHash, slotStart + CONTENT_HASH_OFFSET);
  });
  return encoded;
};

// ASSET REVERSE MAP
//
// In prod, the contentHash is used as the key for the KV store that holds the assets.
// ASW will hash the path of an incoming request, look for that pathHash in the stored manifest,
// and get the corresponding contentHash to use as the KV key.
// In dev, we fake out this KV store and just get the assets from disk. However we still need
// to map a given "contentHash" to the filePath. This is what the ASSET REVERSE MAP is for.
// This is available to the FAKE_KV_NAMESPACE service (assets.worker.ts) as a binding.

/** Lookup table from a path hash to the matching asset's file path and content type. */
type AssetReverseMap = Record<
  string,
  { filePath: string; contentType: string }
>;

/**
 * Builds the asset reverse map for the assets directory `dir` and returns it
 * serialised as a JSON string.
 *
 * Every non-directory entry found by recursively reading `dir` is keyed by
 * the hex-encoded hash of its URL-encoded relative path, and maps to that
 * relative path plus a content type derived from the file name
 * ("application/octet-stream" when none can be determined).
 */
const createReverseMap = async (dir: string) => {
  const entries = await fs.readdir(dir, { recursive: true });
  const reverseMap: AssetReverseMap = {};

  // Registers a single directory entry in the reverse map.
  const addEntry = async (entry: string) => {
    const absolutePath = path.join(dir, entry);
    const assetPath = path.relative(dir, absolutePath);
    const stats = await fs.stat(absolutePath);

    // NOTE(review): `fs.stat` resolves symbolic links, so the
    // `isSymbolicLink()` check presumably never matches — confirm.
    if (stats.isSymbolicLink() || stats.isDirectory()) {
      return;
    }

    const hash = await hashPath(encodeFilePath(assetPath));
    reverseMap[bytesToHex(hash)] = {
      filePath: assetPath,
      contentType: getType(absolutePath) ?? "application/octet-stream",
    };
  };

  await Promise.all(entries.map((entry) => addEntry(entry)));
  return JSON.stringify(reverseMap);
};

/**
 * Renders the bytes of `buffer` as a lowercase hexadecimal string, using
 * exactly two characters per byte.
 */
const bytesToHex = (buffer: ArrayBufferLike) => {
  let hex = "";
  for (const byte of new Uint8Array(buffer)) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
};
2 changes: 1 addition & 1 deletion packages/miniflare/src/plugins/kv/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ export const KV_PLUGIN: Plugin<
}

if (isWorkersWithAssets(options)) {
services.push(...getAssetsServices(options));
services.push(...(await getAssetsServices(options)));
}

return services;
Expand Down
27 changes: 13 additions & 14 deletions packages/miniflare/src/workers/kv/assets.worker.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import { SharedBindings } from "miniflare:shared";
import { KVParams } from "./constants";

// Bindings available to the fake KV namespace worker that serves assets.
interface Env {
// Service the handler fetches from, using the file path resolved for a request.
[SharedBindings.MAYBE_SERVICE_BLOBS]: Fetcher;
// Lookup table indexed with the path hash taken from the request URL's pathname.
__STATIC_ASSETS_REVERSE_MAP: AssetReverseMap;
}

// Maps a path hash to the actual file path (and content type) of the asset.
type AssetReverseMap = Record<
  string,
  { filePath: string; contentType: string }
>;

export default <ExportedHandler<Env>>{
async fetch(request, env) {
// Only permit reads
Expand All @@ -13,21 +17,16 @@ export default <ExportedHandler<Env>>{
return new Response(message, { status: 405, statusText: message });
}

// Decode key
const url = new URL(request.url);
let key = url.pathname.substring(1); // Strip leading "/"
// don't uri decode pathname, because we encode the filepath before hashing
const pathHash = new URL(request.url).pathname.substring(1);

if (url.searchParams.get(KVParams.URL_ENCODED)?.toLowerCase() === "true") {
key = decodeURIComponent(key);
const entry = env.__STATIC_ASSETS_REVERSE_MAP[pathHash];
if (entry === undefined) {
return new Response("Not Found", { status: 404 });
}

// eslint-disable-next-line @typescript-eslint/no-unused-vars
const { filePath, contentType } = entry;
const blobsService = env[SharedBindings.MAYBE_SERVICE_BLOBS];
if (key === "" || key === "/") {
return new Response("Not Found", {
status: 404,
});
} else {
return blobsService.fetch(new URL(key, "http://placeholder"));
}
return blobsService.fetch(new URL(filePath, "http://placeholder"));
},
};
12 changes: 12 additions & 0 deletions packages/miniflare/test/fixtures/assets/asset-test-helpers.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/**
 * Test helper: formats the bytes of `buffer` as a lowercase hex string with
 * two characters per byte.
 */
export const bytesToHex = (buffer: ArrayBufferLike) => {
  const hexPairs = Array.from(new Uint8Array(buffer), (byte) =>
    byte.toString(16).padStart(2, "0")
  );
  return hexPairs.join("");
};

/**
 * Test helper: hashes `path` with SHA-256 and returns a view over the first
 * 16 bytes of the digest.
 */
export const hashPath = async (path: string) => {
  const pathBytes = new TextEncoder().encode(path);
  const digest = await crypto.subtle.digest("SHA-256", pathBytes.buffer);
  // keep only the leading 16 bytes of the digest
  return new Uint8Array(digest).subarray(0, 16);
};
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { bytesToHex, hashPath } from "./asset-test-helpers.ts";

interface Env {
// custom kv binding name
CUSTOM_ASSETS_NAMESPACE: KVNamespace;
Expand All @@ -8,7 +10,8 @@ export default {
const url = new URL(request.url);
const { pathname } = url;

const content = await env.CUSTOM_ASSETS_NAMESPACE.get(pathname);
const pathHash = bytesToHex(await hashPath(pathname));
const content = await env.CUSTOM_ASSETS_NAMESPACE.get(pathHash);
return new Response(content);
},
};
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { bytesToHex, hashPath } from "./asset-test-helpers.ts";

interface Env {
// this is the default kv binding name
__STATIC_ASSETS_CONTENT: KVNamespace;
Expand All @@ -7,8 +9,8 @@ export default {
async fetch(request: Request, env: Env) {
const url = new URL(request.url);
const { pathname } = url;

const content = await env.__STATIC_ASSETS_CONTENT.get(pathname);
const pathHash = bytesToHex(await hashPath(pathname));
const content = await env.__STATIC_ASSETS_CONTENT.get(pathHash);
return new Response(content);
},
};

This file was deleted.

Loading

0 comments on commit c7a980c

Please sign in to comment.