Skip to content

Commit

Permalink
Merge pull request #3305 from cloudflare/dominik/use-external-pkg-lock
Browse files Browse the repository at this point in the history
Fixes to external package loading: use external lock and load stdlib.
  • Loading branch information
dom96 authored Jan 10, 2025
2 parents 1c424b5 + 9957619 commit c45a1bc
Show file tree
Hide file tree
Showing 9 changed files with 94 additions and 20 deletions.
13 changes: 10 additions & 3 deletions src/pyodide/internal/loadPackage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import {
} from 'pyodide-internal:metadata';
import {
SITE_PACKAGES,
STDLIB_PACKAGES,
getSitePackagesPath,
} from 'pyodide-internal:setupPackages';
import { parseTarInfo } from 'pyodide-internal:tar';
Expand Down Expand Up @@ -113,7 +114,12 @@ async function loadPackagesImpl(
let loadPromises: Promise<[string, Reader]>[] = [];
let loading = [];
for (const req of requirements) {
if (SITE_PACKAGES.loadedRequirements.has(req)) continue;
if (req === 'test') {
continue; // Skip the test package, it is only useful for internal Python regression testing.
}
if (SITE_PACKAGES.loadedRequirements.has(req)) {
continue;
}
loadPromises.push(loadBundle(req).then((r) => [req, r]));
loading.push(req);
}
Expand All @@ -135,9 +141,10 @@ async function loadPackagesImpl(
}

export async function loadPackages(Module: Module, requirements: Set<string>) {
const pkgsToLoad = requirements.union(new Set(STDLIB_PACKAGES));
if (LOAD_WHEELS_FROM_R2) {
await loadPackagesImpl(Module, requirements, loadBundleFromR2);
await loadPackagesImpl(Module, pkgsToLoad, loadBundleFromR2);
} else if (LOAD_WHEELS_FROM_ARTIFACT_BUNDLER) {
await loadPackagesImpl(Module, requirements, loadBundleFromArtifactBundler);
await loadPackagesImpl(Module, pkgsToLoad, loadBundleFromArtifactBundler);
}
}
6 changes: 3 additions & 3 deletions src/pyodide/internal/metadata.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import { default as MetadataReader } from 'pyodide-internal:runtime-generated/metadata';
import { default as PYODIDE_BUCKET } from 'pyodide-internal:generated/pyodide-bucket.json';
// The pyodide-lock.json is read from the Python bundle (pyodide-capnp-bin).
import { default as PYODIDE_LOCK } from 'pyodide-internal:generated/pyodide-lock.json';
import { default as ArtifactBundler } from 'pyodide-internal:artifacts';

export const IS_WORKERD = MetadataReader.isWorkerd();
Expand All @@ -12,9 +14,7 @@ export const LOAD_WHEELS_FROM_R2: boolean = IS_WORKERD;
export const LOAD_WHEELS_FROM_ARTIFACT_BUNDLER =
MetadataReader.shouldUsePackagesInArtifactBundler();
export const PACKAGES_VERSION = MetadataReader.getPackagesVersion();
export const LOCKFILE: PackageLock = JSON.parse(
MetadataReader.getPackagesLock()
);
export const LOCKFILE: PackageLock = PYODIDE_LOCK;
export const REQUIREMENTS = MetadataReader.getRequirements();
export const MAIN_MODULE_NAME = MetadataReader.getMainModule();
export const MEMORY_SNAPSHOT_READER = MetadataReader.hasMemorySnapshot()
Expand Down
9 changes: 7 additions & 2 deletions src/pyodide/internal/setupPackages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,21 @@ function canonicalizePackageName(name: string): string {
}

// The "name" field in the lockfile is not canonicalized
const STDLIB_PACKAGES: string[] = Object.values(LOCKFILE.packages)
export const STDLIB_PACKAGES: string[] = Object.values(LOCKFILE.packages)
.filter(({ install_dir }) => install_dir === 'stdlib')
.map(({ name }) => canonicalizePackageName(name));

// Each item in the list is an element of the file path, for example
// `folder/file.txt` -> `["folder", "file.txt"]`.
export type FilePath = string[];

/**
* SitePackagesDir keeps track of the virtualized view of the site-packages
* directory generated for each worker.
*/
class SitePackagesDir {
public rootInfo: TarFSInfo;
public soFiles: string[][];
public soFiles: FilePath[];
public loadedRequirements: Set<string>;
constructor() {
this.rootInfo = {
Expand Down Expand Up @@ -133,6 +137,7 @@ class SitePackagesDir {
export function buildSitePackages(requirements: Set<string>): SitePackagesDir {
if (EmbeddedPackagesTarReader.read === undefined) {
// Package retrieval is enabled, so the embedded tar reader isn't initialised.
// All packages, including STDLIB_PACKAGES, are loaded in `loadPackages`.
return new SitePackagesDir();
}

Expand Down
49 changes: 49 additions & 0 deletions src/pyodide/internal/snapshot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { default as ArtifactBundler } from 'pyodide-internal:artifacts';
import { default as UnsafeEval } from 'internal:unsafe-eval';
import { default as DiskCache } from 'pyodide-internal:disk_cache';
import {
FilePath,
SITE_PACKAGES,
getSitePackagesPath,
} from 'pyodide-internal:setupPackages';
Expand Down Expand Up @@ -102,6 +103,49 @@ function loadDynlib(
}
}

/**
 * Produces a stable, deterministic load order for a list of shared-library paths.
 *
 * Each entry of `filePaths` is a path split into its segments, e.g.
 * `[["folder", "file.so"], ["file.so"]]`.
 *
 * Ordering rules:
 *  1. A top-level `_lzma.so`, if present, comes first.
 *  2. A top-level `_ssl.so`, if present, comes next.
 *  3. Every other path follows, ordered by the default string sort of its
 *     `/`-joined form.
 *
 * The two pinned libraries are the ones loaded in the baseline snapshot, so they
 * must lead the list when a package snapshot is generated on top of a loaded
 * baseline snapshot.
 *
 * The input array is not modified; a new array is returned.
 */
function sortSoFiles(filePaths: FilePath[]): FilePath[] {
  const LZMA_SO = '_lzma.so';
  const SSL_SO = '_ssl.so';

  // Only a single-segment (top-level) path counts as one of the pinned files;
  // a same-named file nested inside a folder is sorted like any other path.
  const isBare = (parts: FilePath, name: string): boolean =>
    parts.length === 1 && parts[0] === name;

  const pinned: FilePath[] = [];
  if (filePaths.some((parts) => isBare(parts, LZMA_SO))) {
    pinned.push([LZMA_SO]);
  }
  if (filePaths.some((parts) => isBare(parts, SSL_SO))) {
    pinned.push([SSL_SO]);
  }

  // Compare joined strings rather than relying on how JS would order arrays of
  // arrays, then split back into path segments.
  const sortedRest = filePaths
    .filter((parts) => !isBare(parts, LZMA_SO) && !isBare(parts, SSL_SO))
    .map((parts) => parts.join('/'))
    .sort()
    .map((joined) => joined.split('/'));

  return [...pinned, ...sortedRest];
}

// Used for checkLoadedSoFiles, a snapshot sanity check.
const PRELOADED_SO_FILES: string[] = [];

Expand All @@ -121,6 +165,11 @@ export function preloadDynamicLibs(Module: Module): void {
if (IS_CREATING_BASELINE_SNAPSHOT || LOADED_BASELINE_SNAPSHOT) {
SO_FILES_TO_LOAD = [['_lzma.so'], ['_ssl.so']];
}
// The order in which we load the SO_FILES matters. For example, if a snapshot was generated with
// SO_FILES loaded in a certain way, then if we load that snapshot and load the SO_FILES
// differently here then Python will crash.
SO_FILES_TO_LOAD = sortSoFiles(SO_FILES_TO_LOAD);

try {
const sitePackages = getSitePackagesPath(Module);
for (const soFile of SO_FILES_TO_LOAD) {
Expand Down
5 changes: 5 additions & 0 deletions src/pyodide/types/pyodide-lock.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,8 @@ interface PackageLock {
[id: string]: PackageDeclaration;
};
}

// Ambient declaration for the generated lock file module: its default export
// is typed as a PackageLock.
declare module 'pyodide-internal:generated/pyodide-lock.json' {
const lock: PackageLock;
export default lock;
}
1 change: 0 additions & 1 deletion src/pyodide/types/runtime-generated/metadata.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ declare namespace MetadataReader {
const disposeMemorySnapshot: () => void;
const shouldUsePackagesInArtifactBundler: () => boolean;
const getPackagesVersion: () => string;
const getPackagesLock: () => string;
const read: (index: number, position: number, buffer: Uint8Array) => number;
}

Expand Down
20 changes: 18 additions & 2 deletions src/workerd/api/pyodide/pyodide.c++
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "pyodide.h"

#include <workerd/api/pyodide/setup-emscripten.h>
#include <workerd/io/compatibility-date.h>
#include <workerd/util/string-buffer.h>
#include <workerd/util/strings.h>

Expand All @@ -22,6 +23,22 @@ const kj::Maybe<jsg::Bundle::Reader> PyodideBundleManager::getPyodideBundle(
[](const MessageBundlePair& t) { return t.bundle; });
}

// Returns the text of the `pyodide-internal:generated/pyodide-lock.json` JSON module contained in
// the Pyodide bundle that corresponds to `pythonSnapshotRelease`, or `kj::none` when the bundle
// contains no JSON module with that name.
//
// The bundle itself must already be available: a missing bundle trips KJ_ASSERT_NONNULL rather
// than yielding `kj::none`.
kj::Maybe<kj::String> PyodideBundleManager::getPyodideLock(
PythonSnapshotRelease::Reader pythonSnapshotRelease) const {
auto bundleName = getPythonBundleName(pythonSnapshotRelease);
// We expect the Pyodide Bundle for the specified bundle name to already be downloaded here.
auto maybeBundle = getPyodideBundle(bundleName);
auto bundle = KJ_ASSERT_NONNULL(maybeBundle);
// Scan the bundle's modules for the JSON module registered under the lock file's name.
for (auto module: bundle.getModules()) {
if (module.which() == workerd::jsg::Module::JSON &&
module.getName() == "pyodide-internal:generated/pyodide-lock.json") {
// Copy the JSON text into an owned string for the caller.
return kj::str(module.getJson());
}
}

// The bundle has no lock file module.
return kj::none;
}

void PyodideBundleManager::setPyodideBundleData(
kj::String version, kj::Array<unsigned char> data) const {
auto wordArray = kj::arrayPtr(
Expand Down Expand Up @@ -440,8 +457,7 @@ jsg::Ref<PyodideMetadataReader> makePyodideMetadataReader(
names.finish(),
contents.finish(),
requirements.finish(),
kj::str("20240829.4"), // TODO: hardcoded version & lock
kj::str(PYODIDE_LOCK.toString()),
kj::str("20240829.4"), // TODO: hardcoded version
true /* isWorkerd */,
false /* isTracing */,
snapshotToDisk,
Expand Down
9 changes: 1 addition & 8 deletions src/workerd/api/pyodide/pyodide.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class PyodideBundleManager {
public:
void setPyodideBundleData(kj::String version, kj::Array<unsigned char> data) const;
const kj::Maybe<jsg::Bundle::Reader> getPyodideBundle(kj::StringPtr version) const;
kj::Maybe<kj::String> getPyodideLock(PythonSnapshotRelease::Reader pythonSnapshotRelease) const;

private:
struct MessageBundlePair {
Expand Down Expand Up @@ -80,7 +81,6 @@ class PyodideMetadataReader: public jsg::Object {
kj::Array<kj::Array<kj::byte>> contents;
kj::Array<kj::String> requirements;
kj::String packagesVersion;
kj::String packagesLock;
bool isWorkerdFlag;
bool isTracingFlag;
bool snapshotToDisk;
Expand All @@ -94,7 +94,6 @@ class PyodideMetadataReader: public jsg::Object {
kj::Array<kj::Array<kj::byte>> contents,
kj::Array<kj::String> requirements,
kj::String packagesVersion,
kj::String packagesLock,
bool isWorkerd,
bool isTracing,
bool snapshotToDisk,
Expand All @@ -106,7 +105,6 @@ class PyodideMetadataReader: public jsg::Object {
contents(kj::mv(contents)),
requirements(kj::mv(requirements)),
packagesVersion(kj::mv(packagesVersion)),
packagesLock(kj::mv(packagesLock)),
isWorkerdFlag(isWorkerd),
isTracingFlag(isTracing),
snapshotToDisk(snapshotToDisk),
Expand Down Expand Up @@ -169,10 +167,6 @@ class PyodideMetadataReader: public jsg::Object {
return kj::str(packagesVersion);
}

kj::String getPackagesLock() {
return kj::str(packagesLock);
}

JSG_RESOURCE_TYPE(PyodideMetadataReader) {
JSG_METHOD(isWorkerd);
JSG_METHOD(isTracing);
Expand All @@ -189,7 +183,6 @@ class PyodideMetadataReader: public jsg::Object {
JSG_METHOD(shouldSnapshotToDisk);
JSG_METHOD(shouldUsePackagesInArtifactBundler);
JSG_METHOD(getPackagesVersion);
JSG_METHOD(getPackagesLock);
JSG_METHOD(isCreatingBaselineSnapshot);
}

Expand Down
2 changes: 1 addition & 1 deletion src/workerd/io/compatibility-date.capnp
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ struct CompatibilityFlags @0x8f8c1b68151b6cef {
pythonWorkers @43 :Bool
$compatEnableFlag("python_workers")
$pythonSnapshotRelease(pyodide = "0.26.0a2", pyodideRevision = "2024-03-01",
packages = "2024-03-01", backport = 12,
packages = "2024-03-01", backport = 13,
baselineSnapshotHash = "d13ce2f4a0ade2e09047b469874dacf4d071ed3558fec4c26f8d0b99d95f77b5")
$impliedByAfterDate(name = "pythonWorkersDevPyodide", date = "2000-01-01");
# Enables Python Workers. Access to this flag is not restricted, instead bundles containing
Expand Down

0 comments on commit c45a1bc

Please sign in to comment.