From fc16af822f3581c5e995c10b1882320782dafe65 Mon Sep 17 00:00:00 2001 From: James M Snell Date: Thu, 9 Jan 2025 09:00:13 -0800 Subject: [PATCH] Provide mechanism for autopopulating node.js process.env Autopopulates the process.env from bindings in local dev. A similar PR will be needed internally to enable it there as it won't be automatic. --- src/node/internal/process.ts | 111 +++++++++--------- src/node/internal/util.d.ts | 1 + .../api/node/tests/process-nodejs-test.js | 53 +++++++++ .../node/tests/process-nodejs-test.wd-test | 17 ++- src/workerd/api/node/util.c++ | 4 + src/workerd/api/node/util.h | 5 + src/workerd/io/compatibility-date.capnp | 6 + src/workerd/jsg/jsg.h | 7 ++ src/workerd/jsg/setup.c++ | 1 + src/workerd/jsg/setup.h | 21 ++++ src/workerd/server/workerd-api.c++ | 67 ++++++++++- 11 files changed, 231 insertions(+), 62 deletions(-) diff --git a/src/node/internal/process.ts b/src/node/internal/process.ts index 2cab92469ea..cdb7d28a8e4 100644 --- a/src/node/internal/process.ts +++ b/src/node/internal/process.ts @@ -26,63 +26,60 @@ export function nextTick(cb: Function, ...args: unknown[]) { // for the worker are accessible from the env argument passed into the fetch // handler and have no impact here. -export const env = new Proxy( - {}, - { - // Per Node.js rules. process.env values must be coerced to strings. - // When defined using defineProperty, the property descriptor must be writable, - // configurable, and enumerable using just a falsy check. Getters and setters - // are not permitted. - set(obj: object, prop: PropertyKey, value: any) { - return Reflect.set(obj, prop, `${value}`); - }, - defineProperty( - obj: object, - prop: PropertyKey, - descriptor: PropertyDescriptor - ) { - validateObject(descriptor, 'descriptor', {}); - if (Reflect.has(descriptor, 'get') || Reflect.has(descriptor, 'set')) { - throw new ERR_INVALID_ARG_VALUE( - 'descriptor', - descriptor, - 'process.env value must not have getter/setter' - ); - } - if (!descriptor.configurable) { - throw new ERR_INVALID_ARG_VALUE( - 'descriptor.configurable', - descriptor, - 'process.env value must be configurable' - ); - } - if (!descriptor.enumerable) { - throw new ERR_INVALID_ARG_VALUE( - 'descriptor.enumerable', - descriptor, - 'process.env value must be enumerable' - ); - } - if (!descriptor.writable) { - throw new ERR_INVALID_ARG_VALUE( - 'descriptor.writable', - descriptor, - 'process.env value must be writable' - ); - } - if (Reflect.has(descriptor, 'value')) { - Reflect.set(descriptor, 'value', `${descriptor.value}`); - } else { - throw new ERR_INVALID_ARG_VALUE( - 'descriptor.value', - descriptor, - 'process.env value must be specified explicitly' - ); - } - return Reflect.defineProperty(obj, prop, descriptor); - }, - } -); +export const env = new Proxy(utilImpl.getEnvObject(), { + // Per Node.js rules. process.env values must be coerced to strings. + // When defined using defineProperty, the property descriptor must be writable, + // configurable, and enumerable using just a falsy check. Getters and setters + // are not permitted. + set(obj: object, prop: PropertyKey, value: any) { + return Reflect.set(obj, prop, `${value}`); + }, + defineProperty( + obj: object, + prop: PropertyKey, + descriptor: PropertyDescriptor + ) { + validateObject(descriptor, 'descriptor', {}); + if (Reflect.has(descriptor, 'get') || Reflect.has(descriptor, 'set')) { + throw new ERR_INVALID_ARG_VALUE( + 'descriptor', + descriptor, + 'process.env value must not have getter/setter' + ); + } + if (!descriptor.configurable) { + throw new ERR_INVALID_ARG_VALUE( + 'descriptor.configurable', + descriptor, + 'process.env value must be configurable' + ); + } + if (!descriptor.enumerable) { + throw new ERR_INVALID_ARG_VALUE( + 'descriptor.enumerable', + descriptor, + 'process.env value must be enumerable' + ); + } + if (!descriptor.writable) { + throw new ERR_INVALID_ARG_VALUE( + 'descriptor.writable', + descriptor, + 'process.env value must be writable' + ); + } + if (Reflect.has(descriptor, 'value')) { + Reflect.set(descriptor, 'value', `${descriptor.value}`); + } else { + throw new ERR_INVALID_ARG_VALUE( + 'descriptor.value', + descriptor, + 'process.env value must be specified explicitly' + ); + } + return Reflect.defineProperty(obj, prop, descriptor); + }, +}); export function getBuiltinModule(id: string): any { return utilImpl.getBuiltinModule(id); diff --git a/src/node/internal/util.d.ts b/src/node/internal/util.d.ts index 7b6e357c470..2f07c635dbc 100644 --- a/src/node/internal/util.d.ts +++ b/src/node/internal/util.d.ts @@ -120,6 +120,7 @@ export function isBoxedPrimitive( value: unknown ): value is number | string | boolean | bigint | symbol; +export function getEnvObject(): Record; export function getBuiltinModule(id: string): any; export function getCallSite(frames: number): Record[]; export function processExitImpl(code: number): void; diff --git a/src/workerd/api/node/tests/process-nodejs-test.js b/src/workerd/api/node/tests/process-nodejs-test.js index eea2dd40a42..7687efce2fd 100644 --- a/src/workerd/api/node/tests/process-nodejs-test.js +++ b/src/workerd/api/node/tests/process-nodejs-test.js @@ -6,3 +6,56 @@ export const processPlatform = { assert.ok(['darwin', 'win32', 'linux'].includes(process.platform)); }, }; + +process.env.QUX = 1; +const pEnv = { ...process.env }; + +export const processEnv = { + async test(ctrl, env) { + assert.strictEqual(pEnv.FOO, 'BAR'); + + // It should be possible to mutate the process.env at runtime. + // All values manually set are coerced to strings. + assert.strictEqual(pEnv.QUX, '1'); + + // JSON bindings that do not parse to strings come through as + // raw unparsed JSON strings.... + assert.strictEqual(pEnv.BAR, '{}'); + JSON.parse(pEnv.BAR); + + // JSON bindings that parse as strings come through as the + // parsed string value. + assert.strictEqual(pEnv.BAZ, 'abc'); + + // Throws because althought defined as a JSON binding, the value + // that comes through to process.env in this case is not a JSON + // parseable string because it will already have been parsed. + // This assertion is only true when the JSON bindings value + // happens to parse as a string. + assert.throws(() => JSON.parse(pEnv.BAZ)); + + // JSON bindings that parse as strings that happen to be double + // escaped might be JSON parseable. + assert.strictEqual(JSON.parse(pEnv.DUB), 'abc'); + + // Test that imports can see the process.env at the top level + const { FOO } = await import('mod'); + assert.strictEqual(FOO, 'BAR'); + + // Mutating the env argument does not change process.env, and + // vis versa + assert.strictEqual(env.QUX, undefined); + env.ZZZ = 'a'; + assert.strictEqual(pEnv.ZZZ, undefined); + + env.FOO = ['just some other value']; + assert.strictEqual(pEnv.FOO, 'BAR'); + + // Other kinds of bindings will be on env but not process.env + assert.strictEqual(pEnv.NON, undefined); + assert.deepStrictEqual( + new Uint8Array(env.NON), + new Uint8Array([97, 98, 99, 100, 101, 102]) + ); + }, +}; diff --git a/src/workerd/api/node/tests/process-nodejs-test.wd-test b/src/workerd/api/node/tests/process-nodejs-test.wd-test index 77af308a27f..ff935498b0f 100644 --- a/src/workerd/api/node/tests/process-nodejs-test.wd-test +++ b/src/workerd/api/node/tests/process-nodejs-test.wd-test @@ -5,10 +5,21 @@ const unitTests :Workerd.Config = ( ( name = "nodejs-process-test", worker = ( modules = [ - (name = "worker", esModule = embed "process-nodejs-test.js") + (name = "worker", esModule = embed "process-nodejs-test.js"), + (name = "mod", esModule = "export const { FOO } = process.env;") + ], + compatibilityDate = "2024-12-28", + compatibilityFlags = [ + "nodejs_compat", + "nodejs_compat_populate_process_env" + ], + bindings = [ + (name = "FOO", text = "BAR"), + (name = "BAR", json = "{}"), + (name = "BAZ", json = "\"abc\""), + (name = "DUB", json = "\"\\\"abc\\\"\""), + (name = "NON", data = "abcdef") ], - compatibilityDate = "2024-10-11", - compatibilityFlags = ["nodejs_compat"], ) ), ], diff --git a/src/workerd/api/node/util.c++ b/src/workerd/api/node/util.c++ index 0b526cb2bc4..b62422266fa 100644 --- a/src/workerd/api/node/util.c++ +++ b/src/workerd/api/node/util.c++ @@ -234,6 +234,10 @@ jsg::JsValue UtilModule::getBuiltinModule(jsg::Lock& js, kj::String specifier) { return js.undefined(); } +jsg::JsObject UtilModule::getEnvObject(jsg::Lock& js) { + return js.getEnv(true); +} + namespace { [[noreturn]] void handleProcessExit(jsg::Lock& js, int code) { // There are a few things happening here. First, we abort the current IoContext diff --git a/src/workerd/api/node/util.h b/src/workerd/api/node/util.h index b5ed95d849d..eaf6528507d 100644 --- a/src/workerd/api/node/util.h +++ b/src/workerd/api/node/util.h @@ -239,6 +239,8 @@ class UtilModule final: public jsg::Object { return processPlatform; } + jsg::JsObject getEnvObject(jsg::Lock& js); + JSG_RESOURCE_TYPE(UtilModule) { JSG_NESTED_TYPE(MIMEType); JSG_NESTED_TYPE(MIMEParams); @@ -258,6 +260,9 @@ class UtilModule final: public jsg::Object { JSG_METHOD(previewEntries); JSG_METHOD(getConstructorName); JSG_METHOD(getCallSite); + // TODO(cleanup): It might be about time to separate some of these out + // to a different module. + JSG_METHOD(getEnvObject); #define V(Type) JSG_METHOD(is##Type); JS_UTIL_IS_TYPES(V) diff --git a/src/workerd/io/compatibility-date.capnp b/src/workerd/io/compatibility-date.capnp index 3572bf2aa5e..5a77dfc9f55 100644 --- a/src/workerd/io/compatibility-date.capnp +++ b/src/workerd/io/compatibility-date.capnp @@ -679,4 +679,10 @@ struct CompatibilityFlags @0x8f8c1b68151b6cef { $compatDisableFlag("cache_no_cache_disabled") $experimental; # Enables the use of cache: no-cache in the fetch api. + + populateProcessEnv @71 :Bool + $compatEnableFlag("nodejs_compat_populate_process_env") + $compatDisableFlag("nodejs_compat_do_not_populate_process_env"); + # Automatically populate process.env from text bindings only + # when nodejs_compat is being used. } diff --git a/src/workerd/jsg/jsg.h b/src/workerd/jsg/jsg.h index fdd5c402368..27a3c0794e1 100644 --- a/src/workerd/jsg/jsg.h +++ b/src/workerd/jsg/jsg.h @@ -2675,6 +2675,13 @@ class Lock { // the inspector (if attached), or to KJ_LOG(Info). virtual void reportError(const JsValue& value) = 0; + // Sets an env value that will be expressed on the process.env + // if/when nodejs-compat mode is used. + virtual void setEnvField(const JsValue& name, const JsValue& value) = 0; + + // Returns the env base object. + virtual JsObject getEnv(bool release = false) = 0; + private: // Mark the jsg::Lock as being disallowed from being passed as a parameter into // a kj promise coroutine. Note that this only blocks directly passing the Lock diff --git a/src/workerd/jsg/setup.c++ b/src/workerd/jsg/setup.c++ index 864b3f3b270..33a5a64d3ab 100644 --- a/src/workerd/jsg/setup.c++ +++ b/src/workerd/jsg/setup.c++ @@ -423,6 +423,7 @@ void IsolateBase::dropWrappers(kj::FunctionParam drop) { // Make sure v8::Globals are destroyed under lock (but not until later). KJ_DEFER(symbolAsyncDispose.Reset()); KJ_DEFER(opaqueTemplate.Reset()); + KJ_DEFER(envObj.Reset()); // Make sure the TypeWrapper is destroyed under lock by declaring a new copy of the variable // that is destroyed before the lock is released. diff --git a/src/workerd/jsg/setup.h b/src/workerd/jsg/setup.h index e0f535a436e..2f08d621d0d 100644 --- a/src/workerd/jsg/setup.h +++ b/src/workerd/jsg/setup.h @@ -261,6 +261,9 @@ class IsolateBase { // object with 2 internal fields. v8::Global opaqueTemplate; + // Object that is used as the underlying target of process.env when nodejs-compat mode is used. + v8::Global envObj; + // Polyfilled Symbol.asyncDispose. v8::Global symbolAsyncDispose; @@ -665,6 +668,24 @@ class Isolate: public IsolateBase { } } + // Sets an env value that will be expressed on the process.env + // if/when nodejs-compat mode is used. + void setEnvField(const JsValue& name, const JsValue& value) override { + getEnv().set(*this, name, value); + } + + // Returns the env base object. + JsObject getEnv(bool release = false) override { + KJ_DEFER({ + if (release) jsgIsolate.envObj.Reset(); + }); + if (jsgIsolate.envObj.IsEmpty()) { + v8::Local env = obj(); + jsgIsolate.envObj.Reset(v8Isolate, env); + } + return JsObject(jsgIsolate.envObj.Get(v8Isolate)); + } + private: Isolate& jsgIsolate; diff --git a/src/workerd/server/workerd-api.c++ b/src/workerd/server/workerd-api.c++ index fa9892f508b..fea5012bdb0 100644 --- a/src/workerd/server/workerd-api.c++ +++ b/src/workerd/server/workerd-api.c++ @@ -625,8 +625,68 @@ static v8::Local createBindingValue(JsgWorkerdIsolate::Lock& lock, KJ_SWITCH_ONEOF(global.value) { KJ_CASE_ONEOF(json, Global::Json) { - v8::Local string = lock.wrap(context, kj::mv(json.text)); - value = jsg::check(v8::JSON::Parse(context, string)); + value = jsg::check(v8::JSON::Parse(context, lock.str(json.text))); + if (featureFlags.getPopulateProcessEnv() && featureFlags.getNodeJsCompat()) { + // Generally speaking, process.env has the TEXT and JSON bindings from env. + // TEXT bindings are simple enough and env.{name} will strictly equal + // process.env.{name}. With JSON bindings it a bit trickier due to architectural + // quirks and history in our runtime. + // If you set the JSON environment variable to a value that parses to a string + // then it will be exposed on process.env and env as that parsed string such + // that env.{name} will strictly equal process.env{name} like with TEXT bindings. + // If, however, the value parses to any type other than a string, the + // process.env.{name} will instead be the raw parseable JSON string and will + // *not* strictly equal env.{name}. + // + // So, for example, given the bindings: + // + // (name = "FOO", json = "{}"), + // (name = "BAR", json = "\"abc\""), + // (name = "BAZ", json = "\"\\\"abc\\\"\"") + // + // In the worker: + // + // env.FOO === process.env.FOO; // false + // console.log(typeof env.FOO); // 'object' + // console.log(typeof process.env.FOO); // 'string' + // console.log(env.FOO); // [object Object] + // console.log(process.env.FOO); // '{}' + // console.log(typeof JSON.parse(process.env.FOO)); // 'object' + // + // env.BAR === process.env.BAR; // true + // console.log(typeof env.BAR); // 'string' + // console.log(typeof process.env.BAR); // 'string' + // console.log(env.BAR); // 'abc' + // console.log(process.env.BAR); // 'abc' + // console.log(typeof JSON.parse(process.env.BAR)); // throws!! + // + // env.BAZ === process.env.BAZ; // true + // console.log(typeof enf.BAZ); // 'string' + // console.log(typeof process.env.BAZ); // 'string' + // console.log(env.BAZ); // '"abc"' + // console.log(process.env.BAZ); // '"abc" + // console.log(typeof JSON.parse(process.env.BAZ)); // 'string' + // + // JSON.parse(process.env.FOO) works because the value is a parseable + // JSON string. JSON.parse(process.env.BAR) throws an error because + // the value is not a parseable JSON string, even tho the binding uses + // type JSON. JSON.parse(process.env.BAZ)` works because the original + // JSON-encoded value was double-escaped and the result of the above + // v8::JSON::Parse is itself a parseable JSON string. + // + // Practically speaking this means that developers can never really + // count on environment variables accessed via `env` always being + // strictly equal to the same environment variable accessed via + // process.env because, despite being defined as JSON bindings, + // the resulting value may or may not be JSON parseable or may be + // parsed in one context (env) and unparsed in another (process.env). + + if (value->IsString()) { + lock.setEnvField(lock.str(global.name), jsg::JsValue(value)); + } else { + lock.setEnvField(lock.str(global.name), lock.str(json.text)); + } + } } KJ_CASE_ONEOF(pipeline, Global::Fetcher) { @@ -712,6 +772,9 @@ static v8::Local createBindingValue(JsgWorkerdIsolate::Lock& lock, KJ_CASE_ONEOF(text, kj::String) { value = lock.wrap(context, kj::mv(text)); + if (featureFlags.getPopulateProcessEnv() && featureFlags.getNodeJsCompat()) { + lock.setEnvField(lock.str(global.name), jsg::JsValue(value)); + } } KJ_CASE_ONEOF(data, kj::Array) {