From 9330e754e0d9b00ef23df63e29b4938ea6d8b9b8 Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Tue, 12 Mar 2024 06:37:51 -0700 Subject: [PATCH] module: fix detect-module not retrying as esm for cjs-only errors PR-URL: https://github.com/nodejs/node/pull/52024 Reviewed-By: Guy Bedford Reviewed-By: Marco Ippolito --- doc/api/cli.md | 14 ++- src/node_contextify.cc | 81 +++++++++++++++- test/es-module/test-esm-detect-ambiguous.mjs | 93 +++++++++++++++++++ .../commonjs-wrapper-variables.js | 6 ++ 4 files changed, 187 insertions(+), 7 deletions(-) create mode 100644 test/fixtures/es-modules/package-without-type/commonjs-wrapper-variables.js diff --git a/doc/api/cli.md b/doc/api/cli.md index df90e9ae306b457..d7b514694595b05 100644 --- a/doc/api/cli.md +++ b/doc/api/cli.md @@ -777,7 +777,7 @@ added: - v20.10.0 --> -> Stability: 1.0 - Early development +> Stability: 1.1 - Active development Node.js will inspect the source code of ambiguous input to determine whether it contains ES module syntax; if such syntax is detected, the input will be treated @@ -792,9 +792,15 @@ Ambiguous input is defined as: `--experimental-default-type` are specified. ES module syntax is defined as syntax that would throw when evaluated as -CommonJS. This includes `import` and `export` statements and `import.meta` -references. It does _not_ include `import()` expressions, which are valid in -CommonJS. +CommonJS. This includes the following: + +* `import` statements (but _not_ `import()` expressions, which are valid in + CommonJS). +* `export` statements. +* `import.meta` references. +* `await` at the top level of a module. +* Lexical redeclarations of the CommonJS wrapper variables (`require`, `module`, + `exports`, `__dirname`, `__filename`). ### `--experimental-import-meta-resolve` diff --git a/src/node_contextify.cc b/src/node_contextify.cc index f39b4ec4c372a3c..09079e963c70366 100644 --- a/src/node_contextify.cc +++ b/src/node_contextify.cc @@ -1409,6 +1409,25 @@ constexpr std::array esm_syntax_error_messages = { "Unexpected token 'export'", // `export` statements "Cannot use 'import.meta' outside a module"}; // `import.meta` references +// Another class of error messages that we need to check for are syntax errors +// where the syntax throws when parsed as CommonJS but succeeds when parsed as +// ESM. So far, the cases we've found are: +// - CommonJS module variables (`module`, `exports`, `require`, `__filename`, +// `__dirname`): if the user writes code such as `const module =` in the top +// level of a CommonJS module, it will throw a syntax error; but the same +// code is valid in ESM. +// - Top-level `await`: if the user writes `await` at the top level of a +// CommonJS module, it will throw a syntax error; but the same code is valid +// in ESM. +constexpr std::array throws_only_in_cjs_error_messages = { + "Identifier 'module' has already been declared", + "Identifier 'exports' has already been declared", + "Identifier 'require' has already been declared", + "Identifier '__filename' has already been declared", + "Identifier '__dirname' has already been declared", + "await is only valid in async functions and " + "the top level bodies of modules"}; + void ContextifyContext::ContainsModuleSyntax( const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); @@ -1476,19 +1495,75 @@ void ContextifyContext::ContainsModuleSyntax( id_symbol, try_catch); - bool found_error_message_caused_by_module_syntax = false; + bool should_retry_as_esm = false; if (try_catch.HasCaught() && !try_catch.HasTerminated()) { Utf8Value message_value(env->isolate(), try_catch.Message()->Get()); auto message = message_value.ToStringView(); for (const auto& error_message : esm_syntax_error_messages) { if (message.find(error_message) != std::string_view::npos) { - found_error_message_caused_by_module_syntax = true; + should_retry_as_esm = true; break; } } + + if (!should_retry_as_esm) { + for (const auto& error_message : throws_only_in_cjs_error_messages) { + if (message.find(error_message) != std::string_view::npos) { + // Try parsing again where the CommonJS wrapper is replaced by an + // async function wrapper. If the new parse succeeds, then the error + // was caused by either a top-level declaration of one of the CommonJS + // module variables, or a top-level `await`. + TryCatchScope second_parse_try_catch(env); + code = + String::Concat(isolate, + String::NewFromUtf8(isolate, "(async function() {") + .ToLocalChecked(), + code); + code = String::Concat( + isolate, + code, + String::NewFromUtf8(isolate, "})();").ToLocalChecked()); + ScriptCompiler::Source wrapped_source = GetCommonJSSourceInstance( + isolate, code, filename, 0, 0, host_defined_options, nullptr); + std::ignore = ScriptCompiler::CompileFunction( + context, + &wrapped_source, + params.size(), + params.data(), + 0, + nullptr, + options, + v8::ScriptCompiler::NoCacheReason::kNoCacheNoReason); + if (!second_parse_try_catch.HasTerminated()) { + if (second_parse_try_catch.HasCaught()) { + // If on the second parse an error is thrown by ESM syntax, then + // what happened was that the user had top-level `await` or a + // top-level declaration of one of the CommonJS module variables + // above the first `import` or `export`. + Utf8Value second_message_value( + env->isolate(), second_parse_try_catch.Message()->Get()); + auto second_message = second_message_value.ToStringView(); + for (const auto& error_message : esm_syntax_error_messages) { + if (second_message.find(error_message) != + std::string_view::npos) { + should_retry_as_esm = true; + break; + } + } + } else { + // No errors thrown in the second parse, so most likely the error + // was caused by a top-level `await` or a top-level declaration of + // one of the CommonJS module variables. + should_retry_as_esm = true; + } + } + break; + } + } + } } - args.GetReturnValue().Set(found_error_message_caused_by_module_syntax); + args.GetReturnValue().Set(should_retry_as_esm); } static void CompileFunctionForCJSLoader( diff --git a/test/es-module/test-esm-detect-ambiguous.mjs b/test/es-module/test-esm-detect-ambiguous.mjs index 43537f26f5304a6..2067040114a86be 100644 --- a/test/es-module/test-esm-detect-ambiguous.mjs +++ b/test/es-module/test-esm-detect-ambiguous.mjs @@ -234,6 +234,99 @@ describe('--experimental-detect-module', { concurrency: true }, () => { }); } }); + + // https://github.com/nodejs/node/issues/50917 + describe('syntax that errors in CommonJS but works in ESM', { concurrency: true }, () => { + it('permits top-level `await`', async () => { + const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--experimental-detect-module', + '--eval', + 'await Promise.resolve(); console.log("executed");', + ]); + + strictEqual(stderr, ''); + strictEqual(stdout, 'executed\n'); + strictEqual(code, 0); + strictEqual(signal, null); + }); + + it('permits top-level `await` above import/export syntax', async () => { + const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--experimental-detect-module', + '--eval', + 'await Promise.resolve(); import "node:os"; console.log("executed");', + ]); + + strictEqual(stderr, ''); + strictEqual(stdout, 'executed\n'); + strictEqual(code, 0); + strictEqual(signal, null); + }); + + it('still throws on `await` in an ordinary sync function', async () => { + const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--experimental-detect-module', + '--eval', + 'function fn() { await Promise.resolve(); } fn();', + ]); + + match(stderr, /SyntaxError: await is only valid in async function/); + strictEqual(stdout, ''); + strictEqual(code, 1); + strictEqual(signal, null); + }); + + it('throws on undefined `require` when top-level `await` triggers ESM parsing', async () => { + const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--experimental-detect-module', + '--eval', + 'const fs = require("node:fs"); await Promise.resolve();', + ]); + + match(stderr, /ReferenceError: require is not defined in ES module scope/); + strictEqual(stdout, ''); + strictEqual(code, 1); + strictEqual(signal, null); + }); + + it('permits declaration of CommonJS module variables', async () => { + const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--experimental-detect-module', + fixtures.path('es-modules/package-without-type/commonjs-wrapper-variables.js'), + ]); + + strictEqual(stderr, ''); + strictEqual(stdout, 'exports require module __filename __dirname\n'); + strictEqual(code, 0); + strictEqual(signal, null); + }); + + it('permits declaration of CommonJS module variables above import/export', async () => { + const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--experimental-detect-module', + '--eval', + 'const module = 3; import "node:os"; console.log("executed");', + ]); + + strictEqual(stderr, ''); + strictEqual(stdout, 'executed\n'); + strictEqual(code, 0); + strictEqual(signal, null); + }); + + it('still throws on double `const` declaration not at the top level', async () => { + const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--experimental-detect-module', + '--eval', + 'function fn() { const require = 1; const require = 2; } fn();', + ]); + + match(stderr, /SyntaxError: Identifier 'require' has already been declared/); + strictEqual(stdout, ''); + strictEqual(code, 1); + strictEqual(signal, null); + }); + }); }); // Validate temporarily disabling `--abort-on-uncaught-exception` diff --git a/test/fixtures/es-modules/package-without-type/commonjs-wrapper-variables.js b/test/fixtures/es-modules/package-without-type/commonjs-wrapper-variables.js new file mode 100644 index 000000000000000..946bc690f27baa7 --- /dev/null +++ b/test/fixtures/es-modules/package-without-type/commonjs-wrapper-variables.js @@ -0,0 +1,6 @@ +const exports = "exports"; +const require = "require"; +const module = "module"; +const __filename = "__filename"; +const __dirname = "__dirname"; +console.log(exports, require, module, __filename, __dirname);