From 4638ce6f03a886a60fd472554fe2c5ddd88ee5ea Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Mon, 6 Aug 2018 23:27:03 +0800 Subject: [PATCH] src: perform integrity checks on built-in code cache Currently V8 only checks that the length of the source code is the same as the code used to generate the hash, so we add an additional check here: 1. During compile time, when generating node_javascript.cc and node_code_cache.cc, we compute and include the hash of the (unwrapped) JavaScript source in both. 2. At runtime, we check that the hash of the code being compiled and the hash of the code used to generate the cache (inside the wrapper) is the same. This is based on the assumptions: 1. `internalBinding('code_cache_hash')` must be in sync with `internalBinding('code_cache')` (same C++ file) 2. `internalBinding('natives_hash')` must be in sync with `process.binding('natives')` (same C++ file) 3. If `internalBinding('natives_hash')` is in sync with `internalBinding('natives_hash')`, then the (unwrapped) code used to generate `internalBinding('code_cache')` should be in sync with the (unwrapped) code in `process.binding('natives')` There will be, however, false positives if the wrapper used to generate the cache is different from the one used at run time, and the length of the wrapper somehow stays the same. But that should be rare and can be eased once we make the two bootstrappers cached and checked as well. PR-URL: https://github.com/nodejs/node/pull/22152 Reviewed-By: Anna Henningsen Reviewed-By: Tiancheng "Timothy" Gu Reviewed-By: James M Snell Reviewed-By: Gus Caplan --- lib/internal/bootstrap/cache.js | 1 + lib/internal/bootstrap/loaders.js | 48 ++++++++++++++++++++++++------- src/node.cc | 8 ++++++ src/node_code_cache.h | 1 + src/node_code_cache_stub.cc | 6 ++++ src/node_javascript.h | 1 + tools/generate_code_cache.js | 32 +++++++++++++++++++-- tools/js2c.py | 18 +++++++++++- 8 files changed, 100 insertions(+), 15 deletions(-) diff --git a/lib/internal/bootstrap/cache.js b/lib/internal/bootstrap/cache.js index e7e46bdf5174d5..41fe1e3a914ba0 100644 --- a/lib/internal/bootstrap/cache.js +++ b/lib/internal/bootstrap/cache.js @@ -48,6 +48,7 @@ module.exports = { ), builtinSource: Object.assign({}, NativeModule._source), getCodeCache, + getSource: NativeModule.getSource, codeCache: internalBinding('code_cache'), compiledWithoutCache: NativeModule.compiledWithoutCache, compiledWithCache: NativeModule.compiledWithCache, diff --git a/lib/internal/bootstrap/loaders.js b/lib/internal/bootstrap/loaders.js index e85d5de9b79a49..c04a4207c0b482 100644 --- a/lib/internal/bootstrap/loaders.js +++ b/lib/internal/bootstrap/loaders.js @@ -127,6 +127,8 @@ const config = getBinding('config'); const codeCache = getInternalBinding('code_cache'); + const codeCacheHash = getInternalBinding('code_cache_hash'); + const sourceHash = getInternalBinding('natives_hash'); const compiledWithoutCache = NativeModule.compiledWithoutCache = []; const compiledWithCache = NativeModule.compiledWithCache = []; @@ -232,32 +234,56 @@ }; NativeModule.prototype.compile = function() { - let source = NativeModule.getSource(this.id); + const id = this.id; + let source = NativeModule.getSource(id); source = NativeModule.wrap(source); this.loading = true; try { + // Currently V8 only checks that the length of the source code is the + // same as the code used to generate the hash, so we add an additional + // check here: + // 1. During compile time, when generating node_javascript.cc and + // node_code_cache.cc, we compute and include the hash of the + // (unwrapped) JavaScript source in both. + // 2. At runtime, we check that the hash of the code being compiled + // and the hash of the code used to generate the cache + // (inside the wrapper) is the same. + // This is based on the assumptions: + // 1. `internalBinding('code_cache_hash')` must be in sync with + // `internalBinding('code_cache')` (same C++ file) + // 2. `internalBinding('natives_hash')` must be in sync with + // `process.binding('natives')` (same C++ file) + // 3. If `internalBinding('natives_hash')` is in sync with + // `internalBinding('natives_hash')`, then the (unwrapped) + // code used to generate `internalBinding('code_cache')` + // should be in sync with the (unwrapped) code in + // `process.binding('natives')` + // There will be, however, false positives if the wrapper used + // to generate the cache is different from the one used at run time, + // and the length of the wrapper somehow stays the same. + // But that should be rare and can be eased once we make the + // two bootstrappers cached and checked as well. + const cache = codeCacheHash[id] && + (codeCacheHash[id] === sourceHash[id]) ? codeCache[id] : undefined; + // (code, filename, lineOffset, columnOffset // cachedData, produceCachedData, parsingContext) const script = new ContextifyScript( source, this.filename, 0, 0, - codeCache[this.id], false, undefined + cache, false, undefined ); + // This will be used to create code cache in tools/generate_code_cache.js this.script = script; // One of these conditions may be false when any of the inputs // of the `node_js2c` target in node.gyp is modified. - // FIXME(joyeecheung): - // 1. Figure out how to resolve the dependency issue. When the - // code cache was introduced we were at a point where refactoring - // node.gyp may not be worth the effort. - // 2. Calculate checksums in both js2c and generate_code_cache.js - // and compare them before compiling the native modules since - // V8 only checks the length of the source to decide whether to - // reject the cache. - if (!codeCache[this.id] || script.cachedDataRejected) { + // FIXME(joyeecheung): Figure out how to resolve the dependency issue. + // When the code cache was introduced we were at a point where refactoring + // node.gyp may not be worth the effort. + if (!cache || script.cachedDataRejected) { compiledWithoutCache.push(this.id); } else { compiledWithCache.push(this.id); diff --git a/src/node.cc b/src/node.cc index 27704c7933f6fa..32568310e91631 100644 --- a/src/node.cc +++ b/src/node.cc @@ -1732,6 +1732,14 @@ static void GetInternalBinding(const FunctionCallbackInfo& args) { // internalBinding('code_cache') exports = Object::New(env->isolate()); DefineCodeCache(env, exports); + } else if (!strcmp(*module_v, "code_cache_hash")) { + // internalBinding('code_cache_hash') + exports = Object::New(env->isolate()); + DefineCodeCacheHash(env, exports); + } else if (!strcmp(*module_v, "natives_hash")) { + // internalBinding('natives_hash') + exports = Object::New(env->isolate()); + DefineJavaScriptHash(env, exports); } else { return ThrowIfNoSuchModule(env, *module_v); } diff --git a/src/node_code_cache.h b/src/node_code_cache.h index d4775a2b65e022..db8012286172a9 100644 --- a/src/node_code_cache.h +++ b/src/node_code_cache.h @@ -8,6 +8,7 @@ namespace node { void DefineCodeCache(Environment* env, v8::Local target); +void DefineCodeCacheHash(Environment* env, v8::Local target); } // namespace node diff --git a/src/node_code_cache_stub.cc b/src/node_code_cache_stub.cc index 3e9e5960c1bebe..35780e13f026b0 100644 --- a/src/node_code_cache_stub.cc +++ b/src/node_code_cache_stub.cc @@ -11,4 +11,10 @@ void DefineCodeCache(Environment* env, v8::Local target) { // (here as `target`) so this is a noop. } +void DefineCodeCacheHash(Environment* env, v8::Local target) { + // When we do not produce code cache for builtin modules, + // `internalBinding('code_cache_hash')` returns an empty object + // (here as `target`) so this is a noop. +} + } // namespace node diff --git a/src/node_javascript.h b/src/node_javascript.h index 00cdc0c0b6c13e..80ef40b4ec414f 100644 --- a/src/node_javascript.h +++ b/src/node_javascript.h @@ -29,6 +29,7 @@ namespace node { void DefineJavaScript(Environment* env, v8::Local target); +void DefineJavaScriptHash(Environment* env, v8::Local target); v8::Local NodePerContextSource(v8::Isolate* isolate); v8::Local LoadersBootstrapperSource(Environment* env); v8::Local NodeBootstrapperSource(Environment* env); diff --git a/tools/generate_code_cache.js b/tools/generate_code_cache.js index 740cbd718aa11e..35411bc5af182a 100644 --- a/tools/generate_code_cache.js +++ b/tools/generate_code_cache.js @@ -9,9 +9,17 @@ const { getCodeCache, + getSource, cachableBuiltins } = require('internal/bootstrap/cache'); +function hash(str) { + if (process.versions.openssl) { + return require('crypto').createHash('sha256').update(str).digest('hex'); + } + return ''; +} + const fs = require('fs'); const resultPath = process.argv[2]; @@ -51,6 +59,8 @@ function getInitalizer(key, cache) { const defName = key.replace(/\//g, '_').replace(/-/g, '_'); const definition = `static uint8_t ${defName}_raw[] = {\n` + `${cache.join(',')}\n};`; + const source = getSource(key); + const sourceHash = hash(source); const initializer = ` v8::Local ${defName}_ab = v8::ArrayBuffer::New(isolate, ${defName}_raw, ${cache.length}); @@ -60,13 +70,19 @@ function getInitalizer(key, cache) { FIXED_ONE_BYTE_STRING(isolate, "${key}"), ${defName}_array).FromJust(); `; + const hashIntializer = ` + target->Set(context, + FIXED_ONE_BYTE_STRING(isolate, "${key}"), + OneByteString(isolate, "${sourceHash}")).FromJust(); + `; return { - definition, initializer + definition, initializer, hashIntializer, sourceHash }; } const cacheDefinitions = []; const cacheInitializers = []; +const cacheHashInitializers = []; let totalCacheSize = 0; @@ -79,11 +95,14 @@ for (const key of cachableBuiltins) { const length = cachedData.length; totalCacheSize += length; - const { definition, initializer } = getInitalizer(key, cachedData); + const { + definition, initializer, hashIntializer, sourceHash + } = getInitalizer(key, cachedData); cacheDefinitions.push(definition); cacheInitializers.push(initializer); + cacheHashInitializers.push(hashIntializer); console.log(`Generated cache for '${key}', size = ${formatSize(length)}` + - `, total = ${formatSize(totalCacheSize)}`); + `, hash = ${sourceHash}, total = ${formatSize(totalCacheSize)}`); } const result = `#include "node.h" @@ -106,6 +125,13 @@ void DefineCodeCache(Environment* env, v8::Local target) { ${cacheInitializers.join('\n')} } +// The target here will be returned as \`internalBinding('code_cache_hash')\` +void DefineCodeCacheHash(Environment* env, v8::Local target) { + v8::Isolate* isolate = env->isolate(); + v8::Local context = env->context(); + ${cacheHashInitializers.join('\n')} +} + } // namespace node `; diff --git a/tools/js2c.py b/tools/js2c.py index 249df58085dd96..40f2bc6f48f483 100755 --- a/tools/js2c.py +++ b/tools/js2c.py @@ -35,6 +35,7 @@ import re import sys import string +import hashlib def ToCArray(elements, step=10): @@ -205,6 +206,10 @@ def ReadMacros(lines): {initializers} }} +void DefineJavaScriptHash(Environment* env, v8::Local target) {{ + {hash_initializers} +}} + }} // namespace node """ @@ -240,6 +245,12 @@ def ReadMacros(lines): {value}.ToStringChecked(env->isolate())).FromJust()); """ +HASH_INITIALIZER = """\ +CHECK(target->Set(env->context(), + FIXED_ONE_BYTE_STRING(env->isolate(), "{key}"), + FIXED_ONE_BYTE_STRING(env->isolate(), "{value}")).FromJust()); +""" + DEPRECATED_DEPS = """\ 'use strict'; process.emitWarning( @@ -280,6 +291,7 @@ def JS2C(source, target): # Build source code lines definitions = [] initializers = [] + hash_initializers = []; for name in modules: lines = ReadFile(str(name)) @@ -309,10 +321,12 @@ def JS2C(source, target): var = name.replace('-', '_').replace('/', '_') key = '%s_key' % var value = '%s_value' % var + hash_value = hashlib.sha256(lines).hexdigest() definitions.append(Render(key, name)) definitions.append(Render(value, lines)) initializers.append(INITIALIZER.format(key=key, value=value)) + hash_initializers.append(HASH_INITIALIZER.format(key=name, value=hash_value)) if deprecated_deps is not None: name = '/'.join(deprecated_deps) @@ -324,11 +338,13 @@ def JS2C(source, target): definitions.append(Render(key, name)) definitions.append(Render(value, DEPRECATED_DEPS.format(module=name))) initializers.append(INITIALIZER.format(key=key, value=value)) + hash_initializers.append(HASH_INITIALIZER.format(key=name, value=hash_value)) # Emit result output = open(str(target[0]), "w") output.write(TEMPLATE.format(definitions=''.join(definitions), - initializers=''.join(initializers))) + initializers=''.join(initializers), + hash_initializers=''.join(hash_initializers))) output.close() def main():