From e19067d17339df2458f5f27194496616b0b2694f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Fri, 9 Aug 2019 16:43:20 +0200 Subject: [PATCH 1/5] test: Add unit test for max number of pushes --- test/unittests/evm_test.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/unittests/evm_test.cpp b/test/unittests/evm_test.cpp index 5478d550fe..35810b6051 100644 --- a/test/unittests/evm_test.cpp +++ b/test/unittests/evm_test.cpp @@ -11,6 +11,8 @@ using namespace intx; +constexpr auto max_code_size = 0x6000; + TEST_F(evm, empty) { execute(0, ""); @@ -1194,6 +1196,19 @@ TEST_F(evm, extcodecopy_buffer_overflow) } } +TEST_F(evm, max_code_size_push1) +{ + // TODO: Optimize the code generation. + const auto code = (max_code_size / 2) * push(1); + ASSERT_EQ(code.size(), max_code_size); + + execute(code); + EXPECT_STATUS(EVMC_STACK_OVERFLOW); + + execute(code.substr(0, code.size() - 1)); + EXPECT_STATUS(EVMC_STACK_OVERFLOW); +} + struct memory_access_opcode { evmc_opcode opcode; From 83ec8b757f4a73a77a1a080d03366072dfbf66c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Fri, 9 Aug 2019 15:10:49 +0200 Subject: [PATCH 2/5] analysis: Process instruction metrics up front --- lib/evmone/analysis.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/evmone/analysis.cpp b/lib/evmone/analysis.cpp index 6e00ff6ab8..355602520e 100644 --- a/lib/evmone/analysis.cpp +++ b/lib/evmone/analysis.cpp @@ -79,6 +79,10 @@ code_analysis analyze( const auto instr_stack_req = metrics.num_stack_arguments; const auto instr_stack_change = metrics.num_stack_returned_items - instr_stack_req; + block->stack_req = std::max(block->stack_req, instr_stack_req - block->stack_change); + block->stack_change += instr_stack_change; + block->stack_max = std::max(block->stack_max, block->stack_change); + if (metrics.gas_cost > 0) // can be -1 for undefined instruction block->gas_cost += metrics.gas_cost; @@ -112,12 +116,7 @@ code_analysis analyze( instr.arg.p.number = static_cast(i); else if (c >= OP_LOG0 && c <= OP_LOG4) instr.arg.p.number = c - OP_LOG0; - - block->stack_req = std::max(block->stack_req, instr_stack_req - block->stack_change); - block->stack_change += instr_stack_change; - block->stack_max = std::max(block->stack_max, block->stack_change); - - if (is_terminator(c)) + else if (is_terminator(c)) block = nullptr; } From 84cbd298814333fe0a78436b98a183af758c95c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Fri, 9 Aug 2019 15:53:23 +0200 Subject: [PATCH 3/5] analysis: Use switch() --- lib/evmone/CMakeLists.txt | 1 + lib/evmone/analysis.cpp | 65 ++++++++++++++++++++----------- lib/evmone/opcodes_helpers.h | 74 ++++++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+), 23 deletions(-) create mode 100644 lib/evmone/opcodes_helpers.h diff --git a/lib/evmone/CMakeLists.txt b/lib/evmone/CMakeLists.txt index bc3953a9f5..314806a719 100644 --- a/lib/evmone/CMakeLists.txt +++ b/lib/evmone/CMakeLists.txt @@ -15,6 +15,7 @@ add_library(evmone execution.cpp execution.hpp instructions.cpp + opcodes_helpers.h ) target_link_libraries(evmone PUBLIC evmc::evmc PRIVATE intx::intx evmc::instructions ethash::keccak) target_include_directories(evmone PUBLIC diff --git a/lib/evmone/analysis.cpp b/lib/evmone/analysis.cpp index 355602520e..c116b6d9c4 100644 --- a/lib/evmone/analysis.cpp +++ b/lib/evmone/analysis.cpp @@ -3,21 +3,12 @@ // Licensed under the Apache License, Version 2.0. #include "analysis.hpp" - +#include "opcodes_helpers.h" #include namespace evmone { -namespace -{ -bool is_terminator(uint8_t c) noexcept -{ - return c == OP_JUMP || c == OP_JUMPI || c == OP_STOP || c == OP_RETURN || c == OP_REVERT || - c == OP_SELFDESTRUCT; -} -} // namespace - -evmc_call_kind op2call_kind(uint8_t opcode) noexcept +inline constexpr evmc_call_kind op2call_kind(uint8_t opcode) noexcept { switch (opcode) { @@ -86,8 +77,9 @@ code_analysis analyze( if (metrics.gas_cost > 0) // can be -1 for undefined instruction block->gas_cost += metrics.gas_cost; - // Skip PUSH data. - if (c >= OP_PUSH1 && c <= OP_PUSH32) + switch (c) + { + case ANY_PUSH: { // OPT: bswap data here. ++i; @@ -99,25 +91,52 @@ code_analysis analyze( data[leading_zeros + j] = code[i + j]; instr.arg.data = &data[0]; i += push_size - 1; + break; } - else if (c >= OP_DUP1 && c <= OP_DUP16) + + case ANY_DUP: instr.arg.p.number = c - OP_DUP1; - else if (c >= OP_SWAP1 && c <= OP_SWAP16) + break; + + case ANY_SWAP: instr.arg.p.number = c - OP_SWAP1 + 1; - else if (c == OP_GAS) + break; + + case OP_GAS: instr.arg.p.number = static_cast(block->gas_cost); - else if (c == OP_DELEGATECALL || c == OP_CALL || c == OP_CALLCODE || c == OP_STATICCALL || - c == OP_CREATE || c == OP_CREATE2) - { + break; + + case OP_CALL: + case OP_CALLCODE: + case OP_DELEGATECALL: + case OP_STATICCALL: + case OP_CREATE: + case OP_CREATE2: instr.arg.p.number = static_cast(block->gas_cost); instr.arg.p.call_kind = op2call_kind(c == OP_STATICCALL ? uint8_t{OP_CALL} : c); - } - else if (c == OP_PC) + break; + + case OP_PC: instr.arg.p.number = static_cast(i); - else if (c >= OP_LOG0 && c <= OP_LOG4) + break; + + case OP_LOG0: + case OP_LOG1: + case OP_LOG2: + case OP_LOG3: + case OP_LOG4: instr.arg.p.number = c - OP_LOG0; - else if (is_terminator(c)) + break; + + case OP_JUMP: + case OP_JUMPI: + case OP_STOP: + case OP_RETURN: + case OP_REVERT: + case OP_SELFDESTRUCT: block = nullptr; + break; + } } // Not terminated block or empty code. diff --git a/lib/evmone/opcodes_helpers.h b/lib/evmone/opcodes_helpers.h new file mode 100644 index 0000000000..271e00fb74 --- /dev/null +++ b/lib/evmone/opcodes_helpers.h @@ -0,0 +1,74 @@ +// evmone: Fast Ethereum Virtual Machine implementation +// Copyright 2019 Pawel Bylica. +// Licensed under the Apache License, Version 2.0. +#pragma once + +#define ANY_PUSH \ + OP_PUSH1: \ + case OP_PUSH2: \ + case OP_PUSH3: \ + case OP_PUSH4: \ + case OP_PUSH5: \ + case OP_PUSH6: \ + case OP_PUSH7: \ + case OP_PUSH8: \ + case OP_PUSH9: \ + case OP_PUSH10: \ + case OP_PUSH11: \ + case OP_PUSH12: \ + case OP_PUSH13: \ + case OP_PUSH14: \ + case OP_PUSH15: \ + case OP_PUSH16: \ + case OP_PUSH17: \ + case OP_PUSH18: \ + case OP_PUSH19: \ + case OP_PUSH20: \ + case OP_PUSH21: \ + case OP_PUSH22: \ + case OP_PUSH23: \ + case OP_PUSH24: \ + case OP_PUSH25: \ + case OP_PUSH26: \ + case OP_PUSH27: \ + case OP_PUSH28: \ + case OP_PUSH29: \ + case OP_PUSH30: \ + case OP_PUSH31: \ + case OP_PUSH32 + +#define ANY_DUP \ + OP_DUP1: \ + case OP_DUP2: \ + case OP_DUP3: \ + case OP_DUP4: \ + case OP_DUP5: \ + case OP_DUP6: \ + case OP_DUP7: \ + case OP_DUP8: \ + case OP_DUP9: \ + case OP_DUP10: \ + case OP_DUP11: \ + case OP_DUP12: \ + case OP_DUP13: \ + case OP_DUP14: \ + case OP_DUP15: \ + case OP_DUP16 + +#define ANY_SWAP \ + OP_SWAP1: \ + case OP_SWAP2: \ + case OP_SWAP3: \ + case OP_SWAP4: \ + case OP_SWAP5: \ + case OP_SWAP6: \ + case OP_SWAP7: \ + case OP_SWAP8: \ + case OP_SWAP9: \ + case OP_SWAP10: \ + case OP_SWAP11: \ + case OP_SWAP12: \ + case OP_SWAP13: \ + case OP_SWAP14: \ + case OP_SWAP15: \ + case OP_SWAP16 From 1b177c7eac7ff4aa318025261668d39c455bb177 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Fri, 9 Aug 2019 16:43:01 +0200 Subject: [PATCH 4/5] analysis: Change args_storage from std::deque to std::vector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Paweł Bylica --- lib/evmone/analysis.cpp | 15 +++++++++++++-- lib/evmone/analysis.hpp | 3 +-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/lib/evmone/analysis.cpp b/lib/evmone/analysis.cpp index c116b6d9c4..682ca1cae6 100644 --- a/lib/evmone/analysis.cpp +++ b/lib/evmone/analysis.cpp @@ -5,6 +5,7 @@ #include "analysis.hpp" #include "opcodes_helpers.h" #include +#include namespace evmone { @@ -31,8 +32,13 @@ code_analysis analyze( const exec_fn_table& fns, evmc_revision rev, const uint8_t* code, size_t code_size) noexcept { code_analysis analysis; - // TODO: Check if final result exceeds the reservation. - analysis.instrs.reserve(code_size + 1); + + const auto max_instrs_size = code_size + 1; + analysis.instrs.reserve(max_instrs_size); + + // This is 2x more than needed but using (code_size / 2 + 1) increases page-faults 1000x. + const auto max_args_storage_size = code_size + 1; + analysis.args_storage.reserve(max_args_storage_size); const auto* instr_table = evmc_get_instruction_metrics_table(rev); @@ -143,6 +149,11 @@ code_analysis analyze( if (block || code_size == 0 || code[code_size - 1] == OP_JUMPI) analysis.instrs.emplace_back(fns[OP_STOP]); + // FIXME: assert(analysis.instrs.size() <= max_instrs_size); + + // Make sure the args_storage has not been reallocated. Otherwise iterators are invalid. + assert(analysis.args_storage.size() <= max_args_storage_size); + return analysis; } diff --git a/lib/evmone/analysis.hpp b/lib/evmone/analysis.hpp index 5524fef54a..b571d6b28d 100644 --- a/lib/evmone/analysis.hpp +++ b/lib/evmone/analysis.hpp @@ -9,7 +9,6 @@ #include #include #include -#include #include namespace evmone @@ -177,7 +176,7 @@ struct code_analysis /// /// The deque container is used because pointers to its elements are not /// invalidated when the container grows. - std::deque args_storage; + std::vector args_storage; /// The offsets of JUMPDESTs in the original code. /// These are values that JUMP/JUMPI receives as an argument. From 6ac59ff5d113f993fb75a219ae9a7f3c5a122db4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Fri, 9 Aug 2019 23:23:17 +0200 Subject: [PATCH 5/5] analysis: Use pointer to traverse code --- lib/evmone/analysis.cpp | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/lib/evmone/analysis.cpp b/lib/evmone/analysis.cpp index 682ca1cae6..a9c6e9adcb 100644 --- a/lib/evmone/analysis.cpp +++ b/lib/evmone/analysis.cpp @@ -43,13 +43,15 @@ code_analysis analyze( const auto* instr_table = evmc_get_instruction_metrics_table(rev); block_info* block = nullptr; + int instr_index = 0; - for (size_t i = 0; i < code_size; ++i, ++instr_index) + + for (auto code_pos = code; code_pos < code + code_size; ++code_pos, ++instr_index) { // TODO: Loop in reverse order for easier GAS analysis. - const auto c = code[i]; + const auto opcode = *code_pos; - const bool jumpdest = c == OP_JUMPDEST; + const bool jumpdest = opcode == OP_JUMPDEST; if (!block || jumpdest) { @@ -63,16 +65,16 @@ code_analysis analyze( if (jumpdest) // Add the jumpdest to the map. { - analysis.jumpdest_offsets.emplace_back(static_cast(i)); + analysis.jumpdest_offsets.emplace_back(static_cast(code_pos - code)); analysis.jumpdest_targets.emplace_back(static_cast(instr_index)); } else // Increase instruction count because additional BEGINBLOCK was injected. ++instr_index; } - auto& instr = jumpdest ? analysis.instrs.back() : analysis.instrs.emplace_back(fns[c]); + auto& instr = jumpdest ? analysis.instrs.back() : analysis.instrs.emplace_back(fns[opcode]); - const auto metrics = instr_table[c]; + const auto metrics = instr_table[opcode]; const auto instr_stack_req = metrics.num_stack_arguments; const auto instr_stack_change = metrics.num_stack_returned_items - instr_stack_req; @@ -83,29 +85,29 @@ code_analysis analyze( if (metrics.gas_cost > 0) // can be -1 for undefined instruction block->gas_cost += metrics.gas_cost; - switch (c) + switch (opcode) { case ANY_PUSH: { // OPT: bswap data here. - ++i; - const auto push_size = size_t(c - OP_PUSH1 + 1); + const auto push_size = size_t(opcode - OP_PUSH1 + 1); auto& data = analysis.args_storage.emplace_back(); const auto leading_zeros = 32 - push_size; + const auto i = code_pos - code + 1; for (size_t j = 0; j < push_size && (i + j) < code_size; ++j) data[leading_zeros + j] = code[i + j]; instr.arg.data = &data[0]; - i += push_size - 1; + code_pos += push_size; break; } case ANY_DUP: - instr.arg.p.number = c - OP_DUP1; + instr.arg.p.number = opcode - OP_DUP1; break; case ANY_SWAP: - instr.arg.p.number = c - OP_SWAP1 + 1; + instr.arg.p.number = opcode - OP_SWAP1 + 1; break; case OP_GAS: @@ -119,11 +121,12 @@ code_analysis analyze( case OP_CREATE: case OP_CREATE2: instr.arg.p.number = static_cast(block->gas_cost); - instr.arg.p.call_kind = op2call_kind(c == OP_STATICCALL ? uint8_t{OP_CALL} : c); + instr.arg.p.call_kind = + op2call_kind(opcode == OP_STATICCALL ? uint8_t{OP_CALL} : opcode); break; case OP_PC: - instr.arg.p.number = static_cast(i); + instr.arg.p.number = static_cast(code_pos - code); break; case OP_LOG0: @@ -131,7 +134,7 @@ code_analysis analyze( case OP_LOG2: case OP_LOG3: case OP_LOG4: - instr.arg.p.number = c - OP_LOG0; + instr.arg.p.number = opcode - OP_LOG0; break; case OP_JUMP: