From ad44032faa25850419285fc7d335955e806d56bc Mon Sep 17 00:00:00 2001 From: Lin Huang Date: Fri, 13 Oct 2023 16:01:29 -0400 Subject: [PATCH 1/5] hardening resource manager plugin shutdown --- .../file_space_handler.hpp | 40 ++++++++++++++++++- .../resource_monitor_plugin.cpp | 1 + 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp b/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp index 1ddcd82bfa..46735a6ebc 100644 --- a/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp +++ b/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp @@ -6,6 +6,8 @@ #include #include +#include + namespace bfs = boost::filesystem; namespace eosio::resource_monitor { @@ -131,26 +133,60 @@ namespace eosio::resource_monitor { } void space_monitor_loop() { + space_monitor_task_running = true; + + if (stopping) { + space_monitor_task_running = false; + return; + } if ( is_threshold_exceeded() && shutdown_on_exceeded ) { elog("Gracefully shutting down, exceeded file system configured threshold."); appbase::app().quit(); // This will gracefully stop Nodeos + space_monitor_task_running = false; return; } update_warning_interval_counter(); timer.expires_from_now( boost::posix_time::seconds( sleep_time_in_secs )); - + if (stopping) { + space_monitor_task_running = false; + return; + } timer.async_wait([this](auto& ec) { if ( ec ) { wlog("Exit due to error: ${ec}, message: ${message}", ("ec", ec.value()) ("message", ec.message())); + space_monitor_task_running = false; return; } else { // Loop over space_monitor_loop(); } }); + space_monitor_task_running = false; + } + + // called on main thread from plugin shutdown() + void stop() { + // signalling space_monitor_loop that plugin is being stopped + stopping = true; + + // cancel outstanding scheduled space_monitor_loop task + timer.cancel(); + + // make sure space_monitor_loop has exited. + // in vast majority time, it is not running; just waiting to be executed + using namespace std::chrono_literals; // for ms + auto num_sleeps = 0u; + constexpr auto max_num_sleeps = 10u; + while (space_monitor_task_running && num_sleeps < max_num_sleeps) { + std::this_thread::sleep_for( 1ms ); + ++num_sleeps; + } + if (space_monitor_task_running) { + wlog("space_monitor_loop not stopped after ${n} ms", ("n", num_sleeps)); + } } private: @@ -164,6 +200,8 @@ namespace eosio::resource_monitor { uint64_t shutdown_absolute {0}; uint64_t warning_absolute {0}; bool shutdown_on_exceeded {true}; + std::atomic stopping {false}; // set on main thread, checked on resmon thread + std::atomic space_monitor_task_running {false}; // set on resmon thread, checked on main thread. after space_monitor_loop is not running, it is safe to stop struct filesystem_info { dev_t st_dev; // device id of file system containing "file_path" diff --git a/plugins/resource_monitor_plugin/resource_monitor_plugin.cpp b/plugins/resource_monitor_plugin/resource_monitor_plugin.cpp index 17f6b2bd3f..c01eede115 100644 --- a/plugins/resource_monitor_plugin/resource_monitor_plugin.cpp +++ b/plugins/resource_monitor_plugin/resource_monitor_plugin.cpp @@ -142,6 +142,7 @@ class resource_monitor_plugin_impl { void plugin_shutdown() { ilog("shutdown..."); + space_handler.stop(); ctx.stop(); // Wait for the thread to end From 7052a94e11e9cc55ffe79166e642ed4c1425e427 Mon Sep 17 00:00:00 2001 From: Lin Huang Date: Sun, 15 Oct 2023 15:14:45 -0400 Subject: [PATCH 2/5] use named_thread_pool to streamline thread management in resource monitor plugin --- .../file_space_handler.hpp | 110 +++++++++--------- .../resource_monitor_plugin.cpp | 39 +------ .../test/test_add_file_system.cpp | 4 +- .../test/test_threshold.cpp | 6 +- 4 files changed, 63 insertions(+), 96 deletions(-) diff --git a/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp b/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp index 46735a6ebc..5ecee7bd3c 100644 --- a/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp +++ b/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp @@ -5,8 +5,7 @@ #include #include - -#include +#include namespace bfs = boost::filesystem; @@ -14,12 +13,43 @@ namespace eosio::resource_monitor { template class file_space_handler { public: - file_space_handler(SpaceProvider&& space_provider, boost::asio::io_context& ctx) - :space_provider(std::move(space_provider)), - timer{ctx} + file_space_handler(SpaceProvider&& space_provider) + :space_provider(std::move(space_provider)) { } + void start(const std::vector& directories) { + for ( auto& dir: directories ) { + add_file_system( dir ); + + // A directory like "data" contains subdirectories like + // "block". Those subdirectories can mount on different + // file systems. Make sure they are taken care of. + for (bfs::directory_iterator itr(dir); itr != bfs::directory_iterator(); ++itr) { + if (bfs::is_directory(itr->path())) { + add_file_system( itr->path() ); + } + } + } + + thread_pool.start(thread_pool_size, + []( const fc::exception& e ) { + elog("Exception in read-only thread pool, exiting: ${e}", ("e", e.to_detail_string()) ); + appbase::app().quit(); }, + [&]() { space_monitor_loop(); } + ); + } + + // called on main thread from plugin shutdown() + void stop() { + { + std::lock_guard g( timer_mtx ); + boost::system::error_code ec; + timer.cancel(ec); + } + thread_pool.stop(); + } + void set_sleep_time(uint32_t sleep_time) { sleep_time_in_secs = sleep_time; } @@ -133,66 +163,40 @@ namespace eosio::resource_monitor { } void space_monitor_loop() { - space_monitor_task_running = true; - - if (stopping) { - space_monitor_task_running = false; - return; - } if ( is_threshold_exceeded() && shutdown_on_exceeded ) { elog("Gracefully shutting down, exceeded file system configured threshold."); appbase::app().quit(); // This will gracefully stop Nodeos - space_monitor_task_running = false; return; } update_warning_interval_counter(); - timer.expires_from_now( boost::posix_time::seconds( sleep_time_in_secs )); - if (stopping) { - space_monitor_task_running = false; - return; - } - timer.async_wait([this](auto& ec) { - if ( ec ) { - wlog("Exit due to error: ${ec}, message: ${message}", - ("ec", ec.value()) - ("message", ec.message())); - space_monitor_task_running = false; - return; - } else { - // Loop over - space_monitor_loop(); - } - }); - space_monitor_task_running = false; - } - - // called on main thread from plugin shutdown() - void stop() { - // signalling space_monitor_loop that plugin is being stopped - stopping = true; - - // cancel outstanding scheduled space_monitor_loop task - timer.cancel(); - - // make sure space_monitor_loop has exited. - // in vast majority time, it is not running; just waiting to be executed - using namespace std::chrono_literals; // for ms - auto num_sleeps = 0u; - constexpr auto max_num_sleeps = 10u; - while (space_monitor_task_running && num_sleeps < max_num_sleeps) { - std::this_thread::sleep_for( 1ms ); - ++num_sleeps; - } - if (space_monitor_task_running) { - wlog("space_monitor_loop not stopped after ${n} ms", ("n", num_sleeps)); + { + std::lock_guard g( timer_mtx ); + timer.expires_from_now( boost::posix_time::seconds( sleep_time_in_secs )); + timer.async_wait([this](auto& ec) { + if ( ec ) { + if ( ec != boost::asio::error::operation_aborted ) { // not cancelled + wlog("Exit due to error: ${ec}, message: ${message}", + ("ec", ec.value()) + ("message", ec.message())); + } + return; + } else { + // Loop over + space_monitor_loop(); + } + }); } } private: SpaceProvider space_provider; - boost::asio::deadline_timer timer; + static constexpr size_t thread_pool_size = 1; + eosio::chain::named_thread_pool thread_pool; + + std::mutex timer_mtx; + boost::asio::deadline_timer timer {thread_pool.get_executor()}; uint32_t sleep_time_in_secs {2}; uint32_t shutdown_threshold {90}; @@ -200,8 +204,6 @@ namespace eosio::resource_monitor { uint64_t shutdown_absolute {0}; uint64_t warning_absolute {0}; bool shutdown_on_exceeded {true}; - std::atomic stopping {false}; // set on main thread, checked on resmon thread - std::atomic space_monitor_task_running {false}; // set on resmon thread, checked on main thread. after space_monitor_loop is not running, it is safe to stop struct filesystem_info { dev_t st_dev; // device id of file system containing "file_path" diff --git a/plugins/resource_monitor_plugin/resource_monitor_plugin.cpp b/plugins/resource_monitor_plugin/resource_monitor_plugin.cpp index c01eede115..bb38152b09 100644 --- a/plugins/resource_monitor_plugin/resource_monitor_plugin.cpp +++ b/plugins/resource_monitor_plugin/resource_monitor_plugin.cpp @@ -40,7 +40,7 @@ namespace eosio { class resource_monitor_plugin_impl { public: resource_monitor_plugin_impl() - :space_handler(system_file_space_provider(), ctx) + :space_handler(system_file_space_provider()) { } @@ -112,43 +112,14 @@ class resource_monitor_plugin_impl { // Start main thread void plugin_startup() { - ilog("Creating and starting monitor thread"); - - // By now all plugins are initialized. - // Find out filesystems containing the directories requested - // so far. - for ( auto& dir: directories_registered ) { - space_handler.add_file_system( dir ); - - // A directory like "data" contains subdirectories like - // "block". Those subdirectories can mount on different - // file systems. Make sure they are taken care of. - for (bfs::directory_iterator itr(dir); itr != bfs::directory_iterator(); ++itr) { - if (fc::is_directory(itr->path())) { - space_handler.add_file_system( itr->path() ); - } - } - } - - monitor_thread = std::thread( [this] { - fc::set_os_thread_name( "resmon" ); // console_appender uses 9 chars for thread name reporting. - space_handler.space_monitor_loop(); - - ctx.run(); - } ); + space_handler.start(directories_registered); } // System is shutting down. void plugin_shutdown() { - ilog("shutdown..."); - + ilog("entered shutdown..."); space_handler.stop(); - ctx.stop(); - - // Wait for the thread to end - monitor_thread.join(); - - ilog("exit shutdown"); + ilog("exiting shutdown"); } void monitor_directory(const bfs::path& path) { @@ -173,8 +144,6 @@ class resource_monitor_plugin_impl { static constexpr uint32_t warning_interval_min = 1; static constexpr uint32_t warning_interval_max = 450; // e.g. if the monitor interval is 2 sec, the warning interval is at most 15 minutes - boost::asio::io_context ctx; - using file_space_handler_t = file_space_handler; file_space_handler_t space_handler; }; diff --git a/plugins/resource_monitor_plugin/test/test_add_file_system.cpp b/plugins/resource_monitor_plugin/test/test_add_file_system.cpp index 08ac6100c1..74a078adf9 100644 --- a/plugins/resource_monitor_plugin/test/test_add_file_system.cpp +++ b/plugins/resource_monitor_plugin/test/test_add_file_system.cpp @@ -23,11 +23,9 @@ struct add_file_system_fixture { add_file_system_fixture& fixture; }; - boost::asio::io_context ctx; - using file_space_handler_t = file_space_handler; add_file_system_fixture() - : space_handler(mock_space_provider(*this), ctx) + : space_handler(mock_space_provider(*this)) { } diff --git a/plugins/resource_monitor_plugin/test/test_threshold.cpp b/plugins/resource_monitor_plugin/test/test_threshold.cpp index c817d5cb13..4fd2e77b4c 100644 --- a/plugins/resource_monitor_plugin/test/test_threshold.cpp +++ b/plugins/resource_monitor_plugin/test/test_threshold.cpp @@ -23,11 +23,9 @@ struct threshold_fixture { threshold_fixture& fixture; }; - boost::asio::io_context ctx; - using file_space_handler_t = file_space_handler; threshold_fixture() - : space_handler(std::make_unique(mock_space_provider(*this), ctx)) + : space_handler(std::make_unique(mock_space_provider(*this))) { } @@ -49,7 +47,7 @@ struct threshold_fixture { bool test_threshold_common(std::map& available, std::map& dev, uint32_t warning_threshold=75) { bool first = test_threshold_common_(available, dev, warning_threshold); - space_handler = std::make_unique(mock_space_provider(*this), ctx); + space_handler = std::make_unique(mock_space_provider(*this)); test_absolute = true; bool second = test_threshold_common_(available, dev, warning_threshold); From 599b01d4ec03df20131a801049ae0e925ca6388a Mon Sep 17 00:00:00 2001 From: Lin Huang Date: Sun, 15 Oct 2023 15:23:16 -0400 Subject: [PATCH 3/5] Remove unnecessary unit tests Those tests were intended to verify the duration of space_monitor_loop. In essence they tested Boost's expires_from_noa, which was not necessary. The tests themselves were hacky and took uncessary 50 seconds. --- .../test/CMakeLists.txt | 2 +- .../test/test_monitor_loop.cpp | 151 ------------------ 2 files changed, 1 insertion(+), 152 deletions(-) delete mode 100644 plugins/resource_monitor_plugin/test/test_monitor_loop.cpp diff --git a/plugins/resource_monitor_plugin/test/CMakeLists.txt b/plugins/resource_monitor_plugin/test/CMakeLists.txt index a564c515dc..e9f0bf03ba 100644 --- a/plugins/resource_monitor_plugin/test/CMakeLists.txt +++ b/plugins/resource_monitor_plugin/test/CMakeLists.txt @@ -1,4 +1,4 @@ -add_executable( test_resmon_plugin test_resmon_plugin.cpp test_add_file_system.cpp test_monitor_loop.cpp test_threshold.cpp ) +add_executable( test_resmon_plugin test_resmon_plugin.cpp test_add_file_system.cpp test_threshold.cpp ) target_link_libraries( test_resmon_plugin resource_monitor_plugin ) target_include_directories( test_resmon_plugin PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include" ) add_test(NAME test_resmon_plugin COMMAND plugins/resource_monitor_plugin/test/test_resmon_plugin WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) diff --git a/plugins/resource_monitor_plugin/test/test_monitor_loop.cpp b/plugins/resource_monitor_plugin/test/test_monitor_loop.cpp deleted file mode 100644 index f572cd53b7..0000000000 --- a/plugins/resource_monitor_plugin/test/test_monitor_loop.cpp +++ /dev/null @@ -1,151 +0,0 @@ -#include - -#include - -using namespace eosio; -using namespace eosio::resource_monitor; -using namespace boost::system; - -struct space_handler_fixture { - struct mock_space_provider { - explicit mock_space_provider(space_handler_fixture& fixture) - :fixture(fixture) - {} - - int get_stat(const char *path, struct stat *buf) const { - return fixture.mock_get_stat(path, buf); - } - - bfs::space_info get_space(const bfs::path& p, boost::system::error_code& ec) const { - return fixture.mock_get_space(p, ec); - } - - space_handler_fixture& fixture; - }; - - boost::asio::io_context ctx; - - using file_space_handler_t = file_space_handler; - space_handler_fixture() - : space_handler(mock_space_provider( *this ), ctx) - { - } - - void add_file_system(const bfs::path& path_name) { - space_handler.add_file_system( path_name ); - } - - void set_threshold(uint32_t threshold, uint32_t warning_threshold) { - space_handler.set_threshold( threshold, warning_threshold ); - } - - void set_sleep_time(uint32_t sleep_time) { - space_handler.set_sleep_time( sleep_time ); - } - - void set_shutdown_on_exceeded(bool shutdown_on_exceeded) { - space_handler.set_shutdown_on_exceeded(shutdown_on_exceeded); - } - - bool is_threshold_exceeded() { - return space_handler.is_threshold_exceeded(); - } - - void space_monitor_loop() { - return space_handler.space_monitor_loop(); - } - - bool test_loop_common(int num_loops, int interval) - { - mock_get_space = [ i = 0, num_loops ]( const bfs::path& p, boost::system::error_code& ec) mutable -> bfs::space_info { - ec = boost::system::errc::make_error_code(errc::success); - - bfs::space_info rc{}; - rc.capacity = 1000000; - - if ( i < num_loops + 1 ) { // "+ 1" for the get_space in add_file_system - rc.available = 300000; - } else { - rc.available = 100000; - } - - i++; - - return rc; - }; - - mock_get_stat = []( const char *path, struct stat *buf ) -> int { - buf->st_dev = 0; - return 0; - }; - - set_threshold(80, 75); - set_shutdown_on_exceeded(true); - set_sleep_time(interval); - add_file_system("/test"); - - auto start = std::chrono::system_clock::now(); - - auto monitor_thread = std::thread( [this] { - space_monitor_loop(); - ctx.run(); - }); - - monitor_thread.join(); - - auto end = std::chrono::system_clock::now(); - std::chrono::duration test_duration = end - start; - - // For tests to be repeatable on any platforms under any loads, - // particularly for longer runs, - // we just make sure the test duration is longer than a margin - // of theroretical duration. - bool finished_in_time = (test_duration >= std::chrono::duration((num_loops - 1) * interval)); - - return finished_in_time; - } - - // fixture data and methods - std::function mock_get_space; - std::function mock_get_stat; - - file_space_handler_t space_handler; -}; - -BOOST_AUTO_TEST_SUITE(monitor_loop_tests) - BOOST_FIXTURE_TEST_CASE(zero_loop, space_handler_fixture) - { - BOOST_TEST( test_loop_common(0, 1) ); - } - - BOOST_FIXTURE_TEST_CASE(one_loop_1_secs_interval, space_handler_fixture) - { - BOOST_TEST( test_loop_common(1, 1) ); - } - - BOOST_FIXTURE_TEST_CASE(two_loops_1_sec_interval, space_handler_fixture) - { - BOOST_TEST( test_loop_common(2, 1) ); - } - - BOOST_FIXTURE_TEST_CASE(ten_loops_1_sec_interval, space_handler_fixture) - { - BOOST_TEST( test_loop_common(10, 1) ); - } - - BOOST_FIXTURE_TEST_CASE(one_loop_5_secs_interval, space_handler_fixture) - { - BOOST_TEST( test_loop_common(1, 5) ); - } - - BOOST_FIXTURE_TEST_CASE(two_loops_5_sec_interval, space_handler_fixture) - { - BOOST_TEST( test_loop_common(2, 5) ); - } - - BOOST_FIXTURE_TEST_CASE(five_loops_5_sec_interval, space_handler_fixture) - { - BOOST_TEST( test_loop_common(5, 5) ); - } - -BOOST_AUTO_TEST_SUITE_END() From dc87f58939deb89cad6bc3d63e5c2aa8c0403900 Mon Sep 17 00:00:00 2001 From: Lin Huang Date: Sun, 15 Oct 2023 17:22:39 -0400 Subject: [PATCH 4/5] update resource monitor plugin integration tests * removes unnecessary check of info logging "Creating and st arting monitor thread" * reduces time to wait for nodeos startup from 120 seconds to 10 seconds --- tests/resource_monitor_plugin_test.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/resource_monitor_plugin_test.py b/tests/resource_monitor_plugin_test.py index e50db5fac3..f6fa9c502d 100755 --- a/tests/resource_monitor_plugin_test.py +++ b/tests/resource_monitor_plugin_test.py @@ -23,6 +23,7 @@ stderrFile=dataDir + "/stderr.txt" testNum=0 +max_start_time_secs=10 # time nodeos takes to start # We need debug level to get more information about nodeos process logging="""{ @@ -105,7 +106,7 @@ def testCommon(title, extraNodeosArgs, expectedMsgs): prepareDirectories() - timeout=120 # Leave sufficient time such nodeos can start up fully in any platforms + timeout=max_start_time_secs # Leave sufficient time such nodeos can start up fully in any platforms runNodeos(extraNodeosArgs, timeout) for msg in expectedMsgs: @@ -156,7 +157,7 @@ def testInterval(title, extraNodeosArgs, interval, expectedMsgs, warningThreshol prepareDirectories() fillFS(dataDir, warningThreshold) - timeout = 120 + interval * 2 # Leave sufficient time so nodeos can start up fully in any platforms, and at least two warnings can be output + timeout = max_start_time_secs + interval * 2 # Leave sufficient time so nodeos can start up fully in any platforms, and at least two warnings can be output if timeout > testIntervalMaxTimeout: errorExit ("Max timeout for testInterval is %d sec" % (testIntervalMaxTimeout)) runNodeos(extraNodeosArgs, timeout) @@ -169,15 +170,15 @@ def testInterval(title, extraNodeosArgs, interval, expectedMsgs, warningThreshol errorExit ("Log containing \"%s\" should be output every %d seconds" % (msg, interval)) def testAll(): - testCommon("Resmon enabled: all arguments", "--plugin eosio::resource_monitor_plugin --resource-monitor-space-threshold=85 --resource-monitor-interval-seconds=5 --resource-monitor-not-shutdown-on-threshold-exceeded", ["threshold set to 85", "interval set to 5", "Shutdown flag when threshold exceeded set to false", "Creating and starting monitor thread"]) + testCommon("Resmon enabled: all arguments", "--plugin eosio::resource_monitor_plugin --resource-monitor-space-threshold=85 --resource-monitor-interval-seconds=5 --resource-monitor-not-shutdown-on-threshold-exceeded", ["threshold set to 85", "interval set to 5", "Shutdown flag when threshold exceeded set to false"]) # default arguments and default directories to be monitored - testCommon("Resmon not enabled: no arguments", "", ["interval set to 2", "threshold set to 90", "Shutdown flag when threshold exceeded set to true", "Creating and starting monitor thread", "snapshots's file system to be monitored", "blocks's file system to be monitored", "state's file system to be monitored"]) + testCommon("Resmon not enabled: no arguments", "", ["interval set to 2", "threshold set to 90", "Shutdown flag when threshold exceeded set to true", "snapshots's file system to be monitored", "blocks's file system to be monitored", "state's file system to be monitored"]) # default arguments with registered directories - testCommon("Resmon not enabled: Producer, Chain, State History and Trace Api", "--plugin eosio::state_history_plugin --state-history-dir=/tmp/state-history --disable-replay-opts --plugin eosio::trace_api_plugin --trace-dir=/tmp/trace --trace-no-abis", ["interval set to 2", "threshold set to 90", "Shutdown flag when threshold exceeded set to true", "snapshots's file system to be monitored", "blocks's file system to be monitored", "state's file system to be monitored", "state-history's file system to be monitored", "trace's file system to be monitored", "Creating and starting monitor thread"]) + testCommon("Resmon not enabled: Producer, Chain, State History and Trace Api", "--plugin eosio::state_history_plugin --state-history-dir=/tmp/state-history --disable-replay-opts --plugin eosio::trace_api_plugin --trace-dir=/tmp/trace --trace-no-abis", ["interval set to 2", "threshold set to 90", "Shutdown flag when threshold exceeded set to true", "snapshots's file system to be monitored", "blocks's file system to be monitored", "state's file system to be monitored", "state-history's file system to be monitored", "trace's file system to be monitored"]) - testCommon("Resmon enabled: Producer, Chain, State History and Trace Api", "--plugin eosio::resource_monitor_plugin --plugin eosio::state_history_plugin --state-history-dir=/tmp/state-history --disable-replay-opts --plugin eosio::trace_api_plugin --trace-dir=/tmp/trace --trace-no-abis --resource-monitor-space-threshold=80 --resource-monitor-interval-seconds=3", ["snapshots's file system to be monitored", "blocks's file system to be monitored", "state's file system to be monitored", "state-history's file system to be monitored", "trace's file system to be monitored", "Creating and starting monitor thread", "threshold set to 80", "interval set to 3", "Shutdown flag when threshold exceeded set to true"]) + testCommon("Resmon enabled: Producer, Chain, State History and Trace Api", "--plugin eosio::resource_monitor_plugin --plugin eosio::state_history_plugin --state-history-dir=/tmp/state-history --disable-replay-opts --plugin eosio::trace_api_plugin --trace-dir=/tmp/trace --trace-no-abis --resource-monitor-space-threshold=80 --resource-monitor-interval-seconds=3", ["snapshots's file system to be monitored", "blocks's file system to be monitored", "state's file system to be monitored", "state-history's file system to be monitored", "trace's file system to be monitored", "threshold set to 80", "interval set to 3", "Shutdown flag when threshold exceeded set to true"]) # Only test minimum warning threshold (i.e. 6) to trigger warning as much as possible testInterval("Resmon enabled: set warning interval", From d4c417c11bdc92951c47edf5c6ade697153ea374 Mon Sep 17 00:00:00 2001 From: Lin Huang Date: Mon, 16 Oct 2023 13:39:36 -0400 Subject: [PATCH 5/5] simplify use of thread pool * no need to call cancel timer explicitly. * no need to use mutex for timer as when timer is used on the main thread, resource monitor thread has already stopped. --- .../file_space_handler.hpp | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp b/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp index 5ecee7bd3c..5fc4d96e84 100644 --- a/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp +++ b/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp @@ -34,7 +34,7 @@ namespace eosio::resource_monitor { thread_pool.start(thread_pool_size, []( const fc::exception& e ) { - elog("Exception in read-only thread pool, exiting: ${e}", ("e", e.to_detail_string()) ); + elog("Exception in resource monitor plugin thread pool, exiting: ${e}", ("e", e.to_detail_string()) ); appbase::app().quit(); }, [&]() { space_monitor_loop(); } ); @@ -42,11 +42,9 @@ namespace eosio::resource_monitor { // called on main thread from plugin shutdown() void stop() { - { - std::lock_guard g( timer_mtx ); - boost::system::error_code ec; - timer.cancel(ec); - } + // After thread pool stops, timer is not accessible within it. + // In addition, timer's destructor will call cancel. + // Therefore, no need to call cancel explicitly. thread_pool.stop(); } @@ -162,6 +160,7 @@ namespace eosio::resource_monitor { ("path_name", path_name.string())("shutdown_available", to_gib(shutdown_available)) ("capacity", to_gib(info.capacity))("threshold_desc", threshold_desc()) ); } + // on resmon thread void space_monitor_loop() { if ( is_threshold_exceeded() && shutdown_on_exceeded ) { elog("Gracefully shutting down, exceeded file system configured threshold."); @@ -171,15 +170,16 @@ namespace eosio::resource_monitor { update_warning_interval_counter(); { - std::lock_guard g( timer_mtx ); timer.expires_from_now( boost::posix_time::seconds( sleep_time_in_secs )); timer.async_wait([this](auto& ec) { if ( ec ) { - if ( ec != boost::asio::error::operation_aborted ) { // not cancelled - wlog("Exit due to error: ${ec}, message: ${message}", - ("ec", ec.value()) - ("message", ec.message())); - } + // No need to check if ec is operation_aborted (cancelled), + // as cancel callback will never be make it here after thread_pool + // is stopped, even though cancel is called in the timer's + // destructor. + wlog("Exit due to error: ${ec}, message: ${message}", + ("ec", ec.value()) + ("message", ec.message())); return; } else { // Loop over @@ -195,7 +195,6 @@ namespace eosio::resource_monitor { static constexpr size_t thread_pool_size = 1; eosio::chain::named_thread_pool thread_pool; - std::mutex timer_mtx; boost::asio::deadline_timer timer {thread_pool.get_executor()}; uint32_t sleep_time_in_secs {2};