From 7e1299bcbc9479ab9d61ee9c7c0ff50bf1674452 Mon Sep 17 00:00:00 2001 From: Xinyi Zou Date: Thu, 20 Jul 2023 10:33:03 +0800 Subject: [PATCH] [fix](memory) fix mem tracker grace exit 2 (#22003) fix: #21136 mem tracker group uses class static variables instead of global variables https://stackoverflow.com/questions/2204608/does-c-call-destructors-for-global-and-class-static-variables TODO: A mem tracker manager is required, don't use global variables, it will sad ==3623982==ERROR: AddressSanitizer: heap-use-after-free on address 0x60f0000056b8 at pc 0x56478bbe3ae0 bp 0x7f20953d2270 sp 0x7f20953d2268 READ of size 8 at 0x60f0000056b8 thread T41 (memory_tracker_) *** Query id: 0-0 *** *** Aborted at 1689749969 (unix time) try "date -d @1689749969" if you are using GNU date *** *** Current BE git commitID: b3e9cad48e *** *** SIGSEGV address not mapped to object (@0x0) received by PID 3623982 (TID 3624277 OR 0x7f19e06dd640) from PID 0; stack trace: *** #0 0x56478bbe3adf in std::__shared_ptr::operator bool() const /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h:1295:16 #1 0x56478bbe306e in doris::MemTracker::refresh_profile_counter() /doris/be/src/runtime/memory/mem_tracker.h:149:13 #2 0x56478bbec669 in doris::MemTrackerLimiter::refresh_all_tracker_profile() /doris/be/src/runtime/memory/mem_tracker_limiter.cpp:119:22 #3 0x564788f53fa0 in doris::Daemon::memory_tracker_profile_refresh_thread() /doris/be/src/common/daemon.cpp:295:9 #4 0x564788f5d04b in doris::Daemon::start()::$_4::operator()() const /doris/be/src/common/daemon.cpp:473:30 #5 0x564788f5cff6 in void std::__invoke_impl(std::__invoke_other, doris::Daemon::start()::$_4&) /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:61:14 #6 0x564788f5cf78 in std::enable_if, void>::type std::__invoke_r(doris::Daemon::start()::$_4&) /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:111:2 #7 0x564788f5cdae in std::_Function_handler::_M_invoke(std::_Any_data const&) /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291:9 #8 0x56478903f576 in std::function::operator()() const /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:560:9 #9 0x56478c4a35af in doris::Thread::supervise_thread(void*) /doris/be/src/util/thread.cpp:465:5 #10 0x7f217c8a244f in start_thread nptl/pthread_create.c:473:8 #11 0x7f217cb27d52 in __clone misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:95 0x60f0000056b8 is located 56 bytes inside of 168-byte region [0x60f000005680,0x60f000005728) freed by thread T0 here: #0 0x564788e7280d in operator delete(void*) (/mnt/hdd01/dorisTestEnv/NEREIDS_ASAN/be/lib/doris_be+0x1758280d) (BuildId: 219493cc924323ee) #1 0x56478acec1d5 in std::default_delete::operator()(doris::MemTrackerLimiter*) const /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:85:2 #2 0x56478ace9faf in std::unique_ptr >::~unique_ptr() /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:361:4 #3 0x56478ace1471 in doris::ShardedLRUCache::~ShardedLRUCache() /doris/be/src/olap/lru_cache.cpp:581:1 #4 0x56478ace14c8 in doris::ShardedLRUCache::~ShardedLRUCache() /doris/be/src/olap/lru_cache.cpp:572:37 #5 0x56478acd0984 in std::default_delete::operator()(doris::Cache*) const /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:85:2 #6 0x56478acceddf in std::unique_ptr >::~unique_ptr() /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:361:4 #7 0x56478ad96dc6 in doris::StoragePageCache::~StoragePageCache() /doris/be/src/olap/page_cache.h:78:7 #8 0x7f217ca54146 in __run_exit_handlers stdlib/exit.c:108:8 previously allocated by thread T0 here: #0 0x564788e71fad in operator new(unsigned long) (/mnt/hdd01/dorisTestEnv/NEREIDS_ASAN/be/lib/doris_be+0x17581fad) (BuildId: 219493cc924323ee) #1 0x56478ace9c90 in std::_MakeUniq::__single_object std::make_unique, std::allocator > const&>(doris::MemTrackerLimiter::Type&&, std::__cxx11::basic_string, std::allocator > const&) /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:962:30 #2 0x56478acde930 in doris::ShardedLRUCache::ShardedLRUCache(std::__cxx11::basic_string, std::allocator > const&, unsigned long, doris::LRUCacheType, unsigned int, unsigned int) /doris/be/src/olap/lru_cache.cpp:526:20 #3 0x56478ace22e1 in doris::new_lru_cache(std::__cxx11::basic_string, std::allocator > const&, unsigned long, doris::LRUCacheType, unsigned int) /doris/be/src/olap/lru_cache.cpp:670:16 #4 0x56478ad91da2 in doris::StoragePageCache::StoragePageCache(unsigned long, int, long, unsigned int) /doris/be/src/olap/page_cache.cpp:47:17 #5 0x56478ad9156e in doris::StoragePageCache::create_global_cache(unsigned long, int, long, unsigned int) /doris/be/src/olap/page_cache.cpp:31:29 #6 0x56478b98b3d3 in doris::ExecEnv::_init_mem_env() /doris/be/src/runtime/exec_env_init.cpp:251:5 #7 0x56478b98946c in doris::ExecEnv::_init(std::vector > const&) /doris/be/src/runtime/exec_env_init.cpp:182:5 #8 0x56478b987139 in doris::ExecEnv::init(doris::ExecEnv*, std::vector > const&) /doris/be/src/runtime/exec_env_init.cpp:98:17 #9 0x564788e79b50 in main /doris/be/src/service/doris_main.cpp:429:5 #10 0x7f217ca38564 in __libc_start_main csu/../csu/libc-start.c:332:16 Thread T41 (memory_tracker_) created by T0 here: #0 0x564788e1fcaa in pthread_create (/mnt/hdd01/dorisTestEnv/NEREIDS_ASAN/be/lib/doris_be+0x1752fcaa) (BuildId: 219493cc924323ee) #1 0x56478c4a2366 in doris::Thread::start_thread(std::__cxx11::basic_string, std::allocator > const&, std::__cxx11::basic_string, std::allocator > const&, std::function const&, unsigned long, scoped_refptr*) /doris/be/src/util/thread.cpp:419:15 #2 0x564788f59b91 in doris::Status doris::Thread::create(std::__cxx11::basic_string, std::allocator > const&, std::__cxx11::basic_string, std::allocator > const&, doris::Daemon::start()::$_4 const&, scoped_refptr*) /doris/be/src/util/thread.h:50:16 #3 0x564788f58165 in doris::Daemon::start() /doris/be/src/common/daemon.cpp:471:10 #4 0x564788e79a96 in main /doris/be/src/service/doris_main.cpp:420:12 #5 0x7f217ca38564 in __libc_start_main csu/../csu/libc-start.c:332:16 --- be/src/runtime/memory/mem_tracker.cpp | 10 ++-------- be/src/runtime/memory/mem_tracker.h | 7 +++++++ be/src/runtime/memory/mem_tracker_limiter.cpp | 6 +++--- be/src/runtime/memory/mem_tracker_limiter.h | 14 +++++++------- 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/be/src/runtime/memory/mem_tracker.cpp b/be/src/runtime/memory/mem_tracker.cpp index 7ec943090d0441..6a7994ee7cc65d 100644 --- a/be/src/runtime/memory/mem_tracker.cpp +++ b/be/src/runtime/memory/mem_tracker.cpp @@ -24,22 +24,16 @@ #include -#include "common/daemon.h" #include "runtime/memory/mem_tracker_limiter.h" #include "runtime/thread_context.h" namespace doris { -struct TrackerGroup { - std::list trackers; - std::mutex group_lock; -}; - // Save all MemTrackers in use to maintain the weak relationship between MemTracker and MemTrackerLimiter. // When MemTrackerLimiter prints statistics, all MemTracker statistics with weak relationship will be printed together. // Each group corresponds to several MemTrackerLimiters and has a lock. // Multiple groups are used to reduce the impact of locks. -static std::vector mem_tracker_pool(1000); +std::vector MemTracker::mem_tracker_pool(1000); MemTracker::MemTracker(const std::string& label, RuntimeProfile* profile, MemTrackerLimiter* parent, const std::string& profile_counter_name) @@ -82,7 +76,7 @@ void MemTracker::bind_parent(MemTrackerLimiter* parent) { } MemTracker::~MemTracker() { - if (_parent_group_num != -1 && !k_doris_exit) { + if (_parent_group_num != -1) { std::lock_guard l(mem_tracker_pool[_parent_group_num].group_lock); if (_tracker_group_it != mem_tracker_pool[_parent_group_num].trackers.end()) { mem_tracker_pool[_parent_group_num].trackers.erase(_tracker_group_it); diff --git a/be/src/runtime/memory/mem_tracker.h b/be/src/runtime/memory/mem_tracker.h index c21ef478e03ae8..b83502c6c64619 100644 --- a/be/src/runtime/memory/mem_tracker.h +++ b/be/src/runtime/memory/mem_tracker.h @@ -57,6 +57,11 @@ class MemTracker { int64_t peak_consumption = 0; }; + struct TrackerGroup { + std::list trackers; + std::mutex group_lock; + }; + // A counter that keeps track of the current and peak value seen. // Relaxed ordering, not accurate in real time. class MemCounter { @@ -182,6 +187,8 @@ class MemTracker { // Use _parent_label to correlate with parent limiter tracker. std::string _parent_label = "-"; + static std::vector mem_tracker_pool; + // Iterator into mem_tracker_pool for this object. Stored to have O(1) remove. std::list::iterator _tracker_group_it; }; diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp b/be/src/runtime/memory/mem_tracker_limiter.cpp index e44da881eb0394..517589f7671a10 100644 --- a/be/src/runtime/memory/mem_tracker_limiter.cpp +++ b/be/src/runtime/memory/mem_tracker_limiter.cpp @@ -26,7 +26,6 @@ #include #include -#include "common/daemon.h" #include "runtime/exec_env.h" #include "runtime/fragment_mgr.h" #include "runtime/load_channel_mgr.h" @@ -42,7 +41,8 @@ namespace doris { // Save all MemTrackerLimiters in use. // Each group corresponds to several MemTrackerLimiters and has a lock. // Multiple groups are used to reduce the impact of locks. -static std::vector mem_tracker_limiter_pool(MEM_TRACKER_GROUP_NUM); +std::vector MemTrackerLimiter::mem_tracker_limiter_pool( + MEM_TRACKER_GROUP_NUM); std::atomic MemTrackerLimiter::_enable_print_log_process_usage {true}; @@ -91,7 +91,7 @@ MemTrackerLimiter::~MemTrackerLimiter() { // in real time. Merge its consumption into orphan when parent is process, to avoid repetition. ExecEnv::GetInstance()->orphan_mem_tracker()->consume(_consumption->current_value()); _consumption->set(0); - if (!k_doris_exit) { + { std::lock_guard l(mem_tracker_limiter_pool[_group_num].group_lock); if (_tracker_limiter_group_it != mem_tracker_limiter_pool[_group_num].trackers.end()) { mem_tracker_limiter_pool[_group_num].trackers.erase(_tracker_limiter_group_it); diff --git a/be/src/runtime/memory/mem_tracker_limiter.h b/be/src/runtime/memory/mem_tracker_limiter.h index c90845ae88d8b1..218f9e166de3fa 100644 --- a/be/src/runtime/memory/mem_tracker_limiter.h +++ b/be/src/runtime/memory/mem_tracker_limiter.h @@ -50,13 +50,6 @@ class TaskGroup; using TaskGroupPtr = std::shared_ptr; } // namespace taskgroup -class MemTrackerLimiter; - -struct TrackerLimiterGroup { - std::list trackers; - std::mutex group_lock; -}; - // Track and limit the memory usage of process and query. // Contains an limit, arranged into a tree structure. // @@ -76,6 +69,11 @@ class MemTrackerLimiter final : public MemTracker { 6 // Experimental memory statistics, usually inaccurate, used for debugging, and expect to add other types in the future. }; + struct TrackerLimiterGroup { + std::list trackers; + std::mutex group_lock; + }; + inline static std::unordered_map> TypeMemSum = {{Type::GLOBAL, std::make_shared(TUnit::BYTES)}, @@ -272,6 +270,8 @@ class MemTrackerLimiter final : public MemTracker { bool _enable_print_log_usage = false; static std::atomic _enable_print_log_process_usage; + static std::vector mem_tracker_limiter_pool; + // Iterator into mem_tracker_limiter_pool for this object. Stored to have O(1) remove. std::list::iterator _tracker_limiter_group_it; };