From c6b9186e69299be34f520caf6f708fdee070acb6 Mon Sep 17 00:00:00 2001
From: Kittywhiskers Van Gogh <63189531+kwvg@users.noreply.github.com>
Date: Sat, 11 Jun 2022 09:23:51 +0200
Subject: [PATCH] merge bitcoin#25325: Add pool based memory resource

---
 src/Makefile.am                     |   1 +
 src/Makefile.bench.include          |   1 +
 src/Makefile.test.include           |   2 +
 src/Makefile.test_util.include      |   1 +
 src/bench/pool.cpp                  |  50 ++++
 src/coins.cpp                       |  15 +-
 src/coins.h                         |  20 +-
 src/memusage.h                      |  20 ++
 src/support/allocators/pool.h       | 349 ++++++++++++++++++++++++++++
 src/test/coins_tests.cpp            |  39 +++-
 src/test/fuzz/coins_view.cpp        |   3 +-
 src/test/fuzz/poolresource.cpp      | 174 ++++++++++++++
 src/test/pool_tests.cpp             | 189 +++++++++++++++
 src/test/util/poolresourcetester.h  | 129 ++++++++++
 src/test/validation_flush_tests.cpp |  33 +--
 src/validation.cpp                  |   1 -
 16 files changed, 1003 insertions(+), 24 deletions(-)
 create mode 100644 src/bench/pool.cpp
 create mode 100644 src/support/allocators/pool.h
 create mode 100644 src/test/fuzz/poolresource.cpp
 create mode 100644 src/test/pool_tests.cpp
 create mode 100644 src/test/util/poolresourcetester.h

diff --git a/src/Makefile.am b/src/Makefile.am
index f90e432b7bca8..0c2e16a6db2bd 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -314,6 +314,7 @@ BITCOIN_CORE_H = \
   streams.h \
   statsd_client.h \
   support/allocators/mt_pooled_secure.h \
+  support/allocators/pool.h \
   support/allocators/pooled_secure.h \
   support/allocators/secure.h \
   support/allocators/zeroafterfree.h \
diff --git a/src/Makefile.bench.include b/src/Makefile.bench.include
index 5da7d544a9c87..133bc282ef741 100644
--- a/src/Makefile.bench.include
+++ b/src/Makefile.bench.include
@@ -41,6 +41,7 @@ bench_bench_dash_SOURCES = \
   bench/nanobench.h \
   bench/nanobench.cpp \
   bench/peer_eviction.cpp \
+  bench/pool.cpp \
   bench/rpc_blockchain.cpp \
   bench/rpc_mempool.cpp \
   bench/util_time.cpp \
diff --git a/src/Makefile.test.include b/src/Makefile.test.include
index d7ccc53c5d2b5..471c38be44ac8 100644
--- a/src/Makefile.test.include
+++ b/src/Makefile.test.include
@@ -136,6 +136,7 @@ BITCOIN_TESTS =\
   test/netbase_tests.cpp \
   test/pmt_tests.cpp \
   test/policyestimator_tests.cpp \
+  test/pool_tests.cpp \
   test/pow_tests.cpp \
   test/prevector_tests.cpp \
   test/raii_event_tests.cpp \
@@ -298,6 +299,7 @@ test_fuzz_fuzz_SOURCES = \
   test/fuzz/parse_univalue.cpp \
   test/fuzz/policy_estimator.cpp \
   test/fuzz/policy_estimator_io.cpp \
+  test/fuzz/poolresource.cpp \
   test/fuzz/pow.cpp \
   test/fuzz/prevector.cpp \
   test/fuzz/primitives_transaction.cpp \
diff --git a/src/Makefile.test_util.include b/src/Makefile.test_util.include
index ee349bbc6f248..5e17e398b1be9 100644
--- a/src/Makefile.test_util.include
+++ b/src/Makefile.test_util.include
@@ -14,6 +14,7 @@ TEST_UTIL_H = \
   test/util/logging.h \
   test/util/mining.h \
   test/util/net.h \
+  test/util/poolresourcetester.h \
   test/util/script.h \
   test/util/setup_common.h \
   test/util/str.h \
diff --git a/src/bench/pool.cpp b/src/bench/pool.cpp
new file mode 100644
index 0000000000000..0bf2b18514808
--- /dev/null
+++ b/src/bench/pool.cpp
@@ -0,0 +1,50 @@
+// Copyright (c) 2022 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#include <bench/bench.h>
+#include <support/allocators/pool.h>
+
+#include <unordered_map>
+
+template <typename Map>
+void BenchFillClearMap(benchmark::Bench& bench, Map& map)
+{
+    size_t batch_size = 5000;
+
+    // make sure each iteration of the benchmark contains exactly 5000 inserts and one clear.
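+    // (Note: bench.batch(batch_size) makes nanobench normalize the reported numbers
+    // by the batch size, so results read as time per single insert, not per 5000-insert round.)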
+    // do this at least 10 times so we get reasonably accurate results
+
+    bench.batch(batch_size).minEpochIterations(10).run([&] {
+        auto rng = ankerl::nanobench::Rng(1234);
+        for (size_t i = 0; i < batch_size; ++i) {
+            map[rng()];
+        }
+        map.clear();
+    });
+}
+
+static void PoolAllocator_StdUnorderedMap(benchmark::Bench& bench)
+{
+    auto map = std::unordered_map<uint64_t, uint64_t>();
+    BenchFillClearMap(bench, map);
+}
+
+static void PoolAllocator_StdUnorderedMapWithPoolResource(benchmark::Bench& bench)
+{
+    using Map = std::unordered_map<uint64_t,
+                                   uint64_t,
+                                   std::hash<uint64_t>,
+                                   std::equal_to<uint64_t>,
+                                   PoolAllocator<std::pair<const uint64_t, uint64_t>,
+                                                 sizeof(std::pair<const uint64_t, uint64_t>) + 4 * sizeof(void*),
+                                                 alignof(void*)>>;
+
+    // make sure the resource supports large enough pools to hold the node. We do this by adding the size of a few pointers to it.
+    auto pool_resource = Map::allocator_type::ResourceType();
+    auto map = Map{0, std::hash<uint64_t>{}, std::equal_to<uint64_t>{}, &pool_resource};
+    BenchFillClearMap(bench, map);
+}
+
+BENCHMARK(PoolAllocator_StdUnorderedMap);
+BENCHMARK(PoolAllocator_StdUnorderedMapWithPoolResource);
diff --git a/src/coins.cpp b/src/coins.cpp
index 431d7223d8898..4a1cccd0abe64 100644
--- a/src/coins.cpp
+++ b/src/coins.cpp
@@ -33,7 +33,7 @@ size_t CCoinsViewBacked::EstimateSize() const { return base->EstimateSize(); }
 
 CCoinsViewCache::CCoinsViewCache(CCoinsView* baseIn, bool deterministic) :
     CCoinsViewBacked(baseIn), m_deterministic(deterministic),
-    cacheCoins(0, SaltedOutpointHasher(/*deterministic=*/deterministic))
+    cacheCoins(0, SaltedOutpointHasher(/*deterministic=*/deterministic), CCoinsMap::key_equal{}, &m_cache_coins_memory_resource)
 {}
 
 size_t CCoinsViewCache::DynamicMemoryUsage() const {
@@ -240,9 +240,12 @@ bool CCoinsViewCache::BatchWrite(CCoinsMap &mapCoins, const uint256 &hashBlockIn
 bool CCoinsViewCache::Flush() {
     bool fOk = base->BatchWrite(cacheCoins, hashBlock, /*erase=*/true);
-    if (fOk && !cacheCoins.empty()) {
-        /* BatchWrite must erase all cacheCoins elements when erase=true. */
-        throw std::logic_error("Not all cached coins were erased");
+    if (fOk) {
+        if (!cacheCoins.empty()) {
+            /* BatchWrite must erase all cacheCoins elements when erase=true. */
+            throw std::logic_error("Not all cached coins were erased");
+        }
+        ReallocateCache();
     }
     cachedCoinsUsage = 0;
     return fOk;
 }
@@ -295,7 +298,9 @@ void CCoinsViewCache::ReallocateCache()
     // Cache should be empty when we're calling this.
     assert(cacheCoins.size() == 0);
     cacheCoins.~CCoinsMap();
-    ::new (&cacheCoins) CCoinsMap(0, SaltedOutpointHasher(/*deterministic=*/m_deterministic));
+    m_cache_coins_memory_resource.~CCoinsMapMemoryResource();
+    ::new (&m_cache_coins_memory_resource) CCoinsMapMemoryResource{};
+    ::new (&cacheCoins) CCoinsMap{0, SaltedOutpointHasher{/*deterministic=*/m_deterministic}, CCoinsMap::key_equal{}, &m_cache_coins_memory_resource};
 }
 
 void CCoinsViewCache::SanityCheck() const
diff --git a/src/coins.h b/src/coins.h
index 3dbe23aea5820..fde5f688cc12a 100644
--- a/src/coins.h
+++ b/src/coins.h
@@ -11,6 +11,7 @@
 #include <memusage.h>
 #include <primitives/transaction.h>
 #include <serialize.h>
+#include <support/allocators/pool.h>
 #include <uint256.h>
 #include <util/hasher.h>
 
@@ -131,7 +132,23 @@ struct CCoinsCacheEntry
     CCoinsCacheEntry(Coin&& coin_, unsigned char flag) : coin(std::move(coin_)), flags(flag) {}
 };
 
-typedef std::unordered_map<COutPoint, CCoinsCacheEntry, SaltedOutpointHasher> CCoinsMap;
+/**
+ * PoolAllocator's MAX_BLOCK_SIZE_BYTES parameter here uses sizeof the data, and adds the size
+ * of 4 pointers. We do not know the exact node size used in the std::unordered_map node
+ * implementation because it is implementation defined.
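+ * (As a concrete illustration, not something this code relies on: libstdc++'s node stores a
+ * next-pointer and, for some hashers, a cached hash; libc++'s stores a next-pointer and the hash.)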
+ * Most implementations have an overhead of 1 or 2 pointers, so nodes can be connected in a
+ * linked list, and in some cases the hash value is stored as well.
+ * Using an additional sizeof(void*)*4 for MAX_BLOCK_SIZE_BYTES should thus be sufficient so that
+ * all implementations can allocate the nodes from the PoolAllocator.
+ */
+using CCoinsMap = std::unordered_map<COutPoint,
+                                     CCoinsCacheEntry,
+                                     SaltedOutpointHasher,
+                                     std::equal_to<COutPoint>,
+                                     PoolAllocator<std::pair<const COutPoint, CCoinsCacheEntry>,
+                                                   sizeof(std::pair<const COutPoint, CCoinsCacheEntry>) + sizeof(void*) * 4,
+                                                   alignof(void*)>>;
+
+using CCoinsMapMemoryResource = CCoinsMap::allocator_type::ResourceType;
 
 /** Cursor for iterating over CoinsView state */
 class CCoinsViewCursor
@@ -221,6 +238,7 @@ class CCoinsViewCache : public CCoinsViewBacked
      * declared as "const".
      */
     mutable uint256 hashBlock;
+    mutable CCoinsMapMemoryResource m_cache_coins_memory_resource{};
     mutable CCoinsMap cacheCoins;
 
     /* Cached dynamic memory usage for the inner Coin objects. */
diff --git a/src/memusage.h b/src/memusage.h
index a6e894129aa29..5fffe4ec07b62 100644
--- a/src/memusage.h
+++ b/src/memusage.h
@@ -7,6 +7,7 @@
 
 #include <indirectmap.h>
 #include <prevector.h>
+#include <support/allocators/pool.h>
 
 #include <stdint.h>
 
@@ -167,6 +168,25 @@ static inline size_t DynamicUsage(const std::unordered_map<X, Y, Z>& m)
 {
     return MallocUsage(sizeof(unordered_node<std::pair<const X, Y> >)) * m.size() + MallocUsage(sizeof(void*) * m.bucket_count());
 }
+
+template <class Key, class T, class Hash, class Pred, std::size_t MAX_BLOCK_SIZE_BYTES, std::size_t ALIGN_BYTES>
+static inline size_t DynamicUsage(const std::unordered_map<Key,
+                                                           T,
+                                                           Hash,
+                                                           Pred,
+                                                           PoolAllocator<std::pair<const Key, T>,
+                                                                         MAX_BLOCK_SIZE_BYTES,
+                                                                         ALIGN_BYTES>>& m)
+{
+    auto* pool_resource = m.get_allocator().resource();
+
+    // The allocated chunks are stored in a std::list. Size per node should
+    // therefore be 3 pointers: next, previous, and a pointer to the chunk.
+    size_t estimated_list_node_size = MallocUsage(sizeof(void*) * 3);
+    size_t usage_resource = estimated_list_node_size * pool_resource->NumAllocatedChunks();
+    size_t usage_chunks = MallocUsage(pool_resource->ChunkSizeBytes()) * pool_resource->NumAllocatedChunks();
+    return usage_resource + usage_chunks + MallocUsage(sizeof(void*) * m.bucket_count());
+}
 
 } // namespace memusage
 
 #endif // BITCOIN_MEMUSAGE_H
diff --git a/src/support/allocators/pool.h b/src/support/allocators/pool.h
new file mode 100644
index 0000000000000..c8e70ebacff6c
--- /dev/null
+++ b/src/support/allocators/pool.h
@@ -0,0 +1,349 @@
+// Copyright (c) 2022 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#ifndef BITCOIN_SUPPORT_ALLOCATORS_POOL_H
+#define BITCOIN_SUPPORT_ALLOCATORS_POOL_H
+
+#include <array>
+#include <cassert>
+#include <cstddef>
+#include <list>
+#include <memory>
+#include <new>
+#include <type_traits>
+#include <utility>
+
+/**
+ * A memory resource similar to std::pmr::unsynchronized_pool_resource, but
+ * optimized for node-based containers. It has the following properties:
+ *
+ * * Owns the allocated memory and frees it on destruction, even when deallocate
+ *   has not been called on the allocated blocks.
+ *
+ * * Consists of a number of pools, each one for a different block size.
+ *   Each pool holds blocks of uniform size in a freelist.
+ *
+ * * Exhausting memory in a freelist causes a new allocation of a fixed size chunk.
+ *   This chunk is used to carve out blocks.
+ *
+ * * Block sizes or alignments that cannot be served by the pools are allocated
+ *   and deallocated by operator new().
+ *
+ * PoolResource is not thread-safe. It is intended to be used by PoolAllocator.
+ *
+ * @tparam MAX_BLOCK_SIZE_BYTES Maximum size to allocate with the pool. If larger
+ *         sizes are requested, allocation falls back to new().
+ *
+ * @tparam ALIGN_BYTES Required alignment for the allocations.
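+ *
+ * A minimal usage sketch (illustrative only; the sizes 256/8 and the 16-byte
+ * request are chosen just for this example):
+ *
+ *     PoolResource<256, 8> resource{262144};  // freelists for blocks of 8..256 bytes
+ *     void* p = resource.Allocate(16, 8);     // carved from a chunk or reused from a freelist
+ *     resource.Deallocate(p, 16, 8);          // returned to the 16-byte freelist for reuse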
+ *
+ * An example: If you create a PoolResource<128, 8>(262144) and perform a bunch of
+ * allocations and deallocate 2 blocks with size 8 bytes, and 3 blocks with size 16,
+ * the members will look like this:
+ *
+ *     m_free_lists                                   m_allocated_chunks
+ *        ┌───┐                                          ┌───┐  ┌────────────-------──────┐
+ *        │   │  blocks                                  │   ├─►│         262144 B        │
+ *        │   │  ┌─────┐  ┌─────┐                        └─┬─┘  └────────────-------──────┘
+ *        │ 1 ├─►│ 8 B ├─►│ 8 B │                          │
+ *        │   │  └─────┘  └─────┘                          :
+ *        │   │                                            │
+ *        │   │  ┌─────┐  ┌─────┐  ┌─────┐                 ▼
+ *        │ 2 ├─►│16 B ├─►│16 B ├─►│16 B │               ┌───┐  ┌─────────────────────────┐
+ *        │   │  └─────┘  └─────┘  └─────┘               │   ├─►│          ▲              │ ▲
+ *        │   │                                          └───┘  └──────────┬──────────────┘ │
+ *        │ . │                                                            │   m_available_memory_end
+ *        │ . │                                             m_available_memory_it
+ *        │ . │
+ *        │   │
+ *        │   │
+ *        │16 │
+ *        └───┘
+ *
+ * Here m_free_lists[1] holds the 2 blocks of size 8 bytes, and m_free_lists[2]
+ * holds the 3 blocks of size 16. The blocks came from the data stored in the
+ * m_allocated_chunks list. Each chunk holds 262144 bytes. The last chunk still has
+ * some memory available for the blocks, and when m_available_memory_it reaches the
+ * end, a new chunk will be allocated and added to the list.
+ */
+template <std::size_t MAX_BLOCK_SIZE_BYTES, std::size_t ALIGN_BYTES>
+class PoolResource final
+{
+    static_assert(ALIGN_BYTES > 0, "ALIGN_BYTES must be nonzero");
+    static_assert((ALIGN_BYTES & (ALIGN_BYTES - 1)) == 0, "ALIGN_BYTES must be a power of two");
+
+    /**
+     * In-place linked list of the allocations, used for the freelist.
+     */
+    struct ListNode {
+        ListNode* m_next;
+
+        explicit ListNode(ListNode* next) : m_next(next) {}
+    };
+    static_assert(std::is_trivially_destructible_v<ListNode>, "Make sure we don't need to manually call a destructor");
+
+    /**
+     * Internal alignment value. The larger of the requested ALIGN_BYTES and alignof(ListNode).
+     */
+    static constexpr std::size_t ELEM_ALIGN_BYTES = std::max(alignof(ListNode), ALIGN_BYTES);
+    static_assert((ELEM_ALIGN_BYTES & (ELEM_ALIGN_BYTES - 1)) == 0, "ELEM_ALIGN_BYTES must be a power of two");
+    static_assert(sizeof(ListNode) <= ELEM_ALIGN_BYTES, "Units of size ELEM_ALIGN_BYTES need to be able to store a ListNode");
+    static_assert((MAX_BLOCK_SIZE_BYTES & (ELEM_ALIGN_BYTES - 1)) == 0, "MAX_BLOCK_SIZE_BYTES needs to be a multiple of the alignment.");
+
+    /**
+     * Size in bytes to allocate per chunk
+     */
+    const size_t m_chunk_size_bytes;
+
+    /**
+     * Contains all allocated pools of memory, used to free the data in the destructor.
+     */
+    std::list<std::byte*> m_allocated_chunks{};
+
+    /**
+     * Single linked lists of all data that came from deallocating.
+     * m_free_lists[n] will serve blocks of size n*ELEM_ALIGN_BYTES.
+     */
+    std::array<ListNode*, MAX_BLOCK_SIZE_BYTES / ELEM_ALIGN_BYTES + 1> m_free_lists{};
+
+    /**
+     * Points to the beginning of available memory for carving out allocations.
+     */
+    std::byte* m_available_memory_it = nullptr;
+
+    /**
+     * Points to the end of available memory for carving out allocations.
+     *
+     * That member variable is redundant, and is always equal to `m_allocated_chunks.back() + m_chunk_size_bytes`
+     * whenever it is accessed, but `m_available_memory_end` caches this for clarity and efficiency.
+     */
+    std::byte* m_available_memory_end = nullptr;
+
+    /**
+     * How many multiples of ELEM_ALIGN_BYTES are necessary to fit bytes. We use that result directly as an index
+     * into m_free_lists. Round up for the special case when bytes==0.
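+     *
+     * For instance, assuming ELEM_ALIGN_BYTES == 8 (values shown for illustration only):
+     *   NumElemAlignBytes(1)  == 1   // one 8-byte unit, freelist index 1
+     *   NumElemAlignBytes(8)  == 1
+     *   NumElemAlignBytes(17) == 3   // rounds up to 24 bytes, freelist index 3
+     *   NumElemAlignBytes(0)  == 1   // bytes==0 is rounded up to a usable index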
+ */ + [[nodiscard]] static constexpr std::size_t NumElemAlignBytes(std::size_t bytes) + { + return (bytes + ELEM_ALIGN_BYTES - 1) / ELEM_ALIGN_BYTES + (bytes == 0); + } + + /** + * True when it is possible to make use of the freelist + */ + [[nodiscard]] static constexpr bool IsFreeListUsable(std::size_t bytes, std::size_t alignment) + { + return alignment <= ELEM_ALIGN_BYTES && bytes <= MAX_BLOCK_SIZE_BYTES; + } + + /** + * Replaces node with placement constructed ListNode that points to the previous node + */ + void PlacementAddToList(void* p, ListNode*& node) + { + node = new (p) ListNode{node}; + } + + /** + * Allocate one full memory chunk which will be used to carve out allocations. + * Also puts any leftover bytes into the freelist. + * + * Precondition: leftover bytes are either 0 or few enough to fit into a place in the freelist + */ + void AllocateChunk() + { + // if there is still any available memory left, put it into the freelist. + size_t remaining_available_bytes = std::distance(m_available_memory_it, m_available_memory_end); + if (0 != remaining_available_bytes) { + PlacementAddToList(m_available_memory_it, m_free_lists[remaining_available_bytes / ELEM_ALIGN_BYTES]); + } + + void* storage = ::operator new (m_chunk_size_bytes, std::align_val_t{ELEM_ALIGN_BYTES}); + m_available_memory_it = new (storage) std::byte[m_chunk_size_bytes]; + m_available_memory_end = m_available_memory_it + m_chunk_size_bytes; + m_allocated_chunks.emplace_back(m_available_memory_it); + } + + /** + * Access to internals for testing purpose only + */ + friend class PoolResourceTester; + +public: + /** + * Construct a new PoolResource object which allocates the first chunk. + * chunk_size_bytes will be rounded up to next multiple of ELEM_ALIGN_BYTES. + */ + explicit PoolResource(std::size_t chunk_size_bytes) + : m_chunk_size_bytes(NumElemAlignBytes(chunk_size_bytes) * ELEM_ALIGN_BYTES) + { + assert(m_chunk_size_bytes >= MAX_BLOCK_SIZE_BYTES); + AllocateChunk(); + } + + /** + * Construct a new Pool Resource object, defaults to 2^18=262144 chunk size. + */ + PoolResource() : PoolResource(262144) {} + + /** + * Disable copy & move semantics, these are not supported for the resource. + */ + PoolResource(const PoolResource&) = delete; + PoolResource& operator=(const PoolResource&) = delete; + PoolResource(PoolResource&&) = delete; + PoolResource& operator=(PoolResource&&) = delete; + + /** + * Deallocates all memory allocated associated with the memory resource. + */ + ~PoolResource() + { + for (std::byte* chunk : m_allocated_chunks) { + std::destroy(chunk, chunk + m_chunk_size_bytes); + ::operator delete ((void*)chunk, std::align_val_t{ELEM_ALIGN_BYTES}); + } + } + + /** + * Allocates a block of bytes. If possible the freelist is used, otherwise allocation + * is forwarded to ::operator new(). + */ + void* Allocate(std::size_t bytes, std::size_t alignment) + { + if (IsFreeListUsable(bytes, alignment)) { + const std::size_t num_alignments = NumElemAlignBytes(bytes); + if (nullptr != m_free_lists[num_alignments]) { + // we've already got data in the pool's freelist, unlink one element and return the pointer + // to the unlinked memory. Since FreeList is trivially destructible we can just treat it as + // uninitialized memory. + return std::exchange(m_free_lists[num_alignments], m_free_lists[num_alignments]->m_next); + } + + // freelist is empty: get one allocation from allocated chunk memory. 
+            const std::ptrdiff_t round_bytes = static_cast<std::ptrdiff_t>(num_alignments * ELEM_ALIGN_BYTES);
+            if (round_bytes > m_available_memory_end - m_available_memory_it) {
+                // slow path, only happens when a new chunk needs to be allocated
+                AllocateChunk();
+            }
+
+            // Make sure we use the right amount of bytes for that freelist (might be rounded up).
+            return std::exchange(m_available_memory_it, m_available_memory_it + round_bytes);
+        }
+
+        // Can't use the pool => use operator new()
+        return ::operator new (bytes, std::align_val_t{alignment});
+    }
+
+    /**
+     * Returns a block to the freelists, or deletes the block when it did not come from the chunks.
+     */
+    void Deallocate(void* p, std::size_t bytes, std::size_t alignment) noexcept
+    {
+        if (IsFreeListUsable(bytes, alignment)) {
+            const std::size_t num_alignments = NumElemAlignBytes(bytes);
+            // put the memory block into the linked list. We can placement construct the ListNode
+            // into the memory since we can be sure the alignment is correct.
+            PlacementAddToList(p, m_free_lists[num_alignments]);
+        } else {
+            // Can't use the pool => forward deallocation to ::operator delete().
+            ::operator delete (p, std::align_val_t{alignment});
+        }
+    }
+
+    /**
+     * Number of allocated chunks
+     */
+    [[nodiscard]] std::size_t NumAllocatedChunks() const
+    {
+        return m_allocated_chunks.size();
+    }
+
+    /**
+     * Size in bytes to allocate per chunk. Fixed at construction time.
+     */
+    [[nodiscard]] size_t ChunkSizeBytes() const
+    {
+        return m_chunk_size_bytes;
+    }
+};
+
+
+/**
+ * Forwards all allocations/deallocations to the PoolResource.
+ */
+template <class T, std::size_t MAX_BLOCK_SIZE_BYTES, std::size_t ALIGN_BYTES>
+class PoolAllocator
+{
+    PoolResource<MAX_BLOCK_SIZE_BYTES, ALIGN_BYTES>* m_resource;
+
+    template <typename U, std::size_t M, std::size_t A>
+    friend class PoolAllocator;
+
+public:
+    using value_type = T;
+    using ResourceType = PoolResource<MAX_BLOCK_SIZE_BYTES, ALIGN_BYTES>;
+
+    /**
+     * Not explicit so we can easily construct it with the correct resource
+     */
+    PoolAllocator(ResourceType* resource) noexcept
+        : m_resource(resource)
+    {
+    }
+
+    PoolAllocator(const PoolAllocator& other) noexcept = default;
+    PoolAllocator& operator=(const PoolAllocator& other) noexcept = default;
+
+    template <class U>
+    PoolAllocator(const PoolAllocator<U, MAX_BLOCK_SIZE_BYTES, ALIGN_BYTES>& other) noexcept
+        : m_resource(other.resource())
+    {
+    }
+
+    /**
+     * The rebind struct here is mandatory because we use non type template arguments for
+     * PoolAllocator. See https://en.cppreference.com/w/cpp/named_req/Allocator#cite_note-2
+     */
+    template <typename U>
+    struct rebind {
+        using other = PoolAllocator<U, MAX_BLOCK_SIZE_BYTES, ALIGN_BYTES>;
+    };
+
+    /**
+     * Forwards each call to the resource.
+     */
+    T* allocate(size_t n)
+    {
+        return static_cast<T*>(m_resource->Allocate(n * sizeof(T), alignof(T)));
+    }
+
+    /**
+     * Forwards each call to the resource.
+     */
+    void deallocate(T* p, size_t n) noexcept
+    {
+        m_resource->Deallocate(p, n * sizeof(T), alignof(T));
+    }
+
+    ResourceType* resource() const noexcept
+    {
+        return m_resource;
+    }
+};
+
+template <class T1, class T2, std::size_t MAX_BLOCK_SIZE_BYTES, std::size_t ALIGN_BYTES>
+bool operator==(const PoolAllocator<T1, MAX_BLOCK_SIZE_BYTES, ALIGN_BYTES>& a,
+                const PoolAllocator<T2, MAX_BLOCK_SIZE_BYTES, ALIGN_BYTES>& b) noexcept
+{
+    return a.resource() == b.resource();
+}
+
+template <class T1, class T2, std::size_t MAX_BLOCK_SIZE_BYTES, std::size_t ALIGN_BYTES>
+bool operator!=(const PoolAllocator<T1, MAX_BLOCK_SIZE_BYTES, ALIGN_BYTES>& a,
+                const PoolAllocator<T2, MAX_BLOCK_SIZE_BYTES, ALIGN_BYTES>& b) noexcept
+{
+    return !(a == b);
+}
+
+#endif // BITCOIN_SUPPORT_ALLOCATORS_POOL_H
diff --git a/src/test/coins_tests.cpp b/src/test/coins_tests.cpp
index 2c8e0b10133db..75b25143e2884 100644
--- a/src/test/coins_tests.cpp
+++ b/src/test/coins_tests.cpp
@@ -6,6 +6,7 @@
 #include 
 #include 