From c6d7390db644e0d0769f248e5b3c271dea8f58b1 Mon Sep 17 00:00:00 2001 From: Zac Wen Date: Tue, 18 Jun 2024 13:27:26 -0700 Subject: [PATCH] Add a limit on loadQuantum (#10242) Summary: SsdRun only reserves 23 bits (out of 64 bits) for entry size. loadQuantum larger than that will result in cache error. Fixing https://github.com/facebookincubator/velox/issues/10098 Pull Request resolved: https://github.com/facebookincubator/velox/pull/10242 Reviewed By: xiaoxmeng Differential Revision: D58711635 Pulled By: zacw7 fbshipit-source-id: 70327443c21d8c6d1d537c145143d46ad012501d --- velox/connectors/hive/HiveConfig.h | 3 ++- velox/dwio/common/CachedBufferedInput.h | 19 +++++++++++++++++-- velox/dwio/dwrf/test/CacheInputTest.cpp | 22 ++++++++++++++++++++++ 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/velox/connectors/hive/HiveConfig.h b/velox/connectors/hive/HiveConfig.h index 3565b0fde0da..46bf85f797d4 100644 --- a/velox/connectors/hive/HiveConfig.h +++ b/velox/connectors/hive/HiveConfig.h @@ -148,7 +148,8 @@ class HiveConfig { /// The number of prefetch rowgroups static constexpr const char* kPrefetchRowGroups = "prefetch-rowgroups"; - /// The total size in bytes for a direct coalesce request. + /// The total size in bytes for a direct coalesce request. Up to 8MB load + /// quantum size is supported when SSD cache is enabled. static constexpr const char* kLoadQuantum = "load-quantum"; /// Maximum number of entries in the file handle cache. diff --git a/velox/dwio/common/CachedBufferedInput.h b/velox/dwio/common/CachedBufferedInput.h index 68664eb6c87c..8af2aed63077 100644 --- a/velox/dwio/common/CachedBufferedInput.h +++ b/velox/dwio/common/CachedBufferedInput.h @@ -77,7 +77,9 @@ class CachedBufferedInput : public BufferedInput { ioStats_(std::move(ioStats)), executor_(executor), fileSize_(input_->getLength()), - options_(readerOptions) {} + options_(readerOptions) { + checkLoadQuantum(); + } CachedBufferedInput( std::shared_ptr input, @@ -96,7 +98,9 @@ class CachedBufferedInput : public BufferedInput { ioStats_(std::move(ioStats)), executor_(executor), fileSize_(input_->getLength()), - options_(readerOptions) {} + options_(readerOptions) { + checkLoadQuantum(); + } ~CachedBufferedInput() override { for (auto& load : allCoalescedLoads_) { @@ -173,6 +177,17 @@ class CachedBufferedInput : public BufferedInput { // concerns. void readRegion(const std::vector& requests, bool prefetch); + // We only support up to 8MB load quantum size on SSD and there is no need for + // larger SSD read size performance wise. + void checkLoadQuantum() { + if (cache_->ssdCache() != nullptr) { + VELOX_CHECK_LE( + options_.loadQuantum(), + 1 << cache::SsdRun::kSizeBits, + "Load quantum exceeded SSD cache entry size limit."); + } + } + cache::AsyncDataCache* const cache_; const uint64_t fileNum_; const std::shared_ptr tracker_; diff --git a/velox/dwio/dwrf/test/CacheInputTest.cpp b/velox/dwio/dwrf/test/CacheInputTest.cpp index 6c301f01dc52..c326f4fa16ba 100644 --- a/velox/dwio/dwrf/test/CacheInputTest.cpp +++ b/velox/dwio/dwrf/test/CacheInputTest.cpp @@ -17,6 +17,7 @@ #include #include #include +#include "velox/common/base/tests/GTestUtils.h" #include "velox/common/caching/FileIds.h" #include "velox/common/file/FileSystems.h" #include "velox/common/io/IoStatistics.h" @@ -830,3 +831,24 @@ TEST_F(CacheTest, noCacheRetention) { ASSERT_EQ(stats.numEntries, cacheEntries.size()); } } + +TEST_F(CacheTest, loadQuotumTooLarge) { + initializeCache(64 << 20, 256 << 20); + auto fileId = std::make_unique(fileIds(), "foo"); + auto readFile = + std::make_shared(fileId->id(), 10 << 20, nullptr); + auto readOptions = io::ReaderOptions(pool_.get()); + readOptions.setLoadQuantum(9 << 20 /*9MB*/); + VELOX_ASSERT_THROW( + std::make_unique( + readFile, + MetricsLog::voidLog(), + fileId->id(), + cache_.get(), + nullptr, + 0, + nullptr, + executor_.get(), + readOptions), + "Load quantum exceeded SSD cache entry size limit"); +}