Add a limit on loadQuantum (facebookincubator#10242)

Summary: SsdRun reserves only 23 bits (out of 64 bits) for the entry size, so a loadQuantum larger than 8MB (1 << 23 bytes) results in a cache error when the SSD cache is enabled. This change rejects such a loadQuantum up front. Fixes facebookincubator#10098. Pull Request resolved: facebookincubator#10242 Reviewed By: xiaoxmeng Differential Revision: D58711635 Pulled By: zacw7 fbshipit-source-id: 70327443c21d8c6d1d537c145143d46ad012501d
zacw7 · Jun 18, 2024 · c6d7390 · c6d7390
1 parent 3810cbb
commit c6d7390
Show file tree

Hide file tree

Showing 3 changed files with 41 additions and 3 deletions.
diff --git a/velox/connectors/hive/HiveConfig.h b/velox/connectors/hive/HiveConfig.h
@@ -148,7 +148,8 @@ class HiveConfig {
   /// The number of prefetch rowgroups
   static constexpr const char* kPrefetchRowGroups = "prefetch-rowgroups";
 
-  /// The total size in bytes for a direct coalesce request.
+  /// The total size in bytes for a direct coalesce request. Up to 8MB load
+  /// quantum size is supported when SSD cache is enabled.
   static constexpr const char* kLoadQuantum = "load-quantum";
 
   /// Maximum number of entries in the file handle cache.

diff --git a/velox/dwio/common/CachedBufferedInput.h b/velox/dwio/common/CachedBufferedInput.h
@@ -77,7 +77,9 @@ class CachedBufferedInput : public BufferedInput {
         ioStats_(std::move(ioStats)),
         executor_(executor),
         fileSize_(input_->getLength()),
-        options_(readerOptions) {}
+        options_(readerOptions) {
+    checkLoadQuantum();
+  }
 
   CachedBufferedInput(
       std::shared_ptr<ReadFileInputStream> input,
@@ -96,7 +98,9 @@ class CachedBufferedInput : public BufferedInput {
         ioStats_(std::move(ioStats)),
         executor_(executor),
         fileSize_(input_->getLength()),
-        options_(readerOptions) {}
+        options_(readerOptions) {
+    checkLoadQuantum();
+  }
 
   ~CachedBufferedInput() override {
     for (auto& load : allCoalescedLoads_) {
@@ -173,6 +177,17 @@ class CachedBufferedInput : public BufferedInput {
   // concerns.
   void readRegion(const std::vector<CacheRequest*>& requests, bool prefetch);
 
+  // Validates the configured load quantum against the SSD cache entry size
+  // limit. The check applies only when 'cache_' has an SSD cache attached: a
+  // load quantum above 1 << cache::SsdRun::kSizeBits bytes (8MB) is rejected,
+  // and larger SSD reads would bring no performance benefit anyway.
+  void checkLoadQuantum() {
+    // Without an SSD cache there is no entry size limit to enforce.
+    if (cache_->ssdCache() == nullptr) {
+      return;
+    }
+    VELOX_CHECK_LE(
+        options_.loadQuantum(),
+        1 << cache::SsdRun::kSizeBits,
+        "Load quantum exceeded SSD cache entry size limit.");
+  }
+
   cache::AsyncDataCache* const cache_;
   const uint64_t fileNum_;
   const std::shared_ptr<cache::ScanTracker> tracker_;

diff --git a/velox/dwio/dwrf/test/CacheInputTest.cpp b/velox/dwio/dwrf/test/CacheInputTest.cpp
@@ -17,6 +17,7 @@
 #include <folly/Random.h>
 #include <folly/container/F14Map.h>
 #include <folly/executors/IOThreadPoolExecutor.h>
+#include "velox/common/base/tests/GTestUtils.h"
 #include "velox/common/caching/FileIds.h"
 #include "velox/common/file/FileSystems.h"
 #include "velox/common/io/IoStatistics.h"
@@ -830,3 +831,24 @@ TEST_F(CacheTest, noCacheRetention) {
     ASSERT_EQ(stats.numEntries, cacheEntries.size());
   }
 }
+
+// Verifies that constructing a CachedBufferedInput fails when the load quantum
+// exceeds the SSD cache entry size limit.
+// NOTE(review): "Quotum" in the test name looks like a typo for "Quantum";
+// the name is kept as-is so the test identifier does not change.
+TEST_F(CacheTest, loadQuotumTooLarge) {
+  // Presumably the second argument is the SSD cache size, which must be
+  // non-zero for the limit check to apply -- TODO confirm against
+  // initializeCache().
+  initializeCache(64 << 20, 256 << 20);
+
+  auto lease = std::make_unique<StringIdLease>(fileIds(), "foo");
+  auto file = std::make_shared<TestReadFile>(lease->id(), 10 << 20, nullptr);
+
+  // 9MB is above the 8MB load quantum supported with SSD cache enabled.
+  constexpr int kOversizedLoadQuantum = 9 << 20;
+  io::ReaderOptions options(pool_.get());
+  options.setLoadQuantum(kOversizedLoadQuantum);
+
+  VELOX_ASSERT_THROW(
+      std::make_unique<CachedBufferedInput>(
+          file,
+          MetricsLog::voidLog(),
+          lease->id(),
+          cache_.get(),
+          nullptr,
+          0,
+          nullptr,
+          executor_.get(),
+          options),
+      "Load quantum exceeded SSD cache entry size limit");
+}