From c6d7390db644e0d0769f248e5b3c271dea8f58b1 Mon Sep 17 00:00:00 2001
From: Zac Wen <zacw@meta.com>
Date: Tue, 18 Jun 2024 13:27:26 -0700
Subject: [PATCH] Add a limit on loadQuantum (#10242)

Summary:
SsdRun only reserves 23 bits (out of 64 bits) for entry size. loadQuantum larger than that will result in cache error.
Fixing https://github.com/facebookincubator/velox/issues/10098

Pull Request resolved: https://github.com/facebookincubator/velox/pull/10242

Reviewed By: xiaoxmeng

Differential Revision: D58711635

Pulled By: zacw7

fbshipit-source-id: 70327443c21d8c6d1d537c145143d46ad012501d
---
 velox/connectors/hive/HiveConfig.h      |  3 ++-
 velox/dwio/common/CachedBufferedInput.h | 19 +++++++++++++++++--
 velox/dwio/dwrf/test/CacheInputTest.cpp | 22 ++++++++++++++++++++++
 3 files changed, 41 insertions(+), 3 deletions(-)
diff --git a/velox/connectors/hive/HiveConfig.h b/velox/connectors/hive/HiveConfig.h
index 3565b0fde0da..46bf85f797d4 100644
--- a/velox/connectors/hive/HiveConfig.h
+++ b/velox/connectors/hive/HiveConfig.h
@@ -148,7 +148,8 @@ class HiveConfig {
   /// The number of prefetch rowgroups
   static constexpr const char* kPrefetchRowGroups = "prefetch-rowgroups";
 
-  /// The total size in bytes for a direct coalesce request.
+  /// The total size in bytes for a direct coalesce request. Up to 8MB load
+  /// quantum size is supported when SSD cache is enabled.
   static constexpr const char* kLoadQuantum = "load-quantum";
 
   /// Maximum number of entries in the file handle cache.
diff --git a/velox/dwio/common/CachedBufferedInput.h b/velox/dwio/common/CachedBufferedInput.h
index 68664eb6c87c..8af2aed63077 100644
--- a/velox/dwio/common/CachedBufferedInput.h
+++ b/velox/dwio/common/CachedBufferedInput.h
@@ -77,7 +77,9 @@ class CachedBufferedInput : public BufferedInput {
         ioStats_(std::move(ioStats)),
         executor_(executor),
         fileSize_(input_->getLength()),
-        options_(readerOptions) {}
+        options_(readerOptions) {
+    checkLoadQuantum();
+  }
 
   CachedBufferedInput(
       std::shared_ptr<ReadFileInputStream> input,
@@ -96,7 +98,9 @@ class CachedBufferedInput : public BufferedInput {
         ioStats_(std::move(ioStats)),
         executor_(executor),
         fileSize_(input_->getLength()),
-        options_(readerOptions) {}
+        options_(readerOptions) {
+    checkLoadQuantum();
+  }
 
   ~CachedBufferedInput() override {
     for (auto& load : allCoalescedLoads_) {
@@ -173,6 +177,17 @@ class CachedBufferedInput : public BufferedInput {
   // concerns.
   void readRegion(const std::vector<CacheRequest*>& requests, bool prefetch);
 
+  // We only support up to 8MB load quantum size on SSD and there is no need for
+  // larger SSD read size performance wise.
+  void checkLoadQuantum() {
+    if (cache_->ssdCache() != nullptr) {
+      VELOX_CHECK_LE(
+          options_.loadQuantum(),
+          1 << cache::SsdRun::kSizeBits,
+          "Load quantum exceeded SSD cache entry size limit.");
+    }
+  }
+
   cache::AsyncDataCache* const cache_;
   const uint64_t fileNum_;
   const std::shared_ptr<cache::ScanTracker> tracker_;
diff --git a/velox/dwio/dwrf/test/CacheInputTest.cpp b/velox/dwio/dwrf/test/CacheInputTest.cpp
index 6c301f01dc52..c326f4fa16ba 100644
--- a/velox/dwio/dwrf/test/CacheInputTest.cpp
+++ b/velox/dwio/dwrf/test/CacheInputTest.cpp
@@ -17,6 +17,7 @@
 #include <folly/Random.h>
 #include <folly/container/F14Map.h>
 #include <folly/executors/IOThreadPoolExecutor.h>
+#include "velox/common/base/tests/GTestUtils.h"
 #include "velox/common/caching/FileIds.h"
 #include "velox/common/file/FileSystems.h"
 #include "velox/common/io/IoStatistics.h"
@@ -830,3 +831,24 @@ TEST_F(CacheTest, noCacheRetention) {
     ASSERT_EQ(stats.numEntries, cacheEntries.size());
   }
 }
+
+TEST_F(CacheTest, loadQuotumTooLarge) {
+  initializeCache(64 << 20, 256 << 20);
+  auto fileId = std::make_unique<StringIdLease>(fileIds(), "foo");
+  auto readFile =
+      std::make_shared<TestReadFile>(fileId->id(), 10 << 20, nullptr);
+  auto readOptions = io::ReaderOptions(pool_.get());
+  readOptions.setLoadQuantum(9 << 20 /*9MB*/);
+  VELOX_ASSERT_THROW(
+      std::make_unique<CachedBufferedInput>(
+          readFile,
+          MetricsLog::voidLog(),
+          fileId->id(),
+          cache_.get(),
+          nullptr,
+          0,
+          nullptr,
+          executor_.get(),
+          readOptions),
+      "Load quantum exceeded SSD cache entry size limit");
+}