Skip to content

Commit

Permalink
respect spark.shuffle.spill.compress
Browse files Browse the repository at this point in the history
  • Loading branch information
jinchengchenghh committed Dec 11, 2024
1 parent f7f6eec commit 313872d
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 3 deletions.
8 changes: 6 additions & 2 deletions cpp/velox/compute/WholeStageResultIterator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -486,8 +486,12 @@ std::unordered_map<std::string, std::string> WholeStageResultIterator::getQueryC
std::to_string(veloxCfg_->get<uint8_t>(kSpillPartitionBits, 3));
configs[velox::core::QueryConfig::kSpillableReservationGrowthPct] =
std::to_string(veloxCfg_->get<uint8_t>(kSpillableReservationGrowthPct, 25));
configs[velox::core::QueryConfig::kSpillCompressionKind] =
veloxCfg_->get<std::string>(kSpillCompressionKind, veloxCfg_->get<std::string>(kCompressionKind, "lz4"));
if (veloxCfg_->get<bool>(kSparkShuffleSpillCompress, true)) {
configs[velox::core::QueryConfig::kSpillCompressionKind] =
veloxCfg_->get<std::string>(kSpillCompressionKind, veloxCfg_->get<std::string>(kCompressionKind, "lz4"));
} else {
configs[velox::core::QueryConfig::kSpillCompressionKind] = "none";
}
configs[velox::core::QueryConfig::kSparkBloomFilterExpectedNumItems] =
std::to_string(veloxCfg_->get<int64_t>(kBloomFilterExpectedNumItems, 1000000));
configs[velox::core::QueryConfig::kSparkBloomFilterNumBits] =
Expand Down
2 changes: 2 additions & 0 deletions cpp/velox/config/VeloxConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ const uint64_t kMaxSpillFileSizeDefault = 1L * 1024 * 1024 * 1024;

const std::string kSpillableReservationGrowthPct =
"spark.gluten.sql.columnar.backend.velox.spillableReservationGrowthPct";
// Whether to compress data spilled. Compression will use spark.io.compression.codec or kSpillCompressionKind.
const std::string kSparkShuffleSpillCompress = "spark.shuffle.spill.compress";
const std::string kCompressionKind = "spark.io.compression.codec";
/// The compression codec to use for spilling. Use kCompressionKind if not set.
const std::string kSpillCompressionKind = "spark.gluten.sql.columnar.backend.velox.spillCompressionCodec";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,8 @@ object GlutenConfig {
val SPARK_UNSAFE_SORTER_SPILL_READER_BUFFER_SIZE_DEFAULT: Int = 1024 * 1024
val SPARK_SHUFFLE_SPILL_DISK_WRITE_BUFFER_SIZE = "spark.shuffle.spill.diskWriteBufferSize"
val SPARK_SHUFFLE_SPILL_DISK_WRITE_BUFFER_SIZE_DEFAULT: Int = 1024 * 1024
val SPARK_SHUFFLE_SPILL_COMPRESS = "spark.shuffle.spill.compress"
val SPARK_SHUFFLE_SPILL_COMPRESS_DEFAULT: Boolean = true

// For Soft Affinity Scheduling
// Enable Soft Affinity Scheduling, default value is false
Expand Down Expand Up @@ -740,7 +742,8 @@ object GlutenConfig {
SPARK_UNSAFE_SORTER_SPILL_READER_BUFFER_SIZE_DEFAULT.toString),
(
SPARK_SHUFFLE_SPILL_DISK_WRITE_BUFFER_SIZE,
SPARK_SHUFFLE_SPILL_DISK_WRITE_BUFFER_SIZE_DEFAULT.toString)
SPARK_SHUFFLE_SPILL_DISK_WRITE_BUFFER_SIZE_DEFAULT.toString),
(SPARK_SHUFFLE_SPILL_COMPRESS, SPARK_SHUFFLE_SPILL_COMPRESS_DEFAULT.toString)
)
keyWithDefault.forEach(e => nativeConfMap.put(e._1, conf.getOrElse(e._1, e._2)))

Expand Down

0 comments on commit 313872d

Please sign in to comment.