From e1ceaa2ce0e55c6461e43c21c7791452f81f3594 Mon Sep 17 00:00:00 2001 From: yingsu00 Date: Tue, 26 Mar 2024 02:43:36 +0800 Subject: [PATCH] Fix the null partition key filters --- velox/connectors/hive/HiveConnectorUtil.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/velox/connectors/hive/HiveConnectorUtil.cpp b/velox/connectors/hive/HiveConnectorUtil.cpp index a1d296e650a72..05ae6dca6a653 100644 --- a/velox/connectors/hive/HiveConnectorUtil.cpp +++ b/velox/connectors/hive/HiveConnectorUtil.cpp @@ -581,16 +581,19 @@ bool testFilters( for (const auto& child : scanSpec->children()) { if (child->filter()) { const auto& name = child->fieldName(); - if (!rowType->containsChild(name)) { - // If missing column is partition key. - auto iter = partitionKey.find(name); + auto iter = partitionKey.find(name); + // The partition key columns are written in the data file for + // IcebergTables, so we need to test both cases + if (!rowType->containsChild(name) || iter != partitionKey.end()) { if (iter != partitionKey.end() && iter->second.has_value()) { + // This is a non-null partition key return applyPartitionFilter( (*partitionKeysHandle)[name]->dataType()->kind(), iter->second.value(), child->filter()); } - // Column is missing. Most likely due to schema evolution. + // Column is missing, most likely due to schema evolution. Or it's a + // partition key but the partition value is NULL. if (child->filter()->isDeterministic() && !child->filter()->testNull()) { return false;