From 21a3bbe59ccb2b0d8c72003d47771b7106be810c Mon Sep 17 00:00:00 2001 From: Pranjal Shankhdhar Date: Thu, 15 Sep 2022 22:57:54 -0700 Subject: [PATCH] Canonicalize domain predicate in TableScanNode --- .../presto/common/predicate/TupleDomain.java | 4 +- .../presto/hive/HiveTableLayoutHandle.java | 38 ++++++----- .../hive/TestHiveTableLayoutHandle.java | 63 +++++++++++++++++++ .../presto/spi/predicate/TestTupleDomain.java | 41 ++++++++++++ .../presto/tpch/TpchTableLayoutHandle.java | 2 +- 5 files changed, 131 insertions(+), 17 deletions(-) create mode 100644 presto-hive/src/test/java/com/facebook/presto/hive/TestHiveTableLayoutHandle.java diff --git a/presto-common/src/main/java/com/facebook/presto/common/predicate/TupleDomain.java b/presto-common/src/main/java/com/facebook/presto/common/predicate/TupleDomain.java index b97a8a4aacffa..3972e6214f73b 100644 --- a/presto-common/src/main/java/com/facebook/presto/common/predicate/TupleDomain.java +++ b/presto-common/src/main/java/com/facebook/presto/common/predicate/TupleDomain.java @@ -485,11 +485,11 @@ public TupleDomain compact(int threshold) return TupleDomain.withColumnDomains(unmodifiableMap(compactedDomains)); } - public TupleDomain canonicalize(boolean removeConstants) + public TupleDomain canonicalize(Function removeConstants) { return new TupleDomain<>(domains.map(d -> unmodifiableMap(d.entrySet().stream() .sorted(comparing(domain -> domain.getKey().toString())) - .collect(toLinkedMap(Entry::getKey, entry -> entry.getValue().canonicalize(removeConstants)))))); + .collect(toLinkedMap(Entry::getKey, entry -> entry.getValue().canonicalize(removeConstants.apply(entry.getKey()))))))); } public static Collector> toLinkedMap(Function keyMapper, Function valueMapper) diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveTableLayoutHandle.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveTableLayoutHandle.java index 308484a287f76..70993bfb6384b 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveTableLayoutHandle.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveTableLayoutHandle.java @@ -31,6 +31,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -41,13 +42,10 @@ import java.util.Optional; import java.util.Set; -import static com.facebook.presto.common.predicate.TupleDomain.toLinkedMap; -import static com.facebook.presto.common.predicate.TupleDomain.withColumnDomains; import static com.facebook.presto.expressions.CanonicalRowExpressionRewriter.canonicalizeRowExpression; import static com.facebook.presto.hive.HiveMetadata.createPredicate; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableSet.toImmutableSet; -import static java.util.Comparator.comparing; import static java.util.Objects.requireNonNull; public class HiveTableLayoutHandle @@ -285,7 +283,7 @@ public Object getIdentifier(Optional split, PlanCanonicalization // which is unrelated to identifier purpose, or has already been applied as the boundary of split. return ImmutableMap.builder() .put("schemaTableName", schemaTableName) - .put("domainPredicate", domainPredicate.canonicalize(false)) + .put("domainPredicate", canonicalizeDomainPredicate(domainPredicate, getPredicateColumns(), canonicalizationStrategy)) .put("remainingPredicate", canonicalizeRowExpression(remainingPredicate, false)) .put("constraint", getConstraint(canonicalizationStrategy)) // TODO: Decide what to do with bucketFilter when canonicalizing @@ -304,20 +302,32 @@ private TupleDomain getConstraint(PlanCanonicalizationStrategy can // `x = 1` is equivalent to `x = 1000` // `x > 1` is NOT equivalent to `x > 1000` TupleDomain constraint = createPredicate(ImmutableList.copyOf(partitionColumns), partitions); - if (pushdownFilterEnabled) { - constraint = getDomainPredicate() - .transform(subfield -> subfield.getPath().isEmpty() ? subfield.getRootName() : null) - .transform(getPredicateColumns()::get) - .transform(ColumnHandle.class::cast) - .intersect(constraint); - } + constraint = getDomainPredicate() + .transform(subfield -> subfield.getPath().isEmpty() ? subfield.getRootName() : null) + .transform(getPredicateColumns()::get) + .transform(ColumnHandle.class::cast) + .intersect(constraint); - constraint = withColumnDomains(constraint.getDomains().get().entrySet().stream() - .sorted(comparing(entry -> entry.getKey().toString())) - .collect(toLinkedMap(Map.Entry::getKey, entry -> entry.getValue().canonicalize(isPartitionKey(entry.getKey()))))); + constraint = constraint.canonicalize(HiveTableLayoutHandle::isPartitionKey); return constraint; } + @VisibleForTesting + public static TupleDomain canonicalizeDomainPredicate(TupleDomain domainPredicate, Map predicateColumns, PlanCanonicalizationStrategy strategy) + { + if (strategy == PlanCanonicalizationStrategy.DEFAULT) { + return domainPredicate.canonicalize(ignored -> false); + } + return domainPredicate + .transform(subfield -> { + if (!subfield.getPath().isEmpty() || !predicateColumns.containsKey(subfield.getRootName())) { + return subfield; + } + return isPartitionKey(predicateColumns.get(subfield.getRootName())) ? null : subfield; + }) + .canonicalize(ignored -> false); + } + private static boolean isPartitionKey(ColumnHandle columnHandle) { return columnHandle instanceof HiveColumnHandle && ((HiveColumnHandle) columnHandle).isPartitionKey(); diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveTableLayoutHandle.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveTableLayoutHandle.java new file mode 100644 index 0000000000000..ce00711f31858 --- /dev/null +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveTableLayoutHandle.java @@ -0,0 +1,63 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.hive; + +import com.facebook.presto.common.Subfield; +import com.facebook.presto.common.predicate.TupleDomain; +import com.google.common.collect.ImmutableMap; +import org.testng.annotations.Test; + +import java.util.Map; +import java.util.Optional; + +import static com.facebook.presto.common.plan.PlanCanonicalizationStrategy.CONNECTOR; +import static com.facebook.presto.common.predicate.Domain.singleValue; +import static com.facebook.presto.common.type.VarcharType.VARCHAR; +import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.PARTITION_KEY; +import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR; +import static com.facebook.presto.hive.HiveTableLayoutHandle.canonicalizeDomainPredicate; +import static com.facebook.presto.hive.HiveType.HIVE_STRING; +import static io.airlift.slice.Slices.utf8Slice; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +public class TestHiveTableLayoutHandle +{ + @Test + public void testCanonicalizeDomain() + { + Map predicateColumns = ImmutableMap.of( + "ds", getColumnHandle("ds", true), + "col", getColumnHandle("col", false)); + TupleDomain domain = TupleDomain.withColumnDomains(ImmutableMap.of( + new Subfield("ds"), singleValue(VARCHAR, utf8Slice("2022-01-01")), + new Subfield("col"), singleValue(VARCHAR, utf8Slice("id")))); + TupleDomain newDomain = canonicalizeDomainPredicate(domain, predicateColumns, CONNECTOR); + assertTrue(newDomain.getDomains().isPresent()); + assertEquals(newDomain.getDomains().get().size(), 1); + assertEquals(newDomain.getDomains().get().get(new Subfield("col")), singleValue(VARCHAR, utf8Slice("id"))); + } + + private HiveColumnHandle getColumnHandle(String name, boolean partitioned) + { + return new HiveColumnHandle( + name, + HIVE_STRING, + HIVE_STRING.getTypeSignature(), + 1, + partitioned ? PARTITION_KEY : REGULAR, + Optional.empty(), + Optional.empty()); + } +} diff --git a/presto-spi/src/test/java/com/facebook/presto/spi/predicate/TestTupleDomain.java b/presto-spi/src/test/java/com/facebook/presto/spi/predicate/TestTupleDomain.java index a683dc4045647..ba08b02a94d81 100644 --- a/presto-spi/src/test/java/com/facebook/presto/spi/predicate/TestTupleDomain.java +++ b/presto-spi/src/test/java/com/facebook/presto/spi/predicate/TestTupleDomain.java @@ -40,6 +40,7 @@ import java.util.Map; import static com.facebook.presto.common.predicate.TupleDomain.columnWiseUnion; +import static com.facebook.presto.common.predicate.TupleDomain.withColumnDomains; import static com.facebook.presto.common.type.BigintType.BIGINT; import static com.facebook.presto.common.type.BooleanType.BOOLEAN; import static com.facebook.presto.common.type.DoubleType.DOUBLE; @@ -47,6 +48,7 @@ import static io.airlift.slice.Slices.utf8Slice; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotEquals; import static org.testng.Assert.assertTrue; public class TestTupleDomain @@ -58,6 +60,20 @@ public class TestTupleDomain private static final ColumnHandle E = new TestingColumnHandle("e"); private static final ColumnHandle F = new TestingColumnHandle("f"); + private final ObjectMapper mapper; + + public TestTupleDomain() + { + TestingTypeManager typeManager = new TestingTypeManager(); + TestingBlockEncodingSerde blockEncodingSerde = new TestingBlockEncodingSerde(); + + this.mapper = new JsonObjectMapperProvider().get() + .registerModule(new SimpleModule() + .addDeserializer(Type.class, new TestingTypeDeserializer(typeManager)) + .addSerializer(Block.class, new TestingBlockJsonSerde.Serializer(blockEncodingSerde)) + .addDeserializer(Block.class, new TestingBlockJsonSerde.Deserializer(blockEncodingSerde))); + } + @Test public void testNone() { @@ -655,6 +671,31 @@ public void testTransformFailsWithNonUniqueMapping() domain.transform(input -> "x"); } + @Test + public void testCanonicalize() + throws Exception + { + TupleDomain domain1 = withColumnDomains(ImmutableMap.of( + "col1", Domain.singleValue(VARCHAR, utf8Slice("abc")), + "col2", Domain.singleValue(VARCHAR, utf8Slice("def")))); + + TupleDomain domain2 = withColumnDomains(ImmutableMap.of( + "col1", Domain.singleValue(VARCHAR, utf8Slice("abcd")), + "col2", Domain.singleValue(VARCHAR, utf8Slice("def")))); + + TupleDomain domain3 = withColumnDomains(ImmutableMap.of( + "col1", Domain.singleValue(VARCHAR, utf8Slice("abc")), + "col2", Domain.singleValue(VARCHAR, utf8Slice("defg")))); + + assertEquals( + mapper.writeValueAsString(domain1.canonicalize(key -> key.equals("col1"))), + mapper.writeValueAsString(domain2.canonicalize(key -> key.equals("col1")))); + + assertNotEquals( + mapper.writeValueAsString(domain1.canonicalize(key -> key.equals("col1"))), + mapper.writeValueAsString(domain3.canonicalize(key -> key.equals("col1")))); + } + private boolean overlaps(Map domains1, Map domains2) { TupleDomain tupleDomain1 = TupleDomain.withColumnDomains(domains1); diff --git a/presto-tpch/src/main/java/com/facebook/presto/tpch/TpchTableLayoutHandle.java b/presto-tpch/src/main/java/com/facebook/presto/tpch/TpchTableLayoutHandle.java index 8775233d020b6..9c8e080abac6a 100644 --- a/presto-tpch/src/main/java/com/facebook/presto/tpch/TpchTableLayoutHandle.java +++ b/presto-tpch/src/main/java/com/facebook/presto/tpch/TpchTableLayoutHandle.java @@ -60,7 +60,7 @@ public Object getIdentifier(Optional split, PlanCanonicalization { return ImmutableMap.builder() .put("table", table) - .put("predicate", predicate.canonicalize(false)) + .put("predicate", predicate.canonicalize(ignored -> false)) .build(); } }