Skip to content

Commit

Permalink
Canonicalize domain predicate in TableScanNode
Browse files Browse the repository at this point in the history
  • Loading branch information
pranjalssh authored and rschlussel committed Oct 26, 2022
1 parent ba9ec30 commit 21a3bbe
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -485,11 +485,11 @@ public TupleDomain<T> compact(int threshold)
return TupleDomain.withColumnDomains(unmodifiableMap(compactedDomains));
}

public TupleDomain<T> canonicalize(boolean removeConstants)
public TupleDomain<T> canonicalize(Function<T, Boolean> removeConstants)
{
return new TupleDomain<>(domains.map(d -> unmodifiableMap(d.entrySet().stream()
.sorted(comparing(domain -> domain.getKey().toString()))
.collect(toLinkedMap(Entry::getKey, entry -> entry.getValue().canonicalize(removeConstants))))));
.collect(toLinkedMap(Entry::getKey, entry -> entry.getValue().canonicalize(removeConstants.apply(entry.getKey())))))));
}

public static <T, K, U> Collector<T, ?, Map<K, U>> toLinkedMap(Function<? super T, ? extends K> keyMapper, Function<? super T, ? extends U> valueMapper)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;

Expand All @@ -41,13 +42,10 @@
import java.util.Optional;
import java.util.Set;

import static com.facebook.presto.common.predicate.TupleDomain.toLinkedMap;
import static com.facebook.presto.common.predicate.TupleDomain.withColumnDomains;
import static com.facebook.presto.expressions.CanonicalRowExpressionRewriter.canonicalizeRowExpression;
import static com.facebook.presto.hive.HiveMetadata.createPredicate;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static java.util.Comparator.comparing;
import static java.util.Objects.requireNonNull;

public class HiveTableLayoutHandle
Expand Down Expand Up @@ -285,7 +283,7 @@ public Object getIdentifier(Optional<ConnectorSplit> split, PlanCanonicalization
// which is unrelated to identifier purpose, or has already been applied as the boundary of split.
return ImmutableMap.builder()
.put("schemaTableName", schemaTableName)
.put("domainPredicate", domainPredicate.canonicalize(false))
.put("domainPredicate", canonicalizeDomainPredicate(domainPredicate, getPredicateColumns(), canonicalizationStrategy))
.put("remainingPredicate", canonicalizeRowExpression(remainingPredicate, false))
.put("constraint", getConstraint(canonicalizationStrategy))
// TODO: Decide what to do with bucketFilter when canonicalizing
Expand All @@ -304,20 +302,32 @@ private TupleDomain<ColumnHandle> getConstraint(PlanCanonicalizationStrategy can
// `x = 1` is equivalent to `x = 1000`
// `x > 1` is NOT equivalent to `x > 1000`
TupleDomain<ColumnHandle> constraint = createPredicate(ImmutableList.copyOf(partitionColumns), partitions);
if (pushdownFilterEnabled) {
constraint = getDomainPredicate()
.transform(subfield -> subfield.getPath().isEmpty() ? subfield.getRootName() : null)
.transform(getPredicateColumns()::get)
.transform(ColumnHandle.class::cast)
.intersect(constraint);
}
constraint = getDomainPredicate()
.transform(subfield -> subfield.getPath().isEmpty() ? subfield.getRootName() : null)
.transform(getPredicateColumns()::get)
.transform(ColumnHandle.class::cast)
.intersect(constraint);

constraint = withColumnDomains(constraint.getDomains().get().entrySet().stream()
.sorted(comparing(entry -> entry.getKey().toString()))
.collect(toLinkedMap(Map.Entry::getKey, entry -> entry.getValue().canonicalize(isPartitionKey(entry.getKey())))));
constraint = constraint.canonicalize(HiveTableLayoutHandle::isPartitionKey);
return constraint;
}

@VisibleForTesting
public static TupleDomain<Subfield> canonicalizeDomainPredicate(TupleDomain<Subfield> domainPredicate, Map<String, HiveColumnHandle> predicateColumns, PlanCanonicalizationStrategy strategy)
{
if (strategy == PlanCanonicalizationStrategy.DEFAULT) {
return domainPredicate.canonicalize(ignored -> false);
}
return domainPredicate
.transform(subfield -> {
if (!subfield.getPath().isEmpty() || !predicateColumns.containsKey(subfield.getRootName())) {
return subfield;
}
return isPartitionKey(predicateColumns.get(subfield.getRootName())) ? null : subfield;
})
.canonicalize(ignored -> false);
}

private static boolean isPartitionKey(ColumnHandle columnHandle)
{
return columnHandle instanceof HiveColumnHandle && ((HiveColumnHandle) columnHandle).isPartitionKey();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.hive;

import com.facebook.presto.common.Subfield;
import com.facebook.presto.common.predicate.TupleDomain;
import com.google.common.collect.ImmutableMap;
import org.testng.annotations.Test;

import java.util.Map;
import java.util.Optional;

import static com.facebook.presto.common.plan.PlanCanonicalizationStrategy.CONNECTOR;
import static com.facebook.presto.common.predicate.Domain.singleValue;
import static com.facebook.presto.common.type.VarcharType.VARCHAR;
import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.PARTITION_KEY;
import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR;
import static com.facebook.presto.hive.HiveTableLayoutHandle.canonicalizeDomainPredicate;
import static com.facebook.presto.hive.HiveType.HIVE_STRING;
import static io.airlift.slice.Slices.utf8Slice;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;

public class TestHiveTableLayoutHandle
{
@Test
public void testCanonicalizeDomain()
{
Map<String, HiveColumnHandle> predicateColumns = ImmutableMap.of(
"ds", getColumnHandle("ds", true),
"col", getColumnHandle("col", false));
TupleDomain<Subfield> domain = TupleDomain.withColumnDomains(ImmutableMap.of(
new Subfield("ds"), singleValue(VARCHAR, utf8Slice("2022-01-01")),
new Subfield("col"), singleValue(VARCHAR, utf8Slice("id"))));
TupleDomain<Subfield> newDomain = canonicalizeDomainPredicate(domain, predicateColumns, CONNECTOR);
assertTrue(newDomain.getDomains().isPresent());
assertEquals(newDomain.getDomains().get().size(), 1);
assertEquals(newDomain.getDomains().get().get(new Subfield("col")), singleValue(VARCHAR, utf8Slice("id")));
}

private HiveColumnHandle getColumnHandle(String name, boolean partitioned)
{
return new HiveColumnHandle(
name,
HIVE_STRING,
HIVE_STRING.getTypeSignature(),
1,
partitioned ? PARTITION_KEY : REGULAR,
Optional.empty(),
Optional.empty());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,15 @@
import java.util.Map;

import static com.facebook.presto.common.predicate.TupleDomain.columnWiseUnion;
import static com.facebook.presto.common.predicate.TupleDomain.withColumnDomains;
import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.common.type.BooleanType.BOOLEAN;
import static com.facebook.presto.common.type.DoubleType.DOUBLE;
import static com.facebook.presto.common.type.VarcharType.VARCHAR;
import static io.airlift.slice.Slices.utf8Slice;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNotEquals;
import static org.testng.Assert.assertTrue;

public class TestTupleDomain
Expand All @@ -58,6 +60,20 @@ public class TestTupleDomain
private static final ColumnHandle E = new TestingColumnHandle("e");
private static final ColumnHandle F = new TestingColumnHandle("f");

private final ObjectMapper mapper;

public TestTupleDomain()
{
TestingTypeManager typeManager = new TestingTypeManager();
TestingBlockEncodingSerde blockEncodingSerde = new TestingBlockEncodingSerde();

this.mapper = new JsonObjectMapperProvider().get()
.registerModule(new SimpleModule()
.addDeserializer(Type.class, new TestingTypeDeserializer(typeManager))
.addSerializer(Block.class, new TestingBlockJsonSerde.Serializer(blockEncodingSerde))
.addDeserializer(Block.class, new TestingBlockJsonSerde.Deserializer(blockEncodingSerde)));
}

@Test
public void testNone()
{
Expand Down Expand Up @@ -655,6 +671,31 @@ public void testTransformFailsWithNonUniqueMapping()
domain.transform(input -> "x");
}

@Test
public void testCanonicalize()
throws Exception
{
TupleDomain<String> domain1 = withColumnDomains(ImmutableMap.of(
"col1", Domain.singleValue(VARCHAR, utf8Slice("abc")),
"col2", Domain.singleValue(VARCHAR, utf8Slice("def"))));

TupleDomain<String> domain2 = withColumnDomains(ImmutableMap.of(
"col1", Domain.singleValue(VARCHAR, utf8Slice("abcd")),
"col2", Domain.singleValue(VARCHAR, utf8Slice("def"))));

TupleDomain<String> domain3 = withColumnDomains(ImmutableMap.of(
"col1", Domain.singleValue(VARCHAR, utf8Slice("abc")),
"col2", Domain.singleValue(VARCHAR, utf8Slice("defg"))));

assertEquals(
mapper.writeValueAsString(domain1.canonicalize(key -> key.equals("col1"))),
mapper.writeValueAsString(domain2.canonicalize(key -> key.equals("col1"))));

assertNotEquals(
mapper.writeValueAsString(domain1.canonicalize(key -> key.equals("col1"))),
mapper.writeValueAsString(domain3.canonicalize(key -> key.equals("col1"))));
}

private boolean overlaps(Map<ColumnHandle, Domain> domains1, Map<ColumnHandle, Domain> domains2)
{
TupleDomain<ColumnHandle> tupleDomain1 = TupleDomain.withColumnDomains(domains1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public Object getIdentifier(Optional<ConnectorSplit> split, PlanCanonicalization
{
return ImmutableMap.builder()
.put("table", table)
.put("predicate", predicate.canonicalize(false))
.put("predicate", predicate.canonicalize(ignored -> false))
.build();
}
}

0 comments on commit 21a3bbe

Please sign in to comment.