Skip to content

Commit

Permalink
Enable logical property propagation by default
Browse files Browse the repository at this point in the history
  • Loading branch information
ClarenceThreepwood committed Apr 8, 2024
1 parent 4f91dee commit 54ffa28
Show file tree
Hide file tree
Showing 9 changed files with 93 additions and 21 deletions.
8 changes: 8 additions & 0 deletions presto-docs/src/main/sphinx/admin/properties.rst
Original file line number Diff line number Diff line change
Expand Up @@ -819,6 +819,14 @@ Optimizer Properties

Log the stats equivalent plan and canonicalized plans used in history based optimization.

``optimizer.exploit-constraints``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

* **Type:** ``boolean``
* **Default value:** ``true``

Enable analysis and propagation of logical properties like distinct keys or cardinality among the nodes of
a query plan. The optimizer may then use these properties to perform various optimizations.

Planner Properties
--------------------------------------
Expand Down
1 change: 1 addition & 0 deletions presto-docs/src/main/sphinx/optimizer.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ Query Optimizer
optimizer/cost-in-explain
optimizer/cost-based-optimizations
optimizer/history-based-optimization
optimizer/logical-properties
32 changes: 32 additions & 0 deletions presto-docs/src/main/sphinx/optimizer/logical-properties.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
=================================
Logical Properties of Query Plans
=================================

Presto implements a framework for associating logical properties with the
result sets produced by the nodes of a query plan. These logical properties
might either derive from constraints defined on tables, or from
operations performed by intermediate nodes in the query plan such as
aggregations, limits, or the application of predicates. The Presto optimizer
may then use these logical properties to perform optimizations such as
removing redundant operations or other logical transformations.

The propagation of logical properties in query plans is enabled by the
``exploit_constraints`` session property or ``optimizer.exploit-constraints``
configuration property set in ``etc/config.properties`` of the coordinator.
Logical property propagation is enabled by default.


Types of Logical Properties
---------------------------

Presto detects and propagates the following logical properties:

* ``KeyProperty`` - A collection of distinct attributes that hold for
a final or intermediate result set produced by a plan node.

* ``MaxCardProperty`` - A provable maximum number of rows in a final or
intermediate result set produced by a plan node.

* ``EquivalenceClassProperty`` - Classes of equivalent variable and
constant references that hold for a final or intermediate result set produced
by a plan node.
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ public class FeaturesConfig
private double memoryRevokingThreshold = 0.9;
private boolean parseDecimalLiteralsAsDouble;
private boolean useMarkDistinct = true;
private boolean exploitConstraints;
private boolean exploitConstraints = true;
private boolean preferPartialAggregation = true;
private PartialAggregationStrategy partialAggregationStrategy = PartialAggregationStrategy.ALWAYS;
private double partialAggregationByteReductionThreshold = 0.5;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,12 +203,8 @@ public LogicalProperties getAggregationProperties(AggregationNode aggregationNod
throw new IllegalStateException("Expected source PlanNode to be a GroupReference with LogicalProperties");
}

if (aggregationNode.getGroupingKeys().isEmpty() && aggregationNode.getAggregations().isEmpty()) {
throw new IllegalStateException("Aggregation node with no grouping columns and no aggregation functions");
}

LogicalPropertiesImpl sourceProperties = (LogicalPropertiesImpl) ((GroupReference) aggregationNode.getSource()).getLogicalProperties().get();
if (!aggregationNode.getAggregations().isEmpty() && aggregationNode.getGroupingKeys().isEmpty()) {
if (aggregationNode.getGroupingKeys().isEmpty()) {
//aggregation with no grouping variables, single row output
return propagateAndLimitProperties(sourceProperties, Long.valueOf(1));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@
import com.facebook.presto.sql.planner.iterative.Rule;
import com.google.common.collect.ImmutableMap;

import java.util.AbstractMap.SimpleEntry;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static com.facebook.presto.spi.plan.AggregationNode.Aggregation.removeDistinct;
import static com.facebook.presto.sql.planner.plan.Patterns.aggregation;
import static com.google.common.collect.ImmutableSet.toImmutableSet;

/**
* Removes distinct from aggregates where the combination of aggregate columns and grouping variables contain a unique key.
Expand Down Expand Up @@ -61,16 +62,10 @@ public Result apply(AggregationNode node, Captures captures, Context context)
ImmutableMap.Builder<VariableReferenceExpression, AggregationNode.Aggregation> aggregationsBuilder = ImmutableMap.builder();

for (Map.Entry<VariableReferenceExpression, AggregationNode.Aggregation> agg : node.getAggregations().entrySet()) {
Set<VariableReferenceExpression> varAndGroupingKeySet =
Stream.concat(node.getGroupingKeys().stream().map(VariableReferenceExpression.class::cast),
(agg.getValue()).getArguments().stream().map(VariableReferenceExpression.class::cast))
.collect(Collectors.toSet());
if (agg.getValue().isDistinct() && ((GroupReference) node.getSource()).getLogicalProperties().get().isDistinct(varAndGroupingKeySet)) {
aggregationsBuilder.put(agg.getKey(), removeDistinct(agg.getValue()));
}
else {
aggregationsBuilder.put(agg);
}
aggregationsBuilder.put(
canRemoveDistinct(node, agg.getValue()) ?
new SimpleEntry<>(agg.getKey(), removeDistinct(agg.getValue())) :
agg);
}

Map<VariableReferenceExpression, AggregationNode.Aggregation> newAggregations = aggregationsBuilder.build();
Expand All @@ -92,4 +87,28 @@ public Result apply(AggregationNode node, Captures captures, Context context)
node.getGroupIdVariable(),
node.getAggregationId()));
}

/**
 * Decides whether the DISTINCT qualifier of a single aggregate can be dropped.
 *
 * The answer is {@code true} only when the aggregate is DISTINCT, every grouping key of
 * {@code node} and every argument of {@code aggregation} is a {@link VariableReferenceExpression},
 * and the logical properties of the source report the union of those variables as distinct.
 *
 * @param node the aggregation node that owns {@code aggregation}
 * @param aggregation the aggregate being examined
 * @return {@code true} if DISTINCT is redundant for this aggregate, {@code false} otherwise
 */
private boolean canRemoveDistinct(AggregationNode node, AggregationNode.Aggregation aggregation)
{
    // A non-DISTINCT aggregate has nothing to strip.
    if (!aggregation.isDistinct()) {
        return false;
    }

    // The casts further down are only valid for plain variable references, so bail out
    // as soon as a grouping key or an argument turns out to be some other expression.
    boolean groupingKeysAreVariables = node.getGroupingKeys()
            .stream()
            .allMatch(VariableReferenceExpression.class::isInstance);
    if (!groupingKeysAreVariables) {
        return false;
    }

    boolean argumentsAreVariables = aggregation.getArguments()
            .stream()
            .allMatch(VariableReferenceExpression.class::isInstance);
    if (!argumentsAreVariables) {
        return false;
    }

    Stream<VariableReferenceExpression> groupingVariables = node.getGroupingKeys()
            .stream()
            .map(VariableReferenceExpression.class::cast);
    Stream<VariableReferenceExpression> argumentVariables = aggregation.getArguments()
            .stream()
            .map(VariableReferenceExpression.class::cast);
    Set<VariableReferenceExpression> candidateKey = Stream.concat(groupingVariables, argumentVariables)
            .collect(toImmutableSet());

    // NOTE(review): assumes the source is a GroupReference whose logical properties are present;
    // presumably guaranteed by the rule's pattern, which is outside this view - confirm.
    GroupReference source = (GroupReference) node.getSource();
    return source.getLogicalProperties().get().isDistinct(candidateKey);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ public class RemoveRedundantDistinct

private static boolean distinctOfUniqueKey(AggregationNode node)
{
return node.getGroupingSetCount() == 1 &&
return node.hasNonEmptyGroupingSet() &&
node.getGroupingSetCount() == 1 &&
node.getAggregations().isEmpty() &&
((GroupReference) node.getSource()).getLogicalProperties().isPresent() &&
((GroupReference) node.getSource()).getLogicalProperties().get().isDistinct(node.getGroupingKeys().stream().collect(Collectors.toSet()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ public void testDefaults()
.setFilterAndProjectMinOutputPageSize(new DataSize(500, KILOBYTE))
.setFilterAndProjectMinOutputPageRowCount(256)
.setUseMarkDistinct(true)
.setExploitConstraints(false)
.setExploitConstraints(true)
.setPreferPartialAggregation(true)
.setPartialAggregationStrategy(PartialAggregationStrategy.ALWAYS)
.setPartialAggregationByteReductionThreshold(0.5)
Expand Down Expand Up @@ -377,7 +377,7 @@ public void testExplicitPropertyMappings()
.put("arrayagg.implementation", "LEGACY")
.put("multimapagg.implementation", "LEGACY")
.put("optimizer.use-mark-distinct", "false")
.put("optimizer.exploit-constraints", "true")
.put("optimizer.exploit-constraints", "false")
.put("optimizer.prefer-partial-aggregation", "false")
.put("optimizer.partial-aggregation-strategy", "automatic")
.put("optimizer.partial-aggregation-byte-reduction-threshold", "0.8")
Expand Down Expand Up @@ -584,7 +584,7 @@ public void testExplicitPropertyMappings()
.setFilterAndProjectMinOutputPageSize(new DataSize(1, MEGABYTE))
.setFilterAndProjectMinOutputPageRowCount(2048)
.setUseMarkDistinct(false)
.setExploitConstraints(true)
.setExploitConstraints(false)
.setPreferPartialAggregation(false)
.setPartialAggregationStrategy(PartialAggregationStrategy.AUTOMATIC)
.setPartialAggregationByteReductionThreshold(0.8)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1399,6 +1399,21 @@ public void testComplexOrderBy()
" COUNT(*), " +
" SUM(REDUCE(col1, ROW(0),(l, r) -> l, x -> 1)) " +
" )",
output(aggregation(ImmutableMap.of(),
values())));

Session session = Session.builder(this.getQueryRunner().getDefaultSession())
.setSystemProperty(EXPLOIT_CONSTRAINTS, Boolean.toString(false))
.build();
assertDistributedPlan("SELECT COUNT(*) " +
"FROM (values ARRAY['a', 'b']) as t(col1) " +
"ORDER BY " +
" IF( " +
" SUM(REDUCE(col1, ROW(0),(l, r) -> l, x -> 1)) > 0, " +
" COUNT(*), " +
" SUM(REDUCE(col1, ROW(0),(l, r) -> l, x -> 1)) " +
" )",
session,
output(
project(
exchange(
Expand Down

0 comments on commit 54ffa28

Please sign in to comment.