From 4b453a554148bdc2fcb2c14aaa64f8c9248d871f Mon Sep 17 00:00:00 2001 From: Vivek Date: Mon, 26 Feb 2024 14:26:04 -0800 Subject: [PATCH] Enable logical property propagation by default --- .../src/main/sphinx/admin/properties.rst | 11 ++++++- presto-docs/src/main/sphinx/optimizer.rst | 1 + .../sphinx/optimizer/logical-properties.rst | 32 +++++++++++++++++++ .../presto/sql/analyzer/FeaturesConfig.java | 2 +- .../LogicalPropertiesProviderImpl.java | 6 +--- .../RemoveRedundantAggregateDistinct.java | 22 +++++++------ .../sql/analyzer/TestFeaturesConfig.java | 6 ++-- .../sql/planner/TestLogicalPlanner.java | 15 +++++++++ 8 files changed, 75 insertions(+), 20 deletions(-) create mode 100644 presto-docs/src/main/sphinx/optimizer/logical-properties.rst diff --git a/presto-docs/src/main/sphinx/admin/properties.rst b/presto-docs/src/main/sphinx/admin/properties.rst index b75014cc6dd2a..23d4fb95218aa 100644 --- a/presto-docs/src/main/sphinx/admin/properties.rst +++ b/presto-docs/src/main/sphinx/admin/properties.rst @@ -803,6 +803,15 @@ Optimizer Properties Plan canonicalization strategies used to canonicalize a query plan for history based optimization. +``optimizer.exploit-constraints`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + * **Type:** ``boolean`` + * **Default value:** ``true`` + + Enable analysis and propagation of logical properties (distinct keys, cardinality, etc.) among the nodes of + a query plan. The optimizer may then use these properties to perform various optimizations. + Planner Properties -------------------------------------- @@ -964,4 +973,4 @@ Logging Properties * **Type:** ``data size`` * **Default value:** ``100MB`` - The maximum file size for the log file of the HTTP server. \ No newline at end of file + The maximum file size for the log file of the HTTP server. diff --git a/presto-docs/src/main/sphinx/optimizer.rst b/presto-docs/src/main/sphinx/optimizer.rst index 6d302b992f1d0..9633c50183144 100644 --- a/presto-docs/src/main/sphinx/optimizer.rst +++ b/presto-docs/src/main/sphinx/optimizer.rst @@ -9,3 +9,4 @@ Query Optimizer optimizer/cost-in-explain optimizer/cost-based-optimizations optimizer/history-based-optimization + optimizer/logical-properties diff --git a/presto-docs/src/main/sphinx/optimizer/logical-properties.rst b/presto-docs/src/main/sphinx/optimizer/logical-properties.rst new file mode 100644 index 0000000000000..3011aa980f5f7 --- /dev/null +++ b/presto-docs/src/main/sphinx/optimizer/logical-properties.rst @@ -0,0 +1,32 @@ +================================= +Logical Properties of Query Plans +================================= + +Presto implements a framework for associating logical properties with the +result sets produced by the nodes of a query plan. These logical properties +might either initially derive from constraints defined on tables, or from +operations performed by intermediate nodes in the query plan, such as +aggregations, limits, or the application of predicates. The Presto optimizer +may then exploit these logical properties to perform optimizations such as +removing redundant operations or other logical transformations. + +The propagation of logical properties in query plans is enabled by the +``exploit_constraints`` session property or ``optimizer.exploit_constraints`` +configuration property set in ``etc/config.properties`` of the coordinator. +Logical property propagation is enabled by default. + + +Types of Logical Properties +--------------------------- + +Currently Presto detects and propagates the following logical properties: + +* ``KeyProperty`` - Represents a collection of distinct attributes that hold for + a final or intermediate result set produced by a plan node. + +* ``MaxCardProperty`` - Represents a provable maximum number of rows in a final or + intermediate result set produced by a plan node. + +* ``EquivalenceClassProperty`` - Represents classes of equivalent variable and + constant references that hold for a final or intermediate result set produced + by a plan node. diff --git a/presto-main/src/main/java/com/facebook/presto/sql/analyzer/FeaturesConfig.java b/presto-main/src/main/java/com/facebook/presto/sql/analyzer/FeaturesConfig.java index 86eec143f7256..716169c69a70d 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/analyzer/FeaturesConfig.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/analyzer/FeaturesConfig.java @@ -173,7 +173,7 @@ public class FeaturesConfig private double memoryRevokingThreshold = 0.9; private boolean parseDecimalLiteralsAsDouble; private boolean useMarkDistinct = true; - private boolean exploitConstraints; + private boolean exploitConstraints = true; private boolean preferPartialAggregation = true; private PartialAggregationStrategy partialAggregationStrategy = PartialAggregationStrategy.ALWAYS; private double partialAggregationByteReductionThreshold = 0.5; diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/properties/LogicalPropertiesProviderImpl.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/properties/LogicalPropertiesProviderImpl.java index 879e0b7bec017..329b8956818bc 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/properties/LogicalPropertiesProviderImpl.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/properties/LogicalPropertiesProviderImpl.java @@ -203,12 +203,8 @@ public LogicalProperties getAggregationProperties(AggregationNode aggregationNod throw new IllegalStateException("Expected source PlanNode to be a GroupReference with LogicalProperties"); } - if (aggregationNode.getGroupingKeys().isEmpty() && aggregationNode.getAggregations().isEmpty()) { - throw new IllegalStateException("Aggregation node with no grouping columns and no aggregation functions"); - } - LogicalPropertiesImpl sourceProperties = (LogicalPropertiesImpl) ((GroupReference) aggregationNode.getSource()).getLogicalProperties().get(); - if (!aggregationNode.getAggregations().isEmpty() && aggregationNode.getGroupingKeys().isEmpty()) { + if (aggregationNode.getGroupingKeys().isEmpty()) { //aggregation with no grouping variables, single row output return propagateAndLimitProperties(sourceProperties, Long.valueOf(1)); } diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/RemoveRedundantAggregateDistinct.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/RemoveRedundantAggregateDistinct.java index 206bda1154797..2f06bef0049bf 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/RemoveRedundantAggregateDistinct.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/RemoveRedundantAggregateDistinct.java @@ -23,11 +23,11 @@ import java.util.Map; import java.util.Set; -import java.util.stream.Collectors; import java.util.stream.Stream; import static com.facebook.presto.spi.plan.AggregationNode.Aggregation.removeDistinct; import static com.facebook.presto.sql.planner.plan.Patterns.aggregation; +import static com.google.common.collect.ImmutableSet.toImmutableSet; /** * Removes distinct from aggregates where the combination of aggregate columns and grouping variables contain a unique key. @@ -60,16 +60,18 @@ public Result apply(AggregationNode node, Captures captures, Context context) ImmutableMap.Builder aggregationsBuilder = ImmutableMap.builder(); for (Map.Entry agg : node.getAggregations().entrySet()) { - Set varAndGroupingKeySet = - Stream.concat(node.getGroupingKeys().stream().map(VariableReferenceExpression.class::cast), - (agg.getValue()).getArguments().stream().map(VariableReferenceExpression.class::cast)) - .collect(Collectors.toSet()); - if (agg.getValue().isDistinct() && ((GroupReference) node.getSource()).getLogicalProperties().get().isDistinct(varAndGroupingKeySet)) { - aggregationsBuilder.put(agg.getKey(), removeDistinct(agg.getValue())); - } - else { - aggregationsBuilder.put(agg); + if (node.getGroupingKeys().stream().allMatch(key -> key instanceof VariableReferenceExpression) && + (agg.getValue()).getArguments().stream().allMatch(arg -> arg instanceof VariableReferenceExpression)) { + Set varAndGroupingKeySet = + Stream.concat(node.getGroupingKeys().stream().map(VariableReferenceExpression.class::cast), + (agg.getValue()).getArguments().stream().map(VariableReferenceExpression.class::cast)) + .collect(toImmutableSet()); + if (agg.getValue().isDistinct() && ((GroupReference) node.getSource()).getLogicalProperties().get().isDistinct(varAndGroupingKeySet)) { + aggregationsBuilder.put(agg.getKey(), removeDistinct(agg.getValue())); + continue; + } } + aggregationsBuilder.put(agg); } Map newAggregations = aggregationsBuilder.build(); diff --git a/presto-main/src/test/java/com/facebook/presto/sql/analyzer/TestFeaturesConfig.java b/presto-main/src/test/java/com/facebook/presto/sql/analyzer/TestFeaturesConfig.java index 9bc5345b23cd5..58816046888c7 100644 --- a/presto-main/src/test/java/com/facebook/presto/sql/analyzer/TestFeaturesConfig.java +++ b/presto-main/src/test/java/com/facebook/presto/sql/analyzer/TestFeaturesConfig.java @@ -158,7 +158,7 @@ public void testDefaults() .setFilterAndProjectMinOutputPageSize(new DataSize(500, KILOBYTE)) .setFilterAndProjectMinOutputPageRowCount(256) .setUseMarkDistinct(true) - .setExploitConstraints(false) + .setExploitConstraints(true) .setPreferPartialAggregation(true) .setPartialAggregationStrategy(PartialAggregationStrategy.ALWAYS) .setPartialAggregationByteReductionThreshold(0.5) @@ -372,7 +372,7 @@ public void testExplicitPropertyMappings() .put("arrayagg.implementation", "LEGACY") .put("multimapagg.implementation", "LEGACY") .put("optimizer.use-mark-distinct", "false") - .put("optimizer.exploit-constraints", "true") + .put("optimizer.exploit-constraints", "false") .put("optimizer.prefer-partial-aggregation", "false") .put("optimizer.partial-aggregation-strategy", "automatic") .put("optimizer.partial-aggregation-byte-reduction-threshold", "0.8") @@ -576,7 +576,7 @@ public void testExplicitPropertyMappings() .setFilterAndProjectMinOutputPageSize(new DataSize(1, MEGABYTE)) .setFilterAndProjectMinOutputPageRowCount(2048) .setUseMarkDistinct(false) - .setExploitConstraints(true) + .setExploitConstraints(false) .setPreferPartialAggregation(false) .setPartialAggregationStrategy(PartialAggregationStrategy.AUTOMATIC) .setPartialAggregationByteReductionThreshold(0.8) diff --git a/presto-main/src/test/java/com/facebook/presto/sql/planner/TestLogicalPlanner.java b/presto-main/src/test/java/com/facebook/presto/sql/planner/TestLogicalPlanner.java index 5f04475dfa7db..ca4b9d35d80ba 100644 --- a/presto-main/src/test/java/com/facebook/presto/sql/planner/TestLogicalPlanner.java +++ b/presto-main/src/test/java/com/facebook/presto/sql/planner/TestLogicalPlanner.java @@ -1398,6 +1398,21 @@ public void testComplexOrderBy() " COUNT(*), " + " SUM(REDUCE(col1, ROW(0),(l, r) -> l, x -> 1)) " + " )", + output(aggregation(ImmutableMap.of(), + values()))); + + Session session = Session.builder(this.getQueryRunner().getDefaultSession()) + .setSystemProperty(EXPLOIT_CONSTRAINTS, Boolean.toString(false)) + .build(); + assertDistributedPlan("SELECT COUNT(*) " + + "FROM (values ARRAY['a', 'b']) as t(col1) " + + "ORDER BY " + + " IF( " + + " SUM(REDUCE(col1, ROW(0),(l, r) -> l, x -> 1)) > 0, " + + " COUNT(*), " + + " SUM(REDUCE(col1, ROW(0),(l, r) -> l, x -> 1)) " + + " )", + session, output( project( exchange(