Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a new plan canonicalization strategy for history based optimizer #21832

Merged
merged 1 commit into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,6 @@
*/
package com.facebook.presto.common.plan;

import java.util.List;

import static java.util.Arrays.asList;
import static java.util.Collections.unmodifiableList;

public enum PlanCanonicalizationStrategy
{
/**
Expand All @@ -31,7 +26,7 @@ public enum PlanCanonicalizationStrategy
*
* This is used in context of fragment result caching
*/
DEFAULT,
DEFAULT(0),
/**
* CONNECTOR strategy will canonicalize plan according to DEFAULT strategy, and additionally
* canonicalize `TableScanNode` by giving a connector specific implementation. Unlike DEFAULT strategy,
Expand All @@ -46,9 +41,9 @@ public enum PlanCanonicalizationStrategy
*
* This is used in context of history based optimizations.
*/
CONNECTOR,
CONNECTOR(1),
/**
* REMOVE_SAFE_CONSTANTS strategy is used to canonicalize plan with
* IGNORE_SAFE_CONSTANTS strategy is used to canonicalize plan with
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GG

* CONNECTOR strategy and will additionally remove constants from the plan
* which are not expected to have an impact on plan statistics.
*
Expand All @@ -61,15 +56,37 @@ public enum PlanCanonicalizationStrategy
*
* This is used in context of history based optimizations.
*/
REMOVE_SAFE_CONSTANTS;
IGNORE_SAFE_CONSTANTS(2),

/**
* IGNORE_SCAN_CONSTANTS further relaxes the IGNORE_SAFE_CONSTANTS strategy.
* In IGNORE_SAFE_CONSTANTS, only predicates on partitioned columns in the scan node are canonicalized, but
* in IGNORE_SCAN_CONSTANTS, predicates on non-partitioned columns in the scan node are also canonicalized.
*
* For example:
* `SELECT *, 1 FROM table` will be equivalent to `SELECT *, 2 FROM table`
* `SELECT * FROM table WHERE id = 1` will also be equivalent to `SELECT * FROM table WHERE id = 1000` even if id is not a partitioned column
*
* This is used in context of history based optimizations.
*/
IGNORE_SCAN_CONSTANTS(3);

/**
* Creates a list of PlanCanonicalizationStrategy to be used for history based optimizations.
* Output is ordered by decreasing accuracy of statistics, at benefit of more coverage.
* TODO: Remove CONNECTOR strategy
*/
public static List<PlanCanonicalizationStrategy> historyBasedPlanCanonicalizationStrategyList()

// Smaller value means more accurate
private final int errorLevel;

PlanCanonicalizationStrategy(int errorLevel)
{
this.errorLevel = errorLevel;
}

public int getErrorLevel()
{
return unmodifiableList(asList(REMOVE_SAFE_CONSTANTS));
return errorLevel;
}
}
8 changes: 8 additions & 0 deletions presto-docs/src/main/sphinx/admin/properties.rst
Original file line number Diff line number Diff line change
Expand Up @@ -795,6 +795,14 @@ Optimizer Properties
Extract expressions which have constant value from filter and assignment expressions, and replace the expressions with
constant value.

``optimizer.history-based-optimizer-plan-canonicalization-strategies``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

* **Type:** ``string``
* **Default value:** ``IGNORE_SAFE_CONSTANTS``

Plan canonicalization strategies used to canonicalize a query plan for history based optimization.


Planner Properties
--------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ private TupleDomain<ColumnHandle> getConstraint(PlanCanonicalizationStrategy can
.transform(ColumnHandle.class::cast)
.intersect(constraint);

constraint = constraint.canonicalize(HiveTableLayoutHandle::isPartitionKey);
constraint = canonicalizationStrategy.equals(PlanCanonicalizationStrategy.IGNORE_SCAN_CONSTANTS) ? constraint.canonicalize(x -> true) : constraint.canonicalize(HiveTableLayoutHandle::isPartitionKey);
return constraint;
}

Expand All @@ -305,7 +305,7 @@ public static TupleDomain<Subfield> canonicalizeDomainPredicate(TupleDomain<Subf
if (!subfield.getPath().isEmpty() || !predicateColumns.containsKey(subfield.getRootName())) {
return subfield;
}
return isPartitionKey(predicateColumns.get(subfield.getRootName())) ? null : subfield;
return isPartitionKey(predicateColumns.get(subfield.getRootName())) || strategy.equals(PlanCanonicalizationStrategy.IGNORE_SCAN_CONSTANTS) ? null : subfield;
})
.canonicalize(ignored -> false);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@

import static com.facebook.presto.SystemSessionProperties.REWRITE_EXPRESSION_WITH_CONSTANT_EXPRESSION;
import static com.facebook.presto.common.plan.PlanCanonicalizationStrategy.CONNECTOR;
import static com.facebook.presto.common.plan.PlanCanonicalizationStrategy.REMOVE_SAFE_CONSTANTS;
import static com.facebook.presto.common.plan.PlanCanonicalizationStrategy.IGNORE_SAFE_CONSTANTS;
import static com.facebook.presto.common.plan.PlanCanonicalizationStrategy.IGNORE_SCAN_CONSTANTS;
import static com.facebook.presto.hive.HiveQueryRunner.HIVE_CATALOG;
import static com.facebook.presto.hive.HiveSessionProperties.PUSHDOWN_FILTER_ENABLED;
import static com.facebook.presto.sql.planner.CanonicalPlanGenerator.generateCanonicalPlan;
Expand Down Expand Up @@ -112,13 +113,37 @@ public void testCanonicalizationStrategies()
pushdownFilterEnabled(),
"SELECT orderkey from test_orders where ds = '2020-09-01' AND orderkey < 10",
"SELECT orderkey from test_orders where ds = '2020-09-02' AND orderkey < 20",
REMOVE_SAFE_CONSTANTS);
IGNORE_SAFE_CONSTANTS);

assertSameCanonicalLeafPlan(
pushdownFilterEnabled(),
"SELECT orderkey, CAST('1' AS VARCHAR) from test_orders where ds = '2020-09-01' AND orderkey < 10 AND ts >= '00:01'",
"SELECT orderkey, CAST('11' AS VARCHAR) from test_orders where ds = '2020-09-02' AND orderkey < 10 AND ts >= '00:02'",
REMOVE_SAFE_CONSTANTS);
IGNORE_SAFE_CONSTANTS);

assertDifferentCanonicalLeafPlan(
pushdownFilterEnabled(),
"SELECT orderkey, CAST('1' AS VARCHAR) from test_orders where ds = '2020-09-01' AND orderkey = 10",
"SELECT orderkey, CAST('11' AS VARCHAR) from test_orders where ds = '2020-09-02' AND orderkey = 20",
IGNORE_SAFE_CONSTANTS);

assertDifferentCanonicalLeafPlan(
pushdownFilterEnabled(),
"SELECT orderkey from test_orders where ds = '2020-09-01' AND orderkey < 10",
"SELECT orderkey from test_orders where ds = '2020-09-02' AND orderkey < 20",
IGNORE_SCAN_CONSTANTS);

assertSameCanonicalLeafPlan(
pushdownFilterEnabled(),
"SELECT orderkey, CAST('1' AS VARCHAR) from test_orders where ds = '2020-09-01' AND orderkey < 10 AND ts >= '00:01'",
"SELECT orderkey, CAST('11' AS VARCHAR) from test_orders where ds = '2020-09-02' AND orderkey < 10 AND ts >= '00:02'",
IGNORE_SCAN_CONSTANTS);

assertSameCanonicalLeafPlan(
pushdownFilterEnabled(),
"SELECT orderkey, CAST('1' AS VARCHAR) from test_orders where ds = '2020-09-01' AND orderkey = 10",
"SELECT orderkey, CAST('11' AS VARCHAR) from test_orders where ds = '2020-09-02' AND orderkey = 20",
IGNORE_SCAN_CONSTANTS);
}
finally {
queryRunner.execute("DROP TABLE IF EXISTS test_orders");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@
import static com.facebook.presto.SystemSessionProperties.USE_HISTORY_BASED_PLAN_STATISTICS;
import static com.facebook.presto.SystemSessionProperties.USE_PERFECTLY_CONSISTENT_HISTORIES;
import static com.facebook.presto.common.plan.PlanCanonicalizationStrategy.CONNECTOR;
import static com.facebook.presto.common.plan.PlanCanonicalizationStrategy.REMOVE_SAFE_CONSTANTS;
import static com.facebook.presto.common.plan.PlanCanonicalizationStrategy.IGNORE_SAFE_CONSTANTS;
import static com.facebook.presto.common.plan.PlanCanonicalizationStrategy.IGNORE_SCAN_CONSTANTS;
import static com.facebook.presto.hive.HiveQueryRunner.HIVE_CATALOG;
import static com.facebook.presto.hive.HiveSessionProperties.PUSHDOWN_FILTER_ENABLED;
import static com.facebook.presto.sql.planner.CanonicalPlanGenerator.generateCanonicalPlan;
Expand Down Expand Up @@ -120,11 +121,20 @@ public void testCanonicalizationStrategies()
assertSamePlanHash(
"SELECT orderkey, CAST(1 AS VARCHAR) from test_orders where ds = '2020-09-01' AND orderkey < 10",
"SELECT orderkey, CAST(2 AS VARCHAR) from test_orders where ds = '2020-09-02' AND orderkey < 10",
REMOVE_SAFE_CONSTANTS);
IGNORE_SAFE_CONSTANTS);
assertDifferentPlanHash(
"SELECT orderkey, CAST(1 AS VARCHAR) from test_orders where ds = '2020-09-01' AND orderkey < 10",
"SELECT orderkey, CAST(1 AS VARCHAR) from test_orders where ds = '2020-09-02' AND orderkey < 20",
REMOVE_SAFE_CONSTANTS);
IGNORE_SAFE_CONSTANTS);

assertSamePlanHash(
"SELECT orderkey, CAST(1 AS VARCHAR) from test_orders where ds = '2020-09-01' AND orderkey < 10",
"SELECT orderkey, CAST(2 AS VARCHAR) from test_orders where ds = '2020-09-02' AND orderkey < 10",
IGNORE_SCAN_CONSTANTS);
assertDifferentPlanHash(
"SELECT orderkey, CAST(1 AS VARCHAR) from test_orders where ds = '2020-09-01' AND orderkey < 10",
"SELECT orderkey, CAST(1 AS VARCHAR) from test_orders where ds = '2020-09-02' AND orderkey < 20",
IGNORE_SCAN_CONSTANTS);

assertSamePlanHash(
"INSERT INTO test_orders select * from test_orders",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,13 @@
import com.facebook.presto.tests.AbstractTestQueryFramework;
import com.facebook.presto.tests.DistributedQueryRunner;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.intellij.lang.annotations.Language;
import org.testng.annotations.Test;

import java.util.Map;

import static com.facebook.presto.SystemSessionProperties.HISTORY_BASED_OPTIMIZATION_PLAN_CANONICALIZATION_STRATEGY;
import static com.facebook.presto.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE;
import static com.facebook.presto.SystemSessionProperties.PARTIAL_AGGREGATION_STRATEGY;
import static com.facebook.presto.SystemSessionProperties.RESTRICT_HISTORY_BASED_OPTIMIZATION_TO_COMPLEX_QUERY;
Expand Down Expand Up @@ -93,6 +97,38 @@ public void testHistoryBasedStatsCalculator()
}
}

@Test
public void testHistoryBasedStatsCalculatorMultipleStrategies()
{
    // Session property sets: both relaxed strategies together, and the stricter strategy alone.
    Map<String, String> safeAndScanConstants = ImmutableMap.of(
            HISTORY_BASED_OPTIMIZATION_PLAN_CANONICALIZATION_STRATEGY,
            "IGNORE_SAFE_CONSTANTS,IGNORE_SCAN_CONSTANTS");
    Map<String, String> safeConstantsOnly = ImmutableMap.of(
            HISTORY_BASED_OPTIMIZATION_PLAN_CANONICALIZATION_STRATEGY,
            "IGNORE_SAFE_CONSTANTS");

    // The tracked query, plus two variants: one changes only the partition value and the
    // projected constant, the other also changes the predicate on the non-partition column.
    String trackedQuery = "SELECT *, 1 FROM test_orders where ds = '2020-09-01' and orderpriority = '1-URGENT'";
    String samePriorityQuery = "SELECT *, 2 FROM test_orders where ds = '2020-09-02' and orderpriority = '1-URGENT'";
    String otherPriorityQuery = "SELECT *, 2 FROM test_orders where ds = '2020-09-02' and orderpriority = '2-HIGH'";

    String createTable = "CREATE TABLE test_orders WITH (partitioned_by = ARRAY['ds', 'ts']) AS " +
            "SELECT orderkey, orderpriority, comment, custkey, '2020-09-01' as ds, '00:01' as ts FROM orders WHERE orderkey < 1000 " +
            "UNION ALL " +
            "SELECT orderkey, orderpriority, comment, custkey, '2020-09-02' as ds, '00:02' as ts FROM orders WHERE orderkey >= 1000 AND orderkey < 2000";

    try {
        getQueryRunner().execute(createTable);

        // Before any history is tracked: the expected row count is the CBO estimate.
        assertPlan(
                trackedQuery,
                anyTree(node(ProjectNode.class, any())).withOutputRowCount(51.0));

        // Run once with both strategies enabled so that history is recorded (HBO statistics).
        executeAndTrackHistory(trackedQuery, createSession(safeAndScanConstants));

        // With both strategies enabled, both variants are expected to report 48 rows.
        assertPlan(
                createSession(safeAndScanConstants),
                samePriorityQuery,
                anyTree(node(ProjectNode.class, any()).withOutputRowCount(48)));
        assertPlan(
                createSession(safeAndScanConstants),
                otherPriorityQuery,
                anyTree(node(ProjectNode.class, any()).withOutputRowCount(48)));

        // With only IGNORE_SAFE_CONSTANTS, the variant with a different non-partition
        // predicate is expected to report 49.6 rows instead.
        // NOTE(review): presumably no history entry matches here - confirm.
        assertPlan(
                createSession(safeConstantsOnly),
                otherPriorityQuery,
                anyTree(node(ProjectNode.class, any()).withOutputRowCount(49.6)));
    }
    finally {
        // Always drop the table so other tests can recreate it.
        getQueryRunner().execute("DROP TABLE IF EXISTS test_orders");
    }
}

@Test
public void testInsertTable()
{
Expand Down Expand Up @@ -164,7 +200,7 @@ public void testPartialAggStatistics()
"SELECT orderkey, orderpriority, comment, custkey, '2020-09-01' as ds, '00:01' as ts FROM orders where orderkey < 2000 ");

String query = "SELECT count(*) FROM test_orders group by custkey";
Session session = createSession("always");
Session session = createSession(ImmutableMap.of(PARTIAL_AGGREGATION_STRATEGY, "always"));
Plan plan = plan(query, session);

assertTrue(PlanNodeSearcher.searchFrom(plan.getRoot())
Expand All @@ -173,9 +209,9 @@ public void testPartialAggStatistics()
.isPresent());

// collect HBO Statistics
executeAndTrackHistory(query, createSession("always"));
executeAndTrackHistory(query, createSession(ImmutableMap.of(PARTIAL_AGGREGATION_STRATEGY, "always")));

plan = plan(query, createSession("automatic"));
plan = plan(query, createSession(ImmutableMap.of(PARTIAL_AGGREGATION_STRATEGY, "automatic")));

assertTrue(PlanNodeSearcher.searchFrom(plan.getRoot())
.where(node -> node instanceof AggregationNode && ((AggregationNode) node).getStep() == AggregationNode.Step.PARTIAL).findAll().isEmpty());
Expand All @@ -196,11 +232,11 @@ public void testPartialAggStatisticsGroupByPartKey()
// collect HBO Statistics
String queryGBPartitionKey = "SELECT ds FROM test_orders group by ds";

Plan plan = plan(queryGBPartitionKey, createSession("always"));
Plan plan = plan(queryGBPartitionKey, createSession(ImmutableMap.of(PARTIAL_AGGREGATION_STRATEGY, "always")));

assertTrue(PlanNodeSearcher.searchFrom(plan.getRoot())
.where(node -> node instanceof AggregationNode && ((AggregationNode) node).getStep() == AggregationNode.Step.PARTIAL).findFirst().isPresent());
executeAndTrackHistory(queryGBPartitionKey, createSession("always"));
executeAndTrackHistory(queryGBPartitionKey, createSession(ImmutableMap.of(PARTIAL_AGGREGATION_STRATEGY, "always")));
}
finally {
getQueryRunner().execute("DROP TABLE IF EXISTS test_orders");
Expand All @@ -225,19 +261,19 @@ private void executeAndTrackHistory(String sql, Session session)

private Session defaultSession()
{
return createSession("automatic");
return createSession(ImmutableMap.of(PARTIAL_AGGREGATION_STRATEGY, "automatic"));
}

private Session createSession(String partialAggregationStrategy)
private Session createSession(Map<String, String> properties)
{
return Session.builder(getQueryRunner().getDefaultSession())
Session.SessionBuilder builder = Session.builder(getQueryRunner().getDefaultSession())
.setSystemProperty(USE_HISTORY_BASED_PLAN_STATISTICS, "true")
.setSystemProperty(TRACK_HISTORY_BASED_PLAN_STATISTICS, "true")
.setSystemProperty(JOIN_DISTRIBUTION_TYPE, "automatic")
.setSystemProperty(PARTIAL_AGGREGATION_STRATEGY, partialAggregationStrategy)
.setSystemProperty(USE_PARTIAL_AGGREGATION_HISTORY, "true")
.setCatalogSessionProperty(HIVE_CATALOG, PUSHDOWN_FILTER_ENABLED, "true")
.setSystemProperty(RESTRICT_HISTORY_BASED_OPTIMIZATION_TO_COMPLEX_QUERY, "false")
.build();
.setSystemProperty(RESTRICT_HISTORY_BASED_OPTIMIZATION_TO_COMPLEX_QUERY, "false");
properties.forEach((property, value) -> builder.setSystemProperty(property, value));
return builder.build();
}
}
Loading
Loading