Collect HBO stats from complete stages in failed queries #20947

Merged: 1 commit, Mar 13, 2024
8 changes: 8 additions & 0 deletions presto-docs/src/main/sphinx/admin/properties.rst
@@ -803,6 +803,14 @@ Optimizer Properties

Plan canonicalization strategies used to canonicalize a query plan for history based optimization.

``optimizer.track-history-stats-from-failed-queries``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

* **Type:** ``boolean``
* **Default value:** ``true``

Track history based plan statistics from complete plan fragments in failed queries.


Planner Properties
--------------------------------------
@@ -29,6 +29,7 @@ Session property Description
=========================================================== =========================================================================================================================================================================================================== ===============
use_history_based_plan_statistics Enable using historical statistics for query optimization False
track_history_based_plan_statistics Enable recording the statistics of the current query as history statistics so as to be used by future queries False
track_history_stats_from_failed_queries Track history based plan statistics from complete plan fragments in failed queries True
history_based_optimizer_timeout_limit Timeout for history based optimizer 10 seconds
restrict_history_based_optimization_to_complex_query Enable history based optimization only for complex queries, i.e. queries with join and aggregation True
history_input_table_statistics_matching_threshold When the size difference between current table and history table exceeds this threshold, do not match history statistics. When value is 0, use the default value set by hbo.history-matching-threshold 0
Expand Down
@@ -268,6 +268,7 @@ public final class SystemSessionProperties
public static final String SEGMENTED_AGGREGATION_ENABLED = "segmented_aggregation_enabled";
public static final String USE_HISTORY_BASED_PLAN_STATISTICS = "use_history_based_plan_statistics";
public static final String TRACK_HISTORY_BASED_PLAN_STATISTICS = "track_history_based_plan_statistics";
public static final String TRACK_HISTORY_STATS_FROM_FAILED_QUERIES = "track_history_stats_from_failed_queries";
public static final String USE_PERFECTLY_CONSISTENT_HISTORIES = "use_perfectly_consistent_histories";
public static final String HISTORY_CANONICAL_PLAN_NODE_LIMIT = "history_canonical_plan_node_limit";
public static final String HISTORY_BASED_OPTIMIZER_TIMEOUT_LIMIT = "history_based_optimizer_timeout_limit";
@@ -1522,6 +1523,11 @@ public SystemSessionProperties(
"Track history based plan statistics service in query optimizer",
featuresConfig.isTrackHistoryBasedPlanStatistics(),
false),
booleanProperty(
TRACK_HISTORY_STATS_FROM_FAILED_QUERIES,
"Track history based plan statistics from complete plan fragments in failed queries",
featuresConfig.isTrackHistoryStatsFromFailedQuery(),
false),
booleanProperty(
USE_PERFECTLY_CONSISTENT_HISTORIES,
"Use perfectly consistent histories for history based optimizations, even when parts of a query are re-ordered.",
@@ -2986,6 +2992,11 @@ public static boolean trackHistoryBasedPlanStatisticsEnabled(Session session)
return session.getSystemProperty(TRACK_HISTORY_BASED_PLAN_STATISTICS, Boolean.class);
}

public static boolean trackHistoryStatsFromFailedQuery(Session session)
{
return session.getSystemProperty(TRACK_HISTORY_STATS_FROM_FAILED_QUERIES, Boolean.class);
}

public static boolean usePerfectlyConsistentHistories(Session session)
{
return session.getSystemProperty(USE_PERFECTLY_CONSISTENT_HISTORIES, Boolean.class);
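For context, the new property can be toggled per session and read back through the accessor added above. A minimal sketch of that round trip (testSessionBuilder comes from Presto's testing utilities and is an assumption here, not part of this diff):

    import com.facebook.presto.Session;

    import static com.facebook.presto.SystemSessionProperties.TRACK_HISTORY_STATS_FROM_FAILED_QUERIES;
    import static com.facebook.presto.SystemSessionProperties.trackHistoryStatsFromFailedQuery;
    import static com.facebook.presto.testing.TestingSession.testSessionBuilder;

    public class TrackFailedQueryStatsSessionExample
    {
        public static void main(String[] args)
        {
            // Opt a single session out of collecting HBO stats from failed queries.
            Session session = testSessionBuilder()
                    .setSystemProperty(TRACK_HISTORY_STATS_FROM_FAILED_QUERIES, "false")
                    .build();

            // The accessor added in this file reads the flag back from the session.
            System.out.println(trackHistoryStatsFromFailedQuery(session)); // false
        }
    }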
@@ -20,6 +20,7 @@
import com.facebook.presto.common.type.FixedWidthType;
import com.facebook.presto.execution.QueryExecution;
import com.facebook.presto.execution.QueryInfo;
import com.facebook.presto.execution.StageExecutionState;
import com.facebook.presto.execution.StageInfo;
import com.facebook.presto.metadata.SessionPropertyManager;
import com.facebook.presto.spi.plan.AggregationNode;
@@ -42,6 +43,7 @@
import com.facebook.presto.sql.planner.plan.TableWriterNode;
import com.facebook.presto.sql.planner.planPrinter.PlanNodeStats;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;

@@ -54,6 +56,7 @@

import static com.facebook.presto.SystemSessionProperties.getHistoryBasedOptimizerTimeoutLimit;
import static com.facebook.presto.SystemSessionProperties.trackHistoryBasedPlanStatisticsEnabled;
import static com.facebook.presto.SystemSessionProperties.trackHistoryStatsFromFailedQuery;
import static com.facebook.presto.SystemSessionProperties.trackPartialAggregationHistory;
import static com.facebook.presto.common.resourceGroups.QueryType.INSERT;
import static com.facebook.presto.common.resourceGroups.QueryType.SELECT;
@@ -106,10 +109,10 @@ public Map<PlanNodeWithHash, PlanStatisticsWithSourceInfo> getQueryStats(QueryIn
return ImmutableMap.of();
}

// Only update statistics for successful queries
if (queryInfo.getFailureInfo() != null ||
!queryInfo.getOutputStage().isPresent() ||
!queryInfo.getOutputStage().get().getPlan().isPresent()) {
// If track_history_stats_from_failed_queries is set to true, we do not require that the query is successful
boolean trackStatsForFailedQueries = trackHistoryStatsFromFailedQuery(session);
boolean querySucceed = queryInfo.getFailureInfo() == null;
if ((!querySucceed && !trackStatsForFailedQueries) || !queryInfo.getOutputStage().isPresent() || !queryInfo.getOutputStage().get().getPlan().isPresent()) {
return ImmutableMap.of();
}

@@ -124,7 +127,17 @@ public Map<PlanNodeWithHash, PlanStatisticsWithSourceInfo> getQueryStats(QueryIn
}

StageInfo outputStage = queryInfo.getOutputStage().get();
List<StageInfo> allStages = outputStage.getAllStages();
List<StageInfo> allStages = ImmutableList.of();
if (querySucceed) {
allStages = outputStage.getAllStages();
}
else if (trackStatsForFailedQueries) {
allStages = outputStage.getAllStages().stream().filter(x -> x.getLatestAttemptExecutionInfo().getState().equals(StageExecutionState.FINISHED)).collect(toImmutableList());
}

if (allStages.isEmpty()) {
return ImmutableMap.of();
}

Map<PlanNodeId, PlanNodeStats> planNodeStatsMap = aggregateStageStats(allStages);
Map<PlanNodeWithHash, PlanStatisticsWithSourceInfo> planStatisticsMap = new HashMap<>();
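The heart of the change is the stage selection above: a successful query keeps all stages, while a failed query with the toggle enabled contributes only stages whose latest execution reached FINISHED. A simplified, self-contained sketch of that selection (the Stage record and StageState enum below are stand-ins for Presto's StageInfo and StageExecutionState, used only for illustration):

    import java.util.List;
    import java.util.stream.Collectors;

    public class CompletedStageFilterExample
    {
        // Stand-ins for Presto's StageExecutionState and StageInfo, for illustration only.
        enum StageState { PLANNED, RUNNING, FINISHED, FAILED }

        record Stage(int id, StageState latestState) {}

        static List<Stage> stagesToTrack(boolean querySucceeded, boolean trackFailedQueries, List<Stage> allStages)
        {
            if (querySucceeded) {
                // Successful queries keep the previous behavior: every stage contributes stats.
                return allStages;
            }
            if (!trackFailedQueries) {
                // Failed query with the toggle off: record nothing, as before this change.
                return List.of();
            }
            // Failed query with the toggle on: only fully finished stages have trustworthy stats.
            return allStages.stream()
                    .filter(stage -> stage.latestState() == StageState.FINISHED)
                    .collect(Collectors.toList());
        }

        public static void main(String[] args)
        {
            List<Stage> stages = List.of(
                    new Stage(0, StageState.FAILED),    // e.g. the probe side of a failing join
                    new Stage(1, StageState.FINISHED)); // e.g. the build side that completed first
            // Prints only stage 1, mirroring the filter in the tracker above.
            System.out.println(stagesToTrack(false, true, stages));
        }
    }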
@@ -99,6 +99,7 @@ public class FeaturesConfig
private int maxReorderedJoins = 9;
private boolean useHistoryBasedPlanStatistics;
private boolean trackHistoryBasedPlanStatistics;
private boolean trackHistoryStatsFromFailedQuery = true;
private boolean usePerfectlyConsistentHistories;
private int historyCanonicalPlanNodeLimit = 1000;
private Duration historyBasedOptimizerTimeout = new Duration(10, SECONDS);
@@ -919,6 +920,18 @@ public FeaturesConfig setTrackHistoryBasedPlanStatistics(boolean trackHistoryBas
return this;
}

public boolean isTrackHistoryStatsFromFailedQuery()
{
return trackHistoryStatsFromFailedQuery;
}

@Config("optimizer.track-history-stats-from-failed-queries")
public FeaturesConfig setTrackHistoryStatsFromFailedQuery(boolean trackHistoryStatsFromFailedQuery)
{
this.trackHistoryStatsFromFailedQuery = trackHistoryStatsFromFailedQuery;
return this;
}

public boolean isUsePerfectlyConsistentHistories()
{
return usePerfectlyConsistentHistories;
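On the configuration side, the flag follows the usual FeaturesConfig pattern. A minimal sketch of how optimizer.track-history-stats-from-failed-queries flows through the getter and setter added here (the FeaturesConfig package location is taken from the Presto codebase and assumed unchanged by this diff):

    import com.facebook.presto.sql.analyzer.FeaturesConfig;

    public class TrackFailedQueryConfigExample
    {
        public static void main(String[] args)
        {
            FeaturesConfig config = new FeaturesConfig();

            // The new flag defaults to true, matching testDefaults() below.
            System.out.println(config.isTrackHistoryStatsFromFailedQuery());

            // Setting optimizer.track-history-stats-from-failed-queries=false in
            // the coordinator's config binds to this setter via the @Config annotation.
            config.setTrackHistoryStatsFromFailedQuery(false);
            System.out.println(config.isTrackHistoryStatsFromFailedQuery());
        }
    }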
@@ -82,6 +82,7 @@ public void testDefaults()
.setMaxReorderedJoins(9)
.setUseHistoryBasedPlanStatistics(false)
.setTrackHistoryBasedPlanStatistics(false)
.setTrackHistoryStatsFromFailedQuery(true)
.setUsePartialAggregationHistory(false)
.setTrackPartialAggregationHistory(true)
.setUsePerfectlyConsistentHistories(false)
@@ -314,6 +315,7 @@ public void testExplicitPropertyMappings()
.put("optimizer.max-reordered-joins", "5")
.put("optimizer.use-history-based-plan-statistics", "true")
.put("optimizer.track-history-based-plan-statistics", "true")
.put("optimizer.track-history-stats-from-failed-queries", "false")
.put("optimizer.use-partial-aggregation-history", "true")
.put("optimizer.track-partial-aggregation-history", "false")
.put("optimizer.use-perfectly-consistent-histories", "true")
@@ -513,6 +515,7 @@ public void testExplicitPropertyMappings()
.setMaxReorderedJoins(5)
.setUseHistoryBasedPlanStatistics(true)
.setTrackHistoryBasedPlanStatistics(true)
.setTrackHistoryStatsFromFailedQuery(false)
.setUsePartialAggregationHistory(true)
.setTrackPartialAggregationHistory(false)
.setUsePerfectlyConsistentHistories(true)
@@ -48,8 +48,11 @@
import org.testng.annotations.Test;

import static com.facebook.presto.SystemSessionProperties.HISTORY_CANONICAL_PLAN_NODE_LIMIT;
import static com.facebook.presto.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE;
import static com.facebook.presto.SystemSessionProperties.JOIN_REORDERING_STRATEGY;
import static com.facebook.presto.SystemSessionProperties.RESTRICT_HISTORY_BASED_OPTIMIZATION_TO_COMPLEX_QUERY;
import static com.facebook.presto.SystemSessionProperties.TRACK_HISTORY_BASED_PLAN_STATISTICS;
import static com.facebook.presto.SystemSessionProperties.TRACK_HISTORY_STATS_FROM_FAILED_QUERIES;
import static com.facebook.presto.SystemSessionProperties.USE_HISTORY_BASED_PLAN_STATISTICS;
import static com.facebook.presto.SystemSessionProperties.USE_PERFECTLY_CONSISTENT_HISTORIES;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.any;
@@ -121,6 +124,29 @@ public void testHistoryBasedStatsCalculator()
anyTree(node(AggregationNode.class, node(ExchangeNode.class, anyTree(any()))).withOutputRowCount(3).withOutputSize(54)));
}

@Test
public void testFailedQuery()
{
String sql = "select o.orderkey, l.partkey, l.mapcol[o.orderkey] from (select orderkey, partkey, mapcol from (select *, map(array[1], array[2]) mapcol from lineitem)) l " +
Contributor Author: Join will fail, but the build input will succeed.
Contributor: maybe add this comment in the test
"join orders o on l.partkey=o.custkey where length(comment)>10";
Session session = Session.builder(createSession())
.setSystemProperty(TRACK_HISTORY_STATS_FROM_FAILED_QUERIES, "true")
.setSystemProperty(JOIN_DISTRIBUTION_TYPE, "PARTITIONED")
.setSystemProperty(JOIN_REORDERING_STRATEGY, "NONE")
.build();
// CBO Statistics
assertPlan(session, sql, anyTree(anyTree(any()), anyTree(node(ProjectNode.class, node(FilterNode.class, any())).withOutputRowCount(Double.NaN))));

// HBO Statistics
try {
getQueryRunner().execute(session, sql);
}
catch (Exception e) {
getHistoryProvider().waitProcessQueryEvents();
}
assertPlan(session, sql, anyTree(anyTree(any()), anyTree(node(ProjectNode.class, node(FilterNode.class, any())).withOutputRowCount(15000))));
}

@Test
public void testUnion()
{