Skip to content

Commit

Permalink
Record stats of successful stages of failed queries in HBO
Browse files Browse the repository at this point in the history
  • Loading branch information
feilong-liu committed Mar 11, 2024
1 parent 4629ca5 commit 7c8253f
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 3 deletions.
8 changes: 8 additions & 0 deletions presto-docs/src/main/sphinx/admin/properties.rst
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,14 @@ Optimizer Properties
Enable analysis and propagation of logical properties (distinct keys, cardinality, etc.) among the nodes of
a query plan. The optimizer may then use these properties to perform various optimizations.

``optimizer.track-history-stats-from-failed-query``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

* **Type:** ``boolean``
* **Default value:** ``true``

Track history based plan statistics from complete plan fragments in failed queries.


Planner Properties
--------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Session property Description
=========================================================== =========================================================================================================================================================================================================== ===============
use_history_based_plan_statistics Enable using historical statistics for query optimization False
track_history_based_plan_statistics Enable recording the statistics of the current query as history statistics so as to be used by future queries False
track_history_stats_from_failed_query Track history based plan statistics from complete plan fragments in failed queries True
history_based_optimizer_timeout_limit Timeout for history based optimizer 10 seconds
restrict_history_based_optimization_to_complex_query Enable history based optimization only for complex queries, i.e. queries with join and aggregation True
history_input_table_statistics_matching_threshold When the size difference between current table and history table exceed this threshold, do not match history statistics. When value is 0, use the default value set by hbo.history-matching-threshold 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ public final class SystemSessionProperties
public static final String SEGMENTED_AGGREGATION_ENABLED = "segmented_aggregation_enabled";
public static final String USE_HISTORY_BASED_PLAN_STATISTICS = "use_history_based_plan_statistics";
public static final String TRACK_HISTORY_BASED_PLAN_STATISTICS = "track_history_based_plan_statistics";
public static final String TRACK_HISTORY_STATS_FROM_FAILED_QUERY = "track_history_stats_from_failed_query";
public static final String USE_PERFECTLY_CONSISTENT_HISTORIES = "use_perfectly_consistent_histories";
public static final String HISTORY_CANONICAL_PLAN_NODE_LIMIT = "history_canonical_plan_node_limit";
public static final String HISTORY_BASED_OPTIMIZER_TIMEOUT_LIMIT = "history_based_optimizer_timeout_limit";
Expand Down Expand Up @@ -1522,6 +1523,11 @@ public SystemSessionProperties(
"Track history based plan statistics service in query optimizer",
featuresConfig.isTrackHistoryBasedPlanStatistics(),
false),
booleanProperty(
TRACK_HISTORY_STATS_FROM_FAILED_QUERY,
"Track history based plan statistics from complete plan fragments in failed queries",
featuresConfig.isTrackHistoryStatsFromFailedQuery(),
false),
booleanProperty(
USE_PERFECTLY_CONSISTENT_HISTORIES,
"Use perfectly consistent histories for history based optimizations, even when parts of a query are re-ordered.",
Expand Down Expand Up @@ -2986,6 +2992,11 @@ public static boolean trackHistoryBasedPlanStatisticsEnabled(Session session)
return session.getSystemProperty(TRACK_HISTORY_BASED_PLAN_STATISTICS, Boolean.class);
}

/**
 * Looks up the {@code TRACK_HISTORY_STATS_FROM_FAILED_QUERY} system property on the given session.
 *
 * @param session the session whose system properties are consulted
 * @return the boolean value of the property for this session
 */
public static boolean trackHistoryStatsFromFailedQuery(Session session)
{
    // Fetch the boxed value first; it is unboxed on return exactly as in a direct return of the lookup.
    Boolean trackFromFailedQuery = session.getSystemProperty(TRACK_HISTORY_STATS_FROM_FAILED_QUERY, Boolean.class);
    return trackFromFailedQuery;
}

public static boolean usePerfectlyConsistentHistories(Session session)
{
return session.getSystemProperty(USE_PERFECTLY_CONSISTENT_HISTORIES, Boolean.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@

import static com.facebook.presto.SystemSessionProperties.getHistoryBasedOptimizerTimeoutLimit;
import static com.facebook.presto.SystemSessionProperties.trackHistoryBasedPlanStatisticsEnabled;
import static com.facebook.presto.SystemSessionProperties.trackHistoryStatsFromFailedQuery;
import static com.facebook.presto.SystemSessionProperties.trackPartialAggregationHistory;
import static com.facebook.presto.common.resourceGroups.QueryType.INSERT;
import static com.facebook.presto.common.resourceGroups.QueryType.SELECT;
Expand Down Expand Up @@ -106,8 +107,9 @@ public Map<PlanNodeWithHash, PlanStatisticsWithSourceInfo> getQueryStats(QueryIn
return ImmutableMap.of();
}

// Only update statistics for successful queries
if (queryInfo.getFailureInfo() != null ||
// If track_history_stats_from_failed_query is set to true, we do not require that the query is successful
boolean trackStatsForFailedQueries = trackHistoryStatsFromFailedQuery(session);
if ((queryInfo.getFailureInfo() != null && !trackStatsForFailedQueries) ||
!queryInfo.getOutputStage().isPresent() ||
!queryInfo.getOutputStage().get().getPlan().isPresent()) {
return ImmutableMap.of();
Expand All @@ -124,7 +126,7 @@ public Map<PlanNodeWithHash, PlanStatisticsWithSourceInfo> getQueryStats(QueryIn
}

StageInfo outputStage = queryInfo.getOutputStage().get();
List<StageInfo> allStages = outputStage.getAllStages();
List<StageInfo> allStages = trackStatsForFailedQueries ? outputStage.getAllStages().stream().filter(x -> x.isFinalStageInfo()).collect(toImmutableList()) : outputStage.getAllStages();

Map<PlanNodeId, PlanNodeStats> planNodeStatsMap = aggregateStageStats(allStages);
Map<PlanNodeWithHash, PlanStatisticsWithSourceInfo> planStatisticsMap = new HashMap<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ public class FeaturesConfig
private int maxReorderedJoins = 9;
private boolean useHistoryBasedPlanStatistics;
private boolean trackHistoryBasedPlanStatistics;
private boolean trackHistoryStatsFromFailedQuery = true;
private boolean usePerfectlyConsistentHistories;
private int historyCanonicalPlanNodeLimit = 1000;
private Duration historyBasedOptimizerTimeout = new Duration(10, SECONDS);
Expand Down Expand Up @@ -919,6 +920,18 @@ public FeaturesConfig setTrackHistoryBasedPlanStatistics(boolean trackHistoryBas
return this;
}

/**
 * Returns the currently configured value of the {@code trackHistoryStatsFromFailedQuery} flag.
 *
 * @return the stored flag value
 */
public boolean isTrackHistoryStatsFromFailedQuery()
{
    return this.trackHistoryStatsFromFailedQuery;
}

/**
 * Stores the {@code trackHistoryStatsFromFailedQuery} flag; annotated with the
 * {@code optimizer.track-history-stats-from-failed-query} configuration key.
 *
 * @param trackHistoryStatsFromFailedQuery the new value of the flag
 * @return this {@code FeaturesConfig} instance, so that setter calls can be chained
 */
@Config("optimizer.track-history-stats-from-failed-query")
public FeaturesConfig setTrackHistoryStatsFromFailedQuery(boolean trackHistoryStatsFromFailedQuery)
{
this.trackHistoryStatsFromFailedQuery = trackHistoryStatsFromFailedQuery;
return this;
}

public boolean isUsePerfectlyConsistentHistories()
{
return usePerfectlyConsistentHistories;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ public void testDefaults()
.setMaxReorderedJoins(9)
.setUseHistoryBasedPlanStatistics(false)
.setTrackHistoryBasedPlanStatistics(false)
.setTrackHistoryStatsFromFailedQuery(true)
.setUsePartialAggregationHistory(false)
.setTrackPartialAggregationHistory(true)
.setUsePerfectlyConsistentHistories(false)
Expand Down Expand Up @@ -314,6 +315,7 @@ public void testExplicitPropertyMappings()
.put("optimizer.max-reordered-joins", "5")
.put("optimizer.use-history-based-plan-statistics", "true")
.put("optimizer.track-history-based-plan-statistics", "true")
.put("optimizer.track-history-stats-from-failed-query", "false")
.put("optimizer.use-partial-aggregation-history", "true")
.put("optimizer.track-partial-aggregation-history", "false")
.put("optimizer.use-perfectly-consistent-histories", "true")
Expand Down Expand Up @@ -513,6 +515,7 @@ public void testExplicitPropertyMappings()
.setMaxReorderedJoins(5)
.setUseHistoryBasedPlanStatistics(true)
.setTrackHistoryBasedPlanStatistics(true)
.setTrackHistoryStatsFromFailedQuery(false)
.setUsePartialAggregationHistory(true)
.setTrackPartialAggregationHistory(false)
.setUsePerfectlyConsistentHistories(true)
Expand Down

0 comments on commit 7c8253f

Please sign in to comment.