Skip to content

Commit

Permalink
[Enhancement] limit max predicate columns when using full analyze predicate column instead of sample all columns on auto collect statistics
Browse files Browse the repository at this point in the history

Signed-off-by: stephen <stephen5217@163.com>
  • Loading branch information
stephen-shelby committed Feb 21, 2025
1 parent e55e165 commit 4b5a5ec
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 5 deletions.
3 changes: 3 additions & 0 deletions fe/fe-core/src/main/java/com/starrocks/common/Config.java
Original file line number Diff line number Diff line change
Expand Up @@ -2130,6 +2130,9 @@ public class Config extends ConfigBase {
/**
 * Runtime-mutable switch, enabled by default, that auto collect checks before falling back to the
 * sample strategy. The "choose sample" branch is skipped only when this flag is true, the table has
 * recorded predicate columns, and their count is below
 * {@code statistic_auto_collect_max_predicate_column_size_on_sample_strategy}.
 * NOTE(review): per the config comment the alternative is a full analyze restricted to the
 * predicate columns -- confirm against the job factory.
 */
@ConfField(mutable = true, comment = "If full analyze predicate columns instead of sample all columns")
public static boolean statistic_auto_collect_use_full_predicate_column_for_sample = true;

/**
 * Exclusive upper bound on the number of predicate columns for which auto collect prefers the
 * predicate-column path over sampling (default 16, runtime-mutable).
 * The comparison is strict ({@code predicateColumnCount < this value}), so a table with exactly this
 * many predicate columns still takes the sample branch, and a non-positive value such as -1 means
 * the bound is never satisfied.
 * Only consulted when {@code statistic_auto_collect_use_full_predicate_column_for_sample} is true
 * and predicate columns exist for the table.
 */
@ConfField(mutable = true, comment = "max columns size of full analyze predicate columns instead of sample all columns")
public static int statistic_auto_collect_max_predicate_column_size_on_sample_strategy = 16;

/**
* Max row count in statistics collect per query
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ private static void createJob(List<StatisticsCollectJob> allTableJobMap, NativeA
// Use predicate columns if suitable
TableName tableName = new TableName(db.getOriginName(), table.getName());
int numColumns = table.getColumns().size();
List<String> predicateColumnNames = null;
List<String> predicateColNames = null;
boolean existsPredicateColumns = false;
boolean enablePredicateColumnStrategy = false;
if (basicStatsMeta != null && !basicStatsMeta.isInitJobMeta() && useBasicStats &&
Expand All @@ -473,7 +473,7 @@ private static void createJob(List<StatisticsCollectJob> allTableJobMap, NativeA
List<ColumnUsage> predicateColumns = PredicateColumnsMgr.getInstance().queryPredicateColumns(tableName);
if (CollectionUtils.isNotEmpty(predicateColumns) && predicateColumns.size() < numColumns) {
OlapTable olap = (OlapTable) table;
predicateColumnNames = predicateColumns.stream().map(x -> x.getOlapColumnName(olap)).toList();
predicateColNames = predicateColumns.stream().map(x -> x.getOlapColumnName(olap)).toList();
existsPredicateColumns = true;
if (numColumns > Config.statistic_auto_collect_predicate_columns_threshold) {
enablePredicateColumnStrategy = true;
Expand Down Expand Up @@ -533,7 +533,8 @@ private static void createJob(List<StatisticsCollectJob> allTableJobMap, NativeA
if (job.getAnalyzeType() != StatsConstants.AnalyzeType.HISTOGRAM &&
healthy < Config.statistic_auto_collect_sample_threshold &&
sumDataSize > Config.statistic_auto_collect_small_table_size) {
if (!(Config.statistic_auto_collect_use_full_predicate_column_for_sample && existsPredicateColumns)) {
if (!(Config.statistic_auto_collect_use_full_predicate_column_for_sample && existsPredicateColumns &&
predicateColNames.size() < Config.statistic_auto_collect_max_predicate_column_size_on_sample_strategy)) {
LOG.debug("statistics job choose sample on real-time update table: {}" +
", last collect time: {}, current healthy: {}, full collect healthy limit: {}, " +
", update data size: {}MB, full collect healthy data size limit: <{}MB",
Expand All @@ -550,8 +551,8 @@ private static void createJob(List<StatisticsCollectJob> allTableJobMap, NativeA
}
}

if (enablePredicateColumnStrategy && CollectionUtils.isNotEmpty(predicateColumnNames)) {
columnNames = predicateColumnNames;
if (enablePredicateColumnStrategy && CollectionUtils.isNotEmpty(predicateColNames)) {
columnNames = predicateColNames;
columnTypes = columnNames.stream().map(col -> table.getColumn(col).getType()).collect(Collectors.toList());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,5 +242,19 @@ public void testAutoAnalyzePredicateColumns() throws Exception {
Assertions.assertEquals(List.of("v1", "v2", "v3"), job0.getColumnNames());
Config.statistic_auto_collect_predicate_columns_threshold = defaultValue;
}

{
long defaultSmallTableSize = Config.statistic_auto_collect_small_table_size;
Config.statistic_auto_collect_small_table_size = -1;
int defaultPredicateColumnSize = Config.statistic_auto_collect_max_predicate_column_size_on_sample_strategy;
Config.statistic_auto_collect_max_predicate_column_size_on_sample_strategy = -1;
List<StatisticsCollectJob> collectJobs = StatisticsCollectJobFactory.buildStatisticsCollectJob(analyzeJob);
Assertions.assertEquals(1, collectJobs.size());
StatisticsCollectJob job0 = collectJobs.get(0);
Assertions.assertEquals(StatsConstants.AnalyzeType.SAMPLE, job0.getType());
Assertions.assertEquals(List.of("v1", "v2", "v3"), job0.getColumnNames());
Config.statistic_auto_collect_small_table_size = defaultSmallTableSize;
Config.statistic_auto_collect_max_predicate_column_size_on_sample_strategy = defaultPredicateColumnSize;
}
}
}

0 comments on commit 4b5a5ec

Please sign in to comment.