diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 925f7b37b149f2..86c32c430c5f91 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -540,6 +540,7 @@ terminal String KW_ROW, KW_ROWS, KW_S3, + KW_SAMPLE, KW_SCHEMA, KW_SCHEMAS, KW_SECOND, @@ -890,6 +891,10 @@ nonterminal PauseSyncJobStmt pause_sync_job_stmt; nonterminal StopSyncJobStmt stop_sync_job_stmt; nonterminal JobName job_name; +// analyze +nonterminal Map with_analysis_properties; +nonterminal List> opt_with_analysis_properties; + nonterminal String opt_db, procedure_or_function, opt_comment, opt_engine; nonterminal ColumnDef.DefaultValue opt_default_value; nonterminal Boolean opt_if_exists, opt_if_not_exists; @@ -2812,30 +2817,45 @@ show_create_reporitory_stmt ::= // analyze statment analyze_stmt ::= - KW_ANALYZE opt_sync:sync KW_TABLE table_name:tbl opt_col_list:cols opt_properties:properties - {: - boolean is_whole_tbl = (cols == null); - boolean is_histogram = false; - boolean is_increment = false; - RESULT = new AnalyzeStmt(tbl, sync, cols, properties, is_whole_tbl, is_histogram, is_increment); - :} - | KW_ANALYZE opt_sync:sync KW_INCREMENTAL KW_TABLE table_name:tbl opt_col_list:cols opt_partition_names:partitionNames opt_properties:properties - {: - boolean is_whole_tbl = (cols == null); - boolean is_histogram = false; - boolean is_increment = true; - RESULT = new AnalyzeStmt(tbl, sync, cols, properties, is_whole_tbl, is_histogram, is_increment); - :} - | KW_ANALYZE opt_sync:sync KW_TABLE table_name:tbl KW_UPDATE KW_HISTOGRAM KW_ON ident_list:cols opt_partition_names:partitionNames opt_properties:properties + // statistics + KW_ANALYZE opt_sync:sync KW_TABLE table_name:tbl opt_col_list:cols + opt_with_analysis_properties:withAnalysisProperties opt_properties:properties {: - boolean is_whole_tbl = false; - boolean is_histogram = true; - boolean is_increment = false; - RESULT = new AnalyzeStmt(tbl, sync, cols, properties, is_whole_tbl, is_histogram, is_increment); + if (properties == null) { + properties = Maps.newHashMap(); + } + for (Map property : withAnalysisProperties) { + properties.putAll(property); + } + if (!properties.containsKey("sync")) { + properties.put("sync", String.valueOf(sync)); + } + // Rule: If no type is specified, see if there is a specified column + if (!properties.containsKey("analysis.type")) { + if ((cols == null)) { + properties.put("analysis.type", "INDEX"); + } else { + properties.put("analysis.type", "COLUMN"); + } + } + RESULT = new AnalyzeStmt(tbl, cols, properties); :} - | KW_ANALYZE opt_sync:sync KW_TABLE table_name:tbl KW_UPDATE KW_HISTOGRAM + // histogram + | KW_ANALYZE opt_sync:sync KW_TABLE table_name:tbl opt_col_list:cols KW_UPDATE KW_HISTOGRAM + opt_with_analysis_properties:withAnalysisProperties opt_properties:properties {: - RESULT = new AnalyzeStmt(tbl, sync, null, new HashMap<>(), true, true, false); + if (properties == null) { + properties = Maps.newHashMap(); + } + for (Map property : withAnalysisProperties) { + properties.putAll(property); + } + if (!properties.containsKey("sync")) { + properties.put("sync", String.valueOf(sync)); + } + // TODO: Support materialized view + properties.put("analysis.type", "HISTOGRAM"); + RESULT = new AnalyzeStmt(tbl, cols, properties); :} ; @@ -5730,6 +5750,53 @@ ident_list ::= :} ; +with_analysis_properties ::= + KW_SYNC + {: + RESULT = new HashMap() {{ + put("sync", "true"); + }}; + :} + | KW_INCREMENTAL + {: + RESULT = new HashMap() {{ + 
put("incremental", "true"); + }}; + :} + | KW_SAMPLE KW_PERCENT INTEGER_LITERAL:samplePercent + {: + RESULT = new HashMap() {{ + put("sample.percent", String.valueOf(samplePercent.intValue())); + }}; + :} + | KW_SAMPLE KW_ROWS INTEGER_LITERAL:sampleRows + {: + RESULT = new HashMap() {{ + put("sample.rows", String.valueOf(sampleRows.intValue())); + }}; + :} + | KW_BUCKETS INTEGER_LITERAL:numBuckets + {: + RESULT = new HashMap() {{ + put("num.buckets", String.valueOf(numBuckets.intValue())); + }}; + :} + ; + +opt_with_analysis_properties ::= + /* empty */ + {: + RESULT = Lists.newArrayList(); + :} + | opt_with_analysis_properties:withAnalysisProperties + KW_WITH with_analysis_properties:property + {: + withAnalysisProperties.add(property); + RESULT = withAnalysisProperties; + :} + ; + + expr_list ::= expr:e {: @@ -7368,6 +7435,8 @@ keyword ::= {: RESULT = id; :} | KW_EXPIRED: id {: RESULT = id; :} + | KW_SAMPLE:id + {: RESULT = id; :} ; // Identifier that contain keyword diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java index d189a8aeeec5df..093dfad776e840 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java @@ -21,22 +21,22 @@ import org.apache.doris.catalog.Database; import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.View; import org.apache.doris.common.AnalysisException; -import org.apache.doris.common.Config; import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; import org.apache.doris.common.FeNameFormat; import org.apache.doris.common.UserException; import org.apache.doris.common.util.PrintableMap; -import org.apache.doris.common.util.Util; import org.apache.doris.datasource.CatalogIf; import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; +import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMethod; +import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType; import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Maps; import com.google.common.collect.Sets; import org.apache.commons.lang3.StringUtils; @@ -44,35 +44,57 @@ import java.util.Map; import java.util.Optional; import java.util.Set; -import java.util.function.Predicate; import java.util.stream.Collectors; /** - * Collect statistics. - * - * syntax: - * ANALYZE [[ db_name.tb_name ] [( column_name [, ...] )], ...] [ PROPERTIES(...) ] - * db_name.tb_name: collect table and column statistics from tb_name - * column_name: collect column statistics from column_name - * properties: properties of statistics jobs + * Column Statistics Collection Syntax: + * ANALYZE [ SYNC ] TABLE table_name + * [ (column_name [, ...]) ] + * [ [WITH SYNC] | [WITH INCREMENTAL] | [WITH SAMPLE PERCENT | ROWS ] ] + * [ PROPERTIES ('key' = 'value', ...) ]; + * + * Column histogram collection syntax: + * ANALYZE [ SYNC ] TABLE table_name + * [ (column_name [, ...]) ] + * UPDATE HISTOGRAM + * [ [ WITH SYNC ][ WITH INCREMENTAL ][ WITH SAMPLE PERCENT | ROWS ][ WITH BUCKETS ] ] + * [ PROPERTIES ('key' = 'value', ...) ]; + * + * Illustrate: + * - sync:Collect statistics synchronously. Return after collecting. + * - incremental:Collect statistics incrementally. 
Incremental collection of histogram statistics is not supported. + * - sample percent | rows:Collect statistics by sampling. Scale and number of rows can be sampled. + * - buckets:Specifies the maximum number of buckets generated when collecting histogram statistics. + * - table_name: The purpose table for collecting statistics. Can be of the form `db_name.table_name`. + * - column_name: The specified destination column must be a column that exists in `table_name`, + * and multiple column names are separated by commas. + * - properties:Properties used to set statistics tasks. Currently only the following configurations + * are supported (equivalent to the with statement) + * - 'sync' = 'true' + * - 'incremental' = 'true' + * - 'sample.percent' = '50' + * - 'sample.rows' = '1000' + * - 'num.buckets' = 10 */ public class AnalyzeStmt extends DdlStmt { - // time to wait for collect statistics - public static final String CBO_STATISTICS_TASK_TIMEOUT_SEC = "cbo_statistics_task_timeout_sec"; + // The properties passed in by the user through "with" or "properties('K', 'V')" + public static final String PROPERTY_SYNC = "sync"; + public static final String PROPERTY_INCREMENTAL = "incremental"; + public static final String PROPERTY_SAMPLE_PERCENT = "sample.percent"; + public static final String PROPERTY_SAMPLE_ROWS = "sample.rows"; + public static final String PROPERTY_NUM_BUCKETS = "num.buckets"; + public static final String PROPERTY_ANALYSIS_TYPE = "analysis.type"; private static final ImmutableSet PROPERTIES_SET = new ImmutableSet.Builder() - .add(CBO_STATISTICS_TASK_TIMEOUT_SEC) + .add(PROPERTY_SYNC) + .add(PROPERTY_INCREMENTAL) + .add(PROPERTY_SAMPLE_PERCENT) + .add(PROPERTY_SAMPLE_ROWS) + .add(PROPERTY_NUM_BUCKETS) + .add(PROPERTY_ANALYSIS_TYPE) .build(); - private static final Predicate DESIRED_TASK_TIMEOUT_SEC = (v) -> v > 0L; - - public boolean isWholeTbl; - public boolean isHistogram; - public boolean isIncrement; - private final TableName tableName; - - private final boolean sync; private final List columnNames; private final Map properties; @@ -81,19 +103,11 @@ public class AnalyzeStmt extends DdlStmt { private TableIf table; public AnalyzeStmt(TableName tableName, - boolean sync, - List columnNames, - Map properties, - Boolean isWholeTbl, - Boolean isHistogram, - Boolean isIncrement) { + List columnNames, + Map properties) { this.tableName = tableName; - this.sync = sync; this.columnNames = columnNames; this.properties = properties; - this.isWholeTbl = isWholeTbl; - this.isHistogram = isHistogram; - this.isIncrement = isIncrement; } @Override @@ -133,6 +147,15 @@ public void analyze(Analyzer analyzer) throws UserException { } checkProperties(); + + // TODO support external table + if (properties.containsKey(PROPERTY_SAMPLE_PERCENT) + || properties.containsKey(PROPERTY_SAMPLE_ROWS)) { + if (!(table instanceof OlapTable)) { + throw new AnalysisException("Sampling statistics " + + "collection of external tables is not supported"); + } + } } @Override @@ -153,18 +176,88 @@ private void checkAnalyzePriv(String dbName, String tblName) throws AnalysisExce } private void checkProperties() throws UserException { - if (properties != null) { - Optional optional = properties.keySet().stream().filter( - entity -> !PROPERTIES_SET.contains(entity)).findFirst(); - if (optional.isPresent()) { - throw new AnalysisException(optional.get() + " is invalid property"); + if (properties == null || properties.isEmpty()) { + throw new AnalysisException("analysis properties should not be empty"); + } + + String 
msgTemplate = "%s = %s is invalid property"; + Optional optional = properties.keySet().stream().filter( + entity -> !PROPERTIES_SET.contains(entity)).findFirst(); + + if (optional.isPresent()) { + String msg = String.format(msgTemplate, optional.get(), properties.get(optional.get())); + throw new AnalysisException(msg); + } + + if (properties.containsKey(PROPERTY_SYNC)) { + try { + Boolean.valueOf(properties.get(PROPERTY_SYNC)); + } catch (NumberFormatException e) { + String msg = String.format(msgTemplate, PROPERTY_SYNC, properties.get(PROPERTY_SYNC)); + throw new AnalysisException(msg); + } + } + + if (properties.containsKey(PROPERTY_INCREMENTAL)) { + try { + Boolean.valueOf(properties.get(PROPERTY_INCREMENTAL)); + } catch (NumberFormatException e) { + String msg = String.format(msgTemplate, PROPERTY_INCREMENTAL, properties.get(PROPERTY_INCREMENTAL)); + throw new AnalysisException(msg); } + } + + if (properties.containsKey(PROPERTY_SAMPLE_PERCENT) + && properties.containsKey(PROPERTY_SAMPLE_ROWS)) { + throw new AnalysisException("only one sampling parameter can be specified simultaneously"); + } + + if (properties.containsKey(PROPERTY_SAMPLE_PERCENT)) { + checkNumericProperty(PROPERTY_SAMPLE_PERCENT, properties.get(PROPERTY_SAMPLE_PERCENT), + 0, 100, false, "should be > 0 and < 100"); + } - long taskTimeout = ((Long) Util.getLongPropertyOrDefault( - properties.get(CBO_STATISTICS_TASK_TIMEOUT_SEC), - Config.max_cbo_statistics_task_timeout_sec, DESIRED_TASK_TIMEOUT_SEC, - CBO_STATISTICS_TASK_TIMEOUT_SEC + " should > 0")).intValue(); - properties.put(CBO_STATISTICS_TASK_TIMEOUT_SEC, String.valueOf(taskTimeout)); + if (properties.containsKey(PROPERTY_SAMPLE_ROWS)) { + checkNumericProperty(PROPERTY_SAMPLE_ROWS, properties.get(PROPERTY_SAMPLE_ROWS), + 0, Integer.MAX_VALUE, false, "needs at least 1 row"); + } + + if (properties.containsKey(PROPERTY_NUM_BUCKETS)) { + checkNumericProperty(PROPERTY_NUM_BUCKETS, properties.get(PROPERTY_NUM_BUCKETS), + 1, Integer.MAX_VALUE, true, "needs at least 1 buckets"); + } + + if (properties.containsKey(PROPERTY_ANALYSIS_TYPE)) { + try { + AnalysisType.valueOf(properties.get(PROPERTY_ANALYSIS_TYPE)); + } catch (NumberFormatException e) { + String msg = String.format(msgTemplate, PROPERTY_ANALYSIS_TYPE, properties.get(PROPERTY_ANALYSIS_TYPE)); + throw new AnalysisException(msg); + } + } + + if (properties.containsKey(PROPERTY_INCREMENTAL) + && AnalysisType.valueOf(properties.get(PROPERTY_ANALYSIS_TYPE)) == AnalysisType.HISTOGRAM) { + throw new AnalysisException(PROPERTY_INCREMENTAL + " collection of histograms is not supported"); + } + + if (properties.containsKey(PROPERTY_NUM_BUCKETS) + && AnalysisType.valueOf(properties.get(PROPERTY_ANALYSIS_TYPE)) != AnalysisType.HISTOGRAM) { + throw new AnalysisException(PROPERTY_NUM_BUCKETS + " can only be specified when collecting histograms"); + } + } + + private void checkNumericProperty(String key, String value, int lowerBound, int upperBound, + boolean includeBoundary, String errorMsg) throws AnalysisException { + if (!StringUtils.isNumeric(value)) { + String msg = String.format("%s = %s is an invalid property.", key, value); + throw new AnalysisException(msg); + } + int intValue = Integer.parseInt(value); + boolean isOutOfBounds = (includeBoundary && (intValue < lowerBound || intValue > upperBound)) + || (!includeBoundary && (intValue <= lowerBound || intValue >= upperBound)); + if (isOutOfBounds) { + throw new AnalysisException(key + " " + errorMsg); } } @@ -198,8 +291,47 @@ public Set getColumnNames() { } public 
Map getProperties() { - // TODO add default properties - return properties != null ? properties : Maps.newHashMap(); + return properties; + } + + public boolean isSync() { + return Boolean.parseBoolean(properties.get(PROPERTY_SYNC)); + } + + public boolean isIncremental() { + return Boolean.parseBoolean(properties.get(PROPERTY_INCREMENTAL)); + } + + public int getSamplePercent() { + if (!properties.containsKey(PROPERTY_SAMPLE_PERCENT)) { + return 0; + } + return Integer.parseInt(properties.get(PROPERTY_SAMPLE_PERCENT)); + } + + public int getSampleRows() { + if (!properties.containsKey(PROPERTY_SAMPLE_ROWS)) { + return 0; + } + return Integer.parseInt(properties.get(PROPERTY_SAMPLE_ROWS)); + } + + public int getNumBuckets() { + if (!properties.containsKey(PROPERTY_NUM_BUCKETS)) { + return 0; + } + return Integer.parseInt(properties.get(PROPERTY_NUM_BUCKETS)); + } + + public AnalysisType getAnalysisType() { + return AnalysisType.valueOf(properties.get(PROPERTY_ANALYSIS_TYPE)); + } + + public AnalysisMethod getAnalysisMethod() { + if (getSamplePercent() > 0 || getSampleRows() > 0) { + return AnalysisMethod.SAMPLE; + } + return AnalysisMethod.FULL; } @Override @@ -207,27 +339,22 @@ public String toSql() { StringBuilder sb = new StringBuilder(); sb.append("ANALYZE"); - if (isIncrement) { - sb.append(" "); - sb.append("INCREMENTAL"); - } - if (tableName != null) { sb.append(" "); sb.append(tableName.toSql()); } - if (isHistogram) { - sb.append(" "); - sb.append("UPDATE HISTOGRAM ON"); - sb.append(" "); - sb.append(StringUtils.join(columnNames, ",")); - } else if (columnNames != null) { + if (columnNames != null) { sb.append("("); sb.append(StringUtils.join(columnNames, ",")); sb.append(")"); } + if (getAnalysisType().equals(AnalysisType.HISTOGRAM)) { + sb.append(" "); + sb.append("UPDATE HISTOGRAM"); + } + if (properties != null) { sb.append(" "); sb.append("PROPERTIES("); @@ -239,8 +366,4 @@ public String toSql() { return sb.toString(); } - - public boolean isSync() { - return sync; - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 433135d5c830d1..4f2c3abe9a5c9f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -90,19 +90,17 @@ public StatisticsCache getStatisticsCache() { } // Each analyze stmt corresponding to an analysis job. 
- public void createAnalysisJob(AnalyzeStmt analyzeStmt) throws DdlException { - String catalogName = analyzeStmt.getCatalogName(); - String db = analyzeStmt.getDBName(); - TableName tbl = analyzeStmt.getTblName(); - StatisticsUtil.convertTableNameToObjects(tbl); - Set colNames = analyzeStmt.getColumnNames(); - Map analysisTaskInfos = new HashMap<>(); + public void createAnalysisJob(AnalyzeStmt stmt) throws DdlException { long jobId = Env.getCurrentEnv().getNextId(); - createTaskForEachColumns(analyzeStmt, catalogName, db, tbl, colNames, analysisTaskInfos, jobId); - createTaskForMVIdx(analyzeStmt, catalogName, db, tbl, analysisTaskInfos, jobId); - persistAnalysisJob(catalogName, db, tbl, jobId); + AnalysisTaskInfoBuilder taskInfoBuilder = buildCommonTaskInfo(stmt, jobId); + Map analysisTaskInfos = new HashMap<>(); + + // start build analysis tasks + createTaskForEachColumns(stmt.getColumnNames(), taskInfoBuilder, analysisTaskInfos); + createTaskForMVIdx(stmt.getTable(), taskInfoBuilder, analysisTaskInfos, stmt.getAnalysisType()); + persistAnalysisJob(taskInfoBuilder); - if (analyzeStmt.isSync()) { + if (stmt.isSync()) { syncExecute(analysisTaskInfos.values()); return; } @@ -111,42 +109,79 @@ public void createAnalysisJob(AnalyzeStmt analyzeStmt) throws DdlException { analysisTaskInfos.values().forEach(taskScheduler::schedule); } - private void persistAnalysisJob(String catalogName, String db, TableName tbl, - long jobId) throws DdlException { + private AnalysisTaskInfoBuilder buildCommonTaskInfo(AnalyzeStmt stmt, long jobId) { + AnalysisTaskInfoBuilder taskInfoBuilder = new AnalysisTaskInfoBuilder(); + String catalogName = stmt.getCatalogName(); + String db = stmt.getDBName(); + TableName tbl = stmt.getTblName(); + StatisticsUtil.convertTableNameToObjects(tbl); + String tblName = tbl.getTbl(); + int samplePercent = stmt.getSamplePercent(); + int sampleRows = stmt.getSampleRows(); + AnalysisType analysisType = stmt.getAnalysisType(); + AnalysisMethod analysisMethod = stmt.getAnalysisMethod(); + + taskInfoBuilder.setJobId(jobId); + taskInfoBuilder.setCatalogName(catalogName); + taskInfoBuilder.setDbName(db); + taskInfoBuilder.setTblName(tblName); + taskInfoBuilder.setJobType(JobType.MANUAL); + taskInfoBuilder.setState(AnalysisState.PENDING); + taskInfoBuilder.setScheduleType(ScheduleType.ONCE); + + if (analysisMethod == AnalysisMethod.SAMPLE) { + taskInfoBuilder.setAnalysisMethod(AnalysisMethod.SAMPLE); + taskInfoBuilder.setSamplePercent(samplePercent); + taskInfoBuilder.setSampleRows(sampleRows); + } else { + taskInfoBuilder.setAnalysisMethod(AnalysisMethod.FULL); + } + + if (analysisType == AnalysisType.HISTOGRAM) { + taskInfoBuilder.setAnalysisType(AnalysisType.HISTOGRAM); + int numBuckets = stmt.getNumBuckets(); + int maxBucketNum = numBuckets > 0 ? 
numBuckets + : StatisticConstants.HISTOGRAM_MAX_BUCKET_NUM; + taskInfoBuilder.setMaxBucketNum(maxBucketNum); + } else { + taskInfoBuilder.setAnalysisType(AnalysisType.COLUMN); + } + + return taskInfoBuilder; + } + + private void persistAnalysisJob(AnalysisTaskInfoBuilder taskInfoBuilder) throws DdlException { try { - AnalysisTaskInfo analysisTaskInfo = new AnalysisTaskInfoBuilder().setJobId( - jobId).setTaskId(-1) - .setCatalogName(catalogName).setDbName(db) - .setTblName(tbl.getTbl()) - .setJobType(JobType.MANUAL) - .setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(AnalysisType.INDEX) - .setScheduleType(ScheduleType.ONCE).build(); + AnalysisTaskInfoBuilder jobInfoBuilder = taskInfoBuilder.copy(); + AnalysisTaskInfo analysisTaskInfo = jobInfoBuilder.setTaskId(-1).build(); StatisticsRepository.persistAnalysisTask(analysisTaskInfo); } catch (Throwable t) { throw new DdlException(t.getMessage(), t); } } - private void createTaskForMVIdx(AnalyzeStmt analyzeStmt, String catalogName, String db, TableName tbl, - Map analysisTaskInfos, long jobId) throws DdlException { - if (!(analyzeStmt.isWholeTbl && analyzeStmt.getTable().getType().equals(TableType.OLAP))) { + private void createTaskForMVIdx(TableIf table, AnalysisTaskInfoBuilder taskInfoBuilder, + Map analysisTaskInfos, AnalysisType analysisType) throws DdlException { + TableType type = table.getType(); + if (analysisType != AnalysisType.INDEX || !type.equals(TableType.OLAP)) { + // not need to collect statistics for materialized view return; } - OlapTable olapTable = (OlapTable) analyzeStmt.getTable(); + + taskInfoBuilder.setAnalysisType(analysisType); + OlapTable olapTable = (OlapTable) table; + try { olapTable.readLock(); for (MaterializedIndexMeta meta : olapTable.getIndexIdToMeta().values()) { if (meta.getDefineStmt() == null) { continue; } + AnalysisTaskInfoBuilder indexTaskInfoBuilder = taskInfoBuilder.copy(); + long indexId = meta.getIndexId(); long taskId = Env.getCurrentEnv().getNextId(); - AnalysisTaskInfo analysisTaskInfo = new AnalysisTaskInfoBuilder().setJobId( - jobId).setTaskId(taskId) - .setCatalogName(catalogName).setDbName(db) - .setTblName(tbl.getTbl()) - .setIndexId(meta.getIndexId()).setJobType(JobType.MANUAL) - .setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(AnalysisType.INDEX) - .setScheduleType(ScheduleType.ONCE).build(); + AnalysisTaskInfo analysisTaskInfo = indexTaskInfoBuilder.setIndexId(indexId) + .setTaskId(taskId).build(); try { StatisticsRepository.persistAnalysisTask(analysisTaskInfo); } catch (Exception e) { @@ -159,19 +194,14 @@ private void createTaskForMVIdx(AnalyzeStmt analyzeStmt, String catalogName, Str } } - private void createTaskForEachColumns(AnalyzeStmt analyzeStmt, String catalogName, String db, TableName tbl, - Set colNames, Map analysisTaskInfos, - long jobId) throws DdlException { + private void createTaskForEachColumns(Set colNames, AnalysisTaskInfoBuilder taskInfoBuilder, + Map analysisTaskInfos) throws DdlException { for (String colName : colNames) { + AnalysisTaskInfoBuilder colTaskInfoBuilder = taskInfoBuilder.copy(); + long indexId = -1; long taskId = Env.getCurrentEnv().getNextId(); - AnalysisType analType = analyzeStmt.isHistogram ? 
AnalysisType.HISTOGRAM : AnalysisType.COLUMN; - AnalysisTaskInfo analysisTaskInfo = new AnalysisTaskInfoBuilder().setJobId(jobId) - .setTaskId(taskId).setCatalogName(catalogName).setDbName(db) - .setTblName(tbl.getTbl()).setColName(colName) - .setJobType(JobType.MANUAL) - .setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(analType) - .setState(AnalysisState.PENDING) - .setScheduleType(ScheduleType.ONCE).build(); + AnalysisTaskInfo analysisTaskInfo = colTaskInfoBuilder.setColName(colName) + .setIndexId(indexId).setTaskId(taskId).build(); try { StatisticsRepository.persistAnalysisTask(analysisTaskInfo); } catch (Exception e) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java index 004d51b55eb72b..c0f04b5eb53e8d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java @@ -70,9 +70,11 @@ public enum ScheduleType { public final AnalysisType analysisType; - // TODO: define constants or get them from configuration properties - public final double sampleRate = 1.0; - public final int maxBucketNum = 128; + public final int samplePercent; + + public final int sampleRows; + + public final int maxBucketNum; public String message; @@ -84,9 +86,9 @@ public enum ScheduleType { public final ScheduleType scheduleType; public AnalysisTaskInfo(long jobId, long taskId, String catalogName, String dbName, String tblName, - String colName, Long indexId, JobType jobType, - AnalysisMethod analysisMethod, AnalysisType analysisType, String message, - int lastExecTimeInMs, AnalysisState state, ScheduleType scheduleType) { + String colName, Long indexId, JobType jobType, AnalysisMethod analysisMethod, + AnalysisType analysisType, int samplePercent, int sampleRows, int maxBucketNum, + String message, int lastExecTimeInMs, AnalysisState state, ScheduleType scheduleType) { this.jobId = jobId; this.taskId = taskId; this.catalogName = catalogName; @@ -97,6 +99,9 @@ public AnalysisTaskInfo(long jobId, long taskId, String catalogName, String dbNa this.jobType = jobType; this.analysisMethod = analysisMethod; this.analysisType = analysisType; + this.samplePercent = samplePercent; + this.sampleRows = sampleRows; + this.maxBucketNum = maxBucketNum; this.message = message; this.lastExecTimeInMs = lastExecTimeInMs; this.state = state; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfoBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfoBuilder.java index e804388153db9e..e03f4f8d63f49f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfoBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfoBuilder.java @@ -33,6 +33,9 @@ public class AnalysisTaskInfoBuilder { private JobType jobType; private AnalysisMethod analysisMethod; private AnalysisType analysisType; + private int maxBucketNum; + private int samplePercent; + private int sampleRows; private String message; private int lastExecTimeInMs; private AnalysisState state; @@ -88,6 +91,21 @@ public AnalysisTaskInfoBuilder setAnalysisType(AnalysisType analysisType) { return this; } + public AnalysisTaskInfoBuilder setMaxBucketNum(int maxBucketNum) { + this.maxBucketNum = maxBucketNum; + return this; + } + + public AnalysisTaskInfoBuilder setSamplePercent(int samplePercent) { + this.samplePercent = samplePercent; + return this; + } + + 
public AnalysisTaskInfoBuilder setSampleRows(int sampleRows) { + this.sampleRows = sampleRows; + return this; + } + public AnalysisTaskInfoBuilder setMessage(String message) { this.message = message; return this; @@ -109,7 +127,29 @@ public AnalysisTaskInfoBuilder setScheduleType(ScheduleType scheduleType) { } public AnalysisTaskInfo build() { - return new AnalysisTaskInfo(jobId, taskId, catalogName, dbName, tblName, colName, - indexId, jobType, analysisMethod, analysisType, message, lastExecTimeInMs, state, scheduleType); + return new AnalysisTaskInfo(jobId, taskId, catalogName, dbName, tblName, + colName, indexId, jobType, analysisMethod, analysisType, samplePercent, + sampleRows, maxBucketNum, message, lastExecTimeInMs, state, scheduleType); + } + + public AnalysisTaskInfoBuilder copy() { + return new AnalysisTaskInfoBuilder() + .setJobId(jobId) + .setTaskId(taskId) + .setCatalogName(catalogName) + .setDbName(dbName) + .setTblName(tblName) + .setColName(colName) + .setIndexId(indexId) + .setJobType(jobType) + .setAnalysisMethod(analysisMethod) + .setAnalysisType(analysisType) + .setSamplePercent(samplePercent) + .setSampleRows(sampleRows) + .setMaxBucketNum(maxBucketNum) + .setMessage(message) + .setLastExecTimeInMs(lastExecTimeInMs) + .setState(state) + .setScheduleType(scheduleType); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java index 001d1339a67859..53c268426d8e39 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java @@ -24,6 +24,7 @@ import org.apache.doris.catalog.TableIf; import org.apache.doris.datasource.CatalogIf; import org.apache.doris.qe.StmtExecutor; +import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMethod; import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType; import com.google.common.annotations.VisibleForTesting; @@ -189,4 +190,15 @@ private boolean isUnsupportedType(PrimitiveType type) { return unsupportedType.contains(type); } + protected String getSampleExpression() { + if (info.analysisMethod == AnalysisMethod.FULL) { + return ""; + } + // TODO Add sampling methods for external tables + if (info.samplePercent > 0) { + return String.format("TABLESAMPLE(%d PERCENT)", info.samplePercent); + } else { + return String.format("TABLESAMPLE(%d ROWS)", info.sampleRows); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java index 5adb34d4b06a1f..58f9ad01a777d2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java @@ -19,8 +19,7 @@ import org.apache.doris.catalog.Env; import org.apache.doris.common.FeConstants; -import org.apache.doris.qe.AutoCloseConnectContext; -import org.apache.doris.qe.StmtExecutor; +import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMethod; import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.annotations.VisibleForTesting; @@ -28,8 +27,6 @@ import java.util.HashMap; import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; /** * Each task analyze one column. 
@@ -49,10 +46,7 @@ public class HistogramTask extends BaseAnalysisTask { + " HISTOGRAM(`${colName}`, ${maxBucketNum}) AS buckets, " + " NOW() AS create_time " + "FROM " - + " `${dbName}`.`${tblName}`"; - - private static final String ANALYZE_HISTOGRAM_SQL_TEMPLATE_PART = ANALYZE_HISTOGRAM_SQL_TEMPLATE_TABLE - + " PARTITION (${partName})"; + + " `${dbName}`.`${tblName}` ${sampleExpr}"; @VisibleForTesting public HistogramTask() { @@ -71,44 +65,29 @@ public void execute() throws Exception { params.put("catalogId", String.valueOf(catalog.getId())); params.put("dbId", String.valueOf(db.getId())); params.put("tblId", String.valueOf(tbl.getId())); - params.put("idxId", "-1"); + params.put("idxId", String.valueOf(info.indexId)); params.put("colId", String.valueOf(info.colName)); params.put("dbName", info.dbName); params.put("tblName", String.valueOf(info.tblName)); params.put("colName", String.valueOf(info.colName)); - params.put("sampleRate", String.valueOf(info.sampleRate)); + params.put("sampleRate", getSampleRateFunction()); + params.put("sampleExpr", getSampleExpression()); params.put("maxBucketNum", String.valueOf(info.maxBucketNum)); - params.put("percentValue", String.valueOf((int) (info.sampleRate * 100))); - String histogramSql; - Set partitionNames = tbl.getPartitionNames(); + StringSubstitutor stringSubstitutor = new StringSubstitutor(params); + StatisticsUtil.execUpdate(stringSubstitutor.replace(ANALYZE_HISTOGRAM_SQL_TEMPLATE_TABLE)); + Env.getCurrentEnv().getStatisticsCache().refreshHistogramSync(tbl.getId(), -1, col.getName()); + } - if (partitionNames.isEmpty()) { - StringSubstitutor stringSubstitutor = new StringSubstitutor(params); - histogramSql = stringSubstitutor.replace(ANALYZE_HISTOGRAM_SQL_TEMPLATE_TABLE); - } else { - try { - tbl.readLock(); - String partNames = partitionNames.stream() - .filter(x -> tbl.getPartition(x) != null) - .map(partName -> "`" + partName + "`") - .collect(Collectors.joining(",")); - params.put("partName", partNames); - StringSubstitutor stringSubstitutor = new StringSubstitutor(params); - histogramSql = stringSubstitutor.replace(ANALYZE_HISTOGRAM_SQL_TEMPLATE_PART); - } finally { - tbl.readUnlock(); - } + private String getSampleRateFunction() { + if (info.analysisMethod == AnalysisMethod.FULL) { + return "0"; } - - LOG.info("SQL to collect the histogram:\n {}", histogramSql); - - try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext()) { - r.connectContext.getSessionVariable().disableNereidsPlannerOnce(); - this.stmtExecutor = new StmtExecutor(r.connectContext, histogramSql); - this.stmtExecutor.execute(); + if (info.samplePercent > 0) { + return String.valueOf(info.samplePercent / 100.0); + } else { + double sampRate = (double) info.sampleRows / tbl.getRowCount(); + return sampRate >= 1 ? 
"1.0" : String.format("%.4f", sampRate); } - - Env.getCurrentEnv().getStatisticsCache().refreshHistogramSync(tbl.getId(), -1, col.getName()); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java index 62eb72ec9e367e..a3bac1bbc8bb3e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java @@ -50,11 +50,11 @@ public class MVAnalysisTask extends BaseAnalysisTask { private static final String ANALYZE_MV_PART = INSERT_PART_STATISTICS - + " FROM (${sql}) mv"; + + " FROM (${sql}) mv ${sampleExpr}"; private static final String ANALYZE_MV_COL = INSERT_COL_STATISTICS + " (SELECT NDV(`${colName}`) AS ndv " - + " FROM (${sql}) mv) t2\n"; + + " FROM (${sql}) mv) t2"; private MaterializedIndexMeta meta; @@ -118,9 +118,11 @@ public void execute() throws Exception { params.put("colName", colName); params.put("tblName", String.valueOf(info.tblName)); params.put("sql", sql); + params.put("sampleExpr", getSampleExpression()); StatisticsUtil.execUpdate(ANALYZE_MV_PART, params); } params.remove("partId"); + params.remove("sampleExpr"); params.put("type", column.getType().toString()); StatisticsUtil.execUpdate(ANALYZE_MV_COL, params); Env.getCurrentEnv().getStatisticsCache() diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index de848a25923bc9..24e30e2126a281 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -40,7 +40,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask { private static final String ANALYZE_PARTITION_SQL_TEMPLATE = INSERT_PART_STATISTICS + "FROM `${dbName}`.`${tblName}` " - + "PARTITION ${partName}"; + + "PARTITION ${partName} ${sampleExpr}"; // TODO Currently, NDV is computed for the full table; in fact, // NDV should only be computed for the relevant partition. 
@@ -64,12 +64,13 @@ public void execute() throws Exception { params.put("catalogId", String.valueOf(catalog.getId())); params.put("dbId", String.valueOf(db.getId())); params.put("tblId", String.valueOf(tbl.getId())); - params.put("idxId", "-1"); + params.put("idxId", String.valueOf(info.indexId)); params.put("colId", String.valueOf(info.colName)); params.put("dataSizeFunction", getDataSizeFunction(col)); params.put("dbName", info.dbName); params.put("colName", String.valueOf(info.colName)); params.put("tblName", String.valueOf(info.tblName)); + params.put("sampleExpr", getSampleExpression()); List partitionAnalysisSQLs = new ArrayList<>(); try { tbl.readLock(); @@ -90,6 +91,7 @@ public void execute() throws Exception { } execSQLs(partitionAnalysisSQLs); params.remove("partId"); + params.remove("sampleExpr"); params.put("type", col.getType().toString()); StringSubstitutor stringSubstitutor = new StringSubstitutor(params); String sql = stringSubstitutor.replace(ANALYZE_COLUMN_SQL_TEMPLATE); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java index d5d473eac6b223..46bf364f7cce95 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java @@ -68,4 +68,5 @@ public class StatisticConstants { public static final int FETCH_LIMIT = 10000; public static final int FETCH_INTERVAL_IN_MS = 500; + public static final int HISTOGRAM_MAX_BUCKET_NUM = 128; } diff --git a/fe/fe-core/src/main/jflex/sql_scanner.flex b/fe/fe-core/src/main/jflex/sql_scanner.flex index ec6511411c3b7f..62a237e3b3db27 100644 --- a/fe/fe-core/src/main/jflex/sql_scanner.flex +++ b/fe/fe-core/src/main/jflex/sql_scanner.flex @@ -418,6 +418,7 @@ import org.apache.doris.qe.SqlModeHelper; keywordMap.put("soname", new Integer(SqlParserSymbols.KW_SONAME)); keywordMap.put("split", new Integer(SqlParserSymbols.KW_SPLIT)); keywordMap.put("sql_block_rule", new Integer(SqlParserSymbols.KW_SQL_BLOCK_RULE)); + keywordMap.put("sample", new Integer(SqlParserSymbols.KW_SAMPLE)); keywordMap.put("start", new Integer(SqlParserSymbols.KW_START)); keywordMap.put("stats", new Integer(SqlParserSymbols.KW_STATS)); keywordMap.put("status", new Integer(SqlParserSymbols.KW_STATUS)); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java index caf316429f278d..78d4e829abbf7b 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java @@ -75,7 +75,7 @@ public void test1TaskCreation() throws Exception { AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); StmtExecutor executor = getSqlStmtExecutor( - "ANALYZE TABLE t1 UPDATE HISTOGRAM ON col1 PARTITION (p_201701)"); + "ANALYZE TABLE t1(col1) UPDATE HISTOGRAM"); Assertions.assertNotNull(executor); ConcurrentMap> taskMap = diff --git a/regression-test/data/statistics/show_stats_test.out b/regression-test/data/statistics/show_stats_test.out new file mode 100644 index 00000000000000..577d4e5090338f --- /dev/null +++ b/regression-test/data/statistics/show_stats_test.out @@ -0,0 +1,7 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql -- +city 6.0 4.0 0.0 42.0 6.0 '上海' '深圳' + +-- !sql -- +city VARCHAR(20) 0.0 4 [{"lower_expr":"上海","upper_expr":"上海","count":1.0,"pre_sum":0.0,"ndv":1.0},{"lower_expr":"北京","upper_expr":"北京","count":2.0,"pre_sum":1.0,"ndv":1.0},{"lower_expr":"广州","upper_expr":"广州","count":1.0,"pre_sum":3.0,"ndv":1.0},{"lower_expr":"深圳","upper_expr":"深圳","count":2.0,"pre_sum":4.0,"ndv":1.0}] + diff --git a/regression-test/suites/statistics/alter_col_stats.groovy b/regression-test/suites/statistics/alter_col_stats.groovy index 958e3341d41e64..3babc9ebbbf756 100644 --- a/regression-test/suites/statistics/alter_col_stats.groovy +++ b/regression-test/suites/statistics/alter_col_stats.groovy @@ -34,7 +34,7 @@ suite("alter_column_stats") { sql """INSERT INTO statistics_test VALUES(3, 'c', '2013-01-01')""" sql """ANALYZE TABLE statistics_test""" - sql """ANALYZE TABLE statistics_test UPDATE HISTOGRAM ON col1,col2""" + sql """ANALYZE TABLE statistics_test(col1, col2) UPDATE HISTOGRAM""" sleep(9000) diff --git a/regression-test/suites/statistics/analyze_test.groovy b/regression-test/suites/statistics/analyze_test.groovy index f8414d9f68e6d9..d348f78ba3c411 100644 --- a/regression-test/suites/statistics/analyze_test.groovy +++ b/regression-test/suites/statistics/analyze_test.groovy @@ -29,6 +29,10 @@ suite("analyze_test") { def tblName3 = "${dbName3}.analyze_test_tbl_3" + def dbName4 = "analyze_test_db_4" + + def tblName4 = "${dbName4}.analyze_test_tbl_4" + sql """ DROP DATABASE IF EXISTS ${dbName1}; @@ -55,6 +59,13 @@ suite("analyze_test") { CREATE DATABASE ${dbName3}; """ + sql """ + DROP DATABASE IF EXISTS ${dbName4} + """ + + sql """ + CREATE DATABASE ${dbName4}; + """ sql """ DROP TABLE IF EXISTS ${tblName1} @@ -95,6 +106,18 @@ suite("analyze_test") { "enable_unique_key_merge_on_write"="true" );""" + sql """ + DROP TABLE IF EXISTS ${tblName4} + """ + + sql """CREATE TABLE ${tblName4} (analyze_test_col1 varchar(11451) not null, analyze_test_col2 int not null, analyze_test_col3 int not null) + UNIQUE KEY(analyze_test_col1) + DISTRIBUTED BY HASH(analyze_test_col1) + BUCKETS 3 + PROPERTIES( + "replication_num"="1", + "enable_unique_key_merge_on_write"="true" + );""" sql """insert into ${tblName1} values(1, 2, 3);""" sql """insert into ${tblName1} values(4, 5, 6);""" @@ -114,6 +137,12 @@ suite("analyze_test") { sql """insert into ${tblName3} values(3, 8, 2);""" sql """insert into ${tblName3} values(5, 2, 1);""" + sql """insert into ${tblName4} values(1, 2, 3);""" + sql """insert into ${tblName4} values(4, 5, 6);""" + sql """insert into ${tblName4} values(7, 1, 9);""" + sql """insert into ${tblName4} values(3, 8, 2);""" + sql """insert into ${tblName4} values(5, 2, 1);""" + sql """ delete from __internal_schema.column_statistics where col_id in ('analyze_test_col1', 'analyze_test_col2', 'analyze_test_col3') """ @@ -142,6 +171,22 @@ suite("analyze_test") { ALTER TABLE ${tblName3} DROP COLUMN analyze_test_col2; """ + sql """ + analyze sync table ${tblName4} with sample rows 5; + """ + + sql """ + analyze table ${tblName4} with sync with incremental with sample percent 20; + """ + + sql """ + analyze sync table ${tblName4} update histogram with sample percent 20 with buckets 2; + """ + + sql """ + analyze table ${tblName4} update histogram with sync with sample rows 20 with buckets 2; + """ + sql """ DROP TABLE ${tblName2} """ @@ -150,6 +195,10 @@ suite("analyze_test") { DROP DATABASE ${dbName1} """ + sql """ + DROP DATABASE ${dbName4} + """ + sql """ DROP 
EXPIRED STATS """ diff --git a/regression-test/suites/statistics/show_stats_test.groovy b/regression-test/suites/statistics/show_stats_test.groovy new file mode 100644 index 00000000000000..fb65741c2171a1 --- /dev/null +++ b/regression-test/suites/statistics/show_stats_test.groovy @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_show_stats") { + def dbName = "stats_test" + def tblName = "${dbName}.example_tbl" + + sql "DROP DATABASE IF EXISTS ${dbName}" + + sql "CREATE DATABASE IF NOT EXISTS ${dbName};" + + sql "DROP TABLE IF EXISTS ${tblName}" + + sql """ + CREATE TABLE IF NOT EXISTS ${tblName} ( + `user_id` LARGEINT NOT NULL, + `date` DATE NOT NULL, + `city` VARCHAR(20), + `age` SMALLINT, + `sex` TINYINT, + `last_visit_date` DATETIME REPLACE, + `cost` BIGINT SUM, + `max_dwell_time` INT MAX, + `min_dwell_time` INT MIN + ) ENGINE=OLAP + AGGREGATE KEY(`user_id`, `date`, `city`, `age`, `sex`) + PARTITION BY LIST(`date`) + ( + PARTITION `p_201701` VALUES IN ("2017-10-01"), + PARTITION `p_201702` VALUES IN ("2017-10-02"), + PARTITION `p_201703` VALUES IN ("2017-10-03"), + PARTITION `default` + ) + DISTRIBUTED BY HASH(`user_id`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ); + """ + + sql """ + INSERT INTO ${tblName} (`user_id`, `date`, `city`, `age`, + `sex`, `last_visit_date`, `cost`, + `max_dwell_time`, `min_dwell_time`) + VALUES (10000, "2017-10-01", "北京", 20, 0, "2017-10-01 07:00:00", 15, 2, 2), + (10000, "2017-10-01", "北京", 20, 0, "2017-10-01 06:00:00", 20, 10, 10), + (10001, "2017-10-01", "北京", 30, 1, "2017-10-01 17:05:45", 2, 22, 22), + (10002, "2017-10-02", "上海", 20, 1, "2017-10-02 12:59:12", 200, 5, 5), + (10003, "2017-10-02", "广州", 32, 0, "2017-10-02 11:20:00", 30, 11, 11), + (10004, "2017-10-01", "深圳", 35, 0, "2017-10-01 10:00:15", 100, 3, 3), + (10004, "2017-10-03", "深圳", 35, 0, "2017-10-03 10:20:22", 11, 6, 6); + """ + + sql "ANALYZE sync TABLE ${tblName};" + + sql "ANALYZE sync TABLE ${tblName} UPDATE HISTOGRAM;" + + qt_sql "SHOW COLUMN STATS ${tblName}(city);" + + qt_sql "SHOW COLUMN HISTOGRAM ${tblName}(city);" + + sql "DROP DATABASE IF EXISTS ${dbName}" + + sql "CREATE DATABASE IF NOT EXISTS ${dbName};" +}
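
For reference, a few statements that exercise the ANALYZE syntax introduced by this patch (an illustrative sketch only: the table name t1 and the column names are hypothetical; the WITH clauses map onto the 'sync', 'incremental', 'sample.percent', 'sample.rows' and 'num.buckets' properties documented in AnalyzeStmt.java, and the forms below mirror the cases covered in analyze_test.groovy):

-- collect column statistics for the whole table and wait for completion
ANALYZE SYNC TABLE t1;
-- sampled collection: specify either a percentage or a row count, not both
ANALYZE TABLE t1 (col1, col2) WITH SAMPLE PERCENT 20;
ANALYZE TABLE t1 (col1) WITH SYNC WITH SAMPLE ROWS 1000;
-- histogram collection with an explicit bucket count
ANALYZE TABLE t1 (col1) UPDATE HISTOGRAM WITH SYNC WITH BUCKETS 32;
-- equivalent form passing the same settings through PROPERTIES instead of WITH
ANALYZE TABLE t1 (col1) PROPERTIES ('sync' = 'true', 'sample.percent' = '20');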