Skip to content

Commit

Permalink
[improvement](statistics)Support get index row count and table delta …
Browse files Browse the repository at this point in the history
…rows. (#38516)

backport: #38492
  • Loading branch information
Jibing-Li authored Jul 31, 2024
1 parent ce64963 commit 08453f5
Show file tree
Hide file tree
Showing 8 changed files with 160 additions and 23 deletions.
7 changes: 6 additions & 1 deletion fe/fe-core/src/main/cup/sql_parser.cup
Original file line number Diff line number Diff line change
Expand Up @@ -4166,7 +4166,12 @@ show_param ::=
/* show table stats */
| KW_TABLE opt_cached:cached KW_STATS table_name:tbl opt_partition_names:partitionNames
{:
RESULT = new ShowTableStatsStmt(tbl, partitionNames, cached);
RESULT = new ShowTableStatsStmt(tbl, partitionNames, cached, null);
:}
/* show index stats */
| KW_INDEX KW_STATS table_name:tbl ident:id
{:
RESULT = new ShowTableStatsStmt(tbl, null, false, id);
:}
/* show column stats */
| KW_COLUMN opt_cached:cached KW_STATS table_name:tbl opt_col_list:cols opt_partition_names:partitionNames
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,10 @@ public void analyze(Analyzer analyzer) throws UserException {
throw new AnalysisException(optional.get() + " is invalid statistics");
}

if (!properties.containsKey(StatsType.ROW_COUNT.getValue())) {
throw new AnalysisException("Set column stats must set row_count. e.g. 'row_count'='5'");
}

// get statsTypeToValue
properties.forEach((key, value) -> {
StatsType statsType = StatsType.fromString(key);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
import org.apache.doris.catalog.ScalarType;
import org.apache.doris.catalog.TableIf;
Expand Down Expand Up @@ -58,17 +59,25 @@ public class ShowTableStatsStmt extends ShowStmt {
.add("user_inject")
.build();

private final TableName tableName;
private static final ImmutableList<String> INDEX_TITLE_NAMES =
new ImmutableList.Builder<String>()
.add("table_name")
.add("index_name")
.add("row_count")
.build();

private final TableName tableName;
private final PartitionNames partitionNames;
private final boolean cached;
private final String indexName;

private TableIf table;

public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, boolean cached) {
public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, boolean cached, String indexName) {
this.tableName = tableName;
this.partitionNames = partitionNames;
this.cached = cached;
this.indexName = indexName;
}

public TableName getTableName() {
Expand Down Expand Up @@ -116,7 +125,13 @@ public void analyze(Analyzer analyzer) throws UserException {
public ShowResultSetMetaData getMetaData() {
ShowResultSetMetaData.Builder builder = ShowResultSetMetaData.builder();

for (String title : TITLE_NAMES) {
ImmutableList<String> titles;
if (indexName != null) {
titles = INDEX_TITLE_NAMES;
} else {
titles = TITLE_NAMES;
}
for (String title : titles) {
builder.addColumn(new Column(title, ScalarType.createVarchar(30)));
}
return builder.build();
Expand All @@ -126,15 +141,29 @@ public TableIf getTable() {
return table;
}

public long getPartitionId() {
if (partitionNames == null) {
return 0;
public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) {
if (indexName != null) {
return constructIndexResultSet(tableStatistic);
}
String partitionName = partitionNames.getPartitionNames().get(0);
return table.getPartition(partitionName).getId();
return constructTableResultSet(tableStatistic);
}

public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) {
public ShowResultSet constructResultSet(long rowCount) {
List<List<String>> result = Lists.newArrayList();
List<String> row = Lists.newArrayList();
row.add("");
row.add("");
row.add(String.valueOf(rowCount));
row.add("");
row.add("");
row.add("");
row.add("");
row.add("");
result.add(row);
return new ShowResultSet(getMetaData(), result);
}

public ShowResultSet constructTableResultSet(TableStatsMeta tableStatistic) {
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
if (tableStatistic == null) {
return new ShowResultSet(getMetaData(), new ArrayList<>());
Expand All @@ -146,7 +175,7 @@ public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) {
row.add(String.valueOf(tableStatistic.rowCount));
LocalDateTime dateTime =
LocalDateTime.ofInstant(Instant.ofEpochMilli(tableStatistic.updatedTime),
java.time.ZoneId.systemDefault());
java.time.ZoneId.systemDefault());
String formattedDateTime = dateTime.format(formatter);
row.add(formattedDateTime);
row.add(tableStatistic.analyzeColumns().toString());
Expand All @@ -157,17 +186,24 @@ public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) {
return new ShowResultSet(getMetaData(), result);
}

public ShowResultSet constructResultSet(long rowCount) {
public ShowResultSet constructIndexResultSet(TableStatsMeta tableStatistic) {
List<List<String>> result = Lists.newArrayList();
if (!(table instanceof OlapTable)) {
return new ShowResultSet(getMetaData(), result);
}
OlapTable olapTable = (OlapTable) table;
Long indexId = olapTable.getIndexIdByName(indexName);
if (indexId == null) {
throw new RuntimeException(String.format("Index %s not exist.", indexName));
}
long rowCount = tableStatistic.getRowCount(olapTable.getIndexIdByName(indexName));
if (rowCount == -1) {
return new ShowResultSet(getMetaData(), result);
}
List<String> row = Lists.newArrayList();
row.add("");
row.add("");
row.add(table.getName());
row.add(indexName);
row.add(String.valueOf(rowCount));
row.add("");
row.add("");
row.add("");
row.add("");
row.add("");
result.add(row);
return new ShowResultSet(getMetaData(), result);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
import java.util.Map;
import java.util.Set;
import java.util.StringJoiner;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

public class AnalysisInfo implements Writable {

Expand Down Expand Up @@ -202,6 +204,8 @@ public enum ScheduleType {

public final boolean userInject;

public final ConcurrentMap<Long, Long> indexesRowCount = new ConcurrentHashMap<>();

public AnalysisInfo(long jobId, long taskId, List<Long> taskIds, long catalogId, long dbId, long tblId,
Map<String, Set<String>> colToPartitions, Set<String> partitionNames, String colName, Long indexId,
JobType jobType, AnalysisMode analysisMode, AnalysisMethod analysisMethod, AnalysisType analysisType,
Expand Down Expand Up @@ -350,4 +354,8 @@ public void markFailed() {
public TableIf getTable() {
return StatisticsUtil.findTable(catalogId, dbId, tblId);
}

public void addIndexRowCount(long indexId, long rowCount) {
indexesRowCount.put(indexId, rowCount);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.util.DebugUtil;
import org.apache.doris.datasource.CatalogIf;
Expand Down Expand Up @@ -333,6 +334,14 @@ protected void runQuery(String sql) {
try (AutoCloseConnectContext a = StatisticsUtil.buildConnectContext()) {
stmtExecutor = new StmtExecutor(a.connectContext, sql);
ColStatsData colStatsData = new ColStatsData(stmtExecutor.executeInternalQuery().get(0));
// Update index row count after analyze.
if (this instanceof OlapAnalysisTask) {
AnalysisInfo jobInfo = Env.getCurrentEnv().getAnalysisManager().findJobInfo(job.getJobInfo().jobId);
// For sync job, get jobInfo from job.jobInfo.
jobInfo = jobInfo == null ? job.jobInfo : jobInfo;
long indexId = info.indexId == -1 ? ((OlapTable) tbl).getBaseIndexId() : info.indexId;
jobInfo.addIndexRowCount(indexId, colStatsData.count);
}
Env.getCurrentEnv().getStatisticsCache().syncColStats(colStatsData);
queryId = DebugUtil.printId(stmtExecutor.getContext().queryId());
job.appendBuf(this, Collections.singletonList(colStatsData));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.doris.analysis.TableName;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Config;
Expand Down Expand Up @@ -263,12 +264,13 @@ public static void alterColumnStatistics(AlterColumnStatsStmt alterColumnStatsSt
String max = alterColumnStatsStmt.getValue(StatsType.MAX_VALUE);
String dataSize = alterColumnStatsStmt.getValue(StatsType.DATA_SIZE);
long indexId = alterColumnStatsStmt.getIndexId();
if (rowCount == null) {
throw new RuntimeException("Row count is null.");
}
ColumnStatisticBuilder builder = new ColumnStatisticBuilder();
String colName = alterColumnStatsStmt.getColumnName();
Column column = objects.table.getColumn(colName);
if (rowCount != null) {
builder.setCount(Double.parseDouble(rowCount));
}
builder.setCount(Double.parseDouble(rowCount));
if (ndv != null) {
double dNdv = Double.parseDouble(ndv);
builder.setNdv(dNdv);
Expand Down Expand Up @@ -323,9 +325,14 @@ public static void alterColumnStatistics(AlterColumnStatsStmt alterColumnStatsSt
.setTblUpdateTime(System.currentTimeMillis())
.setColName("")
.setColToPartitions(Maps.newHashMap())
.setRowCount((long) Double.parseDouble(rowCount))
.setUserInject(true)
.setJobType(AnalysisInfo.JobType.MANUAL)
.build();
if (objects.table instanceof OlapTable) {
indexId = indexId == -1 ? ((OlapTable) objects.table).getBaseIndexId() : indexId;
mockedJobInfo.addIndexRowCount(indexId, (long) Double.parseDouble(rowCount));
}
Env.getCurrentEnv().getAnalysisManager().updateTableStatsForAlterStats(mockedJobInfo, objects.table);
} else {
// update partition granularity statistics
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.io.Text;
import org.apache.doris.common.io.Writable;
import org.apache.doris.persist.gson.GsonPostProcessable;
import org.apache.doris.persist.gson.GsonUtils;
import org.apache.doris.statistics.AnalysisInfo.JobType;
import org.apache.doris.statistics.util.StatisticsUtil;
Expand All @@ -34,6 +35,7 @@
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
Expand All @@ -42,7 +44,7 @@
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;

public class TableStatsMeta implements Writable {
public class TableStatsMeta implements Writable, GsonPostProcessable {

@SerializedName("tblId")
public final long tblId;
Expand Down Expand Up @@ -75,6 +77,9 @@ public class TableStatsMeta implements Writable {
@SerializedName("userInjected")
public boolean userInjected;

@SerializedName("irc")
public ConcurrentMap<Long, Long> indexesRowCount = new ConcurrentHashMap<>();

@VisibleForTesting
public TableStatsMeta() {
tblId = 0;
Expand Down Expand Up @@ -158,6 +163,8 @@ public void update(AnalysisInfo analyzedJob, TableIf tableIf) {
if (tableIf != null) {
if (tableIf instanceof OlapTable) {
rowCount = analyzedJob.rowCount;
indexesRowCount.putAll(analyzedJob.indexesRowCount);
clearStaleIndexRowCount((OlapTable) tableIf);
}
if (!analyzedJob.emptyJob && analyzedJob.colToPartitions.keySet()
.containsAll(tableIf.getBaseSchema().stream()
Expand All @@ -176,4 +183,26 @@ public void update(AnalysisInfo analyzedJob, TableIf tableIf) {
}
}
}

@Override
public void gsonPostProcess() throws IOException {
if (indexesRowCount == null) {
indexesRowCount = new ConcurrentHashMap<>();
}
}

public long getRowCount(long indexId) {
return indexesRowCount.getOrDefault(indexId, -1L);
}

private void clearStaleIndexRowCount(OlapTable table) {
Iterator<Long> iterator = indexesRowCount.keySet().iterator();
List<Long> indexIds = table.getIndexIds();
while (iterator.hasNext()) {
long key = iterator.next();
if (indexIds.contains(key)) {
iterator.remove();
}
}
}
}
39 changes: 39 additions & 0 deletions regression-test/suites/statistics/test_analyze_mv.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,28 @@ suite("test_analyze_mv") {

sql """analyze table mvTestDup with sync;"""

// Test show index row count
def result_row = sql """show index stats mvTestDup mvTestDup"""
assertEquals(1, result_row.size())
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mvTestDup", result_row[0][1])
assertEquals("6", result_row[0][2])
result_row = sql """show index stats mvTestDup mv1"""
assertEquals(1, result_row.size())
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mv1", result_row[0][1])
assertEquals("6", result_row[0][2])
result_row = sql """show index stats mvTestDup mv2"""
assertEquals(1, result_row.size())
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mv2", result_row[0][1])
assertEquals("6", result_row[0][2])
result_row = sql """show index stats mvTestDup mv3"""
assertEquals(1, result_row.size())
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mv3", result_row[0][1])
assertEquals("4", result_row[0][2])

// Compare show whole table column stats result with show single column.
def result_all = sql """show column stats mvTestDup"""
assertEquals(12, result_all.size())
Expand Down Expand Up @@ -417,6 +439,23 @@ suite("test_analyze_mv") {
assertEquals("4001", result_sample[0][8])
assertEquals("FULL", result_sample[0][9])

// Test alter table index row count.
sql """alter table mvTestDup modify column `value2` set stats ('row_count'='1.5E8', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');"""
result_row = sql """show index stats mvTestDup mvTestDup;"""
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mvTestDup", result_row[0][1])
assertEquals("150000000", result_row[0][2])
sql """alter table mvTestDup index mv1 modify column `mv_key1` set stats ('row_count'='3443', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');"""
result_row = sql """show index stats mvTestDup mv1;"""
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mv1", result_row[0][1])
assertEquals("3443", result_row[0][2])
sql """alter table mvTestDup index mv3 modify column `mva_MAX__``value2``` set stats ('row_count'='234234', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');"""
result_row = sql """show index stats mvTestDup mv3;"""
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mv3", result_row[0][1])
assertEquals("234234", result_row[0][2])

sql """drop stats mvTestDup"""
result_sample = sql """show column stats mvTestDup"""
assertEquals(0, result_sample.size())
Expand Down

0 comments on commit 08453f5

Please sign in to comment.