Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics, util/ranger: add cardinality estimation trace for GetRowCountBy... #30321

Merged
merged 13 commits into from
Dec 7, 2021
6 changes: 3 additions & 3 deletions planner/core/rule_partition_processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ func (s *partitionProcessor) findUsedPartitions(ctx sessionctx.Context, tbl tabl
ranges := detachedResult.Ranges
used := make([]int, 0, len(ranges))
for _, r := range ranges {
if r.IsPointNullable(ctx) {
if r.IsPointNullable(ctx.GetSessionVars().StmtCtx) {
if !r.HighVal[0].IsNull() {
if len(r.HighVal) != len(partIdx) {
used = []int{-1}
Expand Down Expand Up @@ -473,7 +473,7 @@ func (l *listPartitionPruner) locateColumnPartitionsByCondition(cond expression.
return nil, true, nil
}
var locations []tables.ListPartitionLocation
if r.IsPointNullable(l.ctx) {
if r.IsPointNullable(l.ctx.GetSessionVars().StmtCtx) {
location, err := colPrune.LocatePartition(sc, r.HighVal[0])
if types.ErrOverflow.Equal(err) {
return nil, true, nil // return full-scan if over-flow
Expand Down Expand Up @@ -555,7 +555,7 @@ func (l *listPartitionPruner) findUsedListPartitions(conds []expression.Expressi
}
used := make(map[int]struct{}, len(ranges))
for _, r := range ranges {
if r.IsPointNullable(l.ctx) {
if r.IsPointNullable(l.ctx.GetSessionVars().StmtCtx) {
if len(r.HighVal) != len(exprCols) {
return l.fullRange, nil
}
Expand Down
75 changes: 69 additions & 6 deletions statistics/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,11 @@ import (
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/codec"
"github.com/pingcap/tidb/util/logutil"
"github.com/pingcap/tidb/util/ranger"
"github.com/pingcap/tidb/util/tracing"
"go.uber.org/atomic"
"go.uber.org/zap"
)

const (
Expand Down Expand Up @@ -331,39 +334,65 @@ func (t *Table) ColumnEqualRowCount(sc *stmtctx.StatementContext, value types.Da

// GetRowCountByIntColumnRanges estimates the row count by a slice of IntColumnRange.
//
// The column statistics for colID are looked up in coll.Columns. When the
// column is missing, or c.IsInvalid reports it as unusable, a pseudo estimate
// based on coll.Count is returned: 0 for an empty intRanges, the signed helper
// when the first range's low value is of KindInt64, and the unsigned helper
// otherwise. In all other cases the estimate comes from c.GetColumnRowCount.
// When sc.EnableOptimizerCETrace is set, the estimate is also passed to
// CETraceRange, truncated to uint64.
//
// NOTE(review): this listing is a rendered diff hunk without +/- markers. The
// direct `return getPseudoRowCountBy...` statements appear to be the pre-change
// versions of the `result = ...` / `return result, nil` lines that follow
// them; confirm against the merged file.
func (coll *HistColl) GetRowCountByIntColumnRanges(sc *stmtctx.StatementContext, colID int64, intRanges []*ranger.Range) (float64, error) {
var result float64
c, ok := coll.Columns[colID]
if !ok || c.IsInvalid(sc, coll.Pseudo) {
// Pseudo path: no usable statistics for this column.
if len(intRanges) == 0 {
return 0, nil
}
// The kind of the first range's low bound selects signed vs. unsigned estimation.
if intRanges[0].LowVal[0].Kind() == types.KindInt64 {
return getPseudoRowCountBySignedIntRanges(intRanges, float64(coll.Count)), nil
result = getPseudoRowCountBySignedIntRanges(intRanges, float64(coll.Count))
} else {
result = getPseudoRowCountByUnsignedIntRanges(intRanges, float64(coll.Count))
}
// The trace reads the column name from c, so it is only emitted when the
// lookup above succeeded.
if sc.EnableOptimizerCETrace && ok {
CETraceRange(sc, coll.PhysicalID, []string{c.Info.Name.O}, intRanges, "Column Stats-Pseudo", uint64(result))
}
return getPseudoRowCountByUnsignedIntRanges(intRanges, float64(coll.Count)), nil
return result, nil
}
result, err := c.GetColumnRowCount(sc, intRanges, coll.Count, true)
// NOTE(review): the trace is recorded before err is examined, so a failed
// estimation still produces a trace entry; the pseudo path of
// GetRowCountByColumnRanges checks err == nil first. Confirm this is intended.
if sc.EnableOptimizerCETrace {
CETraceRange(sc, coll.PhysicalID, []string{c.Info.Name.O}, intRanges, "Column Stats", uint64(result))
}
return result, errors.Trace(err)
}

// GetRowCountByColumnRanges estimates the row count by a slice of Range.
//
// The column statistics for colID are looked up in coll.Columns. When the
// column is missing, or c.IsInvalid reports it as unusable, the estimate is
// delegated to GetPseudoRowCountByColumnRanges with coll.Count as the table
// row count; otherwise it comes from c.GetColumnRowCount. When
// sc.EnableOptimizerCETrace is set, the estimate is also passed to
// CETraceRange, truncated to uint64.
//
// NOTE(review): this listing is a rendered diff hunk without +/- markers. The
// direct `return GetPseudoRowCountByColumnRanges(...)` statement appears to be
// the pre-change version of the `result, err := ...` line that follows it, and
// the two non-Go lines inside the last `if` are review-UI text from the page,
// not part of the function; confirm against the merged file.
func (coll *HistColl) GetRowCountByColumnRanges(sc *stmtctx.StatementContext, colID int64, colRanges []*ranger.Range) (float64, error) {
c, ok := coll.Columns[colID]
if !ok || c.IsInvalid(sc, coll.Pseudo) {
// Pseudo path: no usable statistics for this column.
return GetPseudoRowCountByColumnRanges(sc, float64(coll.Count), colRanges, 0)
result, err := GetPseudoRowCountByColumnRanges(sc, float64(coll.Count), colRanges, 0)
// Trace only successful estimates, and only when the lookup succeeded,
// because the column name is read from c.
if err == nil && sc.EnableOptimizerCETrace && ok {
CETraceRange(sc, coll.PhysicalID, []string{c.Info.Name.O}, colRanges, "Column Stats-Pseudo", uint64(result))
}
return result, err
}
result, err := c.GetColumnRowCount(sc, colRanges, coll.Count, false)
// NOTE(review): unlike the pseudo path above, the trace here is recorded
// without checking err first. Confirm this is intended.
if sc.EnableOptimizerCETrace {
CETraceRange(sc, coll.PhysicalID, []string{c.Info.Name.O}, colRanges, "Column Stats", uint64(result))
time-and-fate marked this conversation as resolved.
Show resolved Hide resolved
}
return result, errors.Trace(err)
}

// GetRowCountByIndexRanges estimates the row count by a slice of Range.
func (coll *HistColl) GetRowCountByIndexRanges(sc *stmtctx.StatementContext, idxID int64, indexRanges []*ranger.Range) (float64, error) {
idx := coll.Indices[idxID]
if idx == nil || idx.IsInvalid(coll.Pseudo) {
idx, ok := coll.Indices[idxID]
colNames := make([]string, 0, 8)
if ok {
for _, col := range idx.Info.Columns {
colNames = append(colNames, col.Name.O)
}
}
if !ok || idx.IsInvalid(coll.Pseudo) {
colsLen := -1
if idx != nil && idx.Info.Unique {
colsLen = len(idx.Info.Columns)
}
return getPseudoRowCountByIndexRanges(sc, indexRanges, float64(coll.Count), colsLen)
result, err := getPseudoRowCountByIndexRanges(sc, indexRanges, float64(coll.Count), colsLen)
if err == nil && sc.EnableOptimizerCETrace && ok {
CETraceRange(sc, coll.PhysicalID, colNames, indexRanges, "Index Stats-Pseudo", uint64(result))
}
return result, err
}
var result float64
var err error
Expand All @@ -372,9 +401,43 @@ func (coll *HistColl) GetRowCountByIndexRanges(sc *stmtctx.StatementContext, idx
} else {
result, err = idx.GetRowCount(sc, coll, indexRanges, coll.Count)
}
if sc.EnableOptimizerCETrace {
CETraceRange(sc, coll.PhysicalID, colNames, indexRanges, "Index Stats", uint64(result))
}
return result, errors.Trace(err)
}

// CETraceRange appends a list of ranges and related information into CE trace.
//
// The record type is tp suffixed with "-Point" when every element of ranges
// satisfies IsPointNullable and with "-Range" otherwise; an empty ranges
// slice therefore yields "-Point". The ranges are rendered into an expression
// string by ranger.RangesToString using colNames, and the resulting record
// (tableID, type, expression, rowCount) is appended to sc.OptimizerCETrace.
//
// Nothing is recorded when the ranges cannot be rendered (the failure is
// logged at debug level) or when the rendered expression is "", "true" or
// "false".
func CETraceRange(sc *stmtctx.StatementContext, tableID int64, colNames []string, ranges []*ranger.Range, tp string, rowCount uint64) {
	allPoint := true
	for _, ran := range ranges {
		if !ran.IsPointNullable(sc) {
			allPoint = false
			break
		}
	}
	if allPoint {
		tp += "-Point"
	} else {
		tp += "-Range"
	}

	expr, err := ranger.RangesToString(sc, ranges, colNames)
	if err != nil {
		// Tracing is best-effort: log the failure and stop here instead of
		// inspecting a value that is meaningless alongside a non-nil error.
		logutil.BgLogger().Debug("[OptimizerTrace] Failed to trace CE of ranges", zap.Error(err))
		return
	}
	// We don't need to record meaningless expressions.
	if expr == "" || expr == "true" || expr == "false" {
		return
	}

	sc.OptimizerCETrace = append(sc.OptimizerCETrace, &tracing.CETraceRecord{
		TableID:  tableID,
		Type:     tp,
		Expr:     expr,
		RowCount: rowCount,
	})
}

// PseudoAvgCountPerValue gets a pseudo average count if histogram not exists.
func (t *Table) PseudoAvgCountPerValue() float64 {
return float64(t.Count) / pseudoEqualRate
Expand Down
134 changes: 130 additions & 4 deletions statistics/testdata/trace_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,41 @@
{
"Expr": "a > 0 and a < 2",
"Trace": [
{
"TableID": 57,
"TableName": "",
"Type": "Column Stats-Point",
"Expr": "((a = 1))",
"RowCount": 4
},
{
"TableID": 57,
"TableName": "",
"Type": "Index Stats-Point",
"Expr": "((a = 1))",
"RowCount": 4
},
{
"TableID": 57,
"TableName": "",
"Type": "Column Stats-Range",
"Expr": "((a > 0 and a < 2))",
"RowCount": 4
},
{
"TableID": 57,
"TableName": "",
"Type": "Column Stats-Point",
"Expr": "((a = 1))",
"RowCount": 4
},
{
"TableID": 57,
"TableName": "",
"Type": "Index Stats-Point",
"Expr": "((a = 1))",
"RowCount": 4
},
{
"TableID": 57,
"TableName": "",
Expand All @@ -24,6 +59,27 @@
{
"Expr": "a >= 1 and a < 10",
"Trace": [
{
"TableID": 57,
"TableName": "",
"Type": "Index Stats-Range",
"Expr": "((a >= 1 and a < 10))",
"RowCount": 6
},
{
"TableID": 57,
"TableName": "",
"Type": "Column Stats-Range",
"Expr": "((a >= 1 and a < 10))",
"RowCount": 6
},
{
"TableID": 57,
"TableName": "",
"Type": "Index Stats-Range",
"Expr": "((a >= 1 and a < 10))",
"RowCount": 6
},
{
"TableID": 57,
"TableName": "",
Expand All @@ -43,6 +99,20 @@
{
"Expr": "a < 3 or b < 4",
"Trace": [
{
"TableID": 57,
"TableName": "",
"Type": "Column Stats-Range",
"Expr": "((a < 3))",
"RowCount": 6
},
{
"TableID": 57,
"TableName": "",
"Type": "Index Stats-Range",
"Expr": "((a < 3))",
"RowCount": 6
},
{
"TableID": 57,
"TableName": "",
Expand All @@ -64,6 +134,13 @@
"Expr": "`or`(`lt`(test.t.a, 3), `lt`(test.t.b, 4))",
"RowCount": 6
},
{
"TableID": 57,
"TableName": "",
"Type": "Column Stats-Range",
"Expr": "((b < 4))",
"RowCount": 6
},
{
"TableID": 57,
"TableName": "",
Expand Down Expand Up @@ -99,18 +176,25 @@
"Expr": "`or`(`lt`(test.t.a, 3), `lt`(test.t.b, 4))",
"RowCount": 6
},
{
"TableID": 57,
"TableName": "",
"Type": "Column Stats-Range",
"Expr": "((b < 4))",
"RowCount": 6
},
{
"TableID": 57,
"TableName": "",
"Type": "Table Stats-Expression-CNF",
"Expr": "`lt`(test.t.a, 3)",
"Expr": "`lt`(test.t.b, 4)",
"RowCount": 6
},
{
"TableID": 57,
"TableName": "",
"Type": "Table Stats-Expression-CNF",
"Expr": "`lt`(test.t.a, 3)",
"Expr": "`lt`(test.t.b, 4)",
"RowCount": 6
},
{
Expand All @@ -120,18 +204,32 @@
"Expr": "`or`(`lt`(test.t.a, 3), `lt`(test.t.b, 4))",
"RowCount": 6
},
{
"TableID": 57,
"TableName": "",
"Type": "Column Stats-Range",
"Expr": "((a < 3))",
"RowCount": 6
},
{
"TableID": 57,
"TableName": "",
"Type": "Index Stats-Range",
"Expr": "((a < 3))",
"RowCount": 6
},
{
"TableID": 57,
"TableName": "",
"Type": "Table Stats-Expression-CNF",
"Expr": "`lt`(test.t.b, 4)",
"Expr": "`lt`(test.t.a, 3)",
"RowCount": 6
},
{
"TableID": 57,
"TableName": "",
"Type": "Table Stats-Expression-CNF",
"Expr": "`lt`(test.t.b, 4)",
"Expr": "`lt`(test.t.a, 3)",
"RowCount": 6
},
{
Expand Down Expand Up @@ -160,6 +258,34 @@
{
"Expr": "a = 1 and b = 2",
"Trace": [
{
"TableID": 57,
"TableName": "",
"Type": "Index Stats-Point",
"Expr": "((a = 1) and (b = 2))",
"RowCount": 2
},
{
"TableID": 57,
"TableName": "",
"Type": "Column Stats-Point",
"Expr": "((a = 1))",
"RowCount": 4
},
{
"TableID": 57,
"TableName": "",
"Type": "Column Stats-Point",
"Expr": "((b = 2))",
"RowCount": 3
},
{
"TableID": 57,
"TableName": "",
"Type": "Index Stats-Point",
"Expr": "((a = 1) and (b = 2))",
"RowCount": 2
},
{
"TableID": 57,
"TableName": "",
Expand Down
Loading