planner: fix wrong row-sizes used in cost model (#33845)
close #33844
qw4990 authored Apr 13, 2022
1 parent 3ab2df9 commit f5c2710
Showing 8 changed files with 62 additions and 37 deletions.
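The core of the fix: average-row-size estimation moves onto the scan operators themselves (`getScanRowSize` on `PhysicalTableScan` / `PhysicalIndexScan`, which now carry the pre-pruning columns and histograms), so cost call sites stop feeding mismatched column sets into the `rowCount * rowSize * scanFactor` formula used throughout this diff. Below is a minimal, self-contained sketch of why the column set matters; the column widths and scan factor are made-up toy values, not TiDB's real statistics code.

```go
package main

import "fmt"

// Toy per-column average widths (bytes); values are invented for illustration only.
var colWidth = map[string]float64{
	"l_orderkey": 8, "l_extendedprice": 9, "l_discount": 9, "l_shipdate": 4,
}

// avgRowSize sums the average widths of the given columns, a very coarse
// stand-in for statistics.HistColl.GetTableAvgRowSize.
func avgRowSize(cols []string) float64 {
	var s float64
	for _, c := range cols {
		s += colWidth[c]
	}
	return s
}

// scanCost mirrors the formula used at the cost sites in this diff:
// cost = rowCount * rowSize * scanFactor.
func scanCost(rowCount, rowSize, scanFactor float64) float64 {
	return rowCount * rowSize * scanFactor
}

func main() {
	rowCount, scanFactor := 1000.0, 1.5 // assumed toy inputs
	allCols := []string{"l_orderkey", "l_extendedprice", "l_discount", "l_shipdate"}
	prunedCols := []string{"l_orderkey", "l_extendedprice"}

	// Picking a column set that does not match what the scan actually reads
	// inflates or deflates the estimated cost.
	fmt.Println("cost with full column set:  ", scanCost(rowCount, avgRowSize(allCols), scanFactor))
	fmt.Println("cost with pruned column set:", scanCost(rowCount, avgRowSize(prunedCols), scanFactor))
}
```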
9 changes: 5 additions & 4 deletions cmd/explaintest/r/tpch.result
@@ -254,17 +254,18 @@ Projection 10.00 root tpch.lineitem.l_orderkey, Column#35, tpch.orders.o_orderd
└─TopN 10.00 root Column#35:desc, tpch.orders.o_orderdate, offset:0, count:10
└─HashAgg 40252367.98 root group by:Column#48, Column#49, Column#50, funcs:sum(Column#44)->Column#35, funcs:firstrow(Column#45)->tpch.orders.o_orderdate, funcs:firstrow(Column#46)->tpch.orders.o_shippriority, funcs:firstrow(Column#47)->tpch.lineitem.l_orderkey
└─Projection 91515927.49 root mul(tpch.lineitem.l_extendedprice, minus(1, tpch.lineitem.l_discount))->Column#44, tpch.orders.o_orderdate, tpch.orders.o_shippriority, tpch.lineitem.l_orderkey, tpch.lineitem.l_orderkey, tpch.orders.o_orderdate, tpch.orders.o_shippriority
└─HashJoin 91515927.49 root inner join, equal:[eq(tpch.orders.o_orderkey, tpch.lineitem.l_orderkey)]
└─IndexHashJoin 91515927.49 root inner join, inner:IndexLookUp, outer key:tpch.orders.o_orderkey, inner key:tpch.lineitem.l_orderkey, equal cond:eq(tpch.orders.o_orderkey, tpch.lineitem.l_orderkey)
├─HashJoin(Build) 22592975.51 root inner join, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)]
│ ├─TableReader(Build) 1498236.00 root data:Selection
│ │ └─Selection 1498236.00 cop[tikv] eq(tpch.customer.c_mktsegment, "AUTOMOBILE")
│ │ └─TableFullScan 7500000.00 cop[tikv] table:customer keep order:false
│ └─TableReader(Probe) 36870000.00 root data:Selection
│ └─Selection 36870000.00 cop[tikv] lt(tpch.orders.o_orderdate, 1995-03-13 00:00:00.000000)
│ └─TableFullScan 75000000.00 cop[tikv] table:orders keep order:false
└─TableReader(Probe) 163047704.27 root data:Selection
└─Selection 163047704.27 cop[tikv] gt(tpch.lineitem.l_shipdate, 1995-03-13 00:00:00.000000)
└─TableFullScan 300005811.00 cop[tikv] table:lineitem keep order:false
└─IndexLookUp(Probe) 4.05 root
├─IndexRangeScan(Build) 7.45 cop[tikv] table:lineitem, index:PRIMARY(L_ORDERKEY, L_LINENUMBER) range: decided by [eq(tpch.lineitem.l_orderkey, tpch.orders.o_orderkey)], keep order:false
└─Selection(Probe) 4.05 cop[tikv] gt(tpch.lineitem.l_shipdate, 1995-03-13 00:00:00.000000)
└─TableRowIDScan 7.45 cop[tikv] table:lineitem keep order:false
/*
Q4 Order Priority Checking Query
This query determines how well the order priority system is working and gives an assessment of customer satisfaction.
10 changes: 8 additions & 2 deletions planner/core/exhaust_physical_plans.go
@@ -959,6 +959,8 @@ func (p *LogicalJoin) constructInnerTableScanTask(
isPartition: ds.isPartition,

underInnerIndexJoin: true,
tblCols: ds.TblCols,
tblColHists: ds.TblColHists,
}.Init(ds.ctx, ds.blockOffset)
ts.SetSchema(ds.schema.Clone())
if rowCount <= 0 {
@@ -983,7 +985,7 @@ func (p *LogicalJoin) constructInnerTableScanTask(
StatsVersion: ds.stats.StatsVersion,
// NDV would not be used in cost computation of IndexJoin, set leave it as default nil.
}
rowSize := ds.TblColHists.GetTableAvgRowSize(p.ctx, ds.TblCols, ts.StoreType, true)
rowSize := ts.getScanRowSize()
sessVars := ds.ctx.GetSessionVars()
copTask := &copTask{
tablePlan: ts,
@@ -1055,6 +1057,8 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
Desc: desc,
isPartition: ds.isPartition,
physicalTableID: ds.physicalTableID,
tblColHists: ds.TblColHists,
pkIsHandleCol: ds.getPKIsHandleCol(),

underInnerIndexJoin: true,
}.Init(ds.ctx, ds.blockOffset)
@@ -1078,6 +1082,8 @@
TableAsName: ds.TableAsName,
isPartition: ds.isPartition,
physicalTableID: ds.physicalTableID,
tblCols: ds.TblCols,
tblColHists: ds.TblColHists,
}.Init(ds.ctx, ds.blockOffset)
ts.schema = is.dataSourceSchema.Clone()
if ds.tableInfo.IsCommonHandle {
@@ -1151,7 +1157,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
tmpPath.CountAfterAccess = cnt
}
is.stats = ds.tableStats.ScaleByExpectCnt(tmpPath.CountAfterAccess)
rowSize := is.indexScanRowSize(path.Index, ds, true)
rowSize := is.getScanRowSize()
sessVars := ds.ctx.GetSessionVars()
cop.cst = tmpPath.CountAfterAccess * rowSize * sessVars.GetScanFactor(ds.tableInfo)
finalStats := ds.tableStats.ScaleByExpectCnt(rowCount)
44 changes: 26 additions & 18 deletions planner/core/find_best_task.go
@@ -1034,9 +1034,8 @@ func (ds *DataSource) convertToIndexMergeScan(prop *property.PhysicalProperty, c
func (ds *DataSource) convertToPartialIndexScan(prop *property.PhysicalProperty, path *util.AccessPath) (
indexPlan PhysicalPlan,
partialCost float64) {
idx := path.Index
is, partialCost, rowCount := ds.getOriginalPhysicalIndexScan(prop, path, false, false)
rowSize := is.indexScanRowSize(idx, ds, false)
rowSize := is.stats.HistColl.GetAvgRowSize(is.ctx, is.schema.Columns, true, false)
// TODO: Consider using isCoveringIndex() to avoid another TableRead
indexConds := path.IndexFilters
sessVars := ds.ctx.GetSessionVars()
@@ -1151,6 +1150,8 @@ func (ds *DataSource) buildIndexMergeTableScan(prop *property.PhysicalProperty,
isPartition: ds.isPartition,
physicalTableID: ds.physicalTableID,
HandleCols: ds.handleCols,
tblCols: ds.TblCols,
tblColHists: ds.TblColHists,
}.Init(ds.ctx, ds.blockOffset)
ts.SetSchema(ds.schema.Clone())
err := setIndexMergeTableScanHandleCols(ds, ts)
@@ -1164,7 +1165,7 @@
}
}
}
rowSize := ds.TblColHists.GetTableAvgRowSize(ds.ctx, ds.TblCols, ts.StoreType, true)
rowSize := ts.getScanRowSize()
partialCost += totalRowCount * rowSize * sessVars.GetScanFactor(ds.tableInfo)
ts.stats = ds.tableStats.ScaleByExpectCnt(totalRowCount)
if ds.statisticTable.Pseudo {
@@ -1307,6 +1308,8 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty,
TableAsName: ds.TableAsName,
isPartition: ds.isPartition,
physicalTableID: ds.physicalTableID,
tblCols: ds.TblCols,
tblColHists: ds.TblColHists,
}.Init(ds.ctx, is.blockOffset)
ts.SetSchema(ds.schema.Clone())
ts.SetCost(cost)
@@ -1358,22 +1361,20 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty,
return task, nil
}

func (is *PhysicalIndexScan) indexScanRowSize(idx *model.IndexInfo, ds *DataSource, isForScan bool) float64 {
func (is *PhysicalIndexScan) getScanRowSize() float64 {
idx := is.Index
scanCols := make([]*expression.Column, 0, len(idx.Columns)+1)
// If `initSchema` has already appended the handle column in schema, just use schema columns, otherwise, add extra handle column.
if len(idx.Columns) == len(is.schema.Columns) {
scanCols = append(scanCols, is.schema.Columns...)
handleCol := ds.getPKIsHandleCol()
handleCol := is.pkIsHandleCol
if handleCol != nil {
scanCols = append(scanCols, handleCol)
}
} else {
scanCols = is.schema.Columns
}
if isForScan {
return ds.TblColHists.GetIndexAvgRowSize(is.ctx, scanCols, is.Index.Unique)
}
return ds.TblColHists.GetAvgRowSize(is.ctx, scanCols, true, false)
return is.tblColHists.GetIndexAvgRowSize(is.ctx, scanCols, is.Index.Unique)
}

// initSchema is used to set the schema of PhysicalIndexScan. Before calling this,
@@ -2085,6 +2086,15 @@ func (ts *PhysicalTableScan) addPushedDownSelection(copTask *copTask, stats *pro
}
}

func (ts *PhysicalTableScan) getScanRowSize() float64 {
if ts.StoreType == kv.TiKV {
return ts.tblColHists.GetTableAvgRowSize(ts.ctx, ts.tblCols, ts.StoreType, true)
}
// If `ts.handleCol` is nil, then the schema of tableScan doesn't have handle column.
// This logic can be ensured in column pruning.
return ts.tblColHists.GetTableAvgRowSize(ts.ctx, ts.Schema().Columns, ts.StoreType, ts.HandleCols != nil)
}

func (ds *DataSource) getOriginalPhysicalTableScan(prop *property.PhysicalProperty, path *util.AccessPath, isMatchProp bool) (*PhysicalTableScan, float64, float64) {
ts := PhysicalTableScan{
Table: ds.tableInfo,
@@ -2096,6 +2106,9 @@ func (ds *DataSource) getOriginalPhysicalTableScan(prop *property.PhysicalProper
Ranges: path.Ranges,
AccessCondition: path.AccessConds,
StoreType: path.StoreType,
HandleCols: ds.handleCols,
tblCols: ds.TblCols,
tblColHists: ds.TblColHists,
}.Init(ds.ctx, ds.blockOffset)
ts.filterCondition = make([]expression.Expression, len(path.TableFilters))
copy(ts.filterCondition, path.TableFilters)
@@ -2135,14 +2148,7 @@
// we still need to assume values are uniformly distributed. For simplicity, we use uniform-assumption
// for all columns now, as we do in `deriveStatsByFilter`.
ts.stats = ds.tableStats.ScaleByExpectCnt(rowCount)
var rowSize float64
if ts.StoreType == kv.TiKV {
rowSize = ds.TblColHists.GetTableAvgRowSize(ds.ctx, ds.TblCols, ts.StoreType, true)
} else {
// If `ds.handleCol` is nil, then the schema of tableScan doesn't have handle column.
// This logic can be ensured in column pruning.
rowSize = ds.TblColHists.GetTableAvgRowSize(ds.ctx, ts.Schema().Columns, ts.StoreType, ds.handleCols != nil)
}
rowSize := ts.getScanRowSize()
sessVars := ds.ctx.GetSessionVars()
cost := rowCount * rowSize * sessVars.GetScanFactor(ds.tableInfo)
if isMatchProp {
@@ -2170,6 +2176,8 @@ func (ds *DataSource) getOriginalPhysicalIndexScan(prop *property.PhysicalProper
dataSourceSchema: ds.schema,
isPartition: ds.isPartition,
physicalTableID: ds.physicalTableID,
tblColHists: ds.TblColHists,
pkIsHandleCol: ds.getPKIsHandleCol(),
}.Init(ds.ctx, ds.blockOffset)
statsTbl := ds.statisticTable
if statsTbl.Indices[idx.ID] != nil {
@@ -2188,7 +2196,7 @@
}
}
is.stats = ds.tableStats.ScaleByExpectCnt(rowCount)
rowSize := is.indexScanRowSize(idx, ds, true)
rowSize := is.getScanRowSize()
sessVars := ds.ctx.GetSessionVars()
cost := rowCount * rowSize * sessVars.GetScanFactor(ds.tableInfo)
if isMatchProp {
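The new `PhysicalIndexScan.getScanRowSize` above decides which columns an index scan materializes before asking the histogram collection for an index-row size: when the schema has exactly as many columns as the index, the handle column has not been appended yet, so it is added explicitly. A rough illustration of that branch follows; the types below are simplified stand-ins, not the real `expression.Column` / `model.IndexInfo`.

```go
package main

import "fmt"

// column is a toy stand-in for an index or schema column.
type column struct{ name string }

// scanColumns mirrors the column-selection branch in getScanRowSize: if the
// schema has exactly as many columns as the index, the handle column has not
// been appended yet, so it is added when the primary key is the handle.
func scanColumns(idxCols, schemaCols []column, pkIsHandleCol *column) []column {
	if len(idxCols) == len(schemaCols) {
		cols := append([]column{}, schemaCols...)
		if pkIsHandleCol != nil {
			cols = append(cols, *pkIsHandleCol)
		}
		return cols
	}
	// Otherwise the schema already carries the handle column; use it as-is.
	return schemaCols
}

func main() {
	idx := []column{{"f"}}
	schema := []column{{"f"}}
	handle := &column{"a"} // assumed integer primary key used as the handle
	// Row size is then computed over f plus the handle, not over f alone.
	fmt.Println(scanColumns(idx, schema, handle)) // [{f} {a}]
}
```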
4 changes: 2 additions & 2 deletions planner/core/integration_test.go
@@ -5296,11 +5296,11 @@ func TestIndexJoinCost(t *testing.T) {
` └─Selection_8 1.25 0.00 cop[tikv] not(isnull(test.t_inner_idx.a))`,
` └─IndexRangeScan_7 1.25 0.00 cop[tikv] table:t_inner_idx, index:a(a) range: decided by [eq(test.t_inner_idx.a, test.t_outer.a)], keep order:false, stats:pseudo`))
tk.MustQuery(`explain format=verbose select /*+ TIDB_INLJ(t_outer, t_inner_idx) */ * from t_outer, t_inner_idx where t_outer.a=t_inner_idx.a`).Check(testkit.Rows( // IndexJoin with inner IndexLookup
`IndexJoin_11 12487.50 529388.13 root inner join, inner:IndexLookUp_10, outer key:test.t_outer.a, inner key:test.t_inner_idx.a, equal cond:eq(test.t_outer.a, test.t_inner_idx.a)`,
`IndexJoin_11 12487.50 518149.38 root inner join, inner:IndexLookUp_10, outer key:test.t_outer.a, inner key:test.t_inner_idx.a, equal cond:eq(test.t_outer.a, test.t_inner_idx.a)`,
`├─TableReader_23(Build) 9990.00 36412.58 root data:Selection_22`,
`│ └─Selection_22 9990.00 465000.00 cop[tikv] not(isnull(test.t_outer.a))`,
`│ └─TableFullScan_21 10000.00 435000.00 cop[tikv] table:t_outer keep order:false, stats:pseudo`,
`└─IndexLookUp_10(Probe) 1.25 35.34 root `,
`└─IndexLookUp_10(Probe) 1.25 34.21 root `,
` ├─Selection_9(Build) 1.25 0.00 cop[tikv] not(isnull(test.t_inner_idx.a))`,
` │ └─IndexRangeScan_7 1.25 0.00 cop[tikv] table:t_inner_idx, index:a(a) range: decided by [eq(test.t_inner_idx.a, test.t_outer.a)], keep order:false, stats:pseudo`,
` └─TableRowIDScan_8(Probe) 1.25 0.00 cop[tikv] table:t_inner_idx keep order:false, stats:pseudo`))
6 changes: 6 additions & 0 deletions planner/core/physical_plans.go
@@ -441,6 +441,9 @@ type PhysicalIndexScan struct {
// required by cost model
// IndexScan operators under inner side of IndexJoin no need to consider net seek cost
underInnerIndexJoin bool
// tblColHists contains all columns before pruning, which are used to calculate row-size
tblColHists *statistics.HistColl
pkIsHandleCol *expression.Column
}

// Clone implements PhysicalPlan interface.
@@ -541,6 +544,9 @@ type PhysicalTableScan struct {
// required by cost model
// TableScan operators under inner side of IndexJoin no need to consider net seek cost
underInnerIndexJoin bool
// tblCols and tblColHists contains all columns before pruning, which are used to calculate row-size
tblCols []*expression.Column
tblColHists *statistics.HistColl
}

// Clone implements PhysicalPlan interface.
2 changes: 2 additions & 0 deletions planner/core/planbuilder.go
@@ -1457,6 +1457,7 @@ func (b *PlanBuilder) buildPhysicalIndexLookUpReader(ctx context.Context, dbName
Ranges: ranger.FullRange(),
physicalTableID: physicalID,
isPartition: isPartition,
tblColHists: &(statistics.PseudoTable(tblInfo)).HistColl,
}.Init(b.ctx, b.getSelectOffset())
// There is no alternative plan choices, so just use pseudo stats to avoid panic.
is.stats = &property.StatsInfo{HistColl: &(statistics.PseudoTable(tblInfo)).HistColl}
@@ -1474,6 +1475,7 @@
TableAsName: &tblInfo.Name,
physicalTableID: physicalID,
isPartition: isPartition,
tblColHists: &(statistics.PseudoTable(tblInfo)).HistColl,
}.Init(b.ctx, b.getSelectOffset())
ts.SetSchema(idxColSchema)
ts.Columns = ExpandVirtualColumn(ts.Columns, ts.schema, ts.Table.Columns)
6 changes: 4 additions & 2 deletions planner/core/task.go
@@ -187,8 +187,10 @@ func (t *copTask) finishIndexPlan() {
}

// Calculate the IO cost of table scan here because we cannot know its stats until we finish index plan.
rowSize := t.tblColHists.GetIndexAvgRowSize(t.indexPlan.SCtx(), t.tblCols, is.Index.Unique)
t.cst += cnt * rowSize * sessVars.GetScanFactor(tableInfo)
for p = t.tablePlan; len(p.Children()) > 0; p = p.Children()[0] {
}
ts := p.(*PhysicalTableScan)
t.cst += cnt * ts.getScanRowSize() * sessVars.GetScanFactor(tableInfo)
}

func (t *copTask) getStoreType() kv.StoreType {
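The reworked `finishIndexPlan` above charges the table-side IO of a double read using the leaf `PhysicalTableScan`'s own `getScanRowSize`, found by walking the table plan down its first-child chain. A toy sketch of that descent follows; the plan type here is invented for illustration and is not the real PhysicalPlan interface.

```go
package main

import "fmt"

// plan is a toy stand-in for a physical plan node with a chain of children.
type plan struct {
	name     string
	children []*plan
}

// leaf walks down the first-child chain, the same shape as the loop in the diff:
// for p = t.tablePlan; len(p.Children()) > 0; p = p.Children()[0] {}
func leaf(p *plan) *plan {
	for ; len(p.children) > 0; p = p.children[0] {
	}
	return p
}

func main() {
	ts := &plan{name: "PhysicalTableScan"}
	sel := &plan{name: "PhysicalSelection", children: []*plan{ts}}
	// The leaf scan is the operator whose getScanRowSize feeds the IO cost.
	fmt.Println(leaf(sel).name) // PhysicalTableScan
}
```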
18 changes: 9 additions & 9 deletions planner/core/testdata/integration_suite_out.json
@@ -1816,7 +1816,7 @@
{
"SQL": "select * from t where a > 1 order by f",
"Plan": [
"IndexLookUp_14 3333.33 139413.67 root ",
"IndexLookUp_14 3333.33 136747.00 root ",
"├─Selection_13(Build) 3333.33 0.00 cop[tikv] gt(test.t.a, 1)",
"│ └─IndexFullScan_11 10000.00 555000.00 cop[tikv] table:t, index:f(f) keep order:true, stats:pseudo",
"└─TableRowIDScan_12(Probe) 3333.33 555000.00 cop[tikv] table:t keep order:false, stats:pseudo"
@@ -1828,9 +1828,9 @@
{
"SQL": "select * from t where f > 1",
"Plan": [
"TableReader_7 3333.33 88640.22 root data:Selection_6",
"└─Selection_6 3333.33 1140000.00 cop[tikv] gt(test.t.f, 1)",
" └─TableFullScan_5 10000.00 1110000.00 cop[tikv] table:t keep order:false, stats:pseudo"
"IndexLookUp_10 3333.33 86674.83 root ",
"├─IndexRangeScan_8(Build) 3333.33 185000.00 cop[tikv] table:t, index:f(f) range:(1,+inf], keep order:false, stats:pseudo",
"└─TableRowIDScan_9(Probe) 3333.33 185000.00 cop[tikv] table:t keep order:false, stats:pseudo"
],
"Warnings": [
"Note 1105 [t,f,f_g] remain after pruning paths for t given Prop{SortItems: [], TaskTp: rootTask}"
@@ -1849,7 +1849,7 @@
{
"SQL": "select * from t where f > 3 and g = 5",
"Plan": [
"IndexLookUp_15 3.33 215.74 root ",
"IndexLookUp_15 3.33 206.74 root ",
"├─IndexRangeScan_12(Build) 10.00 570.00 cop[tikv] table:t, index:g(g) range:[5,5], keep order:false, stats:pseudo",
"└─Selection_14(Probe) 3.33 0.00 cop[tikv] gt(test.t.f, 3)",
" └─TableRowIDScan_13 10.00 570.00 cop[tikv] table:t keep order:false, stats:pseudo"
@@ -1861,8 +1861,8 @@
{
"SQL": "select * from t where g = 5 order by f",
"Plan": [
"Sort_5 10.00 362.68 root test.t.f",
"└─IndexLookUp_13 10.00 239.01 root ",
"Sort_5 10.00 353.68 root test.t.f",
"└─IndexLookUp_13 10.00 230.01 root ",
" ├─IndexRangeScan_11(Build) 10.00 570.00 cop[tikv] table:t, index:g(g) range:[5,5], keep order:false, stats:pseudo",
" └─TableRowIDScan_12(Probe) 10.00 570.00 cop[tikv] table:t keep order:false, stats:pseudo"
],
@@ -1873,7 +1873,7 @@
{
"SQL": "select * from t where d = 3 order by c, e",
"Plan": [
"IndexLookUp_15 10.00 57230.78 root ",
"IndexLookUp_15 10.00 57222.78 root ",
"├─Selection_14(Build) 10.00 0.00 cop[tikv] eq(test.t.d, 3)",
"│ └─IndexFullScan_12 10000.00 825000.00 cop[tikv] table:t, index:c_d_e(c, d, e) keep order:true, stats:pseudo",
"└─TableRowIDScan_13(Probe) 10.00 825000.00 cop[tikv] table:t keep order:false, stats:pseudo"
@@ -1931,7 +1931,7 @@
{
"SQL": "explain format = 'verbose' select * from t where b > 5",
"Plan": [
"IndexLookUp_7 3.00 64.81 root ",
"IndexLookUp_7 3.00 57.91 root ",
"├─IndexRangeScan_5(Build) 3.00 171.00 cop[tikv] table:t, index:idx_b(b) range:(5,+inf], keep order:false",
"└─TableRowIDScan_6(Probe) 3.00 171.00 cop[tikv] table:t keep order:false"
],
