planner: fix wrong row-sizes used in cost model #33845

Merged 25 commits on Apr 13, 2022
Changes from 22 commits
9 changes: 5 additions & 4 deletions cmd/explaintest/r/tpch.result
@@ -254,17 +254,18 @@ Projection 10.00 root tpch.lineitem.l_orderkey, Column#35, tpch.orders.o_orderd
└─TopN 10.00 root Column#35:desc, tpch.orders.o_orderdate, offset:0, count:10
└─HashAgg 40252367.98 root group by:Column#48, Column#49, Column#50, funcs:sum(Column#44)->Column#35, funcs:firstrow(Column#45)->tpch.orders.o_orderdate, funcs:firstrow(Column#46)->tpch.orders.o_shippriority, funcs:firstrow(Column#47)->tpch.lineitem.l_orderkey
└─Projection 91515927.49 root mul(tpch.lineitem.l_extendedprice, minus(1, tpch.lineitem.l_discount))->Column#44, tpch.orders.o_orderdate, tpch.orders.o_shippriority, tpch.lineitem.l_orderkey, tpch.lineitem.l_orderkey, tpch.orders.o_orderdate, tpch.orders.o_shippriority
└─HashJoin 91515927.49 root inner join, equal:[eq(tpch.orders.o_orderkey, tpch.lineitem.l_orderkey)]
└─IndexHashJoin 91515927.49 root inner join, inner:IndexLookUp, outer key:tpch.orders.o_orderkey, inner key:tpch.lineitem.l_orderkey, equal cond:eq(tpch.orders.o_orderkey, tpch.lineitem.l_orderkey)
├─HashJoin(Build) 22592975.51 root inner join, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)]
│ ├─TableReader(Build) 1498236.00 root data:Selection
│ │ └─Selection 1498236.00 cop[tikv] eq(tpch.customer.c_mktsegment, "AUTOMOBILE")
│ │ └─TableFullScan 7500000.00 cop[tikv] table:customer keep order:false
│ └─TableReader(Probe) 36870000.00 root data:Selection
│ └─Selection 36870000.00 cop[tikv] lt(tpch.orders.o_orderdate, 1995-03-13 00:00:00.000000)
│ └─TableFullScan 75000000.00 cop[tikv] table:orders keep order:false
└─TableReader(Probe) 163047704.27 root data:Selection
└─Selection 163047704.27 cop[tikv] gt(tpch.lineitem.l_shipdate, 1995-03-13 00:00:00.000000)
└─TableFullScan 300005811.00 cop[tikv] table:lineitem keep order:false
└─IndexLookUp(Probe) 4.05 root
├─IndexRangeScan(Build) 7.45 cop[tikv] table:lineitem, index:PRIMARY(L_ORDERKEY, L_LINENUMBER) range: decided by [eq(tpch.lineitem.l_orderkey, tpch.orders.o_orderkey)], keep order:false
└─Selection(Probe) 4.05 cop[tikv] gt(tpch.lineitem.l_shipdate, 1995-03-13 00:00:00.000000)
└─TableRowIDScan 7.45 cop[tikv] table:lineitem keep order:false
Comment on lines -257 to +268
@qw4990 (Contributor, Author) commented on Apr 11, 2022


I ran our TPCH bench again and this plan change won't cause regression:
[screenshot: TPCH benchmark results]

/*
Q4 Order Priority Checking Query
This query determines how well the order priority system is working and gives an assessment of customer satisfaction.
10 changes: 8 additions & 2 deletions planner/core/exhaust_physical_plans.go
@@ -959,6 +959,8 @@ func (p *LogicalJoin) constructInnerTableScanTask(
isPartition: ds.isPartition,

underInnerIndexJoin: true,
tblCols: ds.TblCols,
tblColHists: ds.TblColHists,
Contributor


We use ts.StoreType, but we don't explicitly set it (it's just implicitly set to TiKV). Do we need to improve this in the future?

@qw4990 (Contributor, Author) commented on Apr 13, 2022


Actually there's no need to set it explicitly here since IndexLookUpJoin is only for TiKV, but I think you're right: it's better to set it explicitly to make it clearer. I'll fix it in the next PR.
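
For illustration, here's a rough sketch of what that follow-up could look like in constructInnerTableScanTask; the struct literal is abbreviated to the fields visible in this diff, and the explicit StoreType assignment is the hypothetical part:

```go
// Hypothetical follow-up sketch: state the store type explicitly instead of
// relying on the zero value, since this inner table scan always reads from TiKV.
ts := PhysicalTableScan{
	Table:               ds.tableInfo,
	isPartition:         ds.isPartition,
	underInnerIndexJoin: true,
	StoreType:           kv.TiKV, // explicit rather than implicit default
	tblCols:             ds.TblCols,
	tblColHists:         ds.TblColHists,
}.Init(ds.ctx, ds.blockOffset)
```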

}.Init(ds.ctx, ds.blockOffset)
ts.SetSchema(ds.schema.Clone())
if rowCount <= 0 {
@@ -983,7 +985,7 @@ func (p *LogicalJoin) constructInnerTableScanTask(
StatsVersion: ds.stats.StatsVersion,
// NDV would not be used in cost computation of IndexJoin, set leave it as default nil.
}
rowSize := ds.TblColHists.GetTableAvgRowSize(p.ctx, ds.TblCols, ts.StoreType, true)
rowSize := ts.getScanRowSize()
sessVars := ds.ctx.GetSessionVars()
copTask := &copTask{
tablePlan: ts,
@@ -1055,6 +1057,8 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
Desc: desc,
isPartition: ds.isPartition,
physicalTableID: ds.physicalTableID,
tblColHists: ds.TblColHists,
pkIsHandleCol: ds.getPKIsHandleCol(),

underInnerIndexJoin: true,
}.Init(ds.ctx, ds.blockOffset)
@@ -1078,6 +1082,8 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
TableAsName: ds.TableAsName,
isPartition: ds.isPartition,
physicalTableID: ds.physicalTableID,
tblCols: ds.TblCols,
tblColHists: ds.TblColHists,
}.Init(ds.ctx, ds.blockOffset)
ts.schema = is.dataSourceSchema.Clone()
if ds.tableInfo.IsCommonHandle {
Expand Down Expand Up @@ -1151,7 +1157,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
tmpPath.CountAfterAccess = cnt
}
is.stats = ds.tableStats.ScaleByExpectCnt(tmpPath.CountAfterAccess)
rowSize := is.indexScanRowSize(path.Index, ds, true)
rowSize := is.getScanRowSize()
sessVars := ds.ctx.GetSessionVars()
cop.cst = tmpPath.CountAfterAccess * rowSize * sessVars.GetScanFactor(ds.tableInfo)
finalStats := ds.tableStats.ScaleByExpectCnt(rowCount)
44 changes: 26 additions & 18 deletions planner/core/find_best_task.go
@@ -1034,9 +1034,8 @@ func (ds *DataSource) convertToIndexMergeScan(prop *property.PhysicalProperty, c
func (ds *DataSource) convertToPartialIndexScan(prop *property.PhysicalProperty, path *util.AccessPath) (
indexPlan PhysicalPlan,
partialCost float64) {
idx := path.Index
is, partialCost, rowCount := ds.getOriginalPhysicalIndexScan(prop, path, false, false)
rowSize := is.indexScanRowSize(idx, ds, false)
rowSize := is.stats.HistColl.GetAvgRowSize(is.ctx, is.schema.Columns, true, false)
Contributor


Does this change the actual logic here? In indexScanRowSize we checked whether to add handleCol before calculating the rowSize.

Contributor Author


Yes, this is a small logical change, but I think it's acceptable since 1) it won't cause plan regressions, 2) I'll further improve it later, and 3) the current implementation is too complicated to maintain.
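
For readers comparing the two paths, a condensed before/after sketch (reconstructed only from the removed indexScanRowSize helper and the new call in this diff, with the surrounding function omitted):

```go
// Before: the removed helper could append the handle column to the index
// schema columns before asking the histogram collection for an average row size.
scanCols := make([]*expression.Column, 0, len(idx.Columns)+1)
if len(idx.Columns) == len(is.schema.Columns) {
	scanCols = append(scanCols, is.schema.Columns...)
	if handleCol := ds.getPKIsHandleCol(); handleCol != nil {
		scanCols = append(scanCols, handleCol)
	}
} else {
	scanCols = is.schema.Columns
}
rowSize := ds.TblColHists.GetAvgRowSize(is.ctx, scanCols, true, false)

// After: the schema columns are used as-is; the handle-column check is gone.
rowSize = is.stats.HistColl.GetAvgRowSize(is.ctx, is.schema.Columns, true, false)
```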

// TODO: Consider using isCoveringIndex() to avoid another TableRead
indexConds := path.IndexFilters
sessVars := ds.ctx.GetSessionVars()
@@ -1151,6 +1150,8 @@ func (ds *DataSource) buildIndexMergeTableScan(prop *property.PhysicalProperty,
isPartition: ds.isPartition,
physicalTableID: ds.physicalTableID,
HandleCols: ds.handleCols,
tblCols: ds.TblCols,
tblColHists: ds.TblColHists,
}.Init(ds.ctx, ds.blockOffset)
ts.SetSchema(ds.schema.Clone())
err := setIndexMergeTableScanHandleCols(ds, ts)
@@ -1164,7 +1165,7 @@ }
}
}
}
rowSize := ds.TblColHists.GetTableAvgRowSize(ds.ctx, ds.TblCols, ts.StoreType, true)
rowSize := ts.getScanRowSize()
partialCost += totalRowCount * rowSize * sessVars.GetScanFactor(ds.tableInfo)
ts.stats = ds.tableStats.ScaleByExpectCnt(totalRowCount)
if ds.statisticTable.Pseudo {
@@ -1307,6 +1308,8 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty,
TableAsName: ds.TableAsName,
isPartition: ds.isPartition,
physicalTableID: ds.physicalTableID,
tblCols: ds.TblCols,
tblColHists: ds.TblColHists,
}.Init(ds.ctx, is.blockOffset)
ts.SetSchema(ds.schema.Clone())
ts.SetCost(cost)
@@ -1358,22 +1361,20 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty,
return task, nil
}

func (is *PhysicalIndexScan) indexScanRowSize(idx *model.IndexInfo, ds *DataSource, isForScan bool) float64 {
func (is *PhysicalIndexScan) getScanRowSize() float64 {
idx := is.Index
scanCols := make([]*expression.Column, 0, len(idx.Columns)+1)
// If `initSchema` has already appended the handle column in schema, just use schema columns, otherwise, add extra handle column.
if len(idx.Columns) == len(is.schema.Columns) {
scanCols = append(scanCols, is.schema.Columns...)
handleCol := ds.getPKIsHandleCol()
handleCol := is.pkIsHandleCol
if handleCol != nil {
scanCols = append(scanCols, handleCol)
}
} else {
scanCols = is.schema.Columns
}
if isForScan {
return ds.TblColHists.GetIndexAvgRowSize(is.ctx, scanCols, is.Index.Unique)
}
return ds.TblColHists.GetAvgRowSize(is.ctx, scanCols, true, false)
return is.tblColHists.GetIndexAvgRowSize(is.ctx, scanCols, is.Index.Unique)
}

// initSchema is used to set the schema of PhysicalIndexScan. Before calling this,
@@ -2085,6 +2086,15 @@ func (ts *PhysicalTableScan) addPushedDownSelection(copTask *copTask, stats *pro
}
}

func (ts *PhysicalTableScan) getScanRowSize() float64 {
if ts.StoreType == kv.TiKV {
return ts.tblColHists.GetTableAvgRowSize(ts.ctx, ts.tblCols, ts.StoreType, true)
}
// If `ts.handleCol` is nil, then the schema of tableScan doesn't have handle column.
// This logic can be ensured in column pruning.
return ts.tblColHists.GetTableAvgRowSize(ts.ctx, ts.Schema().Columns, ts.StoreType, ts.HandleCols != nil)
}

func (ds *DataSource) getOriginalPhysicalTableScan(prop *property.PhysicalProperty, path *util.AccessPath, isMatchProp bool) (*PhysicalTableScan, float64, float64) {
ts := PhysicalTableScan{
Table: ds.tableInfo,
@@ -2096,6 +2106,9 @@ func (ds *DataSource) getOriginalPhysicalTableScan(prop *property.PhysicalProper
Ranges: path.Ranges,
AccessCondition: path.AccessConds,
StoreType: path.StoreType,
HandleCols: ds.handleCols,
tblCols: ds.TblCols,
tblColHists: ds.TblColHists,
}.Init(ds.ctx, ds.blockOffset)
ts.filterCondition = make([]expression.Expression, len(path.TableFilters))
copy(ts.filterCondition, path.TableFilters)
@@ -2135,14 +2148,7 @@ func (ds *DataSource) getOriginalPhysicalTableScan(prop *property.PhysicalProper
// we still need to assume values are uniformly distributed. For simplicity, we use uniform-assumption
// for all columns now, as we do in `deriveStatsByFilter`.
ts.stats = ds.tableStats.ScaleByExpectCnt(rowCount)
var rowSize float64
if ts.StoreType == kv.TiKV {
rowSize = ds.TblColHists.GetTableAvgRowSize(ds.ctx, ds.TblCols, ts.StoreType, true)
} else {
// If `ds.handleCol` is nil, then the schema of tableScan doesn't have handle column.
// This logic can be ensured in column pruning.
rowSize = ds.TblColHists.GetTableAvgRowSize(ds.ctx, ts.Schema().Columns, ts.StoreType, ds.handleCols != nil)
}
rowSize := ts.getScanRowSize()
sessVars := ds.ctx.GetSessionVars()
cost := rowCount * rowSize * sessVars.GetScanFactor(ds.tableInfo)
if isMatchProp {
@@ -2170,6 +2176,8 @@ func (ds *DataSource) getOriginalPhysicalIndexScan(prop *property.PhysicalProper
dataSourceSchema: ds.schema,
isPartition: ds.isPartition,
physicalTableID: ds.physicalTableID,
tblColHists: ds.TblColHists,
pkIsHandleCol: ds.getPKIsHandleCol(),
}.Init(ds.ctx, ds.blockOffset)
statsTbl := ds.statisticTable
if statsTbl.Indices[idx.ID] != nil {
@@ -2188,7 +2196,7 @@ func (ds *DataSource) getOriginalPhysicalIndexScan(prop *property.PhysicalProper
}
}
is.stats = ds.tableStats.ScaleByExpectCnt(rowCount)
rowSize := is.indexScanRowSize(idx, ds, true)
rowSize := is.getScanRowSize()
sessVars := ds.ctx.GetSessionVars()
cost := rowCount * rowSize * sessVars.GetScanFactor(ds.tableInfo)
if isMatchProp {
4 changes: 2 additions & 2 deletions planner/core/integration_test.go
@@ -5296,11 +5296,11 @@ func TestIndexJoinCost(t *testing.T) {
` └─Selection_8 1.25 0.00 cop[tikv] not(isnull(test.t_inner_idx.a))`,
` └─IndexRangeScan_7 1.25 0.00 cop[tikv] table:t_inner_idx, index:a(a) range: decided by [eq(test.t_inner_idx.a, test.t_outer.a)], keep order:false, stats:pseudo`))
tk.MustQuery(`explain format=verbose select /*+ TIDB_INLJ(t_outer, t_inner_idx) */ * from t_outer, t_inner_idx where t_outer.a=t_inner_idx.a`).Check(testkit.Rows( // IndexJoin with inner IndexLookup
`IndexJoin_11 12487.50 529388.13 root inner join, inner:IndexLookUp_10, outer key:test.t_outer.a, inner key:test.t_inner_idx.a, equal cond:eq(test.t_outer.a, test.t_inner_idx.a)`,
`IndexJoin_11 12487.50 518149.38 root inner join, inner:IndexLookUp_10, outer key:test.t_outer.a, inner key:test.t_inner_idx.a, equal cond:eq(test.t_outer.a, test.t_inner_idx.a)`,
`├─TableReader_23(Build) 9990.00 36412.58 root data:Selection_22`,
`│ └─Selection_22 9990.00 465000.00 cop[tikv] not(isnull(test.t_outer.a))`,
`│ └─TableFullScan_21 10000.00 435000.00 cop[tikv] table:t_outer keep order:false, stats:pseudo`,
`└─IndexLookUp_10(Probe) 1.25 35.34 root `,
`└─IndexLookUp_10(Probe) 1.25 34.21 root `,
` ├─Selection_9(Build) 1.25 0.00 cop[tikv] not(isnull(test.t_inner_idx.a))`,
` │ └─IndexRangeScan_7 1.25 0.00 cop[tikv] table:t_inner_idx, index:a(a) range: decided by [eq(test.t_inner_idx.a, test.t_outer.a)], keep order:false, stats:pseudo`,
` └─TableRowIDScan_8(Probe) 1.25 0.00 cop[tikv] table:t_inner_idx keep order:false, stats:pseudo`))
6 changes: 6 additions & 0 deletions planner/core/physical_plans.go
@@ -441,6 +441,9 @@ type PhysicalIndexScan struct {
// required by cost model
// IndexScan operators under inner side of IndexJoin no need to consider net seek cost
underInnerIndexJoin bool
// tblColHists contains all columns before pruning, which are used to calculate row-size
tblColHists *statistics.HistColl
pkIsHandleCol *expression.Column
}

// Clone implements PhysicalPlan interface.
@@ -541,6 +544,9 @@ type PhysicalTableScan struct {
// required by cost model
// TableScan operators under inner side of IndexJoin no need to consider net seek cost
underInnerIndexJoin bool
// tblCols and tblColHists contains all columns before pruning, which are used to calculate row-size
tblCols []*expression.Column
tblColHists *statistics.HistColl
}

// Clone implements PhysicalPlan interface.
2 changes: 2 additions & 0 deletions planner/core/planbuilder.go
@@ -1457,6 +1457,7 @@ func (b *PlanBuilder) buildPhysicalIndexLookUpReader(ctx context.Context, dbName
Ranges: ranger.FullRange(),
physicalTableID: physicalID,
isPartition: isPartition,
tblColHists: &(statistics.PseudoTable(tblInfo)).HistColl,
}.Init(b.ctx, b.getSelectOffset())
// There is no alternative plan choices, so just use pseudo stats to avoid panic.
is.stats = &property.StatsInfo{HistColl: &(statistics.PseudoTable(tblInfo)).HistColl}
@@ -1474,6 +1475,7 @@ func (b *PlanBuilder) buildPhysicalIndexLookUpReader(ctx context.Context, dbName
TableAsName: &tblInfo.Name,
physicalTableID: physicalID,
isPartition: isPartition,
tblColHists: &(statistics.PseudoTable(tblInfo)).HistColl,
}.Init(b.ctx, b.getSelectOffset())
ts.SetSchema(idxColSchema)
ts.Columns = ExpandVirtualColumn(ts.Columns, ts.schema, ts.Table.Columns)
6 changes: 4 additions & 2 deletions planner/core/task.go
@@ -187,8 +187,10 @@ func (t *copTask) finishIndexPlan() {
}

// Calculate the IO cost of table scan here because we cannot know its stats until we finish index plan.
rowSize := t.tblColHists.GetIndexAvgRowSize(t.indexPlan.SCtx(), t.tblCols, is.Index.Unique)
t.cst += cnt * rowSize * sessVars.GetScanFactor(tableInfo)
for p = t.tablePlan; len(p.Children()) > 0; p = p.Children()[0] {
}
ts := p.(*PhysicalTableScan)
t.cst += cnt * ts.getScanRowSize() * sessVars.GetScanFactor(tableInfo)
}

func (t *copTask) getStoreType() kv.StoreType {
18 changes: 9 additions & 9 deletions planner/core/testdata/integration_suite_out.json
@@ -1816,7 +1816,7 @@
{
"SQL": "select * from t where a > 1 order by f",
"Plan": [
"IndexLookUp_14 3333.33 139413.67 root ",
"IndexLookUp_14 3333.33 136747.00 root ",
"├─Selection_13(Build) 3333.33 0.00 cop[tikv] gt(test.t.a, 1)",
"│ └─IndexFullScan_11 10000.00 555000.00 cop[tikv] table:t, index:f(f) keep order:true, stats:pseudo",
"└─TableRowIDScan_12(Probe) 3333.33 555000.00 cop[tikv] table:t keep order:false, stats:pseudo"
@@ -1828,9 +1828,9 @@
{
"SQL": "select * from t where f > 1",
"Plan": [
"TableReader_7 3333.33 88640.22 root data:Selection_6",
"└─Selection_6 3333.33 1140000.00 cop[tikv] gt(test.t.f, 1)",
" └─TableFullScan_5 10000.00 1110000.00 cop[tikv] table:t keep order:false, stats:pseudo"
"IndexLookUp_10 3333.33 86674.83 root ",
"├─IndexRangeScan_8(Build) 3333.33 185000.00 cop[tikv] table:t, index:f(f) range:(1,+inf], keep order:false, stats:pseudo",
"└─TableRowIDScan_9(Probe) 3333.33 185000.00 cop[tikv] table:t keep order:false, stats:pseudo"
],
"Warnings": [
"Note 1105 [t,f,f_g] remain after pruning paths for t given Prop{SortItems: [], TaskTp: rootTask}"
@@ -1849,7 +1849,7 @@
{
"SQL": "select * from t where f > 3 and g = 5",
"Plan": [
"IndexLookUp_15 3.33 215.74 root ",
"IndexLookUp_15 3.33 206.74 root ",
"├─IndexRangeScan_12(Build) 10.00 570.00 cop[tikv] table:t, index:g(g) range:[5,5], keep order:false, stats:pseudo",
"└─Selection_14(Probe) 3.33 0.00 cop[tikv] gt(test.t.f, 3)",
" └─TableRowIDScan_13 10.00 570.00 cop[tikv] table:t keep order:false, stats:pseudo"
@@ -1861,8 +1861,8 @@
{
"SQL": "select * from t where g = 5 order by f",
"Plan": [
"Sort_5 10.00 362.68 root test.t.f",
"└─IndexLookUp_13 10.00 239.01 root ",
"Sort_5 10.00 353.68 root test.t.f",
"└─IndexLookUp_13 10.00 230.01 root ",
" ├─IndexRangeScan_11(Build) 10.00 570.00 cop[tikv] table:t, index:g(g) range:[5,5], keep order:false, stats:pseudo",
" └─TableRowIDScan_12(Probe) 10.00 570.00 cop[tikv] table:t keep order:false, stats:pseudo"
],
@@ -1873,7 +1873,7 @@
{
"SQL": "select * from t where d = 3 order by c, e",
"Plan": [
"IndexLookUp_15 10.00 57230.78 root ",
"IndexLookUp_15 10.00 57222.78 root ",
"├─Selection_14(Build) 10.00 0.00 cop[tikv] eq(test.t.d, 3)",
"│ └─IndexFullScan_12 10000.00 825000.00 cop[tikv] table:t, index:c_d_e(c, d, e) keep order:true, stats:pseudo",
"└─TableRowIDScan_13(Probe) 10.00 825000.00 cop[tikv] table:t keep order:false, stats:pseudo"
@@ -1931,7 +1931,7 @@
{
"SQL": "explain format = 'verbose' select * from t where b > 5",
"Plan": [
"IndexLookUp_7 3.00 64.81 root ",
"IndexLookUp_7 3.00 57.91 root ",
"├─IndexRangeScan_5(Build) 3.00 171.00 cop[tikv] table:t, index:idx_b(b) range:(5,+inf], keep order:false",
"└─TableRowIDScan_6(Probe) 3.00 171.00 cop[tikv] table:t keep order:false"
],