diff --git a/cmd/explaintest/r/tpch.result b/cmd/explaintest/r/tpch.result index 855021af61533..da6f043b22912 100644 --- a/cmd/explaintest/r/tpch.result +++ b/cmd/explaintest/r/tpch.result @@ -254,7 +254,7 @@ Projection 10.00 root tpch.lineitem.l_orderkey, Column#35, tpch.orders.o_orderd └─TopN 10.00 root Column#35:desc, tpch.orders.o_orderdate, offset:0, count:10 └─HashAgg 40252367.98 root group by:Column#48, Column#49, Column#50, funcs:sum(Column#44)->Column#35, funcs:firstrow(Column#45)->tpch.orders.o_orderdate, funcs:firstrow(Column#46)->tpch.orders.o_shippriority, funcs:firstrow(Column#47)->tpch.lineitem.l_orderkey └─Projection 91515927.49 root mul(tpch.lineitem.l_extendedprice, minus(1, tpch.lineitem.l_discount))->Column#44, tpch.orders.o_orderdate, tpch.orders.o_shippriority, tpch.lineitem.l_orderkey, tpch.lineitem.l_orderkey, tpch.orders.o_orderdate, tpch.orders.o_shippriority - └─HashJoin 91515927.49 root inner join, equal:[eq(tpch.orders.o_orderkey, tpch.lineitem.l_orderkey)] + └─IndexHashJoin 91515927.49 root inner join, inner:IndexLookUp, outer key:tpch.orders.o_orderkey, inner key:tpch.lineitem.l_orderkey, equal cond:eq(tpch.orders.o_orderkey, tpch.lineitem.l_orderkey) ├─HashJoin(Build) 22592975.51 root inner join, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)] │ ├─TableReader(Build) 1498236.00 root data:Selection │ │ └─Selection 1498236.00 cop[tikv] eq(tpch.customer.c_mktsegment, "AUTOMOBILE") @@ -262,9 +262,10 @@ Projection 10.00 root tpch.lineitem.l_orderkey, Column#35, tpch.orders.o_orderd │ └─TableReader(Probe) 36870000.00 root data:Selection │ └─Selection 36870000.00 cop[tikv] lt(tpch.orders.o_orderdate, 1995-03-13 00:00:00.000000) │ └─TableFullScan 75000000.00 cop[tikv] table:orders keep order:false - └─TableReader(Probe) 163047704.27 root data:Selection - └─Selection 163047704.27 cop[tikv] gt(tpch.lineitem.l_shipdate, 1995-03-13 00:00:00.000000) - └─TableFullScan 300005811.00 cop[tikv] table:lineitem keep order:false + 
└─IndexLookUp(Probe) 4.05 root + ├─IndexRangeScan(Build) 7.45 cop[tikv] table:lineitem, index:PRIMARY(L_ORDERKEY, L_LINENUMBER) range: decided by [eq(tpch.lineitem.l_orderkey, tpch.orders.o_orderkey)], keep order:false + └─Selection(Probe) 4.05 cop[tikv] gt(tpch.lineitem.l_shipdate, 1995-03-13 00:00:00.000000) + └─TableRowIDScan 7.45 cop[tikv] table:lineitem keep order:false /* Q4 Order Priority Checking Query This query determines how well the order priority system is working and gives an assessment of customer satisfaction. diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go index a510ae89fccd7..69c2a337b18d5 100644 --- a/planner/core/exhaust_physical_plans.go +++ b/planner/core/exhaust_physical_plans.go @@ -959,6 +959,8 @@ func (p *LogicalJoin) constructInnerTableScanTask( isPartition: ds.isPartition, underInnerIndexJoin: true, + tblCols: ds.TblCols, + tblColHists: ds.TblColHists, }.Init(ds.ctx, ds.blockOffset) ts.SetSchema(ds.schema.Clone()) if rowCount <= 0 { @@ -983,7 +985,7 @@ func (p *LogicalJoin) constructInnerTableScanTask( StatsVersion: ds.stats.StatsVersion, // NDV would not be used in cost computation of IndexJoin, set leave it as default nil. 
} - rowSize := ds.TblColHists.GetTableAvgRowSize(p.ctx, ds.TblCols, ts.StoreType, true) + rowSize := ts.getScanRowSize() sessVars := ds.ctx.GetSessionVars() copTask := &copTask{ tablePlan: ts, @@ -1055,6 +1057,8 @@ func (p *LogicalJoin) constructInnerIndexScanTask( Desc: desc, isPartition: ds.isPartition, physicalTableID: ds.physicalTableID, + tblColHists: ds.TblColHists, + pkIsHandleCol: ds.getPKIsHandleCol(), underInnerIndexJoin: true, }.Init(ds.ctx, ds.blockOffset) @@ -1078,6 +1082,8 @@ func (p *LogicalJoin) constructInnerIndexScanTask( TableAsName: ds.TableAsName, isPartition: ds.isPartition, physicalTableID: ds.physicalTableID, + tblCols: ds.TblCols, + tblColHists: ds.TblColHists, }.Init(ds.ctx, ds.blockOffset) ts.schema = is.dataSourceSchema.Clone() if ds.tableInfo.IsCommonHandle { @@ -1151,7 +1157,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask( tmpPath.CountAfterAccess = cnt } is.stats = ds.tableStats.ScaleByExpectCnt(tmpPath.CountAfterAccess) - rowSize := is.indexScanRowSize(path.Index, ds, true) + rowSize := is.getScanRowSize() sessVars := ds.ctx.GetSessionVars() cop.cst = tmpPath.CountAfterAccess * rowSize * sessVars.GetScanFactor(ds.tableInfo) finalStats := ds.tableStats.ScaleByExpectCnt(rowCount) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 22348e0ee0462..4eb777b61b522 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -1034,9 +1034,8 @@ func (ds *DataSource) convertToIndexMergeScan(prop *property.PhysicalProperty, c func (ds *DataSource) convertToPartialIndexScan(prop *property.PhysicalProperty, path *util.AccessPath) ( indexPlan PhysicalPlan, partialCost float64) { - idx := path.Index is, partialCost, rowCount := ds.getOriginalPhysicalIndexScan(prop, path, false, false) - rowSize := is.indexScanRowSize(idx, ds, false) + rowSize := is.stats.HistColl.GetAvgRowSize(is.ctx, is.schema.Columns, true, false) // TODO: Consider using isCoveringIndex() to avoid another TableRead 
indexConds := path.IndexFilters sessVars := ds.ctx.GetSessionVars() @@ -1151,6 +1150,8 @@ func (ds *DataSource) buildIndexMergeTableScan(prop *property.PhysicalProperty, isPartition: ds.isPartition, physicalTableID: ds.physicalTableID, HandleCols: ds.handleCols, + tblCols: ds.TblCols, + tblColHists: ds.TblColHists, }.Init(ds.ctx, ds.blockOffset) ts.SetSchema(ds.schema.Clone()) err := setIndexMergeTableScanHandleCols(ds, ts) @@ -1164,7 +1165,7 @@ func (ds *DataSource) buildIndexMergeTableScan(prop *property.PhysicalProperty, } } } - rowSize := ds.TblColHists.GetTableAvgRowSize(ds.ctx, ds.TblCols, ts.StoreType, true) + rowSize := ts.getScanRowSize() partialCost += totalRowCount * rowSize * sessVars.GetScanFactor(ds.tableInfo) ts.stats = ds.tableStats.ScaleByExpectCnt(totalRowCount) if ds.statisticTable.Pseudo { @@ -1307,6 +1308,8 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, TableAsName: ds.TableAsName, isPartition: ds.isPartition, physicalTableID: ds.physicalTableID, + tblCols: ds.TblCols, + tblColHists: ds.TblColHists, }.Init(ds.ctx, is.blockOffset) ts.SetSchema(ds.schema.Clone()) ts.SetCost(cost) @@ -1358,22 +1361,20 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, return task, nil } -func (is *PhysicalIndexScan) indexScanRowSize(idx *model.IndexInfo, ds *DataSource, isForScan bool) float64 { +func (is *PhysicalIndexScan) getScanRowSize() float64 { + idx := is.Index scanCols := make([]*expression.Column, 0, len(idx.Columns)+1) // If `initSchema` has already appended the handle column in schema, just use schema columns, otherwise, add extra handle column. if len(idx.Columns) == len(is.schema.Columns) { scanCols = append(scanCols, is.schema.Columns...) 
- handleCol := ds.getPKIsHandleCol() + handleCol := is.pkIsHandleCol if handleCol != nil { scanCols = append(scanCols, handleCol) } } else { scanCols = is.schema.Columns } - if isForScan { - return ds.TblColHists.GetIndexAvgRowSize(is.ctx, scanCols, is.Index.Unique) - } - return ds.TblColHists.GetAvgRowSize(is.ctx, scanCols, true, false) + return is.tblColHists.GetIndexAvgRowSize(is.ctx, scanCols, is.Index.Unique) } // initSchema is used to set the schema of PhysicalIndexScan. Before calling this, @@ -2085,6 +2086,15 @@ func (ts *PhysicalTableScan) addPushedDownSelection(copTask *copTask, stats *pro } } +func (ts *PhysicalTableScan) getScanRowSize() float64 { + if ts.StoreType == kv.TiKV { + return ts.tblColHists.GetTableAvgRowSize(ts.ctx, ts.tblCols, ts.StoreType, true) + } + // If `ts.HandleCols` is nil, then the schema of tableScan doesn't have handle column. + // This logic can be ensured in column pruning. + return ts.tblColHists.GetTableAvgRowSize(ts.ctx, ts.Schema().Columns, ts.StoreType, ts.HandleCols != nil) +} + func (ds *DataSource) getOriginalPhysicalTableScan(prop *property.PhysicalProperty, path *util.AccessPath, isMatchProp bool) (*PhysicalTableScan, float64, float64) { ts := PhysicalTableScan{ Table: ds.tableInfo, @@ -2096,6 +2106,9 @@ func (ds *DataSource) getOriginalPhysicalTableScan(prop *property.PhysicalProper Ranges: path.Ranges, AccessCondition: path.AccessConds, StoreType: path.StoreType, + HandleCols: ds.handleCols, + tblCols: ds.TblCols, + tblColHists: ds.TblColHists, }.Init(ds.ctx, ds.blockOffset) ts.filterCondition = make([]expression.Expression, len(path.TableFilters)) copy(ts.filterCondition, path.TableFilters) @@ -2135,14 +2148,7 @@ func (ds *DataSource) getOriginalPhysicalTableScan(prop *property.PhysicalProper // we still need to assume values are uniformly distributed. For simplicity, we use uniform-assumption // for all columns now, as we do in `deriveStatsByFilter`. 
ts.stats = ds.tableStats.ScaleByExpectCnt(rowCount) - var rowSize float64 - if ts.StoreType == kv.TiKV { - rowSize = ds.TblColHists.GetTableAvgRowSize(ds.ctx, ds.TblCols, ts.StoreType, true) - } else { - // If `ds.handleCol` is nil, then the schema of tableScan doesn't have handle column. - // This logic can be ensured in column pruning. - rowSize = ds.TblColHists.GetTableAvgRowSize(ds.ctx, ts.Schema().Columns, ts.StoreType, ds.handleCols != nil) - } + rowSize := ts.getScanRowSize() sessVars := ds.ctx.GetSessionVars() cost := rowCount * rowSize * sessVars.GetScanFactor(ds.tableInfo) if isMatchProp { @@ -2170,6 +2176,8 @@ func (ds *DataSource) getOriginalPhysicalIndexScan(prop *property.PhysicalProper dataSourceSchema: ds.schema, isPartition: ds.isPartition, physicalTableID: ds.physicalTableID, + tblColHists: ds.TblColHists, + pkIsHandleCol: ds.getPKIsHandleCol(), }.Init(ds.ctx, ds.blockOffset) statsTbl := ds.statisticTable if statsTbl.Indices[idx.ID] != nil { @@ -2188,7 +2196,7 @@ func (ds *DataSource) getOriginalPhysicalIndexScan(prop *property.PhysicalProper } } is.stats = ds.tableStats.ScaleByExpectCnt(rowCount) - rowSize := is.indexScanRowSize(idx, ds, true) + rowSize := is.getScanRowSize() sessVars := ds.ctx.GetSessionVars() cost := rowCount * rowSize * sessVars.GetScanFactor(ds.tableInfo) if isMatchProp { diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index 3bd8b527d2e3b..19e6ac3d7eb00 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -5296,11 +5296,11 @@ func TestIndexJoinCost(t *testing.T) { ` └─Selection_8 1.25 0.00 cop[tikv] not(isnull(test.t_inner_idx.a))`, ` └─IndexRangeScan_7 1.25 0.00 cop[tikv] table:t_inner_idx, index:a(a) range: decided by [eq(test.t_inner_idx.a, test.t_outer.a)], keep order:false, stats:pseudo`)) tk.MustQuery(`explain format=verbose select /*+ TIDB_INLJ(t_outer, t_inner_idx) */ * from t_outer, t_inner_idx where t_outer.a=t_inner_idx.a`).Check(testkit.Rows( // 
IndexJoin with inner IndexLookup - `IndexJoin_11 12487.50 529388.13 root inner join, inner:IndexLookUp_10, outer key:test.t_outer.a, inner key:test.t_inner_idx.a, equal cond:eq(test.t_outer.a, test.t_inner_idx.a)`, + `IndexJoin_11 12487.50 518149.38 root inner join, inner:IndexLookUp_10, outer key:test.t_outer.a, inner key:test.t_inner_idx.a, equal cond:eq(test.t_outer.a, test.t_inner_idx.a)`, `├─TableReader_23(Build) 9990.00 36412.58 root data:Selection_22`, `│ └─Selection_22 9990.00 465000.00 cop[tikv] not(isnull(test.t_outer.a))`, `│ └─TableFullScan_21 10000.00 435000.00 cop[tikv] table:t_outer keep order:false, stats:pseudo`, - `└─IndexLookUp_10(Probe) 1.25 35.34 root `, + `└─IndexLookUp_10(Probe) 1.25 34.21 root `, ` ├─Selection_9(Build) 1.25 0.00 cop[tikv] not(isnull(test.t_inner_idx.a))`, ` │ └─IndexRangeScan_7 1.25 0.00 cop[tikv] table:t_inner_idx, index:a(a) range: decided by [eq(test.t_inner_idx.a, test.t_outer.a)], keep order:false, stats:pseudo`, ` └─TableRowIDScan_8(Probe) 1.25 0.00 cop[tikv] table:t_inner_idx keep order:false, stats:pseudo`)) diff --git a/planner/core/physical_plans.go b/planner/core/physical_plans.go index e533ecb7b717c..b0aff71a5c647 100644 --- a/planner/core/physical_plans.go +++ b/planner/core/physical_plans.go @@ -441,6 +441,9 @@ type PhysicalIndexScan struct { // required by cost model // IndexScan operators under inner side of IndexJoin no need to consider net seek cost underInnerIndexJoin bool + // tblColHists contains all columns before pruning, which are used to calculate row-size + tblColHists *statistics.HistColl + pkIsHandleCol *expression.Column } // Clone implements PhysicalPlan interface. 
@@ -541,6 +544,9 @@ type PhysicalTableScan struct { // required by cost model // TableScan operators under inner side of IndexJoin no need to consider net seek cost underInnerIndexJoin bool + // tblCols and tblColHists contain all columns before pruning, which are used to calculate row-size + tblCols []*expression.Column + tblColHists *statistics.HistColl } // Clone implements PhysicalPlan interface. diff --git a/planner/core/planbuilder.go b/planner/core/planbuilder.go index 9d965aedb9310..d65007513a891 100644 --- a/planner/core/planbuilder.go +++ b/planner/core/planbuilder.go @@ -1457,6 +1457,7 @@ func (b *PlanBuilder) buildPhysicalIndexLookUpReader(ctx context.Context, dbName Ranges: ranger.FullRange(), physicalTableID: physicalID, isPartition: isPartition, + tblColHists: &(statistics.PseudoTable(tblInfo)).HistColl, }.Init(b.ctx, b.getSelectOffset()) // There is no alternative plan choices, so just use pseudo stats to avoid panic. is.stats = &property.StatsInfo{HistColl: &(statistics.PseudoTable(tblInfo)).HistColl} @@ -1474,6 +1475,7 @@ func (b *PlanBuilder) buildPhysicalIndexLookUpReader(ctx context.Context, dbName TableAsName: &tblInfo.Name, physicalTableID: physicalID, isPartition: isPartition, + tblColHists: &(statistics.PseudoTable(tblInfo)).HistColl, }.Init(b.ctx, b.getSelectOffset()) ts.SetSchema(idxColSchema) ts.Columns = ExpandVirtualColumn(ts.Columns, ts.schema, ts.Table.Columns) diff --git a/planner/core/task.go b/planner/core/task.go index d8ab2369214e3..6bf9f6ac729e8 100644 --- a/planner/core/task.go +++ b/planner/core/task.go @@ -187,8 +187,10 @@ func (t *copTask) finishIndexPlan() { } // Calculate the IO cost of table scan here because we cannot know its stats until we finish index plan. 
- rowSize := t.tblColHists.GetIndexAvgRowSize(t.indexPlan.SCtx(), t.tblCols, is.Index.Unique) - t.cst += cnt * rowSize * sessVars.GetScanFactor(tableInfo) + for p = t.tablePlan; len(p.Children()) > 0; p = p.Children()[0] { + } + ts := p.(*PhysicalTableScan) + t.cst += cnt * ts.getScanRowSize() * sessVars.GetScanFactor(tableInfo) } func (t *copTask) getStoreType() kv.StoreType { diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json index db5f789ede11c..87da8449649af 100644 --- a/planner/core/testdata/integration_suite_out.json +++ b/planner/core/testdata/integration_suite_out.json @@ -1816,7 +1816,7 @@ { "SQL": "select * from t where a > 1 order by f", "Plan": [ - "IndexLookUp_14 3333.33 139413.67 root ", + "IndexLookUp_14 3333.33 136747.00 root ", "├─Selection_13(Build) 3333.33 0.00 cop[tikv] gt(test.t.a, 1)", "│ └─IndexFullScan_11 10000.00 555000.00 cop[tikv] table:t, index:f(f) keep order:true, stats:pseudo", "└─TableRowIDScan_12(Probe) 3333.33 555000.00 cop[tikv] table:t keep order:false, stats:pseudo" @@ -1828,9 +1828,9 @@ { "SQL": "select * from t where f > 1", "Plan": [ - "TableReader_7 3333.33 88640.22 root data:Selection_6", - "└─Selection_6 3333.33 1140000.00 cop[tikv] gt(test.t.f, 1)", - " └─TableFullScan_5 10000.00 1110000.00 cop[tikv] table:t keep order:false, stats:pseudo" + "IndexLookUp_10 3333.33 86674.83 root ", + "├─IndexRangeScan_8(Build) 3333.33 185000.00 cop[tikv] table:t, index:f(f) range:(1,+inf], keep order:false, stats:pseudo", + "└─TableRowIDScan_9(Probe) 3333.33 185000.00 cop[tikv] table:t keep order:false, stats:pseudo" ], "Warnings": [ "Note 1105 [t,f,f_g] remain after pruning paths for t given Prop{SortItems: [], TaskTp: rootTask}" @@ -1849,7 +1849,7 @@ { "SQL": "select * from t where f > 3 and g = 5", "Plan": [ - "IndexLookUp_15 3.33 215.74 root ", + "IndexLookUp_15 3.33 206.74 root ", "├─IndexRangeScan_12(Build) 10.00 570.00 cop[tikv] table:t, index:g(g) range:[5,5], keep 
order:false, stats:pseudo", "└─Selection_14(Probe) 3.33 0.00 cop[tikv] gt(test.t.f, 3)", " └─TableRowIDScan_13 10.00 570.00 cop[tikv] table:t keep order:false, stats:pseudo" @@ -1861,8 +1861,8 @@ { "SQL": "select * from t where g = 5 order by f", "Plan": [ - "Sort_5 10.00 362.68 root test.t.f", - "└─IndexLookUp_13 10.00 239.01 root ", + "Sort_5 10.00 353.68 root test.t.f", + "└─IndexLookUp_13 10.00 230.01 root ", " ├─IndexRangeScan_11(Build) 10.00 570.00 cop[tikv] table:t, index:g(g) range:[5,5], keep order:false, stats:pseudo", " └─TableRowIDScan_12(Probe) 10.00 570.00 cop[tikv] table:t keep order:false, stats:pseudo" ], @@ -1873,7 +1873,7 @@ { "SQL": "select * from t where d = 3 order by c, e", "Plan": [ - "IndexLookUp_15 10.00 57230.78 root ", + "IndexLookUp_15 10.00 57222.78 root ", "├─Selection_14(Build) 10.00 0.00 cop[tikv] eq(test.t.d, 3)", "│ └─IndexFullScan_12 10000.00 825000.00 cop[tikv] table:t, index:c_d_e(c, d, e) keep order:true, stats:pseudo", "└─TableRowIDScan_13(Probe) 10.00 825000.00 cop[tikv] table:t keep order:false, stats:pseudo" @@ -1931,7 +1931,7 @@ { "SQL": "explain format = 'verbose' select * from t where b > 5", "Plan": [ - "IndexLookUp_7 3.00 64.81 root ", + "IndexLookUp_7 3.00 57.91 root ", "├─IndexRangeScan_5(Build) 3.00 171.00 cop[tikv] table:t, index:idx_b(b) range:(5,+inf], keep order:false", "└─TableRowIDScan_6(Probe) 3.00 171.00 cop[tikv] table:t keep order:false" ],