planner: fix wrong row-sizes used in cost model #33845
Changes from 22 commits
@@ -959,6 +959,8 @@ func (p *LogicalJoin) constructInnerTableScanTask(
 		isPartition:         ds.isPartition,
 		underInnerIndexJoin: true,
+		tblCols:             ds.TblCols,
+		tblColHists:         ds.TblColHists,

Review comment: We use …
Reply: Actually no need to set it explicitly here since …

 	}.Init(ds.ctx, ds.blockOffset)
 	ts.SetSchema(ds.schema.Clone())
 	if rowCount <= 0 {
@@ -983,7 +985,7 @@ func (p *LogicalJoin) constructInnerTableScanTask(
 		StatsVersion: ds.stats.StatsVersion,
 		// NDV would not be used in cost computation of IndexJoin, set leave it as default nil.
 	}
-	rowSize := ds.TblColHists.GetTableAvgRowSize(p.ctx, ds.TblCols, ts.StoreType, true)
+	rowSize := ts.getScanRowSize()
 	sessVars := ds.ctx.GetSessionVars()
 	copTask := &copTask{
 		tablePlan: ts,
@@ -1055,6 +1057,8 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
 		Desc:                desc,
 		isPartition:         ds.isPartition,
 		physicalTableID:     ds.physicalTableID,
+		tblColHists:         ds.TblColHists,
+		pkIsHandleCol:       ds.getPKIsHandleCol(),
 		underInnerIndexJoin: true,
 	}.Init(ds.ctx, ds.blockOffset)
@@ -1078,6 +1082,8 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
 		TableAsName:     ds.TableAsName,
 		isPartition:     ds.isPartition,
 		physicalTableID: ds.physicalTableID,
+		tblCols:         ds.TblCols,
+		tblColHists:     ds.TblColHists,
 	}.Init(ds.ctx, ds.blockOffset)
 	ts.schema = is.dataSourceSchema.Clone()
 	if ds.tableInfo.IsCommonHandle {
@@ -1151,7 +1157,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
 		tmpPath.CountAfterAccess = cnt
 	}
 	is.stats = ds.tableStats.ScaleByExpectCnt(tmpPath.CountAfterAccess)
-	rowSize := is.indexScanRowSize(path.Index, ds, true)
+	rowSize := is.getScanRowSize()
 	sessVars := ds.ctx.GetSessionVars()
 	cop.cst = tmpPath.CountAfterAccess * rowSize * sessVars.GetScanFactor(ds.tableInfo)
 	finalStats := ds.tableStats.ScaleByExpectCnt(rowCount)
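The hunks above all follow the same pattern: instead of every call site rebuilding the average row width from the DataSource, the physical scan operators now carry the column list and column histograms they need (tblCols, tblColHists, pkIsHandleCol) and expose a single getScanRowSize() method that the cost code calls. Below is a minimal, self-contained sketch of that pattern; the struct, field names, and byte widths are invented for illustration and are not TiDB's real types.

```go
package main

import "fmt"

// colHists is a toy stand-in for per-column statistics: average encoded width in bytes.
type colHists map[string]float64

func (h colHists) avgRowSize(cols []string) float64 {
	size := 0.0
	for _, c := range cols {
		size += h[c]
	}
	return size
}

// The scan operator carries the stats it needs, so every caller computes the
// same row size through one method instead of duplicating the logic.
type toyTableScan struct {
	tblCols     []string
	tblColHists colHists
}

func (ts *toyTableScan) getScanRowSize() float64 {
	return ts.tblColHists.avgRowSize(ts.tblCols)
}

// scanCost mirrors the cost formula used in the diff: rowCount * rowSize * scanFactor.
func scanCost(rowCount, rowSize, scanFactor float64) float64 {
	return rowCount * rowSize * scanFactor
}

func main() {
	ts := &toyTableScan{
		tblCols:     []string{"id", "name", "payload"},
		tblColHists: colHists{"id": 8, "name": 24, "payload": 96},
	}
	rowSize := ts.getScanRowSize() // 128 bytes per row in this toy example
	fmt.Printf("rowSize=%.0f bytes, cost=%.0f\n", rowSize, scanCost(10000, rowSize, 1.5))
}
```

Centralizing the computation in the operator is what lets the later hunks delete the slightly different row-size code that had grown at each call site.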
@@ -1034,9 +1034,8 @@ func (ds *DataSource) convertToIndexMergeScan(prop *property.PhysicalProperty, c
 func (ds *DataSource) convertToPartialIndexScan(prop *property.PhysicalProperty, path *util.AccessPath) (
 	indexPlan PhysicalPlan,
 	partialCost float64) {
-	idx := path.Index
 	is, partialCost, rowCount := ds.getOriginalPhysicalIndexScan(prop, path, false, false)
-	rowSize := is.indexScanRowSize(idx, ds, false)
+	rowSize := is.stats.HistColl.GetAvgRowSize(is.ctx, is.schema.Columns, true, false)

qw4990 marked this conversation as resolved.
Review comment: Do we change the actual logic here? In …
Reply: Yes, this is a small logical change, but I think this change is acceptable since 1) it won't cause plan-regression, 2) I'll further improve it later, 3) the current implementation is too sophisticated to maintain.

 	// TODO: Consider using isCoveringIndex() to avoid another TableRead
 	indexConds := path.IndexFilters
 	sessVars := ds.ctx.GetSessionVars()
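On the review thread a few lines above: judging from the old helper's body (visible in a later hunk), indexScanRowSize(idx, ds, false) could add the PK handle column to the averaged column set, while the new call averages only is.schema.Columns; that appears to be the small logical change being discussed, though this reading is an interpretation, not something stated in the thread. A toy calculation of the size of that effect, with invented widths (not TiDB code):

```go
package main

import "fmt"

// sumWidths stands in for averaging a row size from per-column statistics.
func sumWidths(widths ...float64) float64 {
	total := 0.0
	for _, w := range widths {
		total += w
	}
	return total
}

func main() {
	// Hypothetical averages: two index columns (24 B and 8 B) and an 8-byte integer handle.
	withoutHandle := sumWidths(24, 8)  // only the index schema columns
	withHandle := sumWidths(24, 8, 8)  // the same columns plus the handle
	fmt.Printf("without handle: %.0f B, with handle: %.0f B\n", withoutHandle, withHandle)
	// The partial index scan's cost scales linearly with row size, so the estimate moves by
	// the handle's width (8 of 32 bytes here, i.e. 25%; typically much less on wider rows),
	// which is consistent with the author's expectation that the change won't flip plans.
}
```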
@@ -1151,6 +1150,8 @@ func (ds *DataSource) buildIndexMergeTableScan(prop *property.PhysicalProperty,
 		isPartition:     ds.isPartition,
 		physicalTableID: ds.physicalTableID,
 		HandleCols:      ds.handleCols,
+		tblCols:         ds.TblCols,
+		tblColHists:     ds.TblColHists,
 	}.Init(ds.ctx, ds.blockOffset)
 	ts.SetSchema(ds.schema.Clone())
 	err := setIndexMergeTableScanHandleCols(ds, ts)
@@ -1164,7 +1165,7 @@ func (ds *DataSource) buildIndexMergeTableScan(prop *property.PhysicalProperty,
 			}
 		}
 	}
-	rowSize := ds.TblColHists.GetTableAvgRowSize(ds.ctx, ds.TblCols, ts.StoreType, true)
+	rowSize := ts.getScanRowSize()
 	partialCost += totalRowCount * rowSize * sessVars.GetScanFactor(ds.tableInfo)
 	ts.stats = ds.tableStats.ScaleByExpectCnt(totalRowCount)
 	if ds.statisticTable.Pseudo {
@@ -1307,6 +1308,8 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty,
 		TableAsName:     ds.TableAsName,
 		isPartition:     ds.isPartition,
 		physicalTableID: ds.physicalTableID,
+		tblCols:         ds.TblCols,
+		tblColHists:     ds.TblColHists,
 	}.Init(ds.ctx, is.blockOffset)
 	ts.SetSchema(ds.schema.Clone())
 	ts.SetCost(cost)
@@ -1358,22 +1361,20 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty,
 	return task, nil
 }

-func (is *PhysicalIndexScan) indexScanRowSize(idx *model.IndexInfo, ds *DataSource, isForScan bool) float64 {
+func (is *PhysicalIndexScan) getScanRowSize() float64 {
+	idx := is.Index
 	scanCols := make([]*expression.Column, 0, len(idx.Columns)+1)
 	// If `initSchema` has already appended the handle column in schema, just use schema columns, otherwise, add extra handle column.
 	if len(idx.Columns) == len(is.schema.Columns) {
 		scanCols = append(scanCols, is.schema.Columns...)
-		handleCol := ds.getPKIsHandleCol()
+		handleCol := is.pkIsHandleCol
 		if handleCol != nil {
 			scanCols = append(scanCols, handleCol)
 		}
 	} else {
 		scanCols = is.schema.Columns
 	}
-	if isForScan {
-		return ds.TblColHists.GetIndexAvgRowSize(is.ctx, scanCols, is.Index.Unique)
-	}
-	return ds.TblColHists.GetAvgRowSize(is.ctx, scanCols, true, false)
+	return is.tblColHists.GetIndexAvgRowSize(is.ctx, scanCols, is.Index.Unique)
 }

 // initSchema is used to set the schema of PhysicalIndexScan. Before calling this,
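A condensed sketch of the column-selection rule the new PhysicalIndexScan.getScanRowSize keeps from the old helper: when the schema holds exactly the index columns, the handle column (if the primary key is the handle) is counted too, since a stored index row also carries the handle; when initSchema has already appended the handle, the schema is used as-is. Column names and widths below are stand-ins, not TiDB's types:

```go
package main

import "fmt"

// widthOf stands in for per-column average width taken from histograms.
var widthOf = map[string]float64{"a": 16, "b": 8, "pk": 8}

// indexScanRowSize mirrors the shape of the new method: pick the columns an index
// row actually stores, then sum their average widths.
func indexScanRowSize(indexCols, schemaCols []string, pkIsHandleCol string) float64 {
	scanCols := schemaCols
	if len(indexCols) == len(schemaCols) && pkIsHandleCol != "" {
		// The schema holds only the index columns, but the stored index row also
		// carries the handle, so count it as well.
		scanCols = append(append([]string{}, schemaCols...), pkIsHandleCol)
	}
	size := 0.0
	for _, c := range scanCols {
		size += widthOf[c]
	}
	return size
}

func main() {
	fmt.Println(indexScanRowSize([]string{"a", "b"}, []string{"a", "b"}, "pk"))       // 32: handle appended
	fmt.Println(indexScanRowSize([]string{"a", "b"}, []string{"a", "b", "pk"}, "pk")) // 32: handle already in schema
}
```

Either way the handle column is counted exactly once before the per-column widths are summed.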
@@ -2085,6 +2086,15 @@ func (ts *PhysicalTableScan) addPushedDownSelection(copTask *copTask, stats *pro
 	}
 }

+func (ts *PhysicalTableScan) getScanRowSize() float64 {
+	if ts.StoreType == kv.TiKV {
+		return ts.tblColHists.GetTableAvgRowSize(ts.ctx, ts.tblCols, ts.StoreType, true)
+	}
+	// If `ts.handleCol` is nil, then the schema of tableScan doesn't have handle column.
+	// This logic can be ensured in column pruning.
+	return ts.tblColHists.GetTableAvgRowSize(ts.ctx, ts.Schema().Columns, ts.StoreType, ts.HandleCols != nil)
+}
+
 func (ds *DataSource) getOriginalPhysicalTableScan(prop *property.PhysicalProperty, path *util.AccessPath, isMatchProp bool) (*PhysicalTableScan, float64, float64) {
 	ts := PhysicalTableScan{
 		Table: ds.tableInfo,

qw4990 marked this conversation as resolved.
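The new PhysicalTableScan.getScanRowSize added just above (previously inlined in getOriginalPhysicalTableScan) branches on the store type: TiKV is a row store, so a table scan pays for every column of the table, while for other stores such as the columnar TiFlash only the columns kept after pruning (the scan's schema) are read. A rough, self-contained illustration of why the branch matters for cost; the store names are reused from the diff, but the widths are invented:

```go
package main

import "fmt"

type storeType int

const (
	tiKV storeType = iota
	tiFlash
)

// scanRowSize estimates bytes read per row: a row store fetches every column of
// the table, a column store fetches only the columns the scan keeps after pruning.
func scanRowSize(store storeType, tableColWidths, schemaColWidths []float64) float64 {
	cols := schemaColWidths
	if store == tiKV {
		cols = tableColWidths
	}
	size := 0.0
	for _, w := range cols {
		size += w
	}
	return size
}

func main() {
	tableCols := []float64{8, 24, 256, 512} // every column of the table
	schemaCols := []float64{8, 24}          // only the two columns the query needs
	fmt.Println("TiKV row size:   ", scanRowSize(tiKV, tableCols, schemaCols))    // 800
	fmt.Println("TiFlash row size:", scanRowSize(tiFlash, tableCols, schemaCols)) // 32
}
```

The two estimates can differ by more than an order of magnitude on wide tables, which is why routing every call site through the same method matters for plan choice.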
@@ -2096,6 +2106,9 @@ func (ds *DataSource) getOriginalPhysicalTableScan(prop *property.PhysicalProper
 		Ranges:          path.Ranges,
 		AccessCondition: path.AccessConds,
 		StoreType:       path.StoreType,
+		HandleCols:      ds.handleCols,
+		tblCols:         ds.TblCols,
+		tblColHists:     ds.TblColHists,
 	}.Init(ds.ctx, ds.blockOffset)
 	ts.filterCondition = make([]expression.Expression, len(path.TableFilters))
 	copy(ts.filterCondition, path.TableFilters)
@@ -2135,14 +2148,7 @@ func (ds *DataSource) getOriginalPhysicalTableScan(prop *property.PhysicalProper
 	// we still need to assume values are uniformly distributed. For simplicity, we use uniform-assumption
 	// for all columns now, as we do in `deriveStatsByFilter`.
 	ts.stats = ds.tableStats.ScaleByExpectCnt(rowCount)
-	var rowSize float64
-	if ts.StoreType == kv.TiKV {
-		rowSize = ds.TblColHists.GetTableAvgRowSize(ds.ctx, ds.TblCols, ts.StoreType, true)
-	} else {
-		// If `ds.handleCol` is nil, then the schema of tableScan doesn't have handle column.
-		// This logic can be ensured in column pruning.
-		rowSize = ds.TblColHists.GetTableAvgRowSize(ds.ctx, ts.Schema().Columns, ts.StoreType, ds.handleCols != nil)
-	}
+	rowSize := ts.getScanRowSize()
 	sessVars := ds.ctx.GetSessionVars()
 	cost := rowCount * rowSize * sessVars.GetScanFactor(ds.tableInfo)
 	if isMatchProp {
@@ -2170,6 +2176,8 @@ func (ds *DataSource) getOriginalPhysicalIndexScan(prop *property.PhysicalProper
 		dataSourceSchema: ds.schema,
 		isPartition:      ds.isPartition,
 		physicalTableID:  ds.physicalTableID,
+		tblColHists:      ds.TblColHists,
+		pkIsHandleCol:    ds.getPKIsHandleCol(),
 	}.Init(ds.ctx, ds.blockOffset)
 	statsTbl := ds.statisticTable
 	if statsTbl.Indices[idx.ID] != nil {
@@ -2188,7 +2196,7 @@ func (ds *DataSource) getOriginalPhysicalIndexScan(prop *property.PhysicalProper
 		}
 	}
 	is.stats = ds.tableStats.ScaleByExpectCnt(rowCount)
-	rowSize := is.indexScanRowSize(idx, ds, true)
+	rowSize := is.getScanRowSize()
 	sessVars := ds.ctx.GetSessionVars()
 	cost := rowCount * rowSize * sessVars.GetScanFactor(ds.tableInfo)
 	if isMatchProp {
Review comment: I ran our TPCH bench again and this plan change won't cause regression:
[TPC-H benchmark result screenshot]