From d9e1d63aaf4bd2ce6be36b3307fc93764aeb9dce Mon Sep 17 00:00:00 2001 From: yisaer Date: Mon, 8 Aug 2022 14:58:22 +0800 Subject: [PATCH 01/13] support reader operators cost detail Signed-off-by: yisaer --- planner/core/find_best_task.go | 6 +- planner/core/plan_cost.go | 87 ++++++++++++++++--------- planner/core/plan_cost_detail.go | 92 ++++++++++++++++++++++++++ planner/core/plan_cost_detail_test.go | 94 ++++++++++++++++++++++----- 4 files changed, 232 insertions(+), 47 deletions(-) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index ee0c5ec72c2c3..a6deded3078f2 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -2080,7 +2080,7 @@ func (ds *DataSource) convertToPointGet(prop *property.PhysicalProperty, candida } func (ds *DataSource) convertToBatchPointGet(prop *property.PhysicalProperty, - candidate *candidatePath, hashPartColName *ast.ColumnName, _ *physicalOptimizeOp) (task task) { + candidate *candidatePath, hashPartColName *ast.ColumnName, opt *physicalOptimizeOp) (task task) { if !prop.IsSortItemEmpty() && !candidate.isMatchProp { return invalidTask } @@ -2111,7 +2111,7 @@ func (ds *DataSource) convertToBatchPointGet(prop *property.PhysicalProperty, batchPointGetPlan.Handles = append(batchPointGetPlan.Handles, kv.IntHandle(ran.LowVal[0].GetInt64())) } batchPointGetPlan.accessCols = ds.TblCols - cost = batchPointGetPlan.GetCost() + cost = batchPointGetPlan.GetCost(opt) // Add filter condition to table plan now. if len(candidate.path.TableFilters) > 0 { sessVars := ds.ctx.GetSessionVars() @@ -2139,7 +2139,7 @@ func (ds *DataSource) convertToBatchPointGet(prop *property.PhysicalProperty, } else { batchPointGetPlan.accessCols = ds.TblCols } - cost = batchPointGetPlan.GetCost() + cost = batchPointGetPlan.GetCost(opt) // Add index condition to table plan now. 
if len(candidate.path.IndexFilters)+len(candidate.path.TableFilters) > 0 { sessVars := ds.ctx.GetSessionVars() diff --git a/planner/core/plan_cost.go b/planner/core/plan_cost.go index 1bd4417fdd81e..e704b0ceaa25b 100644 --- a/planner/core/plan_cost.go +++ b/planner/core/plan_cost.go @@ -266,21 +266,28 @@ func (p *PhysicalIndexReader) GetPlanCost(_ property.TaskType, option *PlanCostO if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) { return p.planCost, nil } + var rowCount, rowSize, netFactor, indexPlanCost, netSeekCost float64 + sqlScanConcurrency := p.ctx.GetSessionVars().DistSQLScanConcurrency() // child's cost childCost, err := p.indexPlan.GetPlanCost(property.CopSingleReadTaskType, option) if err != nil { return 0, err } - p.planCost = childCost + indexPlanCost = childCost + p.planCost = indexPlanCost // net I/O cost: rows * row-size * net-factor tblStats := getTblStats(p.indexPlan) - rowSize := tblStats.GetAvgRowSize(p.ctx, p.indexPlan.Schema().Columns, true, false) - p.planCost += getCardinality(p.indexPlan, costFlag) * rowSize * getTableNetFactor(p.indexPlan) + rowSize = tblStats.GetAvgRowSize(p.ctx, p.indexPlan.Schema().Columns, true, false) + rowCount = getCardinality(p.indexPlan, costFlag) + netFactor = getTableNetFactor(p.indexPlan) + p.planCost += rowCount * rowSize * netFactor // net seek cost - p.planCost += estimateNetSeekCost(p.indexPlan) + netSeekCost = estimateNetSeekCost(p.indexPlan) + p.planCost += netSeekCost // consider concurrency - p.planCost /= float64(p.ctx.GetSessionVars().DistSQLScanConcurrency()) + p.planCost /= float64(sqlScanConcurrency) + setPhysicalIndexReaderCostDetail(p, option.tracer, rowCount, rowSize, netFactor, netSeekCost, indexPlanCost, sqlScanConcurrency) p.planCostInit = true return p.planCost, nil } @@ -300,21 +307,27 @@ func (p *PhysicalTableReader) GetPlanCost(_ property.TaskType, option *PlanCostO } p.planCost = 0 netFactor := getTableNetFactor(p.tablePlan) - switch p.StoreType { + var rowCount, rowSize, netSeekCost, tableCost float64 + sqlScanConcurrency := p.ctx.GetSessionVars().DistSQLScanConcurrency() + storeType := p.StoreType + switch storeType { case kv.TiKV: // child's cost childCost, err := p.tablePlan.GetPlanCost(property.CopSingleReadTaskType, option) if err != nil { return 0, err } + tableCost = childCost p.planCost = childCost // net I/O cost: rows * row-size * net-factor - rowSize := getTblStats(p.tablePlan).GetAvgRowSize(p.ctx, p.tablePlan.Schema().Columns, false, false) - p.planCost += getCardinality(p.tablePlan, costFlag) * rowSize * netFactor + rowSize = getTblStats(p.tablePlan).GetAvgRowSize(p.ctx, p.tablePlan.Schema().Columns, false, false) + rowCount = getCardinality(p.tablePlan, costFlag) + p.planCost += rowCount * rowSize * netFactor // net seek cost - p.planCost += estimateNetSeekCost(p.tablePlan) + netSeekCost = estimateNetSeekCost(p.tablePlan) + p.planCost += netSeekCost // consider concurrency - p.planCost /= float64(p.ctx.GetSessionVars().DistSQLScanConcurrency()) + p.planCost /= float64(sqlScanConcurrency) case kv.TiFlash: var concurrency, rowSize, seekCost float64 _, isMPP := p.tablePlan.(*PhysicalExchangeSender) @@ -357,6 +370,9 @@ func (p *PhysicalTableReader) GetPlanCost(_ property.TaskType, option *PlanCostO p.planCost /= 1000000000 } } + setPhysicalTableReaderCostDetail(p, option.tracer, + rowCount, rowSize, netFactor, netSeekCost, tableCost, + sqlScanConcurrency, storeType) p.planCostInit = true return p.planCost, nil } @@ -428,15 +444,18 @@ func (p *PhysicalTableScan) GetPlanCost(taskType 
property.TaskType, option *Plan } var selfCost float64 - switch p.ctx.GetSessionVars().CostModelVersion { + var rowCount, rowSize, scanFactor float64 + costModelVersion := p.ctx.GetSessionVars().CostModelVersion + switch costModelVersion { case modelVer1: // scan cost: rows * row-size * scan-factor - scanFactor := p.ctx.GetSessionVars().GetScanFactor(p.Table) + scanFactor = p.ctx.GetSessionVars().GetScanFactor(p.Table) if p.Desc && p.prop != nil && p.prop.ExpectedCnt >= smallScanThreshold { scanFactor = p.ctx.GetSessionVars().GetDescScanFactor(p.Table) } - selfCost = getCardinality(p, costFlag) * p.getScanRowSize() * scanFactor + rowCount = getCardinality(p, costFlag) + rowSize = p.getScanRowSize() + selfCost = rowCount * rowSize * scanFactor case modelVer2: // scan cost: rows * log2(row-size) * scan-factor - var scanFactor float64 switch taskType { case property.MppTaskType: // use a dedicated scan-factor for TiFlash // no need to distinguish `Scan` and `DescScan` for TiFlash for now @@ -448,16 +467,17 @@ func (p *PhysicalTableScan) GetPlanCost(taskType property.TaskType, option *Plan } } // the formula `log(rowSize)` is based on experiment results - rowSize := math.Max(p.getScanRowSize(), 2.0) // to guarantee logRowSize >= 1 + rowSize = math.Max(p.getScanRowSize(), 2.0) // to guarantee logRowSize >= 1 logRowSize := math.Log2(rowSize) - selfCost = getCardinality(p, costFlag) * logRowSize * scanFactor + rowCount = getCardinality(p, costFlag) + selfCost = rowCount * logRowSize * scanFactor // give TiFlash a start-up cost to let the optimizer prefers to use TiKV to process small table scans. if p.StoreType == kv.TiFlash { selfCost += 2000 * logRowSize * scanFactor } } - + setPhysicalTableOrIndexScanCostDetail(p, option.tracer, rowCount, rowSize, scanFactor, costModelVersion) p.planCost = selfCost p.planCostInit = true return p.planCost, nil @@ -471,23 +491,28 @@ func (p *PhysicalIndexScan) GetPlanCost(_ property.TaskType, option *PlanCostOpt } var selfCost float64 - switch p.ctx.GetSessionVars().CostModelVersion { + var rowCount, rowSize, scanFactor float64 + costModelVersion := p.ctx.GetSessionVars().CostModelVersion + switch costModelVersion { case modelVer1: // scan cost: rows * row-size * scan-factor - scanFactor := p.ctx.GetSessionVars().GetScanFactor(p.Table) + scanFactor = p.ctx.GetSessionVars().GetScanFactor(p.Table) if p.Desc && p.prop != nil && p.prop.ExpectedCnt >= smallScanThreshold { scanFactor = p.ctx.GetSessionVars().GetDescScanFactor(p.Table) } - selfCost = getCardinality(p, costFlag) * p.getScanRowSize() * scanFactor + rowCount = getCardinality(p, costFlag) + rowSize = p.getScanRowSize() + selfCost = rowCount * rowSize * scanFactor case modelVer2: - scanFactor := p.ctx.GetSessionVars().GetScanFactor(p.Table) + scanFactor = p.ctx.GetSessionVars().GetScanFactor(p.Table) if p.Desc { scanFactor = p.ctx.GetSessionVars().GetDescScanFactor(p.Table) } - rowSize := math.Max(p.getScanRowSize(), 2.0) + rowCount = getCardinality(p, costFlag) + rowSize = math.Max(p.getScanRowSize(), 2.0) logRowSize := math.Log2(rowSize) - selfCost = getCardinality(p, costFlag) * logRowSize * scanFactor + selfCost = rowCount * logRowSize * scanFactor } - + setPhysicalTableOrIndexScanCostDetail(p, option.tracer, rowCount, rowSize, scanFactor, costModelVersion) p.planCost = selfCost p.planCostInit = true return p.planCost, nil @@ -1173,7 +1198,7 @@ func (p *PhysicalTopN) GetPlanCost(taskType property.TaskType, option *PlanCostO } // GetCost returns cost of the PointGetPlan. 
-func (p *BatchPointGetPlan) GetCost() float64 { +func (p *BatchPointGetPlan) GetCost(opt *physicalOptimizeOp) float64 { cols := p.accessCols if cols == nil { return 0 // the cost of BatchGet generated in fast plan optimization is always 0 @@ -1188,9 +1213,13 @@ func (p *BatchPointGetPlan) GetCost() float64 { rowCount = float64(len(p.IndexValues)) rowSize = p.stats.HistColl.GetIndexAvgRowSize(p.ctx, cols, p.IndexInfo.Unique) } - cost += rowCount * rowSize * sessVars.GetNetworkFactor(p.TblInfo) - cost += rowCount * sessVars.GetSeekFactor(p.TblInfo) - cost /= float64(sessVars.DistSQLScanConcurrency()) + networkFactor := sessVars.GetNetworkFactor(p.TblInfo) + seekFactor := sessVars.GetSeekFactor(p.TblInfo) + scanConcurrency := sessVars.DistSQLScanConcurrency() + cost += rowCount * rowSize * networkFactor + cost += rowCount * seekFactor + cost /= float64(scanConcurrency) + setBatchPointGetPlanCostDetail(p, opt, rowCount, rowSize, networkFactor, seekFactor, scanConcurrency) return cost } @@ -1200,7 +1229,7 @@ func (p *BatchPointGetPlan) GetPlanCost(_ property.TaskType, option *PlanCostOpt if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) { return p.planCost, nil } - p.planCost = p.GetCost() + p.planCost = p.GetCost(option.tracer) p.planCostInit = true return p.planCost, nil } diff --git a/planner/core/plan_cost_detail.go b/planner/core/plan_cost_detail.go index ba5f2fe20b196..d15dc8116324b 100644 --- a/planner/core/plan_cost_detail.go +++ b/planner/core/plan_cost_detail.go @@ -16,17 +16,33 @@ package core import ( "fmt" + "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/util/tracing" ) const ( + // RowCountLbl indicates for rowCount + RowCountLbl = "rowCount" // RowSizeLbl indicates rowSize RowSizeLbl = "rowSize" + // NetworkFactorLbl indicates networkFactor NetworkFactorLbl = "networkFactor" // SeekFactorLbl indicates seekFactor SeekFactorLbl = "seekFactor" + // ScanFactorLbl indicates for scanFactor + ScanFactorLbl = "scanFactor" + + // ScanConcurrencyLbl indicates sql scan concurrency + ScanConcurrencyLbl = "scanConcurrency" + + // NetSeekCostLbl indicates netSeek cost + NetSeekCostLbl = "netSeekCost" + // TablePlanCostLbl indicates tablePlan cost + TablePlanCostLbl = "tablePlanCost" + // IndexPlanCostLbl indicates indexPlan cost + IndexPlanCostLbl = "indexPlanCost" ) func setPointGetPlanCostDetail(p *PointGetPlan, opt *physicalOptimizeOp, @@ -41,3 +57,79 @@ func setPointGetPlanCostDetail(p *PointGetPlan, opt *physicalOptimizeOp, SetDesc(fmt.Sprintf("%s*%s+%s", RowSizeLbl, NetworkFactorLbl, SeekFactorLbl)) opt.appendPlanCostDetail(detail) } + +func setBatchPointGetPlanCostDetail(p *BatchPointGetPlan, opt *physicalOptimizeOp, + rowCount, rowSize, networkFactor, seekFactor float64, scanConcurrency int) { + if opt == nil { + return + } + detail := tracing.NewPhysicalPlanCostDetail(p.ID(), p.TP()) + detail.AddParam(RowCountLbl, rowCount). + AddParam(RowSizeLbl, rowSize). + AddParam(NetworkFactorLbl, networkFactor). + AddParam(SeekFactorLbl, seekFactor). + AddParam(ScanConcurrencyLbl, scanConcurrency). 
+ SetDesc(fmt.Sprintf("(%s*%s*%s+%s*%s)/%s", + RowCountLbl, RowSizeLbl, NetworkFactorLbl, RowCountLbl, SeekFactorLbl, ScanConcurrencyLbl)) + opt.appendPlanCostDetail(detail) +} + +func setPhysicalTableOrIndexScanCostDetail(p PhysicalPlan, opt *physicalOptimizeOp, + rowCount, rowSize, scanFactor float64, costModelVersion int) { + if opt == nil { + return + } + _, ok1 := p.(*PhysicalTableScan) + _, ok2 := p.(*PhysicalIndexScan) + if !ok1 && !ok2 { + return + } + detail := tracing.NewPhysicalPlanCostDetail(p.ID(), p.TP()) + detail.AddParam(RowCountLbl, rowCount). + AddParam(RowSizeLbl, rowSize). + AddParam(ScanFactorLbl, scanFactor) + var desc string + if costModelVersion == modelVer1 { + desc = fmt.Sprintf("%s*%s*%s", RowCountLbl, RowSizeLbl, ScanFactorLbl) + } else { + desc = fmt.Sprintf("%s*log2(%s)*%s", RowCountLbl, RowSizeLbl, ScanFactorLbl) + } + detail.SetDesc(desc) + opt.appendPlanCostDetail(detail) +} + +func setPhysicalTableReaderCostDetail(p *PhysicalTableReader, opt *physicalOptimizeOp, + rowCount, rowSize, networkFactor, netSeekCost, tablePlanCost float64, + scanConcurrency int, storeType kv.StoreType) { + if opt == nil || storeType != kv.TiKV { + return + } + detail := tracing.NewPhysicalPlanCostDetail(p.ID(), p.TP()) + detail.AddParam(RowCountLbl, rowCount). + AddParam(RowSizeLbl, rowSize). + AddParam(NetworkFactorLbl, networkFactor). + AddParam(NetSeekCostLbl, netSeekCost). + AddParam(TablePlanCostLbl, tablePlanCost). + AddParam(ScanConcurrencyLbl, scanConcurrency) + detail.SetDesc(fmt.Sprintf("(%s+%s*%s*%s+%s)/%s", TablePlanCostLbl, + RowCountLbl, RowSizeLbl, NetworkFactorLbl, NetSeekCostLbl, ScanConcurrencyLbl)) + opt.appendPlanCostDetail(detail) +} + +func setPhysicalIndexReaderCostDetail(p *PhysicalIndexReader, opt *physicalOptimizeOp, + rowCount, rowSize, networkFactor, netSeekCost, indexPlanCost float64, + scanConcurrency int) { + if opt == nil { + return + } + detail := tracing.NewPhysicalPlanCostDetail(p.ID(), p.TP()) + detail.AddParam(RowCountLbl, rowCount). + AddParam(RowSizeLbl, rowSize). + AddParam(NetworkFactorLbl, networkFactor). + AddParam(NetSeekCostLbl, netSeekCost). + AddParam(IndexPlanCostLbl, indexPlanCost). 
+ AddParam(ScanConcurrencyLbl, scanConcurrency) + detail.SetDesc(fmt.Sprintf("(%s+%s*%s*%s+%s)/%s", IndexPlanCostLbl, + RowCountLbl, RowSizeLbl, NetworkFactorLbl, NetSeekCostLbl, ScanConcurrencyLbl)) + opt.appendPlanCostDetail(detail) +} diff --git a/planner/core/plan_cost_detail_test.go b/planner/core/plan_cost_detail_test.go index 3802ff1efa42f..8f1fadae88781 100644 --- a/planner/core/plan_cost_detail_test.go +++ b/planner/core/plan_cost_detail_test.go @@ -35,23 +35,87 @@ func TestPlanCostDetail(t *testing.T) { tk := testkit.NewTestKit(t, store) tk.MustExec("use test") tk.MustExec(`create table t (a int primary key, b int, c int, d int, k int, key b(b), key cd(c, d), unique key(k))`) - // assert PointGet cost detail - testPointGetCostDetail(t, tk, p, dom) -} - -func testPointGetCostDetail(t *testing.T, tk *testkit.TestKit, p *parser.Parser, dom *domain.Domain) { - tk.Session().GetSessionVars().StmtCtx.EnableOptimizeTrace = true - costDetails := optimize(t, "select * from t where a = 1", p, tk.Session(), dom) - assertPG := false - for _, cd := range costDetails { - if cd.GetPlanType() == plancodec.TypePointGet { - assertPG = true - require.True(t, cd.Exists(core.RowSizeLbl)) - require.True(t, cd.Exists(core.NetworkFactorLbl)) - require.True(t, cd.Exists(core.SeekFactorLbl)) + testcases := []struct { + sql string + assertLbls []string + tp string + }{ + { + tp: plancodec.TypePointGet, + sql: "select * from t where a = 1", + assertLbls: []string{ + core.RowSizeLbl, + core.NetworkFactorLbl, + core.SeekFactorLbl, + }, + }, + { + tp: plancodec.TypeBatchPointGet, + sql: "select * from t where a = 1 or a = 2 or a = 3", + assertLbls: []string{ + core.RowCountLbl, + core.RowSizeLbl, + core.NetworkFactorLbl, + core.SeekFactorLbl, + core.ScanConcurrencyLbl, + }, + }, + { + tp: plancodec.TypeTableFullScan, + sql: "select * from t", + assertLbls: []string{ + core.RowCountLbl, + core.RowSizeLbl, + core.ScanFactorLbl, + }, + }, + { + tp: plancodec.TypeTableReader, + sql: "select * from t", + assertLbls: []string{ + core.RowCountLbl, + core.RowSizeLbl, + core.NetworkFactorLbl, + core.NetSeekCostLbl, + core.TablePlanCostLbl, + core.ScanConcurrencyLbl, + }, + }, + { + tp: plancodec.TypeIndexFullScan, + sql: "select b from t", + assertLbls: []string{ + core.RowCountLbl, + core.RowSizeLbl, + core.ScanFactorLbl, + }, + }, + { + tp: plancodec.TypeIndexReader, + sql: "select b from t", + assertLbls: []string{ + core.RowCountLbl, + core.RowSizeLbl, + core.NetworkFactorLbl, + core.NetSeekCostLbl, + core.IndexPlanCostLbl, + core.ScanConcurrencyLbl, + }, + }, + } + for _, tc := range testcases { + costDetails := optimize(t, tc.sql, p, tk.Session(), dom) + asserted := false + for _, cd := range costDetails { + if cd.GetPlanType() == tc.tp { + asserted = true + for _, lbl := range tc.assertLbls { + require.True(t, cd.Exists(lbl)) + } + } } + require.True(t, asserted) } - require.True(t, assertPG) } func optimize(t *testing.T, sql string, p *parser.Parser, ctx sessionctx.Context, dom *domain.Domain) map[int]*tracing.PhysicalPlanCostDetail { From 0b5da58da979a659557c534f505b3682cb49bb45 Mon Sep 17 00:00:00 2001 From: yisaer Date: Mon, 8 Aug 2022 15:03:32 +0800 Subject: [PATCH 02/13] support reader operators cost detail Signed-off-by: yisaer --- planner/core/plan_cost_detail.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/planner/core/plan_cost_detail.go b/planner/core/plan_cost_detail.go index d15dc8116324b..b1e8e471056bd 100644 --- a/planner/core/plan_cost_detail.go +++ 
b/planner/core/plan_cost_detail.go
@@ -16,8 +16,8 @@ package core
 
 import (
 	"fmt"
-	"github.com/pingcap/tidb/kv"
 
+	"github.com/pingcap/tidb/kv"
 	"github.com/pingcap/tidb/util/tracing"
 )
 
From 4ec704fcac74aab035c83518819fe3ac0bce86ca Mon Sep 17 00:00:00 2001
From: yisaer
Date: Mon, 8 Aug 2022 15:04:49 +0800
Subject: [PATCH 03/13] add comment

Signed-off-by: yisaer
---
 planner/core/plan_cost_detail.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/planner/core/plan_cost_detail.go b/planner/core/plan_cost_detail.go
index b1e8e471056bd..6dab5cf9cc21a 100644
--- a/planner/core/plan_cost_detail.go
+++ b/planner/core/plan_cost_detail.go
@@ -101,6 +101,7 @@ func setPhysicalTableOrIndexScanCostDetail(p PhysicalPlan, opt *physicalOptimize
 func setPhysicalTableReaderCostDetail(p *PhysicalTableReader, opt *physicalOptimizeOp,
 	rowCount, rowSize, networkFactor, netSeekCost, tablePlanCost float64,
 	scanConcurrency int, storeType kv.StoreType) {
+	// the tracer doesn't support non-TiKV plans for now
 	if opt == nil || storeType != kv.TiKV {
 		return
 	}

From a8a50088de266935e115b66559d64b5cf35bd837 Mon Sep 17 00:00:00 2001
From: yisaer
Date: Wed, 10 Aug 2022 12:23:05 +0800
Subject: [PATCH 04/13] support hashjoin cost detail

Signed-off-by: yisaer
---
 planner/core/plan_cost.go             |  12 +-
 planner/core/plan_cost_detail.go      | 206 ++++++++++++++++++++++++++
 planner/core/plan_cost_detail_test.go |  76 ++++++----
 planner/core/task.go                  |   6 +-
 planner/implementation/join.go        |   2 +-
 util/tracing/opt_trace.go             |  28 ++--
 6 files changed, 281 insertions(+), 49 deletions(-)

diff --git a/planner/core/plan_cost.go b/planner/core/plan_cost.go
index e704b0ceaa25b..2f3f254bc5b00 100644
--- a/planner/core/plan_cost.go
+++ b/planner/core/plan_cost.go
@@ -920,7 +920,7 @@ func (p *PhysicalMergeJoin) GetPlanCost(taskType property.TaskType, option *Plan
 }
 
 // GetCost computes cost of hash join operator itself.
-func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint64) float64 {
+func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint64, op *physicalOptimizeOp) float64 {
 	buildCnt, probeCnt := lCnt, rCnt
 	build := p.children[0]
 	// Taking the right as the inner for right join or using the outer to build a hash table.
 	if p.UseOuterToBuild {
 		buildCnt, probeCnt = rCnt, lCnt
 		build = p.children[1]
 	}
+	// build/probe rowCount
 	sessVars := p.ctx.GetSessionVars()
 	oomUseTmpStorage := variable.EnableTmpStorageOnOOM.Load()
 	memQuota := sessVars.StmtCtx.MemTracker.GetBytesLimit() // sessVars.MemQuotaQuery && hint
+	// build rowSize
 	rowSize := getAvgRowSize(build.statsInfo(), build.Schema())
 	spill := oomUseTmpStorage && memQuota > 0 && rowSize*buildCnt > float64(memQuota) && p.storeTp != kv.TiFlash
 	// Cost of building hash table.
+	// cpu factor
 	cpuFactor := sessVars.GetCPUFactor()
 	if isMPP && p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
 		cpuFactor = sessVars.GetTiFlashCPUFactor() // use the dedicated TiFlash CPU Factor on modelVer2
 	}
 	cpuCost := buildCnt * cpuFactor
+	// memory factor
 	memoryCost := buildCnt * sessVars.GetMemoryFactor()
+	// disk factor
 	diskCost := buildCnt * sessVars.GetDiskFactor() * rowSize
 	// Number of matched row pairs regarding the equal join conditions.
helper := &fullJoinRowCountHelper{ @@ -1002,6 +1007,8 @@ func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint } else { diskCost = 0 } + setPhysicalHashJoinCostDetail(p, op, spill, buildCnt, probeCnt, cpuFactor, + rowSize, numPairs, probeCost, probeDiskCost, cpuCost, memQuota) return cpuCost + memoryCost + diskCost } @@ -1019,7 +1026,8 @@ func (p *PhysicalHashJoin) GetPlanCost(taskType property.TaskType, option *PlanC } p.planCost += childCost } - p.planCost += p.GetCost(getCardinality(p.children[0], costFlag), getCardinality(p.children[1], costFlag), taskType == property.MppTaskType, costFlag) + p.planCost += p.GetCost(getCardinality(p.children[0], costFlag), getCardinality(p.children[1], costFlag), + taskType == property.MppTaskType, costFlag, option.tracer) p.planCostInit = true return p.planCost, nil } diff --git a/planner/core/plan_cost_detail.go b/planner/core/plan_cost_detail.go index 6dab5cf9cc21a..46889be47e7dc 100644 --- a/planner/core/plan_cost_detail.go +++ b/planner/core/plan_cost_detail.go @@ -26,6 +26,12 @@ const ( RowCountLbl = "rowCount" // RowSizeLbl indicates rowSize RowSizeLbl = "rowSize" + // BuildRowCountLbl indicates rowCount on build side + BuildRowCountLbl = "buildRowCount" + // ProbeRowCountLbl indicates rowCount on probe side + ProbeRowCountLbl = "probeRowCount" + // NumPairsLbl indicates numPairs + NumPairsLbl = "numPairs" // NetworkFactorLbl indicates networkFactor NetworkFactorLbl = "networkFactor" @@ -33,9 +39,21 @@ const ( SeekFactorLbl = "seekFactor" // ScanFactorLbl indicates for scanFactor ScanFactorLbl = "scanFactor" + // SelectionFactorLbl indicates selection factor + SelectionFactorLbl = "selectionFactor" + // CpuFactorLbl indicates cpu factor + CpuFactorLbl = "cpuFactor" + // MemoryFactorLbl indicates mem factor + MemoryFactorLbl = "memoryFactor" + // DiskFactorLbl indicates disk factor + DiskFactorLbl = "diskFactor" + // ConcurrencyFactorLbl indicates for concurrency factor + ConcurrencyFactorLbl = "concurrencyFactor" // ScanConcurrencyLbl indicates sql scan concurrency ScanConcurrencyLbl = "scanConcurrency" + // HashJoinConcurrencyLbl indicates concurrency for hash join + HashJoinConcurrencyLbl = "hashJoinConcurrency" // NetSeekCostLbl indicates netSeek cost NetSeekCostLbl = "netSeekCost" @@ -43,6 +61,30 @@ const ( TablePlanCostLbl = "tablePlanCost" // IndexPlanCostLbl indicates indexPlan cost IndexPlanCostLbl = "indexPlanCost" + + // ProbeCostDetailLbl indicates probeCost + ProbeCostDetailLbl = "probeCostDetail" + // ProbeCostDescLbl indicates description for probe cost + ProbeCostDescLbl = "probeCostDesc" + // CpuCostDetailLbl indicates cpuCost detail + CpuCostDetailLbl = "cpuCostDetail" + // CpuCostDescLbl indicates description for cpu cost + CpuCostDescLbl = "cpuCostDesc" + // MemCostDetailLbl indicates mem cost detail + MemCostDetailLbl = "memCostDetail" + // MemCostDescLbl indicates description for mem cost + MemCostDescLbl = "memCostDesc" + // DiskCostDetailLbl indicates disk cost detail + DiskCostDetailLbl = "diskCostDetail" + // DiskCostDescLbl indicates description for disk cost + DiskCostDescLbl = "diskCostDesc" + // ProbeDiskCostLbl indicates probe disk cost detail + ProbeDiskCostLbl = "probeDiskCostDetail" + // ProbeDiskCostDescLbl indicates description for probe disk cost + ProbeDiskCostDescLbl = "probeDiskCostDesc" + + // MemQuotaLbl indicates memory quota + MemQuotaLbl = "memQuota" ) func setPointGetPlanCostDetail(p *PointGetPlan, opt *physicalOptimizeOp, @@ -134,3 +176,167 @@ func 
setPhysicalIndexReaderCostDetail(p *PhysicalIndexReader, opt *physicalOptim RowCountLbl, RowSizeLbl, NetworkFactorLbl, NetSeekCostLbl, ScanConcurrencyLbl)) opt.appendPlanCostDetail(detail) } + +func setPhysicalHashJoinCostDetail(p *PhysicalHashJoin, opt *physicalOptimizeOp, + spill bool, buildCnt, probeCnt, cpuFactor, rowSize, numPairs, + probeCost, probeDiskCost, cpuCost float64, memQuota int64) { + if opt == nil { + return + } + detail := tracing.NewPhysicalPlanCostDetail(p.ID(), p.TP()) + sessVars := p.ctx.GetSessionVars() + diskCostDetail := &HashJoinDiskCostDetail{ + Spill: spill, + UseOuterToBuild: p.UseOuterToBuild, + BuildRowCount: buildCnt, + DiskFactor: sessVars.GetDiskFactor(), + RowSize: rowSize, + ProbeDiskCost: &HashJoinProbeDiskCostDetail{ + SelectionFactor: SelectionFactor, + NumPairs: numPairs, + HasConditions: len(p.LeftConditions)+len(p.RightConditions) > 0, + Cost: probeDiskCost, + }, + } + memoryCostDetail := &HashJoinMemoryCostDetail{ + Spill: spill, + MemQuota: memQuota, + RowSize: rowSize, + BuildRowCount: buildCnt, + MemoryFactor: sessVars.GetMemoryFactor(), + } + cpuCostDetail := &HashJoinCpuCostDetail{ + BuildRowCount: buildCnt, + CpuFactor: cpuFactor, + ConcurrencyFactor: sessVars.GetConcurrencyFactor(), + ProbeCost: &HashJoinProbeCostDetail{ + NumPairs: numPairs, + HasConditions: len(p.LeftConditions)+len(p.RightConditions) > 0, + SelectionFactor: SelectionFactor, + ProbeRowCount: probeCnt, + Cost: probeCost, + }, + HashJoinConcurrency: p.Concurrency, + Spill: spill, + Cost: cpuCost, + } + + // record cpu cost detail + detail.AddParam(CpuCostDetailLbl, cpuCostDetail). + AddParam(CpuCostDescLbl, cpuCostDetail.desc()). + AddParam(ProbeCostDescLbl, cpuCostDetail.probeCostDesc()) + // record memory cost detail + detail.AddParam(MemCostDetailLbl, memoryCostDetail). + AddParam(MemCostDescLbl, memoryCostDetail.desc()) + // record disk cost detail + detail.AddParam(DiskCostDetailLbl, diskCostDetail). + AddParam(DiskCostDescLbl, diskCostDetail.desc()). 
+ AddParam(ProbeDiskCostDescLbl, diskCostDetail.probeDesc()) + + detail.SetDesc(fmt.Sprintf("%s+%s+%s+all children cost", CpuCostDetailLbl, MemCostDetailLbl, DiskCostDetailLbl)) + opt.appendPlanCostDetail(detail) +} + +type HashJoinProbeCostDetail struct { + NumPairs float64 `json:"numPairs"` + HasConditions bool `json:"hasConditions"` + SelectionFactor float64 `json:"selectionFactor"` + ProbeRowCount float64 `json:"probeRowCount"` + Cost float64 `json:"cost"` +} + +type HashJoinCpuCostDetail struct { + BuildRowCount float64 `json:"buildRowCount"` + CpuFactor float64 `json:"cpuFactor"` + ConcurrencyFactor float64 `json:"concurrencyFactor"` + ProbeCost *HashJoinProbeCostDetail `json:"probeCost"` + HashJoinConcurrency uint `json:"hashJoinConcurrency"` + Spill bool `json:"spill"` + Cost float64 `json:"cost"` +} + +func (h *HashJoinCpuCostDetail) desc() string { + var cpuCostDesc string + buildCostDesc := fmt.Sprintf("%s*%s", BuildRowCountLbl, CpuFactorLbl) + if h.Spill { + cpuCostDesc = fmt.Sprintf("%s+%s+(%s+1)*%s)+%s", + buildCostDesc, + ProbeCostDetailLbl, HashJoinConcurrencyLbl, ConcurrencyFactorLbl, + buildCostDesc) + } else { + cpuCostDesc = fmt.Sprintf("%s+%s+(%s+1)*%s)+%s/%s", + buildCostDesc, + ProbeCostDetailLbl, HashJoinConcurrencyLbl, ConcurrencyFactorLbl, + buildCostDesc, HashJoinConcurrencyLbl) + } + return cpuCostDesc +} + +func (h *HashJoinCpuCostDetail) probeCostDesc() string { + var probeCostDesc string + if h.ProbeCost.HasConditions { + probeCostDesc = fmt.Sprintf("(%s*%s*%s+%s*%s)/%s", + NumPairsLbl, CpuFactorLbl, SelectionFactorLbl, + ProbeRowCountLbl, CpuFactorLbl, HashJoinConcurrencyLbl) + } else { + probeCostDesc = fmt.Sprintf("(%s*%s)/%s", + NumPairsLbl, CpuFactorLbl, + HashJoinConcurrencyLbl) + } + return probeCostDesc +} + +type HashJoinMemoryCostDetail struct { + Spill bool `json:"spill"` + MemQuota int64 `json:"memQuota"` + RowSize float64 `json:"rowSize"` + BuildRowCount float64 `json:"buildRowCount"` + MemoryFactor float64 `json:"memoryFactor"` +} + +func (h *HashJoinMemoryCostDetail) desc() string { + memCostDesc := fmt.Sprintf("%s*%s", BuildRowCountLbl, MemoryFactorLbl) + if h.Spill { + memCostDesc = fmt.Sprintf("%s*%s/(%s*%s)", memCostDesc, MemQuotaLbl, RowSizeLbl, BuildRowCountLbl) + } + return memCostDesc +} + +type HashJoinProbeDiskCostDetail struct { + SelectionFactor float64 `json:"selectionFactor"` + NumPairs float64 `json:"numPairs"` + HasConditions bool `json:"hasConditions"` + Cost float64 `json:"cost"` +} + +type HashJoinDiskCostDetail struct { + Spill bool `json:"spill"` + UseOuterToBuild bool `json:"useOuterToBuild"` + BuildRowCount float64 `json:"buildRowCount"` + DiskFactor float64 `json:"diskFactor"` + RowSize float64 `json:"rowSize"` + ProbeDiskCost *HashJoinProbeDiskCostDetail `json:"probeDiskCost"` +} + +func (h *HashJoinDiskCostDetail) desc() string { + if !h.Spill { + return "" + } + buildDiskCost := fmt.Sprintf("%s*%s*%s", BuildRowCountLbl, DiskFactorLbl, RowSizeLbl) + desc := fmt.Sprintf("%s+%s", buildDiskCost, ProbeDiskCostLbl) + if h.UseOuterToBuild { + desc = fmt.Sprintf("%s+%s", desc, buildDiskCost) + } + return desc +} + +func (h *HashJoinDiskCostDetail) probeDesc() string { + if !h.Spill { + return "" + } + desc := fmt.Sprintf("%s*%s*%s", NumPairsLbl, DiskFactorLbl, RowSizeLbl) + if h.ProbeDiskCost.HasConditions { + desc = fmt.Sprintf("%s*%s", desc, SelectionFactorLbl) + } + return desc +} diff --git a/planner/core/plan_cost_detail_test.go b/planner/core/plan_cost_detail_test.go index 8f1fadae88781..6e6eb8dd49f09 100644 --- 
a/planner/core/plan_cost_detail_test.go +++ b/planner/core/plan_cost_detail_test.go @@ -20,7 +20,7 @@ import ( "github.com/pingcap/tidb/domain" "github.com/pingcap/tidb/parser" - "github.com/pingcap/tidb/planner/core" + plannercore "github.com/pingcap/tidb/planner/core" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/testkit" "github.com/pingcap/tidb/util/hint" @@ -40,66 +40,80 @@ func TestPlanCostDetail(t *testing.T) { assertLbls []string tp string }{ + { + tp: plancodec.TypeHashJoin, + sql: "select /*+ HASH_JOIN(t1, t2) */ * from t t1 join t t2 on t1.k = t2.k where t1.a = 1;", + assertLbls: []string{ + plannercore.CpuCostDetailLbl, + plannercore.CpuCostDescLbl, + plannercore.ProbeCostDescLbl, + plannercore.MemCostDetailLbl, + plannercore.MemCostDescLbl, + plannercore.DiskCostDetailLbl, + plannercore.DiskCostDescLbl, + plannercore.ProbeDiskCostDescLbl, + }, + }, { tp: plancodec.TypePointGet, sql: "select * from t where a = 1", assertLbls: []string{ - core.RowSizeLbl, - core.NetworkFactorLbl, - core.SeekFactorLbl, + plannercore.RowSizeLbl, + plannercore.NetworkFactorLbl, + plannercore.SeekFactorLbl, }, }, { tp: plancodec.TypeBatchPointGet, sql: "select * from t where a = 1 or a = 2 or a = 3", assertLbls: []string{ - core.RowCountLbl, - core.RowSizeLbl, - core.NetworkFactorLbl, - core.SeekFactorLbl, - core.ScanConcurrencyLbl, + plannercore.RowCountLbl, + plannercore.RowSizeLbl, + plannercore.NetworkFactorLbl, + plannercore.SeekFactorLbl, + plannercore.ScanConcurrencyLbl, }, }, { tp: plancodec.TypeTableFullScan, sql: "select * from t", assertLbls: []string{ - core.RowCountLbl, - core.RowSizeLbl, - core.ScanFactorLbl, + plannercore.RowCountLbl, + plannercore.RowSizeLbl, + plannercore.ScanFactorLbl, }, }, { tp: plancodec.TypeTableReader, sql: "select * from t", assertLbls: []string{ - core.RowCountLbl, - core.RowSizeLbl, - core.NetworkFactorLbl, - core.NetSeekCostLbl, - core.TablePlanCostLbl, - core.ScanConcurrencyLbl, + plannercore.RowCountLbl, + plannercore.RowSizeLbl, + plannercore.NetworkFactorLbl, + plannercore.NetSeekCostLbl, + plannercore.TablePlanCostLbl, + plannercore.ScanConcurrencyLbl, }, }, { tp: plancodec.TypeIndexFullScan, sql: "select b from t", assertLbls: []string{ - core.RowCountLbl, - core.RowSizeLbl, - core.ScanFactorLbl, + plannercore.RowCountLbl, + plannercore.RowSizeLbl, + plannercore.ScanFactorLbl, }, }, { tp: plancodec.TypeIndexReader, sql: "select b from t", assertLbls: []string{ - core.RowCountLbl, - core.RowSizeLbl, - core.NetworkFactorLbl, - core.NetSeekCostLbl, - core.IndexPlanCostLbl, - core.ScanConcurrencyLbl, + plannercore.RowCountLbl, + plannercore.RowSizeLbl, + plannercore.NetworkFactorLbl, + plannercore.NetSeekCostLbl, + plannercore.IndexPlanCostLbl, + plannercore.ScanConcurrencyLbl, }, }, } @@ -121,16 +135,16 @@ func TestPlanCostDetail(t *testing.T) { func optimize(t *testing.T, sql string, p *parser.Parser, ctx sessionctx.Context, dom *domain.Domain) map[int]*tracing.PhysicalPlanCostDetail { stmt, err := p.ParseOneStmt(sql, "", "") require.NoError(t, err) - err = core.Preprocess(ctx, stmt, core.WithPreprocessorReturn(&core.PreprocessorReturn{InfoSchema: dom.InfoSchema()})) + err = plannercore.Preprocess(ctx, stmt, plannercore.WithPreprocessorReturn(&plannercore.PreprocessorReturn{InfoSchema: dom.InfoSchema()})) require.NoError(t, err) - sctx := core.MockContext() + sctx := plannercore.MockContext() sctx.GetSessionVars().StmtCtx.EnableOptimizeTrace = true sctx.GetSessionVars().EnableNewCostInterface = true - builder, _ := 
core.NewPlanBuilder().Init(sctx, dom.InfoSchema(), &hint.BlockHintProcessor{}) + builder, _ := plannercore.NewPlanBuilder().Init(sctx, dom.InfoSchema(), &hint.BlockHintProcessor{}) domain.GetDomain(sctx).MockInfoCacheAndLoadInfoSchema(dom.InfoSchema()) plan, err := builder.Build(context.TODO(), stmt) require.NoError(t, err) - _, _, err = core.DoOptimize(context.TODO(), sctx, builder.GetOptFlag(), plan.(core.LogicalPlan)) + _, _, err = plannercore.DoOptimize(context.TODO(), sctx, builder.GetOptFlag(), plan.(plannercore.LogicalPlan)) require.NoError(t, err) return sctx.GetSessionVars().StmtCtx.OptimizeTracer.Physical.PhysicalPlanCostDetails } diff --git a/planner/core/task.go b/planner/core/task.go index 67bd81b556d78..a8c0e4eec0931 100644 --- a/planner/core/task.go +++ b/planner/core/task.go @@ -329,7 +329,7 @@ func (p *PhysicalHashJoin) attach2Task(tasks ...task) task { p.SetChildren(lTask.plan(), rTask.plan()) task := &rootTask{ p: p, - cst: lTask.cost() + rTask.cost() + p.GetCost(lTask.count(), rTask.count(), false, 0), + cst: lTask.cost() + rTask.cost() + p.GetCost(lTask.count(), rTask.count(), false, 0, nil), } p.cost = task.cost() return task @@ -550,7 +550,7 @@ func (p *PhysicalHashJoin) attach2TaskForMpp(tasks ...task) task { outerTask = rTask } task := &mppTask{ - cst: lCost + rCost + p.GetCost(lTask.count(), rTask.count(), false, 0), + cst: lCost + rCost + p.GetCost(lTask.count(), rTask.count(), false, 0, nil), p: p, partTp: outerTask.partTp, hashCols: outerTask.hashCols, @@ -581,7 +581,7 @@ func (p *PhysicalHashJoin) attach2TaskForTiFlash(tasks ...task) task { tblColHists: rTask.tblColHists, indexPlanFinished: true, tablePlan: p, - cst: lCost + rCost + p.GetCost(lTask.count(), rTask.count(), false, 0), + cst: lCost + rCost + p.GetCost(lTask.count(), rTask.count(), false, 0, nil), } p.cost = task.cst return task diff --git a/planner/implementation/join.go b/planner/implementation/join.go index d601632ec25c1..3c514a26d8a47 100644 --- a/planner/implementation/join.go +++ b/planner/implementation/join.go @@ -29,7 +29,7 @@ func (impl *HashJoinImpl) CalcCost(_ float64, children ...memo.Implementation) f hashJoin := impl.plan.(*plannercore.PhysicalHashJoin) // The children here are only used to calculate the cost. 
hashJoin.SetChildren(children[0].GetPlan(), children[1].GetPlan()) - selfCost := hashJoin.GetCost(children[0].GetPlan().StatsCount(), children[1].GetPlan().StatsCount(), false, 0) + selfCost := hashJoin.GetCost(children[0].GetPlan().StatsCount(), children[1].GetPlan().StatsCount(), false, 0, nil) impl.cost = selfCost + children[0].GetCost() + children[1].GetCost() return impl.cost } diff --git a/util/tracing/opt_trace.go b/util/tracing/opt_trace.go index 7520c5cc0d4b8..8112379534b46 100644 --- a/util/tracing/opt_trace.go +++ b/util/tracing/opt_trace.go @@ -232,44 +232,48 @@ func (tracer *OptimizeTracer) RecordFinalPlan(final *PlanTrace) { // PhysicalPlanCostDetail indicates cost detail type PhysicalPlanCostDetail struct { - id int - tp string - params map[string]interface{} - desc string + ID int `json:"id"` + TP string `json:"type"` + Params map[string]interface{} `json:"params"` + Desc string `json:"desc"` } // NewPhysicalPlanCostDetail creates a cost detail func NewPhysicalPlanCostDetail(id int, tp string) *PhysicalPlanCostDetail { return &PhysicalPlanCostDetail{ - id: id, - tp: tp, - params: make(map[string]interface{}), + ID: id, + TP: tp, + Params: make(map[string]interface{}), } } // AddParam adds param func (d *PhysicalPlanCostDetail) AddParam(k string, v interface{}) *PhysicalPlanCostDetail { - d.params[k] = v + // discard empty param value + if s, ok := v.(string); ok && len(s) < 1 { + return d + } + d.Params[k] = v return d } // SetDesc sets desc func (d *PhysicalPlanCostDetail) SetDesc(desc string) { - d.desc = desc + d.Desc = desc } // GetPlanID gets plan id func (d *PhysicalPlanCostDetail) GetPlanID() int { - return d.id + return d.ID } // GetPlanType gets plan type func (d *PhysicalPlanCostDetail) GetPlanType() string { - return d.tp + return d.TP } // Exists checks whether key exists in params func (d *PhysicalPlanCostDetail) Exists(k string) bool { - _, ok := d.params[k] + _, ok := d.Params[k] return ok } From a3965bef3819675e777dd9f24522af7d77d56811 Mon Sep 17 00:00:00 2001 From: yisaer Date: Wed, 10 Aug 2022 12:44:20 +0800 Subject: [PATCH 05/13] support hashjoin cost detail Signed-off-by: yisaer --- planner/core/plan_cost.go | 9 ++------- planner/core/plan_cost_detail.go | 13 +++++++++---- planner/core/plan_cost_detail_test.go | 2 -- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/planner/core/plan_cost.go b/planner/core/plan_cost.go index 2f3f254bc5b00..170f98b6f4813 100644 --- a/planner/core/plan_cost.go +++ b/planner/core/plan_cost.go @@ -928,23 +928,18 @@ func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint buildCnt, probeCnt = rCnt, lCnt build = p.children[1] } - // build/probe rowCount sessVars := p.ctx.GetSessionVars() oomUseTmpStorage := variable.EnableTmpStorageOnOOM.Load() memQuota := sessVars.StmtCtx.MemTracker.GetBytesLimit() // sessVars.MemQuotaQuery && hint - // build rowSize rowSize := getAvgRowSize(build.statsInfo(), build.Schema()) spill := oomUseTmpStorage && memQuota > 0 && rowSize*buildCnt > float64(memQuota) && p.storeTp != kv.TiFlash // Cost of building hash table. 
- // cpu factor cpuFactor := sessVars.GetCPUFactor() if isMPP && p.ctx.GetSessionVars().CostModelVersion == modelVer2 { cpuFactor = sessVars.GetTiFlashCPUFactor() // use the dedicated TiFlash CPU Factor on modelVer2 } cpuCost := buildCnt * cpuFactor - // memory factor memoryCost := buildCnt * sessVars.GetMemoryFactor() - // disk factor diskCost := buildCnt * sessVars.GetDiskFactor() * rowSize // Number of matched row pairs regarding the equal join conditions. helper := &fullJoinRowCountHelper{ @@ -1007,8 +1002,8 @@ func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint } else { diskCost = 0 } - setPhysicalHashJoinCostDetail(p, op, spill, buildCnt, probeCnt, cpuFactor, - rowSize, numPairs, probeCost, probeDiskCost, cpuCost, memQuota) + setPhysicalHashJoinCostDetail(p, op, spill, buildCnt, probeCnt, cpuFactor, rowSize, numPairs, + cpuCost, probeCost, memoryCost, diskCost, probeDiskCost, memQuota) return cpuCost + memoryCost + diskCost } diff --git a/planner/core/plan_cost_detail.go b/planner/core/plan_cost_detail.go index 46889be47e7dc..16bb58e576f9e 100644 --- a/planner/core/plan_cost_detail.go +++ b/planner/core/plan_cost_detail.go @@ -177,9 +177,10 @@ func setPhysicalIndexReaderCostDetail(p *PhysicalIndexReader, opt *physicalOptim opt.appendPlanCostDetail(detail) } -func setPhysicalHashJoinCostDetail(p *PhysicalHashJoin, opt *physicalOptimizeOp, - spill bool, buildCnt, probeCnt, cpuFactor, rowSize, numPairs, - probeCost, probeDiskCost, cpuCost float64, memQuota int64) { +func setPhysicalHashJoinCostDetail(p *PhysicalHashJoin, opt *physicalOptimizeOp, spill bool, + buildCnt, probeCnt, cpuFactor, rowSize, numPairs, + cpuCost, probeCpuCost, memCost, diskCost, probeDiskCost float64, + memQuota int64) { if opt == nil { return } @@ -197,6 +198,7 @@ func setPhysicalHashJoinCostDetail(p *PhysicalHashJoin, opt *physicalOptimizeOp, HasConditions: len(p.LeftConditions)+len(p.RightConditions) > 0, Cost: probeDiskCost, }, + Cost: diskCost, } memoryCostDetail := &HashJoinMemoryCostDetail{ Spill: spill, @@ -204,6 +206,7 @@ func setPhysicalHashJoinCostDetail(p *PhysicalHashJoin, opt *physicalOptimizeOp, RowSize: rowSize, BuildRowCount: buildCnt, MemoryFactor: sessVars.GetMemoryFactor(), + Cost: memCost, } cpuCostDetail := &HashJoinCpuCostDetail{ BuildRowCount: buildCnt, @@ -214,7 +217,7 @@ func setPhysicalHashJoinCostDetail(p *PhysicalHashJoin, opt *physicalOptimizeOp, HasConditions: len(p.LeftConditions)+len(p.RightConditions) > 0, SelectionFactor: SelectionFactor, ProbeRowCount: probeCnt, - Cost: probeCost, + Cost: probeCpuCost, }, HashJoinConcurrency: p.Concurrency, Spill: spill, @@ -292,6 +295,7 @@ type HashJoinMemoryCostDetail struct { RowSize float64 `json:"rowSize"` BuildRowCount float64 `json:"buildRowCount"` MemoryFactor float64 `json:"memoryFactor"` + Cost float64 `json:"cost"` } func (h *HashJoinMemoryCostDetail) desc() string { @@ -316,6 +320,7 @@ type HashJoinDiskCostDetail struct { DiskFactor float64 `json:"diskFactor"` RowSize float64 `json:"rowSize"` ProbeDiskCost *HashJoinProbeDiskCostDetail `json:"probeDiskCost"` + Cost float64 `json:"cost"` } func (h *HashJoinDiskCostDetail) desc() string { diff --git a/planner/core/plan_cost_detail_test.go b/planner/core/plan_cost_detail_test.go index 6e6eb8dd49f09..c49e4695eb51f 100644 --- a/planner/core/plan_cost_detail_test.go +++ b/planner/core/plan_cost_detail_test.go @@ -50,8 +50,6 @@ func TestPlanCostDetail(t *testing.T) { plannercore.MemCostDetailLbl, plannercore.MemCostDescLbl, plannercore.DiskCostDetailLbl, - 
plannercore.DiskCostDescLbl, - plannercore.ProbeDiskCostDescLbl, }, }, { From 62f941e262852fb5e3c54d86a7f05e88570129ab Mon Sep 17 00:00:00 2001 From: yisaer Date: Wed, 10 Aug 2022 16:13:12 +0800 Subject: [PATCH 06/13] fix lint Signed-off-by: yisaer --- planner/core/plan_cost_detail.go | 35 +++++++++++++++------------ planner/core/plan_cost_detail_test.go | 4 +-- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/planner/core/plan_cost_detail.go b/planner/core/plan_cost_detail.go index 16bb58e576f9e..2faf4c9bcda56 100644 --- a/planner/core/plan_cost_detail.go +++ b/planner/core/plan_cost_detail.go @@ -66,10 +66,10 @@ const ( ProbeCostDetailLbl = "probeCostDetail" // ProbeCostDescLbl indicates description for probe cost ProbeCostDescLbl = "probeCostDesc" - // CpuCostDetailLbl indicates cpuCost detail - CpuCostDetailLbl = "cpuCostDetail" - // CpuCostDescLbl indicates description for cpu cost - CpuCostDescLbl = "cpuCostDesc" + // CPUCostDetailLbl indicates cpuCost detail + CPUCostDetailLbl = "cpuCostDetail" + // CPUCostDescLbl indicates description for cpu cost + CPUCostDescLbl = "cpuCostDesc" // MemCostDetailLbl indicates mem cost detail MemCostDetailLbl = "memCostDetail" // MemCostDescLbl indicates description for mem cost @@ -179,7 +179,7 @@ func setPhysicalIndexReaderCostDetail(p *PhysicalIndexReader, opt *physicalOptim func setPhysicalHashJoinCostDetail(p *PhysicalHashJoin, opt *physicalOptimizeOp, spill bool, buildCnt, probeCnt, cpuFactor, rowSize, numPairs, - cpuCost, probeCpuCost, memCost, diskCost, probeDiskCost float64, + cpuCost, probeCPUCost, memCost, diskCost, probeDiskCost float64, memQuota int64) { if opt == nil { return @@ -208,16 +208,16 @@ func setPhysicalHashJoinCostDetail(p *PhysicalHashJoin, opt *physicalOptimizeOp, MemoryFactor: sessVars.GetMemoryFactor(), Cost: memCost, } - cpuCostDetail := &HashJoinCpuCostDetail{ + cpuCostDetail := &HashJoinCPUCostDetail{ BuildRowCount: buildCnt, - CpuFactor: cpuFactor, + CPUFactor: cpuFactor, ConcurrencyFactor: sessVars.GetConcurrencyFactor(), ProbeCost: &HashJoinProbeCostDetail{ NumPairs: numPairs, HasConditions: len(p.LeftConditions)+len(p.RightConditions) > 0, SelectionFactor: SelectionFactor, ProbeRowCount: probeCnt, - Cost: probeCpuCost, + Cost: probeCPUCost, }, HashJoinConcurrency: p.Concurrency, Spill: spill, @@ -225,8 +225,8 @@ func setPhysicalHashJoinCostDetail(p *PhysicalHashJoin, opt *physicalOptimizeOp, } // record cpu cost detail - detail.AddParam(CpuCostDetailLbl, cpuCostDetail). - AddParam(CpuCostDescLbl, cpuCostDetail.desc()). + detail.AddParam(CPUCostDetailLbl, cpuCostDetail). + AddParam(CPUCostDescLbl, cpuCostDetail.desc()). AddParam(ProbeCostDescLbl, cpuCostDetail.probeCostDesc()) // record memory cost detail detail.AddParam(MemCostDetailLbl, memoryCostDetail). @@ -236,10 +236,11 @@ func setPhysicalHashJoinCostDetail(p *PhysicalHashJoin, opt *physicalOptimizeOp, AddParam(DiskCostDescLbl, diskCostDetail.desc()). 
AddParam(ProbeDiskCostDescLbl, diskCostDetail.probeDesc()) - detail.SetDesc(fmt.Sprintf("%s+%s+%s+all children cost", CpuCostDetailLbl, MemCostDetailLbl, DiskCostDetailLbl)) + detail.SetDesc(fmt.Sprintf("%s+%s+%s+all children cost", CPUCostDetailLbl, MemCostDetailLbl, DiskCostDetailLbl)) opt.appendPlanCostDetail(detail) } +// HashJoinProbeCostDetail indicates probe cpu cost detail type HashJoinProbeCostDetail struct { NumPairs float64 `json:"numPairs"` HasConditions bool `json:"hasConditions"` @@ -248,9 +249,10 @@ type HashJoinProbeCostDetail struct { Cost float64 `json:"cost"` } -type HashJoinCpuCostDetail struct { +// HashJoinCPUCostDetail indicates cpu cost detail +type HashJoinCPUCostDetail struct { BuildRowCount float64 `json:"buildRowCount"` - CpuFactor float64 `json:"cpuFactor"` + CPUFactor float64 `json:"cpuFactor"` ConcurrencyFactor float64 `json:"concurrencyFactor"` ProbeCost *HashJoinProbeCostDetail `json:"probeCost"` HashJoinConcurrency uint `json:"hashJoinConcurrency"` @@ -258,7 +260,7 @@ type HashJoinCpuCostDetail struct { Cost float64 `json:"cost"` } -func (h *HashJoinCpuCostDetail) desc() string { +func (h *HashJoinCPUCostDetail) desc() string { var cpuCostDesc string buildCostDesc := fmt.Sprintf("%s*%s", BuildRowCountLbl, CpuFactorLbl) if h.Spill { @@ -275,7 +277,7 @@ func (h *HashJoinCpuCostDetail) desc() string { return cpuCostDesc } -func (h *HashJoinCpuCostDetail) probeCostDesc() string { +func (h *HashJoinCPUCostDetail) probeCostDesc() string { var probeCostDesc string if h.ProbeCost.HasConditions { probeCostDesc = fmt.Sprintf("(%s*%s*%s+%s*%s)/%s", @@ -289,6 +291,7 @@ func (h *HashJoinCpuCostDetail) probeCostDesc() string { return probeCostDesc } +// HashJoinMemoryCostDetail indicates memory cost detail type HashJoinMemoryCostDetail struct { Spill bool `json:"spill"` MemQuota int64 `json:"memQuota"` @@ -306,6 +309,7 @@ func (h *HashJoinMemoryCostDetail) desc() string { return memCostDesc } +// HashJoinProbeDiskCostDetail indicates probe disk cost detail type HashJoinProbeDiskCostDetail struct { SelectionFactor float64 `json:"selectionFactor"` NumPairs float64 `json:"numPairs"` @@ -313,6 +317,7 @@ type HashJoinProbeDiskCostDetail struct { Cost float64 `json:"cost"` } +// HashJoinDiskCostDetail indicates disk cost detail type HashJoinDiskCostDetail struct { Spill bool `json:"spill"` UseOuterToBuild bool `json:"useOuterToBuild"` diff --git a/planner/core/plan_cost_detail_test.go b/planner/core/plan_cost_detail_test.go index c49e4695eb51f..34584773aa6e8 100644 --- a/planner/core/plan_cost_detail_test.go +++ b/planner/core/plan_cost_detail_test.go @@ -44,8 +44,8 @@ func TestPlanCostDetail(t *testing.T) { tp: plancodec.TypeHashJoin, sql: "select /*+ HASH_JOIN(t1, t2) */ * from t t1 join t t2 on t1.k = t2.k where t1.a = 1;", assertLbls: []string{ - plannercore.CpuCostDetailLbl, - plannercore.CpuCostDescLbl, + plannercore.CPUCostDetailLbl, + plannercore.CPUCostDescLbl, plannercore.ProbeCostDescLbl, plannercore.MemCostDetailLbl, plannercore.MemCostDescLbl, From 6c4eeebe3c2151ed9a2c132cc4997a5d68ed919f Mon Sep 17 00:00:00 2001 From: yisaer Date: Wed, 10 Aug 2022 16:22:24 +0800 Subject: [PATCH 07/13] revise code Signed-off-by: yisaer --- planner/core/plan_cost_detail.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/planner/core/plan_cost_detail.go b/planner/core/plan_cost_detail.go index 2faf4c9bcda56..18c3b5233942f 100644 --- a/planner/core/plan_cost_detail.go +++ b/planner/core/plan_cost_detail.go @@ -41,8 +41,8 @@ const ( ScanFactorLbl = 
"scanFactor" // SelectionFactorLbl indicates selection factor SelectionFactorLbl = "selectionFactor" - // CpuFactorLbl indicates cpu factor - CpuFactorLbl = "cpuFactor" + // CPUactorLbl indicates cpu factor + CPUactorLbl = "cpuFactor" // MemoryFactorLbl indicates mem factor MemoryFactorLbl = "memoryFactor" // DiskFactorLbl indicates disk factor @@ -262,7 +262,7 @@ type HashJoinCPUCostDetail struct { func (h *HashJoinCPUCostDetail) desc() string { var cpuCostDesc string - buildCostDesc := fmt.Sprintf("%s*%s", BuildRowCountLbl, CpuFactorLbl) + buildCostDesc := fmt.Sprintf("%s*%s", BuildRowCountLbl, CPUactorLbl) if h.Spill { cpuCostDesc = fmt.Sprintf("%s+%s+(%s+1)*%s)+%s", buildCostDesc, @@ -281,11 +281,11 @@ func (h *HashJoinCPUCostDetail) probeCostDesc() string { var probeCostDesc string if h.ProbeCost.HasConditions { probeCostDesc = fmt.Sprintf("(%s*%s*%s+%s*%s)/%s", - NumPairsLbl, CpuFactorLbl, SelectionFactorLbl, - ProbeRowCountLbl, CpuFactorLbl, HashJoinConcurrencyLbl) + NumPairsLbl, CPUactorLbl, SelectionFactorLbl, + ProbeRowCountLbl, CPUactorLbl, HashJoinConcurrencyLbl) } else { probeCostDesc = fmt.Sprintf("(%s*%s)/%s", - NumPairsLbl, CpuFactorLbl, + NumPairsLbl, CPUactorLbl, HashJoinConcurrencyLbl) } return probeCostDesc From e64f93c7b9eee21a4a6d5a88100acaff2ceddefe Mon Sep 17 00:00:00 2001 From: yisaer Date: Thu, 11 Aug 2022 11:02:30 +0800 Subject: [PATCH 08/13] revise code Signed-off-by: yisaer --- planner/core/plan_cost_detail.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/planner/core/plan_cost_detail.go b/planner/core/plan_cost_detail.go index 18c3b5233942f..2bebb9c61ad4d 100644 --- a/planner/core/plan_cost_detail.go +++ b/planner/core/plan_cost_detail.go @@ -41,8 +41,8 @@ const ( ScanFactorLbl = "scanFactor" // SelectionFactorLbl indicates selection factor SelectionFactorLbl = "selectionFactor" - // CPUactorLbl indicates cpu factor - CPUactorLbl = "cpuFactor" + // CPUFactorLbl indicates cpu factor + CPUFactorLbl = "cpuFactor" // MemoryFactorLbl indicates mem factor MemoryFactorLbl = "memoryFactor" // DiskFactorLbl indicates disk factor @@ -262,7 +262,7 @@ type HashJoinCPUCostDetail struct { func (h *HashJoinCPUCostDetail) desc() string { var cpuCostDesc string - buildCostDesc := fmt.Sprintf("%s*%s", BuildRowCountLbl, CPUactorLbl) + buildCostDesc := fmt.Sprintf("%s*%s", BuildRowCountLbl, CPUFactorLbl) if h.Spill { cpuCostDesc = fmt.Sprintf("%s+%s+(%s+1)*%s)+%s", buildCostDesc, @@ -281,11 +281,11 @@ func (h *HashJoinCPUCostDetail) probeCostDesc() string { var probeCostDesc string if h.ProbeCost.HasConditions { probeCostDesc = fmt.Sprintf("(%s*%s*%s+%s*%s)/%s", - NumPairsLbl, CPUactorLbl, SelectionFactorLbl, - ProbeRowCountLbl, CPUactorLbl, HashJoinConcurrencyLbl) + NumPairsLbl, CPUFactorLbl, SelectionFactorLbl, + ProbeRowCountLbl, CPUFactorLbl, HashJoinConcurrencyLbl) } else { probeCostDesc = fmt.Sprintf("(%s*%s)/%s", - NumPairsLbl, CPUactorLbl, + NumPairsLbl, CPUFactorLbl, HashJoinConcurrencyLbl) } return probeCostDesc From b795817e80ad915aca2eb80777a6a0cd6a036b21 Mon Sep 17 00:00:00 2001 From: yisaer Date: Wed, 17 Aug 2022 13:49:45 +0800 Subject: [PATCH 09/13] address the comment Signed-off-by: yisaer --- planner/core/plan_cost.go | 22 +++++++++++++++------- planner/core/plan_cost_detail.go | 30 ++++++++++++++---------------- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/planner/core/plan_cost.go b/planner/core/plan_cost.go index 170f98b6f4813..7e7277c653e67 100644 --- a/planner/core/plan_cost.go +++ 
b/planner/core/plan_cost.go @@ -938,9 +938,13 @@ func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint if isMPP && p.ctx.GetSessionVars().CostModelVersion == modelVer2 { cpuFactor = sessVars.GetTiFlashCPUFactor() // use the dedicated TiFlash CPU Factor on modelVer2 } + diskFactor := sessVars.GetMemoryFactor() + memoryFactor := sessVars.GetMemoryFactor() + concurrencyFactor := sessVars.GetConcurrencyFactor() + cpuCost := buildCnt * cpuFactor - memoryCost := buildCnt * sessVars.GetMemoryFactor() - diskCost := buildCnt * sessVars.GetDiskFactor() * rowSize + memoryCost := buildCnt * memoryFactor + diskCost := buildCnt * diskFactor * rowSize // Number of matched row pairs regarding the equal join conditions. helper := &fullJoinRowCountHelper{ cartesian: false, @@ -974,7 +978,7 @@ func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint // Cost of querying hash table is cheap actually, so we just compute the cost of // evaluating `OtherConditions` and joining row pairs. probeCost := numPairs * cpuFactor - probeDiskCost := numPairs * sessVars.GetDiskFactor() * rowSize + probeDiskCost := numPairs * diskFactor * rowSize // Cost of evaluating outer filter. if len(p.LeftConditions)+len(p.RightConditions) > 0 { // Input outer count for the above compution should be adjusted by SelectionFactor. @@ -985,7 +989,7 @@ func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint diskCost += probeDiskCost probeCost /= float64(p.Concurrency) // Cost of additional concurrent goroutines. - cpuCost += probeCost + float64(p.Concurrency+1)*sessVars.GetConcurrencyFactor() + cpuCost += probeCost + float64(p.Concurrency+1)*concurrencyFactor // Cost of traveling the hash table to resolve missing matched cases when building the hash table from the outer table if p.UseOuterToBuild { if spill { @@ -994,7 +998,7 @@ func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint } else { cpuCost += buildCnt * cpuFactor / float64(p.Concurrency) } - diskCost += buildCnt * sessVars.GetDiskFactor() * rowSize + diskCost += buildCnt * diskFactor * rowSize } if spill { @@ -1002,8 +1006,12 @@ func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint } else { diskCost = 0 } - setPhysicalHashJoinCostDetail(p, op, spill, buildCnt, probeCnt, cpuFactor, rowSize, numPairs, - cpuCost, probeCost, memoryCost, diskCost, probeDiskCost, memQuota) + if op != nil { + setPhysicalHashJoinCostDetail(p, op, spill, buildCnt, probeCnt, cpuFactor, rowSize, numPairs, + cpuCost, probeCost, memoryCost, diskCost, probeDiskCost, + diskFactor, memoryFactor, concurrencyFactor, + memQuota) + } return cpuCost + memoryCost + diskCost } diff --git a/planner/core/plan_cost_detail.go b/planner/core/plan_cost_detail.go index 2bebb9c61ad4d..e5f2eaec16c07 100644 --- a/planner/core/plan_cost_detail.go +++ b/planner/core/plan_cost_detail.go @@ -179,18 +179,18 @@ func setPhysicalIndexReaderCostDetail(p *PhysicalIndexReader, opt *physicalOptim func setPhysicalHashJoinCostDetail(p *PhysicalHashJoin, opt *physicalOptimizeOp, spill bool, buildCnt, probeCnt, cpuFactor, rowSize, numPairs, - cpuCost, probeCPUCost, memCost, diskCost, probeDiskCost float64, + cpuCost, probeCPUCost, memCost, diskCost, probeDiskCost, + diskFactor, memoryFactor, concurrencyFactor float64, memQuota int64) { if opt == nil { return } detail := tracing.NewPhysicalPlanCostDetail(p.ID(), p.TP()) - sessVars := p.ctx.GetSessionVars() diskCostDetail := &HashJoinDiskCostDetail{ Spill: spill, 
UseOuterToBuild: p.UseOuterToBuild, BuildRowCount: buildCnt, - DiskFactor: sessVars.GetDiskFactor(), + DiskFactor: diskFactor, RowSize: rowSize, ProbeDiskCost: &HashJoinProbeDiskCostDetail{ SelectionFactor: SelectionFactor, @@ -205,13 +205,13 @@ func setPhysicalHashJoinCostDetail(p *PhysicalHashJoin, opt *physicalOptimizeOp, MemQuota: memQuota, RowSize: rowSize, BuildRowCount: buildCnt, - MemoryFactor: sessVars.GetMemoryFactor(), + MemoryFactor: memoryFactor, Cost: memCost, } cpuCostDetail := &HashJoinCPUCostDetail{ BuildRowCount: buildCnt, CPUFactor: cpuFactor, - ConcurrencyFactor: sessVars.GetConcurrencyFactor(), + ConcurrencyFactor: concurrencyFactor, ProbeCost: &HashJoinProbeCostDetail{ NumPairs: numPairs, HasConditions: len(p.LeftConditions)+len(p.RightConditions) > 0, @@ -258,21 +258,19 @@ type HashJoinCPUCostDetail struct { HashJoinConcurrency uint `json:"hashJoinConcurrency"` Spill bool `json:"spill"` Cost float64 `json:"cost"` + UseOuterToBuild bool `json:"useOuterToBuild"` } func (h *HashJoinCPUCostDetail) desc() string { var cpuCostDesc string - buildCostDesc := fmt.Sprintf("%s*%s", BuildRowCountLbl, CPUFactorLbl) - if h.Spill { - cpuCostDesc = fmt.Sprintf("%s+%s+(%s+1)*%s)+%s", - buildCostDesc, - ProbeCostDetailLbl, HashJoinConcurrencyLbl, ConcurrencyFactorLbl, - buildCostDesc) - } else { - cpuCostDesc = fmt.Sprintf("%s+%s+(%s+1)*%s)+%s/%s", - buildCostDesc, - ProbeCostDetailLbl, HashJoinConcurrencyLbl, ConcurrencyFactorLbl, - buildCostDesc, HashJoinConcurrencyLbl) + buildCostDesc := fmt.Sprintf("%s+(%s+1)*%s)+%s*%s", + ProbeCostDetailLbl, HashJoinConcurrencyLbl, ConcurrencyFactorLbl, BuildRowCountLbl, CPUFactorLbl) + if h.UseOuterToBuild { + if h.Spill { + buildCostDesc = fmt.Sprintf("%s+%s*%s", buildCostDesc, BuildRowCountLbl, CPUFactorLbl) + } else { + buildCostDesc = fmt.Sprintf("%s+%s*%s/%s", buildCostDesc, BuildRowCountLbl, CPUFactorLbl, HashJoinConcurrencyLbl) + } } return cpuCostDesc } From fb3818860e6b3ac9986937400fb39fe724257bd4 Mon Sep 17 00:00:00 2001 From: yisaer Date: Wed, 17 Aug 2022 14:31:37 +0800 Subject: [PATCH 10/13] address the comment Signed-off-by: yisaer --- planner/core/plan_cost_detail.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/planner/core/plan_cost_detail.go b/planner/core/plan_cost_detail.go index e5f2eaec16c07..3a3f53d7e7aaa 100644 --- a/planner/core/plan_cost_detail.go +++ b/planner/core/plan_cost_detail.go @@ -263,13 +263,13 @@ type HashJoinCPUCostDetail struct { func (h *HashJoinCPUCostDetail) desc() string { var cpuCostDesc string - buildCostDesc := fmt.Sprintf("%s+(%s+1)*%s)+%s*%s", - ProbeCostDetailLbl, HashJoinConcurrencyLbl, ConcurrencyFactorLbl, BuildRowCountLbl, CPUFactorLbl) + buildCostDesc := fmt.Sprintf("%s*%s", BuildRowCountLbl, CPUFactorLbl) + cpuCostDesc = fmt.Sprintf("%s+%s+(%s+1)*%s)", buildCostDesc, ProbeCostDetailLbl, HashJoinConcurrencyLbl, ConcurrencyFactorLbl) if h.UseOuterToBuild { if h.Spill { - buildCostDesc = fmt.Sprintf("%s+%s*%s", buildCostDesc, BuildRowCountLbl, CPUFactorLbl) + cpuCostDesc = fmt.Sprintf("%s+%s", cpuCostDesc, buildCostDesc) } else { - buildCostDesc = fmt.Sprintf("%s+%s*%s/%s", buildCostDesc, BuildRowCountLbl, CPUFactorLbl, HashJoinConcurrencyLbl) + buildCostDesc = fmt.Sprintf("%s+%s/%s", cpuCostDesc, buildCostDesc, HashJoinConcurrencyLbl) } } return cpuCostDesc From 4798b92a238b49b74d4143c98ff4296e6f59d303 Mon Sep 17 00:00:00 2001 From: yisaer Date: Wed, 17 Aug 2022 15:08:24 +0800 Subject: [PATCH 11/13] address the comment Signed-off-by: yisaer --- 
planner/core/plan_cost_detail.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/planner/core/plan_cost_detail.go b/planner/core/plan_cost_detail.go index 3a3f53d7e7aaa..57d0db20ce6cb 100644 --- a/planner/core/plan_cost_detail.go +++ b/planner/core/plan_cost_detail.go @@ -269,7 +269,7 @@ func (h *HashJoinCPUCostDetail) desc() string { if h.Spill { cpuCostDesc = fmt.Sprintf("%s+%s", cpuCostDesc, buildCostDesc) } else { - buildCostDesc = fmt.Sprintf("%s+%s/%s", cpuCostDesc, buildCostDesc, HashJoinConcurrencyLbl) + cpuCostDesc = fmt.Sprintf("%s+%s/%s", cpuCostDesc, buildCostDesc, HashJoinConcurrencyLbl) } } return cpuCostDesc From 62040c2c2a3891947b92f92cfd1e5e42d29dc1dd Mon Sep 17 00:00:00 2001 From: yisaer Date: Mon, 22 Aug 2022 13:21:34 +0800 Subject: [PATCH 12/13] address the comment Signed-off-by: yisaer --- planner/core/plan_cost_detail.go | 1 + 1 file changed, 1 insertion(+) diff --git a/planner/core/plan_cost_detail.go b/planner/core/plan_cost_detail.go index 57d0db20ce6cb..08c701d7ad3a3 100644 --- a/planner/core/plan_cost_detail.go +++ b/planner/core/plan_cost_detail.go @@ -222,6 +222,7 @@ func setPhysicalHashJoinCostDetail(p *PhysicalHashJoin, opt *physicalOptimizeOp, HashJoinConcurrency: p.Concurrency, Spill: spill, Cost: cpuCost, + UseOuterToBuild: p.UseOuterToBuild, } // record cpu cost detail From e44ec892723e69a4529f0202ea3a0bb7488c6930 Mon Sep 17 00:00:00 2001 From: yisaer Date: Mon, 22 Aug 2022 15:19:58 +0800 Subject: [PATCH 13/13] address the comment Signed-off-by: yisaer --- planner/core/plan_cost.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/planner/core/plan_cost.go b/planner/core/plan_cost.go index c9078bd3f3900..93d5fc88a659a 100644 --- a/planner/core/plan_cost.go +++ b/planner/core/plan_cost.go @@ -946,7 +946,7 @@ func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint if isMPP && p.ctx.GetSessionVars().CostModelVersion == modelVer2 { cpuFactor = sessVars.GetTiFlashCPUFactor() // use the dedicated TiFlash CPU Factor on modelVer2 } - diskFactor := sessVars.GetMemoryFactor() + diskFactor := sessVars.GetDiskFactor() memoryFactor := sessVars.GetMemoryFactor() concurrencyFactor := sessVars.GetConcurrencyFactor()
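
For reference, the params/desc pattern this series introduces can be exercised in isolation. The sketch below is a minimal, self-contained stand-in — costDetail, newCostDetail, and the sample numbers are illustrative only, not TiDB's tracing API — that evaluates the BatchPointGet formula recorded by setBatchPointGetPlanCostDetail in patch 01: (rowCount*rowSize*networkFactor + rowCount*seekFactor) / scanConcurrency.

package main

import "fmt"

// costDetail is a hypothetical stand-in for tracing.PhysicalPlanCostDetail:
// an operator-scoped record of named cost parameters plus a human-readable
// formula string assembled from the *Lbl constants.
type costDetail struct {
	id     int
	tp     string
	params map[string]interface{}
	desc   string
}

func newCostDetail(id int, tp string) *costDetail {
	return &costDetail{id: id, tp: tp, params: make(map[string]interface{})}
}

// addParam mirrors AddParam in the patch: it records a parameter and
// returns the detail so calls can be chained.
func (d *costDetail) addParam(k string, v interface{}) *costDetail {
	d.params[k] = v
	return d
}

func main() {
	// Illustrative inputs only; in TiDB the real values come from session
	// variables and table statistics.
	rowCount, rowSize := 3.0, 8.0          // three handles, 8-byte rows
	networkFactor, seekFactor := 1.0, 20.0 // cost-model factors
	scanConcurrency := 15                  // DistSQLScanConcurrency

	// The formula recorded by setBatchPointGetPlanCostDetail:
	// (rowCount*rowSize*networkFactor + rowCount*seekFactor) / scanConcurrency
	cost := (rowCount*rowSize*networkFactor + rowCount*seekFactor) / float64(scanConcurrency)

	d := newCostDetail(1, "Batch_Point_Get")
	d.addParam("rowCount", rowCount).
		addParam("rowSize", rowSize).
		addParam("networkFactor", networkFactor).
		addParam("seekFactor", seekFactor).
		addParam("scanConcurrency", scanConcurrency)
	d.desc = "(rowCount*rowSize*networkFactor+rowCount*seekFactor)/scanConcurrency"

	// (3*8*1 + 3*20) / 15 = 5.60
	fmt.Printf("%s#%d cost=%.2f\n  params=%v\n  desc=%s\n", d.tp, d.id, cost, d.params, d.desc)
}

The reader and hash-join details added later in the series follow the same shape; only the parameter set and the desc formula change, with the hash-join detail additionally nesting structured CPU/memory/disk sub-details.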