Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: support HashJoin cost detail #37012

Merged
merged 21 commits into from
Aug 22, 2022
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions planner/core/plan_cost.go
Original file line number Diff line number Diff line change
Expand Up @@ -920,7 +920,7 @@ func (p *PhysicalMergeJoin) GetPlanCost(taskType property.TaskType, option *Plan
}

// GetCost computes cost of hash join operator itself.
func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint64) float64 {
func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint64, op *physicalOptimizeOp) float64 {
buildCnt, probeCnt := lCnt, rCnt
build := p.children[0]
// Taking the right as the inner for right join or using the outer to build a hash table.
Expand Down Expand Up @@ -1002,6 +1002,8 @@ func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint
} else {
diskCost = 0
}
setPhysicalHashJoinCostDetail(p, op, spill, buildCnt, probeCnt, cpuFactor, rowSize, numPairs,
cpuCost, probeCost, memoryCost, diskCost, probeDiskCost, memQuota)
return cpuCost + memoryCost + diskCost
}

Expand All @@ -1019,7 +1021,8 @@ func (p *PhysicalHashJoin) GetPlanCost(taskType property.TaskType, option *PlanC
}
p.planCost += childCost
}
p.planCost += p.GetCost(getCardinality(p.children[0], costFlag), getCardinality(p.children[1], costFlag), taskType == property.MppTaskType, costFlag)
p.planCost += p.GetCost(getCardinality(p.children[0], costFlag), getCardinality(p.children[1], costFlag),
taskType == property.MppTaskType, costFlag, option.tracer)
p.planCostInit = true
return p.planCost, nil
}
Expand Down
216 changes: 216 additions & 0 deletions planner/core/plan_cost_detail.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,23 +26,65 @@ const (
RowCountLbl = "rowCount"
// RowSizeLbl indicates rowSize
RowSizeLbl = "rowSize"
// BuildRowCountLbl indicates rowCount on build side
BuildRowCountLbl = "buildRowCount"
// ProbeRowCountLbl indicates rowCount on probe side
ProbeRowCountLbl = "probeRowCount"
// NumPairsLbl indicates numPairs
NumPairsLbl = "numPairs"

// NetworkFactorLbl indicates networkFactor
NetworkFactorLbl = "networkFactor"
// SeekFactorLbl indicates seekFactor
SeekFactorLbl = "seekFactor"
// ScanFactorLbl indicates scanFactor
ScanFactorLbl = "scanFactor"
// SelectionFactorLbl indicates selection factor
SelectionFactorLbl = "selectionFactor"
// CPUFactorLbl indicates cpu factor
CPUFactorLbl = "cpuFactor"
// MemoryFactorLbl indicates mem factor
MemoryFactorLbl = "memoryFactor"
// DiskFactorLbl indicates disk factor
DiskFactorLbl = "diskFactor"
// ConcurrencyFactorLbl indicates concurrency factor
ConcurrencyFactorLbl = "concurrencyFactor"

// ScanConcurrencyLbl indicates sql scan concurrency
ScanConcurrencyLbl = "scanConcurrency"
// HashJoinConcurrencyLbl indicates concurrency for hash join
HashJoinConcurrencyLbl = "hashJoinConcurrency"

// NetSeekCostLbl indicates netSeek cost
NetSeekCostLbl = "netSeekCost"
// TablePlanCostLbl indicates tablePlan cost
TablePlanCostLbl = "tablePlanCost"
// IndexPlanCostLbl indicates indexPlan cost
IndexPlanCostLbl = "indexPlanCost"

// ProbeCostDetailLbl indicates probeCost
ProbeCostDetailLbl = "probeCostDetail"
// ProbeCostDescLbl indicates description for probe cost
ProbeCostDescLbl = "probeCostDesc"
// CPUCostDetailLbl indicates cpuCost detail
CPUCostDetailLbl = "cpuCostDetail"
// CPUCostDescLbl indicates description for cpu cost
CPUCostDescLbl = "cpuCostDesc"
// MemCostDetailLbl indicates mem cost detail
MemCostDetailLbl = "memCostDetail"
// MemCostDescLbl indicates description for mem cost
MemCostDescLbl = "memCostDesc"
// DiskCostDetailLbl indicates disk cost detail
DiskCostDetailLbl = "diskCostDetail"
// DiskCostDescLbl indicates description for disk cost
DiskCostDescLbl = "diskCostDesc"
// ProbeDiskCostLbl indicates probe disk cost detail
ProbeDiskCostLbl = "probeDiskCostDetail"
// ProbeDiskCostDescLbl indicates description for probe disk cost
ProbeDiskCostDescLbl = "probeDiskCostDesc"

// MemQuotaLbl indicates memory quota
MemQuotaLbl = "memQuota"
)

func setPointGetPlanCostDetail(p *PointGetPlan, opt *physicalOptimizeOp,
Expand Down Expand Up @@ -134,3 +176,177 @@ func setPhysicalIndexReaderCostDetail(p *PhysicalIndexReader, opt *physicalOptim
RowCountLbl, RowSizeLbl, NetworkFactorLbl, NetSeekCostLbl, ScanConcurrencyLbl))
opt.appendPlanCostDetail(detail)
}

// setPhysicalHashJoinCostDetail records the cost breakdown of a hash join on the optimizer trace.
// It is a no-op when opt is nil. Otherwise it packages the values handed in by the caller into
// disk, memory and CPU detail structs, attaches each struct together with its symbolic formula
// string as params of a new plan cost detail for p, and appends that detail to opt.
// None of the costs are recomputed here; spill, the row counts, the factors and the individual
// cost values are stored exactly as passed in.
func setPhysicalHashJoinCostDetail(p *PhysicalHashJoin, opt *physicalOptimizeOp, spill bool,
buildCnt, probeCnt, cpuFactor, rowSize, numPairs,
cpuCost, probeCPUCost, memCost, diskCost, probeDiskCost float64,
memQuota int64) {
// tracing is optional: without a tracer there is nothing to record
if opt == nil {
return
}
detail := tracing.NewPhysicalPlanCostDetail(p.ID(), p.TP())
sessVars := p.ctx.GetSessionVars()
diskCostDetail := &HashJoinDiskCostDetail{
Spill: spill,
UseOuterToBuild: p.UseOuterToBuild,
BuildRowCount: buildCnt,
DiskFactor: sessVars.GetDiskFactor(),
RowSize: rowSize,
ProbeDiskCost: &HashJoinProbeDiskCostDetail{
SelectionFactor: SelectionFactor,
NumPairs: numPairs,
// true when the join has any left or right conditions
HasConditions: len(p.LeftConditions)+len(p.RightConditions) > 0,
Cost: probeDiskCost,
},
Cost: diskCost,
}
memoryCostDetail := &HashJoinMemoryCostDetail{
Spill: spill,
MemQuota: memQuota,
RowSize: rowSize,
BuildRowCount: buildCnt,
MemoryFactor: sessVars.GetMemoryFactor(),
Cost: memCost,
}
cpuCostDetail := &HashJoinCPUCostDetail{
BuildRowCount: buildCnt,
CPUFactor: cpuFactor,
ConcurrencyFactor: sessVars.GetConcurrencyFactor(),
ProbeCost: &HashJoinProbeCostDetail{
NumPairs: numPairs,
// same left/right condition check as used for the probe disk cost detail
HasConditions: len(p.LeftConditions)+len(p.RightConditions) > 0,
SelectionFactor: SelectionFactor,
ProbeRowCount: probeCnt,
Cost: probeCPUCost,
},
HashJoinConcurrency: p.Concurrency,
Spill: spill,
Cost: cpuCost,
}

// record cpu cost detail
detail.AddParam(CPUCostDetailLbl, cpuCostDetail).
AddParam(CPUCostDescLbl, cpuCostDetail.desc()).
AddParam(ProbeCostDescLbl, cpuCostDetail.probeCostDesc())
// record memory cost detail
detail.AddParam(MemCostDetailLbl, memoryCostDetail).
AddParam(MemCostDescLbl, memoryCostDetail.desc())
// record disk cost detail
detail.AddParam(DiskCostDetailLbl, diskCostDetail).
AddParam(DiskCostDescLbl, diskCostDetail.desc()).
AddParam(ProbeDiskCostDescLbl, diskCostDetail.probeDesc())

// top-level formula: its terms are the keys of the three detail params added above
detail.SetDesc(fmt.Sprintf("%s+%s+%s+all children cost", CPUCostDetailLbl, MemCostDetailLbl, DiskCostDetailLbl))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I notice that in previous PRs, each label in the Desc corresponds to exactly one value in the Param with the same label. But this PR doesn't follow this.
For example, we didn't explain the relationship between CPUCostDetailLbl, CPUCostDescLbl, and ProbeCostDescLbl.
Would we improve this sometime?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently no, we may ignore this in interval server and revise it in dashboard.

opt.appendPlanCostDetail(detail)
}

// HashJoinProbeCostDetail holds the inputs and the resulting value of the probe-side CPU cost
// of a hash join. It is embedded by pointer in HashJoinCPUCostDetail.
type HashJoinProbeCostDetail struct {
// NumPairs is the numPairs term of the probe cost formula.
NumPairs float64 `json:"numPairs"`
// HasConditions selects which formula HashJoinCPUCostDetail.probeCostDesc reports.
HasConditions bool `json:"hasConditions"`
// SelectionFactor is the selectionFactor term, only part of the formula when HasConditions is true.
SelectionFactor float64 `json:"selectionFactor"`
// ProbeRowCount is the probeRowCount term, only part of the formula when HasConditions is true.
ProbeRowCount float64 `json:"probeRowCount"`
// Cost is the probe CPU cost value supplied by the caller.
Cost float64 `json:"cost"`
}

// HashJoinCPUCostDetail holds the inputs and the resulting value of the CPU cost of a hash join.
// Its desc and probeCostDesc methods render the matching symbolic formulas.
type HashJoinCPUCostDetail struct {
// BuildRowCount is the row count on the build side.
BuildRowCount float64 `json:"buildRowCount"`
// CPUFactor is the cpu factor used in the cost formula.
CPUFactor float64 `json:"cpuFactor"`
// ConcurrencyFactor is the concurrency factor used in the cost formula.
ConcurrencyFactor float64 `json:"concurrencyFactor"`
// ProbeCost is the probe-side part of the CPU cost; probeCostDesc dereferences it, so it must be non-nil there.
ProbeCost *HashJoinProbeCostDetail `json:"probeCost"`
// HashJoinConcurrency is the concurrency of the hash join.
HashJoinConcurrency uint `json:"hashJoinConcurrency"`
// Spill selects which variant of the formula desc reports.
Spill bool `json:"spill"`
// Cost is the CPU cost value supplied by the caller.
Cost float64 `json:"cost"`
}

// desc returns the symbolic formula, spelled with the *Lbl label names, that
// describes how the hash join CPU cost is composed:
//
//	build cost + probe cost + (concurrency+1)*concurrencyFactor + build cost
//
// where "build cost" is buildRowCount*cpuFactor. When the join does not spill,
// the trailing build-cost term is divided by the hash join concurrency.
// The returned formula always has balanced parentheses.
//
// NOTE(review): the trailing build-cost term is emitted unconditionally, while
// HashJoinDiskCostDetail.desc gates its analogous term on UseOuterToBuild.
// Confirm against GetCost whether this term should be conditional as well.
func (h *HashJoinCPUCostDetail) desc() string {
	buildCostDesc := fmt.Sprintf("%s*%s", BuildRowCountLbl, CPUFactorLbl)
	// Only the (concurrency+1) factor is parenthesized; every other term is a
	// plain product, so no further grouping is required.
	cpuCostDesc := fmt.Sprintf("%s+%s+(%s+1)*%s+%s",
		buildCostDesc,
		ProbeCostDetailLbl, HashJoinConcurrencyLbl, ConcurrencyFactorLbl,
		buildCostDesc)
	if !h.Spill {
		// Without spilling, the trailing build-cost term is spread across the
		// hash join workers.
		cpuCostDesc = fmt.Sprintf("%s/%s", cpuCostDesc, HashJoinConcurrencyLbl)
	}
	return cpuCostDesc
}

// probeCostDesc returns the symbolic formula of the probe-side CPU cost.
// The numerator is numPairs*cpuFactor; when the probe has conditions it is
// additionally scaled by selectionFactor and extended by
// probeRowCount*cpuFactor. The whole numerator is divided by the hash join
// concurrency. h.ProbeCost must be non-nil.
func (h *HashJoinCPUCostDetail) probeCostDesc() string {
	numerator := fmt.Sprintf("%s*%s", NumPairsLbl, CPUFactorLbl)
	if h.ProbeCost.HasConditions {
		numerator = fmt.Sprintf("%s*%s+%s*%s",
			numerator, SelectionFactorLbl,
			ProbeRowCountLbl, CPUFactorLbl)
	}
	return fmt.Sprintf("(%s)/%s", numerator, HashJoinConcurrencyLbl)
}

// HashJoinMemoryCostDetail holds the inputs and the resulting value of the memory cost of a
// hash join. Its desc method renders the matching symbolic formula.
type HashJoinMemoryCostDetail struct {
// Spill selects whether desc includes the memQuota/(rowSize*buildRowCount) scaling.
Spill bool `json:"spill"`
// MemQuota is the memory quota term, only part of the formula when Spill is true.
MemQuota int64 `json:"memQuota"`
// RowSize is the row size term, only part of the formula when Spill is true.
RowSize float64 `json:"rowSize"`
// BuildRowCount is the row count on the build side.
BuildRowCount float64 `json:"buildRowCount"`
// MemoryFactor is the memory factor used in the cost formula.
MemoryFactor float64 `json:"memoryFactor"`
// Cost is the memory cost value supplied by the caller.
Cost float64 `json:"cost"`
}

// desc returns the symbolic formula of the hash join memory cost:
// buildRowCount*memoryFactor, further scaled by
// memQuota/(rowSize*buildRowCount) when the join spills.
func (h *HashJoinMemoryCostDetail) desc() string {
	if !h.Spill {
		return fmt.Sprintf("%s*%s", BuildRowCountLbl, MemoryFactorLbl)
	}
	return fmt.Sprintf("%s*%s*%s/(%s*%s)",
		BuildRowCountLbl, MemoryFactorLbl,
		MemQuotaLbl, RowSizeLbl, BuildRowCountLbl)
}

// HashJoinProbeDiskCostDetail holds the inputs and the resulting value of the probe-side disk
// cost of a hash join. It is embedded by pointer in HashJoinDiskCostDetail.
type HashJoinProbeDiskCostDetail struct {
// SelectionFactor is the selectionFactor term, only part of the formula when HasConditions is true.
SelectionFactor float64 `json:"selectionFactor"`
// NumPairs is the numPairs term of the probe disk cost formula.
NumPairs float64 `json:"numPairs"`
// HasConditions selects whether HashJoinDiskCostDetail.probeDesc multiplies by selectionFactor.
HasConditions bool `json:"hasConditions"`
// Cost is the probe disk cost value supplied by the caller.
Cost float64 `json:"cost"`
}

// HashJoinDiskCostDetail holds the inputs and the resulting value of the disk cost of a hash
// join. Its desc and probeDesc methods render the matching symbolic formulas; both return an
// empty string when Spill is false.
type HashJoinDiskCostDetail struct {
// Spill reports whether the join spills; without it the formulas are empty.
Spill bool `json:"spill"`
// UseOuterToBuild makes desc append a second build disk cost term.
UseOuterToBuild bool `json:"useOuterToBuild"`
// BuildRowCount is the row count on the build side.
BuildRowCount float64 `json:"buildRowCount"`
// DiskFactor is the disk factor used in the cost formula.
DiskFactor float64 `json:"diskFactor"`
// RowSize is the row size used in the cost formula.
RowSize float64 `json:"rowSize"`
// ProbeDiskCost is the probe-side part of the disk cost; probeDesc dereferences it when Spill is true.
ProbeDiskCost *HashJoinProbeDiskCostDetail `json:"probeDiskCost"`
// Cost is the disk cost value supplied by the caller.
Cost float64 `json:"cost"`
}

// desc returns the symbolic formula of the hash join disk cost, or "" when the
// join does not spill. The formula is the build disk cost
// (buildRowCount*diskFactor*rowSize) plus the probe disk cost label; the build
// disk cost is added a second time when the outer side is used to build.
func (h *HashJoinDiskCostDetail) desc() string {
	buildTerm := fmt.Sprintf("%s*%s*%s", BuildRowCountLbl, DiskFactorLbl, RowSizeLbl)
	switch {
	case !h.Spill:
		return ""
	case h.UseOuterToBuild:
		return fmt.Sprintf("%s+%s+%s", buildTerm, ProbeDiskCostLbl, buildTerm)
	default:
		return fmt.Sprintf("%s+%s", buildTerm, ProbeDiskCostLbl)
	}
}

// probeDesc returns the symbolic formula of the probe-side disk cost, or ""
// when the join does not spill. The formula is numPairs*diskFactor*rowSize,
// multiplied by selectionFactor when the probe has conditions.
// h.ProbeDiskCost is only dereferenced when the join spills.
func (h *HashJoinDiskCostDetail) probeDesc() string {
	switch {
	case !h.Spill:
		return ""
	case h.ProbeDiskCost.HasConditions:
		return fmt.Sprintf("%s*%s*%s*%s", NumPairsLbl, DiskFactorLbl, RowSizeLbl, SelectionFactorLbl)
	default:
		return fmt.Sprintf("%s*%s*%s", NumPairsLbl, DiskFactorLbl, RowSizeLbl)
	}
}
Loading