planner: set min for high risk plan steps (#56631)

close #55126
pingcap · Oct 29, 2024 · 8fde2d6 · 8fde2d6
1 parent 1c386db
commit 8fde2d6
Show file tree

Hide file tree

Showing 8 changed files with 81 additions and 70 deletions.
diff --git a/pkg/planner/cardinality/row_size.go b/pkg/planner/cardinality/row_size.go
@@ -54,6 +54,8 @@ func GetTableAvgRowSize(ctx planctx.PlanContext, coll *statistics.HistColl, cols
 			size += 8 /* row_id length */
 		}
 	}
+	// Avoid errors related to size less than zero
+	size = max(0, size)
 	return
 }
 
@@ -80,6 +82,8 @@ func GetAvgRowSize(ctx planctx.PlanContext, coll *statistics.HistColl, cols []*e
 			}
 		}
 	}
+	// Avoid errors related to size less than zero
+	size = max(0, size)
 	if sessionVars.EnableChunkRPC && !isForScan {
 		// Add 1/8 byte for each column's nullBitMap byte.
 		return size + float64(len(cols))/8
@@ -107,7 +111,7 @@ func GetAvgRowSizeDataInDiskByRows(coll *statistics.HistColl, cols []*expression
 		}
 	}
 	// Add 8 byte for each column's size record. See `DataInDiskByRows` for details.
-	return size + float64(8*len(cols))
+	return max(0, size+float64(8*len(cols)))
 }
 
 // AvgColSize is the average column size of the histogram. These sizes are derived from function `encode`
@@ -126,7 +130,7 @@ func AvgColSize(c *statistics.Column, count int64, isKey bool) float64 {
 	histCount := c.TotalRowCount()
 	notNullRatio := 1.0
 	if histCount > 0 {
-		notNullRatio = 1.0 - float64(c.NullCount)/histCount
+		notNullRatio = max(0, 1.0-float64(c.NullCount)/histCount)
 	}
 	switch c.Histogram.Tp.GetType() {
 	case mysql.TypeFloat, mysql.TypeDouble, mysql.TypeDuration, mysql.TypeDate, mysql.TypeDatetime, mysql.TypeTimestamp:
@@ -137,7 +141,7 @@ func AvgColSize(c *statistics.Column, count int64, isKey bool) float64 {
 		}
 	}
 	// Keep two decimal places.
-	return math.Round(float64(c.TotColSize)/float64(count)*100) / 100
+	return max(0, math.Round(float64(c.TotColSize)/float64(count)*100)/100)
 }
 
 // AvgColSizeChunkFormat is the average column size of the histogram. These sizes are derived from function `Encode`
@@ -147,17 +151,17 @@ func AvgColSizeChunkFormat(c *statistics.Column, count int64) float64 {
 		return 0
 	}
 	fixedLen := chunk.GetFixedLen(c.Histogram.Tp)
-	if fixedLen != -1 {
+	if fixedLen >= 0 {
 		return float64(fixedLen)
 	}
 	// Keep two decimal places.
 	// Add 8 bytes for unfixed-len type's offsets.
 	// Minus Log2(avgSize) for unfixed-len type LEN.
 	avgSize := float64(c.TotColSize) / float64(count)
 	if avgSize < 1 {
-		return math.Round(avgSize*100)/100 + 8
+		return max(0, math.Round(avgSize*100)/100) + 8
 	}
-	return math.Round((avgSize-math.Log2(avgSize))*100)/100 + 8
+	return max(0, math.Round((avgSize-math.Log2(avgSize))*100)/100) + 8
 }
 
 // AvgColSizeDataInDiskByRows is the average column size of the histogram. These sizes are derived
@@ -172,14 +176,14 @@ func AvgColSizeDataInDiskByRows(c *statistics.Column, count int64) float64 {
 		notNullRatio = 1.0 - float64(c.NullCount)/histCount
 	}
 	size := chunk.GetFixedLen(c.Histogram.Tp)
-	if size != -1 {
+	if size >= 0 {
 		return float64(size) * notNullRatio
 	}
 	// Keep two decimal places.
 	// Minus Log2(avgSize) for unfixed-len type LEN.
 	avgSize := float64(c.TotColSize) / float64(count)
 	if avgSize < 1 {
-		return math.Round((avgSize)*100) / 100
+		return max(0, math.Round((avgSize)*100)/100)
 	}
 	return math.Round((avgSize-math.Log2(avgSize))*100) / 100
 }
diff --git a/pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json b/pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json
@@ -470,12 +470,12 @@
         "Plan": [
           "Projection 0.00 root  test_partition.t1.id, test_partition.t1.a, test_partition.t1.b, test_partition.t2.id, test_partition.t2.a, test_partition.t2.b",
           "└─HashJoin 0.00 root  CARTESIAN inner join",
-          "  ├─TableReader(Build) 0.00 root partition:p1 data:Selection",
-          "  │ └─Selection 0.00 cop[tikv]  eq(test_partition.t2.b, 7), eq(test_partition.t2.id, 7), in(test_partition.t2.a, 6, 7, 8)",
-          "  │   └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
-          "  └─TableReader(Probe) 0.01 root partition:p0 data:Selection",
-          "    └─Selection 0.01 cop[tikv]  eq(test_partition.t1.id, 7), or(eq(test_partition.t1.a, 1), and(eq(test_partition.t1.a, 3), in(test_partition.t1.b, 3, 5)))",
-          "      └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
+          "  ├─TableReader(Build) 0.01 root partition:p0 data:Selection",
+          "  │ └─Selection 0.01 cop[tikv]  eq(test_partition.t1.id, 7), or(eq(test_partition.t1.a, 1), and(eq(test_partition.t1.a, 3), in(test_partition.t1.b, 3, 5)))",
+          "  │   └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo",
+          "  └─TableReader(Probe) 0.00 root partition:p1 data:Selection",
+          "    └─Selection 0.00 cop[tikv]  eq(test_partition.t2.b, 7), eq(test_partition.t2.id, 7), in(test_partition.t2.a, 6, 7, 8)",
+          "      └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo"
         ],
         "IndexPlan": [
           "HashJoin 0.03 root  CARTESIAN inner join",

diff --git a/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json b/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json
@@ -130,16 +130,16 @@
         "Query": "explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c",
         "Result": [
           "HashJoin 0.33 root  inner join, equal:[eq(test.tp.c, test.t2.a)]",
-          "├─IndexJoin(Build) 0.33 root  inner join, inner:IndexLookUp, outer key:test.t.b, inner key:test.tp.c, equal cond:eq(test.t.b, test.tp.c)",
-          "│ ├─TableReader(Build) 0.33 root  data:Selection",
-          "│ │ └─Selection 0.33 cop[tikv]  gt(test.t.b, 10), not(isnull(test.t.b))",
-          "│ │   └─TableRangeScan 1.00 cop[tikv] table:t range:[-inf,10), keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]",
-          "│ └─IndexLookUp(Probe) 0.33 root partition:p0 ",
-          "│   ├─Selection(Build) 0.33 cop[tikv]  gt(test.tp.c, 10), not(isnull(test.tp.c))",
-          "│   │ └─IndexRangeScan 0.50 cop[tikv] table:tp, index:ic(c) range: decided by [eq(test.tp.c, test.t.b)], keep order:false, stats:partial[c:allEvicted]",
-          "│   └─TableRowIDScan(Probe) 0.33 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]",
-          "└─TableReader(Probe) 1.00 root  data:TableRangeScan",
-          "  └─TableRangeScan 1.00 cop[tikv] table:t2 range:(10,+inf], keep order:false, stats:partial[a:allEvicted]"
+          "├─TableReader(Build) 1.00 root  data:TableRangeScan",
+          "│ └─TableRangeScan 1.00 cop[tikv] table:t2 range:(10,+inf], keep order:false, stats:partial[a:allEvicted]",
+          "└─IndexJoin(Probe) 0.33 root  inner join, inner:IndexLookUp, outer key:test.t.b, inner key:test.tp.c, equal cond:eq(test.t.b, test.tp.c)",
+          "  ├─TableReader(Build) 0.33 root  data:Selection",
+          "  │ └─Selection 0.33 cop[tikv]  gt(test.t.b, 10), not(isnull(test.t.b))",
+          "  │   └─TableRangeScan 1.00 cop[tikv] table:t range:[-inf,10), keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]",
+          "  └─IndexLookUp(Probe) 0.33 root partition:p0 ",
+          "    ├─Selection(Build) 0.33 cop[tikv]  gt(test.tp.c, 10), not(isnull(test.tp.c))",
+          "    │ └─IndexRangeScan 0.50 cop[tikv] table:tp, index:ic(c) range: decided by [eq(test.tp.c, test.t.b)], keep order:false, stats:partial[c:allEvicted]",
+          "    └─TableRowIDScan(Probe) 0.33 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]"
         ]
       }
     ]

diff --git a/pkg/planner/core/casetest/testdata/integration_suite_out.json b/pkg/planner/core/casetest/testdata/integration_suite_out.json
@@ -165,7 +165,7 @@
       {
         "SQL": "explain format = 'verbose' select (2) in (select /*+ read_from_storage(tiflash[t1]) */ count(*) from t1) from (select t.b < (select /*+ read_from_storage(tiflash[t2]) */ t.b from t2 limit 1 )  from t3 t) t; -- we do generate the agg pushed-down plan of mpp, but cost-cmp failed",
         "Plan": [
-          "HashJoin_17 3.00 32770.77 root  CARTESIAN left outer semi join",
+          "HashJoin_17 3.00 32781.07 root  CARTESIAN left outer semi join",
           "├─Selection_22(Build) 0.80 31149.25 root  eq(2, Column#18)",
           "│ └─StreamAgg_29 1.00 31099.35 root  funcs:count(1)->Column#18",
           "│   └─TableReader_41 3.00 30949.65 root  MppVersion: 2, data:ExchangeSender_40",

diff --git a/pkg/planner/core/plan_cost_ver1.go b/pkg/planner/core/plan_cost_ver1.go
@@ -1251,10 +1251,10 @@ func getCardinality(operator base.PhysicalPlan, costFlag uint64) float64 {
 		if actualProbeCnt == 0 {
 			return 0
 		}
-		return getOperatorActRows(operator) / float64(actualProbeCnt)
+		return max(0, getOperatorActRows(operator)/float64(actualProbeCnt))
 	}
 	rows := operator.StatsCount()
-	if rows == 0 && operator.SCtx().GetSessionVars().CostModelVersion == modelVer2 {
+	if rows <= 0 && operator.SCtx().GetSessionVars().CostModelVersion == modelVer2 {
 		// A non-positive est-row can lead to a non-positive operator cost, which makes plan choice unstable.
 		rows = 1
 	}

diff --git a/pkg/planner/core/plan_cost_ver2.go b/pkg/planner/core/plan_cost_ver2.go
@@ -103,6 +103,20 @@ func (p *PhysicalProjection) GetPlanCostVer2(taskType property.TaskType, option
 	return p.PlanCostVer2, nil
 }
 
+const (
+	// MinNumRows provides a minimum to avoid underestimation. As selectivity estimation approaches
+	// zero, all plan choices result in a low cost - making it difficult to differentiate plan choices.
+	// A low value of 1.0 here is used for most (non-probe) accesses to reduce this risk.
+	MinNumRows = 1.0
+	// MinRowSize provides a minimum column length to ensure that any adjustment or calculation
+	// in costing does not go below this value. 2.0 is used as a reasonable lowest column length.
+	MinRowSize = 2.0
+	// TiFlashStartupRowPenalty applies a startup penalty for TiFlash scan to encourage TiKV usage for small scans
+	TiFlashStartupRowPenalty = 10000
+	// MaxPenaltyRowCount applies a penalty for high risk scans
+	MaxPenaltyRowCount = 1000
+)
+
 // GetPlanCostVer2 returns the plan-cost of this sub-plan, which is:
 // plan-cost = rows * log2(row-size) * scan-factor
 // log2(row-size) is from experiments.
@@ -112,23 +126,14 @@ func (p *PhysicalIndexScan) GetPlanCostVer2(taskType property.TaskType, option *
 	}
 
 	rows := getCardinality(p, option.CostFlag)
-	rowSize := math.Max(getAvgRowSize(p.StatsInfo(), p.schema.Columns), 2.0) // consider all index columns
+	rowSize := getAvgRowSize(p.StatsInfo(), p.schema.Columns) // consider all index columns
 	scanFactor := getTaskScanFactorVer2(p, kv.TiKV, taskType)
 
 	p.PlanCostVer2 = scanCostVer2(option, rows, rowSize, scanFactor)
 	p.PlanCostInit = true
 	return p.PlanCostVer2, nil
 }
 
-const (
-	// MinRowSize provides a minimum to avoid underestimation
-	MinRowSize = 2.0
-	// TiFlashStartupRowPenalty applies a startup penalty for TiFlash scan to encourage TiKV usage for small scans
-	TiFlashStartupRowPenalty = 10000
-	// MaxPenaltyRowCount applies a penalty for high risk scans
-	MaxPenaltyRowCount = 1000
-)
-
 // GetPlanCostVer2 returns the plan-cost of this sub-plan, which is:
 // plan-cost = rows * log2(row-size) * scan-factor
 // log2(row-size) is from experiments.
@@ -137,17 +142,19 @@ func (p *PhysicalTableScan) GetPlanCostVer2(taskType property.TaskType, option *
 		return p.PlanCostVer2, nil
 	}
 
-	rows := getCardinality(p, option.CostFlag)
-
 	var columns []*expression.Column
 	if p.StoreType == kv.TiKV { // Assume all columns for TiKV
 		columns = p.tblCols
 	} else { // TiFlash
 		columns = p.schema.Columns
 	}
+	rows := getCardinality(p, option.CostFlag)
 	rowSize := getAvgRowSize(p.StatsInfo(), columns)
-	// Ensure rowSize has a reasonable minimum value to avoid underestimation
-	rowSize = math.Max(rowSize, MinRowSize)
+	// Ensure rows and rowSize have a reasonable minimum value to avoid underestimation
+	if !p.isChildOfIndexLookUp {
+		rows = max(MinNumRows, rows)
+		rowSize = max(rowSize, MinRowSize)
+	}
 
 	scanFactor := getTaskScanFactorVer2(p, p.StoreType, taskType)
 	p.PlanCostVer2 = scanCostVer2(option, rows, rowSize, scanFactor)
@@ -177,7 +184,7 @@ func (p *PhysicalTableScan) GetPlanCostVer2(taskType property.TaskType, option *
 
 		shouldApplyPenalty := hasFullRangeScan && (preferRangeScanCondition || hasHighModifyCount || hasLowEstimate)
 		if shouldApplyPenalty {
-			newRowCount := math.Min(MaxPenaltyRowCount, math.Max(float64(tblColHists.ModifyCount), float64(tblColHists.RealtimeCount)))
+			newRowCount := math.Min(MaxPenaltyRowCount, max(float64(tblColHists.ModifyCount), float64(tblColHists.RealtimeCount)))
 			p.PlanCostVer2 = costusage.SumCostVer2(p.PlanCostVer2, scanCostVer2(option, newRowCount, rowSize, scanFactor))
 		}
 	}
@@ -235,7 +242,7 @@ func (p *PhysicalTableReader) GetPlanCostVer2(taskType property.TaskType, option
 	}
 
 	rows := getCardinality(p.tablePlan, option.CostFlag)
-	rowSize := getAvgRowSize(p.StatsInfo(), p.schema.Columns)
+	rowSize := max(MinRowSize, getAvgRowSize(p.StatsInfo(), p.schema.Columns))
 	netFactor := getTaskNetFactorVer2(p, taskType)
 	concurrency := float64(p.SCtx().GetSessionVars().DistSQLScanConcurrency())
 	childType := property.CopSingleReadTaskType
@@ -395,8 +402,8 @@ func (p *PhysicalSort) GetPlanCostVer2(taskType property.TaskType, option *optim
 		return p.PlanCostVer2, nil
 	}
 
-	rows := math.Max(getCardinality(p.Children()[0], option.CostFlag), 1)
-	rowSize := getAvgRowSize(p.StatsInfo(), p.Schema().Columns)
+	rows := max(MinNumRows, getCardinality(p.Children()[0], option.CostFlag))
+	rowSize := max(MinRowSize, getAvgRowSize(p.StatsInfo(), p.Schema().Columns))
 	cpuFactor := getTaskCPUFactorVer2(p, taskType)
 	memFactor := getTaskMemFactorVer2(p, taskType)
 	diskFactor := defaultVer2Factors.TiDBDisk
@@ -443,14 +450,14 @@ func (p *PhysicalTopN) GetPlanCostVer2(taskType property.TaskType, option *optim
 		return p.PlanCostVer2, nil
 	}
 
-	rows := getCardinality(p.Children()[0], option.CostFlag)
+	rows := max(MinNumRows, getCardinality(p.Children()[0], option.CostFlag))
 	n := max(1, float64(p.Count+p.Offset))
 	if n > 10000 {
 		// It's only used to prevent some extreme cases, e.g. `select * from t order by a limit 18446744073709551615`.
 		// For normal cases, considering that `rows` may be under-estimated, better to keep `n` unchanged.
 		n = min(n, rows)
 	}
-	rowSize := getAvgRowSize(p.StatsInfo(), p.Schema().Columns)
+	rowSize := max(MinRowSize, getAvgRowSize(p.StatsInfo(), p.Schema().Columns))
 	cpuFactor := getTaskCPUFactorVer2(p, taskType)
 	memFactor := getTaskMemFactorVer2(p, taskType)
 
@@ -499,9 +506,9 @@ func (p *PhysicalHashAgg) GetPlanCostVer2(taskType property.TaskType, option *op
 		return p.PlanCostVer2, nil
 	}
 
-	inputRows := getCardinality(p.Children()[0], option.CostFlag)
-	outputRows := getCardinality(p, option.CostFlag)
-	outputRowSize := getAvgRowSize(p.StatsInfo(), p.Schema().Columns)
+	inputRows := max(MinNumRows, getCardinality(p.Children()[0], option.CostFlag))
+	outputRows := max(MinNumRows, getCardinality(p, option.CostFlag))
+	outputRowSize := max(MinRowSize, getAvgRowSize(p.StatsInfo(), p.Schema().Columns))
 	cpuFactor := getTaskCPUFactorVer2(p, taskType)
 	memFactor := getTaskMemFactorVer2(p, taskType)
 	concurrency := float64(p.SCtx().GetSessionVars().HashAggFinalConcurrency())
@@ -531,8 +538,8 @@ func (p *PhysicalMergeJoin) GetPlanCostVer2(taskType property.TaskType, option *
 		return p.PlanCostVer2, nil
 	}
 
-	leftRows := getCardinality(p.Children()[0], option.CostFlag)
-	rightRows := getCardinality(p.Children()[1], option.CostFlag)
+	leftRows := max(MinNumRows, getCardinality(p.Children()[0], option.CostFlag))
+	rightRows := max(MinNumRows, getCardinality(p.Children()[1], option.CostFlag))
 	cpuFactor := getTaskCPUFactorVer2(p, taskType)
 
 	filterCost := costusage.SumCostVer2(filterCostVer2(option, leftRows, p.LeftConditions, cpuFactor),
@@ -570,9 +577,9 @@ func (p *PhysicalHashJoin) GetPlanCostVer2(taskType property.TaskType, option *o
 		build, probe = probe, build
 		buildFilters, probeFilters = probeFilters, buildFilters
 	}
-	buildRows := getCardinality(build, option.CostFlag)
+	buildRows := max(MinNumRows, getCardinality(build, option.CostFlag))
 	probeRows := getCardinality(probe, option.CostFlag)
-	buildRowSize := getAvgRowSize(build.StatsInfo(), build.Schema().Columns)
+	buildRowSize := max(MinRowSize, getAvgRowSize(build.StatsInfo(), build.Schema().Columns))
 	tidbConcurrency := float64(p.Concurrency)
 	mppConcurrency := float64(3) // TODO: remove this empirical value
 	cpuFactor := getTaskCPUFactorVer2(p, taskType)