From 206723f2e6760e6aba25f7479e59f504cfa62a14 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Fri, 23 Feb 2024 16:49:58 +0800 Subject: [PATCH] planner: fix join resolveIndex won't find its column from children schema and amend join's lused and rused logic for reversed column ref from join schema to its children (#51258) close pingcap/tidb#42588 --- pkg/executor/benchmark_test.go | 100 ++++++++-- pkg/executor/builder.go | 45 +++-- pkg/executor/joiner.go | 17 +- pkg/executor/test/tiflashtest/tiflash_test.go | 26 +-- pkg/planner/cascades/optimize.go | 2 +- pkg/planner/core/plan.go | 15 +- pkg/planner/core/resolve_indices.go | 37 ++-- pkg/planner/core/rule_column_pruning.go | 186 ++++++++++++------ pkg/planner/core/rule_max_min_eliminate.go | 8 +- .../planner/core/casetest/integration.result | 97 +++++++++ tests/integrationtest/run-tests.sh | 4 +- .../t/planner/core/casetest/integration.test | 94 +++++++++ 12 files changed, 490 insertions(+), 141 deletions(-) diff --git a/pkg/executor/benchmark_test.go b/pkg/executor/benchmark_test.go index 458ca9250c0f2..f3b097346c6a7 100644 --- a/pkg/executor/benchmark_test.go +++ b/pkg/executor/benchmark_test.go @@ -15,11 +15,13 @@ package executor import ( + "cmp" "context" "encoding/base64" "fmt" "math/rand" "os" + "slices" "sort" "strconv" "strings" @@ -884,6 +886,44 @@ func defaultHashJoinTestCase(cols []*types.FieldType, joinType core.JoinType, us return tc } +func prepareResolveIndices(joinSchema, lSchema, rSchema *expression.Schema, joinType core.JoinType) *expression.Schema { + colsNeedResolving := joinSchema.Len() + // The last output column of this two join is the generated column to indicate whether the row is matched or not. + if joinType == core.LeftOuterSemiJoin || joinType == core.AntiLeftOuterSemiJoin { + colsNeedResolving-- + } + mergedSchema := expression.MergeSchema(lSchema, rSchema) + // To avoid that two plan shares the same column slice. + shallowColSlice := make([]*expression.Column, joinSchema.Len()) + copy(shallowColSlice, joinSchema.Columns) + joinSchema = expression.NewSchema(shallowColSlice...) + foundCnt := 0 + // Here we want to resolve all join schema columns directly as a merged schema, and you know same name + // col in join schema should be separately redirected to corresponded same col in child schema. But two + // column sets are **NOT** always ordered, see comment: https://github.com/pingcap/tidb/pull/45831#discussion_r1481031471 + // we are using mapping mechanism instead of moving j forward. + marked := make([]bool, mergedSchema.Len()) + for i := 0; i < colsNeedResolving; i++ { + findIdx := -1 + for j := 0; j < len(mergedSchema.Columns); j++ { + if !joinSchema.Columns[i].Equal(nil, mergedSchema.Columns[j]) || marked[j] { + continue + } + // resolve to a same unique id one, and it not being marked. + findIdx = j + break + } + if findIdx != -1 { + // valid one. + joinSchema.Columns[i] = joinSchema.Columns[i].Clone().(*expression.Column) + joinSchema.Columns[i].Index = findIdx + marked[findIdx] = true + foundCnt++ + } + } + return joinSchema +} + func prepare4HashJoin(testCase *hashJoinTestCase, innerExec, outerExec exec.Executor) *HashJoinExec { if testCase.useOuterToBuild { innerExec, outerExec = outerExec, innerExec @@ -907,6 +947,10 @@ func prepare4HashJoin(testCase *hashJoinTestCase, innerExec, outerExec exec.Exec joinSchema.Append(cols0...) joinSchema.Append(cols1...) } + // todo: need systematic way to protect. + // physical join should resolveIndices to get right schema column index. + // otherwise, markChildrenUsedColsForTest will fail below. + joinSchema = prepareResolveIndices(joinSchema, innerExec.Schema(), outerExec.Schema(), core.InnerJoin) joinKeysColIdx := make([]int, 0, len(testCase.keyIdx)) joinKeysColIdx = append(joinKeysColIdx, testCase.keyIdx...) @@ -962,25 +1006,39 @@ func prepare4HashJoin(testCase *hashJoinTestCase, innerExec, outerExec exec.Exec // markChildrenUsedColsForTest compares each child with the output schema, and mark // each column of the child is used by output or not. -func markChildrenUsedColsForTest(outputSchema *expression.Schema, childSchemas ...*expression.Schema) (childrenUsed [][]bool) { - childrenUsed = make([][]bool, 0, len(childSchemas)) - markedOffsets := make(map[int]struct{}) - for _, col := range outputSchema.Columns { - markedOffsets[col.Index] = struct{}{} +func markChildrenUsedColsForTest(outputSchema *expression.Schema, childSchemas ...*expression.Schema) (childrenUsed [][]int) { + childrenUsed = make([][]int, 0, len(childSchemas)) + markedOffsets := make(map[int]int) + for originalIdx, col := range outputSchema.Columns { + markedOffsets[col.Index] = originalIdx } prefixLen := 0 + type intPair struct { + first int + second int + } + // for example here. + // left child schema: [col11] + // right child schema: [col21, col22] + // output schema is [col11, col22, col21], if not records the original derived order after physical resolve index. + // the lused will be [0], the rused will be [0,1], while the actual order is dismissed, [1,0] is correct for rused. for _, childSchema := range childSchemas { - used := make([]bool, len(childSchema.Columns)) + usedIdxPair := make([]intPair, 0, len(childSchema.Columns)) for i := range childSchema.Columns { - if _, ok := markedOffsets[prefixLen+i]; ok { - used[i] = true + if originalIdx, ok := markedOffsets[prefixLen+i]; ok { + usedIdxPair = append(usedIdxPair, intPair{first: originalIdx, second: i}) } } - childrenUsed = append(childrenUsed, used) - } - for _, child := range childSchemas { - used := expression.GetUsedList(outputSchema.Columns, child) - childrenUsed = append(childrenUsed, used) + // sort the used idxes according their original indexes derived after resolveIndex. + slices.SortFunc(usedIdxPair, func(a, b intPair) int { + return cmp.Compare(a.first, b.first) + }) + usedIdx := make([]int, 0, len(childSchema.Columns)) + for _, one := range usedIdxPair { + usedIdx = append(usedIdx, one.second) + } + childrenUsed = append(childrenUsed, usedIdx) + prefixLen += childSchema.Len() } return } @@ -1582,6 +1640,20 @@ func prepareMergeJoinExec(tc *mergeJoinTestCase, joinSchema *expression.Schema, isOuterJoin: false, } + var usedIdx [][]int + if tc.childrenUsedSchema != nil { + usedIdx = make([][]int, 0, len(tc.childrenUsedSchema)) + for _, childSchema := range tc.childrenUsedSchema { + used := make([]int, 0, len(childSchema)) + for idx, one := range childSchema { + if one { + used = append(used, idx) + } + } + usedIdx = append(usedIdx, used) + } + } + mergeJoinExec.joiner = newJoiner( tc.ctx, 0, @@ -1590,7 +1662,7 @@ func prepareMergeJoinExec(tc *mergeJoinTestCase, joinSchema *expression.Schema, nil, exec.RetTypes(leftExec), exec.RetTypes(rightExec), - tc.childrenUsedSchema, + usedIdx, false, ) diff --git a/pkg/executor/builder.go b/pkg/executor/builder.go index 1d4183a569cba..066a80da4c7a9 100644 --- a/pkg/executor/builder.go +++ b/pkg/executor/builder.go @@ -798,14 +798,11 @@ func (b *executorBuilder) buildLimit(v *plannercore.PhysicalLimit) exec.Executor end: v.Offset + v.Count, } + childUsedSchemaLen := v.Children()[0].Schema().Len() childUsedSchema := markChildrenUsedCols(v.Schema().Columns, v.Children()[0].Schema())[0] e.columnIdxsUsedByChild = make([]int, 0, len(childUsedSchema)) - for i, used := range childUsedSchema { - if used { - e.columnIdxsUsedByChild = append(e.columnIdxsUsedByChild, i) - } - } - if len(e.columnIdxsUsedByChild) == len(childUsedSchema) { + e.columnIdxsUsedByChild = append(e.columnIdxsUsedByChild, childUsedSchema...) + if len(e.columnIdxsUsedByChild) == childUsedSchemaLen { e.columnIdxsUsedByChild = nil // indicates that all columns are used. LimitExec will improve performance for this condition. } return e @@ -2914,21 +2911,39 @@ func (b *executorBuilder) buildAnalyze(v *plannercore.Analyze) exec.Executor { // markChildrenUsedCols compares each child with the output schema, and mark // each column of the child is used by output or not. -func markChildrenUsedCols(outputCols []*expression.Column, childSchemas ...*expression.Schema) (childrenUsed [][]bool) { - childrenUsed = make([][]bool, 0, len(childSchemas)) - markedOffsets := make(map[int]struct{}) - for _, col := range outputCols { - markedOffsets[col.Index] = struct{}{} +func markChildrenUsedCols(outputCols []*expression.Column, childSchemas ...*expression.Schema) (childrenUsed [][]int) { + childrenUsed = make([][]int, 0, len(childSchemas)) + markedOffsets := make(map[int]int) + // keep the original maybe reversed order. + for originalIdx, col := range outputCols { + markedOffsets[col.Index] = originalIdx } prefixLen := 0 + type intPair struct { + first int + second int + } + // for example here. + // left child schema: [col11] + // right child schema: [col21, col22] + // output schema is [col11, col22, col21], if not records the original derived order after physical resolve index. + // the lused will be [0], the rused will be [0,1], while the actual order is dismissed, [1,0] is correct for rused. for _, childSchema := range childSchemas { - used := make([]bool, len(childSchema.Columns)) + usedIdxPair := make([]intPair, 0, len(childSchema.Columns)) for i := range childSchema.Columns { - if _, ok := markedOffsets[prefixLen+i]; ok { - used[i] = true + if originalIdx, ok := markedOffsets[prefixLen+i]; ok { + usedIdxPair = append(usedIdxPair, intPair{first: originalIdx, second: i}) } } - childrenUsed = append(childrenUsed, used) + // sort the used idxes according their original indexes derived after resolveIndex. + slices.SortFunc(usedIdxPair, func(a, b intPair) int { + return cmp.Compare(a.first, b.first) + }) + usedIdx := make([]int, 0, len(childSchema.Columns)) + for _, one := range usedIdxPair { + usedIdx = append(usedIdx, one.second) + } + childrenUsed = append(childrenUsed, usedIdx) prefixLen += childSchema.Len() } return diff --git a/pkg/executor/joiner.go b/pkg/executor/joiner.go index db46d6ccebb3f..2578110882080 100644 --- a/pkg/executor/joiner.go +++ b/pkg/executor/joiner.go @@ -132,7 +132,7 @@ func JoinerType(j joiner) plannercore.JoinType { func newJoiner(ctx sessionctx.Context, joinType plannercore.JoinType, outerIsRight bool, defaultInner []types.Datum, filter []expression.Expression, - lhsColTypes, rhsColTypes []*types.FieldType, childrenUsed [][]bool, isNA bool) joiner { + lhsColTypes, rhsColTypes []*types.FieldType, childrenUsed [][]int, isNA bool) joiner { base := baseJoiner{ ctx: ctx, conditions: filter, @@ -141,19 +141,14 @@ func newJoiner(ctx sessionctx.Context, joinType plannercore.JoinType, } base.selected = make([]bool, 0, chunk.InitialCapacity) base.isNull = make([]bool, 0, chunk.InitialCapacity) + // lused and rused should be followed with its original order. + // the case is that is join schema rely on the reversed order + // of child's schema, here we should keep it original order. if childrenUsed != nil { base.lUsed = make([]int, 0, len(childrenUsed[0])) // make it non-nil - for i, used := range childrenUsed[0] { - if used { - base.lUsed = append(base.lUsed, i) - } - } + base.lUsed = append(base.lUsed, childrenUsed[0]...) base.rUsed = make([]int, 0, len(childrenUsed[1])) // make it non-nil - for i, used := range childrenUsed[1] { - if used { - base.rUsed = append(base.rUsed, i) - } - } + base.rUsed = append(base.rUsed, childrenUsed[1]...) logutil.BgLogger().Debug("InlineProjection", zap.Ints("lUsed", base.lUsed), zap.Ints("rUsed", base.rUsed), zap.Int("lCount", len(lhsColTypes)), zap.Int("rCount", len(rhsColTypes))) diff --git a/pkg/executor/test/tiflashtest/tiflash_test.go b/pkg/executor/test/tiflashtest/tiflash_test.go index 2b0796de8f6d5..57fe2a6ddd5b1 100644 --- a/pkg/executor/test/tiflashtest/tiflash_test.go +++ b/pkg/executor/test/tiflashtest/tiflash_test.go @@ -1446,19 +1446,19 @@ func TestDisaggregatedTiFlashQuery(t *testing.T) { err = domain.GetDomain(tk.Session()).DDL().UpdateTableReplicaInfo(tk.Session(), tb.Meta().ID, true) require.NoError(t, err) tk.MustQuery("explain select * from t1 where c1 < 2").Check(testkit.Rows( - "PartitionUnion_10 9970.00 root ", - "├─TableReader_15 3323.33 root MppVersion: 2, data:ExchangeSender_14", - "│ └─ExchangeSender_14 3323.33 mpp[tiflash] ExchangeType: PassThrough", - "│ └─Selection_13 3323.33 mpp[tiflash] lt(test.t1.c1, 2)", - "│ └─TableFullScan_12 10000.00 mpp[tiflash] table:t1, partition:p0 pushed down filter:empty, keep order:false, stats:pseudo", - "├─TableReader_19 3323.33 root MppVersion: 2, data:ExchangeSender_18", - "│ └─ExchangeSender_18 3323.33 mpp[tiflash] ExchangeType: PassThrough", - "│ └─Selection_17 3323.33 mpp[tiflash] lt(test.t1.c1, 2)", - "│ └─TableFullScan_16 10000.00 mpp[tiflash] table:t1, partition:p1 pushed down filter:empty, keep order:false, stats:pseudo", - "└─TableReader_23 3323.33 root MppVersion: 2, data:ExchangeSender_22", - " └─ExchangeSender_22 3323.33 mpp[tiflash] ExchangeType: PassThrough", - " └─Selection_21 3323.33 mpp[tiflash] lt(test.t1.c1, 2)", - " └─TableFullScan_20 10000.00 mpp[tiflash] table:t1, partition:p2 pushed down filter:empty, keep order:false, stats:pseudo")) + "PartitionUnion_11 9970.00 root ", + "├─TableReader_16 3323.33 root MppVersion: 2, data:ExchangeSender_15", + "│ └─ExchangeSender_15 3323.33 mpp[tiflash] ExchangeType: PassThrough", + "│ └─Selection_14 3323.33 mpp[tiflash] lt(test.t1.c1, 2)", + "│ └─TableFullScan_13 10000.00 mpp[tiflash] table:t1, partition:p0 pushed down filter:empty, keep order:false, stats:pseudo", + "├─TableReader_20 3323.33 root MppVersion: 2, data:ExchangeSender_19", + "│ └─ExchangeSender_19 3323.33 mpp[tiflash] ExchangeType: PassThrough", + "│ └─Selection_18 3323.33 mpp[tiflash] lt(test.t1.c1, 2)", + "│ └─TableFullScan_17 10000.00 mpp[tiflash] table:t1, partition:p1 pushed down filter:empty, keep order:false, stats:pseudo", + "└─TableReader_24 3323.33 root MppVersion: 2, data:ExchangeSender_23", + " └─ExchangeSender_23 3323.33 mpp[tiflash] ExchangeType: PassThrough", + " └─Selection_22 3323.33 mpp[tiflash] lt(test.t1.c1, 2)", + " └─TableFullScan_21 10000.00 mpp[tiflash] table:t1, partition:p2 pushed down filter:empty, keep order:false, stats:pseudo")) } func TestMPPMemoryTracker(t *testing.T) { diff --git a/pkg/planner/cascades/optimize.go b/pkg/planner/cascades/optimize.go index d16ab3a7c7113..339541a0cb4f5 100644 --- a/pkg/planner/cascades/optimize.go +++ b/pkg/planner/cascades/optimize.go @@ -116,7 +116,7 @@ func (opt *Optimizer) FindBestPlan(sctx sessionctx.Context, logical plannercore. } func (*Optimizer) onPhasePreprocessing(_ sessionctx.Context, plan plannercore.LogicalPlan) (plannercore.LogicalPlan, error) { - err := plan.PruneColumns(plan.Schema().Columns, nil) + plan, err := plan.PruneColumns(plan.Schema().Columns, nil) if err != nil { return nil, err } diff --git a/pkg/planner/core/plan.go b/pkg/planner/core/plan.go index 464c99be11b39..a4ef8e31d90f6 100644 --- a/pkg/planner/core/plan.go +++ b/pkg/planner/core/plan.go @@ -256,8 +256,8 @@ type LogicalPlan interface { // Because it might change the root if the having clause exists, we need to return a plan that represents a new root. PredicatePushDown([]expression.Expression, *logicalOptimizeOp) ([]expression.Expression, LogicalPlan) - // PruneColumns prunes the unused columns. - PruneColumns([]*expression.Column, *logicalOptimizeOp) error + // PruneColumns prunes the unused columns, and return the new logical plan if changed, otherwise it's same. + PruneColumns([]*expression.Column, *logicalOptimizeOp) (LogicalPlan, error) // findBestTask converts the logical plan to the physical plan. It's a new interface. // It is called recursively from the parent to the children to create the result physical plan. @@ -759,11 +759,16 @@ func (*baseLogicalPlan) ExtractCorrelatedCols() []*expression.CorrelatedColumn { } // PruneColumns implements LogicalPlan interface. -func (p *baseLogicalPlan) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *baseLogicalPlan) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { if len(p.children) == 0 { - return nil + return p.self, nil + } + var err error + p.children[0], err = p.children[0].PruneColumns(parentUsedCols, opt) + if err != nil { + return nil, err } - return p.children[0].PruneColumns(parentUsedCols, opt) + return p.self, nil } // Schema implements Plan Schema interface. diff --git a/pkg/planner/core/resolve_indices.go b/pkg/planner/core/resolve_indices.go index e43405fb1f6db..d2ec0cb0db53a 100644 --- a/pkg/planner/core/resolve_indices.go +++ b/pkg/planner/core/resolve_indices.go @@ -141,26 +141,33 @@ func (p *PhysicalHashJoin) ResolveIndicesItself() (err error) { copy(shallowColSlice, p.schema.Columns) p.schema = expression.NewSchema(shallowColSlice...) foundCnt := 0 - // The two column sets are all ordered. And the colsNeedResolving is the subset of the mergedSchema. - // So we can just move forward j if there's no matching is found. - // We don't use the normal ResolvIndices here since there might be duplicate columns in the schema. - // e.g. The schema of child_0 is [col0, col0, col1] - // ResolveIndices will only resolve all col0 reference of the current plan to the first col0. - for i, j := 0, 0; i < colsNeedResolving && j < len(mergedSchema.Columns); { - if !p.schema.Columns[i].Equal(nil, mergedSchema.Columns[j]) { - j++ - continue + + // Here we want to resolve all join schema columns directly as a merged schema, and you know same name + // col in join schema should be separately redirected to corresponded same col in child schema. But two + // column sets are **NOT** always ordered, see comment: https://github.com/pingcap/tidb/pull/45831#discussion_r1481031471 + // we are using mapping mechanism instead of moving j forward. + marked := make([]bool, mergedSchema.Len()) + for i := 0; i < colsNeedResolving; i++ { + findIdx := -1 + for j := 0; j < len(mergedSchema.Columns); j++ { + if !p.schema.Columns[i].Equal(p.SCtx(), mergedSchema.Columns[j]) || marked[j] { + continue + } + // resolve to a same unique id one, and it not being marked. + findIdx = j + break + } + if findIdx != -1 { + // valid one. + p.schema.Columns[i] = p.schema.Columns[i].Clone().(*expression.Column) + p.schema.Columns[i].Index = findIdx + marked[findIdx] = true + foundCnt++ } - p.schema.Columns[i] = p.schema.Columns[i].Clone().(*expression.Column) - p.schema.Columns[i].Index = j - i++ - j++ - foundCnt++ } if foundCnt < colsNeedResolving { return errors.Errorf("Some columns of %v cannot find the reference from its child(ren)", p.ExplainID().String()) } - return } diff --git a/pkg/planner/core/rule_column_pruning.go b/pkg/planner/core/rule_column_pruning.go index f82b1a36e18f4..d9acd8feef08b 100644 --- a/pkg/planner/core/rule_column_pruning.go +++ b/pkg/planner/core/rule_column_pruning.go @@ -33,8 +33,11 @@ type columnPruner struct { func (*columnPruner) optimize(_ context.Context, lp LogicalPlan, opt *logicalOptimizeOp) (LogicalPlan, bool, error) { planChanged := false - err := lp.PruneColumns(lp.Schema().Columns, opt) - return lp, planChanged, err + lp, err := lp.PruneColumns(lp.Schema().Columns, opt) + if err != nil { + return nil, planChanged, err + } + return lp, planChanged, nil } // ExprsHasSideEffects checks if any of the expressions has side effects. @@ -70,8 +73,7 @@ func exprHasSetVarOrSleep(expr expression.Expression) bool { // the level projection expressions construction is left to the last logical optimize rule) // // so when do the rule_column_pruning here, we just prune the schema is enough. -func (p *LogicalExpand) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { - child := p.children[0] +func (p *LogicalExpand) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { // Expand need those extra redundant distinct group by columns projected from underlying projection. // distinct GroupByCol must be used by aggregate above, to make sure this, append distinctGroupByCol again. parentUsedCols = append(parentUsedCols, p.distinctGroupByCol...) @@ -86,13 +88,17 @@ func (p *LogicalExpand) PruneColumns(parentUsedCols []*expression.Column, opt *l } appendColumnPruneTraceStep(p, prunedColumns, opt) // Underlying still need to keep the distinct group by columns and parent used columns. - return child.PruneColumns(parentUsedCols, opt) + var err error + p.children[0], err = p.children[0].PruneColumns(parentUsedCols, opt) + if err != nil { + return nil, err + } + return p, nil } // PruneColumns implements LogicalPlan interface. // If any expression has SetVar function or Sleep function, we do not prune it. -func (p *LogicalProjection) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { - child := p.children[0] +func (p *LogicalProjection) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { used := expression.GetUsedList(parentUsedCols, p.schema) prunedColumns := make([]*expression.Column, 0) @@ -107,18 +113,28 @@ func (p *LogicalProjection) PruneColumns(parentUsedCols []*expression.Column, op appendColumnPruneTraceStep(p, prunedColumns, opt) selfUsedCols := make([]*expression.Column, 0, len(p.Exprs)) selfUsedCols = expression.ExtractColumnsFromExpressions(selfUsedCols, p.Exprs, nil) - return child.PruneColumns(selfUsedCols, opt) + var err error + p.children[0], err = p.children[0].PruneColumns(selfUsedCols, opt) + if err != nil { + return nil, err + } + return p, nil } // PruneColumns implements LogicalPlan interface. -func (p *LogicalSelection) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalSelection) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { child := p.children[0] parentUsedCols = expression.ExtractColumnsFromExpressions(parentUsedCols, p.Conditions, nil) - return child.PruneColumns(parentUsedCols, opt) + var err error + p.children[0], err = child.PruneColumns(parentUsedCols, opt) + if err != nil { + return nil, err + } + return p, nil } // PruneColumns implements LogicalPlan interface. -func (la *LogicalAggregation) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (la *LogicalAggregation) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { child := la.children[0] used := expression.GetUsedList(parentUsedCols, la.Schema()) prunedColumns := make([]*expression.Column, 0) @@ -164,7 +180,7 @@ func (la *LogicalAggregation) PruneColumns(parentUsedCols []*expression.Column, newAgg, err = aggregation.NewAggFuncDesc(la.SCtx(), ast.AggFuncCount, []expression.Expression{expression.NewOne()}, false) } if err != nil { - return err + return nil, err } la.AggFuncs = append(la.AggFuncs, newAgg) col := &expression.Column{ @@ -191,10 +207,13 @@ func (la *LogicalAggregation) PruneColumns(parentUsedCols []*expression.Column, } } appendGroupByItemsPruneTraceStep(la, prunedGroupByItems, opt) - err := child.PruneColumns(selfUsedCols, opt) + var err error + la.children[0], err = child.PruneColumns(selfUsedCols, opt) if err != nil { - return err + return nil, err } + // update children[0] + child = la.children[0] // Do an extra Projection Elimination here. This is specially for empty Projection below Aggregation. // This kind of Projection would cause some bugs for MPP plan and is safe to be removed. // This kind of Projection should be removed in Projection Elimination, but currently PrunColumnsAgain is @@ -205,7 +224,7 @@ func (la *LogicalAggregation) PruneColumns(parentUsedCols []*expression.Column, la.SetChildren(childOfChild) } } - return nil + return la, nil } func pruneByItems(p LogicalPlan, old []*util.ByItems, opt *logicalOptimizeOp) (byItems []*util.ByItems, @@ -242,27 +261,36 @@ func pruneByItems(p LogicalPlan, old []*util.ByItems, opt *logicalOptimizeOp) (b // PruneColumns implements LogicalPlan interface. // If any expression can view as a constant in execution stage, such as correlated column, constant, // we do prune them. Note that we can't prune the expressions contain non-deterministic functions, such as rand(). -func (ls *LogicalSort) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { - child := ls.children[0] +func (ls *LogicalSort) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { var cols []*expression.Column ls.ByItems, cols = pruneByItems(ls, ls.ByItems, opt) parentUsedCols = append(parentUsedCols, cols...) - return child.PruneColumns(parentUsedCols, opt) + var err error + ls.children[0], err = ls.children[0].PruneColumns(parentUsedCols, opt) + if err != nil { + return nil, err + } + return ls, nil } // PruneColumns implements LogicalPlan interface. // If any expression can view as a constant in execution stage, such as correlated column, constant, // we do prune them. Note that we can't prune the expressions contain non-deterministic functions, such as rand(). -func (lt *LogicalTopN) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (lt *LogicalTopN) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { child := lt.children[0] var cols []*expression.Column lt.ByItems, cols = pruneByItems(lt, lt.ByItems, opt) parentUsedCols = append(parentUsedCols, cols...) - return child.PruneColumns(parentUsedCols, opt) + var err error + lt.children[0], err = child.PruneColumns(parentUsedCols, opt) + if err != nil { + return nil, err + } + return lt, nil } // PruneColumns implements LogicalPlan interface. -func (p *LogicalUnionAll) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalUnionAll) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { used := expression.GetUsedList(parentUsedCols, p.schema) hasBeenUsed := false for i := range used { @@ -278,10 +306,12 @@ func (p *LogicalUnionAll) PruneColumns(parentUsedCols []*expression.Column, opt used[i] = true } } - for _, child := range p.Children() { - err := child.PruneColumns(parentUsedCols, opt) + + var err error + for i, child := range p.Children() { + p.Children()[i], err = child.PruneColumns(parentUsedCols, opt) if err != nil { - return err + return nil, err } } @@ -312,11 +342,11 @@ func (p *LogicalUnionAll) PruneColumns(parentUsedCols []*expression.Column, opt } } } - return nil + return p, nil } // PruneColumns implements LogicalPlan interface. -func (p *LogicalUnionScan) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalUnionScan) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { for i := 0; i < p.handleCols.NumCols(); i++ { parentUsedCols = append(parentUsedCols, p.handleCols.GetCol(i)) } @@ -327,13 +357,17 @@ func (p *LogicalUnionScan) PruneColumns(parentUsedCols []*expression.Column, opt } condCols := expression.ExtractColumnsFromExpressions(nil, p.conditions, nil) parentUsedCols = append(parentUsedCols, condCols...) - return p.children[0].PruneColumns(parentUsedCols, opt) + var err error + p.children[0], err = p.children[0].PruneColumns(parentUsedCols, opt) + if err != nil { + return nil, err + } + return p, nil } // PruneColumns implements LogicalPlan interface. -func (ds *DataSource) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (ds *DataSource) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { used := expression.GetUsedList(parentUsedCols, ds.schema) - exprCols := expression.ExtractColumnsFromExpressions(nil, ds.allConds, nil) exprUsed := expression.GetUsedList(exprCols, ds.schema) prunedColumns := make([]*expression.Column, 0) @@ -379,11 +413,11 @@ func (ds *DataSource) PruneColumns(parentUsedCols []*expression.Column, opt *log if ds.handleCols != nil && ds.handleCols.IsInt() && ds.schema.ColumnIndex(ds.handleCols.GetCol(0)) == -1 { ds.handleCols = nil } - return nil + return ds, nil } // PruneColumns implements LogicalPlan interface. -func (p *LogicalMemTable) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalMemTable) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { switch p.TableInfo.Name.O { case infoschema.TableStatementsSummary, infoschema.TableStatementsSummaryHistory, @@ -397,7 +431,7 @@ func (p *LogicalMemTable) PruneColumns(parentUsedCols []*expression.Column, opt infoschema.TableDeadlocks, infoschema.ClusterTableDeadlocks: default: - return nil + return p, nil } prunedColumns := make([]*expression.Column, 0) used := expression.GetUsedList(parentUsedCols, p.schema) @@ -410,11 +444,11 @@ func (p *LogicalMemTable) PruneColumns(parentUsedCols []*expression.Column, opt } } appendColumnPruneTraceStep(p, prunedColumns, opt) - return nil + return p, nil } // PruneColumns implements LogicalPlan interface. -func (p *LogicalTableDual) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalTableDual) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { used := expression.GetUsedList(parentUsedCols, p.Schema()) prunedColumns := make([]*expression.Column, 0) for i := len(used) - 1; i >= 0; i-- { @@ -424,7 +458,7 @@ func (p *LogicalTableDual) PruneColumns(parentUsedCols []*expression.Column, opt } } appendColumnPruneTraceStep(p, prunedColumns, opt) - return nil + return p, nil } func (p *LogicalJoin) extractUsedCols(parentUsedCols []*expression.Column) (leftCols []*expression.Column, rightCols []*expression.Column) { @@ -460,18 +494,19 @@ func (p *LogicalJoin) mergeSchema() { } // PruneColumns implements LogicalPlan interface. -func (p *LogicalJoin) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalJoin) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { leftCols, rightCols := p.extractUsedCols(parentUsedCols) - err := p.children[0].PruneColumns(leftCols, opt) + var err error + p.children[0], err = p.children[0].PruneColumns(leftCols, opt) if err != nil { - return err + return nil, err } addConstOneForEmptyProjection(p.children[0]) - err = p.children[1].PruneColumns(rightCols, opt) + p.children[1], err = p.children[1].PruneColumns(rightCols, opt) if err != nil { - return err + return nil, err } addConstOneForEmptyProjection(p.children[1]) @@ -481,16 +516,18 @@ func (p *LogicalJoin) PruneColumns(parentUsedCols []*expression.Column, opt *log parentUsedCols = append(parentUsedCols, joinCol) } p.inlineProjection(parentUsedCols, opt) - return nil + return p, nil } // PruneColumns implements LogicalPlan interface. -func (la *LogicalApply) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (la *LogicalApply) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { leftCols, rightCols := la.extractUsedCols(parentUsedCols) - err := la.children[1].PruneColumns(rightCols, opt) + var err error + // column pruning for child-1. + la.children[1], err = la.children[1].PruneColumns(rightCols, opt) if err != nil { - return err + return nil, err } addConstOneForEmptyProjection(la.children[1]) @@ -499,20 +536,29 @@ func (la *LogicalApply) PruneColumns(parentUsedCols []*expression.Column, opt *l leftCols = append(leftCols, &col.Column) } - err = la.children[0].PruneColumns(leftCols, opt) + // column pruning for child-0. + la.children[0], err = la.children[0].PruneColumns(leftCols, opt) if err != nil { - return err + return nil, err } addConstOneForEmptyProjection(la.children[0]) - la.mergeSchema() - return nil + return la, nil } // PruneColumns implements LogicalPlan interface. -func (p *LogicalLock) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalLock) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { + var err error if !IsSelectForUpdateLockType(p.Lock.LockType) { - return p.baseLogicalPlan.PruneColumns(parentUsedCols, opt) + // when use .baseLogicalPlan to call the PruneColumns, it means current plan itself has + // nothing to pruning or plan change, so they resort to its children's column pruning logic. + // so for the returned logical plan here, p is definitely determined, we just need to collect + // those extra deeper call error in handling children's column pruning. + _, err = p.baseLogicalPlan.PruneColumns(parentUsedCols, opt) + if err != nil { + return nil, err + } + return p, nil } for tblID, cols := range p.tblID2Handle { @@ -526,11 +572,15 @@ func (p *LogicalLock) PruneColumns(parentUsedCols []*expression.Column, opt *log parentUsedCols = append(parentUsedCols, physTblIDCol) } } - return p.children[0].PruneColumns(parentUsedCols, opt) + p.children[0], err = p.children[0].PruneColumns(parentUsedCols, opt) + if err != nil { + return nil, err + } + return p, nil } // PruneColumns implements LogicalPlan interface. -func (p *LogicalWindow) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalWindow) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { windowColumns := p.GetWindowResultColumns() cnt := 0 for _, col := range parentUsedCols { @@ -548,14 +598,15 @@ func (p *LogicalWindow) PruneColumns(parentUsedCols []*expression.Column, opt *l } parentUsedCols = parentUsedCols[:cnt] parentUsedCols = p.extractUsedCols(parentUsedCols) - err := p.children[0].PruneColumns(parentUsedCols, opt) + var err error + p.children[0], err = p.children[0].PruneColumns(parentUsedCols, opt) if err != nil { - return err + return nil, err } p.SetSchema(p.children[0].Schema().Clone()) p.Schema().Append(windowColumns...) - return nil + return p, nil } func (p *LogicalWindow) extractUsedCols(parentUsedCols []*expression.Column) []*expression.Column { @@ -574,19 +625,21 @@ func (p *LogicalWindow) extractUsedCols(parentUsedCols []*expression.Column) []* } // PruneColumns implements LogicalPlan interface. -func (p *LogicalLimit) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalLimit) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { if len(parentUsedCols) == 0 { // happens when LIMIT appears in UPDATE. - return nil + return p, nil } savedUsedCols := make([]*expression.Column, len(parentUsedCols)) copy(savedUsedCols, parentUsedCols) - if err := p.children[0].PruneColumns(parentUsedCols, opt); err != nil { - return err + + var err error + if p.children[0], err = p.children[0].PruneColumns(parentUsedCols, opt); err != nil { + return nil, err } p.schema = nil p.inlineProjection(savedUsedCols, opt) - return nil + return p, nil } func (*columnPruner) name() string { @@ -707,11 +760,16 @@ func preferKeyColumnFromTable(dataSource *DataSource, originColumns []*expressio // PruneColumns implements the interface of LogicalPlan. // LogicalCTE just do a empty function call. It's logical optimize is indivisual phase. -func (*LogicalCTE) PruneColumns(_ []*expression.Column, _ *logicalOptimizeOp) error { - return nil +func (p *LogicalCTE) PruneColumns(_ []*expression.Column, _ *logicalOptimizeOp) (LogicalPlan, error) { + return p, nil } // PruneColumns implements the interface of LogicalPlan. -func (p *LogicalSequence) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { - return p.children[len(p.children)-1].PruneColumns(parentUsedCols, opt) +func (p *LogicalSequence) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { + var err error + p.children[len(p.children)-1], err = p.children[len(p.children)-1].PruneColumns(parentUsedCols, opt) + if err != nil { + return nil, err + } + return p, nil } diff --git a/pkg/planner/core/rule_max_min_eliminate.go b/pkg/planner/core/rule_max_min_eliminate.go index 03cc81a60067d..c0d812be2ca3b 100644 --- a/pkg/planner/core/rule_max_min_eliminate.go +++ b/pkg/planner/core/rule_max_min_eliminate.go @@ -155,9 +155,15 @@ func (a *maxMinEliminator) splitAggFuncAndCheckIndices(agg *LogicalAggregation, newAgg := LogicalAggregation{AggFuncs: []*aggregation.AggFuncDesc{f}}.Init(agg.SCtx(), agg.SelectBlockOffset()) newAgg.SetChildren(a.cloneSubPlans(agg.children[0])) newAgg.schema = expression.NewSchema(agg.schema.Columns[i]) - if err := newAgg.PruneColumns([]*expression.Column{newAgg.schema.Columns[0]}, opt); err != nil { + // Since LogicalAggregation doesn’t use the parent LogicalPlan, passing an incorrect parameter here won’t affect subsequent optimizations. + var ( + p LogicalPlan + err error + ) + if p, err = newAgg.PruneColumns([]*expression.Column{newAgg.schema.Columns[0]}, opt); err != nil { return nil, false } + newAgg = p.(*LogicalAggregation) aggs = append(aggs, newAgg) } return aggs, true diff --git a/tests/integrationtest/r/planner/core/casetest/integration.result b/tests/integrationtest/r/planner/core/casetest/integration.result index bc23983d5b3ca..1b2eefc689c9d 100644 --- a/tests/integrationtest/r/planner/core/casetest/integration.result +++ b/tests/integrationtest/r/planner/core/casetest/integration.result @@ -1461,3 +1461,100 @@ IndexJoin_13 2658.67 root anti semi join, inner:IndexLookUp_12, outer key:plann ├─IndexRangeScan_9(Build) 12500.00 cop[tikv] table:t2_part, index:a(a) range: decided by [eq(planner__core__casetest__integration.t2_part.a, planner__core__casetest__integration.t1.a)], keep order:false, stats:pseudo └─Selection_11(Probe) 4154.17 cop[tikv] lt(planner__core__casetest__integration.t2_part.b, 20) └─TableRowIDScan_10 12500.00 cop[tikv] table:t2_part keep order:false, stats:pseudo +drop database if exists testdb; +create database testdb; +use testdb; +drop table if exists `t270`; +CREATE TABLE `t270` ( +`vkey` int(11) DEFAULT NULL, +`pkey` int(11) DEFAULT NULL, +`c1128` varchar(100) DEFAULT NULL, +`c1129` int(11) DEFAULT NULL, +`c1130` varchar(100) DEFAULT NULL, +`c1131` double DEFAULT NULL, +`c1132` varchar(100) DEFAULT NULL, +`c1133` double DEFAULT NULL, +`c1134` varchar(100) DEFAULT NULL, +`c1135` int(11) DEFAULT NULL +); +drop table if exists `t271`; +CREATE TABLE `t271` ( +`vkey` int(11) DEFAULT NULL, +`pkey` int(11) DEFAULT NULL, +`c1136` varchar(100) DEFAULT NULL, +`c1137` int(11) DEFAULT NULL, +`c1138` varchar(100) DEFAULT NULL, +`c1139` int(11) DEFAULT NULL, +`c1140` double DEFAULT NULL, +`c1141` int(11) DEFAULT NULL +); +drop table if exists `t272`; +CREATE TABLE `t272` ( +`vkey` int(11) DEFAULT NULL, +`pkey` int(11) DEFAULT NULL, +`c1142` int(11) DEFAULT NULL, +`c1143` varchar(100) DEFAULT NULL, +`c1144` int(11) DEFAULT NULL, +`c1145` int(11) DEFAULT NULL, +`c1146` varchar(100) DEFAULT NULL, +`c1147` double DEFAULT NULL, +`c1148` varchar(100) DEFAULT NULL, +`c1149` double DEFAULT NULL +); +CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`%` SQL SECURITY DEFINER VIEW `t273_test` (`c0`, `c1`, `c2`, `c3`, `c4`) AS SELECT AVG(37) OVER (PARTITION BY `ref_0`.`c1136` ORDER BY `ref_0`.`vkey` DESC,`ref_0`.`pkey` DESC,`ref_0`.`c1136` DESC,`ref_0`.`c1137`,`ref_0`.`c1138` DESC,`ref_0`.`c1139` DESC,`ref_0`.`c1140` DESC,`ref_0`.`c1141`) AS `c0`,COALESCE(`ref_0`.`c1137`, `ref_0`.`c1141`) AS `c1`,`ref_0`.`vkey` AS `c2`,`ref_0`.`pkey` AS `c3`,`ref_0`.`c1138` AS `c4` FROM `testdb`.`t271` AS `ref_0` WHERE EXISTS (SELECT `subq_0`.`c2` AS `c0`,`subq_0`.`c0` AS `c1`,`subq_0`.`c0` AS `c2`,`subq_0`.`c0` AS `c3`,CASE WHEN EXISTS (SELECT `ref_9`.`c1131` AS `c0`,`ref_9`.`c1131` AS `c1`,(FALSE) XOR (((-45)=(-69)) OR ((-0)>(-71))) AS `c2`,`ref_9`.`c1133` AS `c3`,`ref_9`.`c1128` AS `c4`,-0 AS `c5`,1 AS `c6`,`ref_9`.`c1132` AS `c7`,`ref_9`.`c1131` AS `c8`,`ref_9`.`c1130` AS `c9`,NULL AS `c10` FROM `testdb`.`t270` AS `ref_9` WHERE (-0)<(-8) UNION ALL SELECT `ref_0`.`c1140` AS `c0`,`ref_11`.`c1133` AS `c1`,(NULL)<(NULL) AS `c2`,`ref_0`.`c1140` AS `c3`,`ref_0`.`c1136` AS `c4`,95 AS `c5`,NOT (_UTF8MB4'mum#M' LIKE _UTF8MB4'%_U') AS `c6`,`ref_11`.`c1128` AS `c7`,`ref_11`.`c1131` AS `c8`,(SELECT `c1143` AS `c1143` FROM `testdb`.`t272` ORDER BY `c1143` LIMIT 3,1) AS `c9`,97 AS `c10` FROM `testdb`.`t270` AS `ref_11` WHERE NOT (TRUE)) THEN _UTF8MB4'xf' ELSE _UTF8MB4'>c' END LIKE _UTF8MB4'_^^' AS `c4`,`subq_0`.`c1` AS `c5`,`ref_0`.`vkey` AS `c6`,((`subq_0`.`c1`)=(SELECT `ref_12`.`c1132` AS `c0` FROM `testdb`.`t270` AS `ref_12` WHERE TRUE ORDER BY `c0` DESC LIMIT 1)) XOR ((`ref_0`.`pkey`)>=(SELECT (SELECT `vkey` AS `vkey` FROM `testdb`.`t271` ORDER BY `vkey` LIMIT 1,1) AS `c0` FROM `testdb`.`t271` AS `ref_13` WHERE (-24)<=((SELECT COUNT(`c1140`) AS `count(c1140)` FROM `testdb`.`t271`)) ORDER BY `c0` LIMIT 1)) AS `c7`,`ref_0`.`pkey` AS `c8`,`subq_0`.`c2` AS `c9`,`ref_0`.`vkey` AS `c10`,`ref_0`.`c1139` AS `c11`,TRUE AS `c12`,`subq_0`.`c0` AS `c13`,`subq_0`.`c2` AS `c14`,`subq_0`.`c2` AS `c15`,FALSE AS `c16`,CASE WHEN ((FALSE) OR ((((FALSE) XOR (((-73)<(-91)) OR (((-0) BETWEEN (-0) AND (-0)) AND ((NULL) OR ((0)>((SELECT COUNT(`c1131`) AS `count(c1131)` FROM `testdb`.`t270`))))))) AND ((-19)>(NULL))) OR (((77)<(73)) AND (NOT (((73) IN (SELECT 0 AS `c0` FROM `testdb`.`t271` AS `ref_14` WHERE (NULL) AND (NULL) EXCEPT SELECT NULL AS `c0` FROM `testdb`.`t270` AS `ref_15` WHERE (`ref_15`.`c1131`)!=(SELECT `ref_15`.`c1133` AS `c0` FROM `testdb`.`t270` AS `ref_16` WHERE _UTF8MB4'$@-X' LIKE _UTF8MB4'__%' ORDER BY `c0` DESC LIMIT 1))) IS TRUE))))) OR (NOT ((-24)<=(-43))) THEN `subq_0`.`c1` ELSE `subq_0`.`c2` END AS `c17`,`subq_0`.`c1` AS `c18`,`subq_0`.`c0` AS `c19`,`subq_0`.`c0` AS `c20`,`subq_0`.`c2` AS `c21`,`subq_0`.`c0` AS `c22`,`subq_0`.`c2` AS `c23`,`subq_0`.`c0` AS `c24`,`ref_0`.`c1141` AS `c25` FROM (SELECT DISTINCT TRUE AS `c0`,`ref_1`.`c1143` AS `c1`,`ref_1`.`c1146` AS `c2` FROM `testdb`.`t272` AS `ref_1` WHERE NOT (((`ref_0`.`c1136`)!=(SELECT `ref_2`.`c1146` AS `c0` FROM `testdb`.`t272` AS `ref_2` WHERE (62) BETWEEN ((SELECT COUNT(`c1147`) AS `count(c1147)` FROM `testdb`.`t272`)) AND (-0) ORDER BY `c0` LIMIT 1)) XOR ((-0) BETWEEN (0) AND (-0)))) AS `subq_0` WHERE (CHAR_LENGTH(CASE WHEN ((`subq_0`.`c0`) IS NOT NULL) OR ((`ref_0`.`c1138`)>(SELECT `ref_0`.`c1138` AS `c0` FROM `testdb`.`t272` AS `ref_3` WHERE FALSE ORDER BY `c0` DESC LIMIT 1)) THEN _UTF8MB4'' ELSE _UTF8MB4'tL' END)) BETWEEN (ABS(46%-11)) AND (CASE WHEN (((((`subq_0`.`c2`) IN (SELECT `ref_4`.`c1134` AS `c0` FROM `testdb`.`t270` AS `ref_4` WHERE (NULL LIKE _UTF8MB4'%Ny') OR (EXISTS (SELECT DISTINCT `ref_5`.`c1136` AS `c0`,`ref_5`.`c1140` AS `c1` FROM `testdb`.`t271` AS `ref_5` WHERE FALSE UNION ALL SELECT `ref_4`.`c1130` AS `c0`,`ref_4`.`c1131` AS `c1` FROM `testdb`.`t271` AS `ref_6` WHERE (-97) BETWEEN (73) AND (-10))) UNION ALL SELECT `ref_7`.`c1138` AS `c0` FROM `testdb`.`t271` AS `ref_7` WHERE FALSE)) IS TRUE) OR (NULL)) AND ((NULL)>=((SELECT COUNT(`c1140`) AS `count(c1140)` FROM `testdb`.`t271`)))) XOR (((`ref_0`.`vkey`) IN (SELECT `ref_8`.`c1145` AS `c0` FROM `testdb`.`t272` AS `ref_8` WHERE ((FALSE) AND (NULL)) OR ((`ref_8`.`c1144`) IS NULL))) IS TRUE) THEN 87 ELSE CASE WHEN ((`ref_0`.`c1138`) IS NULL) OR ((-22)!=(-0)) THEN 17 ELSE -67 END END)) ORDER BY `c0` DESC,`c1` DESC,`c2`,`c3`,`c4` DESC; +select +(select +subq_1.c0 as c0 +from +t273_test as ref_84 +where exists ( +select +(select +ref_86.c1147 as c0 +from +t272 as ref_86 +where (subq_1.c0) > (subq_1.c0) +window w0 as (partition by ref_86.c1147 order by ref_86.c1143 desc) +order by c0 limit 1 +) as c3, +(select +subq_1.c0 as c0 +from +t273_test as ref_89 +order by c0 limit 1) as c4 +from +t271 as ref_85 +) +order by c0 desc limit 1) as c1 +from +(select 1 as c0) as subq_1; +c1 +NULL +select +(select +subq_1.c0 as c0 +from +t271 as ref_84 +where exists ( +select +(select +ref_86.c1147 as c0 +from +t272 as ref_86 +where (subq_1.c0) > (subq_1.c0) +window w0 as (partition by ref_86.c1147 order by ref_86.c1143 desc) +order by c0 limit 1 +) as c3, +(select +subq_1.c0 as c0 +from +t271 as ref_89 +order by c0 limit 1) as c4 +from +t271 as ref_85 +) +order by c0 desc limit 1) as c1 +from +(select 1 as c0) as subq_1; +c1 +NULL diff --git a/tests/integrationtest/run-tests.sh b/tests/integrationtest/run-tests.sh index 40e05b91cab38..6902a326896fa 100755 --- a/tests/integrationtest/run-tests.sh +++ b/tests/integrationtest/run-tests.sh @@ -78,9 +78,9 @@ function build_tidb_server() echo "building tidb-server binary: $tidb_server" rm -rf $tidb_server if [ "${TIDB_TEST_STORE_NAME}" = "tikv" ]; then - GO111MODULE=on go build -o $tidb_server github.com/pingcap/tidb/tidb-server + GO111MODULE=on go build -o $tidb_server github.com/pingcap/tidb/cmd/tidb-server else - GO111MODULE=on go build -race -o $tidb_server github.com/pingcap/tidb/tidb-server + GO111MODULE=on go build -race -o $tidb_server github.com/pingcap/tidb/cmd/tidb-server fi } diff --git a/tests/integrationtest/t/planner/core/casetest/integration.test b/tests/integrationtest/t/planner/core/casetest/integration.test index e2cae34d3003f..c9c3a78fd7429 100644 --- a/tests/integrationtest/t/planner/core/casetest/integration.test +++ b/tests/integrationtest/t/planner/core/casetest/integration.test @@ -475,3 +475,97 @@ set @@tidb_opt_fix_control = "44262:ON"; explain select /*+ TIDB_INLJ(t2_part@sel_2) */ * from t1 where t1.b<10 and not exists (select 1 from t2_part where t1.a=t2_part.a and t2_part.b<20); --disable_warnings +# TestIssue42588 +drop database if exists testdb; +create database testdb; +use testdb; +drop table if exists `t270`; +CREATE TABLE `t270` ( + `vkey` int(11) DEFAULT NULL, + `pkey` int(11) DEFAULT NULL, + `c1128` varchar(100) DEFAULT NULL, + `c1129` int(11) DEFAULT NULL, + `c1130` varchar(100) DEFAULT NULL, + `c1131` double DEFAULT NULL, + `c1132` varchar(100) DEFAULT NULL, + `c1133` double DEFAULT NULL, + `c1134` varchar(100) DEFAULT NULL, + `c1135` int(11) DEFAULT NULL +); +drop table if exists `t271`; +CREATE TABLE `t271` ( + `vkey` int(11) DEFAULT NULL, + `pkey` int(11) DEFAULT NULL, + `c1136` varchar(100) DEFAULT NULL, + `c1137` int(11) DEFAULT NULL, + `c1138` varchar(100) DEFAULT NULL, + `c1139` int(11) DEFAULT NULL, + `c1140` double DEFAULT NULL, + `c1141` int(11) DEFAULT NULL +); +drop table if exists `t272`; +CREATE TABLE `t272` ( + `vkey` int(11) DEFAULT NULL, + `pkey` int(11) DEFAULT NULL, + `c1142` int(11) DEFAULT NULL, + `c1143` varchar(100) DEFAULT NULL, + `c1144` int(11) DEFAULT NULL, + `c1145` int(11) DEFAULT NULL, + `c1146` varchar(100) DEFAULT NULL, + `c1147` double DEFAULT NULL, + `c1148` varchar(100) DEFAULT NULL, + `c1149` double DEFAULT NULL +); +CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`%` SQL SECURITY DEFINER VIEW `t273_test` (`c0`, `c1`, `c2`, `c3`, `c4`) AS SELECT AVG(37) OVER (PARTITION BY `ref_0`.`c1136` ORDER BY `ref_0`.`vkey` DESC,`ref_0`.`pkey` DESC,`ref_0`.`c1136` DESC,`ref_0`.`c1137`,`ref_0`.`c1138` DESC,`ref_0`.`c1139` DESC,`ref_0`.`c1140` DESC,`ref_0`.`c1141`) AS `c0`,COALESCE(`ref_0`.`c1137`, `ref_0`.`c1141`) AS `c1`,`ref_0`.`vkey` AS `c2`,`ref_0`.`pkey` AS `c3`,`ref_0`.`c1138` AS `c4` FROM `testdb`.`t271` AS `ref_0` WHERE EXISTS (SELECT `subq_0`.`c2` AS `c0`,`subq_0`.`c0` AS `c1`,`subq_0`.`c0` AS `c2`,`subq_0`.`c0` AS `c3`,CASE WHEN EXISTS (SELECT `ref_9`.`c1131` AS `c0`,`ref_9`.`c1131` AS `c1`,(FALSE) XOR (((-45)=(-69)) OR ((-0)>(-71))) AS `c2`,`ref_9`.`c1133` AS `c3`,`ref_9`.`c1128` AS `c4`,-0 AS `c5`,1 AS `c6`,`ref_9`.`c1132` AS `c7`,`ref_9`.`c1131` AS `c8`,`ref_9`.`c1130` AS `c9`,NULL AS `c10` FROM `testdb`.`t270` AS `ref_9` WHERE (-0)<(-8) UNION ALL SELECT `ref_0`.`c1140` AS `c0`,`ref_11`.`c1133` AS `c1`,(NULL)<(NULL) AS `c2`,`ref_0`.`c1140` AS `c3`,`ref_0`.`c1136` AS `c4`,95 AS `c5`,NOT (_UTF8MB4'mum#M' LIKE _UTF8MB4'%_U') AS `c6`,`ref_11`.`c1128` AS `c7`,`ref_11`.`c1131` AS `c8`,(SELECT `c1143` AS `c1143` FROM `testdb`.`t272` ORDER BY `c1143` LIMIT 3,1) AS `c9`,97 AS `c10` FROM `testdb`.`t270` AS `ref_11` WHERE NOT (TRUE)) THEN _UTF8MB4'xf' ELSE _UTF8MB4'>c' END LIKE _UTF8MB4'_^^' AS `c4`,`subq_0`.`c1` AS `c5`,`ref_0`.`vkey` AS `c6`,((`subq_0`.`c1`)=(SELECT `ref_12`.`c1132` AS `c0` FROM `testdb`.`t270` AS `ref_12` WHERE TRUE ORDER BY `c0` DESC LIMIT 1)) XOR ((`ref_0`.`pkey`)>=(SELECT (SELECT `vkey` AS `vkey` FROM `testdb`.`t271` ORDER BY `vkey` LIMIT 1,1) AS `c0` FROM `testdb`.`t271` AS `ref_13` WHERE (-24)<=((SELECT COUNT(`c1140`) AS `count(c1140)` FROM `testdb`.`t271`)) ORDER BY `c0` LIMIT 1)) AS `c7`,`ref_0`.`pkey` AS `c8`,`subq_0`.`c2` AS `c9`,`ref_0`.`vkey` AS `c10`,`ref_0`.`c1139` AS `c11`,TRUE AS `c12`,`subq_0`.`c0` AS `c13`,`subq_0`.`c2` AS `c14`,`subq_0`.`c2` AS `c15`,FALSE AS `c16`,CASE WHEN ((FALSE) OR ((((FALSE) XOR (((-73)<(-91)) OR (((-0) BETWEEN (-0) AND (-0)) AND ((NULL) OR ((0)>((SELECT COUNT(`c1131`) AS `count(c1131)` FROM `testdb`.`t270`))))))) AND ((-19)>(NULL))) OR (((77)<(73)) AND (NOT (((73) IN (SELECT 0 AS `c0` FROM `testdb`.`t271` AS `ref_14` WHERE (NULL) AND (NULL) EXCEPT SELECT NULL AS `c0` FROM `testdb`.`t270` AS `ref_15` WHERE (`ref_15`.`c1131`)!=(SELECT `ref_15`.`c1133` AS `c0` FROM `testdb`.`t270` AS `ref_16` WHERE _UTF8MB4'$@-X' LIKE _UTF8MB4'__%' ORDER BY `c0` DESC LIMIT 1))) IS TRUE))))) OR (NOT ((-24)<=(-43))) THEN `subq_0`.`c1` ELSE `subq_0`.`c2` END AS `c17`,`subq_0`.`c1` AS `c18`,`subq_0`.`c0` AS `c19`,`subq_0`.`c0` AS `c20`,`subq_0`.`c2` AS `c21`,`subq_0`.`c0` AS `c22`,`subq_0`.`c2` AS `c23`,`subq_0`.`c0` AS `c24`,`ref_0`.`c1141` AS `c25` FROM (SELECT DISTINCT TRUE AS `c0`,`ref_1`.`c1143` AS `c1`,`ref_1`.`c1146` AS `c2` FROM `testdb`.`t272` AS `ref_1` WHERE NOT (((`ref_0`.`c1136`)!=(SELECT `ref_2`.`c1146` AS `c0` FROM `testdb`.`t272` AS `ref_2` WHERE (62) BETWEEN ((SELECT COUNT(`c1147`) AS `count(c1147)` FROM `testdb`.`t272`)) AND (-0) ORDER BY `c0` LIMIT 1)) XOR ((-0) BETWEEN (0) AND (-0)))) AS `subq_0` WHERE (CHAR_LENGTH(CASE WHEN ((`subq_0`.`c0`) IS NOT NULL) OR ((`ref_0`.`c1138`)>(SELECT `ref_0`.`c1138` AS `c0` FROM `testdb`.`t272` AS `ref_3` WHERE FALSE ORDER BY `c0` DESC LIMIT 1)) THEN _UTF8MB4'' ELSE _UTF8MB4'tL' END)) BETWEEN (ABS(46%-11)) AND (CASE WHEN (((((`subq_0`.`c2`) IN (SELECT `ref_4`.`c1134` AS `c0` FROM `testdb`.`t270` AS `ref_4` WHERE (NULL LIKE _UTF8MB4'%Ny') OR (EXISTS (SELECT DISTINCT `ref_5`.`c1136` AS `c0`,`ref_5`.`c1140` AS `c1` FROM `testdb`.`t271` AS `ref_5` WHERE FALSE UNION ALL SELECT `ref_4`.`c1130` AS `c0`,`ref_4`.`c1131` AS `c1` FROM `testdb`.`t271` AS `ref_6` WHERE (-97) BETWEEN (73) AND (-10))) UNION ALL SELECT `ref_7`.`c1138` AS `c0` FROM `testdb`.`t271` AS `ref_7` WHERE FALSE)) IS TRUE) OR (NULL)) AND ((NULL)>=((SELECT COUNT(`c1140`) AS `count(c1140)` FROM `testdb`.`t271`)))) XOR (((`ref_0`.`vkey`) IN (SELECT `ref_8`.`c1145` AS `c0` FROM `testdb`.`t272` AS `ref_8` WHERE ((FALSE) AND (NULL)) OR ((`ref_8`.`c1144`) IS NULL))) IS TRUE) THEN 87 ELSE CASE WHEN ((`ref_0`.`c1138`) IS NULL) OR ((-22)!=(-0)) THEN 17 ELSE -67 END END)) ORDER BY `c0` DESC,`c1` DESC,`c2`,`c3`,`c4` DESC; +select + (select + subq_1.c0 as c0 + from + t273_test as ref_84 + where exists ( + select + (select + ref_86.c1147 as c0 + from + t272 as ref_86 + where (subq_1.c0) > (subq_1.c0) + window w0 as (partition by ref_86.c1147 order by ref_86.c1143 desc) + order by c0 limit 1 + ) as c3, + (select + subq_1.c0 as c0 + from + t273_test as ref_89 + order by c0 limit 1) as c4 + from + t271 as ref_85 + ) + order by c0 desc limit 1) as c1 +from + (select 1 as c0) as subq_1; +select + (select + subq_1.c0 as c0 + from + t271 as ref_84 + where exists ( + select + (select + ref_86.c1147 as c0 + from + t272 as ref_86 + where (subq_1.c0) > (subq_1.c0) + window w0 as (partition by ref_86.c1147 order by ref_86.c1143 desc) + order by c0 limit 1 + ) as c3, + (select + subq_1.c0 as c0 + from + t271 as ref_89 + order by c0 limit 1) as c4 + from + t271 as ref_85 + ) + order by c0 desc limit 1) as c1 +from + (select 1 as c0) as subq_1; \ No newline at end of file