diff --git a/executor/benchmark_test.go b/executor/benchmark_test.go index ee7394cd890dc..7a81d0f9817f3 100644 --- a/executor/benchmark_test.go +++ b/executor/benchmark_test.go @@ -45,6 +45,7 @@ import ( "github.com/pingcap/tidb/util/mock" "github.com/pingcap/tidb/util/stringutil" "go.uber.org/zap/zapcore" + "golang.org/x/exp/slices" ) var ( @@ -882,6 +883,44 @@ func defaultHashJoinTestCase(cols []*types.FieldType, joinType core.JoinType, us return tc } +func prepareResolveIndices(joinSchema, lSchema, rSchema *expression.Schema, joinType core.JoinType) *expression.Schema { + colsNeedResolving := joinSchema.Len() + // The last output column of these two join types is the generated column indicating whether the row is matched or not. + if joinType == core.LeftOuterSemiJoin || joinType == core.AntiLeftOuterSemiJoin { + colsNeedResolving-- + } + mergedSchema := expression.MergeSchema(lSchema, rSchema) + // Copy the columns so that the two plans do not share the same column slice. + shallowColSlice := make([]*expression.Column, joinSchema.Len()) + copy(shallowColSlice, joinSchema.Columns) + joinSchema = expression.NewSchema(shallowColSlice...) + foundCnt := 0 + // Here we resolve all join schema columns directly against the merged child schema. Columns in the + // join schema with the same unique ID must each be redirected to their own corresponding column in the + // child schema, but the two column sets are **NOT** always ordered consistently, see comment: https://github.com/pingcap/tidb/pull/45831#discussion_r1481031471 + // so we use a marking/mapping mechanism instead of just moving j forward. + marked := make([]bool, mergedSchema.Len()) + for i := 0; i < colsNeedResolving; i++ { + findIdx := -1 + for j := 0; j < len(mergedSchema.Columns); j++ { + if !joinSchema.Columns[i].Equal(nil, mergedSchema.Columns[j]) || marked[j] { + continue + } + // Resolve to the column with the same unique ID that has not been marked yet. + findIdx = j + break + } + if findIdx != -1 { + // A valid match was found. + joinSchema.Columns[i] = joinSchema.Columns[i].Clone().(*expression.Column) + joinSchema.Columns[i].Index = findIdx + marked[findIdx] = true + foundCnt++ + } + } + return joinSchema +} + func prepare4HashJoin(testCase *hashJoinTestCase, innerExec, outerExec Executor) *HashJoinExec { if testCase.useOuterToBuild { innerExec, outerExec = outerExec, innerExec @@ -905,6 +944,10 @@ func prepare4HashJoin(testCase *hashJoinTestCase, innerExec, outerExec Executor) joinSchema.Append(cols0...) joinSchema.Append(cols1...) } + // TODO: we need a systematic way to guarantee this. + // A physical join should call resolveIndices to get the right schema column indexes; + // otherwise, markChildrenUsedColsForTest below will fail. + joinSchema = prepareResolveIndices(joinSchema, innerExec.Schema(), outerExec.Schema(), core.InnerJoin) joinKeysColIdx := make([]int, 0, len(testCase.keyIdx)) joinKeysColIdx = append(joinKeysColIdx, testCase.keyIdx...) @@ -960,25 +1003,39 @@ func prepare4HashJoin(testCase *hashJoinTestCase, innerExec, outerExec Executor) // markChildrenUsedColsForTest compares each child with the output schema, and mark // each column of the child is used by output or not.
-func markChildrenUsedColsForTest(outputSchema *expression.Schema, childSchemas ...*expression.Schema) (childrenUsed [][]bool) { - childrenUsed = make([][]bool, 0, len(childSchemas)) - markedOffsets := make(map[int]struct{}) - for _, col := range outputSchema.Columns { - markedOffsets[col.Index] = struct{}{} +func markChildrenUsedColsForTest(outputSchema *expression.Schema, childSchemas ...*expression.Schema) (childrenUsed [][]int) { + childrenUsed = make([][]int, 0, len(childSchemas)) + markedOffsets := make(map[int]int) + for originalIdx, col := range outputSchema.Columns { + markedOffsets[col.Index] = originalIdx } prefixLen := 0 + type intPair struct { + first int + second int + } + // For example: + // left child schema: [col11] + // right child schema: [col21, col22] + // output schema: [col11, col22, col21]. If we did not record the original order derived after the physical resolve-index step, + // lUsed would be [0] and rUsed would be [0,1], discarding the actual order; the correct rUsed is [1,0]. for _, childSchema := range childSchemas { - used := make([]bool, len(childSchema.Columns)) + usedIdxPair := make([]intPair, 0, len(childSchema.Columns)) for i := range childSchema.Columns { - if _, ok := markedOffsets[prefixLen+i]; ok { - used[i] = true + if originalIdx, ok := markedOffsets[prefixLen+i]; ok { + usedIdxPair = append(usedIdxPair, intPair{first: originalIdx, second: i}) } } - childrenUsed = append(childrenUsed, used) - } - for _, child := range childSchemas { - used := expression.GetUsedList(outputSchema.Columns, child) - childrenUsed = append(childrenUsed, used) + // Sort the used indexes according to their original positions derived after resolveIndices. + slices.SortFunc(usedIdxPair, func(a, b intPair) bool { + return a.first < b.first + }) + usedIdx := make([]int, 0, len(childSchema.Columns)) + for _, one := range usedIdxPair { + usedIdx = append(usedIdx, one.second) + } + childrenUsed = append(childrenUsed, usedIdx) + prefixLen += childSchema.Len() } return } @@ -1580,6 +1637,20 @@ func prepareMergeJoinExec(tc *mergeJoinTestCase, joinSchema *expression.Schema, isOuterJoin: false, } + var usedIdx [][]int + if tc.childrenUsedSchema != nil { + usedIdx = make([][]int, 0, len(tc.childrenUsedSchema)) + for _, childSchema := range tc.childrenUsedSchema { + used := make([]int, 0, len(childSchema)) + for idx, one := range childSchema { + if one { + used = append(used, idx) + } + } + usedIdx = append(usedIdx, used) + } + } + mergeJoinExec.joiner = newJoiner( tc.ctx, 0, @@ -1588,7 +1659,7 @@ func prepareMergeJoinExec(tc *mergeJoinTestCase, joinSchema *expression.Schema, nil, retTypes(leftExec), retTypes(rightExec), - tc.childrenUsedSchema, + usedIdx, false, ) diff --git a/executor/builder.go b/executor/builder.go index 33c213bce8c9c..5708032279b97 100644 --- a/executor/builder.go +++ b/executor/builder.go @@ -755,14 +755,11 @@ func (b *executorBuilder) buildLimit(v *plannercore.PhysicalLimit) Executor { end: v.Offset + v.Count, } + childUsedSchemaLen := v.Children()[0].Schema().Len() childUsedSchema := markChildrenUsedCols(v.Schema().Columns, v.Children()[0].Schema())[0] e.columnIdxsUsedByChild = make([]int, 0, len(childUsedSchema)) - for i, used := range childUsedSchema { - if used { - e.columnIdxsUsedByChild = append(e.columnIdxsUsedByChild, i) - } - } - if len(e.columnIdxsUsedByChild) == len(childUsedSchema) { + e.columnIdxsUsedByChild = append(e.columnIdxsUsedByChild, childUsedSchema...)
+ if len(e.columnIdxsUsedByChild) == childUsedSchemaLen { e.columnIdxsUsedByChild = nil // indicates that all columns are used. LimitExec will improve performance for this condition. } return e @@ -3055,21 +3052,39 @@ func constructDistExec(sctx sessionctx.Context, plans []plannercore.PhysicalPlan // markChildrenUsedCols compares each child with the output schema, and mark // each column of the child is used by output or not. -func markChildrenUsedCols(outputCols []*expression.Column, childSchemas ...*expression.Schema) (childrenUsed [][]bool) { - childrenUsed = make([][]bool, 0, len(childSchemas)) - markedOffsets := make(map[int]struct{}) - for _, col := range outputCols { - markedOffsets[col.Index] = struct{}{} +func markChildrenUsedCols(outputCols []*expression.Column, childSchemas ...*expression.Schema) (childrenUsed [][]int) { + childrenUsed = make([][]int, 0, len(childSchemas)) + markedOffsets := make(map[int]int) + // Keep the original (possibly reversed) order. + for originalIdx, col := range outputCols { + markedOffsets[col.Index] = originalIdx } prefixLen := 0 + type intPair struct { + first int + second int + } + // For example: + // left child schema: [col11] + // right child schema: [col21, col22] + // output schema: [col11, col22, col21]. If we did not record the original order derived after the physical resolve-index step, + // lUsed would be [0] and rUsed would be [0,1], discarding the actual order; the correct rUsed is [1,0]. for _, childSchema := range childSchemas { - used := make([]bool, len(childSchema.Columns)) + usedIdxPair := make([]intPair, 0, len(childSchema.Columns)) for i := range childSchema.Columns { - if _, ok := markedOffsets[prefixLen+i]; ok { - used[i] = true + if originalIdx, ok := markedOffsets[prefixLen+i]; ok { + usedIdxPair = append(usedIdxPair, intPair{first: originalIdx, second: i}) } } - childrenUsed = append(childrenUsed, used) + // Sort the used indexes according to their original positions derived after resolveIndices. + slices.SortFunc(usedIdxPair, func(a, b intPair) bool { + return a.first < b.first + }) + usedIdx := make([]int, 0, len(childSchema.Columns)) + for _, one := range usedIdxPair { + usedIdx = append(usedIdx, one.second) + } + childrenUsed = append(childrenUsed, usedIdx) prefixLen += childSchema.Len() } return diff --git a/executor/joiner.go b/executor/joiner.go index 842135802444f..4bcc4d042fbd9 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -132,7 +132,7 @@ func JoinerType(j joiner) plannercore.JoinType { func newJoiner(ctx sessionctx.Context, joinType plannercore.JoinType, outerIsRight bool, defaultInner []types.Datum, filter []expression.Expression, - lhsColTypes, rhsColTypes []*types.FieldType, childrenUsed [][]bool, isNA bool) joiner { + lhsColTypes, rhsColTypes []*types.FieldType, childrenUsed [][]int, isNA bool) joiner { base := baseJoiner{ ctx: ctx, conditions: filter, @@ -141,19 +141,14 @@ func newJoiner(ctx sessionctx.Context, joinType plannercore.JoinType, } base.selected = make([]bool, 0, chunk.InitialCapacity) base.isNull = make([]bool, 0, chunk.InitialCapacity) + // lUsed and rUsed should keep their original order. The join schema may rely on a + // reversed order of the child's schema columns, so we preserve the order + // exactly as given instead of re-deriving it. if childrenUsed != nil { base.lUsed = make([]int, 0, len(childrenUsed[0])) // make it non-nil - for i, used := range childrenUsed[0] { - if used { - base.lUsed = append(base.lUsed, i) - } - } + base.lUsed = append(base.lUsed, childrenUsed[0]...)
base.rUsed = make([]int, 0, len(childrenUsed[1])) // make it non-nil - for i, used := range childrenUsed[1] { - if used { - base.rUsed = append(base.rUsed, i) - } - } + base.rUsed = append(base.rUsed, childrenUsed[1]...) logutil.BgLogger().Debug("InlineProjection", zap.Ints("lUsed", base.lUsed), zap.Ints("rUsed", base.rUsed), zap.Int("lCount", len(lhsColTypes)), zap.Int("rCount", len(rhsColTypes))) diff --git a/executor/tiflashtest/tiflash_test.go b/executor/tiflashtest/tiflash_test.go index 30dcb626888e8..7a99bab37c86d 100644 --- a/executor/tiflashtest/tiflash_test.go +++ b/executor/tiflashtest/tiflash_test.go @@ -1449,19 +1449,19 @@ func TestDisaggregatedTiFlashQuery(t *testing.T) { err = domain.GetDomain(tk.Session()).DDL().UpdateTableReplicaInfo(tk.Session(), tb.Meta().ID, true) require.NoError(t, err) tk.MustQuery("explain select * from t1 where c1 < 2").Check(testkit.Rows( - "PartitionUnion_10 9970.00 root ", - "├─TableReader_15 3323.33 root MppVersion: 1, data:ExchangeSender_14", - "│ └─ExchangeSender_14 3323.33 mpp[tiflash] ExchangeType: PassThrough", - "│ └─Selection_13 3323.33 mpp[tiflash] lt(test.t1.c1, 2)", - "│ └─TableFullScan_12 10000.00 mpp[tiflash] table:t1, partition:p0 pushed down filter:empty, keep order:false, stats:pseudo", - "├─TableReader_19 3323.33 root MppVersion: 1, data:ExchangeSender_18", - "│ └─ExchangeSender_18 3323.33 mpp[tiflash] ExchangeType: PassThrough", - "│ └─Selection_17 3323.33 mpp[tiflash] lt(test.t1.c1, 2)", - "│ └─TableFullScan_16 10000.00 mpp[tiflash] table:t1, partition:p1 pushed down filter:empty, keep order:false, stats:pseudo", - "└─TableReader_23 3323.33 root MppVersion: 1, data:ExchangeSender_22", - " └─ExchangeSender_22 3323.33 mpp[tiflash] ExchangeType: PassThrough", - " └─Selection_21 3323.33 mpp[tiflash] lt(test.t1.c1, 2)", - " └─TableFullScan_20 10000.00 mpp[tiflash] table:t1, partition:p2 pushed down filter:empty, keep order:false, stats:pseudo")) + "PartitionUnion_11 9970.00 root ", + "├─TableReader_16 3323.33 root MppVersion: 1, data:ExchangeSender_15", + "│ └─ExchangeSender_15 3323.33 mpp[tiflash] ExchangeType: PassThrough", + "│ └─Selection_14 3323.33 mpp[tiflash] lt(test.t1.c1, 2)", + "│ └─TableFullScan_13 10000.00 mpp[tiflash] table:t1, partition:p0 pushed down filter:empty, keep order:false, stats:pseudo", + "├─TableReader_20 3323.33 root MppVersion: 1, data:ExchangeSender_19", + "│ └─ExchangeSender_19 3323.33 mpp[tiflash] ExchangeType: PassThrough", + "│ └─Selection_18 3323.33 mpp[tiflash] lt(test.t1.c1, 2)", + "│ └─TableFullScan_17 10000.00 mpp[tiflash] table:t1, partition:p1 pushed down filter:empty, keep order:false, stats:pseudo", + "└─TableReader_24 3323.33 root MppVersion: 1, data:ExchangeSender_23", + " └─ExchangeSender_23 3323.33 mpp[tiflash] ExchangeType: PassThrough", + " └─Selection_22 3323.33 mpp[tiflash] lt(test.t1.c1, 2)", + " └─TableFullScan_21 10000.00 mpp[tiflash] table:t1, partition:p2 pushed down filter:empty, keep order:false, stats:pseudo")) } func TestMPPMemoryTracker(t *testing.T) { diff --git a/planner/cascades/optimize.go b/planner/cascades/optimize.go index 29be0272e011e..e74696b356acd 100644 --- a/planner/cascades/optimize.go +++ b/planner/cascades/optimize.go @@ -116,7 +116,8 @@ func (opt *Optimizer) FindBestPlan(sctx sessionctx.Context, logical plannercore. 
} func (*Optimizer) onPhasePreprocessing(_ sessionctx.Context, plan plannercore.LogicalPlan) (plannercore.LogicalPlan, error) { - err := plan.PruneColumns(plan.Schema().Columns, nil) + var err error + plan, err = plan.PruneColumns(plan.Schema().Columns, nil) if err != nil { return nil, err } diff --git a/planner/core/casetest/integration_test.go b/planner/core/casetest/integration_test.go index 4b47fa6383140..0a0eeb014fa65 100644 --- a/planner/core/casetest/integration_test.go +++ b/planner/core/casetest/integration_test.go @@ -3596,3 +3596,20 @@ func TestIssue41957(t *testing.T) { tk.MustExec("CREATE TABLE `github_events` (\n `id` bigint(20) NOT NULL DEFAULT '0',\n `type` varchar(29) NOT NULL DEFAULT 'Event',\n `created_at` datetime NOT NULL DEFAULT '1970-01-01 00:00:00',\n `repo_id` bigint(20) NOT NULL DEFAULT '0',\n `repo_name` varchar(140) NOT NULL DEFAULT '',\n `actor_id` bigint(20) NOT NULL DEFAULT '0',\n `actor_login` varchar(40) NOT NULL DEFAULT '',\n `language` varchar(26) NOT NULL DEFAULT '',\n `additions` bigint(20) NOT NULL DEFAULT '0',\n `deletions` bigint(20) NOT NULL DEFAULT '0',\n `action` varchar(11) NOT NULL DEFAULT '',\n `number` int(11) NOT NULL DEFAULT '0',\n `commit_id` varchar(40) NOT NULL DEFAULT '',\n `comment_id` bigint(20) NOT NULL DEFAULT '0',\n `org_login` varchar(40) NOT NULL DEFAULT '',\n `org_id` bigint(20) NOT NULL DEFAULT '0',\n `state` varchar(6) NOT NULL DEFAULT '',\n `closed_at` datetime NOT NULL DEFAULT '1970-01-01 00:00:00',\n `comments` int(11) NOT NULL DEFAULT '0',\n `pr_merged_at` datetime NOT NULL DEFAULT '1970-01-01 00:00:00',\n `pr_merged` tinyint(1) NOT NULL DEFAULT '0',\n `pr_changed_files` int(11) NOT NULL DEFAULT '0',\n `pr_review_comments` int(11) NOT NULL DEFAULT '0',\n `pr_or_issue_id` bigint(20) NOT NULL DEFAULT '0',\n `event_day` date NOT NULL,\n `event_month` date NOT NULL,\n `event_year` int(11) NOT NULL,\n `push_size` int(11) NOT NULL DEFAULT '0',\n `push_distinct_size` int(11) NOT NULL DEFAULT '0',\n `creator_user_login` varchar(40) NOT NULL DEFAULT '',\n `creator_user_id` bigint(20) NOT NULL DEFAULT '0',\n `pr_or_issue_created_at` datetime NOT NULL DEFAULT '1970-01-01 00:00:00',\n KEY `index_github_events_on_id` (`id`),\n KEY `index_github_events_on_created_at` (`created_at`),\n KEY `index_github_events_on_repo_id_type_action_month_actor_login` (`repo_id`,`type`,`action`,`event_month`,`actor_login`),\n KEY `index_ge_on_repo_id_type_action_pr_merged_created_at_add_del` (`repo_id`,`type`,`action`,`pr_merged`,`created_at`,`additions`,`deletions`),\n KEY `index_ge_on_creator_id_type_action_merged_created_at_add_del` (`creator_user_id`,`type`,`action`,`pr_merged`,`created_at`,`additions`,`deletions`),\n KEY `index_ge_on_actor_id_type_action_created_at_repo_id_commits` (`actor_id`,`type`,`action`,`created_at`,`repo_id`,`push_distinct_size`),\n KEY `index_ge_on_repo_id_type_action_created_at_number_pdsize_psize` (`repo_id`,`type`,`action`,`created_at`,`number`,`push_distinct_size`,`push_size`),\n KEY `index_ge_on_repo_id_type_action_created_at_actor_login` (`repo_id`,`type`,`action`,`created_at`,`actor_login`),\n KEY `index_ge_on_repo_name_type` (`repo_name`,`type`),\n KEY `index_ge_on_actor_login_type` (`actor_login`,`type`),\n KEY `index_ge_on_org_login_type` (`org_login`,`type`),\n KEY `index_ge_on_language` (`language`),\n KEY `index_ge_on_org_id_type` (`org_id`,`type`),\n KEY `index_ge_on_actor_login_lower` ((lower(`actor_login`))),\n KEY `index_ge_on_repo_name_lower` ((lower(`repo_name`))),\n KEY `index_ge_on_language_lower` 
((lower(`language`))),\n KEY `index_ge_on_type_action` (`type`,`action`) /*!80000 INVISIBLE */,\n KEY `index_ge_on_repo_id_type_created_at` (`repo_id`,`type`,`created_at`),\n KEY `index_ge_on_repo_id_created_at` (`repo_id`,`created_at`)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin\nPARTITION BY LIST COLUMNS(`type`)\n(PARTITION `push_event` VALUES IN ('PushEvent'),\n PARTITION `create_event` VALUES IN ('CreateEvent'),\n PARTITION `pull_request_event` VALUES IN ('PullRequestEvent'),\n PARTITION `watch_event` VALUES IN ('WatchEvent'),\n PARTITION `issue_comment_event` VALUES IN ('IssueCommentEvent'),\n PARTITION `issues_event` VALUES IN ('IssuesEvent'),\n PARTITION `delete_event` VALUES IN ('DeleteEvent'),\n PARTITION `fork_event` VALUES IN ('ForkEvent'),\n PARTITION `pull_request_review_comment_event` VALUES IN ('PullRequestReviewCommentEvent'),\n PARTITION `pull_request_review_event` VALUES IN ('PullRequestReviewEvent'),\n PARTITION `gollum_event` VALUES IN ('GollumEvent'),\n PARTITION `release_event` VALUES IN ('ReleaseEvent'),\n PARTITION `member_event` VALUES IN ('MemberEvent'),\n PARTITION `commit_comment_event` VALUES IN ('CommitCommentEvent'),\n PARTITION `public_event` VALUES IN ('PublicEvent'),\n PARTITION `gist_event` VALUES IN ('GistEvent'),\n PARTITION `follow_event` VALUES IN ('FollowEvent'),\n PARTITION `event` VALUES IN ('Event'),\n PARTITION `download_event` VALUES IN ('DownloadEvent'),\n PARTITION `team_add_event` VALUES IN ('TeamAddEvent'),\n PARTITION `fork_apply_event` VALUES IN ('ForkApplyEvent'))\n") tk.MustQuery("SELECT\n repo_id, GROUP_CONCAT(\n DISTINCT actor_login\n ORDER BY cnt DESC\n SEPARATOR ','\n ) AS actor_logins\nFROM (\n SELECT\n ge.repo_id AS repo_id,\n ge.actor_login AS actor_login,\n COUNT(*) AS cnt\n FROM github_events ge\n WHERE\n type = 'PullRequestEvent' AND action = 'opened'\n AND (ge.created_at >= DATE_SUB(NOW(), INTERVAL 1 DAY) AND ge.created_at <= NOW())\n GROUP BY ge.repo_id, ge.actor_login\n ORDER BY cnt DESC\n) sub\nGROUP BY repo_id").Check(testkit.Rows()) } + +func TestIssue42588(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec(`use test`) + + tk.MustExec("drop database if exists testdb;") + tk.MustExec("create database testdb;") + tk.MustExec("use testdb;") + tk.MustExec("drop table if exists `t270`;") + tk.MustExec("CREATE TABLE `t270` (\n `vkey` int(11) DEFAULT NULL,\n `pkey` int(11) DEFAULT NULL,\n `c1128` varchar(100) DEFAULT NULL,\n `c1129` int(11) DEFAULT NULL,\n `c1130` varchar(100) DEFAULT NULL,\n `c1131` double DEFAULT NULL,\n `c1132` varchar(100) DEFAULT NULL,\n `c1133` double DEFAULT NULL,\n `c1134` varchar(100) DEFAULT NULL,\n `c1135` int(11) DEFAULT NULL\n);") + tk.MustExec("drop table if exists `t271`;\nCREATE TABLE `t271` (\n `vkey` int(11) DEFAULT NULL,\n `pkey` int(11) DEFAULT NULL,\n `c1136` varchar(100) DEFAULT NULL,\n `c1137` int(11) DEFAULT NULL,\n `c1138` varchar(100) DEFAULT NULL,\n `c1139` int(11) DEFAULT NULL,\n `c1140` double DEFAULT NULL,\n `c1141` int(11) DEFAULT NULL\n);") + tk.MustExec("drop table if exists `t272`;\nCREATE TABLE `t272` (\n `vkey` int(11) DEFAULT NULL,\n `pkey` int(11) DEFAULT NULL,\n `c1142` int(11) DEFAULT NULL,\n `c1143` varchar(100) DEFAULT NULL,\n `c1144` int(11) DEFAULT NULL,\n `c1145` int(11) DEFAULT NULL,\n `c1146` varchar(100) DEFAULT NULL,\n `c1147` double DEFAULT NULL,\n `c1148` varchar(100) DEFAULT NULL,\n `c1149` double DEFAULT NULL\n);") + tk.MustExec("CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`%` SQL SECURITY 
DEFINER VIEW `t273_test` (`c0`, `c1`, `c2`, `c3`, `c4`) AS SELECT AVG(37) OVER (PARTITION BY `ref_0`.`c1136` ORDER BY `ref_0`.`vkey` DESC,`ref_0`.`pkey` DESC,`ref_0`.`c1136` DESC,`ref_0`.`c1137`,`ref_0`.`c1138` DESC,`ref_0`.`c1139` DESC,`ref_0`.`c1140` DESC,`ref_0`.`c1141`) AS `c0`,COALESCE(`ref_0`.`c1137`, `ref_0`.`c1141`) AS `c1`,`ref_0`.`vkey` AS `c2`,`ref_0`.`pkey` AS `c3`,`ref_0`.`c1138` AS `c4` FROM `testdb`.`t271` AS `ref_0` WHERE EXISTS (SELECT `subq_0`.`c2` AS `c0`,`subq_0`.`c0` AS `c1`,`subq_0`.`c0` AS `c2`,`subq_0`.`c0` AS `c3`,CASE WHEN EXISTS (SELECT `ref_9`.`c1131` AS `c0`,`ref_9`.`c1131` AS `c1`,(FALSE) XOR (((-45)=(-69)) OR ((-0)>(-71))) AS `c2`,`ref_9`.`c1133` AS `c3`,`ref_9`.`c1128` AS `c4`,-0 AS `c5`,1 AS `c6`,`ref_9`.`c1132` AS `c7`,`ref_9`.`c1131` AS `c8`,`ref_9`.`c1130` AS `c9`,NULL AS `c10` FROM `testdb`.`t270` AS `ref_9` WHERE (-0)<(-8) UNION ALL SELECT `ref_0`.`c1140` AS `c0`,`ref_11`.`c1133` AS `c1`,(NULL)<(NULL) AS `c2`,`ref_0`.`c1140` AS `c3`,`ref_0`.`c1136` AS `c4`,95 AS `c5`,NOT (_UTF8MB4'mum#M' LIKE _UTF8MB4'%_U') AS `c6`,`ref_11`.`c1128` AS `c7`,`ref_11`.`c1131` AS `c8`,(SELECT `c1143` AS `c1143` FROM `testdb`.`t272` ORDER BY `c1143` LIMIT 3,1) AS `c9`,97 AS `c10` FROM `testdb`.`t270` AS `ref_11` WHERE NOT (TRUE)) THEN _UTF8MB4'xf' ELSE _UTF8MB4'>c' END LIKE _UTF8MB4'_^^' AS `c4`,`subq_0`.`c1` AS `c5`,`ref_0`.`vkey` AS `c6`,((`subq_0`.`c1`)=(SELECT `ref_12`.`c1132` AS `c0` FROM `testdb`.`t270` AS `ref_12` WHERE TRUE ORDER BY `c0` DESC LIMIT 1)) XOR ((`ref_0`.`pkey`)>=(SELECT (SELECT `vkey` AS `vkey` FROM `testdb`.`t271` ORDER BY `vkey` LIMIT 1,1) AS `c0` FROM `testdb`.`t271` AS `ref_13` WHERE (-24)<=((SELECT COUNT(`c1140`) AS `count(c1140)` FROM `testdb`.`t271`)) ORDER BY `c0` LIMIT 1)) AS `c7`,`ref_0`.`pkey` AS `c8`,`subq_0`.`c2` AS `c9`,`ref_0`.`vkey` AS `c10`,`ref_0`.`c1139` AS `c11`,TRUE AS `c12`,`subq_0`.`c0` AS `c13`,`subq_0`.`c2` AS `c14`,`subq_0`.`c2` AS `c15`,FALSE AS `c16`,CASE WHEN ((FALSE) OR ((((FALSE) XOR (((-73)<(-91)) OR (((-0) BETWEEN (-0) AND (-0)) AND ((NULL) OR ((0)>((SELECT COUNT(`c1131`) AS `count(c1131)` FROM `testdb`.`t270`))))))) AND ((-19)>(NULL))) OR (((77)<(73)) AND (NOT (((73) IN (SELECT 0 AS `c0` FROM `testdb`.`t271` AS `ref_14` WHERE (NULL) AND (NULL) EXCEPT SELECT NULL AS `c0` FROM `testdb`.`t270` AS `ref_15` WHERE (`ref_15`.`c1131`)!=(SELECT `ref_15`.`c1133` AS `c0` FROM `testdb`.`t270` AS `ref_16` WHERE _UTF8MB4'$@-X' LIKE _UTF8MB4'__%' ORDER BY `c0` DESC LIMIT 1))) IS TRUE))))) OR (NOT ((-24)<=(-43))) THEN `subq_0`.`c1` ELSE `subq_0`.`c2` END AS `c17`,`subq_0`.`c1` AS `c18`,`subq_0`.`c0` AS `c19`,`subq_0`.`c0` AS `c20`,`subq_0`.`c2` AS `c21`,`subq_0`.`c0` AS `c22`,`subq_0`.`c2` AS `c23`,`subq_0`.`c0` AS `c24`,`ref_0`.`c1141` AS `c25` FROM (SELECT DISTINCT TRUE AS `c0`,`ref_1`.`c1143` AS `c1`,`ref_1`.`c1146` AS `c2` FROM `testdb`.`t272` AS `ref_1` WHERE NOT (((`ref_0`.`c1136`)!=(SELECT `ref_2`.`c1146` AS `c0` FROM `testdb`.`t272` AS `ref_2` WHERE (62) BETWEEN ((SELECT COUNT(`c1147`) AS `count(c1147)` FROM `testdb`.`t272`)) AND (-0) ORDER BY `c0` LIMIT 1)) XOR ((-0) BETWEEN (0) AND (-0)))) AS `subq_0` WHERE (CHAR_LENGTH(CASE WHEN ((`subq_0`.`c0`) IS NOT NULL) OR ((`ref_0`.`c1138`)>(SELECT `ref_0`.`c1138` AS `c0` FROM `testdb`.`t272` AS `ref_3` WHERE FALSE ORDER BY `c0` DESC LIMIT 1)) THEN _UTF8MB4'' ELSE _UTF8MB4'tL' END)) BETWEEN (ABS(46%-11)) AND (CASE WHEN (((((`subq_0`.`c2`) IN (SELECT `ref_4`.`c1134` AS `c0` FROM `testdb`.`t270` AS `ref_4` WHERE (NULL LIKE _UTF8MB4'%Ny') OR (EXISTS (SELECT DISTINCT `ref_5`.`c1136` AS 
`c0`,`ref_5`.`c1140` AS `c1` FROM `testdb`.`t271` AS `ref_5` WHERE FALSE UNION ALL SELECT `ref_4`.`c1130` AS `c0`,`ref_4`.`c1131` AS `c1` FROM `testdb`.`t271` AS `ref_6` WHERE (-97) BETWEEN (73) AND (-10))) UNION ALL SELECT `ref_7`.`c1138` AS `c0` FROM `testdb`.`t271` AS `ref_7` WHERE FALSE)) IS TRUE) OR (NULL)) AND ((NULL)>=((SELECT COUNT(`c1140`) AS `count(c1140)` FROM `testdb`.`t271`)))) XOR (((`ref_0`.`vkey`) IN (SELECT `ref_8`.`c1145` AS `c0` FROM `testdb`.`t272` AS `ref_8` WHERE ((FALSE) AND (NULL)) OR ((`ref_8`.`c1144`) IS NULL))) IS TRUE) THEN 87 ELSE CASE WHEN ((`ref_0`.`c1138`) IS NULL) OR ((-22)!=(-0)) THEN 17 ELSE -67 END END)) ORDER BY `c0` DESC,`c1` DESC,`c2`,`c3`,`c4` DESC;") + tk.MustQuery("select\n (select\n subq_1.c0 as c0\n from\n t273_test as ref_84\n where exists (\n select\n (select\n ref_86.c1147 as c0\n from\n t272 as ref_86\n where (subq_1.c0) > (subq_1.c0)\n window w0 as (partition by ref_86.c1147 order by ref_86.c1143 desc)\n order by c0 limit 1\n ) as c3,\n (select\n subq_1.c0 as c0\n from\n t273_test as ref_89\n order by c0 limit 1) as c4\n from\n t271 as ref_85\n )\n order by c0 desc limit 1) as c1\nfrom\n (select 1 as c0) as subq_1;").Check(testkit.Rows("")) + tk.MustQuery("select\n (select\n subq_1.c0 as c0\n from\n t271 as ref_84\n where exists (\n select\n (select\n ref_86.c1147 as c0\n from\n t272 as ref_86\n where (subq_1.c0) > (subq_1.c0)\n window w0 as (partition by ref_86.c1147 order by ref_86.c1143 desc)\n order by c0 limit 1\n ) as c3,\n (select\n subq_1.c0 as c0\n from\n t271 as ref_89\n order by c0 limit 1) as c4\n from\n t271 as ref_85\n )\n order by c0 desc limit 1) as c1\nfrom\n (select 1 as c0) as subq_1;").Check(testkit.Rows("")) +} diff --git a/planner/core/plan.go b/planner/core/plan.go index b8f027cc6feb0..59c1d20e64bb5 100644 --- a/planner/core/plan.go +++ b/planner/core/plan.go @@ -255,8 +255,8 @@ type LogicalPlan interface { // Because it might change the root if the having clause exists, we need to return a plan that represents a new root. PredicatePushDown([]expression.Expression, *logicalOptimizeOp) ([]expression.Expression, LogicalPlan) - // PruneColumns prunes the unused columns. - PruneColumns([]*expression.Column, *logicalOptimizeOp) error + // PruneColumns prunes the unused columns, and return the new logical plan if changed, otherwise it's same. + PruneColumns([]*expression.Column, *logicalOptimizeOp) (LogicalPlan, error) // findBestTask converts the logical plan to the physical plan. It's a new interface. // It is called recursively from the parent to the children to create the result physical plan. @@ -759,11 +759,16 @@ func (*baseLogicalPlan) ExtractCorrelatedCols() []*expression.CorrelatedColumn { } // PruneColumns implements LogicalPlan interface. -func (p *baseLogicalPlan) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *baseLogicalPlan) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { if len(p.children) == 0 { - return nil + return p.self, nil + } + var err error + p.children[0], err = p.children[0].PruneColumns(parentUsedCols, opt) + if err != nil { + return nil, err } - return p.children[0].PruneColumns(parentUsedCols, opt) + return p.self, nil } // basePlan implements base Plan interface. 
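As a minimal, self-contained sketch of the new PruneColumns contract introduced in plan.go above (the types and names below are illustrative stand-ins, not actual TiDB code): a caller must reassign each child with the plan returned from that child's PruneColumns, because the child may replace itself during pruning.

package main

import "fmt"

// Plan stands in for plannercore.LogicalPlan: PruneColumns now returns the
// (possibly replaced) plan together with an error instead of mutating in place.
type Plan interface {
	PruneColumns(used []string) (Plan, error)
	Name() string
}

// node is a toy plan node with a single child.
type node struct {
	name  string
	child Plan
}

func (n *node) Name() string { return n.name }

func (n *node) PruneColumns(used []string) (Plan, error) {
	if n.child != nil {
		// The caller must reassign its child: the child may return a different node.
		newChild, err := n.child.PruneColumns(used)
		if err != nil {
			return nil, err
		}
		n.child = newChild
	}
	return n, nil
}

func main() {
	root := &node{name: "Projection", child: &node{name: "DataSource"}}
	pruned, err := root.PruneColumns([]string{"a"})
	if err != nil {
		panic(err)
	}
	fmt.Println(pruned.Name()) // Projection
}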
diff --git a/planner/core/resolve_indices.go b/planner/core/resolve_indices.go index 54980c484963e..3892a401a9e55 100644 --- a/planner/core/resolve_indices.go +++ b/planner/core/resolve_indices.go @@ -141,26 +141,33 @@ func (p *PhysicalHashJoin) ResolveIndicesItself() (err error) { copy(shallowColSlice, p.schema.Columns) p.schema = expression.NewSchema(shallowColSlice...) foundCnt := 0 - // The two column sets are all ordered. And the colsNeedResolving is the subset of the mergedSchema. - // So we can just move forward j if there's no matching is found. - // We don't use the normal ResolvIndices here since there might be duplicate columns in the schema. - // e.g. The schema of child_0 is [col0, col0, col1] - // ResolveIndices will only resolve all col0 reference of the current plan to the first col0. - for i, j := 0, 0; i < colsNeedResolving && j < len(mergedSchema.Columns); { - if !p.schema.Columns[i].Equal(nil, mergedSchema.Columns[j]) { - j++ - continue + + // Here we resolve all join schema columns directly against the merged child schema. Columns in the + // join schema with the same unique ID must each be redirected to their own corresponding column in the + // child schema, but the two column sets are **NOT** always ordered consistently, see comment: https://github.com/pingcap/tidb/pull/45831#discussion_r1481031471 + // so we use a marking/mapping mechanism instead of just moving j forward. + marked := make([]bool, mergedSchema.Len()) + for i := 0; i < colsNeedResolving; i++ { + findIdx := -1 + for j := 0; j < len(mergedSchema.Columns); j++ { + if !p.schema.Columns[i].Equal(nil, mergedSchema.Columns[j]) || marked[j] { + continue + } + // Resolve to the column with the same unique ID that has not been marked yet. + findIdx = j + break + } + if findIdx != -1 { + // A valid match was found. + p.schema.Columns[i] = p.schema.Columns[i].Clone().(*expression.Column) + p.schema.Columns[i].Index = findIdx + marked[findIdx] = true + foundCnt++ } - p.schema.Columns[i] = p.schema.Columns[i].Clone().(*expression.Column) - p.schema.Columns[i].Index = j - i++ - j++ - foundCnt++ } if foundCnt < colsNeedResolving { return errors.Errorf("Some columns of %v cannot find the reference from its child(ren)", p.ExplainID().String()) } - return } diff --git a/planner/core/rule_column_pruning.go b/planner/core/rule_column_pruning.go index fb55caa5b8c7e..4c1b507a5042f 100644 --- a/planner/core/rule_column_pruning.go +++ b/planner/core/rule_column_pruning.go @@ -32,8 +32,11 @@ type columnPruner struct { } func (*columnPruner) optimize(_ context.Context, lp LogicalPlan, opt *logicalOptimizeOp) (LogicalPlan, error) { - err := lp.PruneColumns(lp.Schema().Columns, opt) - return lp, err + lp, err := lp.PruneColumns(lp.Schema().Columns, opt) + if err != nil { + return nil, err + } + return lp, nil } // ExprsHasSideEffects checks if any of the expressions has side effects. @@ -65,8 +68,7 @@ func exprHasSetVarOrSleep(expr expression.Expression) bool { // PruneColumns implements LogicalPlan interface. // If any expression has SetVar function or Sleep function, we do not prune it.
-func (p *LogicalProjection) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { - child := p.children[0] +func (p *LogicalProjection) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { used := expression.GetUsedList(parentUsedCols, p.schema) prunedColumns := make([]*expression.Column, 0) @@ -80,18 +82,28 @@ func (p *LogicalProjection) PruneColumns(parentUsedCols []*expression.Column, op appendColumnPruneTraceStep(p, prunedColumns, opt) selfUsedCols := make([]*expression.Column, 0, len(p.Exprs)) selfUsedCols = expression.ExtractColumnsFromExpressions(selfUsedCols, p.Exprs, nil) - return child.PruneColumns(selfUsedCols, opt) + var err error + p.children[0], err = p.children[0].PruneColumns(selfUsedCols, opt) + if err != nil { + return nil, err + } + return p, nil } // PruneColumns implements LogicalPlan interface. -func (p *LogicalSelection) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalSelection) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { child := p.children[0] parentUsedCols = expression.ExtractColumnsFromExpressions(parentUsedCols, p.Conditions, nil) - return child.PruneColumns(parentUsedCols, opt) + var err error + p.children[0], err = child.PruneColumns(parentUsedCols, opt) + if err != nil { + return nil, err + } + return p, nil } // PruneColumns implements LogicalPlan interface. -func (la *LogicalAggregation) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (la *LogicalAggregation) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { child := la.children[0] used := expression.GetUsedList(parentUsedCols, la.Schema()) prunedColumns := make([]*expression.Column, 0) @@ -137,7 +149,7 @@ func (la *LogicalAggregation) PruneColumns(parentUsedCols []*expression.Column, newAgg, err = aggregation.NewAggFuncDesc(la.ctx, ast.AggFuncCount, []expression.Expression{expression.NewOne()}, false) } if err != nil { - return err + return nil, err } la.AggFuncs = append(la.AggFuncs, newAgg) col := &expression.Column{ @@ -164,10 +176,13 @@ func (la *LogicalAggregation) PruneColumns(parentUsedCols []*expression.Column, } } appendGroupByItemsPruneTraceStep(la, prunedGroupByItems, opt) - err := child.PruneColumns(selfUsedCols, opt) + var err error + la.children[0], err = child.PruneColumns(selfUsedCols, opt) if err != nil { - return err + return nil, err } + // update children[0] + child = la.children[0] // Do an extra Projection Elimination here. This is specially for empty Projection below Aggregation. // This kind of Projection would cause some bugs for MPP plan and is safe to be removed. // This kind of Projection should be removed in Projection Elimination, but currently PrunColumnsAgain is @@ -178,7 +193,7 @@ func (la *LogicalAggregation) PruneColumns(parentUsedCols []*expression.Column, la.SetChildren(childOfChild) } } - return nil + return la, nil } func pruneByItems(p LogicalPlan, old []*util.ByItems, opt *logicalOptimizeOp) (byItems []*util.ByItems, @@ -215,27 +230,36 @@ func pruneByItems(p LogicalPlan, old []*util.ByItems, opt *logicalOptimizeOp) (b // PruneColumns implements LogicalPlan interface. // If any expression can view as a constant in execution stage, such as correlated column, constant, // we do prune them. Note that we can't prune the expressions contain non-deterministic functions, such as rand(). 
-func (ls *LogicalSort) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { - child := ls.children[0] +func (ls *LogicalSort) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { var cols []*expression.Column ls.ByItems, cols = pruneByItems(ls, ls.ByItems, opt) parentUsedCols = append(parentUsedCols, cols...) - return child.PruneColumns(parentUsedCols, opt) + var err error + ls.children[0], err = ls.children[0].PruneColumns(parentUsedCols, opt) + if err != nil { + return nil, err + } + return ls, nil } // PruneColumns implements LogicalPlan interface. // If any expression can view as a constant in execution stage, such as correlated column, constant, // we do prune them. Note that we can't prune the expressions contain non-deterministic functions, such as rand(). -func (lt *LogicalTopN) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (lt *LogicalTopN) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { child := lt.children[0] var cols []*expression.Column lt.ByItems, cols = pruneByItems(lt, lt.ByItems, opt) parentUsedCols = append(parentUsedCols, cols...) - return child.PruneColumns(parentUsedCols, opt) + var err error + lt.children[0], err = child.PruneColumns(parentUsedCols, opt) + if err != nil { + return nil, err + } + return lt, nil } // PruneColumns implements LogicalPlan interface. -func (p *LogicalUnionAll) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalUnionAll) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { used := expression.GetUsedList(parentUsedCols, p.schema) hasBeenUsed := false for i := range used { @@ -251,10 +275,12 @@ func (p *LogicalUnionAll) PruneColumns(parentUsedCols []*expression.Column, opt used[i] = true } } - for _, child := range p.Children() { - err := child.PruneColumns(parentUsedCols, opt) + + var err error + for i, child := range p.Children() { + p.Children()[i], err = child.PruneColumns(parentUsedCols, opt) if err != nil { - return err + return nil, err } } @@ -286,11 +312,11 @@ func (p *LogicalUnionAll) PruneColumns(parentUsedCols []*expression.Column, opt } } } - return nil + return p, nil } // PruneColumns implements LogicalPlan interface. -func (p *LogicalUnionScan) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalUnionScan) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { for i := 0; i < p.handleCols.NumCols(); i++ { parentUsedCols = append(parentUsedCols, p.handleCols.GetCol(i)) } @@ -301,13 +327,17 @@ func (p *LogicalUnionScan) PruneColumns(parentUsedCols []*expression.Column, opt } condCols := expression.ExtractColumnsFromExpressions(nil, p.conditions, nil) parentUsedCols = append(parentUsedCols, condCols...) - return p.children[0].PruneColumns(parentUsedCols, opt) + var err error + p.children[0], err = p.children[0].PruneColumns(parentUsedCols, opt) + if err != nil { + return nil, err + } + return p, nil } // PruneColumns implements LogicalPlan interface. 
-func (ds *DataSource) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (ds *DataSource) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { used := expression.GetUsedList(parentUsedCols, ds.schema) - exprCols := expression.ExtractColumnsFromExpressions(nil, ds.allConds, nil) exprUsed := expression.GetUsedList(exprCols, ds.schema) prunedColumns := make([]*expression.Column, 0) @@ -353,11 +383,11 @@ func (ds *DataSource) PruneColumns(parentUsedCols []*expression.Column, opt *log if ds.handleCols != nil && ds.handleCols.IsInt() && ds.schema.ColumnIndex(ds.handleCols.GetCol(0)) == -1 { ds.handleCols = nil } - return nil + return ds, nil } // PruneColumns implements LogicalPlan interface. -func (p *LogicalMemTable) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalMemTable) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { switch p.TableInfo.Name.O { case infoschema.TableStatementsSummary, infoschema.TableStatementsSummaryHistory, @@ -371,7 +401,7 @@ func (p *LogicalMemTable) PruneColumns(parentUsedCols []*expression.Column, opt infoschema.TableDeadlocks, infoschema.ClusterTableDeadlocks: default: - return nil + return p, nil } prunedColumns := make([]*expression.Column, 0) used := expression.GetUsedList(parentUsedCols, p.schema) @@ -384,11 +414,11 @@ func (p *LogicalMemTable) PruneColumns(parentUsedCols []*expression.Column, opt } } appendColumnPruneTraceStep(p, prunedColumns, opt) - return nil + return p, nil } // PruneColumns implements LogicalPlan interface. -func (p *LogicalTableDual) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalTableDual) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { used := expression.GetUsedList(parentUsedCols, p.Schema()) prunedColumns := make([]*expression.Column, 0) for i := len(used) - 1; i >= 0; i-- { @@ -398,7 +428,7 @@ func (p *LogicalTableDual) PruneColumns(parentUsedCols []*expression.Column, opt } } appendColumnPruneTraceStep(p, prunedColumns, opt) - return nil + return p, nil } func (p *LogicalJoin) extractUsedCols(parentUsedCols []*expression.Column) (leftCols []*expression.Column, rightCols []*expression.Column) { @@ -434,18 +464,19 @@ func (p *LogicalJoin) mergeSchema() { } // PruneColumns implements LogicalPlan interface. -func (p *LogicalJoin) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalJoin) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { leftCols, rightCols := p.extractUsedCols(parentUsedCols) - err := p.children[0].PruneColumns(leftCols, opt) + var err error + p.children[0], err = p.children[0].PruneColumns(leftCols, opt) if err != nil { - return err + return nil, err } addConstOneForEmptyProjection(p.children[0]) - err = p.children[1].PruneColumns(rightCols, opt) + p.children[1], err = p.children[1].PruneColumns(rightCols, opt) if err != nil { - return err + return nil, err } addConstOneForEmptyProjection(p.children[1]) @@ -455,16 +486,18 @@ func (p *LogicalJoin) PruneColumns(parentUsedCols []*expression.Column, opt *log parentUsedCols = append(parentUsedCols, joinCol) } p.inlineProjection(parentUsedCols, opt) - return nil + return p, nil } // PruneColumns implements LogicalPlan interface. 
-func (la *LogicalApply) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (la *LogicalApply) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { leftCols, rightCols := la.extractUsedCols(parentUsedCols) - err := la.children[1].PruneColumns(rightCols, opt) + var err error + // Column pruning for children[1]. + la.children[1], err = la.children[1].PruneColumns(rightCols, opt) if err != nil { - return err + return nil, err } addConstOneForEmptyProjection(la.children[1]) @@ -473,20 +506,29 @@ func (la *LogicalApply) PruneColumns(parentUsedCols []*expression.Column, opt *l leftCols = append(leftCols, &col.Column) } - err = la.children[0].PruneColumns(leftCols, opt) + // Column pruning for children[0]. + la.children[0], err = la.children[0].PruneColumns(leftCols, opt) if err != nil { - return err + return nil, err } addConstOneForEmptyProjection(la.children[0]) - la.mergeSchema() - return nil + return la, nil } // PruneColumns implements LogicalPlan interface. -func (p *LogicalLock) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalLock) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { + var err error if !IsSelectForUpdateLockType(p.Lock.LockType) { - return p.baseLogicalPlan.PruneColumns(parentUsedCols, opt) + // When the call is delegated to baseLogicalPlan.PruneColumns, the current plan itself has + // nothing to prune and will not be replaced; it only forwards the call to its child's + // column pruning logic. The returned logical plan is therefore always p itself, and we only + // need to propagate any error reported by the deeper calls. + _, err = p.baseLogicalPlan.PruneColumns(parentUsedCols, opt) + if err != nil { + return nil, err + } + return p, nil } for tblID, cols := range p.tblID2Handle { @@ -500,11 +542,15 @@ func (p *LogicalLock) PruneColumns(parentUsedCols []*expression.Column, opt *log parentUsedCols = append(parentUsedCols, physTblIDCol) } } - return p.children[0].PruneColumns(parentUsedCols, opt) + p.children[0], err = p.children[0].PruneColumns(parentUsedCols, opt) + if err != nil { + return nil, err + } + return p, nil } // PruneColumns implements LogicalPlan interface. -func (p *LogicalWindow) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalWindow) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { windowColumns := p.GetWindowResultColumns() cnt := 0 for _, col := range parentUsedCols { @@ -522,14 +568,15 @@ func (p *LogicalWindow) PruneColumns(parentUsedCols []*expression.Column, opt *l } parentUsedCols = parentUsedCols[:cnt] parentUsedCols = p.extractUsedCols(parentUsedCols) - err := p.children[0].PruneColumns(parentUsedCols, opt) + var err error + p.children[0], err = p.children[0].PruneColumns(parentUsedCols, opt) if err != nil { - return err + return nil, err } p.SetSchema(p.children[0].Schema().Clone()) p.Schema().Append(windowColumns...) - return nil + return p, nil } func (p *LogicalWindow) extractUsedCols(parentUsedCols []*expression.Column) []*expression.Column { @@ -548,19 +595,20 @@ func (p *LogicalWindow) extractUsedCols(parentUsedCols []*expression.Column) []* } // PruneColumns implements LogicalPlan interface.
-func (p *LogicalLimit) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalLimit) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) (LogicalPlan, error) { if len(parentUsedCols) == 0 { // happens when LIMIT appears in UPDATE. - return nil + return p, nil } savedUsedCols := make([]*expression.Column, len(parentUsedCols)) copy(savedUsedCols, parentUsedCols) - if err := p.children[0].PruneColumns(parentUsedCols, opt); err != nil { - return err + var err error + if p.children[0], err = p.children[0].PruneColumns(parentUsedCols, opt); err != nil { + return nil, err } p.schema = nil p.inlineProjection(savedUsedCols, opt) - return nil + return p, nil } func (*columnPruner) name() string { diff --git a/planner/core/rule_max_min_eliminate.go b/planner/core/rule_max_min_eliminate.go index d9bb1eae19f9b..fdbe585a18606 100644 --- a/planner/core/rule_max_min_eliminate.go +++ b/planner/core/rule_max_min_eliminate.go @@ -154,9 +154,15 @@ func (a *maxMinEliminator) splitAggFuncAndCheckIndices(agg *LogicalAggregation, newAgg := LogicalAggregation{AggFuncs: []*aggregation.AggFuncDesc{f}}.Init(agg.ctx, agg.blockOffset) newAgg.SetChildren(a.cloneSubPlans(agg.children[0])) newAgg.schema = expression.NewSchema(agg.schema.Columns[i]) - if err := newAgg.PruneColumns([]*expression.Column{newAgg.schema.Columns[0]}, opt); err != nil { + // Since LogicalAggregation doesn’t use the parent LogicalPlan, passing an incorrect parameter here won’t affect subsequent optimizations. + var ( + p LogicalPlan + err error + ) + if p, err = newAgg.PruneColumns([]*expression.Column{newAgg.schema.Columns[0]}, opt); err != nil { return nil, false } + newAgg = p.(*LogicalAggregation) aggs = append(aggs, newAgg) } return aggs, true
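To illustrate the order-preserving mapping that markChildrenUsedCols and markChildrenUsedColsForTest now perform, here is a dependency-free sketch using the [col11] / [col21, col22] example from the comments above. It is illustrative code only, not TiDB's: the real functions key on the columns' resolved offsets and use slices.SortFunc, while sort.Slice is used here just to keep the sketch self-contained.

package main

import (
	"fmt"
	"sort"
)

// markChildrenUsed maps each output offset back to its child and orders the child
// indexes by their position in the output schema, so a reversed output order such
// as [col11, col22, col21] yields lUsed=[0] and rUsed=[1 0].
func markChildrenUsed(output []int, childLens ...int) [][]int {
	markedOffsets := make(map[int]int, len(output)) // global offset -> position in output
	for pos, off := range output {
		markedOffsets[off] = pos
	}
	type pair struct{ outPos, childIdx int }
	childrenUsed := make([][]int, 0, len(childLens))
	prefixLen := 0
	for _, l := range childLens {
		pairs := make([]pair, 0, l)
		for i := 0; i < l; i++ {
			if pos, ok := markedOffsets[prefixLen+i]; ok {
				pairs = append(pairs, pair{outPos: pos, childIdx: i})
			}
		}
		// Keep the child indexes in the order the output schema references them.
		sort.Slice(pairs, func(a, b int) bool { return pairs[a].outPos < pairs[b].outPos })
		used := make([]int, 0, l)
		for _, p := range pairs {
			used = append(used, p.childIdx)
		}
		childrenUsed = append(childrenUsed, used)
		prefixLen += l
	}
	return childrenUsed
}

func main() {
	// Left child: [col11] (offset 0); right child: [col21, col22] (offsets 1, 2).
	// Output schema: [col11, col22, col21] -> offsets [0, 2, 1].
	fmt.Println(markChildrenUsed([]int{0, 2, 1}, 1, 2)) // [[0] [1 0]]
}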