From a532973e53bca88ccbd3365ca722d92889b39506 Mon Sep 17 00:00:00 2001 From: Song Gao Date: Mon, 13 Dec 2021 17:40:35 +0800 Subject: [PATCH] planner: add trace for join reorder (#30394) --- planner/core/logical_plan_trace_test.go | 26 +++- planner/core/rule_join_reorder.go | 154 +++++++++++++++++++++- planner/core/rule_join_reorder_dp.go | 11 +- planner/core/rule_join_reorder_dp_test.go | 4 +- planner/core/rule_join_reorder_greedy.go | 11 +- 5 files changed, 187 insertions(+), 19 deletions(-) diff --git a/planner/core/logical_plan_trace_test.go b/planner/core/logical_plan_trace_test.go index cd8445c611468..f0c6d5718eaae 100644 --- a/planner/core/logical_plan_trace_test.go +++ b/planner/core/logical_plan_trace_test.go @@ -86,6 +86,28 @@ func (s *testPlanSuite) TestSingleRuleTraceStep(c *C) { assertRuleName string assertRuleSteps []assertTraceStep }{ + { + sql: "select * from (t t1, t t2, t t3,t t4) union all select * from (t t5, t t6, t t7,t t8)", + flags: []uint64{flagBuildKeyInfo, flagPrunColumns, flagDecorrelate, flagPredicatePushDown, flagEliminateOuterJoin, flagJoinReOrder}, + assertRuleName: "join_reorder", + assertRuleSteps: []assertTraceStep{ + { + assertAction: "join order becomes [((t1*t2)*(t3*t4)),((t5*t6)*(t7*t8))] from original [(((t1*t2)*t3)*t4),(((t5*t6)*t7)*t8)]", + assertReason: "join cost during reorder: [[t1, cost:10000],[t2, cost:10000],[t3, cost:10000],[t4, cost:10000],[t5, cost:10000],[t6, cost:10000],[t7, cost:10000],[t8, cost:10000]]", + }, + }, + }, + { + sql: "select * from t t1, t t2, t t3 where t1.a=t2.a and t3.a=t2.a and t1.a=t3.a", + flags: []uint64{flagBuildKeyInfo, flagPrunColumns, flagDecorrelate, flagPredicatePushDown, flagEliminateOuterJoin, flagJoinReOrder}, + assertRuleName: "join_reorder", + assertRuleSteps: []assertTraceStep{ + { + assertAction: "join order becomes ((t1*t2)*t3) from original ((t1*t2)*t3)", + assertReason: "join cost during reorder: [[((t1*t2)*t3), cost:58125],[(t1*t2), cost:32500],[(t1*t3), cost:32500],[t1, cost:10000],[t2, cost:10000],[t3, cost:10000]]", + }, + }, + }, { sql: "select min(distinct a) from t group by a", flags: []uint64{flagBuildKeyInfo, flagEliminateAgg}, @@ -215,10 +237,8 @@ func (s *testPlanSuite) TestSingleRuleTraceStep(c *C) { for _, f := range tc.flags { flag = flag | f } - p, err = logicalOptimize(ctx, flag, p.(LogicalPlan)) + _, err = logicalOptimize(ctx, flag, p.(LogicalPlan)) c.Assert(err, IsNil) - _, ok := p.(*LogicalProjection) - c.Assert(ok, IsTrue) otrace := sctx.GetSessionVars().StmtCtx.LogicalOptimizeTrace c.Assert(otrace, NotNil) assert := false diff --git a/planner/core/rule_join_reorder.go b/planner/core/rule_join_reorder.go index 9fb38e572d228..dd29f7d3f1f30 100644 --- a/planner/core/rule_join_reorder.go +++ b/planner/core/rule_join_reorder.go @@ -15,10 +15,15 @@ package core import ( + "bytes" "context" + "fmt" + "sort" "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/sessionctx" + "github.com/pingcap/tidb/util/plancodec" + "github.com/pingcap/tidb/util/tracing" ) // extractJoinGroup extracts all the join nodes connected with continuous @@ -56,16 +61,21 @@ type jrNode struct { } func (s *joinReOrderSolver) optimize(ctx context.Context, p LogicalPlan, opt *logicalOptimizeOp) (LogicalPlan, error) { - return s.optimizeRecursive(p.SCtx(), p) + tracer := &joinReorderTrace{cost: map[string]float64{}, opt: opt} + tracer.traceJoinReorder(p) + p, err := s.optimizeRecursive(p.SCtx(), p, tracer) + tracer.traceJoinReorder(p) + appendJoinReorderTraceStep(tracer, p, opt) + return p, err } // optimizeRecursive recursively collects join groups and applies join reorder algorithm for each group. -func (s *joinReOrderSolver) optimizeRecursive(ctx sessionctx.Context, p LogicalPlan) (LogicalPlan, error) { +func (s *joinReOrderSolver) optimizeRecursive(ctx sessionctx.Context, p LogicalPlan, tracer *joinReorderTrace) (LogicalPlan, error) { var err error curJoinGroup, eqEdges, otherConds := extractJoinGroup(p) if len(curJoinGroup) > 1 { for i := range curJoinGroup { - curJoinGroup[i], err = s.optimizeRecursive(ctx, curJoinGroup[i]) + curJoinGroup[i], err = s.optimizeRecursive(ctx, curJoinGroup[i], tracer) if err != nil { return nil, err } @@ -80,13 +90,13 @@ func (s *joinReOrderSolver) optimizeRecursive(ctx sessionctx.Context, p LogicalP baseSingleGroupJoinOrderSolver: baseGroupSolver, eqEdges: eqEdges, } - p, err = groupSolver.solve(curJoinGroup) + p, err = groupSolver.solve(curJoinGroup, tracer) } else { dpSolver := &joinReorderDPSolver{ baseSingleGroupJoinOrderSolver: baseGroupSolver, } dpSolver.newJoin = dpSolver.newJoinWithEdges - p, err = dpSolver.solve(curJoinGroup, expression.ScalarFuncs2Exprs(eqEdges)) + p, err = dpSolver.solve(curJoinGroup, expression.ScalarFuncs2Exprs(eqEdges), tracer) } if err != nil { return nil, err @@ -114,7 +124,7 @@ func (s *joinReOrderSolver) optimizeRecursive(ctx sessionctx.Context, p LogicalP } newChildren := make([]LogicalPlan, 0, len(p.Children())) for _, child := range p.Children() { - newChild, err := s.optimizeRecursive(ctx, child) + newChild, err := s.optimizeRecursive(ctx, child, tracer) if err != nil { return nil, err } @@ -194,3 +204,135 @@ func (s *baseSingleGroupJoinOrderSolver) calcJoinCumCost(join LogicalPlan, lNode func (*joinReOrderSolver) name() string { return "join_reorder" } + +func appendJoinReorderTraceStep(tracer *joinReorderTrace, plan LogicalPlan, opt *logicalOptimizeOp) { + if len(tracer.initial) < 1 || len(tracer.final) < 1 { + return + } + action := fmt.Sprintf("join order becomes %v from original %v", tracer.final, tracer.initial) + reason := func() string { + buffer := bytes.NewBufferString("join cost during reorder: [") + var joins []string + for join := range tracer.cost { + joins = append(joins, join) + } + sort.Strings(joins) + for i, join := range joins { + if i > 0 { + buffer.WriteString(",") + } + buffer.WriteString(fmt.Sprintf("[%s, cost:%v]", join, tracer.cost[join])) + } + buffer.WriteString("]") + return buffer.String() + }() + opt.appendStepToCurrent(plan.ID(), plan.TP(), reason, action) +} + +func allJoinOrderToString(tt []*tracing.LogicalPlanTrace) string { + if len(tt) == 1 { + return joinOrderToString(tt[0]) + } + buffer := bytes.NewBufferString("[") + for i, t := range tt { + if i > 0 { + buffer.WriteString(",") + } + buffer.WriteString(joinOrderToString(t)) + } + buffer.WriteString("]") + return buffer.String() +} + +// joinOrderToString let Join(DataSource, DataSource) become '(t1*t2)' +func joinOrderToString(t *tracing.LogicalPlanTrace) string { + if t.TP == plancodec.TypeJoin { + buffer := bytes.NewBufferString("(") + for i, child := range t.Children { + if i > 0 { + buffer.WriteString("*") + } + buffer.WriteString(joinOrderToString(child)) + } + buffer.WriteString(")") + return buffer.String() + } else if t.TP == plancodec.TypeDataSource { + return t.ExplainInfo[6:] + } + return "" +} + +// extractJoinAndDataSource will only keep join and dataSource operator and remove other operators. +// For example: Proj->Join->(Proj->DataSource, DataSource) will become Join->(DataSource, DataSource) +func extractJoinAndDataSource(t *tracing.LogicalPlanTrace) []*tracing.LogicalPlanTrace { + roots := findRoots(t) + if len(roots) < 1 { + return nil + } + var rr []*tracing.LogicalPlanTrace + for _, root := range roots { + simplify(root) + rr = append(rr, root) + } + return rr +} + +// simplify only keeps Join and DataSource operators, and discard other operators. +func simplify(node *tracing.LogicalPlanTrace) { + if len(node.Children) < 1 { + return + } + for valid := false; !valid; { + valid = true + newChildren := make([]*tracing.LogicalPlanTrace, 0) + for _, child := range node.Children { + if child.TP != plancodec.TypeDataSource && child.TP != plancodec.TypeJoin { + newChildren = append(newChildren, child.Children...) + valid = false + } else { + newChildren = append(newChildren, child) + } + } + node.Children = newChildren + } + for _, child := range node.Children { + simplify(child) + } +} + +func findRoots(t *tracing.LogicalPlanTrace) []*tracing.LogicalPlanTrace { + if t.TP == plancodec.TypeJoin || t.TP == plancodec.TypeDataSource { + return []*tracing.LogicalPlanTrace{t} + } + var r []*tracing.LogicalPlanTrace + for _, child := range t.Children { + r = append(r, findRoots(child)...) + } + return r +} + +type joinReorderTrace struct { + opt *logicalOptimizeOp + initial string + final string + cost map[string]float64 +} + +func (t *joinReorderTrace) traceJoinReorder(p LogicalPlan) { + if t == nil || t.opt == nil || t.opt.tracer == nil { + return + } + if len(t.initial) > 0 { + t.final = allJoinOrderToString(extractJoinAndDataSource(p.buildLogicalPlanTrace(p))) + return + } + t.initial = allJoinOrderToString(extractJoinAndDataSource(p.buildLogicalPlanTrace(p))) +} + +func (t *joinReorderTrace) appendLogicalJoinCost(join LogicalPlan, cost float64) { + if t == nil || t.opt == nil || t.opt.tracer == nil { + return + } + joinMapKey := allJoinOrderToString(extractJoinAndDataSource(join.buildLogicalPlanTrace(join))) + t.cost[joinMapKey] = cost +} diff --git a/planner/core/rule_join_reorder_dp.go b/planner/core/rule_join_reorder_dp.go index 560aa5848b54d..d5db965667258 100644 --- a/planner/core/rule_join_reorder_dp.go +++ b/planner/core/rule_join_reorder_dp.go @@ -37,16 +37,18 @@ type joinGroupNonEqEdge struct { expr expression.Expression } -func (s *joinReorderDPSolver) solve(joinGroup []LogicalPlan, eqConds []expression.Expression) (LogicalPlan, error) { +func (s *joinReorderDPSolver) solve(joinGroup []LogicalPlan, eqConds []expression.Expression, tracer *joinReorderTrace) (LogicalPlan, error) { for _, node := range joinGroup { _, err := node.recursiveDeriveStats(nil) if err != nil { return nil, err } + cost := s.baseNodeCumCost(node) s.curJoinGroup = append(s.curJoinGroup, &jrNode{ p: node, - cumCost: s.baseNodeCumCost(node), + cumCost: cost, }) + tracer.appendLogicalJoinCost(node, cost) } adjacents := make([][]int, len(s.curJoinGroup)) totalEqEdges := make([]joinGroupEqEdge, 0, len(eqConds)) @@ -120,7 +122,7 @@ func (s *joinReorderDPSolver) solve(joinGroup []LogicalPlan, eqConds []expressio totalNonEqEdges = append(totalNonEqEdges[:i], totalNonEqEdges[i+1:]...) } // Do DP on each sub graph. - join, err := s.dpGraph(visitID2NodeID, nodeID2VisitID, joinGroup, totalEqEdges, subNonEqEdges) + join, err := s.dpGraph(visitID2NodeID, nodeID2VisitID, joinGroup, totalEqEdges, subNonEqEdges, tracer) if err != nil { return nil, err } @@ -159,7 +161,7 @@ func (s *joinReorderDPSolver) bfsGraph(startNode int, visited []bool, adjacents // It implements the traditional join reorder algorithm: DP by subset using the following formula: // bestPlan[S:set of node] = the best one among Join(bestPlan[S1:subset of S], bestPlan[S2: S/S1]) func (s *joinReorderDPSolver) dpGraph(visitID2NodeID, nodeID2VisitID []int, joinGroup []LogicalPlan, - totalEqEdges []joinGroupEqEdge, totalNonEqEdges []joinGroupNonEqEdge) (LogicalPlan, error) { + totalEqEdges []joinGroupEqEdge, totalNonEqEdges []joinGroupNonEqEdge, tracer *joinReorderTrace) (LogicalPlan, error) { nodeCnt := uint(len(visitID2NodeID)) bestPlan := make([]*jrNode, 1< 0 { - newNode, err := s.constructConnectedJoinTree() + newNode, err := s.constructConnectedJoinTree(tracer) if err != nil { return nil, err } @@ -68,7 +70,7 @@ func (s *joinReorderGreedySolver) solve(joinNodePlans []LogicalPlan) (LogicalPla return s.makeBushyJoin(cartesianGroup), nil } -func (s *joinReorderGreedySolver) constructConnectedJoinTree() (*jrNode, error) { +func (s *joinReorderGreedySolver) constructConnectedJoinTree(tracer *joinReorderTrace) (*jrNode, error) { curJoinTree := s.curJoinGroup[0] s.curJoinGroup = s.curJoinGroup[1:] for { @@ -86,6 +88,7 @@ func (s *joinReorderGreedySolver) constructConnectedJoinTree() (*jrNode, error) return nil, err } curCost := s.calcJoinCumCost(newJoin, curJoinTree, node) + tracer.appendLogicalJoinCost(newJoin, curCost) if bestCost > curCost { bestCost = curCost bestJoin = newJoin