-
Notifications
You must be signed in to change notification settings - Fork 5.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve cte scheduling #22205
Improve cte scheduling #22205
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,17 +24,18 @@ | |
import com.facebook.presto.spi.plan.CteReferenceNode; | ||
import com.facebook.presto.spi.plan.PlanNode; | ||
import com.facebook.presto.spi.plan.PlanNodeIdAllocator; | ||
import com.facebook.presto.spi.plan.SequenceNode; | ||
import com.facebook.presto.spi.plan.TableScanNode; | ||
import com.facebook.presto.sql.planner.SimplePlanVisitor; | ||
import com.facebook.presto.sql.planner.TypeProvider; | ||
import com.facebook.presto.sql.planner.plan.ApplyNode; | ||
import com.facebook.presto.sql.planner.plan.JoinNode; | ||
import com.facebook.presto.sql.planner.plan.RemoteSourceNode; | ||
import com.facebook.presto.sql.planner.plan.SemiJoinNode; | ||
import com.facebook.presto.sql.planner.plan.SequenceNode; | ||
import com.facebook.presto.sql.planner.plan.SimplePlanRewriter; | ||
import com.google.common.collect.ImmutableList; | ||
import com.google.common.collect.ImmutableMap; | ||
import com.google.common.graph.Graph; | ||
import com.google.common.graph.GraphBuilder; | ||
import com.google.common.graph.MutableGraph; | ||
import com.google.common.graph.Traverser; | ||
|
@@ -135,8 +136,11 @@ public PlanNode transformPersistentCtes(Session session, PlanNode root) | |
return transformedCte; | ||
} | ||
isPlanRewritten = true; | ||
SequenceNode sequenceNode = new SequenceNode(root.getSourceLocation(), planNodeIdAllocator.getNextId(), topologicalOrderedList, | ||
transformedCte.getSources().get(0)); | ||
SequenceNode sequenceNode = new SequenceNode(root.getSourceLocation(), | ||
planNodeIdAllocator.getNextId(), | ||
topologicalOrderedList, | ||
transformedCte.getSources().get(0), | ||
context.createIndexedGraphFromTopologicallySortedCteProducers(topologicalOrderedList)); | ||
return root.replaceChildren(Arrays.asList(sequenceNode)); | ||
} | ||
|
||
|
@@ -271,23 +275,25 @@ public PlanNode visitApply(ApplyNode node, RewriteContext<LogicalCteOptimizerCon | |
node.getCorrelation(), | ||
node.getOriginSubqueryError(), | ||
node.getMayParticipateInAntiJoin()); | ||
}} | ||
} | ||
} | ||
|
||
public static class LogicalCteOptimizerContext | ||
{ | ||
public Map<String, CteProducerNode> cteProducerMap; | ||
|
||
// a -> b indicates that b needs to be processed before a | ||
MutableGraph<String> graph; | ||
public Stack<String> activeCteStack; | ||
// a -> b indicates that a needs to be processed before b | ||
private MutableGraph<String> cteDependencyGraph; | ||
|
||
private Stack<String> activeCteStack; | ||
|
||
public Set<String> complexCtes; | ||
private Set<String> complexCtes; | ||
|
||
public LogicalCteOptimizerContext() | ||
{ | ||
cteProducerMap = new HashMap<>(); | ||
// The cte graph will never have cycles because sql won't allow it | ||
graph = GraphBuilder.directed().build(); | ||
cteDependencyGraph = GraphBuilder.directed().build(); | ||
activeCteStack = new Stack<>(); | ||
complexCtes = new HashSet<>(); | ||
} | ||
|
@@ -319,9 +325,10 @@ public Optional<String> peekActiveCte() | |
|
||
public void addDependency(String currentCte) | ||
{ | ||
graph.addNode(currentCte); | ||
cteDependencyGraph.addNode(currentCte); | ||
Optional<String> parentCte = peekActiveCte(); | ||
parentCte.ifPresent(s -> graph.putEdge(currentCte, s)); | ||
// (current -> parentCte) this indicates that currentCte must be processed first | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed wrong comments about the dependency graph and added explanations There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I could reverse the graph to be according to the comments and then add a reverse in topological ordering, but then i would have to update some tests (since topological ordering is not unique). Its easier to update the comments |
||
parentCte.ifPresent(s -> cteDependencyGraph.putEdge(currentCte, s)); | ||
} | ||
|
||
public void addComplexCte(String cteId) | ||
|
@@ -342,9 +349,29 @@ public boolean isComplexCte(String cteId) | |
public List<PlanNode> getTopologicalOrdering() | ||
{ | ||
ImmutableList.Builder<PlanNode> topSortedCteProducerListBuilder = ImmutableList.builder(); | ||
Traverser.forGraph(graph).depthFirstPostOrder(graph.nodes()) | ||
Traverser.forGraph(cteDependencyGraph).depthFirstPostOrder(cteDependencyGraph.nodes()) | ||
.forEach(cteName -> topSortedCteProducerListBuilder.add(cteProducerMap.get(cteName))); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here we are not reversing the list and hence the topological ordering is correct, if we reverse the graph, we should reverse the list here and in SequenceNode |
||
return topSortedCteProducerListBuilder.build(); | ||
} | ||
|
||
public Graph<Integer> createIndexedGraphFromTopologicallySortedCteProducers(List<PlanNode> topologicalSortedCteProducerList) | ||
{ | ||
Map<String, Integer> cteIdToProducerIndexMap = new HashMap<>(); | ||
MutableGraph<Integer> indexGraph = GraphBuilder | ||
.directed() | ||
.expectedNodeCount(topologicalSortedCteProducerList.size()) | ||
.build(); | ||
for (int i = 0; i < topologicalSortedCteProducerList.size(); i++) { | ||
cteIdToProducerIndexMap.put(((CteProducerNode) topologicalSortedCteProducerList.get(i)).getCteId(), i); | ||
indexGraph.addNode(i); | ||
} | ||
|
||
// Populate the new graph with edges based on the index mapping | ||
for (String cteId : cteDependencyGraph.nodes()) { | ||
cteDependencyGraph.successors(cteId).forEach(successor -> | ||
indexGraph.putEdge(cteIdToProducerIndexMap.get(cteId), cteIdToProducerIndexMap.get(successor))); | ||
} | ||
return indexGraph; | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
general question again, but still trying to understand the basics. can you draw out what a fragmented plan with dependent ctes would look like before and after this change?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure, lets take an example case of Graph(a, b) or sql
WITH t as (SELECT * FROM orders), t1 as (Select * FROM customer) SELECT * FROM t JOIN t1 ON true
Lets say that the Plan would be the below for simplicity.
Lets say the primary source creates subplan (subPrimary1->subPrimary2)
and creates a list of subplans for the cteProducers as (subCteProducerT1 and subCteProducerT2)
The code is a bit tricker here but in the visitSequence we add the the cteproducers to the context created by the parent exchange. (here is actually where the parent exchange creates new properties or context)
so before the subplans look like
code is where we were adding the subCteProducer2 as a child of subCteProducer1
Now we would add the subCteProducer2 to the same level as the subCteProducer1
We can look at the subCteProducer1 and subCteProducer2 as independent subgraphs for the general case of independent subgraphs
In both cases similar structure would be represented in the sectionedPlan