From 31c0af96c4e643af97dd28f600f0e216be794eda Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Fri, 15 Nov 2024 14:21:21 +0000 Subject: [PATCH 1/7] loop analysis simplification --- cranelift/codegen/src/loop_analysis.rs | 60 ++++++++++++++------------ 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/cranelift/codegen/src/loop_analysis.rs b/cranelift/codegen/src/loop_analysis.rs index c4677400b51e..bab484b2180b 100644 --- a/cranelift/codegen/src/loop_analysis.rs +++ b/cranelift/codegen/src/loop_analysis.rs @@ -198,21 +198,22 @@ impl LoopAnalysis { domtree: &DominatorTree, layout: &Layout, ) { - // We traverse the CFG in reverse postorder - for &block in domtree.cfg_postorder().iter().rev() { - for BlockPredecessor { - inst: pred_inst, .. - } in cfg.pred_iter(block) - { - // If the block dominates one of its predecessors it is a back edge - if domtree.dominates(block, pred_inst, layout) { - // This block is a loop header, so we create its associated loop - let lp = self.loops.push(LoopData::new(block, None)); - self.block_loop_map[block] = lp.into(); - break; - // We break because we only need one back edge to identify a loop header. - } - } + for &block in domtree + .cfg_postorder() + .into_iter() + .rev() // We traverse the CFG in reverse postorder + .filter(|&&block| { + // If the block dominates any one of its predecessors it is a back edge + cfg.pred_iter(block).any( + |BlockPredecessor { + inst: pred_inst, .. + }| domtree.dominates(block, pred_inst, layout), + ) + }) + { + // This block is a loop header, so we create its associated loop + let lp = self.loops.push(LoopData::new(block, None)); + self.block_loop_map[block] = lp.into(); } } @@ -229,16 +230,18 @@ impl LoopAnalysis { // We handle each loop header in reverse order, corresponding to a pseudo postorder // traversal of the graph. for lp in self.loops().rev() { - for BlockPredecessor { - block: pred, - inst: pred_inst, - } in cfg.pred_iter(self.loops[lp].header) - { - // We follow the back edges - if domtree.dominates(self.loops[lp].header, pred_inst, layout) { - stack.push(pred); - } - } + stack.extend( + cfg.pred_iter(self.loops[lp].header) + .filter( + |BlockPredecessor { + inst: pred_inst, .. + }| { + // We follow the back edges + domtree.dominates(self.loops[lp].header, *pred_inst, layout) + }, + ) + .map(|BlockPredecessor { block, .. }| block), + ); while let Some(node) = stack.pop() { let continue_dfs: Option; match self.block_loop_map[node].expand() { @@ -283,9 +286,10 @@ impl LoopAnalysis { // Now we have handled the popped node and need to continue the DFS by adding the // predecessors of that node if let Some(continue_dfs) = continue_dfs { - for BlockPredecessor { block: pred, .. } in cfg.pred_iter(continue_dfs) { - stack.push(pred) - } + stack.extend( + cfg.pred_iter(continue_dfs) + .map(|BlockPredecessor { block: pred, .. }| pred), + ); } } } From 23c489ae475e91595dbaa7280b1188837015a4f3 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Fri, 15 Nov 2024 18:56:26 +0000 Subject: [PATCH 2/7] add iterator over reverse post order --- cranelift/codegen/src/dominator_tree.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cranelift/codegen/src/dominator_tree.rs b/cranelift/codegen/src/dominator_tree.rs index d14f06a742cc..03a0368028c6 100644 --- a/cranelift/codegen/src/dominator_tree.rs +++ b/cranelift/codegen/src/dominator_tree.rs @@ -60,6 +60,15 @@ impl DominatorTree { &self.postorder } + /// Get an iterator over CFG reverse post-order of blocks used to compute the dominator tree. + /// + /// Note that the post-order is not updated automatically when the CFG is modified. It is + /// computed from scratch and cached by `compute()`. + pub fn cfg_rpo(&self) -> impl Iterator { + debug_assert!(self.is_valid()); + self.postorder.iter().rev() + } + /// Returns the immediate dominator of `block`. /// /// `block_a` is said to *dominate* `block_b` if all control flow paths from the function From 21a55a0542bd440afb1210d122d36a1f53152b49 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Fri, 15 Nov 2024 19:01:00 +0000 Subject: [PATCH 3/7] review comments --- cranelift/codegen/src/loop_analysis.rs | 40 +++++++++++++------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/cranelift/codegen/src/loop_analysis.rs b/cranelift/codegen/src/loop_analysis.rs index bab484b2180b..91aafc38a214 100644 --- a/cranelift/codegen/src/loop_analysis.rs +++ b/cranelift/codegen/src/loop_analysis.rs @@ -190,6 +190,19 @@ impl LoopAnalysis { self.valid = false; } + // Determines if a block dominates any predecessor + // and thus is a loop header + fn check_loop_header( + block: Block, + cfg: &ControlFlowGraph, + domtree: &DominatorTree, + layout: &Layout, + ) -> bool { + // loop header if it dominates any of its predecessors + cfg.pred_iter(block) + .any(|pred| domtree.dominates(block, pred.inst, layout)) + } + // Traverses the CFG in reverse postorder and create a loop object for every block having a // back edge. fn find_loop_headers( @@ -199,17 +212,8 @@ impl LoopAnalysis { layout: &Layout, ) { for &block in domtree - .cfg_postorder() - .into_iter() - .rev() // We traverse the CFG in reverse postorder - .filter(|&&block| { - // If the block dominates any one of its predecessors it is a back edge - cfg.pred_iter(block).any( - |BlockPredecessor { - inst: pred_inst, .. - }| domtree.dominates(block, pred_inst, layout), - ) - }) + .cfg_rpo() + .filter(|&&block| Self::check_loop_header(block, cfg, domtree, layout)) { // This block is a loop header, so we create its associated loop let lp = self.loops.push(LoopData::new(block, None)); @@ -232,15 +236,11 @@ impl LoopAnalysis { for lp in self.loops().rev() { stack.extend( cfg.pred_iter(self.loops[lp].header) - .filter( - |BlockPredecessor { - inst: pred_inst, .. - }| { - // We follow the back edges - domtree.dominates(self.loops[lp].header, *pred_inst, layout) - }, - ) - .map(|BlockPredecessor { block, .. }| block), + .filter(|pred| { + // We follow the back edges + domtree.dominates(self.loops[lp].header, pred.inst, layout) + }) + .map(|pred| pred.block), ); while let Some(node) = stack.pop() { let continue_dfs: Option; From c57fb29776f165bd6e3c6088689add4f0ffe88ae Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Fri, 15 Nov 2024 19:50:52 +0000 Subject: [PATCH 4/7] review comments --- cranelift/codegen/src/loop_analysis.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cranelift/codegen/src/loop_analysis.rs b/cranelift/codegen/src/loop_analysis.rs index 91aafc38a214..44fbe0503ad3 100644 --- a/cranelift/codegen/src/loop_analysis.rs +++ b/cranelift/codegen/src/loop_analysis.rs @@ -191,14 +191,14 @@ impl LoopAnalysis { } // Determines if a block dominates any predecessor - // and thus is a loop header - fn check_loop_header( + // and thus is a loop header. + fn is_block_loop_header( block: Block, cfg: &ControlFlowGraph, domtree: &DominatorTree, layout: &Layout, ) -> bool { - // loop header if it dominates any of its predecessors + // A block is a loop header if it dominates any of its predecessors. cfg.pred_iter(block) .any(|pred| domtree.dominates(block, pred.inst, layout)) } @@ -213,7 +213,7 @@ impl LoopAnalysis { ) { for &block in domtree .cfg_rpo() - .filter(|&&block| Self::check_loop_header(block, cfg, domtree, layout)) + .filter(|&&block| Self::is_block_loop_header(block, cfg, domtree, layout)) { // This block is a loop header, so we create its associated loop let lp = self.loops.push(LoopData::new(block, None)); @@ -234,6 +234,7 @@ impl LoopAnalysis { // We handle each loop header in reverse order, corresponding to a pseudo postorder // traversal of the graph. for lp in self.loops().rev() { + // Push all predecessors of this header that it dominates onto the stack. stack.extend( cfg.pred_iter(self.loops[lp].header) .filter(|pred| { From eb5a7fefb945e27b69b3e37deb9d38f969c9bcc8 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Fri, 15 Nov 2024 20:08:32 +0000 Subject: [PATCH 5/7] use rpo iterator where possible --- cranelift/codegen/src/machinst/blockorder.rs | 2 +- cranelift/codegen/src/remove_constant_phis.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cranelift/codegen/src/machinst/blockorder.rs b/cranelift/codegen/src/machinst/blockorder.rs index b31a5a1bade4..b23fcf4859b1 100644 --- a/cranelift/codegen/src/machinst/blockorder.rs +++ b/cranelift/codegen/src/machinst/blockorder.rs @@ -192,7 +192,7 @@ impl BlockLoweringOrder { let mut lowered_order = Vec::new(); - for &block in domtree.cfg_postorder().iter().rev() { + for &block in domtree.cfg_rpo() { lowered_order.push(LoweredBlock::Orig { block }); if block_out_count[block] > 1 { diff --git a/cranelift/codegen/src/remove_constant_phis.rs b/cranelift/codegen/src/remove_constant_phis.rs index 4b73b2c24f29..bb2159c3bbeb 100644 --- a/cranelift/codegen/src/remove_constant_phis.rs +++ b/cranelift/codegen/src/remove_constant_phis.rs @@ -233,7 +233,7 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) let mut summaries = SecondaryMap::::with_capacity(domtree.cfg_postorder().len()); - for b in domtree.cfg_postorder().iter().rev().copied() { + for b in domtree.cfg_rpo().copied() { let formals = func.dfg.block_params(b); let mut summary = BlockSummary::new(&bump, formals); @@ -269,7 +269,7 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) // Set up initial solver state let mut state = SolverState::new(); - for b in domtree.cfg_postorder().iter().rev().copied() { + for b in domtree.cfg_rpo().copied() { // For each block, get the formals if b == entry_block { continue; @@ -288,7 +288,7 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) iter_no += 1; let mut changed = false; - for src in domtree.cfg_postorder().iter().rev().copied() { + for src in domtree.cfg_rpo().copied() { let src_summary = &summaries[src]; for edge in &src_summary.dests { assert!(edge.block != entry_block); From 0f9e83c08a6e74694e0c8108609592c87544b698 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Fri, 15 Nov 2024 20:39:46 +0000 Subject: [PATCH 6/7] make use of same struct access pattern throughout --- cranelift/codegen/src/loop_analysis.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/cranelift/codegen/src/loop_analysis.rs b/cranelift/codegen/src/loop_analysis.rs index 44fbe0503ad3..9548017b6f6a 100644 --- a/cranelift/codegen/src/loop_analysis.rs +++ b/cranelift/codegen/src/loop_analysis.rs @@ -5,7 +5,7 @@ use crate::dominator_tree::DominatorTree; use crate::entity::entity_impl; use crate::entity::SecondaryMap; use crate::entity::{Keys, PrimaryMap}; -use crate::flowgraph::{BlockPredecessor, ControlFlowGraph}; +use crate::flowgraph::ControlFlowGraph; use crate::ir::{Block, Function, Layout}; use crate::packed_option::PackedOption; use crate::timing; @@ -287,10 +287,7 @@ impl LoopAnalysis { // Now we have handled the popped node and need to continue the DFS by adding the // predecessors of that node if let Some(continue_dfs) = continue_dfs { - stack.extend( - cfg.pred_iter(continue_dfs) - .map(|BlockPredecessor { block: pred, .. }| pred), - ); + stack.extend(cfg.pred_iter(continue_dfs).map(|pred| pred.block)); } } } From 193e7fa3bf6f706b8585d346a014b4713db2de21 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Fri, 15 Nov 2024 21:21:25 +0000 Subject: [PATCH 7/7] use new directly instead of calling macro --- cranelift/codegen/src/loop_analysis.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cranelift/codegen/src/loop_analysis.rs b/cranelift/codegen/src/loop_analysis.rs index 9548017b6f6a..71f845652233 100644 --- a/cranelift/codegen/src/loop_analysis.rs +++ b/cranelift/codegen/src/loop_analysis.rs @@ -10,7 +10,7 @@ use crate::ir::{Block, Function, Layout}; use crate::packed_option::PackedOption; use crate::timing; use alloc::vec::Vec; -use smallvec::{smallvec, SmallVec}; +use smallvec::SmallVec; /// A opaque reference to a code loop. #[derive(Copy, Clone, PartialEq, Eq, Hash)] @@ -294,7 +294,7 @@ impl LoopAnalysis { } fn assign_loop_levels(&mut self) { - let mut stack: SmallVec<[Loop; 8]> = smallvec![]; + let mut stack: SmallVec<[Loop; 8]> = SmallVec::new(); for lp in self.loops.keys() { if self.loops[lp].level == LoopLevel::invalid() { stack.push(lp);