From 3726d202147afa2ae173d251046c36fc73fd4cf0 Mon Sep 17 00:00:00 2001 From: Matthew Treinish Date: Wed, 25 Oct 2023 17:13:32 -0400 Subject: [PATCH 1/2] Reuse VF2 scoring views for all scoring As part of the VF2Layout and VF2PostLayout passes when there are a large number of matches found we're spending an inordinate amount of time in scoring rebuilding the same views over and over again of the interaction graph for each scoring call. For example, in one test cProfile showed that with #11112 when running transpile() on a 65 Bernstein Vazirani circuit with a secret of all 1s for FakeSherbrooke with optimization_level=3 we were calling vf2_utils.score_layout() 161,761 times which took a cumulative time of 14.33 secs. Of that time though we spent 5.865 secs building the edge list view. These views are fixed for a given interaction graph which doesn't change during the duration of the run() method on these passes. To remove this inefficiency this commit moves the construction of the views to the beginning of the passes and just reuses them by reference for each scoring call, avoiding the reconstruction overhead. 
--- qiskit/transpiler/passes/layout/vf2_layout.py | 4 ++ .../passes/layout/vf2_post_layout.py | 11 +++++- qiskit/transpiler/passes/layout/vf2_utils.py | 39 +++++++++++++------ 3 files changed, 41 insertions(+), 13 deletions(-) diff --git a/qiskit/transpiler/passes/layout/vf2_layout.py b/qiskit/transpiler/passes/layout/vf2_layout.py index d6de8f51d31f..5fd6e08ab2fb 100644 --- a/qiskit/transpiler/passes/layout/vf2_layout.py +++ b/qiskit/transpiler/passes/layout/vf2_layout.py @@ -138,6 +138,8 @@ def run(self, dag): self.property_set["VF2Layout_stop_reason"] = VF2LayoutStopReason.MORE_THAN_2Q return im_graph, im_graph_node_map, reverse_im_graph_node_map, free_nodes = result + scoring_edge_list = vf2_utils.build_edge_list(im_graph) + scoring_bit_list = vf2_utils.build_bit_list(im_graph, im_graph_node_map) cm_graph, cm_nodes = vf2_utils.shuffle_coupling_graph( self.coupling_map, self.seed, self.strict_direction ) @@ -199,6 +201,8 @@ def mapping_to_layout(layout_mapping): reverse_im_graph_node_map, im_graph, self.strict_direction, + edge_list=scoring_edge_list, + bit_list=scoring_bit_list, ) # If the layout score is 0 we can't do any better and we'll just # waste time finding additional mappings that will at best match diff --git a/qiskit/transpiler/passes/layout/vf2_post_layout.py b/qiskit/transpiler/passes/layout/vf2_post_layout.py index cee0e1cf04a8..28d9c397fa34 100644 --- a/qiskit/transpiler/passes/layout/vf2_post_layout.py +++ b/qiskit/transpiler/passes/layout/vf2_post_layout.py @@ -167,6 +167,8 @@ def run(self, dag): self.property_set["VF2PostLayout_stop_reason"] = VF2PostLayoutStopReason.MORE_THAN_2Q return im_graph, im_graph_node_map, reverse_im_graph_node_map, free_nodes = result + scoring_bit_list = vf2_utils.build_bit_list(im_graph, im_graph_node_map) + scoring_edge_list = vf2_utils.build_edge_list(im_graph) if self.target is not None: # If qargs is None then target is global and ideal so no @@ -256,7 +258,10 @@ def run(self, dag): if self.strict_direction: 
initial_layout = Layout({bit: index for index, bit in enumerate(dag.qubits)}) chosen_layout_score = self._score_layout( - initial_layout, im_graph_node_map, reverse_im_graph_node_map, im_graph + initial_layout, + im_graph_node_map, + reverse_im_graph_node_map, + im_graph, ) else: initial_layout = { @@ -271,6 +276,8 @@ def run(self, dag): reverse_im_graph_node_map, im_graph, self.strict_direction, + edge_list=scoring_edge_list, + bit_list=scoring_bit_list, ) # Circuit not in basis so we have nothing to compare against return here except KeyError: @@ -303,6 +310,8 @@ def run(self, dag): reverse_im_graph_node_map, im_graph, self.strict_direction, + edge_list=scoring_edge_list, + bit_list=scoring_bit_list, ) logger.debug("Trial %s has score %s", trials, layout_score) if layout_score < chosen_layout_score: diff --git a/qiskit/transpiler/passes/layout/vf2_utils.py b/qiskit/transpiler/passes/layout/vf2_utils.py index b6fc73f18802..a0c2fbed417c 100644 --- a/qiskit/transpiler/passes/layout/vf2_utils.py +++ b/qiskit/transpiler/passes/layout/vf2_utils.py @@ -95,6 +95,27 @@ def _visit(dag, weight, wire_map): return im_graph, im_graph_node_map, reverse_im_graph_node_map, free_nodes +def build_edge_list(im_graph): + """Generate an edge list for scoring.""" + return { + (edge[0], edge[1]): sum(edge[2].values()) for edge in im_graph.edge_index_map().values() + } + + +def build_bit_list(im_graph, bit_map): + """Generate a bit list for scoring.""" + bit_list = np.zeros(len(im_graph), dtype=np.int32) + for node_index in bit_map.values(): + try: + bit_list[node_index] = sum(im_graph[node_index].values()) + # If node_index not in im_graph that means there was a standalone + # node we will score/sort separately outside the vf2 mapping, so we + # can skip the hole + except IndexError: + pass + return bit_list + + def score_layout( avg_error_map, layout_mapping, @@ -103,6 +124,8 @@ def score_layout( im_graph, strict_direction=False, run_in_parallel=False, + edge_list=None, + 
bit_list=None, ): """Score a layout given an average error map.""" if layout_mapping: @@ -110,18 +133,10 @@ def score_layout( else: size = 0 nlayout = NLayout(layout_mapping, size + 1, size + 1) - bit_list = np.zeros(len(im_graph), dtype=np.int32) - for node_index in bit_map.values(): - try: - bit_list[node_index] = sum(im_graph[node_index].values()) - # If node_index not in im_graph that means there was a standalone - # node we will score/sort separately outside the vf2 mapping, so we - # can skip the hole - except IndexError: - pass - edge_list = { - (edge[0], edge[1]): sum(edge[2].values()) for edge in im_graph.edge_index_map().values() - } + if bit_list is None: + bit_list = build_bit_list(im_graph, bit_map) + if edge_list is None: + edge_list = build_edge_list(im_graph) return vf2_layout.score_layout( bit_list, edge_list, avg_error_map, nlayout, strict_direction, run_in_parallel ) From 3264e06a7eaf2ab862fa07dea34c0728184be4cc Mon Sep 17 00:00:00 2001 From: Matthew Treinish Date: Thu, 26 Oct 2023 06:40:24 -0400 Subject: [PATCH 2/2] Add EdgeList Rust pyclass to avoid repeated conversion This commit adds a new pyclass written in Rust that wraps a Rust Vec. Previously the scoring function also used a dict->IndexMap conversion, but the mapping structure wasn't necessary and added additional overhead, so it was converted to a list/Vec to speed up the execution even further. By using this new pyclass as the input to the Rust scoring function we avoid converting the edge list from a list to a Vec on each call which will reduce the overhead even further. 
--- crates/accelerate/src/vf2_layout.rs | 34 ++++++++++++++++---- qiskit/transpiler/passes/layout/vf2_utils.py | 6 ++-- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/crates/accelerate/src/vf2_layout.rs b/crates/accelerate/src/vf2_layout.rs index 65817f4ac477..fe361b079410 100644 --- a/crates/accelerate/src/vf2_layout.rs +++ b/crates/accelerate/src/vf2_layout.rs @@ -10,8 +10,6 @@ // copyright notice, and modified files need to carry a notice indicating // that they have been altered from the originals. -use indexmap::IndexMap; - use numpy::PyReadonlyArray1; use pyo3::prelude::*; use pyo3::wrap_pyfunction; @@ -22,6 +20,19 @@ use crate::nlayout::{NLayout, VirtualQubit}; const PARALLEL_THRESHOLD: usize = 50; +#[pyclass] +pub struct EdgeList { + pub edge_list: Vec<([VirtualQubit; 2], i32)>, +} + +#[pymethods] +impl EdgeList { + #[new] + pub fn new(edge_list: Vec<([VirtualQubit; 2], i32)>) -> Self { + EdgeList { edge_list } + } +} + /// Score a given circuit with a layout applied #[pyfunction] #[pyo3( @@ -29,14 +40,14 @@ const PARALLEL_THRESHOLD: usize = 50; )] pub fn score_layout( bit_list: PyReadonlyArray1, - edge_list: IndexMap<[VirtualQubit; 2], i32>, + edge_list: &EdgeList, error_map: &ErrorMap, layout: &NLayout, strict_direction: bool, run_in_parallel: bool, ) -> PyResult { let bit_counts = bit_list.as_slice()?; - let edge_filter_map = |(index_arr, gate_count): (&[VirtualQubit; 2], &i32)| -> Option { + let edge_filter_map = |(index_arr, gate_count): &([VirtualQubit; 2], i32)| -> Option { let mut error = error_map .error_map .get(&[index_arr[0].to_phys(layout), index_arr[1].to_phys(layout)]); @@ -66,10 +77,18 @@ pub fn score_layout( }) }; - let mut fidelity: f64 = if edge_list.len() < PARALLEL_THRESHOLD || !run_in_parallel { - edge_list.iter().filter_map(edge_filter_map).product() + let mut fidelity: f64 = if edge_list.edge_list.len() < PARALLEL_THRESHOLD || !run_in_parallel { + edge_list + .edge_list + .iter() + .filter_map(edge_filter_map) + 
.product() } else { - edge_list.par_iter().filter_map(edge_filter_map).product() + edge_list + .edge_list + .par_iter() + .filter_map(edge_filter_map) + .product() }; fidelity *= if bit_list.len() < PARALLEL_THRESHOLD || !run_in_parallel { bit_counts @@ -90,5 +109,6 @@ pub fn score_layout( #[pymodule] pub fn vf2_layout(_py: Python, m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(score_layout))?; + m.add_class::()?; Ok(()) } diff --git a/qiskit/transpiler/passes/layout/vf2_utils.py b/qiskit/transpiler/passes/layout/vf2_utils.py index a0c2fbed417c..99006017482c 100644 --- a/qiskit/transpiler/passes/layout/vf2_utils.py +++ b/qiskit/transpiler/passes/layout/vf2_utils.py @@ -97,9 +97,9 @@ def _visit(dag, weight, wire_map): def build_edge_list(im_graph): """Generate an edge list for scoring.""" - return { - (edge[0], edge[1]): sum(edge[2].values()) for edge in im_graph.edge_index_map().values() - } + return vf2_layout.EdgeList( + [((edge[0], edge[1]), sum(edge[2].values())) for edge in im_graph.edge_index_map().values()] + ) def build_bit_list(im_graph, bit_map):