From 3726d202147afa2ae173d251046c36fc73fd4cf0 Mon Sep 17 00:00:00 2001
From: Matthew Treinish <mtreinish@kortar.org>
Date: Wed, 25 Oct 2023 17:13:32 -0400
Subject: [PATCH 1/2] Reuse VF2 scoring views for all scoring

In the VF2Layout and VF2PostLayout passes, when a large number of
matches are found we spend an inordinate amount of time in scoring,
rebuilding the same views of the interaction graph over and over again
for each scoring call. For example, in one test cProfile showed
that with #11112 when running transpile() on a 65 Bernstein Vazirani
circuit with a secret of all 1s for FakeSherbrooke with
optimization_level=3 we were calling vf2_utils.score_layout() 161,761
times which took a cumulative time of 14.33 secs. Of that time though
we spent 5.865 secs building the edge list view.

These views are fixed for a given interaction graph, which doesn't change
for the duration of the run() method of these passes. To remove this
inefficiency this commit moves the construction of the views to the
beginning of the passes and just reuses them by reference for each
scoring call, avoiding the reconstruction overhead.
---
 qiskit/transpiler/passes/layout/vf2_layout.py |  4 ++
 .../passes/layout/vf2_post_layout.py          | 11 +++++-
 qiskit/transpiler/passes/layout/vf2_utils.py  | 39 +++++++++++++------
 3 files changed, 41 insertions(+), 13 deletions(-)

diff --git a/qiskit/transpiler/passes/layout/vf2_layout.py b/qiskit/transpiler/passes/layout/vf2_layout.py
index d6de8f51d31f..5fd6e08ab2fb 100644
--- a/qiskit/transpiler/passes/layout/vf2_layout.py
+++ b/qiskit/transpiler/passes/layout/vf2_layout.py
@@ -138,6 +138,8 @@ def run(self, dag):
             self.property_set["VF2Layout_stop_reason"] = VF2LayoutStopReason.MORE_THAN_2Q
             return
         im_graph, im_graph_node_map, reverse_im_graph_node_map, free_nodes = result
+        scoring_edge_list = vf2_utils.build_edge_list(im_graph)
+        scoring_bit_list = vf2_utils.build_bit_list(im_graph, im_graph_node_map)
         cm_graph, cm_nodes = vf2_utils.shuffle_coupling_graph(
             self.coupling_map, self.seed, self.strict_direction
         )
@@ -199,6 +201,8 @@ def mapping_to_layout(layout_mapping):
                 reverse_im_graph_node_map,
                 im_graph,
                 self.strict_direction,
+                edge_list=scoring_edge_list,
+                bit_list=scoring_bit_list,
             )
             # If the layout score is 0 we can't do any better and we'll just
             # waste time finding additional mappings that will at best match
diff --git a/qiskit/transpiler/passes/layout/vf2_post_layout.py b/qiskit/transpiler/passes/layout/vf2_post_layout.py
index cee0e1cf04a8..28d9c397fa34 100644
--- a/qiskit/transpiler/passes/layout/vf2_post_layout.py
+++ b/qiskit/transpiler/passes/layout/vf2_post_layout.py
@@ -167,6 +167,8 @@ def run(self, dag):
             self.property_set["VF2PostLayout_stop_reason"] = VF2PostLayoutStopReason.MORE_THAN_2Q
             return
         im_graph, im_graph_node_map, reverse_im_graph_node_map, free_nodes = result
+        scoring_bit_list = vf2_utils.build_bit_list(im_graph, im_graph_node_map)
+        scoring_edge_list = vf2_utils.build_edge_list(im_graph)
 
         if self.target is not None:
             # If qargs is None then target is global and ideal so no
@@ -256,7 +258,10 @@ def run(self, dag):
             if self.strict_direction:
                 initial_layout = Layout({bit: index for index, bit in enumerate(dag.qubits)})
                 chosen_layout_score = self._score_layout(
-                    initial_layout, im_graph_node_map, reverse_im_graph_node_map, im_graph
+                    initial_layout,
+                    im_graph_node_map,
+                    reverse_im_graph_node_map,
+                    im_graph,
                 )
             else:
                 initial_layout = {
@@ -271,6 +276,8 @@ def run(self, dag):
                     reverse_im_graph_node_map,
                     im_graph,
                     self.strict_direction,
+                    edge_list=scoring_edge_list,
+                    bit_list=scoring_bit_list,
                 )
         # Circuit not in basis so we have nothing to compare against return here
         except KeyError:
@@ -303,6 +310,8 @@ def run(self, dag):
                     reverse_im_graph_node_map,
                     im_graph,
                     self.strict_direction,
+                    edge_list=scoring_edge_list,
+                    bit_list=scoring_bit_list,
                 )
             logger.debug("Trial %s has score %s", trials, layout_score)
             if layout_score < chosen_layout_score:
diff --git a/qiskit/transpiler/passes/layout/vf2_utils.py b/qiskit/transpiler/passes/layout/vf2_utils.py
index b6fc73f18802..a0c2fbed417c 100644
--- a/qiskit/transpiler/passes/layout/vf2_utils.py
+++ b/qiskit/transpiler/passes/layout/vf2_utils.py
@@ -95,6 +95,27 @@ def _visit(dag, weight, wire_map):
     return im_graph, im_graph_node_map, reverse_im_graph_node_map, free_nodes
 
 
+def build_edge_list(im_graph):
+    """Generate an edge list for scoring."""
+    return {
+        (edge[0], edge[1]): sum(edge[2].values()) for edge in im_graph.edge_index_map().values()
+    }
+
+
+def build_bit_list(im_graph, bit_map):
+    """Generate a bit list for scoring."""
+    bit_list = np.zeros(len(im_graph), dtype=np.int32)
+    for node_index in bit_map.values():
+        try:
+            bit_list[node_index] = sum(im_graph[node_index].values())
+        # If node_index not in im_graph that means there was a standalone
+        # node we will score/sort separately outside the vf2 mapping, so we
+        # can skip the hole
+        except IndexError:
+            pass
+    return bit_list
+
+
 def score_layout(
     avg_error_map,
     layout_mapping,
@@ -103,6 +124,8 @@ def score_layout(
     im_graph,
     strict_direction=False,
     run_in_parallel=False,
+    edge_list=None,
+    bit_list=None,
 ):
     """Score a layout given an average error map."""
     if layout_mapping:
@@ -110,18 +133,10 @@ def score_layout(
     else:
         size = 0
     nlayout = NLayout(layout_mapping, size + 1, size + 1)
-    bit_list = np.zeros(len(im_graph), dtype=np.int32)
-    for node_index in bit_map.values():
-        try:
-            bit_list[node_index] = sum(im_graph[node_index].values())
-        # If node_index not in im_graph that means there was a standalone
-        # node we will score/sort separately outside the vf2 mapping, so we
-        # can skip the hole
-        except IndexError:
-            pass
-    edge_list = {
-        (edge[0], edge[1]): sum(edge[2].values()) for edge in im_graph.edge_index_map().values()
-    }
+    if bit_list is None:
+        bit_list = build_bit_list(im_graph, bit_map)
+    if edge_list is None:
+        edge_list = build_edge_list(im_graph)
     return vf2_layout.score_layout(
         bit_list, edge_list, avg_error_map, nlayout, strict_direction, run_in_parallel
     )

From 3264e06a7eaf2ab862fa07dea34c0728184be4cc Mon Sep 17 00:00:00 2001
From: Matthew Treinish <mtreinish@kortar.org>
Date: Thu, 26 Oct 2023 06:40:24 -0400
Subject: [PATCH 2/2] Add EdgeList Rust pyclass to avoid repeated conversion

This commit adds a new pyclass written in Rust that wraps a Rust
Vec. Previously the scoring function also used a dict->IndexMap
conversion, but the mapping structure wasn't necessary and added
additional overhead, so it was converted to a list/Vec to speed up the
execution even further. By using this new pyclass as the input to the
Rust scoring function we avoid converting the edge list from a list to
a Vec on each call, which will reduce the overhead even further.
---
 crates/accelerate/src/vf2_layout.rs          | 34 ++++++++++++++++----
 qiskit/transpiler/passes/layout/vf2_utils.py |  6 ++--
 2 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/crates/accelerate/src/vf2_layout.rs b/crates/accelerate/src/vf2_layout.rs
index 65817f4ac477..fe361b079410 100644
--- a/crates/accelerate/src/vf2_layout.rs
+++ b/crates/accelerate/src/vf2_layout.rs
@@ -10,8 +10,6 @@
 // copyright notice, and modified files need to carry a notice indicating
 // that they have been altered from the originals.
 
-use indexmap::IndexMap;
-
 use numpy::PyReadonlyArray1;
 use pyo3::prelude::*;
 use pyo3::wrap_pyfunction;
@@ -22,6 +20,19 @@ use crate::nlayout::{NLayout, VirtualQubit};
 
 const PARALLEL_THRESHOLD: usize = 50;
 
+#[pyclass]
+pub struct EdgeList {
+    pub edge_list: Vec<([VirtualQubit; 2], i32)>,
+}
+
+#[pymethods]
+impl EdgeList {
+    #[new]
+    pub fn new(edge_list: Vec<([VirtualQubit; 2], i32)>) -> Self {
+        EdgeList { edge_list }
+    }
+}
+
 /// Score a given circuit with a layout applied
 #[pyfunction]
 #[pyo3(
@@ -29,14 +40,14 @@ const PARALLEL_THRESHOLD: usize = 50;
 )]
 pub fn score_layout(
     bit_list: PyReadonlyArray1<i32>,
-    edge_list: IndexMap<[VirtualQubit; 2], i32>,
+    edge_list: &EdgeList,
     error_map: &ErrorMap,
     layout: &NLayout,
     strict_direction: bool,
     run_in_parallel: bool,
 ) -> PyResult<f64> {
     let bit_counts = bit_list.as_slice()?;
-    let edge_filter_map = |(index_arr, gate_count): (&[VirtualQubit; 2], &i32)| -> Option<f64> {
+    let edge_filter_map = |(index_arr, gate_count): &([VirtualQubit; 2], i32)| -> Option<f64> {
         let mut error = error_map
             .error_map
             .get(&[index_arr[0].to_phys(layout), index_arr[1].to_phys(layout)]);
@@ -66,10 +77,18 @@ pub fn score_layout(
         })
     };
 
-    let mut fidelity: f64 = if edge_list.len() < PARALLEL_THRESHOLD || !run_in_parallel {
-        edge_list.iter().filter_map(edge_filter_map).product()
+    let mut fidelity: f64 = if edge_list.edge_list.len() < PARALLEL_THRESHOLD || !run_in_parallel {
+        edge_list
+            .edge_list
+            .iter()
+            .filter_map(edge_filter_map)
+            .product()
     } else {
-        edge_list.par_iter().filter_map(edge_filter_map).product()
+        edge_list
+            .edge_list
+            .par_iter()
+            .filter_map(edge_filter_map)
+            .product()
     };
     fidelity *= if bit_list.len() < PARALLEL_THRESHOLD || !run_in_parallel {
         bit_counts
@@ -90,5 +109,6 @@ pub fn score_layout(
 #[pymodule]
 pub fn vf2_layout(_py: Python, m: &PyModule) -> PyResult<()> {
     m.add_wrapped(wrap_pyfunction!(score_layout))?;
+    m.add_class::<EdgeList>()?;
     Ok(())
 }
diff --git a/qiskit/transpiler/passes/layout/vf2_utils.py b/qiskit/transpiler/passes/layout/vf2_utils.py
index a0c2fbed417c..99006017482c 100644
--- a/qiskit/transpiler/passes/layout/vf2_utils.py
+++ b/qiskit/transpiler/passes/layout/vf2_utils.py
@@ -97,9 +97,9 @@ def _visit(dag, weight, wire_map):
 
 def build_edge_list(im_graph):
     """Generate an edge list for scoring."""
-    return {
-        (edge[0], edge[1]): sum(edge[2].values()) for edge in im_graph.edge_index_map().values()
-    }
+    return vf2_layout.EdgeList(
+        [((edge[0], edge[1]), sum(edge[2].values())) for edge in im_graph.edge_index_map().values()]
+    )
 
 
 def build_bit_list(im_graph, bit_map):