From 75799a73ebc756c469a1336fc65704277009b3b4 Mon Sep 17 00:00:00 2001 From: Jake Lishman Date: Thu, 8 Aug 2024 00:25:12 +0100 Subject: [PATCH 1/3] Allow immutable borrow to access `QuantumCircuit.parameters` `QuantumCircuit.parameters` is logically a read-only operation on `QuantumCircuit`. For efficiency in multiple calls to `assign_parameters`, we actually cache the sort order of the internal `ParameterTable` on access. This is purely a caching effect, and should not leak out to users. The previous implementation took a Rust-space mutable borrow out in order to (potentially) mutate the cache. This caused problems if multiple Python threads attempted to call `assign_parameters` simultaneously; it was possible for one thread to give up the GIL during its initial call to `CircuitData::copy` (so an immutable borrow was still live), allowing another thread to continue on to the getter `CircuitData::get_parameters`, which required a mutable borrow, which failed due to the paused thread in `copy`. This moves the cache into a `RefCell`, allowing the parameter getters to take an immutable borrow as the receiver. We now write the cache out only if we *can* take the mutable borrow out necessary. This can mean that other threads will have to repeat the work of re-sorting the parameters, because their borrows were blocking the saving of the cache, but this will not cause failures. The methods on `ParameterTable` that invalidate the cache all require a mutable borrow on the table itself. This makes it impossible for an immutable borrow to exist simultaneously on the cache, so these methods should always succeed to acquire the cache lock to invalidate it. --- crates/circuit/src/circuit_data.rs | 2 +- crates/circuit/src/parameter_table.rs | 160 +++++++++++++++++--------- 2 files changed, 109 insertions(+), 53 deletions(-) diff --git a/crates/circuit/src/circuit_data.rs b/crates/circuit/src/circuit_data.rs index dd2618f03f9a..dc83c9dd2487 100644 --- a/crates/circuit/src/circuit_data.rs +++ b/crates/circuit/src/circuit_data.rs @@ -343,7 +343,7 @@ impl CircuitData { /// Get a (cached) sorted list of the Python-space `Parameter` instances tracked by this circuit /// data's parameter table. #[getter] - pub fn get_parameters<'py>(&mut self, py: Python<'py>) -> Bound<'py, PyList> { + pub fn get_parameters<'py>(&self, py: Python<'py>) -> Bound<'py, PyList> { self.param_table.py_parameters(py) } diff --git a/crates/circuit/src/parameter_table.rs b/crates/circuit/src/parameter_table.rs index 8825fbd71772..e637d4968a19 100644 --- a/crates/circuit/src/parameter_table.rs +++ b/crates/circuit/src/parameter_table.rs @@ -10,6 +10,8 @@ // copyright notice, and modified files need to carry a notice indicating // that they have been altered from the originals. +use std::cell::RefCell; + use hashbrown::hash_map::Entry; use hashbrown::{HashMap, HashSet}; use thiserror::Error; @@ -114,6 +116,30 @@ impl<'py> FromPyObject<'py> for VectorUuid { } } +#[derive(Clone, Default, Debug)] +struct ParameterTableOrder { + /// The Rust-space sort order. + uuids: Vec, + /// Cache of a Python-space list of the parameter objects, in order. We only generate this + /// specifically when asked. + py_parameters: Option>, +} + +impl ParameterTableOrder { + fn uuids(&self) -> Option<&[ParameterUuid]> { + (!self.uuids.is_empty()).then_some(self.uuids.as_slice()) + } + + fn py_parameters(&self) -> Option<&Py> { + self.py_parameters.as_ref() + } + + fn invalidate(&mut self) { + self.uuids.clear(); + self.py_parameters = None; + } +} + #[derive(Clone, Default, Debug)] pub struct ParameterTable { /// Mapping of the parameter key (its UUID) to the information on it tracked by this table. @@ -123,18 +149,14 @@ pub struct ParameterTable { by_name: HashMap, /// Additional information on any `ParameterVector` instances that have elements in the circuit. vectors: HashMap, - /// Sort order of the parameters. This is lexicographical for most parameters, except elements - /// of a `ParameterVector` are sorted within the vector by numerical index. We calculate this - /// on demand and cache it; an empty `order` implies it is not currently calculated. We don't - /// use `Option` so we can re-use the allocation for partial parameter bindings. - /// - /// Any method that adds or a removes a parameter is responsible for invalidating this cache. - order: Vec, - /// Cache of a Python-space list of the parameter objects, in order. We only generate this - /// specifically when asked. + /// Cache related to the sort order of the parameters. This is lexicographical for most + /// parameters, except elements of a `ParameterVector` are sorted within the vector by numerical + /// index. We calculate this on demand and cache it; an empty `order` implies it is not + /// currently calculated. We don't use `Option` so we can re-use the allocation for + /// partial parameter bindings. /// - /// Any method that adds or a removes a parameter is responsible for invalidating this cache. - py_parameters: Option>, + /// Any method that adds or removes a parameter needs to invalidate this. + order_cache: RefCell, } impl ParameterTable { @@ -194,8 +216,7 @@ impl ParameterTable { None }; self.by_name.insert(name.clone(), uuid); - self.order.clear(); - self.py_parameters = None; + self.order_cache.borrow_mut().invalidate(); let mut uses = HashSet::new(); if let Some(usage) = usage { uses.insert_unique_unchecked(usage); @@ -226,18 +247,32 @@ impl ParameterTable { } /// Get the (maybe cached) Python list of the sorted `Parameter` objects. - pub fn py_parameters<'py>(&mut self, py: Python<'py>) -> Bound<'py, PyList> { - if let Some(py_parameters) = self.py_parameters.as_ref() { + pub fn py_parameters<'py>(&self, py: Python<'py>) -> Bound<'py, PyList> { + if let Some(py_parameters) = self.order_cache.borrow().py_parameters() { return py_parameters.clone_ref(py).into_bound(py); } - self.ensure_sorted(); - let out = PyList::new_bound( - py, - self.order - .iter() - .map(|uuid| self.by_uuid[uuid].object.clone_ref(py).into_bound(py)), - ); - self.py_parameters = Some(out.clone().unbind()); + let make_parameters = |order: &[ParameterUuid]| { + PyList::new_bound( + py, + order + .iter() + .map(|uuid| self.by_uuid[uuid].object.bind(py).clone()), + ) + }; + let out = match self.order_cache.borrow().uuids() { + Some(uuids) => make_parameters(uuids), + None => { + let uuids = self.sorted_order(); + let out = make_parameters(&uuids); + if let Ok(mut cache) = self.order_cache.try_borrow_mut() { + cache.uuids = uuids; + } + out + } + }; + if let Ok(mut cache) = self.order_cache.try_borrow_mut() { + cache.py_parameters = Some(out.clone().unbind()); + } out } @@ -246,23 +281,18 @@ impl ParameterTable { PySet::new_bound(py, self.by_uuid.values().map(|info| &info.object)) } - /// Ensure that the `order` field is populated and sorted. - fn ensure_sorted(&mut self) { - // If `order` is already populated, it's sorted; it's the responsibility of the methods of - // this struct that mutate it to invalidate the cache. - if !self.order.is_empty() { - return; - } - self.order.reserve(self.by_uuid.len()); - self.order.extend(self.by_uuid.keys()); - self.order.sort_unstable_by_key(|uuid| { + /// Get the sorted order of the `ParameterTable`. This does not access the cache. + fn sorted_order(&self) -> Vec { + let mut out = self.by_uuid.keys().copied().collect::>(); + out.sort_unstable_by_key(|uuid| { let info = &self.by_uuid[uuid]; if let Some(vec) = info.element.as_ref() { (&self.vectors[&vec.vector_uuid].name, vec.index) } else { (&info.name, 0) } - }) + }); + out } /// Add a use of a parameter to the table. @@ -305,8 +335,7 @@ impl ParameterTable { vec_entry.remove_entry(); } } - self.order.clear(); - self.py_parameters = None; + self.order_cache.borrow_mut().invalidate(); entry.remove_entry(); } Ok(()) @@ -332,26 +361,30 @@ impl ParameterTable { (vector_info.refcount > 0).then_some(vector_info) }); } - self.order.clear(); - self.py_parameters = None; + self.order_cache.borrow_mut().invalidate(); Ok(info.uses) } /// Clear this table, yielding the Python parameter objects and their uses in sorted order. + /// + /// The clearing effect is eager and not dependent on the iteration. pub fn drain_ordered( - &'_ mut self, - ) -> impl Iterator, HashSet)> + '_ { - self.ensure_sorted(); + &mut self, + ) -> impl ExactSizeIterator, HashSet)> { + let mut cache = self.order_cache.borrow_mut(); + cache.py_parameters = None; + let order = if cache.uuids.is_empty() { + self.sorted_order() + } else { + ::std::mem::take(&mut cache.uuids) + }; + let by_uuid = ::std::mem::take(&mut self.by_uuid); self.by_name.clear(); self.vectors.clear(); - self.py_parameters = None; - self.order.drain(..).map(|uuid| { - let info = self - .by_uuid - .remove(&uuid) - .expect("tracked UUIDs should be consistent"); - (info.object, info.uses) - }) + ParameterTableDrain { + order: order.into_iter(), + by_uuid, + } } /// Empty this `ParameterTable` of all its contents. This does not affect the capacities of the @@ -360,8 +393,7 @@ impl ParameterTable { self.by_uuid.clear(); self.by_name.clear(); self.vectors.clear(); - self.order.clear(); - self.py_parameters = None; + self.order_cache.borrow_mut().invalidate(); } /// Expose the tracked data for a given parameter as directly as possible to Python space. @@ -396,9 +428,33 @@ impl ParameterTable { visit.call(&info.object)? } // We don't need to / can't visit the `PyBackedStr` stores. - if let Some(list) = self.py_parameters.as_ref() { + if let Some(list) = self.order_cache.borrow().py_parameters() { visit.call(list)? } Ok(()) } } + +struct ParameterTableDrain { + order: ::std::vec::IntoIter, + by_uuid: HashMap, +} +impl Iterator for ParameterTableDrain { + type Item = (Py, HashSet); + + fn next(&mut self) -> Option { + self.order.next().map(|uuid| { + let info = self + .by_uuid + .remove(&uuid) + .expect("tracked UUIDs should be consistent"); + (info.object, info.uses) + }) + } + + fn size_hint(&self) -> (usize, Option) { + self.order.size_hint() + } +} +impl ExactSizeIterator for ParameterTableDrain {} +impl ::std::iter::FusedIterator for ParameterTableDrain {} From 8a1e217394195af281126acb7a0a63dd8d0cc13a Mon Sep 17 00:00:00 2001 From: Jake Lishman Date: Thu, 8 Aug 2024 12:46:53 +0100 Subject: [PATCH 2/3] Use `RefCell::get_mut` where possible In several cases, the previous code was using the runtime-checked `RefCell::borrow_mut` in locations that can be statically proven to be safe to take the mutable reference. Using the correct function for this makes the logic clearer (as well as technically removing a small amount of runtime overhead). --- crates/circuit/src/parameter_table.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/circuit/src/parameter_table.rs b/crates/circuit/src/parameter_table.rs index e637d4968a19..b1ec1e8e3262 100644 --- a/crates/circuit/src/parameter_table.rs +++ b/crates/circuit/src/parameter_table.rs @@ -216,7 +216,7 @@ impl ParameterTable { None }; self.by_name.insert(name.clone(), uuid); - self.order_cache.borrow_mut().invalidate(); + self.order_cache.get_mut().invalidate(); let mut uses = HashSet::new(); if let Some(usage) = usage { uses.insert_unique_unchecked(usage); @@ -335,7 +335,7 @@ impl ParameterTable { vec_entry.remove_entry(); } } - self.order_cache.borrow_mut().invalidate(); + self.order_cache.get_mut().invalidate(); entry.remove_entry(); } Ok(()) @@ -361,7 +361,7 @@ impl ParameterTable { (vector_info.refcount > 0).then_some(vector_info) }); } - self.order_cache.borrow_mut().invalidate(); + self.order_cache.get_mut().invalidate(); Ok(info.uses) } @@ -371,7 +371,7 @@ impl ParameterTable { pub fn drain_ordered( &mut self, ) -> impl ExactSizeIterator, HashSet)> { - let mut cache = self.order_cache.borrow_mut(); + let cache = self.order_cache.get_mut(); cache.py_parameters = None; let order = if cache.uuids.is_empty() { self.sorted_order() @@ -393,7 +393,7 @@ impl ParameterTable { self.by_uuid.clear(); self.by_name.clear(); self.vectors.clear(); - self.order_cache.borrow_mut().invalidate(); + self.order_cache.get_mut().invalidate(); } /// Expose the tracked data for a given parameter as directly as possible to Python space. From 54d261c1674104781e25f270e601bc49079466d5 Mon Sep 17 00:00:00 2001 From: Jake Lishman Date: Fri, 9 Aug 2024 10:13:48 +0100 Subject: [PATCH 3/3] Use `OnceCell` instead of `RefCell` `OnceCell` has less runtime checking than `RefCell` (only whether it is initialised or not, which is an `Option` check), and better represents the dynamic extensions to the borrow checker that we actually need for the caching in this method. All methods that can invalidate the cache all necessarily take `&mut ParameterTable` already, since they will modify Rust-space data. A `OnceCell` can be deinitialised through a mutable reference, so this is fine. The only reason a `&ParameterTable` method would need to mutate the cache is to create it, which is the allowed set of `OnceCell` operations. --- crates/circuit/src/parameter_table.rs | 107 +++++++++----------------- 1 file changed, 38 insertions(+), 69 deletions(-) diff --git a/crates/circuit/src/parameter_table.rs b/crates/circuit/src/parameter_table.rs index b1ec1e8e3262..8fae607e0b90 100644 --- a/crates/circuit/src/parameter_table.rs +++ b/crates/circuit/src/parameter_table.rs @@ -10,7 +10,7 @@ // copyright notice, and modified files need to carry a notice indicating // that they have been altered from the originals. -use std::cell::RefCell; +use std::cell::OnceCell; use hashbrown::hash_map::Entry; use hashbrown::{HashMap, HashSet}; @@ -116,30 +116,6 @@ impl<'py> FromPyObject<'py> for VectorUuid { } } -#[derive(Clone, Default, Debug)] -struct ParameterTableOrder { - /// The Rust-space sort order. - uuids: Vec, - /// Cache of a Python-space list of the parameter objects, in order. We only generate this - /// specifically when asked. - py_parameters: Option>, -} - -impl ParameterTableOrder { - fn uuids(&self) -> Option<&[ParameterUuid]> { - (!self.uuids.is_empty()).then_some(self.uuids.as_slice()) - } - - fn py_parameters(&self) -> Option<&Py> { - self.py_parameters.as_ref() - } - - fn invalidate(&mut self) { - self.uuids.clear(); - self.py_parameters = None; - } -} - #[derive(Clone, Default, Debug)] pub struct ParameterTable { /// Mapping of the parameter key (its UUID) to the information on it tracked by this table. @@ -149,14 +125,17 @@ pub struct ParameterTable { by_name: HashMap, /// Additional information on any `ParameterVector` instances that have elements in the circuit. vectors: HashMap, - /// Cache related to the sort order of the parameters. This is lexicographical for most - /// parameters, except elements of a `ParameterVector` are sorted within the vector by numerical - /// index. We calculate this on demand and cache it; an empty `order` implies it is not - /// currently calculated. We don't use `Option` so we can re-use the allocation for - /// partial parameter bindings. + /// Cache of the sort order of the parameters. This is lexicographical for most parameters, + /// except elements of a `ParameterVector` are sorted within the vector by numerical index. We + /// calculate this on demand and cache it. /// /// Any method that adds or removes a parameter needs to invalidate this. - order_cache: RefCell, + order_cache: OnceCell>, + /// Cache of a Python-space list of the parameter objects, in order. We only generate this + /// specifically when asked. + /// + /// Any method that adds or removes a parameter needs to invalidate this. + py_parameters_cache: OnceCell>, } impl ParameterTable { @@ -216,7 +195,6 @@ impl ParameterTable { None }; self.by_name.insert(name.clone(), uuid); - self.order_cache.get_mut().invalidate(); let mut uses = HashSet::new(); if let Some(usage) = usage { uses.insert_unique_unchecked(usage); @@ -227,6 +205,7 @@ impl ParameterTable { element, object: param_ob.clone().unbind(), }); + self.invalidate_cache(); } } Ok(uuid) @@ -248,32 +227,19 @@ impl ParameterTable { /// Get the (maybe cached) Python list of the sorted `Parameter` objects. pub fn py_parameters<'py>(&self, py: Python<'py>) -> Bound<'py, PyList> { - if let Some(py_parameters) = self.order_cache.borrow().py_parameters() { - return py_parameters.clone_ref(py).into_bound(py); - } - let make_parameters = |order: &[ParameterUuid]| { - PyList::new_bound( - py, - order - .iter() - .map(|uuid| self.by_uuid[uuid].object.bind(py).clone()), - ) - }; - let out = match self.order_cache.borrow().uuids() { - Some(uuids) => make_parameters(uuids), - None => { - let uuids = self.sorted_order(); - let out = make_parameters(&uuids); - if let Ok(mut cache) = self.order_cache.try_borrow_mut() { - cache.uuids = uuids; - } - out - } - }; - if let Ok(mut cache) = self.order_cache.try_borrow_mut() { - cache.py_parameters = Some(out.clone().unbind()); - } - out + self.py_parameters_cache + .get_or_init(|| { + PyList::new_bound( + py, + self.order_cache + .get_or_init(|| self.sorted_order()) + .iter() + .map(|uuid| self.by_uuid[uuid].object.bind(py).clone()), + ) + .unbind() + }) + .bind(py) + .clone() } /// Get a Python set of all tracked `Parameter` objects. @@ -335,8 +301,8 @@ impl ParameterTable { vec_entry.remove_entry(); } } - self.order_cache.get_mut().invalidate(); entry.remove_entry(); + self.invalidate_cache(); } Ok(()) } @@ -361,7 +327,7 @@ impl ParameterTable { (vector_info.refcount > 0).then_some(vector_info) }); } - self.order_cache.get_mut().invalidate(); + self.invalidate_cache(); Ok(info.uses) } @@ -371,16 +337,14 @@ impl ParameterTable { pub fn drain_ordered( &mut self, ) -> impl ExactSizeIterator, HashSet)> { - let cache = self.order_cache.get_mut(); - cache.py_parameters = None; - let order = if cache.uuids.is_empty() { - self.sorted_order() - } else { - ::std::mem::take(&mut cache.uuids) - }; + let order = self + .order_cache + .take() + .unwrap_or_else(|| self.sorted_order()); let by_uuid = ::std::mem::take(&mut self.by_uuid); self.by_name.clear(); self.vectors.clear(); + self.py_parameters_cache.take(); ParameterTableDrain { order: order.into_iter(), by_uuid, @@ -393,7 +357,12 @@ impl ParameterTable { self.by_uuid.clear(); self.by_name.clear(); self.vectors.clear(); - self.order_cache.get_mut().invalidate(); + self.invalidate_cache(); + } + + fn invalidate_cache(&mut self) { + self.order_cache.take(); + self.py_parameters_cache.take(); } /// Expose the tracked data for a given parameter as directly as possible to Python space. @@ -428,7 +397,7 @@ impl ParameterTable { visit.call(&info.object)? } // We don't need to / can't visit the `PyBackedStr` stores. - if let Some(list) = self.order_cache.borrow().py_parameters() { + if let Some(list) = self.py_parameters_cache.get() { visit.call(list)? } Ok(())