From 04a9071e2a5ba903648f8db19066e671659850fb Mon Sep 17 00:00:00 2001 From: Andrei Sandu <54316454+sandreim@users.noreply.github.com> Date: Fri, 19 Apr 2024 11:15:59 +0300 Subject: [PATCH] Use higher priority for PVF preparation in dispute/approval context (#4172) Related to https://github.com/paritytech/polkadot-sdk/issues/4126 discussion Currently all preparations have same priority and this is not ideal in all cases. This change should improve the finality time in the context of on-demand parachains and when `ExecutorParams` are updated on-chain and a rebuild of all artifacts is required. The desired effect is to speed up approval and dispute PVF executions which require preparation and delay backing executions which require preparation. --------- Signed-off-by: Andrei Sandu --- .../node/core/candidate-validation/src/lib.rs | 40 ++++++++++++++----- .../core/candidate-validation/src/tests.rs | 2 + polkadot/node/core/pvf/src/host.rs | 2 +- polkadot/node/core/pvf/src/priority.rs | 7 ++-- 4 files changed, 37 insertions(+), 14 deletions(-) diff --git a/polkadot/node/core/candidate-validation/src/lib.rs b/polkadot/node/core/candidate-validation/src/lib.rs index ec24434db24c..8663dc43835a 100644 --- a/polkadot/node/core/candidate-validation/src/lib.rs +++ b/polkadot/node/core/candidate-validation/src/lib.rs @@ -657,7 +657,14 @@ async fn validate_candidate_exhaustive( PrepareJobKind::Compilation, ); - validation_backend.validate_candidate(pvf, exec_timeout, params.encode()).await + validation_backend + .validate_candidate( + pvf, + exec_timeout, + params.encode(), + polkadot_node_core_pvf::Priority::Normal, + ) + .await }, PvfExecKind::Approval => validation_backend @@ -667,6 +674,7 @@ async fn validate_candidate_exhaustive( params, executor_params, PVF_APPROVAL_EXECUTION_RETRY_DELAY, + polkadot_node_core_pvf::Priority::Critical, ) .await, }; @@ -749,10 +757,15 @@ trait ValidationBackend { pvf: PvfPrepData, exec_timeout: Duration, encoded_params: Vec, + // The priority for the preparation job. + prepare_priority: polkadot_node_core_pvf::Priority, ) -> Result; - /// Tries executing a PVF for the approval subsystem. Will retry once if an error is encountered - /// that may have been transient. + /// Tries executing a PVF. Will retry once if an error is encountered that may have + /// been transient. + /// + /// The `prepare_priority` is relevant in the context of the caller. Currently we expect + /// that `approval` context has priority over `backing` context. /// /// NOTE: Should retry only on errors that are a result of execution itself, and not of /// preparation. @@ -763,6 +776,8 @@ trait ValidationBackend { params: ValidationParams, executor_params: ExecutorParams, retry_delay: Duration, + // The priority for the preparation job. + prepare_priority: polkadot_node_core_pvf::Priority, ) -> Result { let prep_timeout = pvf_prep_timeout(&executor_params, PvfPrepKind::Prepare); // Construct the PVF a single time, since it is an expensive operation. Cloning it is cheap. @@ -776,8 +791,10 @@ trait ValidationBackend { // long. let total_time_start = Instant::now(); - let mut validation_result = - self.validate_candidate(pvf.clone(), exec_timeout, params.encode()).await; + // Use `Priority::Critical` as finality trumps parachain liveliness. + let mut validation_result = self + .validate_candidate(pvf.clone(), exec_timeout, params.encode(), prepare_priority) + .await; if validation_result.is_ok() { return validation_result } @@ -851,8 +868,9 @@ trait ValidationBackend { // Encode the params again when re-trying. We expect the retry case to be relatively // rare, and we want to avoid unconditionally cloning data. - validation_result = - self.validate_candidate(pvf.clone(), new_timeout, params.encode()).await; + validation_result = self + .validate_candidate(pvf.clone(), new_timeout, params.encode(), prepare_priority) + .await; } } @@ -870,11 +888,13 @@ impl ValidationBackend for ValidationHost { pvf: PvfPrepData, exec_timeout: Duration, encoded_params: Vec, + // The priority for the preparation job. + prepare_priority: polkadot_node_core_pvf::Priority, ) -> Result { - let priority = polkadot_node_core_pvf::Priority::Normal; - let (tx, rx) = oneshot::channel(); - if let Err(err) = self.execute_pvf(pvf, exec_timeout, encoded_params, priority, tx).await { + if let Err(err) = + self.execute_pvf(pvf, exec_timeout, encoded_params, prepare_priority, tx).await + { return Err(InternalValidationError::HostCommunication(format!( "cannot send pvf to the validation host, it might have shut down: {:?}", err diff --git a/polkadot/node/core/candidate-validation/src/tests.rs b/polkadot/node/core/candidate-validation/src/tests.rs index f646f8535495..e492d51e239e 100644 --- a/polkadot/node/core/candidate-validation/src/tests.rs +++ b/polkadot/node/core/candidate-validation/src/tests.rs @@ -368,6 +368,7 @@ impl ValidationBackend for MockValidateCandidateBackend { _pvf: PvfPrepData, _timeout: Duration, _encoded_params: Vec, + _prepare_priority: polkadot_node_core_pvf::Priority, ) -> Result { // This is expected to panic if called more times than expected, indicating an error in the // test. @@ -1044,6 +1045,7 @@ impl ValidationBackend for MockPreCheckBackend { _pvf: PvfPrepData, _timeout: Duration, _encoded_params: Vec, + _prepare_priority: polkadot_node_core_pvf::Priority, ) -> Result { unreachable!() } diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs index 59d5a7e20a88..247d753d7c44 100644 --- a/polkadot/node/core/pvf/src/host.rs +++ b/polkadot/node/core/pvf/src/host.rs @@ -197,7 +197,7 @@ impl Config { prepare_worker_program_path, prepare_worker_spawn_timeout: Duration::from_secs(3), prepare_workers_soft_max_num: 1, - prepare_workers_hard_max_num: 1, + prepare_workers_hard_max_num: 2, execute_worker_program_path, execute_worker_spawn_timeout: Duration::from_secs(3), diff --git a/polkadot/node/core/pvf/src/priority.rs b/polkadot/node/core/pvf/src/priority.rs index d1ef9c604b11..0d18d4b484ca 100644 --- a/polkadot/node/core/pvf/src/priority.rs +++ b/polkadot/node/core/pvf/src/priority.rs @@ -14,17 +14,18 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -/// A priority assigned to execution of a PVF. +/// A priority assigned to preparation of a PVF. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum Priority { /// Normal priority for things that do not require immediate response, but still need to be /// done pretty quick. /// - /// Approvals and disputes fall into this category. + /// Backing falls into this category. Normal, /// This priority is used for requests that are required to be processed as soon as possible. /// - /// For example, backing is on a critical path and requires execution as soon as possible. + /// Disputes and approvals are on a critical path and require execution as soon as + /// possible to not delay finality. Critical, }