Skip to content

Commit

Permalink
Use higher priority for PVF preparation in dispute/approval context (#…
Browse files Browse the repository at this point in the history
…4172)

Related to #4126
discussion

Currently all preparations have same priority and this is not ideal in
all cases. This change should improve the finality time in the context
of on-demand parachains and when `ExecutorParams` are updated on-chain
and a rebuild of all artifacts is required. The desired effect is to
speed up approval and dispute PVF executions which require preparation
and delay backing executions which require preparation.

---------

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>
  • Loading branch information
sandreim authored Apr 19, 2024
1 parent 4f125d1 commit 04a9071
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 14 deletions.
40 changes: 30 additions & 10 deletions polkadot/node/core/candidate-validation/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -657,7 +657,14 @@ async fn validate_candidate_exhaustive(
PrepareJobKind::Compilation,
);

validation_backend.validate_candidate(pvf, exec_timeout, params.encode()).await
validation_backend
.validate_candidate(
pvf,
exec_timeout,
params.encode(),
polkadot_node_core_pvf::Priority::Normal,
)
.await
},
PvfExecKind::Approval =>
validation_backend
Expand All @@ -667,6 +674,7 @@ async fn validate_candidate_exhaustive(
params,
executor_params,
PVF_APPROVAL_EXECUTION_RETRY_DELAY,
polkadot_node_core_pvf::Priority::Critical,
)
.await,
};
Expand Down Expand Up @@ -749,10 +757,15 @@ trait ValidationBackend {
pvf: PvfPrepData,
exec_timeout: Duration,
encoded_params: Vec<u8>,
// The priority for the preparation job.
prepare_priority: polkadot_node_core_pvf::Priority,
) -> Result<WasmValidationResult, ValidationError>;

/// Tries executing a PVF for the approval subsystem. Will retry once if an error is encountered
/// that may have been transient.
/// Tries executing a PVF. Will retry once if an error is encountered that may have
/// been transient.
///
/// The `prepare_priority` is relevant in the context of the caller. Currently we expect
/// that `approval` context has priority over `backing` context.
///
/// NOTE: Should retry only on errors that are a result of execution itself, and not of
/// preparation.
Expand All @@ -763,6 +776,8 @@ trait ValidationBackend {
params: ValidationParams,
executor_params: ExecutorParams,
retry_delay: Duration,
// The priority for the preparation job.
prepare_priority: polkadot_node_core_pvf::Priority,
) -> Result<WasmValidationResult, ValidationError> {
let prep_timeout = pvf_prep_timeout(&executor_params, PvfPrepKind::Prepare);
// Construct the PVF a single time, since it is an expensive operation. Cloning it is cheap.
Expand All @@ -776,8 +791,10 @@ trait ValidationBackend {
// long.
let total_time_start = Instant::now();

let mut validation_result =
self.validate_candidate(pvf.clone(), exec_timeout, params.encode()).await;
// Use `Priority::Critical` as finality trumps parachain liveliness.
let mut validation_result = self
.validate_candidate(pvf.clone(), exec_timeout, params.encode(), prepare_priority)
.await;
if validation_result.is_ok() {
return validation_result
}
Expand Down Expand Up @@ -851,8 +868,9 @@ trait ValidationBackend {

// Encode the params again when re-trying. We expect the retry case to be relatively
// rare, and we want to avoid unconditionally cloning data.
validation_result =
self.validate_candidate(pvf.clone(), new_timeout, params.encode()).await;
validation_result = self
.validate_candidate(pvf.clone(), new_timeout, params.encode(), prepare_priority)
.await;
}
}

Expand All @@ -870,11 +888,13 @@ impl ValidationBackend for ValidationHost {
pvf: PvfPrepData,
exec_timeout: Duration,
encoded_params: Vec<u8>,
// The priority for the preparation job.
prepare_priority: polkadot_node_core_pvf::Priority,
) -> Result<WasmValidationResult, ValidationError> {
let priority = polkadot_node_core_pvf::Priority::Normal;

let (tx, rx) = oneshot::channel();
if let Err(err) = self.execute_pvf(pvf, exec_timeout, encoded_params, priority, tx).await {
if let Err(err) =
self.execute_pvf(pvf, exec_timeout, encoded_params, prepare_priority, tx).await
{
return Err(InternalValidationError::HostCommunication(format!(
"cannot send pvf to the validation host, it might have shut down: {:?}",
err
Expand Down
2 changes: 2 additions & 0 deletions polkadot/node/core/candidate-validation/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ impl ValidationBackend for MockValidateCandidateBackend {
_pvf: PvfPrepData,
_timeout: Duration,
_encoded_params: Vec<u8>,
_prepare_priority: polkadot_node_core_pvf::Priority,
) -> Result<WasmValidationResult, ValidationError> {
// This is expected to panic if called more times than expected, indicating an error in the
// test.
Expand Down Expand Up @@ -1044,6 +1045,7 @@ impl ValidationBackend for MockPreCheckBackend {
_pvf: PvfPrepData,
_timeout: Duration,
_encoded_params: Vec<u8>,
_prepare_priority: polkadot_node_core_pvf::Priority,
) -> Result<WasmValidationResult, ValidationError> {
unreachable!()
}
Expand Down
2 changes: 1 addition & 1 deletion polkadot/node/core/pvf/src/host.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ impl Config {
prepare_worker_program_path,
prepare_worker_spawn_timeout: Duration::from_secs(3),
prepare_workers_soft_max_num: 1,
prepare_workers_hard_max_num: 1,
prepare_workers_hard_max_num: 2,

execute_worker_program_path,
execute_worker_spawn_timeout: Duration::from_secs(3),
Expand Down
7 changes: 4 additions & 3 deletions polkadot/node/core/pvf/src/priority.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,18 @@
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.

/// A priority assigned to execution of a PVF.
/// A priority assigned to preparation of a PVF.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Priority {
/// Normal priority for things that do not require immediate response, but still need to be
/// done pretty quick.
///
/// Approvals and disputes fall into this category.
/// Backing falls into this category.
Normal,
/// This priority is used for requests that are required to be processed as soon as possible.
///
/// For example, backing is on a critical path and requires execution as soon as possible.
/// Disputes and approvals are on a critical path and require execution as soon as
/// possible to not delay finality.
Critical,
}

Expand Down

0 comments on commit 04a9071

Please sign in to comment.