From ecc05c50b81006e2cd43146eb80148dec54f65ce Mon Sep 17 00:00:00 2001 From: steviez Date: Sun, 4 Aug 2024 16:46:40 -0500 Subject: [PATCH] Cleanup PoH speed check error (#2400) Cleanup PoH speed check logs and error The current logging and error message from the Poh speed check are confusing. If the node fails, the error message states that the node is too slow. But, the reported numbers are slot durations in nanoseconds where a slower node will have a larger number. Lastly, the reported numbers aren't labeled with a unit so it is hard to make sense of this without looking at the actual code. The check now computes and reports hashes per second. --- core/src/validator.rs | 44 +++++++++++++++++++++++-------------------- entry/src/poh.rs | 6 +++--- 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/core/src/validator.rs b/core/src/validator.rs index 97acfb70533f8f..4052a48e144a6f 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -39,7 +39,7 @@ use { utils::{move_and_async_delete_path, move_and_async_delete_path_contents}, }, solana_client::connection_cache::{ConnectionCache, Protocol}, - solana_entry::poh::compute_hash_time_ns, + solana_entry::poh::compute_hash_time, solana_geyser_plugin_manager::{ geyser_plugin_service::GeyserPluginService, GeyserPluginManagerRequest, }, @@ -1680,29 +1680,28 @@ fn active_vote_account_exists_in_bank(bank: &Bank, vote_account: &Pubkey) -> boo fn check_poh_speed( genesis_config: &GenesisConfig, maybe_hash_samples: Option, -) -> Result<(), String> { +) -> Result<(), ValidatorError> { if let Some(hashes_per_tick) = genesis_config.hashes_per_tick() { let ticks_per_slot = genesis_config.ticks_per_slot(); let hashes_per_slot = hashes_per_tick * ticks_per_slot; - let hash_samples = maybe_hash_samples.unwrap_or(hashes_per_slot); - let hash_time_ns = compute_hash_time_ns(hash_samples); - - let my_ns_per_slot = (hash_time_ns * hashes_per_slot) / hash_samples; - debug!("computed: ns_per_slot: {}", my_ns_per_slot); - let target_ns_per_slot = genesis_config.ns_per_slot() as u64; - debug!( - "cluster ns_per_hash: {}ns ns_per_slot: {}", - target_ns_per_slot / hashes_per_slot, - target_ns_per_slot + + let hash_time = compute_hash_time(hash_samples); + let my_hashes_per_second = (hash_samples as f64 / hash_time.as_secs_f64()) as u64; + let target_slot_duration = Duration::from_nanos(genesis_config.ns_per_slot() as u64); + let target_hashes_per_second = + (hashes_per_slot as f64 / target_slot_duration.as_secs_f64()) as u64; + + info!( + "PoH speed check: \ + computed hashes per second {my_hashes_per_second}, \ + target hashes per second {target_hashes_per_second}" ); - if my_ns_per_slot < target_ns_per_slot { - let extra_ns = target_ns_per_slot - my_ns_per_slot; - info!("PoH speed check: Will sleep {}ns per slot.", extra_ns); - } else { - return Err(format!( - "PoH is slower than cluster target tick rate! mine: {my_ns_per_slot} cluster: {target_ns_per_slot}.", - )); + if my_hashes_per_second < target_hashes_per_second { + return Err(ValidatorError::PohTooSlow { + mine: my_hashes_per_second, + target: target_hashes_per_second, + }); } } Ok(()) @@ -1861,7 +1860,7 @@ fn load_blockstore( } if !config.no_poh_speed_test { - check_poh_speed(&genesis_config, None)?; + check_poh_speed(&genesis_config, None).map_err(|err| format!("{err}"))?; } let mut blockstore = @@ -2345,6 +2344,11 @@ pub enum ValidatorError { #[error("{0}")] Other(String), + #[error( + "PoH hashes/second rate is slower than the cluster target: mine {mine}, cluster {target}" + )] + PohTooSlow { mine: u64, target: u64 }, + #[error(transparent)] TraceError(#[from] TraceError), diff --git a/entry/src/poh.rs b/entry/src/poh.rs index b54c8a745ae4fd..9bf6be66a0594f 100644 --- a/entry/src/poh.rs +++ b/entry/src/poh.rs @@ -109,18 +109,18 @@ impl Poh { } } -pub fn compute_hash_time_ns(hashes_sample_size: u64) -> u64 { +pub fn compute_hash_time(hashes_sample_size: u64) -> Duration { info!("Running {} hashes...", hashes_sample_size); let mut v = Hash::default(); let start = Instant::now(); for _ in 0..hashes_sample_size { v = hash(v.as_ref()); } - start.elapsed().as_nanos() as u64 + start.elapsed() } pub fn compute_hashes_per_tick(duration: Duration, hashes_sample_size: u64) -> u64 { - let elapsed_ms = compute_hash_time_ns(hashes_sample_size) / (1000 * 1000); + let elapsed_ms = compute_hash_time(hashes_sample_size).as_millis() as u64; duration.as_millis() as u64 * hashes_sample_size / elapsed_ms }