diff --git a/Cargo.lock b/Cargo.lock index bf10b9fe0e3..03a8fc735ef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -779,6 +779,7 @@ version = "0.2.0" dependencies = [ "bitvec 1.0.1", "bls", + "criterion", "derivative", "eth1", "eth2", @@ -3852,6 +3853,7 @@ dependencies = [ "operation_pool", "parking_lot 0.12.3", "proto_array", + "rand", "safe_arith", "sensitive_url", "serde", diff --git a/beacon_node/beacon_chain/Cargo.toml b/beacon_node/beacon_chain/Cargo.toml index 0deccfb622d..5ad56cba55c 100644 --- a/beacon_node/beacon_chain/Cargo.toml +++ b/beacon_node/beacon_chain/Cargo.toml @@ -5,6 +5,10 @@ authors = ["Paul Hauner ", "Age Manning ( + num_of_blobs: usize, + spec: &ChainSpec, +) -> (SignedBeaconBlock, BlobsList) { + let mut block = BeaconBlock::Deneb(BeaconBlockDeneb::empty(spec)); + let mut body = block.body_mut(); + let blob_kzg_commitments = body.blob_kzg_commitments_mut().unwrap(); + *blob_kzg_commitments = + KzgCommitments::::new(vec![KzgCommitment::empty_for_testing(); num_of_blobs]).unwrap(); + + let signed_block = SignedBeaconBlock::from_block(block, Signature::empty()); + + let blobs = (0..num_of_blobs) + .map(|_| Blob::::default()) + .collect::>() + .into(); + + (signed_block, blobs) +} + +fn all_benches(c: &mut Criterion) { + type E = MainnetEthSpec; + let spec = Arc::new(E::default_spec()); + + let trusted_setup: TrustedSetup = serde_json::from_reader(TRUSTED_SETUP_BYTES) + .map_err(|e| format!("Unable to read trusted setup file: {}", e)) + .expect("should have trusted setup"); + let kzg = Arc::new(Kzg::new_from_trusted_setup(trusted_setup).expect("should create kzg")); + + for blob_count in [1, 2, 3, 6] { + let kzg = kzg.clone(); + let (signed_block, blob_sidecars) = create_test_block_and_blobs::(blob_count, &spec); + + let column_sidecars = + blobs_to_data_column_sidecars(&blob_sidecars, &signed_block, &kzg.clone(), &spec) + .unwrap(); + + let spec = spec.clone(); + + c.bench_function(&format!("reconstruct_{}", blob_count), |b| { + b.iter(|| { + 
black_box(reconstruct_data_columns( + &kzg, + &column_sidecars.iter().as_slice()[0..column_sidecars.len() / 2], + spec.as_ref(), + )) + }) + }); + } +} + +criterion_group!(benches, all_benches); +criterion_main!(benches); diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 66e7d06ad7c..74eaa2f50d9 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -22,6 +22,7 @@ pub use crate::canonical_head::CanonicalHead; use crate::chain_config::ChainConfig; use crate::data_availability_checker::{ Availability, AvailabilityCheckError, AvailableBlock, DataAvailabilityChecker, + DataColumnsToPublish, }; use crate::data_column_verification::{GossipDataColumnError, GossipVerifiedDataColumn}; use crate::early_attester_cache::EarlyAttesterCache; @@ -123,6 +124,7 @@ use task_executor::{ShutdownReason, TaskExecutor}; use tokio_stream::Stream; use tree_hash::TreeHash; use types::blob_sidecar::FixedBlobSidecarList; +use types::data_column_sidecar::{ColumnIndex, DataColumnIdentifier}; use types::payload::BlockProductionVersion; use types::*; @@ -206,11 +208,13 @@ impl TryInto for AvailabilityProcessingStatus { /// The result of a chain segment processing. pub enum ChainSegmentResult { /// Processing this chain segment finished successfully. - Successful { imported_blocks: usize }, + Successful { + imported_blocks: Vec<(Hash256, Slot)>, + }, /// There was an error processing this chain segment. Before the error, some blocks could /// have been imported. Failed { - imported_blocks: usize, + imported_blocks: Vec<(Hash256, Slot)>, error: BlockError, }, } @@ -2696,7 +2700,7 @@ impl BeaconChain { chain_segment: Vec>, ) -> Result>, ChainSegmentResult> { // This function will never import any blocks. 
- let imported_blocks = 0; + let imported_blocks = vec![]; let mut filtered_chain_segment = Vec::with_capacity(chain_segment.len()); // Produce a list of the parent root and slot of the child of each block. @@ -2802,7 +2806,7 @@ impl BeaconChain { chain_segment: Vec>, notify_execution_layer: NotifyExecutionLayer, ) -> ChainSegmentResult { - let mut imported_blocks = 0; + let mut imported_blocks = vec![]; // Filter uninteresting blocks from the chain segment in a blocking task. let chain = self.clone(); @@ -2862,6 +2866,7 @@ impl BeaconChain { // Import the blocks into the chain. for signature_verified_block in signature_verified_blocks { + let block_slot = signature_verified_block.slot(); match self .process_block( signature_verified_block.block_root(), @@ -2874,9 +2879,9 @@ impl BeaconChain { { Ok(status) => { match status { - AvailabilityProcessingStatus::Imported(_) => { + AvailabilityProcessingStatus::Imported(block_root) => { // The block was imported successfully. - imported_blocks += 1; + imported_blocks.push((block_root, block_slot)); } AvailabilityProcessingStatus::MissingComponents(slot, block_root) => { warn!(self.log, "Blobs missing in response to range request"; @@ -2909,6 +2914,17 @@ impl BeaconChain { ChainSegmentResult::Successful { imported_blocks } } + /// Updates fork-choice node into a permanent `available` state so it can become a viable head. + /// Only completed sampling results are received. Blocks are unavailable by default and should + /// be pruned on finalization, on a timeout or by a max count. + pub async fn process_sampling_completed(self: &Arc, block_root: Hash256) { + // TODO(das): update fork-choice + // NOTE: It is possible that sampling completes before block is imported into fork choice, + // in that case we may need to update availability cache. 
+ // TODO(das): These log levels are too high, reduce once DAS matures + info!(self.log, "Sampling completed"; "block_root" => %block_root); + } + /// Returns `Ok(GossipVerifiedBlock)` if the supplied `block` should be forwarded onto the /// gossip network. The block is not imported into the chain, it is just partially verified. /// @@ -2983,6 +2999,11 @@ impl BeaconChain { return Err(BlockError::BlockIsAlreadyKnown(blob.block_root())); } + // No need to process and import blobs beyond the PeerDAS epoch. + if self.spec.is_peer_das_enabled_for_epoch(blob.epoch()) { + return Err(BlockError::BlobNotRequired(blob.slot())); + } + if let Some(event_handler) = self.event_handler.as_ref() { if event_handler.has_blob_sidecar_subscribers() { event_handler.register(EventKind::BlobSidecar(SseBlobSidecar::from_blob_sidecar( @@ -3000,7 +3021,13 @@ impl BeaconChain { pub async fn process_gossip_data_columns( self: &Arc, data_columns: Vec>, - ) -> Result> { + ) -> Result< + ( + AvailabilityProcessingStatus, + DataColumnsToPublish, + ), + BlockError, + > { let Ok((slot, block_root)) = data_columns .iter() .map(|c| (c.slot(), c.block_root())) @@ -3067,7 +3094,13 @@ impl BeaconChain { pub async fn process_rpc_custody_columns( self: &Arc, custody_columns: DataColumnSidecarList, - ) -> Result> { + ) -> Result< + ( + AvailabilityProcessingStatus, + DataColumnsToPublish, + ), + BlockError, + > { let Ok((slot, block_root)) = custody_columns .iter() .map(|c| (c.slot(), c.block_root())) @@ -3094,7 +3127,7 @@ impl BeaconChain { let r = self .check_rpc_custody_columns_availability_and_import(slot, block_root, custody_columns) .await; - self.remove_notified(&block_root, r) + self.remove_notified_custody_columns(&block_root, r) } /// Remove any block components from the *processing cache* if we no longer require them. If the @@ -3114,13 +3147,15 @@ impl BeaconChain { /// Remove any block components from the *processing cache* if we no longer require them. 
If the /// block was imported full or erred, we no longer require them. - fn remove_notified_custody_columns( + fn remove_notified_custody_columns

( &self, block_root: &Hash256, - r: Result>, - ) -> Result> { - let has_missing_components = - matches!(r, Ok(AvailabilityProcessingStatus::MissingComponents(_, _))); + r: Result<(AvailabilityProcessingStatus, P), BlockError>, + ) -> Result<(AvailabilityProcessingStatus, P), BlockError> { + let has_missing_components = matches!( + r, + Ok((AvailabilityProcessingStatus::MissingComponents(_, _), _)) + ); if !has_missing_components { self.reqresp_pre_import_cache.write().remove(block_root); } @@ -3378,20 +3413,26 @@ impl BeaconChain { slot: Slot, block_root: Hash256, data_columns: Vec>, - ) -> Result> { + ) -> Result< + ( + AvailabilityProcessingStatus, + DataColumnsToPublish, + ), + BlockError, + > { if let Some(slasher) = self.slasher.as_ref() { for data_colum in &data_columns { slasher.accept_block_header(data_colum.signed_block_header()); } } - let availability = self.data_availability_checker.put_gossip_data_columns( - slot, - block_root, - data_columns, - )?; + let (availability, data_columns_to_publish) = self + .data_availability_checker + .put_gossip_data_columns(slot, block_root, data_columns)?; - self.process_availability(slot, availability).await + self.process_availability(slot, availability) + .await + .map(|result| (result, data_columns_to_publish)) } /// Checks if the provided blobs can make any cached blocks available, and imports immediately @@ -3440,7 +3481,13 @@ impl BeaconChain { slot: Slot, block_root: Hash256, custody_columns: DataColumnSidecarList, - ) -> Result> { + ) -> Result< + ( + AvailabilityProcessingStatus, + DataColumnsToPublish, + ), + BlockError, + > { // Need to scope this to ensure the lock is dropped before calling `process_availability` // Even an explicit drop is not enough to convince the borrow checker. { @@ -3465,13 +3512,16 @@ impl BeaconChain { // This slot value is purely informative for the consumers of // `AvailabilityProcessingStatus::MissingComponents` to log an error with a slot. 
- let availability = self.data_availability_checker.put_rpc_custody_columns( - block_root, - slot.epoch(T::EthSpec::slots_per_epoch()), - custody_columns, - )?; + let (availability, data_columns_to_publish) = + self.data_availability_checker.put_rpc_custody_columns( + block_root, + slot.epoch(T::EthSpec::slots_per_epoch()), + custody_columns, + )?; - self.process_availability(slot, availability).await + self.process_availability(slot, availability) + .await + .map(|result| (result, data_columns_to_publish)) } /// Imports a fully available block. Otherwise, returns `AvailabilityProcessingStatus::MissingComponents` @@ -3522,6 +3572,8 @@ impl BeaconChain { ); } + // TODO(das) record custody column available timestamp + // import let chain = self.clone(); let block_root = self @@ -6895,6 +6947,15 @@ impl BeaconChain { && self.spec.is_peer_das_enabled_for_epoch(block_epoch) } + /// Returns true if we should issue a sampling request for this block + /// TODO(das): check if the block is still within the da_window + pub fn should_sample_slot(&self, slot: Slot) -> bool { + self.config.enable_sampling + && self + .spec + .is_peer_das_enabled_for_epoch(slot.epoch(T::EthSpec::slots_per_epoch())) + } + pub fn logger(&self) -> &Logger { &self.log } diff --git a/beacon_node/beacon_chain/src/blob_verification.rs b/beacon_node/beacon_chain/src/blob_verification.rs index 228b3f7092c..99fc5d9d0c0 100644 --- a/beacon_node/beacon_chain/src/blob_verification.rs +++ b/beacon_node/beacon_chain/src/blob_verification.rs @@ -409,8 +409,8 @@ pub fn validate_blob_sidecar_for_gossip( // Verify that the blob_sidecar was received on the correct subnet. 
if blob_index != subnet { return Err(GossipBlobError::InvalidSubnet { - expected: blob_index, - received: subnet, + expected: subnet, + received: blob_index, }); } diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index 68fccee959b..d9662d59f9e 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -49,17 +49,20 @@ #![allow(clippy::result_large_err)] use crate::beacon_snapshot::PreProcessingSnapshot; -use crate::blob_verification::{GossipBlobError, GossipVerifiedBlob}; +use crate::blob_verification::{GossipBlobError, GossipVerifiedBlob, GossipVerifiedBlobList}; use crate::block_verification_types::{ AsBlock, BlockContentsError, BlockImportData, GossipVerifiedBlockContents, RpcBlock, }; use crate::data_availability_checker::{AvailabilityCheckError, MaybeAvailableBlock}; -use crate::data_column_verification::GossipDataColumnError; +use crate::data_column_verification::{ + GossipDataColumnError, GossipVerifiedDataColumn, GossipVerifiedDataColumnList, +}; use crate::eth1_finalization_cache::Eth1FinalizationData; use crate::execution_payload::{ is_optimistic_candidate_block, validate_execution_payload_for_gossip, validate_merge_block, AllowOptimisticImport, NotifyExecutionLayer, PayloadNotifier, }; +use crate::kzg_utils::blobs_to_data_column_sidecars; use crate::observed_block_producers::SeenBlock; use crate::validator_monitor::HISTORIC_EPOCHS as VALIDATOR_MONITOR_HISTORIC_EPOCHS; use crate::validator_pubkey_cache::ValidatorPubkeyCache; @@ -94,10 +97,12 @@ use std::io::Write; use std::sync::Arc; use store::{Error as DBError, HotStateSummary, KeyValueStore, StoreOp}; use task_executor::JoinHandle; +use types::data_column_sidecar::DataColumnSidecarError; use types::{ - BeaconBlockRef, BeaconState, BeaconStateError, ChainSpec, Epoch, EthSpec, ExecutionBlockHash, - Hash256, InconsistentFork, PublicKey, PublicKeyBytes, RelativeEpoch, 
SignedBeaconBlock, - SignedBeaconBlockHeader, Slot, + BeaconBlockRef, BeaconState, BeaconStateError, BlobsList, ChainSpec, DataColumnSubnetId, Epoch, + EthSpec, ExecutionBlockHash, FullPayload, Hash256, InconsistentFork, KzgProofs, PublicKey, + PublicKeyBytes, RelativeEpoch, RuntimeVariableList, SignedBeaconBlock, SignedBeaconBlockHeader, + Slot, }; use types::{BlobSidecar, ExecPayload}; @@ -306,6 +311,14 @@ pub enum BlockError { /// TODO: We may need to penalize the peer that gave us a potentially invalid rpc blob. /// https://github.com/sigp/lighthouse/issues/4546 AvailabilityCheck(AvailabilityCheckError), + /// A Blob with a slot after PeerDAS is received and is not required to be imported. + /// This can happen because we stay subscribed to the blob subnet after 2 epochs, as we could + /// still receive valid blobs from a Deneb epoch after PeerDAS is activated. + /// + /// ## Peer scoring + /// + /// This indicates the peer is sending an unexpected gossip blob and should be penalised. + BlobNotRequired(Slot), /// An internal error has occurred when processing the block or sidecars. 
/// /// ## Peer scoring @@ -722,27 +735,24 @@ impl IntoGossipVerifiedBlockContents for PublishBlockReq chain: &BeaconChain, ) -> Result, BlockContentsError> { let (block, blobs) = self.deconstruct(); + let peer_das_enabled = chain.spec.is_peer_das_enabled_for_epoch(block.epoch()); + + let (gossip_verified_blobs, gossip_verified_data_columns) = if peer_das_enabled { + let gossip_verified_data_columns = + build_gossip_verified_data_columns(chain, &block, blobs.map(|(_, blobs)| blobs))?; + (None, gossip_verified_data_columns) + } else { + let gossip_verified_blobs = build_gossip_verified_blobs(chain, &block, blobs)?; + (gossip_verified_blobs, None) + }; - let gossip_verified_blobs = blobs - .map(|(kzg_proofs, blobs)| { - let mut gossip_verified_blobs = vec![]; - for (i, (kzg_proof, blob)) in kzg_proofs.iter().zip(blobs).enumerate() { - let _timer = - metrics::start_timer(&metrics::BLOB_SIDECAR_INCLUSION_PROOF_COMPUTATION); - let blob = BlobSidecar::new(i, blob, &block, *kzg_proof) - .map_err(BlockContentsError::SidecarError)?; - drop(_timer); - let gossip_verified_blob = - GossipVerifiedBlob::new(Arc::new(blob), i as u64, chain)?; - gossip_verified_blobs.push(gossip_verified_blob); - } - let gossip_verified_blobs = VariableList::from(gossip_verified_blobs); - Ok::<_, BlockContentsError>(gossip_verified_blobs) - }) - .transpose()?; let gossip_verified_block = GossipVerifiedBlock::new(block, chain)?; - Ok((gossip_verified_block, gossip_verified_blobs)) + Ok(( + gossip_verified_block, + gossip_verified_blobs, + gossip_verified_data_columns, + )) } fn inner_block(&self) -> &SignedBeaconBlock { @@ -750,6 +760,70 @@ impl IntoGossipVerifiedBlockContents for PublishBlockReq } } +#[allow(clippy::type_complexity)] +fn build_gossip_verified_blobs( + chain: &BeaconChain, + block: &Arc>>, + blobs: Option<(KzgProofs, BlobsList)>, +) -> Result>, BlockContentsError> { + blobs + .map(|(kzg_proofs, blobs)| { + let mut gossip_verified_blobs = vec![]; + for (i, (kzg_proof, blob)) in 
kzg_proofs.iter().zip(blobs).enumerate() { + let _timer = + metrics::start_timer(&metrics::BLOB_SIDECAR_INCLUSION_PROOF_COMPUTATION); + let blob = BlobSidecar::new(i, blob, block, *kzg_proof) + .map_err(BlockContentsError::BlobSidecarError)?; + drop(_timer); + let gossip_verified_blob = + GossipVerifiedBlob::new(Arc::new(blob), i as u64, chain)?; + gossip_verified_blobs.push(gossip_verified_blob); + } + let gossip_verified_blobs = VariableList::from(gossip_verified_blobs); + Ok::<_, BlockContentsError>(gossip_verified_blobs) + }) + .transpose() +} + +fn build_gossip_verified_data_columns( + chain: &BeaconChain, + block: &SignedBeaconBlock>, + blobs: Option>, +) -> Result>, BlockContentsError> { + blobs + // Only attempt to build data columns if blobs is non empty to avoid skewing the metrics. + .filter(|b| !b.is_empty()) + .map(|blobs| { + // NOTE: we expect KZG to be initialized if the blobs are present + let kzg = chain + .kzg + .as_ref() + .ok_or(BlockContentsError::DataColumnError( + GossipDataColumnError::KzgNotInitialized, + ))?; + + let timer = metrics::start_timer(&metrics::DATA_COLUMN_SIDECAR_COMPUTATION); + let sidecars = blobs_to_data_column_sidecars(&blobs, block, kzg, &chain.spec)?; + drop(timer); + let mut gossip_verified_data_columns = vec![]; + for sidecar in sidecars { + let subnet = DataColumnSubnetId::from_column_index::( + sidecar.index as usize, + &chain.spec, + ); + let column = GossipVerifiedDataColumn::new(sidecar, subnet.into(), chain)?; + gossip_verified_data_columns.push(column); + } + let gossip_verified_data_columns = RuntimeVariableList::new( + gossip_verified_data_columns, + chain.spec.number_of_columns, + ) + .map_err(DataColumnSidecarError::SszError)?; + Ok::<_, BlockContentsError>(gossip_verified_data_columns) + }) + .transpose() +} + /// Implemented on types that can be converted into a `ExecutionPendingBlock`. /// /// Used to allow functions to accept blocks at various stages of verification. 
@@ -1169,6 +1243,10 @@ impl SignatureVerifiedBlock { pub fn block_root(&self) -> Hash256 { self.block_root } + + pub fn slot(&self) -> Slot { + self.block.slot() + } } impl IntoExecutionPendingBlock for SignatureVerifiedBlock { diff --git a/beacon_node/beacon_chain/src/block_verification_types.rs b/beacon_node/beacon_chain/src/block_verification_types.rs index 426c41bfeab..b271f0a2f98 100644 --- a/beacon_node/beacon_chain/src/block_verification_types.rs +++ b/beacon_node/beacon_chain/src/block_verification_types.rs @@ -2,6 +2,9 @@ use crate::blob_verification::{GossipBlobError, GossipVerifiedBlobList}; use crate::block_verification::BlockError; use crate::data_availability_checker::AvailabilityCheckError; pub use crate::data_availability_checker::{AvailableBlock, MaybeAvailableBlock}; +use crate::data_column_verification::{ + CustodyDataColumn, CustodyDataColumnList, GossipDataColumnError, GossipVerifiedDataColumnList, +}; use crate::eth1_finalization_cache::Eth1FinalizationData; use crate::{get_block_root, GossipVerifiedBlock, PayloadVerificationOutcome}; use derivative::Derivative; @@ -9,10 +12,11 @@ use ssz_types::VariableList; use state_processing::ConsensusContext; use std::fmt::{Debug, Formatter}; use std::sync::Arc; -use types::blob_sidecar::{BlobIdentifier, BlobSidecarError, FixedBlobSidecarList}; +use types::blob_sidecar::{self, BlobIdentifier, FixedBlobSidecarList}; +use types::data_column_sidecar::{self}; use types::{ - BeaconBlockRef, BeaconState, BlindedPayload, BlobSidecarList, Epoch, EthSpec, Hash256, - SignedBeaconBlock, SignedBeaconBlockHeader, Slot, + BeaconBlockRef, BeaconState, BlindedPayload, BlobSidecarList, ChainSpec, Epoch, EthSpec, + Hash256, RuntimeVariableList, SignedBeaconBlock, SignedBeaconBlockHeader, Slot, }; /// A block that has been received over RPC. 
It has 2 internal variants: @@ -50,6 +54,7 @@ impl RpcBlock { match &self.block { RpcBlockInner::Block(block) => block, RpcBlockInner::BlockAndBlobs(block, _) => block, + RpcBlockInner::BlockAndCustodyColumns(block, _) => block, } } @@ -57,6 +62,7 @@ impl RpcBlock { match &self.block { RpcBlockInner::Block(block) => block.clone(), RpcBlockInner::BlockAndBlobs(block, _) => block.clone(), + RpcBlockInner::BlockAndCustodyColumns(block, _) => block.clone(), } } @@ -64,6 +70,15 @@ impl RpcBlock { match &self.block { RpcBlockInner::Block(_) => None, RpcBlockInner::BlockAndBlobs(_, blobs) => Some(blobs), + RpcBlockInner::BlockAndCustodyColumns(_, _) => None, + } + } + + pub fn custody_columns(&self) -> Option<&CustodyDataColumnList> { + match &self.block { + RpcBlockInner::Block(_) => None, + RpcBlockInner::BlockAndBlobs(_, _) => None, + RpcBlockInner::BlockAndCustodyColumns(_, data_columns) => Some(data_columns), } } } @@ -79,6 +94,9 @@ enum RpcBlockInner { /// This variant is used with parent lookups and by-range responses. It should have all blobs /// ordered, all block roots matching, and the correct number of blobs for this block. BlockAndBlobs(Arc>, BlobSidecarList), + /// This variant is used with parent lookups and by-range responses. It should have all + /// requested data columns, all block roots matching for this block. + BlockAndCustodyColumns(Arc>, CustodyDataColumnList), } impl RpcBlock { @@ -136,6 +154,33 @@ impl RpcBlock { }) } + pub fn new_with_custody_columns( + block_root: Option, + block: Arc>, + custody_columns: Vec>, + spec: &ChainSpec, + ) -> Result { + let block_root = block_root.unwrap_or_else(|| get_block_root(&block)); + + if block.num_expected_blobs() > 0 && custody_columns.is_empty() { + // The number of required custody columns is out of scope here. + return Err(AvailabilityCheckError::MissingCustodyColumns); + } + // Treat empty data column lists as if they are missing. 
+ let inner = if !custody_columns.is_empty() { + RpcBlockInner::BlockAndCustodyColumns( + block, + RuntimeVariableList::new(custody_columns, spec.number_of_columns)?, + ) + } else { + RpcBlockInner::Block(block) + }; + Ok(Self { + block_root, + block: inner, + }) + } + pub fn new_from_fixed( block_root: Hash256, block: Arc>, @@ -153,25 +198,36 @@ impl RpcBlock { Self::new(Some(block_root), block, blobs) } + #[allow(clippy::type_complexity)] pub fn deconstruct( self, ) -> ( Hash256, Arc>, Option>, + Option>, ) { let block_root = self.block_root(); match self.block { - RpcBlockInner::Block(block) => (block_root, block, None), - RpcBlockInner::BlockAndBlobs(block, blobs) => (block_root, block, Some(blobs)), + RpcBlockInner::Block(block) => (block_root, block, None, None), + RpcBlockInner::BlockAndBlobs(block, blobs) => (block_root, block, Some(blobs), None), + RpcBlockInner::BlockAndCustodyColumns(block, data_columns) => { + (block_root, block, None, Some(data_columns)) + } } } pub fn n_blobs(&self) -> usize { match &self.block { - RpcBlockInner::Block(_) => 0, + RpcBlockInner::Block(_) | RpcBlockInner::BlockAndCustodyColumns(_, _) => 0, RpcBlockInner::BlockAndBlobs(_, blobs) => blobs.len(), } } + pub fn n_data_columns(&self) -> usize { + match &self.block { + RpcBlockInner::Block(_) | RpcBlockInner::BlockAndBlobs(_, _) => 0, + RpcBlockInner::BlockAndCustodyColumns(_, data_columns) => data_columns.len(), + } + } } /// A block that has gone through all pre-deneb block processing checks including block processing @@ -334,14 +390,19 @@ impl BlockImportData { } } -pub type GossipVerifiedBlockContents = - (GossipVerifiedBlock, Option>); +pub type GossipVerifiedBlockContents = ( + GossipVerifiedBlock, + Option>, + Option>, +); #[derive(Debug)] pub enum BlockContentsError { BlockError(BlockError), BlobError(GossipBlobError), - SidecarError(BlobSidecarError), + BlobSidecarError(blob_sidecar::BlobSidecarError), + DataColumnError(GossipDataColumnError), + 
DataColumnSidecarError(data_column_sidecar::DataColumnSidecarError), } impl From> for BlockContentsError { @@ -356,6 +417,18 @@ impl From> for BlockContentsError { } } +impl From for BlockContentsError { + fn from(value: GossipDataColumnError) -> Self { + Self::DataColumnError(value) + } +} + +impl From for BlockContentsError { + fn from(value: data_column_sidecar::DataColumnSidecarError) -> Self { + Self::DataColumnSidecarError(value) + } +} + impl std::fmt::Display for BlockContentsError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -365,8 +438,14 @@ impl std::fmt::Display for BlockContentsError { BlockContentsError::BlobError(err) => { write!(f, "BlobError({})", err) } - BlockContentsError::SidecarError(err) => { - write!(f, "SidecarError({:?})", err) + BlockContentsError::BlobSidecarError(err) => { + write!(f, "BlobSidecarError({:?})", err) + } + BlockContentsError::DataColumnError(err) => { + write!(f, "DataColumnError({:?})", err) + } + BlockContentsError::DataColumnSidecarError(err) => { + write!(f, "DataColumnSidecarError({:?})", err) } } } @@ -517,13 +596,28 @@ impl AsBlock for AvailableBlock { } fn into_rpc_block(self) -> RpcBlock { - // TODO(das): rpc data columns to be merged from `das` branch - let (block_root, block, blobs_opt, _data_columns_opt) = self.deconstruct(); + let number_of_columns = self.spec.number_of_columns; + let (block_root, block, blobs_opt, data_columns_opt) = self.deconstruct(); // Circumvent the constructor here, because an Available block will have already had // consistency checks performed. 
- let inner = match blobs_opt { - None => RpcBlockInner::Block(block), - Some(blobs) => RpcBlockInner::BlockAndBlobs(block, blobs), + let inner = match (blobs_opt, data_columns_opt) { + (None, None) => RpcBlockInner::Block(block), + (Some(blobs), _) => RpcBlockInner::BlockAndBlobs(block, blobs), + (_, Some(data_columns)) => RpcBlockInner::BlockAndCustodyColumns( + block, + RuntimeVariableList::new( + data_columns + .into_iter() + // TODO(das): This is an ugly hack that should be removed. After updating + // store types to handle custody data columns this should not be required. + // It's okay-ish because available blocks must have all the required custody + // columns. + .map(|d| CustodyDataColumn::from_asserted_custody(d)) + .collect(), + number_of_columns, + ) + .expect("data column list is within bounds"), + ), }; RpcBlock { block_root, @@ -555,12 +649,14 @@ impl AsBlock for RpcBlock { match &self.block { RpcBlockInner::Block(block) => block, RpcBlockInner::BlockAndBlobs(block, _) => block, + RpcBlockInner::BlockAndCustodyColumns(block, _) => block, } } fn block_cloned(&self) -> Arc> { match &self.block { RpcBlockInner::Block(block) => block.clone(), RpcBlockInner::BlockAndBlobs(block, _) => block.clone(), + RpcBlockInner::BlockAndCustodyColumns(block, _) => block.clone(), } } fn canonical_root(&self) -> Hash256 { diff --git a/beacon_node/beacon_chain/src/builder.rs b/beacon_node/beacon_chain/src/builder.rs index 042d14a4fa4..84c6dea3680 100644 --- a/beacon_node/beacon_chain/src/builder.rs +++ b/beacon_node/beacon_chain/src/builder.rs @@ -409,6 +409,11 @@ where .init_blob_info(genesis.beacon_block.slot()) .map_err(|e| format!("Failed to initialize genesis blob info: {:?}", e))?, ); + self.pending_io_batch.push( + store + .init_data_column_info(genesis.beacon_block.slot()) + .map_err(|e| format!("Failed to initialize genesis data column info: {:?}", e))?, + ); let fc_store = BeaconForkChoiceStore::get_forkchoice_store(store, &genesis) .map_err(|e| format!("Unable 
to initialize fork choice store: {e:?}"))?; @@ -573,6 +578,11 @@ where .init_blob_info(weak_subj_block.slot()) .map_err(|e| format!("Failed to initialize blob info: {:?}", e))?, ); + self.pending_io_batch.push( + store + .init_data_column_info(weak_subj_block.slot()) + .map_err(|e| format!("Failed to initialize data column info: {:?}", e))?, + ); // Store pruning checkpoint to prevent attempting to prune before the anchor state. self.pending_io_batch @@ -978,7 +988,6 @@ where self.kzg.clone(), store, self.import_all_data_columns, - &log, self.spec, ) .map_err(|e| format!("Error initializing DataAvailabilityChecker: {:?}", e))?, diff --git a/beacon_node/beacon_chain/src/chain_config.rs b/beacon_node/beacon_chain/src/chain_config.rs index c908efa07c3..20edfbf31a4 100644 --- a/beacon_node/beacon_chain/src/chain_config.rs +++ b/beacon_node/beacon_chain/src/chain_config.rs @@ -84,6 +84,10 @@ pub struct ChainConfig { pub epochs_per_migration: u64, /// When set to true Light client server computes and caches state proofs for serving updates pub enable_light_client_server: bool, + /// The number of data columns to withhold / exclude from publishing when proposing a block. + pub malicious_withhold_count: usize, + /// Enable peer sampling on blocks. 
+ pub enable_sampling: bool, } impl Default for ChainConfig { @@ -115,6 +119,8 @@ impl Default for ChainConfig { always_prepare_payload: false, epochs_per_migration: crate::migrate::DEFAULT_EPOCHS_PER_MIGRATION, enable_light_client_server: false, + malicious_withhold_count: 0, + enable_sampling: false, } } } diff --git a/beacon_node/beacon_chain/src/data_availability_checker.rs b/beacon_node/beacon_chain/src/data_availability_checker.rs index 1bfe377ad05..470cee713fa 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker.rs @@ -5,7 +5,7 @@ use crate::block_verification_types::{ use crate::data_availability_checker::overflow_lru_cache::DataAvailabilityCheckerInner; use crate::{BeaconChain, BeaconChainTypes, BeaconStore}; use kzg::Kzg; -use slog::{debug, error, Logger}; +use slog::{debug, error}; use slot_clock::SlotClock; use std::fmt; use std::fmt::Debug; @@ -16,7 +16,7 @@ use task_executor::TaskExecutor; use types::blob_sidecar::{BlobIdentifier, BlobSidecar, FixedBlobSidecarList}; use types::{ BlobSidecarList, ChainSpec, DataColumnIdentifier, DataColumnSidecar, DataColumnSidecarList, - Epoch, EthSpec, Hash256, SignedBeaconBlock, Slot, + Epoch, EthSpec, Hash256, RuntimeVariableList, SignedBeaconBlock, Slot, }; mod error; @@ -24,11 +24,14 @@ mod overflow_lru_cache; mod state_lru_cache; use crate::data_column_verification::{ - GossipVerifiedDataColumn, KzgVerifiedCustodyDataColumn, KzgVerifiedDataColumn, + verify_kzg_for_data_column_list, CustodyDataColumn, GossipVerifiedDataColumn, + KzgVerifiedCustodyDataColumn, KzgVerifiedDataColumn, }; pub use error::{Error as AvailabilityCheckError, ErrorCategory as AvailabilityCheckErrorCategory}; use types::non_zero_usize::new_non_zero_usize; +pub use self::overflow_lru_cache::DataColumnsToPublish; + /// The LRU Cache stores `PendingComponents` which can store up to /// `MAX_BLOBS_PER_BLOCK = 6` blobs each. A `BlobSidecar` is 0.131256 MB. 
So /// the maximum size of a `PendingComponents` is ~ 0.787536 MB. Setting this @@ -67,8 +70,7 @@ pub struct DataAvailabilityChecker { availability_cache: Arc>, slot_clock: T::SlotClock, kzg: Option>, - log: Logger, - spec: ChainSpec, + spec: Arc, } /// This type is returned after adding a block / blob to the `DataAvailabilityChecker`. @@ -98,9 +100,9 @@ impl DataAvailabilityChecker { kzg: Option>, store: BeaconStore, import_all_data_columns: bool, - log: &Logger, spec: ChainSpec, ) -> Result { + let spec = Arc::new(spec); let custody_subnet_count = if import_all_data_columns { spec.data_column_sidecar_subnet_count as usize } else { @@ -120,7 +122,6 @@ impl DataAvailabilityChecker { availability_cache: Arc::new(inner), slot_clock, kzg, - log: log.clone(), spec, }) } @@ -208,12 +209,14 @@ impl DataAvailabilityChecker { /// Put a list of custody columns received via RPC into the availability cache. This performs KZG /// verification on the blobs in the list. + #[allow(clippy::type_complexity)] pub fn put_rpc_custody_columns( &self, block_root: Hash256, epoch: Epoch, custody_columns: DataColumnSidecarList, - ) -> Result, AvailabilityCheckError> { + ) -> Result<(Availability, DataColumnsToPublish), AvailabilityCheckError> + { let Some(kzg) = self.kzg.as_ref() else { return Err(AvailabilityCheckError::KzgNotInitialized); }; @@ -221,16 +224,16 @@ impl DataAvailabilityChecker { // TODO(das): report which column is invalid for proper peer scoring // TODO(das): batch KZG verification here let verified_custody_columns = custody_columns - .iter() + .into_iter() .map(|column| { Ok(KzgVerifiedCustodyDataColumn::from_asserted_custody( - KzgVerifiedDataColumn::new(column.clone(), kzg) - .map_err(AvailabilityCheckError::Kzg)?, + KzgVerifiedDataColumn::new(column, kzg).map_err(AvailabilityCheckError::Kzg)?, )) }) .collect::, AvailabilityCheckError>>()?; self.availability_cache.put_kzg_verified_data_columns( + kzg, block_root, epoch, verified_custody_columns, @@ -253,20 +256,35 @@ 
impl DataAvailabilityChecker { ) } + /// Check if we've cached other data columns for this block. If it satisfies the custody requirement and we also + /// have a block cached, return the `Availability` variant triggering block import. + /// Otherwise cache the data column sidecar. + /// + /// This should only accept gossip verified data columns, so we should not have to worry about dupes. + #[allow(clippy::type_complexity)] pub fn put_gossip_data_columns( &self, slot: Slot, block_root: Hash256, gossip_data_columns: Vec>, - ) -> Result, AvailabilityCheckError> { + ) -> Result<(Availability, DataColumnsToPublish), AvailabilityCheckError> + { + let Some(kzg) = self.kzg.as_ref() else { + return Err(AvailabilityCheckError::KzgNotInitialized); + }; let epoch = slot.epoch(T::EthSpec::slots_per_epoch()); + let custody_columns = gossip_data_columns .into_iter() .map(|c| KzgVerifiedCustodyDataColumn::from_asserted_custody(c.into_inner())) .collect::>(); - self.availability_cache - .put_kzg_verified_data_columns(block_root, epoch, custody_columns) + self.availability_cache.put_kzg_verified_data_columns( + kzg, + block_root, + epoch, + custody_columns, + ) } /// Check if we have all the blobs for a block. 
Returns `Availability` which has information @@ -293,42 +311,66 @@ impl DataAvailabilityChecker { &self, block: RpcBlock, ) -> Result, AvailabilityCheckError> { - let (block_root, block, blobs) = block.deconstruct(); - match blobs { - None => { - if self.blobs_required_for_block(&block) { - Ok(MaybeAvailableBlock::AvailabilityPending { block_root, block }) - } else { - Ok(MaybeAvailableBlock::Available(AvailableBlock { - block_root, - block, - blobs: None, - data_columns: None, - blobs_available_timestamp: None, - })) - } - } - Some(blob_list) => { - let verified_blobs = if self.blobs_required_for_block(&block) { - let kzg = self - .kzg - .as_ref() - .ok_or(AvailabilityCheckError::KzgNotInitialized)?; - verify_kzg_for_blob_list(blob_list.iter(), kzg) - .map_err(AvailabilityCheckError::Kzg)?; - Some(blob_list) - } else { - None - }; + let (block_root, block, blobs, data_columns) = block.deconstruct(); + if self.blobs_required_for_block(&block) { + return if let Some(blob_list) = blobs.as_ref() { + let kzg = self + .kzg + .as_ref() + .ok_or(AvailabilityCheckError::KzgNotInitialized)?; + verify_kzg_for_blob_list(blob_list.iter(), kzg) + .map_err(AvailabilityCheckError::Kzg)?; Ok(MaybeAvailableBlock::Available(AvailableBlock { block_root, block, - blobs: verified_blobs, + blobs, + blobs_available_timestamp: None, data_columns: None, + spec: self.spec.clone(), + })) + } else { + Ok(MaybeAvailableBlock::AvailabilityPending { block_root, block }) + }; + } + if self.data_columns_required_for_block(&block) { + return if let Some(data_column_list) = data_columns.as_ref() { + let kzg = self + .kzg + .as_ref() + .ok_or(AvailabilityCheckError::KzgNotInitialized)?; + verify_kzg_for_data_column_list( + data_column_list + .iter() + .map(|custody_column| custody_column.as_data_column()), + kzg, + ) + .map_err(AvailabilityCheckError::Kzg)?; + Ok(MaybeAvailableBlock::Available(AvailableBlock { + block_root, + block, + blobs: None, blobs_available_timestamp: None, + data_columns: 
Some( + data_column_list + .into_iter() + .map(|d| d.clone_arc()) + .collect(), + ), + spec: self.spec.clone(), })) - } + } else { + Ok(MaybeAvailableBlock::AvailabilityPending { block_root, block }) + }; } + + Ok(MaybeAvailableBlock::Available(AvailableBlock { + block_root, + block, + blobs: None, + blobs_available_timestamp: None, + data_columns: None, + spec: self.spec.clone(), + })) } /// Checks if a vector of blocks are available. Returns a vector of `MaybeAvailableBlock` @@ -360,64 +402,108 @@ impl DataAvailabilityChecker { verify_kzg_for_blob_list(all_blobs.iter(), kzg)?; } + let all_data_columns = blocks + .iter() + .filter(|block| self.data_columns_required_for_block(block.as_block())) + // this clone is cheap as it's cloning an Arc + .filter_map(|block| block.custody_columns().cloned()) + .flatten() + .map(CustodyDataColumn::into_inner) + .collect::>(); + let all_data_columns = + RuntimeVariableList::from_vec(all_data_columns, self.spec.number_of_columns); + + // verify kzg for all data columns at once + if !all_data_columns.is_empty() { + let kzg = self + .kzg + .as_ref() + .ok_or(AvailabilityCheckError::KzgNotInitialized)?; + verify_kzg_for_data_column_list(all_data_columns.iter(), kzg)?; + } + for block in blocks { - let (block_root, block, blobs) = block.deconstruct(); - match blobs { - None => { - if self.blobs_required_for_block(&block) { - results.push(MaybeAvailableBlock::AvailabilityPending { block_root, block }) - } else { - results.push(MaybeAvailableBlock::Available(AvailableBlock { - block_root, - block, - blobs: None, - data_columns: None, - blobs_available_timestamp: None, - })) - } - } - Some(blob_list) => { - let verified_blobs = if self.blobs_required_for_block(&block) { - Some(blob_list) - } else { - None - }; - // already verified kzg for all blobs - results.push(MaybeAvailableBlock::Available(AvailableBlock { + let (block_root, block, blobs, data_columns) = block.deconstruct(); + + let maybe_available_block = if 
self.blobs_required_for_block(&block) { + if blobs.is_some() { + MaybeAvailableBlock::Available(AvailableBlock { block_root, block, - blobs: verified_blobs, + blobs, + blobs_available_timestamp: None, data_columns: None, + spec: self.spec.clone(), + }) + } else { + MaybeAvailableBlock::AvailabilityPending { block_root, block } + } + } else if self.data_columns_required_for_block(&block) { + if data_columns.is_some() { + MaybeAvailableBlock::Available(AvailableBlock { + block_root, + block, + blobs: None, + data_columns: data_columns.map(|data_columns| { + data_columns.into_iter().map(|d| d.into_inner()).collect() + }), blobs_available_timestamp: None, - })) + spec: self.spec.clone(), + }) + } else { + MaybeAvailableBlock::AvailabilityPending { block_root, block } } - } + } else { + MaybeAvailableBlock::Available(AvailableBlock { + block_root, + block, + blobs: None, + data_columns: None, + blobs_available_timestamp: None, + spec: self.spec.clone(), + }) + }; + + results.push(maybe_available_block); } Ok(results) } /// Determines the blob requirements for a block. If the block is pre-deneb, no blobs are required. - /// If the block's epoch is from prior to the data availability boundary, no blobs are required. + /// If the epoch is from prior to the data availability boundary, no blobs are required. + pub fn blobs_required_for_epoch(&self, epoch: Epoch) -> bool { + self.da_check_required_for_epoch(epoch) && !self.spec.is_peer_das_enabled_for_epoch(epoch) + } + + /// Determines the data column requirements for an epoch. + /// - If the epoch is pre-peerdas, no data columns are required. + /// - If the epoch is from prior to the data availability boundary, no data columns are required. 
+ pub fn data_columns_required_for_epoch(&self, epoch: Epoch) -> bool { + self.da_check_required_for_epoch(epoch) && self.spec.is_peer_das_enabled_for_epoch(epoch) + } + + /// See `Self::blobs_required_for_epoch` fn blobs_required_for_block(&self, block: &SignedBeaconBlock) -> bool { - block.num_expected_blobs() > 0 && self.da_check_required_for_epoch(block.epoch()) + block.num_expected_blobs() > 0 && self.blobs_required_for_epoch(block.epoch()) + } + + /// See `Self::data_columns_required_for_epoch` + fn data_columns_required_for_block(&self, block: &SignedBeaconBlock) -> bool { + block.num_expected_blobs() > 0 && self.data_columns_required_for_epoch(block.epoch()) } /// The epoch at which we require a data availability check in block processing. /// `None` if the `Deneb` fork is disabled. pub fn data_availability_boundary(&self) -> Option { - self.spec.deneb_fork_epoch.and_then(|fork_epoch| { - self.slot_clock - .now() - .map(|slot| slot.epoch(T::EthSpec::slots_per_epoch())) - .map(|current_epoch| { - std::cmp::max( - fork_epoch, - current_epoch - .saturating_sub(self.spec.min_epochs_for_blob_sidecars_requests), - ) - }) - }) + let fork_epoch = self.spec.deneb_fork_epoch?; + let current_slot = self.slot_clock.now()?; + Some(std::cmp::max( + fork_epoch, + current_slot + .epoch(T::EthSpec::slots_per_epoch()) + .saturating_sub(self.spec.min_epochs_for_blob_sidecars_requests), + )) } /// Returns true if the given epoch lies within the da boundary and false otherwise. 
@@ -426,18 +512,6 @@ impl DataAvailabilityChecker { .map_or(false, |da_epoch| block_epoch >= da_epoch) } - pub fn da_check_required_for_current_epoch(&self) -> bool { - let Some(current_slot) = self.slot_clock.now_or_genesis() else { - error!( - self.log, - "Failed to read slot clock when checking for missing blob ids" - ); - return false; - }; - - self.da_check_required_for_epoch(current_slot.epoch(T::EthSpec::slots_per_epoch())) - } - /// Returns `true` if the current epoch is greater than or equal to the `Deneb` epoch. pub fn is_deneb(&self) -> bool { self.slot_clock.now().map_or(false, |slot| { @@ -556,6 +630,7 @@ pub struct AvailableBlock { data_columns: Option>, /// Timestamp at which this block first became available (UNIX timestamp, time since 1970). blobs_available_timestamp: Option, + pub spec: Arc, } impl AvailableBlock { @@ -564,6 +639,7 @@ impl AvailableBlock { block: Arc>, blobs: Option>, data_columns: Option>, + spec: Arc, ) -> Self { Self { block_root, @@ -571,6 +647,7 @@ impl AvailableBlock { blobs, data_columns, blobs_available_timestamp: None, + spec, } } @@ -589,6 +666,10 @@ impl AvailableBlock { self.blobs_available_timestamp } + pub fn data_columns(&self) -> Option<&DataColumnSidecarList> { + self.data_columns.as_ref() + } + #[allow(clippy::type_complexity)] pub fn deconstruct( self, @@ -604,6 +685,7 @@ impl AvailableBlock { blobs, data_columns, blobs_available_timestamp: _, + .. 
} = self; (block_root, block, blobs, data_columns) } diff --git a/beacon_node/beacon_chain/src/data_availability_checker/error.rs b/beacon_node/beacon_chain/src/data_availability_checker/error.rs index bb92b0b6322..79793d6dc29 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/error.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/error.rs @@ -14,7 +14,9 @@ pub enum Error { Unexpected, SszTypes(ssz_types::Error), MissingBlobs, + MissingCustodyColumns, BlobIndexInvalid(u64), + DataColumnIndexInvalid(u64), StoreError(store::Error), DecodeError(ssz::DecodeError), ParentStateMissing(Hash256), @@ -37,6 +39,7 @@ impl Error { Error::KzgNotInitialized | Error::SszTypes(_) | Error::MissingBlobs + | Error::MissingCustodyColumns | Error::StoreError(_) | Error::DecodeError(_) | Error::Unexpected @@ -47,6 +50,7 @@ impl Error { | Error::SlotClockError => ErrorCategory::Internal, Error::Kzg(_) | Error::BlobIndexInvalid(_) + | Error::DataColumnIndexInvalid(_) | Error::KzgCommitmentMismatch { .. 
} | Error::KzgVerificationFailed => ErrorCategory::Malicious, } diff --git a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs index 50fae091196..4863982b552 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs @@ -6,28 +6,36 @@ use crate::block_verification_types::{ }; use crate::data_availability_checker::{Availability, AvailabilityCheckError}; use crate::data_column_verification::KzgVerifiedCustodyDataColumn; +use crate::metrics; use crate::BeaconChainTypes; +use kzg::Kzg; use lru::LruCache; use parking_lot::RwLock; use ssz_types::{FixedVector, VariableList}; +use std::collections::HashSet; use std::num::NonZeroUsize; use std::sync::Arc; use types::blob_sidecar::BlobIdentifier; use types::{ - BlobSidecar, ChainSpec, ColumnIndex, DataColumnIdentifier, DataColumnSidecar, Epoch, EthSpec, - Hash256, SignedBeaconBlock, + BlobSidecar, ChainSpec, ColumnIndex, DataColumnIdentifier, DataColumnSidecar, + DataColumnSidecarList, Epoch, EthSpec, Hash256, SignedBeaconBlock, }; +pub type DataColumnsToPublish = Option>; + /// This represents the components of a partially available block /// /// The blobs are all gossip and kzg verified. /// The block has completed all verifications except the availability check. +/// TODO(das): this struct can potentially be refactored as blobs and data columns are mutually +/// exclusive and this could simplify `is_importable`.
#[derive(Clone)] pub struct PendingComponents { pub block_root: Hash256, pub verified_blobs: FixedVector>, E::MaxBlobsPerBlock>, pub verified_data_columns: Vec>, pub executed_block: Option>, + pub reconstruction_started: bool, } pub enum BlockImportRequirement { @@ -52,10 +60,11 @@ impl PendingComponents { pub fn get_cached_data_column( &self, data_column_index: u64, - ) -> Option<&KzgVerifiedCustodyDataColumn> { + ) -> Option>> { self.verified_data_columns .iter() .find(|d| d.index() == data_column_index) + .map(|d| d.clone_arc()) } /// Returns a mutable reference to the cached block. @@ -170,12 +179,14 @@ impl PendingComponents { fn merge_data_columns>>( &mut self, kzg_verified_data_columns: I, - ) { + ) -> Result<(), AvailabilityCheckError> { for data_column in kzg_verified_data_columns { + // TODO(das): Add equivalent checks for data columns if necessary if !self.data_column_exists(data_column.index()) { self.verified_data_columns.push(data_column); } } + Ok(()) } /// Inserts a new block and revalidates the existing blobs against it. @@ -218,6 +229,7 @@ impl PendingComponents { verified_blobs: FixedVector::default(), verified_data_columns: vec![], executed_block: None, + reconstruction_started: false, } } @@ -230,6 +242,7 @@ impl PendingComponents { pub fn make_available( self, block_import_requirement: BlockImportRequirement, + spec: &Arc, recover: R, ) -> Result, AvailabilityCheckError> where @@ -242,6 +255,7 @@ impl PendingComponents { verified_blobs, verified_data_columns, executed_block, + .. 
} = self; let blobs_available_timestamp = verified_blobs @@ -291,12 +305,17 @@ impl PendingComponents { blobs, data_columns, blobs_available_timestamp, + spec: spec.clone(), }; Ok(Availability::Available(Box::new( AvailableExecutedBlock::new(available_block, import_data, payload_verification_outcome), ))) } + pub fn reconstruction_started(&mut self) { + self.reconstruction_started = true; + } + /// Returns the epoch of the block if it is cached, otherwise returns the epoch of the first blob. pub fn epoch(&self) -> Option { self.executed_block @@ -337,7 +356,7 @@ pub struct DataAvailabilityCheckerInner { state_cache: StateLRUCache, /// The number of data columns the node is custodying. custody_column_count: usize, - spec: ChainSpec, + spec: Arc, } impl DataAvailabilityCheckerInner { @@ -345,7 +364,7 @@ impl DataAvailabilityCheckerInner { capacity: NonZeroUsize, beacon_store: BeaconStore, custody_column_count: usize, - spec: ChainSpec, + spec: Arc, ) -> Result { Ok(Self { critical: RwLock::new(LruCache::new(capacity)), @@ -430,6 +449,28 @@ impl DataAvailabilityCheckerInner { } } + /// Potentially trigger reconstruction if: + /// - Our custody requirement is all columns + /// - We have >= 50% of columns, but not all columns + fn should_reconstruct( + &self, + block_import_requirement: &BlockImportRequirement, + pending_components: &PendingComponents, + ) -> bool { + let BlockImportRequirement::CustodyColumns(num_expected_columns) = block_import_requirement + else { + return false; + }; + + let num_of_columns = self.spec.number_of_columns; + let has_missing_columns = pending_components.verified_data_columns.len() < num_of_columns; + + has_missing_columns + && !pending_components.reconstruction_started + && *num_expected_columns == num_of_columns + && pending_components.verified_data_columns.len() >= num_of_columns / 2 + } + pub fn put_kzg_verified_blobs>>( &self, block_root: Hash256, @@ -460,7 +501,7 @@ impl DataAvailabilityCheckerInner { write_lock.put(block_root,
pending_components.clone()); // No need to hold the write lock anymore drop(write_lock); - pending_components.make_available(block_import_requirement, |diet_block| { + pending_components.make_available(block_import_requirement, &self.spec, |diet_block| { self.state_cache.recover_pending_executed_block(diet_block) }) } else { @@ -469,14 +510,17 @@ impl DataAvailabilityCheckerInner { } } + #[allow(clippy::type_complexity)] pub fn put_kzg_verified_data_columns< I: IntoIterator>, >( &self, + kzg: &Kzg, block_root: Hash256, epoch: Epoch, kzg_verified_data_columns: I, - ) -> Result, AvailabilityCheckError> { + ) -> Result<(Availability, DataColumnsToPublish), AvailabilityCheckError> + { let mut write_lock = self.critical.write(); // Grab existing entry or create a new entry. @@ -486,19 +530,68 @@ impl DataAvailabilityCheckerInner { .unwrap_or_else(|| PendingComponents::empty(block_root)); // Merge in the data columns. - pending_components.merge_data_columns(kzg_verified_data_columns); + pending_components.merge_data_columns(kzg_verified_data_columns)?; let block_import_requirement = self.block_import_requirement(epoch)?; + + // Potentially trigger reconstruction if: + // - Our custody requirement is all columns + // - We have >= 50% of columns + let data_columns_to_publish = + if self.should_reconstruct(&block_import_requirement, &pending_components) { + pending_components.reconstruction_started(); + + let timer = metrics::start_timer(&metrics::DATA_AVAILABILITY_RECONSTRUCTION_TIME); + + let existing_column_indices = pending_components + .verified_data_columns + .iter() + .map(|d| d.index()) + .collect::>(); + + // Will only return an error if: + // - < 50% of columns + // - There are duplicates + let all_data_columns = KzgVerifiedCustodyDataColumn::reconstruct_columns( + kzg, + pending_components.verified_data_columns.as_slice(), + &self.spec, + )?; + + let data_columns_to_publish = all_data_columns + .iter() + .filter(|d| !existing_column_indices.contains(&d.index())) +
.map(|d| d.clone_arc()) + .collect::>(); + + pending_components.verified_data_columns = all_data_columns; + + metrics::stop_timer(timer); + metrics::inc_counter_by( + &metrics::DATA_AVAILABILITY_RECONSTRUCTED_COLUMNS, + data_columns_to_publish.len() as u64, + ); + + Some(data_columns_to_publish) + } else { + None + }; + if pending_components.is_available(&block_import_requirement) { write_lock.put(block_root, pending_components.clone()); // No need to hold the write lock anymore drop(write_lock); - pending_components.make_available(block_import_requirement, |diet_block| { - self.state_cache.recover_pending_executed_block(diet_block) - }) + pending_components + .make_available(block_import_requirement, &self.spec, |diet_block| { + self.state_cache.recover_pending_executed_block(diet_block) + }) + .map(|availability| (availability, data_columns_to_publish)) } else { write_lock.put(block_root, pending_components); - Ok(Availability::MissingComponents(block_root)) + Ok(( + Availability::MissingComponents(block_root), + data_columns_to_publish, + )) } } @@ -532,7 +625,7 @@ impl DataAvailabilityCheckerInner { write_lock.put(block_root, pending_components.clone()); // No need to hold the write lock anymore drop(write_lock); - pending_components.make_available(block_import_requirement, |diet_block| { + pending_components.make_available(block_import_requirement, &self.spec, |diet_block| { self.state_cache.recover_pending_executed_block(diet_block) }) } else { @@ -791,7 +884,7 @@ mod test { let log = test_logger(); let chain_db_path = tempdir().expect("should get temp dir"); let harness = get_deneb_chain(log.clone(), &chain_db_path).await; - let spec = harness.spec.clone(); + let spec = Arc::new(harness.spec.clone()); let test_store = harness.chain.store.clone(); let capacity_non_zero = new_non_zero_usize(capacity); let cache = Arc::new( diff --git a/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs 
b/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs index cf6eb669d5e..03e3289118d 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs @@ -70,11 +70,11 @@ impl DietAvailabilityPendingExecutedBlock { pub struct StateLRUCache { states: RwLock>>, store: BeaconStore, - spec: ChainSpec, + spec: Arc, } impl StateLRUCache { - pub fn new(store: BeaconStore, spec: ChainSpec) -> Self { + pub fn new(store: BeaconStore, spec: Arc) -> Self { Self { states: RwLock::new(LruCache::new(STATE_LRU_CAPACITY_NON_ZERO)), store, diff --git a/beacon_node/beacon_chain/src/data_column_verification.rs b/beacon_node/beacon_chain/src/data_column_verification.rs index 279af20909b..f4a5feaee2a 100644 --- a/beacon_node/beacon_chain/src/data_column_verification.rs +++ b/beacon_node/beacon_chain/src/data_column_verification.rs @@ -2,7 +2,7 @@ use crate::block_verification::{ cheap_state_advance_to_obtain_committees, get_validator_pubkey_cache, process_block_slash_info, BlockSlashInfo, }; -use crate::kzg_utils::validate_data_columns; +use crate::kzg_utils::{reconstruct_data_columns, validate_data_columns}; use crate::{metrics, BeaconChain, BeaconChainError, BeaconChainTypes}; use derivative::Derivative; use fork_choice::ProtoBlock; @@ -181,6 +181,15 @@ impl GossipVerifiedDataColumn { } } + pub fn as_data_column(&self) -> &DataColumnSidecar { + self.data_column.as_data_column() + } + + /// This is cheap as we're calling clone on an Arc + pub fn clone_data_column(&self) -> Arc> { + self.data_column.clone_data_column() + } + pub fn block_root(&self) -> Hash256 { self.block_root } @@ -189,6 +198,10 @@ impl GossipVerifiedDataColumn { self.data_column.data.slot() } + pub fn index(&self) -> ColumnIndex { + self.data_column.data.index + } + pub fn signed_block_header(&self) -> SignedBeaconBlockHeader { self.data_column.data.signed_block_header.clone() } @@ -226,6 +239,38 
@@ impl KzgVerifiedDataColumn { } } +pub type CustodyDataColumnList = RuntimeVariableList>; + +/// Data column that we must custody +#[derive(Debug, Derivative, Clone, Encode, Decode)] +#[derivative(PartialEq, Eq, Hash(bound = "E: EthSpec"))] +#[ssz(struct_behaviour = "transparent")] +pub struct CustodyDataColumn { + data: Arc>, +} + +impl CustodyDataColumn { + /// Mark a column as custody column. Caller must ensure that our current custody requirements + /// include this column + pub fn from_asserted_custody(data: Arc>) -> Self { + Self { data } + } + + pub fn into_inner(self) -> Arc> { + self.data + } + pub fn as_data_column(&self) -> &Arc> { + &self.data + } + /// This is cheap as we're calling clone on an Arc + pub fn clone_arc(&self) -> Arc> { + self.data.clone() + } + pub fn index(&self) -> u64 { + self.data.index + } +} + /// Data column that we must custody and has completed kzg verification #[derive(Debug, Derivative, Clone, Encode, Decode)] #[derivative(PartialEq, Eq)] @@ -243,8 +288,39 @@ impl KzgVerifiedCustodyDataColumn { } } - pub fn index(&self) -> ColumnIndex { - self.data.index + /// Verify a column already marked as custody column + pub fn new(data_column: CustodyDataColumn, kzg: &Kzg) -> Result { + verify_kzg_for_data_column(data_column.clone_arc(), kzg)?; + Ok(Self { + data: data_column.data, + }) + } + + pub fn reconstruct_columns( + kzg: &Kzg, + partial_set_of_columns: &[Self], + spec: &ChainSpec, + ) -> Result, KzgError> { + // Will only return an error if: + // - < 50% of columns + // - There are duplicates + let all_data_columns = reconstruct_data_columns( + kzg, + &partial_set_of_columns + .iter() + .map(|d| d.clone_arc()) + .collect::>(), + spec, + )?; + + Ok(all_data_columns + .into_iter() + .map(|d| { + KzgVerifiedCustodyDataColumn::from_asserted_custody(KzgVerifiedDataColumn { + data: d, + }) + }) + .collect::>()) } pub fn into_inner(self) -> Arc> { @@ -257,6 +333,9 @@ impl KzgVerifiedCustodyDataColumn { pub fn clone_arc(&self) -> Arc> 
{ self.data.clone() } + pub fn index(&self) -> ColumnIndex { + self.data.index + } } /// Complete kzg verification for a `DataColumnSidecar`. @@ -303,6 +382,7 @@ pub fn validate_data_column_sidecar_for_gossip( let parent_block = verify_parent_block_and_finalized_descendant(data_column.clone(), chain)?; verify_slot_higher_than_parent(&parent_block, column_slot)?; verify_proposer_and_signature(&data_column, &parent_block, chain)?; + let kzg = chain .kzg .clone() @@ -350,9 +430,11 @@ fn verify_is_first_sidecar( fn verify_column_inclusion_proof( data_column: &DataColumnSidecar, ) -> Result<(), GossipDataColumnError> { + let _timer = metrics::start_timer(&metrics::DATA_COLUMN_SIDECAR_INCLUSION_PROOF_VERIFICATION); if !data_column.verify_inclusion_proof() { return Err(GossipDataColumnError::InvalidInclusionProof); } + Ok(()) } diff --git a/beacon_node/beacon_chain/src/errors.rs b/beacon_node/beacon_chain/src/errors.rs index 1e3d67f9d7a..4db3f0ebb41 100644 --- a/beacon_node/beacon_chain/src/errors.rs +++ b/beacon_node/beacon_chain/src/errors.rs @@ -77,8 +77,6 @@ pub enum BeaconChainError { AttesterSlashingValidationError(AttesterSlashingValidationError), BlsExecutionChangeValidationError(BlsExecutionChangeValidationError), MissingFinalizedStateRoot(Slot), - /// Returned when an internal check fails, indicating corrupt data. 
- InvariantViolated(String), SszTypesError(SszTypesError), NoProposerForSlot(Slot), CanonicalHeadLockTimeout, @@ -216,10 +214,12 @@ pub enum BeaconChainError { InconsistentFork(InconsistentFork), ProposerHeadForkChoiceError(fork_choice::Error), UnableToPublish, + UnableToBuildColumnSidecar(String), AvailabilityCheckError(AvailabilityCheckError), LightClientError(LightClientError), UnsupportedFork, MilhouseError(MilhouseError), + EmptyRpcCustodyColumns, AttestationError(AttestationError), AttestationCommitteeIndexNotSet, } diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index 82c98a2083b..3394946255f 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -1653,6 +1653,13 @@ pub static DATA_COLUMN_SIDECAR_COMPUTATION: LazyLock> = LazyLo Ok(vec![0.04, 0.05, 0.1, 0.2, 0.3, 0.5, 0.7, 1.0]), ) }); +pub static DATA_COLUMN_SIDECAR_INCLUSION_PROOF_VERIFICATION: LazyLock> = + LazyLock::new(|| { + try_create_histogram( + "data_column_sidecar_inclusion_proof_verification_seconds", + "Time taken to verify data_column sidecar inclusion proof", + ) + }); pub static DATA_COLUMN_SIDECAR_PROCESSING_REQUESTS: LazyLock> = LazyLock::new(|| { try_create_int_counter( @@ -1674,6 +1681,13 @@ pub static DATA_COLUMN_SIDECAR_GOSSIP_VERIFICATION_TIMES: LazyLock> = + LazyLock::new(|| { + try_create_int_counter( + "beacon_blobs_column_sidecar_processing_successes_total", + "Number of data column sidecars verified for gossip", + ) + }); /* * Light server message verification @@ -1856,6 +1870,20 @@ pub static DATA_AVAILABILITY_OVERFLOW_STORE_CACHE_SIZE: LazyLock> = + LazyLock::new(|| { + try_create_histogram( + "data_availability_reconstruction_time_seconds", + "Time taken to reconstruct columns", + ) + }); +pub static DATA_AVAILABILITY_RECONSTRUCTED_COLUMNS: LazyLock> = + LazyLock::new(|| { + try_create_int_counter( + "data_availability_reconstructed_columns_total", + "Total count of reconstructed 
columns", + ) + }); /* * light_client server metrics diff --git a/beacon_node/beacon_chain/src/test_utils.rs b/beacon_node/beacon_chain/src/test_utils.rs index 87a3eeb359e..b28d221da7e 100644 --- a/beacon_node/beacon_chain/src/test_utils.rs +++ b/beacon_node/beacon_chain/src/test_utils.rs @@ -1,4 +1,5 @@ use crate::block_verification_types::{AsBlock, RpcBlock}; +use crate::kzg_utils::blobs_to_data_column_sidecars; use crate::observed_operations::ObservationOutcome; pub use crate::persisted_beacon_chain::PersistedBeaconChain; use crate::BeaconBlockResponseWrapper; @@ -82,6 +83,14 @@ pub static KZG: LazyLock> = LazyLock::new(|| { Arc::new(kzg) }); +pub static KZG_PEERDAS: LazyLock> = LazyLock::new(|| { + let trusted_setup: TrustedSetup = serde_json::from_reader(TRUSTED_SETUP_BYTES) + .map_err(|e| format!("Unable to read trusted setup file: {}", e)) + .expect("should have trusted setup"); + let kzg = Kzg::new_from_trusted_setup_das_enabled(trusted_setup).expect("should create kzg"); + Arc::new(kzg) +}); + pub type BaseHarnessType = Witness, E, THotStore, TColdStore>; @@ -2690,3 +2699,20 @@ pub fn generate_rand_block_and_blobs( } (block, blob_sidecars) } + +#[allow(clippy::type_complexity)] +pub fn generate_rand_block_and_data_columns( + fork_name: ForkName, + num_blobs: NumBlobs, + rng: &mut impl Rng, + spec: &ChainSpec, +) -> ( + SignedBeaconBlock>, + Vec>>, +) { + let (block, blobs) = generate_rand_block_and_blobs(fork_name, num_blobs, rng); + let blob: BlobsList = blobs.into_iter().map(|b| b.blob).collect::>().into(); + let data_columns = blobs_to_data_column_sidecars(&blob, &block, &KZG_PEERDAS, spec).unwrap(); + + (block, data_columns) +} diff --git a/beacon_node/beacon_chain/tests/block_verification.rs b/beacon_node/beacon_chain/tests/block_verification.rs index 046a3468afc..1c494d99bf5 100644 --- a/beacon_node/beacon_chain/tests/block_verification.rs +++ b/beacon_node/beacon_chain/tests/block_verification.rs @@ -1472,7 +1472,7 @@ async fn 
add_base_block_to_altair_chain() { ) .await, ChainSegmentResult::Failed { - imported_blocks: 0, + imported_blocks: _, error: BlockError::InconsistentFork(InconsistentFork { fork_at_slot: ForkName::Altair, object_fork: ForkName::Base, @@ -1608,7 +1608,7 @@ async fn add_altair_block_to_base_chain() { ) .await, ChainSegmentResult::Failed { - imported_blocks: 0, + imported_blocks: _, error: BlockError::InconsistentFork(InconsistentFork { fork_at_slot: ForkName::Base, object_fork: ForkName::Altair, diff --git a/beacon_node/beacon_chain/tests/store_tests.rs b/beacon_node/beacon_chain/tests/store_tests.rs index c1071d55cf6..740aada413d 100644 --- a/beacon_node/beacon_chain/tests/store_tests.rs +++ b/beacon_node/beacon_chain/tests/store_tests.rs @@ -2792,7 +2792,13 @@ async fn weak_subjectivity_sync_test(slots: Vec, checkpoint_slot: Slot) { let (block_root, block, blobs, data_columns) = available_blocks[0].clone().deconstruct(); let mut corrupt_block = (*block).clone(); *corrupt_block.signature_mut() = Signature::empty(); - AvailableBlock::__new_for_testing(block_root, Arc::new(corrupt_block), blobs, data_columns) + AvailableBlock::__new_for_testing( + block_root, + Arc::new(corrupt_block), + blobs, + data_columns, + Arc::new(spec), + ) }; // Importing the invalid batch should error. 
diff --git a/beacon_node/beacon_processor/src/lib.rs b/beacon_node/beacon_processor/src/lib.rs index 6ce3b64acfe..f506f0bb94d 100644 --- a/beacon_node/beacon_processor/src/lib.rs +++ b/beacon_node/beacon_processor/src/lib.rs @@ -64,11 +64,11 @@ use types::{ Attestation, BeaconState, ChainSpec, Hash256, RelativeEpoch, SignedAggregateAndProof, SubnetId, }; use types::{EthSpec, Slot}; -use work_reprocessing_queue::IgnoredRpcBlock; use work_reprocessing_queue::{ spawn_reprocess_scheduler, QueuedAggregate, QueuedLightClientUpdate, QueuedRpcBlock, QueuedUnaggregate, ReadyWork, }; +use work_reprocessing_queue::{IgnoredRpcBlock, QueuedSamplingRequest}; mod metrics; pub mod work_reprocessing_queue; @@ -106,9 +106,12 @@ pub struct BeaconProcessorQueueLengths { finality_update_queue: usize, optimistic_update_queue: usize, unknown_light_client_update_queue: usize, + unknown_block_sampling_request_queue: usize, rpc_block_queue: usize, rpc_blob_queue: usize, rpc_custody_column_queue: usize, + rpc_verify_data_column_queue: usize, + sampling_result_queue: usize, chain_segment_queue: usize, backfill_chain_segment: usize, gossip_block_queue: usize, @@ -161,10 +164,14 @@ impl BeaconProcessorQueueLengths { gossip_attester_slashing_queue: 4096, finality_update_queue: 1024, optimistic_update_queue: 1024, + unknown_block_sampling_request_queue: 16384, unknown_light_client_update_queue: 128, rpc_block_queue: 1024, rpc_blob_queue: 1024, - rpc_custody_column_queue: 1024, + // TODO(das): Placeholder values + rpc_custody_column_queue: 1000, + rpc_verify_data_column_queue: 1000, + sampling_result_queue: 1000, chain_segment_queue: 64, backfill_chain_segment: 64, gossip_block_queue: 1024, @@ -231,6 +238,8 @@ pub const RPC_BLOCK: &str = "rpc_block"; pub const IGNORED_RPC_BLOCK: &str = "ignored_rpc_block"; pub const RPC_BLOBS: &str = "rpc_blob"; pub const RPC_CUSTODY_COLUMN: &str = "rpc_custody_column"; +pub const RPC_VERIFY_DATA_COLUMNS: &str = "rpc_verify_data_columns"; +pub const 
SAMPLING_RESULT: &str = "sampling_result"; pub const CHAIN_SEGMENT: &str = "chain_segment"; pub const CHAIN_SEGMENT_BACKFILL: &str = "chain_segment_backfill"; pub const STATUS_PROCESSING: &str = "status_processing"; @@ -246,6 +255,7 @@ pub const LIGHT_CLIENT_OPTIMISTIC_UPDATE_REQUEST: &str = "light_client_optimisti pub const UNKNOWN_BLOCK_ATTESTATION: &str = "unknown_block_attestation"; pub const UNKNOWN_BLOCK_AGGREGATE: &str = "unknown_block_aggregate"; pub const UNKNOWN_LIGHT_CLIENT_UPDATE: &str = "unknown_light_client_update"; +pub const UNKNOWN_BLOCK_SAMPLING_REQUEST: &str = "unknown_block_sampling_request"; pub const GOSSIP_BLS_TO_EXECUTION_CHANGE: &str = "gossip_bls_to_execution_change"; pub const API_REQUEST_P0: &str = "api_request_p0"; pub const API_REQUEST_P1: &str = "api_request_p1"; @@ -501,6 +511,10 @@ impl From for WorkEvent { process_fn, }, }, + ReadyWork::SamplingRequest(QueuedSamplingRequest { process_fn, .. }) => Self { + drop_during_sync: true, + work: Work::UnknownBlockSamplingRequest { process_fn }, + }, ReadyWork::BackfillSync(QueuedBackfillBatch(process_fn)) => Self { drop_during_sync: false, work: Work::ChainSegmentBackfill(process_fn), @@ -584,6 +598,9 @@ pub enum Work { parent_root: Hash256, process_fn: BlockingFn, }, + UnknownBlockSamplingRequest { + process_fn: BlockingFn, + }, GossipAggregateBatch { aggregates: Vec>, process_batch: Box>) + Send + Sync>, @@ -610,6 +627,8 @@ pub enum Work { process_fn: AsyncFn, }, RpcCustodyColumn(AsyncFn), + RpcVerifyDataColumn(AsyncFn), + SamplingResult(AsyncFn), IgnoredRpcBlock { process_fn: BlockingFn, }, @@ -658,6 +677,8 @@ impl Work { Work::RpcBlock { .. } => RPC_BLOCK, Work::RpcBlobs { .. } => RPC_BLOBS, Work::RpcCustodyColumn { .. } => RPC_CUSTODY_COLUMN, + Work::RpcVerifyDataColumn(_) => RPC_VERIFY_DATA_COLUMNS, + Work::SamplingResult(_) => SAMPLING_RESULT, Work::IgnoredRpcBlock { .. } => IGNORED_RPC_BLOCK, Work::ChainSegment { .. 
} => CHAIN_SEGMENT, Work::ChainSegmentBackfill(_) => CHAIN_SEGMENT_BACKFILL, @@ -673,8 +694,9 @@ impl Work { Work::LightClientFinalityUpdateRequest(_) => LIGHT_CLIENT_FINALITY_UPDATE_REQUEST, Work::UnknownBlockAttestation { .. } => UNKNOWN_BLOCK_ATTESTATION, Work::UnknownBlockAggregate { .. } => UNKNOWN_BLOCK_AGGREGATE, - Work::GossipBlsToExecutionChange(_) => GOSSIP_BLS_TO_EXECUTION_CHANGE, Work::UnknownLightClientOptimisticUpdate { .. } => UNKNOWN_LIGHT_CLIENT_UPDATE, + Work::UnknownBlockSamplingRequest { .. } => UNKNOWN_BLOCK_SAMPLING_REQUEST, + Work::GossipBlsToExecutionChange(_) => GOSSIP_BLS_TO_EXECUTION_CHANGE, Work::ApiRequestP0 { .. } => API_REQUEST_P0, Work::ApiRequestP1 { .. } => API_REQUEST_P1, } @@ -816,11 +838,16 @@ impl BeaconProcessor { let mut optimistic_update_queue = FifoQueue::new(queue_lengths.optimistic_update_queue); let mut unknown_light_client_update_queue = FifoQueue::new(queue_lengths.unknown_light_client_update_queue); + let mut unknown_block_sampling_request_queue = + FifoQueue::new(queue_lengths.unknown_block_sampling_request_queue); // Using a FIFO queue since blocks need to be imported sequentially. 
let mut rpc_block_queue = FifoQueue::new(queue_lengths.rpc_block_queue); let mut rpc_blob_queue = FifoQueue::new(queue_lengths.rpc_blob_queue); let mut rpc_custody_column_queue = FifoQueue::new(queue_lengths.rpc_custody_column_queue); + let mut rpc_verify_data_column_queue = + FifoQueue::new(queue_lengths.rpc_verify_data_column_queue); + let mut sampling_result_queue = FifoQueue::new(queue_lengths.sampling_result_queue); let mut chain_segment_queue = FifoQueue::new(queue_lengths.chain_segment_queue); let mut backfill_chain_segment = FifoQueue::new(queue_lengths.backfill_chain_segment); let mut gossip_block_queue = FifoQueue::new(queue_lengths.gossip_block_queue); @@ -978,6 +1005,13 @@ impl BeaconProcessor { self.spawn_worker(item, idle_tx); } else if let Some(item) = rpc_custody_column_queue.pop() { self.spawn_worker(item, idle_tx); + // TODO(das): decide proper prioritization for sampling columns + } else if let Some(item) = rpc_custody_column_queue.pop() { + self.spawn_worker(item, idle_tx); + } else if let Some(item) = rpc_verify_data_column_queue.pop() { + self.spawn_worker(item, idle_tx); + } else if let Some(item) = sampling_result_queue.pop() { + self.spawn_worker(item, idle_tx); // Check delayed blocks before gossip blocks, the gossip blocks might rely // on the delayed ones. } else if let Some(item) = delayed_block_queue.pop() { @@ -1143,6 +1177,9 @@ impl BeaconProcessor { self.spawn_worker(item, idle_tx); } else if let Some(item) = dcbrange_queue.pop() { self.spawn_worker(item, idle_tx); + // Prioritize sampling requests after block syncing requests + } else if let Some(item) = unknown_block_sampling_request_queue.pop() { + self.spawn_worker(item, idle_tx); // Check slashings after all other consensus messages so we prioritize // following head. // @@ -1273,6 +1310,12 @@ impl BeaconProcessor { Work::RpcCustodyColumn { .. 
} => { rpc_custody_column_queue.push(work, work_id, &self.log) } + Work::RpcVerifyDataColumn(_) => { + rpc_verify_data_column_queue.push(work, work_id, &self.log) + } + Work::SamplingResult(_) => { + sampling_result_queue.push(work, work_id, &self.log) + } Work::ChainSegment { .. } => { chain_segment_queue.push(work, work_id, &self.log) } @@ -1319,6 +1362,9 @@ impl BeaconProcessor { Work::UnknownLightClientOptimisticUpdate { .. } => { unknown_light_client_update_queue.push(work, work_id, &self.log) } + Work::UnknownBlockSamplingRequest { .. } => { + unknown_block_sampling_request_queue.push(work, work_id, &self.log) + } Work::ApiRequestP0 { .. } => { api_request_p0_queue.push(work, work_id, &self.log) } @@ -1369,6 +1415,18 @@ impl BeaconProcessor { &metrics::BEACON_PROCESSOR_RPC_BLOB_QUEUE_TOTAL, rpc_blob_queue.len() as i64, ); + metrics::set_gauge( + &metrics::BEACON_PROCESSOR_RPC_CUSTODY_COLUMN_QUEUE_TOTAL, + rpc_custody_column_queue.len() as i64, + ); + metrics::set_gauge( + &metrics::BEACON_PROCESSOR_RPC_VERIFY_DATA_COLUMN_QUEUE_TOTAL, + rpc_verify_data_column_queue.len() as i64, + ); + metrics::set_gauge( + &metrics::BEACON_PROCESSOR_SAMPLING_RESULT_QUEUE_TOTAL, + sampling_result_queue.len() as i64, + ); metrics::set_gauge( &metrics::BEACON_PROCESSOR_CHAIN_SEGMENT_QUEUE_TOTAL, chain_segment_queue.len() as i64, @@ -1497,12 +1555,12 @@ impl BeaconProcessor { Work::ChainSegment(process_fn) => task_spawner.spawn_async(async move { process_fn.await; }), - Work::UnknownBlockAttestation { process_fn } => task_spawner.spawn_blocking(process_fn), - Work::UnknownBlockAggregate { process_fn } => task_spawner.spawn_blocking(process_fn), - Work::UnknownLightClientOptimisticUpdate { - parent_root: _, - process_fn, - } => task_spawner.spawn_blocking(process_fn), + Work::UnknownBlockAttestation { process_fn } + | Work::UnknownBlockAggregate { process_fn } + | Work::UnknownLightClientOptimisticUpdate { process_fn, .. 
} + | Work::UnknownBlockSamplingRequest { process_fn } => { + task_spawner.spawn_blocking(process_fn) + } Work::DelayedImportBlock { beacon_block_slot: _, beacon_block_root: _, @@ -1510,7 +1568,9 @@ impl BeaconProcessor { } => task_spawner.spawn_async(process_fn), Work::RpcBlock { process_fn } | Work::RpcBlobs { process_fn } - | Work::RpcCustodyColumn(process_fn) => task_spawner.spawn_async(process_fn), + | Work::RpcCustodyColumn(process_fn) + | Work::RpcVerifyDataColumn(process_fn) + | Work::SamplingResult(process_fn) => task_spawner.spawn_async(process_fn), Work::IgnoredRpcBlock { process_fn } => task_spawner.spawn_blocking(process_fn), Work::GossipBlock(work) | Work::GossipBlobSidecar(work) diff --git a/beacon_node/beacon_processor/src/metrics.rs b/beacon_node/beacon_processor/src/metrics.rs index 56105f1e101..8bc03cee6c7 100644 --- a/beacon_node/beacon_processor/src/metrics.rs +++ b/beacon_node/beacon_processor/src/metrics.rs @@ -133,6 +133,30 @@ pub static BEACON_PROCESSOR_RPC_BLOB_QUEUE_TOTAL: LazyLock> = "Count of blobs from the rpc waiting to be verified.", ) }); +// Rpc custody data columns. +pub static BEACON_PROCESSOR_RPC_CUSTODY_COLUMN_QUEUE_TOTAL: LazyLock> = + LazyLock::new(|| { + try_create_int_gauge( + "beacon_processor_rpc_custody_column_queue_total", + "Count of custody columns from the rpc waiting to be imported.", + ) + }); +// Rpc verify data columns +pub static BEACON_PROCESSOR_RPC_VERIFY_DATA_COLUMN_QUEUE_TOTAL: LazyLock> = + LazyLock::new(|| { + try_create_int_gauge( + "beacon_processor_rpc_verify_data_column_queue_total", + "Count of data columns from the rpc waiting to be verified.", + ) + }); +// Sampling result +pub static BEACON_PROCESSOR_SAMPLING_RESULT_QUEUE_TOTAL: LazyLock> = + LazyLock::new(|| { + try_create_int_gauge( + "beacon_processor_sampling_result_queue_total", + "Count of sampling results waiting to be processed.", + ) + }); // Chain segments. 
pub static BEACON_PROCESSOR_CHAIN_SEGMENT_QUEUE_TOTAL: LazyLock> = LazyLock::new(|| { @@ -221,6 +245,15 @@ pub static BEACON_PROCESSOR_REPROCESSING_QUEUE_MATCHED_ATTESTATIONS: LazyLock, +> = LazyLock::new(|| { + try_create_int_counter( + "beacon_processor_reprocessing_queue_matched_sampling_requests", + "Number of queued sampling requests where a matching block has been imported.", + ) +}); /* * Light client update reprocessing queue metrics. @@ -238,7 +271,7 @@ pub static BEACON_PROCESSOR_REPROCESSING_QUEUE_MATCHED_OPTIMISTIC_UPDATES: LazyL > = LazyLock::new(|| { try_create_int_counter( "beacon_processor_reprocessing_queue_matched_optimistic_updates", - "Number of queued light client optimistic updates where as matching block has been imported." + "Number of queued light client optimistic updates where a matching block has been imported." ) }); diff --git a/beacon_node/beacon_processor/src/work_reprocessing_queue.rs b/beacon_node/beacon_processor/src/work_reprocessing_queue.rs index 137010557da..a43310ac834 100644 --- a/beacon_node/beacon_processor/src/work_reprocessing_queue.rs +++ b/beacon_node/beacon_processor/src/work_reprocessing_queue.rs @@ -50,6 +50,9 @@ pub const QUEUED_LIGHT_CLIENT_UPDATE_DELAY: Duration = Duration::from_secs(12); /// For how long to queue rpc blocks before sending them back for reprocessing. pub const QUEUED_RPC_BLOCK_DELAY: Duration = Duration::from_secs(4); +/// For how long to queue sampling requests for reprocessing. +pub const QUEUED_SAMPLING_REQUESTS_DELAY: Duration = Duration::from_secs(12); + /// Set an arbitrary upper-bound on the number of queued blocks to avoid DoS attacks. The fact that /// we signature-verify blocks before putting them in the queue *should* protect against this, but /// it's nice to have extra protection. @@ -61,6 +64,10 @@ const MAXIMUM_QUEUED_ATTESTATIONS: usize = 16_384; /// How many light client updates we keep before new ones get dropped. 
const MAXIMUM_QUEUED_LIGHT_CLIENT_UPDATES: usize = 128; +/// How many sampling requests we queue before new ones get dropped. +/// TODO(das): choose a sensible value +const MAXIMUM_QUEUED_SAMPLING_REQUESTS: usize = 16_384; + // Process backfill batch 50%, 60%, 80% through each slot. // // Note: use caution to set these fractions in a way that won't cause panic-y @@ -97,6 +104,8 @@ pub enum ReprocessQueueMessage { UnknownBlockAggregate(QueuedAggregate), /// A light client optimistic update that references a parent root that has not been seen as a parent. UnknownLightClientOptimisticUpdate(QueuedLightClientUpdate), + /// A sampling request that references an unknown block. + UnknownBlockSamplingRequest(QueuedSamplingRequest), /// A new backfill batch that needs to be scheduled for processing. BackfillSync(QueuedBackfillBatch), } @@ -109,6 +118,7 @@ pub enum ReadyWork { Unaggregate(QueuedUnaggregate), Aggregate(QueuedAggregate), LightClientUpdate(QueuedLightClientUpdate), + SamplingRequest(QueuedSamplingRequest), BackfillSync(QueuedBackfillBatch), } @@ -133,6 +143,12 @@ pub struct QueuedLightClientUpdate { pub process_fn: BlockingFn, } +/// A sampling request for which the corresponding block is not known while processing. +pub struct QueuedSamplingRequest { + pub beacon_block_root: Hash256, + pub process_fn: BlockingFn, +} + /// A block that arrived early and has been queued for later import. pub struct QueuedGossipBlock { pub beacon_block_slot: Slot, @@ -215,6 +231,8 @@ struct ReprocessQueue { attestations_delay_queue: DelayQueue, /// Queue to manage scheduled light client updates. lc_updates_delay_queue: DelayQueue, + /// Queue to manage scheduled sampling requests + sampling_requests_delay_queue: DelayQueue, /* Queued items */ /// Queued blocks. @@ -229,6 +247,10 @@ struct ReprocessQueue { queued_lc_updates: FnvHashMap, /// Light Client Updates per parent_root. awaiting_lc_updates_per_parent_root: HashMap>, + /// Queued sampling requests. 
+ queued_sampling_requests: FnvHashMap, + /// Sampling requests per block root. + awaiting_sampling_requests_per_block_root: HashMap>, /// Queued backfill batches queued_backfill_batches: Vec, @@ -236,15 +258,18 @@ struct ReprocessQueue { /// Next attestation id, used for both aggregated and unaggregated attestations next_attestation: usize, next_lc_update: usize, + next_sampling_request_update: usize, early_block_debounce: TimeLatch, rpc_block_debounce: TimeLatch, attestation_delay_debounce: TimeLatch, lc_update_delay_debounce: TimeLatch, + sampling_request_delay_debounce: TimeLatch, next_backfill_batch_event: Option>>, slot_clock: Arc, } pub type QueuedLightClientUpdateId = usize; +pub type QueuedSamplingRequestId = usize; #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum QueuedAttestationId { @@ -388,19 +413,24 @@ impl ReprocessQueue { rpc_block_delay_queue: DelayQueue::new(), attestations_delay_queue: DelayQueue::new(), lc_updates_delay_queue: DelayQueue::new(), + sampling_requests_delay_queue: <_>::default(), queued_gossip_block_roots: HashSet::new(), queued_lc_updates: FnvHashMap::default(), queued_aggregates: FnvHashMap::default(), queued_unaggregates: FnvHashMap::default(), + queued_sampling_requests: <_>::default(), awaiting_attestations_per_root: HashMap::new(), awaiting_lc_updates_per_parent_root: HashMap::new(), + awaiting_sampling_requests_per_block_root: <_>::default(), queued_backfill_batches: Vec::new(), next_attestation: 0, next_lc_update: 0, + next_sampling_request_update: 0, early_block_debounce: TimeLatch::default(), rpc_block_debounce: TimeLatch::default(), attestation_delay_debounce: TimeLatch::default(), lc_update_delay_debounce: TimeLatch::default(), + sampling_request_delay_debounce: <_>::default(), next_backfill_batch_event: None, slot_clock, } @@ -624,6 +654,35 @@ impl ReprocessQueue { self.next_lc_update += 1; } + InboundEvent::Msg(UnknownBlockSamplingRequest(queued_sampling_request)) => { + if self.sampling_requests_delay_queue.len() >= 
MAXIMUM_QUEUED_SAMPLING_REQUESTS { + if self.sampling_request_delay_debounce.elapsed() { + error!( + log, + "Sampling requests delay queue is full"; + "queue_size" => MAXIMUM_QUEUED_SAMPLING_REQUESTS, + ); + } + // Drop the inbound message. + return; + } + + let id: QueuedSamplingRequestId = self.next_sampling_request_update; + self.next_sampling_request_update += 1; + + // Register the delay. + let delay_key = self + .sampling_requests_delay_queue + .insert(id, QUEUED_SAMPLING_REQUESTS_DELAY); + + self.awaiting_sampling_requests_per_block_root + .entry(queued_sampling_request.beacon_block_root) + .or_default() + .push(id); + + self.queued_sampling_requests + .insert(id, (queued_sampling_request, delay_key)); + } InboundEvent::Msg(BlockImported { block_root, parent_root, @@ -685,6 +744,49 @@ impl ReprocessQueue { ); } } + // Unqueue the sampling requests we have for this root, if any. + if let Some(queued_ids) = self + .awaiting_sampling_requests_per_block_root + .remove(&block_root) + { + let mut sent_count = 0; + let mut failed_to_send_count = 0; + + for id in queued_ids { + metrics::inc_counter( + &metrics::BEACON_PROCESSOR_REPROCESSING_QUEUE_MATCHED_SAMPLING_REQUESTS, + ); + + if let Some((queued, delay_key)) = self.queued_sampling_requests.remove(&id) + { + // Remove the delay. + self.sampling_requests_delay_queue.remove(&delay_key); + + // Send the work. + let work = ReadyWork::SamplingRequest(queued); + + if self.ready_work_tx.try_send(work).is_err() { + failed_to_send_count += 1; + } else { + sent_count += 1; + } + } else { + // This should never happen. 
+ error!(log, "Unknown sampling request for block root"; "block_root" => ?block_root, "id" => ?id); + } + } + + if failed_to_send_count > 0 { + error!( + log, + "Ignored scheduled sampling requests for block"; + "hint" => "system may be overloaded", + "block_root" => ?block_root, + "failed_count" => failed_to_send_count, + "sent_count" => sent_count, + ); + } + } } InboundEvent::Msg(NewLightClientOptimisticUpdate { parent_root }) => { // Unqueue the light client optimistic updates we have for this root, if any. diff --git a/beacon_node/http_api/Cargo.toml b/beacon_node/http_api/Cargo.toml index 068feea1df8..f3779f0e4ac 100644 --- a/beacon_node/http_api/Cargo.toml +++ b/beacon_node/http_api/Cargo.toml @@ -42,6 +42,7 @@ sensitive_url = { workspace = true } store = { workspace = true } bytes = { workspace = true } beacon_processor = { workspace = true } +rand = { workspace = true } [dev-dependencies] serde_json = { workspace = true } diff --git a/beacon_node/http_api/src/lib.rs b/beacon_node/http_api/src/lib.rs index 93499b7c38a..102d138aa3a 100644 --- a/beacon_node/http_api/src/lib.rs +++ b/beacon_node/http_api/src/lib.rs @@ -1263,12 +1263,14 @@ pub fn serve( .and(task_spawner_filter.clone()) .and(chain_filter.clone()) .and(network_tx_filter.clone()) + .and(network_globals.clone()) .and(log_filter.clone()) .then( move |block_contents: PublishBlockRequest, task_spawner: TaskSpawner, chain: Arc>, network_tx: UnboundedSender>, + network_globals: Arc>, log: Logger| { task_spawner.spawn_async_with_rejection(Priority::P0, async move { publish_blocks::publish_block( @@ -1279,6 +1281,7 @@ pub fn serve( log, BroadcastValidation::default(), duplicate_block_status_code, + network_globals, ) .await }) @@ -1294,6 +1297,7 @@ pub fn serve( .and(task_spawner_filter.clone()) .and(chain_filter.clone()) .and(network_tx_filter.clone()) + .and(network_globals.clone()) .and(log_filter.clone()) .then( move |block_bytes: Bytes, @@ -1301,6 +1305,7 @@ pub fn serve( task_spawner: TaskSpawner, 
chain: Arc>, network_tx: UnboundedSender>, + network_globals: Arc>, log: Logger| { task_spawner.spawn_async_with_rejection(Priority::P0, async move { let block_contents = PublishBlockRequest::::from_ssz_bytes( @@ -1318,6 +1323,7 @@ pub fn serve( log, BroadcastValidation::default(), duplicate_block_status_code, + network_globals, ) .await }) @@ -1333,6 +1339,7 @@ pub fn serve( .and(task_spawner_filter.clone()) .and(chain_filter.clone()) .and(network_tx_filter.clone()) + .and(network_globals.clone()) .and(log_filter.clone()) .then( move |validation_level: api_types::BroadcastValidationQuery, @@ -1340,6 +1347,7 @@ pub fn serve( task_spawner: TaskSpawner, chain: Arc>, network_tx: UnboundedSender>, + network_globals: Arc>, log: Logger| { task_spawner.spawn_async_with_rejection(Priority::P0, async move { publish_blocks::publish_block( @@ -1350,6 +1358,7 @@ pub fn serve( log, validation_level.broadcast_validation, duplicate_block_status_code, + network_globals, ) .await }) @@ -1366,6 +1375,7 @@ pub fn serve( .and(task_spawner_filter.clone()) .and(chain_filter.clone()) .and(network_tx_filter.clone()) + .and(network_globals.clone()) .and(log_filter.clone()) .then( move |validation_level: api_types::BroadcastValidationQuery, @@ -1374,6 +1384,7 @@ pub fn serve( task_spawner: TaskSpawner, chain: Arc>, network_tx: UnboundedSender>, + network_globals: Arc>, log: Logger| { task_spawner.spawn_async_with_rejection(Priority::P0, async move { let block_contents = PublishBlockRequest::::from_ssz_bytes( @@ -1391,6 +1402,7 @@ pub fn serve( log, validation_level.broadcast_validation, duplicate_block_status_code, + network_globals, ) .await }) @@ -1410,12 +1422,14 @@ pub fn serve( .and(task_spawner_filter.clone()) .and(chain_filter.clone()) .and(network_tx_filter.clone()) + .and(network_globals.clone()) .and(log_filter.clone()) .then( move |block_contents: Arc>, task_spawner: TaskSpawner, chain: Arc>, network_tx: UnboundedSender>, + network_globals: Arc>, log: Logger| { 
task_spawner.spawn_async_with_rejection(Priority::P0, async move { publish_blocks::publish_blinded_block( @@ -1425,6 +1439,7 @@ pub fn serve( log, BroadcastValidation::default(), duplicate_block_status_code, + network_globals, ) .await }) @@ -1440,12 +1455,14 @@ pub fn serve( .and(task_spawner_filter.clone()) .and(chain_filter.clone()) .and(network_tx_filter.clone()) + .and(network_globals.clone()) .and(log_filter.clone()) .then( move |block_bytes: Bytes, task_spawner: TaskSpawner, chain: Arc>, network_tx: UnboundedSender>, + network_globals: Arc>, log: Logger| { task_spawner.spawn_async_with_rejection(Priority::P0, async move { let block = SignedBlindedBeaconBlock::::from_ssz_bytes( @@ -1463,6 +1480,7 @@ pub fn serve( log, BroadcastValidation::default(), duplicate_block_status_code, + network_globals, ) .await }) @@ -1478,6 +1496,7 @@ pub fn serve( .and(task_spawner_filter.clone()) .and(chain_filter.clone()) .and(network_tx_filter.clone()) + .and(network_globals.clone()) .and(log_filter.clone()) .then( move |validation_level: api_types::BroadcastValidationQuery, @@ -1485,6 +1504,7 @@ pub fn serve( task_spawner: TaskSpawner, chain: Arc>, network_tx: UnboundedSender>, + network_globals: Arc>, log: Logger| { task_spawner.spawn_async_with_rejection(Priority::P0, async move { publish_blocks::publish_blinded_block( @@ -1494,6 +1514,7 @@ pub fn serve( log, validation_level.broadcast_validation, duplicate_block_status_code, + network_globals, ) .await }) @@ -1509,6 +1530,7 @@ pub fn serve( .and(task_spawner_filter.clone()) .and(chain_filter.clone()) .and(network_tx_filter.clone()) + .and(network_globals.clone()) .and(log_filter.clone()) .then( move |validation_level: api_types::BroadcastValidationQuery, @@ -1516,6 +1538,7 @@ pub fn serve( task_spawner: TaskSpawner, chain: Arc>, network_tx: UnboundedSender>, + network_globals: Arc>, log: Logger| { task_spawner.spawn_async_with_rejection(Priority::P0, async move { let block = SignedBlindedBeaconBlock::::from_ssz_bytes( @@ 
-1533,6 +1556,7 @@ pub fn serve( log, validation_level.broadcast_validation, duplicate_block_status_code, + network_globals, ) .await }) diff --git a/beacon_node/http_api/src/publish_blocks.rs b/beacon_node/http_api/src/publish_blocks.rs index 10d000ef6f8..bbdfc31d430 100644 --- a/beacon_node/http_api/src/publish_blocks.rs +++ b/beacon_node/http_api/src/publish_blocks.rs @@ -9,8 +9,9 @@ use beacon_chain::{ use eth2::types::{into_full_block_and_blobs, BroadcastValidation, ErrorMessage}; use eth2::types::{FullPayloadContents, PublishBlockRequest}; use execution_layer::ProvenancedPayload; -use lighthouse_network::PubsubMessage; +use lighthouse_network::{NetworkGlobals, PubsubMessage}; use network::NetworkMessage; +use rand::seq::SliceRandom; use slog::{debug, error, info, warn, Logger}; use slot_clock::SlotClock; use std::marker::PhantomData; @@ -19,9 +20,9 @@ use std::time::Duration; use tokio::sync::mpsc::UnboundedSender; use tree_hash::TreeHash; use types::{ - AbstractExecPayload, BeaconBlockRef, BlobSidecarList, BlockImportSource, EthSpec, ExecPayload, - ExecutionBlockHash, ForkName, FullPayload, FullPayloadBellatrix, Hash256, SignedBeaconBlock, - SignedBlindedBeaconBlock, VariableList, + AbstractExecPayload, BeaconBlockRef, BlobSidecarList, BlockImportSource, DataColumnSidecarList, + DataColumnSubnetId, EthSpec, ExecPayload, ExecutionBlockHash, ForkName, FullPayload, + FullPayloadBellatrix, Hash256, SignedBeaconBlock, SignedBlindedBeaconBlock, VariableList, }; use warp::http::StatusCode; use warp::{reply::Response, Rejection, Reply}; @@ -45,6 +46,7 @@ impl> ProvenancedBloc } /// Handles a request from the HTTP API for full blocks. 
+#[allow(clippy::too_many_arguments)] pub async fn publish_block>( block_root: Option, provenanced_block: ProvenancedBlock, @@ -53,6 +55,7 @@ pub async fn publish_block>, ) -> Result { let seen_timestamp = timestamp_now(); @@ -68,10 +71,13 @@ pub async fn publish_block block.slot()); + let malicious_withhold_count = chain.config.malicious_withhold_count; + let chain_cloned = chain.clone(); /* actually publish a block */ let publish_block = move |block: Arc>, blobs_opt: Option>, + data_cols_opt: Option>, sender, log, seen_timestamp| { @@ -104,6 +110,7 @@ pub async fn publish_block { let mut pubsub_messages = vec![PubsubMessage::BeaconBlock(block)]; if let Some(blob_sidecars) = blobs_opt { + // Publish blob sidecars for (blob_index, blob) in blob_sidecars.into_iter().enumerate() { pubsub_messages.push(PubsubMessage::BlobSidecar(Box::new(( blob_index as u64, @@ -111,6 +118,30 @@ pub async fn publish_block 0 { + let columns_to_keep = data_col_sidecars + .len() + .saturating_sub(malicious_withhold_count); + // Randomize columns before dropping the last malicious_withhold_count items + data_col_sidecars.shuffle(&mut rand::thread_rng()); + data_col_sidecars = data_col_sidecars + .into_iter() + .take(columns_to_keep) + .collect::>(); + } + + for data_col in data_col_sidecars { + let subnet = DataColumnSubnetId::from_column_index::( + data_col.index as usize, + &chain_cloned.spec, + ); + pubsub_messages.push(PubsubMessage::DataColumnSidecar(Box::new(( + subnet, data_col, + )))); + } + } crate::publish_pubsub_messages(&sender, pubsub_messages) .map_err(|_| BlockError::BeaconChainError(BeaconChainError::UnableToPublish))?; } @@ -126,7 +157,7 @@ pub async fn publish_block b, Err(BlockContentsError::BlockError(BlockError::BlockIsAlreadyKnown(_))) @@ -155,6 +186,10 @@ pub async fn publish_block>(); VariableList::from(blobs) }); + let data_cols_opt = gossip_verified_data_columns + .as_ref() + .map(|gossip_verified_data_columns| { + gossip_verified_data_columns + .into_iter() + 
.map(|col| col.clone_data_column()) + .collect::>() + }); let block_root = block_root.unwrap_or(gossip_verified_block.block_root); @@ -172,6 +215,7 @@ pub async fn publish_block publish_block( block_clone, blobs_opt, + data_cols_opt, sender_clone, log_clone, seen_timestamp, @@ -201,6 +246,7 @@ pub async fn publish_block &msg + ); + Err(warp_utils::reject::custom_bad_request(msg)) + }; + } + } + match Box::pin(chain.process_block( block_root, gossip_verified_block, @@ -313,6 +382,7 @@ pub async fn publish_blinded_block( log: Logger, validation_level: BroadcastValidation, duplicate_status_code: StatusCode, + network_globals: Arc>, ) -> Result { let block_root = blinded_block.canonical_root(); let full_block: ProvenancedBlock> = @@ -325,6 +395,7 @@ pub async fn publish_blinded_block( log, validation_level, duplicate_status_code, + network_globals, ) .await } diff --git a/beacon_node/http_api/src/test_utils.rs b/beacon_node/http_api/src/test_utils.rs index 88112de10b6..dcd494a880f 100644 --- a/beacon_node/http_api/src/test_utils.rs +++ b/beacon_node/http_api/src/test_utils.rs @@ -151,6 +151,7 @@ pub async fn create_api_server( vec![], false, &log, + chain.spec.clone(), )); // Only a peer manager can add peers, so we create a dummy manager. 
diff --git a/beacon_node/http_api/tests/broadcast_validation_tests.rs b/beacon_node/http_api/tests/broadcast_validation_tests.rs index 78f9c819888..4b884bb5192 100644 --- a/beacon_node/http_api/tests/broadcast_validation_tests.rs +++ b/beacon_node/http_api/tests/broadcast_validation_tests.rs @@ -376,6 +376,7 @@ pub async fn consensus_partial_pass_only_consensus() { /* submit `block_b` which should induce equivocation */ let channel = tokio::sync::mpsc::unbounded_channel(); + let network_globals = tester.ctx.network_globals.clone().unwrap(); let publication_result = publish_block( None, @@ -385,6 +386,7 @@ pub async fn consensus_partial_pass_only_consensus() { test_logger, validation_level.unwrap(), StatusCode::ACCEPTED, + network_globals, ) .await; @@ -677,6 +679,7 @@ pub async fn equivocation_consensus_late_equivocation() { assert!(gossip_block_contents_a.is_err()); let channel = tokio::sync::mpsc::unbounded_channel(); + let network_globals = tester.ctx.network_globals.clone().unwrap(); let publication_result = publish_block( None, @@ -686,6 +689,7 @@ pub async fn equivocation_consensus_late_equivocation() { test_logger, validation_level.unwrap(), StatusCode::ACCEPTED, + network_globals, ) .await; @@ -1335,6 +1339,7 @@ pub async fn blinded_equivocation_consensus_late_equivocation() { assert!(gossip_block_a.is_err()); let channel = tokio::sync::mpsc::unbounded_channel(); + let network_globals = tester.ctx.network_globals.clone().unwrap(); let publication_result = publish_blinded_block( block_b, @@ -1343,6 +1348,7 @@ pub async fn blinded_equivocation_consensus_late_equivocation() { test_logger, validation_level.unwrap(), StatusCode::ACCEPTED, + network_globals, ) .await; diff --git a/beacon_node/lighthouse_network/src/discovery/enr.rs b/beacon_node/lighthouse_network/src/discovery/enr.rs index 04ae9971502..7415fdaf590 100644 --- a/beacon_node/lighthouse_network/src/discovery/enr.rs +++ b/beacon_node/lighthouse_network/src/discovery/enr.rs @@ -360,7 +360,7 @@ mod 
test { let config = NetworkConfig::default(); let spec = make_eip7594_spec(); let (mut enr, enr_key) = build_enr_with_config(config, &spec); - let invalid_subnet_count = 99u64; + let invalid_subnet_count = 999u64; enr.insert( PEERDAS_CUSTODY_SUBNET_COUNT_ENR_KEY, diff --git a/beacon_node/lighthouse_network/src/discovery/mod.rs b/beacon_node/lighthouse_network/src/discovery/mod.rs index 300c190cdaf..7b297d243bd 100644 --- a/beacon_node/lighthouse_network/src/discovery/mod.rs +++ b/beacon_node/lighthouse_network/src/discovery/mod.rs @@ -1232,6 +1232,7 @@ mod tests { vec![], false, &log, + spec.clone(), ); let keypair = keypair.into(); Discovery::new(keypair, &config, Arc::new(globals), &log, &spec) diff --git a/beacon_node/lighthouse_network/src/discovery/subnet_predicate.rs b/beacon_node/lighthouse_network/src/discovery/subnet_predicate.rs index b53afe556db..8bc5e25fde9 100644 --- a/beacon_node/lighthouse_network/src/discovery/subnet_predicate.rs +++ b/beacon_node/lighthouse_network/src/discovery/subnet_predicate.rs @@ -16,6 +16,7 @@ where E: EthSpec, { let log_clone = log.clone(); + let spec_clone = spec.clone(); move |enr: &Enr| { let attestation_bitfield: EnrAttestationBitfield = match enr.attestation_bitfield::() @@ -29,8 +30,7 @@ where let sync_committee_bitfield: Result, _> = enr.sync_committee_bitfield::(); - // TODO(das): compute from enr - let custody_subnet_count = spec.custody_requirement; + let custody_subnet_count = enr.custody_subnet_count::(&spec_clone); let predicate = subnets.iter().any(|subnet| match subnet { Subnet::Attestation(s) => attestation_bitfield diff --git a/beacon_node/lighthouse_network/src/peer_manager/mod.rs b/beacon_node/lighthouse_network/src/peer_manager/mod.rs index 4c9551507e7..7247425f500 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/mod.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/mod.rs @@ -530,7 +530,10 @@ impl PeerManager { RPCResponseErrorCode::Unknown => PeerAction::HighToleranceError, 
RPCResponseErrorCode::ResourceUnavailable => { // Don't ban on this because we want to retry with a block by root request. - if matches!(protocol, Protocol::BlobsByRoot) { + if matches!( + protocol, + Protocol::BlobsByRoot | Protocol::DataColumnsByRoot + ) { return; } @@ -1385,7 +1388,8 @@ mod tests { ..Default::default() }; let log = build_log(slog::Level::Debug, false); - let globals = NetworkGlobals::new_test_globals(vec![], &log); + let spec = E::default_spec(); + let globals = NetworkGlobals::new_test_globals(vec![], &log, spec); PeerManager::new(config, Arc::new(globals), &log).unwrap() } @@ -1399,7 +1403,8 @@ mod tests { ..Default::default() }; let log = build_log(slog::Level::Debug, false); - let globals = NetworkGlobals::new_test_globals(trusted_peers, &log); + let spec = E::default_spec(); + let globals = NetworkGlobals::new_test_globals(trusted_peers, &log, spec); PeerManager::new(config, Arc::new(globals), &log).unwrap() } diff --git a/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs b/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs index c3e77ae225e..fdde57b4a57 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs @@ -1,5 +1,8 @@ +use crate::discovery::enr::PEERDAS_CUSTODY_SUBNET_COUNT_ENR_KEY; use crate::discovery::CombinedKey; -use crate::{metrics, multiaddr::Multiaddr, types::Subnet, Enr, Gossipsub, PeerId}; +use crate::{ + metrics, multiaddr::Multiaddr, types::Subnet, Enr, EnrExt, Eth2Enr, Gossipsub, PeerId, +}; use peer_info::{ConnectionDirection, PeerConnectionStatus, PeerInfo}; use rand::seq::SliceRandom; use score::{PeerAction, ReportSource, Score, ScoreState}; @@ -12,7 +15,7 @@ use std::{ fmt::Formatter, }; use sync_status::SyncStatus; -use types::EthSpec; +use types::{ChainSpec, DataColumnSubnetId, EthSpec}; pub mod client; pub mod peer_info; @@ -44,10 +47,16 @@ pub struct PeerDB { disable_peer_scoring: bool, /// PeerDB's logger log: 
slog::Logger, + spec: ChainSpec, } impl PeerDB { - pub fn new(trusted_peers: Vec, disable_peer_scoring: bool, log: &slog::Logger) -> Self { + pub fn new( + trusted_peers: Vec, + disable_peer_scoring: bool, + log: &slog::Logger, + spec: ChainSpec, + ) -> Self { // Initialize the peers hashmap with trusted peers let peers = trusted_peers .into_iter() @@ -59,6 +68,7 @@ impl PeerDB { banned_peers_count: BannedPeersCount::default(), disable_peer_scoring, peers, + spec, } } @@ -246,6 +256,27 @@ impl PeerDB { .map(|(peer_id, _)| peer_id) } + pub fn good_custody_subnet_peer( + &self, + subnet: DataColumnSubnetId, + ) -> impl Iterator { + self.peers + .iter() + .filter(move |(_, info)| { + // TODO(das): we currently consider peer to be a subnet peer if the peer is *either* + // subscribed to the subnet or assigned to the subnet. + // The first condition is currently required as we don't have custody count in + // metadata implemented yet, and therefore unable to reliably determine custody + // subnet count (ENR is not always available). + // This condition can be removed later so that we can identify peers that are not + // serving custody columns and penalise accordingly. + let is_custody_subnet_peer = info.on_subnet_gossipsub(&Subnet::DataColumn(subnet)) + || info.is_assigned_to_custody_subnet(&subnet); + info.is_connected() && info.is_good_gossipsub_peer() && is_custody_subnet_peer + }) + .map(|(peer_id, _)| peer_id) + } + /// Gives the ids of all known disconnected peers. pub fn disconnected_peers(&self) -> impl Iterator { self.peers @@ -673,17 +704,34 @@ impl PeerDB { } /// Updates the connection state. MUST ONLY BE USED IN TESTS. 
- pub fn __add_connected_peer_testing_only(&mut self, peer_id: &PeerId) -> Option { + pub fn __add_connected_peer_testing_only( + &mut self, + supernode: bool, + spec: &ChainSpec, + ) -> PeerId { let enr_key = CombinedKey::generate_secp256k1(); - let enr = Enr::builder().build(&enr_key).unwrap(); + let mut enr = Enr::builder().build(&enr_key).unwrap(); + let peer_id = enr.peer_id(); + + if supernode { + enr.insert( + PEERDAS_CUSTODY_SUBNET_COUNT_ENR_KEY, + &spec.data_column_sidecar_subnet_count, + &enr_key, + ) + .expect("u64 can be encoded"); + } + self.update_connection_state( - peer_id, + &peer_id, NewConnectionState::Connected { enr: Some(enr), seen_address: Multiaddr::empty(), direction: ConnectionDirection::Outgoing, }, - ) + ); + + peer_id } /// The connection state of the peer has been changed. Modify the peer in the db to ensure all @@ -746,8 +794,17 @@ impl PeerDB { seen_address, }, ) => { - // Update the ENR if one exists + // Update the ENR if one exists, and compute the custody subnets if let Some(enr) = enr { + let node_id = enr.node_id().raw().into(); + let custody_subnet_count = enr.custody_subnet_count::(&self.spec); + let custody_subnets = DataColumnSubnetId::compute_custody_subnets::( + node_id, + custody_subnet_count, + &self.spec, + ) + .collect::>(); + info.set_custody_subnets(custody_subnets); info.set_enr(enr); } @@ -1298,7 +1355,8 @@ mod tests { fn get_db() -> PeerDB { let log = build_log(slog::Level::Debug, false); - PeerDB::new(vec![], false, &log) + let spec = M::default_spec(); + PeerDB::new(vec![], false, &log, spec) } #[test] @@ -1997,7 +2055,8 @@ mod tests { fn test_trusted_peers_score() { let trusted_peer = PeerId::random(); let log = build_log(slog::Level::Debug, false); - let mut pdb: PeerDB = PeerDB::new(vec![trusted_peer], false, &log); + let spec = M::default_spec(); + let mut pdb: PeerDB = PeerDB::new(vec![trusted_peer], false, &log, spec); pdb.connect_ingoing(&trusted_peer, "/ip4/0.0.0.0".parse().unwrap(), None); @@ -2021,7 
+2080,8 @@ mod tests { fn test_disable_peer_scoring() { let peer = PeerId::random(); let log = build_log(slog::Level::Debug, false); - let mut pdb: PeerDB = PeerDB::new(vec![], true, &log); + let spec = M::default_spec(); + let mut pdb: PeerDB = PeerDB::new(vec![], true, &log, spec); pdb.connect_ingoing(&peer, "/ip4/0.0.0.0".parse().unwrap(), None); diff --git a/beacon_node/lighthouse_network/src/peer_manager/peerdb/peer_info.rs b/beacon_node/lighthouse_network/src/peer_manager/peerdb/peer_info.rs index 0745cc26008..8a04d450ba4 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/peerdb/peer_info.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/peerdb/peer_info.rs @@ -13,7 +13,7 @@ use std::collections::HashSet; use std::net::IpAddr; use std::time::Instant; use strum::AsRefStr; -use types::EthSpec; +use types::{DataColumnSubnetId, EthSpec}; use PeerConnectionStatus::*; /// Information about a given connected peer. @@ -40,6 +40,11 @@ pub struct PeerInfo { meta_data: Option>, /// Subnets the peer is connected to. subnets: HashSet, + /// This is computed from either metadata or the ENR, and contains the subnets that the peer + /// is *assigned* to custody, rather than *connected* to (different to `self.subnets`). + /// Note: Another reason to keep this separate to `self.subnets` is an upcoming change to + /// decouple custody requirements from the actual subnets, i.e. changing this to `custody_groups`. + custody_subnets: HashSet, /// The time we would like to retain this peer. After this time, the peer is no longer /// necessary. #[serde(skip)] @@ -62,6 +67,7 @@ impl Default for PeerInfo { listening_addresses: Vec::new(), seen_multiaddrs: HashSet::new(), subnets: HashSet::new(), + custody_subnets: HashSet::new(), sync_status: SyncStatus::Unknown, meta_data: None, min_ttl: None, @@ -210,6 +216,11 @@ impl PeerInfo { self.subnets.contains(subnet) } + /// Returns if the peer is assigned to a given `DataColumnSubnetId`. 
+ pub fn is_assigned_to_custody_subnet(&self, subnet: &DataColumnSubnetId) -> bool { + self.custody_subnets.contains(subnet) + } + /// Returns true if the peer is connected to a long-lived subnet. pub fn has_long_lived_subnet(&self) -> bool { // Check the meta_data @@ -362,6 +373,10 @@ impl PeerInfo { self.connection_status = connection_status } + pub(super) fn set_custody_subnets(&mut self, custody_subnets: HashSet) { + self.custody_subnets = custody_subnets + } + /// Sets the ENR of the peer if one is known. pub(super) fn set_enr(&mut self, enr: Enr) { self.enr = Some(enr) diff --git a/beacon_node/lighthouse_network/src/rpc/codec/ssz_snappy.rs b/beacon_node/lighthouse_network/src/rpc/codec/ssz_snappy.rs index f5d8b58dcee..9012954391c 100644 --- a/beacon_node/lighthouse_network/src/rpc/codec/ssz_snappy.rs +++ b/beacon_node/lighthouse_network/src/rpc/codec/ssz_snappy.rs @@ -522,6 +522,9 @@ fn handle_rpc_request( )?, }))) } + SupportedProtocol::DataColumnsByRangeV1 => Ok(Some(InboundRequest::DataColumnsByRange( + DataColumnsByRangeRequest::from_ssz_bytes(decoded_buffer)?, + ))), SupportedProtocol::DataColumnsByRootV1 => Ok(Some(InboundRequest::DataColumnsByRoot( DataColumnsByRootRequest { data_column_ids: RuntimeVariableList::from_ssz_bytes( @@ -530,9 +533,6 @@ fn handle_rpc_request( )?, }, ))), - SupportedProtocol::DataColumnsByRangeV1 => Ok(Some(InboundRequest::DataColumnsByRange( - DataColumnsByRangeRequest::from_ssz_bytes(decoded_buffer)?, - ))), SupportedProtocol::PingV1 => Ok(Some(InboundRequest::Ping(Ping { data: u64::from_ssz_bytes(decoded_buffer)?, }))), diff --git a/beacon_node/lighthouse_network/src/rpc/config.rs b/beacon_node/lighthouse_network/src/rpc/config.rs index 7ff189b9815..fcb9c986048 100644 --- a/beacon_node/lighthouse_network/src/rpc/config.rs +++ b/beacon_node/lighthouse_network/src/rpc/config.rs @@ -165,6 +165,14 @@ impl Debug for RateLimiterConfig { .field("blocks_by_root", fmt_q!(&self.blocks_by_root_quota)) .field("blobs_by_range", 
fmt_q!(&self.blobs_by_range_quota)) .field("blobs_by_root", fmt_q!(&self.blobs_by_root_quota)) + .field( + "data_columns_by_range", + fmt_q!(&self.data_columns_by_range_quota), + ) + .field( + "data_columns_by_root", + fmt_q!(&self.data_columns_by_root_quota), + ) .finish() } } diff --git a/beacon_node/lighthouse_network/src/rpc/methods.rs b/beacon_node/lighthouse_network/src/rpc/methods.rs index 7c7dca02f50..12565dee5ee 100644 --- a/beacon_node/lighthouse_network/src/rpc/methods.rs +++ b/beacon_node/lighthouse_network/src/rpc/methods.rs @@ -742,6 +742,16 @@ impl std::fmt::Display for BlobsByRangeRequest { } } +impl std::fmt::Display for DataColumnsByRootRequest { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "Request: DataColumnsByRoot: Number of Requested Data Column Ids: {}", + self.data_column_ids.len() + ) + } +} + impl slog::KV for StatusMessage { fn serialize( &self, diff --git a/beacon_node/lighthouse_network/src/rpc/mod.rs b/beacon_node/lighthouse_network/src/rpc/mod.rs index 666cbe6fbcc..c40f976e7a1 100644 --- a/beacon_node/lighthouse_network/src/rpc/mod.rs +++ b/beacon_node/lighthouse_network/src/rpc/mod.rs @@ -366,8 +366,10 @@ where protocol, Protocol::BlocksByRange | Protocol::BlobsByRange + | Protocol::DataColumnsByRange | Protocol::BlocksByRoot | Protocol::BlobsByRoot + | Protocol::DataColumnsByRoot ) { debug!(self.log, "Request too large to process"; "request" => %req, "protocol" => %protocol); } else { diff --git a/beacon_node/lighthouse_network/src/rpc/rate_limiter.rs b/beacon_node/lighthouse_network/src/rpc/rate_limiter.rs index 9fb085efd86..523b891a009 100644 --- a/beacon_node/lighthouse_network/src/rpc/rate_limiter.rs +++ b/beacon_node/lighthouse_network/src/rpc/rate_limiter.rs @@ -196,7 +196,6 @@ impl RPCRateLimiterBuilder { let blbrange_quota = self .blbrange_quota .ok_or("BlobsByRange quota not specified")?; - let blbroots_quota = self .blbroot_quota .ok_or("BlobsByRoot quota not specified")?; @@ 
-357,6 +356,8 @@ impl RPCRateLimiter { self.bbroots_rl.prune(time_since_start); self.blbrange_rl.prune(time_since_start); self.blbroot_rl.prune(time_since_start); + self.dcbrange_rl.prune(time_since_start); + self.dcbroot_rl.prune(time_since_start); } } diff --git a/beacon_node/lighthouse_network/src/service/api_types.rs b/beacon_node/lighthouse_network/src/service/api_types.rs index 756f4bd1326..30400db3b66 100644 --- a/beacon_node/lighthouse_network/src/service/api_types.rs +++ b/beacon_node/lighthouse_network/src/service/api_types.rs @@ -2,8 +2,8 @@ use std::sync::Arc; use libp2p::swarm::ConnectionId; use types::{ - BlobSidecar, DataColumnSidecar, EthSpec, LightClientBootstrap, LightClientFinalityUpdate, - LightClientOptimisticUpdate, SignedBeaconBlock, + BlobSidecar, DataColumnSidecar, EthSpec, Hash256, LightClientBootstrap, + LightClientFinalityUpdate, LightClientOptimisticUpdate, SignedBeaconBlock, }; use crate::rpc::methods::{ @@ -42,11 +42,43 @@ pub enum SyncRequestId { /// Request searching for a set of blobs given a hash. SingleBlob { id: SingleLookupReqId }, /// Request searching for a set of data columns given a hash and list of column indices. - DataColumnsByRoot(DataColumnsByRootRequestId, SingleLookupReqId), + DataColumnsByRoot(DataColumnsByRootRequestId, DataColumnsByRootRequester), /// Range request that is composed by both a block range request and a blob range request. RangeBlockAndBlobs { id: Id }, } +#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] +pub enum DataColumnsByRootRequester { + Sampling(SamplingId), + Custody(CustodyId), +} + +#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] +pub struct SamplingId { + pub id: SamplingRequester, + pub sampling_request_id: SamplingRequestId, +} + +#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] +pub enum SamplingRequester { + ImportedBlock(Hash256), +} + +/// Identifier of sampling requests. 
+#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] +pub struct SamplingRequestId(pub usize); + +#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] +pub struct CustodyId { + pub requester: CustodyRequester, + pub req_id: Id, +} + +/// Downstream components that perform custody by root requests. +/// Currently, it's only single block lookups, so not using an enum +#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] +pub struct CustodyRequester(pub SingleLookupReqId); + /// Application level requests sent to the network. #[derive(Debug, Clone, Copy)] pub enum AppRequestId { diff --git a/beacon_node/lighthouse_network/src/service/mod.rs b/beacon_node/lighthouse_network/src/service/mod.rs index 4ef080619eb..50bce0217af 100644 --- a/beacon_node/lighthouse_network/src/service/mod.rs +++ b/beacon_node/lighthouse_network/src/service/mod.rs @@ -172,6 +172,7 @@ impl Network { trusted_peers, config.disable_peer_scoring, &log, + ctx.chain_spec.clone(), ); Arc::new(globals) }; @@ -242,6 +243,7 @@ impl Network { let max_topics = ctx.chain_spec.attestation_subnet_count as usize + SYNC_COMMITTEE_SUBNET_COUNT as usize + ctx.chain_spec.blob_sidecar_subnet_count as usize + + ctx.chain_spec.data_column_sidecar_subnet_count as usize + BASE_CORE_TOPICS.len() + ALTAIR_CORE_TOPICS.len() + CAPELLA_CORE_TOPICS.len() @@ -255,10 +257,11 @@ impl Network { ctx.chain_spec.attestation_subnet_count, SYNC_COMMITTEE_SUBNET_COUNT, ctx.chain_spec.blob_sidecar_subnet_count, + ctx.chain_spec.data_column_sidecar_subnet_count, ), // during a fork we subscribe to both the old and new topics max_subscribed_topics: max_topics * 4, - // 162 in theory = (64 attestation + 4 sync committee + 7 core topics + 6 blob topics) * 2 + // 418 in theory = (64 attestation + 4 sync committee + 7 core topics + 6 blob topics + 128 column topics) * 2 max_subscriptions_per_request: max_topics * 2, }; diff --git a/beacon_node/lighthouse_network/src/service/utils.rs b/beacon_node/lighthouse_network/src/service/utils.rs index 
80187efc103..cf06b7c1cee 100644 --- a/beacon_node/lighthouse_network/src/service/utils.rs +++ b/beacon_node/lighthouse_network/src/service/utils.rs @@ -19,7 +19,9 @@ use std::io::prelude::*; use std::path::Path; use std::sync::Arc; use std::time::Duration; -use types::{ChainSpec, EnrForkId, EthSpec, ForkContext, SubnetId, SyncSubnetId}; +use types::{ + ChainSpec, DataColumnSubnetId, EnrForkId, EthSpec, ForkContext, SubnetId, SyncSubnetId, +}; pub const NETWORK_KEY_FILENAME: &str = "key"; /// The maximum simultaneous libp2p connections per peer. @@ -231,6 +233,7 @@ pub(crate) fn create_whitelist_filter( attestation_subnet_count: u64, sync_committee_subnet_count: u64, blob_sidecar_subnet_count: u64, + data_column_sidecar_subnet_count: u64, ) -> gossipsub::WhitelistSubscriptionFilter { let mut possible_hashes = HashSet::new(); for fork_digest in possible_fork_digests { @@ -259,6 +262,9 @@ pub(crate) fn create_whitelist_filter( for id in 0..blob_sidecar_subnet_count { add(BlobSidecar(id)); } + for id in 0..data_column_sidecar_subnet_count { + add(DataColumnSidecar(DataColumnSubnetId::new(id))); + } } gossipsub::WhitelistSubscriptionFilter(possible_hashes) } diff --git a/beacon_node/lighthouse_network/src/types/globals.rs b/beacon_node/lighthouse_network/src/types/globals.rs index 1c7c7f07d0a..412a70902df 100644 --- a/beacon_node/lighthouse_network/src/types/globals.rs +++ b/beacon_node/lighthouse_network/src/types/globals.rs @@ -2,12 +2,12 @@ use crate::peer_manager::peerdb::PeerDB; use crate::rpc::{MetaData, MetaDataV2}; use crate::types::{BackFillState, SyncState}; -use crate::Client; use crate::EnrExt; +use crate::{Client, Eth2Enr}; use crate::{Enr, GossipTopic, Multiaddr, PeerId}; use parking_lot::RwLock; use std::collections::HashSet; -use types::{ChainSpec, ColumnIndex, EthSpec}; +use types::{ChainSpec, ColumnIndex, DataColumnSubnetId, EthSpec}; pub struct NetworkGlobals { /// The current local ENR. 
@@ -26,6 +26,7 @@ pub struct NetworkGlobals { pub sync_state: RwLock, /// The current state of the backfill sync. pub backfill_state: RwLock, + spec: ChainSpec, } impl NetworkGlobals { @@ -35,16 +36,23 @@ impl NetworkGlobals { trusted_peers: Vec, disable_peer_scoring: bool, log: &slog::Logger, + spec: ChainSpec, ) -> Self { NetworkGlobals { local_enr: RwLock::new(enr.clone()), peer_id: RwLock::new(enr.peer_id()), listen_multiaddrs: RwLock::new(Vec::new()), local_metadata: RwLock::new(local_metadata), - peers: RwLock::new(PeerDB::new(trusted_peers, disable_peer_scoring, log)), + peers: RwLock::new(PeerDB::new( + trusted_peers, + disable_peer_scoring, + log, + spec.clone(), + )), gossipsub_subscriptions: RwLock::new(HashSet::new()), sync_state: RwLock::new(SyncState::Stalled), backfill_state: RwLock::new(BackFillState::NotRequired), + spec, } } @@ -111,14 +119,45 @@ impl NetworkGlobals { } /// Compute custody data columns the node is assigned to custody. - pub fn custody_columns(&self, _spec: &ChainSpec) -> Vec { - let _enr = self.local_enr(); - //TODO(das): implement ENR changes - vec![] + pub fn custody_columns(&self) -> Vec { + let enr = self.local_enr(); + let node_id = enr.node_id().raw().into(); + let custody_subnet_count = enr.custody_subnet_count::(&self.spec); + DataColumnSubnetId::compute_custody_columns::(node_id, custody_subnet_count, &self.spec) + .collect() + } + + /// Compute custody data column subnets the node is assigned to custody. + pub fn custody_subnets(&self) -> impl Iterator { + let enr = self.local_enr(); + let node_id = enr.node_id().raw().into(); + let custody_subnet_count = enr.custody_subnet_count::(&self.spec); + DataColumnSubnetId::compute_custody_subnets::(node_id, custody_subnet_count, &self.spec) + } + + /// Returns a connected peer that: + /// 1. is connected + /// 2. assigned to custody the column based on its `custody_subnet_count` from ENR or metadata (WIP) + /// 3. has a good score + /// 4.
subscribed to the specified column - this condition can be removed later, so we can + /// identify and penalise peers that are supposed to custody the column. + pub fn custody_peers_for_column(&self, column_index: ColumnIndex) -> Vec { + self.peers + .read() + .good_custody_subnet_peer(DataColumnSubnetId::from_column_index::( + column_index as usize, + &self.spec, + )) + .cloned() + .collect::>() } /// TESTING ONLY. Build a dummy NetworkGlobals instance. - pub fn new_test_globals(trusted_peers: Vec, log: &slog::Logger) -> NetworkGlobals { + pub fn new_test_globals( + trusted_peers: Vec, + log: &slog::Logger, + spec: ChainSpec, + ) -> NetworkGlobals { use crate::CombinedKeyExt; let keypair = libp2p::identity::secp256k1::Keypair::generate(); let enr_key: discv5::enr::CombinedKey = discv5::enr::CombinedKey::from_secp256k1(&keypair); @@ -133,6 +172,28 @@ impl NetworkGlobals { trusted_peers, false, log, + spec, ) } } + +#[cfg(test)] +mod test { + use super::*; + use types::{EthSpec, MainnetEthSpec as E}; + + #[test] + fn test_custody_count_default() { + let spec = E::default_spec(); + let log = logging::test_logger(); + let default_custody_requirement_column_count = spec.number_of_columns as u64 + / spec.data_column_sidecar_subnet_count + * spec.custody_requirement; + let globals = NetworkGlobals::::new_test_globals(vec![], &log, spec.clone()); + let columns = globals.custody_columns(); + assert_eq!( + columns.len(), + default_custody_requirement_column_count as usize + ); + } +} diff --git a/beacon_node/network/src/metrics.rs b/beacon_node/network/src/metrics.rs index bb1e5468705..9e42aa8e924 100644 --- a/beacon_node/network/src/metrics.rs +++ b/beacon_node/network/src/metrics.rs @@ -14,6 +14,9 @@ use std::sync::{Arc, LazyLock}; use strum::IntoEnumIterator; use types::EthSpec; +pub const SUCCESS: &str = "SUCCESS"; +pub const FAILURE: &str = "FAILURE"; + pub static BEACON_BLOCK_MESH_PEERS_PER_CLIENT: LazyLock> = LazyLock::new(|| { try_create_int_gauge_vec( @@ -340,6 
+343,13 @@ pub static PEERS_PER_SYNC_TYPE: LazyLock> = LazyLock::new(|| &["sync_status"], ) }); +pub static PEERS_PER_COLUMN_SUBNET: LazyLock> = LazyLock::new(|| { + try_create_int_gauge_vec( + "peers_per_column_subnet", + "Number of connected peers per column subnet", + &["subnet_id"], + ) +}); pub static SYNCING_CHAINS_COUNT: LazyLock> = LazyLock::new(|| { try_create_int_gauge_vec( "sync_range_chains", @@ -481,6 +491,29 @@ pub static BEACON_BLOB_DELAY_GOSSIP: LazyLock> = LazyLock::new( ) }); +pub static BEACON_DATA_COLUMN_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME: LazyLock< + Result, +> = LazyLock::new(|| { + try_create_histogram_with_buckets( + "beacon_data_column_gossip_propagation_verification_delay_time", + "Duration between when the data column sidecar is received over gossip and when it is verified for propagation.", + // [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5] + decimal_buckets(-3,-1) + ) +}); +pub static BEACON_DATA_COLUMN_GOSSIP_SLOT_START_DELAY_TIME: LazyLock> = + LazyLock::new(|| { + try_create_histogram_with_buckets( + "beacon_data_column_gossip_slot_start_delay_time", + "Duration between when the data column sidecar is received over gossip and the start of the slot it belongs to.", + // Create a custom bucket list for greater granularity in block delay + Ok(vec![0.1, 0.2, 0.3,0.4,0.5,0.75,1.0,1.25,1.5,1.75,2.0,2.5,3.0,3.5,4.0,5.0,6.0,7.0,8.0,9.0,10.0,15.0,20.0]) + // NOTE: Previous values, which we may want to switch back to. 
+ // [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50] + //decimal_buckets(-1,2) + ) + }); + pub static BEACON_BLOB_DELAY_GOSSIP_VERIFICATION: LazyLock> = LazyLock::new( || { try_create_int_gauge( @@ -520,22 +553,6 @@ pub static BEACON_BLOB_GOSSIP_ARRIVED_LATE_TOTAL: LazyLock> = }, ); -pub static BEACON_DATA_COLUMN_DELAY_GOSSIP: LazyLock> = LazyLock::new(|| { - try_create_int_gauge( - "beacon_data_column_delay_gossip_last_delay", - "The first time we see this data column as a delay from the start of the slot", - ) -}); - -pub static BEACON_DATA_COLUMN_DELAY_GOSSIP_VERIFICATION: LazyLock> = LazyLock::new( - || { - try_create_int_gauge( - "beacon_data_column_delay_gossip_verification", - "Keeps track of the time delay from the start of the slot to the point we propagate the data column" - ) - }, -); - /* * Light client update reprocessing queue metrics. */ @@ -548,6 +565,31 @@ pub static BEACON_PROCESSOR_REPROCESSING_QUEUE_SENT_OPTIMISTIC_UPDATES: LazyLock ) }); +/* + * Sampling + */ +pub static SAMPLE_DOWNLOAD_RESULT: LazyLock> = LazyLock::new(|| { + try_create_int_counter_vec( + "beacon_sampling_sample_download_result_total", + "Total count of individual sample download results", + &["result"], + ) +}); +pub static SAMPLE_VERIFY_RESULT: LazyLock> = LazyLock::new(|| { + try_create_int_counter_vec( + "beacon_sampling_sample_verify_result_total", + "Total count of individual sample verify results", + &["result"], + ) +}); +pub static SAMPLING_REQUEST_RESULT: LazyLock> = LazyLock::new(|| { + try_create_int_counter_vec( + "beacon_sampling_request_result_total", + "Total count of sample request results", + &["result"], + ) +}); + pub fn register_finality_update_error(error: &LightClientFinalityUpdateError) { inc_counter_vec(&GOSSIP_FINALITY_UPDATE_ERRORS_PER_TYPE, &[error.as_ref()]); } @@ -564,6 +606,13 @@ pub fn register_sync_committee_error(error: &SyncCommitteeError) { inc_counter_vec(&GOSSIP_SYNC_COMMITTEE_ERRORS_PER_TYPE, &[error.as_ref()]); } +pub fn from_result(result:
&std::result::Result) -> &str { + match result { + Ok(_) => SUCCESS, + Err(_) => FAILURE, + } +} + pub fn update_gossip_metrics( gossipsub: &Gossipsub, network_globals: &Arc>, diff --git a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs index 4c5c34bfd83..d5d83d540a0 100644 --- a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs @@ -4,7 +4,6 @@ use crate::{ service::NetworkMessage, sync::SyncMessage, }; -use beacon_chain::blob_verification::{GossipBlobError, GossipVerifiedBlob}; use beacon_chain::block_verification_types::AsBlock; use beacon_chain::data_column_verification::{GossipDataColumnError, GossipVerifiedDataColumn}; use beacon_chain::store::Error; @@ -19,7 +18,13 @@ use beacon_chain::{ AvailabilityProcessingStatus, BeaconChainError, BeaconChainTypes, BlockError, ForkChoiceError, GossipVerifiedBlock, NotifyExecutionLayer, }; -use lighthouse_network::{Client, MessageAcceptance, MessageId, PeerAction, PeerId, ReportSource}; +use beacon_chain::{ + blob_verification::{GossipBlobError, GossipVerifiedBlob}, + data_availability_checker::DataColumnsToPublish, +}; +use lighthouse_network::{ + Client, MessageAcceptance, MessageId, PeerAction, PeerId, PubsubMessage, ReportSource, +}; use operation_pool::ReceivedPreCapella; use slog::{crit, debug, error, info, trace, warn, Logger}; use slot_clock::SlotClock; @@ -166,6 +171,26 @@ impl NetworkBeaconProcessor { }) } + pub(crate) fn handle_data_columns_to_publish( + &self, + data_columns_to_publish: DataColumnsToPublish, + ) { + if let Some(data_columns_to_publish) = data_columns_to_publish { + self.send_network_message(NetworkMessage::Publish { + messages: data_columns_to_publish + .iter() + .map(|d| { + let subnet = DataColumnSubnetId::from_column_index::( + d.index as usize, + &self.chain.spec, + ); + 
PubsubMessage::DataColumnSidecar(Box::new((subnet, d.clone()))) + }) + .collect(), + }); + } + } + /// Send a message on `message_tx` that the `message_id` sent by `peer_id` should be propagated on /// the gossip network. /// @@ -615,9 +640,9 @@ impl NetworkBeaconProcessor { let index = column_sidecar.index; let delay = get_slot_delay_ms(seen_duration, slot, &self.chain.slot_clock); // Log metrics to track delay from other nodes on the network. - metrics::set_gauge( - &metrics::BEACON_DATA_COLUMN_DELAY_GOSSIP, - delay.as_millis() as i64, + metrics::observe_duration( + &metrics::BEACON_DATA_COLUMN_GOSSIP_SLOT_START_DELAY_TIME, + delay, ); match self .chain @@ -644,9 +669,9 @@ impl NetworkBeaconProcessor { .ok() .and_then(|now| now.checked_sub(seen_duration)) { - metrics::set_gauge( - &metrics::BEACON_DATA_COLUMN_DELAY_GOSSIP_VERIFICATION, - duration.as_millis() as i64, + metrics::observe_duration( + &metrics::BEACON_DATA_COLUMN_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME, + duration, ); } self.process_gossip_verified_data_column( @@ -991,7 +1016,9 @@ impl NetworkBeaconProcessor { .process_gossip_data_columns(vec![verified_data_column]) .await { - Ok(availability) => { + Ok((availability, data_columns_to_publish)) => { + self.handle_data_columns_to_publish(data_columns_to_publish); + match availability { AvailabilityProcessingStatus::Imported(block_root) => { // Note: Reusing block imported metric here @@ -1304,6 +1331,16 @@ impl NetworkBeaconProcessor { ); return None; } + Err(e @ BlockError::BlobNotRequired(_)) => { + // TODO(das): penalty not implemented yet as other clients may still send us blobs + // during early stage of implementation. 
+ debug!(self.log, "Received blobs for slot after PeerDAS epoch from peer"; + "error" => %e, + "peer_id" => %peer_id, + ); + self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); + return None; + } }; metrics::inc_counter(&metrics::BEACON_PROCESSOR_GOSSIP_BLOCK_VERIFIED_TOTAL); @@ -1414,7 +1451,19 @@ impl NetworkBeaconProcessor { let block = verified_block.block.block_cloned(); let block_root = verified_block.block_root; - // TODO(block source) + // TODO(das) Might be too early to issue a request here. We haven't checked that the block + // actually includes blob transactions and thus has data. A peer could send a block with + // garbage commitments, and make us trigger sampling for a block that does not have data. + if block.num_expected_blobs() > 0 { + // Trigger sampling for block not yet execution valid. At this point column custodians are + // unlikely to have received their columns. Triggering sampling so early is only viable with + // either: + // - Sync delaying sampling until some later window + // - Re-processing early sampling requests: https://github.com/sigp/lighthouse/pull/5569 + if self.chain.should_sample_slot(block.slot()) { + self.send_sync_message(SyncMessage::SampleBlock(block_root, block.slot())); + } + } let result = self .chain diff --git a/beacon_node/network/src/network_beacon_processor/mod.rs b/beacon_node/network/src/network_beacon_processor/mod.rs index cb21b6dfb50..7f551c544c7 100644 --- a/beacon_node/network/src/network_beacon_processor/mod.rs +++ b/beacon_node/network/src/network_beacon_processor/mod.rs @@ -1,4 +1,5 @@ use crate::sync::manager::BlockProcessType; +use crate::sync::SamplingId; use crate::{service::NetworkMessage, sync::manager::SyncMessage}; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::{builder::Witness, eth1_chain::CachingEth1Backend, BeaconChain}; @@ -500,6 +501,43 @@ impl NetworkBeaconProcessor { }) } + /// Create a new `Work` event for some sampling columns, and
reports the verification result + /// back to sync. + pub fn send_rpc_validate_data_columns( + self: &Arc, + block_root: Hash256, + data_columns: Vec>>, + seen_timestamp: Duration, + id: SamplingId, + ) -> Result<(), Error> { + let s = self.clone(); + self.try_send(BeaconWorkEvent { + drop_during_sync: false, + work: Work::RpcVerifyDataColumn(Box::pin(async move { + let result = s + .clone() + .validate_rpc_data_columns(block_root, data_columns, seen_timestamp) + .await; + // Sync handles these results + s.send_sync_message(SyncMessage::SampleVerified { id, result }); + })), + }) + } + + /// Create a new `Work` event with a block sampling completed result + pub fn send_sampling_completed( + self: &Arc, + block_root: Hash256, + ) -> Result<(), Error> { + let nbp = self.clone(); + self.try_send(BeaconWorkEvent { + drop_during_sync: false, + work: Work::SamplingResult(Box::pin(async move { + nbp.process_sampling_completed(block_root).await; + })), + }) + } + /// Create a new work event to import `blocks` as a beacon chain segment. 
pub fn send_chain_segment( self: &Arc, diff --git a/beacon_node/network/src/network_beacon_processor/sync_methods.rs b/beacon_node/network/src/network_beacon_processor/sync_methods.rs index 495d1cd92be..508576d9f52 100644 --- a/beacon_node/network/src/network_beacon_processor/sync_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/sync_methods.rs @@ -8,6 +8,7 @@ use crate::sync::{ use beacon_chain::block_verification_types::{AsBlock, RpcBlock}; use beacon_chain::data_availability_checker::AvailabilityCheckError; use beacon_chain::data_availability_checker::MaybeAvailableBlock; +use beacon_chain::data_column_verification::verify_kzg_for_data_column_list; use beacon_chain::{ validator_monitor::get_slot_delay_ms, AvailabilityProcessingStatus, BeaconChainError, BeaconChainTypes, BlockError, ChainSegmentResult, HistoricalBlockError, NotifyExecutionLayer, @@ -24,8 +25,7 @@ use store::KzgCommitment; use tokio::sync::mpsc; use types::beacon_block_body::format_kzg_commitments; use types::blob_sidecar::FixedBlobSidecarList; -use types::{BlockImportSource, DataColumnSidecarList}; -use types::{Epoch, Hash256}; +use types::{BlockImportSource, DataColumnSidecar, DataColumnSidecarList, Epoch, Hash256}; /// Id associated to a batch processing request, either a sync batch or a parent lookup. #[derive(Clone, Debug, PartialEq)] @@ -139,6 +139,7 @@ impl NetworkBeaconProcessor { }; let slot = block.slot(); + let block_has_data = block.as_block().num_expected_blobs() > 0; let parent_root = block.message().parent_root(); let commitments_formatted = block.as_block().commitments_formatted(); @@ -186,6 +187,18 @@ impl NetworkBeaconProcessor { self.chain.recompute_head_at_current_slot().await; } + + // RPC block imported or execution validated. If the block was already imported by gossip we + // receive Err(BlockError::AlreadyKnown). 
+ if result.is_ok() && + // Block has at least one blob, so it produced columns + block_has_data && + // Block slot is within the DA boundary (should always be the case) and PeerDAS is activated + self.chain.should_sample_slot(slot) + { + self.send_sync_message(SyncMessage::SampleBlock(block_root, slot)); + } + // Sync handles these results self.send_sync_message(SyncMessage::BlockComponentProcessed { process_type, @@ -320,24 +333,28 @@ impl NetworkBeaconProcessor { .await; match &result { - Ok(availability) => match availability { - AvailabilityProcessingStatus::Imported(hash) => { - debug!( - self.log, - "Block components retrieved"; - "result" => "imported block and custody columns", - "block_hash" => %hash, - ); - self.chain.recompute_head_at_current_slot().await; - } - AvailabilityProcessingStatus::MissingComponents(_, _) => { - debug!( - self.log, - "Missing components over rpc"; - "block_hash" => %block_root, - ); + Ok((availability, data_columns_to_publish)) => { + self.handle_data_columns_to_publish(data_columns_to_publish.clone()); + + match availability { + AvailabilityProcessingStatus::Imported(hash) => { + debug!( + self.log, + "Block components retrieved"; + "result" => "imported block and custody columns", + "block_hash" => %hash, + ); + self.chain.recompute_head_at_current_slot().await; + } + AvailabilityProcessingStatus::MissingComponents(_, _) => { + debug!( + self.log, + "Missing components over rpc"; + "block_hash" => %block_root, + ); + } } - }, + } Err(BlockError::BlockIsAlreadyKnown(_)) => { debug!( self.log, @@ -357,10 +374,29 @@ impl NetworkBeaconProcessor { self.send_sync_message(SyncMessage::BlockComponentProcessed { process_type, - result: result.into(), + result: result.map(|(r, _)| r).into(), }); } + /// Validate a list of data columns received from RPC requests + pub async fn validate_rpc_data_columns( + self: Arc>, + _block_root: Hash256, + data_columns: Vec>>, + _seen_timestamp: Duration, + ) -> Result<(), String> { + let kzg = 
self.chain.kzg.as_ref().ok_or("Kzg not initialized")?; + verify_kzg_for_data_column_list(data_columns.iter(), kzg).map_err(|err| format!("{err:?}")) + } + + /// Process a sampling completed event, inserting it into fork-choice + pub async fn process_sampling_completed( + self: Arc>, + block_root: Hash256, + ) { + self.chain.process_sampling_completed(block_root).await; + } + /// Attempt to import the chain segment (`blocks`) to the beacon chain, informing the sync /// thread if more blocks are needed to process it. pub async fn process_chain_segment( @@ -421,6 +457,10 @@ impl NetworkBeaconProcessor { .iter() .map(|wrapped| wrapped.n_blobs()) .sum::(); + let n_data_columns = downloaded_blocks + .iter() + .map(|wrapped| wrapped.n_data_columns()) + .sum::(); match self.process_backfill_blocks(downloaded_blocks) { (imported_blocks, Ok(_)) => { @@ -430,6 +470,7 @@ impl NetworkBeaconProcessor { "last_block_slot" => end_slot, "processed_blocks" => sent_blocks, "processed_blobs" => n_blobs, + "processed_data_columns" => n_data_columns, "service"=> "sync"); BatchProcessResult::Success { sent_blocks, @@ -473,10 +514,19 @@ impl NetworkBeaconProcessor { { ChainSegmentResult::Successful { imported_blocks } => { metrics::inc_counter(&metrics::BEACON_PROCESSOR_CHAIN_SEGMENT_SUCCESS_TOTAL); - if imported_blocks > 0 { + if !imported_blocks.is_empty() { self.chain.recompute_head_at_current_slot().await; + + for (block_root, block_slot) in &imported_blocks { + if self.chain.should_sample_slot(*block_slot) { + self.send_sync_message(SyncMessage::SampleBlock( + *block_root, + *block_slot, + )); + } + } } - (imported_blocks, Ok(())) + (imported_blocks.len(), Ok(())) } ChainSegmentResult::Failed { imported_blocks, @@ -484,10 +534,10 @@ impl NetworkBeaconProcessor { } => { metrics::inc_counter(&metrics::BEACON_PROCESSOR_CHAIN_SEGMENT_FAILED_TOTAL); let r = self.handle_failed_chain_segment(error); - if imported_blocks > 0 { + if !imported_blocks.is_empty() { 
self.chain.recompute_head_at_current_slot().await; } - (imported_blocks, r) + (imported_blocks.len(), r) } } } diff --git a/beacon_node/network/src/network_beacon_processor/tests.rs b/beacon_node/network/src/network_beacon_processor/tests.rs index a9b9f64a79d..40c69a0baa5 100644 --- a/beacon_node/network/src/network_beacon_processor/tests.rs +++ b/beacon_node/network/src/network_beacon_processor/tests.rs @@ -93,7 +93,7 @@ impl TestRig { spec.shard_committee_period = 2; let harness = BeaconChainHarness::builder(MainnetEthSpec) - .spec(spec) + .spec(spec.clone()) .deterministic_keypairs(VALIDATOR_COUNT) .fresh_ephemeral_store() .mock_execution_layer() @@ -204,7 +204,14 @@ impl TestRig { }); let enr_key = CombinedKey::generate_secp256k1(); let enr = enr::Enr::builder().build(&enr_key).unwrap(); - let network_globals = Arc::new(NetworkGlobals::new(enr, meta_data, vec![], false, &log)); + let network_globals = Arc::new(NetworkGlobals::new( + enr, + meta_data, + vec![], + false, + &log, + spec, + )); let executor = harness.runtime.task_executor.clone(); diff --git a/beacon_node/network/src/sync/backfill_sync/mod.rs b/beacon_node/network/src/sync/backfill_sync/mod.rs index dfb05da19bd..946d25237bf 100644 --- a/beacon_node/network/src/sync/backfill_sync/mod.rs +++ b/beacon_node/network/src/sync/backfill_sync/mod.rs @@ -372,7 +372,9 @@ impl BackFillSync { // A batch could be retried without the peer failing the request (disconnecting/ // sending an error /timeout) if the peer is removed from the chain for other // reasons. Check that this block belongs to the expected peer - if !batch.is_expecting_block(peer_id, &request_id) { + // TODO(das): removed peer_id matching as the node may request a different peer for data + // columns. 
+ if !batch.is_expecting_block(&request_id) { return Ok(()); } debug!(self.log, "Batch failed"; "batch_epoch" => batch_id, "error" => "rpc_error"); @@ -420,7 +422,9 @@ impl BackFillSync { // sending an error /timeout) if the peer is removed from the chain for other // reasons. Check that this block belongs to the expected peer, and that the // request_id matches - if !batch.is_expecting_block(peer_id, &request_id) { + // TODO(das): removed peer_id matching as the node may request a different peer for data + // columns. + if !batch.is_expecting_block(&request_id) { return Ok(ProcessResult::Successful); } batch @@ -958,7 +962,7 @@ impl BackFillSync { ) -> Result<(), BackFillError> { if let Some(batch) = self.batches.get_mut(&batch_id) { let (request, is_blob_batch) = batch.to_blocks_by_range_request(); - match network.blocks_and_blobs_by_range_request( + match network.block_components_by_range_request( peer, is_blob_batch, request, diff --git a/beacon_node/network/src/sync/block_lookups/common.rs b/beacon_node/network/src/sync/block_lookups/common.rs index a7be72556e2..c7c043f53f8 100644 --- a/beacon_node/network/src/sync/block_lookups/common.rs +++ b/beacon_node/network/src/sync/block_lookups/common.rs @@ -4,6 +4,7 @@ use crate::sync::block_lookups::single_block_lookup::{ use crate::sync::block_lookups::{ BlobRequestState, BlockRequestState, CustodyRequestState, PeerId, }; +use crate::sync::manager::BlockProcessType; use crate::sync::network_context::{LookupRequestResult, SyncNetworkContext}; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::BeaconChainTypes; @@ -92,7 +93,7 @@ impl RequestState for BlockRequestState { value, block_root, seen_timestamp, - peer_id: _, + .. } = download_result; cx.send_block_for_processing( id, @@ -140,7 +141,7 @@ impl RequestState for BlobRequestState { value, block_root, seen_timestamp, - peer_id: _, + .. 
} = download_result; cx.send_blobs_for_processing(id, block_root, value, seen_timestamp) .map_err(LookupRequestError::SendFailedProcessor) @@ -186,8 +187,14 @@ impl RequestState for CustodyRequestState { seen_timestamp, .. } = download_result; - cx.send_custody_columns_for_processing(id, block_root, value, seen_timestamp) - .map_err(LookupRequestError::SendFailedProcessor) + cx.send_custody_columns_for_processing( + id, + block_root, + value, + seen_timestamp, + BlockProcessType::SingleCustodyColumn(id), + ) + .map_err(LookupRequestError::SendFailedProcessor) } fn response_type() -> ResponseType { diff --git a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs index 7194faa2860..7a5cda20692 100644 --- a/beacon_node/network/src/sync/block_lookups/mod.rs +++ b/beacon_node/network/src/sync/block_lookups/mod.rs @@ -24,7 +24,7 @@ use self::parent_chain::{compute_parent_chains, NodeChain}; pub use self::single_block_lookup::DownloadResult; use self::single_block_lookup::{LookupRequestError, LookupResult, SingleBlockLookup}; use super::manager::{BlockProcessType, BlockProcessingResult, SLOT_IMPORT_TOLERANCE}; -use super::network_context::{RpcResponseResult, SyncNetworkContext}; +use super::network_context::{PeerGroup, RpcResponseError, SyncNetworkContext}; use crate::metrics; use crate::sync::block_lookups::common::ResponseType; use crate::sync::block_lookups::parent_chain::find_oldest_fork_ancestor; @@ -42,7 +42,7 @@ use std::collections::hash_map::Entry; use std::sync::Arc; use std::time::Duration; use store::Hash256; -use types::{BlobSidecar, EthSpec, SignedBeaconBlock}; +use types::{BlobSidecar, DataColumnSidecar, EthSpec, SignedBeaconBlock}; pub mod common; pub mod parent_chain; @@ -76,6 +76,7 @@ const MAX_LOOKUPS: usize = 200; pub enum BlockComponent { Block(DownloadResult>>), Blob(DownloadResult>>), + DataColumn(DownloadResult>>), } impl BlockComponent { @@ -83,12 +84,14 @@ impl BlockComponent { match self { 
BlockComponent::Block(block) => block.value.parent_root(), BlockComponent::Blob(blob) => blob.value.block_parent_root(), + BlockComponent::DataColumn(column) => column.value.block_parent_root(), } } fn get_type(&self) -> &'static str { match self { BlockComponent::Block(_) => "block", BlockComponent::Blob(_) => "blob", + BlockComponent::DataColumn(_) => "data_column", } } } @@ -379,11 +382,10 @@ impl BlockLookups { pub fn on_download_response>( &mut self, id: SingleLookupReqId, - peer_id: PeerId, - response: RpcResponseResult, + response: Result<(R::VerifiedResponseType, PeerGroup, Duration), RpcResponseError>, cx: &mut SyncNetworkContext, ) { - let result = self.on_download_response_inner::(id, peer_id, response, cx); + let result = self.on_download_response_inner::(id, response, cx); self.on_lookup_result(id.lookup_id, result, "download_response", cx); } @@ -391,8 +393,7 @@ impl BlockLookups { pub fn on_download_response_inner>( &mut self, id: SingleLookupReqId, - peer_id: PeerId, - response: RpcResponseResult, + response: Result<(R::VerifiedResponseType, PeerGroup, Duration), RpcResponseError>, cx: &mut SyncNetworkContext, ) -> Result { // Note: no need to downscore peers here, already downscored on network context @@ -409,12 +410,12 @@ impl BlockLookups { let request_state = R::request_state_mut(lookup).get_state_mut(); match response { - Ok((response, seen_timestamp)) => { + Ok((response, peer_group, seen_timestamp)) => { debug!(self.log, "Received lookup download success"; "block_root" => ?block_root, "id" => ?id, - "peer_id" => %peer_id, + "peer_group" => ?peer_group, "response_type" => ?response_type, ); @@ -435,19 +436,20 @@ impl BlockLookups { value: response, block_root, seen_timestamp, - peer_id, + peer_group, }, )?; // continue_request will send for processing as the request state is AwaitingProcessing } Err(e) => { + // TODO(das): is it okay to not log the peer source of request failures? 
Then we + // should log individual requests failures in the SyncNetworkContext debug!(self.log, "Received lookup download failure"; "block_root" => ?block_root, "id" => ?id, - "peer_id" => %peer_id, "response_type" => ?response_type, - "error" => %e, + "error" => ?e, ); request_state.on_download_failure(id.req_id)?; @@ -481,11 +483,11 @@ impl BlockLookups { BlockProcessType::SingleBlob { id } => { self.on_processing_result_inner::>(id, result, cx) } + BlockProcessType::SingleCustodyColumn(id) => { + self.on_processing_result_inner::>(id, result, cx) + } }; - let id = match process_type { - BlockProcessType::SingleBlock { id } | BlockProcessType::SingleBlob { id } => id, - }; - self.on_lookup_result(id, lookup_result, "processing_result", cx); + self.on_lookup_result(process_type.id(), lookup_result, "processing_result", cx); } pub fn on_processing_result_inner>( @@ -519,10 +521,9 @@ impl BlockLookups { Action::Continue } - BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( - _, - _block_root, - )) => { + BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents { + .. + }) => { // `on_processing_success` is called here to ensure the request state is updated prior to checking // if both components have been processed. 
request_state.on_processing_success()?; @@ -591,17 +592,21 @@ impl BlockLookups { } other => { debug!(self.log, "Invalid lookup component"; "block_root" => ?block_root, "component" => ?R::response_type(), "error" => ?other); - - let peer_id = request_state.on_processing_failure()?; - cx.report_peer( - peer_id, - PeerAction::MidToleranceError, - match R::response_type() { - ResponseType::Block => "lookup_block_processing_failure", - ResponseType::Blob => "lookup_blobs_processing_failure", - ResponseType::CustodyColumn => "lookup_custody_processing_failure", - }, - ); + let peer_group = request_state.on_processing_failure()?; + // TOOD(das): only downscore peer subgroup that provided the invalid proof + for peer in peer_group.all() { + cx.report_peer( + *peer, + PeerAction::MidToleranceError, + match R::response_type() { + ResponseType::Block => "lookup_block_processing_failure", + ResponseType::Blob => "lookup_blobs_processing_failure", + ResponseType::CustodyColumn => { + "lookup_custody_column_processing_failure" + } + }, + ); + } Action::Retry } diff --git a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs index b9cd4e3e035..b17bcedc5f5 100644 --- a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs +++ b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs @@ -2,7 +2,8 @@ use super::common::ResponseType; use super::{BlockComponent, PeerId, SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS}; use crate::sync::block_lookups::common::RequestState; use crate::sync::network_context::{ - LookupRequestResult, ReqId, RpcRequestSendError, SendErrorProcessor, SyncNetworkContext, + LookupRequestResult, PeerGroup, ReqId, RpcRequestSendError, SendErrorProcessor, + SyncNetworkContext, }; use beacon_chain::BeaconChainTypes; use derivative::Derivative; @@ -124,8 +125,8 @@ impl SingleBlockLookup { .block_request_state .state .insert_verified_response(block), - BlockComponent::Blob(_) 
=> { - // For now ignore single blobs, as the blob request state assumes all blobs are + BlockComponent::Blob(_) | BlockComponent::DataColumn(_) => { + // For now ignore single blobs and columns, as the blob request state assumes all blobs are // attributed to the same peer = the peer serving the remaining blobs. Ignoring this // block component has a minor effect, causing the node to re-request this blob // once the parent chain is successfully resolved @@ -292,34 +293,34 @@ impl SingleBlockLookup { } } -/// The state of the block request component of a `SingleBlockLookup`. +/// The state of the blob request component of a `SingleBlockLookup`. #[derive(Derivative)] #[derivative(Debug)] -pub struct BlockRequestState { +pub struct BlobRequestState { #[derivative(Debug = "ignore")] - pub requested_block_root: Hash256, - pub state: SingleLookupRequestState>>, + pub block_root: Hash256, + pub state: SingleLookupRequestState>, } -impl BlockRequestState { +impl BlobRequestState { pub fn new(block_root: Hash256) -> Self { Self { - requested_block_root: block_root, + block_root, state: SingleLookupRequestState::new(), } } } -/// The state of the blob request component of a `SingleBlockLookup`. +/// The state of the custody request component of a `SingleBlockLookup`. #[derive(Derivative)] #[derivative(Debug)] -pub struct BlobRequestState { +pub struct CustodyRequestState { #[derivative(Debug = "ignore")] pub block_root: Hash256, - pub state: SingleLookupRequestState>, + pub state: SingleLookupRequestState>, } -impl BlobRequestState { +impl CustodyRequestState { pub fn new(block_root: Hash256) -> Self { Self { block_root, @@ -328,33 +329,33 @@ impl BlobRequestState { } } -/// The state of the custody request component of a `SingleBlockLookup`. +/// The state of the block request component of a `SingleBlockLookup`. 
#[derive(Derivative)] #[derivative(Debug)] -pub struct CustodyRequestState { +pub struct BlockRequestState { #[derivative(Debug = "ignore")] - pub block_root: Hash256, - pub state: SingleLookupRequestState>, + pub requested_block_root: Hash256, + pub state: SingleLookupRequestState>>, } -impl CustodyRequestState { +impl BlockRequestState { pub fn new(block_root: Hash256) -> Self { Self { - block_root, + requested_block_root: block_root, state: SingleLookupRequestState::new(), } } } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, Clone)] pub struct DownloadResult { pub value: T, pub block_root: Hash256, pub seen_timestamp: Duration, - pub peer_id: PeerId, + pub peer_group: PeerGroup, } -#[derive(PartialEq, Eq, IntoStaticStr)] +#[derive(IntoStaticStr)] pub enum State { AwaitingDownload(&'static str), Downloading(ReqId), @@ -366,8 +367,7 @@ pub enum State { } /// Object representing the state of a single block or blob lookup request. -#[derive(PartialEq, Eq, Derivative)] -#[derivative(Debug)] +#[derive(Debug)] pub struct SingleLookupRequestState { /// State of this request. state: State, @@ -537,13 +537,13 @@ impl SingleLookupRequestState { } /// Registers a failure in processing a block. 
- pub fn on_processing_failure(&mut self) -> Result { + pub fn on_processing_failure(&mut self) -> Result { match &self.state { State::Processing(result) => { - let peer_id = result.peer_id; + let peers_source = result.peer_group.clone(); self.failed_processing = self.failed_processing.saturating_add(1); self.state = State::AwaitingDownload("not started"); - Ok(peer_id) + Ok(peers_source) } other => Err(LookupRequestError::BadState(format!( "Bad state on_processing_failure expected Processing got {other}" @@ -600,8 +600,8 @@ impl std::fmt::Debug for State { match self { Self::AwaitingDownload(status) => write!(f, "AwaitingDownload({:?})", status), Self::Downloading(req_id) => write!(f, "Downloading({:?})", req_id), - Self::AwaitingProcess(d) => write!(f, "AwaitingProcess({:?})", d.peer_id), - Self::Processing(d) => write!(f, "Processing({:?})", d.peer_id), + Self::AwaitingProcess(d) => write!(f, "AwaitingProcess({:?})", d.peer_group), + Self::Processing(d) => write!(f, "Processing({:?})", d.peer_group), Self::Processed { .. 
} => write!(f, "Processed"), } } diff --git a/beacon_node/network/src/sync/block_lookups/tests.rs b/beacon_node/network/src/sync/block_lookups/tests.rs index fcd0d768b7b..9572bf7f444 100644 --- a/beacon_node/network/src/sync/block_lookups/tests.rs +++ b/beacon_node/network/src/sync/block_lookups/tests.rs @@ -1,7 +1,7 @@ use crate::network_beacon_processor::NetworkBeaconProcessor; - use crate::sync::manager::{BlockProcessType, SyncManager}; -use crate::sync::SyncMessage; +use crate::sync::sampling::SamplingConfig; +use crate::sync::{SamplingId, SyncMessage}; use crate::NetworkMessage; use std::sync::Arc; @@ -14,26 +14,33 @@ use beacon_chain::builder::Witness; use beacon_chain::data_availability_checker::Availability; use beacon_chain::eth1_chain::CachingEth1Backend; use beacon_chain::test_utils::{ - build_log, generate_rand_block_and_blobs, BeaconChainHarness, EphemeralHarnessType, NumBlobs, + build_log, generate_rand_block_and_blobs, generate_rand_block_and_data_columns, test_spec, + BeaconChainHarness, EphemeralHarnessType, NumBlobs, }; +use beacon_chain::validator_monitor::timestamp_now; use beacon_chain::{ AvailabilityPendingExecutedBlock, PayloadVerificationOutcome, PayloadVerificationStatus, }; use beacon_processor::WorkEvent; use lighthouse_network::rpc::{RPCError, RPCResponseErrorCode}; -use lighthouse_network::service::api_types::{AppRequestId, Id, SingleLookupReqId, SyncRequestId}; +use lighthouse_network::service::api_types::{ + AppRequestId, DataColumnsByRootRequester, Id, SamplingRequester, SingleLookupReqId, + SyncRequestId, +}; use lighthouse_network::types::SyncState; use lighthouse_network::{NetworkGlobals, Request}; use slog::info; use slot_clock::{ManualSlotClock, SlotClock, TestingSlotClock}; use store::MemoryStore; use tokio::sync::mpsc; +use types::data_column_sidecar::ColumnIndex; use types::test_utils::TestRandom; use types::{ test_utils::{SeedableRng, XorShiftRng}, BlobSidecar, ForkName, MinimalEthSpec as E, SignedBeaconBlock, Slot, }; use 
types::{BeaconState, BeaconStateBase}; +use types::{DataColumnSidecar, Epoch}; type T = Witness, E, MemoryStore, MemoryStore>; @@ -84,15 +91,32 @@ struct TestRig { const D: Duration = Duration::new(0, 0); const PARENT_FAIL_TOLERANCE: u8 = SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS; +const SAMPLING_REQUIRED_SUCCESSES: usize = 2; + +type DCByRootIds = Vec; +type DCByRootId = (SyncRequestId, Vec); + +struct TestRigConfig { + peer_das_enabled: bool, +} impl TestRig { - fn test_setup() -> Self { + fn test_setup_with_config(config: Option) -> Self { let enable_log = cfg!(feature = "test_logger"); let log = build_log(slog::Level::Trace, enable_log); + // Use `fork_from_env` logic to set correct fork epochs + let mut spec = test_spec::(); + + if let Some(config) = config { + if config.peer_das_enabled { + spec.eip7594_fork_epoch = Some(Epoch::new(0)); + } + } + // Initialise a new beacon chain let harness = BeaconChainHarness::>::builder(E) - .default_spec() + .spec(spec) .logger(log.clone()) .deterministic_keypairs(1) .fresh_ephemeral_store() @@ -106,7 +130,13 @@ impl TestRig { let chain = harness.chain.clone(); let (network_tx, network_rx) = mpsc::unbounded_channel(); - let globals = Arc::new(NetworkGlobals::new_test_globals(Vec::new(), &log)); + // TODO(das): make the generation of the ENR use the deterministic rng to have consistent + // column assignments + let globals = Arc::new(NetworkGlobals::new_test_globals( + Vec::new(), + &log, + chain.spec.clone(), + )); let (beacon_processor, beacon_processor_rx) = NetworkBeaconProcessor::null_for_testing( globals, chain.clone(), @@ -136,6 +166,9 @@ impl TestRig { network_tx, beacon_processor.into(), sync_recv, + SamplingConfig::Custom { + required_successes: vec![SAMPLING_REQUIRED_SUCCESSES], + }, log.clone(), ), harness, @@ -144,6 +177,10 @@ impl TestRig { } } + fn test_setup() -> Self { + Self::test_setup_with_config(None) + } + fn test_setup_after_deneb() -> Option { let r = Self::test_setup(); if r.after_deneb() { @@ -153,6 
+190,17 @@ impl TestRig { } } + fn test_setup_after_peerdas() -> Option { + let r = Self::test_setup_with_config(Some(TestRigConfig { + peer_das_enabled: true, + })); + if r.after_deneb() { + Some(r) + } else { + None + } + } + fn log(&self, msg: &str) { info!(self.log, "TEST_RIG"; "msg" => msg); } @@ -180,6 +228,10 @@ impl TestRig { )); } + fn trigger_sample_block(&mut self, block_root: Hash256, block_slot: Slot) { + self.send_sync_message(SyncMessage::SampleBlock(block_root, block_slot)) + } + fn rand_block(&mut self) -> SignedBeaconBlock { self.rand_block_and_blobs(NumBlobs::None).0 } @@ -193,6 +245,18 @@ impl TestRig { generate_rand_block_and_blobs::(fork_name, num_blobs, rng) } + fn rand_block_and_data_columns( + &mut self, + ) -> (SignedBeaconBlock, Vec>>) { + let num_blobs = NumBlobs::Number(1); + generate_rand_block_and_data_columns::( + self.fork_name, + num_blobs, + &mut self.rng, + &self.harness.spec, + ) + } + pub fn rand_block_and_parent( &mut self, ) -> (SignedBeaconBlock, SignedBeaconBlock, Hash256, Hash256) { @@ -233,6 +297,20 @@ impl TestRig { ); } + fn expect_no_active_sampling(&mut self) { + assert_eq!( + self.sync_manager.active_sampling_requests(), + vec![], + "expected no active sampling" + ); + } + + fn expect_clean_finished_sampling(&mut self) { + self.expect_empty_network(); + self.expect_sampling_result_work(); + self.expect_no_active_sampling(); + } + fn assert_parent_lookups_count(&self, count: usize) { assert_eq!( self.active_parent_lookups_count(), @@ -311,12 +389,26 @@ impl TestRig { } fn new_connected_peer(&mut self) -> PeerId { - let peer_id = PeerId::random(); self.network_globals .peers .write() - .__add_connected_peer_testing_only(&peer_id); - peer_id + .__add_connected_peer_testing_only(false, &self.harness.spec) + } + + fn new_connected_supernode_peer(&mut self) -> PeerId { + self.network_globals + .peers + .write() + .__add_connected_peer_testing_only(true, &self.harness.spec) + } + + fn new_connected_peers_for_peerdas(&mut 
self) { + // Enough sampling peers with few columns + for _ in 0..100 { + self.new_connected_peer(); + } + // One supernode peer to ensure all columns have at least one peer + self.new_connected_supernode_peer(); } fn parent_chain_processed_success( @@ -542,6 +634,182 @@ impl TestRig { }) } + fn return_empty_sampling_requests(&mut self, ids: DCByRootIds) { + for id in ids { + self.log(&format!("return empty data column for {id:?}")); + self.return_empty_sampling_request(id) + } + } + + fn return_empty_sampling_request(&mut self, (request_id, _): DCByRootId) { + let peer_id = PeerId::random(); + // Send stream termination + self.send_sync_message(SyncMessage::RpcDataColumn { + request_id, + peer_id, + data_column: None, + seen_timestamp: timestamp_now(), + }); + } + + fn sampling_requests_failed( + &mut self, + sampling_ids: DCByRootIds, + peer_id: PeerId, + error: RPCError, + ) { + for (request_id, _) in sampling_ids { + self.send_sync_message(SyncMessage::RpcError { + peer_id, + request_id, + error: error.clone(), + }) + } + } + + fn complete_valid_block_request( + &mut self, + id: SingleLookupReqId, + block: Arc>, + missing_components: bool, + ) { + // Complete download + let peer_id = PeerId::random(); + let slot = block.slot(); + let block_root = block.canonical_root(); + self.single_lookup_block_response(id, peer_id, Some(block)); + self.single_lookup_block_response(id, peer_id, None); + // Expect processing and resolve with import + self.expect_block_process(ResponseType::Block); + self.single_block_component_processed( + id.lookup_id, + if missing_components { + BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( + slot, block_root, + )) + } else { + BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(block_root)) + }, + ) + } + + fn complete_valid_sampling_column_requests( + &mut self, + ids: DCByRootIds, + data_columns: Vec>>, + ) { + for id in ids { + self.log(&format!("return valid data column for {id:?}")); + let 
indices = &id.1; + let columns_to_send = indices + .iter() + .map(|&i| data_columns[i as usize].clone()) + .collect::>(); + self.complete_valid_sampling_column_request(id, &columns_to_send); + } + } + + fn complete_valid_sampling_column_request( + &mut self, + id: DCByRootId, + data_columns: &[Arc>], + ) { + let first_dc = data_columns.first().unwrap(); + let block_root = first_dc.block_root(); + let sampling_request_id = match id.0 { + SyncRequestId::DataColumnsByRoot( + _, + _requester @ DataColumnsByRootRequester::Sampling(sampling_id), + ) => sampling_id.sampling_request_id, + _ => unreachable!(), + }; + self.complete_data_columns_by_root_request(id, data_columns); + + // Expect work event + // TODO(das): worth it to append sender id to the work event for stricter assertion? + self.expect_rpc_sample_verify_work_event(); + + // Respond with valid result + self.send_sync_message(SyncMessage::SampleVerified { + id: SamplingId { + id: SamplingRequester::ImportedBlock(block_root), + sampling_request_id, + }, + result: Ok(()), + }) + } + + fn complete_valid_custody_request( + &mut self, + ids: DCByRootIds, + data_columns: Vec>>, + missing_components: bool, + ) { + let lookup_id = + if let SyncRequestId::DataColumnsByRoot(_, DataColumnsByRootRequester::Custody(id)) = + ids.first().unwrap().0 + { + id.requester.0.lookup_id + } else { + panic!("not a custody requester") + }; + + let first_column = data_columns.first().cloned().unwrap(); + + for id in ids { + self.log(&format!("return valid data column for {id:?}")); + let indices = &id.1; + let columns_to_send = indices + .iter() + .map(|&i| data_columns[i as usize].clone()) + .collect::>(); + self.complete_data_columns_by_root_request(id, &columns_to_send); + } + + // Expect work event + // TODO(das): worth it to append sender id to the work event for stricter assertion? 
+ self.expect_rpc_custody_column_work_event(); + + // Respond with valid result + self.send_sync_message(SyncMessage::BlockComponentProcessed { + process_type: BlockProcessType::SingleCustodyColumn(lookup_id), + result: if missing_components { + BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( + first_column.slot(), + first_column.block_root(), + )) + } else { + BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported( + first_column.block_root(), + )) + }, + }); + } + + fn complete_data_columns_by_root_request( + &mut self, + (request_id, _): DCByRootId, + data_columns: &[Arc>], + ) { + let peer_id = PeerId::random(); + for data_column in data_columns { + // Send chunks + self.send_sync_message(SyncMessage::RpcDataColumn { + request_id, + peer_id, + data_column: Some(data_column.clone()), + seen_timestamp: timestamp_now(), + }); + } + // Send stream termination + self.send_sync_message(SyncMessage::RpcDataColumn { + request_id, + peer_id, + data_column: None, + seen_timestamp: timestamp_now(), + }); + } + /// Return RPCErrors for all active requests of peer fn rpc_error_all_active_requests(&mut self, disconnected_peer_id: PeerId) { self.drain_network_rx(); @@ -710,6 +978,59 @@ impl TestRig { .unwrap_or_else(|e| panic!("Expected blob parent request for {for_block:?}: {e}")) } + /// Retrieves an unknown number of requests for data columns of `block_root`. Because peer ENRs + /// are random, and peer selection is random, the total number of batched requests is unknown. + fn expect_data_columns_by_root_requests( + &mut self, + block_root: Hash256, + count: usize, + ) -> DCByRootIds { + let mut requests: DCByRootIds = vec![]; + loop { + let req = self + .pop_received_network_event(|ev| match ev { + NetworkMessage::SendRequest { + peer_id: _, + request: Request::DataColumnsByRoot(request), + request_id: AppRequestId::Sync(id @ SyncRequestId::DataColumnsByRoot { .. 
}), + } if request + .data_column_ids + .to_vec() + .iter() + .any(|r| r.block_root == block_root) => + { + let indices = request + .data_column_ids + .to_vec() + .iter() + .map(|cid| cid.index) + .collect::>(); + Some((*id, indices)) + } + _ => None, + }) + .unwrap_or_else(|e| { + panic!("Expected more DataColumnsByRoot requests for {block_root:?}: {e}") + }); + requests.push(req); + + // Should never infinite loop because sync does not send requests for 0 columns + if requests.iter().map(|r| r.1.len()).sum::() >= count { + return requests; + } + } + } + + fn expect_only_data_columns_by_root_requests( + &mut self, + for_block: Hash256, + count: usize, + ) -> DCByRootIds { + let ids = self.expect_data_columns_by_root_requests(for_block, count); + self.expect_empty_network(); + ids + } + #[track_caller] fn expect_block_process(&mut self, response_type: ResponseType) { match response_type { @@ -723,11 +1044,47 @@ impl TestRig { (ev.work_type() == beacon_processor::RPC_BLOBS).then_some(()) }) .unwrap_or_else(|e| panic!("Expected blobs work event: {e}")), - // TODO(das): remove todo when adding tests for custody sync lookup - ResponseType::CustodyColumn => todo!(), + ResponseType::CustodyColumn => self + .pop_received_processor_event(|ev| { + (ev.work_type() == beacon_processor::RPC_CUSTODY_COLUMN).then_some(()) + }) + .unwrap_or_else(|e| panic!("Expected column work event: {e}")), } } + fn expect_rpc_custody_column_work_event(&mut self) { + self.pop_received_processor_event(|ev| { + if ev.work_type() == beacon_processor::RPC_CUSTODY_COLUMN { + Some(()) + } else { + None + } + }) + .unwrap_or_else(|e| panic!("Expected RPC custody column work: {e}")) + } + + fn expect_rpc_sample_verify_work_event(&mut self) { + self.pop_received_processor_event(|ev| { + if ev.work_type() == beacon_processor::RPC_VERIFY_DATA_COLUMNS { + Some(()) + } else { + None + } + }) + .unwrap_or_else(|e| panic!("Expected sample verify work: {e}")) + } + + fn expect_sampling_result_work(&mut self) { 
+ self.pop_received_processor_event(|ev| { + if ev.work_type() == beacon_processor::SAMPLING_RESULT { + Some(()) + } else { + None + } + }) + .unwrap_or_else(|e| panic!("Expected sampling result work: {e}")) + } + fn expect_no_penalty_for(&mut self, peer_id: PeerId) { self.drain_network_rx(); let downscore_events = self @@ -763,7 +1120,11 @@ impl TestRig { fn expect_empty_network(&mut self) { self.drain_network_rx(); if !self.network_rx_queue.is_empty() { - panic!("expected no network events: {:#?}", self.network_rx_queue); + let n = self.network_rx_queue.len(); + panic!( + "expected no network events but got {n} events, displaying first 2: {:#?}", + self.network_rx_queue[..n.min(2)].iter().collect::>() + ); } } @@ -1588,6 +1949,94 @@ fn blobs_in_da_checker_skip_download() { r.expect_no_active_lookups(); } +#[test] +fn sampling_happy_path() { + let Some(mut r) = TestRig::test_setup_after_peerdas() else { + return; + }; + r.new_connected_peers_for_peerdas(); + let (block, data_columns) = r.rand_block_and_data_columns(); + let block_root = block.canonical_root(); + r.trigger_sample_block(block_root, block.slot()); + // Retrieve all outgoing sample requests for random column indexes + let sampling_ids = + r.expect_only_data_columns_by_root_requests(block_root, SAMPLING_REQUIRED_SUCCESSES); + // Resolve all of them one by one + r.complete_valid_sampling_column_requests(sampling_ids, data_columns); + r.expect_clean_finished_sampling(); +} + +#[test] +fn sampling_with_retries() { + let Some(mut r) = TestRig::test_setup_after_peerdas() else { + return; + }; + r.new_connected_peers_for_peerdas(); + let (block, data_columns) = r.rand_block_and_data_columns(); + let block_root = block.canonical_root(); + r.trigger_sample_block(block_root, block.slot()); + // Retrieve all outgoing sample requests for random column indexes, and return empty responses + let sampling_ids = + r.expect_only_data_columns_by_root_requests(block_root, SAMPLING_REQUIRED_SUCCESSES); + 
r.return_empty_sampling_requests(sampling_ids); + // Expect retries for all of them, and resolve them + let sampling_ids = + r.expect_only_data_columns_by_root_requests(block_root, SAMPLING_REQUIRED_SUCCESSES); + r.complete_valid_sampling_column_requests(sampling_ids, data_columns); + r.expect_clean_finished_sampling(); +} + +#[test] +fn sampling_avoid_retrying_same_peer() { + let Some(mut r) = TestRig::test_setup_after_peerdas() else { + return; + }; + let peer_id_1 = r.new_connected_supernode_peer(); + let peer_id_2 = r.new_connected_supernode_peer(); + let block_root = Hash256::random(); + r.trigger_sample_block(block_root, Slot::new(0)); + // Retrieve all outgoing sample requests for random column indexes, and return empty responses + let sampling_ids = + r.expect_only_data_columns_by_root_requests(block_root, SAMPLING_REQUIRED_SUCCESSES); + r.sampling_requests_failed(sampling_ids, peer_id_1, RPCError::Disconnected); + // Should retry the other peer + let sampling_ids = + r.expect_only_data_columns_by_root_requests(block_root, SAMPLING_REQUIRED_SUCCESSES); + r.sampling_requests_failed(sampling_ids, peer_id_2, RPCError::Disconnected); + // Expect no more retries + r.expect_empty_network(); +} + +#[test] +fn custody_lookup_happy_path() { + let Some(mut r) = TestRig::test_setup_after_peerdas() else { + return; + }; + let spec = E::default_spec(); + r.new_connected_peers_for_peerdas(); + let (block, data_columns) = r.rand_block_and_data_columns(); + let block_root = block.canonical_root(); + let peer_id = r.new_connected_peer(); + r.trigger_unknown_block_from_attestation(block_root, peer_id); + // Should not request blobs + let id = r.expect_block_lookup_request(block.canonical_root()); + r.complete_valid_block_request(id, block.into(), true); + let custody_column_count = spec.custody_requirement * spec.data_columns_per_subnet() as u64; + let custody_ids = + r.expect_only_data_columns_by_root_requests(block_root, custody_column_count as usize); + 
r.complete_valid_custody_request(custody_ids, data_columns, false); + r.expect_no_active_lookups(); +} + +// TODO(das): Test retries of DataColumnByRoot: +// - Expect request for column_index +// - Respond with bad data +// - Respond with stream terminator +// ^ The stream terminator should be ignored and not close the next retry + +// TODO(das): Test error early a sampling request and it getting drop + then receiving responses +// from pending requests. + mod deneb_only { use super::*; use beacon_chain::{ diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index f31f2921ea2..966ce55fabe 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -1,69 +1,105 @@ -use beacon_chain::block_verification_types::RpcBlock; +use beacon_chain::{ + block_verification_types::RpcBlock, data_column_verification::CustodyDataColumn, get_block_root, +}; use lighthouse_network::PeerId; use ssz_types::VariableList; -use std::{collections::VecDeque, sync::Arc}; -use types::{BlobSidecar, EthSpec, SignedBeaconBlock}; - -use super::range_sync::ByRangeRequestType; +use std::{ + collections::{HashMap, VecDeque}, + sync::Arc, +}; +use types::{ + BlobSidecar, ChainSpec, ColumnIndex, DataColumnSidecar, EthSpec, Hash256, SignedBeaconBlock, +}; #[derive(Debug)] -pub struct BlocksAndBlobsRequestInfo { +pub struct RangeBlockComponentsRequest { /// Blocks we have received awaiting for their corresponding sidecar. - accumulated_blocks: VecDeque>>, + blocks: VecDeque>>, /// Sidecars we have received awaiting for their corresponding block. - accumulated_sidecars: VecDeque>>, + blobs: VecDeque>>, + data_columns: VecDeque>>, /// Whether the individual RPC request for blocks is finished or not. is_blocks_stream_terminated: bool, /// Whether the individual RPC request for sidecars is finished or not. 
is_sidecars_stream_terminated: bool, + custody_columns_streams_terminated: usize, /// Used to determine if this accumulator should wait for a sidecars stream termination - request_type: ByRangeRequestType, - /// The peer the request was made to. - pub(crate) peer_id: PeerId, + expects_blobs: bool, + expects_custody_columns: Option>, + /// Used to determine if the number of data columns stream termination this accumulator should + /// wait for. This may be less than the number of `expects_custody_columns` due to request batching. + num_custody_column_requests: Option, + /// The peers the request was made to. + pub(crate) peer_ids: Vec, } -impl BlocksAndBlobsRequestInfo { - pub fn new(request_type: ByRangeRequestType, peer_id: PeerId) -> Self { +impl RangeBlockComponentsRequest { + pub fn new( + expects_blobs: bool, + expects_custody_columns: Option>, + num_custody_column_requests: Option, + peer_ids: Vec, + ) -> Self { Self { - accumulated_blocks: <_>::default(), - accumulated_sidecars: <_>::default(), - is_blocks_stream_terminated: <_>::default(), - is_sidecars_stream_terminated: <_>::default(), - request_type, - peer_id, + blocks: <_>::default(), + blobs: <_>::default(), + data_columns: <_>::default(), + is_blocks_stream_terminated: false, + is_sidecars_stream_terminated: false, + custody_columns_streams_terminated: 0, + expects_blobs, + expects_custody_columns, + num_custody_column_requests, + peer_ids, } } - pub fn get_request_type(&self) -> ByRangeRequestType { - self.request_type + // TODO: This function should be deprecated when simplying the retry mechanism of this range + // requests. 
+ pub fn get_requirements(&self) -> (bool, Option>) { + (self.expects_blobs, self.expects_custody_columns.clone()) } pub fn add_block_response(&mut self, block_opt: Option>>) { match block_opt { - Some(block) => self.accumulated_blocks.push_back(block), + Some(block) => self.blocks.push_back(block), None => self.is_blocks_stream_terminated = true, } } pub fn add_sidecar_response(&mut self, sidecar_opt: Option>>) { match sidecar_opt { - Some(sidecar) => self.accumulated_sidecars.push_back(sidecar), + Some(sidecar) => self.blobs.push_back(sidecar), None => self.is_sidecars_stream_terminated = true, } } - pub fn into_responses(self) -> Result>, String> { - let BlocksAndBlobsRequestInfo { - accumulated_blocks, - accumulated_sidecars, - .. - } = self; + pub fn add_data_column(&mut self, column_opt: Option>>) { + match column_opt { + Some(column) => self.data_columns.push_back(column), + // TODO(das): this mechanism is dangerous, if somehow there are two requests for the + // same column index it can terminate early. This struct should track that all requests + // for all custody columns terminate. + None => self.custody_columns_streams_terminated += 1, + } + } + + pub fn into_responses(self, spec: &ChainSpec) -> Result>, String> { + if let Some(expects_custody_columns) = self.expects_custody_columns.clone() { + self.into_responses_with_custody_columns(expects_custody_columns, spec) + } else { + self.into_responses_with_blobs() + } + } + + fn into_responses_with_blobs(self) -> Result>, String> { + let RangeBlockComponentsRequest { blocks, blobs, .. } = self; // There can't be more more blobs than blocks. i.e. sending any blob (empty // included) for a skipped slot is not permitted. 
- let mut responses = Vec::with_capacity(accumulated_blocks.len()); - let mut blob_iter = accumulated_sidecars.into_iter().peekable(); - for block in accumulated_blocks.into_iter() { + let mut responses = Vec::with_capacity(blocks.len()); + let mut blob_iter = blobs.into_iter().peekable(); + for block in blocks.into_iter() { let mut blob_list = Vec::with_capacity(E::max_blobs_per_block()); while { let pair_next_blob = blob_iter @@ -99,20 +135,110 @@ impl BlocksAndBlobsRequestInfo { Ok(responses) } + fn into_responses_with_custody_columns( + self, + expects_custody_columns: Vec, + spec: &ChainSpec, + ) -> Result>, String> { + let RangeBlockComponentsRequest { + blocks, + data_columns, + .. + } = self; + + // Group data columns by block_root and index + let mut data_columns_by_block = + HashMap::>>>::new(); + + for column in data_columns { + let block_root = column.block_root(); + let index = column.index; + if data_columns_by_block + .entry(block_root) + .or_default() + .insert(index, column) + .is_some() + { + return Err(format!( + "Repeated column block_root {block_root:?} index {index}" + )); + } + } + + // Now iterate all blocks ensuring that the block roots of each block and data column match, + // plus we have columns for our custody requirements + let mut rpc_blocks = Vec::with_capacity(blocks.len()); + + for block in blocks { + let block_root = get_block_root(&block); + rpc_blocks.push(if block.num_expected_blobs() > 0 { + let Some(mut data_columns_by_index) = data_columns_by_block.remove(&block_root) + else { + // This PR ignores the fix from https://github.com/sigp/lighthouse/pull/5675 + // which allows blobs to not match blocks. + // TODO(das): on the initial version of PeerDAS the beacon chain does not check + // rpc custody requirements and dropping this check can allow the block to have + // an inconsistent DB. 
+ return Err(format!("No columns for block {block_root:?} with data")); + }; + + let mut custody_columns = vec![]; + for index in &expects_custody_columns { + let Some(data_column) = data_columns_by_index.remove(index) else { + return Err(format!("No column for block {block_root:?} index {index}")); + }; + // Safe to convert to `CustodyDataColumn`: we have asserted that the index of + // this column is in the set of `expects_custody_columns` and with the expected + // block root, so for the expected epoch of this batch. + custody_columns.push(CustodyDataColumn::from_asserted_custody(data_column)); + } + + // Assert that there are no columns left + if !data_columns_by_index.is_empty() { + let remaining_indices = data_columns_by_index.keys().collect::>(); + return Err(format!( + "Not all columns consumed for block {block_root:?}: {remaining_indices:?}" + )); + } + + RpcBlock::new_with_custody_columns(Some(block_root), block, custody_columns, spec) + .map_err(|e| format!("{e:?}"))? + } else { + RpcBlock::new_without_blobs(Some(block_root), block) + }); + } + + // Assert that there are no columns left for other blocks + if !data_columns_by_block.is_empty() { + let remaining_roots = data_columns_by_block.keys().collect::>(); + return Err(format!("Not all columns consumed: {remaining_roots:?}")); + } + + Ok(rpc_blocks) + } + pub fn is_finished(&self) -> bool { - let blobs_requested = match self.request_type { - ByRangeRequestType::Blocks => false, - ByRangeRequestType::BlocksAndBlobs => true, - }; - self.is_blocks_stream_terminated && (!blobs_requested || self.is_sidecars_stream_terminated) + if !self.is_blocks_stream_terminated { + return false; + } + if self.expects_blobs && !self.is_sidecars_stream_terminated { + return false; + } + if let Some(expects_custody_column_responses) = self.num_custody_column_requests { + if self.custody_columns_streams_terminated < expects_custody_column_responses { + return false; + } + } + true } } #[cfg(test)] mod tests { - use 
super::BlocksAndBlobsRequestInfo; - use crate::sync::range_sync::ByRangeRequestType; - use beacon_chain::test_utils::{generate_rand_block_and_blobs, NumBlobs}; + use super::RangeBlockComponentsRequest; + use beacon_chain::test_utils::{ + generate_rand_block_and_blobs, generate_rand_block_and_data_columns, test_spec, NumBlobs, + }; use lighthouse_network::PeerId; use rand::SeedableRng; use types::{test_utils::XorShiftRng, ForkName, MinimalEthSpec as E}; @@ -120,7 +246,7 @@ mod tests { #[test] fn no_blobs_into_responses() { let peer_id = PeerId::random(); - let mut info = BlocksAndBlobsRequestInfo::::new(ByRangeRequestType::Blocks, peer_id); + let mut info = RangeBlockComponentsRequest::::new(false, None, None, vec![peer_id]); let mut rng = XorShiftRng::from_seed([42; 16]); let blocks = (0..4) .map(|_| generate_rand_block_and_blobs::(ForkName::Base, NumBlobs::None, &mut rng).0) @@ -134,14 +260,13 @@ mod tests { // Assert response is finished and RpcBlocks can be constructed assert!(info.is_finished()); - info.into_responses().unwrap(); + info.into_responses(&test_spec::()).unwrap(); } #[test] fn empty_blobs_into_responses() { let peer_id = PeerId::random(); - let mut info = - BlocksAndBlobsRequestInfo::::new(ByRangeRequestType::BlocksAndBlobs, peer_id); + let mut info = RangeBlockComponentsRequest::::new(true, None, None, vec![peer_id]); let mut rng = XorShiftRng::from_seed([42; 16]); let blocks = (0..4) .map(|_| { @@ -162,6 +287,123 @@ mod tests { // This makes sure we don't expect blobs here when they have expired. Checking this logic should // be hendled elsewhere. 
assert!(info.is_finished()); - info.into_responses().unwrap(); + info.into_responses(&test_spec::()).unwrap(); + } + + #[test] + fn rpc_block_with_custody_columns() { + let spec = test_spec::(); + let expects_custody_columns = vec![1, 2, 3, 4]; + let mut info = RangeBlockComponentsRequest::::new( + false, + Some(expects_custody_columns.clone()), + Some(expects_custody_columns.len()), + vec![PeerId::random()], + ); + let mut rng = XorShiftRng::from_seed([42; 16]); + let blocks = (0..4) + .map(|_| { + generate_rand_block_and_data_columns::( + ForkName::Deneb, + NumBlobs::Number(1), + &mut rng, + &spec, + ) + }) + .collect::>(); + + // Send blocks and complete terminate response + for block in &blocks { + info.add_block_response(Some(block.0.clone().into())); + } + info.add_block_response(None); + // Assert response is not finished + assert!(!info.is_finished()); + + // Send data columns interleaved + for block in &blocks { + for column in &block.1 { + if expects_custody_columns.contains(&column.index) { + info.add_data_column(Some(column.clone())); + } + } + } + + // Terminate the requests + for (i, _column_index) in expects_custody_columns.iter().enumerate() { + info.add_data_column(None); + + if i < expects_custody_columns.len() - 1 { + assert!( + !info.is_finished(), + "requested should not be finished at loop {i}" + ); + } else { + assert!( + info.is_finished(), + "request should be finishied at loop {i}" + ); + } + } + + // All completed construct response + info.into_responses(&spec).unwrap(); + } + + #[test] + fn rpc_block_with_custody_columns_batched() { + let spec = test_spec::(); + let expects_custody_columns = vec![1, 2, 3, 4]; + let num_of_data_column_requests = 2; + let mut info = RangeBlockComponentsRequest::::new( + false, + Some(expects_custody_columns.clone()), + Some(num_of_data_column_requests), + vec![PeerId::random()], + ); + let mut rng = XorShiftRng::from_seed([42; 16]); + let blocks = (0..4) + .map(|_| { + 
generate_rand_block_and_data_columns::( + ForkName::Deneb, + NumBlobs::Number(1), + &mut rng, + &spec, + ) + }) + .collect::>(); + + // Send blocks and complete terminate response + for block in &blocks { + info.add_block_response(Some(block.0.clone().into())); + } + info.add_block_response(None); + // Assert response is not finished + assert!(!info.is_finished()); + + // Send data columns interleaved + for block in &blocks { + for column in &block.1 { + if expects_custody_columns.contains(&column.index) { + info.add_data_column(Some(column.clone())); + } + } + } + + // Terminate the requests + for i in 0..num_of_data_column_requests { + info.add_data_column(None); + if i < num_of_data_column_requests - 1 { + assert!( + !info.is_finished(), + "requested should not be finished at loop {i}" + ); + } else { + assert!(info.is_finished(), "request should be finished at loop {i}"); + } + } + + // All completed construct response + info.into_responses(&spec).unwrap(); } } diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index e494f1f94fc..d6ce14adb16 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -38,13 +38,15 @@ use super::block_lookups::BlockLookups; use super::network_context::{BlockOrBlob, RangeRequestId, RpcEvent, SyncNetworkContext}; use super::peer_sync_info::{remote_sync_type, PeerSyncType}; use super::range_sync::{RangeSync, RangeSyncType, EPOCHS_PER_BATCH}; +use super::sampling::{Sampling, SamplingConfig, SamplingResult}; use crate::network_beacon_processor::{ChainSegmentProcessId, NetworkBeaconProcessor}; use crate::service::NetworkMessage; use crate::status::ToStatusMessage; use crate::sync::block_lookups::{ - BlobRequestState, BlockComponent, BlockRequestState, DownloadResult, + BlobRequestState, BlockComponent, BlockRequestState, CustodyRequestState, DownloadResult, }; -use crate::sync::block_sidecar_coupling::BlocksAndBlobsRequestInfo; +use 
crate::sync::block_sidecar_coupling::RangeBlockComponentsRequest; +use crate::sync::network_context::PeerGroup; use beacon_chain::block_verification_types::AsBlock; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::validator_monitor::timestamp_now; @@ -54,7 +56,8 @@ use beacon_chain::{ use futures::StreamExt; use lighthouse_network::rpc::RPCError; use lighthouse_network::service::api_types::{ - DataColumnsByRootRequestId, Id, SingleLookupReqId, SyncRequestId, + DataColumnsByRootRequestId, DataColumnsByRootRequester, Id, SamplingId, SamplingRequester, + SingleLookupReqId, SyncRequestId, }; use lighthouse_network::types::{NetworkGlobals, SyncState}; use lighthouse_network::SyncInfo; @@ -124,6 +127,10 @@ pub enum SyncMessage { /// manager to attempt to find the block matching the unknown hash. UnknownBlockHashFromAttestation(PeerId, Hash256), + /// Request to start sampling a block. Caller should ensure that block has data before sending + /// the request. + SampleBlock(Hash256, Slot), + /// A peer has disconnected. Disconnect(PeerId), @@ -146,6 +153,12 @@ pub enum SyncMessage { result: BlockProcessingResult, }, + /// Sample data column verified + SampleVerified { + id: SamplingId, + result: Result<(), String>, + }, + /// A block from gossip has completed processing, GossipBlockProcessResult { block_root: Hash256, imported: bool }, } @@ -155,6 +168,17 @@ pub enum SyncMessage { pub enum BlockProcessType { SingleBlock { id: Id }, SingleBlob { id: Id }, + SingleCustodyColumn(Id), +} + +impl BlockProcessType { + pub fn id(&self) -> Id { + match self { + BlockProcessType::SingleBlock { id } + | BlockProcessType::SingleBlob { id } + | BlockProcessType::SingleCustodyColumn(id) => *id, + } + } } #[derive(Debug)] @@ -206,6 +230,8 @@ pub struct SyncManager { /// one event is useful, the rest generating log noise and wasted cycles notified_unknown_roots: LRUTimeCache<(PeerId, Hash256)>, + sampling: Sampling, + /// The logger for the import manager. 
log: Logger, } @@ -232,6 +258,7 @@ pub fn spawn( network_send, beacon_processor, sync_recv, + SamplingConfig::Default, log.clone(), ); @@ -246,6 +273,7 @@ impl SyncManager { network_send: mpsc::UnboundedSender>, beacon_processor: Arc>, sync_recv: mpsc::UnboundedReceiver>, + sampling_config: SamplingConfig, log: slog::Logger, ) -> Self { let network_globals = beacon_processor.network_globals.clone(); @@ -271,6 +299,7 @@ impl SyncManager { notified_unknown_roots: LRUTimeCache::new(Duration::from_secs( NOTIFIED_UNKNOWN_ROOT_EXPIRY_SECONDS, )), + sampling: Sampling::new(sampling_config, log.new(o!("service" => "sampling"))), log: log.clone(), } } @@ -299,6 +328,11 @@ impl SyncManager { self.block_lookups.insert_failed_chain(block_root); } + #[cfg(test)] + pub(crate) fn active_sampling_requests(&self) -> Vec { + self.sampling.active_sampling_requests() + } + fn network_globals(&self) -> &NetworkGlobals { self.network.network_globals() } @@ -650,7 +684,7 @@ impl SyncManager { value: block.block_cloned(), block_root, seen_timestamp: timestamp_now(), - peer_id, + peer_group: PeerGroup::from_single(peer_id), }), ); } @@ -668,12 +702,27 @@ impl SyncManager { value: blob, block_root, seen_timestamp: timestamp_now(), - peer_id, + peer_group: PeerGroup::from_single(peer_id), }), ); } - SyncMessage::UnknownParentDataColumn(_peer_id, _data_column) => { - // TODO(das): data column parent lookup to be implemented + SyncMessage::UnknownParentDataColumn(peer_id, data_column) => { + let data_column_slot = data_column.slot(); + let block_root = data_column.block_root(); + let parent_root = data_column.block_parent_root(); + debug!(self.log, "Received unknown parent data column message"; "block_root" => %block_root, "parent_root" => %parent_root); + self.handle_unknown_parent( + peer_id, + block_root, + parent_root, + data_column_slot, + BlockComponent::DataColumn(DownloadResult { + value: data_column, + block_root, + seen_timestamp: timestamp_now(), + peer_group: 
PeerGroup::from_single(peer_id), + }), + ); } SyncMessage::UnknownBlockHashFromAttestation(peer_id, block_root) => { if !self.notified_unknown_roots.contains(&(peer_id, block_root)) { @@ -682,6 +731,15 @@ impl SyncManager { self.handle_unknown_block_root(peer_id, block_root); } } + SyncMessage::SampleBlock(block_root, block_slot) => { + debug!(self.log, "Received SampleBlock message"; "block_root" => %block_root, "slot" => block_slot); + if let Some((requester, result)) = self + .sampling + .on_new_sample_request(block_root, &mut self.network) + { + self.on_sampling_result(requester, result) + } + } SyncMessage::Disconnect(peer_id) => { debug!(self.log, "Received disconnected message"; "peer_id" => %peer_id); self.peer_disconnect(&peer_id); @@ -731,6 +789,14 @@ impl SyncManager { } } }, + SyncMessage::SampleVerified { id, result } => { + if let Some((requester, result)) = + self.sampling + .on_sample_verified(id, result, &mut self.network) + { + self.on_sampling_result(requester, result) + } + } } } @@ -885,8 +951,9 @@ impl SyncManager { self.block_lookups .on_download_response::>( id, - peer_id, - resp, + resp.map(|(value, seen_timestamp)| { + (value, PeerGroup::from_single(peer_id), seen_timestamp) + }), &mut self.network, ) } @@ -936,8 +1003,12 @@ impl SyncManager { }, ); } - SyncRequestId::RangeBlockAndBlobs { id: _ } => { - // TODO(das): implement custody range sync + SyncRequestId::RangeBlockAndBlobs { id } => { + self.range_block_and_blobs_response( + id, + peer_id, + BlockOrBlob::CustodyColumns(data_column), + ); } _ => { crit!(self.log, "bad request id for data_column"; "peer_id" => %peer_id); @@ -955,8 +1026,9 @@ impl SyncManager { self.block_lookups .on_download_response::>( id, - peer_id, - resp, + resp.map(|(value, seen_timestamp)| { + (value, PeerGroup::from_single(peer_id), seen_timestamp) + }), &mut self.network, ) } @@ -965,15 +1037,74 @@ impl SyncManager { fn on_data_columns_by_root_response( &mut self, req_id: DataColumnsByRootRequestId, - 
_requester: SingleLookupReqId, + requester: DataColumnsByRootRequester, peer_id: PeerId, - rpc_event: RpcEvent>>, + data_column: RpcEvent>>, ) { - if let Some(_resp) = self - .network - .on_data_columns_by_root_response(req_id, peer_id, rpc_event) + if let Some(resp) = + self.network + .on_data_columns_by_root_response(req_id, peer_id, data_column) { - // TODO(das): pass data_columns_by_root result to consumer + match requester { + DataColumnsByRootRequester::Sampling(id) => { + if let Some((requester, result)) = + self.sampling + .on_sample_downloaded(id, peer_id, resp, &mut self.network) + { + self.on_sampling_result(requester, result) + } + } + DataColumnsByRootRequester::Custody(custody_id) => { + if let Some(custody_columns) = self + .network + .on_custody_by_root_response(custody_id, req_id, peer_id, resp) + { + // TODO(das): get proper timestamp + let seen_timestamp = timestamp_now(); + self.block_lookups + .on_download_response::>( + custody_id.requester.0, + custody_columns.map(|(columns, peer_group)| { + (columns, peer_group, seen_timestamp) + }), + &mut self.network, + ); + } + } + } + } + } + + fn on_sampling_result(&mut self, requester: SamplingRequester, result: SamplingResult) { + // TODO(das): How is a consumer of sampling results? + // - Fork-choice for trailing DA + // - Single lookups to complete import requirements + // - Range sync to complete import requirements? Can sampling for syncing lag behind and + // accumulate in fork-choice? + + match requester { + SamplingRequester::ImportedBlock(block_root) => { + debug!(self.log, "Sampling result"; "block_root" => %block_root, "result" => ?result); + + // TODO(das): Consider moving SamplingResult to the beacon_chain crate and import + // here. No need to add too much enum variants, just whatever the beacon_chain or + // fork-choice needs to make a decision. Currently the fork-choice only needs to + // be notified of successful samplings, i.e. 
sampling failures don't trigger pruning + match result { + Ok(_) => { + if let Err(e) = self + .network + .beacon_processor() + .send_sampling_completed(block_root) + { + warn!(self.log, "Error sending sampling result"; "block_root" => ?block_root, "reason" => ?e); + } + } + Err(e) => { + warn!(self.log, "Sampling failed"; "block_root" => %block_root, "reason" => ?e); + } + } + } } } @@ -1027,7 +1158,12 @@ impl SyncManager { self.network.insert_range_blocks_and_blobs_request( id, resp.sender_id, - BlocksAndBlobsRequestInfo::new(resp.request_type, peer_id), + RangeBlockComponentsRequest::new( + resp.expects_blobs, + resp.expects_custody_columns, + None, + vec![], + ), ); // inform range that the request needs to be treated as failed // With time we will want to downgrade this log diff --git a/beacon_node/network/src/sync/mod.rs b/beacon_node/network/src/sync/mod.rs index 7b244bceceb..6669add4453 100644 --- a/beacon_node/network/src/sync/mod.rs +++ b/beacon_node/network/src/sync/mod.rs @@ -8,6 +8,8 @@ pub mod manager; mod network_context; mod peer_sync_info; mod range_sync; +mod sampling; +pub use lighthouse_network::service::api_types::SamplingId; pub use manager::{BatchProcessResult, SyncMessage}; pub use range_sync::{BatchOperationOutcome, ChainId}; diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index fa9159f7f8e..0b02a986f73 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -1,43 +1,53 @@ //! Provides network functionality for the Syncing thread. This fundamentally wraps a network //! channel and stores a global RPC ID to perform requests. 
+use self::custody::{ActiveCustodyRequest, Error as CustodyRequestError}; use self::requests::{ActiveBlobsByRootRequest, ActiveBlocksByRootRequest}; -pub use self::requests::{BlobsByRootSingleBlockRequest, BlocksByRootSingleRequest}; -use super::block_sidecar_coupling::BlocksAndBlobsRequestInfo; +pub use self::requests::{BlocksByRootSingleRequest, DataColumnsByRootSingleBlockRequest}; +use super::block_sidecar_coupling::RangeBlockComponentsRequest; +use super::manager::BlockProcessType; use super::range_sync::{BatchId, ByRangeRequestType, ChainId}; use crate::metrics; use crate::network_beacon_processor::NetworkBeaconProcessor; use crate::service::NetworkMessage; use crate::status::ToStatusMessage; use crate::sync::block_lookups::SingleLookupId; -use crate::sync::manager::BlockProcessType; +use crate::sync::network_context::requests::BlobsByRootSingleBlockRequest; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::{BeaconChain, BeaconChainTypes, BlockProcessStatus, EngineState}; use fnv::FnvHashMap; -use lighthouse_network::rpc::methods::BlobsByRangeRequest; +use lighthouse_network::rpc::methods::{BlobsByRangeRequest, DataColumnsByRangeRequest}; use lighthouse_network::rpc::{BlocksByRangeRequest, GoodbyeReason, RPCError}; use lighthouse_network::service::api_types::{ - AppRequestId, DataColumnsByRootRequestId, Id, SingleLookupReqId, SyncRequestId, + AppRequestId, CustodyId, CustodyRequester, DataColumnsByRootRequestId, + DataColumnsByRootRequester, Id, SingleLookupReqId, SyncRequestId, }; use lighthouse_network::{Client, NetworkGlobals, PeerAction, PeerId, ReportSource, Request}; +use rand::seq::SliceRandom; +use rand::thread_rng; +use requests::ActiveDataColumnsByRootRequest; pub use requests::LookupVerifyError; -use requests::{ActiveDataColumnsByRootRequest, DataColumnsByRootSingleBlockRequest}; -use slog::{debug, error, trace, warn}; +use slog::{debug, error, warn}; +use slot_clock::SlotClock; use std::collections::hash_map::Entry; +use 
std::collections::HashMap; use std::sync::Arc; use std::time::Duration; use tokio::sync::mpsc; use types::blob_sidecar::FixedBlobSidecarList; use types::{ - BlobSidecar, DataColumnSidecar, DataColumnSidecarList, EthSpec, Hash256, SignedBeaconBlock, + BlobSidecar, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, EthSpec, Hash256, + SignedBeaconBlock, Slot, }; +pub mod custody; mod requests; pub struct BlocksAndBlobsByRangeResponse { pub sender_id: RangeRequestId, pub responses: Result>, String>, - pub request_type: ByRangeRequestType, + pub expects_blobs: bool, + pub expects_custody_columns: Option>, } #[derive(Debug, Clone, Copy)] @@ -60,15 +70,20 @@ pub enum RpcEvent { pub type RpcResponseResult = Result<(T, Duration), RpcResponseError>; +#[derive(Debug)] pub enum RpcResponseError { RpcError(RPCError), VerifyError(LookupVerifyError), + CustodyRequestError(CustodyRequestError), } #[derive(Debug, PartialEq, Eq)] pub enum RpcRequestSendError { /// Network channel send failed NetworkSendError, + NoCustodyPeers, + CustodyRequestError(custody::Error), + SlotClockError, } #[derive(Debug, PartialEq, Eq)] @@ -82,6 +97,7 @@ impl std::fmt::Display for RpcResponseError { match self { RpcResponseError::RpcError(e) => write!(f, "RPC Error: {:?}", e), RpcResponseError::VerifyError(e) => write!(f, "Lookup Verify Error: {:?}", e), + RpcResponseError::CustodyRequestError(e) => write!(f, "Custody Request Error: {:?}", e), } } } @@ -98,6 +114,31 @@ impl From for RpcResponseError { } } +/// Represents a group of peers that served a block component. +#[derive(Clone, Debug)] +pub struct PeerGroup { + /// Peers group by which indexed section of the block component they served. For example: + /// - PeerA served = [blob index 0, blob index 2] + /// - PeerA served = [blob index 1] + peers: HashMap>, +} + +impl PeerGroup { + /// Return a peer group where a single peer returned all parts of a block component. For + /// example, a block has a single component (the block = index 0/1). 
+ pub fn from_single(peer: PeerId) -> Self { + Self { + peers: HashMap::from_iter([(peer, vec![0])]), + } + } + pub fn from_set(peers: HashMap>) -> Self { + Self { peers } + } + pub fn all(&self) -> impl Iterator + '_ { + self.peers.keys() + } +} + /// Sequential ID that uniquely identifies ReqResp outgoing requests pub type ReqId = u32; @@ -128,13 +169,16 @@ pub struct SyncNetworkContext { /// A mapping of active BlobsByRoot requests, including both current slot and parent lookups. blobs_by_root_requests: FnvHashMap>, + /// Mapping of active custody column requests for a block root + custody_by_root_requests: FnvHashMap>, + /// A mapping of active DataColumnsByRoot requests data_columns_by_root_requests: FnvHashMap>, /// BlocksByRange requests paired with BlobsByRange - range_blocks_and_blobs_requests: - FnvHashMap)>, + range_block_components_requests: + FnvHashMap)>, /// Whether the ee is online. If it's not, we don't allow access to the /// `beacon_processor_send`. @@ -153,6 +197,7 @@ pub struct SyncNetworkContext { pub enum BlockOrBlob { Block(Option>>), Blob(Option>>), + CustodyColumns(Option>>), } impl From>>> for BlockOrBlob { @@ -181,7 +226,8 @@ impl SyncNetworkContext { blocks_by_root_requests: <_>::default(), blobs_by_root_requests: <_>::default(), data_columns_by_root_requests: <_>::default(), - range_blocks_and_blobs_requests: FnvHashMap::default(), + custody_by_root_requests: <_>::default(), + range_block_components_requests: FnvHashMap::default(), network_beacon_processor, chain, log, @@ -191,10 +237,10 @@ impl SyncNetworkContext { /// Returns the ids of all the requests made to the given peer_id. 
pub fn peer_disconnected(&mut self, peer_id: &PeerId) -> Vec { let failed_range_ids = - self.range_blocks_and_blobs_requests + self.range_block_components_requests .iter() .filter_map(|(id, request)| { - if request.1.peer_id == *peer_id { + if request.1.peer_ids.contains(peer_id) { Some(SyncRequestId::RangeBlockAndBlobs { id: *id }) } else { None @@ -239,6 +285,17 @@ impl SyncNetworkContext { .collect() } + pub fn get_custodial_peers(&self, column_index: ColumnIndex) -> Vec { + self.network_globals() + .custody_peers_for_column(column_index) + } + + pub fn get_random_custodial_peer(&self, column_index: ColumnIndex) -> Option { + self.get_custodial_peers(column_index) + .choose(&mut thread_rng()) + .cloned() + } + pub fn network_globals(&self) -> &NetworkGlobals { &self.network_beacon_processor.network_globals } @@ -277,19 +334,23 @@ impl SyncNetworkContext { } } - /// A blocks by range request for the range sync algorithm. - pub fn blocks_by_range_request( + /// A blocks by range request sent by the range sync algorithm + pub fn block_components_by_range_request( &mut self, peer_id: PeerId, batch_type: ByRangeRequestType, request: BlocksByRangeRequest, + sender_id: RangeRequestId, ) -> Result { + let epoch = Slot::new(*request.start_slot()).epoch(T::EthSpec::slots_per_epoch()); let id = self.next_id(); - trace!( + let mut requested_peers = vec![peer_id]; + debug!( self.log, "Sending BlocksByRange request"; "method" => "BlocksByRange", "count" => request.count(), + "epoch" => epoch, "peer" => %peer_id, ); self.network_send @@ -300,12 +361,13 @@ impl SyncNetworkContext { }) .map_err(|_| RpcRequestSendError::NetworkSendError)?; - if matches!(batch_type, ByRangeRequestType::BlocksAndBlobs) { + let expected_blobs = if matches!(batch_type, ByRangeRequestType::BlocksAndBlobs) { debug!( self.log, "Sending BlobsByRange requests"; "method" => "BlobsByRange", "count" => request.count(), + "epoch" => epoch, "peer" => %peer_id, ); @@ -320,33 +382,94 @@ impl SyncNetworkContext { 
request_id: AppRequestId::Sync(SyncRequestId::RangeBlockAndBlobs { id }), }) .map_err(|_| RpcRequestSendError::NetworkSendError)?; - } + true + } else { + false + }; + + let (expects_custody_columns, num_of_custody_column_req) = + if matches!(batch_type, ByRangeRequestType::BlocksAndColumns) { + let custody_indexes = self.network_globals().custody_columns(); + let mut num_of_custody_column_req = 0; + + for (peer_id, columns_by_range_request) in + self.make_columns_by_range_requests(request, &custody_indexes)? + { + requested_peers.push(peer_id); + + debug!( + self.log, + "Sending DataColumnsByRange requests"; + "method" => "DataColumnsByRange", + "count" => columns_by_range_request.count, + "epoch" => epoch, + "columns" => ?columns_by_range_request.columns, + "peer" => %peer_id, + ); + + self.send_network_msg(NetworkMessage::SendRequest { + peer_id, + request: Request::DataColumnsByRange(columns_by_range_request), + request_id: AppRequestId::Sync(SyncRequestId::RangeBlockAndBlobs { id }), + }) + .map_err(|_| RpcRequestSendError::NetworkSendError)?; + + num_of_custody_column_req += 1; + } + (Some(custody_indexes), Some(num_of_custody_column_req)) + } else { + (None, None) + }; + + let info = RangeBlockComponentsRequest::new( + expected_blobs, + expects_custody_columns, + num_of_custody_column_req, + requested_peers, + ); + self.range_block_components_requests + .insert(id, (sender_id, info)); Ok(id) } - /// A blocks by range request sent by the range sync algorithm - pub fn blocks_and_blobs_by_range_request( - &mut self, - peer_id: PeerId, - batch_type: ByRangeRequestType, + fn make_columns_by_range_requests( + &self, request: BlocksByRangeRequest, - sender_id: RangeRequestId, - ) -> Result { - let id = self.blocks_by_range_request(peer_id, batch_type, request)?; - self.range_blocks_and_blobs_requests.insert( - id, - ( - sender_id, - BlocksAndBlobsRequestInfo::new(batch_type, peer_id), - ), - ); - Ok(id) + custody_indexes: &Vec, + ) -> Result, RpcRequestSendError> { 
+ let mut peer_id_to_request_map = HashMap::new(); + + for column_index in custody_indexes { + // TODO(das): The peer selection logic here needs to be improved - we should probably + // avoid retrying from failed peers, however `BatchState` currently only tracks the peer + // serving the blocks. + let Some(custody_peer) = self.get_random_custodial_peer(*column_index) else { + // TODO(das): this will be pretty bad UX. To improve we should: + // - Attempt to fetch custody requests first, before requesting blocks + // - Handle the no peers case gracefully, maybe add some timeout and give a few + // minutes / seconds to the peer manager to locate peers on this subnet before + // abandoing progress on the chain completely. + return Err(RpcRequestSendError::NoCustodyPeers); + }; + + let columns_by_range_request = peer_id_to_request_map + .entry(custody_peer) + .or_insert_with(|| DataColumnsByRangeRequest { + start_slot: *request.start_slot(), + count: *request.count(), + columns: vec![], + }); + + columns_by_range_request.columns.push(*column_index); + } + + Ok(peer_id_to_request_map) } pub fn range_request_failed(&mut self, request_id: Id) -> Option { let sender_id = self - .range_blocks_and_blobs_requests + .range_block_components_requests .remove(&request_id) .map(|(sender_id, _info)| sender_id); if let Some(sender_id) = sender_id { @@ -370,7 +493,7 @@ impl SyncNetworkContext { request_id: Id, block_or_blob: BlockOrBlob, ) -> Option> { - let Entry::Occupied(mut entry) = self.range_blocks_and_blobs_requests.entry(request_id) + let Entry::Occupied(mut entry) = self.range_block_components_requests.entry(request_id) else { metrics::inc_counter_vec(&metrics::SYNC_UNKNOWN_NETWORK_REQUESTS, &["range_blocks"]); return None; @@ -380,15 +503,17 @@ impl SyncNetworkContext { match block_or_blob { BlockOrBlob::Block(maybe_block) => info.add_block_response(maybe_block), BlockOrBlob::Blob(maybe_sidecar) => info.add_sidecar_response(maybe_sidecar), + 
BlockOrBlob::CustodyColumns(column) => info.add_data_column(column), } if info.is_finished() { // If the request is finished, dequeue everything let (sender_id, info) = entry.remove(); - let request_type = info.get_request_type(); + let (expects_blobs, expects_custody_columns) = info.get_requirements(); Some(BlocksAndBlobsByRangeResponse { sender_id, - request_type, - responses: info.into_responses(), + responses: info.into_responses(&self.chain.spec), + expects_blobs, + expects_custody_columns, }) } else { None @@ -470,6 +595,21 @@ impl SyncNetworkContext { block_root: Hash256, downloaded_block: Option>>, ) -> Result { + // Check if we are into deneb, and before peerdas + if !self + .chain + .data_availability_checker + .blobs_required_for_epoch( + // TODO(das): use the block's slot + self.chain + .slot_clock + .now_or_genesis() + .ok_or(RpcRequestSendError::SlotClockError)? + .epoch(T::EthSpec::slots_per_epoch()), + ) + { + return Ok(LookupRequestResult::NoRequestNeeded); + } let Some(block) = downloaded_block.or_else(|| { // If the block is already being processed or fully validated, retrieve how many blobs // it expects. Consider any stage of the block. If the block root has been validated, we @@ -553,7 +693,7 @@ impl SyncNetworkContext { /// Request to send a single `data_columns_by_root` request to the network. 
pub fn data_column_lookup_request( &mut self, - requester: SingleLookupReqId, + requester: DataColumnsByRootRequester, peer_id: PeerId, request: DataColumnsByRootSingleBlockRequest, ) -> Result, &'static str> { @@ -627,7 +767,7 @@ impl SyncNetworkContext { .unwrap_or_default(); // TODO(das): figure out how to pass block.slot if we end up doing rotation - let custody_indexes_duty = self.network_globals().custody_columns(&self.chain.spec); + let custody_indexes_duty = self.network_globals().custody_columns(); // Include only the blob indexes not yet imported (received through gossip) let custody_indexes_to_fetch = custody_indexes_duty @@ -651,10 +791,28 @@ impl SyncNetworkContext { "id" => ?id ); - // TODO(das): Issue a custody request with `id` for the set of columns - // `custody_indexes_to_fetch` and block `block_root`. + let requester = CustodyRequester(id); + let mut request = ActiveCustodyRequest::new( + block_root, + // TODO(das): req_id is duplicated here, also present in id + CustodyId { requester, req_id }, + &custody_indexes_to_fetch, + self.log.clone(), + ); - Ok(LookupRequestResult::RequestSent(req_id)) + // TODO(das): start request + // Note that you can only send, but not handle a response here + match request.continue_requests(self) { + Ok(_) => { + // Ignoring the result of `continue_requests` is okay. A request that has just been + // created cannot return data immediately, it must send some request to the network + // first. And there must exist some request, `custody_indexes_to_fetch` is not empty. 
+ self.custody_by_root_requests.insert(requester, request); + Ok(LookupRequestResult::RequestSent(req_id)) + } + // TODO(das): handle this error properly + Err(e) => Err(RpcRequestSendError::CustodyRequestError(e)), + } } pub fn is_execution_engine_online(&self) -> bool { @@ -738,12 +896,18 @@ impl SyncNetworkContext { "To deal with alignment with deneb boundaries, batches need to be of just one epoch" ); - if let Some(data_availability_boundary) = self.chain.data_availability_boundary() { - if epoch >= data_availability_boundary { - ByRangeRequestType::BlocksAndBlobs - } else { - ByRangeRequestType::Blocks - } + if self + .chain + .data_availability_checker + .data_columns_required_for_epoch(epoch) + { + ByRangeRequestType::BlocksAndColumns + } else if self + .chain + .data_availability_checker + .blobs_required_for_epoch(epoch) + { + ByRangeRequestType::BlocksAndBlobs } else { ByRangeRequestType::Blocks } @@ -753,9 +917,9 @@ impl SyncNetworkContext { &mut self, id: Id, sender_id: RangeRequestId, - info: BlocksAndBlobsRequestInfo, + info: RangeBlockComponentsRequest, ) { - self.range_blocks_and_blobs_requests + self.range_block_components_requests .insert(id, (sender_id, info)); } @@ -853,7 +1017,7 @@ impl SyncNetworkContext { pub fn on_data_columns_by_root_response( &mut self, id: DataColumnsByRootRequestId, - peer_id: PeerId, + _peer_id: PeerId, rpc_event: RpcEvent>>, ) -> Option>>>> { let Entry::Occupied(mut request) = self.data_columns_by_root_requests.entry(id) else { @@ -885,8 +1049,10 @@ impl SyncNetworkContext { // catch if a peer is returning more columns than requested or if the excess blobs are // invalid. Err((e, resolved)) => { - if let RpcResponseError::VerifyError(e) = &e { - self.report_peer(peer_id, PeerAction::LowToleranceError, e.into()); + if let RpcResponseError::VerifyError(_e) = &e { + // TODO(das): this is a bug, we should not penalise peer in this case. + // confirm this can be removed. 
+ // self.report_peer(peer_id, PeerAction::LowToleranceError, e.into()); } if resolved { None @@ -897,6 +1063,53 @@ impl SyncNetworkContext { } } + /// Insert a downloaded column into an active custody request. Then make progress on the + /// entire request. + /// + /// ### Returns + /// + /// - `Some`: Request completed, won't make more progress. Expect requester to act on the result. + /// - `None`: Request still active, requester should do no action + #[allow(clippy::type_complexity)] + pub fn on_custody_by_root_response( + &mut self, + id: CustodyId, + req_id: DataColumnsByRootRequestId, + peer_id: PeerId, + resp: RpcResponseResult>>>, + ) -> Option, PeerGroup), RpcResponseError>> { + // Note: need to remove the request to borrow self again below. Otherwise we can't + // do nested requests + let Some(mut request) = self.custody_by_root_requests.remove(&id.requester) else { + // TODO(das): This log can happen if the request is error'ed early and dropped + debug!(self.log, "Custody column downloaded event for unknown request"; "id" => ?id); + return None; + }; + + let result = request + .on_data_column_downloaded(peer_id, req_id, resp, self) + .map_err(RpcResponseError::CustodyRequestError) + .transpose(); + + // Convert a result from internal format of `ActiveCustodyRequest` (error first to use ?) to + // an Option first to use in an `if let Some() { act on result }` block.
+ if let Some(result) = result { + match result.as_ref() { + Ok((columns, peer_group)) => { + debug!(self.log, "Custody request success, removing"; "id" => ?id, "count" => columns.len(), "peers" => ?peer_group) + } + Err(e) => { + debug!(self.log, "Custody request failure, removing"; "id" => ?id, "error" => ?e) + } + } + + Some(result) + } else { + self.custody_by_root_requests.insert(id.requester, request); + None + } + } + pub fn send_block_for_processing( &self, id: Id, @@ -961,22 +1174,28 @@ impl SyncNetworkContext { pub fn send_custody_columns_for_processing( &self, - id: Id, + _id: Id, block_root: Hash256, - _custody_columns: DataColumnSidecarList, - _duration: Duration, + custody_columns: DataColumnSidecarList, + duration: Duration, + process_type: BlockProcessType, ) -> Result<(), SendErrorProcessor> { - let _beacon_processor = self + let beacon_processor = self .beacon_processor_if_enabled() .ok_or(SendErrorProcessor::ProcessorNotAvailable)?; - debug!(self.log, "Sending custody columns for processing"; "block" => ?block_root, "id" => id); + debug!(self.log, "Sending custody columns for processing"; "block" => ?block_root, "process_type" => ?process_type); - // Lookup sync event safety: If `beacon_processor.send_rpc_custody_columns` returns Ok() sync - // must receive a single `SyncMessage::BlockComponentProcessed` event with this process type - // - // TODO(das): After merging processor import PR, actually send columns to beacon processor. 
- Ok(()) + beacon_processor + .send_rpc_custody_columns(block_root, custody_columns, duration, process_type) + .map_err(|e| { + error!( + self.log, + "Failed to send sync custody columns to processor"; + "error" => ?e + ); + SendErrorProcessor::SendError + }) } pub(crate) fn register_metrics(&self) { @@ -993,7 +1212,7 @@ impl SyncNetworkContext { metrics::set_gauge_vec( &metrics::SYNC_ACTIVE_NETWORK_REQUESTS, &["range_blocks"], - self.range_blocks_and_blobs_requests.len() as i64, + self.range_block_components_requests.len() as i64, ); } } diff --git a/beacon_node/network/src/sync/network_context/custody.rs b/beacon_node/network/src/sync/network_context/custody.rs new file mode 100644 index 00000000000..b1038c74703 --- /dev/null +++ b/beacon_node/network/src/sync/network_context/custody.rs @@ -0,0 +1,415 @@ +use crate::sync::network_context::{ + DataColumnsByRootRequestId, DataColumnsByRootSingleBlockRequest, +}; + +use beacon_chain::BeaconChainTypes; +use fnv::FnvHashMap; +use lighthouse_network::service::api_types::{CustodyId, DataColumnsByRootRequester}; +use lighthouse_network::PeerId; +use lru_cache::LRUTimeCache; +use rand::Rng; +use slog::{debug, warn}; +use std::time::Duration; +use std::{collections::HashMap, marker::PhantomData, sync::Arc}; +use types::EthSpec; +use types::{data_column_sidecar::ColumnIndex, DataColumnSidecar, Hash256}; + +use super::{LookupRequestResult, PeerGroup, RpcResponseResult, SyncNetworkContext}; + +const FAILED_PEERS_CACHE_EXPIRY_SECONDS: u64 = 5; + +type DataColumnSidecarList = Vec>>; + +pub struct ActiveCustodyRequest { + block_root: Hash256, + custody_id: CustodyId, + /// List of column indices this request needs to download to complete successfully + column_requests: FnvHashMap>, + /// Active requests for 1 or more columns each + active_batch_columns_requests: + FnvHashMap, + /// Peers that have recently failed to successfully respond to a columns by root request. 
+ /// Having a LRUTimeCache allows this request to not have to track disconnecting peers. + failed_peers: LRUTimeCache, + /// Logger for the `SyncNetworkContext`. + pub log: slog::Logger, + _phantom: PhantomData, +} + +#[derive(Debug, Eq, PartialEq)] +pub enum Error { + SendFailed(&'static str), + TooManyFailures, + BadState(String), + NoPeers(ColumnIndex), + /// Received a download result for a different request id than the in-flight request. + /// There should only exist a single request at a time. Having multiple requests is a bug and + /// can result in undefined state, so it's treated as a hard error and the lookup is dropped. + UnexpectedRequestId { + expected_req_id: DataColumnsByRootRequestId, + req_id: DataColumnsByRootRequestId, + }, +} + +struct ActiveBatchColumnsRequest { + peer_id: PeerId, + indices: Vec, +} + +type CustodyRequestResult = Result, PeerGroup)>, Error>; + +impl ActiveCustodyRequest { + pub(crate) fn new( + block_root: Hash256, + custody_id: CustodyId, + column_indices: &[ColumnIndex], + log: slog::Logger, + ) -> Self { + Self { + block_root, + custody_id, + column_requests: HashMap::from_iter( + column_indices + .iter() + .map(|index| (*index, ColumnRequest::new())), + ), + active_batch_columns_requests: <_>::default(), + failed_peers: LRUTimeCache::new(Duration::from_secs(FAILED_PEERS_CACHE_EXPIRY_SECONDS)), + log, + _phantom: PhantomData, + } + } + + /// Insert a downloaded column into an active custody request. Then make progress on the + /// entire request. 
+ /// + /// ### Returns + /// + /// - `Err`: Custody request has failed and will be dropped + /// - `Ok(Some)`: Custody request has successfully completed and will be dropped + /// - `Ok(None)`: Custody request still active + pub(crate) fn on_data_column_downloaded( + &mut self, + peer_id: PeerId, + req_id: DataColumnsByRootRequestId, + resp: RpcResponseResult>, + cx: &mut SyncNetworkContext, + ) -> CustodyRequestResult { + // TODO(das): Should downscore peers for verify errors here + + let Some(batch_request) = self.active_batch_columns_requests.get_mut(&req_id) else { + warn!(self.log, + "Received custody column response for unrequested index"; + "id" => ?self.custody_id, + "block_root" => ?self.block_root, + "req_id" => %req_id, + ); + return Ok(None); + }; + + match resp { + Ok((data_columns, _seen_timestamp)) => { + debug!(self.log, + "Custody column download success"; + "id" => ?self.custody_id, + "block_root" => ?self.block_root, + "req_id" => %req_id, + "peer" => %peer_id, + "count" => data_columns.len() + ); + + // Map columns by index as an optimization to not loop the returned list on each + // requested index. The worst case is 128 loops over a 128 item vec + mutation to + // drop the consumed columns. + let mut data_columns = HashMap::::from_iter( + data_columns.into_iter().map(|d| (d.index, d)), + ); + // Accumulate columns that the peer does not have to issue a single log per request + let mut missing_column_indexes = vec![]; + + for column_index in &batch_request.indices { + let column_request = self + .column_requests + .get_mut(column_index) + .ok_or(Error::BadState("unknown column_index".to_owned()))?; + + if let Some(data_column) = data_columns.remove(column_index) { + column_request.on_download_success(req_id, peer_id, data_column)?; + } else { + // Peer does not have the requested data. + // TODO(das) do not consider this case a success. We know for sure the block has + // data.
However we allow the peer to return empty as we can't attribute fault. + // TODO(das): Should track which columns are missing and eventually give up + // TODO(das): If the peer is in the lookup peer set it claims to have imported + // the block AND its custody columns. So in this case we can downscore + column_request.on_download_error(req_id)?; + missing_column_indexes.push(column_index); + } + } + + // Note: no need to check data_columns is empty, SyncNetworkContext ensures that + // successful responses only contain requested data. + + if !missing_column_indexes.is_empty() { + // Note: Batch logging that columns are missing to not spam logger + debug!(self.log, + "Custody column peer claims to not have some data"; + "id" => ?self.custody_id, + "block_root" => ?self.block_root, + "req_id" => %req_id, + "peer" => %peer_id, + // TODO(das): this property can become very noisy, being the full range 0..128 + "missing_column_indexes" => ?missing_column_indexes + ); + + self.failed_peers.insert(peer_id); + } + } + Err(err) => { + debug!(self.log, + "Custody column download error"; + "id" => ?self.custody_id, + "block_root" => ?self.block_root, + "req_id" => %req_id, + "peer" => %peer_id, + "error" => ?err + ); + + // TODO(das): Should mark peer as failed and try from another peer + for column_index in &batch_request.indices { + self.column_requests + .get_mut(column_index) + .ok_or(Error::BadState("unknown column_index".to_owned()))? + .on_download_error_and_mark_failure(req_id)?; + } + + self.failed_peers.insert(peer_id); + } + }; + + self.continue_requests(cx) + } + + pub(crate) fn continue_requests( + &mut self, + cx: &mut SyncNetworkContext, + ) -> CustodyRequestResult { + if self.column_requests.values().all(|r| r.is_downloaded()) { + // All requests have completed successfully. 
+ let mut peers = HashMap::>::new(); + let columns = std::mem::take(&mut self.column_requests) + .into_values() + .map(|request| { + let (peer, data_column) = request.complete()?; + peers + .entry(peer) + .or_default() + .push(data_column.index as usize); + Ok(data_column) + }) + .collect::, _>>()?; + + let peer_group = PeerGroup::from_set(peers); + return Ok(Some((columns, peer_group))); + } + + let mut columns_to_request_by_peer = HashMap::>::new(); + + // Need to: + // - track how many active requests a peer has for load balancing + // - which peers have failures to attempt others + // - which peer returned what to have PeerGroup attributability + + for (column_index, request) in self.column_requests.iter_mut() { + if request.is_awaiting_download() { + if request.download_failures > MAX_CUSTODY_COLUMN_DOWNLOAD_ATTEMPTS { + return Err(Error::TooManyFailures); + } + + // TODO: When there is a fork and only a subset of your peers know about a block, we should only + // query the peers on that fork. Should this case be handled? How to handle it?
+ let custodial_peers = cx.get_custodial_peers(*column_index); + + // TODO(das): cache this computation in a OneCell or similar to prevent having to + // run it every loop + let mut active_requests_by_peer = HashMap::::new(); + for batch_request in self.active_batch_columns_requests.values() { + *active_requests_by_peer + .entry(batch_request.peer_id) + .or_default() += 1; + } + + let mut priorized_peers = custodial_peers + .iter() + .map(|peer| { + ( + // De-prioritize peers that have failed to successfully respond to + // requests recently + self.failed_peers.contains(peer), + // Prefer peers with less requests to load balance across peers + active_requests_by_peer.get(peer).copied().unwrap_or(0), + // Final random factor to give all peers a shot in each retry + rand::thread_rng().gen::(), + *peer, + ) + }) + .collect::>(); + priorized_peers.sort_unstable(); + + let Some((_, _, _, peer_id)) = priorized_peers.first() else { + // Do not tolerate not having custody peers, hard error. + // TODO(das): we might implement some grace period. The request will pause for X + // seconds expecting the peer manager to find peers before failing the request. 
+ return Err(Error::NoPeers(*column_index)); + }; + + columns_to_request_by_peer + .entry(*peer_id) + .or_default() + .push(*column_index); + } + } + + for (peer_id, indices) in columns_to_request_by_peer.into_iter() { + let request_result = cx + .data_column_lookup_request( + DataColumnsByRootRequester::Custody(self.custody_id), + peer_id, + DataColumnsByRootSingleBlockRequest { + block_root: self.block_root, + indices: indices.clone(), + }, + ) + .map_err(Error::SendFailed)?; + + match request_result { + LookupRequestResult::RequestSent(req_id) => { + for column_index in &indices { + let column_request = self + .column_requests + .get_mut(column_index) + .ok_or(Error::BadState("unknown column_index".to_owned()))?; + + column_request.on_download_start(req_id)?; + } + + self.active_batch_columns_requests + .insert(req_id, ActiveBatchColumnsRequest { indices, peer_id }); + } + LookupRequestResult::NoRequestNeeded => unreachable!(), + LookupRequestResult::Pending(_) => unreachable!(), + } + } + + Ok(None) + } +} + +/// TODO(das): this attempt count is nested into the existing lookup request count. +const MAX_CUSTODY_COLUMN_DOWNLOAD_ATTEMPTS: usize = 3; + +struct ColumnRequest { + status: Status, + download_failures: usize, +} + +#[derive(Debug, Clone)] +enum Status { + NotStarted, + Downloading(DataColumnsByRootRequestId), + Downloaded(PeerId, Arc>), +} + +impl ColumnRequest { + fn new() -> Self { + Self { + status: Status::NotStarted, + download_failures: 0, + } + } + + fn is_awaiting_download(&self) -> bool { + match self.status { + Status::NotStarted => true, + Status::Downloading { .. } | Status::Downloaded { .. } => false, + } + } + + fn is_downloaded(&self) -> bool { + match self.status { + Status::NotStarted | Status::Downloading { .. } => false, + Status::Downloaded { .. 
} => true, + } + } + + fn on_download_start(&mut self, req_id: DataColumnsByRootRequestId) -> Result<(), Error> { + match &self.status { + Status::NotStarted => { + self.status = Status::Downloading(req_id); + Ok(()) + } + other => Err(Error::BadState(format!( + "bad state on_download_start expected NotStarted got {other:?}" + ))), + } + } + + fn on_download_error(&mut self, req_id: DataColumnsByRootRequestId) -> Result<(), Error> { + match &self.status { + Status::Downloading(expected_req_id) => { + if req_id != *expected_req_id { + return Err(Error::UnexpectedRequestId { + expected_req_id: *expected_req_id, + req_id, + }); + } + self.status = Status::NotStarted; + Ok(()) + } + other => Err(Error::BadState(format!( + "bad state on_download_error expected Downloading got {other:?}" + ))), + } + } + + fn on_download_error_and_mark_failure( + &mut self, + req_id: DataColumnsByRootRequestId, + ) -> Result<(), Error> { + // TODO(das): Should track which peers don't have data + self.download_failures += 1; + self.on_download_error(req_id) + } + + fn on_download_success( + &mut self, + req_id: DataColumnsByRootRequestId, + peer_id: PeerId, + data_column: Arc>, + ) -> Result<(), Error> { + match &self.status { + Status::Downloading(expected_req_id) => { + if req_id != *expected_req_id { + return Err(Error::UnexpectedRequestId { + expected_req_id: *expected_req_id, + req_id, + }); + } + self.status = Status::Downloaded(peer_id, data_column); + Ok(()) + } + other => Err(Error::BadState(format!( + "bad state on_download_success expected Downloading got {other:?}" + ))), + } + } + + fn complete(self) -> Result<(PeerId, Arc>), Error> { + match self.status { + Status::Downloaded(peer_id, data_column) => Ok((peer_id, data_column)), + other => Err(Error::BadState(format!( + "bad state complete expected Downloaded got {other:?}" + ))), + } + } +} diff --git a/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs 
b/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs index a45916905ce..a42ae7ca41f 100644 --- a/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs +++ b/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs @@ -1,6 +1,5 @@ -use lighthouse_network::{ - rpc::methods::DataColumnsByRootRequest, service::api_types::SingleLookupReqId, PeerId, -}; +use lighthouse_network::service::api_types::DataColumnsByRootRequester; +use lighthouse_network::{rpc::methods::DataColumnsByRootRequest, PeerId}; use std::sync::Arc; use types::{ChainSpec, DataColumnIdentifier, DataColumnSidecar, EthSpec, Hash256}; @@ -32,14 +31,14 @@ pub struct ActiveDataColumnsByRootRequest { items: Vec>>, resolved: bool, pub(crate) peer_id: PeerId, - pub(crate) requester: SingleLookupReqId, + pub(crate) requester: DataColumnsByRootRequester, } impl ActiveDataColumnsByRootRequest { pub fn new( request: DataColumnsByRootSingleBlockRequest, peer_id: PeerId, - requester: SingleLookupReqId, + requester: DataColumnsByRootRequester, ) -> Self { Self { request, diff --git a/beacon_node/network/src/sync/range_sync/batch.rs b/beacon_node/network/src/sync/range_sync/batch.rs index 7f9629740bb..53fb55b14da 100644 --- a/beacon_node/network/src/sync/range_sync/batch.rs +++ b/beacon_node/network/src/sync/range_sync/batch.rs @@ -20,6 +20,7 @@ const MAX_BATCH_PROCESSING_ATTEMPTS: u8 = 3; #[derive(Debug, Copy, Clone, Display)] #[strum(serialize_all = "snake_case")] pub enum ByRangeRequestType { + BlocksAndColumns, BlocksAndBlobs, Blocks, } @@ -199,9 +200,9 @@ impl BatchInfo { } /// Verifies if an incoming block belongs to this batch. 
- pub fn is_expecting_block(&self, peer_id: &PeerId, request_id: &Id) -> bool { - if let BatchState::Downloading(expected_peer, expected_id) = &self.state { - return peer_id == expected_peer && expected_id == request_id; + pub fn is_expecting_block(&self, request_id: &Id) -> bool { + if let BatchState::Downloading(_, expected_id) = &self.state { + return expected_id == request_id; } false } diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index d92dcd4851c..1756fb513da 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -1,15 +1,18 @@ use super::batch::{BatchInfo, BatchProcessingResult, BatchState}; use super::RangeSyncType; use crate::metrics; +use crate::metrics::PEERS_PER_COLUMN_SUBNET; use crate::network_beacon_processor::ChainSegmentProcessId; use crate::sync::network_context::RangeRequestId; use crate::sync::{network_context::SyncNetworkContext, BatchOperationOutcome, BatchProcessResult}; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::BeaconChainTypes; use fnv::FnvHashMap; +use lighthouse_metrics::set_int_gauge; use lighthouse_network::service::api_types::Id; use lighthouse_network::{PeerAction, PeerId}; -use rand::{seq::SliceRandom, Rng}; +use rand::seq::SliceRandom; +use rand::Rng; use slog::{crit, debug, o, warn}; use std::collections::{btree_map::Entry, BTreeMap, HashSet}; use std::hash::{Hash, Hasher}; @@ -256,7 +259,9 @@ impl SyncingChain { // sending an error /timeout) if the peer is removed from the chain for other // reasons. Check that this block belongs to the expected peer, and that the // request_id matches - if !batch.is_expecting_block(peer_id, &request_id) { + // TODO(das): removed peer_id matching as the node may request a different peer for data + // columns. 
+ if !batch.is_expecting_block(&request_id) { return Ok(KeepChain); } batch @@ -439,6 +444,11 @@ impl SyncingChain { self.request_batches(network)?; } } + } else if !self.good_peers_on_custody_subnets(self.processing_target, network) { + // This is to handle the case where no batch was sent for the current processing + // target when there is no custody peers available. This is a valid state and should not + // return an error. + return Ok(KeepChain); } else { return Err(RemoveChain::WrongChainState(format!( "Batch not found for current processing target {}", @@ -862,7 +872,9 @@ impl SyncingChain { // A batch could be retried without the peer failing the request (disconnecting/ // sending an error /timeout) if the peer is removed from the chain for other // reasons. Check that this block belongs to the expected peer - if !batch.is_expecting_block(peer_id, &request_id) { + // TODO(das): removed peer_id matching as the node may request a different peer for data + // columns. + if !batch.is_expecting_block(&request_id) { debug!( self.log, "Batch not expecting block"; @@ -953,7 +965,7 @@ impl SyncingChain { let batch_state = self.visualize_batch_state(); if let Some(batch) = self.batches.get_mut(&batch_id) { let (request, batch_type) = batch.to_blocks_by_range_request(); - match network.blocks_and_blobs_by_range_request( + match network.block_components_by_range_request( peer, batch_type, request, @@ -1063,6 +1075,14 @@ impl SyncingChain { // check if we have the batch for our optimistic start. If not, request it first. // We wait for this batch before requesting any other batches. 
if let Some(epoch) = self.optimistic_start { + if !self.good_peers_on_custody_subnets(epoch, network) { + debug!( + self.log, + "Waiting for peers to be available on custody column subnets" + ); + return Ok(KeepChain); + } + if let Entry::Vacant(entry) = self.batches.entry(epoch) { if let Some(peer) = idle_peers.pop() { let batch_type = network.batch_type(epoch); @@ -1087,6 +1107,36 @@ impl SyncingChain { Ok(KeepChain) } + /// Checks all custody column subnets for peers. Returns `true` if there is at least one peer in + /// every custody column subnet. + fn good_peers_on_custody_subnets(&self, epoch: Epoch, network: &SyncNetworkContext) -> bool { + if network.chain.spec.is_peer_das_enabled_for_epoch(epoch) { + // Require peers on all custody column subnets before sending batches + let peers_on_all_custody_subnets = + network + .network_globals() + .custody_subnets() + .all(|subnet_id| { + let peer_count = network + .network_globals() + .peers + .read() + .good_custody_subnet_peer(subnet_id) + .count(); + + set_int_gauge( + &PEERS_PER_COLUMN_SUBNET, + &[&subnet_id.to_string()], + peer_count as i64, + ); + peer_count > 0 + }); + peers_on_all_custody_subnets + } else { + true + } + } + /// Creates the next required batch from the chain. If there are no more batches required, /// `false` is returned. fn include_next_batch(&mut self, network: &mut SyncNetworkContext) -> Option { @@ -1117,6 +1167,18 @@ impl SyncingChain { return None; } + // don't send batch requests until we have peers on custody subnets + // TODO(das): this is a workaround to avoid sending out excessive block requests because + // block and data column requests are currently coupled. This can be removed once we find a + // way to decouple the requests and do retries individually, see issue #6258. 
+ if !self.good_peers_on_custody_subnets(self.to_be_downloaded, network) { + debug!( + self.log, + "Waiting for peers to be available on custody column subnets" + ); + return None; + } + let batch_id = self.to_be_downloaded; // this batch could have been included already being an optimistic batch match self.batches.entry(batch_id) { diff --git a/beacon_node/network/src/sync/range_sync/range.rs b/beacon_node/network/src/sync/range_sync/range.rs index 334c58090e2..c8bb9b3b09a 100644 --- a/beacon_node/network/src/sync/range_sync/range.rs +++ b/beacon_node/network/src/sync/range_sync/range.rs @@ -689,7 +689,11 @@ mod tests { log.new(o!("component" => "range")), ); let (network_tx, network_rx) = mpsc::unbounded_channel(); - let globals = Arc::new(NetworkGlobals::new_test_globals(Vec::new(), &log)); + let globals = Arc::new(NetworkGlobals::new_test_globals( + Vec::new(), + &log, + chain.spec.clone(), + )); let (network_beacon_processor, beacon_processor_rx) = NetworkBeaconProcessor::null_for_testing( globals.clone(), diff --git a/beacon_node/network/src/sync/sampling.rs b/beacon_node/network/src/sync/sampling.rs new file mode 100644 index 00000000000..524fe86bee9 --- /dev/null +++ b/beacon_node/network/src/sync/sampling.rs @@ -0,0 +1,628 @@ +use self::request::ActiveColumnSampleRequest; +use super::network_context::{ + DataColumnsByRootSingleBlockRequest, RpcResponseError, SyncNetworkContext, +}; +use crate::metrics; +use beacon_chain::BeaconChainTypes; +use fnv::FnvHashMap; +use lighthouse_network::service::api_types::{ + DataColumnsByRootRequester, SamplingId, SamplingRequestId, SamplingRequester, +}; +use lighthouse_network::{PeerAction, PeerId}; +use rand::{seq::SliceRandom, thread_rng}; +use slog::{debug, error, warn}; +use std::{ + collections::hash_map::Entry, collections::HashMap, marker::PhantomData, sync::Arc, + time::Duration, +}; +use types::{data_column_sidecar::ColumnIndex, ChainSpec, DataColumnSidecar, Hash256}; + +pub type SamplingResult = Result<(), 
SamplingError>; + +type DataColumnSidecarList = Vec>>; + +pub struct Sampling { + // TODO(das): stalled sampling requests are never cleaned up + requests: HashMap>, + sampling_config: SamplingConfig, + log: slog::Logger, +} + +impl Sampling { + pub fn new(sampling_config: SamplingConfig, log: slog::Logger) -> Self { + Self { + requests: <_>::default(), + sampling_config, + log, + } + } + + #[cfg(test)] + pub fn active_sampling_requests(&self) -> Vec { + self.requests.values().map(|r| r.block_root).collect() + } + + /// Create a new sampling request for a known block + /// + /// ### Returns + /// + /// - `Some`: Request completed, won't make more progress. Expect requester to act on the result. + /// - `None`: Request still active, requester should do no action + pub fn on_new_sample_request( + &mut self, + block_root: Hash256, + cx: &mut SyncNetworkContext, + ) -> Option<(SamplingRequester, SamplingResult)> { + let id = SamplingRequester::ImportedBlock(block_root); + + let request = match self.requests.entry(id) { + Entry::Vacant(e) => e.insert(ActiveSamplingRequest::new( + block_root, + id, + &self.sampling_config, + self.log.clone(), + &cx.chain.spec, + )), + Entry::Occupied(_) => { + // Sampling is triggered from multiple sources, duplicate sampling requests are + // likely (gossip block + gossip data column) + // TODO(das): Should track failed sampling request for some time? Otherwise there's + // a risk of a loop with multiple triggers creating the request, then failing, + // and repeat. + debug!(self.log, "Ignoring duplicate sampling request"; "id" => ?id); + return None; + } + }; + + debug!(self.log, "Created new sample request"; "id" => ?id); + + // TODO(das): If a node has very few peers, continue_sampling() will attempt to find enough + // to sample here, immediately failing the sampling request. There should be some grace + // period to allow the peer manager to find custody peers.
+ let result = request.continue_sampling(cx); + self.handle_sampling_result(result, &id) + } + + /// Insert a downloaded column into an active sampling request. Then make progress on the + /// entire request. + /// + /// ### Returns + /// + /// - `Some`: Request completed, won't make more progress. Expect requester to act on the result. + /// - `None`: Request still active, requester should do no action + pub fn on_sample_downloaded( + &mut self, + id: SamplingId, + peer_id: PeerId, + resp: Result<(DataColumnSidecarList, Duration), RpcResponseError>, + cx: &mut SyncNetworkContext, + ) -> Option<(SamplingRequester, SamplingResult)> { + let Some(request) = self.requests.get_mut(&id.id) else { + // TODO(das): This log can happen if the request is error'ed early and dropped + debug!(self.log, "Sample downloaded event for unknown request"; "id" => ?id); + return None; + }; + + let result = request.on_sample_downloaded(peer_id, id.sampling_request_id, resp, cx); + self.handle_sampling_result(result, &id.id) + } + + /// Record the verification result of a sample in an active sampling request. Then make progress on the + /// entire request. + /// + /// ### Returns + /// + /// - `Some`: Request completed, won't make more progress. Expect requester to act on the result. + /// - `None`: Request still active, requester should do no action + pub fn on_sample_verified( + &mut self, + id: SamplingId, + result: Result<(), String>, + cx: &mut SyncNetworkContext, + ) -> Option<(SamplingRequester, SamplingResult)> { + let Some(request) = self.requests.get_mut(&id.id) else { + // TODO(das): This log can happen if the request is error'ed early and dropped + debug!(self.log, "Sample verified event for unknown request"; "id" => ?id); + return None; + }; + + let result = request.on_sample_verified(id.sampling_request_id, result, cx); + self.handle_sampling_result(result, &id.id) + } + + /// Converts a result from the internal format of `ActiveSamplingRequest` (error first to use ?
+ /// conveniently), to an Option first format to use an `if let Some() { act on result }` pattern + /// in the sync manager. + fn handle_sampling_result( + &mut self, + result: Result, SamplingError>, + id: &SamplingRequester, + ) -> Option<(SamplingRequester, SamplingResult)> { + let result = result.transpose(); + if let Some(result) = result { + debug!(self.log, "Sampling request completed, removing"; "id" => ?id, "result" => ?result); + metrics::inc_counter_vec( + &metrics::SAMPLING_REQUEST_RESULT, + &[metrics::from_result(&result)], + ); + self.requests.remove(id); + Some((*id, result)) + } else { + None + } + } +} + +pub struct ActiveSamplingRequest { + block_root: Hash256, + requester_id: SamplingRequester, + column_requests: FnvHashMap, + /// Mapping of column indexes for a sampling request. + column_indexes_by_sampling_request: FnvHashMap>, + /// Sequential ID for sampling requests. + current_sampling_request_id: SamplingRequestId, + column_shuffle: Vec, + required_successes: Vec, + /// Logger for the `SyncNetworkContext`. 
+ pub log: slog::Logger, + _phantom: PhantomData, +} + +#[derive(Debug)] +pub enum SamplingError { + SendFailed(#[allow(dead_code)] &'static str), + ProcessorUnavailable, + TooManyFailures, + BadState(#[allow(dead_code)] String), + ColumnIndexOutOfBounds, +} + +/// Required success index by current failures, with p_target=5.00E-06 +/// Ref: https://colab.research.google.com/drive/18uUgT2i-m3CbzQ5TyP9XFKqTn1DImUJD#scrollTo=E82ITcgB5ATh +const REQUIRED_SUCCESSES: [usize; 11] = [16, 20, 23, 26, 29, 32, 34, 37, 39, 42, 44]; + +#[derive(Debug, Clone)] +pub enum SamplingConfig { + Default, + #[allow(dead_code)] + Custom { + required_successes: Vec, + }, +} + +impl ActiveSamplingRequest { + fn new( + block_root: Hash256, + requester_id: SamplingRequester, + sampling_config: &SamplingConfig, + log: slog::Logger, + spec: &ChainSpec, + ) -> Self { + // Select ahead of time the full list of to-sample columns + let mut column_shuffle = + (0..spec.number_of_columns as ColumnIndex).collect::>(); + let mut rng = thread_rng(); + column_shuffle.shuffle(&mut rng); + + Self { + block_root, + requester_id, + column_requests: <_>::default(), + column_indexes_by_sampling_request: <_>::default(), + current_sampling_request_id: SamplingRequestId(0), + column_shuffle, + required_successes: match sampling_config { + SamplingConfig::Default => REQUIRED_SUCCESSES.to_vec(), + SamplingConfig::Custom { required_successes } => required_successes.clone(), + }, + log, + _phantom: PhantomData, + } + } + + /// Insert a downloaded column into an active sampling request. Then make progress on the + /// entire request. 
+ /// + /// ### Returns + /// + /// - `Err`: Sampling request has failed and will be dropped + /// - `Ok(Some)`: Sampling request has successfully completed and will be dropped + /// - `Ok(None)`: Sampling request still active + pub(crate) fn on_sample_downloaded( + &mut self, + _peer_id: PeerId, + sampling_request_id: SamplingRequestId, + resp: Result<(DataColumnSidecarList, Duration), RpcResponseError>, + cx: &mut SyncNetworkContext, + ) -> Result, SamplingError> { + // Select columns to sample + // Create individual request per column + // Progress requests + // If request fails retry or expand search + // If all good return + let Some(column_indexes) = self + .column_indexes_by_sampling_request + .get(&sampling_request_id) + else { + error!(self.log, "Column indexes for the sampling request ID not found"; "sampling_request_id" => ?sampling_request_id); + return Ok(None); + }; + + match resp { + Ok((mut resp_data_columns, seen_timestamp)) => { + debug!(self.log, "Sample download success"; "block_root" => %self.block_root, "column_indexes" => ?column_indexes, "count" => resp_data_columns.len()); + metrics::inc_counter_vec(&metrics::SAMPLE_DOWNLOAD_RESULT, &[metrics::SUCCESS]); + + // Filter the data received in the response using the requested column indexes. + let mut data_columns = vec![]; + for column_index in column_indexes { + let Some(request) = self.column_requests.get_mut(column_index) else { + warn!( + self.log, + "Active column sample request not found"; "block_root" => %self.block_root, "column_index" => column_index + ); + continue; + }; + + let Some(data_pos) = resp_data_columns + .iter() + .position(|data| &data.index == column_index) + else { + // Peer does not have the requested data. + // TODO(das) what to do? 
+ debug!(self.log, "Sampling peer claims to not have the data"; "block_root" => %self.block_root, "column_index" => column_index); + request.on_sampling_error()?; + continue; + }; + + data_columns.push(resp_data_columns.swap_remove(data_pos)); + } + + if !resp_data_columns.is_empty() { + let resp_column_indexes = resp_data_columns + .iter() + .map(|d| d.index) + .collect::>(); + debug!( + self.log, + "Received data that was not requested"; "block_root" => %self.block_root, "column_indexes" => ?resp_column_indexes + ); + } + + // Handle the downloaded data columns. + if data_columns.is_empty() { + debug!(self.log,"Received empty response"; "block_root" => %self.block_root); + self.column_indexes_by_sampling_request + .remove(&sampling_request_id); + } else { + // Overwrite `column_indexes` with the column indexes received in the response. + let column_indexes = data_columns.iter().map(|d| d.index).collect::>(); + self.column_indexes_by_sampling_request + .insert(sampling_request_id, column_indexes.clone()); + // Peer has data column, send to verify + let Some(beacon_processor) = cx.beacon_processor_if_enabled() else { + // If processor is not available, error the entire sampling + debug!(self.log, "Dropping sampling"; "block" => %self.block_root, "reason" => "beacon processor unavailable"); + return Err(SamplingError::ProcessorUnavailable); + }; + debug!(self.log, "Sending data_column for verification"; "block" => ?self.block_root, "column_indexes" => ?column_indexes); + if let Err(e) = beacon_processor.send_rpc_validate_data_columns( + self.block_root, + data_columns, + seen_timestamp, + SamplingId { + id: self.requester_id, + sampling_request_id, + }, + ) { + // TODO(das): Beacon processor is overloaded, what should we do? 
+ error!(self.log, "Dropping sampling"; "block" => %self.block_root, "reason" => e.to_string()); + return Err(SamplingError::SendFailed("beacon processor send failure")); + } + } + } + Err(err) => { + debug!(self.log, "Sample download error"; "block_root" => %self.block_root, "column_indexes" => ?column_indexes, "error" => ?err); + metrics::inc_counter_vec(&metrics::SAMPLE_DOWNLOAD_RESULT, &[metrics::FAILURE]); + + // Error downloading, maybe penalize peer and retry. + // TODO(das): retry with the same peer or a different peer? + for column_index in column_indexes { + let Some(request) = self.column_requests.get_mut(column_index) else { + warn!( + self.log, + "Active column sample request not found"; "block_root" => %self.block_root, "column_index" => column_index + ); + continue; + }; + request.on_sampling_error()?; + } + } + }; + + self.continue_sampling(cx) + } + + /// Insert a column verification result into an active sampling request. Then make progress + /// on the entire request. + /// + /// ### Returns + /// + /// - `Err`: Sampling request has failed and will be dropped + /// - `Ok(Some)`: Sampling request has successfully completed and will be dropped + /// - `Ok(None)`: Sampling request still active + pub(crate) fn on_sample_verified( + &mut self, + sampling_request_id: SamplingRequestId, + result: Result<(), String>, + cx: &mut SyncNetworkContext, + ) -> Result, SamplingError> { + let Some(column_indexes) = self + .column_indexes_by_sampling_request + .get(&sampling_request_id) + else { + error!(self.log, "Column indexes for the sampling request ID not found"; "sampling_request_id" => ?sampling_request_id); + return Ok(None); + }; + + match result { + Ok(_) => { + debug!(self.log, "Sample verification success"; "block_root" => %self.block_root, "column_indexes" => ?column_indexes); + metrics::inc_counter_vec(&metrics::SAMPLE_VERIFY_RESULT, &[metrics::SUCCESS]); + + // Valid, continue_sampling will maybe consider sampling success + for column_index in 
column_indexes { + let Some(request) = self.column_requests.get_mut(column_index) else { + warn!( + self.log, + "Active column sample request not found"; "block_root" => %self.block_root, "column_index" => column_index + ); + continue; + }; + request.on_sampling_success()?; + } + } + Err(err) => { + debug!(self.log, "Sample verification failure"; "block_root" => %self.block_root, "column_indexes" => ?column_indexes, "reason" => ?err); + metrics::inc_counter_vec(&metrics::SAMPLE_VERIFY_RESULT, &[metrics::FAILURE]); + + // TODO(das): Peer sent invalid data, penalize and try again from different peer + // TODO(das): Count individual failures + for column_index in column_indexes { + let Some(request) = self.column_requests.get_mut(column_index) else { + warn!( + self.log, + "Active column sample request not found"; "block_root" => %self.block_root, "column_index" => column_index + ); + continue; + }; + let peer_id = request.on_sampling_error()?; + cx.report_peer( + peer_id, + PeerAction::LowToleranceError, + "invalid data column", + ); + } + } + } + + self.continue_sampling(cx) + } + + pub(crate) fn continue_sampling( + &mut self, + cx: &mut SyncNetworkContext, + ) -> Result, SamplingError> { + // First check if sampling is completed, by computing `required_successes` + let mut successes = 0; + let mut failures = 0; + let mut ongoings = 0; + + for request in self.column_requests.values() { + if request.is_completed() { + successes += 1; + } + if request.is_failed() { + failures += 1; + } + if request.is_ongoing() { + ongoings += 1; + } + } + + // If there are too many failures, consider the sampling failed + let Some(required_successes) = self.required_successes.get(failures) else { + return Err(SamplingError::TooManyFailures); + }; + + // If there are enough successes, consider the sampling complete + if successes >= *required_successes { + return Ok(Some(())); + } + + // First, attempt to progress sampling by requesting more columns, so that request failures + // are 
accounted for below. + + // Group the requested column indexes by the destination peer to batch sampling requests. + let mut column_indexes_to_request = FnvHashMap::default(); + for idx in 0..*required_successes { + // Re-request columns. Note: out of bounds error should never happen, inputs are hardcoded + let column_index = *self + .column_shuffle + .get(idx) + .ok_or(SamplingError::ColumnIndexOutOfBounds)?; + let request = self + .column_requests + .entry(column_index) + .or_insert(ActiveColumnSampleRequest::new(column_index)); + + if request.is_ready_to_request() { + if let Some(peer_id) = request.choose_peer(cx) { + let indexes = column_indexes_to_request.entry(peer_id).or_insert(vec![]); + indexes.push(column_index); + } + } + } + + // Send requests. + let mut sent_request = false; + for (peer_id, column_indexes) in column_indexes_to_request { + cx.data_column_lookup_request( + DataColumnsByRootRequester::Sampling(SamplingId { + id: self.requester_id, + sampling_request_id: self.current_sampling_request_id, + }), + peer_id, + DataColumnsByRootSingleBlockRequest { + block_root: self.block_root, + indices: column_indexes.clone(), + }, + ) + .map_err(SamplingError::SendFailed)?; + self.column_indexes_by_sampling_request + .insert(self.current_sampling_request_id, column_indexes.clone()); + self.current_sampling_request_id.0 += 1; + sent_request = true; + + // Update request status. + for column_index in column_indexes { + let Some(request) = self.column_requests.get_mut(&column_index) else { + continue; + }; + request.on_start_sampling(peer_id)?; + } + } + + // Make sure that sampling doesn't stall, by ensuring that this sampling request will + // receive a new event of some type. If there are no ongoing requests, and no new + // request was sent, loop to increase the required_successes until the sampling fails if + // there are no peers. 
+ if ongoings == 0 && !sent_request { + debug!(self.log, "Sampling request stalled"; "block_root" => %self.block_root); + } + + Ok(None) + } +} + +mod request { + use super::SamplingError; + use crate::sync::network_context::SyncNetworkContext; + use beacon_chain::BeaconChainTypes; + use lighthouse_network::PeerId; + use rand::seq::SliceRandom; + use rand::thread_rng; + use std::collections::HashSet; + use types::data_column_sidecar::ColumnIndex; + + pub(crate) struct ActiveColumnSampleRequest { + column_index: ColumnIndex, + status: Status, + // TODO(das): Should downscore peers that claim to not have the sample? + peers_dont_have: HashSet, + } + + #[derive(Debug, Clone)] + enum Status { + NoPeers, + NotStarted, + Sampling(PeerId), + Verified, + } + + impl ActiveColumnSampleRequest { + pub(crate) fn new(column_index: ColumnIndex) -> Self { + Self { + column_index, + status: Status::NotStarted, + peers_dont_have: <_>::default(), + } + } + + pub(crate) fn is_completed(&self) -> bool { + match self.status { + Status::NoPeers | Status::NotStarted | Status::Sampling(_) => false, + Status::Verified => true, + } + } + + pub(crate) fn is_failed(&self) -> bool { + match self.status { + Status::NotStarted | Status::Sampling(_) | Status::Verified => false, + Status::NoPeers => true, + } + } + + pub(crate) fn is_ongoing(&self) -> bool { + match self.status { + Status::NotStarted | Status::NoPeers | Status::Verified => false, + Status::Sampling(_) => true, + } + } + + pub(crate) fn is_ready_to_request(&self) -> bool { + match self.status { + Status::NoPeers | Status::NotStarted => true, + Status::Sampling(_) | Status::Verified => false, + } + } + + pub(crate) fn choose_peer( + &mut self, + cx: &SyncNetworkContext, + ) -> Option { + // TODO: When is a fork and only a subset of your peers know about a block, sampling should only + // be queried on the peers on that fork. Should this case be handled? How to handle it? 
+ let mut peer_ids = cx.get_custodial_peers(self.column_index); + + peer_ids.retain(|peer_id| !self.peers_dont_have.contains(peer_id)); + + if let Some(peer_id) = peer_ids.choose(&mut thread_rng()) { + Some(*peer_id) + } else { + self.status = Status::NoPeers; + None + } + } + + pub(crate) fn on_start_sampling(&mut self, peer_id: PeerId) -> Result<(), SamplingError> { + match self.status.clone() { + Status::NoPeers | Status::NotStarted => { + self.status = Status::Sampling(peer_id); + Ok(()) + } + other => Err(SamplingError::BadState(format!( + "bad state on_start_sampling expected NoPeers|NotStarted got {other:?}. column_index:{}", + self.column_index + ))), + } + } + + pub(crate) fn on_sampling_error(&mut self) -> Result { + match self.status.clone() { + Status::Sampling(peer_id) => { + self.peers_dont_have.insert(peer_id); + self.status = Status::NotStarted; + Ok(peer_id) + } + other => Err(SamplingError::BadState(format!( + "bad state on_sampling_error expected Sampling got {other:?}. column_index:{}", + self.column_index + ))), + } + } + + pub(crate) fn on_sampling_success(&mut self) -> Result<(), SamplingError> { + match &self.status { + Status::Sampling(_) => { + self.status = Status::Verified; + Ok(()) + } + other => Err(SamplingError::BadState(format!( + "bad state on_sampling_success expected Sampling got {other:?}. 
column_index:{}", + self.column_index + ))), + } + } + } +} diff --git a/beacon_node/src/cli.rs b/beacon_node/src/cli.rs index 54502f70646..67bc9d7d407 100644 --- a/beacon_node/src/cli.rs +++ b/beacon_node/src/cli.rs @@ -66,6 +66,25 @@ pub fn cli_app() -> Command { .display_order(0) .hide(true) ) + .arg( + // TODO(das): remove this before PeerDAS release + Arg::new("malicious-withhold-count") + .long("malicious-withhold-count") + .action(ArgAction::Set) + .help_heading(FLAG_HEADER) + .help("TESTING ONLY do not use this") + .hide(true) + .display_order(0) + ) + .arg( + Arg::new("enable-sampling") + .long("enable-sampling") + .action(ArgAction::SetTrue) + .help_heading(FLAG_HEADER) + .help("Enable peer sampling on data columns. Disabled by default.") + .hide(true) + .display_order(0) + ) .arg( Arg::new("subscribe-all-subnets") .long("subscribe-all-subnets") diff --git a/beacon_node/src/config.rs b/beacon_node/src/config.rs index 558b1cb6ebe..6f61748a2d3 100644 --- a/beacon_node/src/config.rs +++ b/beacon_node/src/config.rs @@ -204,6 +204,10 @@ pub fn get_config( client_config.chain.shuffling_cache_size = cache_size; } + if cli_args.get_flag("enable-sampling") { + client_config.chain.enable_sampling = true; + } + /* * Prometheus metrics HTTP server */ @@ -477,6 +481,12 @@ pub fn get_config( client_config.store.blob_prune_margin_epochs = blob_prune_margin_epochs; } + if let Some(malicious_withhold_count) = + clap_utils::parse_optional(cli_args, "malicious-withhold-count")? 
+ { + client_config.chain.malicious_withhold_count = malicious_withhold_count; + } + /* * Zero-ports * diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index 8b144c1be93..fecd8e37442 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -18,11 +18,11 @@ use crate::metadata::{ STATE_UPPER_LIMIT_NO_RETAIN, }; use crate::state_cache::{PutStateOutcome, StateCache}; -use crate::{get_data_column_key, metrics, parse_data_column_key}; use crate::{ - get_key_for_col, ChunkWriter, DBColumn, DatabaseBlock, Error, ItemStore, KeyValueStoreOp, - PartialBeaconState, StoreItem, StoreOp, + get_data_column_key, get_key_for_col, ChunkWriter, DBColumn, DatabaseBlock, Error, ItemStore, + KeyValueStoreOp, PartialBeaconState, StoreItem, StoreOp, }; +use crate::{metrics, parse_data_column_key}; use itertools::process_results; use leveldb::iterator::LevelDBIterator; use lru::LruCache; diff --git a/common/eth2_network_config/built_in_network_configs/chiado/config.yaml b/common/eth2_network_config/built_in_network_configs/chiado/config.yaml index 066b27795cd..74fca4c5010 100644 --- a/common/eth2_network_config/built_in_network_configs/chiado/config.yaml +++ b/common/eth2_network_config/built_in_network_configs/chiado/config.yaml @@ -138,6 +138,6 @@ MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS: 16384 BLOB_SIDECAR_SUBNET_COUNT: 6 # DAS -CUSTODY_REQUIREMENT: 1 -DATA_COLUMN_SIDECAR_SUBNET_COUNT: 32 +CUSTODY_REQUIREMENT: 4 +DATA_COLUMN_SIDECAR_SUBNET_COUNT: 128 NUMBER_OF_COLUMNS: 128 \ No newline at end of file diff --git a/common/eth2_network_config/built_in_network_configs/gnosis/config.yaml b/common/eth2_network_config/built_in_network_configs/gnosis/config.yaml index 23cf040b276..07bd21b35c2 100644 --- a/common/eth2_network_config/built_in_network_configs/gnosis/config.yaml +++ b/common/eth2_network_config/built_in_network_configs/gnosis/config.yaml @@ -121,6 +121,6 @@ MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS: 
16384 BLOB_SIDECAR_SUBNET_COUNT: 6 # DAS -CUSTODY_REQUIREMENT: 1 -DATA_COLUMN_SIDECAR_SUBNET_COUNT: 32 +CUSTODY_REQUIREMENT: 4 +DATA_COLUMN_SIDECAR_SUBNET_COUNT: 128 NUMBER_OF_COLUMNS: 128 \ No newline at end of file diff --git a/common/eth2_network_config/built_in_network_configs/holesky/config.yaml b/common/eth2_network_config/built_in_network_configs/holesky/config.yaml index cec2b61f213..67f1e5b6831 100644 --- a/common/eth2_network_config/built_in_network_configs/holesky/config.yaml +++ b/common/eth2_network_config/built_in_network_configs/holesky/config.yaml @@ -125,6 +125,6 @@ MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS: 4096 BLOB_SIDECAR_SUBNET_COUNT: 6 # DAS -CUSTODY_REQUIREMENT: 1 -DATA_COLUMN_SIDECAR_SUBNET_COUNT: 32 +CUSTODY_REQUIREMENT: 4 +DATA_COLUMN_SIDECAR_SUBNET_COUNT: 128 NUMBER_OF_COLUMNS: 128 \ No newline at end of file diff --git a/common/eth2_network_config/built_in_network_configs/mainnet/config.yaml b/common/eth2_network_config/built_in_network_configs/mainnet/config.yaml index 500b9e60a5c..acf4d83f323 100644 --- a/common/eth2_network_config/built_in_network_configs/mainnet/config.yaml +++ b/common/eth2_network_config/built_in_network_configs/mainnet/config.yaml @@ -147,6 +147,6 @@ MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS: 4096 BLOB_SIDECAR_SUBNET_COUNT: 6 # DAS -CUSTODY_REQUIREMENT: 1 -DATA_COLUMN_SIDECAR_SUBNET_COUNT: 32 +CUSTODY_REQUIREMENT: 4 +DATA_COLUMN_SIDECAR_SUBNET_COUNT: 128 NUMBER_OF_COLUMNS: 128 \ No newline at end of file diff --git a/common/eth2_network_config/built_in_network_configs/sepolia/config.yaml b/common/eth2_network_config/built_in_network_configs/sepolia/config.yaml index 2a1809d6ce9..8b84d870103 100644 --- a/common/eth2_network_config/built_in_network_configs/sepolia/config.yaml +++ b/common/eth2_network_config/built_in_network_configs/sepolia/config.yaml @@ -121,6 +121,6 @@ MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS: 4096 BLOB_SIDECAR_SUBNET_COUNT: 6 # DAS -CUSTODY_REQUIREMENT: 1 -DATA_COLUMN_SIDECAR_SUBNET_COUNT: 32 
+CUSTODY_REQUIREMENT: 4 +DATA_COLUMN_SIDECAR_SUBNET_COUNT: 128 NUMBER_OF_COLUMNS: 128 \ No newline at end of file diff --git a/consensus/types/src/chain_spec.rs b/consensus/types/src/chain_spec.rs index 2c64d21130f..10b00d5ba1d 100644 --- a/consensus/types/src/chain_spec.rs +++ b/consensus/types/src/chain_spec.rs @@ -807,8 +807,8 @@ impl ChainSpec { * DAS params */ eip7594_fork_epoch: None, - custody_requirement: 1, - data_column_sidecar_subnet_count: 32, + custody_requirement: 4, + data_column_sidecar_subnet_count: 128, number_of_columns: 128, /* @@ -1129,8 +1129,8 @@ impl ChainSpec { * DAS params */ eip7594_fork_epoch: None, - custody_requirement: 1, - data_column_sidecar_subnet_count: 32, + custody_requirement: 4, + data_column_sidecar_subnet_count: 128, number_of_columns: 128, /* * Network specific @@ -2122,7 +2122,7 @@ mod yaml_tests { DEPOSIT_NETWORK_ID: 1 DEPOSIT_CONTRACT_ADDRESS: 0x00000000219ab540356cBB839Cbe05303d7705Fa CUSTODY_REQUIREMENT: 1 - DATA_COLUMN_SIDECAR_SUBNET_COUNT: 32 + DATA_COLUMN_SIDECAR_SUBNET_COUNT: 128 NUMBER_OF_COLUMNS: 128 "#; diff --git a/consensus/types/src/lib.rs b/consensus/types/src/lib.rs index 2b874be4825..68d48ec7c8b 100644 --- a/consensus/types/src/lib.rs +++ b/consensus/types/src/lib.rs @@ -138,7 +138,7 @@ pub use crate::beacon_block_body::{ pub use crate::beacon_block_header::BeaconBlockHeader; pub use crate::beacon_committee::{BeaconCommittee, OwnedBeaconCommittee}; pub use crate::beacon_state::{Error as BeaconStateError, *}; -pub use crate::blob_sidecar::{BlobSidecar, BlobSidecarList, BlobsList}; +pub use crate::blob_sidecar::{BlobIdentifier, BlobSidecar, BlobSidecarList, BlobsList}; pub use crate::bls_to_execution_change::BlsToExecutionChange; pub use crate::chain_spec::{ChainSpec, Config, Domain}; pub use crate::checkpoint::Checkpoint; diff --git a/consensus/types/src/runtime_var_list.rs b/consensus/types/src/runtime_var_list.rs index 84ad5d074e7..af4ee87c158 100644 --- a/consensus/types/src/runtime_var_list.rs +++ 
b/consensus/types/src/runtime_var_list.rs @@ -1,20 +1,58 @@ -use ssz::{Decode, Encode}; -use ssz_derive::Encode; +use derivative::Derivative; +use serde::{Deserialize, Serialize}; +use ssz::Decode; +use ssz_types::Error; +use std::ops::{Deref, DerefMut, Index, IndexMut}; +use std::slice::SliceIndex; -#[derive(Debug, Clone, PartialEq, Encode)] -#[ssz(struct_behaviour = "transparent")] -pub struct RuntimeVariableList { +/// Emulates an SSZ `List`. +/// +/// An ordered, heap-allocated, variable-length, homogeneous collection of `T`, with no more than +/// `max_len` values. +/// +/// ## Example +/// +/// ``` +/// use ssz_types::{RuntimeVariableList}; +/// +/// let base: Vec = vec![1, 2, 3, 4]; +/// +/// // Create a `RuntimeVariableList` from a `Vec` that has the expected length. +/// let exact: RuntimeVariableList<_> = RuntimeVariableList::from_vec(base.clone(), 4); +/// assert_eq!(&exact[..], &[1, 2, 3, 4]); +/// +/// // Create a `RuntimeVariableList` from a `Vec` that is too long and the `Vec` is truncated. +/// let short: RuntimeVariableList<_> = RuntimeVariableList::from_vec(base.clone(), 3); +/// assert_eq!(&short[..], &[1, 2, 3]); +/// +/// // Create a `RuntimeVariableList` from a `Vec` that is shorter than the maximum. +/// let mut long: RuntimeVariableList<_> = RuntimeVariableList::from_vec(base, 5); +/// assert_eq!(&long[..], &[1, 2, 3, 4]); +/// +/// // Pushing a value succeeds if it does not exceed the maximum. +/// long.push(5).unwrap(); +/// assert_eq!(&long[..], &[1, 2, 3, 4, 5]); +/// +/// // Pushing a value fails if it _does_ exceed the maximum. 
+/// assert!(long.push(6).is_err()); +/// ``` +#[derive(Debug, Clone, Serialize, Deserialize, Derivative)] +#[derivative(PartialEq, Eq, Hash(bound = "T: std::hash::Hash"))] +#[serde(transparent)] +pub struct RuntimeVariableList { vec: Vec, - #[ssz(skip_serializing, skip_deserializing)] + #[serde(skip)] max_len: usize, } -impl RuntimeVariableList { - pub fn new(vec: Vec, max_len: usize) -> Result { +impl RuntimeVariableList { + /// Returns `Ok` if the length of the given `vec` does not exceed `max_len`. Otherwise returns + /// `Err(OutOfBounds { .. })`. + pub fn new(vec: Vec, max_len: usize) -> Result { if vec.len() <= max_len { Ok(Self { vec, max_len }) } else { - Err(ssz_types::Error::OutOfBounds { + Err(Error::OutOfBounds { i: vec.len(), len: max_len, }) @@ -27,22 +65,50 @@ impl RuntimeVariableList { Self { vec, max_len } } - pub fn to_vec(&self) -> Vec { - self.vec.clone() + /// Create an empty list. + pub fn empty(max_len: usize) -> Self { + Self { + vec: vec![], + max_len, + } } pub fn as_slice(&self) -> &[T] { self.vec.as_slice() } + /// Returns the number of values presently in `self`. pub fn len(&self) -> usize { self.vec.len() } + /// True if `self` does not contain any values. pub fn is_empty(&self) -> bool { - self.vec.is_empty() + self.len() == 0 + } + + /// Returns the maximum length set at construction. + pub fn max_len(&self) -> usize { + self.max_len + } + + /// Appends `value` to the back of `self`. + /// + /// Returns `Err(OutOfBounds { .. })` when appending `value` would exceed the maximum length. 
+ pub fn push(&mut self, value: T) -> Result<(), Error> { + if self.vec.len() < self.max_len { + self.vec.push(value); + Ok(()) + } else { + Err(Error::OutOfBounds { + i: self.vec.len().saturating_add(1), + len: self.max_len, + }) + } } +} +impl RuntimeVariableList { pub fn from_ssz_bytes(bytes: &[u8], max_len: usize) -> Result { let vec = if bytes.is_empty() { vec![] @@ -54,7 +120,7 @@ impl RuntimeVariableList { if num_items > max_len { return Err(ssz::DecodeError::BytesInvalid(format!( - "VariableList of {} items exceeds maximum of {}", + "RuntimeVariableList of {} items exceeds maximum of {}", num_items, max_len ))); } @@ -73,65 +139,162 @@ impl RuntimeVariableList { } } +impl From> for Vec { + fn from(list: RuntimeVariableList) -> Vec { + list.vec + } +} + +impl> Index for RuntimeVariableList { + type Output = I::Output; + + #[inline] + fn index(&self, index: I) -> &Self::Output { + Index::index(&self.vec, index) + } +} + +impl> IndexMut for RuntimeVariableList { + #[inline] + fn index_mut(&mut self, index: I) -> &mut Self::Output { + IndexMut::index_mut(&mut self.vec, index) + } +} + +impl Deref for RuntimeVariableList { + type Target = [T]; + + fn deref(&self) -> &[T] { + &self.vec[..] + } +} + +impl DerefMut for RuntimeVariableList { + fn deref_mut(&mut self) -> &mut [T] { + &mut self.vec[..] 
+ } +} + +impl<'a, T> IntoIterator for &'a RuntimeVariableList { + type Item = &'a T; + type IntoIter = std::slice::Iter<'a, T>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl IntoIterator for RuntimeVariableList { + type Item = T; + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.vec.into_iter() + } +} + +impl ssz::Encode for RuntimeVariableList +where + T: ssz::Encode, +{ + fn is_ssz_fixed_len() -> bool { + >::is_ssz_fixed_len() + } + + fn ssz_append(&self, buf: &mut Vec) { + self.vec.ssz_append(buf) + } + + fn ssz_fixed_len() -> usize { + >::ssz_fixed_len() + } + + fn ssz_bytes_len(&self) -> usize { + self.vec.ssz_bytes_len() + } +} + #[cfg(test)] mod test { - use ssz_types::{typenum::U4, VariableList}; - use super::*; + use ssz::*; + use std::fmt::Debug; #[test] fn new() { let vec = vec![42; 5]; - let runtime_var_list: Result, _> = - RuntimeVariableList::new(vec, 4); - assert!(runtime_var_list.is_err()); + let fixed: Result, _> = RuntimeVariableList::new(vec, 4); + assert!(fixed.is_err()); let vec = vec![42; 3]; - let runtime_var_list: Result, _> = - RuntimeVariableList::new(vec, 4); - assert!(runtime_var_list.is_ok()); + let fixed: Result, _> = RuntimeVariableList::new(vec, 4); + assert!(fixed.is_ok()); let vec = vec![42; 4]; - let runtime_var_list: Result, _> = - RuntimeVariableList::new(vec, 4); - assert!(runtime_var_list.is_ok()); + let fixed: Result, _> = RuntimeVariableList::new(vec, 4); + assert!(fixed.is_ok()); + } + + #[test] + fn indexing() { + let vec = vec![1, 2]; + + let mut fixed: RuntimeVariableList = RuntimeVariableList::from_vec(vec.clone(), 8192); + + assert_eq!(fixed[0], 1); + assert_eq!(&fixed[0..1], &vec[0..1]); + assert_eq!(fixed[..].len(), 2); + + fixed[1] = 3; + assert_eq!(fixed[1], 3); } #[test] fn length() { + let vec = vec![42; 5]; + let fixed: RuntimeVariableList = RuntimeVariableList::from_vec(vec.clone(), 4); + assert_eq!(&fixed[..], &vec[0..4]); + let vec = vec![42; 
3]; - let runtime_var_list: RuntimeVariableList = - RuntimeVariableList::new(vec.clone(), 4).unwrap(); - let var_list: VariableList = VariableList::from(vec.clone()); - assert_eq!(&runtime_var_list.as_slice()[0..3], &vec[..]); - assert_eq!(runtime_var_list.as_slice(), &vec![42, 42, 42][..]); - assert_eq!(runtime_var_list.len(), var_list.len()); + let fixed: RuntimeVariableList = RuntimeVariableList::from_vec(vec.clone(), 4); + assert_eq!(&fixed[0..3], &vec[..]); + assert_eq!(&fixed[..], &vec![42, 42, 42][..]); let vec = vec![]; - let runtime_var_list: RuntimeVariableList = RuntimeVariableList::new(vec, 4).unwrap(); - assert_eq!(runtime_var_list.as_slice(), &[] as &[u64]); - assert!(runtime_var_list.is_empty()); + let fixed: RuntimeVariableList = RuntimeVariableList::from_vec(vec, 4); + assert_eq!(&fixed[..], &[] as &[u64]); } #[test] - fn encode() { - let runtime_var_list: RuntimeVariableList = - RuntimeVariableList::new(vec![0; 2], 2).unwrap(); + fn deref() { + let vec = vec![0, 2, 4, 6]; + let fixed: RuntimeVariableList = RuntimeVariableList::from_vec(vec, 4); - assert_eq!(runtime_var_list.as_ssz_bytes(), vec![0, 0, 0, 0]); - assert_eq!( as Encode>::ssz_fixed_len(), 4); + assert_eq!(fixed.first(), Some(&0)); + assert_eq!(fixed.get(3), Some(&6)); + assert_eq!(fixed.get(4), None); } #[test] - fn round_trip() { - let item = RuntimeVariableList::::new(vec![42; 8], 8).unwrap(); - let encoded = &item.as_ssz_bytes(); - assert_eq!(item.ssz_bytes_len(), encoded.len()); - assert_eq!(RuntimeVariableList::from_ssz_bytes(encoded, 8), Ok(item)); + fn encode() { + let vec: RuntimeVariableList = RuntimeVariableList::from_vec(vec![0; 2], 2); + assert_eq!(vec.as_ssz_bytes(), vec![0, 0, 0, 0]); + assert_eq!( as Encode>::ssz_fixed_len(), 4); + } - let item = RuntimeVariableList::::new(vec![0; 8], 8).unwrap(); + fn round_trip(item: RuntimeVariableList) { + let max_len = item.max_len(); let encoded = &item.as_ssz_bytes(); assert_eq!(item.ssz_bytes_len(), encoded.len()); - 
assert_eq!(RuntimeVariableList::from_ssz_bytes(encoded, 8), Ok(item)); + assert_eq!( + RuntimeVariableList::from_ssz_bytes(encoded, max_len), + Ok(item) + ); + } + + #[test] + fn u16_len_8() { + round_trip::(RuntimeVariableList::from_vec(vec![42; 8], 8)); + round_trip::(RuntimeVariableList::from_vec(vec![0; 8], 8)); } } diff --git a/lighthouse/environment/tests/testnet_dir/config.yaml b/lighthouse/environment/tests/testnet_dir/config.yaml index 4fc7bc2dcff..84e8274f06e 100644 --- a/lighthouse/environment/tests/testnet_dir/config.yaml +++ b/lighthouse/environment/tests/testnet_dir/config.yaml @@ -100,6 +100,6 @@ ATTESTATION_SUBNET_PREFIX_BITS: 6 ATTESTATION_SUBNET_SHUFFLING_PREFIX_BITS: 3 # DAS -CUSTODY_REQUIREMENT: 1 -DATA_COLUMN_SIDECAR_SUBNET_COUNT: 32 +CUSTODY_REQUIREMENT: 4 +DATA_COLUMN_SIDECAR_SUBNET_COUNT: 128 NUMBER_OF_COLUMNS: 128 \ No newline at end of file diff --git a/lighthouse/tests/beacon_node.rs b/lighthouse/tests/beacon_node.rs index 4fdd967c65c..f3832a1a1e5 100644 --- a/lighthouse/tests/beacon_node.rs +++ b/lighthouse/tests/beacon_node.rs @@ -825,6 +825,26 @@ fn network_target_peers_flag() { }); } #[test] +fn network_subscribe_all_data_column_subnets_flag() { + CommandLineTest::new() + .flag("subscribe-all-data-column-subnets", None) + .run_with_zero_port() + .with_config(|config| assert!(config.network.subscribe_all_data_column_subnets)); +} +#[test] +fn network_enable_sampling_flag() { + CommandLineTest::new() + .flag("enable-sampling", None) + .run_with_zero_port() + .with_config(|config| assert!(config.chain.enable_sampling)); +} +#[test] +fn network_enable_sampling_flag_default() { + CommandLineTest::new() + .run_with_zero_port() + .with_config(|config| assert!(!config.chain.enable_sampling)); +} +#[test] fn network_subscribe_all_subnets_flag() { CommandLineTest::new() .flag("subscribe-all-subnets", None) @@ -2022,6 +2042,13 @@ fn epochs_per_migration_override() { .run_with_zero_port() .with_config(|config| 
assert_eq!(config.chain.epochs_per_migration, 128)); } +#[test] +fn malicious_withhold_count_flag() { + CommandLineTest::new() + .flag("malicious-withhold-count", Some("128")) + .run_with_zero_port() + .with_config(|config| assert_eq!(config.chain.malicious_withhold_count, 128)); +} // Tests for Slasher flags. // Using `--slasher-max-db-size` to work around https://github.com/sigp/lighthouse/issues/2342 diff --git a/scripts/local_testnet/network_params.yaml b/scripts/local_testnet/network_params.yaml index 1c25c30f060..b53d88e52c5 100644 --- a/scripts/local_testnet/network_params.yaml +++ b/scripts/local_testnet/network_params.yaml @@ -1,4 +1,4 @@ -# Full configuration reference [here](https://github.com/kurtosis-tech/ethereum-package?tab=readme-ov-file#configuration). +# Full configuration reference [here](https://github.com/ethpandaops/ethereum-package?tab=readme-ov-file#configuration). participants: - el_type: geth el_image: ethereum/client-go:latest @@ -14,4 +14,4 @@ global_log_level: debug snooper_enabled: false additional_services: - dora - - prometheus_grafana \ No newline at end of file + - prometheus_grafana diff --git a/scripts/local_testnet/network_params_das_devnet_1.yaml b/scripts/local_testnet/network_params_das_devnet_1.yaml new file mode 100644 index 00000000000..fcd131a06ca --- /dev/null +++ b/scripts/local_testnet/network_params_das_devnet_1.yaml @@ -0,0 +1,8 @@ +participants: + - cl_type: lighthouse + cl_image: lighthouse:local +network_params: + network: peerdas-devnet-1 +global_log_level: debug +additional_services: + - prometheus_grafana \ No newline at end of file diff --git a/scripts/local_testnet/network_params_das_interop.yaml b/scripts/local_testnet/network_params_das_interop.yaml new file mode 100644 index 00000000000..0c8f9d7f49d --- /dev/null +++ b/scripts/local_testnet/network_params_das_interop.yaml @@ -0,0 +1,38 @@ +# Full configuration reference 
[here](https://github.com/ethpandaops/ethereum-package?tab=readme-ov-file#configuration). +participants: + - cl_type: prysm + cl_image: ethpandaops/prysm-beacon-chain:peerDAS + + - cl_type: lighthouse + cl_extra_params: [ + --subscribe-all-data-column-subnets, + ] + cl_image: lighthouse:local + + - cl_type: lighthouse + cl_image: lighthouse:local + + - cl_type: teku + cl_image: ethpandaops/teku:nashatyrev-das + +# - cl_type: nimbus +# cl_image: ethpandaops/nimbus-eth2:kzgpeerdas +# +# - cl_type: grandine +# cl_image: ethpandaops/grandine:das +# +# - cl_type: lodestar +# cl_image: ethpandaops/lodestar:peerDAS +network_params: + eip7594_fork_epoch: 0 + eip7594_fork_version: "0x50000038" + data_column_sidecar_subnet_count: 128 + samples_per_slot: 16 + custody_requirement: 4 +snooper_enabled: false +global_log_level: debug +ethereum_metrics_exporter_enabled: true +additional_services: + - dora + - goomy_blob + - prometheus_grafana diff --git a/scripts/local_testnet/network_params_das_local.yaml b/scripts/local_testnet/network_params_das_local.yaml new file mode 100644 index 00000000000..d1b646a34a3 --- /dev/null +++ b/scripts/local_testnet/network_params_das_local.yaml @@ -0,0 +1,20 @@ +participants: + - cl_type: lighthouse + cl_image: lighthouse:local + cl_extra_params: + - --subscribe-all-data-column-subnets + - --target-peers=2 + count: 2 + - cl_type: lighthouse + cl_image: lighthouse:local + cl_extra_params: + - --target-peers=2 + count: 1 +network_params: + eip7594_fork_epoch: 0 + seconds_per_slot: 6 +snooper_enabled: false +global_log_level: debug +additional_services: + - dora + - goomy_blob diff --git a/testing/ef_tests/check_all_files_accessed.py b/testing/ef_tests/check_all_files_accessed.py index 9495047e7f9..f6ae4cfa450 100755 --- a/testing/ef_tests/check_all_files_accessed.py +++ b/testing/ef_tests/check_all_files_accessed.py @@ -20,6 +20,8 @@ # following regular expressions, we will assume they are to be ignored (i.e., we are purposefully # *not* running 
the spec tests). excluded_paths = [ + # TODO(das): ignore until new spec test release with column subnet count = 64. + "tests/.*/.*/.*/get_custody_columns/", # Eth1Block and PowBlock # # Intentionally omitted, as per https://github.com/sigp/lighthouse/issues/1835 @@ -33,10 +35,15 @@ "tests/.*/.*/ssz_static/LightClientStore", # LightClientSnapshot "tests/.*/.*/ssz_static/LightClientSnapshot", + # Unused container for das + "tests/.*/.*/ssz_static/MatrixEntry", + # Unused kzg methods + "tests/.*/.*/kzg/verify_cell_kzg_proof", # One of the EF researchers likes to pack the tarballs on a Mac ".*\\.DS_Store.*", # More Mac weirdness. "tests/mainnet/bellatrix/operations/deposit/pyspec_tests/deposit_with_previous_fork_version__valid_ineffective/._meta.yaml", + "tests/mainnet/eip7594/networking/get_custody_columns/pyspec_tests/get_custody_columns__short_node_id/._meta.yaml", # bls tests are moved to bls12-381-tests directory "tests/general/phase0/bls", # some bls tests are not included now diff --git a/testing/ef_tests/src/cases.rs b/testing/ef_tests/src/cases.rs index 2d6f661f0e4..63274ee0c03 100644 --- a/testing/ef_tests/src/cases.rs +++ b/testing/ef_tests/src/cases.rs @@ -1,6 +1,6 @@ use super::*; use rayon::prelude::*; -use std::fmt::Debug; +use std::fmt::{Debug, Display, Formatter}; use std::path::{Path, PathBuf}; use types::ForkName; @@ -18,11 +18,15 @@ mod fork; mod fork_choice; mod genesis_initialization; mod genesis_validity; +mod get_custody_columns; mod kzg_blob_to_kzg_commitment; mod kzg_compute_blob_kzg_proof; +mod kzg_compute_cells_and_kzg_proofs; mod kzg_compute_kzg_proof; +mod kzg_recover_cells_and_kzg_proofs; mod kzg_verify_blob_kzg_proof; mod kzg_verify_blob_kzg_proof_batch; +mod kzg_verify_cell_kzg_proof_batch; mod kzg_verify_kzg_proof; mod light_client_verify_is_better_update; mod merkle_proof_validity; @@ -49,11 +53,15 @@ pub use epoch_processing::*; pub use fork::ForkTest; pub use genesis_initialization::*; pub use genesis_validity::*; +pub use 
get_custody_columns::*; pub use kzg_blob_to_kzg_commitment::*; pub use kzg_compute_blob_kzg_proof::*; +pub use kzg_compute_cells_and_kzg_proofs::*; pub use kzg_compute_kzg_proof::*; +pub use kzg_recover_cells_and_kzg_proofs::*; pub use kzg_verify_blob_kzg_proof::*; pub use kzg_verify_blob_kzg_proof_batch::*; +pub use kzg_verify_cell_kzg_proof_batch::*; pub use kzg_verify_kzg_proof::*; pub use light_client_verify_is_better_update::*; pub use merkle_proof_validity::*; @@ -66,6 +74,19 @@ pub use ssz_generic::*; pub use ssz_static::*; pub use transition::TransitionTest; +#[derive(Debug, PartialEq)] +pub enum FeatureName { + Eip7594, +} + +impl Display for FeatureName { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + FeatureName::Eip7594 => f.write_str("eip7594"), + } + } +} + pub trait LoadCase: Sized { /// Load the test case from a test case directory. fn load_from_dir(_path: &Path, _fork_name: ForkName) -> Result; @@ -86,6 +107,13 @@ pub trait Case: Debug + Sync { true } + /// Whether or not this test exists for the given `feature_name`. + /// + /// Returns `true` by default. + fn is_enabled_for_feature(_feature_name: FeatureName) -> bool { + true + } + /// Execute a test and return the result. /// /// `case_index` reports the index of the case in the set of test cases. 
It is not strictly diff --git a/testing/ef_tests/src/cases/get_custody_columns.rs b/testing/ef_tests/src/cases/get_custody_columns.rs new file mode 100644 index 00000000000..efe5b147e44 --- /dev/null +++ b/testing/ef_tests/src/cases/get_custody_columns.rs @@ -0,0 +1,43 @@ +use super::*; +use ethereum_types::U256; +use serde::Deserialize; +use std::marker::PhantomData; +use types::DataColumnSubnetId; + +#[derive(Debug, Clone, Deserialize)] +#[serde(bound = "E: EthSpec", deny_unknown_fields)] +pub struct GetCustodyColumns { + pub node_id: String, + pub custody_subnet_count: u64, + pub result: Vec, + #[serde(skip)] + _phantom: PhantomData, +} + +impl LoadCase for GetCustodyColumns { + fn load_from_dir(path: &Path, _fork_name: ForkName) -> Result { + decode::yaml_decode_file(path.join("meta.yaml").as_path()) + } +} + +impl Case for GetCustodyColumns { + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { + let spec = E::default_spec(); + let node_id = U256::from_dec_str(&self.node_id) + .map_err(|e| Error::FailedToParseTest(format!("{e:?}")))?; + let computed = DataColumnSubnetId::compute_custody_columns::( + node_id, + self.custody_subnet_count, + &spec, + ) + .collect::>(); + let expected = &self.result; + if computed == *expected { + Ok(()) + } else { + Err(Error::NotEqual(format!( + "Got {computed:?}\nExpected {expected:?}" + ))) + } + } +} diff --git a/testing/ef_tests/src/cases/kzg_blob_to_kzg_commitment.rs b/testing/ef_tests/src/cases/kzg_blob_to_kzg_commitment.rs index aa48c127b20..5194c3336c8 100644 --- a/testing/ef_tests/src/cases/kzg_blob_to_kzg_commitment.rs +++ b/testing/ef_tests/src/cases/kzg_blob_to_kzg_commitment.rs @@ -31,9 +31,12 @@ impl Case for KZGBlobToKZGCommitment { fork_name == ForkName::Deneb } + fn is_enabled_for_feature(feature_name: FeatureName) -> bool { + feature_name != FeatureName::Eip7594 + } + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { let kzg = get_kzg()?; - let 
commitment = parse_blob::(&self.input.blob).and_then(|blob| { blob_to_kzg_commitment::(&kzg, &blob).map_err(|e| { Error::InternalError(format!("Failed to compute kzg commitment: {:?}", e)) diff --git a/testing/ef_tests/src/cases/kzg_compute_blob_kzg_proof.rs b/testing/ef_tests/src/cases/kzg_compute_blob_kzg_proof.rs index 71e1ff8e23d..61e7248deeb 100644 --- a/testing/ef_tests/src/cases/kzg_compute_blob_kzg_proof.rs +++ b/testing/ef_tests/src/cases/kzg_compute_blob_kzg_proof.rs @@ -32,6 +32,10 @@ impl Case for KZGComputeBlobKZGProof { fork_name == ForkName::Deneb } + fn is_enabled_for_feature(feature_name: FeatureName) -> bool { + feature_name != FeatureName::Eip7594 + } + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { let parse_input = |input: &KZGComputeBlobKZGProofInput| -> Result<_, Error> { let blob = parse_blob::(&input.blob)?; diff --git a/testing/ef_tests/src/cases/kzg_compute_cells_and_kzg_proofs.rs b/testing/ef_tests/src/cases/kzg_compute_cells_and_kzg_proofs.rs new file mode 100644 index 00000000000..74a44fdddfc --- /dev/null +++ b/testing/ef_tests/src/cases/kzg_compute_cells_and_kzg_proofs.rs @@ -0,0 +1,67 @@ +use super::*; +use crate::case_result::compare_result; +use kzg::CellsAndKzgProofs; +use serde::Deserialize; +use std::marker::PhantomData; + +#[derive(Debug, Clone, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct KZGComputeCellsAndKzgProofsInput { + pub blob: String, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(bound = "E: EthSpec", deny_unknown_fields)] +pub struct KZGComputeCellsAndKZGProofs { + pub input: KZGComputeCellsAndKzgProofsInput, + pub output: Option<(Vec, Vec)>, + #[serde(skip)] + _phantom: PhantomData, +} + +impl LoadCase for KZGComputeCellsAndKZGProofs { + fn load_from_dir(path: &Path, _fork_name: ForkName) -> Result { + decode::yaml_decode_file(path.join("data.yaml").as_path()) + } +} + +impl Case for KZGComputeCellsAndKZGProofs { + fn is_enabled_for_fork(fork_name: ForkName) -> 
bool { + fork_name == ForkName::Deneb + } + + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { + let cells_and_proofs = parse_blob::(&self.input.blob).and_then(|blob| { + let blob = blob.as_ref().try_into().map_err(|e| { + Error::InternalError(format!("Failed to convert blob to kzg blob: {e:?}")) + })?; + let kzg = get_kzg()?; + kzg.compute_cells_and_proofs(blob).map_err(|e| { + Error::InternalError(format!("Failed to compute cells and kzg proofs: {e:?}")) + }) + }); + + let expected = self.output.as_ref().and_then(|(cells, proofs)| { + parse_cells_and_proofs(cells, proofs) + .map(|(cells, proofs)| { + ( + cells + .try_into() + .map_err(|e| { + Error::FailedToParseTest(format!("Failed to parse cells: {e:?}")) + }) + .unwrap(), + proofs + .try_into() + .map_err(|e| { + Error::FailedToParseTest(format!("Failed to parse proofs: {e:?}")) + }) + .unwrap(), + ) + }) + .ok() + }); + + compare_result::(&cells_and_proofs, &expected) + } +} diff --git a/testing/ef_tests/src/cases/kzg_compute_kzg_proof.rs b/testing/ef_tests/src/cases/kzg_compute_kzg_proof.rs index 98bb7492491..ca19882d501 100644 --- a/testing/ef_tests/src/cases/kzg_compute_kzg_proof.rs +++ b/testing/ef_tests/src/cases/kzg_compute_kzg_proof.rs @@ -39,6 +39,10 @@ impl Case for KZGComputeKZGProof { fork_name == ForkName::Deneb } + fn is_enabled_for_feature(feature_name: FeatureName) -> bool { + feature_name != FeatureName::Eip7594 + } + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { let parse_input = |input: &KZGComputeKZGProofInput| -> Result<_, Error> { let blob = parse_blob::(&input.blob)?; diff --git a/testing/ef_tests/src/cases/kzg_recover_cells_and_kzg_proofs.rs b/testing/ef_tests/src/cases/kzg_recover_cells_and_kzg_proofs.rs new file mode 100644 index 00000000000..fc41f1f2a62 --- /dev/null +++ b/testing/ef_tests/src/cases/kzg_recover_cells_and_kzg_proofs.rs @@ -0,0 +1,97 @@ +use super::*; +use crate::case_result::compare_result; +use 
kzg::{CellsAndKzgProofs, KzgProof}; +use serde::Deserialize; +use std::convert::Infallible; +use std::marker::PhantomData; + +#[derive(Debug, Clone, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct KZGRecoverCellsAndKzgProofsInput { + pub cell_indices: Vec, + pub cells: Vec, + pub proofs: Vec, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(bound = "E: EthSpec", deny_unknown_fields)] +pub struct KZGRecoverCellsAndKZGProofs { + pub input: KZGRecoverCellsAndKzgProofsInput, + pub output: Option<(Vec, Vec)>, + #[serde(skip)] + _phantom: PhantomData, +} + +impl LoadCase for KZGRecoverCellsAndKZGProofs { + fn load_from_dir(path: &Path, _fork_name: ForkName) -> Result { + decode::yaml_decode_file(path.join("data.yaml").as_path()) + } +} + +impl Case for KZGRecoverCellsAndKZGProofs { + fn is_enabled_for_fork(fork_name: ForkName) -> bool { + fork_name == ForkName::Deneb + } + + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { + let parse_input = |input: &KZGRecoverCellsAndKzgProofsInput| { + // Proofs are not used for `recover_cells_and_compute_kzg_proofs`, they are only checked + // to satisfy the spec tests. 
+ if input.proofs.len() != input.cell_indices.len() { + return Err(Error::SkippedKnownFailure); + } + + let proofs = input + .proofs + .iter() + .map(|s| parse_proof(s)) + .collect::, Error>>()?; + + let cells = input + .cells + .iter() + .map(|s| parse_cell(s)) + .collect::, Error>>()?; + + Ok((proofs, cells, input.cell_indices.clone())) + }; + + let result = + parse_input(&self.input).and_then(|(input_proofs, input_cells, cell_indices)| { + let input_cells_ref: Vec<_> = input_cells.iter().map(|cell| &**cell).collect(); + let kzg = get_kzg()?; + let (cells, proofs) = kzg + .recover_cells_and_compute_kzg_proofs( + cell_indices.as_slice(), + input_cells_ref.as_slice(), + ) + .map_err(|e| { + Error::InternalError(format!( + "Failed to recover cells and kzg proofs: {e:?}" + )) + })?; + + // Check recovered proofs matches inputs proofs. This is done only to satisfy the + // spec tests, as the ckzg library recomputes all proofs and does not require + // proofs to recover. + for (input_proof, cell_id) in input_proofs.iter().zip(cell_indices) { + if let Err(e) = compare_result::( + &Ok(*input_proof), + &proofs.get(cell_id as usize).cloned(), + ) { + return Err(e); + } + } + + Ok((cells, proofs)) + }); + + let expected = self + .output + .as_ref() + .and_then(|(cells, proofs)| parse_cells_and_proofs(cells, proofs).ok()) + .map(|(cells, proofs)| (cells.try_into().unwrap(), proofs.try_into().unwrap())); + + compare_result::(&result, &expected) + } +} diff --git a/testing/ef_tests/src/cases/kzg_verify_blob_kzg_proof.rs b/testing/ef_tests/src/cases/kzg_verify_blob_kzg_proof.rs index f68f0fd7ed0..4e56b2b44c3 100644 --- a/testing/ef_tests/src/cases/kzg_verify_blob_kzg_proof.rs +++ b/testing/ef_tests/src/cases/kzg_verify_blob_kzg_proof.rs @@ -2,7 +2,7 @@ use super::*; use crate::case_result::compare_result; use beacon_chain::kzg_utils::validate_blob; use eth2_network_config::TRUSTED_SETUP_BYTES; -use kzg::{Error as KzgError, Kzg, KzgCommitment, KzgProof, TrustedSetup}; +use 
kzg::{Cell, Error as KzgError, Kzg, KzgCommitment, KzgProof, TrustedSetup}; use serde::Deserialize; use std::marker::PhantomData; use types::Blob; @@ -10,10 +10,38 @@ use types::Blob; pub fn get_kzg() -> Result { let trusted_setup: TrustedSetup = serde_json::from_reader(TRUSTED_SETUP_BYTES) .map_err(|e| Error::InternalError(format!("Failed to initialize kzg: {:?}", e)))?; + // TODO(das): need to enable these tests when rayon issues in rust_eth_kzg are fixed Kzg::new_from_trusted_setup(trusted_setup) .map_err(|e| Error::InternalError(format!("Failed to initialize kzg: {:?}", e))) } +pub fn parse_cells_and_proofs( + cells: &[String], + proofs: &[String], +) -> Result<(Vec, Vec), Error> { + let cells = cells + .iter() + .map(|s| parse_cell(s.as_str())) + .collect::, Error>>()?; + + let proofs = proofs + .iter() + .map(|s| parse_proof(s.as_str())) + .collect::, Error>>()?; + + Ok((cells, proofs)) +} + +pub fn parse_cell(cell: &str) -> Result { + hex::decode(strip_0x(cell)?) + .map_err(|e| Error::FailedToParseTest(format!("Failed to parse cell: {:?}", e))) + .and_then(|bytes| { + bytes + .try_into() + .map_err(|e| Error::FailedToParseTest(format!("Failed to parse cell: {:?}", e))) + }) +} + pub fn parse_proof(proof: &str) -> Result { hex::decode(strip_0x(proof)?) 
.map_err(|e| Error::FailedToParseTest(format!("Failed to parse proof: {:?}", e))) @@ -80,6 +108,10 @@ impl Case for KZGVerifyBlobKZGProof { fork_name == ForkName::Deneb } + fn is_enabled_for_feature(feature_name: FeatureName) -> bool { + feature_name != FeatureName::Eip7594 + } + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { let parse_input = |input: &KZGVerifyBlobKZGProofInput| -> Result<(Blob, KzgCommitment, KzgProof), Error> { let blob = parse_blob::(&input.blob)?; diff --git a/testing/ef_tests/src/cases/kzg_verify_blob_kzg_proof_batch.rs b/testing/ef_tests/src/cases/kzg_verify_blob_kzg_proof_batch.rs index ae5caedf069..cfe15d5c05a 100644 --- a/testing/ef_tests/src/cases/kzg_verify_blob_kzg_proof_batch.rs +++ b/testing/ef_tests/src/cases/kzg_verify_blob_kzg_proof_batch.rs @@ -33,6 +33,10 @@ impl Case for KZGVerifyBlobKZGProofBatch { fork_name == ForkName::Deneb } + fn is_enabled_for_feature(feature_name: FeatureName) -> bool { + feature_name != FeatureName::Eip7594 + } + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { let parse_input = |input: &KZGVerifyBlobKZGProofBatchInput| -> Result<_, Error> { let blobs = input @@ -54,7 +58,6 @@ impl Case for KZGVerifyBlobKZGProofBatch { }; let kzg = get_kzg()?; - let result = parse_input(&self.input).and_then( |(commitments, blobs, proofs)| match validate_blobs::( diff --git a/testing/ef_tests/src/cases/kzg_verify_cell_kzg_proof_batch.rs b/testing/ef_tests/src/cases/kzg_verify_cell_kzg_proof_batch.rs new file mode 100644 index 00000000000..9c651d2d633 --- /dev/null +++ b/testing/ef_tests/src/cases/kzg_verify_cell_kzg_proof_batch.rs @@ -0,0 +1,77 @@ +use super::*; +use crate::case_result::compare_result; +use kzg::{Bytes48, Error as KzgError}; +use serde::Deserialize; +use std::marker::PhantomData; + +#[derive(Debug, Clone, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct KZGVerifyCellKZGProofBatchInput { + pub row_commitments: Vec, + pub 
row_indices: Vec, + pub column_indices: Vec, + pub cells: Vec, + pub proofs: Vec, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(bound = "E: EthSpec", deny_unknown_fields)] +pub struct KZGVerifyCellKZGProofBatch { + pub input: KZGVerifyCellKZGProofBatchInput, + pub output: Option, + #[serde(skip)] + _phantom: PhantomData, +} + +impl LoadCase for KZGVerifyCellKZGProofBatch { + fn load_from_dir(path: &Path, _fork_name: ForkName) -> Result { + decode::yaml_decode_file(path.join("data.yaml").as_path()) + } +} + +impl Case for KZGVerifyCellKZGProofBatch { + fn is_enabled_for_fork(fork_name: ForkName) -> bool { + fork_name == ForkName::Deneb + } + + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { + let parse_input = |input: &KZGVerifyCellKZGProofBatchInput| -> Result<_, Error> { + let (cells, proofs) = parse_cells_and_proofs(&input.cells, &input.proofs)?; + let row_commitments = input + .row_commitments + .iter() + .map(|s| parse_commitment(s)) + .collect::, _>>()?; + let coordinates = input + .row_indices + .iter() + .zip(&input.column_indices) + .map(|(&row, &col)| (row as u64, col as u64)) + .collect::>(); + + Ok((cells, proofs, coordinates, row_commitments)) + }; + + let result = + parse_input(&self.input).and_then(|(cells, proofs, coordinates, commitments)| { + let proofs: Vec = proofs.iter().map(|&proof| proof.into()).collect(); + let commitments: Vec = commitments.iter().map(|&c| c.into()).collect(); + let cells = cells.iter().map(|c| c.as_ref()).collect::>(); + let column_indices = coordinates + .into_iter() + .map(|(_row, col)| col) + .collect::>(); + let kzg = get_kzg()?; + match kzg.verify_cell_proof_batch(&cells, &proofs, column_indices, &commitments) { + Ok(_) => Ok(true), + Err(KzgError::KzgVerificationFailed) => Ok(false), + Err(e) => Err(Error::InternalError(format!( + "Failed to validate cells: {:?}", + e + ))), + } + }); + + compare_result::(&result, &self.output) + } +} diff --git 
a/testing/ef_tests/src/cases/kzg_verify_kzg_proof.rs b/testing/ef_tests/src/cases/kzg_verify_kzg_proof.rs index e395558e0e1..4468176c277 100644 --- a/testing/ef_tests/src/cases/kzg_verify_kzg_proof.rs +++ b/testing/ef_tests/src/cases/kzg_verify_kzg_proof.rs @@ -33,6 +33,10 @@ impl Case for KZGVerifyKZGProof { fork_name == ForkName::Deneb } + fn is_enabled_for_feature(feature_name: FeatureName) -> bool { + feature_name != FeatureName::Eip7594 + } + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { let parse_input = |input: &KZGVerifyKZGProofInput| -> Result<_, Error> { let commitment = parse_commitment(&input.commitment)?; diff --git a/testing/ef_tests/src/cases/merkle_proof_validity.rs b/testing/ef_tests/src/cases/merkle_proof_validity.rs index 8d5c0687753..b68bbdc5d39 100644 --- a/testing/ef_tests/src/cases/merkle_proof_validity.rs +++ b/testing/ef_tests/src/cases/merkle_proof_validity.rs @@ -3,7 +3,8 @@ use crate::decode::{ssz_decode_file, ssz_decode_state, yaml_decode_file}; use serde::Deserialize; use tree_hash::Hash256; use types::{ - BeaconBlockBody, BeaconBlockBodyDeneb, BeaconBlockBodyElectra, BeaconState, FullPayload, + BeaconBlockBody, BeaconBlockBodyDeneb, BeaconBlockBodyElectra, BeaconState, FixedVector, + FullPayload, Unsigned, }; #[derive(Debug, Clone, Deserialize)] @@ -81,12 +82,18 @@ impl Case for MerkleProofValidity { } } -#[derive(Debug, Clone, Deserialize)] -#[serde(bound = "E: EthSpec")] +#[derive(Debug, Clone)] pub struct KzgInclusionMerkleProofValidity { pub metadata: Option, pub block: BeaconBlockBody, pub merkle_proof: MerkleProof, + pub proof_type: KzgInclusionProofType, +} + +#[derive(Debug, Clone)] +pub enum KzgInclusionProofType { + Single, + List, } impl LoadCase for KzgInclusionMerkleProofValidity { @@ -115,21 +122,33 @@ impl LoadCase for KzgInclusionMerkleProofValidity { None }; + let file_name = path + .file_name() + .and_then(|file_name| file_name.to_str()) + .ok_or(Error::InternalError( + "failed to 
read file name from path".to_string(), + ))?; + + let proof_type = if file_name.starts_with("blob_kzg_commitments") { + KzgInclusionProofType::List + } else { + KzgInclusionProofType::Single + }; + Ok(Self { metadata, block, merkle_proof, + proof_type, }) } } -impl Case for KzgInclusionMerkleProofValidity { - fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { - let Ok(proof) = self.block.to_ref().kzg_commitment_merkle_proof(0) else { - return Err(Error::FailedToParseTest( - "Could not retrieve merkle proof".to_string(), - )); - }; +impl KzgInclusionMerkleProofValidity { + fn verify_kzg_inclusion_proof( + &self, + proof: FixedVector, + ) -> Result<(), Error> { let proof_len = proof.len(); let branch_len = self.merkle_proof.branch.len(); if proof_len != branch_len { @@ -153,3 +172,29 @@ impl Case for KzgInclusionMerkleProofValidity { Ok(()) } } +impl Case for KzgInclusionMerkleProofValidity { + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { + match self.proof_type { + KzgInclusionProofType::Single => { + let proof = self + .block + .to_ref() + .kzg_commitment_merkle_proof(0) + .map_err(|e| { + Error::FailedToParseTest(format!("Could not retrieve merkle proof: {e:?}")) + })?; + self.verify_kzg_inclusion_proof(proof) + } + KzgInclusionProofType::List => { + let proof = self + .block + .to_ref() + .kzg_commitments_merkle_proof() + .map_err(|e| { + Error::FailedToParseTest(format!("Could not retrieve merkle proof: {e:?}")) + })?; + self.verify_kzg_inclusion_proof(proof) + } + } + } +} diff --git a/testing/ef_tests/src/handler.rs b/testing/ef_tests/src/handler.rs index 52fc58f3d8c..dacaba1dcab 100644 --- a/testing/ef_tests/src/handler.rs +++ b/testing/ef_tests/src/handler.rs @@ -1,12 +1,15 @@ use crate::cases::{self, Case, Cases, EpochTransition, LoadCase, Operation}; -use crate::type_name; use crate::type_name::TypeName; +use crate::{type_name, FeatureName}; use derivative::Derivative; use std::fs::{self, 
DirEntry}; use std::marker::PhantomData; use std::path::PathBuf; use types::{BeaconState, EthSpec, ForkName}; +const EIP7594_FORK: ForkName = ForkName::Deneb; +const EIP7594_TESTS: [&str; 4] = ["ssz_static", "merkle_proof", "networking", "kzg"]; + pub trait Handler { type Case: Case + LoadCase; @@ -28,10 +31,21 @@ pub trait Handler { Self::Case::is_enabled_for_fork(fork_name) } + fn is_enabled_for_feature(&self, feature_name: FeatureName) -> bool { + Self::Case::is_enabled_for_feature(feature_name) + } + fn run(&self) { for fork_name in ForkName::list_all() { if !self.disabled_forks().contains(&fork_name) && self.is_enabled_for_fork(fork_name) { - self.run_for_fork(fork_name) + self.run_for_fork(fork_name); + + if fork_name == EIP7594_FORK + && EIP7594_TESTS.contains(&Self::runner_name()) + && self.is_enabled_for_feature(FeatureName::Eip7594) + { + self.run_for_feature(EIP7594_FORK, FeatureName::Eip7594); + } } } } @@ -81,6 +95,47 @@ pub trait Handler { ); crate::results::assert_tests_pass(&name, &handler_path, &results); } + + fn run_for_feature(&self, fork_name: ForkName, feature_name: FeatureName) { + let feature_name_str = feature_name.to_string(); + + let handler_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("consensus-spec-tests") + .join("tests") + .join(Self::config_name()) + .join(&feature_name_str) + .join(Self::runner_name()) + .join(self.handler_name()); + + // Iterate through test suites + let as_directory = |entry: Result| -> Option { + entry + .ok() + .filter(|e| e.file_type().map(|ty| ty.is_dir()).unwrap()) + }; + + let test_cases = fs::read_dir(&handler_path) + .unwrap_or_else(|e| panic!("handler dir {} exists: {:?}", handler_path.display(), e)) + .filter_map(as_directory) + .flat_map(|suite| fs::read_dir(suite.path()).expect("suite dir exists")) + .filter_map(as_directory) + .map(|test_case_dir| { + let path = test_case_dir.path(); + let case = Self::Case::load_from_dir(&path, fork_name).expect("test should load"); + (path, case) + }) + 
.collect(); + + let results = Cases { test_cases }.test_results(fork_name, Self::use_rayon()); + + let name = format!( + "{}/{}/{}", + feature_name_str, + Self::runner_name(), + self.handler_name() + ); + crate::results::assert_tests_pass(&name, &handler_path, &results); + } } macro_rules! bls_eth_handler { @@ -784,6 +839,86 @@ impl Handler for KZGVerifyKZGProofHandler { } } +#[derive(Derivative)] +#[derivative(Default(bound = ""))] +pub struct GetCustodyColumnsHandler(PhantomData); + +impl Handler for GetCustodyColumnsHandler { + type Case = cases::GetCustodyColumns; + + fn config_name() -> &'static str { + E::name() + } + + fn runner_name() -> &'static str { + "networking" + } + + fn handler_name(&self) -> String { + "get_custody_columns".into() + } +} + +#[derive(Derivative)] +#[derivative(Default(bound = ""))] +pub struct KZGComputeCellsAndKZGProofHandler(PhantomData); + +impl Handler for KZGComputeCellsAndKZGProofHandler { + type Case = cases::KZGComputeCellsAndKZGProofs; + + fn config_name() -> &'static str { + "general" + } + + fn runner_name() -> &'static str { + "kzg" + } + + fn handler_name(&self) -> String { + "compute_cells_and_kzg_proofs".into() + } +} + +#[derive(Derivative)] +#[derivative(Default(bound = ""))] +pub struct KZGVerifyCellKZGProofBatchHandler(PhantomData); + +impl Handler for KZGVerifyCellKZGProofBatchHandler { + type Case = cases::KZGVerifyCellKZGProofBatch; + + fn config_name() -> &'static str { + "general" + } + + fn runner_name() -> &'static str { + "kzg" + } + + fn handler_name(&self) -> String { + "verify_cell_kzg_proof_batch".into() + } +} + +#[derive(Derivative)] +#[derivative(Default(bound = ""))] +pub struct KZGRecoverCellsAndKZGProofHandler(PhantomData); + +impl Handler for KZGRecoverCellsAndKZGProofHandler { + type Case = cases::KZGRecoverCellsAndKZGProofs; + + fn config_name() -> &'static str { + "general" + } + + fn runner_name() -> &'static str { + "kzg" + } + + fn handler_name(&self) -> String { + 
"recover_cells_and_kzg_proofs".into() + } +} + #[derive(Derivative)] #[derivative(Default(bound = ""))] pub struct MerkleProofValidityHandler(PhantomData); diff --git a/testing/ef_tests/src/lib.rs b/testing/ef_tests/src/lib.rs index e55551be701..e7367719d72 100644 --- a/testing/ef_tests/src/lib.rs +++ b/testing/ef_tests/src/lib.rs @@ -1,10 +1,11 @@ pub use case_result::CaseResult; pub use cases::WithdrawalsPayload; pub use cases::{ - Case, EffectiveBalanceUpdates, Eth1DataReset, HistoricalRootsUpdate, HistoricalSummariesUpdate, - InactivityUpdates, JustificationAndFinalization, ParticipationFlagUpdates, - ParticipationRecordUpdates, PendingBalanceDeposits, PendingConsolidations, RandaoMixesReset, - RegistryUpdates, RewardsAndPenalties, Slashings, SlashingsReset, SyncCommitteeUpdates, + Case, EffectiveBalanceUpdates, Eth1DataReset, FeatureName, HistoricalRootsUpdate, + HistoricalSummariesUpdate, InactivityUpdates, JustificationAndFinalization, + ParticipationFlagUpdates, ParticipationRecordUpdates, PendingBalanceDeposits, + PendingConsolidations, RandaoMixesReset, RegistryUpdates, RewardsAndPenalties, Slashings, + SlashingsReset, SyncCommitteeUpdates, }; pub use decode::log_file_access; pub use error::Error; diff --git a/testing/ef_tests/src/type_name.rs b/testing/ef_tests/src/type_name.rs index c61dfef09cc..49de073d6ae 100644 --- a/testing/ef_tests/src/type_name.rs +++ b/testing/ef_tests/src/type_name.rs @@ -1,5 +1,4 @@ //! Mapping from types to canonical string identifiers used in testing. 
-use types::blob_sidecar::BlobIdentifier; use types::historical_summary::HistoricalSummary; use types::*; @@ -58,7 +57,9 @@ type_name_generic!(BeaconBlockBodyElectra, "BeaconBlockBody"); type_name!(BeaconBlockHeader); type_name_generic!(BeaconState); type_name!(BlobIdentifier); +type_name!(DataColumnIdentifier); type_name_generic!(BlobSidecar); +type_name_generic!(DataColumnSidecar); type_name!(Checkpoint); type_name!(ConsolidationRequest); type_name_generic!(ContributionAndProof); diff --git a/testing/ef_tests/tests/tests.rs b/testing/ef_tests/tests/tests.rs index 7f69521bb67..2c62edb62cc 100644 --- a/testing/ef_tests/tests/tests.rs +++ b/testing/ef_tests/tests/tests.rs @@ -237,8 +237,9 @@ macro_rules! ssz_static_test_no_run { #[cfg(feature = "fake_crypto")] mod ssz_static { - use ef_tests::{Handler, SszStaticHandler, SszStaticTHCHandler, SszStaticWithSpecHandler}; - use types::blob_sidecar::BlobIdentifier; + use ef_tests::{ + FeatureName, Handler, SszStaticHandler, SszStaticTHCHandler, SszStaticWithSpecHandler, + }; use types::historical_summary::HistoricalSummary; use types::{ AttesterSlashingBase, AttesterSlashingElectra, ConsolidationRequest, DepositRequest, @@ -627,6 +628,22 @@ mod ssz_static { SszStaticHandler::::capella_and_later().run(); } + #[test] + fn data_column_sidecar() { + SszStaticHandler::, MinimalEthSpec>::deneb_only() + .run_for_feature(ForkName::Deneb, FeatureName::Eip7594); + SszStaticHandler::, MainnetEthSpec>::deneb_only() + .run_for_feature(ForkName::Deneb, FeatureName::Eip7594); + } + + #[test] + fn data_column_identifier() { + SszStaticHandler::::deneb_only() + .run_for_feature(ForkName::Deneb, FeatureName::Eip7594); + SszStaticHandler::::deneb_only() + .run_for_feature(ForkName::Deneb, FeatureName::Eip7594); + } + #[test] fn consolidation() { SszStaticHandler::::electra_and_later().run(); @@ -884,6 +901,26 @@ fn kzg_verify_kzg_proof() { KZGVerifyKZGProofHandler::::default().run(); } +/* TODO(das): enable these tests +#[test] +fn 
kzg_compute_cells_and_proofs() { + KZGComputeCellsAndKZGProofHandler::::default() + .run_for_feature(ForkName::Deneb, FeatureName::Eip7594); +} + +#[test] +fn kzg_verify_cell_proof_batch() { + KZGVerifyCellKZGProofBatchHandler::::default() + .run_for_feature(ForkName::Deneb, FeatureName::Eip7594); +} + +#[test] +fn kzg_recover_cells_and_proofs() { + KZGRecoverCellsAndKZGProofHandler::::default() + .run_for_feature(ForkName::Deneb, FeatureName::Eip7594); +} +*/ + #[test] fn merkle_proof_validity() { MerkleProofValidityHandler::::default().run(); @@ -908,3 +945,11 @@ fn rewards() { RewardsHandler::::new(handler).run(); } } + +#[test] +fn get_custody_columns() { + GetCustodyColumnsHandler::::default() + .run_for_feature(ForkName::Deneb, FeatureName::Eip7594); + GetCustodyColumnsHandler::::default() + .run_for_feature(ForkName::Deneb, FeatureName::Eip7594); +}