Skip to content

Commit

Permalink
Avoid retrying same sampling peer that previously failed.
Browse files Browse the repository at this point in the history
  • Loading branch information
jimmygchen committed Jul 12, 2024
1 parent bf300b3 commit b9b8a7d
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 2 deletions.
36 changes: 36 additions & 0 deletions beacon_node/network/src/sync/block_lookups/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,21 @@ impl TestRig {
});
}

fn sampling_requests_failed(
&mut self,
sampling_ids: DCByRootIds,
peer_id: PeerId,
error: RPCError,
) {
for (request_id, _) in sampling_ids {
self.send_sync_message(SyncMessage::RpcError {
peer_id,
request_id,
error: error.clone(),
})
}
}

fn complete_valid_block_request(
&mut self,
id: SingleLookupReqId,
Expand Down Expand Up @@ -1964,6 +1979,27 @@ fn sampling_with_retries() {
r.expect_clean_finished_sampling();
}

#[test]
fn sampling_avoid_retrying_same_peer() {
let Some(mut r) = TestRig::test_setup_after_peerdas() else {
return;
};
let peer_id_1 = r.new_connected_supernode_peer();
let peer_id_2 = r.new_connected_supernode_peer();
let block_root = Hash256::random();
r.trigger_sample_block(block_root, Slot::new(0));
// Retrieve all outgoing sample requests for random column indexes, and return empty responses
let sampling_ids =
r.expect_only_data_columns_by_root_requests(block_root, SAMPLING_REQUIRED_SUCCESSES);
r.sampling_requests_failed(sampling_ids, peer_id_1, RPCError::Disconnected);
// Should retry the other peer
let sampling_ids =
r.expect_only_data_columns_by_root_requests(block_root, SAMPLING_REQUIRED_SUCCESSES);
r.sampling_requests_failed(sampling_ids, peer_id_2, RPCError::Disconnected);
// Expect no more retries
r.expect_empty_network();
}

#[test]
fn custody_lookup_happy_path() {
let Some(mut r) = TestRig::test_setup_after_peerdas() else {
Expand Down
6 changes: 4 additions & 2 deletions beacon_node/network/src/sync/sampling.rs
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,6 @@ mod request {
column_index: ColumnIndex,
status: Status,
// TODO(das): Should downscore peers that claim to not have the sample?
#[allow(dead_code)]
peers_dont_have: HashSet<PeerId>,
}

Expand Down Expand Up @@ -490,11 +489,13 @@ mod request {

// TODO: When is a fork and only a subset of your peers know about a block, sampling should only
// be queried on the peers on that fork. Should this case be handled? How to handle it?
let peer_ids = cx.get_custodial_peers(
let mut peer_ids = cx.get_custodial_peers(
block_slot.epoch(<T::EthSpec as EthSpec>::slots_per_epoch()),
self.column_index,
);

peer_ids.retain(|peer_id| !self.peers_dont_have.contains(peer_id));

// TODO(das) randomize custodial peer and avoid failing peers
if let Some(peer_id) = peer_ids.first().cloned() {
cx.data_column_lookup_request(
Expand All @@ -521,6 +522,7 @@ mod request {
pub(crate) fn on_sampling_error(&mut self) -> Result<PeerId, SamplingError> {
match self.status.clone() {
Status::Sampling(peer_id) => {
self.peers_dont_have.insert(peer_id);
self.status = Status::NotStarted;
Ok(peer_id)
}
Expand Down

0 comments on commit b9b8a7d

Please sign in to comment.