From 485a09c7df76634088ec58b691041451a616015b Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 14 Apr 2022 13:06:56 +1000 Subject: [PATCH 01/11] Add a finalized state txids query --- .../finalized_state/zebra_db/transparent.rs | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/zebra-state/src/service/finalized_state/zebra_db/transparent.rs b/zebra-state/src/service/finalized_state/zebra_db/transparent.rs index c11b8f94226..a7de8eb1d0f 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/transparent.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/transparent.rs @@ -292,6 +292,34 @@ impl ZebraDb { .flat_map(|address| self.address_utxos(address)) .collect() } + + /// Returns the transaction IDs that sent or received funds to `addresses` in the finalized chain. + /// + /// If none of the addresses has finalized sends or receives, returns an empty list. + /// + /// # Correctness + /// + /// Callers should combine the non-finalized transactions for `addresses` + /// with the returned transactions. + /// + /// The transaction IDs will only be correct if the non-finalized chain matches or overlaps with + /// the finalized state. + /// + /// Specifically, a block in the partial chain must be a child block of the finalized tip. + /// (But the child block does not have to be the partial chain root.) + /// + /// This condition does not apply if there is only one address. + /// Since address transactions are only appended by blocks, and this query reads them in order, + /// it is impossible to get inconsistent transactions for a single address. + pub fn partial_finalized_transparent_tx_ids( + &self, + addresses: &HashSet, + ) -> BTreeMap { + addresses + .iter() + .flat_map(|address| self.address_tx_ids(address)) + .collect() + } } impl DiskWriteBatch { From 1f2831a2e75dc15e26d4494e8f8eb3ef91c49c19 Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 14 Apr 2022 13:24:50 +1000 Subject: [PATCH 02/11] Add an address transaction IDs query, without height filters --- zebra-state/src/service/read.rs | 177 ++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) diff --git a/zebra-state/src/service/read.rs b/zebra-state/src/service/read.rs index b791ba5ba7d..91b7ee55520 100644 --- a/zebra-state/src/service/read.rs +++ b/zebra-state/src/service/read.rs @@ -438,3 +438,180 @@ where }) .collect() } + +/// Returns the transaction IDs that sent or received funds, +/// from the supplied [`transparent::Address`]es, in chain order. +/// +/// If the addresses do not exist in the non-finalized `chain` or finalized `db`, +/// returns an empty list. +#[allow(dead_code)] +pub(crate) fn transparent_tx_ids( + chain: Option, + db: &ZebraDb, + addresses: HashSet, +) -> Result, BoxError> +where + C: AsRef, +{ + let mut tx_id_error = None; + + // Retry the finalized tx ID query if it was interruped by a finalizing block, + // and the non-finalized chain doesn't overlap the changed heights. + for _ in 0..=FINALIZED_ADDRESS_INDEX_RETRIES { + let (finalized_tx_ids, finalized_tip_range) = finalized_transparent_tx_ids(db, &addresses); + + // Apply the non-finalized tx ID changes. + let chain_tx_id_changes = + chain_transparent_tx_id_changes(chain.as_ref(), &addresses, finalized_tip_range); + + // If the tx IDs are valid, return them, otherwise, retry or return an error. + match chain_tx_id_changes { + Ok(chain_tx_id_changes) => { + let tx_ids = apply_tx_id_changes(finalized_tx_ids, chain_tx_id_changes); + + return Ok(tx_ids); + } + + Err(error) => tx_id_error = Some(Err(error)), + } + } + + tx_id_error.expect("unexpected missing error: attempts should set error or return") +} + +/// Returns the [`transaction::Hash`]es for `addresses` in the finalized chain, +/// and the finalized tip heights the transaction IDs were queried at. +/// +/// If the addresses do not exist in the finalized `db`, returns an empty list. +// +// TODO: turn the return type into a struct? +fn finalized_transparent_tx_ids( + db: &ZebraDb, + addresses: &HashSet, +) -> ( + BTreeMap, + Option>, +) { + // # Correctness + // + // The StateService can commit additional blocks while we are querying transaction IDs. + + // Check if the finalized state changed while we were querying it + let start_finalized_tip = db.finalized_tip_height(); + + let finalized_tx_ids = db.partial_finalized_transparent_tx_ids(addresses); + + let end_finalized_tip = db.finalized_tip_height(); + + let finalized_tip_range = if let (Some(start_finalized_tip), Some(end_finalized_tip)) = + (start_finalized_tip, end_finalized_tip) + { + Some(start_finalized_tip..=end_finalized_tip) + } else { + // State is empty + None + }; + + (finalized_tx_ids, finalized_tip_range) +} + +/// Returns the extra transaction IDs for `addresses` in the non-finalized chain, +/// matching or overlapping the transaction IDs for the `finalized_tip_range`. +/// +/// If the addresses do not exist in the non-finalized `chain`, returns an empty list. +// +// TODO: turn the return type into a struct? +fn chain_transparent_tx_id_changes( + chain: Option, + addresses: &HashSet, + finalized_tip_range: Option>, +) -> Result, BoxError> +where + C: AsRef, +{ + let finalized_tip_range = match finalized_tip_range { + Some(finalized_tip_range) => finalized_tip_range, + None => { + assert!( + chain.is_none(), + "unexpected non-finalized chain when finalized state is empty" + ); + + // Empty chains don't contain any tx IDs. + return Ok(Default::default()); + } + }; + + // # Correctness + // + // The StateService commits blocks to the finalized state before updating the latest chain, + // and it can commit additional blocks after we've cloned this `chain` variable. + // + // But we can compensate for addresses with mismatching blocks, + // by adding the overlapping non-finalized transaction IDs. + // + // If there is only one address, mismatches aren't possible, + // because tx IDs are added to the finalized state in chain order (and never removed), + // and they are queried in chain order. + + // Check if the finalized and non-finalized states match or overlap + let required_min_chain_root = finalized_tip_range.start().0 + 1; + let mut required_chain_overlap = required_min_chain_root..=finalized_tip_range.end().0; + + if chain.is_none() { + if required_chain_overlap.is_empty() || addresses.len() <= 1 { + // The non-finalized chain is empty, and we don't need it. + return Ok(Default::default()); + } else { + // We can't compensate for inconsistent database queries, + // because the non-finalized chain is empty. + return Err("unable to get tx IDs: state was committing a block, and non-finalized chain is empty".into()); + } + } + + let chain = chain.unwrap(); + let chain = chain.as_ref(); + + let chain_root = chain.non_finalized_root_height().0; + let chain_tip = chain.non_finalized_tip_height().0; + + assert!( + chain_root <= required_min_chain_root, + "unexpected chain gap: the best chain is updated after its previous root is finalized" + ); + + // If we've already committed this entire chain, ignore its UTXO changes. + // This is more likely if the non-finalized state is just getting started. + if chain_tip > *required_chain_overlap.end() { + if required_chain_overlap.is_empty() || addresses.len() <= 1 { + // The non-finalized chain has been committed, and we don't need it. + return Ok(Default::default()); + } else { + // We can't compensate for inconsistent database queries, + // because the non-finalized chain is below the inconsistent query range. + return Err("unable to get tx IDs: state was committing a block, and non-finalized chain has been committed".into()); + } + } + + // Correctness: some finalized tx IDs might have come from different blocks for different addresses, + // but we've just checked they can be corrected by applying the non-finalized UTXO changes. + assert!( + required_chain_overlap.all(|height| chain.blocks.contains_key(&Height(height))) || addresses.len() <= 1, + "tx ID query inconsistency: chain must contain required overlap blocks if there are multiple addresses", + ); + + Ok(chain.partial_transparent_tx_ids(addresses)) +} + +/// Returns the combined the supplied finalized and non-finalized transaction IDs. +fn apply_tx_id_changes( + finalized_tx_ids: BTreeMap, + chain_tx_ids: BTreeMap, +) -> BTreeMap { + // Correctness: compensate for inconsistent tx IDs finalized blocks across multiple addresses, + // by combining them with overalapping non-finalized block tx IDs. + finalized_tx_ids + .into_iter() + .chain(chain_tx_ids.into_iter()) + .collect() +} From aea01691f8e4434b211270d687ee41212577d0bd Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 14 Apr 2022 13:39:58 +1000 Subject: [PATCH 03/11] Connect the address transaction ID query to the RPC --- zebra-rpc/src/methods.rs | 20 +++++++++++--------- zebra-rpc/src/methods/tests/vectors.rs | 2 +- zebra-state/src/request.rs | 25 +++++++++++++++++-------- zebra-state/src/response.rs | 13 +++++++------ zebra-state/src/service.rs | 22 ++++++++++++---------- 5 files changed, 48 insertions(+), 34 deletions(-) diff --git a/zebra-rpc/src/methods.rs b/zebra-rpc/src/methods.rs index 09cb120c951..1c1076b1583 100644 --- a/zebra-rpc/src/methods.rs +++ b/zebra-rpc/src/methods.rs @@ -647,7 +647,6 @@ where end: u32, ) -> BoxFuture>> { let mut state = self.state.clone(); - let mut response_transactions = vec![]; let start = Height(start); let end = Height(end); @@ -661,7 +660,7 @@ where // height range checks check_height_range(start, end, chain_height?)?; - let valid_addresses: Result> = addresses + let valid_addresses: Result> = addresses .iter() .map(|address| { address.parse().map_err(|_| { @@ -670,8 +669,10 @@ where }) .collect(); - let request = - zebra_state::ReadRequest::TransactionsByAddresses(valid_addresses?, start, end); + let request = zebra_state::ReadRequest::TransactionIdsByAddresses { + addresses: valid_addresses?, + height_range: start..=end, + }; let response = state .ready() .and_then(|service| service.call(request)) @@ -682,13 +683,14 @@ where data: None, })?; - match response { - zebra_state::ReadResponse::TransactionIds(hashes) => response_transactions - .append(&mut hashes.iter().map(|h| h.to_string()).collect()), + let hashes = match response { + zebra_state::ReadResponse::AddressesTransactionIds(hashes) => { + hashes.values().map(|tx_id| tx_id.to_string()).collect() + } _ => unreachable!("unmatched response to a TransactionsByAddresses request"), - } + }; - Ok(response_transactions) + Ok(hashes) } .boxed() } diff --git a/zebra-rpc/src/methods/tests/vectors.rs b/zebra-rpc/src/methods/tests/vectors.rs index d034cc0d54c..04d09e6520c 100644 --- a/zebra-rpc/src/methods/tests/vectors.rs +++ b/zebra-rpc/src/methods/tests/vectors.rs @@ -431,7 +431,7 @@ async fn rpc_getaddresstxids_response() { // TODO: The length of the response should be 1 // Fix in the context of #3147 - assert_eq!(response.len(), 0); + assert_eq!(response.len(), 10); mempool.expect_no_requests().await; diff --git a/zebra-state/src/request.rs b/zebra-state/src/request.rs index 55f10cce2ca..7630e92e3b9 100644 --- a/zebra-state/src/request.rs +++ b/zebra-state/src/request.rs @@ -2,6 +2,7 @@ use std::{ collections::{HashMap, HashSet}, + ops::RangeInclusive, sync::Arc, }; @@ -438,19 +439,27 @@ pub enum ReadRequest { /// * [`Response::Transaction(None)`](Response::Transaction) otherwise. Transaction(transaction::Hash), - /// Looks up transactions hashes that were made by provided addresses in a blockchain height range. + /// Looks up the balance of a set of transparent addresses. + /// + /// Returns an [`Amount`] with the total balance of the set of addresses. + AddressBalance(HashSet), + + /// Looks up transaction hashes that sent or received from addresses, + /// in an inclusive blockchain height range. /// /// Returns /// - /// * A vector of transaction hashes. + /// * A set of transaction hashes. /// * An empty vector if no transactions were found for the given arguments. /// - /// Returned txids are in the order they appear in blocks, which ensures that they are topologically sorted + /// Returned txids are in the order they appear in blocks, + /// which ensures that they are topologically sorted /// (i.e. parent txids will appear before child txids). - TransactionsByAddresses(Vec, block::Height, block::Height), + TransactionIdsByAddresses { + /// The requested addresses. + addresses: HashSet, - /// Looks up the balance of a set of transparent addresses. - /// - /// Returns an [`Amount`] with the total balance of the set of addresses. - AddressBalance(HashSet), + /// The blocks to be queried for transactions. + height_range: RangeInclusive, + }, } diff --git a/zebra-state/src/response.rs b/zebra-state/src/response.rs index d2d25cd1fee..4e9797e571d 100644 --- a/zebra-state/src/response.rs +++ b/zebra-state/src/response.rs @@ -1,11 +1,11 @@ //! State [`tower::Service`] response types. -use std::sync::Arc; +use std::{collections::BTreeMap, sync::Arc}; use zebra_chain::{ amount::{Amount, NonNegative}, block::{self, Block}, - transaction::{Hash, Transaction}, + transaction::{self, Transaction}, transparent, }; @@ -13,6 +13,7 @@ use zebra_chain::{ // will work with inline links. #[allow(unused_imports)] use crate::Request; +use crate::TransactionLocation; #[derive(Clone, Debug, PartialEq, Eq)] /// A response to a [`StateService`] [`Request`]. @@ -55,10 +56,10 @@ pub enum ReadResponse { /// Response to [`ReadRequest::Transaction`] with the specified transaction. Transaction(Option<(Arc, block::Height)>), - /// Response to [`ReadRequest::TransactionsByAddresses`] with the obtained transaction ids, - /// in the order they appear in blocks. - TransactionIds(Vec), - /// Response to [`ReadRequest::AddressBalance`] with the total balance of the addresses. AddressBalance(Amount), + + /// Response to [`ReadRequest::TransactionIdsByAddresses`] with the obtained transaction ids, + /// in the order they appear in blocks. + AddressesTransactionIds(BTreeMap), } diff --git a/zebra-state/src/service.rs b/zebra-state/src/service.rs index 3195a6e4f03..25b08623575 100644 --- a/zebra-state/src/service.rs +++ b/zebra-state/src/service.rs @@ -993,24 +993,26 @@ impl Service for ReadStateService { } // For the get_address_tx_ids RPC. - ReadRequest::TransactionsByAddresses(_addresses, _start, _end) => { + ReadRequest::TransactionIdsByAddresses { + addresses, + // TODO: filter by height range + height_range: _, + } => { metrics::counter!( "state.requests", 1, "service" => "read_state", - "type" => "transactions_by_addresses", + "type" => "transaction_ids_by_addresses", ); - let _state = self.clone(); + let state = self.clone(); async move { - // TODO: Respond with found transactions - // At least the following pull requests should be merged: - // - #4022 - // - #4038 - // Do the corresponding update in the context of #3147 - let transaction_ids = vec![]; - Ok(ReadResponse::TransactionIds(transaction_ids)) + let tx_ids = state.best_chain_receiver.with_watch_data(|best_chain| { + read::transparent_tx_ids(best_chain, &state.db, addresses) + }); + + tx_ids.map(ReadResponse::AddressesTransactionIds) } .boxed() } From 5e4e0964d88d9407d67fa1561551561df6d6622d Mon Sep 17 00:00:00 2001 From: teor Date: Wed, 20 Apr 2022 15:03:56 +1000 Subject: [PATCH 04/11] Basic filtering of address transaction IDs by height range --- zebra-state/src/service.rs | 5 ++--- zebra-state/src/service/read.rs | 16 +++++++++++----- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/zebra-state/src/service.rs b/zebra-state/src/service.rs index 25b08623575..3cd3fbff508 100644 --- a/zebra-state/src/service.rs +++ b/zebra-state/src/service.rs @@ -995,8 +995,7 @@ impl Service for ReadStateService { // For the get_address_tx_ids RPC. ReadRequest::TransactionIdsByAddresses { addresses, - // TODO: filter by height range - height_range: _, + height_range, } => { metrics::counter!( "state.requests", @@ -1009,7 +1008,7 @@ impl Service for ReadStateService { async move { let tx_ids = state.best_chain_receiver.with_watch_data(|best_chain| { - read::transparent_tx_ids(best_chain, &state.db, addresses) + read::transparent_tx_ids(best_chain, &state.db, addresses, height_range) }); tx_ids.map(ReadResponse::AddressesTransactionIds) diff --git a/zebra-state/src/service/read.rs b/zebra-state/src/service/read.rs index 91b7ee55520..6d7a666ab2b 100644 --- a/zebra-state/src/service/read.rs +++ b/zebra-state/src/service/read.rs @@ -439,16 +439,17 @@ where .collect() } -/// Returns the transaction IDs that sent or received funds, -/// from the supplied [`transparent::Address`]es, in chain order. +/// Returns the transaction IDs that sent or received funds from the supplied [`transparent::Address`]es, +/// within `height_range`, in chain order. /// /// If the addresses do not exist in the non-finalized `chain` or finalized `db`, +/// or the `height_range` is totally outside both the `chain` and `db` range, /// returns an empty list. -#[allow(dead_code)] pub(crate) fn transparent_tx_ids( chain: Option, db: &ZebraDb, addresses: HashSet, + height_range: RangeInclusive, ) -> Result, BoxError> where C: AsRef, @@ -467,7 +468,9 @@ where // If the tx IDs are valid, return them, otherwise, retry or return an error. match chain_tx_id_changes { Ok(chain_tx_id_changes) => { - let tx_ids = apply_tx_id_changes(finalized_tx_ids, chain_tx_id_changes); + // TODO: do the height_range filter in the queries, to improve performance + let tx_ids = + apply_tx_id_changes(finalized_tx_ids, chain_tx_id_changes, height_range); return Ok(tx_ids); } @@ -603,15 +606,18 @@ where Ok(chain.partial_transparent_tx_ids(addresses)) } -/// Returns the combined the supplied finalized and non-finalized transaction IDs. +/// Returns the combined the supplied finalized and non-finalized transaction IDs, +/// filtered by `height_range`. fn apply_tx_id_changes( finalized_tx_ids: BTreeMap, chain_tx_ids: BTreeMap, + height_range: RangeInclusive, ) -> BTreeMap { // Correctness: compensate for inconsistent tx IDs finalized blocks across multiple addresses, // by combining them with overalapping non-finalized block tx IDs. finalized_tx_ids .into_iter() .chain(chain_tx_ids.into_iter()) + .filter(|(tx_loc, _tx_hash)| height_range.contains(&tx_loc.height)) .collect() } From 483169654885f781b4543f04e652df60a7a15d92 Mon Sep 17 00:00:00 2001 From: teor Date: Wed, 20 Apr 2022 15:15:36 +1000 Subject: [PATCH 05/11] Add a network and range argument to the getaddresstxids test --- zebra-rpc/src/methods/tests/vectors.rs | 37 +++++++++++++++----------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/zebra-rpc/src/methods/tests/vectors.rs b/zebra-rpc/src/methods/tests/vectors.rs index 04d09e6520c..8a80e884d39 100644 --- a/zebra-rpc/src/methods/tests/vectors.rs +++ b/zebra-rpc/src/methods/tests/vectors.rs @@ -1,6 +1,6 @@ //! Fixed test vectors for RPC methods. -use std::sync::Arc; +use std::{ops::RangeInclusive, sync::Arc}; use jsonrpc_core::ErrorCode; use tower::buffer::Buffer; @@ -395,43 +395,50 @@ async fn rpc_getaddresstxids_invalid_arguments() { async fn rpc_getaddresstxids_response() { zebra_test::init(); - let blocks: Vec> = zebra_test::vectors::CONTINUOUS_MAINNET_BLOCKS - .iter() - .map(|(_height, block_bytes)| block_bytes.zcash_deserialize_into().unwrap()) - .collect(); + for network in [Mainnet, Testnet] { + rpc_getaddresstxids_response_with(network, 1..=10).await; + } +} + +async fn rpc_getaddresstxids_response_with(network: Network, range: RangeInclusive) { + let blocks: Vec> = match network { + Mainnet => &*zebra_test::vectors::CONTINUOUS_MAINNET_BLOCKS, + Testnet => &*zebra_test::vectors::CONTINUOUS_TESTNET_BLOCKS, + } + .iter() + .map(|(_height, block_bytes)| block_bytes.zcash_deserialize_into().unwrap()) + .collect(); // get the first transaction of the first block let first_block_first_transaction = &blocks[1].transactions[0]; - // get the address, this is always `t3Vz22vK5z2LcKEdg16Yv4FFneEL1zg9ojd` + // get the address let address = &first_block_first_transaction.outputs()[1] - .address(Mainnet) + .address(network) .unwrap(); let mut mempool: MockService<_, _, _, BoxError> = MockService::build().for_unit_tests(); // Create a populated state service let (_state, read_state, latest_chain_tip, _chain_tip_change) = - zebra_state::populated_state(blocks.clone(), Mainnet).await; + zebra_state::populated_state(blocks.clone(), network).await; let (rpc, rpc_tx_queue_task_handle) = RpcImpl::new( "RPC test", Buffer::new(mempool.clone(), 1), Buffer::new(read_state.clone(), 1), latest_chain_tip, - Mainnet, + network, ); // call the method with valid arguments let addresses = vec![address.to_string()]; - let start: u32 = 1; - let end: u32 = 1; let response = rpc - .get_address_tx_ids(addresses, start, end) + .get_address_tx_ids(addresses, *range.start(), *range.end()) .await .expect("arguments are valid so no error can happen here"); - // TODO: The length of the response should be 1 - // Fix in the context of #3147 - assert_eq!(response.len(), 10); + // The first few blocks after genesis send funds to the same founders reward address, + // in one output per coinbase transaction. + assert_eq!(response.len(), range.count()); mempool.expect_no_requests().await; From a39cb73261becbf65870abfa516bc28c2a4a6972 Mon Sep 17 00:00:00 2001 From: teor Date: Wed, 20 Apr 2022 15:35:11 +1000 Subject: [PATCH 06/11] Test all block range combinations for mainnet --- zebra-rpc/src/methods/tests/vectors.rs | 63 ++++++++++++++++++-------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/zebra-rpc/src/methods/tests/vectors.rs b/zebra-rpc/src/methods/tests/vectors.rs index 8a80e884d39..ff36256001d 100644 --- a/zebra-rpc/src/methods/tests/vectors.rs +++ b/zebra-rpc/src/methods/tests/vectors.rs @@ -11,6 +11,7 @@ use zebra_chain::{ parameters::Network::*, serialization::{ZcashDeserializeInto, ZcashSerialize}, transaction::{UnminedTx, UnminedTxId}, + transparent, }; use zebra_network::constants::USER_AGENT; use zebra_node_services::BoxError; @@ -396,30 +397,53 @@ async fn rpc_getaddresstxids_response() { zebra_test::init(); for network in [Mainnet, Testnet] { - rpc_getaddresstxids_response_with(network, 1..=10).await; - } -} + let blocks: Vec> = match network { + Mainnet => &*zebra_test::vectors::CONTINUOUS_MAINNET_BLOCKS, + Testnet => &*zebra_test::vectors::CONTINUOUS_TESTNET_BLOCKS, + } + .iter() + .map(|(_height, block_bytes)| block_bytes.zcash_deserialize_into().unwrap()) + .collect(); -async fn rpc_getaddresstxids_response_with(network: Network, range: RangeInclusive) { - let blocks: Vec> = match network { - Mainnet => &*zebra_test::vectors::CONTINUOUS_MAINNET_BLOCKS, - Testnet => &*zebra_test::vectors::CONTINUOUS_TESTNET_BLOCKS, + // The first few blocks after genesis send funds to the same founders reward address, + // in one output per coinbase transaction. + // + // Get the coinbase transaction of the first block + // (the genesis block coinbase transaction is ignored by the consensus rules). + let first_block_first_transaction = &blocks[1].transactions[0]; + + // Get the address. + let address = first_block_first_transaction.outputs()[1] + .address(network) + .unwrap(); + + if network == Mainnet { + // Exhaustively test possible block ranges for mainnet. + // + // TODO: if it takes too long on slower machines, turn this into a proptest with 10-20 cases + for start in 1..=10 { + for end in start..=10 { + rpc_getaddresstxids_response_with(network, start..=end, &blocks, &address) + .await; + } + } + } else { + // Just test the full range for testnet. + rpc_getaddresstxids_response_with(network, 1..=10, &blocks, &address).await; + } } - .iter() - .map(|(_height, block_bytes)| block_bytes.zcash_deserialize_into().unwrap()) - .collect(); - - // get the first transaction of the first block - let first_block_first_transaction = &blocks[1].transactions[0]; - // get the address - let address = &first_block_first_transaction.outputs()[1] - .address(network) - .unwrap(); +} +async fn rpc_getaddresstxids_response_with( + network: Network, + range: RangeInclusive, + blocks: &[Arc], + address: &transparent::Address, +) { let mut mempool: MockService<_, _, _, BoxError> = MockService::build().for_unit_tests(); // Create a populated state service let (_state, read_state, latest_chain_tip, _chain_tip_change) = - zebra_state::populated_state(blocks.clone(), network).await; + zebra_state::populated_state(blocks.to_owned(), network).await; let (rpc, rpc_tx_queue_task_handle) = RpcImpl::new( "RPC test", @@ -436,8 +460,7 @@ async fn rpc_getaddresstxids_response_with(network: Network, range: RangeInclusi .await .expect("arguments are valid so no error can happen here"); - // The first few blocks after genesis send funds to the same founders reward address, - // in one output per coinbase transaction. + // One founders reward output per coinbase transactions, no other transactions. assert_eq!(response.len(), range.count()); mempool.expect_no_requests().await; From 8d39960dfbf76f2b287f21141c76c00c63a3f6ef Mon Sep 17 00:00:00 2001 From: teor Date: Wed, 20 Apr 2022 15:35:29 +1000 Subject: [PATCH 07/11] Fix a file descriptor limit error --- zebra-rpc/src/methods/tests/vectors.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/zebra-rpc/src/methods/tests/vectors.rs b/zebra-rpc/src/methods/tests/vectors.rs index ff36256001d..16c40aacae6 100644 --- a/zebra-rpc/src/methods/tests/vectors.rs +++ b/zebra-rpc/src/methods/tests/vectors.rs @@ -465,7 +465,20 @@ async fn rpc_getaddresstxids_response_with( mempool.expect_no_requests().await; - // The queue task should continue without errors or panics + // Shut down the queue task, to close the state's file descriptors. + // (If we don't, opening ~100 simultaneous states causes process file descriptor limit errors.) + // + // TODO: abort all the join handles in all the tests, except one? + rpc_tx_queue_task_handle.abort(); + + // The queue task should not have panicked or exited by itself. + // It can still be running, or it can have exited due to the abort. let rpc_tx_queue_task_result = rpc_tx_queue_task_handle.now_or_never(); - assert!(matches!(rpc_tx_queue_task_result, None)); + assert!( + rpc_tx_queue_task_result.is_none() + || rpc_tx_queue_task_result + .unwrap() + .unwrap_err() + .is_cancelled() + ); } From 319b49532009d59ebf45bdbb40796c36e129c274 Mon Sep 17 00:00:00 2001 From: teor Date: Wed, 20 Apr 2022 15:55:18 +1000 Subject: [PATCH 08/11] Optimise seeking the first transaction for an address The first transaction's location is part of the address location. --- .../service/finalized_state/disk_format/transparent.rs | 7 +++++-- .../src/service/finalized_state/zebra_db/transparent.rs | 8 ++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/zebra-state/src/service/finalized_state/disk_format/transparent.rs b/zebra-state/src/service/finalized_state/disk_format/transparent.rs index 5641a2d6847..37bc6c32feb 100644 --- a/zebra-state/src/service/finalized_state/disk_format/transparent.rs +++ b/zebra-state/src/service/finalized_state/disk_format/transparent.rs @@ -403,11 +403,14 @@ impl AddressTransaction { /// since [`ReadDisk::zs_next_key_value_from`] will fetch the next existing (valid) value. pub fn address_iterator_start(address_location: AddressLocation) -> AddressTransaction { // Iterating from the lowest possible transaction location gets us the first transaction. - let zero_transaction_location = TransactionLocation::from_usize(Height(0), 0); + // + // The address location is the output location of the first UTXO sent to the address, + // and addresses can not spend funds until they receive their first UTXO. + let first_utxo_location = address_location.transaction_location(); AddressTransaction { address_location, - transaction_location: zero_transaction_location, + transaction_location: first_utxo_location, } } diff --git a/zebra-state/src/service/finalized_state/zebra_db/transparent.rs b/zebra-state/src/service/finalized_state/zebra_db/transparent.rs index a7de8eb1d0f..80c22c2cf95 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/transparent.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/transparent.rs @@ -146,7 +146,7 @@ impl ZebraDb { let mut unspent_output = AddressUnspentOutput::address_iterator_start(address_location); loop { - // A valid key representing an entry for this address or the next + // Seek to a valid entry for this address, or the first entry for the next address unspent_output = match self .db .zs_next_key_value_from(&utxo_loc_by_transparent_addr_loc, &unspent_output) @@ -216,16 +216,16 @@ impl ZebraDb { // Manually fetch the entire addresses' transaction locations let mut addr_transactions = BTreeSet::new(); - // An invalid key representing the minimum possible transaction + // A valid key representing the first UTXO send to the address let mut transaction_location = AddressTransaction::address_iterator_start(address_location); loop { - // A valid key representing an entry for this address or the next + // Seek to a valid entry for this address, or the first entry for the next address transaction_location = match self .db .zs_next_key_value_from(&tx_loc_by_transparent_addr_loc, &transaction_location) { - Some((unspent_output, ())) => unspent_output, + Some((transaction_location, ())) => transaction_location, // We're finished with the final address in the column family None => break, }; From edf3dfffb71d6bf180bc4f902e3f507697e4cb5b Mon Sep 17 00:00:00 2001 From: teor Date: Wed, 20 Apr 2022 16:11:03 +1000 Subject: [PATCH 09/11] Filter finalized address transaction IDs by height range --- .../disk_format/transparent.rs | 20 ++++--- .../finalized_state/zebra_db/transparent.rs | 52 +++++++++++++++---- zebra-state/src/service/read.rs | 32 ++++++------ 3 files changed, 72 insertions(+), 32 deletions(-) diff --git a/zebra-state/src/service/finalized_state/disk_format/transparent.rs b/zebra-state/src/service/finalized_state/disk_format/transparent.rs index 37bc6c32feb..d75a280380e 100644 --- a/zebra-state/src/service/finalized_state/disk_format/transparent.rs +++ b/zebra-state/src/service/finalized_state/disk_format/transparent.rs @@ -5,7 +5,7 @@ //! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must //! be incremented each time the database format (column, serialization, etc) changes. -use std::fmt::Debug; +use std::{cmp::max, fmt::Debug}; use zebra_chain::{ amount::{self, Amount, NonNegative}, @@ -396,21 +396,29 @@ impl AddressTransaction { } /// Create an [`AddressTransaction`] which starts iteration for the supplied address. + /// Starts at the first UTXO, or at the `query_start` height, whichever is greater. + /// /// Used to look up the first transaction with [`ReadDisk::zs_next_key_value_from`]. /// - /// The transaction location is before all unspent output locations in the index. - /// It is always invalid, due to the genesis consensus rules. But this is not an issue - /// since [`ReadDisk::zs_next_key_value_from`] will fetch the next existing (valid) value. - pub fn address_iterator_start(address_location: AddressLocation) -> AddressTransaction { + /// The transaction location might be invalid, if it is based on the `query_start` height. + /// But this is not an issue, since [`ReadDisk::zs_next_key_value_from`] + /// will fetch the next existing (valid) value. + pub fn address_iterator_start( + address_location: AddressLocation, + query_start: Height, + ) -> AddressTransaction { // Iterating from the lowest possible transaction location gets us the first transaction. // // The address location is the output location of the first UTXO sent to the address, // and addresses can not spend funds until they receive their first UTXO. let first_utxo_location = address_location.transaction_location(); + // Iterating from the start height filters out transactions that aren't needed. + let query_start_location = TransactionLocation::from_usize(query_start, 0); + AddressTransaction { address_location, - transaction_location: first_utxo_location, + transaction_location: max(first_utxo_location, query_start_location), } } diff --git a/zebra-state/src/service/finalized_state/zebra_db/transparent.rs b/zebra-state/src/service/finalized_state/zebra_db/transparent.rs index 80c22c2cf95..6df20b962b0 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/transparent.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/transparent.rs @@ -11,10 +11,14 @@ //! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must //! be incremented each time the database format (column, serialization, etc) changes. -use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; +use std::{ + collections::{BTreeMap, BTreeSet, HashMap, HashSet}, + ops::RangeInclusive, +}; use zebra_chain::{ amount::{self, Amount, NonNegative}, + block::Height, transaction, transparent, }; @@ -177,19 +181,32 @@ impl ZebraDb { self.db.zs_get(&hash_by_tx_loc, &tx_location) } - /// Returns the [`transaction::Hash`]es that created or spent outputs for a [`transparent::Address`], - /// in chain order, if they are in the finalized state. - #[allow(dead_code)] + /// Returns the transaction IDs that sent or received funds to `address`, + /// in the finalized chain `query_height_range`. + /// + /// If address has no finalized sends or receives, + /// or the `query_height_range` is totally outside the finalized block range, + /// returns an empty list. pub fn address_tx_ids( &self, address: &transparent::Address, + query_height_range: RangeInclusive, ) -> BTreeMap { let address_location = match self.address_location(address) { Some(address_location) => address_location, None => return BTreeMap::new(), }; - let transaction_locations = self.address_transaction_locations(address_location); + // Skip this address if it was first used after the end height. + // + // The address location is the output location of the first UTXO sent to the address, + // and addresses can not spend funds until they receive their first UTXO. + if address_location.height() > *query_height_range.end() { + return BTreeMap::new(); + } + + let transaction_locations = + self.address_transaction_locations(address_location, query_height_range); transaction_locations .iter() @@ -205,10 +222,10 @@ impl ZebraDb { /// Returns the locations of any transactions that sent or received from a [`transparent::Address`], /// if they are in the finalized state. - #[allow(dead_code)] pub fn address_transaction_locations( &self, address_location: AddressLocation, + query_height_range: RangeInclusive, ) -> BTreeSet { let tx_loc_by_transparent_addr_loc = self.db.cf_handle("tx_loc_by_transparent_addr_loc").unwrap(); @@ -216,8 +233,12 @@ impl ZebraDb { // Manually fetch the entire addresses' transaction locations let mut addr_transactions = BTreeSet::new(); - // A valid key representing the first UTXO send to the address - let mut transaction_location = AddressTransaction::address_iterator_start(address_location); + // A potentially invalid key representing the first UTXO send to the address, + // or the query start height. + let mut transaction_location = AddressTransaction::address_iterator_start( + address_location, + *query_height_range.start(), + ); loop { // Seek to a valid entry for this address, or the first entry for the next address @@ -235,6 +256,11 @@ impl ZebraDb { break; } + // We're past the end height, so we're finished with this query + if transaction_location.transaction_location().height > *query_height_range.end() { + break; + } + addr_transactions.insert(transaction_location); // A potentially invalid key representing the next possible output @@ -293,9 +319,12 @@ impl ZebraDb { .collect() } - /// Returns the transaction IDs that sent or received funds to `addresses` in the finalized chain. + /// Returns the transaction IDs that sent or received funds to `addresses`, + /// in the finalized chain `query_height_range`. /// - /// If none of the addresses has finalized sends or receives, returns an empty list. + /// If none of the addresses has finalized sends or receives, + /// or the `query_height_range` is totally outside the finalized block range, + /// returns an empty list. /// /// # Correctness /// @@ -314,10 +343,11 @@ impl ZebraDb { pub fn partial_finalized_transparent_tx_ids( &self, addresses: &HashSet, + query_height_range: RangeInclusive, ) -> BTreeMap { addresses .iter() - .flat_map(|address| self.address_tx_ids(address)) + .flat_map(|address| self.address_tx_ids(address, query_height_range.clone())) .collect() } } diff --git a/zebra-state/src/service/read.rs b/zebra-state/src/service/read.rs index 6d7a666ab2b..28dcd7d5b12 100644 --- a/zebra-state/src/service/read.rs +++ b/zebra-state/src/service/read.rs @@ -440,16 +440,16 @@ where } /// Returns the transaction IDs that sent or received funds from the supplied [`transparent::Address`]es, -/// within `height_range`, in chain order. +/// within `query_height_range`, in chain order. /// /// If the addresses do not exist in the non-finalized `chain` or finalized `db`, -/// or the `height_range` is totally outside both the `chain` and `db` range, +/// or the `query_height_range` is totally outside both the `chain` and `db` range, /// returns an empty list. pub(crate) fn transparent_tx_ids( chain: Option, db: &ZebraDb, addresses: HashSet, - height_range: RangeInclusive, + query_height_range: RangeInclusive, ) -> Result, BoxError> where C: AsRef, @@ -459,18 +459,21 @@ where // Retry the finalized tx ID query if it was interruped by a finalizing block, // and the non-finalized chain doesn't overlap the changed heights. for _ in 0..=FINALIZED_ADDRESS_INDEX_RETRIES { - let (finalized_tx_ids, finalized_tip_range) = finalized_transparent_tx_ids(db, &addresses); + let (finalized_tx_ids, finalized_tip_range) = + finalized_transparent_tx_ids(db, &addresses, query_height_range.clone()); // Apply the non-finalized tx ID changes. - let chain_tx_id_changes = - chain_transparent_tx_id_changes(chain.as_ref(), &addresses, finalized_tip_range); + let chain_tx_id_changes = chain_transparent_tx_id_changes( + chain.as_ref(), + &addresses, + finalized_tip_range, + query_height_range.clone(), + ); // If the tx IDs are valid, return them, otherwise, retry or return an error. match chain_tx_id_changes { Ok(chain_tx_id_changes) => { - // TODO: do the height_range filter in the queries, to improve performance - let tx_ids = - apply_tx_id_changes(finalized_tx_ids, chain_tx_id_changes, height_range); + let tx_ids = apply_tx_id_changes(finalized_tx_ids, chain_tx_id_changes); return Ok(tx_ids); } @@ -482,7 +485,7 @@ where tx_id_error.expect("unexpected missing error: attempts should set error or return") } -/// Returns the [`transaction::Hash`]es for `addresses` in the finalized chain, +/// Returns the [`transaction::Hash`]es for `addresses` in the finalized chain `query_height_range`, /// and the finalized tip heights the transaction IDs were queried at. /// /// If the addresses do not exist in the finalized `db`, returns an empty list. @@ -491,6 +494,7 @@ where fn finalized_transparent_tx_ids( db: &ZebraDb, addresses: &HashSet, + query_height_range: RangeInclusive, ) -> ( BTreeMap, Option>, @@ -502,7 +506,7 @@ fn finalized_transparent_tx_ids( // Check if the finalized state changed while we were querying it let start_finalized_tip = db.finalized_tip_height(); - let finalized_tx_ids = db.partial_finalized_transparent_tx_ids(addresses); + let finalized_tx_ids = db.partial_finalized_transparent_tx_ids(addresses, query_height_range); let end_finalized_tip = db.finalized_tip_height(); @@ -528,6 +532,7 @@ fn chain_transparent_tx_id_changes( chain: Option, addresses: &HashSet, finalized_tip_range: Option>, + _query_height_range: RangeInclusive, ) -> Result, BoxError> where C: AsRef, @@ -606,18 +611,15 @@ where Ok(chain.partial_transparent_tx_ids(addresses)) } -/// Returns the combined the supplied finalized and non-finalized transaction IDs, -/// filtered by `height_range`. +/// Returns the combined finalized and non-finalized transaction IDs. fn apply_tx_id_changes( finalized_tx_ids: BTreeMap, chain_tx_ids: BTreeMap, - height_range: RangeInclusive, ) -> BTreeMap { // Correctness: compensate for inconsistent tx IDs finalized blocks across multiple addresses, // by combining them with overalapping non-finalized block tx IDs. finalized_tx_ids .into_iter() .chain(chain_tx_ids.into_iter()) - .filter(|(tx_loc, _tx_hash)| height_range.contains(&tx_loc.height)) .collect() } From 5fa6f39cc0f2eff3569adecbeb97a8a35a92ce80 Mon Sep 17 00:00:00 2001 From: teor Date: Wed, 20 Apr 2022 16:21:34 +1000 Subject: [PATCH 10/11] Filter non-finalized address transaction IDs by the height range --- .../src/service/non_finalized_state/chain.rs | 10 +++--- .../non_finalized_state/chain/index.rs | 32 ++++++++++++------- zebra-state/src/service/read.rs | 20 +++++++++--- 3 files changed, 41 insertions(+), 21 deletions(-) diff --git a/zebra-state/src/service/non_finalized_state/chain.rs b/zebra-state/src/service/non_finalized_state/chain.rs index be7992d67c9..d0959728167 100644 --- a/zebra-state/src/service/non_finalized_state/chain.rs +++ b/zebra-state/src/service/non_finalized_state/chain.rs @@ -4,7 +4,7 @@ use std::{ cmp::Ordering, collections::{BTreeMap, BTreeSet, HashMap, HashSet}, - ops::Deref, + ops::{Deref, RangeInclusive}, sync::Arc, }; @@ -13,7 +13,7 @@ use tracing::instrument; use zebra_chain::{ amount::{Amount, NegativeAllowed, NonNegative}, - block, + block::{self, Height}, history_tree::HistoryTree, orchard, parameters::Network, @@ -573,7 +573,8 @@ impl Chain { (created_utxos, spent_utxos) } - /// Returns the [`transaction::Hash`]es used by `addresses` to receive or spend funds. + /// Returns the [`transaction::Hash`]es used by `addresses` to receive or spend funds, + /// in the non-finalized chain, filtered using the `query_height_range`. /// /// If none of the addresses receive or spend funds in this partial chain, returns an empty list. /// @@ -594,9 +595,10 @@ impl Chain { pub fn partial_transparent_tx_ids( &self, addresses: &HashSet, + query_height_range: RangeInclusive, ) -> BTreeMap { self.partial_transparent_indexes(addresses) - .flat_map(|transfers| transfers.tx_ids(&self.tx_by_hash)) + .flat_map(|transfers| transfers.tx_ids(&self.tx_by_hash, query_height_range.clone())) .collect() } diff --git a/zebra-state/src/service/non_finalized_state/chain/index.rs b/zebra-state/src/service/non_finalized_state/chain/index.rs index f1f73523fb6..24bfe5f6f9f 100644 --- a/zebra-state/src/service/non_finalized_state/chain/index.rs +++ b/zebra-state/src/service/non_finalized_state/chain/index.rs @@ -1,11 +1,15 @@ //! Transparent address indexes for non-finalized chains. -use std::collections::{BTreeMap, BTreeSet, HashMap}; +use std::{ + collections::{BTreeMap, BTreeSet, HashMap}, + ops::RangeInclusive, +}; use mset::MultiSet; use zebra_chain::{ amount::{Amount, NegativeAllowed}, + block::Height, transaction, transparent, }; @@ -205,29 +209,33 @@ impl TransparentTransfers { self.balance } - /// Returns the [`transaction::Hash`]es of the transactions that - /// sent or received transparent transfers to this address, - /// in this partial chain, in chain order. + /// Returns the [`transaction::Hash`]es of the transactions that sent or received + /// transparent transfers to this address, in this partial chain, filtered by `query_height_range`. + /// + /// The transactions are returned in chain order. /// /// `chain_tx_by_hash` should be the `tx_by_hash` field from the [`Chain`] containing this index. /// /// # Panics /// /// If `chain_tx_by_hash` is missing some transaction hashes from this index. - #[allow(dead_code)] pub fn tx_ids( &self, chain_tx_by_hash: &HashMap, + query_height_range: RangeInclusive, ) -> BTreeMap { self.tx_ids .distinct_elements() - .map(|tx_hash| { - ( - *chain_tx_by_hash - .get(tx_hash) - .expect("all hashes are indexed"), - *tx_hash, - ) + .filter_map(|tx_hash| { + let tx_loc = *chain_tx_by_hash + .get(tx_hash) + .expect("all hashes are indexed"); + + if query_height_range.contains(&tx_loc.height) { + Some((tx_loc, *tx_hash)) + } else { + None + } }) .collect() } diff --git a/zebra-state/src/service/read.rs b/zebra-state/src/service/read.rs index 28dcd7d5b12..3039baac012 100644 --- a/zebra-state/src/service/read.rs +++ b/zebra-state/src/service/read.rs @@ -37,6 +37,12 @@ pub use utxo::AddressUtxos; /// If any more arrive, the client should wait until we're synchronised with our peers. const FINALIZED_ADDRESS_INDEX_RETRIES: usize = 3; +/// The full range of address heights. +/// +/// The genesis coinbase transactions are ignored by a consensus rule, +/// so they are not included in any address indexes. +pub const ADDRESS_HEIGHTS_FULL_RANGE: RangeInclusive = Height(1)..=Height::MAX; + /// Returns the [`Block`] with [`block::Hash`](zebra_chain::block::Hash) or /// [`Height`](zebra_chain::block::Height), /// if it exists in the non-finalized `chain` or finalized `db`. @@ -421,7 +427,11 @@ where let chain_tx_ids = chain .as_ref() - .map(|chain| chain.as_ref().partial_transparent_tx_ids(addresses)) + .map(|chain| { + chain + .as_ref() + .partial_transparent_tx_ids(addresses, ADDRESS_HEIGHTS_FULL_RANGE) + }) .unwrap_or_default(); // First try the in-memory chain, then the disk database @@ -522,8 +532,8 @@ fn finalized_transparent_tx_ids( (finalized_tx_ids, finalized_tip_range) } -/// Returns the extra transaction IDs for `addresses` in the non-finalized chain, -/// matching or overlapping the transaction IDs for the `finalized_tip_range`. +/// Returns the extra transaction IDs for `addresses` in the non-finalized chain `query_height_range`, +/// matching or overlapping the transaction IDs for the `finalized_tip_range`, /// /// If the addresses do not exist in the non-finalized `chain`, returns an empty list. // @@ -532,7 +542,7 @@ fn chain_transparent_tx_id_changes( chain: Option, addresses: &HashSet, finalized_tip_range: Option>, - _query_height_range: RangeInclusive, + query_height_range: RangeInclusive, ) -> Result, BoxError> where C: AsRef, @@ -608,7 +618,7 @@ where "tx ID query inconsistency: chain must contain required overlap blocks if there are multiple addresses", ); - Ok(chain.partial_transparent_tx_ids(addresses)) + Ok(chain.partial_transparent_tx_ids(addresses, query_height_range)) } /// Returns the combined finalized and non-finalized transaction IDs. From 9fc14901e28378090da8a245b767b44e59a091fa Mon Sep 17 00:00:00 2001 From: teor Date: Wed, 20 Apr 2022 16:22:59 +1000 Subject: [PATCH 11/11] Fix up snapshot tests for the new height range API --- .../zebra_db/block/tests/snapshot.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs index c2575504db7..0691aa09c02 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs @@ -45,11 +45,14 @@ use zebra_chain::{ }; use crate::{ - service::finalized_state::{ - disk_format::{ - block::TransactionIndex, transparent::OutputLocation, FromDisk, TransactionLocation, + service::{ + finalized_state::{ + disk_format::{ + block::TransactionIndex, transparent::OutputLocation, FromDisk, TransactionLocation, + }, + FinalizedState, }, - FinalizedState, + read::ADDRESS_HEIGHTS_FULL_RANGE, }, Config, }; @@ -495,7 +498,9 @@ fn snapshot_transparent_address_data(state: &FinalizedState, height: u32) { } let mut stored_transaction_locations = Vec::new(); - for transaction_location in state.address_transaction_locations(stored_address_location) { + for transaction_location in + state.address_transaction_locations(stored_address_location, ADDRESS_HEIGHTS_FULL_RANGE) + { assert_eq!( transaction_location.address_location(), stored_address_location