diff --git a/zebra-rpc/src/methods.rs b/zebra-rpc/src/methods.rs index 09cb120c951..1c1076b1583 100644 --- a/zebra-rpc/src/methods.rs +++ b/zebra-rpc/src/methods.rs @@ -647,7 +647,6 @@ where end: u32, ) -> BoxFuture>> { let mut state = self.state.clone(); - let mut response_transactions = vec![]; let start = Height(start); let end = Height(end); @@ -661,7 +660,7 @@ where // height range checks check_height_range(start, end, chain_height?)?; - let valid_addresses: Result> = addresses + let valid_addresses: Result> = addresses .iter() .map(|address| { address.parse().map_err(|_| { @@ -670,8 +669,10 @@ where }) .collect(); - let request = - zebra_state::ReadRequest::TransactionsByAddresses(valid_addresses?, start, end); + let request = zebra_state::ReadRequest::TransactionIdsByAddresses { + addresses: valid_addresses?, + height_range: start..=end, + }; let response = state .ready() .and_then(|service| service.call(request)) @@ -682,13 +683,14 @@ where data: None, })?; - match response { - zebra_state::ReadResponse::TransactionIds(hashes) => response_transactions - .append(&mut hashes.iter().map(|h| h.to_string()).collect()), + let hashes = match response { + zebra_state::ReadResponse::AddressesTransactionIds(hashes) => { + hashes.values().map(|tx_id| tx_id.to_string()).collect() + } _ => unreachable!("unmatched response to a TransactionsByAddresses request"), - } + }; - Ok(response_transactions) + Ok(hashes) } .boxed() } diff --git a/zebra-rpc/src/methods/tests/vectors.rs b/zebra-rpc/src/methods/tests/vectors.rs index d034cc0d54c..16c40aacae6 100644 --- a/zebra-rpc/src/methods/tests/vectors.rs +++ b/zebra-rpc/src/methods/tests/vectors.rs @@ -1,6 +1,6 @@ //! Fixed test vectors for RPC methods. -use std::sync::Arc; +use std::{ops::RangeInclusive, sync::Arc}; use jsonrpc_core::ErrorCode; use tower::buffer::Buffer; @@ -11,6 +11,7 @@ use zebra_chain::{ parameters::Network::*, serialization::{ZcashDeserializeInto, ZcashSerialize}, transaction::{UnminedTx, UnminedTxId}, + transparent, }; use zebra_network::constants::USER_AGENT; use zebra_node_services::BoxError; @@ -395,47 +396,89 @@ async fn rpc_getaddresstxids_invalid_arguments() { async fn rpc_getaddresstxids_response() { zebra_test::init(); - let blocks: Vec> = zebra_test::vectors::CONTINUOUS_MAINNET_BLOCKS + for network in [Mainnet, Testnet] { + let blocks: Vec> = match network { + Mainnet => &*zebra_test::vectors::CONTINUOUS_MAINNET_BLOCKS, + Testnet => &*zebra_test::vectors::CONTINUOUS_TESTNET_BLOCKS, + } .iter() .map(|(_height, block_bytes)| block_bytes.zcash_deserialize_into().unwrap()) .collect(); - // get the first transaction of the first block - let first_block_first_transaction = &blocks[1].transactions[0]; - // get the address, this is always `t3Vz22vK5z2LcKEdg16Yv4FFneEL1zg9ojd` - let address = &first_block_first_transaction.outputs()[1] - .address(Mainnet) - .unwrap(); + // The first few blocks after genesis send funds to the same founders reward address, + // in one output per coinbase transaction. + // + // Get the coinbase transaction of the first block + // (the genesis block coinbase transaction is ignored by the consensus rules). + let first_block_first_transaction = &blocks[1].transactions[0]; + + // Get the address. + let address = first_block_first_transaction.outputs()[1] + .address(network) + .unwrap(); + + if network == Mainnet { + // Exhaustively test possible block ranges for mainnet. + // + // TODO: if it takes too long on slower machines, turn this into a proptest with 10-20 cases + for start in 1..=10 { + for end in start..=10 { + rpc_getaddresstxids_response_with(network, start..=end, &blocks, &address) + .await; + } + } + } else { + // Just test the full range for testnet. + rpc_getaddresstxids_response_with(network, 1..=10, &blocks, &address).await; + } + } +} +async fn rpc_getaddresstxids_response_with( + network: Network, + range: RangeInclusive, + blocks: &[Arc], + address: &transparent::Address, +) { let mut mempool: MockService<_, _, _, BoxError> = MockService::build().for_unit_tests(); // Create a populated state service let (_state, read_state, latest_chain_tip, _chain_tip_change) = - zebra_state::populated_state(blocks.clone(), Mainnet).await; + zebra_state::populated_state(blocks.to_owned(), network).await; let (rpc, rpc_tx_queue_task_handle) = RpcImpl::new( "RPC test", Buffer::new(mempool.clone(), 1), Buffer::new(read_state.clone(), 1), latest_chain_tip, - Mainnet, + network, ); // call the method with valid arguments let addresses = vec![address.to_string()]; - let start: u32 = 1; - let end: u32 = 1; let response = rpc - .get_address_tx_ids(addresses, start, end) + .get_address_tx_ids(addresses, *range.start(), *range.end()) .await .expect("arguments are valid so no error can happen here"); - // TODO: The length of the response should be 1 - // Fix in the context of #3147 - assert_eq!(response.len(), 0); + // One founders reward output per coinbase transactions, no other transactions. + assert_eq!(response.len(), range.count()); mempool.expect_no_requests().await; - // The queue task should continue without errors or panics + // Shut down the queue task, to close the state's file descriptors. + // (If we don't, opening ~100 simultaneous states causes process file descriptor limit errors.) + // + // TODO: abort all the join handles in all the tests, except one? + rpc_tx_queue_task_handle.abort(); + + // The queue task should not have panicked or exited by itself. + // It can still be running, or it can have exited due to the abort. let rpc_tx_queue_task_result = rpc_tx_queue_task_handle.now_or_never(); - assert!(matches!(rpc_tx_queue_task_result, None)); + assert!( + rpc_tx_queue_task_result.is_none() + || rpc_tx_queue_task_result + .unwrap() + .unwrap_err() + .is_cancelled() + ); } diff --git a/zebra-state/src/request.rs b/zebra-state/src/request.rs index 55f10cce2ca..7630e92e3b9 100644 --- a/zebra-state/src/request.rs +++ b/zebra-state/src/request.rs @@ -2,6 +2,7 @@ use std::{ collections::{HashMap, HashSet}, + ops::RangeInclusive, sync::Arc, }; @@ -438,19 +439,27 @@ pub enum ReadRequest { /// * [`Response::Transaction(None)`](Response::Transaction) otherwise. Transaction(transaction::Hash), - /// Looks up transactions hashes that were made by provided addresses in a blockchain height range. + /// Looks up the balance of a set of transparent addresses. + /// + /// Returns an [`Amount`] with the total balance of the set of addresses. + AddressBalance(HashSet), + + /// Looks up transaction hashes that sent or received from addresses, + /// in an inclusive blockchain height range. /// /// Returns /// - /// * A vector of transaction hashes. + /// * A set of transaction hashes. /// * An empty vector if no transactions were found for the given arguments. /// - /// Returned txids are in the order they appear in blocks, which ensures that they are topologically sorted + /// Returned txids are in the order they appear in blocks, + /// which ensures that they are topologically sorted /// (i.e. parent txids will appear before child txids). - TransactionsByAddresses(Vec, block::Height, block::Height), + TransactionIdsByAddresses { + /// The requested addresses. + addresses: HashSet, - /// Looks up the balance of a set of transparent addresses. - /// - /// Returns an [`Amount`] with the total balance of the set of addresses. - AddressBalance(HashSet), + /// The blocks to be queried for transactions. + height_range: RangeInclusive, + }, } diff --git a/zebra-state/src/response.rs b/zebra-state/src/response.rs index d2d25cd1fee..4e9797e571d 100644 --- a/zebra-state/src/response.rs +++ b/zebra-state/src/response.rs @@ -1,11 +1,11 @@ //! State [`tower::Service`] response types. -use std::sync::Arc; +use std::{collections::BTreeMap, sync::Arc}; use zebra_chain::{ amount::{Amount, NonNegative}, block::{self, Block}, - transaction::{Hash, Transaction}, + transaction::{self, Transaction}, transparent, }; @@ -13,6 +13,7 @@ use zebra_chain::{ // will work with inline links. #[allow(unused_imports)] use crate::Request; +use crate::TransactionLocation; #[derive(Clone, Debug, PartialEq, Eq)] /// A response to a [`StateService`] [`Request`]. @@ -55,10 +56,10 @@ pub enum ReadResponse { /// Response to [`ReadRequest::Transaction`] with the specified transaction. Transaction(Option<(Arc, block::Height)>), - /// Response to [`ReadRequest::TransactionsByAddresses`] with the obtained transaction ids, - /// in the order they appear in blocks. - TransactionIds(Vec), - /// Response to [`ReadRequest::AddressBalance`] with the total balance of the addresses. AddressBalance(Amount), + + /// Response to [`ReadRequest::TransactionIdsByAddresses`] with the obtained transaction ids, + /// in the order they appear in blocks. + AddressesTransactionIds(BTreeMap), } diff --git a/zebra-state/src/service.rs b/zebra-state/src/service.rs index 3195a6e4f03..3cd3fbff508 100644 --- a/zebra-state/src/service.rs +++ b/zebra-state/src/service.rs @@ -993,24 +993,25 @@ impl Service for ReadStateService { } // For the get_address_tx_ids RPC. - ReadRequest::TransactionsByAddresses(_addresses, _start, _end) => { + ReadRequest::TransactionIdsByAddresses { + addresses, + height_range, + } => { metrics::counter!( "state.requests", 1, "service" => "read_state", - "type" => "transactions_by_addresses", + "type" => "transaction_ids_by_addresses", ); - let _state = self.clone(); + let state = self.clone(); async move { - // TODO: Respond with found transactions - // At least the following pull requests should be merged: - // - #4022 - // - #4038 - // Do the corresponding update in the context of #3147 - let transaction_ids = vec![]; - Ok(ReadResponse::TransactionIds(transaction_ids)) + let tx_ids = state.best_chain_receiver.with_watch_data(|best_chain| { + read::transparent_tx_ids(best_chain, &state.db, addresses, height_range) + }); + + tx_ids.map(ReadResponse::AddressesTransactionIds) } .boxed() } diff --git a/zebra-state/src/service/finalized_state/disk_format/transparent.rs b/zebra-state/src/service/finalized_state/disk_format/transparent.rs index 5641a2d6847..d75a280380e 100644 --- a/zebra-state/src/service/finalized_state/disk_format/transparent.rs +++ b/zebra-state/src/service/finalized_state/disk_format/transparent.rs @@ -5,7 +5,7 @@ //! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must //! be incremented each time the database format (column, serialization, etc) changes. -use std::fmt::Debug; +use std::{cmp::max, fmt::Debug}; use zebra_chain::{ amount::{self, Amount, NonNegative}, @@ -396,18 +396,29 @@ impl AddressTransaction { } /// Create an [`AddressTransaction`] which starts iteration for the supplied address. + /// Starts at the first UTXO, or at the `query_start` height, whichever is greater. + /// /// Used to look up the first transaction with [`ReadDisk::zs_next_key_value_from`]. /// - /// The transaction location is before all unspent output locations in the index. - /// It is always invalid, due to the genesis consensus rules. But this is not an issue - /// since [`ReadDisk::zs_next_key_value_from`] will fetch the next existing (valid) value. - pub fn address_iterator_start(address_location: AddressLocation) -> AddressTransaction { + /// The transaction location might be invalid, if it is based on the `query_start` height. + /// But this is not an issue, since [`ReadDisk::zs_next_key_value_from`] + /// will fetch the next existing (valid) value. + pub fn address_iterator_start( + address_location: AddressLocation, + query_start: Height, + ) -> AddressTransaction { // Iterating from the lowest possible transaction location gets us the first transaction. - let zero_transaction_location = TransactionLocation::from_usize(Height(0), 0); + // + // The address location is the output location of the first UTXO sent to the address, + // and addresses can not spend funds until they receive their first UTXO. + let first_utxo_location = address_location.transaction_location(); + + // Iterating from the start height filters out transactions that aren't needed. + let query_start_location = TransactionLocation::from_usize(query_start, 0); AddressTransaction { address_location, - transaction_location: zero_transaction_location, + transaction_location: max(first_utxo_location, query_start_location), } } diff --git a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs index c2575504db7..0691aa09c02 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs @@ -45,11 +45,14 @@ use zebra_chain::{ }; use crate::{ - service::finalized_state::{ - disk_format::{ - block::TransactionIndex, transparent::OutputLocation, FromDisk, TransactionLocation, + service::{ + finalized_state::{ + disk_format::{ + block::TransactionIndex, transparent::OutputLocation, FromDisk, TransactionLocation, + }, + FinalizedState, }, - FinalizedState, + read::ADDRESS_HEIGHTS_FULL_RANGE, }, Config, }; @@ -495,7 +498,9 @@ fn snapshot_transparent_address_data(state: &FinalizedState, height: u32) { } let mut stored_transaction_locations = Vec::new(); - for transaction_location in state.address_transaction_locations(stored_address_location) { + for transaction_location in + state.address_transaction_locations(stored_address_location, ADDRESS_HEIGHTS_FULL_RANGE) + { assert_eq!( transaction_location.address_location(), stored_address_location diff --git a/zebra-state/src/service/finalized_state/zebra_db/transparent.rs b/zebra-state/src/service/finalized_state/zebra_db/transparent.rs index c11b8f94226..6df20b962b0 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/transparent.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/transparent.rs @@ -11,10 +11,14 @@ //! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must //! be incremented each time the database format (column, serialization, etc) changes. -use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; +use std::{ + collections::{BTreeMap, BTreeSet, HashMap, HashSet}, + ops::RangeInclusive, +}; use zebra_chain::{ amount::{self, Amount, NonNegative}, + block::Height, transaction, transparent, }; @@ -146,7 +150,7 @@ impl ZebraDb { let mut unspent_output = AddressUnspentOutput::address_iterator_start(address_location); loop { - // A valid key representing an entry for this address or the next + // Seek to a valid entry for this address, or the first entry for the next address unspent_output = match self .db .zs_next_key_value_from(&utxo_loc_by_transparent_addr_loc, &unspent_output) @@ -177,19 +181,32 @@ impl ZebraDb { self.db.zs_get(&hash_by_tx_loc, &tx_location) } - /// Returns the [`transaction::Hash`]es that created or spent outputs for a [`transparent::Address`], - /// in chain order, if they are in the finalized state. - #[allow(dead_code)] + /// Returns the transaction IDs that sent or received funds to `address`, + /// in the finalized chain `query_height_range`. + /// + /// If address has no finalized sends or receives, + /// or the `query_height_range` is totally outside the finalized block range, + /// returns an empty list. pub fn address_tx_ids( &self, address: &transparent::Address, + query_height_range: RangeInclusive, ) -> BTreeMap { let address_location = match self.address_location(address) { Some(address_location) => address_location, None => return BTreeMap::new(), }; - let transaction_locations = self.address_transaction_locations(address_location); + // Skip this address if it was first used after the end height. + // + // The address location is the output location of the first UTXO sent to the address, + // and addresses can not spend funds until they receive their first UTXO. + if address_location.height() > *query_height_range.end() { + return BTreeMap::new(); + } + + let transaction_locations = + self.address_transaction_locations(address_location, query_height_range); transaction_locations .iter() @@ -205,10 +222,10 @@ impl ZebraDb { /// Returns the locations of any transactions that sent or received from a [`transparent::Address`], /// if they are in the finalized state. - #[allow(dead_code)] pub fn address_transaction_locations( &self, address_location: AddressLocation, + query_height_range: RangeInclusive, ) -> BTreeSet { let tx_loc_by_transparent_addr_loc = self.db.cf_handle("tx_loc_by_transparent_addr_loc").unwrap(); @@ -216,16 +233,20 @@ impl ZebraDb { // Manually fetch the entire addresses' transaction locations let mut addr_transactions = BTreeSet::new(); - // An invalid key representing the minimum possible transaction - let mut transaction_location = AddressTransaction::address_iterator_start(address_location); + // A potentially invalid key representing the first UTXO send to the address, + // or the query start height. + let mut transaction_location = AddressTransaction::address_iterator_start( + address_location, + *query_height_range.start(), + ); loop { - // A valid key representing an entry for this address or the next + // Seek to a valid entry for this address, or the first entry for the next address transaction_location = match self .db .zs_next_key_value_from(&tx_loc_by_transparent_addr_loc, &transaction_location) { - Some((unspent_output, ())) => unspent_output, + Some((transaction_location, ())) => transaction_location, // We're finished with the final address in the column family None => break, }; @@ -235,6 +256,11 @@ impl ZebraDb { break; } + // We're past the end height, so we're finished with this query + if transaction_location.transaction_location().height > *query_height_range.end() { + break; + } + addr_transactions.insert(transaction_location); // A potentially invalid key representing the next possible output @@ -292,6 +318,38 @@ impl ZebraDb { .flat_map(|address| self.address_utxos(address)) .collect() } + + /// Returns the transaction IDs that sent or received funds to `addresses`, + /// in the finalized chain `query_height_range`. + /// + /// If none of the addresses has finalized sends or receives, + /// or the `query_height_range` is totally outside the finalized block range, + /// returns an empty list. + /// + /// # Correctness + /// + /// Callers should combine the non-finalized transactions for `addresses` + /// with the returned transactions. + /// + /// The transaction IDs will only be correct if the non-finalized chain matches or overlaps with + /// the finalized state. + /// + /// Specifically, a block in the partial chain must be a child block of the finalized tip. + /// (But the child block does not have to be the partial chain root.) + /// + /// This condition does not apply if there is only one address. + /// Since address transactions are only appended by blocks, and this query reads them in order, + /// it is impossible to get inconsistent transactions for a single address. + pub fn partial_finalized_transparent_tx_ids( + &self, + addresses: &HashSet, + query_height_range: RangeInclusive, + ) -> BTreeMap { + addresses + .iter() + .flat_map(|address| self.address_tx_ids(address, query_height_range.clone())) + .collect() + } } impl DiskWriteBatch { diff --git a/zebra-state/src/service/non_finalized_state/chain.rs b/zebra-state/src/service/non_finalized_state/chain.rs index be7992d67c9..d0959728167 100644 --- a/zebra-state/src/service/non_finalized_state/chain.rs +++ b/zebra-state/src/service/non_finalized_state/chain.rs @@ -4,7 +4,7 @@ use std::{ cmp::Ordering, collections::{BTreeMap, BTreeSet, HashMap, HashSet}, - ops::Deref, + ops::{Deref, RangeInclusive}, sync::Arc, }; @@ -13,7 +13,7 @@ use tracing::instrument; use zebra_chain::{ amount::{Amount, NegativeAllowed, NonNegative}, - block, + block::{self, Height}, history_tree::HistoryTree, orchard, parameters::Network, @@ -573,7 +573,8 @@ impl Chain { (created_utxos, spent_utxos) } - /// Returns the [`transaction::Hash`]es used by `addresses` to receive or spend funds. + /// Returns the [`transaction::Hash`]es used by `addresses` to receive or spend funds, + /// in the non-finalized chain, filtered using the `query_height_range`. /// /// If none of the addresses receive or spend funds in this partial chain, returns an empty list. /// @@ -594,9 +595,10 @@ impl Chain { pub fn partial_transparent_tx_ids( &self, addresses: &HashSet, + query_height_range: RangeInclusive, ) -> BTreeMap { self.partial_transparent_indexes(addresses) - .flat_map(|transfers| transfers.tx_ids(&self.tx_by_hash)) + .flat_map(|transfers| transfers.tx_ids(&self.tx_by_hash, query_height_range.clone())) .collect() } diff --git a/zebra-state/src/service/non_finalized_state/chain/index.rs b/zebra-state/src/service/non_finalized_state/chain/index.rs index f1f73523fb6..24bfe5f6f9f 100644 --- a/zebra-state/src/service/non_finalized_state/chain/index.rs +++ b/zebra-state/src/service/non_finalized_state/chain/index.rs @@ -1,11 +1,15 @@ //! Transparent address indexes for non-finalized chains. -use std::collections::{BTreeMap, BTreeSet, HashMap}; +use std::{ + collections::{BTreeMap, BTreeSet, HashMap}, + ops::RangeInclusive, +}; use mset::MultiSet; use zebra_chain::{ amount::{Amount, NegativeAllowed}, + block::Height, transaction, transparent, }; @@ -205,29 +209,33 @@ impl TransparentTransfers { self.balance } - /// Returns the [`transaction::Hash`]es of the transactions that - /// sent or received transparent transfers to this address, - /// in this partial chain, in chain order. + /// Returns the [`transaction::Hash`]es of the transactions that sent or received + /// transparent transfers to this address, in this partial chain, filtered by `query_height_range`. + /// + /// The transactions are returned in chain order. /// /// `chain_tx_by_hash` should be the `tx_by_hash` field from the [`Chain`] containing this index. /// /// # Panics /// /// If `chain_tx_by_hash` is missing some transaction hashes from this index. - #[allow(dead_code)] pub fn tx_ids( &self, chain_tx_by_hash: &HashMap, + query_height_range: RangeInclusive, ) -> BTreeMap { self.tx_ids .distinct_elements() - .map(|tx_hash| { - ( - *chain_tx_by_hash - .get(tx_hash) - .expect("all hashes are indexed"), - *tx_hash, - ) + .filter_map(|tx_hash| { + let tx_loc = *chain_tx_by_hash + .get(tx_hash) + .expect("all hashes are indexed"); + + if query_height_range.contains(&tx_loc.height) { + Some((tx_loc, *tx_hash)) + } else { + None + } }) .collect() } diff --git a/zebra-state/src/service/read.rs b/zebra-state/src/service/read.rs index b791ba5ba7d..3039baac012 100644 --- a/zebra-state/src/service/read.rs +++ b/zebra-state/src/service/read.rs @@ -37,6 +37,12 @@ pub use utxo::AddressUtxos; /// If any more arrive, the client should wait until we're synchronised with our peers. const FINALIZED_ADDRESS_INDEX_RETRIES: usize = 3; +/// The full range of address heights. +/// +/// The genesis coinbase transactions are ignored by a consensus rule, +/// so they are not included in any address indexes. +pub const ADDRESS_HEIGHTS_FULL_RANGE: RangeInclusive = Height(1)..=Height::MAX; + /// Returns the [`Block`] with [`block::Hash`](zebra_chain::block::Hash) or /// [`Height`](zebra_chain::block::Height), /// if it exists in the non-finalized `chain` or finalized `db`. @@ -421,7 +427,11 @@ where let chain_tx_ids = chain .as_ref() - .map(|chain| chain.as_ref().partial_transparent_tx_ids(addresses)) + .map(|chain| { + chain + .as_ref() + .partial_transparent_tx_ids(addresses, ADDRESS_HEIGHTS_FULL_RANGE) + }) .unwrap_or_default(); // First try the in-memory chain, then the disk database @@ -438,3 +448,188 @@ where }) .collect() } + +/// Returns the transaction IDs that sent or received funds from the supplied [`transparent::Address`]es, +/// within `query_height_range`, in chain order. +/// +/// If the addresses do not exist in the non-finalized `chain` or finalized `db`, +/// or the `query_height_range` is totally outside both the `chain` and `db` range, +/// returns an empty list. +pub(crate) fn transparent_tx_ids( + chain: Option, + db: &ZebraDb, + addresses: HashSet, + query_height_range: RangeInclusive, +) -> Result, BoxError> +where + C: AsRef, +{ + let mut tx_id_error = None; + + // Retry the finalized tx ID query if it was interruped by a finalizing block, + // and the non-finalized chain doesn't overlap the changed heights. + for _ in 0..=FINALIZED_ADDRESS_INDEX_RETRIES { + let (finalized_tx_ids, finalized_tip_range) = + finalized_transparent_tx_ids(db, &addresses, query_height_range.clone()); + + // Apply the non-finalized tx ID changes. + let chain_tx_id_changes = chain_transparent_tx_id_changes( + chain.as_ref(), + &addresses, + finalized_tip_range, + query_height_range.clone(), + ); + + // If the tx IDs are valid, return them, otherwise, retry or return an error. + match chain_tx_id_changes { + Ok(chain_tx_id_changes) => { + let tx_ids = apply_tx_id_changes(finalized_tx_ids, chain_tx_id_changes); + + return Ok(tx_ids); + } + + Err(error) => tx_id_error = Some(Err(error)), + } + } + + tx_id_error.expect("unexpected missing error: attempts should set error or return") +} + +/// Returns the [`transaction::Hash`]es for `addresses` in the finalized chain `query_height_range`, +/// and the finalized tip heights the transaction IDs were queried at. +/// +/// If the addresses do not exist in the finalized `db`, returns an empty list. +// +// TODO: turn the return type into a struct? +fn finalized_transparent_tx_ids( + db: &ZebraDb, + addresses: &HashSet, + query_height_range: RangeInclusive, +) -> ( + BTreeMap, + Option>, +) { + // # Correctness + // + // The StateService can commit additional blocks while we are querying transaction IDs. + + // Check if the finalized state changed while we were querying it + let start_finalized_tip = db.finalized_tip_height(); + + let finalized_tx_ids = db.partial_finalized_transparent_tx_ids(addresses, query_height_range); + + let end_finalized_tip = db.finalized_tip_height(); + + let finalized_tip_range = if let (Some(start_finalized_tip), Some(end_finalized_tip)) = + (start_finalized_tip, end_finalized_tip) + { + Some(start_finalized_tip..=end_finalized_tip) + } else { + // State is empty + None + }; + + (finalized_tx_ids, finalized_tip_range) +} + +/// Returns the extra transaction IDs for `addresses` in the non-finalized chain `query_height_range`, +/// matching or overlapping the transaction IDs for the `finalized_tip_range`, +/// +/// If the addresses do not exist in the non-finalized `chain`, returns an empty list. +// +// TODO: turn the return type into a struct? +fn chain_transparent_tx_id_changes( + chain: Option, + addresses: &HashSet, + finalized_tip_range: Option>, + query_height_range: RangeInclusive, +) -> Result, BoxError> +where + C: AsRef, +{ + let finalized_tip_range = match finalized_tip_range { + Some(finalized_tip_range) => finalized_tip_range, + None => { + assert!( + chain.is_none(), + "unexpected non-finalized chain when finalized state is empty" + ); + + // Empty chains don't contain any tx IDs. + return Ok(Default::default()); + } + }; + + // # Correctness + // + // The StateService commits blocks to the finalized state before updating the latest chain, + // and it can commit additional blocks after we've cloned this `chain` variable. + // + // But we can compensate for addresses with mismatching blocks, + // by adding the overlapping non-finalized transaction IDs. + // + // If there is only one address, mismatches aren't possible, + // because tx IDs are added to the finalized state in chain order (and never removed), + // and they are queried in chain order. + + // Check if the finalized and non-finalized states match or overlap + let required_min_chain_root = finalized_tip_range.start().0 + 1; + let mut required_chain_overlap = required_min_chain_root..=finalized_tip_range.end().0; + + if chain.is_none() { + if required_chain_overlap.is_empty() || addresses.len() <= 1 { + // The non-finalized chain is empty, and we don't need it. + return Ok(Default::default()); + } else { + // We can't compensate for inconsistent database queries, + // because the non-finalized chain is empty. + return Err("unable to get tx IDs: state was committing a block, and non-finalized chain is empty".into()); + } + } + + let chain = chain.unwrap(); + let chain = chain.as_ref(); + + let chain_root = chain.non_finalized_root_height().0; + let chain_tip = chain.non_finalized_tip_height().0; + + assert!( + chain_root <= required_min_chain_root, + "unexpected chain gap: the best chain is updated after its previous root is finalized" + ); + + // If we've already committed this entire chain, ignore its UTXO changes. + // This is more likely if the non-finalized state is just getting started. + if chain_tip > *required_chain_overlap.end() { + if required_chain_overlap.is_empty() || addresses.len() <= 1 { + // The non-finalized chain has been committed, and we don't need it. + return Ok(Default::default()); + } else { + // We can't compensate for inconsistent database queries, + // because the non-finalized chain is below the inconsistent query range. + return Err("unable to get tx IDs: state was committing a block, and non-finalized chain has been committed".into()); + } + } + + // Correctness: some finalized tx IDs might have come from different blocks for different addresses, + // but we've just checked they can be corrected by applying the non-finalized UTXO changes. + assert!( + required_chain_overlap.all(|height| chain.blocks.contains_key(&Height(height))) || addresses.len() <= 1, + "tx ID query inconsistency: chain must contain required overlap blocks if there are multiple addresses", + ); + + Ok(chain.partial_transparent_tx_ids(addresses, query_height_range)) +} + +/// Returns the combined finalized and non-finalized transaction IDs. +fn apply_tx_id_changes( + finalized_tx_ids: BTreeMap, + chain_tx_ids: BTreeMap, +) -> BTreeMap { + // Correctness: compensate for inconsistent tx IDs finalized blocks across multiple addresses, + // by combining them with overalapping non-finalized block tx IDs. + finalized_tx_ids + .into_iter() + .chain(chain_tx_ids.into_iter()) + .collect() +}