v1.18: Cleans up stale accounts hash cache files (backport of solana-labs#34933) (solana-labs#34938)

Cleans up stale accounts hash cache files (solana-labs#34933)

(cherry picked from commit 5898b9a)

Co-authored-by: Brooks <brooks@solana.com>
mergify[bot] and brooksprumo authored Jan 25, 2024
1 parent 2c5f24a commit d5d98fa
Showing 2 changed files with 114 additions and 23 deletions.
16 changes: 11 additions & 5 deletions accounts-db/src/accounts_db.rs
@@ -7537,6 +7537,7 @@ impl AccountsDb {
config: &CalcAccountsHashConfig<'_>,
kind: CalcAccountsHashKind,
slot: Slot,
storages_start_slot: Slot,
) -> CacheHashData {
let accounts_hash_cache_path = if !config.store_detailed_debug_info_on_failure {
accounts_hash_cache_path
@@ -7548,7 +7549,10 @@
_ = std::fs::remove_dir_all(&failed_dir);
failed_dir
};
CacheHashData::new(accounts_hash_cache_path, kind == CalcAccountsHashKind::Full)
CacheHashData::new(
accounts_hash_cache_path,
(kind == CalcAccountsHashKind::Incremental).then_some(storages_start_slot),
)
}

// modeled after calculate_accounts_delta_hash
@@ -7607,7 +7611,8 @@
) -> Result<(AccountsHashKind, u64), AccountsHashVerificationError> {
let total_time = Measure::start("");
let _guard = self.active_stats.activate(ActiveStatItem::Hash);
stats.oldest_root = storages.range().start;
let storages_start_slot = storages.range().start;
stats.oldest_root = storages_start_slot;

self.mark_old_slots_as_dirty(storages, config.epoch_schedule.slots_per_epoch, &mut stats);

@@ -7623,7 +7628,8 @@
accounts_hash_cache_path,
config,
kind,
slot
slot,
storages_start_slot,
));
stats.cache_hash_data_us += cache_hash_data_us;

@@ -9769,7 +9775,7 @@ pub mod tests {
let temp_dir = TempDir::new().unwrap();
let accounts_hash_cache_path = temp_dir.path().to_path_buf();
self.scan_snapshot_stores_with_cache(
&CacheHashData::new(accounts_hash_cache_path, true),
&CacheHashData::new(accounts_hash_cache_path, None),
storage,
stats,
bins,
@@ -10837,7 +10843,7 @@ pub mod tests {
};

let result = accounts_db.scan_account_storage_no_bank(
&CacheHashData::new(accounts_hash_cache_path, true),
&CacheHashData::new(accounts_hash_cache_path, None),
&CalcAccountsHashConfig::default(),
&get_storage_refs(&[storage]),
test_scan,
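The substantive change in accounts_db.rs is the second argument to `CacheHashData::new`: the old `bool` ("should delete old cache files on drop") is replaced by an `Option<Slot>` carrying the start slot of the storages being hashed. The following is a minimal, self-contained sketch of how that value is derived, using simplified stand-ins for the crate's `CalcAccountsHashKind` and `Slot` types rather than the real imports:

```rust
// Sketch only: simplified stand-ins for crate types, mirroring the
// `(kind == CalcAccountsHashKind::Incremental).then_some(...)` call in the diff.
type Slot = u64;

#[derive(PartialEq, Eq)]
enum CalcAccountsHashKind {
    Full,
    Incremental,
}

/// Full calculations pass `None` (every unused pre-existing cache file may be
/// deleted); incremental calculations pass `Some(start_slot)` so deletion can
/// be limited to files inside the incremental range.
fn cache_start_slot(kind: CalcAccountsHashKind, storages_start_slot: Slot) -> Option<Slot> {
    (kind == CalcAccountsHashKind::Incremental).then_some(storages_start_slot)
}

fn main() {
    assert_eq!(cache_start_slot(CalcAccountsHashKind::Full, 100), None);
    assert_eq!(cache_start_slot(CalcAccountsHashKind::Incremental, 100), Some(100));
}
```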
121 changes: 103 additions & 18 deletions accounts-db/src/cache_hash_data.rs
@@ -6,6 +6,7 @@ use {
bytemuck::{Pod, Zeroable},
memmap2::MmapMut,
solana_measure::measure::Measure,
solana_sdk::clock::Slot,
std::{
collections::HashSet,
fs::{self, remove_file, File, OpenOptions},
@@ -192,7 +193,8 @@ impl CacheHashDataFile {
pub(crate) struct CacheHashData {
cache_dir: PathBuf,
pre_existing_cache_files: Arc<Mutex<HashSet<PathBuf>>>,
should_delete_old_cache_files_on_drop: bool,
/// Decides which old cache files to delete. See `delete_old_cache_files()` for more info.
storages_start_slot: Option<Slot>,
pub stats: Arc<CacheHashDataStats>,
}

@@ -204,18 +206,15 @@ impl Drop for CacheHashData {
}

impl CacheHashData {
pub(crate) fn new(
cache_dir: PathBuf,
should_delete_old_cache_files_on_drop: bool,
) -> CacheHashData {
pub(crate) fn new(cache_dir: PathBuf, storages_start_slot: Option<Slot>) -> CacheHashData {
std::fs::create_dir_all(&cache_dir).unwrap_or_else(|err| {
panic!("error creating cache dir {}: {err}", cache_dir.display())
});

let result = CacheHashData {
cache_dir,
pre_existing_cache_files: Arc::new(Mutex::new(HashSet::default())),
should_delete_old_cache_files_on_drop,
storages_start_slot,
stats: Arc::default(),
};

@@ -225,17 +224,35 @@ impl CacheHashData {

/// delete all pre-existing files that will not be used
pub(crate) fn delete_old_cache_files(&self) {
if self.should_delete_old_cache_files_on_drop {
let old_cache_files =
std::mem::take(&mut *self.pre_existing_cache_files.lock().unwrap());
if !old_cache_files.is_empty() {
self.stats
.unused_cache_files
.fetch_add(old_cache_files.len(), Ordering::Relaxed);
for file_name in old_cache_files.iter() {
let result = self.cache_dir.join(file_name);
let _ = fs::remove_file(result);
}
// all the remaining files in `pre_existing_cache_files` were *not* used for this
// accounts hash calculation
let mut old_cache_files =
std::mem::take(&mut *self.pre_existing_cache_files.lock().unwrap());

// If `storages_start_slot` is None, we're doing a full accounts hash calculation, and thus
// all unused cache files can be deleted.
// If `storages_start_slot` is Some, we're doing an incremental accounts hash calculation,
// and we only want to delete the unused cache files *that IAH considered*.
if let Some(storages_start_slot) = self.storages_start_slot {
old_cache_files.retain(|old_cache_file| {
let Some(parsed_filename) = parse_filename(old_cache_file) else {
// if parsing the cache filename fails, we *do* want to delete it
return true;
};

// if the old cache file is in the incremental accounts hash calculation range,
// then delete it
parsed_filename.slot_range_start >= storages_start_slot
});
}

if !old_cache_files.is_empty() {
self.stats
.unused_cache_files
.fetch_add(old_cache_files.len(), Ordering::Relaxed);
for file_name in old_cache_files.iter() {
let result = self.cache_dir.join(file_name);
let _ = fs::remove_file(result);
}
}
}
@@ -360,6 +377,39 @@
}
}

/// The values of each part of a cache hash data filename
#[derive(Debug)]
pub struct ParsedFilename {
pub slot_range_start: Slot,
pub slot_range_end: Slot,
pub bin_range_start: u64,
pub bin_range_end: u64,
pub hash: u64,
}

/// Parses a cache hash data filename into its parts
///
/// Returns None if the filename is invalid
fn parse_filename(cache_filename: impl AsRef<Path>) -> Option<ParsedFilename> {
let filename = cache_filename.as_ref().to_string_lossy().to_string();
let parts: Vec<_> = filename.split('.').collect(); // The parts are separated by a `.`
if parts.len() != 5 {
return None;
}
let slot_range_start = parts.first()?.parse().ok()?;
let slot_range_end = parts.get(1)?.parse().ok()?;
let bin_range_start = parts.get(2)?.parse().ok()?;
let bin_range_end = parts.get(3)?.parse().ok()?;
let hash = u64::from_str_radix(parts.get(4)?, 16).ok()?; // the hash is in hex
Some(ParsedFilename {
slot_range_start,
slot_range_end,
bin_range_start,
bin_range_end,
hash,
})
}

#[cfg(test)]
mod tests {
use {super::*, crate::accounts_hash::AccountHash, rand::Rng};
@@ -427,7 +477,7 @@ mod tests {
data_this_pass.push(this_bin_data);
}
}
let cache = CacheHashData::new(cache_dir.clone(), true);
let cache = CacheHashData::new(cache_dir.clone(), None);
let file_name = PathBuf::from("test");
cache.save(&file_name, &data_this_pass).unwrap();
cache.get_cache_files();
@@ -517,4 +567,39 @@
ct,
)
}

#[test]
fn test_parse_filename() {
let good_filename = "123.456.0.65536.537d65697d9b2baa";
let parsed_filename = parse_filename(good_filename).unwrap();
assert_eq!(parsed_filename.slot_range_start, 123);
assert_eq!(parsed_filename.slot_range_end, 456);
assert_eq!(parsed_filename.bin_range_start, 0);
assert_eq!(parsed_filename.bin_range_end, 65536);
assert_eq!(parsed_filename.hash, 0x537d65697d9b2baa);

let bad_filenames = [
// bad separator
"123-456-0-65536.537d65697d9b2baa",
// bad values
"abc.456.0.65536.537d65697d9b2baa",
"123.xyz.0.65536.537d65697d9b2baa",
"123.456.?.65536.537d65697d9b2baa",
"123.456.0.@#$%^.537d65697d9b2baa",
"123.456.0.65536.base19shouldfail",
"123.456.0.65536.123456789012345678901234567890",
// missing values
"123.456.0.65536.",
"123.456.0.65536",
// extra junk
"123.456.0.65536.537d65697d9b2baa.42",
"123.456.0.65536.537d65697d9b2baa.",
"123.456.0.65536.537d65697d9b2baa/",
".123.456.0.65536.537d65697d9b2baa",
"/123.456.0.65536.537d65697d9b2baa",
];
for bad_filename in bad_filenames {
assert!(parse_filename(bad_filename).is_none());
}
}
}
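For reference, the deletion rule introduced in `delete_old_cache_files` can be restated outside the diff: during an incremental accounts hash calculation, an unused pre-existing cache file named `<slot_start>.<slot_end>.<bin_start>.<bin_end>.<hash in hex>` is deleted only if it fails to parse or its slot range starts at or after the calculation's start slot. Below is a standalone sketch of that rule; note the filename check here is intentionally looser than the real `parse_filename`, which also validates the bin range and hex hash:

```rust
type Slot = u64;

/// Returns the slot-range start encoded in a cache filename, if the filename
/// has the expected five dot-separated parts and the first part is numeric.
fn slot_range_start(filename: &str) -> Option<Slot> {
    let parts: Vec<&str> = filename.split('.').collect();
    if parts.len() != 5 {
        return None;
    }
    parts[0].parse().ok()
}

/// Mirrors the `retain` closure above: unparsable files are deleted, and parsed
/// files are deleted only if their slot range starts inside the incremental
/// calculation range (i.e. at or after `storages_start_slot`).
fn should_delete(filename: &str, storages_start_slot: Slot) -> bool {
    match slot_range_start(filename) {
        None => true,
        Some(start) => start >= storages_start_slot,
    }
}

fn main() {
    // Incremental calculation whose storages start at slot 400:
    assert!(!should_delete("123.456.0.65536.537d65697d9b2baa", 400)); // 123 < 400: keep
    assert!(should_delete("500.600.0.65536.537d65697d9b2baa", 400)); // 500 >= 400: delete
    assert!(should_delete("not-a-cache-file", 400)); // unparsable: delete
}
```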
