From 9d0037bbd2b26003a745a64c0598d8741d972cae Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sat, 28 Aug 2021 16:50:23 +0800 Subject: [PATCH 1/7] Handle usize overflow on big cache capacity - Remove the multiply operation (`max_capacity * 32`) when setting the capacity of the popularity estimator (`FrequencySketch`). - The implementation of the estimator was replaced in an older version of Moka and the multiply operation was not really needed since then. - Add code to ensure that the capacity of the popularity estimator always fits within a range of `128u32..=u32::MAX`. - Change the type of the internal fields of the estimator from `usize` to `u32` so that the estimator will work consistency on any platform with different pointer width. --- src/common/frequency_sketch.rs | 28 ++++++++++++++++++---------- src/sync/base_cache.rs | 8 +++++++- src/unsync/cache.rs | 8 +++++++- 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/common/frequency_sketch.rs b/src/common/frequency_sketch.rs index 413ac02c..187d61c5 100644 --- a/src/common/frequency_sketch.rs +++ b/src/common/frequency_sketch.rs @@ -15,10 +15,10 @@ /// a time window. The maximum frequency of an element is limited to 15 (4-bits) /// and an aging process periodically halves the popularity of all elements. pub(crate) struct FrequencySketch { - sample_size: usize, - table_mask: usize, + sample_size: u32, + table_mask: u32, table: Vec, - size: usize, + size: u32, } // A mixture of seeds from FNV-1a, CityHash, and Murmur3. (Taken from Caffeine) @@ -68,19 +68,27 @@ static ONE_MASK: u64 = 0x1111_1111_1111_1111; impl FrequencySketch { /// Creates a frequency sketch with the capacity. - pub(crate) fn with_capacity(cap: usize) -> Self { - let maximum = cap.min((i32::MAX >> 1) as usize); + pub(crate) fn with_capacity(cap: u32) -> Self { + // The max byte size of the table `[u64; table_size]`: + // - 8GiB for 32-bit or 64-bit addressing. (Can track about one billion keys) + // - 8KiB for 16-bit addressing. (Can track about one thousand keys). + let maximum = if cfg!(target_pointer_width = "16") { + cap.min(1024) + } else { + // target_pointer_width will be 32, 64 or larger. + cap.min((i32::MAX >> 1) as u32) + }; let table_size = if maximum == 0 { 1 } else { maximum.next_power_of_two() }; - let table = vec![0; table_size]; + let table = vec![0; table_size as usize]; let table_mask = 0.max(table_size - 1); let sample_size = if cap == 0 { 10 } else { - maximum.saturating_mul(10).min(i32::MAX as usize) + maximum.saturating_mul(10).min(i32::MAX as u32) }; Self { sample_size, @@ -146,7 +154,7 @@ impl FrequencySketch { count += (*entry & ONE_MASK).count_ones(); *entry = (*entry >> 1) & RESET_MASK; } - self.size = (self.size >> 1) - (count >> 2) as usize; + self.size = (self.size >> 1) - (count >> 2); } /// Returns the table index for the counter at the specified depth. @@ -154,7 +162,7 @@ impl FrequencySketch { let i = depth as usize; let mut hash = hash.wrapping_add(SEED[i]).wrapping_mul(SEED[i]); hash += hash >> 32; - hash as usize & self.table_mask + (hash as u32 & self.table_mask) as usize } } @@ -229,7 +237,7 @@ mod tests { let mut sketch = FrequencySketch::with_capacity(64); let hasher = hasher(); - for i in 1..(20 * sketch.table.len()) { + for i in 1..(20 * sketch.table.len() as u32) { sketch.increment(hasher(i)); if sketch.size != i { reset = true; diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index 167fbbed..5bdb3a50 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -19,6 +19,7 @@ use quanta::{Clock, Instant}; use std::{ borrow::Borrow, collections::hash_map::RandomState, + convert::TryInto, hash::{BuildHasher, Hash, Hasher}, ptr::NonNull, rc::Rc, @@ -392,7 +393,12 @@ where initial_capacity, build_hasher.clone(), ); - let skt_capacity = usize::max(max_capacity * 32, 100); + + // Ensure skt_capacity fits in a range of `128u32..=u32::MAX`. + let skt_capacity = max_capacity + .try_into() // Convert to u32. + .unwrap_or(u32::MAX) + .max(128); let frequency_sketch = FrequencySketch::with_capacity(skt_capacity); Self { diff --git a/src/unsync/cache.rs b/src/unsync/cache.rs index ca4128ca..27d3d4c2 100644 --- a/src/unsync/cache.rs +++ b/src/unsync/cache.rs @@ -9,6 +9,7 @@ use quanta::{Clock, Instant}; use std::{ borrow::Borrow, collections::{hash_map::RandomState, HashMap}, + convert::TryInto, hash::{BuildHasher, Hash, Hasher}, ptr::NonNull, rc::Rc, @@ -153,7 +154,12 @@ where initial_capacity.unwrap_or_default(), build_hasher.clone(), ); - let skt_capacity = usize::max(max_capacity * 32, 100); + + // Ensure skt_capacity fits in a range of `128u32..=u32::MAX`. + let skt_capacity = max_capacity + .try_into() // Convert to u32. + .unwrap_or(u32::MAX) + .max(128); let frequency_sketch = FrequencySketch::with_capacity(skt_capacity); Self { max_capacity, From 7cadf647016b41f1a1d52c9a5a81dbf65501fa06 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sat, 28 Aug 2021 17:10:47 +0800 Subject: [PATCH 2/7] cargo fmt --- src/common/frequency_sketch.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/frequency_sketch.rs b/src/common/frequency_sketch.rs index 187d61c5..05434a26 100644 --- a/src/common/frequency_sketch.rs +++ b/src/common/frequency_sketch.rs @@ -71,7 +71,7 @@ impl FrequencySketch { pub(crate) fn with_capacity(cap: u32) -> Self { // The max byte size of the table `[u64; table_size]`: // - 8GiB for 32-bit or 64-bit addressing. (Can track about one billion keys) - // - 8KiB for 16-bit addressing. (Can track about one thousand keys). + // - 8KiB for 16-bit addressing. (Can track about one thousand keys). let maximum = if cfg!(target_pointer_width = "16") { cap.min(1024) } else { From 7c755653942ca9db7431f5376a8444466a4da005 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sat, 28 Aug 2021 18:08:11 +0800 Subject: [PATCH 3/7] Handle usize overflow on big cache capacity - Reduce the max size of the popularity estimator on platforms with 32-bit addressing. - Change the type of the internal table of the estimator from `Vec` to `Box<[u64]>`. --- src/common/frequency_sketch.rs | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/common/frequency_sketch.rs b/src/common/frequency_sketch.rs index 05434a26..ace2f03e 100644 --- a/src/common/frequency_sketch.rs +++ b/src/common/frequency_sketch.rs @@ -17,7 +17,7 @@ pub(crate) struct FrequencySketch { sample_size: u32, table_mask: u32, - table: Vec, + table: Box<[u64]>, size: u32, } @@ -69,21 +69,29 @@ static ONE_MASK: u64 = 0x1111_1111_1111_1111; impl FrequencySketch { /// Creates a frequency sketch with the capacity. pub(crate) fn with_capacity(cap: u32) -> Self { - // The max byte size of the table `[u64; table_size]`: - // - 8GiB for 32-bit or 64-bit addressing. (Can track about one billion keys) - // - 8KiB for 16-bit addressing. (Can track about one thousand keys). + // The max byte size of the table, Box<[u64; table_size]> + // + // | Pointer width | Max size | + // |:-----------------|---------:| + // | 16 bit | 8 KiB | + // | 32 bit | 128 MiB | + // | 64 bit or bigger | 8 GiB | + let maximum = if cfg!(target_pointer_width = "16") { cap.min(1024) + } else if cfg!(target_pointer_width = "32") { + cap.min(2u32.pow(24)) // about 16 millions } else { - // target_pointer_width will be 32, 64 or larger. - cap.min((i32::MAX >> 1) as u32) + // Same to Caffeine's limit: + // `Integer.MAX_VALUE >>> 1` with `ceilingPowerOfTwo()` applied. + cap.min(2u32.pow(30)) // about 1 billion }; let table_size = if maximum == 0 { 1 } else { maximum.next_power_of_two() }; - let table = vec![0; table_size as usize]; + let table = vec![0; table_size as usize].into_boxed_slice(); let table_mask = 0.max(table_size - 1); let sample_size = if cap == 0 { 10 @@ -149,7 +157,7 @@ impl FrequencySketch { /// Reduces every counter by half of its original value. fn reset(&mut self) { let mut count = 0u32; - for entry in &mut self.table { + for entry in self.table.iter_mut() { // Count number of odd numbers. count += (*entry & ONE_MASK).count_ones(); *entry = (*entry >> 1) & RESET_MASK; From cd2ef6b2deb2fa638c505ef28bf0485e524b7e80 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sat, 28 Aug 2021 19:23:05 +0800 Subject: [PATCH 4/7] Update the change log --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c1a0008c..1d79f47e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Moka — Change Log +## Version 0.5.2 + +### Fixed + +- Handle `usize` overflow on big cache capacity. ([#28][gh-pull-0028]) + + ## Version 0.5.1 ### Changed @@ -81,6 +88,7 @@ [caffeine-git]: https://github.com/ben-manes/caffeine +[gh-pull-0028]: https://github.com/moka-rs/moka/pull/28/ [gh-pull-0022]: https://github.com/moka-rs/moka/pull/22/ [gh-pull-0020]: https://github.com/moka-rs/moka/pull/20/ [gh-pull-0019]: https://github.com/moka-rs/moka/pull/19/ From 968898a4867b5730a0c52aa3e698f8ce37e2dc6c Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sun, 29 Aug 2021 17:15:52 +0800 Subject: [PATCH 5/7] Handle usize overflow on big cache capacity - Add unit test to ensure overflows not to happen and the sizes of the population estimators are correct. --- src/common/frequency_sketch.rs | 8 +++++ src/sync/base_cache.rs | 59 ++++++++++++++++++++++++++++++++++ src/unsync/cache.rs | 40 +++++++++++++++++++++++ 3 files changed, 107 insertions(+) diff --git a/src/common/frequency_sketch.rs b/src/common/frequency_sketch.rs index ace2f03e..d244dbb8 100644 --- a/src/common/frequency_sketch.rs +++ b/src/common/frequency_sketch.rs @@ -174,6 +174,14 @@ impl FrequencySketch { } } +// Methods only available for testing. +#[cfg(test)] +impl FrequencySketch { + pub(crate) fn table_len(&self) -> usize { + self.table.len() + } +} + // Some test cases were ported from Caffeine at: // https://github.com/ben-manes/caffeine/blob/master/caffeine/src/test/java/com/github/benmanes/caffeine/cache/FrequencySketchTest.java // diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index 5bdb3a50..ab87c1a9 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -1069,3 +1069,62 @@ fn is_expired_entry_wo( } false } + +#[cfg(test)] +mod tests { + use super::BaseCache; + + #[cfg_attr(target_pointer_width = "16", ignore)] + #[test] + fn test_skt_capacity_will_not_overflow() { + use std::collections::hash_map::RandomState; + + // power of two + let pot = |exp| 2_usize.pow(exp); + + let ensure_sketch_len = |max_capacity, len, name| { + let cache = BaseCache::::new( + max_capacity, + None, + RandomState::default(), + None, + None, + false, + ); + assert_eq!( + cache.inner.frequency_sketch.read().table_len(), + len, + "{}", + name + ); + }; + + if cfg!(target_pointer_width = "32") { + let pot24 = pot(24); + let pot16 = pot(16); + ensure_sketch_len(0, 128, "0"); + ensure_sketch_len(128, 128, "128"); + ensure_sketch_len(pot16, pot16, "pot16"); + // due to ceiling to next_power_of_two + ensure_sketch_len(pot16 + 1, pot(17), "pot16 + 1"); + // due to ceiling to next_power_of_two + ensure_sketch_len(pot24 - 1, pot24, "pot24 - 1"); + ensure_sketch_len(pot24, pot24, "pot24"); + ensure_sketch_len(pot(27), pot24, "pot(27)"); + ensure_sketch_len(usize::MAX, pot24, "usize::MAX"); + } else { + // target_pointer_width: 64 or larger. + let pot30 = pot(30); + let pot16 = pot(16); + ensure_sketch_len(0, 128, "0"); + ensure_sketch_len(128, 128, "128"); + ensure_sketch_len(pot16, pot16, "pot16"); + // due to ceiling to next_power_of_two + ensure_sketch_len(pot16 + 1, pot(17), "pot16 + 1"); + // due to ceiling to next_power_of_two + ensure_sketch_len(pot30 - 1, pot30, "pot30- 1"); + ensure_sketch_len(pot30, pot30, "pot30"); + ensure_sketch_len(usize::MAX, pot30, "usize::MAX"); + }; + } +} diff --git a/src/unsync/cache.rs b/src/unsync/cache.rs index 27d3d4c2..94d0ac03 100644 --- a/src/unsync/cache.rs +++ b/src/unsync/cache.rs @@ -772,4 +772,44 @@ mod tests { assert_eq!(cache.get(&"b"), None); assert!(cache.cache.is_empty()); } + + #[cfg_attr(target_pointer_width = "16", ignore)] + #[test] + fn test_skt_capacity_will_not_overflow() { + // power of two + let pot = |exp| 2_usize.pow(exp); + + let ensure_sketch_len = |max_capacity, len, name| { + let cache = Cache::::new(max_capacity); + assert_eq!(cache.frequency_sketch.table_len(), len, "{}", name); + }; + + if cfg!(target_pointer_width = "32") { + let pot24 = pot(24); + let pot16 = pot(16); + ensure_sketch_len(0, 128, "0"); + ensure_sketch_len(128, 128, "128"); + ensure_sketch_len(pot16, pot16, "pot16"); + // due to ceiling to next_power_of_two + ensure_sketch_len(pot16 + 1, pot(17), "pot16 + 1"); + // due to ceiling to next_power_of_two + ensure_sketch_len(pot24 - 1, pot24, "pot24 - 1"); + ensure_sketch_len(pot24, pot24, "pot24"); + ensure_sketch_len(pot(27), pot24, "pot(27)"); + ensure_sketch_len(usize::MAX, pot24, "usize::MAX"); + } else { + // target_pointer_width: 64 or larger. + let pot30 = pot(30); + let pot16 = pot(16); + ensure_sketch_len(0, 128, "0"); + ensure_sketch_len(128, 128, "128"); + ensure_sketch_len(pot16, pot16, "pot16"); + // due to ceiling to next_power_of_two + ensure_sketch_len(pot16 + 1, pot(17), "pot16 + 1"); + // due to ceiling to next_power_of_two + ensure_sketch_len(pot30 - 1, pot30, "pot30- 1"); + ensure_sketch_len(pot30, pot30, "pot30"); + ensure_sketch_len(usize::MAX, pot30, "usize::MAX"); + }; + } } From 766f8043cea9f5b29446a300b77f936f5f0c42cb Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sun, 29 Aug 2021 19:33:01 +0800 Subject: [PATCH 6/7] A cosmetic change on the change log --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d79f47e..ef120682 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ ### Fixed -- Handle `usize` overflow on big cache capacity. ([#28][gh-pull-0028]) +- `usize` overflow on big cache capacity. ([#28][gh-pull-0028]) ## Version 0.5.1 From 950ec1b8a3a855a6654e86b72925097a18053132 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sun, 29 Aug 2021 19:45:22 +0800 Subject: [PATCH 7/7] Handle usize overflow on big cache capacity - Not a bug but for clarity, avoid a lossy cast from `u64` to `u32` before applying the table mask. --- src/common/frequency_sketch.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/frequency_sketch.rs b/src/common/frequency_sketch.rs index d244dbb8..44abc795 100644 --- a/src/common/frequency_sketch.rs +++ b/src/common/frequency_sketch.rs @@ -16,7 +16,7 @@ /// and an aging process periodically halves the popularity of all elements. pub(crate) struct FrequencySketch { sample_size: u32, - table_mask: u32, + table_mask: u64, table: Box<[u64]>, size: u32, } @@ -92,7 +92,7 @@ impl FrequencySketch { maximum.next_power_of_two() }; let table = vec![0; table_size as usize].into_boxed_slice(); - let table_mask = 0.max(table_size - 1); + let table_mask = 0.max(table_size - 1) as u64; let sample_size = if cap == 0 { 10 } else { @@ -170,7 +170,7 @@ impl FrequencySketch { let i = depth as usize; let mut hash = hash.wrapping_add(SEED[i]).wrapping_mul(SEED[i]); hash += hash >> 32; - (hash as u32 & self.table_mask) as usize + (hash & self.table_mask) as usize } }