From 78aa2b2e71ee684d90abf36aa3f2c9a8b18e5fd5 Mon Sep 17 00:00:00 2001 From: Mihir Nanavati <3020417+mihirn@users.noreply.github.com> Date: Wed, 25 Oct 2023 18:17:34 -0400 Subject: [PATCH] histogram: change downsampling to take target grouping power (#87) The existing downsampling interface takes a reduction factor and downsamples the histogram accordingly. Change this to take the target grouping power, rather than the factor. --- histogram/Cargo.toml | 2 +- histogram/src/sparse.rs | 20 ++++++++++---------- histogram/src/standard.rs | 21 ++++++++++----------- 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/histogram/Cargo.toml b/histogram/Cargo.toml index b4a2f84..5ca0e36 100644 --- a/histogram/Cargo.toml +++ b/histogram/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "histogram" -version = "0.8.2" +version = "0.8.3" edition = "2021" authors = ["Brian Martin "] license = "MIT OR Apache-2.0" diff --git a/histogram/src/sparse.rs b/histogram/src/sparse.rs index 09cbee0..15c6dc3 100644 --- a/histogram/src/sparse.rs +++ b/histogram/src/sparse.rs @@ -124,22 +124,20 @@ impl SparseHistogram { }) } - /// Returns a new histogram with a reduced grouping power. The specified - /// reduction factor should be 0 < factor < existing grouping power. + /// Returns a new histogram with a reduced grouping power. The reduced + /// grouping power should lie in the range (0..existing grouping power). /// /// This works by iterating over every bucket in the existing histogram /// and inserting the contained values into the new histogram. While we /// do not know the exact values of the data points (only that they lie /// within the bucket's range), it does not matter since the bucket is /// not split during downsampling and any value can be used. - pub fn downsample(&self, factor: u8) -> Result { - let grouping_power = self.config.grouping_power(); - - if factor == 0 || grouping_power <= factor { + pub fn downsample(&self, grouping_power: u8) -> Result { + if grouping_power >= self.config.grouping_power() { return Err(Error::MaxPowerTooLow); } - let config = Config::new(grouping_power - factor, self.config.max_value_power())?; + let config = Config::new(grouping_power, self.config.max_value_power())?; let mut histogram = SparseHistogram::with_config(&config); // Multiple buckets in the old histogram will map to the same bucket @@ -305,9 +303,11 @@ mod tests { compare_histograms(&histogram, &hsparse); // Downsample and compare heck the percentiles lie within error margin - for factor in 1..7 { - let h1 = histogram.downsample(factor).unwrap(); - let h2 = hsparse.downsample(factor).unwrap(); + let grouping_power = histogram.config.grouping_power(); + for factor in 1..grouping_power { + let reduced_gp = grouping_power - factor; + let h1 = histogram.downsample(reduced_gp).unwrap(); + let h2 = hsparse.downsample(reduced_gp).unwrap(); compare_histograms(&h1, &h2); } } diff --git a/histogram/src/standard.rs b/histogram/src/standard.rs index a1ebdcc..00cd17d 100644 --- a/histogram/src/standard.rs +++ b/histogram/src/standard.rs @@ -130,11 +130,11 @@ impl Histogram { } } - /// Returns a new histogram with a reduced grouping power. The specified - /// reduction factor should be 0 < factor < existing grouping power. + /// Returns a new histogram with a reduced grouping power. The reduced + /// grouping power should lie in the range (0..existing grouping power). /// - /// The specified factor determines how much the grouping power is reduced - /// by, with every step of grouping power approximately halves the total + /// The difference in grouping powers determines how much histogram size + /// is reduced by, with every step approximately halving the total /// number of buckets (and hence total size of the histogram), while /// doubling the relative error. /// @@ -143,14 +143,12 @@ impl Histogram { /// do not know the exact values of the data points (only that they lie /// within the bucket's range), it does not matter since the bucket is /// not split during downsampling and any value can be used. - pub fn downsample(&self, factor: u8) -> Result { - let grouping_power = self.config.grouping_power(); - - if factor == 0 || grouping_power <= factor { + pub fn downsample(&self, grouping_power: u8) -> Result { + if grouping_power >= self.config.grouping_power() { return Err(Error::MaxPowerTooLow); } - let mut histogram = Histogram::new(grouping_power - factor, self.config.max_value_power())?; + let mut histogram = Histogram::new(grouping_power, self.config.max_value_power())?; for (i, n) in self.as_slice().iter().enumerate() { // Skip empty buckets if *n != 0 { @@ -369,7 +367,8 @@ mod tests { // Downsample and check the percentiles lie within error margin let h = histogram.clone(); - for factor in 1..7 { + let grouping_power = histogram.config.grouping_power(); + for factor in 1..grouping_power { let error = histogram.config.error(); for p in &percentiles { @@ -381,7 +380,7 @@ mod tests { assert!(e < error); } - histogram = h.downsample(factor).unwrap(); + histogram = h.downsample(grouping_power - factor).unwrap(); } }