Skip to content

Commit

Permalink
histogram: change downsampling to take target grouping power (#87)
Browse files Browse the repository at this point in the history
The existing downsampling interface takes a reduction factor and
downsamples the histogram accordingly. Change this to take the
target grouping power, rather than the factor.
  • Loading branch information
mihirn authored Oct 25, 2023
1 parent 7dae1f0 commit 78aa2b2
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 22 deletions.
2 changes: 1 addition & 1 deletion histogram/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "histogram"
version = "0.8.2"
version = "0.8.3"
edition = "2021"
authors = ["Brian Martin <brian@pelikan.io>"]
license = "MIT OR Apache-2.0"
Expand Down
20 changes: 10 additions & 10 deletions histogram/src/sparse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,22 +124,20 @@ impl SparseHistogram {
})
}

/// Returns a new histogram with a reduced grouping power. The specified
/// reduction factor should be 0 < factor < existing grouping power.
/// Returns a new histogram with a reduced grouping power. The reduced
/// grouping power should lie in the range (0..existing grouping power).
///
/// This works by iterating over every bucket in the existing histogram
/// and inserting the contained values into the new histogram. While we
/// do not know the exact values of the data points (only that they lie
/// within the bucket's range), it does not matter since the bucket is
/// not split during downsampling and any value can be used.
pub fn downsample(&self, factor: u8) -> Result<SparseHistogram, Error> {
let grouping_power = self.config.grouping_power();

if factor == 0 || grouping_power <= factor {
pub fn downsample(&self, grouping_power: u8) -> Result<SparseHistogram, Error> {
if grouping_power >= self.config.grouping_power() {
return Err(Error::MaxPowerTooLow);
}

let config = Config::new(grouping_power - factor, self.config.max_value_power())?;
let config = Config::new(grouping_power, self.config.max_value_power())?;
let mut histogram = SparseHistogram::with_config(&config);

// Multiple buckets in the old histogram will map to the same bucket
Expand Down Expand Up @@ -305,9 +303,11 @@ mod tests {
compare_histograms(&histogram, &hsparse);

// Downsample and check the percentiles lie within error margin
for factor in 1..7 {
let h1 = histogram.downsample(factor).unwrap();
let h2 = hsparse.downsample(factor).unwrap();
let grouping_power = histogram.config.grouping_power();
for factor in 1..grouping_power {
let reduced_gp = grouping_power - factor;
let h1 = histogram.downsample(reduced_gp).unwrap();
let h2 = hsparse.downsample(reduced_gp).unwrap();
compare_histograms(&h1, &h2);
}
}
Expand Down
21 changes: 10 additions & 11 deletions histogram/src/standard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,11 @@ impl Histogram {
}
}

/// Returns a new histogram with a reduced grouping power. The specified
/// reduction factor should be 0 < factor < existing grouping power.
/// Returns a new histogram with a reduced grouping power. The reduced
/// grouping power should lie in the range (0..existing grouping power).
///
/// The specified factor determines how much the grouping power is reduced
/// by, with every step of grouping power approximately halves the total
/// The difference in grouping powers determines how much the histogram
/// size is reduced, with every step approximately halving the total
/// number of buckets (and hence total size of the histogram), while
/// doubling the relative error.
///
Expand All @@ -143,14 +143,12 @@ impl Histogram {
/// do not know the exact values of the data points (only that they lie
/// within the bucket's range), it does not matter since the bucket is
/// not split during downsampling and any value can be used.
pub fn downsample(&self, factor: u8) -> Result<Histogram, Error> {
let grouping_power = self.config.grouping_power();

if factor == 0 || grouping_power <= factor {
pub fn downsample(&self, grouping_power: u8) -> Result<Histogram, Error> {
if grouping_power >= self.config.grouping_power() {
return Err(Error::MaxPowerTooLow);
}

let mut histogram = Histogram::new(grouping_power - factor, self.config.max_value_power())?;
let mut histogram = Histogram::new(grouping_power, self.config.max_value_power())?;
for (i, n) in self.as_slice().iter().enumerate() {
// Skip empty buckets
if *n != 0 {
Expand Down Expand Up @@ -369,7 +367,8 @@ mod tests {

// Downsample and check the percentiles lie within error margin
let h = histogram.clone();
for factor in 1..7 {
let grouping_power = histogram.config.grouping_power();
for factor in 1..grouping_power {
let error = histogram.config.error();

for p in &percentiles {
Expand All @@ -381,7 +380,7 @@ mod tests {
assert!(e < error);
}

histogram = h.downsample(factor).unwrap();
histogram = h.downsample(grouping_power - factor).unwrap();
}
}

Expand Down

0 comments on commit 78aa2b2

Please sign in to comment.