From 4ce486ab058e6a26e6d392574f23a30bcd8287fd Mon Sep 17 00:00:00 2001 From: Mihir Nanavati Date: Tue, 17 Oct 2023 21:23:45 -0400 Subject: [PATCH] histogram: skip empty buckets while downsampling Skipping empty buckets significantly speeds up downsampling for sparse histograms. For a histogram with a config of (7, 64) and 100 samples, this results in around a 5x speedup. --- histogram/src/standard.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/histogram/src/standard.rs b/histogram/src/standard.rs index 776426d1..ee849053 100644 --- a/histogram/src/standard.rs +++ b/histogram/src/standard.rs @@ -134,8 +134,8 @@ impl Histogram { /// reduction factor should be 0 < factor < existing grouping power. /// /// The specified factor determines how much the grouping power is reduced - /// by, with every step of grouping power approximately halvomh the total - /// number of buckets (and hence total size of thie histogram), while + /// by, with every step of grouping power approximately halving the total + /// number of buckets (and hence total size of the histogram), while /// doubling the relative error. /// /// This works by iterating over every bucket in the existing histogram @@ -152,8 +152,11 @@ impl Histogram { let mut histogram = Histogram::new(grouping_power - factor, self.config.max_value_power())?; for (i, n) in self.as_slice().iter().enumerate() { - let val = self.config.index_to_lower_bound(i); - histogram.add(val, *n)?; + // Skip empty buckets + if *n != 0 { + let val = self.config.index_to_lower_bound(i); + histogram.add(val, *n)?; + } } Ok(histogram)