From fdee0b67d8c9d84cce7df5f5497bbc611f36ba5b Mon Sep 17 00:00:00 2001 From: Kenny <3454741+WarriorOfWire@users.noreply.github.com> Date: Thu, 12 Jan 2023 21:19:18 -0800 Subject: [PATCH 1/2] replace unwrap() with expect("unique message") unwrap() gives no clues where to look when a bug arises. This change replaces common tdigest unwrap() calls with expect("message"). This does the same thing as unwrap(), it just provides more context for users to report with bugs. Fwiw, I find that explaining an expectation in rust can sometimes show me that the expectation is unreasonable. I can't come up with good explanations for some of these, but I gave an effort to at least provide a first-approximation of what the expectation at work is. Any refinements to what these expectations are actually trying to assert would be welcome! --- crates/t-digest/src/lib.rs | 29 +++++++++++++++-------------- extension/src/tdigest.rs | 12 ++++++------ 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/crates/t-digest/src/lib.rs b/crates/t-digest/src/lib.rs index 7de8be44..f9c51190 100644 --- a/crates/t-digest/src/lib.rs +++ b/crates/t-digest/src/lib.rs @@ -232,7 +232,7 @@ impl TDigest { max: self.max(), centroids: centroids.to_vec(), }) - .unwrap() + .expect("could not create a ron string") } } @@ -293,8 +293,8 @@ impl TDigest { let mut result = TDigest::new_with_size(self.max_size()); result.count = self.count() + (sorted_values.len() as u64); - let maybe_min = OrderedFloat::from(*sorted_values.first().unwrap()); - let maybe_max = OrderedFloat::from(*sorted_values.last().unwrap()); + let maybe_min = OrderedFloat::from(*sorted_values.first().expect("invalid sorted pair - missing first")); + let maybe_max = OrderedFloat::from(*sorted_values.last().expect("invalid sorted pair - missing second")); if self.count() > 0 { result.min = std::cmp::min(self.min, maybe_min); @@ -315,14 +315,14 @@ impl TDigest { let mut iter_sorted_values = sorted_values.iter().peekable(); let mut curr: Centroid = if let Some(c) = iter_centroids.peek() { - let curr = **iter_sorted_values.peek().unwrap(); + let curr = **iter_sorted_values.peek().expect("sorted value list must not be empty"); if c.mean() < curr { - iter_centroids.next().unwrap().clone() + iter_centroids.next().expect("next centroid to clone must exist").clone() } else { - Centroid::new(*iter_sorted_values.next().unwrap(), 1) + Centroid::new(*iter_sorted_values.next().expect("next sorted value to use as centroid mean must exist"), 1) } } else { - Centroid::new(*iter_sorted_values.next().unwrap(), 1) + Centroid::new(*iter_sorted_values.next().expect("next value after an empty value must exist (oh?)"), 1) }; let mut weight_so_far: u64 = curr.weight(); @@ -333,14 +333,14 @@ impl TDigest { while iter_centroids.peek().is_some() || iter_sorted_values.peek().is_some() { let next: Centroid = if let Some(c) = iter_centroids.peek() { if iter_sorted_values.peek().is_none() - || c.mean() < **iter_sorted_values.peek().unwrap() + || c.mean() < **iter_sorted_values.peek().expect("this was already checked in this condition") { - iter_centroids.next().unwrap().clone() + iter_centroids.next().expect("the next centroid after an empty sorted value must exist (but the loop says ||)").clone() } else { - Centroid::new(*iter_sorted_values.next().unwrap(), 1) + Centroid::new(*iter_sorted_values.next().expect("this was logically guarded in the preceding condition"), 1) } } else { - Centroid::new(*iter_sorted_values.next().unwrap(), 1) + Centroid::new(*iter_sorted_values.next().expect("the next sorted value after a missing centroid must exist (but the loop says ||)"), 1) }; let next_sum: f64 = next.mean() * next.weight() as f64; @@ -424,7 +424,8 @@ impl TDigest { } // TODO should this be the smaller of the sizes? - let max_size = digests.first().unwrap().max_size; + // TODO: should this be configurable? Should it make sure it's at least some reasonable size? + let max_size = digests.first().expect("there was no digest to merge").max_size; let mut centroids: Vec = Vec::with_capacity(n_centroids); let mut starts: Vec = Vec::with_capacity(digests.len()); @@ -475,7 +476,7 @@ impl TDigest { let mut q_limit_times_count: f64 = Self::k_to_q(k_limit, max_size as f64) * (count as f64); let mut iter_centroids = centroids.iter_mut(); - let mut curr = iter_centroids.next().unwrap(); + let mut curr = iter_centroids.next().expect("we made a centroid list with contents from the digests list"); let mut weight_so_far: u64 = curr.weight(); let mut sums_to_merge: f64 = 0.0; let mut weights_to_merge: u64 = 0; @@ -1048,7 +1049,7 @@ mod tests { .chain(batch4.iter()) .copied() .collect(); - master.sort_by(|a, b| a.partial_cmp(b).unwrap()); + master.sort_by(|a, b| a.partial_cmp(b).expect("a comparison must exist")); if master.len() < 100 { return TestResult::discard(); diff --git a/extension/src/tdigest.rs b/extension/src/tdigest.rs index e1e235c6..2bdeae0a 100644 --- a/extension/src/tdigest.rs +++ b/extension/src/tdigest.rs @@ -46,7 +46,7 @@ pub fn tdigest_trans_inner( } }; let mut state = match state { - None => tdigest::Builder::with_size(size.try_into().unwrap()).into(), + None => tdigest::Builder::with_size(size.try_into().expect("unsigned size must fit in usize")).into(), Some(state) => state, }; state.push(value); @@ -88,7 +88,7 @@ use crate::raw::bytea; #[pg_extern(immutable, parallel_safe, strict)] pub fn tdigest_serialize(state: Internal) -> bytea { - let state: &mut tdigest::Builder = unsafe { state.get_mut().unwrap() }; + let state: &mut tdigest::Builder = unsafe { state.get_mut().expect("state must exist to serialize tdigest") }; // TODO this macro is really broken let hack = state.build(); let hackref = &hack; @@ -123,7 +123,7 @@ impl<'input> InOutFuncs for TDigest<'input> { fn output(&self, buffer: &mut StringInfo) { use crate::serialization::{str_to_db_encoding, EncodedStr::*}; - let stringified = ron::to_string(&**self).unwrap(); + let stringified = ron::to_string(&**self).expect("must be able to ronstring self"); match str_to_db_encoding(&stringified) { Utf8(s) => buffer.push_str(s), Other(s) => buffer.push_bytes(s.to_bytes()), @@ -137,7 +137,7 @@ impl<'input> InOutFuncs for TDigest<'input> { use crate::serialization::str_from_db_encoding; let input = str_from_db_encoding(input); - let mut val: TDigestData = ron::from_str(input).unwrap(); + let mut val: TDigestData = ron::from_str(input).expect("must be able to read self as ronstring"); val.buckets = val .centroids .len() @@ -160,7 +160,7 @@ impl<'input> TDigest<'input> { } fn from_internal_tdigest(digest: &InternalTDigest) -> TDigest<'static> { - let max_buckets: u32 = digest.max_size().try_into().unwrap(); + let max_buckets: u32 = digest.max_size().try_into().expect("usize must fit into u32 for max_buckets"); let centroids = digest.raw_centroids(); @@ -295,7 +295,7 @@ fn tdigest_compound_final( #[pg_extern(immutable, parallel_safe)] fn tdigest_compound_serialize(state: Internal, _fcinfo: pg_sys::FunctionCallInfo) -> bytea { - let state: Inner = unsafe { state.to_inner().unwrap() }; + let state: Inner = unsafe { state.to_inner().expect("an internal tdigest must exist for serialization") }; crate::do_serialize!(state) } From 4a84eef5db65933b03ddc488e507a842764f9499 Mon Sep 17 00:00:00 2001 From: Kenny <3454741+WarriorOfWire@users.noreply.github.com> Date: Sat, 14 Jan 2023 13:28:43 -0800 Subject: [PATCH 2/2] cargo fmt --- extension/src/tdigest.rs | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/extension/src/tdigest.rs b/extension/src/tdigest.rs index 2bdeae0a..6bc498e3 100644 --- a/extension/src/tdigest.rs +++ b/extension/src/tdigest.rs @@ -46,7 +46,10 @@ pub fn tdigest_trans_inner( } }; let mut state = match state { - None => tdigest::Builder::with_size(size.try_into().expect("unsigned size must fit in usize")).into(), + None => tdigest::Builder::with_size( + size.try_into().expect("unsigned size must fit in usize"), + ) + .into(), Some(state) => state, }; state.push(value); @@ -88,7 +91,11 @@ use crate::raw::bytea; #[pg_extern(immutable, parallel_safe, strict)] pub fn tdigest_serialize(state: Internal) -> bytea { - let state: &mut tdigest::Builder = unsafe { state.get_mut().expect("state must exist to serialize tdigest") }; + let state: &mut tdigest::Builder = unsafe { + state + .get_mut() + .expect("state must exist to serialize tdigest") + }; // TODO this macro is really broken let hack = state.build(); let hackref = &hack; @@ -137,7 +144,8 @@ impl<'input> InOutFuncs for TDigest<'input> { use crate::serialization::str_from_db_encoding; let input = str_from_db_encoding(input); - let mut val: TDigestData = ron::from_str(input).expect("must be able to read self as ronstring"); + let mut val: TDigestData = + ron::from_str(input).expect("must be able to read self as ronstring"); val.buckets = val .centroids .len() @@ -160,7 +168,10 @@ impl<'input> TDigest<'input> { } fn from_internal_tdigest(digest: &InternalTDigest) -> TDigest<'static> { - let max_buckets: u32 = digest.max_size().try_into().expect("usize must fit into u32 for max_buckets"); + let max_buckets: u32 = digest + .max_size() + .try_into() + .expect("usize must fit into u32 for max_buckets"); let centroids = digest.raw_centroids(); @@ -295,7 +306,11 @@ fn tdigest_compound_final( #[pg_extern(immutable, parallel_safe)] fn tdigest_compound_serialize(state: Internal, _fcinfo: pg_sys::FunctionCallInfo) -> bytea { - let state: Inner = unsafe { state.to_inner().expect("an internal tdigest must exist for serialization") }; + let state: Inner = unsafe { + state + .to_inner() + .expect("an internal tdigest must exist for serialization") + }; crate::do_serialize!(state) }