diff --git a/crates/polars-arrow/src/legacy/compute/decimal.rs b/crates/polars-arrow/src/legacy/compute/decimal.rs index 12a9fc53f6c1..4afc35f0993d 100644 --- a/crates/polars-arrow/src/legacy/compute/decimal.rs +++ b/crates/polars-arrow/src/legacy/compute/decimal.rs @@ -40,12 +40,15 @@ pub(crate) fn deserialize_decimal( precision: Option, scale: u8, ) -> Option { - // While parse_integer_checked will parse negative numbers, we want to handle - // the negative sign ourselves, and so check for it initially, then handle it + // While parse_integer_checked will parse positive/negative numbers, we want to + // handle the sign ourselves, and so check for it initially, then handle it // at the end. - let negative = bytes.first() == Some(&b'-'); - if negative { - bytes = &bytes[1..]; + let negative = match bytes.first() { + Some(s @ (b'+' | b'-')) => { + bytes = &bytes[1..]; + *s == b'-' + }, + _ => false, }; let (lhs, rhs) = split_decimal_bytes(bytes); let precision = precision.unwrap_or(38); @@ -59,41 +62,36 @@ pub(crate) fn deserialize_decimal( // at scale 0; there is no scale -2 where it would only need precision 2). let lhs_s = lhs_b.len() as u8 - leading_zeros(lhs_b); + if lhs_s + scale > precision { + // the integer already exceeds the precision + return None; + } + let abs = parse_integer_checked(lhs_b).and_then(|x| match rhs { // A decimal separator was found, so LHS and RHS need to be combined. - Some(rhs) => parse_integer_checked(rhs) - .map(|y| (x, y, rhs)) - .and_then(|(lhs, rhs, rhs_b)| { - // We include all digits on the RHS, including both leading and trailing zeros, - // as significant. This is consistent with standard scientific practice for writing - // numbers. However, an alternative for parsing could truncate trailing zeros that extend - // beyond the scale: we choose not to do this here. - let scale_adjust = scale as i8 - rhs_b.len() as i8; - - if (lhs_s + scale > precision) - || (scale_adjust < 0) - || (rhs_b.first() == Some(&b'-')) - { - // LHS significant figures and scale exceed precision, - // RHS significant figures (all digits in RHS) exceed scale, or - // RHS starts with a '-' and the number is not well-formed. - None - } else if (rhs_b.len() as u8) == scale { - // RHS has exactly scale significant digits, so no adjustment - // is needed to RHS. - Some((lhs, rhs)) - } else { - // RHS needs adjustment to scale. scale_adjust is known to be - // positive. - Some((lhs, rhs * 10i128.pow(scale_adjust as u32))) - } + Some(mut rhs) => { + if matches!(rhs.first(), Some(b'+' | b'-')) { + // RHS starts with a '+'/'-' sign and the number is not well-formed. + return None; + } + let scale_adjust = if (scale as usize) <= rhs.len() { + // Truncate trailing digits that extend beyond the scale + rhs = &rhs[..scale as usize]; + None + } else { + Some(scale as u32 - rhs.len() as u32) + }; + + parse_integer_checked(rhs).map(|y| { + let lhs = x * 10i128.pow(scale as u32); + let rhs = scale_adjust.map_or(y, |s| y * 10i128.pow(s)); + lhs + rhs }) - .map(|(lhs, rhs)| lhs * 10i128.pow(scale as u32) + rhs), + }, // No decimal separator was found; we have an integer / LHS only. None => { - if (lhs_s + scale > precision) || lhs_b.is_empty() { - // Either the integer itself exceeds the precision, or we simply have - // no number at all / an empty string. + if lhs_b.is_empty() { + // we simply have no number at all / an empty string. return None; } Some(x * 10i128.pow(scale as u32)) @@ -235,6 +233,12 @@ mod test { Some(14390) ); + let val = "+000000.5"; + assert_eq!( + deserialize_decimal(val.as_bytes(), precision, scale), + Some(50) + ); + let val = "-0.5"; assert_eq!( deserialize_decimal(val.as_bytes(), precision, scale), @@ -300,10 +304,10 @@ mod test { assert_eq!(deserialize_decimal(val, Some(4), 1), None); let val = b"1200.010"; - assert_eq!(deserialize_decimal(val, None, 0), None); // insufficient scale + assert_eq!(deserialize_decimal(val, None, 0), Some(1200)); // truncate scale assert_eq!(deserialize_decimal(val, None, 3), Some(1200010)); // exact scale assert_eq!(deserialize_decimal(val, None, 6), Some(1200010000)); // excess scale - assert_eq!(deserialize_decimal(val, Some(7), 0), None); // insufficient precision and scale + assert_eq!(deserialize_decimal(val, Some(7), 0), Some(1200)); // sufficient precision and truncate scale assert_eq!(deserialize_decimal(val, Some(7), 3), Some(1200010)); // exact precision and scale assert_eq!(deserialize_decimal(val, Some(10), 6), Some(1200010000)); // exact precision, excess scale assert_eq!(deserialize_decimal(val, Some(5), 6), None); // insufficient precision, excess scale diff --git a/crates/polars-core/src/chunked_array/ops/decimal.rs b/crates/polars-core/src/chunked_array/ops/decimal.rs index c90ecd314c45..18e3f84f5f22 100644 --- a/crates/polars-core/src/chunked_array/ops/decimal.rs +++ b/crates/polars-core/src/chunked_array/ops/decimal.rs @@ -47,6 +47,6 @@ mod test { assert_eq!(s.get(1).unwrap(), AnyValue::Null); assert_eq!(s.get(3).unwrap(), AnyValue::Decimal(300045, 5)); assert_eq!(s.get(4).unwrap(), AnyValue::Decimal(-400000, 5)); - assert_eq!(s.get(6).unwrap(), AnyValue::Null); + assert_eq!(s.get(6).unwrap(), AnyValue::Decimal(525251, 5)); } }