allow casting string to decimal with insufficient scale (#13956)

pola-rs · Jan 26, 2024 · 298e71f · 298e71f
1 parent 4e19d62
commit 298e71f
Show file tree

Hide file tree

Showing 2 changed files with 42 additions and 38 deletions.
diff --git a/crates/polars-arrow/src/legacy/compute/decimal.rs b/crates/polars-arrow/src/legacy/compute/decimal.rs
@@ -40,12 +40,15 @@ pub(crate) fn deserialize_decimal(
     precision: Option<u8>,
     scale: u8,
 ) -> Option<i128> {
-    // While parse_integer_checked will parse negative numbers, we want to handle
-    // the negative sign ourselves, and so check for it initially, then handle it
+    // While parse_integer_checked will parse positive/negative numbers, we want to
+    // handle the sign ourselves, and so check for it initially, then handle it
     // at the end.
-    let negative = bytes.first() == Some(&b'-');
-    if negative {
-        bytes = &bytes[1..];
+    let negative = match bytes.first() {
+        Some(s @ (b'+' | b'-')) => {
+            bytes = &bytes[1..];
+            *s == b'-'
+        },
+        _ => false,
     };
     let (lhs, rhs) = split_decimal_bytes(bytes);
     let precision = precision.unwrap_or(38);
@@ -59,41 +62,36 @@ pub(crate) fn deserialize_decimal(
     // at scale 0; there is no scale -2 where it would only need precision 2).
     let lhs_s = lhs_b.len() as u8 - leading_zeros(lhs_b);
 
+    if lhs_s + scale > precision {
+        // the integer already exceeds the precision
+        return None;
+    }
+
     let abs = parse_integer_checked(lhs_b).and_then(|x| match rhs {
         // A decimal separator was found, so LHS and RHS need to be combined.
-        Some(rhs) => parse_integer_checked(rhs)
-            .map(|y| (x, y, rhs))
-            .and_then(|(lhs, rhs, rhs_b)| {
-                // We include all digits on the RHS, including both leading and trailing zeros,
-                // as significant.  This is consistent with standard scientific practice for writing
-                // numbers.  However, an alternative for parsing could truncate trailing zeros that extend
-                // beyond the scale: we choose not to do this here.
-                let scale_adjust = scale as i8 - rhs_b.len() as i8;
-
-                if (lhs_s + scale > precision)
-                    || (scale_adjust < 0)
-                    || (rhs_b.first() == Some(&b'-'))
-                {
-                    // LHS significant figures and scale exceed precision,
-                    // RHS significant figures (all digits in RHS) exceed scale, or
-                    // RHS starts with a '-' and the number is not well-formed.
-                    None
-                } else if (rhs_b.len() as u8) == scale {
-                    // RHS has exactly scale significant digits, so no adjustment
-                    // is needed to RHS.
-                    Some((lhs, rhs))
-                } else {
-                    // RHS needs adjustment to scale. scale_adjust is known to be
-                    // positive.
-                    Some((lhs, rhs * 10i128.pow(scale_adjust as u32)))
-                }
+        Some(mut rhs) => {
+            if matches!(rhs.first(), Some(b'+' | b'-')) {
+                // RHS starts with a '+'/'-' sign and the number is not well-formed.
+                return None;
+            }
+            let scale_adjust = if (scale as usize) <= rhs.len() {
+                // Truncate trailing digits that extend beyond the scale
+                rhs = &rhs[..scale as usize];
+                None
+            } else {
+                Some(scale as u32 - rhs.len() as u32)
+            };
+
+            parse_integer_checked(rhs).map(|y| {
+                let lhs = x * 10i128.pow(scale as u32);
+                let rhs = scale_adjust.map_or(y, |s| y * 10i128.pow(s));
+                lhs + rhs
             })
-            .map(|(lhs, rhs)| lhs * 10i128.pow(scale as u32) + rhs),
+        },
         // No decimal separator was found; we have an integer / LHS only.
         None => {
-            if (lhs_s + scale > precision) || lhs_b.is_empty() {
-                // Either the integer itself exceeds the precision, or we simply have
-                // no number at all / an empty string.
+            if lhs_b.is_empty() {
+                // we simply have no number at all / an empty string.
                 return None;
             }
             Some(x * 10i128.pow(scale as u32))
@@ -235,6 +233,12 @@ mod test {
             Some(14390)
         );
 
+        let val = "+000000.5";
+        assert_eq!(
+            deserialize_decimal(val.as_bytes(), precision, scale),
+            Some(50)
+        );
+
         let val = "-0.5";
         assert_eq!(
             deserialize_decimal(val.as_bytes(), precision, scale),
@@ -300,10 +304,10 @@ mod test {
         assert_eq!(deserialize_decimal(val, Some(4), 1), None);
 
         let val = b"1200.010";
-        assert_eq!(deserialize_decimal(val, None, 0), None); // insufficient scale
+        assert_eq!(deserialize_decimal(val, None, 0), Some(1200)); // truncate scale
         assert_eq!(deserialize_decimal(val, None, 3), Some(1200010)); // exact scale
         assert_eq!(deserialize_decimal(val, None, 6), Some(1200010000)); // excess scale
-        assert_eq!(deserialize_decimal(val, Some(7), 0), None); // insufficient precision and scale
+        assert_eq!(deserialize_decimal(val, Some(7), 0), Some(1200)); // sufficient precision and truncate scale
         assert_eq!(deserialize_decimal(val, Some(7), 3), Some(1200010)); // exact precision and scale
         assert_eq!(deserialize_decimal(val, Some(10), 6), Some(1200010000)); // exact precision, excess scale
         assert_eq!(deserialize_decimal(val, Some(5), 6), None); // insufficient precision, excess scale

diff --git a/crates/polars-core/src/chunked_array/ops/decimal.rs b/crates/polars-core/src/chunked_array/ops/decimal.rs
@@ -47,6 +47,6 @@ mod test {
         assert_eq!(s.get(1).unwrap(), AnyValue::Null);
         assert_eq!(s.get(3).unwrap(), AnyValue::Decimal(300045, 5));
         assert_eq!(s.get(4).unwrap(), AnyValue::Decimal(-400000, 5));
-        assert_eq!(s.get(6).unwrap(), AnyValue::Null);
+        assert_eq!(s.get(6).unwrap(), AnyValue::Decimal(525251, 5));
     }
 }