Skip to content

Commit

Permalink
allow casting string to decimal with insufficient scale (#13956)
Browse files Browse the repository at this point in the history
  • Loading branch information
flisky authored Jan 26, 2024
1 parent 4e19d62 commit 298e71f
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 38 deletions.
78 changes: 41 additions & 37 deletions crates/polars-arrow/src/legacy/compute/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,15 @@ pub(crate) fn deserialize_decimal(
precision: Option<u8>,
scale: u8,
) -> Option<i128> {
// While parse_integer_checked will parse negative numbers, we want to handle
// the negative sign ourselves, and so check for it initially, then handle it
// While parse_integer_checked will parse positive/negative numbers, we want to
// handle the sign ourselves, and so check for it initially, then handle it
// at the end.
let negative = bytes.first() == Some(&b'-');
if negative {
bytes = &bytes[1..];
let negative = match bytes.first() {
Some(s @ (b'+' | b'-')) => {
bytes = &bytes[1..];
*s == b'-'
},
_ => false,
};
let (lhs, rhs) = split_decimal_bytes(bytes);
let precision = precision.unwrap_or(38);
Expand All @@ -59,41 +62,36 @@ pub(crate) fn deserialize_decimal(
// at scale 0; there is no scale -2 where it would only need precision 2).
let lhs_s = lhs_b.len() as u8 - leading_zeros(lhs_b);

if lhs_s + scale > precision {
// the integer already exceeds the precision
return None;
}

let abs = parse_integer_checked(lhs_b).and_then(|x| match rhs {
// A decimal separator was found, so LHS and RHS need to be combined.
Some(rhs) => parse_integer_checked(rhs)
.map(|y| (x, y, rhs))
.and_then(|(lhs, rhs, rhs_b)| {
// We include all digits on the RHS, including both leading and trailing zeros,
// as significant. This is consistent with standard scientific practice for writing
// numbers. However, an alternative for parsing could truncate trailing zeros that extend
// beyond the scale: we choose not to do this here.
let scale_adjust = scale as i8 - rhs_b.len() as i8;

if (lhs_s + scale > precision)
|| (scale_adjust < 0)
|| (rhs_b.first() == Some(&b'-'))
{
// LHS significant figures and scale exceed precision,
// RHS significant figures (all digits in RHS) exceed scale, or
// RHS starts with a '-' and the number is not well-formed.
None
} else if (rhs_b.len() as u8) == scale {
// RHS has exactly scale significant digits, so no adjustment
// is needed to RHS.
Some((lhs, rhs))
} else {
// RHS needs adjustment to scale. scale_adjust is known to be
// positive.
Some((lhs, rhs * 10i128.pow(scale_adjust as u32)))
}
Some(mut rhs) => {
if matches!(rhs.first(), Some(b'+' | b'-')) {
// RHS starts with a '+'/'-' sign and the number is not well-formed.
return None;
}
let scale_adjust = if (scale as usize) <= rhs.len() {
// Truncate trailing digits that extend beyond the scale
rhs = &rhs[..scale as usize];
None
} else {
Some(scale as u32 - rhs.len() as u32)
};

parse_integer_checked(rhs).map(|y| {
let lhs = x * 10i128.pow(scale as u32);
let rhs = scale_adjust.map_or(y, |s| y * 10i128.pow(s));
lhs + rhs
})
.map(|(lhs, rhs)| lhs * 10i128.pow(scale as u32) + rhs),
},
// No decimal separator was found; we have an integer / LHS only.
None => {
if (lhs_s + scale > precision) || lhs_b.is_empty() {
// Either the integer itself exceeds the precision, or we simply have
// no number at all / an empty string.
if lhs_b.is_empty() {
// we simply have no number at all / an empty string.
return None;
}
Some(x * 10i128.pow(scale as u32))
Expand Down Expand Up @@ -235,6 +233,12 @@ mod test {
Some(14390)
);

let val = "+000000.5";
assert_eq!(
deserialize_decimal(val.as_bytes(), precision, scale),
Some(50)
);

let val = "-0.5";
assert_eq!(
deserialize_decimal(val.as_bytes(), precision, scale),
Expand Down Expand Up @@ -300,10 +304,10 @@ mod test {
assert_eq!(deserialize_decimal(val, Some(4), 1), None);

let val = b"1200.010";
assert_eq!(deserialize_decimal(val, None, 0), None); // insufficient scale
assert_eq!(deserialize_decimal(val, None, 0), Some(1200)); // truncate scale
assert_eq!(deserialize_decimal(val, None, 3), Some(1200010)); // exact scale
assert_eq!(deserialize_decimal(val, None, 6), Some(1200010000)); // excess scale
assert_eq!(deserialize_decimal(val, Some(7), 0), None); // insufficient precision and scale
assert_eq!(deserialize_decimal(val, Some(7), 0), Some(1200)); // sufficient precision and truncate scale
assert_eq!(deserialize_decimal(val, Some(7), 3), Some(1200010)); // exact precision and scale
assert_eq!(deserialize_decimal(val, Some(10), 6), Some(1200010000)); // exact precision, excess scale
assert_eq!(deserialize_decimal(val, Some(5), 6), None); // insufficient precision, excess scale
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/ops/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,6 @@ mod test {
assert_eq!(s.get(1).unwrap(), AnyValue::Null);
assert_eq!(s.get(3).unwrap(), AnyValue::Decimal(300045, 5));
assert_eq!(s.get(4).unwrap(), AnyValue::Decimal(-400000, 5));
assert_eq!(s.get(6).unwrap(), AnyValue::Null);
assert_eq!(s.get(6).unwrap(), AnyValue::Decimal(525251, 5));
}
}

0 comments on commit 298e71f

Please sign in to comment.