Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow casting string to decimal with insufficient scale #13956

Merged
merged 3 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 41 additions & 37 deletions crates/polars-arrow/src/legacy/compute/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,15 @@ pub(crate) fn deserialize_decimal(
precision: Option<u8>,
scale: u8,
) -> Option<i128> {
// While parse_integer_checked will parse negative numbers, we want to handle
// the negative sign ourselves, and so check for it initially, then handle it
// While parse_integer_checked will parse positive/negative numbers, we want to
// handle the sign ourselves, and so check for it initially, then handle it
// at the end.
let negative = bytes.first() == Some(&b'-');
if negative {
bytes = &bytes[1..];
let negative = match bytes.first() {
Some(s @ (b'+' | b'-')) => {
bytes = &bytes[1..];
*s == b'-'
},
_ => false,
};
let (lhs, rhs) = split_decimal_bytes(bytes);
let precision = precision.unwrap_or(38);
Expand All @@ -59,41 +62,36 @@ pub(crate) fn deserialize_decimal(
// at scale 0; there is no scale -2 where it would only need precision 2).
let lhs_s = lhs_b.len() as u8 - leading_zeros(lhs_b);

if lhs_s + scale > precision {
// the integer already exceeds the precision
return None;
}

let abs = parse_integer_checked(lhs_b).and_then(|x| match rhs {
// A decimal separator was found, so LHS and RHS need to be combined.
Some(rhs) => parse_integer_checked(rhs)
.map(|y| (x, y, rhs))
.and_then(|(lhs, rhs, rhs_b)| {
// We include all digits on the RHS, including both leading and trailing zeros,
// as significant. This is consistent with standard scientific practice for writing
// numbers. However, an alternative for parsing could truncate trailing zeros that extend
// beyond the scale: we choose not to do this here.
let scale_adjust = scale as i8 - rhs_b.len() as i8;

if (lhs_s + scale > precision)
|| (scale_adjust < 0)
|| (rhs_b.first() == Some(&b'-'))
{
// LHS significant figures and scale exceed precision,
// RHS significant figures (all digits in RHS) exceed scale, or
// RHS starts with a '-' and the number is not well-formed.
None
} else if (rhs_b.len() as u8) == scale {
// RHS has exactly scale significant digits, so no adjustment
// is needed to RHS.
Some((lhs, rhs))
} else {
// RHS needs adjustment to scale. scale_adjust is known to be
// positive.
Some((lhs, rhs * 10i128.pow(scale_adjust as u32)))
}
Some(mut rhs) => {
if matches!(rhs.first(), Some(b'+' | b'-')) {
// RHS starts with a '+'/'-' sign and the number is not well-formed.
return None;
}
let scale_adjust = if (scale as usize) <= rhs.len() {
// Truncate trailing digits that extend beyond the scale
rhs = &rhs[..scale as usize];
None
} else {
Some(scale as u32 - rhs.len() as u32)
};

parse_integer_checked(rhs).map(|y| {
let lhs = x * 10i128.pow(scale as u32);
let rhs = scale_adjust.map_or(y, |s| y * 10i128.pow(s));
lhs + rhs
})
.map(|(lhs, rhs)| lhs * 10i128.pow(scale as u32) + rhs),
},
// No decimal separator was found; we have an integer / LHS only.
None => {
if (lhs_s + scale > precision) || lhs_b.is_empty() {
// Either the integer itself exceeds the precision, or we simply have
// no number at all / an empty string.
if lhs_b.is_empty() {
// we simply have no number at all / an empty string.
return None;
}
Some(x * 10i128.pow(scale as u32))
Expand Down Expand Up @@ -235,6 +233,12 @@ mod test {
Some(14390)
);

let val = "+000000.5";
assert_eq!(
deserialize_decimal(val.as_bytes(), precision, scale),
Some(50)
);

let val = "-0.5";
assert_eq!(
deserialize_decimal(val.as_bytes(), precision, scale),
Expand Down Expand Up @@ -300,10 +304,10 @@ mod test {
assert_eq!(deserialize_decimal(val, Some(4), 1), None);

let val = b"1200.010";
assert_eq!(deserialize_decimal(val, None, 0), None); // insufficient scale
assert_eq!(deserialize_decimal(val, None, 0), Some(1200)); // truncate scale
assert_eq!(deserialize_decimal(val, None, 3), Some(1200010)); // exact scale
assert_eq!(deserialize_decimal(val, None, 6), Some(1200010000)); // excess scale
assert_eq!(deserialize_decimal(val, Some(7), 0), None); // insufficient precision and scale
assert_eq!(deserialize_decimal(val, Some(7), 0), Some(1200)); // sufficient precision and truncate scale
assert_eq!(deserialize_decimal(val, Some(7), 3), Some(1200010)); // exact precision and scale
assert_eq!(deserialize_decimal(val, Some(10), 6), Some(1200010000)); // exact precision, excess scale
assert_eq!(deserialize_decimal(val, Some(5), 6), None); // insufficient precision, excess scale
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/ops/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,6 @@ mod test {
assert_eq!(s.get(1).unwrap(), AnyValue::Null);
assert_eq!(s.get(3).unwrap(), AnyValue::Decimal(300045, 5));
assert_eq!(s.get(4).unwrap(), AnyValue::Decimal(-400000, 5));
assert_eq!(s.get(6).unwrap(), AnyValue::Null);
assert_eq!(s.get(6).unwrap(), AnyValue::Decimal(525251, 5));
}
}
Loading