Skip to content

Commit

Permalink
Err on try_from_le_slice, fix apache#3577
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelcolvin committed Aug 23, 2024
1 parent 2795b94 commit 9734024
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 28 deletions.
9 changes: 4 additions & 5 deletions parquet/src/encodings/rle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ use std::{cmp, mem::size_of};
use bytes::Bytes;

use crate::errors::{ParquetError, Result};
use crate::util::bit_util::from_le_slice;
use crate::util::bit_util::{self, BitReader, BitWriter, FromBytes};

/// Rle/Bit-Packing Hybrid Encoding
Expand Down Expand Up @@ -356,13 +355,13 @@ impl RleDecoder {
}

let value = if self.rle_left > 0 {
let rle_value = from_le_slice(
let rle_value = T::try_from_le_slice(
&self
.current_value
.as_mut()
.expect("current_value should be Some")
.to_ne_bytes(),
);
)?;
self.rle_left -= 1;
rle_value
} else {
Expand All @@ -388,9 +387,9 @@ impl RleDecoder {
let num_values =
cmp::min(buffer.len() - values_read, self.rle_left as usize);
for i in 0..num_values {
let repeated_value = from_le_slice(
let repeated_value = T::try_from_le_slice(
&self.current_value.as_mut().unwrap().to_ne_bytes(),
);
)?;
buffer[values_read + i] = repeated_value;
}
self.rle_left -= num_values as u32;
Expand Down
3 changes: 1 addition & 2 deletions parquet/src/file/page_index/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ use crate::data_type::{AsBytes, ByteArray, FixedLenByteArray, Int96};
use crate::errors::ParquetError;
use crate::file::metadata::LevelHistogram;
use crate::format::{BoundaryOrder, ColumnIndex};
use crate::util::bit_util::from_le_slice;
use std::fmt::Debug;

/// Typed statistics for one data page
Expand Down Expand Up @@ -207,7 +206,7 @@ impl<T: ParquetValueType> NativeIndex<T> {
} else {
let min = min.as_slice();
let max = max.as_slice();
(Some(from_le_slice::<T>(min)), Some(from_le_slice::<T>(max)))
(Some(T::try_from_le_slice(min)?), Some(T::try_from_le_slice(max)?))
};
Ok(PageIndex {
min,
Expand Down
5 changes: 2 additions & 3 deletions parquet/src/file/serialized_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -781,7 +781,6 @@ mod tests {
use crate::file::writer::SerializedFileWriter;
use crate::record::RowAccessor;
use crate::schema::parser::parse_message_type;
use crate::util::bit_util::from_le_slice;
use crate::util::test_common::file_util::{get_test_file, get_test_path};

use super::*;
Expand Down Expand Up @@ -1537,8 +1536,8 @@ mod tests {
assert_eq!(row_group_index.indexes.len(), page_size);
assert_eq!(row_group_index.boundary_order, boundary_order);
row_group_index.indexes.iter().all(|x| {
x.min.as_ref().unwrap() >= &from_le_slice::<T>(min_max.0)
&& x.max.as_ref().unwrap() <= &from_le_slice::<T>(min_max.1)
x.min.as_ref().unwrap() >= &T::try_from_le_slice(min_max.0).unwrap()
&& x.max.as_ref().unwrap() <= &T::try_from_le_slice(min_max.1).unwrap()
});
}

Expand Down
18 changes: 11 additions & 7 deletions parquet/src/file/statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ use crate::basic::Type;
use crate::data_type::private::ParquetValueType;
use crate::data_type::*;
use crate::errors::{ParquetError, Result};
use crate::util::bit_util::from_le_slice;
use crate::util::bit_util::FromBytes;

pub(crate) mod private {
use super::*;
Expand Down Expand Up @@ -186,14 +186,18 @@ pub fn from_thrift(
// INT96 statistics may not be correct, because comparison is signed
// byte-wise, not actual timestamps. It is recommended to ignore
// min/max statistics for INT96 columns.
let min = min.map(|data| {
let min = if let Some(data) = min {
assert_eq!(data.len(), 12);
from_le_slice::<Int96>(&data)
});
let max = max.map(|data| {
Some(Int96::try_from_le_slice(&data)?)
} else {
None
};
let max = if let Some(data) = max {
assert_eq!(data.len(), 12);
from_le_slice::<Int96>(&data)
});
Some(Int96::try_from_le_slice(&data)?)
} else {
None
};
Statistics::int96(min, max, distinct_count, null_count, old_format)
}
Type::FLOAT => Statistics::float(
Expand Down
16 changes: 5 additions & 11 deletions parquet/src/util/bit_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,6 @@ use crate::data_type::{AsBytes, ByteArray, FixedLenByteArray, Int96};
use crate::errors::{ParquetError, Result};
use crate::util::bit_pack::{unpack16, unpack32, unpack64, unpack8};

#[inline]
pub fn from_le_slice<T: FromBytes>(bs: &[u8]) -> T {
// TODO: propagate the error (#3577)
T::try_from_le_slice(bs).unwrap()
}

#[inline]
fn array_from_slice<const N: usize>(bs: &[u8]) -> Result<[u8; N]> {
// Need to slice as may be called with zero-padded values
Expand Down Expand Up @@ -97,9 +91,9 @@ unsafe impl FromBytes for Int96 {
fn from_le_bytes(bs: Self::Buffer) -> Self {
let mut i = Int96::new();
i.set_data(
from_le_slice(&bs[0..4]),
from_le_slice(&bs[4..8]),
from_le_slice(&bs[8..12]),
u32::try_from_le_slice(&bs[0..4]).unwrap(),
u32::try_from_le_slice(&bs[4..8]).unwrap(),
u32::try_from_le_slice(&bs[8..12]).unwrap(),
);
i
}
Expand Down Expand Up @@ -438,7 +432,7 @@ impl BitReader {
}

// TODO: better to avoid copying here
Some(from_le_slice(v.as_bytes()))
T::try_from_le_slice(v.as_bytes()).ok()
}

/// Read multiple values from their packed representation where each element is represented
Expand Down Expand Up @@ -1026,7 +1020,7 @@ mod tests {
.collect();

// Generic values used to check against actual values read from `get_batch`.
let expected_values: Vec<T> = values.iter().map(|v| from_le_slice(v.as_bytes())).collect();
let expected_values: Vec<T> = values.iter().map(|v| T::try_from_le_slice(v.as_bytes()).unwrap()).collect();

(0..total).for_each(|i| writer.put_value(values[i], num_bits));

Expand Down

0 comments on commit 9734024

Please sign in to comment.