Skip to content

Commit dc9afbb

Browse files
chore: add expect_stat, expect_single_stat in GetStat trait (lancedb#3126)
This PR adds the helper functions `expect_stat` and `expect_single_stat` to the `GetStat` trait to make DataBlock statistics easier to use.
1 parent e6c2343 commit dc9afbb

File tree

4 files changed

+235
-603
lines changed

4 files changed

+235
-603
lines changed

rust/lance-encoding/src/data.rs

+7-7
Original file line numberDiff line numberDiff line change
@@ -951,17 +951,17 @@ impl DataBlock {
951951
as_type_ref!(as_variable_width_ref, VariableWidth, VariableWidthBlock);
952952
as_type_ref!(as_struct_ref, Struct, StructDataBlock);
953953
as_type_ref!(as_dictionary_ref, Dictionary, DictionaryDataBlock);
954-
as_type_ref_mut!(as_all_null_mut_ref, AllNull, AllNullDataBlock);
955-
as_type_ref_mut!(as_nullable_mut_ref, Nullable, NullableDataBlock);
956-
as_type_ref_mut!(as_fixed_width_mut_ref, FixedWidth, FixedWidthDataBlock);
954+
as_type_ref_mut!(as_all_null_ref_mut, AllNull, AllNullDataBlock);
955+
as_type_ref_mut!(as_nullable_ref_mut, Nullable, NullableDataBlock);
956+
as_type_ref_mut!(as_fixed_width_ref_mut, FixedWidth, FixedWidthDataBlock);
957957
as_type_ref_mut!(
958-
as_fixed_size_list_mut_ref,
958+
as_fixed_size_list_ref_mut,
959959
FixedSizeList,
960960
FixedSizeListBlock
961961
);
962-
as_type_ref_mut!(as_variable_width_mut_ref, VariableWidth, VariableWidthBlock);
963-
as_type_ref_mut!(as_struct_mut_ref, Struct, StructDataBlock);
964-
as_type_ref_mut!(as_dictionary_mut_ref, Dictionary, DictionaryDataBlock);
962+
as_type_ref_mut!(as_variable_width_ref_mut, VariableWidth, VariableWidthBlock);
963+
as_type_ref_mut!(as_struct_ref_mut, Struct, StructDataBlock);
964+
as_type_ref_mut!(as_dictionary_ref_mut, Dictionary, DictionaryDataBlock);
965965
}
966966

967967
// Methods to convert from Arrow -> DataBlock

rust/lance-encoding/src/encoder.rs

+4-12
Original file line numberDiff line numberDiff line change
@@ -791,9 +791,7 @@ impl CompressionStrategy for CoreArrayEncodingStrategy {
791791
data: &DataBlock,
792792
) -> Result<Box<dyn MiniBlockCompressor>> {
793793
if let DataBlock::FixedWidth(ref fixed_width_data) = data {
794-
let bit_widths = data
795-
.get_stat(Stat::BitWidth)
796-
.expect("FixedWidthDataBlock should have valid `Stat::BitWidth` statistics");
794+
let bit_widths = data.expect_stat(Stat::BitWidth);
797795
// Temporary hack to work around https://github.com/lancedb/lance/issues/3102
798796
// Ideally we should still be able to bit-pack here (either to 0 or 1 bit per value)
799797
let has_all_zeros = bit_widths
@@ -812,15 +810,9 @@ impl CompressionStrategy for CoreArrayEncodingStrategy {
812810
}
813811
if let DataBlock::VariableWidth(ref variable_width_data) = data {
814812
if variable_width_data.bits_per_offset == 32 {
815-
let data_size = variable_width_data.get_stat(Stat::DataSize).expect(
816-
"VariableWidth DataBlock should have valid `Stat::DataSize` statistics",
817-
);
818-
let data_size = data_size.as_primitive::<UInt64Type>().value(0);
819-
820-
let max_len = variable_width_data.get_stat(Stat::MaxLength).expect(
821-
"VariableWidth DataBlock should have valid `Stat::DataSize` statistics",
822-
);
823-
let max_len = max_len.as_primitive::<UInt64Type>().value(0);
813+
let data_size =
814+
variable_width_data.expect_single_stat::<UInt64Type>(Stat::DataSize);
815+
let max_len = variable_width_data.expect_single_stat::<UInt64Type>(Stat::MaxLength);
824816

825817
if max_len >= FSST_LEAST_INPUT_MAX_LENGTH
826818
&& data_size >= FSST_LEAST_INPUT_SIZE as u64

rust/lance-encoding/src/encodings/physical/bitpack_fastlanes.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -1573,9 +1573,7 @@ macro_rules! chunk_data_impl {
15731573
let data_buffer = $data.data.borrow_to_typed_slice::<$data_type>();
15741574
let data_buffer = data_buffer.as_ref();
15751575

1576-
let bit_widths = $data
1577-
.get_stat(Stat::BitWidth)
1578-
.expect("FixedWidthDataBlock should have valid bit width statistics");
1576+
let bit_widths = $data.expect_stat(Stat::BitWidth);
15791577
let bit_widths_array = bit_widths
15801578
.as_any()
15811579
.downcast_ref::<PrimitiveArray<UInt64Type>>()

0 commit comments

Comments
 (0)