@@ -235,7 +235,7 @@ use lance_core::{Error, Result};
235
235
use tracing:: instrument;
236
236
237
237
use crate :: buffer:: LanceBuffer ;
238
- use crate :: data:: DataBlock ;
238
+ use crate :: data:: { DataBlock , FixedWidthDataBlock , VariableWidthBlock } ;
239
239
use crate :: encoder:: { values_column_encoding, EncodedBatch } ;
240
240
use crate :: encodings:: logical:: binary:: BinaryFieldScheduler ;
241
241
use crate :: encodings:: logical:: blob:: BlobFieldScheduler ;
@@ -248,7 +248,9 @@ use crate::encodings::logical::primitive::{
248
248
use crate :: encodings:: logical:: r#struct:: {
249
249
SimpleStructDecoder , SimpleStructScheduler , StructuralStructDecoder , StructuralStructScheduler ,
250
250
} ;
251
- use crate :: encodings:: physical:: binary:: { BinaryBlockDecompressor , BinaryMiniBlockDecompressor } ;
251
+ use crate :: encodings:: physical:: binary:: {
252
+ BinaryBlockDecompressor , BinaryMiniBlockDecompressor , VariableDecoder ,
253
+ } ;
252
254
use crate :: encodings:: physical:: bitpack_fastlanes:: BitpackMiniBlockDecompressor ;
253
255
use crate :: encodings:: physical:: fsst:: FsstMiniBlockDecompressor ;
254
256
use crate :: encodings:: physical:: struct_encoding:: PackedStructFixedWidthMiniBlockDecompressor ;
@@ -459,17 +461,20 @@ pub trait MiniBlockDecompressor: std::fmt::Debug + Send + Sync {
459
461
fn decompress ( & self , data : LanceBuffer , num_values : u64 ) -> Result < DataBlock > ;
460
462
}
461
463
462
/// Per-value decompressor for fixed-width data.
///
/// This hunk renames `PerValueDecompressor` to `FixedPerValueDecompressor`
/// and changes `decompress` to take a `FixedWidthDataBlock` instead of a raw
/// `LanceBuffer` plus a `num_values` count.
- pub trait PerValueDecompressor : std:: fmt:: Debug + Send + Sync {
464
+ pub trait FixedPerValueDecompressor : std:: fmt:: Debug + Send + Sync {
463
465
/// Decompress one or more values
///
/// The result is returned as a `DataBlock`, or an error via `Result`.
466
464
- fn decompress ( & self , data : LanceBuffer , num_values : u64 ) -> Result < DataBlock > ;
466
+ fn decompress ( & self , data : FixedWidthDataBlock ) -> Result < DataBlock > ;
465
467
/// The number of bits in each value
466
468
///
467
- /// Returns 0 if the data type is variable-width
468
- ///
469
469
/// Currently (and probably long term) this must be a multiple of 8
// NOTE(review): the removed "returns 0 if variable-width" sentence suggests
// this trait is now fixed-width only -- confirm no caller still relies on 0.
470
470
fn bits_per_value ( & self ) -> u64 ;
471
471
}
472
472
473
/// Per-value decompressor for variable-width data.
///
/// Unlike `FixedPerValueDecompressor`, this trait declares no
/// `bits_per_value` method; its only method is `decompress`.
+ pub trait VariablePerValueDecompressor : std:: fmt:: Debug + Send + Sync {
474
+ /// Decompress one or more values
// Takes a `VariableWidthBlock` and returns the result as a `DataBlock`,
// or an error via `Result`.
475
+ fn decompress ( & self , data : VariableWidthBlock ) -> Result < DataBlock > ;
476
+ }
477
+
473
478
/// Decompressor that turns a single `LanceBuffer` into a `DataBlock`.
///
/// Unlike the mini-block decompressor, `decompress` takes no `num_values`
/// argument here; the buffer is the only input.
pub trait BlockDecompressor : std:: fmt:: Debug + Send + Sync {
474
479
/// Decompress `data`, returning the resulting `DataBlock` or an error.
fn decompress ( & self , data : LanceBuffer ) -> Result < DataBlock > ;
475
480
}
@@ -480,10 +485,15 @@ pub trait DecompressorStrategy: std::fmt::Debug + Send + Sync {
480
485
description : & pb:: ArrayEncoding ,
481
486
) -> Result < Box < dyn MiniBlockDecompressor > > ;
482
487
483
// This hunk splits the former `create_per_value_decompressor` into two
// factory methods, one per trait.
//
/// Create a `FixedPerValueDecompressor` for the encoding described by
/// `description`.
- fn create_per_value_decompressor (
488
+ fn create_fixed_per_value_decompressor (
489
+ & self ,
490
+ description : & pb:: ArrayEncoding ,
491
+ ) -> Result < Box < dyn FixedPerValueDecompressor > > ;
492
+
/// Create a `VariablePerValueDecompressor` for the encoding described by
/// `description`.
493
+ fn create_variable_per_value_decompressor (
484
494
& self ,
485
495
description : & pb:: ArrayEncoding ,
486
- ) -> Result < Box < dyn PerValueDecompressor > > ;
496
+ ) -> Result < Box < dyn VariablePerValueDecompressor > > ;
487
497
488
498
fn create_block_decompressor (
489
499
& self ,
@@ -506,10 +516,10 @@ impl DecompressorStrategy for CoreDecompressorStrategy {
506
516
pb:: array_encoding:: ArrayEncoding :: Bitpack2 ( description) => {
507
517
Ok ( Box :: new ( BitpackMiniBlockDecompressor :: new ( description) ) )
508
518
}
509
- pb:: array_encoding:: ArrayEncoding :: BinaryMiniBlock ( _) => {
519
+ pb:: array_encoding:: ArrayEncoding :: Variable ( _) => {
510
520
Ok ( Box :: new ( BinaryMiniBlockDecompressor :: default ( ) ) )
511
521
}
512
- pb:: array_encoding:: ArrayEncoding :: FsstMiniBlock ( description) => {
522
+ pb:: array_encoding:: ArrayEncoding :: Fsst ( description) => {
513
523
Ok ( Box :: new ( FsstMiniBlockDecompressor :: new ( description) ) )
514
524
}
515
525
pb:: array_encoding:: ArrayEncoding :: PackedStructFixedWidthMiniBlock ( description) => {
@@ -521,15 +531,28 @@ impl DecompressorStrategy for CoreDecompressorStrategy {
521
531
}
522
532
}
523
533
524
/// Build the fixed-width per-value decompressor for `description`.
///
/// Only the `Flat` encoding is handled: it yields a `ValueDecompressor`
/// constructed from the flat description.
///
/// # Panics
///
/// * If `description.array_encoding` is `None` (the `unwrap` below).
/// * For every encoding other than `Flat` (the `todo!` arm).
- fn create_per_value_decompressor (
534
+ fn create_fixed_per_value_decompressor (
525
535
& self ,
526
536
description : & pb:: ArrayEncoding ,
527
- ) -> Result < Box < dyn PerValueDecompressor > > {
537
+ ) -> Result < Box < dyn FixedPerValueDecompressor > > {
528
538
// NOTE(review): `unwrap` panics on a missing encoding even though the
// function returns `Result` -- consider returning an error instead.
match description. array_encoding . as_ref ( ) . unwrap ( ) {
529
539
pb:: array_encoding:: ArrayEncoding :: Flat ( flat) => {
530
540
Ok ( Box :: new ( ValueDecompressor :: new ( flat) ) )
531
541
}
532
- _ => todo ! ( ) ,
542
// The new arm names the unsupported encoding in the panic message.
+ _ => todo ! ( "fixed-per-value decompressor for {:?}" , description) ,
543
+ }
544
+ }
545
+
546
/// Build the variable-width per-value decompressor for `description`.
///
/// Only the `Variable` encoding is handled: it yields a default-constructed
/// `VariableDecoder`.
///
/// # Panics
///
/// * If `description.array_encoding` is `None` (the `unwrap` below).
/// * If `bits_per_offset` is not strictly less than `u8::MAX` (the `assert!`).
/// * For every encoding other than `Variable` (the `todo!` arm).
+ fn create_variable_per_value_decompressor (
547
+ & self ,
548
+ description : & pb:: ArrayEncoding ,
549
+ ) -> Result < Box < dyn VariablePerValueDecompressor > > {
550
+ match description. array_encoding . as_ref ( ) . unwrap ( ) {
551
+ & pb:: array_encoding:: ArrayEncoding :: Variable ( variable) => {
552
// NOTE(review): `bits_per_offset` is only range-checked here; it is not
// passed to the decoder. The check panics rather than returning `Err`,
// and `<` also rejects 255 itself -- confirm both are intended.
+ assert ! ( variable. bits_per_offset < u8 :: MAX as u32 ) ;
553
+ Ok ( Box :: new ( VariableDecoder :: default ( ) ) )
554
+ }
555
// Unsupported encodings panic with the description in the message.
+ _ => todo ! ( "variable-per-value decompressor for {:?}" , description) ,
533
556
}
534
557
}
535
558
@@ -548,7 +571,7 @@ impl DecompressorStrategy for CoreDecompressorStrategy {
548
571
constant. num_values ,
549
572
) ) )
550
573
}
551
- pb:: array_encoding:: ArrayEncoding :: BinaryBlock ( _) => {
574
+ pb:: array_encoding:: ArrayEncoding :: Variable ( _) => {
552
575
Ok ( Box :: new ( BinaryBlockDecompressor :: default ( ) ) )
553
576
}
554
577
_ => todo ! ( ) ,
0 commit comments