@@ -8,7 +8,7 @@ use std::{
8
8
} ;
9
9
10
10
use arrow_array:: { cast:: AsArray , Array , ArrayRef , StructArray } ;
11
- use arrow_schema:: { DataType , Fields } ;
11
+ use arrow_schema:: { DataType , Field , Fields } ;
12
12
use futures:: {
13
13
future:: BoxFuture ,
14
14
stream:: { FuturesOrdered , FuturesUnordered } ,
@@ -64,6 +64,89 @@ impl Ord for SchedulingJobWithStatus<'_> {
64
64
}
65
65
}
66
66
67
+ #[ derive( Debug ) ]
68
+ struct EmptyStructDecodeTask {
69
+ num_rows : u64 ,
70
+ }
71
+
72
+ impl DecodeArrayTask for EmptyStructDecodeTask {
73
+ fn decode ( self : Box < Self > ) -> Result < ArrayRef > {
74
+ Ok ( Arc :: new ( StructArray :: new_empty_fields (
75
+ self . num_rows as usize ,
76
+ None ,
77
+ ) ) )
78
+ }
79
+ }
80
+
81
+ #[ derive( Debug ) ]
82
+ struct EmptyStructDecoder {
83
+ num_rows : u64 ,
84
+ rows_drained : u64 ,
85
+ data_type : DataType ,
86
+ }
87
+
88
+ impl EmptyStructDecoder {
89
+ fn new ( num_rows : u64 ) -> Self {
90
+ Self {
91
+ num_rows,
92
+ rows_drained : 0 ,
93
+ data_type : DataType :: Struct ( Fields :: from ( Vec :: < Field > :: default ( ) ) ) ,
94
+ }
95
+ }
96
+ }
97
+
98
+ impl LogicalPageDecoder for EmptyStructDecoder {
99
+ fn wait_for_loaded ( & mut self , _loaded_need : u64 ) -> BoxFuture < Result < ( ) > > {
100
+ Box :: pin ( std:: future:: ready ( Ok ( ( ) ) ) )
101
+ }
102
+ fn rows_loaded ( & self ) -> u64 {
103
+ self . num_rows
104
+ }
105
+ fn rows_unloaded ( & self ) -> u64 {
106
+ 0
107
+ }
108
+ fn num_rows ( & self ) -> u64 {
109
+ self . num_rows
110
+ }
111
+ fn rows_drained ( & self ) -> u64 {
112
+ self . rows_drained
113
+ }
114
+ fn drain ( & mut self , num_rows : u64 ) -> Result < NextDecodeTask > {
115
+ self . rows_drained += num_rows;
116
+ Ok ( NextDecodeTask {
117
+ num_rows,
118
+ task : Box :: new ( EmptyStructDecodeTask { num_rows } ) ,
119
+ } )
120
+ }
121
+ fn data_type ( & self ) -> & DataType {
122
+ & self . data_type
123
+ }
124
+ }
125
+
126
+ #[ derive( Debug ) ]
127
+ struct EmptyStructSchedulerJob {
128
+ num_rows : u64 ,
129
+ }
130
+
131
+ impl SchedulingJob for EmptyStructSchedulerJob {
132
+ fn schedule_next (
133
+ & mut self ,
134
+ context : & mut SchedulerContext ,
135
+ _priority : & dyn PriorityRange ,
136
+ ) -> Result < ScheduledScanLine > {
137
+ let empty_decoder = Box :: new ( EmptyStructDecoder :: new ( self . num_rows ) ) ;
138
+ let struct_decoder = context. locate_decoder ( empty_decoder) ;
139
+ Ok ( ScheduledScanLine {
140
+ decoders : vec ! [ MessageType :: DecoderReady ( struct_decoder) ] ,
141
+ rows_scheduled : self . num_rows ,
142
+ } )
143
+ }
144
+
145
+ fn num_rows ( & self ) -> u64 {
146
+ self . num_rows
147
+ }
148
+ }
149
+
67
150
/// Scheduling job for struct data
68
151
///
69
152
/// The order in which we schedule the children is important. We want to schedule the child
@@ -175,9 +258,15 @@ pub struct SimpleStructScheduler {
175
258
}
176
259
177
260
impl SimpleStructScheduler {
178
- pub fn new ( children : Vec < Arc < dyn FieldScheduler > > , child_fields : Fields ) -> Self {
179
- debug_assert ! ( !children. is_empty( ) ) ;
180
- let num_rows = children[ 0 ] . num_rows ( ) ;
261
+ pub fn new (
262
+ children : Vec < Arc < dyn FieldScheduler > > ,
263
+ child_fields : Fields ,
264
+ num_rows : u64 ,
265
+ ) -> Self {
266
+ let num_rows = children
267
+ . first ( )
268
+ . map ( |child| child. num_rows ( ) )
269
+ . unwrap_or ( num_rows) ;
181
270
debug_assert ! ( children. iter( ) . all( |child| child. num_rows( ) == num_rows) ) ;
182
271
Self {
183
272
children,
@@ -193,6 +282,11 @@ impl FieldScheduler for SimpleStructScheduler {
193
282
ranges : & [ Range < u64 > ] ,
194
283
filter : & FilterExpression ,
195
284
) -> Result < Box < dyn SchedulingJob + ' a > > {
285
+ if self . children . is_empty ( ) {
286
+ return Ok ( Box :: new ( EmptyStructSchedulerJob {
287
+ num_rows : ranges. iter ( ) . map ( |r| r. end - r. start ) . sum ( ) ,
288
+ } ) ) ;
289
+ }
196
290
let child_schedulers = self
197
291
. children
198
292
. iter ( )
@@ -1120,6 +1214,15 @@ mod tests {
1120
1214
check_round_trip_encoding_random ( field, LanceFileVersion :: V2_0 ) . await ;
1121
1215
}
1122
1216
1217
+ #[ test_log:: test( tokio:: test) ]
1218
+ async fn test_empty_struct ( ) {
1219
+ // It's technically legal for a struct to have 0 children, need to
1220
+ // make sure we support that
1221
+ let data_type = DataType :: Struct ( Fields :: from ( Vec :: < Field > :: default ( ) ) ) ;
1222
+ let field = Field :: new ( "row" , data_type, false ) ;
1223
+ check_round_trip_encoding_random ( field, LanceFileVersion :: V2_0 ) . await ;
1224
+ }
1225
+
1123
1226
#[ test_log:: test( tokio:: test) ]
1124
1227
async fn test_complicated_struct ( ) {
1125
1228
let data_type = DataType :: Struct ( Fields :: from ( vec ! [
0 commit comments