Skip to content

Commit 6b58bc1

Browse files
authored
fix: flat KNN column stats order doesn't match schema (#3451)
This causes an error when querying with a distance range while there are unindexed rows. --------- Signed-off-by: BubbleCal <bubble-cal@outlook.com>
1 parent a6101e5 commit 6b58bc1

File tree

1 file changed

+16
-3
lines changed

1 file changed

+16
-3
lines changed

rust/lance/src/io/exec/knn.rs

+16-3
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ use arrow_array::{
1111
ArrayRef, RecordBatch, StringArray,
1212
};
1313
use arrow_schema::{DataType, Field, Schema, SchemaRef};
14+
use datafusion::common::ColumnStatistics;
1415
use datafusion::error::{DataFusionError, Result as DataFusionResult};
1516
use datafusion::physical_plan::PlanProperties;
1617
use datafusion::physical_plan::{
@@ -184,18 +185,30 @@ impl ExecutionPlan for KNNVectorDistanceExec {
184185

185186
fn statistics(&self) -> DataFusionResult<Statistics> {
186187
let inner_stats = self.input.statistics()?;
187-
let dist_col_stats = inner_stats.column_statistics[0].clone();
188+
let schema = self.input.schema();
189+
let dist_stats = inner_stats
190+
.column_statistics
191+
.iter()
192+
.zip(schema.fields())
193+
.find(|(_, field)| field.name() == &self.column)
194+
.map(|(stats, _)| ColumnStatistics {
195+
null_count: stats.null_count,
196+
..Default::default()
197+
})
198+
.unwrap_or_default();
188199
let column_statistics = inner_stats
189200
.column_statistics
190201
.into_iter()
191-
.chain([dist_col_stats])
202+
.zip(schema.fields())
203+
.filter(|(_, field)| field.name() != DIST_COL)
204+
.map(|(stats, _)| stats)
205+
.chain(std::iter::once(dist_stats))
192206
.collect::<Vec<_>>();
193207
Ok(Statistics {
194208
num_rows: inner_stats.num_rows,
195209
column_statistics,
196210
..Statistics::new_unknown(self.schema().as_ref())
197211
})
198-
// self.input.statistics()
199212
}
200213

201214
fn properties(&self) -> &PlanProperties {

0 commit comments

Comments
 (0)