Skip to content

Commit 57dee40

Browse files
authored
fix: handle null return from predicate evaluation in pushdown (#1898)
1 parent eccb929 commit 57dee40

File tree

1 file changed

+53
-0
lines changed

1 file changed

+53
-0
lines changed

rust/lance/src/io/exec/pushdown_scan.rs

+53
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,12 @@ impl FragmentScanner {
383383
// Nothing matched
384384
return Ok(None);
385385
}
386+
ColumnarValue::Scalar(ScalarValue::Boolean(None)) => {
387+
// The predicate evaluated to null for all inputs. Usually
388+
// this means that all inputs were null. When it comes to
389+
// filtering, null means no
390+
return Ok(None);
391+
}
386392
ColumnarValue::Array(array) => {
387393
let array = array
388394
.as_any()
@@ -708,6 +714,53 @@ mod test {
708714
assert!(results.is_empty());
709715
}
710716

717+
#[tokio::test]
718+
async fn test_null_batch() {
719+
// If every row in a batch is null then a predicate can evaluate to Scalar(NULL)
720+
// Ensure we handle that.
721+
let test_dir = tempdir().unwrap();
722+
let test_uri = test_dir.path().to_str().unwrap();
723+
724+
let schema = Arc::new(ArrowSchema::new(vec![Field::new(
725+
"s",
726+
DataType::Utf8,
727+
true,
728+
)]));
729+
let num_rows: usize = 10;
730+
// Create a batch where every row is null
731+
let batches = vec![RecordBatch::try_new(
732+
schema.clone(),
733+
vec![Arc::new(StringArray::from_iter(
734+
(0..num_rows).map(|_| Option::<String>::None),
735+
))],
736+
)
737+
.unwrap()];
738+
let batches = RecordBatchIterator::new(batches.into_iter().map(Ok), schema.clone());
739+
740+
let dataset = Dataset::write(batches, test_uri, None).await.unwrap();
741+
742+
let fragments = dataset.fragments().clone();
743+
let projection = Arc::new(dataset.schema().clone());
744+
745+
let predicate = col("s").eq(lit("x"));
746+
747+
let exec = LancePushdownScanExec::try_new(
748+
Arc::new(dataset),
749+
fragments,
750+
projection,
751+
predicate,
752+
ScanConfig::default(),
753+
)
754+
.unwrap();
755+
756+
let ctx = SessionContext::new();
757+
758+
let results = exec.execute(0, ctx.task_ctx()).unwrap();
759+
assert_eq!(results.schema(), exec.schema());
760+
let results = results.try_collect::<Vec<_>>().await.unwrap();
761+
assert!(results.is_empty());
762+
}
763+
711764
#[tokio::test]
712765
async fn test_nested_filter() {
713766
// Validate we can filter and project nested columns and they will be

0 commit comments

Comments
 (0)