Skip to content

Commit e733bae

Browse files
authored
fix: do not panic when performing a pushdown scan on a multi-data-file fragment (#1873)
The normal scan algorithm is:

```
open fragment reader with projected schema
for batch in batches:
    scan batch
```

The pushdown algorithm is:

```
open fragment with full schema
for batch, simplified_projection in filter(batches):
    projected scan batch(simplified_projection)
```

This means that the data files that need to be read could change from batch to batch. This was not previously being accounted for and now it is.

Closes #1871
1 parent 6edcad7 commit e733bae

File tree

2 files changed

+60
-15
lines changed

2 files changed

+60
-15
lines changed

python/python/tests/test_dataset.py

+21
Original file line number · Diff line number · Diff line change
@@ -619,6 +619,27 @@ def test_merge_with_commit(tmp_path: Path):
619619
assert tbl == expected
620620

621621

622+
def test_merge_search(tmp_path: Path):
623+
left_table = pa.Table.from_pydict({"id": [1, 2, 3], "left": ["a", "b", "c"]})
624+
right_table = pa.Table.from_pydict({"id": [1, 2, 3], "right": ["A", "B", "C"]})
625+
626+
left_ds = lance.write_dataset(left_table, tmp_path / "left")
627+
628+
right_ds = lance.write_dataset(right_table, tmp_path / "right")
629+
left_ds.merge(right_ds, "id")
630+
631+
full = left_ds.to_table()
632+
full_filtered = left_ds.to_table(filter="id < 3")
633+
634+
partial = left_ds.to_table(columns=["left"])
635+
636+
assert full.column("left") == partial.column("left")
637+
638+
partial = left_ds.to_table(columns=["left"], filter="id < 3")
639+
640+
assert full_filtered.column("left") == partial.column("left")
641+
642+
622643
def test_data_files(tmp_path: Path):
623644
table = pa.Table.from_pydict({"a": range(100), "b": range(100)})
624645
base_dir = tmp_path / "test"

rust/lance/src/dataset/fragment.rs

+39-15
Original file line number · Diff line number · Diff line change
@@ -723,6 +723,9 @@ pub struct FragmentReader {
723723

724724
/// ID of the fragment
725725
fragment_id: usize,
726+
727+
/// True if we are reading the row id
728+
with_row_id: bool,
726729
}
727730

728731
impl std::fmt::Display for FragmentReader {
@@ -772,10 +775,12 @@ impl FragmentReader {
772775
readers,
773776
deletion_vec,
774777
fragment_id,
778+
with_row_id: false,
775779
})
776780
}
777781

778782
pub(crate) fn with_row_id(&mut self) -> &mut Self {
783+
self.with_row_id = true;
779784
self.readers[0].0.with_row_id(true);
780785
self
781786
}
@@ -856,22 +861,41 @@ impl FragmentReader {
856861
params: impl Into<ReadBatchParams> + Clone,
857862
projection: &Schema,
858863
) -> Result<RecordBatch> {
859-
let read_tasks = self.readers.iter().map(|(reader, schema)| {
860-
let projection = schema.intersection(projection);
861-
let params = params.clone();
862-
863-
async move {
864-
reader
865-
.read_batch(
866-
batch_id as i32,
867-
params,
868-
&projection?,
869-
self.deletion_vec.as_ref().map(|dv| dv.as_ref()),
870-
)
871-
.await
872-
}
873-
});
864+
let read_tasks = self
865+
.readers
866+
.iter()
867+
.enumerate()
868+
.map(|(reader_idx, (reader, schema))| {
869+
let projection = schema.intersection(projection);
870+
let params = params.clone();
871+
872+
async move {
873+
// Apply ? inside the task to keep read_tasks a simple iter of futures
874+
// for try_join_all
875+
let projection = projection?;
876+
// We always get the row_id from the first reader and so we need that even
877+
// if the projection is empty
878+
let need_for_row_id = self.with_row_id && reader_idx == 0;
879+
if projection.fields.is_empty() && !need_for_row_id {
880+
// The projection caused one of the data files to become
881+
// irrelevant and so we can skip it
882+
Result::Ok(None)
883+
} else {
884+
Ok(Some(
885+
reader
886+
.read_batch(
887+
batch_id as i32,
888+
params,
889+
&projection,
890+
self.deletion_vec.as_ref().map(|dv| dv.as_ref()),
891+
)
892+
.await?,
893+
))
894+
}
895+
}
896+
});
874897
let batches = try_join_all(read_tasks).await?;
898+
let batches = batches.into_iter().flatten().collect::<Vec<_>>();
875899
let result = merge_batches(&batches)?;
876900

877901
Ok(result)

0 commit comments

Comments
 (0)