Skip to content

Commit 8ac02bc

Browse files
authored
feat: update arrow to 51, datafusion to 37 (#2240)
1 parent 1828564 commit 8ac02bc

21 files changed

+526
-306
lines changed

Cargo.toml

+21-17
Original file line numberDiff line numberDiff line change
@@ -57,17 +57,17 @@ lance-test-macros = { version = "=0.10.15", path = "./rust/lance-test-macros" }
5757
lance-testing = { version = "=0.10.15", path = "./rust/lance-testing" }
5858
approx = "0.5.1"
5959
# Note that this one does not include pyarrow
60-
arrow = { version = "50.0.0", optional = false, features = ["prettyprint"] }
61-
arrow-arith = "50.0"
62-
arrow-array = "50.0"
63-
arrow-buffer = "50.0"
64-
arrow-cast = "50.0"
65-
arrow-data = "50.0"
66-
arrow-ipc = { version = "50.0", features = ["zstd"] }
67-
arrow-ord = "50.0"
68-
arrow-row = "50.0"
69-
arrow-schema = "50.0"
70-
arrow-select = "50.0"
60+
arrow = { version = "51.0.0", optional = false, features = ["prettyprint"] }
61+
arrow-arith = "51.0"
62+
arrow-array = "51.0"
63+
arrow-buffer = "51.0"
64+
arrow-cast = "51.0"
65+
arrow-data = "51.0"
66+
arrow-ipc = { version = "51.0", features = ["zstd"] }
67+
arrow-ord = "51.0"
68+
arrow-row = "51.0"
69+
arrow-schema = "51.0"
70+
arrow-select = "51.0"
7171
async-recursion = "1.0"
7272
async-trait = "0.1"
7373
aws-config = "0.56"
@@ -85,14 +85,18 @@ chrono = { version = "0.4.25", default-features = false, features = [
8585
"now",
8686
] }
8787
criterion = { version = "0.5", features = ["async", "async_tokio"] }
88-
datafusion = { version = "36.0.0", default-features = false, features = [
88+
datafusion = { version = "37.1", default-features = false, features = [
89+
"array_expressions",
90+
"regex_expressions",
91+
] }
92+
datafusion-common = "37.1"
93+
datafusion-functions = { version = "37.1", features = ["regex_expressions"] }
94+
datafusion-sql = "37.1"
95+
datafusion-expr = "37.1"
96+
datafusion-execution = "37.1"
97+
datafusion-physical-expr = { version = "37.1", features = [
8998
"regex_expressions",
9099
] }
91-
datafusion-common = "36.0"
92-
datafusion-sql = "36.0"
93-
datafusion-expr = "36.0"
94-
datafusion-execution = "36.0"
95-
datafusion-physical-expr = "36.0"
96100
either = "1.0"
97101
futures = "0.3"
98102
http = "0.2.9"

python/Cargo.toml

+4-4
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ name = "lance"
1212
crate-type = ["cdylib"]
1313

1414
[dependencies]
15-
arrow = { version = "50.0.0", features = ["pyarrow"] }
16-
arrow-array = "50.0"
17-
arrow-data = "50.0"
18-
arrow-schema = "50.0"
15+
arrow = { version = "51.0.0", features = ["pyarrow"] }
16+
arrow-array = "51.0"
17+
arrow-data = "51.0"
18+
arrow-schema = "51.0"
1919
object_store = "0.9.0"
2020
async-trait = "0.1"
2121
chrono = "0.4.31"

rust/lance-datafusion/Cargo.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@ arrow-ord.workspace = true
1717
async-trait.workspace = true
1818
datafusion.workspace = true
1919
datafusion-common.workspace = true
20+
datafusion-functions.workspace = true
2021
datafusion-physical-expr.workspace = true
21-
datafusion-substrait = { version = "36.0", optional = true }
22+
datafusion-substrait = { version = "37.1", optional = true }
2223
futures.workspace = true
2324
lance-arrow.workspace = true
2425
lance-core = { workspace = true, features = ["datafusion"] }

rust/lance-datafusion/src/exec.rs

+19-14
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ use datafusion::{
1717
TaskContext,
1818
},
1919
physical_plan::{
20-
streaming::PartitionStream, DisplayAs, DisplayFormatType, ExecutionPlan,
20+
streaming::PartitionStream, DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties,
2121
SendableRecordBatchStream,
2222
},
2323
};
24-
use datafusion_common::DataFusionError;
25-
use datafusion_physical_expr::Partitioning;
24+
use datafusion_common::{DataFusionError, Statistics};
25+
use datafusion_physical_expr::{EquivalenceProperties, Partitioning};
2626

2727
use lance_arrow::SchemaExt;
2828
use lance_core::Result;
@@ -32,11 +32,15 @@ use log::{info, warn};
3232
///
3333
/// It can only be used once, and will return the stream. After that the node
3434
/// is exhausted.
35+
///
36+
/// Note: the stream should be finite, otherwise we will report datafusion properties
37+
/// incorrectly.
3538
pub struct OneShotExec {
3639
stream: Mutex<Option<SendableRecordBatchStream>>,
3740
// We save off a copy of the schema to speed up formatting and so ExecutionPlan::schema & display_as
3841
// can still function after the stream is exhausted
3942
schema: Arc<ArrowSchema>,
43+
properties: PlanProperties,
4044
}
4145

4246
impl OneShotExec {
@@ -45,7 +49,12 @@ impl OneShotExec {
4549
let schema = stream.schema().clone();
4650
Self {
4751
stream: Mutex::new(Some(stream)),
48-
schema,
52+
schema: schema.clone(),
53+
properties: PlanProperties::new(
54+
EquivalenceProperties::new(schema),
55+
Partitioning::RoundRobinBatch(1),
56+
datafusion::physical_plan::ExecutionMode::Bounded,
57+
),
4958
}
5059
}
5160
}
@@ -96,14 +105,6 @@ impl ExecutionPlan for OneShotExec {
96105
self.schema.clone()
97106
}
98107

99-
fn output_partitioning(&self) -> datafusion_physical_expr::Partitioning {
100-
Partitioning::RoundRobinBatch(1)
101-
}
102-
103-
fn output_ordering(&self) -> Option<&[datafusion_physical_expr::PhysicalSortExpr]> {
104-
None
105-
}
106-
107108
fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
108109
vec![]
109110
}
@@ -135,7 +136,11 @@ impl ExecutionPlan for OneShotExec {
135136
}
136137

137138
fn statistics(&self) -> datafusion_common::Result<datafusion_common::Statistics> {
138-
todo!()
139+
Ok(Statistics::new_unknown(&self.schema))
140+
}
141+
142+
fn properties(&self) -> &datafusion::physical_plan::PlanProperties {
143+
&self.properties
139144
}
140145
}
141146

@@ -194,7 +199,7 @@ pub fn execute_plan(
194199
let session_state = SessionState::new_with_config_rt(session_config, runtime_env);
195200
// NOTE: we are only executing the first partition here. Therefore, if
196201
// the plan has more than one partition, we will be missing data.
197-
assert_eq!(plan.output_partitioning().partition_count(), 1);
202+
assert_eq!(plan.properties().partitioning.partition_count(), 1);
198203
Ok(plan.execute(0, session_state.task_ctx())?)
199204
}
200205

rust/lance-datafusion/src/expr.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,7 @@ pub async fn parse_substrait(expr: &[u8], input_schema: Arc<Schema>) -> Result<E
508508
match relation {
509509
TableReference::Bare { table } => {
510510
if table == "dummy" {
511-
Ok(Transformed::Yes(Expr::Column(Column {
511+
Ok(Transformed::yes(Expr::Column(Column {
512512
relation: None,
513513
name: column.name,
514514
})))
@@ -524,12 +524,12 @@ pub async fn parse_substrait(expr: &[u8], input_schema: Arc<Schema>) -> Result<E
524524
_ => Err(DataFusionError::Substrait("Unexpected partially or fully qualified table reference encountered when parsing filter".into()))
525525
}
526526
} else {
527-
Ok(Transformed::No(Expr::Column(column)))
527+
Ok(Transformed::no(Expr::Column(column)))
528528
}
529529
}
530-
_ => Ok(Transformed::No(node)),
530+
_ => Ok(Transformed::no(node)),
531531
})?;
532-
Ok(expr)
532+
Ok(expr.data)
533533
}
534534

535535
#[cfg(test)]

rust/lance-index/src/scalar/btree.rs

+2
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,8 @@ impl Ord for OrderableScalarValue {
494494
(Dictionary(_k1, _v1), Dictionary(_k2, _v2)) => todo!(),
495495
(Dictionary(_, v1), Null) => Self(*v1.clone()).cmp(&Self(ScalarValue::Null)),
496496
(Dictionary(_, _), _) => panic!("Attempt to compare Dictionary with non-Dictionary"),
497+
// What would a btree of unions even look like? May not be possible.
498+
(Union(_, _, _), _) => todo!("Support for union scalars"),
497499
(Null, Null) => Ordering::Equal,
498500
(Null, _) => todo!(),
499501
}

rust/lance-index/src/scalar/expression.rs

+12
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,18 @@ mod tests {
549549
fn options(&self) -> &ConfigOptions {
550550
todo!()
551551
}
552+
553+
fn udfs_names(&self) -> Vec<String> {
554+
todo!()
555+
}
556+
557+
fn udafs_names(&self) -> Vec<String> {
558+
todo!()
559+
}
560+
561+
fn udwfs_names(&self) -> Vec<String> {
562+
todo!()
563+
}
552564
}
553565

554566
fn check(

rust/lance/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ arrow.workspace = true
5757
num_cpus.workspace = true
5858
# TODO: use datafusion sub-modules to reduce build size?
5959
datafusion.workspace = true
60+
datafusion-functions.workspace = true
6061
datafusion-physical-expr.workspace = true
6162
lapack = { version = "0.19.0", optional = true }
6263
lru_time_cache = "0.11"

0 commit comments

Comments
 (0)