From ae0860eb07093cf3a9471a718f70c1196bc53b9b Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 9 Mar 2022 17:59:37 -0700 Subject: [PATCH 1/3] Add PyDataFrame.explain --- datafusion/tests/test_dataframe.py | 14 ++++++++++++++ src/dataframe.rs | 5 +++++ 2 files changed, 19 insertions(+) diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py index 9a97c25..6e6d60f 100644 --- a/datafusion/tests/test_dataframe.py +++ b/datafusion/tests/test_dataframe.py @@ -179,3 +179,17 @@ def test_struct_select(struct_df): assert result.column(0) == pa.array([5, 7, 9]) assert result.column(1) == pa.array([-3, -3, -3]) + + +def test_explain(df): + df = df.select( + column("a") + column("b"), + column("a") - column("b"), + ) + + df = df.explain(False, False) + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert result.column(0) == pa.array(["logical_plan", "physical_plan"]) diff --git a/src/dataframe.rs b/src/dataframe.rs index 9050df9..a95d6ca 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -127,4 +127,9 @@ impl PyDataFrame { .join(right.df, join_type, &join_keys.0, &join_keys.1)?; Ok(Self::new(df)) } + + fn explain(&self, verbose: bool, analyze: bool) -> PyResult { + let df = self.df.explain(verbose, analyze)?; + Ok(Self::new(df)) + } } From c4277c05e43903671035d84c26d2ec67071df140 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 9 Mar 2022 18:03:55 -0700 Subject: [PATCH 2/3] fix indent --- datafusion/tests/test_dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py index 6e6d60f..79b88a6 100644 --- a/datafusion/tests/test_dataframe.py +++ b/datafusion/tests/test_dataframe.py @@ -185,7 +185,7 @@ def test_explain(df): df = df.select( column("a") + column("b"), column("a") - column("b"), - ) + ) df = df.explain(False, False) From 238d1f28ed439b67691d45b0f26858f4e5bff504 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 9 Mar 2022 18:21:18 -0700 Subject: [PATCH 3/3] print results in explain rather than returning the data --- datafusion/tests/test_dataframe.py | 8 +------- src/dataframe.rs | 7 +++++-- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py index 79b88a6..e6b9ef1 100644 --- a/datafusion/tests/test_dataframe.py +++ b/datafusion/tests/test_dataframe.py @@ -186,10 +186,4 @@ def test_explain(df): column("a") + column("b"), column("a") - column("b"), ) - - df = df.explain(False, False) - - # execute and collect the first (and only) batch - result = df.collect()[0] - - assert result.column(0) == pa.array(["logical_plan", "physical_plan"]) + df.explain() diff --git a/src/dataframe.rs b/src/dataframe.rs index a95d6ca..7c21102 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -128,8 +128,11 @@ impl PyDataFrame { Ok(Self::new(df)) } - fn explain(&self, verbose: bool, analyze: bool) -> PyResult { + /// Print the query plan + #[args(verbose = false, analyze = false)] + fn explain(&self, py: Python, verbose: bool, analyze: bool) -> PyResult<()> { let df = self.df.explain(verbose, analyze)?; - Ok(Self::new(df)) + let batches = wait_for_future(py, df.collect())?; + Ok(pretty::print_batches(&batches)?) } }