From c0c865b2dd4e121a4130006a2ed7cc1a1ec00e8d Mon Sep 17 00:00:00 2001
From: Xin Li <33629085+xinlifoobar@users.noreply.github.com>
Date: Tue, 21 May 2024 01:35:30 +0800
Subject: [PATCH] Add examples of how to convert logical plan to/from sql
 strings (#10558)

* Add examples of how to convert logical plan to/from sql strings

* Fix clippy

* fix comments

* fix comments
---
 datafusion-examples/examples/plan_to_sql.rs | 71 ++++++++++++++++++++-
 1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/datafusion-examples/examples/plan_to_sql.rs b/datafusion-examples/examples/plan_to_sql.rs
index 3915d3991f769..0e9485ba7fbe1 100644
--- a/datafusion-examples/examples/plan_to_sql.rs
+++ b/datafusion-examples/examples/plan_to_sql.rs
@@ -20,7 +20,7 @@ use datafusion::error::Result;
 use datafusion::prelude::*;
 use datafusion::sql::unparser::expr_to_sql;
 use datafusion_sql::unparser::dialect::CustomDialect;
-use datafusion_sql::unparser::Unparser;
+use datafusion_sql::unparser::{plan_to_sql, Unparser};
 
 /// This example demonstrates the programmatic construction of
 /// SQL using the DataFusion Expr [`Expr`] and LogicalPlan [`LogicalPlan`] API.
@@ -41,6 +41,8 @@ async fn main() -> Result<()> {
     simple_expr_to_sql_demo()?;
     simple_expr_to_sql_demo_no_escape()?;
     simple_expr_to_sql_demo_escape_mysql_style()?;
+    simple_plan_to_sql_parquet_dataframe_demo().await?;
+    round_trip_plan_to_sql_parquet_dataframe_demo().await?;
 
     Ok(())
 }
@@ -77,3 +79,70 @@ fn simple_expr_to_sql_demo_escape_mysql_style() -> Result<()> {
     assert_eq!(sql, r#"((`a` < 5) OR (`a` = 8))"#);
     Ok(())
 }
+
+/// DataFusion can convert a logical plan created with the DataFrame API to read from a parquet file
+/// back into SQL, escaping column names PostgreSQL style.
+async fn simple_plan_to_sql_parquet_dataframe_demo() -> Result<()> {
+    // create local execution context
+    let ctx = SessionContext::new();
+
+    let testdata = datafusion::test_util::parquet_test_data();
+    let df = ctx
+        .read_parquet(
+            &format!("{testdata}/alltypes_plain.parquet"),
+            ParquetReadOptions::default(),
+        )
+        .await?
+        .select_columns(&["id", "int_col", "double_col", "date_string_col"])?;
+
+    let ast = plan_to_sql(df.logical_plan())?;
+
+    let sql = format!("{}", ast);
+
+    assert_eq!(
+        sql,
+        r#"SELECT "?table?"."id", "?table?"."int_col", "?table?"."double_col", "?table?"."date_string_col" FROM "?table?""#
+    );
+
+    Ok(())
+}
+
+/// DataFusion can parse a SQL string into a DataFrame, add a filter programmatically, and convert the plan back to SQL.
+async fn round_trip_plan_to_sql_parquet_dataframe_demo() -> Result<()> {
+    // create local execution context
+    let ctx = SessionContext::new();
+
+    let testdata = datafusion::test_util::parquet_test_data();
+
+    // register parquet file with the execution context
+    ctx.register_parquet(
+        "alltypes_plain",
+        &format!("{testdata}/alltypes_plain.parquet"),
+        ParquetReadOptions::default(),
+    )
+    .await?;
+
+    // create a logical plan from a SQL string and then programmatically add new filters
+    let df = ctx
+        .sql(
+            "SELECT int_col, double_col, CAST(date_string_col as VARCHAR) \
+        FROM alltypes_plain",
+        )
+        .await?
+        .filter(
+            col("id")
+                .gt(lit(1))
+                .and(col("tinyint_col").lt(col("double_col"))),
+        )?;
+
+    let ast = plan_to_sql(df.logical_plan())?;
+
+    let sql = format!("{}", ast);
+
+    assert_eq!(
+        sql,
+        r#"SELECT "alltypes_plain"."int_col", "alltypes_plain"."double_col", CAST("alltypes_plain"."date_string_col" AS VARCHAR) FROM "alltypes_plain" WHERE (("alltypes_plain"."id" > 1) AND ("alltypes_plain"."tinyint_col" < "alltypes_plain"."double_col"))"#
+    );
+
+    Ok(())
+}
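
A minimal sketch of how the regenerated SQL could be exercised further, not taken from the patch itself: the string produced by plan_to_sql can be fed back through SessionContext::sql and executed on the same context. The helper name execute_unparsed_sql_demo is hypothetical, and it assumes alltypes_plain has already been registered via register_parquet as in the round-trip demo above.

use datafusion::error::Result;
use datafusion::prelude::*;
use datafusion_sql::unparser::plan_to_sql;

/// Hypothetical helper (not part of the patch): round-trip a DataFrame plan
/// through `plan_to_sql` and execute the regenerated SQL on the same context.
/// Assumes `alltypes_plain` has already been registered via `register_parquet`.
async fn execute_unparsed_sql_demo(ctx: &SessionContext) -> Result<()> {
    // build a plan by parsing SQL and then adding a filter programmatically
    let df = ctx
        .sql("SELECT int_col, double_col FROM alltypes_plain")
        .await?
        .filter(col("id").gt(lit(1)))?;

    // unparse the logical plan back into a SQL string
    let sql = plan_to_sql(df.logical_plan())?.to_string();

    // re-parse and run the regenerated SQL to confirm it is executable
    ctx.sql(&sql).await?.show().await?;

    Ok(())
}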