Skip to content

Commit 482b489

Browse files
goldmedalalamb
andauthored
Introduce UserDefinedLogicalNodeUnparser for User-defined Logical Plan unparsing (#13880)
* make ast builder public * introduce udlp unparser * add documents * add examples * add negative tests and fmt * fix the doc * rename udlp to extension * apply the first unparsing result only * improve the doc * seperate the enum for the unparsing result * fix the doc --------- Co-authored-by: Andrew Lamb <[email protected]>
1 parent 9fbcf23 commit 482b489

File tree

6 files changed

+526
-25
lines changed

6 files changed

+526
-25
lines changed

datafusion-examples/examples/plan_to_sql.rs

+162-1
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,25 @@
1616
// under the License.
1717

1818
use datafusion::error::Result;
19-
19+
use datafusion::logical_expr::sqlparser::ast::Statement;
2020
use datafusion::prelude::*;
2121
use datafusion::sql::unparser::expr_to_sql;
22+
use datafusion_common::DFSchemaRef;
23+
use datafusion_expr::{
24+
Extension, LogicalPlan, LogicalPlanBuilder, UserDefinedLogicalNode,
25+
UserDefinedLogicalNodeCore,
26+
};
27+
use datafusion_sql::unparser::ast::{
28+
DerivedRelationBuilder, QueryBuilder, RelationBuilder, SelectBuilder,
29+
};
2230
use datafusion_sql::unparser::dialect::CustomDialectBuilder;
31+
use datafusion_sql::unparser::extension_unparser::UserDefinedLogicalNodeUnparser;
32+
use datafusion_sql::unparser::extension_unparser::{
33+
UnparseToStatementResult, UnparseWithinStatementResult,
34+
};
2335
use datafusion_sql::unparser::{plan_to_sql, Unparser};
36+
use std::fmt;
37+
use std::sync::Arc;
2438

2539
/// This example demonstrates the programmatic construction of SQL strings using
2640
/// the DataFusion Expr [`Expr`] and LogicalPlan [`LogicalPlan`] API.
@@ -44,6 +58,10 @@ use datafusion_sql::unparser::{plan_to_sql, Unparser};
4458
///
4559
/// 5. [`round_trip_plan_to_sql_demo`]: Create a logical plan from a SQL string, modify it using the
4660
/// DataFrames API and convert it back to a sql string.
61+
///
62+
/// 6. [`unparse_my_logical_plan_as_statement`]: Create a custom logical plan and unparse it as a statement.
63+
///
64+
/// 7. [`unparse_my_logical_plan_as_subquery`]: Create a custom logical plan and unparse it as a subquery.
4765
4866
#[tokio::main]
4967
async fn main() -> Result<()> {
@@ -53,6 +71,8 @@ async fn main() -> Result<()> {
5371
simple_expr_to_sql_demo_escape_mysql_style()?;
5472
simple_plan_to_sql_demo().await?;
5573
round_trip_plan_to_sql_demo().await?;
74+
unparse_my_logical_plan_as_statement().await?;
75+
unparse_my_logical_plan_as_subquery().await?;
5676
Ok(())
5777
}
5878

@@ -152,3 +172,144 @@ async fn round_trip_plan_to_sql_demo() -> Result<()> {
152172

153173
Ok(())
154174
}
175+
176+
#[derive(Debug, PartialEq, Eq, Hash, PartialOrd)]
177+
struct MyLogicalPlan {
178+
input: LogicalPlan,
179+
}
180+
181+
impl UserDefinedLogicalNodeCore for MyLogicalPlan {
182+
fn name(&self) -> &str {
183+
"MyLogicalPlan"
184+
}
185+
186+
fn inputs(&self) -> Vec<&LogicalPlan> {
187+
vec![&self.input]
188+
}
189+
190+
fn schema(&self) -> &DFSchemaRef {
191+
self.input.schema()
192+
}
193+
194+
fn expressions(&self) -> Vec<Expr> {
195+
vec![]
196+
}
197+
198+
fn fmt_for_explain(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
199+
write!(f, "MyLogicalPlan")
200+
}
201+
202+
fn with_exprs_and_inputs(
203+
&self,
204+
_exprs: Vec<Expr>,
205+
inputs: Vec<LogicalPlan>,
206+
) -> Result<Self> {
207+
Ok(MyLogicalPlan {
208+
input: inputs.into_iter().next().unwrap(),
209+
})
210+
}
211+
}
212+
213+
struct PlanToStatement {}
214+
impl UserDefinedLogicalNodeUnparser for PlanToStatement {
215+
fn unparse_to_statement(
216+
&self,
217+
node: &dyn UserDefinedLogicalNode,
218+
unparser: &Unparser,
219+
) -> Result<UnparseToStatementResult> {
220+
if let Some(plan) = node.as_any().downcast_ref::<MyLogicalPlan>() {
221+
let input = unparser.plan_to_sql(&plan.input)?;
222+
Ok(UnparseToStatementResult::Modified(input))
223+
} else {
224+
Ok(UnparseToStatementResult::Unmodified)
225+
}
226+
}
227+
}
228+
229+
/// This example demonstrates how to unparse a custom logical plan as a statement.
230+
/// The custom logical plan is a simple extension of the logical plan that reads from a parquet file.
231+
/// It can be unparse as a statement that reads from the same parquet file.
232+
async fn unparse_my_logical_plan_as_statement() -> Result<()> {
233+
let ctx = SessionContext::new();
234+
let testdata = datafusion::test_util::parquet_test_data();
235+
let inner_plan = ctx
236+
.read_parquet(
237+
&format!("{testdata}/alltypes_plain.parquet"),
238+
ParquetReadOptions::default(),
239+
)
240+
.await?
241+
.select_columns(&["id", "int_col", "double_col", "date_string_col"])?
242+
.into_unoptimized_plan();
243+
244+
let node = Arc::new(MyLogicalPlan { input: inner_plan });
245+
246+
let my_plan = LogicalPlan::Extension(Extension { node });
247+
let unparser =
248+
Unparser::default().with_extension_unparsers(vec![Arc::new(PlanToStatement {})]);
249+
let sql = unparser.plan_to_sql(&my_plan)?.to_string();
250+
assert_eq!(
251+
sql,
252+
r#"SELECT "?table?".id, "?table?".int_col, "?table?".double_col, "?table?".date_string_col FROM "?table?""#
253+
);
254+
Ok(())
255+
}
256+
257+
struct PlanToSubquery {}
258+
impl UserDefinedLogicalNodeUnparser for PlanToSubquery {
259+
fn unparse(
260+
&self,
261+
node: &dyn UserDefinedLogicalNode,
262+
unparser: &Unparser,
263+
_query: &mut Option<&mut QueryBuilder>,
264+
_select: &mut Option<&mut SelectBuilder>,
265+
relation: &mut Option<&mut RelationBuilder>,
266+
) -> Result<UnparseWithinStatementResult> {
267+
if let Some(plan) = node.as_any().downcast_ref::<MyLogicalPlan>() {
268+
let Statement::Query(input) = unparser.plan_to_sql(&plan.input)? else {
269+
return Ok(UnparseWithinStatementResult::Unmodified);
270+
};
271+
let mut derived_builder = DerivedRelationBuilder::default();
272+
derived_builder.subquery(input);
273+
derived_builder.lateral(false);
274+
if let Some(rel) = relation {
275+
rel.derived(derived_builder);
276+
}
277+
}
278+
Ok(UnparseWithinStatementResult::Modified)
279+
}
280+
}
281+
282+
/// This example demonstrates how to unparse a custom logical plan as a subquery.
283+
/// The custom logical plan is a simple extension of the logical plan that reads from a parquet file.
284+
/// It can be unparse as a subquery that reads from the same parquet file, with some columns projected.
285+
async fn unparse_my_logical_plan_as_subquery() -> Result<()> {
286+
let ctx = SessionContext::new();
287+
let testdata = datafusion::test_util::parquet_test_data();
288+
let inner_plan = ctx
289+
.read_parquet(
290+
&format!("{testdata}/alltypes_plain.parquet"),
291+
ParquetReadOptions::default(),
292+
)
293+
.await?
294+
.select_columns(&["id", "int_col", "double_col", "date_string_col"])?
295+
.into_unoptimized_plan();
296+
297+
let node = Arc::new(MyLogicalPlan { input: inner_plan });
298+
299+
let my_plan = LogicalPlan::Extension(Extension { node });
300+
let plan = LogicalPlanBuilder::from(my_plan)
301+
.project(vec![
302+
col("id").alias("my_id"),
303+
col("int_col").alias("my_int"),
304+
])?
305+
.build()?;
306+
let unparser =
307+
Unparser::default().with_extension_unparsers(vec![Arc::new(PlanToSubquery {})]);
308+
let sql = unparser.plan_to_sql(&plan)?.to_string();
309+
assert_eq!(
310+
sql,
311+
"SELECT \"?table?\".id AS my_id, \"?table?\".int_col AS my_int FROM \
312+
(SELECT \"?table?\".id, \"?table?\".int_col, \"?table?\".double_col, \"?table?\".date_string_col FROM \"?table?\")",
313+
);
314+
Ok(())
315+
}

datafusion/sql/src/unparser/ast.rs

+8-14
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,13 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! This file contains builders to create SQL ASTs. They are purposefully
19-
//! not exported as they will eventually be move to the SQLparser package.
20-
//!
21-
//!
22-
//! See <https://github.com/apache/datafusion/issues/8661>
23-
2418
use core::fmt;
2519

2620
use sqlparser::ast;
2721
use sqlparser::ast::helpers::attached_token::AttachedToken;
2822

2923
#[derive(Clone)]
30-
pub(super) struct QueryBuilder {
24+
pub struct QueryBuilder {
3125
with: Option<ast::With>,
3226
body: Option<Box<ast::SetExpr>>,
3327
order_by: Vec<ast::OrderByExpr>,
@@ -128,7 +122,7 @@ impl Default for QueryBuilder {
128122
}
129123

130124
#[derive(Clone)]
131-
pub(super) struct SelectBuilder {
125+
pub struct SelectBuilder {
132126
distinct: Option<ast::Distinct>,
133127
top: Option<ast::Top>,
134128
projection: Vec<ast::SelectItem>,
@@ -299,7 +293,7 @@ impl Default for SelectBuilder {
299293
}
300294

301295
#[derive(Clone)]
302-
pub(super) struct TableWithJoinsBuilder {
296+
pub struct TableWithJoinsBuilder {
303297
relation: Option<RelationBuilder>,
304298
joins: Vec<ast::Join>,
305299
}
@@ -346,7 +340,7 @@ impl Default for TableWithJoinsBuilder {
346340
}
347341

348342
#[derive(Clone)]
349-
pub(super) struct RelationBuilder {
343+
pub struct RelationBuilder {
350344
relation: Option<TableFactorBuilder>,
351345
}
352346

@@ -421,7 +415,7 @@ impl Default for RelationBuilder {
421415
}
422416

423417
#[derive(Clone)]
424-
pub(super) struct TableRelationBuilder {
418+
pub struct TableRelationBuilder {
425419
name: Option<ast::ObjectName>,
426420
alias: Option<ast::TableAlias>,
427421
args: Option<Vec<ast::FunctionArg>>,
@@ -491,7 +485,7 @@ impl Default for TableRelationBuilder {
491485
}
492486
}
493487
#[derive(Clone)]
494-
pub(super) struct DerivedRelationBuilder {
488+
pub struct DerivedRelationBuilder {
495489
lateral: Option<bool>,
496490
subquery: Option<Box<ast::Query>>,
497491
alias: Option<ast::TableAlias>,
@@ -541,7 +535,7 @@ impl Default for DerivedRelationBuilder {
541535
}
542536

543537
#[derive(Clone)]
544-
pub(super) struct UnnestRelationBuilder {
538+
pub struct UnnestRelationBuilder {
545539
pub alias: Option<ast::TableAlias>,
546540
pub array_exprs: Vec<ast::Expr>,
547541
with_offset: bool,
@@ -605,7 +599,7 @@ impl Default for UnnestRelationBuilder {
605599
/// Runtime error when a `build()` method is called and one or more required fields
606600
/// do not have a value.
607601
#[derive(Debug, Clone)]
608-
pub(super) struct UninitializedFieldError(&'static str);
602+
pub struct UninitializedFieldError(&'static str);
609603

610604
impl UninitializedFieldError {
611605
/// Create a new `UninitializedFieldError` for the specified field name.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use crate::unparser::ast::{QueryBuilder, RelationBuilder, SelectBuilder};
19+
use crate::unparser::Unparser;
20+
use datafusion_expr::UserDefinedLogicalNode;
21+
use sqlparser::ast::Statement;
22+
23+
/// This trait allows users to define custom unparser logic for their custom logical nodes.
24+
pub trait UserDefinedLogicalNodeUnparser {
25+
/// Unparse the custom logical node to SQL within a statement.
26+
///
27+
/// This method is called when the custom logical node is part of a statement.
28+
/// e.g. `SELECT * FROM custom_logical_node`
29+
///
30+
/// The return value should be [UnparseWithinStatementResult::Modified] if the custom logical node was successfully unparsed.
31+
/// Otherwise, return [UnparseWithinStatementResult::Unmodified].
32+
fn unparse(
33+
&self,
34+
_node: &dyn UserDefinedLogicalNode,
35+
_unparser: &Unparser,
36+
_query: &mut Option<&mut QueryBuilder>,
37+
_select: &mut Option<&mut SelectBuilder>,
38+
_relation: &mut Option<&mut RelationBuilder>,
39+
) -> datafusion_common::Result<UnparseWithinStatementResult> {
40+
Ok(UnparseWithinStatementResult::Unmodified)
41+
}
42+
43+
/// Unparse the custom logical node to a statement.
44+
///
45+
/// This method is called when the custom logical node is a custom statement.
46+
///
47+
/// The return value should be [UnparseToStatementResult::Modified] if the custom logical node was successfully unparsed.
48+
/// Otherwise, return [UnparseToStatementResult::Unmodified].
49+
fn unparse_to_statement(
50+
&self,
51+
_node: &dyn UserDefinedLogicalNode,
52+
_unparser: &Unparser,
53+
) -> datafusion_common::Result<UnparseToStatementResult> {
54+
Ok(UnparseToStatementResult::Unmodified)
55+
}
56+
}
57+
58+
/// The result of unparsing a custom logical node within a statement.
59+
pub enum UnparseWithinStatementResult {
60+
/// If the custom logical node was successfully unparsed within a statement.
61+
Modified,
62+
/// If the custom logical node wasn't unparsed.
63+
Unmodified,
64+
}
65+
66+
/// The result of unparsing a custom logical node to a statement.
67+
pub enum UnparseToStatementResult {
68+
/// If the custom logical node was successfully unparsed to a statement.
69+
Modified(Statement),
70+
/// If the custom logical node wasn't unparsed.
71+
Unmodified,
72+
}

0 commit comments

Comments
 (0)