Skip to content

Commit 6aa423b

Browse files
alamb and metesynnada authored
feature: Support EXPLAIN COPY (#7291)
* Support EXPLAIN COPY * clippy * clippy * Fix argument handling * Apply suggestions from code review Co-authored-by: Metehan Yıldırım <[email protected]> * Improve nested explain error --------- Co-authored-by: Metehan Yıldırım <[email protected]>
1 parent 835553e commit 6aa423b

File tree

6 files changed

+177
-67
lines changed

6 files changed

+177
-67
lines changed

datafusion/core/src/execution/context.rs

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1698,30 +1698,39 @@ impl SessionState {
16981698
}
16991699

17001700
let mut visitor = RelationVisitor(&mut relations);
1701-
match statement {
1702-
DFStatement::Statement(s) => {
1703-
let _ = s.as_ref().visit(&mut visitor);
1704-
}
1705-
DFStatement::CreateExternalTable(table) => {
1706-
visitor
1707-
.0
1708-
.insert(ObjectName(vec![Ident::from(table.name.as_str())]));
1709-
}
1710-
DFStatement::DescribeTableStmt(table) => visitor.insert(&table.table_name),
1711-
DFStatement::CopyTo(CopyToStatement {
1712-
source,
1713-
target: _,
1714-
options: _,
1715-
}) => match source {
1716-
CopyToSource::Relation(table_name) => {
1717-
visitor.insert(table_name);
1701+
fn visit_statement(statement: &DFStatement, visitor: &mut RelationVisitor<'_>) {
1702+
match statement {
1703+
DFStatement::Statement(s) => {
1704+
let _ = s.as_ref().visit(visitor);
17181705
}
1719-
CopyToSource::Query(query) => {
1720-
query.visit(&mut visitor);
1706+
DFStatement::CreateExternalTable(table) => {
1707+
visitor
1708+
.0
1709+
.insert(ObjectName(vec![Ident::from(table.name.as_str())]));
17211710
}
1722-
},
1711+
DFStatement::DescribeTableStmt(table) => {
1712+
visitor.insert(&table.table_name)
1713+
}
1714+
DFStatement::CopyTo(CopyToStatement {
1715+
source,
1716+
target: _,
1717+
options: _,
1718+
}) => match source {
1719+
CopyToSource::Relation(table_name) => {
1720+
visitor.insert(table_name);
1721+
}
1722+
CopyToSource::Query(query) => {
1723+
query.visit(visitor);
1724+
}
1725+
},
1726+
DFStatement::Explain(explain) => {
1727+
visit_statement(&explain.statement, visitor)
1728+
}
1729+
}
17231730
}
17241731

1732+
visit_statement(statement, &mut visitor);
1733+
17251734
// Always include information_schema if available
17261735
if self.config.information_schema() {
17271736
for s in INFORMATION_SCHEMA_TABLES {

datafusion/sql/src/parser.rs

Lines changed: 103 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,35 @@ fn parse_file_type(s: &str) -> Result<String, ParserError> {
4444
Ok(s.to_uppercase())
4545
}
4646

47+
/// DataFusion specific EXPLAIN (needed so we can EXPLAIN datafusion
48+
/// specific COPY and other statements)
49+
#[derive(Debug, Clone, PartialEq, Eq)]
50+
pub struct ExplainStatement {
51+
pub analyze: bool,
52+
pub verbose: bool,
53+
pub statement: Box<Statement>,
54+
}
55+
56+
impl fmt::Display for ExplainStatement {
57+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
58+
let Self {
59+
analyze,
60+
verbose,
61+
statement,
62+
} = self;
63+
64+
write!(f, "EXPLAIN ")?;
65+
if *analyze {
66+
write!(f, "ANALYZE ")?;
67+
}
68+
if *verbose {
69+
write!(f, "VERBOSE ")?;
70+
}
71+
72+
write!(f, "{statement}")
73+
}
74+
}
75+
4776
/// DataFusion extension DDL for `COPY`
4877
///
4978
/// # Syntax:
@@ -74,7 +103,7 @@ pub struct CopyToStatement {
74103
/// The URL to where the data is heading
75104
pub target: String,
76105
/// Target specific options
77-
pub options: HashMap<String, Value>,
106+
pub options: Vec<(String, Value)>,
78107
}
79108

80109
impl fmt::Display for CopyToStatement {
@@ -88,10 +117,8 @@ impl fmt::Display for CopyToStatement {
88117
write!(f, "COPY {source} TO {target}")?;
89118

90119
if !options.is_empty() {
91-
let mut opts: Vec<_> =
92-
options.iter().map(|(k, v)| format!("{k} {v}")).collect();
120+
let opts: Vec<_> = options.iter().map(|(k, v)| format!("{k} {v}")).collect();
93121
// print them in the order they were specified
94-
opts.sort_unstable();
95122
write!(f, " ({})", opts.join(", "))?;
96123
}
97124

@@ -208,6 +235,8 @@ pub enum Statement {
208235
DescribeTableStmt(DescribeTableStmt),
209236
/// Extension: `COPY TO`
210237
CopyTo(CopyToStatement),
238+
/// EXPLAIN for extensions
239+
Explain(ExplainStatement),
211240
}
212241

213242
impl fmt::Display for Statement {
@@ -217,11 +246,12 @@ impl fmt::Display for Statement {
217246
Statement::CreateExternalTable(stmt) => write!(f, "{stmt}"),
218247
Statement::DescribeTableStmt(_) => write!(f, "DESCRIBE TABLE ..."),
219248
Statement::CopyTo(stmt) => write!(f, "{stmt}"),
249+
Statement::Explain(stmt) => write!(f, "{stmt}"),
220250
}
221251
}
222252
}
223253

224-
/// DataFusion SQL Parser based on [`sqlparser`]
254+
/// DataFusion SQL Parser based on [`sqlparser`]
225255
///
226256
/// Parses DataFusion's SQL dialect, often delegating to [`sqlparser`]'s
227257
/// [`Parser`](sqlparser::parser::Parser).
@@ -307,24 +337,24 @@ impl<'a> DFParser<'a> {
307337
Token::Word(w) => {
308338
match w.keyword {
309339
Keyword::CREATE => {
310-
// move one token forward
311-
self.parser.next_token();
312-
// use custom parsing
340+
self.parser.next_token(); // CREATE
313341
self.parse_create()
314342
}
315343
Keyword::COPY => {
316-
// move one token forward
317-
self.parser.next_token();
344+
self.parser.next_token(); // COPY
318345
self.parse_copy()
319346
}
320347
Keyword::DESCRIBE => {
321-
// move one token forward
322-
self.parser.next_token();
323-
// use custom parsing
348+
self.parser.next_token(); // DESCRIBE
324349
self.parse_describe()
325350
}
351+
Keyword::EXPLAIN => {
352+
// (TODO parse all supported statements)
353+
self.parser.next_token(); // EXPLAIN
354+
self.parse_explain()
355+
}
326356
_ => {
327-
// use the native parser
357+
// use sqlparser-rs parser
328358
Ok(Statement::Statement(Box::from(
329359
self.parser.parse_statement()?,
330360
)))
@@ -369,7 +399,7 @@ impl<'a> DFParser<'a> {
369399
let options = if self.parser.peek_token().token == Token::LParen {
370400
self.parse_value_options()?
371401
} else {
372-
HashMap::new()
402+
vec![]
373403
};
374404

375405
Ok(Statement::CopyTo(CopyToStatement {
@@ -421,6 +451,19 @@ impl<'a> DFParser<'a> {
421451
}
422452
}
423453

454+
/// Parse a SQL `EXPLAIN`
455+
pub fn parse_explain(&mut self) -> Result<Statement, ParserError> {
456+
let analyze = self.parser.parse_keyword(Keyword::ANALYZE);
457+
let verbose = self.parser.parse_keyword(Keyword::VERBOSE);
458+
let statement = self.parse_statement()?;
459+
460+
Ok(Statement::Explain(ExplainStatement {
461+
statement: Box::new(statement),
462+
analyze,
463+
verbose,
464+
}))
465+
}
466+
424467
/// Parse a SQL `CREATE` statement handling `CREATE EXTERNAL TABLE`
425468
pub fn parse_create(&mut self) -> Result<Statement, ParserError> {
426469
if self.parser.parse_keyword(Keyword::EXTERNAL) {
@@ -758,14 +801,14 @@ impl<'a> DFParser<'a> {
758801
/// Unlike [`Self::parse_string_options`], this method supports
759802
/// keywords as key names as well as multiple value types such as
760803
/// Numbers as well as Strings.
761-
fn parse_value_options(&mut self) -> Result<HashMap<String, Value>, ParserError> {
762-
let mut options = HashMap::new();
804+
fn parse_value_options(&mut self) -> Result<Vec<(String, Value)>, ParserError> {
805+
let mut options = vec![];
763806
self.parser.expect_token(&Token::LParen)?;
764807

765808
loop {
766809
let key = self.parse_option_key()?;
767810
let value = self.parse_option_value()?;
768-
options.insert(key, value);
811+
options.push((key, value));
769812
let comma = self.parser.consume_token(&Token::Comma);
770813
if self.parser.consume_token(&Token::RParen) {
771814
// allow a trailing comma, even though it's not in standard
@@ -1285,13 +1328,39 @@ mod tests {
12851328
let expected = Statement::CopyTo(CopyToStatement {
12861329
source: object_name("foo"),
12871330
target: "bar".to_string(),
1288-
options: HashMap::new(),
1331+
options: vec![],
12891332
});
12901333

12911334
assert_eq!(verified_stmt(sql), expected);
12921335
Ok(())
12931336
}
12941337

1338+
#[test]
1339+
fn explain_copy_to_table_to_table() -> Result<(), ParserError> {
1340+
let cases = vec![
1341+
("EXPLAIN COPY foo TO bar", false, false),
1342+
("EXPLAIN ANALYZE COPY foo TO bar", true, false),
1343+
("EXPLAIN VERBOSE COPY foo TO bar", false, true),
1344+
("EXPLAIN ANALYZE VERBOSE COPY foo TO bar", true, true),
1345+
];
1346+
for (sql, analyze, verbose) in cases {
1347+
println!("sql: {sql}, analyze: {analyze}, verbose: {verbose}");
1348+
1349+
let expected_copy = Statement::CopyTo(CopyToStatement {
1350+
source: object_name("foo"),
1351+
target: "bar".to_string(),
1352+
options: vec![],
1353+
});
1354+
let expected = Statement::Explain(ExplainStatement {
1355+
analyze,
1356+
verbose,
1357+
statement: Box::new(expected_copy),
1358+
});
1359+
assert_eq!(verified_stmt(sql), expected);
1360+
}
1361+
Ok(())
1362+
}
1363+
12951364
#[test]
12961365
fn copy_to_query_to_table() -> Result<(), ParserError> {
12971366
let statement = verified_stmt("SELECT 1");
@@ -1313,7 +1382,7 @@ mod tests {
13131382
let expected = Statement::CopyTo(CopyToStatement {
13141383
source: CopyToSource::Query(query),
13151384
target: "bar".to_string(),
1316-
options: HashMap::new(),
1385+
options: vec![],
13171386
});
13181387
assert_eq!(verified_stmt(sql), expected);
13191388
Ok(())
@@ -1325,28 +1394,22 @@ mod tests {
13251394
let expected = Statement::CopyTo(CopyToStatement {
13261395
source: object_name("foo"),
13271396
target: "bar".to_string(),
1328-
options: HashMap::from([(
1397+
options: vec![(
13291398
"row_group_size".to_string(),
13301399
Value::Number("55".to_string(), false),
1331-
)]),
1400+
)],
13321401
});
13331402
assert_eq!(verified_stmt(sql), expected);
13341403
Ok(())
13351404
}
13361405

13371406
#[test]
13381407
fn copy_to_multi_options() -> Result<(), ParserError> {
1408+
// order of options is preserved
13391409
let sql =
13401410
"COPY foo TO bar (format parquet, row_group_size 55, compression snappy)";
1341-
// canonical order is alphabetical
1342-
let canonical =
1343-
"COPY foo TO bar (compression snappy, format parquet, row_group_size 55)";
13441411

1345-
let expected_options = HashMap::from([
1346-
(
1347-
"compression".to_string(),
1348-
Value::UnQuotedString("snappy".to_string()),
1349-
),
1412+
let expected_options = vec![
13501413
(
13511414
"format".to_string(),
13521415
Value::UnQuotedString("parquet".to_string()),
@@ -1355,14 +1418,17 @@ mod tests {
13551418
"row_group_size".to_string(),
13561419
Value::Number("55".to_string(), false),
13571420
),
1358-
]);
1421+
(
1422+
"compression".to_string(),
1423+
Value::UnQuotedString("snappy".to_string()),
1424+
),
1425+
];
13591426

1360-
let options =
1361-
if let Statement::CopyTo(copy_to) = one_statement_parses_to(sql, canonical) {
1362-
copy_to.options
1363-
} else {
1364-
panic!("Expected copy");
1365-
};
1427+
let options = if let Statement::CopyTo(copy_to) = verified_stmt(sql) {
1428+
copy_to.options
1429+
} else {
1430+
panic!("Expected copy");
1431+
};
13661432

13671433
assert_eq!(options, expected_options);
13681434

datafusion/sql/src/statement.rs

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
use crate::parser::{
1919
CopyToSource, CopyToStatement, CreateExternalTable, DFParser, DescribeTableStmt,
20-
LexOrdering, Statement as DFStatement,
20+
ExplainStatement, LexOrdering, Statement as DFStatement,
2121
};
2222
use crate::planner::{
2323
object_name_to_qualifier, ContextProvider, PlannerContext, SqlToRel,
@@ -93,6 +93,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
9393
DFStatement::Statement(s) => self.sql_statement_to_plan(*s),
9494
DFStatement::DescribeTableStmt(s) => self.describe_table_to_plan(s),
9595
DFStatement::CopyTo(s) => self.copy_to_plan(s),
96+
DFStatement::Explain(ExplainStatement {
97+
verbose,
98+
analyze,
99+
statement,
100+
}) => self.explain_to_plan(verbose, analyze, *statement),
96101
}
97102
}
98103

@@ -127,7 +132,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
127132
format: _,
128133
describe_alias: _,
129134
..
130-
} => self.explain_statement_to_plan(verbose, analyze, *statement),
135+
} => {
136+
self.explain_to_plan(verbose, analyze, DFStatement::Statement(statement))
137+
}
131138
Statement::Query(query) => self.query_to_plan(*query, planner_context),
132139
Statement::ShowVariable { variable } => self.show_variable_to_plan(&variable),
133140
Statement::SetVariable {
@@ -712,13 +719,18 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
712719

713720
/// Generate a plan for EXPLAIN ... that will print out a plan
714721
///
715-
fn explain_statement_to_plan(
722+
/// Note this handles both the sqlparser `EXPLAIN` statement and the
723+
/// DataFusion-specific `EXPLAIN` statement (e.g. `EXPLAIN COPY ...`).
724+
fn explain_to_plan(
716725
&self,
717726
verbose: bool,
718727
analyze: bool,
719-
statement: Statement,
728+
statement: DFStatement,
720729
) -> Result<LogicalPlan> {
721-
let plan = self.sql_statement_to_plan(statement)?;
730+
let plan = self.statement_to_plan(statement)?;
731+
if matches!(plan, LogicalPlan::Explain(_)) {
732+
return plan_err!("Nested EXPLAINs are not supported");
733+
}
722734
let plan = Arc::new(plan);
723735
let schema = LogicalPlan::explain_schema();
724736
let schema = schema.to_dfschema_ref()?;

0 commit comments

Comments
 (0)