Skip to content

Commit f3722c0

Browse files
authored
Add SQLOptions for controlling allowed SQL statements, update docs (#7333)
* Add `SQLOptions` for controlling allowed SQL statements, update docs * fix docs
1 parent 6aa423b commit f3722c0

File tree

4 files changed

+326
-56
lines changed

4 files changed

+326
-56
lines changed

datafusion/core/src/execution/context.rs

Lines changed: 208 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,11 @@ use crate::{
2828
optimizer::optimizer::Optimizer,
2929
physical_optimizer::optimizer::{PhysicalOptimizer, PhysicalOptimizerRule},
3030
};
31-
use datafusion_common::{alias::AliasGenerator, not_impl_err, plan_err};
31+
use datafusion_common::{
32+
alias::AliasGenerator,
33+
not_impl_err, plan_err,
34+
tree_node::{TreeNode, TreeNodeVisitor, VisitRecursion},
35+
};
3236
use datafusion_execution::registry::SerializerRegistry;
3337
use datafusion_expr::{
3438
logical_plan::{DdlStatement, Statement},
@@ -163,35 +167,64 @@ where
163167
/// * Register a custom data source that can be referenced from a SQL query.
164168
/// * Execute a SQL query
165169
///
170+
/// # Example: DataFrame API
171+
///
166172
/// The following example demonstrates how to use the context to execute a query against a CSV
167173
/// data source using the DataFrame API:
168174
///
169175
/// ```
170176
/// use datafusion::prelude::*;
171-
/// # use datafusion::error::Result;
177+
/// # use datafusion::{error::Result, assert_batches_eq};
172178
/// # #[tokio::main]
173179
/// # async fn main() -> Result<()> {
174180
/// let ctx = SessionContext::new();
175181
/// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
176182
/// let df = df.filter(col("a").lt_eq(col("b")))?
177183
/// .aggregate(vec![col("a")], vec![min(col("b"))])?
178184
/// .limit(0, Some(100))?;
179-
/// let results = df.collect();
185+
/// let results = df
186+
/// .collect()
187+
/// .await?;
188+
/// assert_batches_eq!(
189+
/// &[
190+
/// "+---+----------------+",
191+
/// "| a | MIN(?table?.b) |",
192+
/// "+---+----------------+",
193+
/// "| 1 | 2 |",
194+
/// "+---+----------------+",
195+
/// ],
196+
/// &results
197+
/// );
180198
/// # Ok(())
181199
/// # }
182200
/// ```
183201
///
202+
/// # Example: SQL API
203+
///
184204
/// The following example demonstrates how to execute the same query using SQL:
185205
///
186206
/// ```
187207
/// use datafusion::prelude::*;
188-
///
189-
/// # use datafusion::error::Result;
208+
/// # use datafusion::{error::Result, assert_batches_eq};
190209
/// # #[tokio::main]
191210
/// # async fn main() -> Result<()> {
192211
/// let mut ctx = SessionContext::new();
193212
/// ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?;
194-
/// let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").await?;
213+
/// let results = ctx
214+
/// .sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100")
215+
/// .await?
216+
/// .collect()
217+
/// .await?;
218+
/// assert_batches_eq!(
219+
/// &[
220+
/// "+---+----------------+",
221+
/// "| a | MIN(example.b) |",
222+
/// "+---+----------------+",
223+
/// "| 1 | 2 |",
224+
/// "+---+----------------+",
225+
/// ],
226+
/// &results
227+
/// );
195228
/// # Ok(())
196229
/// # }
197230
/// ```
@@ -342,22 +375,82 @@ impl SessionContext {
342375
self.state.read().config.clone()
343376
}
344377

345-
/// Creates a [`DataFrame`] that will execute a SQL query.
378+
/// Creates a [`DataFrame`] from SQL query text.
346379
///
347380
/// Note: This API implements DDL statements such as `CREATE TABLE` and
348381
/// `CREATE VIEW` and DML statements such as `INSERT INTO` with in-memory
349-
/// default implementations.
382+
/// default implementations. See [`Self::sql_with_options`].
383+
///
384+
/// # Example: Running SQL queries
385+
///
386+
/// See the example on [`Self`]
350387
///
351-
/// If this is not desirable, consider using [`SessionState::create_logical_plan()`] which
352-
/// does not mutate the state based on such statements.
388+
/// # Example: Creating a Table with SQL
389+
///
390+
/// ```
391+
/// use datafusion::prelude::*;
392+
/// # use datafusion::{error::Result, assert_batches_eq};
393+
/// # #[tokio::main]
394+
/// # async fn main() -> Result<()> {
395+
/// let mut ctx = SessionContext::new();
396+
/// ctx
397+
/// .sql("CREATE TABLE foo (x INTEGER)")
398+
/// .await?
399+
/// .collect()
400+
/// .await?;
401+
/// assert!(ctx.table_exist("foo").unwrap());
402+
/// # Ok(())
403+
/// # }
404+
/// ```
353405
pub async fn sql(&self, sql: &str) -> Result<DataFrame> {
354-
// create a query planner
406+
self.sql_with_options(sql, SQLOptions::new()).await
407+
}
408+
409+
/// Creates a [`DataFrame`] from SQL query text, first validating
410+
/// that the queries are allowed by `options`
411+
///
412+
/// # Example: Preventing Creating a Table with SQL
413+
///
414+
/// If you want to avoid creating tables, or modifying data or the
415+
/// session, set [`SQLOptions`] appropriately:
416+
///
417+
/// ```
418+
/// use datafusion::prelude::*;
419+
/// # use datafusion::{error::Result};
420+
/// # use datafusion::physical_plan::collect;
421+
/// # #[tokio::main]
422+
/// # async fn main() -> Result<()> {
423+
/// let mut ctx = SessionContext::new();
424+
/// let options = SQLOptions::new()
425+
/// .with_allow_ddl(false);
426+
/// let err = ctx.sql_with_options("CREATE TABLE foo (x INTEGER)", options)
427+
/// .await
428+
/// .unwrap_err();
429+
/// assert_eq!(
430+
/// err.to_string(),
431+
/// "Error during planning: DDL not supported: CreateMemoryTable"
432+
/// );
433+
/// # Ok(())
434+
/// # }
435+
/// ```
436+
pub async fn sql_with_options(
437+
&self,
438+
sql: &str,
439+
options: SQLOptions,
440+
) -> Result<DataFrame> {
355441
let plan = self.state().create_logical_plan(sql).await?;
442+
options.verify_plan(&plan)?;
356443

357444
self.execute_logical_plan(plan).await
358445
}
359446

360-
/// Execute the [`LogicalPlan`], return a [`DataFrame`]
447+
/// Execute the [`LogicalPlan`], return a [`DataFrame`]. This API
448+
/// is not feature limited (so all SQL such as `CREATE TABLE` and
449+
/// `COPY` will be run).
450+
///
451+
/// If you wish to limit the type of plan that can be run from
452+
/// SQL, see [`Self::sql_with_options`] and
453+
/// [`SQLOptions::verify_plan`].
361454
pub async fn execute_logical_plan(&self, plan: LogicalPlan) -> Result<DataFrame> {
362455
match plan {
363456
LogicalPlan::Ddl(ddl) => match ddl {
@@ -1304,7 +1397,7 @@ impl FunctionRegistry for SessionContext {
13041397
/// A planner used to add extensions to DataFusion logical and physical plans.
13051398
#[async_trait]
13061399
pub trait QueryPlanner {
1307-
/// Given a `LogicalPlan`, create an `ExecutionPlan` suitable for execution
1400+
/// Given a `LogicalPlan`, create an [`ExecutionPlan`] suitable for execution
13081401
async fn create_physical_plan(
13091402
&self,
13101403
logical_plan: &LogicalPlan,
@@ -1317,7 +1410,7 @@ struct DefaultQueryPlanner {}
13171410

13181411
#[async_trait]
13191412
impl QueryPlanner for DefaultQueryPlanner {
1320-
/// Given a `LogicalPlan`, create an `ExecutionPlan` suitable for execution
1413+
/// Given a `LogicalPlan`, create an [`ExecutionPlan`] suitable for execution
13211414
async fn create_physical_plan(
13221415
&self,
13231416
logical_plan: &LogicalPlan,
@@ -1628,7 +1721,8 @@ impl SessionState {
16281721
&mut self.table_factories
16291722
}
16301723

1631-
/// Convert a SQL string into an AST Statement
1724+
/// Parse a SQL string into a DataFusion-specific AST
1725+
/// [`Statement`]. See [`SessionContext::sql`] for running queries.
16321726
pub fn sql_to_statement(
16331727
&self,
16341728
sql: &str,
@@ -1787,9 +1881,15 @@ impl SessionState {
17871881
query.statement_to_plan(statement)
17881882
}
17891883

1790-
/// Creates a [`LogicalPlan`] from the provided SQL string
1884+
/// Creates a [`LogicalPlan`] from the provided SQL string. This
1885+
/// interface will plan any SQL DataFusion supports, including DDL
1886+
/// like `CREATE TABLE`, and `COPY` (which can write to local
1887+
/// files).
17911888
///
1792-
/// See [`SessionContext::sql`] for a higher-level interface that also handles DDL
1889+
/// See [`SessionContext::sql`] and
1890+
/// [`SessionContext::sql_with_options`] for a higher-level
1891+
/// interface that handles DDL and verification of allowed
1892+
/// statements.
17931893
pub async fn create_logical_plan(&self, sql: &str) -> Result<LogicalPlan> {
17941894
let dialect = self.config.options().sql_parser.dialect.as_str();
17951895
let statement = self.sql_to_statement(sql, dialect)?;
@@ -1870,7 +1970,11 @@ impl SessionState {
18701970

18711971
/// Creates a physical plan from a logical plan.
18721972
///
1873-
/// Note: this first calls [`Self::optimize`] on the provided plan
1973+
/// Note: this first calls [`Self::optimize`] on the provided
1974+
/// plan.
1975+
///
1976+
/// This function will error for [`LogicalPlan`]s such as catalog
1977+
/// DDL (e.g. `CREATE TABLE`), which must be handled by another layer.
18741978
pub async fn create_physical_plan(
18751979
&self,
18761980
logical_plan: &LogicalPlan,
@@ -2095,6 +2199,92 @@ impl SerializerRegistry for EmptySerializerRegistry {
20952199
}
20962200
}
20972201

2202+
/// Describes which SQL statements can be run.
2203+
///
2204+
/// See [`SessionContext::sql_with_options`] for more details.
2205+
#[derive(Clone, Debug, Copy)]
2206+
pub struct SQLOptions {
2207+
/// See [`Self::with_allow_ddl`]
2208+
allow_ddl: bool,
2209+
/// See [`Self::with_allow_dml`]
2210+
allow_dml: bool,
2211+
/// See [`Self::with_allow_statements`]
2212+
allow_statements: bool,
2213+
}
2214+
2215+
impl Default for SQLOptions {
2216+
fn default() -> Self {
2217+
Self {
2218+
allow_ddl: true,
2219+
allow_dml: true,
2220+
allow_statements: true,
2221+
}
2222+
}
2223+
}
2224+
2225+
impl SQLOptions {
2226+
/// Create a new `SQLOptions` with default values
2227+
pub fn new() -> Self {
2228+
Default::default()
2229+
}
2230+
2231+
/// Should DDL data definition commands (e.g. `CREATE TABLE`) be run? Defaults to `true`.
2232+
pub fn with_allow_ddl(mut self, allow: bool) -> Self {
2233+
self.allow_ddl = allow;
2234+
self
2235+
}
2236+
2237+
/// Should DML data modification commands (e.g. `INSERT` and `COPY`) be run? Defaults to `true`.
2238+
pub fn with_allow_dml(mut self, allow: bool) -> Self {
2239+
self.allow_dml = allow;
2240+
self
2241+
}
2242+
2243+
/// Should statements (e.g. `SET VARIABLE` and `BEGIN TRANSACTION`) be run? Defaults to `true`.
2244+
pub fn with_allow_statements(mut self, allow: bool) -> Self {
2245+
self.allow_statements = allow;
2246+
self
2247+
}
2248+
2249+
/// Return an error if the [`LogicalPlan`] has any nodes that are
2250+
/// incompatible with this [`SQLOptions`].
2251+
pub fn verify_plan(&self, plan: &LogicalPlan) -> Result<()> {
2252+
plan.visit(&mut BadPlanVisitor::new(self))?;
2253+
Ok(())
2254+
}
2255+
}
2256+
2257+
struct BadPlanVisitor<'a> {
2258+
options: &'a SQLOptions,
2259+
}
2260+
impl<'a> BadPlanVisitor<'a> {
2261+
fn new(options: &'a SQLOptions) -> Self {
2262+
Self { options }
2263+
}
2264+
}
2265+
2266+
impl<'a> TreeNodeVisitor for BadPlanVisitor<'a> {
2267+
type N = LogicalPlan;
2268+
2269+
fn pre_visit(&mut self, node: &Self::N) -> Result<VisitRecursion> {
2270+
match node {
2271+
LogicalPlan::Ddl(ddl) if !self.options.allow_ddl => {
2272+
plan_err!("DDL not supported: {}", ddl.name())
2273+
}
2274+
LogicalPlan::Dml(dml) if !self.options.allow_dml => {
2275+
plan_err!("DML not supported: {}", dml.op)
2276+
}
2277+
LogicalPlan::Copy(_) if !self.options.allow_dml => {
2278+
plan_err!("DML not supported: COPY")
2279+
}
2280+
LogicalPlan::Statement(stmt) if !self.options.allow_statements => {
2281+
plan_err!("Statement not supported: {}", stmt.name())
2282+
}
2283+
_ => Ok(VisitRecursion::Continue),
2284+
}
2285+
}
2286+
}
2287+
20982288
#[cfg(test)]
20992289
mod tests {
21002290
use super::*;
@@ -2646,43 +2836,6 @@ mod tests {
26462836
Ok(())
26472837
}
26482838

2649-
#[tokio::test]
2650-
async fn unsupported_sql_returns_error() -> Result<()> {
2651-
let ctx = SessionContext::new();
2652-
ctx.register_table("test", test::table_with_sequence(1, 1).unwrap())
2653-
.unwrap();
2654-
let state = ctx.state();
2655-
2656-
// create view
2657-
let sql = "create view test_view as select * from test";
2658-
let plan = state.create_logical_plan(sql).await;
2659-
let physical_plan = state.create_physical_plan(&plan.unwrap()).await;
2660-
assert!(physical_plan.is_err());
2661-
assert_eq!(
2662-
format!("{}", physical_plan.unwrap_err()),
2663-
"This feature is not implemented: Unsupported logical plan: CreateView"
2664-
);
2665-
// // drop view
2666-
let sql = "drop view test_view";
2667-
let plan = state.create_logical_plan(sql).await;
2668-
let physical_plan = state.create_physical_plan(&plan.unwrap()).await;
2669-
assert!(physical_plan.is_err());
2670-
assert_eq!(
2671-
format!("{}", physical_plan.unwrap_err()),
2672-
"This feature is not implemented: Unsupported logical plan: DropView"
2673-
);
2674-
// // drop table
2675-
let sql = "drop table test";
2676-
let plan = state.create_logical_plan(sql).await;
2677-
let physical_plan = state.create_physical_plan(&plan.unwrap()).await;
2678-
assert!(physical_plan.is_err());
2679-
assert_eq!(
2680-
format!("{}", physical_plan.unwrap_err()),
2681-
"This feature is not implemented: Unsupported logical plan: DropTable"
2682-
);
2683-
Ok(())
2684-
}
2685-
26862839
struct MyPhysicalPlanner {}
26872840

26882841
#[async_trait]

datafusion/core/src/prelude.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
//! ```
2727
2828
pub use crate::dataframe::DataFrame;
29-
pub use crate::execution::context::{SessionConfig, SessionContext};
29+
pub use crate::execution::context::{SQLOptions, SessionConfig, SessionContext};
3030
pub use crate::execution::options::{
3131
AvroReadOptions, CsvReadOptions, NdJsonReadOptions, ParquetReadOptions,
3232
};

datafusion/core/tests/sql/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ pub mod projection;
9696
pub mod references;
9797
pub mod repartition;
9898
pub mod select;
99+
mod sql_api;
99100
pub mod subqueries;
100101
pub mod timestamp;
101102
pub mod udf;

0 commit comments

Comments
 (0)