Skip to content

Commit 00700ae

Browse files
committed
Merge branch 'main' into 13525/invariant-checking-for-implicit-LP-changes
2 parents 9842d19 + e99e02b commit 00700ae

File tree

162 files changed

+925
-541
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

162 files changed

+925
-541
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ version = "43.0.0"
7373
# selectively turn them on if needed, since we can override default-features = true (from false)
7474
# for the inherited dependency but cannot do the reverse (override from true to false).
7575
#
76-
# See for more detaiils: https://github.com/rust-lang/cargo/issues/11329
76+
# See for more details: https://github.com/rust-lang/cargo/issues/11329
7777
ahash = { version = "0.8", default-features = false, features = [
7878
"runtime-rng",
7979
] }

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ Default features:
113113
- `regex_expressions`: regular expression functions, such as `regexp_match`
114114
- `unicode_expressions`: Include unicode aware functions such as `character_length`
115115
- `unparser`: enables support to reverse LogicalPlans back into SQL
116-
- `recursive-protection`: uses [recursive](https://docs.rs/recursive/latest/recursive/) for stack overflow protection.
116+
- `recursive_protection`: uses [recursive](https://docs.rs/recursive/latest/recursive/) for stack overflow protection.
117117

118118
Optional features:
119119

datafusion-cli/Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion-cli/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ datafusion = { path = "../datafusion/core", version = "43.0.0", features = [
4545
"datetime_expressions",
4646
"encoding_expressions",
4747
"parquet",
48+
"recursive_protection",
4849
"regex_expressions",
4950
"unicode_expressions",
5051
"compression",

datafusion-cli/src/functions.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,7 @@ impl TableFunctionImpl for ParquetMetadataFunc {
360360
Field::new("total_uncompressed_size", DataType::Int64, true),
361361
]));
362362

363-
// construct recordbatch from metadata
363+
// construct record batch from metadata
364364
let mut filename_arr = vec![];
365365
let mut row_group_id_arr = vec![];
366366
let mut row_group_num_rows_arr = vec![];

datafusion-examples/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
This crate includes end to end, highly commented examples of how to use
2323
various DataFusion APIs to help you get started.
2424

25-
## Prerequisites:
25+
## Prerequisites
2626

2727
Run `git submodule update --init` to init test files.
2828

datafusion-examples/examples/advanced_parquet_index.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ use url::Url;
8282
/// Specifically, this example illustrates how to:
8383
/// 1. Use [`ParquetFileReaderFactory`] to avoid re-reading parquet metadata on each query
8484
/// 2. Use [`PruningPredicate`] for predicate analysis
85-
/// 3. Pass a row group selection to [`ParuetExec`]
85+
/// 3. Pass a row group selection to [`ParquetExec`]
8686
/// 4. Pass a row selection (within a row group) to [`ParquetExec`]
8787
///
8888
/// Note this is a *VERY* low level example for people who want to build their
@@ -211,7 +211,7 @@ async fn main() -> Result<()> {
211211
//
212212
// Note: in order to prune pages, the Page Index must be loaded and the
213213
// ParquetExec will load it on demand if not present. To avoid a second IO
214-
// during query, this example loaded the Page Index pre-emptively by setting
214+
// during query, this example loaded the Page Index preemptively by setting
215215
// `ArrowReader::with_page_index` in `IndexedFile::try_new`
216216
provider.set_use_row_selection(true);
217217
println!("** Select data, predicate `id = 950`");

datafusion-examples/examples/advanced_udwf.rs

+89-5
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,14 @@ use arrow::{
2424
};
2525
use arrow_schema::Field;
2626
use datafusion::error::Result;
27+
use datafusion::functions_aggregate::average::avg_udaf;
2728
use datafusion::prelude::*;
2829
use datafusion_common::ScalarValue;
29-
use datafusion_expr::function::WindowUDFFieldArgs;
30+
use datafusion_expr::expr::WindowFunction;
31+
use datafusion_expr::function::{WindowFunctionSimplification, WindowUDFFieldArgs};
32+
use datafusion_expr::simplify::SimplifyInfo;
3033
use datafusion_expr::{
31-
PartitionEvaluator, Signature, WindowFrame, WindowUDF, WindowUDFImpl,
34+
Expr, PartitionEvaluator, Signature, WindowFrame, WindowUDF, WindowUDFImpl,
3235
};
3336
use datafusion_functions_window_common::partition::PartitionEvaluatorArgs;
3437

@@ -142,6 +145,67 @@ impl PartitionEvaluator for MyPartitionEvaluator {
142145
}
143146
}
144147

148+
/// This UDWF will show how to use the WindowUDFImpl::simplify() API
149+
#[derive(Debug, Clone)]
150+
struct SimplifySmoothItUdf {
151+
signature: Signature,
152+
}
153+
154+
impl SimplifySmoothItUdf {
155+
fn new() -> Self {
156+
Self {
157+
signature: Signature::exact(
158+
// this function will always take one argument of type f64
159+
vec![DataType::Float64],
160+
// this function is deterministic and will always return the same
161+
// result for the same input
162+
Volatility::Immutable,
163+
),
164+
}
165+
}
166+
}
167+
impl WindowUDFImpl for SimplifySmoothItUdf {
168+
fn as_any(&self) -> &dyn Any {
169+
self
170+
}
171+
172+
fn name(&self) -> &str {
173+
"simplify_smooth_it"
174+
}
175+
176+
fn signature(&self) -> &Signature {
177+
&self.signature
178+
}
179+
180+
fn partition_evaluator(
181+
&self,
182+
_partition_evaluator_args: PartitionEvaluatorArgs,
183+
) -> Result<Box<dyn PartitionEvaluator>> {
184+
todo!()
185+
}
186+
187+
/// this function will simplify `SimplifySmoothItUdf` to `AggregateUDF` for `Avg`
188+
/// The default implementation will not be called (it is left as `todo!()`)
189+
fn simplify(&self) -> Option<WindowFunctionSimplification> {
190+
let simplify = |window_function: WindowFunction, _: &dyn SimplifyInfo| {
191+
Ok(Expr::WindowFunction(WindowFunction {
192+
fun: datafusion_expr::WindowFunctionDefinition::AggregateUDF(avg_udaf()),
193+
args: window_function.args,
194+
partition_by: window_function.partition_by,
195+
order_by: window_function.order_by,
196+
window_frame: window_function.window_frame,
197+
null_treatment: window_function.null_treatment,
198+
}))
199+
};
200+
201+
Some(Box::new(simplify))
202+
}
203+
204+
fn field(&self, field_args: WindowUDFFieldArgs) -> Result<Field> {
205+
Ok(Field::new(field_args.name(), DataType::Float64, true))
206+
}
207+
}
208+
145209
// create local execution context with `cars.csv` registered as a table named `cars`
146210
async fn create_context() -> Result<SessionContext> {
147211
// declare a new context. In the Spark API, this corresponds to a new Spark SQL session
@@ -162,12 +226,15 @@ async fn main() -> Result<()> {
162226
let smooth_it = WindowUDF::from(SmoothItUdf::new());
163227
ctx.register_udwf(smooth_it.clone());
164228

165-
// Use SQL to run the new window function
229+
let simplify_smooth_it = WindowUDF::from(SimplifySmoothItUdf::new());
230+
ctx.register_udwf(simplify_smooth_it.clone());
231+
232+
// Use SQL to retrieve entire table
166233
let df = ctx.sql("SELECT * from cars").await?;
167234
// print the results
168235
df.show().await?;
169236

170-
// Use SQL to run the new window function:
237+
// Use SQL to run smooth_it:
171238
//
172239
// `PARTITION BY car`: each distinct value of car (red and green)
173240
// should be treated as a separate partition (and will result in
@@ -201,7 +268,7 @@ async fn main() -> Result<()> {
201268
// print the results
202269
df.show().await?;
203270

204-
// this time, call the new widow function with an explicit
271+
// this time, call the function with an explicit
205272
// window so evaluate will be invoked with each window.
206273
//
207274
// `ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING`: each invocation
@@ -232,5 +299,22 @@ async fn main() -> Result<()> {
232299
// print the results
233300
df.show().await?;
234301

302+
// Use SQL to run simplify_smooth_it
303+
let df = ctx
304+
.sql(
305+
"SELECT \
306+
car, \
307+
speed, \
308+
simplify_smooth_it(speed) OVER (PARTITION BY car ORDER BY time) AS smooth_speed,\
309+
time \
310+
from cars \
311+
ORDER BY \
312+
car",
313+
)
314+
.await?;
315+
316+
// print the results
317+
df.show().await?;
318+
235319
Ok(())
236320
}

datafusion-examples/examples/analyzer_rule.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ impl AnalyzerRule for RowLevelAccessControl {
138138
fn analyze(&self, plan: LogicalPlan, _config: &ConfigOptions) -> Result<LogicalPlan> {
139139
// use the TreeNode API to recursively walk the LogicalPlan tree
140140
// and all of its children (inputs)
141-
let transfomed_plan = plan.transform(|plan| {
141+
let transformed_plan = plan.transform(|plan| {
142142
// This closure is called for each LogicalPlan node
143143
// if it is a Scan node, add a filter to remove all managers
144144
if is_employee_table_scan(&plan) {
@@ -166,7 +166,7 @@ impl AnalyzerRule for RowLevelAccessControl {
166166
//
167167
// This example does not need the value of either flag, so simply
168168
// extract the LogicalPlan "data"
169-
Ok(transfomed_plan.data)
169+
Ok(transformed_plan.data)
170170
}
171171

172172
fn name(&self) -> &str {

datafusion-examples/examples/catalog.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,11 @@ async fn main() -> Result<()> {
4646

4747
let ctx = SessionContext::new();
4848
let state = ctx.state();
49-
let cataloglist = Arc::new(CustomCatalogProviderList::new());
49+
let catalog_list = Arc::new(CustomCatalogProviderList::new());
5050

5151
// use our custom catalog list for context. each context has a single catalog list.
5252
// context will by default have [`MemoryCatalogProviderList`]
53-
ctx.register_catalog_list(cataloglist.clone());
53+
ctx.register_catalog_list(catalog_list.clone());
5454

5555
// initialize our catalog and schemas
5656
let catalog = DirCatalog::new();
@@ -81,7 +81,7 @@ async fn main() -> Result<()> {
8181
ctx.register_catalog("dircat", Arc::new(catalog));
8282
{
8383
// catalog was passed down into our custom catalog list since we override the ctx's default
84-
let catalogs = cataloglist.catalogs.read().unwrap();
84+
let catalogs = catalog_list.catalogs.read().unwrap();
8585
assert!(catalogs.contains_key("dircat"));
8686
};
8787

@@ -144,8 +144,8 @@ impl DirSchema {
144144
async fn create(state: &SessionState, opts: DirSchemaOpts<'_>) -> Result<Arc<Self>> {
145145
let DirSchemaOpts { ext, dir, format } = opts;
146146
let mut tables = HashMap::new();
147-
let direntries = std::fs::read_dir(dir).unwrap();
148-
for res in direntries {
147+
let dir_entries = std::fs::read_dir(dir).unwrap();
148+
for res in dir_entries {
149149
let entry = res.unwrap();
150150
let filename = entry.file_name().to_str().unwrap().to_string();
151151
if !filename.ends_with(ext) {

datafusion-examples/examples/expr_api.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ use datafusion_optimizer::analyzer::type_coercion::TypeCoercionRewriter;
5353
/// 4. Simplify expressions: [`simplify_demo`]
5454
/// 5. Analyze predicates for boundary ranges: [`range_analysis_demo`]
5555
/// 6. Get the types of the expressions: [`expression_type_demo`]
56-
/// 7. Apply type cocercion to expressions: [`type_coercion_demo`]
56+
/// 7. Apply type coercion to expressions: [`type_coercion_demo`]
5757
#[tokio::main]
5858
async fn main() -> Result<()> {
5959
// The easiest way to do create expressions is to use the
@@ -392,7 +392,7 @@ fn type_coercion_demo() -> Result<()> {
392392
)?;
393393
assert!(physical_expr.evaluate(&batch).is_ok());
394394

395-
// 4. Apply explict type coercion by manually rewriting the expression
395+
// 4. Apply explicit type coercion by manually rewriting the expression
396396
let coerced_expr = expr
397397
.transform(|e| {
398398
// Only type coerces binary expressions.

datafusion-examples/examples/function_factory.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ use datafusion_expr::{
3636
///
3737
/// Apart from [FunctionFactory], this example covers
3838
/// [ScalarUDFImpl::simplify()] which is often used at the same time, to replace
39-
/// a function call with another expression at rutime.
39+
/// a function call with another expression at runtime.
4040
///
4141
/// This example is rather simple and does not cover all cases required for a
4242
/// real implementation.

datafusion-examples/examples/memtable.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ use std::sync::Arc;
2525
use std::time::Duration;
2626
use tokio::time::timeout;
2727

28-
/// This example demonstrates executing a simple query against a Memtable
28+
/// This example demonstrates executing a simple query against a [`MemTable`]
2929
#[tokio::main]
3030
async fn main() -> Result<()> {
3131
let mem_table = create_memtable()?;

datafusion-examples/examples/optimizer_rule.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ impl MyOptimizerRule {
146146
// Closure called for each sub tree
147147
match expr {
148148
Expr::BinaryExpr(binary_expr) if is_binary_eq(&binary_expr) => {
149-
// destruture the expression
149+
// destructure the expression
150150
let BinaryExpr { left, op: _, right } = binary_expr;
151151
// rewrite to `my_eq(left, right)`
152152
let udf = ScalarUDF::new_from_impl(MyEq::new());

datafusion-examples/examples/plan_to_sql.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ fn simple_expr_to_sql_demo() -> Result<()> {
6565
Ok(())
6666
}
6767

68-
/// DataFusioon can remove parentheses when converting an expression to SQL.
68+
/// DataFusion can remove parentheses when converting an expression to SQL.
6969
/// Note that output is intended for humans, not for other SQL engines,
7070
/// as difference in precedence rules can cause expressions to be parsed differently.
7171
fn simple_expr_to_pretty_sql_demo() -> Result<()> {

datafusion-examples/examples/simple_udtf.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ impl TableFunctionImpl for LocalCsvTableFunc {
140140
let limit = exprs
141141
.get(1)
142142
.map(|expr| {
143-
// try to simpify the expression, so 1+2 becomes 3, for example
143+
// try to simplify the expression, so 1+2 becomes 3, for example
144144
let execution_props = ExecutionProps::new();
145145
let info = SimplifyContext::new(&execution_props);
146146
let expr = ExprSimplifier::new(info).simplify(expr.clone())?;
@@ -173,8 +173,8 @@ fn read_csv_batches(csv_path: impl AsRef<Path>) -> Result<(SchemaRef, Vec<Record
173173
.with_header(true)
174174
.build(file)?;
175175
let mut batches = vec![];
176-
for bacth in reader {
177-
batches.push(bacth?);
176+
for batch in reader {
177+
batches.push(batch?);
178178
}
179179
let schema = Arc::new(schema);
180180
Ok((schema, batches))

0 commit comments

Comments
 (0)