
Commit d69098d

Merge branch 'main' into better-cse-identifier
# Conflicts:
#	datafusion/optimizer/src/common_subexpr_eliminate.rs

2 parents: d69033a + 08e4e6a

116 files changed (+2490, -916 lines)


datafusion-cli/Cargo.lock

Lines changed: 112 additions & 58 deletions
Generated file; diff not rendered by default.

datafusion-examples/README.md

Lines changed: 1 addition & 0 deletions
@@ -45,6 +45,7 @@ cargo run --example csv_sql
 - [`advanced_udaf.rs`](examples/advanced_udaf.rs): Define and invoke a more complicated User Defined Aggregate Function (UDAF)
 - [`advanced_udf.rs`](examples/advanced_udf.rs): Define and invoke a more complicated User Defined Scalar Function (UDF)
 - [`advanced_udwf.rs`](examples/advanced_udwf.rs): Define and invoke a more complicated User Defined Window Function (UDWF)
+- [`advanced_parquet_index.rs`](examples/advanced_parquet_index.rs): Creates a detailed secondary index that covers the contents of several parquet files
 - [`avro_sql.rs`](examples/avro_sql.rs): Build and run a query plan from a SQL statement against a local AVRO file
 - [`catalog.rs`](examples/catalog.rs): Register the table into a custom catalog
 - [`csv_sql.rs`](examples/csv_sql.rs): Build and run a query plan from a SQL statement against a local CSV file

datafusion-examples/examples/advanced_parquet_index.rs

Lines changed: 664 additions & 0 deletions
Large diffs are not rendered by default.

datafusion-examples/examples/rewrite_expr.rs

Lines changed: 20 additions & 24 deletions
@@ -18,13 +18,13 @@
 use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
 use datafusion_common::config::ConfigOptions;
 use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
-use datafusion_common::{plan_err, Result, ScalarValue};
+use datafusion_common::{plan_err, DataFusionError, Result, ScalarValue};
 use datafusion_expr::{
     AggregateUDF, Between, Expr, Filter, LogicalPlan, ScalarUDF, TableSource, WindowUDF,
 };
 use datafusion_optimizer::analyzer::{Analyzer, AnalyzerRule};
-use datafusion_optimizer::optimizer::Optimizer;
-use datafusion_optimizer::{utils, OptimizerConfig, OptimizerContext, OptimizerRule};
+use datafusion_optimizer::optimizer::{ApplyOrder, Optimizer};
+use datafusion_optimizer::{OptimizerConfig, OptimizerContext, OptimizerRule};
 use datafusion_sql::planner::{ContextProvider, SqlToRel};
 use datafusion_sql::sqlparser::dialect::PostgreSqlDialect;
 use datafusion_sql::sqlparser::parser::Parser;
@@ -131,32 +131,28 @@ impl OptimizerRule for MyOptimizerRule {
         "my_optimizer_rule"
     }
 
-    fn try_optimize(
+    fn apply_order(&self) -> Option<ApplyOrder> {
+        Some(ApplyOrder::BottomUp)
+    }
+
+    fn supports_rewrite(&self) -> bool {
+        true
+    }
+
+    fn rewrite(
         &self,
-        plan: &LogicalPlan,
-        config: &dyn OptimizerConfig,
-    ) -> Result<Option<LogicalPlan>> {
-        // recurse down and optimize children first
-        let optimized_plan = utils::optimize_children(self, plan, config)?;
-        match optimized_plan {
-            Some(LogicalPlan::Filter(filter)) => {
+        plan: LogicalPlan,
+        _config: &dyn OptimizerConfig,
+    ) -> Result<Transformed<LogicalPlan>, DataFusionError> {
+        match plan {
+            LogicalPlan::Filter(filter) => {
                 let predicate = my_rewrite(filter.predicate.clone())?;
-                Ok(Some(LogicalPlan::Filter(Filter::try_new(
+                Ok(Transformed::yes(LogicalPlan::Filter(Filter::try_new(
                     predicate,
-                    filter.input,
+                    filter.input.clone(),
                 )?)))
             }
-            Some(optimized_plan) => Ok(Some(optimized_plan)),
-            None => match plan {
-                LogicalPlan::Filter(filter) => {
-                    let predicate = my_rewrite(filter.predicate.clone())?;
-                    Ok(Some(LogicalPlan::Filter(Filter::try_new(
-                        predicate,
-                        filter.input.clone(),
-                    )?)))
-                }
-                _ => Ok(None),
-            },
+            _ => Ok(Transformed::no(plan)),
         }
     }
 }
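
Context for the diff above: the example moves from the older `try_optimize` method (returning `Option<LogicalPlan>`) to the owned-plan `rewrite` method returning `Transformed<LogicalPlan>`, with recursion delegated to `apply_order` instead of `utils::optimize_children`. A minimal sketch of how such a rule might be driven follows; it assumes the `MyOptimizerRule` unit struct from this example and that `Optimizer::with_rules` / `Optimizer::optimize` have the signatures used in this era of the crate, so treat it as illustrative rather than part of the commit.

use std::sync::Arc;
use datafusion_common::Result;
use datafusion_expr::LogicalPlan;
use datafusion_optimizer::optimizer::Optimizer;
use datafusion_optimizer::OptimizerContext;

// Sketch only: run MyOptimizerRule (defined in this example) over an owned plan.
fn run_my_rule(plan: LogicalPlan) -> Result<LogicalPlan> {
    let optimizer = Optimizer::with_rules(vec![Arc::new(MyOptimizerRule {})]);
    // The closure is an observer invoked after each rule application; a no-op here.
    optimizer.optimize(plan, &OptimizerContext::new(), |_plan, _rule| {})
}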

datafusion/common/src/column.rs

Lines changed: 7 additions & 0 deletions
@@ -127,6 +127,13 @@ impl Column {
         })
     }
 
+    /// return the column's name.
+    ///
+    /// Note: This ignores the relation and returns the column name only.
+    pub fn name(&self) -> &str {
+        &self.name
+    }
+
     /// Serialize column into a flat name string
     pub fn flat_name(&self) -> String {
         match &self.relation {
datafusion/common/src/config.rs

Lines changed: 16 additions & 0 deletions
@@ -1393,6 +1393,13 @@ pub struct TableParquetOptions {
     pub key_value_metadata: HashMap<String, Option<String>>,
 }
 
+impl TableParquetOptions {
+    /// Return new default TableParquetOptions
+    pub fn new() -> Self {
+        Self::default()
+    }
+}
+
 impl ConfigField for TableParquetOptions {
     fn visit<V: Visit>(&self, v: &mut V, key_prefix: &str, description: &'static str) {
         self.global.visit(v, key_prefix, description);
@@ -1559,6 +1566,7 @@ config_namespace! {
         pub delimiter: u8, default = b','
         pub quote: u8, default = b'"'
        pub escape: Option<u8>, default = None
+        pub double_quote: Option<bool>, default = None
         pub compression: CompressionTypeVariant, default = CompressionTypeVariant::UNCOMPRESSED
         pub schema_infer_max_rec: usize, default = 100
         pub date_format: Option<String>, default = None
@@ -1624,6 +1632,13 @@ impl CsvOptions {
         self
     }
 
+    /// Set true to indicate that the CSV quotes should be doubled.
+    /// - default to true
+    pub fn with_double_quote(mut self, double_quote: bool) -> Self {
+        self.double_quote = Some(double_quote);
+        self
+    }
+
     /// Set a `CompressionTypeVariant` of CSV
     /// - defaults to `CompressionTypeVariant::UNCOMPRESSED`
     pub fn with_file_compression_type(
@@ -1668,6 +1683,7 @@ pub enum FormatOptions {
     AVRO,
     ARROW,
 }
+
 impl Display for FormatOptions {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         let out = match self {
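
Taken together, these additions give `TableParquetOptions` an explicit constructor and expose CSV quote doubling as a builder option. A brief sketch of how they might be used (the option values are illustrative only):

use datafusion_common::config::{CsvOptions, TableParquetOptions};

let parquet_opts = TableParquetOptions::new();  // equivalent to TableParquetOptions::default()
let csv_opts = CsvOptions::default()
    .with_double_quote(false);                  // quote doubling defaults to true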

datafusion/common/src/file_options/csv_writer.rs

Lines changed: 6 additions & 0 deletions
@@ -69,6 +69,12 @@ impl TryFrom<&CsvOptions> for CsvWriterOptions {
         if let Some(v) = &value.null_value {
             builder = builder.with_null(v.into())
         }
+        if let Some(v) = &value.escape {
+            builder = builder.with_escape(*v)
+        }
+        if let Some(v) = &value.double_quote {
+            builder = builder.with_double_quote(*v)
+        }
         Ok(CsvWriterOptions {
             writer_options: builder,
             compression: value.compression,
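
This wires the new `double_quote` setting (and the `escape` option) through to the underlying arrow-csv `WriterBuilder`. A hedged sketch of the conversion path, assuming the `TryFrom` error type is `DataFusionError` and that `file_options::csv_writer` is the public module path suggested by the file location:

use datafusion_common::config::CsvOptions;
use datafusion_common::file_options::csv_writer::CsvWriterOptions;
use datafusion_common::Result;

fn writer_options() -> Result<CsvWriterOptions> {
    let opts = CsvOptions::default().with_double_quote(false);
    // escape and double_quote now reach the underlying WriterBuilder
    CsvWriterOptions::try_from(&opts)
}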
