Skip to content

Commit 55b792a

Browse files
y-f-u, phillipleblanc, alamb
authored
Support alternate formats for unparsing datetime to timestamp and interval (#11466)
* Unparser rule for datetime cast (#10) * use timestamp as the identifier for date64 * rename * implement CustomDialectBuilder * fix * dialect with interval style (#11) --------- Co-authored-by: Phillip LeBlanc <[email protected]> * fmt * clippy * doc * Update datafusion/sql/src/unparser/expr.rs Co-authored-by: Andrew Lamb <[email protected]> * update the doc for CustomDialectBuilder * fix doc test --------- Co-authored-by: Phillip LeBlanc <[email protected]> Co-authored-by: Andrew Lamb <[email protected]>
1 parent 5f0993c commit 55b792a

File tree

3 files changed

+420
-65
lines changed

3 files changed

+420
-65
lines changed

datafusion-examples/examples/plan_to_sql.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use datafusion::error::Result;
1919

2020
use datafusion::prelude::*;
2121
use datafusion::sql::unparser::expr_to_sql;
22-
use datafusion_sql::unparser::dialect::CustomDialect;
22+
use datafusion_sql::unparser::dialect::CustomDialectBuilder;
2323
use datafusion_sql::unparser::{plan_to_sql, Unparser};
2424

2525
/// This example demonstrates the programmatic construction of SQL strings using
@@ -80,7 +80,9 @@ fn simple_expr_to_pretty_sql_demo() -> Result<()> {
8080
/// using a custom dialect and an explicit unparser
8181
fn simple_expr_to_sql_demo_escape_mysql_style() -> Result<()> {
8282
let expr = col("a").lt(lit(5)).or(col("a").eq(lit(8)));
83-
let dialect = CustomDialect::new(Some('`'));
83+
let dialect = CustomDialectBuilder::new()
84+
.with_identifier_quote_style('`')
85+
.build();
8486
let unparser = Unparser::new(&dialect);
8587
let sql = unparser.expr_to_sql(&expr)?.to_string();
8688
assert_eq!(sql, r#"((`a` < 5) OR (`a` = 8))"#);

datafusion/sql/src/unparser/dialect.rs

+140
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,33 @@ pub trait Dialect {
3535
fn supports_nulls_first_in_sort(&self) -> bool {
3636
true
3737
}
38+
39+
// Does the dialect use TIMESTAMP to represent Date64 rather than DATETIME?
40+
// E.g. Trino, Athena and Dremio does not have DATETIME data type
41+
fn use_timestamp_for_date64(&self) -> bool {
42+
false
43+
}
44+
45+
fn interval_style(&self) -> IntervalStyle {
46+
IntervalStyle::PostgresVerbose
47+
}
3848
}
49+
50+
/// `IntervalStyle` to use for unparsing.
///
/// Different DBMSs follow different standards for interval literals; see
/// <https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT>
/// and the SQL-92 text at
/// <https://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt>.
///
/// The enum is a plain, field-less value type, so it derives `Debug`,
/// `PartialEq` and `Eq` in addition to `Clone`/`Copy`; this lets callers
/// print it in diagnostics and compare it in assertions.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum IntervalStyle {
    /// Verbose PostgreSQL form, e.g.
    /// `'2 years 15 months 100 weeks 99 hours 123456789 milliseconds'`.
    ///
    /// Compatible with the arrow display format, as well as DuckDB.
    PostgresVerbose,
    /// SQL standard form: `'1-2'` for year-month, or
    /// `'1 10:10:10.123456'` for day-time.
    SQLStandard,
    /// MySQL interval syntax.
    MySQL,
}
64+
3965
pub struct DefaultDialect {}
4066

4167
impl Dialect for DefaultDialect {
@@ -57,6 +83,10 @@ impl Dialect for PostgreSqlDialect {
5783
fn identifier_quote_style(&self, _: &str) -> Option<char> {
5884
Some('"')
5985
}
86+
87+
fn interval_style(&self) -> IntervalStyle {
88+
IntervalStyle::PostgresVerbose
89+
}
6090
}
6191

6292
pub struct MySqlDialect {}
@@ -69,6 +99,10 @@ impl Dialect for MySqlDialect {
6999
fn supports_nulls_first_in_sort(&self) -> bool {
70100
false
71101
}
102+
103+
fn interval_style(&self) -> IntervalStyle {
104+
IntervalStyle::MySQL
105+
}
72106
}
73107

74108
pub struct SqliteDialect {}
@@ -81,12 +115,29 @@ impl Dialect for SqliteDialect {
81115

82116
pub struct CustomDialect {
83117
identifier_quote_style: Option<char>,
118+
supports_nulls_first_in_sort: bool,
119+
use_timestamp_for_date64: bool,
120+
interval_style: IntervalStyle,
121+
}
122+
123+
impl Default for CustomDialect {
124+
fn default() -> Self {
125+
Self {
126+
identifier_quote_style: None,
127+
supports_nulls_first_in_sort: true,
128+
use_timestamp_for_date64: false,
129+
interval_style: IntervalStyle::SQLStandard,
130+
}
131+
}
84132
}
85133

86134
impl CustomDialect {
135+
// create a CustomDialect
136+
#[deprecated(note = "please use `CustomDialectBuilder` instead")]
87137
pub fn new(identifier_quote_style: Option<char>) -> Self {
88138
Self {
89139
identifier_quote_style,
140+
..Default::default()
90141
}
91142
}
92143
}
@@ -95,4 +146,93 @@ impl Dialect for CustomDialect {
95146
fn identifier_quote_style(&self, _: &str) -> Option<char> {
96147
self.identifier_quote_style
97148
}
149+
150+
fn supports_nulls_first_in_sort(&self) -> bool {
151+
self.supports_nulls_first_in_sort
152+
}
153+
154+
fn use_timestamp_for_date64(&self) -> bool {
155+
self.use_timestamp_for_date64
156+
}
157+
158+
/// Returns the stored [`IntervalStyle`] (the enum is `Copy`).
fn interval_style(&self) -> IntervalStyle {
    self.interval_style
}
161+
}
162+
163+
/// `CustomDialectBuilder` to build `CustomDialect` using builder pattern
164+
///
165+
///
166+
/// # Examples
167+
///
168+
/// Building a custom dialect with all default options set in CustomDialectBuilder::new()
169+
/// but with `use_timestamp_for_date64` overridden to `true`
170+
///
171+
/// ```
172+
/// use datafusion_sql::unparser::dialect::CustomDialectBuilder;
173+
/// let dialect = CustomDialectBuilder::new()
174+
/// .with_use_timestamp_for_date64(true)
175+
/// .build();
176+
/// ```
177+
pub struct CustomDialectBuilder {
178+
identifier_quote_style: Option<char>,
179+
supports_nulls_first_in_sort: bool,
180+
use_timestamp_for_date64: bool,
181+
interval_style: IntervalStyle,
182+
}
183+
184+
impl Default for CustomDialectBuilder {
185+
fn default() -> Self {
186+
Self::new()
187+
}
188+
}
189+
190+
impl CustomDialectBuilder {
191+
pub fn new() -> Self {
192+
Self {
193+
identifier_quote_style: None,
194+
supports_nulls_first_in_sort: true,
195+
use_timestamp_for_date64: false,
196+
interval_style: IntervalStyle::PostgresVerbose,
197+
}
198+
}
199+
200+
pub fn build(self) -> CustomDialect {
201+
CustomDialect {
202+
identifier_quote_style: self.identifier_quote_style,
203+
supports_nulls_first_in_sort: self.supports_nulls_first_in_sort,
204+
use_timestamp_for_date64: self.use_timestamp_for_date64,
205+
interval_style: self.interval_style,
206+
}
207+
}
208+
209+
/// Customize the dialect with a specific identifier quote style, e.g. '`', '"'
210+
pub fn with_identifier_quote_style(mut self, identifier_quote_style: char) -> Self {
211+
self.identifier_quote_style = Some(identifier_quote_style);
212+
self
213+
}
214+
215+
/// Customize the dialect to supports `NULLS FIRST` in `ORDER BY` clauses
216+
pub fn with_supports_nulls_first_in_sort(
217+
mut self,
218+
supports_nulls_first_in_sort: bool,
219+
) -> Self {
220+
self.supports_nulls_first_in_sort = supports_nulls_first_in_sort;
221+
self
222+
}
223+
224+
/// Customize the dialect to uses TIMESTAMP when casting Date64 rather than DATETIME
225+
pub fn with_use_timestamp_for_date64(
226+
mut self,
227+
use_timestamp_for_date64: bool,
228+
) -> Self {
229+
self.use_timestamp_for_date64 = use_timestamp_for_date64;
230+
self
231+
}
232+
233+
/// Customize the dialect with a specific interval style listed in `IntervalStyle`
234+
pub fn with_interval_style(mut self, interval_style: IntervalStyle) -> Self {
235+
self.interval_style = interval_style;
236+
self
237+
}
98238
}

0 commit comments

Comments
 (0)