Skip to content

Commit ade14e7

Browse files
authored
Implement SHOW FUNCTIONS (#13799)
* introduce rid for different signature * implement show functions syntax * add syntax example * avoid duplicate join * fix clippy * show function_type instead of routine_type * add some doc and comments
1 parent 7089c64 commit ade14e7

File tree

3 files changed

+245
-82
lines changed

3 files changed

+245
-82
lines changed

datafusion/core/src/catalog_common/information_schema.rs

Lines changed: 41 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ use arrow::{
3434
datatypes::{DataType, Field, Schema, SchemaRef},
3535
record_batch::RecordBatch,
3636
};
37-
use arrow_array::builder::BooleanBuilder;
37+
use arrow_array::builder::{BooleanBuilder, UInt8Builder};
3838
use async_trait::async_trait;
3939
use datafusion_common::error::Result;
4040
use datafusion_common::DataFusionError;
@@ -247,6 +247,7 @@ impl InformationSchemaConfig {
247247
return_type,
248248
"SCALAR",
249249
udf.documentation().map(|d| d.description.to_string()),
250+
udf.documentation().map(|d| d.syntax_example.to_string()),
250251
)
251252
}
252253
}
@@ -266,6 +267,7 @@ impl InformationSchemaConfig {
266267
return_type,
267268
"AGGREGATE",
268269
udaf.documentation().map(|d| d.description.to_string()),
270+
udaf.documentation().map(|d| d.syntax_example.to_string()),
269271
)
270272
}
271273
}
@@ -285,6 +287,7 @@ impl InformationSchemaConfig {
285287
return_type,
286288
"WINDOW",
287289
udwf.documentation().map(|d| d.description.to_string()),
290+
udwf.documentation().map(|d| d.syntax_example.to_string()),
288291
)
289292
}
290293
}
@@ -308,7 +311,8 @@ impl InformationSchemaConfig {
308311
args: Option<&Vec<(String, String)>>,
309312
arg_types: Vec<String>,
310313
return_type: Option<String>,
311-
is_variadic: bool| {
314+
is_variadic: bool,
315+
rid: u8| {
312316
for (position, type_name) in arg_types.iter().enumerate() {
313317
let param_name =
314318
args.and_then(|a| a.get(position).map(|arg| arg.0.as_str()));
@@ -322,6 +326,7 @@ impl InformationSchemaConfig {
322326
type_name,
323327
None::<&str>,
324328
is_variadic,
329+
rid,
325330
);
326331
}
327332
if let Some(return_type) = return_type {
@@ -335,48 +340,52 @@ impl InformationSchemaConfig {
335340
return_type.as_str(),
336341
None::<&str>,
337342
false,
343+
rid,
338344
);
339345
}
340346
};
341347

342348
for (func_name, udf) in udfs {
343349
let args = udf.documentation().and_then(|d| d.arguments.clone());
344350
let combinations = get_udf_args_and_return_types(udf)?;
345-
for (arg_types, return_type) in combinations {
351+
for (rid, (arg_types, return_type)) in combinations.into_iter().enumerate() {
346352
add_parameters(
347353
func_name,
348354
args.as_ref(),
349355
arg_types,
350356
return_type,
351357
Self::is_variadic(udf.signature()),
358+
rid as u8,
352359
);
353360
}
354361
}
355362

356363
for (func_name, udaf) in udafs {
357364
let args = udaf.documentation().and_then(|d| d.arguments.clone());
358365
let combinations = get_udaf_args_and_return_types(udaf)?;
359-
for (arg_types, return_type) in combinations {
366+
for (rid, (arg_types, return_type)) in combinations.into_iter().enumerate() {
360367
add_parameters(
361368
func_name,
362369
args.as_ref(),
363370
arg_types,
364371
return_type,
365372
Self::is_variadic(udaf.signature()),
373+
rid as u8,
366374
);
367375
}
368376
}
369377

370378
for (func_name, udwf) in udwfs {
371379
let args = udwf.documentation().and_then(|d| d.arguments.clone());
372380
let combinations = get_udwf_args_and_return_types(udwf)?;
373-
for (arg_types, return_type) in combinations {
381+
for (rid, (arg_types, return_type)) in combinations.into_iter().enumerate() {
374382
add_parameters(
375383
func_name,
376384
args.as_ref(),
377385
arg_types,
378386
return_type,
379387
Self::is_variadic(udwf.signature()),
388+
rid as u8,
380389
);
381390
}
382391
}
@@ -1095,6 +1104,7 @@ impl InformationSchemaRoutines {
10951104
Field::new("data_type", DataType::Utf8, true),
10961105
Field::new("function_type", DataType::Utf8, true),
10971106
Field::new("description", DataType::Utf8, true),
1107+
Field::new("syntax_example", DataType::Utf8, true),
10981108
]));
10991109

11001110
Self { schema, config }
@@ -1114,6 +1124,7 @@ impl InformationSchemaRoutines {
11141124
data_type: StringBuilder::new(),
11151125
function_type: StringBuilder::new(),
11161126
description: StringBuilder::new(),
1127+
syntax_example: StringBuilder::new(),
11171128
}
11181129
}
11191130
}
@@ -1131,6 +1142,7 @@ struct InformationSchemaRoutinesBuilder {
11311142
data_type: StringBuilder,
11321143
function_type: StringBuilder,
11331144
description: StringBuilder,
1145+
syntax_example: StringBuilder,
11341146
}
11351147

11361148
impl InformationSchemaRoutinesBuilder {
@@ -1145,6 +1157,7 @@ impl InformationSchemaRoutinesBuilder {
11451157
data_type: Option<impl AsRef<str>>,
11461158
function_type: impl AsRef<str>,
11471159
description: Option<impl AsRef<str>>,
1160+
syntax_example: Option<impl AsRef<str>>,
11481161
) {
11491162
self.specific_catalog.append_value(catalog_name.as_ref());
11501163
self.specific_schema.append_value(schema_name.as_ref());
@@ -1157,6 +1170,7 @@ impl InformationSchemaRoutinesBuilder {
11571170
self.data_type.append_option(data_type.as_ref());
11581171
self.function_type.append_value(function_type.as_ref());
11591172
self.description.append_option(description);
1173+
self.syntax_example.append_option(syntax_example);
11601174
}
11611175

11621176
fn finish(&mut self) -> RecordBatch {
@@ -1174,6 +1188,7 @@ impl InformationSchemaRoutinesBuilder {
11741188
Arc::new(self.data_type.finish()),
11751189
Arc::new(self.function_type.finish()),
11761190
Arc::new(self.description.finish()),
1191+
Arc::new(self.syntax_example.finish()),
11771192
],
11781193
)
11791194
.unwrap()
@@ -1222,6 +1237,12 @@ impl InformationSchemaParameters {
12221237
Field::new("data_type", DataType::Utf8, false),
12231238
Field::new("parameter_default", DataType::Utf8, true),
12241239
Field::new("is_variadic", DataType::Boolean, false),
1240+
// `rid` (short for `routine id`) is used to differentiate parameters from different signatures
1241+
// (It serves as the group-by key when generating the `SHOW FUNCTIONS` query).
1242+
// For example, the following signatures have different `rid` values:
1243+
// - `datetrunc(Utf8, Timestamp(Microsecond, Some("+TZ"))) -> Timestamp(Microsecond, Some("+TZ"))`
1244+
// - `datetrunc(Utf8View, Timestamp(Nanosecond, None)) -> Timestamp(Nanosecond, None)`
1245+
Field::new("rid", DataType::UInt8, false),
12251246
]));
12261247

12271248
Self { schema, config }
@@ -1239,7 +1260,7 @@ impl InformationSchemaParameters {
12391260
data_type: StringBuilder::new(),
12401261
parameter_default: StringBuilder::new(),
12411262
is_variadic: BooleanBuilder::new(),
1242-
inserted: HashSet::new(),
1263+
rid: UInt8Builder::new(),
12431264
}
12441265
}
12451266
}
@@ -1255,8 +1276,7 @@ struct InformationSchemaParametersBuilder {
12551276
data_type: StringBuilder,
12561277
parameter_default: StringBuilder,
12571278
is_variadic: BooleanBuilder,
1258-
// use HashSet to avoid duplicate rows. The key is (specific_name, ordinal_position, parameter_mode, data_type)
1259-
inserted: HashSet<(String, u64, String, String)>,
1279+
rid: UInt8Builder,
12601280
}
12611281

12621282
impl InformationSchemaParametersBuilder {
@@ -1272,25 +1292,19 @@ impl InformationSchemaParametersBuilder {
12721292
data_type: impl AsRef<str>,
12731293
parameter_default: Option<impl AsRef<str>>,
12741294
is_variadic: bool,
1295+
rid: u8,
12751296
) {
1276-
let key = (
1277-
specific_name.as_ref().to_string(),
1278-
ordinal_position,
1279-
parameter_mode.as_ref().to_string(),
1280-
data_type.as_ref().to_string(),
1281-
);
1282-
if self.inserted.insert(key) {
1283-
self.specific_catalog
1284-
.append_value(specific_catalog.as_ref());
1285-
self.specific_schema.append_value(specific_schema.as_ref());
1286-
self.specific_name.append_value(specific_name.as_ref());
1287-
self.ordinal_position.append_value(ordinal_position);
1288-
self.parameter_mode.append_value(parameter_mode.as_ref());
1289-
self.parameter_name.append_option(parameter_name.as_ref());
1290-
self.data_type.append_value(data_type.as_ref());
1291-
self.parameter_default.append_option(parameter_default);
1292-
self.is_variadic.append_value(is_variadic);
1293-
}
1297+
self.specific_catalog
1298+
.append_value(specific_catalog.as_ref());
1299+
self.specific_schema.append_value(specific_schema.as_ref());
1300+
self.specific_name.append_value(specific_name.as_ref());
1301+
self.ordinal_position.append_value(ordinal_position);
1302+
self.parameter_mode.append_value(parameter_mode.as_ref());
1303+
self.parameter_name.append_option(parameter_name.as_ref());
1304+
self.data_type.append_value(data_type.as_ref());
1305+
self.parameter_default.append_option(parameter_default);
1306+
self.is_variadic.append_value(is_variadic);
1307+
self.rid.append_value(rid);
12941308
}
12951309

12961310
fn finish(&mut self) -> RecordBatch {
@@ -1306,6 +1320,7 @@ impl InformationSchemaParametersBuilder {
13061320
Arc::new(self.data_type.finish()),
13071321
Arc::new(self.parameter_default.finish()),
13081322
Arc::new(self.is_variadic.finish()),
1323+
Arc::new(self.rid.finish()),
13091324
],
13101325
)
13111326
.unwrap()

datafusion/sql/src/statement.rs

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -811,6 +811,10 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
811811
self.show_columns_to_plan(extended, full, table_name)
812812
}
813813

814+
Statement::ShowFunctions { filter, .. } => {
815+
self.show_functions_to_plan(filter)
816+
}
817+
814818
Statement::Insert(Insert {
815819
or,
816820
into,
@@ -1980,6 +1984,90 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
19801984
self.statement_to_plan(rewrite.pop_front().unwrap()) // length of rewrite is 1
19811985
}
19821986

1987+
/// Rewrite `SHOW FUNCTIONS` to another SQL query
1988+
/// The query is based on the `information_schema.routines` and `information_schema.parameters` tables
1989+
///
1990+
/// The output columns:
1991+
/// - function_name: The name of function
1992+
/// - return_type: The return type of the function
1993+
/// - parameters: The name of parameters (ordered by the ordinal position)
1994+
/// - parameter_types: The type of parameters (ordered by the ordinal position)
1995+
/// - description: The description of the function (the description defined in the document)
1996+
/// - syntax_example: The syntax_example of the function (the syntax_example defined in the document)
1997+
fn show_functions_to_plan(
1998+
&self,
1999+
filter: Option<ShowStatementFilter>,
2000+
) -> Result<LogicalPlan> {
2001+
let where_clause = if let Some(filter) = filter {
2002+
match filter {
2003+
ShowStatementFilter::Like(like) => {
2004+
format!("WHERE p.function_name like '{like}'")
2005+
}
2006+
_ => return plan_err!("Unsupported SHOW FUNCTIONS filter"),
2007+
}
2008+
} else {
2009+
"".to_string()
2010+
};
2011+
2012+
let query = format!(
2013+
r#"
2014+
SELECT DISTINCT
2015+
p.*,
2016+
r.function_type function_type,
2017+
r.description description,
2018+
r.syntax_example syntax_example
2019+
FROM
2020+
(
2021+
SELECT
2022+
i.specific_name function_name,
2023+
o.data_type return_type,
2024+
array_agg(i.parameter_name ORDER BY i.ordinal_position ASC) parameters,
2025+
array_agg(i.data_type ORDER BY i.ordinal_position ASC) parameter_types
2026+
FROM (
2027+
SELECT
2028+
specific_catalog,
2029+
specific_schema,
2030+
specific_name,
2031+
ordinal_position,
2032+
parameter_name,
2033+
data_type,
2034+
rid
2035+
FROM
2036+
information_schema.parameters
2037+
WHERE
2038+
parameter_mode = 'IN'
2039+
) i
2040+
JOIN
2041+
(
2042+
SELECT
2043+
specific_catalog,
2044+
specific_schema,
2045+
specific_name,
2046+
ordinal_position,
2047+
parameter_name,
2048+
data_type,
2049+
rid
2050+
FROM
2051+
information_schema.parameters
2052+
WHERE
2053+
parameter_mode = 'OUT'
2054+
) o
2055+
ON i.specific_catalog = o.specific_catalog
2056+
AND i.specific_schema = o.specific_schema
2057+
AND i.specific_name = o.specific_name
2058+
AND i.rid = o.rid
2059+
GROUP BY 1, 2, i.rid
2060+
) as p
2061+
JOIN information_schema.routines r
2062+
ON p.function_name = r.routine_name
2063+
{where_clause}
2064+
"#
2065+
);
2066+
let mut rewrite = DFParser::parse_sql(&query)?;
2067+
assert_eq!(rewrite.len(), 1);
2068+
self.statement_to_plan(rewrite.pop_front().unwrap()) // length of rewrite is 1
2069+
}
2070+
19832071
fn show_create_table_to_plan(
19842072
&self,
19852073
sql_table_name: ObjectName,

0 commit comments

Comments
 (0)