Skip to content

Commit 5360d20

Browse files
Omega359 and alamb authored
Migrate documentation for all string functions from scalar_functions.md to code (#12775)
* Added documentation for string and unicode functions.
* Fixed issues with aliases.
* Cargo fmt.
* Minor doc fixes.
* Update docs for var_pop/samp

---------

Co-authored-by: Andrew Lamb <[email protected]>
1 parent ef227f4 commit 5360d20

37 files changed

+2076
-827
lines changed

datafusion/core/src/bin/print_functions_docs.rs

+5-4
Original file line number | Diff line number | Diff line change
@@ -130,13 +130,14 @@ fn print_docs(
130130
.find(|f| f.get_name() == name || f.get_aliases().contains(&name))
131131
.unwrap();
132132

133-
let name = f.get_name();
134133
let aliases = f.get_aliases();
135134
let documentation = f.get_documentation();
136135

137136
// if this name is an alias we need to display what it's an alias of
138137
if aliases.contains(&name) {
139-
let _ = write!(docs, "_Alias of [{name}](#{name})._");
138+
let fname = f.get_name();
139+
let _ = writeln!(docs, r#"### `{name}`"#);
140+
let _ = writeln!(docs, "_Alias of [{fname}](#{fname})._");
140141
continue;
141142
}
142143

@@ -183,10 +184,10 @@ fn print_docs(
183184

184185
// next, aliases
185186
if !f.get_aliases().is_empty() {
186-
let _ = write!(docs, "#### Aliases");
187+
let _ = writeln!(docs, "#### Aliases");
187188

188189
for alias in f.get_aliases() {
189-
let _ = writeln!(docs, "- {alias}");
190+
let _ = writeln!(docs, "- {}", alias.replace("_", r#"\_"#));
190191
}
191192
}
192193

datafusion/functions/src/string/ascii.rs

+33-18
Original file line number | Diff line number | Diff line change
@@ -26,24 +26,6 @@ use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
2626
use std::any::Any;
2727
use std::sync::{Arc, OnceLock};
2828

29-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
30-
31-
fn get_ascii_doc() -> &'static Documentation {
32-
DOCUMENTATION.get_or_init(|| {
33-
Documentation::builder()
34-
.with_doc_section(DOC_SECTION_STRING)
35-
.with_description("Returns the ASCII value of the first character in a string.")
36-
.with_syntax_example("ascii(str)")
37-
.with_argument(
38-
"str",
39-
"String expression to operate on. Can be a constant, column, or function that evaluates to or can be coerced to a Utf8, LargeUtf8 or a Utf8View.",
40-
)
41-
.with_related_udf("chr")
42-
.build()
43-
.unwrap()
44-
})
45-
}
46-
4729
#[derive(Debug)]
4830
pub struct AsciiFunc {
4931
signature: Signature,
@@ -96,6 +78,39 @@ impl ScalarUDFImpl for AsciiFunc {
9678
}
9779
}
9880

81+
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
82+
83+
fn get_ascii_doc() -> &'static Documentation {
84+
DOCUMENTATION.get_or_init(|| {
85+
Documentation::builder()
86+
.with_doc_section(DOC_SECTION_STRING)
87+
.with_description(
88+
"Returns the Unicode character code of the first character in a string.",
89+
)
90+
.with_syntax_example("ascii(str)")
91+
.with_sql_example(
92+
r#"```sql
93+
> select ascii('abc');
94+
+--------------------+
95+
| ascii(Utf8("abc")) |
96+
+--------------------+
97+
| 97 |
98+
+--------------------+
99+
> select ascii('🚀');
100+
+-------------------+
101+
| ascii(Utf8("🚀")) |
102+
+-------------------+
103+
| 128640 |
104+
+-------------------+
105+
```"#,
106+
)
107+
.with_standard_argument("str", "String")
108+
.with_related_udf("chr")
109+
.build()
110+
.unwrap()
111+
})
112+
}
113+
99114
fn calculate_ascii<'a, V>(array: V) -> Result<ArrayRef, ArrowError>
100115
where
101116
V: ArrayAccessor<Item = &'a str>,

datafusion/functions/src/string/bit_length.rs

+35-5
Original file line number | Diff line number | Diff line change
@@ -15,17 +15,17 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use std::any::Any;
19-
2018
use arrow::compute::kernels::length::bit_length;
2119
use arrow::datatypes::DataType;
20+
use std::any::Any;
21+
use std::sync::OnceLock;
2222

23+
use crate::utils::utf8_to_int_type;
2324
use datafusion_common::{exec_err, Result, ScalarValue};
24-
use datafusion_expr::{ColumnarValue, Volatility};
25+
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
26+
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
2527
use datafusion_expr::{ScalarUDFImpl, Signature};
2628

27-
use crate::utils::utf8_to_int_type;
28-
2929
#[derive(Debug)]
3030
pub struct BitLengthFunc {
3131
signature: Signature,
@@ -88,4 +88,34 @@ impl ScalarUDFImpl for BitLengthFunc {
8888
},
8989
}
9090
}
91+
92+
fn documentation(&self) -> Option<&Documentation> {
93+
Some(get_bit_length_doc())
94+
}
95+
}
96+
97+
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
98+
99+
fn get_bit_length_doc() -> &'static Documentation {
100+
DOCUMENTATION.get_or_init(|| {
101+
Documentation::builder()
102+
.with_doc_section(DOC_SECTION_STRING)
103+
.with_description("Returns the bit length of a string.")
104+
.with_syntax_example("bit_length(str)")
105+
.with_sql_example(
106+
r#"```sql
107+
> select bit_length('datafusion');
108+
+--------------------------------+
109+
| bit_length(Utf8("datafusion")) |
110+
+--------------------------------+
111+
| 80 |
112+
+--------------------------------+
113+
```"#,
114+
)
115+
.with_standard_argument("str", "String")
116+
.with_related_udf("length")
117+
.with_related_udf("octet_length")
118+
.build()
119+
.unwrap()
120+
})
91121
}

datafusion/functions/src/string/btrim.rs

+35-6
Original file line number | Diff line number | Diff line change
@@ -15,18 +15,18 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use crate::string::common::*;
19+
use crate::utils::{make_scalar_function, utf8_to_str_type};
1820
use arrow::array::{ArrayRef, OffsetSizeTrait};
1921
use arrow::datatypes::DataType;
20-
use std::any::Any;
21-
2222
use datafusion_common::{exec_err, Result};
2323
use datafusion_expr::function::Hint;
24+
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
2425
use datafusion_expr::TypeSignature::*;
25-
use datafusion_expr::{ColumnarValue, Volatility};
26+
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
2627
use datafusion_expr::{ScalarUDFImpl, Signature};
27-
28-
use crate::string::common::*;
29-
use crate::utils::{make_scalar_function, utf8_to_str_type};
28+
use std::any::Any;
29+
use std::sync::OnceLock;
3030

3131
/// Returns the longest string with leading and trailing characters removed. If the characters are not specified, whitespace is removed.
3232
/// btrim('xyxtrimyyx', 'xyz') = 'trim'
@@ -109,6 +109,35 @@ impl ScalarUDFImpl for BTrimFunc {
109109
fn aliases(&self) -> &[String] {
110110
&self.aliases
111111
}
112+
113+
fn documentation(&self) -> Option<&Documentation> {
114+
Some(get_btrim_doc())
115+
}
116+
}
117+
118+
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
119+
120+
fn get_btrim_doc() -> &'static Documentation {
121+
DOCUMENTATION.get_or_init(|| {
122+
Documentation::builder()
123+
.with_doc_section(DOC_SECTION_STRING)
124+
.with_description("Trims the specified trim string from the start and end of a string. If no trim string is provided, all whitespace is removed from the start and end of the input string.")
125+
.with_syntax_example("btrim(str[, trim_str])")
126+
.with_sql_example(r#"```sql
127+
> select btrim('__datafusion____', '_');
128+
+-------------------------------------------+
129+
| btrim(Utf8("__datafusion____"),Utf8("_")) |
130+
+-------------------------------------------+
131+
| datafusion |
132+
+-------------------------------------------+
133+
```"#)
134+
.with_standard_argument("str", "String")
135+
.with_argument("trim_str", "String expression to operate on. Can be a constant, column, or function, and any combination of operators. _Default is whitespace characters._")
136+
.with_related_udf("ltrim")
137+
.with_related_udf("rtrim")
138+
.build()
139+
.unwrap()
140+
})
112141
}
113142

114143
#[cfg(test)]

datafusion/functions/src/string/chr.rs

+35-4
Original file line number | Diff line number | Diff line change
@@ -16,21 +16,21 @@
1616
// under the License.
1717

1818
use std::any::Any;
19-
use std::sync::Arc;
19+
use std::sync::{Arc, OnceLock};
2020

2121
use arrow::array::ArrayRef;
2222
use arrow::array::StringArray;
2323
use arrow::datatypes::DataType;
2424
use arrow::datatypes::DataType::Int64;
2525
use arrow::datatypes::DataType::Utf8;
2626

27+
use crate::utils::make_scalar_function;
2728
use datafusion_common::cast::as_int64_array;
2829
use datafusion_common::{exec_err, Result};
29-
use datafusion_expr::{ColumnarValue, Volatility};
30+
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
31+
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
3032
use datafusion_expr::{ScalarUDFImpl, Signature};
3133

32-
use crate::utils::make_scalar_function;
33-
3434
/// Returns the character with the given code. chr(0) is disallowed because text data types cannot store that character.
3535
/// chr(65) = 'A'
3636
pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
@@ -99,4 +99,35 @@ impl ScalarUDFImpl for ChrFunc {
9999
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
100100
make_scalar_function(chr, vec![])(args)
101101
}
102+
103+
fn documentation(&self) -> Option<&Documentation> {
104+
Some(get_chr_doc())
105+
}
106+
}
107+
108+
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
109+
110+
fn get_chr_doc() -> &'static Documentation {
111+
DOCUMENTATION.get_or_init(|| {
112+
Documentation::builder()
113+
.with_doc_section(DOC_SECTION_STRING)
114+
.with_description(
115+
"Returns the character with the specified ASCII or Unicode code value.",
116+
)
117+
.with_syntax_example("chr(expression)")
118+
.with_sql_example(
119+
r#"```sql
120+
> select chr(128640);
121+
+--------------------+
122+
| chr(Int64(128640)) |
123+
+--------------------+
124+
| 🚀 |
125+
+--------------------+
126+
```"#,
127+
)
128+
.with_standard_argument("expression", "String")
129+
.with_related_udf("ascii")
130+
.build()
131+
.unwrap()
132+
})
102133
}

datafusion/functions/src/string/concat.rs

+35-5
Original file line number | Diff line number | Diff line change
@@ -18,18 +18,18 @@
1818
use arrow::array::{as_largestring_array, Array};
1919
use arrow::datatypes::DataType;
2020
use std::any::Any;
21-
use std::sync::Arc;
21+
use std::sync::{Arc, OnceLock};
2222

23+
use crate::string::common::*;
24+
use crate::string::concat;
2325
use datafusion_common::cast::{as_string_array, as_string_view_array};
2426
use datafusion_common::{internal_err, plan_err, Result, ScalarValue};
2527
use datafusion_expr::expr::ScalarFunction;
28+
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
2629
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
27-
use datafusion_expr::{lit, ColumnarValue, Expr, Volatility};
30+
use datafusion_expr::{lit, ColumnarValue, Documentation, Expr, Volatility};
2831
use datafusion_expr::{ScalarUDFImpl, Signature};
2932

30-
use crate::string::common::*;
31-
use crate::string::concat;
32-
3333
#[derive(Debug)]
3434
pub struct ConcatFunc {
3535
signature: Signature,
@@ -244,6 +244,36 @@ impl ScalarUDFImpl for ConcatFunc {
244244
) -> Result<ExprSimplifyResult> {
245245
simplify_concat(args)
246246
}
247+
248+
fn documentation(&self) -> Option<&Documentation> {
249+
Some(get_concat_doc())
250+
}
251+
}
252+
253+
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
254+
255+
fn get_concat_doc() -> &'static Documentation {
256+
DOCUMENTATION.get_or_init(|| {
257+
Documentation::builder()
258+
.with_doc_section(DOC_SECTION_STRING)
259+
.with_description("Concatenates multiple strings together.")
260+
.with_syntax_example("concat(str[, ..., str_n])")
261+
.with_sql_example(
262+
r#"```sql
263+
> select concat('data', 'f', 'us', 'ion');
264+
+-------------------------------------------------------+
265+
| concat(Utf8("data"),Utf8("f"),Utf8("us"),Utf8("ion")) |
266+
+-------------------------------------------------------+
267+
| datafusion |
268+
+-------------------------------------------------------+
269+
```"#,
270+
)
271+
.with_standard_argument("str", "String")
272+
.with_argument("str_n", "Subsequent string expressions to concatenate.")
273+
.with_related_udf("concat_ws")
274+
.build()
275+
.unwrap()
276+
})
247277
}
248278

249279
pub fn simplify_concat(args: Vec<Expr>) -> Result<ExprSimplifyResult> {

0 commit comments

Comments
 (0)