Skip to content

Commit d5d9d30

Browse files
authored
Crypto Function Migration (#12840)
* Update crypto docs * delete old * fmt checks
1 parent f7591fb commit d5d9d30

File tree

8 files changed

+332
-89
lines changed

8 files changed

+332
-89
lines changed

datafusion/functions/src/crypto/digest.rs

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,12 @@
1919
use super::basic::{digest, utf8_or_binary_to_binary_type};
2020
use arrow::datatypes::DataType;
2121
use datafusion_common::Result;
22+
use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
2223
use datafusion_expr::{
23-
ColumnarValue, ScalarUDFImpl, Signature, TypeSignature::*, Volatility,
24+
ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature::*, Volatility,
2425
};
2526
use std::any::Any;
27+
use std::sync::OnceLock;
2628

2729
#[derive(Debug)]
2830
pub struct DigestFunc {
@@ -69,4 +71,48 @@ impl ScalarUDFImpl for DigestFunc {
6971
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
7072
digest(args)
7173
}
74+
75+
fn documentation(&self) -> Option<&Documentation> {
76+
Some(get_digest_doc())
77+
}
78+
}
79+
80+
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
81+
82+
fn get_digest_doc() -> &'static Documentation {
83+
DOCUMENTATION.get_or_init(|| {
84+
Documentation::builder()
85+
.with_doc_section(DOC_SECTION_HASHING)
86+
.with_description(
87+
"Computes the binary hash of an expression using the specified algorithm.",
88+
)
89+
.with_syntax_example("digest(expression, algorithm)")
90+
.with_sql_example(
91+
r#"```sql
92+
> select digest('foo', 'sha256');
93+
+------------------------------------------+
94+
| digest(Utf8("foo"), Utf8("sha256")) |
95+
+------------------------------------------+
96+
| <binary_hash_result> |
97+
+------------------------------------------+
98+
```"#,
99+
)
100+
.with_standard_argument(
101+
"expression", "String")
102+
.with_argument(
103+
"algorithm",
104+
"String expression specifying algorithm to use. Must be one of:
105+
106+
- md5
107+
- sha224
108+
- sha256
109+
- sha384
110+
- sha512
111+
- blake2s
112+
- blake2b
113+
- blake3",
114+
)
115+
.build()
116+
.unwrap()
117+
})
72118
}

datafusion/functions/src/crypto/md5.rs

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,12 @@
1919
use crate::crypto::basic::md5;
2020
use arrow::datatypes::DataType;
2121
use datafusion_common::{plan_err, Result};
22-
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
22+
use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
23+
use datafusion_expr::{
24+
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
25+
};
2326
use std::any::Any;
27+
use std::sync::OnceLock;
2428

2529
#[derive(Debug)]
2630
pub struct Md5Func {
@@ -84,4 +88,32 @@ impl ScalarUDFImpl for Md5Func {
8488
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
8589
md5(args)
8690
}
91+
92+
fn documentation(&self) -> Option<&Documentation> {
93+
Some(get_md5_doc())
94+
}
95+
}
96+
97+
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
98+
99+
fn get_md5_doc() -> &'static Documentation {
100+
DOCUMENTATION.get_or_init(|| {
101+
Documentation::builder()
102+
.with_doc_section(DOC_SECTION_HASHING)
103+
.with_description("Computes an MD5 128-bit checksum for a string expression.")
104+
.with_syntax_example("md5(expression)")
105+
.with_sql_example(
106+
r#"```sql
107+
> select md5('foo');
108+
+-------------------------------------+
109+
| md5(Utf8("foo")) |
110+
+-------------------------------------+
111+
| <md5_checksum_result> |
112+
+-------------------------------------+
113+
```"#,
114+
)
115+
.with_standard_argument("expression", "String")
116+
.build()
117+
.unwrap()
118+
})
87119
}

datafusion/functions/src/crypto/sha224.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,16 @@ fn get_sha224_doc() -> &'static Documentation {
5858
.with_doc_section(DOC_SECTION_HASHING)
5959
.with_description("Computes the SHA-224 hash of a binary string.")
6060
.with_syntax_example("sha224(expression)")
61+
.with_sql_example(
62+
r#"```sql
63+
> select sha224('foo');
64+
+------------------------------------------+
65+
| sha224(Utf8("foo")) |
66+
+------------------------------------------+
67+
| <sha224_hash_result> |
68+
+------------------------------------------+
69+
```"#,
70+
)
6171
.with_standard_argument("expression", "String")
6272
.build()
6373
.unwrap()

datafusion/functions/src/crypto/sha256.rs

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,12 @@
1919
use super::basic::{sha256, utf8_or_binary_to_binary_type};
2020
use arrow::datatypes::DataType;
2121
use datafusion_common::Result;
22-
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
22+
use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
23+
use datafusion_expr::{
24+
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
25+
};
2326
use std::any::Any;
27+
use std::sync::OnceLock;
2428

2529
#[derive(Debug)]
2630
pub struct SHA256Func {
@@ -60,7 +64,36 @@ impl ScalarUDFImpl for SHA256Func {
6064
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
6165
utf8_or_binary_to_binary_type(&arg_types[0], self.name())
6266
}
67+
6368
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
6469
sha256(args)
6570
}
71+
72+
fn documentation(&self) -> Option<&Documentation> {
73+
Some(get_sha256_doc())
74+
}
75+
}
76+
77+
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
78+
79+
fn get_sha256_doc() -> &'static Documentation {
80+
DOCUMENTATION.get_or_init(|| {
81+
Documentation::builder()
82+
.with_doc_section(DOC_SECTION_HASHING)
83+
.with_description("Computes the SHA-256 hash of a binary string.")
84+
.with_syntax_example("sha256(expression)")
85+
.with_sql_example(
86+
r#"```sql
87+
> select sha256('foo');
88+
+--------------------------------------+
89+
| sha256(Utf8("foo")) |
90+
+--------------------------------------+
91+
| <sha256_hash_result> |
92+
+--------------------------------------+
93+
```"#,
94+
)
95+
.with_standard_argument("expression", "String")
96+
.build()
97+
.unwrap()
98+
})
6699
}

datafusion/functions/src/crypto/sha384.rs

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,12 @@
1919
use super::basic::{sha384, utf8_or_binary_to_binary_type};
2020
use arrow::datatypes::DataType;
2121
use datafusion_common::Result;
22-
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
22+
use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
23+
use datafusion_expr::{
24+
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
25+
};
2326
use std::any::Any;
27+
use std::sync::OnceLock;
2428

2529
#[derive(Debug)]
2630
pub struct SHA384Func {
@@ -60,7 +64,36 @@ impl ScalarUDFImpl for SHA384Func {
6064
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
6165
utf8_or_binary_to_binary_type(&arg_types[0], self.name())
6266
}
67+
6368
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
6469
sha384(args)
6570
}
71+
72+
fn documentation(&self) -> Option<&Documentation> {
73+
Some(get_sha384_doc())
74+
}
75+
}
76+
77+
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
78+
79+
fn get_sha384_doc() -> &'static Documentation {
80+
DOCUMENTATION.get_or_init(|| {
81+
Documentation::builder()
82+
.with_doc_section(DOC_SECTION_HASHING)
83+
.with_description("Computes the SHA-384 hash of a binary string.")
84+
.with_syntax_example("sha384(expression)")
85+
.with_sql_example(
86+
r#"```sql
87+
> select sha384('foo');
88+
+-----------------------------------------+
89+
| sha384(Utf8("foo")) |
90+
+-----------------------------------------+
91+
| <sha384_hash_result> |
92+
+-----------------------------------------+
93+
```"#,
94+
)
95+
.with_standard_argument("expression", "String")
96+
.build()
97+
.unwrap()
98+
})
6699
}

datafusion/functions/src/crypto/sha512.rs

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,12 @@
1919
use super::basic::{sha512, utf8_or_binary_to_binary_type};
2020
use arrow::datatypes::DataType;
2121
use datafusion_common::Result;
22-
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
22+
use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
23+
use datafusion_expr::{
24+
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
25+
};
2326
use std::any::Any;
27+
use std::sync::OnceLock;
2428

2529
#[derive(Debug)]
2630
pub struct SHA512Func {
@@ -60,7 +64,36 @@ impl ScalarUDFImpl for SHA512Func {
6064
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
6165
utf8_or_binary_to_binary_type(&arg_types[0], self.name())
6266
}
67+
6368
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
6469
sha512(args)
6570
}
71+
72+
fn documentation(&self) -> Option<&Documentation> {
73+
Some(get_sha512_doc())
74+
}
75+
}
76+
77+
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
78+
79+
fn get_sha512_doc() -> &'static Documentation {
80+
DOCUMENTATION.get_or_init(|| {
81+
Documentation::builder()
82+
.with_doc_section(DOC_SECTION_HASHING)
83+
.with_description("Computes the SHA-512 hash of a binary string.")
84+
.with_syntax_example("sha512(expression)")
85+
.with_sql_example(
86+
r#"```sql
87+
> select sha512('foo');
88+
+-------------------------------------------+
89+
| sha512(Utf8("foo")) |
90+
+-------------------------------------------+
91+
| <sha512_hash_result> |
92+
+-------------------------------------------+
93+
```"#,
94+
)
95+
.with_standard_argument("expression", "String")
96+
.build()
97+
.unwrap()
98+
})
6699
}

docs/source/user-guide/sql/scalar_functions.md

Lines changed: 0 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -3066,90 +3066,6 @@ select map_values(map([100, 5], [42,43]));
30663066
[42, 43]
30673067
```
30683068

3069-
## Hashing Functions
3070-
3071-
- [digest](#digest)
3072-
- [md5](#md5)
3073-
- [sha256](#sha256)
3074-
- [sha384](#sha384)
3075-
- [sha512](#sha512)
3076-
3077-
### `digest`
3078-
3079-
Computes the binary hash of an expression using the specified algorithm.
3080-
3081-
```
3082-
digest(expression, algorithm)
3083-
```
3084-
3085-
#### Arguments
3086-
3087-
- **expression**: String expression to operate on.
3088-
Can be a constant, column, or function, and any combination of string operators.
3089-
- **algorithm**: String expression specifying algorithm to use.
3090-
Must be one of:
3091-
3092-
- md5
3093-
- sha224
3094-
- sha256
3095-
- sha384
3096-
- sha512
3097-
- blake2s
3098-
- blake2b
3099-
- blake3
3100-
3101-
### `md5`
3102-
3103-
Computes an MD5 128-bit checksum for a string expression.
3104-
3105-
```
3106-
md5(expression)
3107-
```
3108-
3109-
#### Arguments
3110-
3111-
- **expression**: String expression to operate on.
3112-
Can be a constant, column, or function, and any combination of string operators.
3113-
3114-
### `sha256`
3115-
3116-
Computes the SHA-256 hash of a binary string.
3117-
3118-
```
3119-
sha256(expression)
3120-
```
3121-
3122-
#### Arguments
3123-
3124-
- **expression**: String expression to operate on.
3125-
Can be a constant, column, or function, and any combination of string operators.
3126-
3127-
### `sha384`
3128-
3129-
Computes the SHA-384 hash of a binary string.
3130-
3131-
```
3132-
sha384(expression)
3133-
```
3134-
3135-
#### Arguments
3136-
3137-
- **expression**: String expression to operate on.
3138-
Can be a constant, column, or function, and any combination of string operators.
3139-
3140-
### `sha512`
3141-
3142-
Computes the SHA-512 hash of a binary string.
3143-
3144-
```
3145-
sha512(expression)
3146-
```
3147-
3148-
#### Arguments
3149-
3150-
- **expression**: String expression to operate on.
3151-
Can be a constant, column, or function, and any combination of string operators.
3152-
31533069
## Other Functions
31543070

31553071
- [arrow_cast](#arrow_cast)

0 commit comments

Comments
 (0)