Skip to content

Commit ce45bb2

Browse files
committed
port digest
1 parent e53c9a0 commit ce45bb2

File tree

12 files changed

+55
-82
lines changed

12 files changed

+55
-82
lines changed

datafusion/expr/src/built_in_function.rs

-16
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,6 @@ pub enum BuiltinScalarFunction {
6262
Cosh,
6363
/// degrees
6464
Degrees,
65-
/// Digest
66-
Digest,
6765
/// exp
6866
Exp,
6967
/// factorial
@@ -413,7 +411,6 @@ impl BuiltinScalarFunction {
413411
BuiltinScalarFunction::SHA256 => Volatility::Immutable,
414412
BuiltinScalarFunction::SHA384 => Volatility::Immutable,
415413
BuiltinScalarFunction::SHA512 => Volatility::Immutable,
416-
BuiltinScalarFunction::Digest => Volatility::Immutable,
417414
BuiltinScalarFunction::SplitPart => Volatility::Immutable,
418415
BuiltinScalarFunction::StringToArray => Volatility::Immutable,
419416
BuiltinScalarFunction::StartsWith => Volatility::Immutable,
@@ -684,9 +681,6 @@ impl BuiltinScalarFunction {
684681
BuiltinScalarFunction::SHA512 => {
685682
utf8_or_binary_to_binary_type(&input_expr_types[0], "sha512")
686683
}
687-
BuiltinScalarFunction::Digest => {
688-
utf8_or_binary_to_binary_type(&input_expr_types[0], "digest")
689-
}
690684
BuiltinScalarFunction::SplitPart => {
691685
utf8_to_str_type(&input_expr_types[0], "split_part")
692686
}
@@ -968,15 +962,6 @@ impl BuiltinScalarFunction {
968962
BuiltinScalarFunction::FromUnixtime => {
969963
Signature::uniform(1, vec![Int64], self.volatility())
970964
}
971-
BuiltinScalarFunction::Digest => Signature::one_of(
972-
vec![
973-
Exact(vec![Utf8, Utf8]),
974-
Exact(vec![LargeUtf8, Utf8]),
975-
Exact(vec![Binary, Utf8]),
976-
Exact(vec![LargeBinary, Utf8]),
977-
],
978-
self.volatility(),
979-
),
980965
BuiltinScalarFunction::DateTrunc => Signature::one_of(
981966
vec![
982967
Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
@@ -1374,7 +1359,6 @@ impl BuiltinScalarFunction {
13741359
BuiltinScalarFunction::FromUnixtime => &["from_unixtime"],
13751360

13761361
// hashing functions
1377-
BuiltinScalarFunction::Digest => &["digest"],
13781362
BuiltinScalarFunction::MD5 => &["md5"],
13791363
BuiltinScalarFunction::SHA224 => &["sha224"],
13801364
BuiltinScalarFunction::SHA256 => &["sha256"],

datafusion/expr/src/expr_fn.rs

-16
Original file line numberDiff line numberDiff line change
@@ -760,7 +760,6 @@ scalar_expr!(
760760
code_point,
761761
"converts the Unicode code point to a UTF8 character"
762762
);
763-
scalar_expr!(Digest, digest, input algorithm, "compute the binary hash of `input`, using the `algorithm`");
764763
scalar_expr!(InitCap, initcap, string, "converts the first letter of each word in `string` in uppercase and the remaining characters in lowercase");
765764
scalar_expr!(Left, left, string n, "returns the first `n` characters in the `string`");
766765
scalar_expr!(Lower, lower, string, "convert the string to lower case");
@@ -1371,19 +1370,4 @@ mod test {
13711370
unreachable!();
13721371
}
13731372
}
1374-
1375-
#[test]
1376-
fn digest_function_definitions() {
1377-
if let Expr::ScalarFunction(ScalarFunction {
1378-
func_def: ScalarFunctionDefinition::BuiltIn(fun),
1379-
args,
1380-
}) = digest(col("tableA.a"), lit("md5"))
1381-
{
1382-
let name = BuiltinScalarFunction::Digest;
1383-
assert_eq!(name, fun);
1384-
assert_eq!(2, args.len());
1385-
} else {
1386-
unreachable!();
1387-
}
1388-
}
13891373
}

datafusion/functions/src/crypto/basic.rs

+19
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,22 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! "crypto" DataFusion functions
19+
120
use arrow::array::{Array, ArrayRef, BinaryArray, OffsetSizeTrait};
221
use arrow::datatypes::DataType;
322
use blake2::{Blake2b512, Blake2s256, Digest};

datafusion/functions/src/crypto/digest.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@
1616
// under the License.
1717

1818
//! "crypto" DataFusion functions
19-
use super::basic::{digest, utf8_or_binary_to_binary_type, DigestAlgorithm};
19+
use super::basic::{digest, utf8_or_binary_to_binary_type};
2020
use arrow::datatypes::DataType;
21-
use datafusion_common::{exec_err, Result, ScalarValue};
21+
use datafusion_common::Result;
2222
use datafusion_expr::{
2323
ColumnarValue, ScalarUDFImpl, Signature, TypeSignature::*, Volatility,
2424
};

datafusion/functions/src/lib.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,8 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {
143143
.chain(datetime::functions())
144144
.chain(encoding::functions())
145145
.chain(math::functions())
146-
.chain(regex::functions());
146+
.chain(regex::functions())
147+
.chain(crypto::functions());
147148

148149
all_functions.try_for_each(|udf| {
149150
let existing_udf = registry.register_udf(udf)?;

datafusion/physical-expr/src/crypto_expressions.rs

+29-30
Original file line numberDiff line numberDiff line change
@@ -54,36 +54,6 @@ enum DigestAlgorithm {
5454
Blake3,
5555
}
5656

57-
fn digest_process(
58-
value: &ColumnarValue,
59-
digest_algorithm: DigestAlgorithm,
60-
) -> Result<ColumnarValue> {
61-
match value {
62-
ColumnarValue::Array(a) => match a.data_type() {
63-
DataType::Utf8 => digest_algorithm.digest_utf8_array::<i32>(a.as_ref()),
64-
DataType::LargeUtf8 => digest_algorithm.digest_utf8_array::<i64>(a.as_ref()),
65-
DataType::Binary => digest_algorithm.digest_binary_array::<i32>(a.as_ref()),
66-
DataType::LargeBinary => {
67-
digest_algorithm.digest_binary_array::<i64>(a.as_ref())
68-
}
69-
other => exec_err!(
70-
"Unsupported data type {other:?} for function {digest_algorithm}"
71-
),
72-
},
73-
ColumnarValue::Scalar(scalar) => match scalar {
74-
ScalarValue::Utf8(a) | ScalarValue::LargeUtf8(a) => {
75-
Ok(digest_algorithm
76-
.digest_scalar(a.as_ref().map(|s: &String| s.as_bytes())))
77-
}
78-
ScalarValue::Binary(a) | ScalarValue::LargeBinary(a) => Ok(digest_algorithm
79-
.digest_scalar(a.as_ref().map(|v: &Vec<u8>| v.as_slice()))),
80-
other => exec_err!(
81-
"Unsupported data type {other:?} for function {digest_algorithm}"
82-
),
83-
},
84-
}
85-
}
86-
8757
macro_rules! digest_to_array {
8858
($METHOD:ident, $INPUT:expr) => {{
8959
let binary_array: BinaryArray = $INPUT
@@ -248,6 +218,35 @@ macro_rules! define_digest_function {
248218
}
249219
};
250220
}
221+
pub fn digest_process(
222+
value: &ColumnarValue,
223+
digest_algorithm: DigestAlgorithm,
224+
) -> Result<ColumnarValue> {
225+
match value {
226+
ColumnarValue::Array(a) => match a.data_type() {
227+
DataType::Utf8 => digest_algorithm.digest_utf8_array::<i32>(a.as_ref()),
228+
DataType::LargeUtf8 => digest_algorithm.digest_utf8_array::<i64>(a.as_ref()),
229+
DataType::Binary => digest_algorithm.digest_binary_array::<i32>(a.as_ref()),
230+
DataType::LargeBinary => {
231+
digest_algorithm.digest_binary_array::<i64>(a.as_ref())
232+
}
233+
other => exec_err!(
234+
"Unsupported data type {other:?} for function {digest_algorithm}"
235+
),
236+
},
237+
ColumnarValue::Scalar(scalar) => match scalar {
238+
ScalarValue::Utf8(a) | ScalarValue::LargeUtf8(a) => {
239+
Ok(digest_algorithm
240+
.digest_scalar(a.as_ref().map(|s: &String| s.as_bytes())))
241+
}
242+
ScalarValue::Binary(a) | ScalarValue::LargeBinary(a) => Ok(digest_algorithm
243+
.digest_scalar(a.as_ref().map(|v: &Vec<u8>| v.as_slice()))),
244+
other => exec_err!(
245+
"Unsupported data type {other:?} for function {digest_algorithm}"
246+
),
247+
},
248+
}
249+
}
251250

252251
/// this function exists so that we do not need to pull in the crate hex. it is only used by md5
253252
/// function below

datafusion/physical-expr/src/functions.rs

-3
Original file line numberDiff line numberDiff line change
@@ -520,9 +520,6 @@ pub fn create_physical_fun(
520520
BuiltinScalarFunction::MD5 => {
521521
Arc::new(invoke_if_crypto_expressions_feature_flag!(md5, "md5"))
522522
}
523-
BuiltinScalarFunction::Digest => {
524-
Arc::new(invoke_if_crypto_expressions_feature_flag!(digest, "digest"))
525-
}
526523
BuiltinScalarFunction::OctetLength => Arc::new(|args| match &args[0] {
527524
ColumnarValue::Array(v) => Ok(ColumnarValue::Array(length(v.as_ref())?)),
528525
ColumnarValue::Scalar(v) => match v {

datafusion/proto/proto/datafusion.proto

+1-1
Original file line numberDiff line numberDiff line change
@@ -556,7 +556,7 @@ enum ScalarFunction {
556556
Ascii = 4;
557557
Ceil = 5;
558558
Cos = 6;
559-
Digest = 7;
559+
// 7 was Digest
560560
Exp = 8;
561561
Floor = 9;
562562
Ln = 10;

datafusion/proto/src/generated/pbjson.rs

-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/proto/src/generated/prost.rs

+1-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/proto/src/logical_plan/from_proto.rs

+1-6
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ use datafusion_expr::{
5454
array_resize, array_slice, array_sort, array_union, arrow_typeof, ascii, asinh, atan,
5555
atan2, atanh, bit_length, btrim, cbrt, ceil, character_length, chr, coalesce,
5656
concat_expr, concat_ws_expr, cos, cosh, cot, current_date, current_time, date_bin,
57-
date_part, date_trunc, degrees, digest, ends_with, exp,
57+
date_part, date_trunc, degrees, ends_with, exp,
5858
expr::{self, InList, Sort, WindowFunction},
5959
factorial, find_in_set, flatten, floor, from_unixtime, gcd, initcap, iszero, lcm,
6060
left, levenshtein, ln, log, log10, log2,
@@ -512,7 +512,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
512512
ScalarFunction::Sha256 => Self::SHA256,
513513
ScalarFunction::Sha384 => Self::SHA384,
514514
ScalarFunction::Sha512 => Self::SHA512,
515-
ScalarFunction::Digest => Self::Digest,
516515
ScalarFunction::Log2 => Self::Log2,
517516
ScalarFunction::Signum => Self::Signum,
518517
ScalarFunction::Ascii => Self::Ascii,
@@ -1616,10 +1615,6 @@ pub fn parse_expr(
16161615
Ok(sha512(parse_expr(&args[0], registry, codec)?))
16171616
}
16181617
ScalarFunction::Md5 => Ok(md5(parse_expr(&args[0], registry, codec)?)),
1619-
ScalarFunction::Digest => Ok(digest(
1620-
parse_expr(&args[0], registry, codec)?,
1621-
parse_expr(&args[1], registry, codec)?,
1622-
)),
16231618
ScalarFunction::Ascii => {
16241619
Ok(ascii(parse_expr(&args[0], registry, codec)?))
16251620
}

datafusion/proto/src/logical_plan/to_proto.rs

-1
Original file line numberDiff line numberDiff line change
@@ -1491,7 +1491,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
14911491
BuiltinScalarFunction::SHA256 => Self::Sha256,
14921492
BuiltinScalarFunction::SHA384 => Self::Sha384,
14931493
BuiltinScalarFunction::SHA512 => Self::Sha512,
1494-
BuiltinScalarFunction::Digest => Self::Digest,
14951494
BuiltinScalarFunction::Log2 => Self::Log2,
14961495
BuiltinScalarFunction::Signum => Self::Signum,
14971496
BuiltinScalarFunction::Ascii => Self::Ascii,

0 commit comments

Comments
 (0)