Skip to content

Commit 1d6f574

Browse files
Cheng-Yuan-LaiIan Lai
Cheng-Yuan-Lai
authored and
Ian Lai
committed
support binaryView
1 parent 9858d2c commit 1d6f574

File tree

2 files changed

+95
-40
lines changed

2 files changed

+95
-40
lines changed

datafusion/functions/src/crypto/basic.rs

+89-34
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@
1717

1818
//! "crypto" DataFusion functions
1919
20-
use arrow::array::{Array, ArrayRef, BinaryArray, OffsetSizeTrait};
20+
use arrow::array::{
21+
Array, ArrayRef, BinaryArray, BinaryArrayType, BinaryViewArray, GenericBinaryArray,
22+
OffsetSizeTrait,
23+
};
2124
use arrow::array::{AsArray, GenericStringArray, StringArray, StringViewArray};
2225
use arrow::datatypes::DataType;
2326
use blake2::{Blake2b512, Blake2s256, Digest};
@@ -198,11 +201,13 @@ pub fn utf8_or_binary_to_binary_type(
198201
arg_type: &DataType,
199202
name: &str,
200203
) -> Result<DataType> {
204+
dbg!(arg_type);
201205
Ok(match arg_type {
202206
DataType::Utf8View
203207
| DataType::LargeUtf8
204208
| DataType::Utf8
205209
| DataType::Binary
210+
| DataType::BinaryView
206211
| DataType::LargeBinary => DataType::Binary,
207212
DataType::Null => DataType::Null,
208213
_ => {
@@ -251,30 +256,44 @@ impl DigestAlgorithm {
251256
where
252257
T: OffsetSizeTrait,
253258
{
254-
let input_value = as_generic_binary_array::<T>(value)?;
255-
let array: ArrayRef = match self {
256-
Self::Md5 => digest_to_array!(Md5, input_value),
257-
Self::Sha224 => digest_to_array!(Sha224, input_value),
258-
Self::Sha256 => digest_to_array!(Sha256, input_value),
259-
Self::Sha384 => digest_to_array!(Sha384, input_value),
260-
Self::Sha512 => digest_to_array!(Sha512, input_value),
261-
Self::Blake2b => digest_to_array!(Blake2b512, input_value),
262-
Self::Blake2s => digest_to_array!(Blake2s256, input_value),
263-
Self::Blake3 => {
264-
let binary_array: BinaryArray = input_value
265-
.iter()
266-
.map(|opt| {
267-
opt.map(|x| {
268-
let mut digest = Blake3::default();
269-
digest.update(x);
270-
Blake3::finalize(&digest).as_bytes().to_vec()
271-
})
272-
})
273-
.collect();
274-
Arc::new(binary_array)
259+
let array = match value.data_type() {
260+
DataType::Binary | DataType::LargeBinary => {
261+
let v = value.as_binary::<T>();
262+
self.digest_binary_array_impl::<&GenericBinaryArray<T>>(v)
263+
}
264+
DataType::BinaryView => {
265+
let v = value.as_binary_view();
266+
self.digest_binary_array_impl::<&BinaryViewArray>(v)
267+
}
268+
other => {
269+
return exec_err!("unsupported type for digest_utf_array: {other:?}")
275270
}
276271
};
277272
Ok(ColumnarValue::Array(array))
273+
// let input_value = as_generic_binary_array::<T>(value)?;
274+
// let array: ArrayRef = match self {
275+
// Self::Md5 => digest_to_array!(Md5, input_value),
276+
// Self::Sha224 => digest_to_array!(Sha224, input_value),
277+
// Self::Sha256 => digest_to_array!(Sha256, input_value),
278+
// Self::Sha384 => digest_to_array!(Sha384, input_value),
279+
// Self::Sha512 => digest_to_array!(Sha512, input_value),
280+
// Self::Blake2b => digest_to_array!(Blake2b512, input_value),
281+
// Self::Blake2s => digest_to_array!(Blake2s256, input_value),
282+
// Self::Blake3 => {
283+
// let binary_array: BinaryArray = input_value
284+
// .iter()
285+
// .map(|opt| {
286+
// opt.map(|x| {
287+
// let mut digest = Blake3::default();
288+
// digest.update(x);
289+
// Blake3::finalize(&digest).as_bytes().to_vec()
290+
// })
291+
// })
292+
// .collect();
293+
// Arc::new(binary_array)
294+
// }
295+
// };
296+
// Ok(ColumnarValue::Array(array))
278297
}
279298

280299
/// digest a string array to their hash values
@@ -328,6 +347,37 @@ impl DigestAlgorithm {
328347
}
329348
}
330349
}
350+
351+
pub fn digest_binary_array_impl<'a, BinaryArrType>(
352+
self,
353+
input_value: BinaryArrType,
354+
) -> ArrayRef
355+
where
356+
BinaryArrType: BinaryArrayType<'a>,
357+
{
358+
match self {
359+
Self::Md5 => digest_to_array!(Md5, input_value),
360+
Self::Sha224 => digest_to_array!(Sha224, input_value),
361+
Self::Sha256 => digest_to_array!(Sha256, input_value),
362+
Self::Sha384 => digest_to_array!(Sha384, input_value),
363+
Self::Sha512 => digest_to_array!(Sha512, input_value),
364+
Self::Blake2b => digest_to_array!(Blake2b512, input_value),
365+
Self::Blake2s => digest_to_array!(Blake2s256, input_value),
366+
Self::Blake3 => {
367+
let binary_array: BinaryArray = input_value
368+
.iter()
369+
.map(|opt| {
370+
opt.map(|x| {
371+
let mut digest = Blake3::default();
372+
digest.update(x);
373+
Blake3::finalize(&digest).as_bytes().to_vec()
374+
})
375+
})
376+
.collect();
377+
Arc::new(binary_array)
378+
}
379+
}
380+
}
331381
}
332382
pub fn digest_process(
333383
value: &ColumnarValue,
@@ -342,22 +392,27 @@ pub fn digest_process(
342392
DataType::LargeBinary => {
343393
digest_algorithm.digest_binary_array::<i64>(a.as_ref())
344394
}
345-
other => exec_err!(
346-
"Unsupported data type {other:?} for function {digest_algorithm}"
347-
),
348-
},
349-
ColumnarValue::Scalar(scalar) => match scalar {
350-
ScalarValue::Utf8View(a)
351-
| ScalarValue::Utf8(a)
352-
| ScalarValue::LargeUtf8(a) => {
353-
Ok(digest_algorithm
354-
.digest_scalar(a.as_ref().map(|s: &String| s.as_bytes())))
395+
DataType::BinaryView => {
396+
digest_algorithm.digest_binary_array::<i32>(a.as_ref())
355397
}
356-
ScalarValue::Binary(a) | ScalarValue::LargeBinary(a) => Ok(digest_algorithm
357-
.digest_scalar(a.as_ref().map(|v: &Vec<u8>| v.as_slice()))),
358398
other => exec_err!(
359399
"Unsupported data type {other:?} for function {digest_algorithm}"
360400
),
361401
},
402+
ColumnarValue::Scalar(scalar) => {
403+
match scalar {
404+
ScalarValue::Utf8View(a)
405+
| ScalarValue::Utf8(a)
406+
| ScalarValue::LargeUtf8(a) => Ok(digest_algorithm
407+
.digest_scalar(a.as_ref().map(|s: &String| s.as_bytes()))),
408+
ScalarValue::Binary(a)
409+
| ScalarValue::LargeBinary(a)
410+
| ScalarValue::BinaryView(a) => Ok(digest_algorithm
411+
.digest_scalar(a.as_ref().map(|v: &Vec<u8>| v.as_slice()))),
412+
other => exec_err!(
413+
"Unsupported data type {other:?} for function {digest_algorithm}"
414+
),
415+
}
416+
}
362417
}
363418
}

datafusion/functions/src/crypto/sha256.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ use datafusion_common::{
2424
};
2525
use datafusion_expr::{
2626
ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
27-
TypeSignature, Volatility,
27+
Volatility,
2828
};
2929
use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
3030
use datafusion_macros::user_doc;
@@ -57,12 +57,12 @@ impl Default for SHA256Func {
5757
impl SHA256Func {
5858
pub fn new() -> Self {
5959
Self {
60-
signature: Signature::new(
61-
TypeSignature::Coercible(vec![Coercion::new_implicit(
62-
TypeSignatureClass::Native(logical_string()),
63-
vec![TypeSignatureClass::Native(logical_binary())],
60+
signature: Signature::coercible(
61+
vec![Coercion::new_implicit(
62+
TypeSignatureClass::Native(logical_binary()),
63+
vec![TypeSignatureClass::Native(logical_string())],
6464
NativeType::Binary,
65-
)]),
65+
)],
6666
Volatility::Immutable,
6767
),
6868
}

0 commit comments

Comments
 (0)