From 04dbd835a1c79b86165c73126589f2815a5fec02 Mon Sep 17 00:00:00 2001 From: Niwaka <61189782+NiwakaDev@users.noreply.github.com> Date: Thu, 28 Sep 2023 19:25:09 +0900 Subject: [PATCH] feat: support greatest function (#2490) * feat: support greatest function * feat: make greatest take date_type as input * fix: move sqlness test into common/function/time.sql * fix: avoid using unwrap * fix: use downcast * refactor: simplify arrow cast --- src/common/function/src/scalars/timestamp.rs | 3 + .../src/scalars/timestamp/greatest.rs | 175 ++++++++++++++++++ .../standalone/common/function/time.result | 16 ++ .../cases/standalone/common/function/time.sql | 4 + .../cases/standalone/common/select/dummy.sql | 1 - 5 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 src/common/function/src/scalars/timestamp/greatest.rs diff --git a/src/common/function/src/scalars/timestamp.rs b/src/common/function/src/scalars/timestamp.rs index eb0e1afb1cb1..102bfb934bfc 100644 --- a/src/common/function/src/scalars/timestamp.rs +++ b/src/common/function/src/scalars/timestamp.rs @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
use std::sync::Arc; +mod greatest; mod to_unixtime; +use greatest::GreatestFunction; use to_unixtime::ToUnixtimeFunction; use crate::scalars::function_registry::FunctionRegistry; @@ -23,5 +25,6 @@ pub(crate) struct TimestampFunction; impl TimestampFunction { pub fn register(registry: &FunctionRegistry) { registry.register(Arc::new(ToUnixtimeFunction)); + registry.register(Arc::new(GreatestFunction)); } } diff --git a/src/common/function/src/scalars/timestamp/greatest.rs b/src/common/function/src/scalars/timestamp/greatest.rs new file mode 100644 index 000000000000..e583872d5a24 --- /dev/null +++ b/src/common/function/src/scalars/timestamp/greatest.rs @@ -0,0 +1,175 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::fmt::{self}; + +use common_query::error::{ + self, ArrowComputeSnafu, InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu, +}; +use common_query::prelude::{Signature, Volatility}; +use datatypes::arrow::array::AsArray; +use datatypes::arrow::compute::cast; +use datatypes::arrow::compute::kernels::comparison::gt_dyn; +use datatypes::arrow::compute::kernels::zip; +use datatypes::arrow::datatypes::{DataType as ArrowDataType, Date32Type}; +use datatypes::prelude::ConcreteDataType; +use datatypes::vectors::{Helper, VectorRef}; +use snafu::{ensure, ResultExt}; + +use crate::scalars::function::{Function, FunctionContext}; + +#[derive(Clone, Debug, Default)] +pub struct GreatestFunction; + +const NAME: &str = "greatest"; + +impl Function for GreatestFunction { + fn name(&self) -> &str { + NAME + } + + fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> { + Ok(ConcreteDataType::date_datatype()) + } + + fn signature(&self) -> Signature { + Signature::uniform( + 2, + vec![ + ConcreteDataType::string_datatype(), + ConcreteDataType::date_datatype(), + ], + Volatility::Immutable, + ) + } + + fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> { + ensure!( + columns.len() == 2, + InvalidFuncArgsSnafu { + err_msg: format!( + "The length of the args is not correct, expect exactly two, have: {}", + columns.len() + ), + } + ); + match columns[0].data_type() { + ConcreteDataType::String(_) => { + let column1 = cast(&columns[0].to_arrow_array(), &ArrowDataType::Date32) + .context(ArrowComputeSnafu)?; + let column1 = column1.as_primitive::<Date32Type>(); + let column2 = cast(&columns[1].to_arrow_array(), &ArrowDataType::Date32) + .context(ArrowComputeSnafu)?; + let column2 = column2.as_primitive::<Date32Type>(); + let boolean_array = gt_dyn(&column1, &column2).context(ArrowComputeSnafu)?; + let result = + zip::zip(&boolean_array, &column1, &column2).context(ArrowComputeSnafu)?; + Ok(Helper::try_into_vector(&result).context(error::FromArrowArraySnafu)?) 
+ } + ConcreteDataType::Date(_) => { + let column1 = columns[0].to_arrow_array(); + let column1 = column1.as_primitive::<Date32Type>(); + let column2 = columns[1].to_arrow_array(); + let column2 = column2.as_primitive::<Date32Type>(); + let boolean_array = gt_dyn(&column1, &column2).context(ArrowComputeSnafu)?; + let result = + zip::zip(&boolean_array, &column1, &column2).context(ArrowComputeSnafu)?; + Ok(Helper::try_into_vector(&result).context(error::FromArrowArraySnafu)?) + } + _ => UnsupportedInputDataTypeSnafu { + function: NAME, + datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(), + } + .fail(), + } + } +} + +impl fmt::Display for GreatestFunction { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "GREATEST") + } +} + +#[cfg(test)] +mod tests { + use std::str::FromStr; + use std::sync::Arc; + + use common_time::Date; + use datatypes::prelude::ConcreteDataType; + use datatypes::types::DateType; + use datatypes::value::Value; + use datatypes::vectors::{DateVector, StringVector, Vector}; + + use super::GreatestFunction; + use crate::scalars::function::FunctionContext; + use crate::scalars::Function; + + #[test] + fn test_greatest_takes_string_vector() { + let function = GreatestFunction; + assert_eq!( + function.return_type(&[]).unwrap(), + ConcreteDataType::Date(DateType) + ); + let columns = vec![ + Arc::new(StringVector::from(vec![ + "1970-01-01".to_string(), + "2012-12-23".to_string(), + ])) as _, + Arc::new(StringVector::from(vec![ + "2001-02-01".to_string(), + "1999-01-01".to_string(), + ])) as _, + ]; + + let result = function.eval(FunctionContext::default(), &columns).unwrap(); + let result = result.as_any().downcast_ref::<DateVector>().unwrap(); + assert_eq!(result.len(), 2); + assert_eq!( + result.get(0), + Value::Date(Date::from_str("2001-02-01").unwrap()) + ); + assert_eq!( + result.get(1), + Value::Date(Date::from_str("2012-12-23").unwrap()) + ); + } + + #[test] + fn test_greatest_takes_date_vector() { + let function = GreatestFunction; + 
assert_eq!( + function.return_type(&[]).unwrap(), + ConcreteDataType::Date(DateType) + ); + let columns = vec![ + Arc::new(DateVector::from_slice(vec![-1, 2])) as _, + Arc::new(DateVector::from_slice(vec![0, 1])) as _, + ]; + + let result = function.eval(FunctionContext::default(), &columns).unwrap(); + let result = result.as_any().downcast_ref::<DateVector>().unwrap(); + assert_eq!(result.len(), 2); + assert_eq!( + result.get(0), + Value::Date(Date::from_str("1970-01-01").unwrap()) + ); + assert_eq!( + result.get(1), + Value::Date(Date::from_str("1970-01-03").unwrap()) + ); + } +} diff --git a/tests/cases/standalone/common/function/time.result b/tests/cases/standalone/common/function/time.result index 5d5898fe1db4..123b6a3f2f7c 100644 --- a/tests/cases/standalone/common/function/time.result +++ b/tests/cases/standalone/common/function/time.result @@ -4,3 +4,19 @@ select current_time(); ++|current_time()|++|TIME|++ +select GREATEST('1999-01-30', '2023-03-01'); + ++-------------------------------------------------+ +| greatest(Utf8("1999-01-30"),Utf8("2023-03-01")) | ++-------------------------------------------------+ +| 2023-03-01 | ++-------------------------------------------------+ + +select GREATEST('2000-02-11'::Date, '2020-12-30'::Date); + ++-------------------------------------------------+ +| greatest(Utf8("2000-02-11"),Utf8("2020-12-30")) | ++-------------------------------------------------+ +| 2020-12-30 | ++-------------------------------------------------+ + diff --git a/tests/cases/standalone/common/function/time.sql b/tests/cases/standalone/common/function/time.sql index fd45687b424f..46d5c2347fd5 100644 --- a/tests/cases/standalone/common/function/time.sql +++ b/tests/cases/standalone/common/function/time.sql @@ -1,3 +1,7 @@ -- SQLNESS REPLACE (\d+:\d+:\d+\.\d+) TIME -- SQLNESS REPLACE [\s\-]+ select current_time(); + +select GREATEST('1999-01-30', '2023-03-01'); + +select GREATEST('2000-02-11'::Date, '2020-12-30'::Date); diff --git 
a/tests/cases/standalone/common/select/dummy.sql b/tests/cases/standalone/common/select/dummy.sql index 81ef9324a7ce..4690900f50b9 100644 --- a/tests/cases/standalone/common/select/dummy.sql +++ b/tests/cases/standalone/common/select/dummy.sql @@ -31,4 +31,3 @@ select b from test_unixtime; select TO_UNIXTIME(b) from test_unixtime; DROP TABLE test_unixtime; -