diff --git a/datafusion/functions/src/datetime/from_unixtime.rs b/datafusion/functions/src/datetime/from_unixtime.rs index 84aa9feec654..4683142c717c 100644 --- a/datafusion/functions/src/datetime/from_unixtime.rs +++ b/datafusion/functions/src/datetime/from_unixtime.rs @@ -15,14 +15,15 @@ // specific language governing permissions and limitations // under the License. -use arrow::datatypes::DataType; -use arrow::datatypes::DataType::{Int64, Timestamp}; -use arrow::datatypes::TimeUnit::Second; use std::any::Any; -use std::sync::OnceLock; +use std::sync::{Arc, OnceLock}; -use datafusion_common::{exec_err, Result}; +use arrow::datatypes::DataType; +use arrow::datatypes::DataType::{Int64, Timestamp, Utf8}; +use arrow::datatypes::TimeUnit::Second; +use datafusion_common::{exec_err, Result, ScalarValue}; use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; +use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; @@ -41,7 +42,10 @@ impl Default for FromUnixtimeFunc { impl FromUnixtimeFunc { pub fn new() -> Self { Self { - signature: Signature::uniform(1, vec![Int64], Volatility::Immutable), + signature: Signature::one_of( + vec![Exact(vec![Int64, Utf8]), Exact(vec![Int64])], + Volatility::Immutable, + ), } } } @@ -59,28 +63,55 @@ impl ScalarUDFImpl for FromUnixtimeFunc { &self.signature } - fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(Timestamp(Second, None)) + fn return_type(&self, arg_types: &[DataType]) -> Result { + match arg_types.len() { + 1 => Ok(Timestamp(Second, None)), + 2 => match &arg_types[1] { + Utf8 => Ok(Timestamp(Second, Some(Utf8.to_string().into()))), + _ => exec_err!( + "Second argument for `from_unixtime` must be non-null Utf8, received {:?}", + arg_types[1] + ), + } + _ => exec_err!( + "from_unixtime function requires 1 or 2 arguments, got {}", + arg_types.len() + ), + } } fn invoke(&self, args: &[ColumnarValue]) -> Result { - if args.len() != 1 { + let len = args.len(); + if len != 1 && len != 2 { return exec_err!( - "from_unixtime function requires 1 argument, got {}", + "from_unixtime function requires 1 or 2 argument, got {}", args.len() ); } - match args[0].data_type() { - Int64 => args[0].cast_to(&Timestamp(Second, None), None), - other => { - exec_err!( - "Unsupported data type {:?} for function from_unixtime", - other - ) - } + if args[0].data_type() != Int64 { + return exec_err!( + "Unsupported data type {:?} for function from_unixtime", + args[0].data_type() + ); + } + + match len { + 1 => args[0].cast_to(&Timestamp(Second, None), None), + 2 => match &args[1] { + ColumnarValue::Scalar(ScalarValue::Utf8(Some(tz))) => args[0] + .cast_to(&Timestamp(Second, Some(Arc::from(tz.to_string()))), None), + _ => { + exec_err!( + "Unsupported data type {:?} for function from_unixtime", + args[1].data_type() + ) + } + }, + _ => unreachable!(), } } + fn documentation(&self) -> Option<&Documentation> { Some(get_from_unixtime_doc()) } @@ -93,12 +124,59 @@ fn get_from_unixtime_doc() -> &'static Documentation { Documentation::builder() .with_doc_section(DOC_SECTION_DATETIME) .with_description("Converts an integer to RFC3339 timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) return the corresponding timestamp.") - .with_syntax_example("from_unixtime(expression)") + .with_syntax_example("from_unixtime(expression, timezone)") .with_argument( "expression", - "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." + "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.", ) + .with_argument( + "timezone", + "Optional timezone to use when converting the integer to a timestamp. If not provided, the default timezone is UTC.", + ) + .build() .unwrap() }) } + +#[cfg(test)] +mod test { + use crate::datetime::from_unixtime::FromUnixtimeFunc; + use datafusion_common::ScalarValue; + use datafusion_common::ScalarValue::Int64; + use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; + + #[test] + fn test_without_timezone() { + let args = [ColumnarValue::Scalar(Int64(Some(1729900800)))]; + + let result = FromUnixtimeFunc::new().invoke(&args).unwrap(); + + match result { + ColumnarValue::Scalar(ScalarValue::TimestampSecond(Some(sec), None)) => { + assert_eq!(sec, 1729900800); + } + _ => panic!("Expected scalar value"), + } + } + + #[test] + fn test_with_timezone() { + let args = [ + ColumnarValue::Scalar(Int64(Some(1729900800))), + ColumnarValue::Scalar(ScalarValue::Utf8(Some( + "America/New_York".to_string(), + ))), + ]; + + let result = FromUnixtimeFunc::new().invoke(&args).unwrap(); + + match result { + ColumnarValue::Scalar(ScalarValue::TimestampSecond(Some(sec), Some(tz))) => { + assert_eq!(sec, 1729900800); + assert_eq!(tz.to_string(), "America/New_York"); + } + _ => panic!("Expected scalar value"), + } + } +} diff --git a/docs/source/user-guide/sql/scalar_functions_new.md b/docs/source/user-guide/sql/scalar_functions_new.md index c15821ac89a3..9db6f0151b01 100644 --- a/docs/source/user-guide/sql/scalar_functions_new.md +++ b/docs/source/user-guide/sql/scalar_functions_new.md @@ -2003,12 +2003,13 @@ _Alias of [date_trunc](#date_trunc)._ Converts an integer to RFC3339 timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) return the corresponding timestamp. ``` -from_unixtime(expression) +from_unixtime(expression, timezone) ``` #### Arguments - **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. +- **timezone**: Optional timezone to use when converting the integer to a timestamp. If not provided, the default timezone is UTC. ### `make_date`