From b2004bd2d130202fb90326a62a9ed988915fd6c3 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Mon, 9 Sep 2024 21:49:21 -0400 Subject: [PATCH] Updates based on code review. --- datafusion/functions-nested/src/range.rs | 74 ++++++++++---------- datafusion/sqllogictest/test_files/array.slt | 13 +++- 2 files changed, 49 insertions(+), 38 deletions(-) diff --git a/datafusion/functions-nested/src/range.rs b/datafusion/functions-nested/src/range.rs index 552fa210251f..e591d9e20b57 100644 --- a/datafusion/functions-nested/src/range.rs +++ b/datafusion/functions-nested/src/range.rs @@ -35,7 +35,7 @@ use datafusion_common::cast::{ as_date32_array, as_int64_array, as_interval_mdn_array, as_timestamp_nanosecond_array, }; use datafusion_common::{ - exec_err, internal_err, not_impl_datafusion_err, DataFusionError, Result, + exec_datafusion_err, exec_err, internal_err, not_impl_datafusion_err, Result, }; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use itertools::Itertools; @@ -125,7 +125,7 @@ impl ScalarUDFImpl for Range { make_scalar_function(|args| gen_range_timestamp(args, false))(args) } dt => { - exec_err!("unsupported type for range. Expected Int64, Date32 or Timestamp, got: {dt}") + exec_err!("unsupported type for RANGE. Expected Int64, Date32 or Timestamp, got: {dt}") } } } @@ -167,8 +167,8 @@ impl ScalarUDFImpl for GenSeries { &self.signature } - fn coerce_types(&self, _arg_types: &[DataType]) -> Result> { - _arg_types + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + arg_types .iter() .map(|arg_type| match arg_type { Null => Ok(Null), @@ -216,7 +216,7 @@ impl ScalarUDFImpl for GenSeries { } dt => { exec_err!( - "unsupported type for gen_series. Expected Int64, Date32 or Timestamp, got: {}", + "unsupported type for GENERATE_SERIES. Expected Int64, Date32 or Timestamp, got: {}", dt ) } @@ -352,7 +352,7 @@ fn gen_range_iter( } } -fn gen_range_date(args: &[ArrayRef], include_upper: bool) -> Result { +fn gen_range_date(args: &[ArrayRef], include_upper_bound: bool) -> Result { if args.len() != 3 { return exec_err!("arguments length does not match"); } @@ -390,7 +390,7 @@ fn gen_range_date(args: &[ArrayRef], include_upper: bool) -> Result { } let neg = months < 0 || days < 0; - if !include_upper { + if !include_upper_bound { stop = Date32Type::subtract_month_day_nano(stop, step); } let mut new_date = start; @@ -413,31 +413,31 @@ fn gen_range_date(args: &[ArrayRef], include_upper: bool) -> Result { Ok(arr) } -fn gen_range_timestamp(args: &[ArrayRef], include_upper: bool) -> Result { +fn gen_range_timestamp(args: &[ArrayRef], include_upper_bound: bool) -> Result { if args.len() != 3 { return exec_err!( - "arguments length must be 3 for {}", - if include_upper { - "generate_series" + "Arguments length must be 3 for {}", + if include_upper_bound { + "GENERATE_SERIES" } else { - "range" + "RANGE" } ); } // coerce_types fn should coerce all types to Timestamp(Nanosecond, tz) - let (start_arr, start_tz_opt) = cast_timestamp_arg(&args[0], include_upper)?; - let (stop_arr, stop_tz_opt) = cast_timestamp_arg(&args[1], include_upper)?; + let (start_arr, start_tz_opt) = cast_timestamp_arg(&args[0], include_upper_bound)?; + let (stop_arr, stop_tz_opt) = cast_timestamp_arg(&args[1], include_upper_bound)?; let step_arr = as_interval_mdn_array(&args[2])?; let start_tz = parse_tz(start_tz_opt)?; let stop_tz = parse_tz(stop_tz_opt)?; // values are timestamps - let values_builder = if let Some(start_tz_str) = start_tz_opt { - TimestampNanosecondBuilder::new().with_timezone(&**start_tz_str) - } else { - TimestampNanosecondBuilder::new() - }; + let values_builder = start_tz_opt + .clone() + .map_or_else(TimestampNanosecondBuilder::new, |start_tz_str| { + TimestampNanosecondBuilder::new().with_timezone(start_tz_str) + }); let mut list_builder = ListBuilder::new(values_builder); for idx in 0..start_arr.len() { @@ -454,41 +454,42 @@ fn gen_range_timestamp(args: &[ArrayRef], include_upper: bool) -> Result(stop, stop_tz).ok_or( - DataFusionError::Execution(format!( + exec_datafusion_err!( "Cannot generate timestamp for stop: {}: {:?}", - stop, stop_tz - )), + stop, + stop_tz + ), )?; let mut current = start; let mut current_dt = as_datetime_with_timezone::(current, start_tz).ok_or( - DataFusionError::Execution(format!( + exec_datafusion_err!( "Cannot generate timestamp for start: {}: {:?}", - current, start_tz - )), + current, + start_tz + ), )?; let values = from_fn(|| { - if (include_upper + if (include_upper_bound && ((neg && current_dt < stop_dt) || (!neg && current_dt > stop_dt))) - || (!include_upper + || (!include_upper_bound && ((neg && current_dt <= stop_dt) || (!neg && current_dt >= stop_dt))) { @@ -528,9 +529,9 @@ fn cast_timestamp_arg( internal_err!( "Unexpected argument type for {} : {}", if include_upper { - "generate_series" + "GENERATE_SERIES" } else { - "range" + "RANGE" }, arg.data_type() ) @@ -541,7 +542,6 @@ fn cast_timestamp_arg( fn parse_tz(tz: &Option>) -> Result { let tz = tz.as_ref().map_or_else(|| "+00", |s| s); - Tz::from_str(tz).map_err(|op| { - DataFusionError::Execution(format!("failed on timezone {tz}: {:?}", op)) - }) + Tz::from_str(tz) + .map_err(|op| exec_datafusion_err!("failed on timezone {tz}: {:?}", op)) } diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 5b26ddafadc5..0dedb0d07927 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -5917,6 +5917,17 @@ select generate_series(arrow_cast('2021-01-01T00:00:00', 'Timestamp(Nanosecond, ---- [2021-01-01T00:00:00-05:00, 2021-01-01T01:00:00-05:00, 2021-01-01T02:00:00-05:00, 2021-01-01T03:00:00-05:00, 2021-01-01T04:00:00-05:00, 2021-01-01T05:00:00-05:00] +## -5500000000 ns is -5.5 sec +query ? +select generate_series(arrow_cast('2021-01-01T00:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), arrow_cast('2021-01-01T06:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), INTERVAL '1 HOUR 30 MINUTE -5500000000 NANOSECOND'); +---- +[2021-01-01T00:00:00-05:00, 2021-01-01T01:29:54.500-05:00, 2021-01-01T02:59:49-05:00, 2021-01-01T04:29:43.500-05:00, 2021-01-01T05:59:38-05:00] + +## mixing types for timestamps is not supported +query error DataFusion error: Internal error: Unexpected argument type for GENERATE_SERIES : Date32 +select generate_series(arrow_cast('2021-01-01T00:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), DATE '2021-01-02', INTERVAL '1' HOUR); + + ## should return NULL query ? select generate_series(DATE '1992-09-01', NULL, INTERVAL '1' YEAR); @@ -6006,7 +6017,7 @@ query error DataFusion error: Execution error: step can't be 0 for function gene select generate_series(1, 1, 0); # Test generate_series with zero step -query error DataFusion error: Execution error: Interval argument to generate_series must not be 0 +query error DataFusion error: Execution error: Interval argument to GENERATE_SERIES must not be 0 select generate_series(TIMESTAMP '2000-01-02', TIMESTAMP '2000-01-01', INTERVAL '0' MINUTE); # Test generate_series with big steps