Skip to content

Commit

Permalink
Support casting Date to a time zone-specific timestamp (#7141)
Browse files Browse the repository at this point in the history
* Date can cast to tz-aware timestamp

* Cover other Date x TimeUnit cases

* Preserve local time when casting date to timestamp
  • Loading branch information
friendlymatthew authored Feb 24, 2025
1 parent a0c3186 commit 493d3ee
Showing 1 changed file with 242 additions and 32 deletions.
274 changes: 242 additions & 32 deletions arrow-cast/src/cast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,8 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
}
(Timestamp(_, _), _) if to_type.is_numeric() => true,
(_, Timestamp(_, _)) if from_type.is_numeric() => true,
(Date64, Timestamp(_, None)) => true,
(Date32, Timestamp(_, None)) => true,
(Date64, Timestamp(_, _)) => true,
(Date32, Timestamp(_, _)) => true,
(
Timestamp(_, _),
Timestamp(_, _)
Expand Down Expand Up @@ -1806,44 +1806,63 @@ pub fn cast_with_options(
})?,
))
}
(Date64, Timestamp(TimeUnit::Second, None)) => Ok(Arc::new(
array
(Date64, Timestamp(TimeUnit::Second, _)) => {
let array = array
.as_primitive::<Date64Type>()
.unary::<_, TimestampSecondType>(|x| x / MILLISECONDS),
)),
(Date64, Timestamp(TimeUnit::Millisecond, None)) => {
cast_reinterpret_arrays::<Date64Type, TimestampMillisecondType>(array)
.unary::<_, TimestampSecondType>(|x| x / MILLISECONDS);

cast_with_options(&array, to_type, cast_options)
}
(Date64, Timestamp(TimeUnit::Microsecond, None)) => Ok(Arc::new(
array
(Date64, Timestamp(TimeUnit::Millisecond, _)) => {
let array = array
.as_primitive::<Date64Type>()
.unary::<_, TimestampMicrosecondType>(|x| x * (MICROSECONDS / MILLISECONDS)),
)),
(Date64, Timestamp(TimeUnit::Nanosecond, None)) => Ok(Arc::new(
array
.reinterpret_cast::<TimestampMillisecondType>();

cast_with_options(&array, to_type, cast_options)
}

(Date64, Timestamp(TimeUnit::Microsecond, _)) => {
let array = array
.as_primitive::<Date64Type>()
.unary::<_, TimestampNanosecondType>(|x| x * (NANOSECONDS / MILLISECONDS)),
)),
(Date32, Timestamp(TimeUnit::Second, None)) => Ok(Arc::new(
array
.unary::<_, TimestampMicrosecondType>(|x| x * (MICROSECONDS / MILLISECONDS));

cast_with_options(&array, to_type, cast_options)
}
(Date64, Timestamp(TimeUnit::Nanosecond, _)) => {
let array = array
.as_primitive::<Date64Type>()
.unary::<_, TimestampNanosecondType>(|x| x * (NANOSECONDS / MILLISECONDS));

cast_with_options(&array, to_type, cast_options)
}
(Date32, Timestamp(TimeUnit::Second, _)) => {
let array = array
.as_primitive::<Date32Type>()
.unary::<_, TimestampSecondType>(|x| (x as i64) * SECONDS_IN_DAY),
)),
(Date32, Timestamp(TimeUnit::Millisecond, None)) => Ok(Arc::new(
array
.unary::<_, TimestampSecondType>(|x| (x as i64) * SECONDS_IN_DAY);

cast_with_options(&array, to_type, cast_options)
}
(Date32, Timestamp(TimeUnit::Millisecond, _)) => {
let array = array
.as_primitive::<Date32Type>()
.unary::<_, TimestampMillisecondType>(|x| (x as i64) * MILLISECONDS_IN_DAY),
)),
(Date32, Timestamp(TimeUnit::Microsecond, None)) => Ok(Arc::new(
array
.unary::<_, TimestampMillisecondType>(|x| (x as i64) * MILLISECONDS_IN_DAY);

cast_with_options(&array, to_type, cast_options)
}
(Date32, Timestamp(TimeUnit::Microsecond, _)) => {
let array = array
.as_primitive::<Date32Type>()
.unary::<_, TimestampMicrosecondType>(|x| (x as i64) * MICROSECONDS_IN_DAY),
)),
(Date32, Timestamp(TimeUnit::Nanosecond, None)) => Ok(Arc::new(
array
.unary::<_, TimestampMicrosecondType>(|x| (x as i64) * MICROSECONDS_IN_DAY);

cast_with_options(&array, to_type, cast_options)
}
(Date32, Timestamp(TimeUnit::Nanosecond, _)) => {
let array = array
.as_primitive::<Date32Type>()
.unary::<_, TimestampNanosecondType>(|x| (x as i64) * NANOSECONDS_IN_DAY),
)),
.unary::<_, TimestampNanosecondType>(|x| (x as i64) * NANOSECONDS_IN_DAY);

cast_with_options(&array, to_type, cast_options)
}

(_, Duration(unit)) if from_type.is_numeric() => {
let array = cast_with_options(array, &Int64, cast_options)?;
Expand Down Expand Up @@ -5217,6 +5236,197 @@ mod tests {
}};
}

#[test]
fn test_cast_date32_to_timestamp_and_timestamp_with_timezone() {
let tz = "+0545"; // UTC + 0545 is Asia/Kathmandu
let a = Date32Array::from(vec![Some(18628), None, None]); // 2021-1-1, 2022-1-1
let array = Arc::new(a) as ArrayRef;

let b = cast(
&array,
&DataType::Timestamp(TimeUnit::Second, Some(tz.into())),
)
.unwrap();
let c = b.as_primitive::<TimestampSecondType>();
let string_array = cast(&c, &DataType::Utf8).unwrap();
let result = string_array.as_string::<i32>();
assert_eq!("2021-01-01T00:00:00+05:45", result.value(0));

let b = cast(&array, &DataType::Timestamp(TimeUnit::Second, None)).unwrap();
let c = b.as_primitive::<TimestampSecondType>();
let string_array = cast(&c, &DataType::Utf8).unwrap();
let result = string_array.as_string::<i32>();
assert_eq!("2021-01-01T00:00:00", result.value(0));
}

#[test]
fn test_cast_date32_to_timestamp_with_timezone() {
let tz = "+0545"; // UTC + 0545 is Asia/Kathmandu
let a = Date32Array::from(vec![Some(18628), Some(18993), None]); // 2021-1-1, 2022-1-1
let array = Arc::new(a) as ArrayRef;
let b = cast(
&array,
&DataType::Timestamp(TimeUnit::Second, Some(tz.into())),
)
.unwrap();
let c = b.as_primitive::<TimestampSecondType>();
assert_eq!(1609438500, c.value(0));
assert_eq!(1640974500, c.value(1));
assert!(c.is_null(2));

let string_array = cast(&c, &DataType::Utf8).unwrap();
let result = string_array.as_string::<i32>();
assert_eq!("2021-01-01T00:00:00+05:45", result.value(0));
assert_eq!("2022-01-01T00:00:00+05:45", result.value(1));
}

#[test]
fn test_cast_date32_to_timestamp_with_timezone_ms() {
let tz = "+0545"; // UTC + 0545 is Asia/Kathmandu
let a = Date32Array::from(vec![Some(18628), Some(18993), None]); // 2021-1-1, 2022-1-1
let array = Arc::new(a) as ArrayRef;
let b = cast(
&array,
&DataType::Timestamp(TimeUnit::Millisecond, Some(tz.into())),
)
.unwrap();
let c = b.as_primitive::<TimestampMillisecondType>();
assert_eq!(1609438500000, c.value(0));
assert_eq!(1640974500000, c.value(1));
assert!(c.is_null(2));

let string_array = cast(&c, &DataType::Utf8).unwrap();
let result = string_array.as_string::<i32>();
assert_eq!("2021-01-01T00:00:00+05:45", result.value(0));
assert_eq!("2022-01-01T00:00:00+05:45", result.value(1));
}

#[test]
fn test_cast_date32_to_timestamp_with_timezone_us() {
let tz = "+0545"; // UTC + 0545 is Asia/Kathmandu
let a = Date32Array::from(vec![Some(18628), Some(18993), None]); // 2021-1-1, 2022-1-1
let array = Arc::new(a) as ArrayRef;
let b = cast(
&array,
&DataType::Timestamp(TimeUnit::Microsecond, Some(tz.into())),
)
.unwrap();
let c = b.as_primitive::<TimestampMicrosecondType>();
assert_eq!(1609438500000000, c.value(0));
assert_eq!(1640974500000000, c.value(1));
assert!(c.is_null(2));

let string_array = cast(&c, &DataType::Utf8).unwrap();
let result = string_array.as_string::<i32>();
assert_eq!("2021-01-01T00:00:00+05:45", result.value(0));
assert_eq!("2022-01-01T00:00:00+05:45", result.value(1));
}

#[test]
fn test_cast_date32_to_timestamp_with_timezone_ns() {
let tz = "+0545"; // UTC + 0545 is Asia/Kathmandu
let a = Date32Array::from(vec![Some(18628), Some(18993), None]); // 2021-1-1, 2022-1-1
let array = Arc::new(a) as ArrayRef;
let b = cast(
&array,
&DataType::Timestamp(TimeUnit::Nanosecond, Some(tz.into())),
)
.unwrap();
let c = b.as_primitive::<TimestampNanosecondType>();
assert_eq!(1609438500000000000, c.value(0));
assert_eq!(1640974500000000000, c.value(1));
assert!(c.is_null(2));

let string_array = cast(&c, &DataType::Utf8).unwrap();
let result = string_array.as_string::<i32>();
assert_eq!("2021-01-01T00:00:00+05:45", result.value(0));
assert_eq!("2022-01-01T00:00:00+05:45", result.value(1));
}

#[test]
fn test_cast_date64_to_timestamp_with_timezone() {
let array = Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]);
let tz = "+0545"; // UTC + 0545 is Asia/Kathmandu
let b = cast(
&array,
&DataType::Timestamp(TimeUnit::Second, Some(tz.into())),
)
.unwrap();

let c = b.as_primitive::<TimestampSecondType>();
assert_eq!(863979300, c.value(0));
assert_eq!(1545675300, c.value(1));
assert!(c.is_null(2));

let string_array = cast(&c, &DataType::Utf8).unwrap();
let result = string_array.as_string::<i32>();
assert_eq!("1997-05-19T00:00:00+05:45", result.value(0));
assert_eq!("2018-12-25T00:00:00+05:45", result.value(1));
}

#[test]
fn test_cast_date64_to_timestamp_with_timezone_ms() {
let array = Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]);
let tz = "+0545"; // UTC + 0545 is Asia/Kathmandu
let b = cast(
&array,
&DataType::Timestamp(TimeUnit::Millisecond, Some(tz.into())),
)
.unwrap();

let c = b.as_primitive::<TimestampMillisecondType>();
assert_eq!(863979300005, c.value(0));
assert_eq!(1545675300001, c.value(1));
assert!(c.is_null(2));

let string_array = cast(&c, &DataType::Utf8).unwrap();
let result = string_array.as_string::<i32>();
assert_eq!("1997-05-19T00:00:00.005+05:45", result.value(0));
assert_eq!("2018-12-25T00:00:00.001+05:45", result.value(1));
}

#[test]
fn test_cast_date64_to_timestamp_with_timezone_us() {
let array = Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]);
let tz = "+0545"; // UTC + 0545 is Asia/Kathmandu
let b = cast(
&array,
&DataType::Timestamp(TimeUnit::Microsecond, Some(tz.into())),
)
.unwrap();

let c = b.as_primitive::<TimestampMicrosecondType>();
assert_eq!(863979300005000, c.value(0));
assert_eq!(1545675300001000, c.value(1));
assert!(c.is_null(2));

let string_array = cast(&c, &DataType::Utf8).unwrap();
let result = string_array.as_string::<i32>();
assert_eq!("1997-05-19T00:00:00.005+05:45", result.value(0));
assert_eq!("2018-12-25T00:00:00.001+05:45", result.value(1));
}

#[test]
fn test_cast_date64_to_timestamp_with_timezone_ns() {
let array = Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]);
let tz = "+0545"; // UTC + 0545 is Asia/Kathmandu
let b = cast(
&array,
&DataType::Timestamp(TimeUnit::Nanosecond, Some(tz.into())),
)
.unwrap();

let c = b.as_primitive::<TimestampNanosecondType>();
assert_eq!(863979300005000000, c.value(0));
assert_eq!(1545675300001000000, c.value(1));
assert!(c.is_null(2));

let string_array = cast(&c, &DataType::Utf8).unwrap();
let result = string_array.as_string::<i32>();
assert_eq!("1997-05-19T00:00:00.005+05:45", result.value(0));
assert_eq!("2018-12-25T00:00:00.001+05:45", result.value(1));
}

#[test]
fn test_cast_timestamp_to_strings() {
// "2018-12-25T00:00:02.001", "1997-05-19T00:00:03.005", None
Expand Down

0 comments on commit 493d3ee

Please sign in to comment.