Skip to content

Commit cf59b6c

Browse files
authored
Structured interval types for IntervalMonthDayNano or IntervalDayTime (#3125) (#5654) (#5769)
* Structured interval type (#3125) (#5654)
* Update integration-test
* Fix 32-bit build
* Review feedback
1 parent ce8363a commit cf59b6c

File tree

25 files changed

+972
-329
lines changed

25 files changed

+972
-329
lines changed

arrow-arith/src/numeric.rs

+20-23
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ use arrow_array::cast::AsArray;
2525
use arrow_array::timezone::Tz;
2626
use arrow_array::types::*;
2727
use arrow_array::*;
28-
use arrow_buffer::ArrowNativeType;
28+
use arrow_buffer::{ArrowNativeType, IntervalDayTime, IntervalMonthDayNano};
2929
use arrow_schema::{ArrowError, DataType, IntervalUnit, TimeUnit};
3030

3131
use crate::arity::{binary, try_binary};
@@ -343,12 +343,12 @@ trait TimestampOp: ArrowTimestampType {
343343
type Duration: ArrowPrimitiveType<Native = i64>;
344344

345345
fn add_year_month(timestamp: i64, delta: i32, tz: Tz) -> Option<i64>;
346-
fn add_day_time(timestamp: i64, delta: i64, tz: Tz) -> Option<i64>;
347-
fn add_month_day_nano(timestamp: i64, delta: i128, tz: Tz) -> Option<i64>;
346+
fn add_day_time(timestamp: i64, delta: IntervalDayTime, tz: Tz) -> Option<i64>;
347+
fn add_month_day_nano(timestamp: i64, delta: IntervalMonthDayNano, tz: Tz) -> Option<i64>;
348348

349349
fn sub_year_month(timestamp: i64, delta: i32, tz: Tz) -> Option<i64>;
350-
fn sub_day_time(timestamp: i64, delta: i64, tz: Tz) -> Option<i64>;
351-
fn sub_month_day_nano(timestamp: i64, delta: i128, tz: Tz) -> Option<i64>;
350+
fn sub_day_time(timestamp: i64, delta: IntervalDayTime, tz: Tz) -> Option<i64>;
351+
fn sub_month_day_nano(timestamp: i64, delta: IntervalMonthDayNano, tz: Tz) -> Option<i64>;
352352
}
353353

354354
macro_rules! timestamp {
@@ -360,23 +360,23 @@ macro_rules! timestamp {
360360
Self::add_year_months(left, right, tz)
361361
}
362362

363-
fn add_day_time(left: i64, right: i64, tz: Tz) -> Option<i64> {
363+
fn add_day_time(left: i64, right: IntervalDayTime, tz: Tz) -> Option<i64> {
364364
Self::add_day_time(left, right, tz)
365365
}
366366

367-
fn add_month_day_nano(left: i64, right: i128, tz: Tz) -> Option<i64> {
367+
fn add_month_day_nano(left: i64, right: IntervalMonthDayNano, tz: Tz) -> Option<i64> {
368368
Self::add_month_day_nano(left, right, tz)
369369
}
370370

371371
fn sub_year_month(left: i64, right: i32, tz: Tz) -> Option<i64> {
372372
Self::subtract_year_months(left, right, tz)
373373
}
374374

375-
fn sub_day_time(left: i64, right: i64, tz: Tz) -> Option<i64> {
375+
fn sub_day_time(left: i64, right: IntervalDayTime, tz: Tz) -> Option<i64> {
376376
Self::subtract_day_time(left, right, tz)
377377
}
378378

379-
fn sub_month_day_nano(left: i64, right: i128, tz: Tz) -> Option<i64> {
379+
fn sub_month_day_nano(left: i64, right: IntervalMonthDayNano, tz: Tz) -> Option<i64> {
380380
Self::subtract_month_day_nano(left, right, tz)
381381
}
382382
}
@@ -506,12 +506,12 @@ fn timestamp_op<T: TimestampOp>(
506506
/// Note: these should be fallible (#4456)
507507
trait DateOp: ArrowTemporalType {
508508
fn add_year_month(timestamp: Self::Native, delta: i32) -> Self::Native;
509-
fn add_day_time(timestamp: Self::Native, delta: i64) -> Self::Native;
510-
fn add_month_day_nano(timestamp: Self::Native, delta: i128) -> Self::Native;
509+
fn add_day_time(timestamp: Self::Native, delta: IntervalDayTime) -> Self::Native;
510+
fn add_month_day_nano(timestamp: Self::Native, delta: IntervalMonthDayNano) -> Self::Native;
511511

512512
fn sub_year_month(timestamp: Self::Native, delta: i32) -> Self::Native;
513-
fn sub_day_time(timestamp: Self::Native, delta: i64) -> Self::Native;
514-
fn sub_month_day_nano(timestamp: Self::Native, delta: i128) -> Self::Native;
513+
fn sub_day_time(timestamp: Self::Native, delta: IntervalDayTime) -> Self::Native;
514+
fn sub_month_day_nano(timestamp: Self::Native, delta: IntervalMonthDayNano) -> Self::Native;
515515
}
516516

517517
macro_rules! date {
@@ -521,23 +521,23 @@ macro_rules! date {
521521
Self::add_year_months(left, right)
522522
}
523523

524-
fn add_day_time(left: Self::Native, right: i64) -> Self::Native {
524+
fn add_day_time(left: Self::Native, right: IntervalDayTime) -> Self::Native {
525525
Self::add_day_time(left, right)
526526
}
527527

528-
fn add_month_day_nano(left: Self::Native, right: i128) -> Self::Native {
528+
fn add_month_day_nano(left: Self::Native, right: IntervalMonthDayNano) -> Self::Native {
529529
Self::add_month_day_nano(left, right)
530530
}
531531

532532
fn sub_year_month(left: Self::Native, right: i32) -> Self::Native {
533533
Self::subtract_year_months(left, right)
534534
}
535535

536-
fn sub_day_time(left: Self::Native, right: i64) -> Self::Native {
536+
fn sub_day_time(left: Self::Native, right: IntervalDayTime) -> Self::Native {
537537
Self::subtract_day_time(left, right)
538538
}
539539

540-
fn sub_month_day_nano(left: Self::Native, right: i128) -> Self::Native {
540+
fn sub_month_day_nano(left: Self::Native, right: IntervalMonthDayNano) -> Self::Native {
541541
Self::subtract_month_day_nano(left, right)
542542
}
543543
}
@@ -1346,13 +1346,10 @@ mod tests {
13461346
IntervalMonthDayNanoType::make_value(35, -19, 41899000000000000)
13471347
])
13481348
);
1349-
let a = IntervalMonthDayNanoArray::from(vec![i64::MAX as i128]);
1350-
let b = IntervalMonthDayNanoArray::from(vec![1]);
1349+
let a = IntervalMonthDayNanoArray::from(vec![IntervalMonthDayNano::MAX]);
1350+
let b = IntervalMonthDayNanoArray::from(vec![IntervalMonthDayNano::ONE]);
13511351
let err = add(&a, &b).unwrap_err().to_string();
1352-
assert_eq!(
1353-
err,
1354-
"Compute error: Overflow happened on: 9223372036854775807 + 1"
1355-
);
1352+
assert_eq!(err, "Compute error: Overflow happened on: 2147483647 + 1");
13561353
}
13571354

13581355
fn test_duration_impl<T: ArrowPrimitiveType<Native = i64>>() {

arrow-array/src/arithmetic.rs

+12-2
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow_buffer::{i256, ArrowNativeType};
18+
use arrow_buffer::{i256, ArrowNativeType, IntervalDayTime, IntervalMonthDayNano};
1919
use arrow_schema::ArrowError;
2020
use half::f16;
2121
use num::complex::ComplexFloat;
@@ -139,7 +139,10 @@ pub trait ArrowNativeTypeOp: ArrowNativeType {
139139

140140
macro_rules! native_type_op {
141141
($t:tt) => {
142-
native_type_op!($t, 0, 1, $t::MIN, $t::MAX);
142+
native_type_op!($t, 0, 1);
143+
};
144+
($t:tt, $zero:expr, $one: expr) => {
145+
native_type_op!($t, $zero, $one, $t::MIN, $t::MAX);
143146
};
144147
($t:tt, $zero:expr, $one: expr, $min: expr, $max: expr) => {
145148
impl ArrowNativeTypeOp for $t {
@@ -284,6 +287,13 @@ native_type_op!(u32);
284287
native_type_op!(u64);
285288
native_type_op!(i256, i256::ZERO, i256::ONE, i256::MIN, i256::MAX);
286289

290+
native_type_op!(IntervalDayTime, IntervalDayTime::ZERO, IntervalDayTime::ONE);
291+
native_type_op!(
292+
IntervalMonthDayNano,
293+
IntervalMonthDayNano::ZERO,
294+
IntervalMonthDayNano::ONE
295+
);
296+
287297
macro_rules! native_type_float_op {
288298
($t:tt, $zero:expr, $one:expr, $min:expr, $max:expr) => {
289299
impl ArrowNativeTypeOp for $t {

arrow-array/src/array/dictionary_array.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -946,7 +946,7 @@ where
946946
/// return Ok(d.with_values(r));
947947
/// }
948948
/// downcast_primitive_array! {
949-
/// a => Ok(Arc::new(a.iter().map(|x| x.map(|x| x.to_string())).collect::<StringArray>())),
949+
/// a => Ok(Arc::new(a.iter().map(|x| x.map(|x| format!("{x:?}"))).collect::<StringArray>())),
950950
/// d => Err(ArrowError::InvalidArgumentError(format!("{d:?} not supported")))
951951
/// }
952952
/// }

arrow-array/src/array/primitive_array.rs

+33-19
Original file line numberDiff line numberDiff line change
@@ -1502,6 +1502,7 @@ mod tests {
15021502
use crate::builder::{Decimal128Builder, Decimal256Builder};
15031503
use crate::cast::downcast_array;
15041504
use crate::BooleanArray;
1505+
use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano};
15051506
use arrow_schema::TimeUnit;
15061507

15071508
#[test]
@@ -1624,33 +1625,46 @@ mod tests {
16241625
assert_eq!(-5, arr.value(2));
16251626
assert_eq!(-5, arr.values()[2]);
16261627

1627-
// a day_time interval contains days and milliseconds, but we do not yet have accessors for the values
1628-
let arr = IntervalDayTimeArray::from(vec![Some(1), None, Some(-5)]);
1628+
let v0 = IntervalDayTime {
1629+
days: 34,
1630+
milliseconds: 1,
1631+
};
1632+
let v2 = IntervalDayTime {
1633+
days: -2,
1634+
milliseconds: -5,
1635+
};
1636+
1637+
let arr = IntervalDayTimeArray::from(vec![Some(v0), None, Some(v2)]);
1638+
16291639
assert_eq!(3, arr.len());
16301640
assert_eq!(0, arr.offset());
16311641
assert_eq!(1, arr.null_count());
1632-
assert_eq!(1, arr.value(0));
1633-
assert_eq!(1, arr.values()[0]);
1642+
assert_eq!(v0, arr.value(0));
1643+
assert_eq!(v0, arr.values()[0]);
16341644
assert!(arr.is_null(1));
1635-
assert_eq!(-5, arr.value(2));
1636-
assert_eq!(-5, arr.values()[2]);
1645+
assert_eq!(v2, arr.value(2));
1646+
assert_eq!(v2, arr.values()[2]);
16371647

1638-
// a month_day_nano interval contains months, days and nanoseconds,
1639-
// but we do not yet have accessors for the values.
1640-
// TODO: implement month, day, and nanos access method for month_day_nano.
1641-
let arr = IntervalMonthDayNanoArray::from(vec![
1642-
Some(100000000000000000000),
1643-
None,
1644-
Some(-500000000000000000000),
1645-
]);
1648+
let v0 = IntervalMonthDayNano {
1649+
months: 2,
1650+
days: 34,
1651+
nanoseconds: -1,
1652+
};
1653+
let v2 = IntervalMonthDayNano {
1654+
months: -3,
1655+
days: -2,
1656+
nanoseconds: 4,
1657+
};
1658+
1659+
let arr = IntervalMonthDayNanoArray::from(vec![Some(v0), None, Some(v2)]);
16461660
assert_eq!(3, arr.len());
16471661
assert_eq!(0, arr.offset());
16481662
assert_eq!(1, arr.null_count());
1649-
assert_eq!(100000000000000000000, arr.value(0));
1650-
assert_eq!(100000000000000000000, arr.values()[0]);
1663+
assert_eq!(v0, arr.value(0));
1664+
assert_eq!(v0, arr.values()[0]);
16511665
assert!(arr.is_null(1));
1652-
assert_eq!(-500000000000000000000, arr.value(2));
1653-
assert_eq!(-500000000000000000000, arr.values()[2]);
1666+
assert_eq!(v2, arr.value(2));
1667+
assert_eq!(v2, arr.values()[2]);
16541668
}
16551669

16561670
#[test]
@@ -2460,7 +2474,7 @@ mod tests {
24602474
expected = "PrimitiveArray expected data type Interval(MonthDayNano) got Interval(DayTime)"
24612475
)]
24622476
fn test_invalid_interval_type() {
2463-
let array = IntervalDayTimeArray::from(vec![1, 2, 3]);
2477+
let array = IntervalDayTimeArray::from(vec![IntervalDayTime::ZERO]);
24642478
let _ = IntervalMonthDayNanoArray::from(array.into_data());
24652479
}
24662480

arrow-array/src/types.rs

+19-58
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use crate::delta::{
2323
use crate::temporal_conversions::as_datetime_with_timezone;
2424
use crate::timezone::Tz;
2525
use crate::{ArrowNativeTypeOp, OffsetSizeTrait};
26-
use arrow_buffer::{i256, Buffer, OffsetBuffer};
26+
use arrow_buffer::{i256, Buffer, IntervalDayTime, IntervalMonthDayNano, OffsetBuffer};
2727
use arrow_data::decimal::{validate_decimal256_precision, validate_decimal_precision};
2828
use arrow_data::{validate_binary_view, validate_string_view};
2929
use arrow_schema::{
@@ -220,7 +220,7 @@ make_type!(
220220
);
221221
make_type!(
222222
IntervalDayTimeType,
223-
i64,
223+
IntervalDayTime,
224224
DataType::Interval(IntervalUnit::DayTime),
225225
r#"A “calendar” interval type in days and milliseconds.
226226
@@ -247,7 +247,7 @@ which can lead to surprising results. Please see the description of ordering on
247247
);
248248
make_type!(
249249
IntervalMonthDayNanoType,
250-
i128,
250+
IntervalMonthDayNano,
251251
DataType::Interval(IntervalUnit::MonthDayNano),
252252
r#"A “calendar” interval type in months, days, and nanoseconds.
253253
@@ -264,11 +264,11 @@ Each field is independent (e.g. there is no constraint that the quantity of
264264
nanoseconds represents less than a day's worth of time).
265265
266266
```text
267-
┌──────────────────────────────┬───────────────────────────┐
268-
Nanos Days Months
269-
(64 bits) │ (32 bits) │ (32 bits)
270-
└──────────────────────────────┴───────────────────────────┘
271-
0 63 95 127 bit offset
267+
┌───────────────┬─────────────┬─────────────────────────────┐
268+
│    Months     │    Days     │            Nanos            │
269+
│   (32 bits)   │  (32 bits)  │          (64 bits)          │
270+
└───────────────┴─────────────┴─────────────────────────────┘
271+
0               32            64                          128 bit offset
272272
```
273273
Please see the [Arrow Spec](https://github.com/apache/arrow/blob/081b4022fe6f659d8765efc82b3f4787c5039e3c/format/Schema.fbs#L409-L415) for more details
274274
@@ -917,25 +917,8 @@ impl IntervalDayTimeType {
917917
/// * `days` - The number of days (+/-) represented in this interval
918918
/// * `millis` - The number of milliseconds (+/-) represented in this interval
919919
#[inline]
920-
pub fn make_value(
921-
days: i32,
922-
millis: i32,
923-
) -> <IntervalDayTimeType as ArrowPrimitiveType>::Native {
924-
/*
925-
https://github.com/apache/arrow/blob/02c8598d264c839a5b5cf3109bfd406f3b8a6ba5/cpp/src/arrow/type.h#L1433
926-
struct DayMilliseconds {
927-
int32_t days = 0;
928-
int32_t milliseconds = 0;
929-
...
930-
}
931-
64 56 48 40 32 24 16 8 0
932-
+-------+-------+-------+-------+-------+-------+-------+-------+
933-
| days | milliseconds |
934-
+-------+-------+-------+-------+-------+-------+-------+-------+
935-
*/
936-
let m = millis as u64 & u32::MAX as u64;
937-
let d = (days as u64 & u32::MAX as u64) << 32;
938-
(m | d) as <IntervalDayTimeType as ArrowPrimitiveType>::Native
920+
pub fn make_value(days: i32, milliseconds: i32) -> IntervalDayTime {
921+
IntervalDayTime { days, milliseconds }
939922
}
940923

941924
/// Turns an IntervalDayTimeType into a tuple of (days, milliseconds)
@@ -944,10 +927,8 @@ impl IntervalDayTimeType {
944927
///
945928
/// * `i` - The IntervalDayTimeType to convert
946929
#[inline]
947-
pub fn to_parts(i: <IntervalDayTimeType as ArrowPrimitiveType>::Native) -> (i32, i32) {
948-
let days = (i >> 32) as i32;
949-
let ms = i as i32;
950-
(days, ms)
930+
pub fn to_parts(i: IntervalDayTime) -> (i32, i32) {
931+
(i.days, i.milliseconds)
951932
}
952933
}
953934

@@ -960,27 +941,12 @@ impl IntervalMonthDayNanoType {
960941
/// * `days` - The number of days (+/-) represented in this interval
961942
/// * `nanos` - The number of nanoseconds (+/-) represented in this interval
962943
#[inline]
963-
pub fn make_value(
964-
months: i32,
965-
days: i32,
966-
nanos: i64,
967-
) -> <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native {
968-
/*
969-
https://github.com/apache/arrow/blob/02c8598d264c839a5b5cf3109bfd406f3b8a6ba5/cpp/src/arrow/type.h#L1475
970-
struct MonthDayNanos {
971-
int32_t months;
972-
int32_t days;
973-
int64_t nanoseconds;
944+
pub fn make_value(months: i32, days: i32, nanoseconds: i64) -> IntervalMonthDayNano {
945+
IntervalMonthDayNano {
946+
months,
947+
days,
948+
nanoseconds,
974949
}
975-
128 112 96 80 64 48 32 16 0
976-
+-------+-------+-------+-------+-------+-------+-------+-------+
977-
| months | days | nanos |
978-
+-------+-------+-------+-------+-------+-------+-------+-------+
979-
*/
980-
let m = (months as u128 & u32::MAX as u128) << 96;
981-
let d = (days as u128 & u32::MAX as u128) << 64;
982-
let n = nanos as u128 & u64::MAX as u128;
983-
(m | d | n) as <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native
984950
}
985951

986952
/// Turns an IntervalMonthDayNanoType into a tuple of (months, days, nanos)
@@ -989,13 +955,8 @@ impl IntervalMonthDayNanoType {
989955
///
990956
/// * `i` - The IntervalMonthDayNanoType to convert
991957
#[inline]
992-
pub fn to_parts(
993-
i: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
994-
) -> (i32, i32, i64) {
995-
let months = (i >> 96) as i32;
996-
let days = (i >> 64) as i32;
997-
let nanos = i as i64;
998-
(months, days, nanos)
958+
pub fn to_parts(i: IntervalMonthDayNano) -> (i32, i32, i64) {
959+
(i.months, i.days, i.nanoseconds)
999960
}
1000961
}
1001962

0 commit comments

Comments
 (0)