Skip to content

Commit ee55721

Browse files
authored
Expose IntervalMonthDayNano and IntervalDayTime and update docs (#5928)
* Expose IntervalMonthDayNano and IntervalDayTime and update docs * fix doc test
1 parent 1ef22e5 commit ee55721

File tree

3 files changed

+112
-72
lines changed

3 files changed

+112
-72
lines changed

arrow-array/src/array/primitive_array.rs

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -351,19 +351,51 @@ pub type Time64MicrosecondArray = PrimitiveArray<Time64MicrosecondType>;
351351
/// hold values such as `00:02:00.123456789`
352352
pub type Time64NanosecondArray = PrimitiveArray<Time64NanosecondType>;
353353

354-
/// A [`PrimitiveArray`] of “calendar” intervals in months
354+
/// A [`PrimitiveArray`] of “calendar” intervals in whole months
355355
///
356356
/// See [`IntervalYearMonthType`] for details on representation and caveats.
357+
///
358+
/// # Example
359+
/// ```
360+
/// # use arrow_array::IntervalYearMonthArray;
361+
/// let array = IntervalYearMonthArray::from(vec![
362+
/// 2, // 2 months
363+
/// 25, // 2 years and 1 month
364+
/// -1 // -1 month
365+
/// ]);
366+
/// ```
357367
pub type IntervalYearMonthArray = PrimitiveArray<IntervalYearMonthType>;
358368

359369
/// A [`PrimitiveArray`] of “calendar” intervals in days and milliseconds
360370
///
361-
/// See [`IntervalDayTimeType`] for details on representation and caveats.
371+
/// See [`IntervalDayTime`] for details on representation and caveats.
372+
///
373+
/// # Example
374+
/// ```
375+
/// # use arrow_array::IntervalDayTimeArray;
376+
/// use arrow_array::types::IntervalDayTime;
377+
/// let array = IntervalDayTimeArray::from(vec![
378+
/// IntervalDayTime::new(1, 1000), // 1 day, 1000 milliseconds
379+
/// IntervalDayTime::new(33, 0), // 33 days, 0 milliseconds
380+
/// IntervalDayTime::new(0, 12 * 60 * 60 * 1000), // 0 days, 12 hours
381+
/// ]);
382+
/// ```
362383
pub type IntervalDayTimeArray = PrimitiveArray<IntervalDayTimeType>;
363384

364385
/// A [`PrimitiveArray`] of “calendar” intervals in months, days, and nanoseconds.
365386
///
366-
/// See [`IntervalMonthDayNanoType`] for details on representation and caveats.
387+
/// See [`IntervalMonthDayNano`] for details on representation and caveats.
388+
///
389+
/// # Example
390+
/// ```
391+
/// # use arrow_array::IntervalMonthDayNanoArray;
392+
/// use arrow_array::types::IntervalMonthDayNano;
393+
/// let array = IntervalMonthDayNanoArray::from(vec![
394+
/// IntervalMonthDayNano::new(1, 2, 1000), // 1 month, 2 days, 1000 nanoseconds
395+
/// IntervalMonthDayNano::new(12, 1, 0), // 12 months, 1 day, 0 nanoseconds
396+
/// IntervalMonthDayNano::new(0, 0, 12 * 1000 * 1000), // 0 months, 0 days, 12 milliseconds
397+
/// ]);
398+
/// ```
367399
pub type IntervalMonthDayNanoArray = PrimitiveArray<IntervalMonthDayNanoType>;
368400

369401
/// A [`PrimitiveArray`] of elapsed durations in seconds

arrow-array/src/types.rs

Lines changed: 7 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use crate::delta::{
2323
use crate::temporal_conversions::as_datetime_with_timezone;
2424
use crate::timezone::Tz;
2525
use crate::{ArrowNativeTypeOp, OffsetSizeTrait};
26-
use arrow_buffer::{i256, Buffer, IntervalDayTime, IntervalMonthDayNano, OffsetBuffer};
26+
use arrow_buffer::{i256, Buffer, OffsetBuffer};
2727
use arrow_data::decimal::{validate_decimal256_precision, validate_decimal_precision};
2828
use arrow_data::{validate_binary_view, validate_string_view};
2929
use arrow_schema::{
@@ -36,6 +36,9 @@ use std::fmt::Debug;
3636
use std::marker::PhantomData;
3737
use std::ops::{Add, Sub};
3838

39+
// re-export types so that they can be used without importing arrow_buffer explicitly
40+
pub use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano};
41+
3942
// BooleanType is special: its bit-width is not the size of the primitive type, and its `index`
4043
// operation assumes bit-packing.
4144
/// A boolean datatype
@@ -218,84 +221,19 @@ make_type!(
218221
IntervalYearMonthType,
219222
i32,
220223
DataType::Interval(IntervalUnit::YearMonth),
221-
"A “calendar” interval stored as the number of whole months."
224+
"A 32-bit “calendar” interval type representing the number of whole months."
222225
);
223226
make_type!(
224227
IntervalDayTimeType,
225228
IntervalDayTime,
226229
DataType::Interval(IntervalUnit::DayTime),
227-
r#"A “calendar” interval type in days and milliseconds.
228-
229-
## Representation
230-
This type is stored as a single 64 bit integer, interpreted as two i32 fields:
231-
1. the number of elapsed days
232-
2. The number of milliseconds (no leap seconds),
233-
234-
```text
235-
┌──────────────┬──────────────┐
236-
│ Days │ Milliseconds │
237-
│ (32 bits) │ (32 bits) │
238-
└──────────────┴──────────────┘
239-
0 31 63 bit offset
240-
```
241-
Please see the [Arrow Spec](https://github.com/apache/arrow/blob/081b4022fe6f659d8765efc82b3f4787c5039e3c/format/Schema.fbs#L406-L408) for more details
242-
243-
## Note on Comparing and Ordering for Calendar Types
244-
245-
Values of `IntervalDayTimeType` are compared using their binary representation,
246-
which can lead to surprising results. Please see the description of ordering on
247-
[`IntervalMonthDayNanoType`] for more details
248-
"#
230+
"A “calendar” interval type representing days and milliseconds. See [`IntervalDayTime`] for more details."
249231
);
250232
make_type!(
251233
IntervalMonthDayNanoType,
252234
IntervalMonthDayNano,
253235
DataType::Interval(IntervalUnit::MonthDayNano),
254-
r#"A “calendar” interval type in months, days, and nanoseconds.
255-
256-
## Representation
257-
This type is stored as a single 128 bit integer,
258-
interpreted as three different signed integral fields:
259-
260-
1. The number of months (32 bits)
261-
2. The number days (32 bits)
262-
2. The number of nanoseconds (64 bits).
263-
264-
Nanoseconds does not allow for leap seconds.
265-
Each field is independent (e.g. there is no constraint that the quantity of
266-
nanoseconds represents less than a day's worth of time).
267-
268-
```text
269-
┌───────────────┬─────────────┬─────────────────────────────┐
270-
│ Months │ Days │ Nanos │
271-
│ (32 bits) │ (32 bits) │ (64 bits) │
272-
└───────────────┴─────────────┴─────────────────────────────┘
273-
0 32 64 128 bit offset
274-
```
275-
Please see the [Arrow Spec](https://github.com/apache/arrow/blob/081b4022fe6f659d8765efc82b3f4787c5039e3c/format/Schema.fbs#L409-L415) for more details
276-
277-
## Note on Comparing and Ordering for Calendar Types
278-
Values of `IntervalMonthDayNanoType` are compared using their binary representation,
279-
which can lead to surprising results.
280-
281-
Spans of time measured in calendar units are not fixed in absolute size (e.g.
282-
number of seconds) which makes defining comparisons and ordering non trivial.
283-
For example `1 month` is 28 days for February but `1 month` is 31 days
284-
in December.
285-
286-
This makes the seemingly simple operation of comparing two intervals
287-
complicated in practice. For example is `1 month` more or less than `30 days`? The
288-
answer depends on what month you are talking about.
289-
290-
This crate defines comparisons for calendar types using their binary
291-
representation which is fast and efficient, but leads
292-
to potentially surprising results.
293-
294-
For example a
295-
`IntervalMonthDayNano` of `1 month` will compare as **greater** than a
296-
`IntervalMonthDayNano` of `100 days` because the binary representation of `1 month`
297-
is larger than the binary representation of 100 days.
298-
"#
236+
r"A “calendar” interval type representing months, days, and nanoseconds. See [`IntervalMonthDayNano`] for more details."
299237
);
300238
make_type!(
301239
DurationSecondType,

arrow-buffer/src/interval.rs

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,52 @@ use crate::arith::derive_arith;
1919
use std::ops::Neg;
2020

2121
/// Value of an IntervalMonthDayNano array
22+
///
23+
/// ## Representation
24+
///
25+
/// This type is stored as a single 128 bit integer, interpreted as three
26+
/// different signed integral fields:
27+
///
28+
/// 1. The number of months (32 bits)
29+
/// 2. The number of days (32 bits)
30+
/// 3. The number of nanoseconds (64 bits).
31+
///
32+
/// The nanoseconds field does not account for leap seconds.
33+
///
34+
/// Each field is independent (e.g. there is no constraint that the quantity of
35+
/// nanoseconds represents less than a day's worth of time).
36+
///
37+
/// ```text
38+
/// ┌───────────────┬─────────────┬─────────────────────────────┐
39+
/// │ Months │ Days │ Nanos │
40+
/// │ (32 bits) │ (32 bits) │ (64 bits) │
41+
/// └───────────────┴─────────────┴─────────────────────────────┘
42+
/// 0 32 64 128 bit offset
43+
/// ```
44+
/// Please see the [Arrow Spec](https://github.com/apache/arrow/blob/081b4022fe6f659d8765efc82b3f4787c5039e3c/format/Schema.fbs#L409-L415) for more details
45+
///
46+
/// ## Note on Comparing and Ordering for Calendar Types
47+
///
48+
/// Values of `IntervalMonthDayNano` are compared using their binary
49+
/// representation, which can lead to surprising results.
50+
///
51+
/// Spans of time measured in calendar units are not fixed in absolute size (e.g.
52+
/// number of seconds) which makes defining comparisons and ordering non-trivial.
53+
/// For example `1 month` is 28 days for February but `1 month` is 31 days
54+
/// in December.
55+
///
56+
/// This makes the seemingly simple operation of comparing two intervals
57+
/// complicated in practice. For example is `1 month` more or less than `30
58+
/// days`? The answer depends on what month you are talking about.
59+
///
60+
/// This crate defines comparisons for calendar types using their binary
61+
/// representation which is fast and efficient, but leads
62+
/// to potentially surprising results.
63+
///
64+
/// For example, an
65+
/// `IntervalMonthDayNano` of `1 month` will compare as **greater** than an
66+
/// `IntervalMonthDayNano` of `100 days` because the binary representation of `1 month`
67+
/// is larger than the binary representation of `100 days`.
2268
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
2369
#[repr(C)]
2470
pub struct IntervalMonthDayNano {
@@ -272,6 +318,30 @@ derive_arith!(
272318
);
273319

274320
/// Value of an IntervalDayTime array
321+
///
322+
/// ## Representation
323+
///
324+
/// This type is stored as a single 64 bit integer, interpreted as two i32
325+
/// fields:
326+
///
327+
/// 1. The number of elapsed days
328+
/// 2. The number of milliseconds (no leap seconds)
329+
///
330+
/// ```text
331+
/// ┌──────────────┬──────────────┐
332+
/// │ Days │ Milliseconds │
333+
/// │ (32 bits) │ (32 bits) │
334+
/// └──────────────┴──────────────┘
335+
/// 0 31 63 bit offset
336+
/// ```
337+
///
338+
/// Please see the [Arrow Spec](https://github.com/apache/arrow/blob/081b4022fe6f659d8765efc82b3f4787c5039e3c/format/Schema.fbs#L406-L408) for more details
339+
///
340+
/// ## Note on Comparing and Ordering for Calendar Types
341+
///
342+
/// Values of `IntervalDayTime` are compared using their binary representation,
343+
/// which can lead to surprising results. Please see the description of ordering on
344+
/// [`IntervalMonthDayNano`] for more details.
275345
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
276346
#[repr(C)]
277347
pub struct IntervalDayTime {

0 commit comments

Comments
 (0)