Skip to content

Commit 57c4748

Browse files
authored
Minor: Document timestamp with/without cast behavior (#5826)
* Minor: Document timestamp with/without cast behavior * Update arrow-cast/src/cast/mod.rs * Fix comment blunder * Update arrow-cast/src/cast/mod.rs
1 parent 198af7a commit 57c4748

File tree

1 file changed

+102
-15
lines changed

1 file changed

+102
-15
lines changed

arrow-cast/src/cast/mod.rs

+102-15
Original file line numberDiff line numberDiff line change
@@ -15,22 +15,22 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! Defines cast kernels for `ArrayRef`, to convert `Array`s between
19-
//! supported datatypes.
18+
//! Cast kernels to convert [`ArrayRef`] between supported datatypes.
19+
//!
20+
//! See [`cast_with_options`] for more information on specific conversions.
2021
//!
2122
//! Example:
2223
//!
2324
//! ```
24-
//! use arrow_array::*;
25-
//! use arrow_cast::cast;
26-
//! use arrow_schema::DataType;
27-
//! use std::sync::Arc;
28-
//! use arrow_array::types::Float64Type;
29-
//! use arrow_array::cast::AsArray;
30-
//!
25+
//! # use arrow_array::*;
26+
//! # use arrow_cast::cast;
27+
//! # use arrow_schema::DataType;
28+
//! # use std::sync::Arc;
29+
//! # use arrow_array::types::Float64Type;
30+
//! # use arrow_array::cast::AsArray;
31+
//! // int32 to float64
3132
//! let a = Int32Array::from(vec![5, 6, 7]);
32-
//! let array = Arc::new(a) as ArrayRef;
33-
//! let b = cast(&array, &DataType::Float64).unwrap();
33+
//! let b = cast(&a, &DataType::Float64).unwrap();
3434
//! let c = b.as_primitive::<Float64Type>();
3535
//! assert_eq!(5.0, c.value(0));
3636
//! assert_eq!(6.0, c.value(1));
@@ -554,11 +554,13 @@ fn timestamp_to_date32<T: ArrowTimestampType>(
554554
Ok(Arc::new(array))
555555
}
556556

557-
/// Cast `array` to the provided data type and return a new Array with type `to_type`, if possible.
557+
/// Try to cast `array` to `to_type`.
558+
///
559+
/// Returns a new Array with type `to_type` if possible.
558560
///
559-
/// Accepts [`CastOptions`] to specify cast behavior.
561+
/// Accepts [`CastOptions`] to specify cast behavior. See also [`cast()`].
560562
///
561-
/// ## Behavior
563+
/// # Behavior
562564
/// * Boolean to Utf8: `true` => `1`, `false` => `0`
563565
/// * Utf8 to boolean: `true`, `yes`, `on`, `1` => `true`, `false`, `no`, `off`, `0` => `false`,
564566
/// short variants are accepted, other strings return null or error
@@ -577,10 +579,95 @@ fn timestamp_to_date32<T: ArrowTimestampType>(
577579
/// (i.e. casting `6.4999` to Decimal(10, 1) becomes `6.5`). Prior to version `26.0.0`,
578580
/// casting would truncate instead (i.e. outputs `6.4` instead)
579581
///
580-
/// Unsupported Casts
582+
/// Unsupported Casts (check with `can_cast_types` before calling):
581583
/// * To or from `StructArray`
582584
/// * List to primitive
583585
/// * Interval and duration
586+
///
587+
/// # Timestamps and Timezones
588+
///
589+
/// Timestamps are stored with an optional timezone in Arrow.
590+
///
591+
/// ## Casting timestamps to a timestamp without timezone / UTC
592+
/// ```
593+
/// # use arrow_array::Int64Array;
594+
/// # use arrow_array::types::TimestampSecondType;
595+
/// # use arrow_cast::{cast, display};
596+
/// # use arrow_array::cast::AsArray;
597+
/// # use arrow_schema::{DataType, TimeUnit};
598+
/// // the target type is a timestamp with no timezone (`None`)
599+
/// let data_type = DataType::Timestamp(TimeUnit::Second, None);
600+
/// let a = Int64Array::from(vec![1_000_000_000, 2_000_000_000, 3_000_000_000]);
601+
/// let b = cast(&a, &data_type).unwrap();
602+
/// let b = b.as_primitive::<TimestampSecondType>(); // downcast to result type
603+
/// assert_eq!(2_000_000_000, b.value(1)); // values are the same as the type has no timezone
604+
/// // use display to show them (note has no trailing Z)
605+
/// assert_eq!("2033-05-18T03:33:20", display::array_value_to_string(&b, 1).unwrap());
606+
/// ```
607+
///
608+
/// ## Casting timestamps to a timestamp with timezone
609+
///
610+
/// Similarly to the previous example, if you cast numeric values to a timestamp
611+
/// with timezone, the cast kernel will not change the underlying values
612+
/// but display and other functions will interpret them as being in the provided timezone.
613+
///
614+
/// ```
615+
/// # use arrow_array::Int64Array;
616+
/// # use arrow_array::types::TimestampSecondType;
617+
/// # use arrow_cast::{cast, display};
618+
/// # use arrow_array::cast::AsArray;
619+
/// # use arrow_schema::{DataType, TimeUnit};
620+
/// // can use "America/New_York" if chrono-tz feature is enabled, here use offset based timezone
621+
/// let data_type = DataType::Timestamp(TimeUnit::Second, Some("-05:00".into()));
622+
/// let a = Int64Array::from(vec![1_000_000_000, 2_000_000_000, 3_000_000_000]);
623+
/// let b = cast(&a, &data_type).unwrap();
624+
/// let b = b.as_primitive::<TimestampSecondType>(); // downcast to result type
625+
/// assert_eq!(2_000_000_000, b.value(1)); // values are still the same
626+
/// // displayed in the target timezone (note the offset -05:00)
627+
/// assert_eq!("2033-05-17T22:33:20-05:00", display::array_value_to_string(&b, 1).unwrap());
628+
/// ```
629+
/// ## Casting timestamps without timezone to timestamps with timezone
630+
///
631+
/// When casting from a timestamp without timezone to a timestamp with
632+
/// timezone, the cast kernel interprets the timestamp values as being in
633+
/// the destination timezone and then adjusts the underlying value to UTC as required.
634+
///
635+
/// However, note that when casting from a timestamp with timezone BACK to a
636+
/// timestamp without timezone the cast kernel does not adjust the values.
637+
///
638+
/// Thus round trip casting a timestamp without timezone to a timestamp with
639+
/// timezone and back to a timestamp without timezone results in different
640+
/// values than the starting values.
641+
///
642+
/// ```
643+
/// # use arrow_array::Int64Array;
644+
/// # use arrow_array::types::{TimestampSecondType};
645+
/// # use arrow_cast::{cast, display};
646+
/// # use arrow_array::cast::AsArray;
647+
/// # use arrow_schema::{DataType, TimeUnit};
648+
/// let data_type = DataType::Timestamp(TimeUnit::Second, None);
649+
/// let data_type_tz = DataType::Timestamp(TimeUnit::Second, Some("-05:00".into()));
650+
/// let a = Int64Array::from(vec![1_000_000_000, 2_000_000_000, 3_000_000_000]);
651+
/// let b = cast(&a, &data_type).unwrap(); // cast to timestamp without timezone
652+
/// let b = b.as_primitive::<TimestampSecondType>(); // downcast to result type
653+
/// assert_eq!(2_000_000_000, b.value(1)); // values are still the same
654+
/// // displayed without a timezone (note lack of offset or Z)
655+
/// assert_eq!("2033-05-18T03:33:20", display::array_value_to_string(&b, 1).unwrap());
656+
///
657+
/// // Convert timestamps without a timezone to timestamps with a timezone
658+
/// let c = cast(&b, &data_type_tz).unwrap();
659+
/// let c = c.as_primitive::<TimestampSecondType>(); // downcast to result type
660+
/// assert_eq!(2_000_018_000, c.value(1)); // value has been adjusted by offset
661+
/// // displayed with the target timezone offset (-05:00)
662+
/// assert_eq!("2033-05-18T03:33:20-05:00", display::array_value_to_string(&c, 1).unwrap());
663+
///
664+
/// // Convert from timestamp with timezone back to timestamp without timezone
665+
/// let d = cast(&c, &data_type).unwrap();
666+
/// let d = d.as_primitive::<TimestampSecondType>(); // downcast to result type
667+
/// assert_eq!(2_000_018_000, d.value(1)); // value has not been adjusted
668+
/// // NOTE: the displayed time differs from the original (08:33:20 instead of 03:33:20) because the UTC-adjusted value is kept as-is
669+
/// assert_eq!("2033-05-18T08:33:20", display::array_value_to_string(&d, 1).unwrap());
670+
/// ```
584671
pub fn cast_with_options(
585672
array: &dyn Array,
586673
to_type: &DataType,

0 commit comments

Comments
 (0)