15
15
// specific language governing permissions and limitations
16
16
// under the License.
17
17
18
- //! Defines cast kernels for `ArrayRef`, to convert `Array`s between
19
- //! supported datatypes.
18
+ //! Cast kernels to convert [`ArrayRef`] between supported datatypes.
19
+ //!
20
+ //! See [`cast_with_options`] for more information on specific conversions.
20
21
//!
21
22
//! Example:
22
23
//!
23
24
//! ```
24
- //! use arrow_array::*;
25
- //! use arrow_cast::cast;
26
- //! use arrow_schema::DataType;
27
- //! use std::sync::Arc;
28
- //! use arrow_array::types::Float64Type;
29
- //! use arrow_array::cast::AsArray;
30
- //!
25
+ //! # use arrow_array::*;
26
+ //! # use arrow_cast::cast;
27
+ //! # use arrow_schema::DataType;
28
+ //! # use std::sync::Arc;
29
+ //! # use arrow_array::types::Float64Type;
30
+ //! # use arrow_array::cast::AsArray;
31
+ //! // int32 to float64
31
32
//! let a = Int32Array::from(vec![5, 6, 7]);
32
- //! let array = Arc::new(a) as ArrayRef;
33
- //! let b = cast(&array, &DataType::Float64).unwrap();
33
+ //! let b = cast(&a, &DataType::Float64).unwrap();
34
34
//! let c = b.as_primitive::<Float64Type>();
35
35
//! assert_eq!(5.0, c.value(0));
36
36
//! assert_eq!(6.0, c.value(1));
@@ -554,11 +554,13 @@ fn timestamp_to_date32<T: ArrowTimestampType>(
554
554
Ok ( Arc :: new ( array) )
555
555
}
556
556
557
- /// Cast `array` to the provided data type and return a new Array with type `to_type`, if possible.
557
+ /// Try to cast `array` to `to_type`.
558
+ ///
559
+ /// Returns a new Array with type `to_type` if possible.
558
560
///
559
- /// Accepts [`CastOptions`] to specify cast behavior.
561
+ /// Accepts [`CastOptions`] to specify cast behavior. See also [`cast()`].
560
562
///
561
- /// ## Behavior
563
+ /// # Behavior
562
564
/// * Boolean to Utf8: `true` => `1`, `false` => `0`
563
565
/// * Utf8 to boolean: `true`, `yes`, `on`, `1` => `true`, `false`, `no`, `off`, `0` => `false`,
564
566
/// short variants are accepted, other strings return null or error
@@ -577,10 +579,95 @@ fn timestamp_to_date32<T: ArrowTimestampType>(
577
579
/// (i.e. casting `6.4999` to Decimal(10, 1) becomes `6.5`). Prior to version `26.0.0`,
578
580
/// casting would truncate instead (i.e. outputs `6.4` instead)
579
581
///
580
- /// Unsupported Casts
582
+ /// Unsupported Casts (check with `can_cast_types` before calling):
581
583
/// * To or from `StructArray`
582
584
/// * List to primitive
583
585
/// * Interval and duration
586
+ ///
587
+ /// # Timestamps and Timezones
588
+ ///
589
+ /// Timestamps are stored with an optional timezone in Arrow.
590
+ ///
591
+ /// ## Casting timestamps to a timestamp without timezone / UTC
592
+ /// ```
593
+ /// # use arrow_array::Int64Array;
594
+ /// # use arrow_array::types::TimestampSecondType;
595
+ /// # use arrow_cast::{cast, display};
596
+ /// # use arrow_array::cast::AsArray;
597
+ /// # use arrow_schema::{DataType, TimeUnit};
598
+ /// // here no timezone is specified (`None`); "UTC" can also be used if the chrono-tz feature is enabled
599
+ /// let data_type = DataType::Timestamp(TimeUnit::Second, None);
600
+ /// let a = Int64Array::from(vec![1_000_000_000, 2_000_000_000, 3_000_000_000]);
601
+ /// let b = cast(&a, &data_type).unwrap();
602
+ /// let b = b.as_primitive::<TimestampSecondType>(); // downcast to result type
603
+ /// assert_eq!(2_000_000_000, b.value(1)); // values are the same as the type has no timezone
604
+ /// // use display to show them (note has no trailing Z)
605
+ /// assert_eq!("2033-05-18T03:33:20", display::array_value_to_string(&b, 1).unwrap());
606
+ /// ```
607
+ ///
608
+ /// ## Casting timestamps to a timestamp with timezone
609
+ ///
610
+ /// Similarly to the previous example, if you cast numeric values to a timestamp
611
+ /// with timezone, the cast kernel will not change the underlying values
612
+ /// but display and other functions will interpret them as being in the provided timezone.
613
+ ///
614
+ /// ```
615
+ /// # use arrow_array::Int64Array;
616
+ /// # use arrow_array::types::TimestampSecondType;
617
+ /// # use arrow_cast::{cast, display};
618
+ /// # use arrow_array::cast::AsArray;
619
+ /// # use arrow_schema::{DataType, TimeUnit};
620
+ /// // can use "America/New_York" if chrono-tz feature is enabled, here use offset based timezone
621
+ /// let data_type = DataType::Timestamp(TimeUnit::Second, Some("-05:00".into()));
622
+ /// let a = Int64Array::from(vec![1_000_000_000, 2_000_000_000, 3_000_000_000]);
623
+ /// let b = cast(&a, &data_type).unwrap();
624
+ /// let b = b.as_primitive::<TimestampSecondType>(); // downcast to result type
625
+ /// assert_eq!(2_000_000_000, b.value(1)); // values are still the same
626
+ /// // displayed in the target timezone (note the offset -05:00)
627
+ /// assert_eq!("2033-05-17T22:33:20-05:00", display::array_value_to_string(&b, 1).unwrap());
628
+ /// ```
629
+ /// ## Casting timestamps without timezone to timestamps with timezone
630
+ ///
631
+ /// When casting from a timestamp without timezone to a timestamp with
632
+ /// timezone, the cast kernel interprets the timestamp values as being in
633
+ /// the destination timezone and then adjusts the underlying value to UTC as required.
634
+ ///
635
+ /// However, note that when casting from a timestamp with timezone BACK to a
636
+ /// timestamp without timezone the cast kernel does not adjust the values.
637
+ ///
638
+ /// Thus round trip casting a timestamp without timezone to a timestamp with
639
+ /// timezone and back to a timestamp without timezone results in different
640
+ /// values than the starting values.
641
+ ///
642
+ /// ```
643
+ /// # use arrow_array::Int64Array;
644
+ /// # use arrow_array::types::{TimestampSecondType};
645
+ /// # use arrow_cast::{cast, display};
646
+ /// # use arrow_array::cast::AsArray;
647
+ /// # use arrow_schema::{DataType, TimeUnit};
648
+ /// let data_type = DataType::Timestamp(TimeUnit::Second, None);
649
+ /// let data_type_tz = DataType::Timestamp(TimeUnit::Second, Some("-05:00".into()));
650
+ /// let a = Int64Array::from(vec![1_000_000_000, 2_000_000_000, 3_000_000_000]);
651
+ /// let b = cast(&a, &data_type).unwrap(); // cast to timestamp without timezone
652
+ /// let b = b.as_primitive::<TimestampSecondType>(); // downcast to result type
653
+ /// assert_eq!(2_000_000_000, b.value(1)); // values are still the same
654
+ /// // displayed without a timezone (note lack of offset or Z)
655
+ /// assert_eq!("2033-05-18T03:33:20", display::array_value_to_string(&b, 1).unwrap());
656
+ ///
657
+ /// // Convert timestamps without a timezone to timestamps with a timezone
658
+ /// let c = cast(&b, &data_type_tz).unwrap();
659
+ /// let c = c.as_primitive::<TimestampSecondType>(); // downcast to result type
660
+ /// assert_eq!(2_000_018_000, c.value(1)); // value has been adjusted by offset
661
+ /// // displayed with the target timezone offset (-05:00)
662
+ /// assert_eq!("2033-05-18T03:33:20-05:00", display::array_value_to_string(&c, 1).unwrap());
663
+ ///
664
+ /// // Convert from timestamp with timezone back to timestamp without timezone
665
+ /// let d = cast(&c, &data_type).unwrap();
666
+ /// let d = d.as_primitive::<TimestampSecondType>(); // downcast to result type
667
+ /// assert_eq!(2_000_018_000, d.value(1)); // value has not been adjusted
668
+ /// // NOTE: the displayed time differs from the original (08:33:20 instead of 03:33:20 as in previous example)
669
+ /// assert_eq!("2033-05-18T08:33:20", display::array_value_to_string(&d, 1).unwrap());
670
+ /// ```
584
671
pub fn cast_with_options (
585
672
array : & dyn Array ,
586
673
to_type : & DataType ,
0 commit comments