@@ -419,7 +419,7 @@ pub type Decimal256Array = PrimitiveArray<Decimal256Type>;
419
419
420
420
pub use crate :: types:: ArrowPrimitiveType ;
421
421
422
- /// An array of [ primitive values](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout)
422
+ /// An array of primitive values, of type [`ArrowPrimitiveType`]
423
423
///
424
424
/// # Example: From a Vec
425
425
///
@@ -480,6 +480,19 @@ pub use crate::types::ArrowPrimitiveType;
480
480
/// assert_eq!(array.values(), &[1, 0, 2]);
481
481
/// assert!(array.is_null(1));
482
482
/// ```
483
+ ///
484
+ /// # Example: Get a `PrimitiveArray` from an [`ArrayRef`]
485
+ /// ```
486
+ /// # use std::sync::Arc;
487
+ /// # use arrow_array::{Array, cast::AsArray, ArrayRef, Float32Array, PrimitiveArray};
488
+ /// # use arrow_array::types::{Float32Type};
489
+ /// # use arrow_schema::DataType;
490
+ /// # let array: ArrayRef = Arc::new(Float32Array::from(vec![1.2, 2.3]));
491
+ /// // will panic if the array is not a Float32Array
492
+ /// assert_eq!(&DataType::Float32, array.data_type());
493
+ /// let f32_array: Float32Array = array.as_primitive().clone();
494
+ /// assert_eq!(f32_array, Float32Array::from(vec![1.2, 2.3]));
495
+ /// ```
483
496
pub struct PrimitiveArray < T : ArrowPrimitiveType > {
484
497
data_type : DataType ,
485
498
/// Values data
@@ -732,22 +745,34 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
732
745
PrimitiveArray :: from ( unsafe { d. build_unchecked ( ) } )
733
746
}
734
747
735
- /// Applies an unary and infallible function to a primitive array.
736
- /// This is the fastest way to perform an operation on a primitive array when
737
- /// the benefits of a vectorized operation outweigh the cost of branching nulls and non-nulls.
748
+ /// Applies a unary infallible function to a primitive array, producing a
749
+ /// new array of potentially different type.
750
+ ///
751
+ /// This is the fastest way to perform an operation on a primitive array
752
+ /// when the benefits of a vectorized operation outweigh the cost of
753
+ /// branching nulls and non-nulls.
738
754
///
739
- /// # Implementation
755
+ /// See also
756
+ /// * [`Self::unary_mut`] for in place modification.
757
+ /// * [`Self::try_unary`] for fallible operations.
758
+ /// * [`arrow::compute::binary`] for binary operations
759
+ ///
760
+ /// [`arrow::compute::binary`]: https://docs.rs/arrow/latest/arrow/compute/fn.binary.html
761
+ /// # Null Handling
762
+ ///
763
+ /// Applies the function for all values, including those on null slots. This
764
+ /// will often allow the compiler to generate faster vectorized code, but
765
+ /// requires that the operation must be infallible (not error/panic) for any
766
+ /// value of the corresponding type or this function may panic.
740
767
///
741
- /// This will apply the function for all values, including those on null slots.
742
- /// This implies that the operation must be infallible for any value of the corresponding type
743
- /// or this function may panic.
744
768
/// # Example
745
769
/// ```rust
746
- /// # use arrow_array::{Int32Array, types::Int32Type};
770
+ /// # use arrow_array::{Int32Array, Float32Array, types::Int32Type};
747
771
/// # fn main() {
748
772
/// let array = Int32Array::from(vec![Some(5), Some(7), None]);
749
- /// let c = array.unary(|x| x * 2 + 1);
750
- /// assert_eq!(c, Int32Array::from(vec![Some(11), Some(15), None]));
773
+ /// // Create a new array with the value of applying sqrt
774
+ /// let c = array.unary(|x| f32::sqrt(x as f32));
775
+ /// assert_eq!(c, Float32Array::from(vec![Some(2.236068), Some(2.6457512), None]));
751
776
/// # }
752
777
/// ```
753
778
pub fn unary < F , O > ( & self , op : F ) -> PrimitiveArray < O >
@@ -766,24 +791,50 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
766
791
PrimitiveArray :: new ( buffer. into ( ) , nulls)
767
792
}
768
793
769
- /// Applies an unary and infallible function to a mutable primitive array.
770
- /// Mutable primitive array means that the buffer is not shared with other arrays.
771
- /// As a result, this mutates the buffer directly without allocating new buffer.
794
+ /// Applies a unary and infallible function to the array in place if possible.
795
+ ///
796
+ /// # Buffer Reuse
797
+ ///
798
+ /// If the underlying buffers are not shared with other arrays, mutates the
799
+ /// underlying buffer in place, without allocating.
800
+ ///
801
+ /// If the underlying buffer is shared, returns `Err(self)`.
772
802
///
773
- /// # Implementation
803
+ /// # Null Handling
804
+ ///
805
+ /// See [`Self::unary`] for more information on null handling.
774
806
///
775
- /// This will apply the function for all values, including those on null slots.
776
- /// This implies that the operation must be infallible for any value of the corresponding type
777
- /// or this function may panic.
778
807
/// # Example
808
+ ///
779
809
/// ```rust
780
810
/// # use arrow_array::{Int32Array, types::Int32Type};
781
- /// # fn main() {
782
811
/// let array = Int32Array::from(vec![Some(5), Some(7), None]);
812
+ /// // Apply x*2+1 to the data in place, no allocations
783
813
/// let c = array.unary_mut(|x| x * 2 + 1).unwrap();
784
814
/// assert_eq!(c, Int32Array::from(vec![Some(11), Some(15), None]));
785
- /// # }
786
815
/// ```
816
+ ///
817
+ /// # Example: modify [`ArrayRef`] in place, if not shared
818
+ ///
819
+ /// It is also possible to modify an [`ArrayRef`] if there are no other
820
+ /// references to the underlying buffer.
821
+ ///
822
+ /// ```rust
823
+ /// # use std::sync::Arc;
824
+ /// # use arrow_array::{Array, cast::AsArray, ArrayRef, Int32Array, PrimitiveArray, types::Int32Type};
825
+ /// # let array: ArrayRef = Arc::new(Int32Array::from(vec![Some(5), Some(7), None]));
826
+ /// // Convert to Int32Array (panics if array.data_type is not Int32)
827
+ /// let a = array.as_primitive::<Int32Type>().clone();
828
+ /// // Try to apply x*2+1 to the data in place, fails because array is still shared
829
+ /// a.unary_mut(|x| x * 2 + 1).unwrap_err();
830
+ /// // Try again, this time dropping the last remaining reference
831
+ /// let a = array.as_primitive::<Int32Type>().clone();
832
+ /// drop(array);
833
+ /// // Now we can apply the operation in place
834
+ /// let c = a.unary_mut(|x| x * 2 + 1).unwrap();
835
+ /// assert_eq!(c, Int32Array::from(vec![Some(11), Some(15), None]));
836
+ /// ```
837
+
787
838
pub fn unary_mut < F > ( self , op : F ) -> Result < PrimitiveArray < T > , PrimitiveArray < T > >
788
839
where
789
840
F : Fn ( T :: Native ) -> T :: Native ,
@@ -796,11 +847,12 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
796
847
Ok ( builder. finish ( ) )
797
848
}
798
849
799
- /// Applies a unary and fallible function to all valid values in a primitive array
850
+ /// Applies a unary fallible function to all valid values in a primitive
851
+ /// array, producing a new array of potentially different type.
800
852
///
801
- /// This is unlike [`Self::unary`] which will apply an infallible function to all rows
802
- /// regardless of validity, in many cases this will be significantly faster and should
803
- /// be preferred if `op` is infallible .
853
+ /// Applies `op` to only rows that are valid, which is often significantly
854
+ /// slower than [`Self::unary`], which should be preferred if `op` is
855
+ /// infallible.
804
856
///
805
857
/// Note: LLVM is currently unable to effectively vectorize fallible operations
806
858
pub fn try_unary < F , O , E > ( & self , op : F ) -> Result < PrimitiveArray < O > , E >
@@ -829,13 +881,16 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
829
881
Ok ( PrimitiveArray :: new ( values, nulls) )
830
882
}
831
883
832
- /// Applies an unary and fallible function to all valid values in a mutable primitive array.
833
- /// Mutable primitive array means that the buffer is not shared with other arrays.
834
- /// As a result, this mutates the buffer directly without allocating new buffer.
884
+ /// Applies a unary fallible function to all valid values in a mutable
885
+ /// primitive array.
886
+ ///
887
+ /// # Null Handling
888
+ ///
889
+ /// See [`Self::try_unary`] for more information on null handling.
890
+ ///
891
+ /// # Buffer Reuse
835
892
///
836
- /// This is unlike [`Self::unary_mut`] which will apply an infallible function to all rows
837
- /// regardless of validity, in many cases this will be significantly faster and should
838
- /// be preferred if `op` is infallible.
893
+ /// See [`Self::unary_mut`] for more information on buffer reuse.
839
894
///
840
895
/// This returns an `Err` when the input array shares its buffer with another
841
896
/// array. In that case, the returned `Err` wraps the input array. If the function
@@ -870,9 +925,9 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
870
925
871
926
/// Applies a unary and nullable function to all valid values in a primitive array
872
927
///
873
- /// This is unlike [`Self::unary`] which will apply an infallible function to all rows
874
- /// regardless of validity, in many cases this will be significantly faster and should
875
- /// be preferred if `op` is infallible .
928
+ /// Applies `op` to only rows that are valid, which is often significantly
929
+ /// slower than [`Self::unary`], which should be preferred if `op` is
930
+ /// infallible.
876
931
///
877
932
/// Note: LLVM is currently unable to effectively vectorize fallible operations
878
933
pub fn unary_opt < F , O > ( & self , op : F ) -> PrimitiveArray < O >
@@ -915,8 +970,16 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
915
970
PrimitiveArray :: new ( values, Some ( nulls) )
916
971
}
917
972
918
- /// Returns `PrimitiveBuilder` of this primitive array for mutating its values if the underlying
919
- /// data buffer is not shared by others.
973
+ /// Returns a `PrimitiveBuilder` for this array, suitable for mutating values
974
+ /// in place.
975
+ ///
976
+ /// # Buffer Reuse
977
+ ///
978
+ /// If the underlying data buffer has no other outstanding references, the
979
+ /// buffer is used without copying.
980
+ ///
981
+ /// If the underlying data buffer does have outstanding references, returns
982
+ /// `Err(self)`
920
983
pub fn into_builder ( self ) -> Result < PrimitiveBuilder < T > , Self > {
921
984
let len = self . len ( ) ;
922
985
let data = self . into_data ( ) ;
0 commit comments