@@ -97,40 +97,78 @@ fn build_extend_null_bits(array: &ArrayData, use_nulls: bool) -> ExtendNullBits
97
97
}
98
98
}
99
99
100
- /// Struct to efficiently and interactively create an [ArrayData] from an existing [ArrayData] by
100
+ /// Efficiently create an [ArrayData] from one or more existing [ArrayData]s by
101
101
/// copying chunks.
102
102
///
103
- /// The main use case of this struct is to perform unary operations to arrays of arbitrary types,
104
- /// such as `filter` and `take`.
103
+ /// The main use case of this struct is to perform unary operations on arrays of
104
+ /// arbitrary types, such as `filter` and `take`.
105
+ ///
106
+ /// # Example
107
+ /// ```
108
+ /// use arrow_buffer::Buffer;
109
+ /// use arrow_data::ArrayData;
110
+ /// use arrow_data::transform::MutableArrayData;
111
+ /// use arrow_schema::DataType;
112
+ /// fn i32_array(values: &[i32]) -> ArrayData {
113
+ /// ArrayData::try_new(DataType::Int32, 5, None, 0, vec![Buffer::from_slice_ref(values)], vec![]).unwrap()
114
+ /// }
115
+ /// let arr1 = i32_array(&[1, 2, 3, 4, 5]);
116
+ /// let arr2 = i32_array(&[6, 7, 8, 9, 10]);
117
+ /// // Create a mutable array for copying values from arr1 and arr2, with a capacity for 6 elements
118
+ /// let capacity = 3 * size_of::<i32>();
119
+ /// let mut mutable = MutableArrayData::new(vec![&arr1, &arr2], false, 10);
120
+ /// // Copy the first 3 elements from arr1
121
+ /// mutable.extend(0, 0, 3);
122
+ /// // Copy 2 elements from arr2: indices 2 and 3 (the end index, 4, is exclusive)
123
+ /// mutable.extend(1, 2, 4);
124
+ /// // Complete the MutableArrayData into a new ArrayData
125
+ /// let frozen = mutable.freeze();
126
+ /// assert_eq!(frozen, i32_array(&[1, 2, 3, 8, 9]));
127
+ /// ```
105
128
pub struct MutableArrayData < ' a > {
129
+ /// Input arrays: the data being read FROM.
130
+ ///
131
+ /// Note this is "dead code" because all actual references to the arrays are
132
+ /// stored in closures for extending values and nulls.
106
133
#[ allow( dead_code) ]
107
134
arrays : Vec < & ' a ArrayData > ,
108
- /// The attributes in [_MutableArrayData] cannot be in [MutableArrayData] due to
109
- /// mutability invariants (interior mutability):
110
- /// [MutableArrayData] contains a function that can only mutate [_MutableArrayData], not
111
- /// [MutableArrayData] itself
135
+
136
+ /// In progress output array: The data being written TO
137
+ ///
138
+ /// Note these fields are in a separate struct, [_MutableArrayData], as they
139
+ /// cannot be in [MutableArrayData] itself due to mutability invariants (interior
140
+ /// mutability): [MutableArrayData] contains a function that can only mutate
141
+ /// [_MutableArrayData], not [MutableArrayData] itself
112
142
data : _MutableArrayData < ' a > ,
113
143
114
- /// the child data of the `Array` in Dictionary arrays.
115
- /// This is not stored in `MutableArrayData` because these values constant and only needed
116
- /// at the end, when freezing [_MutableArrayData].
144
+ /// The child data of the `Array` in Dictionary arrays.
145
+ ///
146
+ /// This is not stored in `_MutableArrayData` because these values are
147
+ /// constant and only needed at the end, when freezing [_MutableArrayData].
117
148
dictionary : Option < ArrayData > ,
118
149
119
- /// Variadic data buffers referenced by views
120
- /// This is not stored in `MutableArrayData` because these values constant and only needed
121
- /// at the end, when freezing [_MutableArrayData]
150
+ /// Variadic data buffers referenced by views.
151
+ ///
152
+ /// Note this is not stored in `_MutableArrayData` because these values
153
+ /// are constant and only needed at the end, when freezing
154
+ /// [_MutableArrayData]
122
155
variadic_data_buffers : Vec < Buffer > ,
123
156
124
- /// function used to extend values from arrays. This function's lifetime is bound to the array
125
- /// because it reads values from it.
157
+ /// Function used to extend the output array with values from input arrays.
158
+ ///
159
+ /// This function's lifetime is bound to the input arrays because it reads
160
+ /// values from them.
126
161
extend_values : Vec < Extend < ' a > > ,
127
162
128
- /// function used to extend nulls from arrays. This function's lifetime is bound to the array
129
- /// because it reads nulls from it.
163
+ /// Function used to extend the output array with nulls from input arrays.
164
+ ///
165
+ /// This function's lifetime is bound to the input arrays because it reads
166
+ /// nulls from them.
130
167
extend_null_bits : Vec < ExtendNullBits < ' a > > ,
131
168
132
- /// function used to extend nulls.
133
- /// this is independent of the arrays and therefore has no lifetime.
169
+ /// Function used to extend the output array with null elements.
170
+ ///
171
+ /// This function is independent of the arrays and therefore has no lifetime.
134
172
extend_nulls : ExtendNulls ,
135
173
}
136
174
@@ -307,47 +345,63 @@ fn preallocate_offset_and_binary_buffer<Offset: ArrowNativeType + Integer>(
307
345
]
308
346
}
309
347
310
- /// Define capacities of child data or data buffers.
348
+ /// Define capacities to pre-allocate for child data or data buffers.
311
349
#[ derive( Debug , Clone ) ]
312
350
pub enum Capacities {
313
351
/// Binary, Utf8 and LargeUtf8 data types
314
- /// Define
352
+ ///
353
+ /// Defines
315
354
/// * the capacity of the array offsets
316
355
/// * the capacity of the binary/str buffer
317
356
Binary ( usize , Option < usize > ) ,
318
357
/// List and LargeList data types
319
- /// Define
358
+ ///
359
+ /// Defines
320
360
/// * the capacity of the array offsets
321
361
/// * the capacity of the child data
322
362
List ( usize , Option < Box < Capacities > > ) ,
323
363
/// Struct type
364
+ ///
365
+ /// Defines
324
366
/// * the capacity of the array
325
367
/// * the capacities of the fields
326
368
Struct ( usize , Option < Vec < Capacities > > ) ,
327
369
/// Dictionary type
370
+ ///
371
+ /// Defines
328
372
/// * the capacity of the array/keys
329
373
/// * the capacity of the values
330
374
Dictionary ( usize , Option < Box < Capacities > > ) ,
331
375
/// Don't preallocate inner buffers and rely on array growth strategy
332
376
Array ( usize ) ,
333
377
}
378
+
334
379
impl < ' a > MutableArrayData < ' a > {
335
- /// returns a new [MutableArrayData] with capacity to `capacity` slots and specialized to create an
336
- /// [ArrayData] from multiple `arrays`.
380
+ /// Returns a new [MutableArrayData] with capacity for `capacity` slots and
381
+ /// specialized to create an [ArrayData] from multiple `arrays`.
337
382
///
338
- /// `use_nulls` is a flag used to optimize insertions. It should be `false` if the only source of nulls
339
- /// are the arrays themselves and `true` if the user plans to call [MutableArrayData::extend_nulls].
340
- /// In other words, if `use_nulls` is `false`, calling [MutableArrayData::extend_nulls] should not be used.
383
+ /// # Arguments
384
+ /// * `arrays` - the source arrays to copy from
385
+ /// * `use_nulls` - a flag used to optimize insertions
386
+ /// - `false` if the only source of nulls are the arrays themselves
387
+ /// - `true` if the user plans to call [MutableArrayData::extend_nulls].
388
+ /// * `capacity` - the preallocated capacity of the output array, in bytes
389
+ ///
390
+ /// Thus, if `use_nulls` is `false`,
391
+ /// [MutableArrayData::extend_nulls] should not be called.
341
392
pub fn new ( arrays : Vec < & ' a ArrayData > , use_nulls : bool , capacity : usize ) -> Self {
342
393
Self :: with_capacities ( arrays, use_nulls, Capacities :: Array ( capacity) )
343
394
}
344
395
345
- /// Similar to [MutableArrayData::new], but lets users define the preallocated capacities of the array.
346
- /// See also [MutableArrayData::new] for more information on the arguments .
396
+ /// Similar to [MutableArrayData::new], but lets users define the
397
+ /// preallocated capacities of the array with more granularity.
347
398
///
348
- /// # Panic
349
- /// This function panics if the given `capacities` don't match the data type of `arrays`. Or when
350
- /// a [Capacities] variant is not yet supported.
399
+ /// See [MutableArrayData::new] for more information on the arguments.
400
+ ///
401
+ /// # Panics
402
+ ///
403
+ /// This function panics if the given `capacities` don't match the data type
404
+ /// of `arrays`, or when a [Capacities] variant is not yet supported.
351
405
pub fn with_capacities (
352
406
arrays : Vec < & ' a ArrayData > ,
353
407
use_nulls : bool ,
@@ -646,7 +700,7 @@ impl<'a> MutableArrayData<'a> {
646
700
}
647
701
}
648
702
649
- /// Extends this array with a chunk of its source arrays
703
+ /// Extends the in progress array with a region of the input arrays
650
704
///
651
705
/// # Arguments
652
706
/// * `index` - the index of the array that you want to copy values from
@@ -664,12 +718,11 @@ impl<'a> MutableArrayData<'a> {
664
718
self . data . len += len;
665
719
}
666
720
667
- /// Extends this [MutableArrayData] with null elements, disregarding the bound arrays
721
+ /// Extends the in progress array with null elements, ignoring the input arrays.
668
722
///
669
723
/// # Panics
670
724
///
671
725
/// Panics if [`MutableArrayData`] was not created with `use_nulls` or nullable source arrays
672
- ///
673
726
pub fn extend_nulls ( & mut self , len : usize ) {
674
727
self . data . len += len;
675
728
let bit_len = bit_util:: ceil ( self . data . len , 8 ) ;
@@ -697,12 +750,13 @@ impl<'a> MutableArrayData<'a> {
697
750
self . data . null_count
698
751
}
699
752
700
- /// Creates a [ArrayData] from the pushed regions up to this point , consuming `self`.
753
+ /// Creates an [ArrayData] from the in-progress array, consuming `self`.
701
754
pub fn freeze ( self ) -> ArrayData {
702
755
unsafe { self . into_builder ( ) . build_unchecked ( ) }
703
756
}
704
757
705
- /// Creates a [ArrayDataBuilder] from the pushed regions up to this point, consuming `self`.
758
+ /// Consumes `self` and returns the in-progress array as an [`ArrayDataBuilder`].
759
+ ///
706
760
/// This is useful for extending the default behavior of MutableArrayData.
707
761
pub fn into_builder ( self ) -> ArrayDataBuilder {
708
762
let data = self . data ;
0 commit comments