Skip to content

Commit 663a637

Browse files
authored
Improve documentation for MutableArrayData (#6272)
1 parent 25d39c1 commit 663a637

File tree

1 file changed

+91
-37
lines changed
  • arrow-data/src/transform

1 file changed

+91
-37
lines changed

arrow-data/src/transform/mod.rs

Lines changed: 91 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -97,40 +97,78 @@ fn build_extend_null_bits(array: &ArrayData, use_nulls: bool) -> ExtendNullBits
9797
}
9898
}
9999

100-
/// Struct to efficiently and interactively create an [ArrayData] from an existing [ArrayData] by
100+
/// Efficiently create an [ArrayData] from one or more existing [ArrayData]s by
101101
/// copying chunks.
102102
///
103-
/// The main use case of this struct is to perform unary operations to arrays of arbitrary types,
104-
/// such as `filter` and `take`.
103+
/// The main use case of this struct is to perform unary operations on arrays of
104+
/// arbitrary types, such as `filter` and `take`.
105+
///
106+
/// # Example
107+
/// ```
108+
/// use arrow_buffer::Buffer;
109+
/// use arrow_data::ArrayData;
110+
/// use arrow_data::transform::MutableArrayData;
111+
/// use arrow_schema::DataType;
112+
/// fn i32_array(values: &[i32]) -> ArrayData {
113+
/// ArrayData::try_new(DataType::Int32, 5, None, 0, vec![Buffer::from_slice_ref(values)], vec![]).unwrap()
114+
/// }
115+
/// let arr1 = i32_array(&[1, 2, 3, 4, 5]);
116+
/// let arr2 = i32_array(&[6, 7, 8, 9, 10]);
117+
/// // Create a mutable array for copying values from arr1 and arr2, with a capacity for 6 elements
118+
/// let capacity = 3 * size_of::<i32>();
119+
/// let mut mutable = MutableArrayData::new(vec![&arr1, &arr2], false, 10);
120+
/// // Copy the first 3 elements from arr1
121+
/// mutable.extend(0, 0, 3);
122+
/// // Copy the last 3 elements from arr2
123+
/// mutable.extend(1, 2, 4);
124+
/// // Complete the MutableArrayData into a new ArrayData
125+
/// let frozen = mutable.freeze();
126+
/// assert_eq!(frozen, i32_array(&[1, 2, 3, 8, 9, 10]));
127+
/// ```
105128
pub struct MutableArrayData<'a> {
129+
/// Input arrays: the data being read FROM.
130+
///
131+
/// Note this is "dead code" because all actual references to the arrays are
132+
/// stored in closures for extending values and nulls.
106133
#[allow(dead_code)]
107134
arrays: Vec<&'a ArrayData>,
108-
/// The attributes in [_MutableArrayData] cannot be in [MutableArrayData] due to
109-
/// mutability invariants (interior mutability):
110-
/// [MutableArrayData] contains a function that can only mutate [_MutableArrayData], not
111-
/// [MutableArrayData] itself
135+
136+
/// In progress output array: The data being written TO
137+
///
138+
/// Note these fields are in a separate struct, [_MutableArrayData], as they
139+
/// cannot be in [MutableArrayData] itself due to mutability invariants (interior
140+
/// mutability): [MutableArrayData] contains a function that can only mutate
141+
/// [_MutableArrayData], not [MutableArrayData] itself
112142
data: _MutableArrayData<'a>,
113143

114-
/// the child data of the `Array` in Dictionary arrays.
115-
/// This is not stored in `MutableArrayData` because these values constant and only needed
116-
/// at the end, when freezing [_MutableArrayData].
144+
/// The child data of the `Array` in Dictionary arrays.
145+
///
146+
/// This is not stored in `_MutableArrayData` because these values are
147+
/// constant and only needed at the end, when freezing [_MutableArrayData].
117148
dictionary: Option<ArrayData>,
118149

119-
/// Variadic data buffers referenced by views
120-
/// This is not stored in `MutableArrayData` because these values constant and only needed
121-
/// at the end, when freezing [_MutableArrayData]
150+
/// Variadic data buffers referenced by views.
151+
///
152+
/// Note this is not stored in `_MutableArrayData` because these values
153+
/// are constant and only needed at the end, when freezing
154+
/// [_MutableArrayData]
122155
variadic_data_buffers: Vec<Buffer>,
123156

124-
/// function used to extend values from arrays. This function's lifetime is bound to the array
125-
/// because it reads values from it.
157+
/// Function used to extend the output array with values from input arrays.
158+
///
159+
/// This function's lifetime is bound to the input arrays because it reads
160+
/// values from them.
126161
extend_values: Vec<Extend<'a>>,
127162

128-
/// function used to extend nulls from arrays. This function's lifetime is bound to the array
129-
/// because it reads nulls from it.
163+
/// Function used to extend the output array with nulls from input arrays.
164+
///
165+
/// This function's lifetime is bound to the input arrays because it reads
166+
/// nulls from them.
130167
extend_null_bits: Vec<ExtendNullBits<'a>>,
131168

132-
/// function used to extend nulls.
133-
/// this is independent of the arrays and therefore has no lifetime.
169+
/// Function used to extend the output array with null elements.
170+
///
171+
/// This function is independent of the arrays and therefore has no lifetime.
134172
extend_nulls: ExtendNulls,
135173
}
136174

@@ -307,47 +345,63 @@ fn preallocate_offset_and_binary_buffer<Offset: ArrowNativeType + Integer>(
307345
]
308346
}
309347

310-
/// Define capacities of child data or data buffers.
348+
/// Define capacities to pre-allocate for child data or data buffers.
311349
#[derive(Debug, Clone)]
312350
pub enum Capacities {
313351
/// Binary, Utf8 and LargeUtf8 data types
314-
/// Define
352+
///
353+
/// Defines
315354
/// * the capacity of the array offsets
316355
/// * the capacity of the binary/ str buffer
317356
Binary(usize, Option<usize>),
318357
/// List and LargeList data types
319-
/// Define
358+
///
359+
/// Defines
320360
/// * the capacity of the array offsets
321361
/// * the capacity of the child data
322362
List(usize, Option<Box<Capacities>>),
323363
/// Struct type
364+
///
365+
/// Defines
324366
/// * the capacity of the array
325367
/// * the capacities of the fields
326368
Struct(usize, Option<Vec<Capacities>>),
327369
/// Dictionary type
370+
///
371+
/// Defines
328372
/// * the capacity of the array/keys
329373
/// * the capacity of the values
330374
Dictionary(usize, Option<Box<Capacities>>),
331375
/// Don't preallocate inner buffers and rely on array growth strategy
332376
Array(usize),
333377
}
378+
334379
impl<'a> MutableArrayData<'a> {
335-
/// returns a new [MutableArrayData] with capacity to `capacity` slots and specialized to create an
336-
/// [ArrayData] from multiple `arrays`.
380+
/// Returns a new [MutableArrayData] with capacity for `capacity` slots and
381+
/// specialized to create an [ArrayData] from multiple `arrays`.
337382
///
338-
/// `use_nulls` is a flag used to optimize insertions. It should be `false` if the only source of nulls
339-
/// are the arrays themselves and `true` if the user plans to call [MutableArrayData::extend_nulls].
340-
/// In other words, if `use_nulls` is `false`, calling [MutableArrayData::extend_nulls] should not be used.
383+
/// # Arguments
384+
/// * `arrays` - the source arrays to copy from
385+
/// * `use_nulls` - a flag used to optimize insertions
386+
/// - `false` if the only source of nulls are the arrays themselves
387+
/// - `true` if the user plans to call [MutableArrayData::extend_nulls].
388+
/// * `capacity` - the preallocated capacity of the output array, in bytes
389+
///
390+
/// Thus, if `use_nulls` is `false`,
391+
/// [MutableArrayData::extend_nulls] should not be called.
341392
pub fn new(arrays: Vec<&'a ArrayData>, use_nulls: bool, capacity: usize) -> Self {
342393
Self::with_capacities(arrays, use_nulls, Capacities::Array(capacity))
343394
}
344395

345-
/// Similar to [MutableArrayData::new], but lets users define the preallocated capacities of the array.
346-
/// See also [MutableArrayData::new] for more information on the arguments.
396+
/// Similar to [MutableArrayData::new], but lets users define the
397+
/// preallocated capacities of the array with more granularity.
347398
///
348-
/// # Panic
349-
/// This function panics if the given `capacities` don't match the data type of `arrays`. Or when
350-
/// a [Capacities] variant is not yet supported.
399+
/// See [MutableArrayData::new] for more information on the arguments.
400+
///
401+
/// # Panics
402+
///
403+
/// This function panics if the given `capacities` don't match the data type
404+
/// of `arrays`, or when a [Capacities] variant is not yet supported.
351405
pub fn with_capacities(
352406
arrays: Vec<&'a ArrayData>,
353407
use_nulls: bool,
@@ -646,7 +700,7 @@ impl<'a> MutableArrayData<'a> {
646700
}
647701
}
648702

649-
/// Extends this array with a chunk of its source arrays
703+
/// Extends the in progress array with a region of the input arrays
650704
///
651705
/// # Arguments
652706
/// * `index` - the index of the array that you want to copy values from
@@ -664,12 +718,11 @@ impl<'a> MutableArrayData<'a> {
664718
self.data.len += len;
665719
}
666720

667-
/// Extends this [MutableArrayData] with null elements, disregarding the bound arrays
721+
/// Extends the in progress array with null elements, ignoring the input arrays.
668722
///
669723
/// # Panics
670724
///
671725
/// Panics if [`MutableArrayData`] was not created with `use_nulls` or nullable source arrays
672-
///
673726
pub fn extend_nulls(&mut self, len: usize) {
674727
self.data.len += len;
675728
let bit_len = bit_util::ceil(self.data.len, 8);
@@ -697,12 +750,13 @@ impl<'a> MutableArrayData<'a> {
697750
self.data.null_count
698751
}
699752

700-
/// Creates a [ArrayData] from the pushed regions up to this point, consuming `self`.
753+
/// Creates an [ArrayData] from the in progress array, consuming `self`.
701754
pub fn freeze(self) -> ArrayData {
702755
unsafe { self.into_builder().build_unchecked() }
703756
}
704757

705-
/// Creates a [ArrayDataBuilder] from the pushed regions up to this point, consuming `self`.
758+
/// Consumes `self` and returns the in progress array as [`ArrayDataBuilder`].
759+
///
706760
/// This is useful for extending the default behavior of MutableArrayData.
707761
pub fn into_builder(self) -> ArrayDataBuilder {
708762
let data = self.data;

0 commit comments

Comments
 (0)