Skip to content

Commit 6c59b76

Browse files
authored
Minor: pub use ByteView in arrow and improve documentation (#6275)
* Minor: `pub use ByteView` in arrow and improve documentation * clarify docs more
1 parent 0bbad36 commit 6c59b76

File tree

3 files changed

+23
-9
lines changed

3 files changed

+23
-9
lines changed

arrow-array/src/array/byte_view_array.rs

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ use super::ByteArrayType;
5252
/// not by value, as there are many different buffer layouts to represent the
5353
/// same data (e.g. different offsets, different buffer sizes, etc).
5454
///
55-
/// # Layout
55+
/// # Layout: "views" and buffers
5656
///
5757
/// A `GenericByteViewArray` stores variable length byte strings. An array of
5858
/// `N` elements is stored as `N` fixed length "views" and a variable number
@@ -75,10 +75,12 @@ use super::ByteArrayType;
7575
/// 0 31 63 95 127
7676
/// ```
7777
///
78-
/// * Strings with length <= 12 are stored directly in the view.
78+
/// * Strings with length <= 12 are stored directly in the view. See
79+
/// [`Self::inline_value`] to access the inlined prefix from a short view.
7980
///
8081
/// * Strings with length > 12: The first four bytes are stored inline in the
81-
/// view and the entire string is stored in one of the buffers.
82+
/// view and the entire string is stored in one of the buffers. See [`ByteView`]
83+
/// to access the fields of these views.
8284
///
8385
/// Unlike [`GenericByteArray`], there are no constraints on the offsets other
8486
/// than they must point into a valid buffer. However, they can be out of order,
@@ -89,6 +91,8 @@ use super::ByteArrayType;
8991
/// separate buffer while the string "LavaMonster" is stored inlined in the
9092
/// view. In this case, the same bytes for "Fish" are used to store both strings.
9193
///
94+
/// [`ByteView`]: arrow_data::ByteView
95+
///
9296
/// ```text
9397
/// ┌───┐
9498
/// ┌──────┬──────┬──────┬──────┐ offset │...│
@@ -261,9 +265,12 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
261265
unsafe { self.value_unchecked(i) }
262266
}
263267

264-
/// Returns the element at index `i`
268+
/// Returns the element at index `i` without bounds checking
269+
///
265270
/// # Safety
266-
/// Caller is responsible for ensuring that the index is within the bounds of the array
271+
///
272+
/// Caller is responsible for ensuring that the index is within the bounds
273+
/// of the array
267274
pub unsafe fn value_unchecked(&self, idx: usize) -> &T::Native {
268275
let v = self.views.get_unchecked(idx);
269276
let len = *v as u32;
@@ -278,7 +285,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
278285
T::Native::from_bytes_unchecked(b)
279286
}
280287

281-
/// Returns the inline value of the view.
288+
/// Returns the first `len` bytes of the inline value of the view.
282289
///
283290
/// # Safety
284291
/// - The `view` must be a valid element from `Self::views()` that adheres to the view layout.
@@ -289,7 +296,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
289296
std::slice::from_raw_parts((view as *const u128 as *const u8).wrapping_add(4), len)
290297
}
291298

292-
/// constructs a new iterator
299+
/// Constructs a new iterator for iterating over the values of this array
293300
pub fn iter(&self) -> ArrayIter<&Self> {
294301
ArrayIter::new(self)
295302
}
@@ -358,7 +365,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
358365
builder.finish()
359366
}
360367

361-
/// Comparing two [`GenericByteViewArray`] at index `left_idx` and `right_idx`
368+
/// Compare two [`GenericByteViewArray`] at index `left_idx` and `right_idx`
362369
///
363370
/// Comparing two ByteView types is non-trivial.
364371
/// It takes a bit of patience to understand why we don't just compare two &[u8] directly.

arrow-data/src/byte_view.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,13 @@
1818
use arrow_buffer::Buffer;
1919
use arrow_schema::ArrowError;
2020

21+
/// Helper to access views of [`GenericByteViewArray`] (`StringViewArray` and
22+
/// `BinaryViewArray`) where the length is greater than 12 bytes.
23+
///
24+
/// See the documentation on [`GenericByteViewArray`] for more information on
25+
/// the layout of the views.
26+
///
27+
/// [`GenericByteViewArray`]: https://docs.rs/arrow/latest/arrow/array/struct.GenericByteViewArray.html
2128
#[derive(Debug, Copy, Clone, Default)]
2229
#[repr(C)]
2330
pub struct ByteView {

arrow/src/array/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ pub use arrow_array::cast::*;
2525
pub use arrow_array::iterator::*;
2626
pub use arrow_array::*;
2727
pub use arrow_data::{
28-
layout, ArrayData, ArrayDataBuilder, ArrayDataRef, BufferSpec, DataTypeLayout,
28+
layout, ArrayData, ArrayDataBuilder, ArrayDataRef, BufferSpec, ByteView, DataTypeLayout,
2929
};
3030

3131
pub use arrow_data::transform::{Capacities, MutableArrayData};

0 commit comments

Comments
 (0)