diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs
index dca256008db2..c3cba6561aaf 100644
--- a/arrow-array/src/array/list_array.rs
+++ b/arrow-array/src/array/list_array.rs
@@ -842,10 +842,8 @@ mod tests {
 
     #[test]
    #[should_panic(expected = "memory is not aligned")]
-    #[allow(deprecated)]
     fn test_primitive_array_alignment() {
-        let ptr = arrow_buffer::alloc::allocate_aligned(8);
-        let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) };
+        let buf = Buffer::from_slice_ref([0_u64]);
         let buf2 = buf.slice(1);
         let array_data = ArrayData::builder(DataType::Int32)
             .add_buffer(buf2)
@@ -859,10 +857,8 @@ mod tests {
     // Different error messages, so skip for now
     // https://github.com/apache/arrow-rs/issues/1545
     #[cfg(not(feature = "force_validate"))]
-    #[allow(deprecated)]
     fn test_list_array_alignment() {
-        let ptr = arrow_buffer::alloc::allocate_aligned(8);
-        let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) };
+        let buf = Buffer::from_slice_ref([0_u64]);
         let buf2 = buf.slice(1);
 
         let values: [i32; 8] = [0; 8];
diff --git a/arrow-buffer/src/alloc/mod.rs b/arrow-buffer/src/alloc/mod.rs
index 7600a28d8754..d1236eeaa9a6 100644
--- a/arrow-buffer/src/alloc/mod.rs
+++ b/arrow-buffer/src/alloc/mod.rs
@@ -18,117 +18,15 @@
 //! Defines memory-related functions, such as allocate/deallocate/reallocate memory
 //! regions, cache and allocation alignments.
 
-use std::alloc::{handle_alloc_error, Layout};
+use std::alloc::Layout;
 use std::fmt::{Debug, Formatter};
 use std::panic::RefUnwindSafe;
-use std::ptr::NonNull;
 use std::sync::Arc;
 
 mod alignment;
 
 pub use alignment::ALIGNMENT;
 
-/// Returns an aligned non null pointer similar to [`NonNull::dangling`]
-///
-/// Note that the pointer value may potentially represent a valid pointer, which means
-/// this must not be used as a "not yet initialized" sentinel value.
-#[inline]
-fn dangling_ptr() -> NonNull<u8> {
-    // SAFETY: ALIGNMENT is a non-zero usize which is then casted
-    // to a *mut T. Therefore, `ptr` is not null and the conditions for
-    // calling new_unchecked() are respected.
-    unsafe { NonNull::new_unchecked(ALIGNMENT as *mut u8) }
-}
-
-/// Allocates a cache-aligned memory region of `size` bytes with uninitialized values.
-/// This is more performant than using [allocate_aligned_zeroed] when all bytes will have
-/// an unknown or non-zero value and is semantically similar to `malloc`.
-#[deprecated(note = "Use Vec")]
-pub fn allocate_aligned(size: usize) -> NonNull<u8> {
-    unsafe {
-        if size == 0 {
-            dangling_ptr()
-        } else {
-            let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
-            let raw_ptr = std::alloc::alloc(layout);
-            NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
-        }
-    }
-}
-
-/// Allocates a cache-aligned memory region of `size` bytes with `0` on all of them.
-/// This is more performant than using [allocate_aligned] and setting all bytes to zero
-/// and is semantically similar to `calloc`.
-#[deprecated(note = "Use Vec")]
-pub fn allocate_aligned_zeroed(size: usize) -> NonNull<u8> {
-    unsafe {
-        if size == 0 {
-            dangling_ptr()
-        } else {
-            let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
-            let raw_ptr = std::alloc::alloc_zeroed(layout);
-            NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
-        }
-    }
-}
-
-/// # Safety
-///
-/// This function is unsafe because undefined behavior can result if the caller does not ensure all
-/// of the following:
-///
-/// * ptr must denote a block of memory currently allocated via this allocator,
-///
-/// * size must be the same size that was used to allocate that block of memory,
-#[deprecated(note = "Use Vec")]
-pub unsafe fn free_aligned(ptr: NonNull<u8>, size: usize) {
-    if size != 0 {
-        std::alloc::dealloc(
-            ptr.as_ptr() as *mut u8,
-            Layout::from_size_align_unchecked(size, ALIGNMENT),
-        );
-    }
-}
-
-/// # Safety
-///
-/// This function is unsafe because undefined behavior can result if the caller does not ensure all
-/// of the following:
-///
-/// * ptr must be currently allocated via this allocator,
-///
-/// * new_size must be greater than zero.
-///
-/// * new_size, when rounded up to the nearest multiple of [ALIGNMENT], must not overflow (i.e.,
-/// the rounded value must be less than usize::MAX).
-#[deprecated(note = "Use Vec")]
-#[allow(deprecated)]
-pub unsafe fn reallocate(
-    ptr: NonNull<u8>,
-    old_size: usize,
-    new_size: usize,
-) -> NonNull<u8> {
-    if old_size == 0 {
-        return allocate_aligned(new_size);
-    }
-
-    if new_size == 0 {
-        free_aligned(ptr, old_size);
-        return dangling_ptr();
-    }
-
-    let raw_ptr = std::alloc::realloc(
-        ptr.as_ptr() as *mut u8,
-        Layout::from_size_align_unchecked(old_size, ALIGNMENT),
-        new_size,
-    );
-    NonNull::new(raw_ptr).unwrap_or_else(|| {
-        handle_alloc_error(Layout::from_size_align_unchecked(new_size, ALIGNMENT))
-    })
-}
-
 /// The owner of an allocation.
 /// The trait implementation is responsible for dropping the allocations once no more references exist.
 pub trait Allocation: RefUnwindSafe + Send + Sync {}
diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs
index 5f42035c9e30..15d9ff7838c6 100644
--- a/arrow-buffer/src/buffer/immutable.rs
+++ b/arrow-buffer/src/buffer/immutable.rs
@@ -71,20 +71,10 @@ impl Buffer {
         }
     }
 
-    /// Create a [`Buffer`] from the provided `Vec` without copying
+    /// Create a [`Buffer`] from the provided [`Vec`] without copying
     #[inline]
     pub fn from_vec<T: ArrowNativeType>(vec: Vec<T>) -> Self {
-        // Safety
-        // Vec::as_ptr guaranteed to not be null and ArrowNativeType are trivially transmutable
-        let ptr = unsafe { NonNull::new_unchecked(vec.as_ptr() as _) };
-        let len = vec.len() * std::mem::size_of::<T>();
-        // Safety
-        // Vec guaranteed to have a valid layout matching that of `Layout::array`
-        // This is based on `RawVec::current_memory`
-        let layout = unsafe { Layout::array::<T>(vec.capacity()).unwrap_unchecked() };
-        std::mem::forget(vec);
-        let b = unsafe { Bytes::new(ptr, len, Deallocation::Standard(layout)) };
-        Self::from_bytes(b)
+        MutableBuffer::from_vec(vec).into()
     }
 
     /// Initializes a [Buffer] from a slice of items.
@@ -810,7 +800,8 @@ mod tests {
         b.into_mutable().unwrap();
 
         let b = Buffer::from_vec(vec![1_u32, 3, 5]);
-        let b = b.into_mutable().unwrap_err(); // Invalid layout
+        let b = b.into_mutable().unwrap();
+        let b = Buffer::from(b);
         let b = b.into_vec::<u32>().unwrap();
         assert_eq!(b, &[1, 3, 5]);
     }
diff --git a/arrow-buffer/src/buffer/mutable.rs b/arrow-buffer/src/buffer/mutable.rs
index 250ac9f31595..9a905a3223b6 100644
--- a/arrow-buffer/src/buffer/mutable.rs
+++ b/arrow-buffer/src/buffer/mutable.rs
@@ -15,17 +15,18 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use super::Buffer;
+use std::alloc::{handle_alloc_error, Layout};
+use std::mem;
+use std::ptr::NonNull;
+
 use crate::alloc::{Deallocation, ALIGNMENT};
 use crate::{
-    alloc,
     bytes::Bytes,
     native::{ArrowNativeType, ToByteSlice},
     util::bit_util,
 };
-use std::alloc::Layout;
-use std::mem;
-use std::ptr::NonNull;
+
+use super::Buffer;
 
 /// A [`MutableBuffer`] is Arrow's interface to build a [`Buffer`] out of items or slices of items.
 ///
@@ -55,7 +56,7 @@ pub struct MutableBuffer {
     data: NonNull<u8>,
     // invariant: len <= capacity
     len: usize,
-    capacity: usize,
+    layout: Layout,
 }
 
 impl MutableBuffer {
@@ -67,14 +68,21 @@ impl MutableBuffer {
 
     /// Allocate a new [MutableBuffer] with initial capacity to be at least `capacity`.
     #[inline]
-    #[allow(deprecated)]
     pub fn with_capacity(capacity: usize) -> Self {
         let capacity = bit_util::round_upto_multiple_of_64(capacity);
-        let ptr = alloc::allocate_aligned(capacity);
+        let layout = Layout::from_size_align(capacity, ALIGNMENT).unwrap();
+        let data = match layout.size() {
+            0 => dangling_ptr(),
+            _ => {
+                // Safety: Verified size != 0
+                let raw_ptr = unsafe { std::alloc::alloc(layout) };
+                NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
+            }
+        };
         Self {
-            data: ptr,
+            data,
             len: 0,
-            capacity,
+            layout,
         }
     }
 
@@ -89,35 +97,46 @@ impl MutableBuffer {
     /// let data = buffer.as_slice_mut();
     /// assert_eq!(data[126], 0u8);
     /// ```
-    #[allow(deprecated)]
     pub fn from_len_zeroed(len: usize) -> Self {
-        let new_capacity = bit_util::round_upto_multiple_of_64(len);
-        let ptr = alloc::allocate_aligned_zeroed(new_capacity);
-        Self {
-            data: ptr,
-            len,
-            capacity: new_capacity,
-        }
+        let layout = Layout::from_size_align(len, ALIGNMENT).unwrap();
+        let data = match layout.size() {
+            0 => dangling_ptr(),
+            _ => {
+                // Safety: Verified size != 0
+                let raw_ptr = unsafe { std::alloc::alloc_zeroed(layout) };
+                NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
+            }
+        };
+        Self { data, len, layout }
     }
+
+    /// Create a [`MutableBuffer`] from the provided [`Vec`] without copying
+    #[inline]
+    pub fn from_vec<T: ArrowNativeType>(vec: Vec<T>) -> Self {
+        // Safety
+        // Vec::as_ptr guaranteed to not be null and ArrowNativeType are trivially transmutable
+        let data = unsafe { NonNull::new_unchecked(vec.as_ptr() as _) };
+        let len = vec.len() * mem::size_of::<T>();
+        // Safety
+        // Vec guaranteed to have a valid layout matching that of `Layout::array`
+        // This is based on `RawVec::current_memory`
+        let layout = unsafe { Layout::array::<T>(vec.capacity()).unwrap_unchecked() };
+        mem::forget(vec);
+        Self { data, len, layout }
+    }
 
     /// Allocates a new [MutableBuffer] from given `Bytes`.
     pub(crate) fn from_bytes(bytes: Bytes) -> Result<Self, Bytes> {
-        let capacity = match bytes.deallocation() {
-            Deallocation::Standard(layout) if layout.align() == ALIGNMENT => {
-                layout.size()
-            }
+        let layout = match bytes.deallocation() {
+            Deallocation::Standard(layout) => *layout,
             _ => return Err(bytes),
         };
 
         let len = bytes.len();
-        let ptr = bytes.ptr();
+        let data = bytes.ptr();
         mem::forget(bytes);
-
-        Ok(Self {
-            data: ptr,
-            len,
-            capacity,
-        })
+        Ok(Self { data, len, layout })
     }
 
     /// creates a new [MutableBuffer] with capacity and length capable of holding `len` bits.
@@ -134,7 +153,7 @@ impl MutableBuffer {
     /// the buffer directly (e.g., modifying the buffer by holding a mutable reference
     /// from `data_mut()`).
     pub fn with_bitset(mut self, end: usize, val: bool) -> Self {
-        assert!(end <= self.capacity);
+        assert!(end <= self.layout.size());
         let v = if val { 255 } else { 0 };
         unsafe {
             std::ptr::write_bytes(self.data.as_ptr(), v, end);
@@ -149,7 +168,7 @@ impl MutableBuffer {
     /// `len` of the buffer and so can be used to initialize the memory region from
     /// `len` to `capacity`.
     pub fn set_null_bits(&mut self, start: usize, count: usize) {
-        assert!(start + count <= self.capacity);
+        assert!(start + count <= self.layout.size());
         unsafe {
             std::ptr::write_bytes(self.data.as_ptr().add(start), 0, count);
         }
     }
@@ -171,17 +190,33 @@ impl MutableBuffer {
     #[inline(always)]
     pub fn reserve(&mut self, additional: usize) {
         let required_cap = self.len + additional;
-        if required_cap > self.capacity {
-            // JUSTIFICATION
-            //  Benefit
-            //      necessity
-            //  Soundness
-            //      `self.data` is valid for `self.capacity`.
-            let (ptr, new_capacity) =
-                unsafe { reallocate(self.data, self.capacity, required_cap) };
-            self.data = ptr;
-            self.capacity = new_capacity;
+        if required_cap > self.layout.size() {
+            let new_capacity = bit_util::round_upto_multiple_of_64(required_cap);
+            let new_capacity = std::cmp::max(new_capacity, self.layout.size() * 2);
+            self.reallocate(new_capacity)
+        }
+    }
+
+    #[cold]
+    fn reallocate(&mut self, capacity: usize) {
+        let new_layout = Layout::from_size_align(capacity, self.layout.align()).unwrap();
+        if new_layout.size() == 0 {
+            if self.layout.size() != 0 {
+                // Safety: data was allocated with layout
+                unsafe { std::alloc::dealloc(self.as_mut_ptr(), self.layout) };
+                self.layout = new_layout
+            }
+            return;
         }
+
+        let data = match self.layout.size() {
+            // Safety: new_layout is not empty
+            0 => unsafe { std::alloc::alloc(new_layout) },
+            // Safety: verified new layout is valid and not empty
+            _ => unsafe { std::alloc::realloc(self.as_mut_ptr(), self.layout, capacity) },
+        };
+        self.data = NonNull::new(data).unwrap_or_else(|| handle_alloc_error(new_layout));
+        self.layout = new_layout;
     }
 
     /// Truncates this buffer to `len` bytes
@@ -233,20 +268,10 @@ impl MutableBuffer {
     ///     buffer.shrink_to_fit();
     ///     assert!(buffer.capacity() >= 64 && buffer.capacity() < 128);
     /// ```
-    #[allow(deprecated)]
     pub fn shrink_to_fit(&mut self) {
         let new_capacity = bit_util::round_upto_multiple_of_64(self.len);
-        if new_capacity < self.capacity {
-            // JUSTIFICATION
-            //  Benefit
-            //      necessity
-            //  Soundness
-            //      `self.data` is valid for `self.capacity`.
-            let ptr =
-                unsafe { alloc::reallocate(self.data, self.capacity, new_capacity) };
-
-            self.data = ptr;
-            self.capacity = new_capacity;
+        if new_capacity < self.layout.size() {
+            self.reallocate(new_capacity)
         }
     }
 
@@ -267,7 +292,7 @@ impl MutableBuffer {
     /// The invariant `buffer.len() <= buffer.capacity()` is always upheld.
     #[inline]
     pub const fn capacity(&self) -> usize {
-        self.capacity
+        self.layout.size()
     }
 
     /// Clear all existing data from this buffer.
@@ -310,9 +335,9 @@ impl MutableBuffer {
 
     #[inline]
     pub(super) fn into_buffer(self) -> Buffer {
-        let layout = Layout::from_size_align(self.capacity, ALIGNMENT).unwrap();
-        let bytes =
-            unsafe { Bytes::new(self.data, self.len, Deallocation::Standard(layout)) };
+        let bytes = unsafe {
+            Bytes::new(self.data, self.len, Deallocation::Standard(self.layout))
+        };
         std::mem::forget(self);
         Buffer::from_bytes(bytes)
     }
@@ -455,19 +480,12 @@ impl MutableBuffer {
     }
 }
 
-/// # Safety
-/// `ptr` must be allocated for `old_capacity`.
-#[cold]
-#[allow(deprecated)]
-unsafe fn reallocate(
-    ptr: NonNull<u8>,
-    old_capacity: usize,
-    new_capacity: usize,
-) -> (NonNull<u8>, usize) {
-    let new_capacity = bit_util::round_upto_multiple_of_64(new_capacity);
-    let new_capacity = std::cmp::max(new_capacity, old_capacity * 2);
-    let ptr = alloc::reallocate(ptr, old_capacity, new_capacity);
-    (ptr, new_capacity)
+#[inline]
+fn dangling_ptr() -> NonNull<u8> {
+    // SAFETY: ALIGNMENT is a non-zero usize which is then casted
+    // to a *mut T. Therefore, `ptr` is not null and the conditions for
+    // calling new_unchecked() are respected.
+    unsafe { NonNull::new_unchecked(ALIGNMENT as *mut u8) }
 }
 
 impl<A: ArrowNativeType> Extend<A> for MutableBuffer {
@@ -492,7 +510,7 @@ impl MutableBuffer {
         // this is necessary because of https://github.com/rust-lang/rust/issues/32155
         let mut len = SetLenOnDrop::new(&mut self.len);
         let mut dst = unsafe { self.data.as_ptr().add(len.local_len) };
-        let capacity = self.capacity;
+        let capacity = self.layout.size();
 
         while len.local_len + item_size <= capacity {
             if let Some(item) = iterator.next() {
@@ -641,9 +659,11 @@ impl std::ops::DerefMut for MutableBuffer {
 }
 
 impl Drop for MutableBuffer {
-    #[allow(deprecated)]
     fn drop(&mut self) {
-        unsafe { alloc::free_aligned(self.data, self.capacity) };
+        if self.layout.size() != 0 {
+            // Safety: data was allocated with standard allocator with given layout
+            unsafe { std::alloc::dealloc(self.data.as_ptr() as _, self.layout) };
+        }
     }
 }
 
@@ -652,7 +672,7 @@ impl PartialEq for MutableBuffer {
         if self.len != other.len {
             return false;
         }
-        if self.capacity != other.capacity {
+        if self.layout != other.layout {
             return false;
         }
         self.as_slice() == other.as_slice()
diff --git a/arrow-buffer/src/bytes.rs b/arrow-buffer/src/bytes.rs
index 2820fda781e6..b3105ed5a3b4 100644
--- a/arrow-buffer/src/bytes.rs
+++ b/arrow-buffer/src/bytes.rs
@@ -30,8 +30,8 @@ use crate::alloc::Deallocation;
 /// This structs' API is inspired by the `bytes::Bytes`, but it is not limited to using rust's
 /// global allocator nor u8 alignment.
 ///
-/// In the most common case, this buffer is allocated using [`allocate_aligned`](crate::alloc::allocate_aligned)
-/// and deallocated accordingly [`free_aligned`](crate::alloc::free_aligned).
+/// In the most common case, this buffer is allocated using [`alloc`](std::alloc::alloc)
+/// with an alignment of [`ALIGNMENT`](crate::alloc::ALIGNMENT)
 ///
 /// When the region is allocated by a different allocator, [Deallocation::Custom], this calls the
 /// custom deallocator to deallocate the region when it is no longer needed.