Skip to content

Add Zero-Copy Conversion between Vec and MutableBuffer #3920

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions arrow-array/src/array/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -842,10 +842,8 @@ mod tests {

#[test]
#[should_panic(expected = "memory is not aligned")]
#[allow(deprecated)]
fn test_primitive_array_alignment() {
let ptr = arrow_buffer::alloc::allocate_aligned(8);
let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) };
let buf = Buffer::from_slice_ref([0_u64]);
let buf2 = buf.slice(1);
let array_data = ArrayData::builder(DataType::Int32)
.add_buffer(buf2)
Expand All @@ -859,10 +857,8 @@ mod tests {
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
#[allow(deprecated)]
fn test_list_array_alignment() {
let ptr = arrow_buffer::alloc::allocate_aligned(8);
let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) };
let buf = Buffer::from_slice_ref([0_u64]);
let buf2 = buf.slice(1);

let values: [i32; 8] = [0; 8];
Expand Down
104 changes: 1 addition & 103 deletions arrow-buffer/src/alloc/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,117 +18,15 @@
//! Defines memory-related functions, such as allocate/deallocate/reallocate memory
//! regions, cache and allocation alignments.

use std::alloc::{handle_alloc_error, Layout};
use std::alloc::Layout;
use std::fmt::{Debug, Formatter};
use std::panic::RefUnwindSafe;
use std::ptr::NonNull;
use std::sync::Arc;

mod alignment;

pub use alignment::ALIGNMENT;

/// Produces a non-null, [`ALIGNMENT`]-aligned placeholder pointer, in the spirit of
/// [`NonNull::dangling`].
///
/// The address is simply the numeric value of [`ALIGNMENT`]. Because that address could
/// in principle coincide with a real allocation, the result must not be used as a
/// "not yet initialized" sentinel.
///
/// Types that lazily allocate must track initialization by some other means.
#[inline]
fn dangling_ptr() -> NonNull<u8> {
    let address = ALIGNMENT as *mut u8;
    // SAFETY: ALIGNMENT is a non-zero usize, so the pointer built from it is never
    // null, which is the only requirement of `NonNull::new_unchecked`.
    unsafe { NonNull::new_unchecked(address) }
}

/// Allocates a cache-aligned memory region of `size` bytes with uninitialized values.
/// This is more performant than using [allocate_aligned_zeroed] when all bytes will have
/// an unknown or non-zero value and is semantically similar to `malloc`.
///
/// A `size` of zero performs no allocation and returns the pointer produced by
/// `dangling_ptr()`.
///
/// # Panics
///
/// Panics if `size` cannot form a valid [`Layout`] with [`ALIGNMENT`] (i.e. `size`,
/// rounded up to the alignment, exceeds `isize::MAX`). Previously this case was
/// undefined behaviour reachable from safe code.
///
/// If the global allocator fails, [`handle_alloc_error`] is invoked.
#[deprecated(note = "Use Vec")]
pub fn allocate_aligned(size: usize) -> NonNull<u8> {
    if size == 0 {
        return dangling_ptr();
    }

    // Use the checked constructor: this is a safe function, so an oversized `size`
    // must be rejected rather than handed to the allocator as an invalid layout.
    let layout = Layout::from_size_align(size, ALIGNMENT)
        .expect("allocation size overflows when rounded up to ALIGNMENT");

    // SAFETY: `layout` has a non-zero size because `size == 0` returned above.
    let raw_ptr = unsafe { std::alloc::alloc(layout) };
    NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
}

/// Allocates a cache-aligned memory region of `size` bytes with `0` on all of them.
/// This is more performant than using [allocate_aligned] and setting all bytes to zero
/// and is semantically similar to `calloc`.
///
/// A `size` of zero performs no allocation and returns the pointer produced by
/// `dangling_ptr()`.
///
/// # Panics
///
/// Panics if `size` cannot form a valid [`Layout`] with [`ALIGNMENT`] (i.e. `size`,
/// rounded up to the alignment, exceeds `isize::MAX`). Previously this case was
/// undefined behaviour reachable from safe code.
///
/// If the global allocator fails, [`handle_alloc_error`] is invoked.
#[deprecated(note = "Use Vec")]
pub fn allocate_aligned_zeroed(size: usize) -> NonNull<u8> {
    if size == 0 {
        return dangling_ptr();
    }

    // Use the checked constructor: this is a safe function, so an oversized `size`
    // must be rejected rather than handed to the allocator as an invalid layout.
    let layout = Layout::from_size_align(size, ALIGNMENT)
        .expect("allocation size overflows when rounded up to ALIGNMENT");

    // SAFETY: `layout` has a non-zero size because `size == 0` returned above.
    let raw_ptr = unsafe { std::alloc::alloc_zeroed(layout) };
    NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
}

/// Releases a memory region of `size` bytes aligned to [`ALIGNMENT`].
///
/// A `size` of zero is a no-op: nothing is passed to the global allocator.
///
/// # Safety
///
/// Undefined behavior can result unless the caller guarantees all of the following:
///
/// * `ptr` denotes a block of memory currently allocated via this allocator,
///
/// * `size` is the same size that was used to allocate that block of memory.
#[deprecated(note = "Use Vec")]
pub unsafe fn free_aligned(ptr: NonNull<u8>, size: usize) {
    if size == 0 {
        return;
    }
    // The layout is rebuilt from the caller-supplied size; its validity is part of
    // the safety contract above.
    let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
    std::alloc::dealloc(ptr.as_ptr(), layout);
}

/// Resizes an [`ALIGNMENT`]-aligned memory region from `old_size` to `new_size` bytes.
///
/// The branches are tried in this order:
///
/// * `old_size == 0`: a fresh region is obtained from [allocate_aligned],
/// * `new_size == 0`: the region is released with [free_aligned] and the pointer
///   produced by `dangling_ptr()` is returned,
/// * otherwise the region is resized through the global allocator's `realloc`;
///   a null result is reported via [`handle_alloc_error`].
///
/// # Safety
///
/// Undefined behavior can result unless the caller guarantees all of the following:
///
/// * ptr must be currently allocated via this allocator,
///
/// * new_size must be greater than zero.
///
/// * new_size, when rounded up to the nearest multiple of [ALIGNMENT], must not overflow (i.e.,
///   the rounded value must be less than usize::MAX).
#[deprecated(note = "Use Vec")]
#[allow(deprecated)]
pub unsafe fn reallocate(
    ptr: NonNull<u8>,
    old_size: usize,
    new_size: usize,
) -> NonNull<u8> {
    match (old_size, new_size) {
        // Nothing was allocated before: behave like a plain allocation.
        (0, _) => allocate_aligned(new_size),
        // Shrinking to nothing: free the block and hand back the placeholder.
        (_, 0) => {
            free_aligned(ptr, old_size);
            dangling_ptr()
        }
        _ => {
            let current_layout = Layout::from_size_align_unchecked(old_size, ALIGNMENT);
            let resized = std::alloc::realloc(ptr.as_ptr(), current_layout, new_size);
            match NonNull::new(resized) {
                Some(non_null) => non_null,
                None => handle_alloc_error(Layout::from_size_align_unchecked(
                    new_size, ALIGNMENT,
                )),
            }
        }
    }
}

/// The owner of an allocation.
///
/// The trait implementation is responsible for dropping the allocations once no more
/// references exist.
///
/// This is a marker trait: it declares no methods of its own and only requires that
/// implementors are [`RefUnwindSafe`], [`Send`] and [`Sync`].
pub trait Allocation: RefUnwindSafe + Send + Sync {}
Expand Down
17 changes: 4 additions & 13 deletions arrow-buffer/src/buffer/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,20 +71,10 @@ impl Buffer {
}
}

/// Create a [`Buffer`] from the provided `Vec` without copying
/// Create a [`Buffer`] from the provided [`Vec`] without copying
#[inline]
pub fn from_vec<T: ArrowNativeType>(vec: Vec<T>) -> Self {
// Safety
// Vec::as_ptr guaranteed to not be null and ArrowNativeType are trivially transmutable
let ptr = unsafe { NonNull::new_unchecked(vec.as_ptr() as _) };
let len = vec.len() * std::mem::size_of::<T>();
// Safety
// Vec guaranteed to have a valid layout matching that of `Layout::array`
// This is based on `RawVec::current_memory`
let layout = unsafe { Layout::array::<T>(vec.capacity()).unwrap_unchecked() };
std::mem::forget(vec);
let b = unsafe { Bytes::new(ptr, len, Deallocation::Standard(layout)) };
Self::from_bytes(b)
MutableBuffer::from_vec(vec).into()
}

/// Initializes a [Buffer] from a slice of items.
Expand Down Expand Up @@ -810,7 +800,8 @@ mod tests {
b.into_mutable().unwrap();

let b = Buffer::from_vec(vec![1_u32, 3, 5]);
let b = b.into_mutable().unwrap_err(); // Invalid layout
let b = b.into_mutable().unwrap();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is now possible 🎉

let b = Buffer::from(b);
let b = b.into_vec::<u32>().unwrap();
assert_eq!(b, &[1, 3, 5]);
}
Expand Down
Loading