Skip to content

Commit

Permalink
fix: Use Buffer<T> in ObjectSeries, fixes variety of offset bugs (#18637
Browse files Browse the repository at this point in the history
)
  • Loading branch information
orlp authored Sep 9, 2024
1 parent 72d861e commit 76a340b
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 70 deletions.
14 changes: 2 additions & 12 deletions crates/polars-core/src/chunked_array/from_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
use std::borrow::{Borrow, Cow};

#[cfg(feature = "object")]
use arrow::bitmap::{Bitmap, MutableBitmap};
use arrow::bitmap::MutableBitmap;

use crate::chunked_array::builder::{get_list_builder, AnonymousOwnedListBuilder};
#[cfg(feature = "object")]
Expand Down Expand Up @@ -268,17 +268,7 @@ impl<T: PolarsObject> FromIterator<Option<T>> for ObjectChunked<T> {
})
.collect();

let null_bit_buffer: Option<Bitmap> = null_mask_builder.into();
let null_bitmap = null_bit_buffer;

let len = values.len();

let arr = Box::new(ObjectArray {
values: Arc::new(values),
null_bitmap,
offset: 0,
len,
});
let arr = Box::new(ObjectArray::from(values).with_validity(null_mask_builder.into()));
ChunkedArray::new_with_compute_len(
Arc::new(Field::new(PlSmallStr::EMPTY, get_object_type::<T>())),
vec![arr],
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/iterator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ impl<T: PolarsObject> ObjectChunked<T> {
// we know that we only iterate over length == self.len()
unsafe {
self.downcast_iter()
.flat_map(|arr| arr.values().iter())
.flat_map(|arr| arr.values_iter())
.trust_my_length(self.len())
}
}
Expand Down
18 changes: 6 additions & 12 deletions crates/polars-core/src/chunked_array/object/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,8 @@ where
.unwrap_or(0) as IdxSize;

let arr = Box::new(ObjectArray {
values: Arc::new(self.values),
null_bitmap,
offset: 0,
len,
values: self.values.into(),
validity: null_bitmap,
});

self.field.dtype = get_object_type::<T>();
Expand Down Expand Up @@ -140,10 +138,8 @@ where
let field = Arc::new(Field::new(name, DataType::Object(T::type_name(), None)));
let len = v.len();
let arr = Box::new(ObjectArray {
values: Arc::new(v),
null_bitmap: None,
offset: 0,
len,
values: v.into(),
validity: None,
});

unsafe { ObjectChunked::new_with_dims(field, vec![arr], len as IdxSize, 0) }
Expand All @@ -154,10 +150,8 @@ where
let len = v.len();
let null_count = validity.unset_bits();
let arr = Box::new(ObjectArray {
values: Arc::new(v),
null_bitmap: Some(validity),
offset: 0,
len,
values: v.into(),
validity: Some(validity),
});

unsafe {
Expand Down
63 changes: 30 additions & 33 deletions crates/polars-core/src/chunked_array/object/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use std::hash::Hash;

use arrow::bitmap::utils::{BitmapIter, ZipValidity};
use arrow::bitmap::{Bitmap, MutableBitmap};
use arrow::buffer::Buffer;
use polars_utils::total_ord::TotalHash;

use crate::prelude::*;
Expand All @@ -22,10 +23,8 @@ pub struct ObjectArray<T>
where
T: PolarsObject,
{
pub(crate) values: Arc<Vec<T>>,
pub(crate) null_bitmap: Option<Bitmap>,
pub(crate) offset: usize,
pub(crate) len: usize,
values: Buffer<T>,
validity: Option<Bitmap>,
}

/// Trimmed down object safe polars object
Expand Down Expand Up @@ -80,23 +79,18 @@ impl<T> ObjectArray<T>
where
T: PolarsObject,
{
/// Get a reference to the underlying data
pub fn values(&self) -> &Arc<Vec<T>> {
&self.values
}

pub fn values_iter(&self) -> ObjectValueIter<'_, T> {
self.values.iter()
}

/// Returns an iterator of `Option<&T>` over every element of this array.
pub fn iter(&self) -> ZipValidity<&T, ObjectValueIter<'_, T>, BitmapIter> {
ZipValidity::new_with_validity(self.values_iter(), self.null_bitmap.as_ref())
ZipValidity::new_with_validity(self.values_iter(), self.validity.as_ref())
}

/// Get a value at a certain index location
pub fn value(&self, index: usize) -> &T {
&self.values[self.offset + index]
&self.values[index]
}

pub fn get(&self, index: usize) -> Option<&T> {
Expand All @@ -123,7 +117,7 @@ where
/// No bounds checks
#[inline]
pub unsafe fn is_valid_unchecked(&self, i: usize) -> bool {
if let Some(b) = &self.null_bitmap {
if let Some(b) = &self.validity {
b.get_bit_unchecked(i)
} else {
true
Expand Down Expand Up @@ -157,7 +151,7 @@ where
if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
panic!("validity must be equal to the array's length")
}
self.null_bitmap = validity;
self.validity = validity;
}
}

Expand All @@ -182,14 +176,12 @@ where
}

unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
let len = std::cmp::min(self.len - offset, length);
self.null_bitmap = self
.null_bitmap
self.validity = self
.validity
.take()
.map(|bitmap| bitmap.sliced_unchecked(offset, length))
.filter(|bitmap| bitmap.unset_bits() > 0);
self.len = len;
self.offset = offset;
self.values.slice_unchecked(offset, length);
}

fn split_at_boxed(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>) {
Expand All @@ -203,11 +195,11 @@ where
}

fn len(&self) -> usize {
self.len
self.values.len()
}

fn validity(&self) -> Option<&Bitmap> {
self.null_bitmap.as_ref()
self.validity.as_ref()
}

fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
Expand All @@ -223,7 +215,7 @@ where
}

fn null_count(&self) -> usize {
match &self.null_bitmap {
match &self.validity {
None => 0,
Some(validity) => validity.unset_bits(),
}
Expand All @@ -236,18 +228,16 @@ impl<T: PolarsObject> Splitable for ObjectArray<T> {
}

unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
let (left_values, right_values) = unsafe { self.values.split_at_unchecked(offset) };
let (left_validity, right_validity) = unsafe { self.validity.split_at_unchecked(offset) };
(
Self {
values: self.values.clone(),
null_bitmap: self.null_bitmap.clone(),
len: offset,
offset: self.offset,
values: left_values,
validity: left_validity,
},
Self {
values: self.values.clone(),
null_bitmap: self.null_bitmap.clone(),
len: self.len() - offset,
offset: self.offset + offset,
values: right_values,
validity: right_validity,
},
)
}
Expand Down Expand Up @@ -277,10 +267,8 @@ impl<T: PolarsObject> StaticArray for ObjectArray<T> {

fn full_null(length: usize, _dtype: ArrowDataType) -> Self {
ObjectArray {
values: Arc::new(vec![T::default(); length]),
null_bitmap: Some(Bitmap::new_with_value(false, length)),
offset: 0,
len: length,
values: vec![T::default(); length].into(),
validity: Some(Bitmap::new_with_value(false, length)),
}
}
}
Expand Down Expand Up @@ -328,3 +316,12 @@ where
}
}
}

impl<T: PolarsObject> From<Vec<T>> for ObjectArray<T> {
fn from(values: Vec<T>) -> Self {
Self {
values: values.into(),
validity: None,
}
}
}
13 changes: 1 addition & 12 deletions crates/polars-core/src/datatypes/static_array_collect.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
use std::sync::Arc;

use arrow::array::ArrayFromIter;
use arrow::bitmap::Bitmap;

use crate::chunked_array::object::{ObjectArray, PolarsObject};

Expand Down Expand Up @@ -41,14 +38,6 @@ impl<'a, T: PolarsObject> ArrayFromIter<Option<&'a T>> for ObjectArray<T> {
})
.collect::<Result<Vec<T>, E>>()?;

let null_bit_buffer: Option<Bitmap> = null_mask_builder.into();
let null_bitmap = null_bit_buffer;
let len = values.len();
Ok(ObjectArray {
values: Arc::new(values),
null_bitmap,
offset: 0,
len,
})
Ok(ObjectArray::from(values).with_validity(null_mask_builder.into()))
}
}

0 comments on commit 76a340b

Please sign in to comment.