Skip to content

Commit

Permalink
lint
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jun 10, 2024
1 parent d195da3 commit 707664a
Show file tree
Hide file tree
Showing 12 changed files with 98 additions and 31 deletions.
2 changes: 1 addition & 1 deletion crates/polars-arrow/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ pub trait Array: Send + Sync + dyn_clone::DynClone + 'static {
#[must_use]
fn sliced(&self, offset: usize, length: usize) -> Box<dyn Array> {
if length == 0 {
return new_empty_array(self.data_type().clone())
return new_empty_array(self.data_type().clone());
}
let mut new = self.to_boxed();
new.slice(offset, length);
Expand Down
54 changes: 49 additions & 5 deletions crates/polars-core/src/chunked_array/ops/chunkops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ pub(crate) fn split_at(
let (raw_offset, _) = slice_offsets(offset, 0, own_length);

let mut remaining_offset = raw_offset;
let mut iter = chunks.into_iter();
let mut iter = chunks.iter();

for chunk in &mut iter {
let chunk_len = chunk.len();
Expand Down Expand Up @@ -173,17 +173,61 @@ impl<T: PolarsDataType> ChunkedArray<T> {
}
}

/// Slice the array. The chunks are reallocated the underlying data slices are zero copy.
/// Split the array into a `(left, right)` pair at `offset`. The chunks are reallocated; the
/// underlying data slices are zero copy.
///
/// When offset is negative it will be counted from the end of the array.
/// This method will never error,
/// and will split at the best match when offset is out of bounds.
///
/// Both halves are built from the chunk vectors returned by the free `split_at` helper and
/// then receive metadata from `self` via `copy_metadata`: always `SORTED` and
/// `FAST_EXPLODE_LIST`, plus `MIN_VALUE` / `MAX_VALUE` when `self` carries a sorted flag and
/// the null checks below allow it.
pub fn split_at(&self, offset: i64) -> (Self, Self) {
// NOTE(review): the two lines below read as if carried over from `slice`; no `len: 0`
// special case is visible in this function — confirm they still apply.
// The len: 0 special cases ensure we release memory.
// A normal slice, slice the buffers and thus keep the whole memory allocated.
let (l, r) = split_at(&self.chunks, offset, self.len());
// NOTE(review): `copy_with_chunks` is called in `unsafe` blocks without a stated invariant;
// presumably the new chunks must keep the dtype of `self` — confirm.
let out_l = unsafe { self.copy_with_chunks(l) };
let out_r = unsafe { self.copy_with_chunks(r) };
let mut out_l = unsafe { self.copy_with_chunks(l) };
let mut out_r = unsafe { self.copy_with_chunks(r) };

// Property sets handed to `copy_metadata` for the left and the right half.
use MetadataProperties as P;
let mut properties_l = P::SORTED | P::FAST_EXPLODE_LIST;
let mut properties_r = P::SORTED | P::FAST_EXPLODE_LIST;

let is_ascending = self.is_sorted_ascending_flag();
let is_descending = self.is_sorted_descending_flag();

// Min/max are only considered when `self` is flagged as sorted in either direction.
if is_ascending || is_descending {
// NOTE(review): Arrow validity bitmaps conventionally use a set bit for a *valid* value.
// If that holds for `validity()` here, this expression is `true` when the first value is
// NOT null, i.e. the opposite of what the name says — confirm and invert if needed.
// NOTE(review): `first().unwrap()` assumes at least one chunk and `bm.get(0).unwrap()`
// presumably assumes the first chunk is non-empty — confirm neither can be empty here.
let has_nulls_at_start = self.null_count() != 0
&& self
.chunks()
.first()
.unwrap()
.as_ref()
.validity()
.map_or(false, |bm| bm.get(0).unwrap());

if !has_nulls_at_start {
// `!has_nulls_at_start` is always true inside this branch, so these two flags reduce to
// `is_ascending` and `is_descending` respectively.
let can_copy_min_value = !has_nulls_at_start && is_ascending;
let can_copy_max_value = !has_nulls_at_start && is_descending;

// Left half: min when ascending, max when descending.
properties_l.set(P::MIN_VALUE, can_copy_min_value);
properties_l.set(P::MAX_VALUE, can_copy_max_value);
}

// Same check for the last position of the last chunk.
// NOTE(review): the validity-bit polarity question above applies here as well, and
// `bm.len() - 1` would underflow for a zero-length bitmap — confirm the last chunk is
// never empty.
let has_nulls_at_end = self.null_count() != 0
&& self
.chunks()
.last()
.unwrap()
.as_ref()
.validity()
.map_or(false, |bm| bm.get(bm.len() - 1).unwrap());

if !has_nulls_at_end {
// Mirror image of the left half: min when descending, max when ascending.
// `!has_nulls_at_end` is always true inside this branch.
let can_copy_min_value = !has_nulls_at_end && is_descending;
let can_copy_max_value = !has_nulls_at_end && is_ascending;
properties_r.set(P::MIN_VALUE, can_copy_min_value);
properties_r.set(P::MAX_VALUE, can_copy_max_value);
}
}
out_l.copy_metadata(self, properties_l);
out_r.copy_metadata(self, properties_r);

(out_l, out_r)
}

Expand Down
8 changes: 8 additions & 0 deletions crates/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2233,6 +2233,14 @@ impl DataFrame {
unsafe { DataFrame::new_no_checks(col) }
}

/// Splits this [`DataFrame`] at `offset`, returning the `(left, right)` halves.
///
/// Every column is split with its own `split_at(offset)`; the left parts form the first
/// frame and the right parts form the second.
pub fn split_at(&self, offset: i64) -> (Self, Self) {
    let (left_cols, right_cols): (Vec<_>, Vec<_>) = self
        .columns
        .iter()
        .map(|column| column.split_at(offset))
        .unzip();
    // SAFETY (assumed): every column was split at the same `offset`, so each half should keep
    // the column names and equal column lengths of `self` — confirm this is all
    // `new_no_checks` requires.
    let left = unsafe { DataFrame::new_no_checks(left_cols) };
    let right = unsafe { DataFrame::new_no_checks(right_cols) };
    (left, right)
}

pub fn clear(&self) -> Self {
let col = self.columns.iter().map(|s| s.clear()).collect::<Vec<_>>();
unsafe { DataFrame::new_no_checks(col) }
Expand Down
1 change: 0 additions & 1 deletion crates/polars-core/src/series/implementations/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@ impl SeriesTrait for SeriesWrap<BinaryChunked> {
(a.into_series(), b.into_series())
}


fn append(&mut self, other: &Series) -> PolarsResult<()> {
polars_ensure!(self.0.dtype() == other.dtype(), append);
// todo! add object
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ impl SeriesTrait for SeriesWrap<BinaryOffsetChunked> {
(a.into_series(), b.into_series())
}


fn append(&mut self, other: &Series) -> PolarsResult<()> {
polars_ensure!(self.0.dtype() == other.dtype(), append);
// todo! add object
Expand Down
1 change: 0 additions & 1 deletion crates/polars-core/src/series/implementations/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,6 @@ impl SeriesTrait for SeriesWrap<BooleanChunked> {
(a.into_series(), b.into_series())
}


fn append(&mut self, other: &Series) -> PolarsResult<()> {
polars_ensure!(self.0.dtype() == other.dtype(), append);
self.0.append(other.as_ref().as_ref());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,6 @@ impl SeriesTrait for SeriesWrap<CategoricalChunked> {
(a, b)
}


fn append(&mut self, other: &Series) -> PolarsResult<()> {
polars_ensure!(self.0.dtype() == other.dtype(), append);
self.0.append(other.categorical().unwrap())
Expand Down
7 changes: 6 additions & 1 deletion crates/polars-core/src/series/implementations/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,12 @@ impl SeriesTrait for SeriesWrap<DatetimeChunked> {
}
/// Splits the wrapped array (`self.0`) at `offset` and converts both halves back into
/// datetime series, reusing this series' time unit and a clone of its time zone.
fn split_at(&self, offset: i64) -> (Series, Series) {
let (a, b) = self.0.split_at(offset);
(a.into_datetime(self.0.time_unit(), self.0.time_zone().clone()).into_series(), b.into_datetime(self.0.time_unit(), self.0.time_zone().clone()).into_series())
(
// Left half.
a.into_datetime(self.0.time_unit(), self.0.time_zone().clone())
.into_series(),
// Right half.
b.into_datetime(self.0.time_unit(), self.0.time_zone().clone())
.into_series(),
)
}

fn mean(&self) -> Option<f64> {
Expand Down
6 changes: 4 additions & 2 deletions crates/polars-core/src/series/implementations/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,9 +196,11 @@ impl SeriesTrait for SeriesWrap<DecimalChunked> {

/// Splits the wrapped array (`self.0`) at `offset` and converts both halves back into
/// decimal series with this series' precision and scale.
fn split_at(&self, offset: i64) -> (Series, Series) {
let (a, b) = self.0.split_at(offset);
// NOTE(review): the `_unchecked` conversion presumably skips validation of the values
// against precision/scale; that looks fine here because both come from `self` — confirm.
let a = a.into_decimal_unchecked(self.0.precision(), self.0.scale())
let a = a
.into_decimal_unchecked(self.0.precision(), self.0.scale())
.into_series();
let b = b.into_decimal_unchecked(self.0.precision(), self.0.scale())
let b = b
.into_decimal_unchecked(self.0.precision(), self.0.scale())
.into_series();
(a, b)
}
Expand Down
26 changes: 13 additions & 13 deletions crates/polars-core/src/series/implementations/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,20 +240,20 @@ impl SeriesTrait for NullChunked {
}

/// Splits the chunks at `offset` with `chunkops::split_at` and wraps each side in a new
/// `NullChunked` that keeps this series' name.
///
/// The `length` of each half is recomputed by summing the lengths of its chunks.
fn split_at(&self, offset: i64) -> (Series, Series) {
let (l, r) = chunkops::split_at(&self.chunks(), offset, self.len());
(NullChunked {
name: self.name.clone(),
length: l.iter().map(|arr|arr.len() as IdxSize).sum(),
chunks: l,
}
let (l, r) = chunkops::split_at(self.chunks(), offset, self.len());
(
// Left half.
NullChunked {
name: self.name.clone(),
// NOTE(review): `as IdxSize` is a plain cast; presumably chunk lengths always fit — confirm.
length: l.iter().map(|arr| arr.len() as IdxSize).sum(),
chunks: l,
}
.into_series(),
// Right half.
NullChunked {
name: self.name.clone(),
length: r.iter().map(|arr| arr.len() as IdxSize).sum(),
chunks: r,
}
.into_series(),
NullChunked {
name: self.name.clone(),
length: r.iter().map(|arr|arr.len() as IdxSize).sum(),
chunks: r,
}
.into_series(),

)
}

Expand Down
2 changes: 0 additions & 2 deletions crates/polars-core/src/series/implementations/struct_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,14 +126,12 @@ impl SeriesTrait for SeriesWrap<StructChunked> {
out.into_series()
}


/// Splits every field of the struct at `offset` and rebuilds one struct series from the
/// left parts and one from the right parts, both under this series' name.
fn split_at(&self, offset: i64) -> (Series, Series) {
    let (left_fields, right_fields): (Vec<_>, Vec<_>) = self
        .0
        .fields()
        .iter()
        .map(|field| field.split_at(offset))
        .unzip();

    // Rebuilding panics if `StructChunked::new` rejects the split fields.
    let left = StructChunked::new(self.name(), &left_fields).unwrap();
    let right = StructChunked::new(self.name(), &right_fields).unwrap();
    (left.into_series(), right.into_series())
}

fn append(&mut self, other: &Series) -> PolarsResult<()> {
Expand Down
20 changes: 17 additions & 3 deletions crates/polars-core/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ pub fn _split_offsets(len: usize, n: usize) -> Vec<(usize, usize)> {
pub trait Container: Clone {
fn slice(&self, offset: i64, len: usize) -> Self;

/// Split the container at `offset`, returning two containers of the same type.
fn split_at(&self, offset: i64) -> (Self, Self);

fn len(&self) -> usize;

fn iter_chunks(&self) -> impl Iterator<Item = Self>;
Expand All @@ -122,6 +124,10 @@ impl Container for DataFrame {
DataFrame::slice(self, offset, len)
}

// Forwards to the inherent `DataFrame::split_at`.
fn split_at(&self, offset: i64) -> (Self, Self) {
DataFrame::split_at(self, offset)
}

fn len(&self) -> usize {
self.height()
}
Expand All @@ -144,6 +150,10 @@ impl<T: PolarsDataType> Container for ChunkedArray<T> {
ChunkedArray::slice(self, offset, len)
}

// Forwards to the inherent `ChunkedArray::split_at`.
fn split_at(&self, offset: i64) -> (Self, Self) {
ChunkedArray::split_at(self, offset)
}

fn len(&self) -> usize {
ChunkedArray::len(self)
}
Expand All @@ -167,6 +177,10 @@ impl Container for Series {
self.0.slice(offset, len)
}

// Forwards to the `split_at` of the wrapped value (`self.0`).
fn split_at(&self, offset: i64) -> (Self, Self) {
self.0.split_at(offset)
}

fn len(&self) -> usize {
self.0.len()
}
Expand Down Expand Up @@ -258,9 +272,9 @@ pub fn split_and_flatten<C: Container>(container: &C, target: usize) -> Vec<C> {
continue 'new_chunk;
}

// TODO! use `split` operation here. That saves a null count.
out.push(chunk.slice(0, chunk_size));
chunk = chunk.slice(chunk_size as i64, h - chunk_size);
let (a, b) = chunk.split_at(chunk_size as i64);
out.push(a);
chunk = b;
}
}
out
Expand Down

0 comments on commit 707664a

Please sign in to comment.