From 707664a29940e404ee9e131ac8a19c9760e91165 Mon Sep 17 00:00:00 2001
From: ritchie <ritchie46@gmail.com>
Date: Mon, 10 Jun 2024 19:37:44 +0200
Subject: [PATCH] lint

---
 crates/polars-arrow/src/array/mod.rs          |  2 +-
 .../src/chunked_array/ops/chunkops.rs         | 54 +++++++++++++++++--
 crates/polars-core/src/frame/mod.rs           |  8 +++
 .../src/series/implementations/binary.rs      |  1 -
 .../series/implementations/binary_offset.rs   |  1 -
 .../src/series/implementations/boolean.rs     |  1 -
 .../src/series/implementations/categorical.rs |  1 -
 .../src/series/implementations/datetime.rs    |  7 ++-
 .../src/series/implementations/decimal.rs     |  6 ++-
 .../src/series/implementations/null.rs        | 26 ++++-----
 .../src/series/implementations/struct_.rs     |  2 -
 crates/polars-core/src/utils/mod.rs           | 20 +++++--
 12 files changed, 98 insertions(+), 31 deletions(-)

diff --git a/crates/polars-arrow/src/array/mod.rs b/crates/polars-arrow/src/array/mod.rs
index 19ea2c18c0b6..40d8bf6285b9 100644
--- a/crates/polars-arrow/src/array/mod.rs
+++ b/crates/polars-arrow/src/array/mod.rs
@@ -162,7 +162,7 @@ pub trait Array: Send + Sync + dyn_clone::DynClone + 'static {
     #[must_use]
     fn sliced(&self, offset: usize, length: usize) -> Box<dyn Array> {
         if length == 0 {
-            return new_empty_array(self.data_type().clone())
+            return new_empty_array(self.data_type().clone());
         }
         let mut new = self.to_boxed();
         new.slice(offset, length);
diff --git a/crates/polars-core/src/chunked_array/ops/chunkops.rs b/crates/polars-core/src/chunked_array/ops/chunkops.rs
index b3d10e883187..8a147e0fe123 100644
--- a/crates/polars-core/src/chunked_array/ops/chunkops.rs
+++ b/crates/polars-core/src/chunked_array/ops/chunkops.rs
@@ -17,7 +17,7 @@ pub(crate) fn split_at(
     let (raw_offset, _) = slice_offsets(offset, 0, own_length);
 
     let mut remaining_offset = raw_offset;
-    let mut iter = chunks.into_iter();
+    let mut iter = chunks.iter();
 
     for chunk in &mut iter {
         let chunk_len = chunk.len();
@@ -173,17 +173,61 @@ impl<T: PolarsDataType> ChunkedArray<T> {
         }
     }
 
-    /// Slice the array. The chunks are reallocated the underlying data slices are zero copy.
+    /// Split the array. The chunks are reallocated, but the underlying data slices are zero copy.
     ///
     /// When offset is negative it will be counted from the end of the array.
     /// This method will never error,
     /// and will slice the best match when offset, or length is out of bounds
     pub fn split_at(&self, offset: i64) -> (Self, Self) {
-        // The len: 0 special cases ensure we release memory.
         // A normal slice, slice the buffers and thus keep the whole memory allocated.
         let (l, r) = split_at(&self.chunks, offset, self.len());
-        let out_l = unsafe { self.copy_with_chunks(l) };
-        let out_r = unsafe { self.copy_with_chunks(r) };
+        let mut out_l = unsafe { self.copy_with_chunks(l) };
+        let mut out_r = unsafe { self.copy_with_chunks(r) };
+
+        use MetadataProperties as P;
+        let mut properties_l = P::SORTED | P::FAST_EXPLODE_LIST;
+        let mut properties_r = P::SORTED | P::FAST_EXPLODE_LIST;
+
+        let is_ascending = self.is_sorted_ascending_flag();
+        let is_descending = self.is_sorted_descending_flag();
+
+        if is_ascending || is_descending {
+            // A validity bit is set when the value is present, so the array starts with
+            // a null unless the first bit is `Some(true)`; an empty bitmap counts as null.
+            let has_nulls_at_start = self.null_count() != 0
+                && self
+                    .chunks()
+                    .first()
+                    .unwrap()
+                    .as_ref()
+                    .validity()
+                    .map_or(false, |bm| bm.get(0) != Some(true));
+
+            // Only a non-empty left part that starts with a value still holds the first
+            // element: the minimum when ascending, the maximum when descending.
+            if !has_nulls_at_start && !out_l.is_empty() {
+                properties_l.set(P::MIN_VALUE, is_ascending);
+                properties_l.set(P::MAX_VALUE, is_descending);
+            }
+
+            let has_nulls_at_end = self.null_count() != 0
+                && self
+                    .chunks()
+                    .last()
+                    .unwrap()
+                    .as_ref()
+                    .validity()
+                    .map_or(false, |bm| bm.get(bm.len().saturating_sub(1)) != Some(true));
+
+            // Mirror image for the right part, which keeps the last element.
+            if !has_nulls_at_end && !out_r.is_empty() {
+                properties_r.set(P::MIN_VALUE, is_descending);
+                properties_r.set(P::MAX_VALUE, is_ascending);
+            }
+        }
+        out_l.copy_metadata(self, properties_l);
+        out_r.copy_metadata(self, properties_r);
+
         (out_l, out_r)
     }
 
diff --git a/crates/polars-core/src/frame/mod.rs b/crates/polars-core/src/frame/mod.rs
index 52e41cf27c9f..6418b3ab3277 100644
--- a/crates/polars-core/src/frame/mod.rs
+++ b/crates/polars-core/src/frame/mod.rs
@@ -2233,6 +2233,14 @@ impl DataFrame {
         unsafe { DataFrame::new_no_checks(col) }
     }
 
+    /// Split this [`DataFrame`] into two frames by splitting every column at `offset`.
+    pub fn split_at(&self, offset: i64) -> (Self, Self) {
+        let (left, right) = self.columns.iter().map(|s| s.split_at(offset)).unzip();
+        // SAFETY (to confirm): every column is split at the same `offset`, so the
+        // columns of each half should still satisfy what `new_no_checks` skips.
+        unsafe { (DataFrame::new_no_checks(left), DataFrame::new_no_checks(right)) }
+    }
+
     pub fn clear(&self) -> Self {
         let col = self.columns.iter().map(|s| s.clear()).collect::<Vec<_>>();
         unsafe { DataFrame::new_no_checks(col) }
diff --git a/crates/polars-core/src/series/implementations/binary.rs b/crates/polars-core/src/series/implementations/binary.rs
index 63d4a25eff9a..042e58fcf813 100644
--- a/crates/polars-core/src/series/implementations/binary.rs
+++ b/crates/polars-core/src/series/implementations/binary.rs
@@ -124,7 +124,6 @@ impl SeriesTrait for SeriesWrap<BinaryChunked> {
         (a.into_series(), b.into_series())
     }
 
-
     fn append(&mut self, other: &Series) -> PolarsResult<()> {
         polars_ensure!(self.0.dtype() == other.dtype(), append);
         // todo! add object
diff --git a/crates/polars-core/src/series/implementations/binary_offset.rs b/crates/polars-core/src/series/implementations/binary_offset.rs
index a16db7e1911e..b0ac481f682c 100644
--- a/crates/polars-core/src/series/implementations/binary_offset.rs
+++ b/crates/polars-core/src/series/implementations/binary_offset.rs
@@ -87,7 +87,6 @@ impl SeriesTrait for SeriesWrap<BinaryOffsetChunked> {
         (a.into_series(), b.into_series())
     }
 
-
     fn append(&mut self, other: &Series) -> PolarsResult<()> {
         polars_ensure!(self.0.dtype() == other.dtype(), append);
         // todo! add object
diff --git a/crates/polars-core/src/series/implementations/boolean.rs b/crates/polars-core/src/series/implementations/boolean.rs
index bee6a4771df3..2b6777eda58f 100644
--- a/crates/polars-core/src/series/implementations/boolean.rs
+++ b/crates/polars-core/src/series/implementations/boolean.rs
@@ -145,7 +145,6 @@ impl SeriesTrait for SeriesWrap<BooleanChunked> {
         (a.into_series(), b.into_series())
     }
 
-
     fn append(&mut self, other: &Series) -> PolarsResult<()> {
         polars_ensure!(self.0.dtype() == other.dtype(), append);
         self.0.append(other.as_ref().as_ref());
diff --git a/crates/polars-core/src/series/implementations/categorical.rs b/crates/polars-core/src/series/implementations/categorical.rs
index 425f24cd39e9..97ac4be0031a 100644
--- a/crates/polars-core/src/series/implementations/categorical.rs
+++ b/crates/polars-core/src/series/implementations/categorical.rs
@@ -161,7 +161,6 @@ impl SeriesTrait for SeriesWrap<CategoricalChunked> {
         (a, b)
     }
 
-
     fn append(&mut self, other: &Series) -> PolarsResult<()> {
         polars_ensure!(self.0.dtype() == other.dtype(), append);
         self.0.append(other.categorical().unwrap())
diff --git a/crates/polars-core/src/series/implementations/datetime.rs b/crates/polars-core/src/series/implementations/datetime.rs
index 767c9886629d..42f60bd06c4e 100644
--- a/crates/polars-core/src/series/implementations/datetime.rs
+++ b/crates/polars-core/src/series/implementations/datetime.rs
@@ -174,7 +174,12 @@ impl SeriesTrait for SeriesWrap<DatetimeChunked> {
     }
     fn split_at(&self, offset: i64) -> (Series, Series) {
         let (a, b) = self.0.split_at(offset);
-        (a.into_datetime(self.0.time_unit(), self.0.time_zone().clone()).into_series(), b.into_datetime(self.0.time_unit(), self.0.time_zone().clone()).into_series())
+        (
+            a.into_datetime(self.0.time_unit(), self.0.time_zone().clone())
+                .into_series(),
+            b.into_datetime(self.0.time_unit(), self.0.time_zone().clone())
+                .into_series(),
+        )
     }
 
     fn mean(&self) -> Option<f64> {
diff --git a/crates/polars-core/src/series/implementations/decimal.rs b/crates/polars-core/src/series/implementations/decimal.rs
index 76096dab64b4..bfc79836b618 100644
--- a/crates/polars-core/src/series/implementations/decimal.rs
+++ b/crates/polars-core/src/series/implementations/decimal.rs
@@ -196,9 +196,11 @@ impl SeriesTrait for SeriesWrap<DecimalChunked> {
 
     fn split_at(&self, offset: i64) -> (Series, Series) {
         let (a, b) = self.0.split_at(offset);
-        let a = a.into_decimal_unchecked(self.0.precision(), self.0.scale())
+        let a = a
+            .into_decimal_unchecked(self.0.precision(), self.0.scale())
             .into_series();
-        let b = b.into_decimal_unchecked(self.0.precision(), self.0.scale())
+        let b = b
+            .into_decimal_unchecked(self.0.precision(), self.0.scale())
             .into_series();
         (a, b)
     }
diff --git a/crates/polars-core/src/series/implementations/null.rs b/crates/polars-core/src/series/implementations/null.rs
index c084952424db..564a8f93669d 100644
--- a/crates/polars-core/src/series/implementations/null.rs
+++ b/crates/polars-core/src/series/implementations/null.rs
@@ -240,20 +240,20 @@ impl SeriesTrait for NullChunked {
     }
 
     fn split_at(&self, offset: i64) -> (Series, Series) {
-        let (l, r) = chunkops::split_at(&self.chunks(), offset, self.len());
-        (NullChunked {
-            name: self.name.clone(),
-            length: l.iter().map(|arr|arr.len() as IdxSize).sum(),
-            chunks: l,
-        }
+        let (l, r) = chunkops::split_at(self.chunks(), offset, self.len());
+        (
+            NullChunked {
+                name: self.name.clone(),
+                length: l.iter().map(|arr| arr.len() as IdxSize).sum(),
+                chunks: l,
+            }
+            .into_series(),
+            NullChunked {
+                name: self.name.clone(),
+                length: r.iter().map(|arr| arr.len() as IdxSize).sum(),
+                chunks: r,
+            }
             .into_series(),
-         NullChunked {
-             name: self.name.clone(),
-             length: r.iter().map(|arr|arr.len() as IdxSize).sum(),
-             chunks: r,
-         }
-             .into_series(),
-
         )
     }
 
diff --git a/crates/polars-core/src/series/implementations/struct_.rs b/crates/polars-core/src/series/implementations/struct_.rs
index c723033d82c0..d9ed03948fce 100644
--- a/crates/polars-core/src/series/implementations/struct_.rs
+++ b/crates/polars-core/src/series/implementations/struct_.rs
@@ -126,14 +126,12 @@ impl SeriesTrait for SeriesWrap<StructChunked> {
         out.into_series()
     }
 
-
     fn split_at(&self, offset: i64) -> (Series, Series) {
         let (a, b): (Vec<_>, Vec<_>) = self.0.fields().iter().map(|s| s.split_at(offset)).unzip();
 
         let a = StructChunked::new(self.name(), &a).unwrap();
         let b = StructChunked::new(self.name(), &b).unwrap();
         (a.into_series(), b.into_series())
-
     }
 
     fn append(&mut self, other: &Series) -> PolarsResult<()> {
diff --git a/crates/polars-core/src/utils/mod.rs b/crates/polars-core/src/utils/mod.rs
index 313fb55b1b50..fd38c4bad099 100644
--- a/crates/polars-core/src/utils/mod.rs
+++ b/crates/polars-core/src/utils/mod.rs
@@ -108,6 +108,8 @@ pub fn _split_offsets(len: usize, n: usize) -> Vec<(usize, usize)> {
 pub trait Container: Clone {
     fn slice(&self, offset: i64, len: usize) -> Self;
 
+    fn split_at(&self, offset: i64) -> (Self, Self);
+
     fn len(&self) -> usize;
 
     fn iter_chunks(&self) -> impl Iterator<Item = Self>;
@@ -122,6 +124,10 @@ impl Container for DataFrame {
         DataFrame::slice(self, offset, len)
     }
 
+    fn split_at(&self, offset: i64) -> (Self, Self) {
+        DataFrame::split_at(self, offset)
+    }
+
     fn len(&self) -> usize {
         self.height()
     }
@@ -144,6 +150,10 @@ impl<T: PolarsDataType> Container for ChunkedArray<T> {
         ChunkedArray::slice(self, offset, len)
     }
 
+    fn split_at(&self, offset: i64) -> (Self, Self) {
+        ChunkedArray::split_at(self, offset)
+    }
+
     fn len(&self) -> usize {
         ChunkedArray::len(self)
     }
@@ -167,6 +177,10 @@ impl Container for Series {
         self.0.slice(offset, len)
     }
 
+    fn split_at(&self, offset: i64) -> (Self, Self) {
+        self.0.split_at(offset)
+    }
+
     fn len(&self) -> usize {
         self.0.len()
     }
@@ -258,9 +272,9 @@ pub fn split_and_flatten<C: Container>(container: &C, target: usize) -> Vec<C> {
                     continue 'new_chunk;
                 }
 
-                // TODO! use `split` operation here. That saves a null count.
-                out.push(chunk.slice(0, chunk_size));
-                chunk = chunk.slice(chunk_size as i64, h - chunk_size);
+                let (a, b) = chunk.split_at(chunk_size as i64);
+                out.push(a);
+                chunk = b;
             }
         }
         out