Skip to content

Commit

Permalink
refactor(rust): Move gather kernels to polars-compute (pola-rs#20415)
Browse files Browse the repository at this point in the history
  • Loading branch information
nameexhaustion authored Dec 23, 2024
1 parent 6aaa98c commit 2ae01fb
Show file tree
Hide file tree
Showing 26 changed files with 88 additions and 80 deletions.
4 changes: 1 addition & 3 deletions crates/polars-arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,8 @@ compute_arithmetics = ["strength_reduce", "compute_arithmetics_decimal"]
compute_bitwise = []
compute_boolean = []
compute_boolean_kleene = []
compute_comparison = ["compute_take", "compute_boolean"]
compute_comparison = ["compute_boolean"]
compute_hash = ["multiversion"]
compute_take = []
compute_temporal = []
compute = [
"compute_aggregate",
Expand All @@ -140,7 +139,6 @@ compute = [
"compute_boolean_kleene",
"compute_comparison",
"compute_hash",
"compute_take",
"compute_temporal",
]
serde = ["dep:serde", "polars-schema/serde", "polars-utils/serde"]
Expand Down
3 changes: 0 additions & 3 deletions crates/polars-arrow/src/compute/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,6 @@ pub mod boolean_kleene;
pub mod concatenate;
#[cfg(feature = "dtype-decimal")]
pub mod decimal;
#[cfg(feature = "compute_take")]
#[cfg_attr(docsrs, doc(cfg(feature = "compute_take")))]
pub mod take;
#[cfg(feature = "compute_temporal")]
#[cfg_attr(docsrs, doc(cfg(feature = "compute_temporal")))]
pub mod temporal;
Expand Down
4 changes: 0 additions & 4 deletions crates/polars-arrow/src/legacy/kernels/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@ use crate::array::BooleanArray;
use crate::bitmap::utils::BitChunks;
pub mod concatenate;
pub mod ewm;
#[cfg(feature = "compute_take")]
pub mod fixed_size_list;
#[cfg(feature = "compute_take")]
pub mod list;
pub mod rolling;
pub mod set;
pub mod sort_partition;
Expand Down
12 changes: 11 additions & 1 deletion crates/polars-compute/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,17 @@ rand = { workspace = true }
version_check = { workspace = true }

[features]
cast = ["arrow/chrono-tz", "dep:atoi_simd", "dep:chrono", "dep:fast-float2", "dep:itoa", "dep:itoap", "dep:ryu"]
cast = [
"gather",
"arrow/chrono-tz",
"dep:atoi_simd",
"dep:chrono",
"dep:fast-float2",
"dep:itoa",
"dep:itoap",
"dep:ryu",
]
gather = []
nightly = []
simd = ["arrow/simd"]
approx_unique = []
Expand Down
5 changes: 2 additions & 3 deletions crates/polars-compute/src/cast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,8 @@ fn cast_list_to_fixed_size_list<O: Offset>(
}
}
}
let take_values = unsafe {
arrow::compute::take::take_unchecked(list.values().as_ref(), &indices.freeze())
};
let take_values =
unsafe { crate::gather::take_unchecked(list.values().as_ref(), &indices.freeze()) };

cast(take_values.as_ref(), inner.dtype(), options)?
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
// specific language governing permissions and limitations
// under the License.

use arrow::array::{Array, BinaryArray, PrimitiveArray};
use arrow::offset::Offset;

use super::generic_binary::*;
use super::Index;
use crate::array::{Array, BinaryArray, PrimitiveArray};
use crate::offset::Offset;

/// `take` implementation for binary arrays
/// # Safety
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use arrow::array::BinaryViewArray;

use self::primitive::take_values_and_validity_unchecked;
use super::*;
use crate::array::BinaryViewArray;

/// # Safety
/// No bound checks
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
use arrow::array::Array;
use arrow::bitmap::Bitmap;
use arrow::datatypes::IdxArr;
use polars_utils::IdxSize;

use crate::array::Array;
use crate::bitmap::Bitmap;
use crate::datatypes::IdxArr;

/// # Safety
/// Doesn't do any bound checks.
pub unsafe fn take_bitmap_unchecked(values: &Bitmap, indices: &[IdxSize]) -> Bitmap {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use arrow::array::{Array, BooleanArray, PrimitiveArray};
use arrow::bitmap::{Bitmap, MutableBitmap};
use polars_utils::IdxSize;

use super::bitmap::{take_bitmap_nulls_unchecked, take_bitmap_unchecked};
use crate::array::{Array, BooleanArray, PrimitiveArray};
use crate::bitmap::{Bitmap, MutableBitmap};

// Take implementation when neither values nor indices contain nulls.
unsafe fn take_no_validity(values: &Bitmap, indices: &[IdxSize]) -> (Bitmap, Option<Bitmap>) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,17 @@

use std::mem::ManuallyDrop;

use arrow::array::{Array, ArrayRef, FixedSizeListArray, PrimitiveArray, StaticArray};
use arrow::bitmap::MutableBitmap;
use arrow::compute::utils::combine_validities_and;
use arrow::datatypes::reshape::{Dimension, ReshapeDimension};
use arrow::datatypes::{ArrowDataType, IdxArr, PhysicalType};
use arrow::legacy::prelude::FromData;
use arrow::with_match_primitive_type;
use polars_utils::itertools::Itertools;

use super::Index;
use crate::array::{Array, ArrayRef, FixedSizeListArray, PrimitiveArray, StaticArray};
use crate::bitmap::MutableBitmap;
use crate::compute::take::bitmap::{take_bitmap_nulls_unchecked, take_bitmap_unchecked};
use crate::compute::utils::combine_validities_and;
use crate::datatypes::reshape::{Dimension, ReshapeDimension};
use crate::datatypes::{ArrowDataType, IdxArr, PhysicalType};
use crate::legacy::prelude::FromData;
use crate::with_match_primitive_type;
use crate::gather::bitmap::{take_bitmap_nulls_unchecked, take_bitmap_unchecked};

fn get_stride_and_leaf_type(dtype: &ArrowDataType, size: usize) -> (usize, &ArrowDataType) {
if let ArrowDataType::FixedSizeList(inner, size_inner) = dtype {
Expand Down Expand Up @@ -207,18 +207,18 @@ pub(super) unsafe fn take_unchecked(values: &FixedSizeListArray, indices: &IdxAr

#[cfg(test)]
mod tests {
use crate::array::StaticArray;
use crate::datatypes::ArrowDataType;
use arrow::array::StaticArray;
use arrow::datatypes::ArrowDataType;

/// Test gather for FixedSizeListArray with outer validity but no inner validities.
#[test]
fn test_arr_gather_nulls_outer_validity_19482() {
use arrow::array::{FixedSizeListArray, Int64Array, PrimitiveArray};
use arrow::bitmap::Bitmap;
use arrow::datatypes::reshape::{Dimension, ReshapeDimension};
use polars_utils::IdxSize;

use super::take_unchecked;
use crate::array::{FixedSizeListArray, Int64Array, PrimitiveArray};
use crate::bitmap::Bitmap;
use crate::datatypes::reshape::{Dimension, ReshapeDimension};

unsafe {
let dyn_arr = FixedSizeListArray::from_shape(
Expand Down Expand Up @@ -250,11 +250,11 @@ mod tests {

#[test]
fn test_arr_gather_nulls_inner_validity() {
use arrow::array::{FixedSizeListArray, Int64Array, PrimitiveArray};
use arrow::datatypes::reshape::{Dimension, ReshapeDimension};
use polars_utils::IdxSize;

use super::take_unchecked;
use crate::array::{FixedSizeListArray, Int64Array, PrimitiveArray};
use crate::datatypes::reshape::{Dimension, ReshapeDimension};

unsafe {
let dyn_arr = FixedSizeListArray::from_shape(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use arrow::array::{GenericBinaryArray, PrimitiveArray};
use arrow::bitmap::{Bitmap, MutableBitmap};
use arrow::buffer::Buffer;
use arrow::offset::{Offset, Offsets, OffsetsBuffer};
use polars_utils::vec::{CapacityByFactor, PushUnchecked};

use super::Index;
use crate::array::{GenericBinaryArray, PrimitiveArray};
use crate::bitmap::{Bitmap, MutableBitmap};
use crate::buffer::Buffer;
use crate::offset::{Offset, Offsets, OffsetsBuffer};

fn create_offsets<I: Iterator<Item = usize>, O: Offset>(
lengths: I,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::{self, ArrayFromIterDtype, ListArray, StaticArray};
use crate::datatypes::IdxArr;
use crate::offset::Offset;
use arrow::array::{self, ArrayFromIterDtype, ListArray, StaticArray};
use arrow::datatypes::IdxArr;
use arrow::offset::Offset;

/// `take` implementation for ListArrays
pub(super) unsafe fn take_unchecked<I: Offset>(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@

//! Defines take kernel for [`Array`]
use crate::array::{
use arrow::array::{
self, new_empty_array, Array, ArrayCollectIterExt, ArrayFromIterDtype, NullArray, StaticArray,
Utf8ViewArray,
};
use crate::compute::take::binview::take_binview_unchecked;
use crate::datatypes::{ArrowDataType, IdxArr};
use crate::types::Index;
use arrow::datatypes::{ArrowDataType, IdxArr};
use arrow::types::Index;

use crate::gather::binview::take_binview_unchecked;

pub mod binary;
pub mod binview;
Expand All @@ -34,8 +35,9 @@ pub mod generic_binary;
pub mod list;
pub mod primitive;
pub mod structure;
pub mod sublist;

use crate::with_match_primitive_type_full;
use arrow::with_match_primitive_type_full;

/// Returns a new [`Array`] with only indices at `indices`. Null indices are taken as nulls.
/// Returns a new [`Array`] containing only the values at `indices`. Null indices are taken as nulls.
Expand All @@ -46,7 +48,7 @@ pub unsafe fn take_unchecked(values: &dyn Array, indices: &IdxArr) -> Box<dyn Ar
return new_empty_array(values.dtype().clone());
}

use crate::datatypes::PhysicalType::*;
use arrow::datatypes::PhysicalType::*;
match values.dtype().to_physical_type() {
Null => Box::new(NullArray::new(values.dtype().clone(), indices.len())),
Boolean => {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
use arrow::array::PrimitiveArray;
use arrow::bitmap::utils::set_bit_unchecked;
use arrow::bitmap::{Bitmap, MutableBitmap};
use arrow::legacy::index::IdxArr;
use arrow::legacy::utils::CustomIterTools;
use arrow::types::NativeType;
use polars_utils::index::NullCount;

use crate::array::PrimitiveArray;
use crate::bitmap::utils::set_bit_unchecked;
use crate::bitmap::{Bitmap, MutableBitmap};
use crate::legacy::index::IdxArr;
use crate::legacy::utils::CustomIterTools;
use crate::types::NativeType;

pub(super) unsafe fn take_values_and_validity_unchecked<T: NativeType>(
values: &[T],
validity_values: Option<&Bitmap>,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::{Array, StructArray};
use crate::compute::utils::combine_validities_and;
use crate::datatypes::IdxArr;
use arrow::array::{Array, StructArray};
use arrow::compute::utils::combine_validities_and;
use arrow::datatypes::IdxArr;

pub(super) unsafe fn take_unchecked(array: &StructArray, indices: &IdxArr) -> StructArray {
let values: Vec<Box<dyn Array>> = array
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use arrow::array::{ArrayRef, FixedSizeListArray, PrimitiveArray};
use arrow::legacy::prelude::*;
use arrow::legacy::utils::CustomIterTools;
use polars_error::{polars_bail, PolarsResult};
use polars_utils::index::NullCount;
use polars_utils::IdxSize;

use crate::array::{ArrayRef, FixedSizeListArray, PrimitiveArray};
use crate::compute::take::take_unchecked;
use crate::legacy::prelude::*;
use crate::legacy::utils::CustomIterTools;
use crate::gather::take_unchecked;

fn sub_fixed_size_list_get_indexes_literal(width: usize, len: usize, index: i64) -> IdxArr {
(0..len)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use arrow::array::{Array, ArrayRef, ListArray};
use arrow::legacy::prelude::*;
use arrow::legacy::trusted_len::TrustedLenPush;
use arrow::legacy::utils::CustomIterTools;
use arrow::offset::{Offsets, OffsetsBuffer};
use polars_utils::IdxSize;

use crate::array::{Array, ArrayRef, ListArray};
use crate::compute::take::take_unchecked;
use crate::legacy::prelude::*;
use crate::legacy::trusted_len::TrustedLenPush;
use crate::legacy::utils::CustomIterTools;
use crate::offset::{Offsets, OffsetsBuffer};
use crate::gather::take_unchecked;

/// Get the indices that would result in a get operation on the list's values.
/// for example, consider this list:
Expand Down Expand Up @@ -146,9 +146,10 @@ pub fn array_to_unit_list(array: ArrayRef) -> ListArray<i64> {

#[cfg(test)]
mod test {
use arrow::array::{Int32Array, PrimitiveArray};
use arrow::datatypes::ArrowDataType;

use super::*;
use crate::array::{Int32Array, PrimitiveArray};
use crate::datatypes::ArrowDataType;

fn get_array() -> ListArray<i64> {
let values = Int32Array::from_slice([1, 2, 3, 4, 5, 6]);
Expand Down
3 changes: 3 additions & 0 deletions crates/polars-compute/src/gather/sublist/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
//! Kernels for gathering values contained within lists.
pub mod fixed_size_list;
pub mod list;
2 changes: 2 additions & 0 deletions crates/polars-compute/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ pub mod cast;
pub mod comparisons;
pub mod filter;
pub mod float_sum;
#[cfg(feature = "gather")]
pub mod gather;
pub mod horizontal_flatten;
#[cfg(feature = "approx_unique")]
pub mod hyperloglogplus;
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ repository = { workspace = true }
description = "Core of the Polars DataFrame library"

[dependencies]
polars-compute = { workspace = true }
polars-compute = { workspace = true, features = ["gather"] }
polars-error = { workspace = true }
polars-row = { workspace = true }
polars-schema = { workspace = true }
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use arrow::compute::take::take_unchecked;
use arrow::offset::OffsetsBuffer;
use polars_compute::gather::take_unchecked;

use super::*;

Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/ops/gather.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use arrow::bitmap::bitmask::BitMask;
use arrow::bitmap::Bitmap;
use arrow::compute::take::take_unchecked;
use polars_compute::gather::take_unchecked;
use polars_error::polars_ensure;
use polars_utils::index::check_bounds;

Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/series/ops/reshape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ use std::borrow::Cow;

use arrow::array::*;
use arrow::bitmap::Bitmap;
use arrow::legacy::kernels::list::array_to_unit_list;
use arrow::offset::{Offsets, OffsetsBuffer};
use polars_compute::gather::sublist::list::array_to_unit_list;
use polars_error::{polars_bail, polars_ensure, PolarsResult};
use polars_utils::format_tuple;

Expand Down
2 changes: 1 addition & 1 deletion crates/polars-expr/src/hash_keys.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use arrow::array::{BinaryArray, PrimitiveArray, UInt64Array};
use arrow::compute::take::binary::take_unchecked;
use arrow::compute::utils::combine_validities_and_many;
use polars_compute::gather::binary::take_unchecked;
use polars_core::frame::DataFrame;
use polars_core::prelude::row_encode::_get_rows_encoded_unordered;
use polars_core::prelude::PlRandomState;
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-ops/src/chunked_array/array/get.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use arrow::array::Array;
use arrow::legacy::kernels::fixed_size_list::{
use polars_compute::gather::sublist::fixed_size_list::{
sub_fixed_size_list_get, sub_fixed_size_list_get_literal,
};
use polars_core::utils::align_chunks_binary;
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-ops/src/chunked_array/list/namespace.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::fmt::Write;

use arrow::array::ValueSize;
use arrow::legacy::kernels::list::{index_is_oob, sublist_get};
use polars_compute::gather::sublist::list::{index_is_oob, sublist_get};
use polars_core::chunked_array::builder::get_list_builder;
#[cfg(feature = "list_gather")]
use polars_core::export::num::ToPrimitive;
Expand Down

0 comments on commit 2ae01fb

Please sign in to comment.