Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/barak1412/polars into fix_s…
Browse files Browse the repository at this point in the history
…truct_missing_comparision_ops
  • Loading branch information
barak1412 committed Sep 28, 2024
2 parents bc51878 + 901b243 commit 87559e6
Show file tree
Hide file tree
Showing 248 changed files with 3,384 additions and 1,567 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/lint-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ['3.8', '3.12']
python-version: ['3.9', '3.12']

steps:
- uses: actions/checkout@v4
Expand All @@ -63,4 +63,4 @@ jobs:
# Allow untyped calls for older Python versions
- name: Run mypy
working-directory: py-polars
run: mypy ${{ (matrix.python-version == '3.8') && '--allow-untyped-calls' || '' }}
run: mypy ${{ (matrix.python-version == '3.9') && '--allow-untyped-calls' || '' }}
2 changes: 1 addition & 1 deletion .github/workflows/release-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ concurrency:
cancel-in-progress: true

env:
PYTHON_VERSION: '3.8'
PYTHON_VERSION: '3.9'
CARGO_INCREMENTAL: 0
CARGO_NET_RETRY: 10
RUSTUP_MAX_RETRIES: 10
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
python-version: ['3.8', '3.11', '3.12']
python-version: ['3.9', '3.12']
include:
- os: windows-latest
python-version: '3.12'
Expand Down
2 changes: 2 additions & 0 deletions crates/polars-arrow/src/array/fixed_size_list/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ impl Arrow2Arrow for FixedSizeListArray {

fn from_data(data: &ArrayData) -> Self {
let dtype: ArrowDataType = data.data_type().clone().into();
let length = data.len() - data.offset();
let size = match dtype {
ArrowDataType::FixedSizeList(_, size) => size,
_ => unreachable!("must be FixedSizeList type"),
Expand All @@ -28,6 +29,7 @@ impl Arrow2Arrow for FixedSizeListArray {

Self {
size,
length,
dtype,
values,
validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())),
Expand Down
14 changes: 11 additions & 3 deletions crates/polars-arrow/src/array/fixed_size_list/ffi.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use polars_error::PolarsResult;
use polars_error::{polars_ensure, PolarsResult};

use super::FixedSizeListArray;
use crate::array::ffi::{FromFfi, ToFfi};
Expand Down Expand Up @@ -31,11 +31,19 @@ unsafe impl ToFfi for FixedSizeListArray {
impl<A: ffi::ArrowArrayRef> FromFfi<A> for FixedSizeListArray {
unsafe fn try_from_ffi(array: A) -> PolarsResult<Self> {
let dtype = array.dtype().clone();
let (_, width) = FixedSizeListArray::try_child_and_size(&dtype)?;
let validity = unsafe { array.validity() }?;
let child = unsafe { array.child(0)? };
let child = unsafe { array.child(0) }?;
let values = ffi::try_from(child)?;

let mut fsl = Self::try_new(dtype, values, validity)?;
let length = if values.len() == 0 {
0
} else {
polars_ensure!(width > 0, InvalidOperation: "Zero-width array with values");
values.len() / width
};

let mut fsl = Self::try_new(dtype, length, values, validity)?;
fsl.slice(array.offset(), array.length());
Ok(fsl)
}
Expand Down
70 changes: 49 additions & 21 deletions crates/polars-arrow/src/array/fixed_size_list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@ mod iterator;

mod mutable;
pub use mutable::*;
use polars_error::{polars_bail, PolarsResult};
use polars_error::{polars_bail, polars_ensure, PolarsResult};
use polars_utils::pl_str::PlSmallStr;

/// The Arrow's equivalent to an immutable `Vec<Option<[T; size]>>` where `T` is an Arrow type.
/// Cloning and slicing this struct is `O(1)`.
#[derive(Clone)]
pub struct FixedSizeListArray {
size: usize, // this is redundant with `dtype`, but useful to not have to deconstruct the dtype.
length: usize, // invariant: this is values.len() / size if size > 0
dtype: ArrowDataType,
values: Box<dyn Array>,
validity: Option<Bitmap>,
Expand All @@ -34,6 +35,7 @@ impl FixedSizeListArray {
/// * the validity's length is not equal to `values.len() / size`.
pub fn try_new(
dtype: ArrowDataType,
length: usize,
values: Box<dyn Array>,
validity: Option<Bitmap>,
) -> PolarsResult<Self> {
Expand All @@ -45,34 +47,61 @@ impl FixedSizeListArray {
polars_bail!(ComputeError: "FixedSizeListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}.")
}

if values.len() % size != 0 {
polars_bail!(ComputeError:
"values (of len {}) must be a multiple of size ({}) in FixedSizeListArray.",
values.len(),
size
)
}
let len = values.len() / size;
polars_ensure!(size == 0 || values.len() % size == 0, ComputeError:
"values (of len {}) must be a multiple of size ({}) in FixedSizeListArray.",
values.len(),
size
);

polars_ensure!(size == 0 || values.len() / size == length, ComputeError:
"length of values ({}) is not equal to given length ({}) in FixedSizeListArray({size}).",
values.len() / size,
length,
);
polars_ensure!(size != 0 || values.len() == 0, ComputeError:
"zero width FixedSizeListArray has values (length = {}).",
values.len(),
);

if validity
.as_ref()
.map_or(false, |validity| validity.len() != len)
.map_or(false, |validity| validity.len() != length)
{
polars_bail!(ComputeError: "validity mask length must be equal to the number of values divided by size")
}

Ok(Self {
size,
length,
dtype,
values,
validity,
})
}

#[inline]
fn has_invariants(&self) -> bool {
let has_valid_length = (self.size == 0 && self.values().len() == 0)
|| (self.size > 0
&& self.values().len() % self.size() == 0
&& self.values().len() / self.size() == self.length);
let has_valid_validity = self
.validity
.as_ref()
.map_or(true, |v| v.len() == self.length);

has_valid_length && has_valid_validity
}

/// Alias to `Self::try_new(...).unwrap()`
#[track_caller]
pub fn new(dtype: ArrowDataType, values: Box<dyn Array>, validity: Option<Bitmap>) -> Self {
Self::try_new(dtype, values, validity).unwrap()
pub fn new(
dtype: ArrowDataType,
length: usize,
values: Box<dyn Array>,
validity: Option<Bitmap>,
) -> Self {
Self::try_new(dtype, length, values, validity).unwrap()
}

/// Returns the size (number of elements per slot) of this [`FixedSizeListArray`].
Expand All @@ -83,15 +112,15 @@ impl FixedSizeListArray {
/// Returns a new empty [`FixedSizeListArray`].
pub fn new_empty(dtype: ArrowDataType) -> Self {
let values = new_empty_array(Self::get_child_and_size(&dtype).0.dtype().clone());
Self::new(dtype, values, None)
Self::new(dtype, 0, values, None)
}

/// Returns a new null [`FixedSizeListArray`].
pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
let (field, size) = Self::get_child_and_size(&dtype);

let values = new_null_array(field.dtype().clone(), length * size);
Self::new(dtype, values, Some(Bitmap::new_zeroed(length)))
Self::new(dtype, length, values, Some(Bitmap::new_zeroed(length)))
}
}

Expand Down Expand Up @@ -124,6 +153,7 @@ impl FixedSizeListArray {
.filter(|bitmap| bitmap.unset_bits() > 0);
self.values
.slice_unchecked(offset * self.size, length * self.size);
self.length = length;
}

impl_sliced!();
Expand All @@ -136,7 +166,8 @@ impl FixedSizeListArray {
/// Returns the length of this array
#[inline]
pub fn len(&self) -> usize {
self.values.len() / self.size
debug_assert!(self.has_invariants());
self.length
}

/// The optional validity.
Expand Down Expand Up @@ -184,12 +215,7 @@ impl FixedSizeListArray {
impl FixedSizeListArray {
pub(crate) fn try_child_and_size(dtype: &ArrowDataType) -> PolarsResult<(&Field, usize)> {
match dtype.to_logical_type() {
ArrowDataType::FixedSizeList(child, size) => {
if *size == 0 {
polars_bail!(ComputeError: "FixedSizeBinaryArray expects a positive size")
}
Ok((child.as_ref(), *size))
},
ArrowDataType::FixedSizeList(child, size) => Ok((child.as_ref(), *size)),
_ => polars_bail!(ComputeError: "FixedSizeListArray expects DataType::FixedSizeList"),
}
}
Expand Down Expand Up @@ -233,12 +259,14 @@ impl Splitable for FixedSizeListArray {
(
Self {
dtype: self.dtype.clone(),
length: offset,
values: lhs_values,
validity: lhs_validity,
size,
},
Self {
dtype: self.dtype.clone(),
length: self.length - offset,
values: rhs_values,
validity: rhs_validity,
size,
Expand Down
Loading

0 comments on commit 87559e6

Please sign in to comment.