Skip to content

Commit

Permalink
Merge branch 'master' into melt-panic
Browse files Browse the repository at this point in the history
  • Loading branch information
edavisau committed Apr 13, 2024
2 parents c471160 + 92902e6 commit 6c86d88
Show file tree
Hide file tree
Showing 308 changed files with 8,104 additions and 5,830 deletions.
26 changes: 10 additions & 16 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,18 +52,18 @@ jobs:
id: cache-data
uses: actions/cache/restore@v4
with:
path: py-polars/tests/benchmark/G1_1e7_1e2_5_0.csv
path: py-polars/tests/benchmark/data/G1_1e7_1e2_5_0.csv
key: benchmark-data

- name: Set up R
if: steps.cache-data.outputs.cache-hit != 'true'
uses: r-lib/actions/setup-r@v2
with:
r-version: '3.5.3'
r-version: '4.3.3'

- name: Generate data
if: steps.cache-data.outputs.cache-hit != 'true'
working-directory: py-polars/tests/benchmark
working-directory: py-polars/tests/benchmark/data
run: |
Rscript -e 'install.packages("data.table", repos="https://cloud.r-project.org")'
Rscript groupby-datagen.R 1e7 1e2 5 0
Expand All @@ -72,7 +72,7 @@ jobs:
if: github.ref_name == 'main'
uses: actions/cache/save@v4
with:
path: py-polars/tests/benchmark/G1_1e7_1e2_5_0.csv
path: py-polars/tests/benchmark/data/G1_1e7_1e2_5_0.csv
key: ${{ steps.cache-data.outputs.cache-primary-key }}

- name: Set up Rust
Expand All @@ -93,18 +93,12 @@ jobs:
working-directory: py-polars
run: maturin develop --release -- -C codegen-units=8 -C lto=thin -C target-cpu=native

- name: Run H2O AI database benchmark - on strings
working-directory: py-polars/tests/benchmark
run: python run_h2oai_benchmark.py on_strings

- name: Run H2O AI database benchmark - on categoricals
working-directory: py-polars/tests/benchmark
run: python run_h2oai_benchmark.py

- name: Run various benchmark tests
working-directory: py-polars
run: pytest -m release --durations 0 -v
- name: Run benchmark tests
uses: CodSpeedHQ/action@v2
with:
working-directory: py-polars
run: pytest -m benchmark --codspeed -v

- name: Run non-benchmark tests
working-directory: py-polars
run: pytest -m 'not release and not debug' -n auto --dist loadgroup
run: pytest -m 'not benchmark and not debug' -n auto --dist loadgroup
4 changes: 2 additions & 2 deletions .github/workflows/codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,13 @@ jobs:
run: maturin develop

- name: Run Python tests
run: pytest --cov -n auto --dist loadgroup -m "not release and not docs" --cov-report xml:main.xml
run: pytest --cov -n auto --dist loadgroup -m "not benchmark and not docs" --cov-report xml:main.xml
continue-on-error: true

- name: Run Python tests - async reader
env:
POLARS_FORCE_ASYNC: 1
run: pytest --cov -m "not release and not docs" tests/unit/io/ --cov-report xml:async.xml
run: pytest --cov -m "not benchmark and not docs" tests/unit/io/ --cov-report xml:async.xml
continue-on-error: true

- name: Report coverage
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,13 @@ jobs:
# Currently skipped due to performance issues in coverage:
# https://github.com/nedbat/coveragepy/issues/1665
COV: ${{ !(matrix.os == 'ubuntu-latest' && matrix.python-version == '3.12') && '--cov' || '--no-cov' }}
run: pytest $COV -n auto --dist loadgroup -m "not release and not docs"
run: pytest $COV -n auto --dist loadgroup -m "not benchmark and not docs"

- name: Run tests async reader tests
if: github.ref_name != 'main' && matrix.os != 'windows-latest'
env:
POLARS_FORCE_ASYNC: 1
run: pytest -m "not release and not docs" tests/unit/io/
run: pytest -m "not benchmark and not docs" tests/unit/io/

- name: Check import without optional dependencies
if: github.ref_name != 'main' && matrix.python-version == '3.12' && matrix.os == 'ubuntu-latest'
Expand Down
4 changes: 3 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions crates/polars-arrow/src/array/boolean/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,31 @@ impl MutableBooleanArray {
}
}

/// Extends `MutableBooleanArray` by additional values of constant value.
#[inline]
pub fn extend_constant(&mut self, additional: usize, value: Option<bool>) {
match value {
Some(value) => {
self.values.extend_constant(additional, value);
if let Some(validity) = self.validity.as_mut() {
validity.extend_constant(additional, true);
}
},
None => {
self.values.extend_constant(additional, false);
if let Some(validity) = self.validity.as_mut() {
validity.extend_constant(additional, false)
} else {
self.init_validity();
self.validity
.as_mut()
.unwrap()
.extend_constant(additional, false)
};
},
};
}

fn init_validity(&mut self) {
let mut validity = MutableBitmap::with_capacity(self.values.capacity());
validity.extend_constant(self.len(), true);
Expand Down
65 changes: 27 additions & 38 deletions crates/polars-arrow/src/bitmap/bitmap_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,38 +5,33 @@ use super::Bitmap;
use crate::bitmap::MutableBitmap;
use crate::trusted_len::TrustedLen;

/// Creates a [Vec<u8>] from an [`Iterator`] of [`BitChunk`].
/// # Safety
/// The iterator must be [`TrustedLen`].
pub unsafe fn from_chunk_iter_unchecked<T: BitChunk, I: Iterator<Item = T>>(
iterator: I,
) -> Vec<u8> {
let (_, upper) = iterator.size_hint();
let upper = upper.expect("try_from_trusted_len_iter requires an upper limit");
let len = upper * std::mem::size_of::<T>();

let mut buffer = Vec::with_capacity(len);

let mut dst = buffer.as_mut_ptr();
for item in iterator {
let bytes = item.to_ne_bytes();
for i in 0..std::mem::size_of::<T>() {
std::ptr::write(dst, bytes[i]);
dst = dst.add(1);
}
}
assert_eq!(
dst.offset_from(buffer.as_ptr()) as usize,
len,
"Trusted iterator length was not accurately reported"
);
buffer.set_len(len);
buffer
/// Appends the native-endian bytes of a single [`BitChunk`] to `buffer`.
#[inline(always)]
pub(crate) fn push_bitchunk<T: BitChunk>(buffer: &mut Vec<u8>, value: T) {
    let bytes = value.to_ne_bytes();
    buffer.extend(bytes);
}

/// Creates a [`Vec<u8>`] from a [`TrustedLen`] of [`BitChunk`].
pub fn chunk_iter_to_vec<T: BitChunk, I: TrustedLen<Item = T>>(iter: I) -> Vec<u8> {
unsafe { from_chunk_iter_unchecked(iter) }
let cap = iter.size_hint().0 * std::mem::size_of::<T>();
let mut buffer = Vec::with_capacity(cap);
for v in iter {
push_bitchunk(&mut buffer, v)
}
buffer
}

/// Serialises every chunk yielded by `iter`, followed by one trailing
/// `remainder` chunk, into a byte vector using native-endian byte order.
///
/// The buffer is sized up front from the iterator's lower size hint plus one
/// slot for the remainder; debug builds check that exactly that many bytes
/// were written.
fn chunk_iter_to_vec_and_remainder<T: BitChunk, I: TrustedLen<Item = T>>(
    iter: I,
    remainder: T,
) -> Vec<u8> {
    let chunk_size = std::mem::size_of::<T>();
    let (full_chunks, _) = iter.size_hint();
    let expected_len = (full_chunks + 1) * chunk_size;

    let mut bytes = Vec::with_capacity(expected_len);
    iter.for_each(|chunk| push_bitchunk(&mut bytes, chunk));
    push_bitchunk(&mut bytes, remainder);

    debug_assert_eq!(bytes.len(), expected_len);
    bytes
}

/// Apply a bitwise operation `op` to four inputs and return the result as a [`Bitmap`].
Expand All @@ -62,9 +57,8 @@ where
.zip(a3_chunks)
.zip(a4_chunks)
.map(|(((a1, a2), a3), a4)| op(a1, a2, a3, a4));
let buffer =
chunk_iter_to_vec(chunks.chain(std::iter::once(op(rem_a1, rem_a2, rem_a3, rem_a4))));

let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_a1, rem_a2, rem_a3, rem_a4));
let length = a1.len();

Bitmap::from_u8_vec(buffer, length)
Expand All @@ -90,8 +84,7 @@ where
.zip(a3_chunks)
.map(|((a1, a2), a3)| op(a1, a2, a3));

let buffer = chunk_iter_to_vec(chunks.chain(std::iter::once(op(rem_a1, rem_a2, rem_a3))));

let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_a1, rem_a2, rem_a3));
let length = a1.len();

Bitmap::from_u8_vec(buffer, length)
Expand All @@ -112,8 +105,7 @@ where
.zip(rhs_chunks)
.map(|(left, right)| op(left, right));

let buffer = chunk_iter_to_vec(chunks.chain(std::iter::once(op(rem_lhs, rem_rhs))));

let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_lhs, rem_rhs));
let length = lhs.len();

Bitmap::from_u8_vec(buffer, length)
Expand All @@ -125,10 +117,7 @@ where
F: Fn(u64) -> u64,
{
let rem = op(iter.remainder());

let iterator = iter.map(op).chain(std::iter::once(rem));

let buffer = chunk_iter_to_vec(iterator);
let buffer = chunk_iter_to_vec_and_remainder(iter.map(op), rem);

Bitmap::from_u8_vec(buffer, length)
}
Expand Down
61 changes: 60 additions & 1 deletion crates/polars-arrow/src/compute/utils.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use std::borrow::Borrow;
use std::ops::{BitAnd, BitOr};

use polars_error::{polars_ensure, PolarsResult};

use crate::array::Array;
use crate::bitmap::{and_not, ternary, Bitmap};
use crate::bitmap::{and_not, push_bitchunk, ternary, Bitmap};

pub fn combine_validities_and3(
opt1: Option<&Bitmap>,
Expand All @@ -30,6 +31,7 @@ pub fn combine_validities_and(opt_l: Option<&Bitmap>, opt_r: Option<&Bitmap>) ->
(None, None) => None,
}
}

pub fn combine_validities_or(opt_l: Option<&Bitmap>, opt_r: Option<&Bitmap>) -> Option<Bitmap> {
match (opt_l, opt_r) {
(Some(l), Some(r)) => Some(l.bitor(r)),
Expand All @@ -48,6 +50,63 @@ pub fn combine_validities_and_not(
}
}

/// Combines any number of optional validity bitmaps with a bitwise AND.
///
/// `None` entries are ignored. Returns `None` when no bitmap is present.
/// One, two or three bitmaps are delegated to the dedicated helpers. Four or
/// more are AND-ed together one `u64` word at a time; in that case `None` is
/// also returned when the combined bitmap has no unset bits, i.e. there are
/// no nulls to track.
///
/// NOTE(review): the word-wise path sizes the result from the first bitmap and
/// appears to assume all bitmaps have the same length — confirm with callers.
pub fn combine_validities_and_many<B: Borrow<Bitmap>>(bitmaps: &[Option<B>]) -> Option<Bitmap> {
    let mut bitmaps = bitmaps
        .iter()
        .flatten()
        .map(|b| b.borrow())
        .collect::<Vec<_>>();

    match bitmaps.len() {
        0 => None,
        1 => bitmaps.pop().cloned(),
        2 => combine_validities_and(bitmaps.pop(), bitmaps.pop()),
        3 => combine_validities_and3(bitmaps.pop(), bitmaps.pop(), bitmaps.pop()),
        _ => {
            let mut iterators = bitmaps
                .iter()
                .map(|v| v.fast_iter_u64())
                .collect::<Vec<_>>();

            // `buffer` holds bytes, so reserve one u64 worth of bytes per full
            // word plus room for up to two remainder words.
            let full_words = iterators[0].size_hint().0;
            let mut buffer = Vec::with_capacity((full_words + 2) * std::mem::size_of::<u64>());

            'rows: loop {
                // Start from all ones: the identity element of `&`.
                let mut out = u64::MAX;
                for iter in iterators.iter_mut() {
                    if let Some(v) = iter.next() {
                        out &= v
                    } else {
                        break 'rows;
                    }
                }
                push_bitchunk(&mut buffer, out);
            }

            // AND the trailing bits that did not fill a whole word.
            // Start from all ones: the identity element of `&`.
            let mut out = [u64::MAX, u64::MAX];
            let mut len = 0;
            for iter in iterators.into_iter() {
                let (rem, rem_len) = iter.remainder();
                len = rem_len;

                for (out, rem) in out.iter_mut().zip(rem) {
                    *out &= rem;
                }
            }
            push_bitchunk(&mut buffer, out[0]);
            if len > 64 {
                push_bitchunk(&mut buffer, out[1]);
            }

            let bitmap = Bitmap::from_u8_vec(buffer, bitmaps[0].len());
            // Only drop the bitmap when every slot is valid. An all-unset
            // bitmap means every slot is null and must be kept; returning
            // `None` for it would report the result as fully valid.
            if bitmap.unset_bits() == 0 {
                None
            } else {
                Some(bitmap)
            }
        },
    }
}

// Errors iff the two arrays have a different length.
#[inline]
pub fn check_same_len(lhs: &dyn Array, rhs: &dyn Array) -> PolarsResult<()> {
Expand Down
26 changes: 22 additions & 4 deletions crates/polars-arrow/src/legacy/kernels/fixed_size_list.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use polars_error::{polars_bail, PolarsResult};
use polars_utils::index::NullCount;
use polars_utils::IdxSize;

use crate::array::{ArrayRef, FixedSizeListArray, PrimitiveArray};
Expand Down Expand Up @@ -38,18 +40,34 @@ fn sub_fixed_size_list_get_indexes(width: usize, index: &PrimitiveArray<i64>) ->
.collect_trusted()
}

pub fn sub_fixed_size_list_get_literal(arr: &FixedSizeListArray, index: i64) -> ArrayRef {
pub fn sub_fixed_size_list_get_literal(
arr: &FixedSizeListArray,
index: i64,
null_on_oob: bool,
) -> PolarsResult<ArrayRef> {
let take_by = sub_fixed_size_list_get_indexes_literal(arr.size(), arr.len(), index);
if !null_on_oob && take_by.null_count() > 0 {
polars_bail!(ComputeError: "get index is out of bounds");
}

let values = arr.values();
// SAFETY:
// the indices we generate are in bounds
unsafe { take_unchecked(&**values, &take_by) }
unsafe { Ok(take_unchecked(&**values, &take_by)) }
}

pub fn sub_fixed_size_list_get(arr: &FixedSizeListArray, index: &PrimitiveArray<i64>) -> ArrayRef {
pub fn sub_fixed_size_list_get(
arr: &FixedSizeListArray,
index: &PrimitiveArray<i64>,
null_on_oob: bool,
) -> PolarsResult<ArrayRef> {
let take_by = sub_fixed_size_list_get_indexes(arr.size(), index);
if !null_on_oob && take_by.null_count() > 0 {
polars_bail!(ComputeError: "get index is out of bounds");
}

let values = arr.values();
// SAFETY:
// the indices we generate are in bounds
unsafe { take_unchecked(&**values, &take_by) }
unsafe { Ok(take_unchecked(&**values, &take_by)) }
}
Loading

0 comments on commit 6c86d88

Please sign in to comment.