Skip to content

Commit

Permalink
perf: Dispatch Parquet Primitive PLAIN decoding to faster kernels when possible (#19611)
Browse files Browse the repository at this point in the history
  • Loading branch information
coastalwhite authored Nov 4, 2024
1 parent 0b0a914 commit 5210697
Show file tree
Hide file tree
Showing 5 changed files with 389 additions and 56 deletions.
100 changes: 99 additions & 1 deletion crates/polars-arrow/src/bitmap/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::sync::LazyLock;
use either::Either;
use polars_error::{polars_bail, PolarsResult};

use super::utils::{count_zeros, fmt, get_bit_unchecked, BitChunk, BitChunks, BitmapIter};
use super::utils::{self, count_zeros, fmt, get_bit_unchecked, BitChunk, BitChunks, BitmapIter};
use super::{chunk_iter_to_vec, intersects_with, num_intersections_with, IntoIter, MutableBitmap};
use crate::array::Splitable;
use crate::bitmap::aligned::AlignedBitmapSlice;
Expand Down Expand Up @@ -532,6 +532,104 @@ impl Bitmap {
/// Returns the number of edges in this [`Bitmap`].
// NOTE(review): delegates to `bitmap_ops::num_edges`; an "edge" is presumably a
// `0 -> 1` or `1 -> 0` transition between adjacent bits — confirm in `bitmap_ops`.
pub fn num_edges(&self) -> usize {
super::bitmap_ops::num_edges(self)
}

/// Returns the number of zero bits from the start before a one bit is seen
///
/// Equals the bitmap's length when it contains no set bits.
pub fn leading_zeros(&self) -> usize {
utils::leading_zeros(&self.storage, self.offset, self.length)
}
/// Returns the number of one bits from the start before a zero bit is seen
///
/// Equals the bitmap's length when every bit is set.
pub fn leading_ones(&self) -> usize {
utils::leading_ones(&self.storage, self.offset, self.length)
}
/// Returns the number of zero bits from the back before a one bit is seen
///
/// Counts from the logical end of the bitmap. Equals the bitmap's length when it
/// contains no set bits.
pub fn trailing_zeros(&self) -> usize {
utils::trailing_zeros(&self.storage, self.offset, self.length)
}
/// Returns the number of one bits from the back before a zero bit is seen
///
/// Counts from the logical end of the bitmap. Equals the bitmap's length when every
/// bit is set.
// FIX(review): this accessor does not mutate the bitmap, so it takes `&self` like its
// `leading_zeros`/`leading_ones`/`trailing_zeros` siblings. The previous `&mut self`
// receiver needlessly forced callers to hold a unique borrow; relaxing it is
// backward-compatible for all call sites.
pub fn trailing_ones(&self) -> usize {
    utils::trailing_ones(&self.storage, self.offset, self.length)
}

/// Take all `0` bits at the start of the [`Bitmap`] before a `1` is seen, returning how many
/// bits were taken
pub fn take_leading_zeros(&mut self) -> usize {
    // Fast path: a cached unset-bit count equal to the length means every bit is zero,
    // so the whole bitmap is consumed.
    if self
        .lazy_unset_bits()
        .is_some_and(|unset_bits| unset_bits == self.length)
    {
        let num_taken = self.length;
        self.offset += num_taken;
        self.length = 0;
        *self.unset_bit_count_cache.get_mut() = 0;
        return num_taken;
    }

    let num_taken = self.leading_zeros();
    self.offset += num_taken;
    self.length -= num_taken;

    // Every removed bit was a zero, so a valid cached unset-bit count shrinks by
    // exactly the number of bits taken.
    let cache = self.unset_bit_count_cache.get_mut();
    if has_cached_unset_bit_count(*cache) {
        *cache -= num_taken as u64;
    }
    num_taken
}
/// Take all `1` bits at the start of the [`Bitmap`] before a `0` is seen, returning how many
/// bits were taken
pub fn take_leading_ones(&mut self) -> usize {
    // Fast path: a cached unset-bit count of zero means every bit is one, so the
    // whole bitmap is consumed.
    if self.lazy_unset_bits() == Some(0) {
        let num_taken = self.length;
        self.offset += num_taken;
        self.length = 0;
        *self.unset_bit_count_cache.get_mut() = 0;
        return num_taken;
    }

    let num_taken = self.leading_ones();
    self.offset += num_taken;
    self.length -= num_taken;
    // Only `1` bits were removed, so a cached unset-bit count remains valid unchanged.
    num_taken
}
/// Take all `0` bits at the back of the [`Bitmap`] before a `1` is seen, returning how many
/// bits were taken
pub fn take_trailing_zeros(&mut self) -> usize {
    // Fast path: a cached unset-bit count equal to the length means every bit is zero,
    // so the whole bitmap is consumed. The offset is untouched — we trim from the back.
    if self
        .lazy_unset_bits()
        .is_some_and(|unset_bits| unset_bits == self.length)
    {
        let num_taken = self.length;
        self.length = 0;
        *self.unset_bit_count_cache.get_mut() = 0;
        return num_taken;
    }

    let num_taken = self.trailing_zeros();
    self.length -= num_taken;

    // Every removed bit was a zero, so a valid cached unset-bit count shrinks by
    // exactly the number of bits taken.
    let cache = self.unset_bit_count_cache.get_mut();
    if has_cached_unset_bit_count(*cache) {
        *cache -= num_taken as u64;
    }
    num_taken
}
/// Take all `1` bits at the back of the [`Bitmap`] before a `0` is seen, returning how many
/// bits were taken
pub fn take_trailing_ones(&mut self) -> usize {
    // Fast path: a cached unset-bit count of zero means every bit is one, so the
    // whole bitmap is consumed. The offset is untouched — we trim from the back.
    if self.lazy_unset_bits() == Some(0) {
        let num_taken = self.length;
        self.length = 0;
        *self.unset_bit_count_cache.get_mut() = 0;
        return num_taken;
    }

    let num_taken = self.trailing_ones();
    self.length -= num_taken;
    // Only `1` bits were removed, so a cached unset-bit count remains valid unchanged.
    num_taken
}
}

impl<P: AsRef<[bool]>> From<P> for Bitmap {
Expand Down
210 changes: 210 additions & 0 deletions crates/polars-arrow/src/bitmap/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,213 @@ pub fn count_zeros(slice: &[u8], offset: usize, len: usize) -> usize {
let ones_in_suffix = aligned.suffix().count_ones() as usize;
len - ones_in_prefix - ones_in_bulk - ones_in_suffix
}

/// Returns the number of zero bits before the first one bit in the bit-slice that starts at
/// bit `offset` of `slice` and is `len` bits long.
///
/// # Panics
/// This function panics iff `offset + len > 8 * slice.len()`.
pub fn leading_zeros(slice: &[u8], offset: usize, len: usize) -> usize {
    if len == 0 {
        return 0;
    }

    assert!(offset + len <= 8 * slice.len());

    let aligned = AlignedBitmapSlice::<u64>::new(slice, offset, len);

    // A one inside the (partial) prefix word terminates the run immediately. The clamp
    // is needed because bits past the prefix length are zero and would otherwise be
    // counted as part of the run.
    let prefix_zeros = (aligned.prefix().trailing_zeros() as usize).min(aligned.prefix_bitlen());
    if prefix_zeros < aligned.prefix_bitlen() {
        return prefix_zeros;
    }

    // The prefix was all zeros: locate the first bulk word containing a one.
    match aligned.bulk_iter().position(|w| w != 0) {
        Some(idx) => {
            aligned.prefix_bitlen() + idx * 64 + aligned.bulk()[idx].trailing_zeros() as usize
        },
        None => {
            // Prefix and bulk were all zeros: the run extends into the suffix.
            aligned.prefix_bitlen()
                + aligned.bulk_bitlen()
                + (aligned.suffix().trailing_zeros() as usize).min(aligned.suffix_bitlen())
        },
    }
}

/// Returns the number of one bits before the first zero bit in the bit-slice that starts at
/// bit `offset` of `slice` and is `len` bits long.
///
/// # Panics
/// This function panics iff `offset + len > 8 * slice.len()`.
pub fn leading_ones(slice: &[u8], offset: usize, len: usize) -> usize {
    if len == 0 {
        return 0;
    }

    assert!(offset + len <= 8 * slice.len());

    let aligned = AlignedBitmapSlice::<u64>::new(slice, offset, len);

    // A zero inside the (partial) prefix word terminates the run immediately. No clamp
    // is needed here: this relies on bits past the prefix length being zero in
    // `prefix()`, so `trailing_ones` cannot overshoot the prefix length.
    let prefix_ones = aligned.prefix().trailing_ones() as usize;
    if prefix_ones < aligned.prefix_bitlen() {
        return prefix_ones;
    }

    // The prefix was all ones: locate the first bulk word containing a zero.
    match aligned.bulk_iter().position(|w| w != u64::MAX) {
        Some(idx) => {
            aligned.prefix_bitlen() + idx * 64 + aligned.bulk()[idx].trailing_ones() as usize
        },
        None => {
            // Prefix and bulk were all ones: the run extends into the suffix.
            aligned.prefix_bitlen()
                + aligned.bulk_bitlen()
                + aligned.suffix().trailing_ones() as usize
        },
    }
}

/// Returns the number of zero bits at the back of the bit-slice that starts at bit `offset`
/// of `slice` and is `len` bits long, before a one bit is seen.
///
/// # Panics
/// This function panics iff `offset + len > 8 * slice.len()`.
pub fn trailing_zeros(slice: &[u8], offset: usize, len: usize) -> usize {
if len == 0 {
return 0;
}

assert!(8 * slice.len() >= offset + len);

let aligned = AlignedBitmapSlice::<u64>::new(slice, offset, len);
// Shift the suffix bits up to the top of the word so that `leading_zeros` counts from
// the logical end of the bitmap. The `% 64` keeps the shift amount in range when
// suffix_bitlen == 0; the `min` clamps because bits below the suffix end up as zeros
// after the shift and must not be counted.
let trailing_zeros_in_suffix = ((aligned.suffix() << ((64 - aligned.suffix_bitlen()) % 64))
.leading_zeros() as usize)
.min(aligned.suffix_bitlen());
if trailing_zeros_in_suffix < aligned.suffix_bitlen() {
return trailing_zeros_in_suffix;
}
// Suffix was all zeros: walk the bulk words from the back to the first word with a one.
// `position` on the reversed iterator counts the fully-zero words at the back.
if let Some(full_zero_bulk_words) = aligned.bulk_iter().rev().position(|w| w != 0) {
return aligned.suffix_bitlen()
+ full_zero_bulk_words * 64
+ aligned.bulk()[aligned.bulk().len() - full_zero_bulk_words - 1].leading_zeros()
as usize;
}

// Suffix and bulk were all zeros: the run extends into the prefix word, handled with
// the same shift-and-clamp trick as the suffix.
let trailing_zeros_in_prefix = ((aligned.prefix() << ((64 - aligned.prefix_bitlen()) % 64))
.leading_zeros() as usize)
.min(aligned.prefix_bitlen());
aligned.suffix_bitlen() + aligned.bulk_bitlen() + trailing_zeros_in_prefix
}

/// Returns the number of one bits at the back of the bit-slice that starts at bit `offset`
/// of `slice` and is `len` bits long, before a zero bit is seen.
///
/// # Panics
/// This function panics iff `offset + len > 8 * slice.len()`.
pub fn trailing_ones(slice: &[u8], offset: usize, len: usize) -> usize {
if len == 0 {
return 0;
}

assert!(8 * slice.len() >= offset + len);

let aligned = AlignedBitmapSlice::<u64>::new(slice, offset, len);
// Shift the suffix bits up to the top of the word so that `leading_ones` counts from
// the logical end of the bitmap; `% 64` keeps the shift in range when
// suffix_bitlen == 0. No clamp is needed: bits above the suffix length are
// presumably zero in `suffix()` (the code relies on this), so the count cannot
// exceed suffix_bitlen.
let trailing_ones_in_suffix =
(aligned.suffix() << ((64 - aligned.suffix_bitlen()) % 64)).leading_ones() as usize;
if trailing_ones_in_suffix < aligned.suffix_bitlen() {
return trailing_ones_in_suffix;
}
// Suffix was all ones: walk the bulk words from the back to the first word with a zero.
// `position` on the reversed iterator counts the fully-one words at the back.
if let Some(full_one_bulk_words) = aligned.bulk_iter().rev().position(|w| w != u64::MAX) {
return aligned.suffix_bitlen()
+ full_one_bulk_words * 64
+ aligned.bulk()[aligned.bulk().len() - full_one_bulk_words - 1].leading_ones()
as usize;
}

// Suffix and bulk were all ones: the run extends into the prefix word, handled with
// the same shift trick as the suffix.
let trailing_ones_in_prefix =
(aligned.prefix() << ((64 - aligned.prefix_bitlen()) % 64)).leading_ones() as usize;
aligned.suffix_bitlen() + aligned.bulk_bitlen() + trailing_ones_in_prefix
}

#[cfg(test)]
mod tests {
use rand::Rng;

use super::*;
use crate::bitmap::Bitmap;

// Exhaustive hand-written cases for all four kernels on tiny inputs, including
// non-zero offsets so the prefix/suffix handling is exercised.
#[test]
fn leading_trailing() {
// Runs all four kernels against the same (slice, offset, length) input and checks
// each against its expected count.
macro_rules! testcase {
($slice:expr, $offset:expr, $length:expr => lz=$lz:expr,lo=$lo:expr,tz=$tz:expr,to=$to:expr) => {
assert_eq!(
leading_zeros($slice, $offset, $length),
$lz,
"leading_zeros"
);
assert_eq!(leading_ones($slice, $offset, $length), $lo, "leading_ones");
assert_eq!(
trailing_zeros($slice, $offset, $length),
$tz,
"trailing_zeros"
);
assert_eq!(
trailing_ones($slice, $offset, $length),
$to,
"trailing_ones"
);
};
}

// Degenerate and single-bit cases.
testcase!(&[], 0, 0 => lz=0,lo=0,tz=0,to=0);
testcase!(&[0], 0, 1 => lz=1,lo=0,tz=1,to=0);
testcase!(&[1], 0, 1 => lz=0,lo=1,tz=0,to=1);

// Three-bit patterns at offset 0.
testcase!(&[0b010], 0, 3 => lz=1,lo=0,tz=1,to=0);
testcase!(&[0b101], 0, 3 => lz=0,lo=1,tz=0,to=1);
testcase!(&[0b100], 0, 3 => lz=2,lo=0,tz=0,to=1);
testcase!(&[0b110], 0, 3 => lz=1,lo=0,tz=0,to=2);
testcase!(&[0b001], 0, 3 => lz=0,lo=1,tz=2,to=0);
testcase!(&[0b011], 0, 3 => lz=0,lo=2,tz=1,to=0);

// Same patterns viewed at offset 1, length 2.
testcase!(&[0b010], 1, 2 => lz=0,lo=1,tz=1,to=0);
testcase!(&[0b101], 1, 2 => lz=1,lo=0,tz=0,to=1);
testcase!(&[0b100], 1, 2 => lz=1,lo=0,tz=0,to=1);
testcase!(&[0b110], 1, 2 => lz=0,lo=2,tz=0,to=2);
testcase!(&[0b001], 1, 2 => lz=2,lo=0,tz=2,to=0);
testcase!(&[0b011], 1, 2 => lz=0,lo=1,tz=1,to=0);
}

// Randomized comparison of each kernel against a naive iterator-based reference,
// over random offsets, lengths, and trailing padding.
#[ignore = "Fuzz test. Too slow"]
#[test]
fn leading_trailing_fuzz() {
let mut rng = rand::thread_rng();

const SIZE: usize = 1000;
const REPEATS: usize = 10_000;

let mut v = Vec::<bool>::with_capacity(SIZE);

for _ in 0..REPEATS {
v.clear();
let offset = rng.gen_range(0..SIZE);
let length = rng.gen_range(0..SIZE - offset);
let extra_padding = rng.gen_range(0..64);

// Fill `v` with at least offset + length random bits (plus padding), generated in
// random-sized chunks.
let mut num_remaining = usize::min(SIZE, offset + length + extra_padding);
while num_remaining > 0 {
let chunk_size = rng.gen_range(1..=num_remaining);
v.extend(
rng.clone()
.sample_iter(rand::distributions::Slice::new(&[false, true]).unwrap())
.take(chunk_size),
);
num_remaining -= chunk_size;
}

// Reference counts computed directly from the bool slice.
let v_slice = &v[offset..offset + length];
let lz = v_slice.iter().take_while(|&v| !*v).count();
let lo = v_slice.iter().take_while(|&v| *v).count();
let tz = v_slice.iter().rev().take_while(|&v| !*v).count();
let to = v_slice.iter().rev().take_while(|&v| *v).count();

let bm = Bitmap::from_iter(v.iter().copied());
let (slice, _, _) = bm.as_slice();

assert_eq!(leading_zeros(slice, offset, length), lz);
assert_eq!(leading_ones(slice, offset, length), lo);
assert_eq!(trailing_zeros(slice, offset, length), tz);
assert_eq!(trailing_ones(slice, offset, length), to);
}
}
}
Loading

0 comments on commit 5210697

Please sign in to comment.