From 3fe10a3f23860898ab231b7fdb2a27c7886e83de Mon Sep 17 00:00:00 2001 From: Gijs Burghoorn Date: Fri, 1 Nov 2024 12:36:31 +0100 Subject: [PATCH] perf: Branchless Parquet Prefiltering (#19190) --- crates/polars-arrow/src/array/binview/mod.rs | 12 +- .../polars-arrow/src/array/binview/mutable.rs | 6 +- crates/polars-arrow/src/array/binview/view.rs | 13 + crates/polars-arrow/src/bitmap/bitmask.rs | 41 +- crates/polars-arrow/src/bitmap/iterator.rs | 1 + crates/polars-arrow/src/buffer/immutable.rs | 4 +- crates/polars-arrow/src/pushable.rs | 1 + crates/polars-arrow/src/storage.rs | 6 +- .../polars-arrow/src/types/aligned_bytes.rs | 62 +- crates/polars-compute/src/filter/mod.rs | 1 + .../polars-io/src/parquet/read/read_impl.rs | 35 +- .../src/arrow/read/deserialize/binview.rs | 1091 +++++++---------- .../src/arrow/read/deserialize/boolean.rs | 422 ++++--- .../src/arrow/read/deserialize/dictionary.rs | 193 +-- .../read/deserialize/fixed_size_binary.rs | 648 ++++++---- .../arrow/read/deserialize/nested_utils.rs | 853 +++++-------- .../src/arrow/read/deserialize/null.rs | 78 +- .../arrow/read/deserialize/primitive/float.rs | 252 +--- .../read/deserialize/primitive/integer.rs | 323 ++--- .../arrow/read/deserialize/primitive/mod.rs | 191 +-- .../arrow/read/deserialize/primitive/plain.rs | 405 ++++++ .../read/deserialize/utils/array_chunks.rs | 48 +- .../read/deserialize/utils/dict_encoded.rs | 856 +++++++++++++ .../arrow/read/deserialize/utils/filter.rs | 7 + .../src/arrow/read/deserialize/utils/mod.rs | 744 ++++------- .../src/parquet/encoding/bitpacked/decode.rs | 41 +- .../src/parquet/encoding/bitpacked/mod.rs | 34 +- .../src/parquet/encoding/bitpacked/pack.rs | 1 + .../src/parquet/encoding/bitpacked/unpack.rs | 1 + .../encoding/delta_bitpacked/decoder.rs | 40 - .../parquet/encoding/delta_bitpacked/fuzz.rs | 76 -- .../parquet/encoding/delta_bitpacked/mod.rs | 3 +- .../encoding/delta_byte_array/decoder.rs | 64 +- .../delta_length_byte_array/decoder.rs | 15 +- 
.../parquet/encoding/hybrid_rle/buffered.rs | 280 ----- .../src/parquet/encoding/hybrid_rle/fuzz.rs | 390 ------ .../parquet/encoding/hybrid_rle/gatherer.rs | 545 -------- .../src/parquet/encoding/hybrid_rle/mod.rs | 407 ++---- crates/polars-parquet/src/parquet/types.rs | 23 +- crates/polars-utils/src/chunks.rs | 63 + crates/polars-utils/src/lib.rs | 1 + .../polars/tests/it/io/parquet/read/binary.rs | 9 +- crates/polars/tests/it/io/parquet/read/mod.rs | 33 +- .../tests/it/io/parquet/read/primitive.rs | 9 +- py-polars/tests/unit/io/test_parquet.py | 152 ++- 45 files changed, 3728 insertions(+), 4752 deletions(-) create mode 100644 crates/polars-parquet/src/arrow/read/deserialize/primitive/plain.rs create mode 100644 crates/polars-parquet/src/arrow/read/deserialize/utils/dict_encoded.rs delete mode 100644 crates/polars-parquet/src/parquet/encoding/delta_bitpacked/fuzz.rs delete mode 100644 crates/polars-parquet/src/parquet/encoding/hybrid_rle/buffered.rs delete mode 100644 crates/polars-parquet/src/parquet/encoding/hybrid_rle/fuzz.rs delete mode 100644 crates/polars-parquet/src/parquet/encoding/hybrid_rle/gatherer.rs create mode 100644 crates/polars-utils/src/chunks.rs diff --git a/crates/polars-arrow/src/array/binview/mod.rs b/crates/polars-arrow/src/array/binview/mod.rs index b257e13ad7d7..96c591d52cf6 100644 --- a/crates/polars-arrow/src/array/binview/mod.rs +++ b/crates/polars-arrow/src/array/binview/mod.rs @@ -161,6 +161,10 @@ impl BinaryViewArrayGeneric { // Verify the invariants #[cfg(debug_assertions)] { + if let Some(validity) = validity.as_ref() { + assert_eq!(validity.len(), views.len()); + } + // @TODO: Enable this. This is currently bugged with concatenate. 
// let mut actual_total_buffer_len = 0; // let mut actual_total_bytes_len = 0; @@ -169,7 +173,13 @@ impl BinaryViewArrayGeneric { // actual_total_buffer_len += buffer.len(); // } - for view in views.iter() { + for (i, view) in views.iter().enumerate() { + let is_valid = validity.as_ref().map_or(true, |v| v.get_bit(i)); + + if !is_valid { + continue; + } + // actual_total_bytes_len += view.length as usize; if view.length > View::MAX_INLINE_SIZE { assert!((view.buffer_idx as usize) < (buffers.len())); diff --git a/crates/polars-arrow/src/array/binview/mutable.rs b/crates/polars-arrow/src/array/binview/mutable.rs index 1ce32300f7dd..0e8469719417 100644 --- a/crates/polars-arrow/src/array/binview/mutable.rs +++ b/crates/polars-arrow/src/array/binview/mutable.rs @@ -508,7 +508,7 @@ impl MutableBinaryViewArray { Self::from_iterator(slice.as_ref().iter().map(|opt_v| opt_v.as_ref())) } - fn finish_in_progress(&mut self) -> bool { + pub fn finish_in_progress(&mut self) -> bool { if !self.in_progress_buffer.is_empty() { self.completed_buffers .push(std::mem::take(&mut self.in_progress_buffer).into()); @@ -530,6 +530,10 @@ impl MutableBinaryViewArray { arr } + pub fn take(self) -> (Vec, Vec>) { + (self.views, self.completed_buffers) + } + #[inline] pub fn value(&self, i: usize) -> &T { assert!(i < self.len()); diff --git a/crates/polars-arrow/src/array/binview/view.rs b/crates/polars-arrow/src/array/binview/view.rs index 83123814f1d8..70285aacead5 100644 --- a/crates/polars-arrow/src/array/binview/view.rs +++ b/crates/polars-arrow/src/array/binview/view.rs @@ -153,6 +153,19 @@ impl View { } } + /// Construct a byte slice from an inline view. + /// + /// # Safety + /// + /// Assumes that this view is inlinable. 
+ pub unsafe fn get_inlined_slice_unchecked(&self) -> &[u8] { + debug_assert!(self.length <= View::MAX_INLINE_SIZE); + + let ptr = self as *const View as *const u8; + // SAFETY: Invariant of function + unsafe { std::slice::from_raw_parts(ptr.add(4), self.length as usize) } + } + /// Extend a `Vec` with inline views slices of `src` with `width`. /// /// This tries to use SIMD to optimize the copying and can be massively faster than doing a diff --git a/crates/polars-arrow/src/bitmap/bitmask.rs b/crates/polars-arrow/src/bitmap/bitmask.rs index 4d6457c07956..2e4e45195266 100644 --- a/crates/polars-arrow/src/bitmap/bitmask.rs +++ b/crates/polars-arrow/src/bitmap/bitmask.rs @@ -3,6 +3,8 @@ use std::simd::{LaneCount, Mask, MaskElement, SupportedLaneCount}; use polars_utils::slice::load_padded_le_u64; +use super::iterator::FastU56BitmapIter; +use super::utils::{count_zeros, BitmapIter}; use crate::bitmap::Bitmap; /// Returns the nth set bit in w, if n+1 bits are set. The indexing is @@ -110,6 +112,39 @@ impl<'a> BitMask<'a> { (left, right) } + #[inline] + pub fn sliced(&self, offset: usize, length: usize) -> Self { + assert!(offset.checked_add(length).unwrap() <= self.len); + unsafe { self.sliced_unchecked(offset, length) } + } + + /// # Safety + /// The index must be in-bounds. 
+ #[inline] + pub unsafe fn sliced_unchecked(&self, offset: usize, length: usize) -> Self { + if cfg!(debug_assertions) { + assert!(offset.checked_add(length).unwrap() <= self.len); + } + + Self { + bytes: self.bytes, + offset: self.offset + offset, + len: length, + } + } + + pub fn unset_bits(&self) -> usize { + count_zeros(self.bytes, self.offset, self.len) + } + + pub fn set_bits(&self) -> usize { + self.len - self.unset_bits() + } + + pub fn fast_iter_u56(&self) -> FastU56BitmapIter { + FastU56BitmapIter::new(self.bytes, self.offset, self.len) + } + #[cfg(feature = "simd")] #[inline] pub fn get_simd(&self, idx: usize) -> Mask @@ -162,7 +197,7 @@ impl<'a> BitMask<'a> { /// Computes the index of the nth set bit after start. /// - /// Both are zero-indexed, so nth_set_bit_idx(0, 0) finds the index of the + /// Both are zero-indexed, so `nth_set_bit_idx(0, 0)` finds the index of the /// first bit set (which can be 0 as well). The returned index is absolute, /// not relative to start. pub fn nth_set_bit_idx(&self, mut n: usize, mut start: usize) -> Option { @@ -245,6 +280,10 @@ impl<'a> BitMask<'a> { false } } + + pub fn iter(&self) -> BitmapIter { + BitmapIter::new(self.bytes, self.offset, self.len) + } } #[cfg(test)] diff --git a/crates/polars-arrow/src/bitmap/iterator.rs b/crates/polars-arrow/src/bitmap/iterator.rs index 84e0a2d7a985..b3daffd593e1 100644 --- a/crates/polars-arrow/src/bitmap/iterator.rs +++ b/crates/polars-arrow/src/bitmap/iterator.rs @@ -173,6 +173,7 @@ impl Iterator for FastU32BitmapIter<'_> { unsafe impl TrustedLen for FastU32BitmapIter<'_> {} +#[derive(Clone)] pub struct FastU56BitmapIter<'a> { bytes: &'a [u8], shift: u32, diff --git a/crates/polars-arrow/src/buffer/immutable.rs b/crates/polars-arrow/src/buffer/immutable.rs index 1c6e5b5aa4ff..a3ad6721a6f8 100644 --- a/crates/polars-arrow/src/buffer/immutable.rs +++ b/crates/polars-arrow/src/buffer/immutable.rs @@ -79,7 +79,7 @@ impl Buffer { } /// Auxiliary method to create a new Buffer - 
pub(crate) fn from_storage(storage: SharedStorage) -> Self { + pub fn from_storage(storage: SharedStorage) -> Self { let ptr = storage.as_ptr(); let length = storage.len(); Buffer { @@ -164,6 +164,8 @@ impl Buffer { #[inline] #[must_use] pub unsafe fn sliced_unchecked(mut self, offset: usize, length: usize) -> Self { + debug_assert!(offset + length <= self.len()); + self.slice_unchecked(offset, length); self } diff --git a/crates/polars-arrow/src/pushable.rs b/crates/polars-arrow/src/pushable.rs index 7b8857ab3a15..29464b8df679 100644 --- a/crates/polars-arrow/src/pushable.rs +++ b/crates/polars-arrow/src/pushable.rs @@ -181,6 +181,7 @@ impl Pushable> for MutablePrimitiveArray { pub trait NoOption {} impl NoOption for &str {} impl NoOption for &[u8] {} +impl NoOption for Vec {} impl Pushable for MutableBinaryViewArray where diff --git a/crates/polars-arrow/src/storage.rs b/crates/polars-arrow/src/storage.rs index ddde815b5b10..1ecee497820b 100644 --- a/crates/polars-arrow/src/storage.rs +++ b/crates/polars-arrow/src/storage.rs @@ -283,10 +283,12 @@ impl SharedStorage { return Err(self); } - Ok(SharedStorage { + let storage = SharedStorage { inner: self.inner.cast(), phantom: PhantomData, - }) + }; + std::mem::forget(self); + Ok(storage) } } diff --git a/crates/polars-arrow/src/types/aligned_bytes.rs b/crates/polars-arrow/src/types/aligned_bytes.rs index 2c9bf9aed977..95aa84cfd349 100644 --- a/crates/polars-arrow/src/types/aligned_bytes.rs +++ b/crates/polars-arrow/src/types/aligned_bytes.rs @@ -14,6 +14,7 @@ pub unsafe trait AlignedBytesCast: Pod {} pub trait AlignedBytes: Pod + Zeroable + Copy + Default + Eq { const ALIGNMENT: usize; const SIZE: usize; + const SIZE_ALIGNMENT_PAIR: PrimitiveSizeAlignmentPair; type Unaligned: AsRef<[u8]> + AsMut<[u8]> @@ -45,7 +46,7 @@ pub trait AlignedBytes: Pod + Zeroable + Copy + Default + Eq { macro_rules! 
impl_aligned_bytes { ( - $(($name:ident, $size:literal, $alignment:literal, [$($eq_type:ty),*]),)+ + $(($name:ident, $size:literal, $alignment:literal, $sap:ident, [$($eq_type:ty),*]),)+ ) => { $( /// Bytes with a size and alignment. @@ -59,6 +60,7 @@ macro_rules! impl_aligned_bytes { impl AlignedBytes for $name { const ALIGNMENT: usize = $alignment; const SIZE: usize = $size; + const SIZE_ALIGNMENT_PAIR: PrimitiveSizeAlignmentPair = PrimitiveSizeAlignmentPair::$sap; type Unaligned = [u8; $size]; @@ -98,15 +100,53 @@ macro_rules! impl_aligned_bytes { } } +#[derive(Clone, Copy)] +pub enum PrimitiveSizeAlignmentPair { + S1A1, + S2A2, + S4A4, + S8A4, + S8A8, + S12A4, + S16A4, + S16A8, + S16A16, + S32A16, +} + +impl PrimitiveSizeAlignmentPair { + pub const fn size(self) -> usize { + match self { + Self::S1A1 => 1, + Self::S2A2 => 2, + Self::S4A4 => 4, + Self::S8A4 | Self::S8A8 => 8, + Self::S12A4 => 12, + Self::S16A4 | Self::S16A8 | Self::S16A16 => 16, + Self::S32A16 => 32, + } + } + + pub const fn alignment(self) -> usize { + match self { + Self::S1A1 => 1, + Self::S2A2 => 2, + Self::S4A4 | Self::S8A4 | Self::S12A4 | Self::S16A4 => 4, + Self::S8A8 | Self::S16A8 => 8, + Self::S16A16 | Self::S32A16 => 16, + } + } +} + impl_aligned_bytes! 
{ - (Bytes1Alignment1, 1, 1, [u8, i8]), - (Bytes2Alignment2, 2, 2, [u16, i16, f16]), - (Bytes4Alignment4, 4, 4, [u32, i32, f32]), - (Bytes8Alignment8, 8, 8, [u64, i64, f64]), - (Bytes8Alignment4, 8, 4, [days_ms]), - (Bytes12Alignment4, 12, 4, [[u32; 3]]), - (Bytes16Alignment4, 16, 4, [View]), - (Bytes16Alignment8, 16, 8, [months_days_ns]), - (Bytes16Alignment16, 16, 16, [u128, i128]), - (Bytes32Alignment16, 32, 16, [i256]), + (Bytes1Alignment1, 1, 1, S1A1, [u8, i8]), + (Bytes2Alignment2, 2, 2, S2A2, [u16, i16, f16]), + (Bytes4Alignment4, 4, 4, S4A4, [u32, i32, f32]), + (Bytes8Alignment8, 8, 8, S8A8, [u64, i64, f64]), + (Bytes8Alignment4, 8, 4, S8A4, [days_ms]), + (Bytes12Alignment4, 12, 4, S12A4, [[u32; 3]]), + (Bytes16Alignment4, 16, 4, S16A4, [View]), + (Bytes16Alignment8, 16, 8, S16A8, [months_days_ns]), + (Bytes16Alignment16, 16, 16, S16A16, [u128, i128]), + (Bytes32Alignment16, 32, 16, S32A16, [i256]), } diff --git a/crates/polars-compute/src/filter/mod.rs b/crates/polars-compute/src/filter/mod.rs index 6de1afbab2ed..a90ac3aa2b15 100644 --- a/crates/polars-compute/src/filter/mod.rs +++ b/crates/polars-compute/src/filter/mod.rs @@ -11,6 +11,7 @@ use arrow::array::{new_empty_array, Array, BinaryViewArray, BooleanArray, Primit use arrow::bitmap::utils::SlicesIterator; use arrow::bitmap::Bitmap; use arrow::with_match_primitive_type_full; +pub use boolean::filter_boolean_kernel; pub fn filter(array: &dyn Array, mask: &BooleanArray) -> Box { assert_eq!(array.len(), mask.len()); diff --git a/crates/polars-io/src/parquet/read/read_impl.rs b/crates/polars-io/src/parquet/read/read_impl.rs index de22b639bf8b..e0a096de498e 100644 --- a/crates/polars-io/src/parquet/read/read_impl.rs +++ b/crates/polars-io/src/parquet/read/read_impl.rs @@ -212,6 +212,10 @@ fn rg_to_dfs( use_statistics: bool, hive_partition_columns: Option<&[Series]>, ) -> PolarsResult> { + if config::verbose() { + eprintln!("parquet scan with parallel = {parallel:?}"); + } + // If we are only interested in 
the row_index, we take a little special path here. if projection.is_empty() { if let Some(row_index) = row_index { @@ -341,6 +345,10 @@ fn rg_to_dfs_prefiltered( let num_live_columns = live_variables.len(); let num_dead_columns = projection.len() - num_live_columns; + if config::verbose() { + eprintln!("parquet live columns = {num_live_columns}, dead columns = {num_dead_columns}"); + } + // @NOTE: This is probably already sorted, but just to be sure. let mut projection_sorted = projection.to_vec(); projection_sorted.sort(); @@ -446,6 +454,10 @@ fn rg_to_dfs_prefiltered( debug_assert_eq!(df.height(), filter_mask.set_bits()); if filter_mask.set_bits() == 0 { + if config::verbose() { + eprintln!("parquet filter mask found that row group can be skipped"); + } + return Ok(None); } @@ -886,10 +898,19 @@ pub fn read_parquet( .unwrap_or_else(|| Cow::Owned((0usize..reader_schema.len()).collect::>())); if let Some(predicate) = predicate { - if std::env::var("POLARS_PARQUET_AUTO_PREFILTERED").is_ok_and(|v| v == "1") - && predicate.live_variables().map_or(0, |v| v.len()) * n_row_groups - >= POOL.current_num_threads() - { + let prefilter_env = std::env::var("POLARS_PARQUET_PREFILTER"); + let prefilter_env = prefilter_env.as_deref(); + + let num_live_variables = predicate.live_variables().map_or(0, |v| v.len()); + let mut do_prefilter = false; + + do_prefilter |= prefilter_env == Ok("1"); // Force enable + do_prefilter |= num_live_variables * n_row_groups >= POOL.current_num_threads() + && materialized_projection.len() >= num_live_variables; + + do_prefilter &= prefilter_env != Ok("0"); // Force disable + + if do_prefilter { parallel = ParallelStrategy::Prefiltered; } } @@ -1419,12 +1440,12 @@ impl PrefilterMaskSetting { pub fn should_prefilter(&self, prefilter_cost: f64, dtype: &ArrowDataType) -> bool { match self { Self::Auto => { - // Prefiltering is more expensive for nested types so we make the cut-off - // higher. 
+ // Prefiltering is only expensive for nested types so we make the cut-off quite + // high. let is_nested = dtype.is_nested(); // We empirically selected these numbers. - (is_nested && prefilter_cost <= 0.01) || (!is_nested && prefilter_cost <= 0.02) + is_nested && prefilter_cost <= 0.01 }, Self::Pre => true, Self::Post => false, diff --git a/crates/polars-parquet/src/arrow/read/deserialize/binview.rs b/crates/polars-parquet/src/arrow/read/deserialize/binview.rs index 86e46756788a..62a0e7f2df4e 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/binview.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/binview.rs @@ -1,21 +1,22 @@ -use std::mem::MaybeUninit; use std::sync::atomic::{AtomicBool, Ordering}; use arrow::array::{ Array, BinaryViewArray, DictionaryArray, DictionaryKey, MutableBinaryViewArray, PrimitiveArray, Utf8ViewArray, View, }; -use arrow::bitmap::MutableBitmap; +use arrow::bitmap::{Bitmap, MutableBitmap}; use arrow::buffer::Buffer; use arrow::datatypes::{ArrowDataType, PhysicalType}; -use super::utils::{dict_indices_decoder, freeze_validity, BatchableCollector}; -use crate::parquet::encoding::delta_bitpacked::{lin_natural_sum, DeltaGatherer}; -use crate::parquet::encoding::hybrid_rle::gatherer::HybridRleGatherer; +use super::utils::dict_encoded::{append_validity, constrain_page_validity}; +use super::utils::{ + dict_indices_decoder, filter_from_range, freeze_validity, unspecialized_decode, +}; +use super::Filter; use crate::parquet::encoding::{delta_byte_array, delta_length_byte_array, hybrid_rle, Encoding}; use crate::parquet::error::{ParquetError, ParquetResult}; use crate::parquet::page::{split_buffer, DataPage, DictPage}; -use crate::read::deserialize::utils::{self, extend_from_decoder, Decoder, PageValidity}; +use crate::read::deserialize::utils::{self}; use crate::read::PrimitiveLogicalType; type DecodedStateTuple = (MutableBinaryViewArray<[u8]>, MutableBitmap); @@ -27,7 +28,7 @@ impl<'a> utils::StateTranslation<'a, 
BinViewDecoder> for StateTranslation<'a> { decoder: &BinViewDecoder, page: &'a DataPage, dict: Option<&'a ::Dict>, - _page_validity: Option<&PageValidity<'a>>, + page_validity: Option<&Bitmap>, ) -> ParquetResult { let is_string = matches!( page.descriptor.primitive_type.logical_type, @@ -42,7 +43,8 @@ impl<'a> utils::StateTranslation<'a, BinViewDecoder> for StateTranslation<'a> { Ok(Self::Plain(values)) }, (Encoding::PlainDictionary | Encoding::RleDictionary, Some(_)) => { - let values = dict_indices_decoder(page)?; + let values = + dict_indices_decoder(page, page_validity.map_or(0, |bm| bm.unset_bits()))?; Ok(Self::Dictionary(values)) }, (Encoding::DeltaLengthByteArray, _) => { @@ -61,136 +63,6 @@ impl<'a> utils::StateTranslation<'a, BinViewDecoder> for StateTranslation<'a> { _ => Err(utils::not_implemented(page)), } } - - fn len_when_not_nullable(&self) -> usize { - match self { - Self::Plain(v) => v.len_when_not_nullable(), - Self::Dictionary(v) => v.len(), - Self::DeltaLengthByteArray(v, _) => v.len(), - Self::DeltaBytes(v) => v.len(), - } - } - - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - if n == 0 { - return Ok(()); - } - - match self { - Self::Plain(t) => _ = t.by_ref().nth(n - 1), - Self::Dictionary(t) => t.skip_in_place(n)?, - Self::DeltaLengthByteArray(t, _) => t.skip_in_place(n)?, - Self::DeltaBytes(t) => t.skip_in_place(n)?, - } - - Ok(()) - } - - fn extend_from_state( - &mut self, - decoder: &mut BinViewDecoder, - decoded: &mut ::DecodedState, - is_optional: bool, - page_validity: &mut Option>, - dict: Option<&'a ::Dict>, - additional: usize, - ) -> ParquetResult<()> { - let views_offset = decoded.0.views().len(); - let buffer_offset = decoded.0.completed_buffers().len(); - - let mut validate_utf8 = decoder.check_utf8.load(Ordering::Relaxed); - - match self { - Self::Plain(page_values) => { - decoder.decode_plain_encoded( - decoded, - page_values, - is_optional, - page_validity.as_mut(), - additional, - )?; - - // Already done in 
decode_plain_encoded - validate_utf8 = false; - }, - Self::Dictionary(ref mut page) => { - let dict = dict.unwrap(); - - decoder.decode_dictionary_encoded( - decoded, - page, - is_optional, - page_validity.as_mut(), - dict, - additional, - )?; - - // Already done in decode_plain_encoded - validate_utf8 = false; - }, - Self::DeltaLengthByteArray(ref mut page_values, ref mut lengths) => { - let (values, validity) = decoded; - - let mut collector = DeltaCollector { - gatherer: &mut StatGatherer::default(), - pushed_lengths: lengths, - decoder: page_values, - }; - - match page_validity { - None => { - (&mut collector).push_n(values, additional)?; - - if is_optional { - validity.extend_constant(additional, true); - } - }, - Some(page_validity) => extend_from_decoder( - validity, - page_validity, - Some(additional), - values, - &mut collector, - )?, - } - - collector.flush(values); - }, - Self::DeltaBytes(ref mut page_values) => { - let (values, validity) = decoded; - - let mut collector = DeltaBytesCollector { - decoder: page_values, - }; - - match page_validity { - None => { - collector.push_n(values, additional)?; - - if is_optional { - validity.extend_constant(additional, true); - } - }, - Some(page_validity) => extend_from_decoder( - validity, - page_validity, - Some(additional), - values, - collector, - )?, - } - }, - } - - if validate_utf8 { - decoded - .0 - .validate_utf8(buffer_offset, views_offset) - .map_err(|_| ParquetError::oos("Binary view contained invalid UTF-8"))? - } - - Ok(()) - } } #[derive(Default)] @@ -219,304 +91,366 @@ impl utils::ExactSize for (Vec, Vec>) { } } -pub(crate) struct DeltaCollector<'a, 'b> { - // We gatherer the decoded lengths into `pushed_lengths`. Then, we `flush` those to the - // `BinView` This allows us to group many memcopies into one and take better potential fast - // paths for inlineable views and such. 
- pub(crate) gatherer: &'b mut StatGatherer, - pub(crate) pushed_lengths: &'b mut Vec, - - pub(crate) decoder: &'b mut delta_length_byte_array::Decoder<'a>, -} - -pub(crate) struct DeltaBytesCollector<'a, 'b> { - pub(crate) decoder: &'b mut delta_byte_array::Decoder<'a>, -} - -/// A [`DeltaGatherer`] that gathers the minimum, maximum and summation of the values as `usize`s. -pub(crate) struct StatGatherer { - min: usize, - max: usize, - sum: usize, -} +#[allow(clippy::too_many_arguments)] +pub fn decode_plain( + values: &[u8], + max_num_values: usize, + target: &mut MutableBinaryViewArray<[u8]>, -impl Default for StatGatherer { - fn default() -> Self { - Self { - min: usize::MAX, - max: usize::MIN, - sum: 0, - } - } -} + is_optional: bool, + validity: &mut MutableBitmap, -impl DeltaGatherer for StatGatherer { - type Target = Vec; + page_validity: Option<&Bitmap>, + filter: Option, - fn target_len(&self, target: &Self::Target) -> usize { - target.len() + verify_utf8: bool, +) -> ParquetResult<()> { + if is_optional { + append_validity(page_validity, filter.as_ref(), validity, max_num_values); } + let page_validity = constrain_page_validity(max_num_values, page_validity, filter.as_ref()); - fn target_reserve(&self, target: &mut Self::Target, n: usize) { - target.reserve(n); - } + match (filter, page_validity) { + (None, None) => decode_required_plain(max_num_values, values, None, target, verify_utf8), + (Some(Filter::Range(rng)), None) if rng.start == 0 => { + decode_required_plain(max_num_values, values, Some(rng.end), target, verify_utf8) + }, + (None, Some(page_validity)) => decode_optional_plain( + page_validity.set_bits(), + values, + target, + &page_validity, + verify_utf8, + ), + (Some(Filter::Range(rng)), Some(page_validity)) if rng.start == 0 => decode_optional_plain( + page_validity.set_bits(), + values, + target, + &page_validity, + verify_utf8, + ), + (Some(Filter::Mask(mask)), None) => { + decode_masked_required_plain(max_num_values, values, target, 
&mask, verify_utf8) + }, + (Some(Filter::Mask(mask)), Some(page_validity)) => decode_masked_optional_plain( + page_validity.set_bits(), + values, + target, + &page_validity, + &mask, + verify_utf8, + ), + (Some(Filter::Range(rng)), None) => decode_masked_required_plain( + max_num_values, + values, + target, + &filter_from_range(rng.clone()), + verify_utf8, + ), + (Some(Filter::Range(rng)), Some(page_validity)) => decode_masked_optional_plain( + page_validity.set_bits(), + values, + target, + &page_validity, + &filter_from_range(rng.clone()), + verify_utf8, + ), + }?; + + Ok(()) +} - fn gather_one(&mut self, target: &mut Self::Target, v: i64) -> ParquetResult<()> { - if v < 0 { - return Err(ParquetError::oos("DELTA_LENGTH_BYTE_ARRAY length < 0")); - } +#[cold] +fn invalid_input_err() -> ParquetError { + ParquetError::oos("String data does not match given length") +} - if v > i64::from(u32::MAX) { - return Err(ParquetError::not_supported( - "DELTA_LENGTH_BYTE_ARRAY length > u32::MAX", - )); - } +#[cold] +fn invalid_utf8_err() -> ParquetError { + ParquetError::oos("String data contained invalid UTF-8") +} - let v = v as usize; +fn decode_required_plain( + num_expected_values: usize, + values: &[u8], + limit: Option, + target: &mut MutableBinaryViewArray<[u8]>, + + verify_utf8: bool, +) -> ParquetResult<()> { + let limit = limit.unwrap_or(num_expected_values); + + let mut idx = 0; + decode_plain_generic( + values, + target, + limit, + || { + if idx >= limit { + return None; + } - self.min = self.min.min(v); - self.max = self.max.max(v); - self.sum += v; + idx += 1; - target.push(v as u32); + Some((true, true)) + }, + verify_utf8, + ) +} - Ok(()) +fn decode_optional_plain( + num_expected_values: usize, + values: &[u8], + target: &mut MutableBinaryViewArray<[u8]>, + page_validity: &Bitmap, + + verify_utf8: bool, +) -> ParquetResult<()> { + if page_validity.unset_bits() == 0 { + return decode_required_plain( + num_expected_values, + values, + Some(page_validity.len()), + 
target, + verify_utf8, + ); } - fn gather_slice(&mut self, target: &mut Self::Target, slice: &[i64]) -> ParquetResult<()> { - let mut is_invalid = false; - let mut is_too_large = false; - - target.extend(slice.iter().map(|&v| { - is_invalid |= v < 0; - is_too_large |= v > i64::from(u32::MAX); - - let v = v as usize; - - self.min = self.min.min(v); - self.max = self.max.max(v); - self.sum += v; + let mut validity_iter = page_validity.iter(); + decode_plain_generic( + values, + target, + page_validity.len(), + || Some((validity_iter.next()?, true)), + verify_utf8, + ) +} - v as u32 - })); +fn decode_masked_required_plain( + num_expected_values: usize, + values: &[u8], + target: &mut MutableBinaryViewArray<[u8]>, - if is_invalid { - target.truncate(target.len() - slice.len()); - return Err(ParquetError::oos("DELTA_LENGTH_BYTE_ARRAY length < 0")); - } + mask: &Bitmap, - if is_too_large { - return Err(ParquetError::not_supported( - "DELTA_LENGTH_BYTE_ARRAY length > u32::MAX", - )); - } - - Ok(()) + verify_utf8: bool, +) -> ParquetResult<()> { + if mask.unset_bits() == 0 { + return decode_required_plain( + num_expected_values, + values, + Some(mask.len()), + target, + verify_utf8, + ); } - fn gather_constant( - &mut self, - target: &mut Self::Target, - v: i64, - delta: i64, - num_repeats: usize, - ) -> ParquetResult<()> { - if v < 0 || (delta < 0 && num_repeats > 0 && (num_repeats - 1) as i64 * delta + v < 0) { - return Err(ParquetError::oos("DELTA_LENGTH_BYTE_ARRAY length < 0")); - } - - if v > i64::from(u32::MAX) || v + ((num_repeats - 1) as i64) * delta > i64::from(u32::MAX) { - return Err(ParquetError::not_supported( - "DELTA_LENGTH_BYTE_ARRAY length > u32::MAX", - )); - } - - target.extend((0..num_repeats).map(|i| (v + (i as i64) * delta) as u32)); - - let vstart = v; - let vend = v + (num_repeats - 1) as i64 * delta; - - let (min, max) = if delta < 0 { - (vend, vstart) - } else { - (vstart, vend) - }; - - let sum = lin_natural_sum(v, delta, num_repeats) as usize; - 
- #[cfg(debug_assertions)] - { - assert_eq!( - (0..num_repeats) - .map(|i| (v + (i as i64) * delta) as usize) - .sum::(), - sum - ); - } - - self.min = self.min.min(min as usize); - self.max = self.max.max(max as usize); - self.sum += sum; - - Ok(()) - } + let mut mask_iter = mask.iter(); + decode_plain_generic( + values, + target, + mask.set_bits(), + || Some((true, mask_iter.next()?)), + verify_utf8, + ) } -impl BatchableCollector<(), MutableBinaryViewArray<[u8]>> for &mut DeltaCollector<'_, '_> { - fn reserve(target: &mut MutableBinaryViewArray<[u8]>, n: usize) { - target.reserve(n); - } +fn decode_masked_optional_plain( + num_expected_values: usize, + values: &[u8], + target: &mut MutableBinaryViewArray<[u8]>, - fn push_n( - &mut self, - _target: &mut MutableBinaryViewArray<[u8]>, - n: usize, - ) -> ParquetResult<()> { - self.decoder - .lengths - .gather_n_into(self.pushed_lengths, n, self.gatherer)?; + page_validity: &Bitmap, + mask: &Bitmap, - Ok(()) - } + verify_utf8: bool, +) -> ParquetResult<()> { + assert_eq!(page_validity.len(), mask.len()); - fn push_n_nulls( - &mut self, - target: &mut MutableBinaryViewArray<[u8]>, - n: usize, - ) -> ParquetResult<()> { - self.flush(target); - target.extend_constant(n, >::None); - Ok(()) + if mask.unset_bits() == 0 { + return decode_optional_plain( + num_expected_values, + values, + target, + page_validity, + verify_utf8, + ); } - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - self.decoder.skip_in_place(n) + if page_validity.unset_bits() == 0 { + return decode_masked_required_plain( + num_expected_values, + values, + target, + page_validity, + verify_utf8, + ); } + + let mut validity_iter = page_validity.iter(); + let mut mask_iter = mask.iter(); + decode_plain_generic( + values, + target, + mask.set_bits(), + || Some((validity_iter.next()?, mask_iter.next()?)), + verify_utf8, + ) } -impl DeltaCollector<'_, '_> { - pub fn flush(&mut self, target: &mut MutableBinaryViewArray<[u8]>) { - if 
!self.pushed_lengths.is_empty() { - let start_bytes_len = target.total_bytes_len(); - let start_buffer_len = target.total_buffer_len(); - unsafe { - target.extend_from_lengths_with_stats( - &self.decoder.values[self.decoder.offset..], - self.pushed_lengths.iter().map(|&v| v as usize), - self.gatherer.min, - self.gatherer.max, - self.gatherer.sum, - ) - }; - debug_assert_eq!( - target.total_bytes_len() - start_bytes_len, - self.gatherer.sum, - ); - debug_assert_eq!( - target.total_buffer_len() - start_buffer_len, - self.pushed_lengths - .iter() - .map(|&v| v as usize) - .filter(|&v| v > View::MAX_INLINE_SIZE as usize) - .sum::(), - ); - - self.decoder.offset += self.gatherer.sum; - self.pushed_lengths.clear(); - *self.gatherer = StatGatherer::default(); +pub fn decode_plain_generic( + values: &[u8], + target: &mut MutableBinaryViewArray<[u8]>, + + num_rows: usize, + mut next: impl FnMut() -> Option<(bool, bool)>, + + verify_utf8: bool, +) -> ParquetResult<()> { + // Since the offset in the buffer is decided by the interleaved lengths, every value has to be + // walked no matter what. This makes decoding rather inefficient in general. + // + // There are three cases: + // 1. All inlinable values + // - Most time is spend in decoding + // - No additional buffer has to be formed + // - Possible UTF-8 verification is fast because the len_below_128 trick + // 2. All non-inlinable values + // - Little time is spend in decoding + // - Most time is spend in buffer memcopying (we remove the interleaved lengths) + // - Possible UTF-8 verification is fast because the continuation byte trick + // 3. Mixed inlinable and non-inlinable values + // - Time shared between decoding and buffer forming + // - UTF-8 verification might still use len_below_128 trick, but might need to fall back to + // slow path. 
+ + target.finish_in_progress(); + unsafe { target.views_mut() }.reserve(num_rows); + + let buffer_idx = target.completed_buffers().len() as u32; + let mut buffer = Vec::with_capacity(values.len() + 1); + let mut none_starting_with_continuation_byte = true; // Whether the transition from between strings is valid + // UTF-8 + let mut all_len_below_128 = true; // Whether all the lengths of the values are below 128, this + // allows us to make UTF-8 verification a lot faster. + + let mut total_bytes_len = 0; + let mut num_seen = 0; + let mut num_inlined = 0; + + let mut mvalues = values; + while let Some((is_valid, is_selected)) = next() { + if !is_valid { + if is_selected { + unsafe { target.views_mut() }.push(unsafe { View::new_inline_unchecked(&[]) }); + } + continue; } - } -} -impl BatchableCollector<(), MutableBinaryViewArray<[u8]>> for DeltaBytesCollector<'_, '_> { - fn reserve(target: &mut MutableBinaryViewArray<[u8]>, n: usize) { - target.reserve(n); - } + if mvalues.len() < 4 { + return Err(invalid_input_err()); + } - fn push_n(&mut self, target: &mut MutableBinaryViewArray<[u8]>, n: usize) -> ParquetResult<()> { - struct MaybeUninitCollector(usize); + let length; + (length, mvalues) = mvalues.split_at(4); + let length: &[u8; 4] = unsafe { length.try_into().unwrap_unchecked() }; + let length = u32::from_le_bytes(*length); - impl DeltaGatherer for MaybeUninitCollector { - type Target = [MaybeUninit; BATCH_SIZE]; + if mvalues.len() < length as usize { + return Err(invalid_input_err()); + } - fn target_len(&self, _target: &Self::Target) -> usize { - self.0 - } + let value; + (value, mvalues) = mvalues.split_at(length as usize); - fn target_reserve(&self, _target: &mut Self::Target, _n: usize) {} + num_seen += 1; + all_len_below_128 &= value.len() < 128; + // Everything starting with 10.. .... is a continuation byte. 
+ none_starting_with_continuation_byte &= + value.is_empty() || value[0] & 0b1100_0000 != 0b1000_0000; - fn gather_one(&mut self, target: &mut Self::Target, v: i64) -> ParquetResult<()> { - target[self.0] = MaybeUninit::new(v as usize); - self.0 += 1; - Ok(()) - } + if !is_selected { + continue; } - let decoder_len = self.decoder.len(); - let mut n = usize::min(n, decoder_len); + let offset = buffer.len() as u32; - if n == 0 { - return Ok(()); + if value.len() <= View::MAX_INLINE_SIZE as usize { + unsafe { target.views_mut() }.push(unsafe { View::new_inline_unchecked(value) }); + num_inlined += 1; + } else { + buffer.extend_from_slice(value); + unsafe { target.views_mut() } + .push(unsafe { View::new_noninline_unchecked(value, buffer_idx, offset) }); } - let mut buffer = Vec::new(); - target.reserve(n); - - const BATCH_SIZE: usize = 4096; - - let mut prefix_lengths = [const { MaybeUninit::::uninit() }; BATCH_SIZE]; - let mut suffix_lengths = [const { MaybeUninit::::uninit() }; BATCH_SIZE]; - - while n > 0 { - let num_elems = usize::min(n, BATCH_SIZE); - n -= num_elems; + total_bytes_len += value.len(); + } - self.decoder.prefix_lengths.gather_n_into( - &mut prefix_lengths, - num_elems, - &mut MaybeUninitCollector(0), - )?; - self.decoder.suffix_lengths.gather_n_into( - &mut suffix_lengths, - num_elems, - &mut MaybeUninitCollector(0), - )?; + unsafe { + target.set_total_bytes_len(target.total_bytes_len() + total_bytes_len); + } - for i in 0..num_elems { - let prefix_length = unsafe { prefix_lengths[i].assume_init() }; - let suffix_length = unsafe { suffix_lengths[i].assume_init() }; + if verify_utf8 { + // This is a trick that allows us to check the resulting buffer which allows to batch the + // UTF-8 verification. + // + // This is allowed if none of the strings start with a UTF-8 continuation byte, so we keep + // track of that during the decoding. 
+ if num_inlined == 0 { + if !none_starting_with_continuation_byte || simdutf8::basic::from_utf8(&buffer).is_err() + { + return Err(invalid_utf8_err()); + } - buffer.clear(); + // This is a small trick that allows us to check the Parquet buffer instead of the view + // buffer. Batching the UTF-8 verification is more performant. For this to be allowed, + // all the interleaved lengths need to be valid UTF-8. + // + // Every strings prepended by 4 bytes (L, 0, 0, 0), since we check here L < 128. L is + // only a valid first byte of a UTF-8 code-point and (L, 0, 0, 0) is valid UTF-8. + // Consequently, it is valid to just check the whole buffer. + } else if all_len_below_128 { + if simdutf8::basic::from_utf8(values).is_err() { + return Err(invalid_utf8_err()); + } + } else { + // We check all the non-inlined values here. + if !none_starting_with_continuation_byte || simdutf8::basic::from_utf8(&buffer).is_err() + { + return Err(invalid_utf8_err()); + } - buffer.extend_from_slice(&self.decoder.last[..prefix_length]); - buffer.extend_from_slice( - &self.decoder.values[self.decoder.offset..self.decoder.offset + suffix_length], - ); + let mut all_inlined_are_ascii = true; - target.push_value(&buffer); + // @NOTE: This is only valid because we initialize our inline View's to be zeroes on + // non-included bytes. + for view in &target.views()[target.len() - num_seen..] { + all_inlined_are_ascii &= (view.length > View::MAX_INLINE_SIZE) + | (view.as_u128() & 0x0000_0000_8080_8080_8080_8080_8080_8080 == 0); + } - self.decoder.last.clear(); - std::mem::swap(&mut self.decoder.last, &mut buffer); + // This is the very slow path. + if !all_inlined_are_ascii { + let mut is_valid = true; + for view in &target.views()[target.len() - num_seen..] 
{ + if view.length <= View::MAX_INLINE_SIZE { + is_valid &= + std::str::from_utf8(unsafe { view.get_inlined_slice_unchecked() }) + .is_ok(); + } + } - self.decoder.offset += suffix_length; + if !is_valid { + return Err(invalid_utf8_err()); + } } } - - Ok(()) } - fn push_n_nulls( - &mut self, - target: &mut MutableBinaryViewArray<[u8]>, - n: usize, - ) -> ParquetResult<()> { - target.extend_constant(n, >::None); - Ok(()) - } + target.push_buffer(buffer.into()); - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - self.decoder.skip_in_place(n) - } + Ok(()) } impl utils::Decoder for BinViewDecoder { @@ -548,297 +482,136 @@ impl utils::Decoder for BinViewDecoder { Ok(()) } - fn deserialize_dict(&self, page: DictPage) -> ParquetResult { + fn deserialize_dict(&mut self, page: DictPage) -> ParquetResult { let values = &page.buffer; let num_values = page.num_values; - // Each value is prepended by the length which is 4 bytes. - let num_bytes = values.len() - 4 * num_values; - - let mut views = Vec::with_capacity(num_values); - let mut buffer = Vec::with_capacity(num_bytes); - - let mut buffers = Vec::with_capacity(1); - - let mut offset = 0; - let mut max_length = 0; - views.extend(BinaryIter::new(values, num_values).map(|v| { - let length = v.len(); - max_length = usize::max(length, max_length); - if length <= View::MAX_INLINE_SIZE as usize { - View::new_inline(v) - } else { - if offset >= u32::MAX as usize { - let full_buffer = std::mem::take(&mut buffer); - let num_bytes = full_buffer.capacity() - full_buffer.len(); - buffers.push(Buffer::from(full_buffer)); - buffer.reserve(num_bytes); - offset = 0; - } + let mut arr = MutableBinaryViewArray::new(); + decode_required_plain( + num_values, + values, + None, + &mut arr, + self.check_utf8.load(Ordering::Relaxed), + )?; - buffer.extend_from_slice(v); - let view = View::new_from_bytes(v, buffers.len() as u32, offset as u32); - offset += v.len(); - view - } - })); - - buffers.push(Buffer::from(buffer)); - - if 
self.check_utf8.load(Ordering::Relaxed) { - // This is a small trick that allows us to check the Parquet buffer instead of the view - // buffer. Batching the UTF-8 verification is more performant. For this to be allowed, - // all the interleaved lengths need to be valid UTF-8. - // - // Every strings prepended by 4 bytes (L, 0, 0, 0), since we check here L < 128. L is - // only a valid first byte of a UTF-8 code-point and (L, 0, 0, 0) is valid UTF-8. - // Consequently, it is valid to just check the whole buffer. - if max_length < 128 { - simdutf8::basic::from_utf8(values) - .map_err(|_| ParquetError::oos("String data contained invalid UTF-8"))?; - } else { - arrow::array::validate_utf8_view(&views, &buffers) - .map_err(|_| ParquetError::oos("String data contained invalid UTF-8"))?; - } - } + let (views, buffers) = arr.take(); Ok((views, buffers)) } - fn decode_plain_encoded<'a>( - &mut self, - (values, validity): &mut Self::DecodedState, - page_values: &mut as utils::StateTranslation<'a, Self>>::PlainDecoder, - is_optional: bool, - page_validity: Option<&mut PageValidity<'a>>, - limit: usize, - ) -> ParquetResult<()> { - let views_offset = values.views().len(); - let buffer_offset = values.completed_buffers().len(); - - struct Collector<'a, 'b> { - iter: &'b mut BinaryIter<'a>, - max_length: &'b mut usize, - } - - impl BatchableCollector<(), MutableBinaryViewArray<[u8]>> for Collector<'_, '_> { - fn reserve(target: &mut MutableBinaryViewArray<[u8]>, n: usize) { - target.reserve(n); - } - - fn push_n( - &mut self, - target: &mut MutableBinaryViewArray<[u8]>, - n: usize, - ) -> ParquetResult<()> { - for x in self.iter.take(n) { - *self.max_length = usize::max(*self.max_length, x.len()); - target.push_value(x); - } - Ok(()) - } - - fn push_n_nulls( - &mut self, - target: &mut MutableBinaryViewArray<[u8]>, - n: usize, - ) -> ParquetResult<()> { - target.extend_constant(n, >::None); - Ok(()) - } - - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - if n > 0 
{ - _ = self.iter.nth(n - 1); - } - Ok(()) - } - } - - let mut max_length = 0; - let buffer = page_values.values; - let mut collector = Collector { - iter: page_values, - max_length: &mut max_length, - }; - - match page_validity { - None => { - collector.push_n(values, limit)?; - - if is_optional { - validity.extend_constant(limit, true); - } - }, - Some(page_validity) => { - extend_from_decoder(validity, page_validity, Some(limit), values, collector)? - }, - } - - let buffer = &buffer[..buffer.len() - page_values.values.len()]; - - if self.check_utf8.load(Ordering::Relaxed) { - // This is a small trick that allows us to check the Parquet buffer instead of the view - // buffer. Batching the UTF-8 verification is more performant. For this to be allowed, - // all the interleaved lengths need to be valid UTF-8. - // - // Every strings prepended by 4 bytes (L, 0, 0, 0), since we check here L < 128. L is - // only a valid first byte of a UTF-8 code-point and (L, 0, 0, 0) is valid UTF-8. - // Consequently, it is valid to just check the whole buffer. - if max_length < 128 { - simdutf8::basic::from_utf8(buffer) - .map_err(|_| ParquetError::oos("String data contained invalid UTF-8"))?; - } else { - values - .validate_utf8(buffer_offset, views_offset) - .map_err(|_| ParquetError::oos("String data contained invalid UTF-8"))? 
- } - } - - Ok(()) - } - - fn decode_dictionary_encoded<'a>( + fn extend_filtered_with_state( &mut self, - (values, validity): &mut Self::DecodedState, - page_values: &mut hybrid_rle::HybridRleDecoder<'a>, - is_optional: bool, - page_validity: Option<&mut PageValidity<'a>>, - dict: &Self::Dict, - limit: usize, + mut state: utils::State<'_, Self>, + decoded: &mut Self::DecodedState, + filter: Option, ) -> ParquetResult<()> { - struct DictionaryTranslator<'a>(&'a [View]); - - impl HybridRleGatherer for DictionaryTranslator<'_> { - type Target = MutableBinaryViewArray<[u8]>; - - fn target_reserve(&self, target: &mut Self::Target, n: usize) { - target.reserve(n); - } - - fn target_num_elements(&self, target: &Self::Target) -> usize { - target.len() - } - - fn hybridrle_to_target(&self, value: u32) -> ParquetResult { - self.0 - .get(value as usize) - .cloned() - .ok_or(ParquetError::oos("Dictionary index is out of range")) - } - - fn gather_one(&self, target: &mut Self::Target, value: View) -> ParquetResult<()> { - // SAFETY: - // - All the dictionary values are already buffered - // - We keep the `total_bytes_len` in-sync with the views - unsafe { - target.views_mut().push(value); - target.set_total_bytes_len(target.total_bytes_len() + value.length as usize); - } - - Ok(()) - } + match state.translation { + StateTranslation::Plain(iter) => decode_plain( + iter.values, + iter.max_num_values, + &mut decoded.0, + state.is_optional, + &mut decoded.1, + state.page_validity.as_ref(), + filter, + self.check_utf8.load(Ordering::Relaxed), + ), + StateTranslation::Dictionary(ref mut indexes) => { + let (dict, _) = state.dict.unwrap(); + + let start_length = decoded.0.views().len(); + + utils::dict_encoded::decode_dict( + indexes.clone(), + dict, + state.is_optional, + state.page_validity.as_ref(), + filter, + &mut decoded.1, + unsafe { decoded.0.views_mut() }, + )?; - fn gather_repeated( - &self, - target: &mut Self::Target, - value: View, - n: usize, - ) -> ParquetResult<()> { 
- // SAFETY: - // - All the dictionary values are already buffered - // - We keep the `total_bytes_len` in-sync with the views + let total_length: usize = decoded + .0 + .views() + .iter() + .skip(start_length) + .map(|view| view.length as usize) + .sum(); unsafe { - let length = target.views_mut().len(); - target.views_mut().resize(length + n, value); - target - .set_total_bytes_len(target.total_bytes_len() + n * value.length as usize); + decoded + .0 + .set_total_bytes_len(decoded.0.total_bytes_len() + total_length); } Ok(()) - } - - fn gather_slice(&self, target: &mut Self::Target, source: &[u32]) -> ParquetResult<()> { - let Some(source_max) = source.iter().copied().max() else { - return Ok(()); - }; + }, + StateTranslation::DeltaLengthByteArray(decoder, _vec) => { + let values = decoder.values; + let lengths = decoder.lengths.collect::>()?; + + if self.check_utf8.load(Ordering::Relaxed) { + let mut none_starting_with_continuation_byte = true; + let mut offset = 0; + for length in &lengths { + none_starting_with_continuation_byte &= + *length == 0 || values[offset] & 0xC0 != 0x80; + offset += *length as usize; + } - if source_max as usize >= self.0.len() { - return Err(ParquetError::oos("Dictionary index is out of range")); - } + if !none_starting_with_continuation_byte { + return Err(invalid_utf8_err()); + } - let mut view_length_sum = 0usize; - // Safety: We have checked before that source only has indexes that are smaller than the - // dictionary length. 
- // - // Safety: - // - All the dictionary values are already buffered - // - We keep the `total_bytes_len` in-sync with the views - unsafe { - target.views_mut().extend(source.iter().map(|&src_idx| { - let v = *self.0.get_unchecked(src_idx as usize); - view_length_sum += v.length as usize; - v - })); - target.set_total_bytes_len(target.total_bytes_len() + view_length_sum); + if simdutf8::basic::from_utf8(&values[..offset]).is_err() { + return Err(invalid_utf8_err()); + } } - Ok(()) - } - } + let mut i = 0; + let mut offset = 0; + unspecialized_decode( + lengths.len(), + || { + let length = lengths[i] as usize; - let translator = DictionaryTranslator(&dict.0); + let value = &values[offset..offset + length]; - match page_validity { - None => { - page_values.gather_n_into(values, limit, &translator)?; + i += 1; + offset += length; - if is_optional { - validity.extend_constant(limit, true); - } + Ok(value) + }, + filter, + state.page_validity, + state.is_optional, + &mut decoded.1, + &mut decoded.0, + ) }, - Some(page_validity) => { - struct Collector<'a, 'b> { - decoder: &'b mut hybrid_rle::HybridRleDecoder<'a>, - translator: DictionaryTranslator<'b>, - } + StateTranslation::DeltaBytes(mut decoder) => { + let check_utf8 = self.check_utf8.load(Ordering::Relaxed); - impl BatchableCollector<(), MutableBinaryViewArray<[u8]>> for Collector<'_, '_> { - fn reserve(target: &mut MutableBinaryViewArray<[u8]>, n: usize) { - target.reserve(n); - } + unspecialized_decode( + decoder.len(), + || { + let value = decoder.next().unwrap()?; - fn push_n( - &mut self, - target: &mut MutableBinaryViewArray<[u8]>, - n: usize, - ) -> ParquetResult<()> { - self.decoder.gather_n_into(target, n, &self.translator)?; - Ok(()) - } - - fn push_n_nulls( - &mut self, - target: &mut MutableBinaryViewArray<[u8]>, - n: usize, - ) -> ParquetResult<()> { - target.extend_constant(n, >::None); - Ok(()) - } + if check_utf8 && simdutf8::basic::from_utf8(&value[..]).is_err() { + return 
Err(invalid_utf8_err()); + } - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - self.decoder.skip_in_place(n) - } - } - let collector = Collector { - decoder: page_values, - translator, - }; - extend_from_decoder(validity, page_validity, Some(limit), values, collector)?; + Ok(value) + }, + filter, + state.page_validity, + state.is_optional, + &mut decoded.1, + &mut decoded.0, + ) }, } - - Ok(()) } fn finalize( @@ -903,25 +676,6 @@ impl utils::DictDecodable for BinViewDecoder { } } -impl utils::NestedDecoder for BinViewDecoder { - fn validity_extend( - _: &mut utils::State<'_, Self>, - (_, validity): &mut Self::DecodedState, - value: bool, - n: usize, - ) { - validity.extend_constant(n, value); - } - - fn values_extend_nulls( - _: &mut utils::State<'_, Self>, - (values, _): &mut Self::DecodedState, - n: usize, - ) { - values.extend_constant(n, >::None); - } -} - #[derive(Debug)] pub struct BinaryIter<'a> { values: &'a [u8], @@ -940,11 +694,6 @@ impl<'a> BinaryIter<'a> { max_num_values, } } - - /// Return the length of the iterator when the data is not nullable. 
- pub fn len_when_not_nullable(&self) -> usize { - self.max_num_values - } } impl<'a> Iterator for BinaryIter<'a> { diff --git a/crates/polars-parquet/src/arrow/read/deserialize/boolean.rs b/crates/polars-parquet/src/arrow/read/deserialize/boolean.rs index 51026f483bd7..11019b3ab614 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/boolean.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/boolean.rs @@ -1,15 +1,19 @@ use arrow::array::BooleanArray; +use arrow::bitmap::bitmask::BitMask; use arrow::bitmap::utils::BitmapIter; -use arrow::bitmap::MutableBitmap; +use arrow::bitmap::{Bitmap, MutableBitmap}; use arrow::datatypes::ArrowDataType; - -use super::utils::{self, extend_from_decoder, freeze_validity, Decoder, ExactSize}; -use crate::parquet::encoding::hybrid_rle::gatherer::HybridRleGatherer; -use crate::parquet::encoding::hybrid_rle::HybridRleDecoder; +use polars_compute::filter::filter_boolean_kernel; + +use super::utils::dict_encoded::{append_validity, constrain_page_validity}; +use super::utils::{ + self, decode_hybrid_rle_into_bitmap, filter_from_range, freeze_validity, Decoder, ExactSize, +}; +use super::Filter; +use crate::parquet::encoding::hybrid_rle::{HybridRleChunk, HybridRleDecoder}; use crate::parquet::encoding::Encoding; use crate::parquet::error::ParquetResult; use crate::parquet::page::{split_buffer, DataPage, DictPage}; -use crate::read::deserialize::utils::{BatchableCollector, PageValidity}; #[allow(clippy::large_enum_variant)] #[derive(Debug)] @@ -25,7 +29,7 @@ impl<'a> utils::StateTranslation<'a, BooleanDecoder> for StateTranslation<'a> { _decoder: &BooleanDecoder, page: &'a DataPage, _dict: Option<&'a ::Dict>, - page_validity: Option<&PageValidity<'a>>, + page_validity: Option<&Bitmap>, ) -> ParquetResult { let values = split_buffer(page)?.values; @@ -63,121 +67,198 @@ impl<'a> utils::StateTranslation<'a, BooleanDecoder> for StateTranslation<'a> { _ => Err(utils::not_implemented(page)), } } +} - fn 
len_when_not_nullable(&self) -> usize { - match self { - Self::Plain(v) => v.len(), - Self::Rle(v) => v.len(), - } +fn decode_required_rle( + values: HybridRleDecoder<'_>, + limit: Option, + target: &mut MutableBitmap, +) -> ParquetResult<()> { + decode_hybrid_rle_into_bitmap(values, limit, target)?; + Ok(()) +} + +fn decode_optional_rle( + values: HybridRleDecoder<'_>, + target: &mut MutableBitmap, + page_validity: &Bitmap, +) -> ParquetResult<()> { + debug_assert!(page_validity.set_bits() <= values.len()); + + if page_validity.unset_bits() == 0 { + return decode_required_rle(values, Some(page_validity.len()), target); } - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - if n == 0 { - return Ok(()); - } + target.reserve(page_validity.len()); - // @TODO: Add a skip_in_place on BitmapIter - match self { - Self::Plain(t) => _ = t.nth(n - 1), - Self::Rle(t) => t.skip_in_place(n)?, - } + let mut validity_mask = BitMask::from_bitmap(page_validity); - Ok(()) - } + for chunk in values.into_chunk_iter() { + let chunk = chunk?; - fn extend_from_state( - &mut self, - decoder: &mut BooleanDecoder, - decoded: &mut ::DecodedState, - is_optional: bool, - page_validity: &mut Option>, - _: Option<&'a ::Dict>, - additional: usize, - ) -> ParquetResult<()> { - match self { - Self::Plain(page_values) => decoder.decode_plain_encoded( - decoded, - page_values, - is_optional, - page_validity.as_mut(), - additional, - )?, - Self::Rle(page_values) => { - let (values, validity) = decoded; - match page_validity { - None => { - page_values.gather_n_into(values, additional, &BitmapGatherer)?; - - if is_optional { - validity.extend_constant(additional, true); - } - }, - Some(page_validity) => utils::extend_from_decoder( - validity, - page_validity, - Some(additional), - values, - BitmapCollector(page_values), - )?, + match chunk { + HybridRleChunk::Rle(value, size) => { + let offset = validity_mask + .nth_set_bit_idx(size, 0) + .unwrap_or(validity_mask.len()); + + let t; + (t, 
validity_mask) = validity_mask.split_at(offset); + + target.extend_constant(t.len(), value != 0); + }, + HybridRleChunk::Bitpacked(decoder) => { + let decoder_slice = decoder.as_slice(); + let offset = validity_mask + .nth_set_bit_idx(decoder.len(), 0) + .unwrap_or(validity_mask.len()); + + let decoder_validity; + (decoder_validity, validity_mask) = validity_mask.split_at(offset); + + let mut offset = 0; + let mut validity_iter = decoder_validity.iter(); + while validity_iter.num_remaining() > 0 { + let num_valid = validity_iter.take_leading_ones(); + target.extend_from_slice(decoder_slice, offset, num_valid); + offset += num_valid; + + let num_invalid = validity_iter.take_leading_zeros(); + target.extend_constant(num_invalid, false); } }, } + } - Ok(()) + if cfg!(debug_assertions) { + assert_eq!(validity_mask.set_bits(), 0); } + target.extend_constant(validity_mask.len(), false); + + Ok(()) } -struct BitmapGatherer; -impl HybridRleGatherer for BitmapGatherer { - type Target = MutableBitmap; +fn decode_masked_required_rle( + values: HybridRleDecoder<'_>, + target: &mut MutableBitmap, + mask: &Bitmap, +) -> ParquetResult<()> { + debug_assert!(mask.len() <= values.len()); - fn target_reserve(&self, target: &mut Self::Target, n: usize) { - target.reserve(n); + if mask.unset_bits() == 0 { + return decode_required_rle(values, Some(mask.len()), target); } - fn target_num_elements(&self, target: &Self::Target) -> usize { - target.len() + let mut im_target = MutableBitmap::new(); + decode_required_rle(values, Some(mask.len()), &mut im_target)?; + + target.extend_from_bitmap(&filter_boolean_kernel(&im_target.freeze(), mask)); + + Ok(()) +} + +fn decode_masked_optional_rle( + values: HybridRleDecoder<'_>, + target: &mut MutableBitmap, + page_validity: &Bitmap, + mask: &Bitmap, +) -> ParquetResult<()> { + debug_assert_eq!(page_validity.len(), mask.len()); + debug_assert!(mask.len() <= values.len()); + + if mask.unset_bits() == 0 { + return decode_optional_rle(values, target, 
page_validity); } - fn hybridrle_to_target(&self, value: u32) -> ParquetResult { - Ok(value) + if page_validity.unset_bits() == 0 { + return decode_masked_required_rle(values, target, mask); } - fn gather_one(&self, target: &mut Self::Target, value: u32) -> ParquetResult<()> { - target.push(value != 0); - Ok(()) + let mut im_target = MutableBitmap::new(); + decode_optional_rle(values, &mut im_target, page_validity)?; + + target.extend_from_bitmap(&filter_boolean_kernel(&im_target.freeze(), mask)); + + Ok(()) +} + +fn decode_required_plain( + mut values: BitmapIter<'_>, + limit: Option, + target: &mut MutableBitmap, +) -> ParquetResult<()> { + let limit = limit.unwrap_or(values.len()); + values.collect_n_into(target, limit); + Ok(()) +} + +fn decode_optional_plain( + mut values: BitmapIter<'_>, + target: &mut MutableBitmap, + page_validity: &Bitmap, +) -> ParquetResult<()> { + debug_assert!(page_validity.set_bits() <= values.len()); + + if page_validity.unset_bits() == 0 { + return decode_required_plain(values, Some(page_validity.len()), target); } - fn gather_repeated( - &self, - target: &mut Self::Target, - value: u32, - n: usize, - ) -> ParquetResult<()> { - target.extend_constant(n, value != 0); - Ok(()) + target.reserve(page_validity.len()); + + let mut validity_iter = page_validity.iter(); + while validity_iter.num_remaining() > 0 { + let num_valid = validity_iter.take_leading_ones(); + values.collect_n_into(target, num_valid); + + let num_invalid = validity_iter.take_leading_zeros(); + target.extend_constant(num_invalid, false); } - // @TODO: The slice impl here can speed some stuff up + Ok(()) } -struct BitmapCollector<'a, 'b>(&'b mut HybridRleDecoder<'a>); -impl BatchableCollector for BitmapCollector<'_, '_> { - fn reserve(target: &mut MutableBitmap, n: usize) { - target.reserve(n); - } - fn push_n(&mut self, target: &mut MutableBitmap, n: usize) -> ParquetResult<()> { - self.0.gather_n_into(target, n, &BitmapGatherer) +fn decode_masked_required_plain( + 
values: BitmapIter<'_>, + target: &mut MutableBitmap, + mask: &Bitmap, +) -> ParquetResult<()> { + debug_assert!(mask.len() <= values.len()); + + if mask.unset_bits() == 0 { + return decode_required_plain(values, Some(mask.len()), target); } - fn push_n_nulls(&mut self, target: &mut MutableBitmap, n: usize) -> ParquetResult<()> { - target.extend_constant(n, false); - Ok(()) + let mut im_target = MutableBitmap::new(); + decode_required_plain(values, Some(mask.len()), &mut im_target)?; + + target.extend_from_bitmap(&filter_boolean_kernel(&im_target.freeze(), mask)); + + Ok(()) +} + +fn decode_masked_optional_plain( + values: BitmapIter<'_>, + target: &mut MutableBitmap, + page_validity: &Bitmap, + mask: &Bitmap, +) -> ParquetResult<()> { + debug_assert_eq!(page_validity.len(), mask.len()); + debug_assert!(page_validity.set_bits() <= values.len()); + + if mask.unset_bits() == 0 { + return decode_optional_plain(values, target, page_validity); } - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - self.0.skip_in_place(n) + if page_validity.unset_bits() == 0 { + return decode_masked_required_plain(values, target, mask); } + + let mut im_target = MutableBitmap::new(); + decode_optional_plain(values, &mut im_target, page_validity)?; + + target.extend_from_bitmap(&filter_boolean_kernel(&im_target.freeze(), mask)); + + Ok(()) } impl ExactSize for (MutableBitmap, MutableBitmap) { @@ -207,46 +288,10 @@ impl Decoder for BooleanDecoder { ) } - fn deserialize_dict(&self, _: DictPage) -> ParquetResult { - Ok(()) - } - - fn decode_plain_encoded<'a>( - &mut self, - (values, validity): &mut Self::DecodedState, - page_values: &mut as utils::StateTranslation<'a, Self>>::PlainDecoder, - is_optional: bool, - page_validity: Option<&mut PageValidity<'a>>, - limit: usize, - ) -> ParquetResult<()> { - match page_validity { - None => { - page_values.collect_n_into(values, limit); - - if is_optional { - validity.extend_constant(limit, true); - } - }, - Some(page_validity) => { - 
extend_from_decoder(validity, page_validity, Some(limit), values, page_values)? - }, - } - + fn deserialize_dict(&mut self, _: DictPage) -> ParquetResult { Ok(()) } - fn decode_dictionary_encoded<'a>( - &mut self, - _decoded: &mut Self::DecodedState, - _page_values: &mut HybridRleDecoder<'a>, - _is_optional: bool, - _page_validity: Option<&mut PageValidity<'a>>, - _dict: &Self::Dict, - _limit: usize, - ) -> ParquetResult<()> { - unimplemented!() - } - fn finalize( &self, dtype: ArrowDataType, @@ -254,25 +299,112 @@ impl Decoder for BooleanDecoder { (values, validity): Self::DecodedState, ) -> ParquetResult { let validity = freeze_validity(validity); - Ok(BooleanArray::new(dtype, values.into(), validity)) + Ok(BooleanArray::new(dtype, values.freeze(), validity)) } -} -impl utils::NestedDecoder for BooleanDecoder { - fn validity_extend( - _: &mut utils::State<'_, Self>, - (_, validity): &mut Self::DecodedState, - value: bool, - n: usize, - ) { - validity.extend_constant(n, value); - } + fn extend_filtered_with_state( + &mut self, + state: utils::State<'_, Self>, + (target, validity): &mut Self::DecodedState, + filter: Option, + ) -> ParquetResult<()> { + match state.translation { + StateTranslation::Plain(values) => { + if state.is_optional { + append_validity( + state.page_validity.as_ref(), + filter.as_ref(), + validity, + values.len(), + ); + } + + let page_validity = constrain_page_validity( + values.len(), + state.page_validity.as_ref(), + filter.as_ref(), + ); - fn values_extend_nulls( - _: &mut utils::State<'_, Self>, - (values, _): &mut Self::DecodedState, - n: usize, - ) { - values.extend_constant(n, false); + match (filter, page_validity) { + (None, None) => decode_required_plain(values, None, target), + (Some(Filter::Range(rng)), None) if rng.start == 0 => { + decode_required_plain(values, Some(rng.end), target) + }, + (None, Some(page_validity)) => { + decode_optional_plain(values, target, &page_validity) + }, + (Some(Filter::Range(rng)), 
Some(page_validity)) if rng.start == 0 => { + decode_optional_plain(values, target, &page_validity) + }, + (Some(Filter::Mask(mask)), None) => { + decode_masked_required_plain(values, target, &mask) + }, + (Some(Filter::Mask(mask)), Some(page_validity)) => { + decode_masked_optional_plain(values, target, &page_validity, &mask) + }, + (Some(Filter::Range(rng)), None) => decode_masked_required_plain( + values, + target, + &filter_from_range(rng.clone()), + ), + (Some(Filter::Range(rng)), Some(page_validity)) => { + decode_masked_optional_plain( + values, + target, + &page_validity, + &filter_from_range(rng.clone()), + ) + }, + }?; + + Ok(()) + }, + StateTranslation::Rle(values) => { + if state.is_optional { + append_validity( + state.page_validity.as_ref(), + filter.as_ref(), + validity, + values.len(), + ); + } + + let page_validity = constrain_page_validity( + values.len(), + state.page_validity.as_ref(), + filter.as_ref(), + ); + + match (filter, page_validity) { + (None, None) => decode_required_rle(values, None, target), + (Some(Filter::Range(rng)), None) if rng.start == 0 => { + decode_required_rle(values, Some(rng.end), target) + }, + (None, Some(page_validity)) => { + decode_optional_rle(values, target, &page_validity) + }, + (Some(Filter::Range(rng)), Some(page_validity)) if rng.start == 0 => { + decode_optional_rle(values, target, &page_validity) + }, + (Some(Filter::Mask(filter)), None) => { + decode_masked_required_rle(values, target, &filter) + }, + (Some(Filter::Mask(filter)), Some(page_validity)) => { + decode_masked_optional_rle(values, target, &page_validity, &filter) + }, + (Some(Filter::Range(rng)), None) => { + decode_masked_required_rle(values, target, &filter_from_range(rng.clone())) + }, + (Some(Filter::Range(rng)), Some(page_validity)) => decode_masked_optional_rle( + values, + target, + &page_validity, + &filter_from_range(rng.clone()), + ), + }?; + + Ok(()) + }, + } } } diff --git 
a/crates/polars-parquet/src/arrow/read/deserialize/dictionary.rs b/crates/polars-parquet/src/arrow/read/deserialize/dictionary.rs index 478c7cca0f2e..6c149803a887 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/dictionary.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/dictionary.rs @@ -1,15 +1,13 @@ -use std::sync::atomic::AtomicUsize; - use arrow::array::{DictionaryArray, DictionaryKey, PrimitiveArray}; -use arrow::bitmap::MutableBitmap; +use arrow::bitmap::{Bitmap, MutableBitmap}; use arrow::datatypes::ArrowDataType; use super::utils::{ - self, dict_indices_decoder, extend_from_decoder, freeze_validity, BatchableCollector, Decoder, - DictDecodable, ExactSize, PageValidity, StateTranslation, + self, dict_indices_decoder, freeze_validity, unspecialized_decode, Decoder, ExactSize, + StateTranslation, }; use super::ParquetError; -use crate::parquet::encoding::hybrid_rle::{self, HybridRleDecoder, Translator}; +use crate::parquet::encoding::hybrid_rle::HybridRleDecoder; use crate::parquet::encoding::Encoding; use crate::parquet::error::ParquetResult; use crate::parquet::page::{DataPage, DictPage}; @@ -23,7 +21,7 @@ impl<'a, K: DictionaryKey, D: utils::DictDecodable> StateTranslation<'a, Diction _decoder: &DictionaryDecoder, page: &'a DataPage, _dict: Option<&'a as Decoder>::Dict>, - _page_validity: Option<&PageValidity<'a>>, + page_validity: Option<&Bitmap>, ) -> ParquetResult { if !matches!( page.encoding(), @@ -32,59 +30,13 @@ impl<'a, K: DictionaryKey, D: utils::DictDecodable> StateTranslation<'a, Diction return Err(utils::not_implemented(page)); } - dict_indices_decoder(page) - } - - fn len_when_not_nullable(&self) -> usize { - self.len() - } - - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - HybridRleDecoder::skip_in_place(self, n) - } - - fn extend_from_state( - &mut self, - decoder: &mut DictionaryDecoder, - decoded: &mut as Decoder>::DecodedState, - is_optional: bool, - page_validity: &mut Option>, - _: Option<&'a as 
Decoder>::Dict>, - additional: usize, - ) -> ParquetResult<()> { - let (values, validity) = decoded; - - let dict_size = decoder.dict_size.load(std::sync::atomic::Ordering::Relaxed); - - if dict_size == usize::MAX { - panic!("Dictionary not set for dictionary array"); - } - - let mut collector = DictArrayCollector { - values: self, - dict_size, - }; - - match page_validity { - None => { - collector.push_n(&mut decoded.0, additional)?; - - if is_optional { - validity.extend_constant(additional, true); - } - }, - Some(page_validity) => { - extend_from_decoder(validity, page_validity, Some(additional), values, collector)? - }, - } - - Ok(()) + dict_indices_decoder(page, page_validity.map_or(0, |bm| bm.unset_bits())) } } #[derive(Debug)] pub struct DictionaryDecoder { - dict_size: AtomicUsize, + dict_size: usize, decoder: D, _pd: std::marker::PhantomData, } @@ -92,7 +44,7 @@ pub struct DictionaryDecoder { impl DictionaryDecoder { pub fn new(decoder: D) -> Self { Self { - dict_size: AtomicUsize::new(usize::MAX), + dict_size: usize::MAX, decoder, _pd: std::marker::PhantomData, } @@ -112,10 +64,9 @@ impl utils::Decoder for DictionaryDec ) } - fn deserialize_dict(&self, page: DictPage) -> ParquetResult { + fn deserialize_dict(&mut self, page: DictPage) -> ParquetResult { let dict = self.decoder.deserialize_dict(page)?; - self.dict_size - .store(dict.len(), std::sync::atomic::Ordering::Relaxed); + self.dict_size = dict.len(); Ok(dict) } @@ -132,111 +83,37 @@ impl utils::Decoder for DictionaryDec self.decoder.finalize_dict_array(dtype, dict, keys) } - fn decode_plain_encoded<'a>( + fn extend_filtered_with_state( &mut self, - _decoded: &mut Self::DecodedState, - _page_values: &mut as StateTranslation<'a, Self>>::PlainDecoder, - _is_optional: bool, - _page_validity: Option<&mut PageValidity<'a>>, - _limit: usize, + state: utils::State<'_, Self>, + decoded: &mut Self::DecodedState, + filter: Option, ) -> ParquetResult<()> { - unreachable!() - } - - fn 
decode_dictionary_encoded<'a>( - &mut self, - _decoded: &mut Self::DecodedState, - _page_values: &mut HybridRleDecoder<'a>, - _is_optional: bool, - _page_validity: Option<&mut PageValidity<'a>>, - _dict: &Self::Dict, - _limit: usize, - ) -> ParquetResult<()> { - unreachable!() - } -} - -impl utils::NestedDecoder for DictionaryDecoder { - fn validity_extend( - _: &mut utils::State<'_, Self>, - (_, validity): &mut Self::DecodedState, - value: bool, - n: usize, - ) { - validity.extend_constant(n, value); - } - - fn values_extend_nulls( - _: &mut utils::State<'_, Self>, - (values, _): &mut Self::DecodedState, - n: usize, - ) { - values.resize(values.len() + n, K::default()); - } -} - -pub(crate) struct DictArrayCollector<'a, 'b> { - values: &'b mut hybrid_rle::HybridRleDecoder<'a>, - dict_size: usize, -} - -pub(crate) struct DictArrayTranslator { - dict_size: usize, -} - -impl BatchableCollector<(), Vec> for DictArrayCollector<'_, '_> { - fn reserve(target: &mut Vec, n: usize) { - target.reserve(n); - } - - fn push_n(&mut self, target: &mut Vec, n: usize) -> ParquetResult<()> { - let translator = DictArrayTranslator { - dict_size: self.dict_size, - }; - self.values - .translate_and_collect_n_into(target, n, &translator) - } + let keys = state.translation.collect()?; + let num_rows = keys.len(); + let mut iter = keys.into_iter(); - fn push_n_nulls(&mut self, target: &mut Vec, n: usize) -> ParquetResult<()> { - target.resize(target.len() + n, K::default()); - Ok(()) - } - - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - self.values.skip_in_place(n) - } -} - -impl Translator for DictArrayTranslator { - fn translate(&self, value: u32) -> ParquetResult { - let value = value as usize; - - if value >= self.dict_size || value > K::MAX_USIZE_VALUE { - return Err(ParquetError::oos("Dictionary index out-of-range")); - } + let dict_size = self.dict_size; - // SAFETY: value for sure fits in K - Ok(unsafe { K::from_usize_unchecked(value) }) - } - - fn 
translate_slice(&self, target: &mut Vec, source: &[u32]) -> ParquetResult<()> { - let Some(max) = source.iter().max() else { - return Ok(()); - }; + unspecialized_decode( + num_rows, + || { + let value = iter.next().unwrap(); - let max = *max as usize; - - if max >= self.dict_size || max > K::MAX_USIZE_VALUE { - return Err(ParquetError::oos("Dictionary index out-of-range")); - } + let value = value as usize; - // SAFETY: value for sure fits in K - target.extend( - source - .iter() - .map(|v| unsafe { K::from_usize_unchecked(*v as usize) }), - ); + if value >= dict_size || value > K::MAX_USIZE_VALUE { + return Err(ParquetError::oos("Dictionary index out-of-range")); + } - Ok(()) + // SAFETY: value for sure fits in K + Ok(unsafe { K::from_usize_unchecked(value) }) + }, + filter, + state.page_validity, + state.is_optional, + &mut decoded.1, + &mut decoded.0, + ) } } diff --git a/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary.rs b/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary.rs index 5657a20dd151..7ae39b3366ad 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary.rs @@ -1,13 +1,25 @@ -use arrow::array::{DictionaryArray, DictionaryKey, FixedSizeBinaryArray, PrimitiveArray}; -use arrow::bitmap::MutableBitmap; +use arrow::array::{ + DictionaryArray, DictionaryKey, FixedSizeBinaryArray, PrimitiveArray, Splitable, +}; +use arrow::bitmap::{Bitmap, MutableBitmap}; +use arrow::buffer::Buffer; use arrow::datatypes::ArrowDataType; - -use super::utils::{dict_indices_decoder, extend_from_decoder, freeze_validity, Decoder}; -use crate::parquet::encoding::hybrid_rle::gatherer::HybridRleGatherer; +use arrow::storage::SharedStorage; +use arrow::types::{ + Bytes12Alignment4, Bytes16Alignment16, Bytes1Alignment1, Bytes2Alignment2, Bytes32Alignment16, + Bytes4Alignment4, Bytes8Alignment8, +}; + +use super::utils::array_chunks::ArrayChunks; +use 
super::utils::dict_encoded::append_validity; +use super::utils::{dict_indices_decoder, freeze_validity, Decoder}; +use super::Filter; +use crate::parquet::encoding::hybrid_rle::{HybridRleChunk, HybridRleDecoder}; use crate::parquet::encoding::{hybrid_rle, Encoding}; use crate::parquet::error::{ParquetError, ParquetResult}; use crate::parquet::page::{split_buffer, DataPage, DictPage}; -use crate::read::deserialize::utils::{self, BatchableCollector, GatheredHybridRle, PageValidity}; +use crate::read::deserialize::utils; +use crate::read::deserialize::utils::dict_encoded::constrain_page_validity; #[allow(clippy::large_enum_variant)] #[derive(Debug)] @@ -16,12 +28,6 @@ pub(crate) enum StateTranslation<'a> { Dictionary(hybrid_rle::HybridRleDecoder<'a>), } -#[derive(Debug)] -pub struct FixedSizeBinary { - pub values: Vec, - pub size: usize, -} - impl<'a> utils::StateTranslation<'a, BinaryDecoder> for StateTranslation<'a> { type PlainDecoder = &'a [u8]; @@ -29,7 +35,7 @@ impl<'a> utils::StateTranslation<'a, BinaryDecoder> for StateTranslation<'a> { decoder: &BinaryDecoder, page: &'a DataPage, dict: Option<&'a ::Dict>, - _page_validity: Option<&PageValidity<'a>>, + page_validity: Option<&Bitmap>, ) -> ParquetResult { match (page.encoding(), dict) { (Encoding::Plain, _) => { @@ -44,266 +50,404 @@ impl<'a> utils::StateTranslation<'a, BinaryDecoder> for StateTranslation<'a> { Ok(Self::Plain(values, decoder.size)) }, (Encoding::PlainDictionary | Encoding::RleDictionary, Some(_)) => { - let values = dict_indices_decoder(page)?; + let values = + dict_indices_decoder(page, page_validity.map_or(0, |bm| bm.unset_bits()))?; Ok(Self::Dictionary(values)) }, _ => Err(utils::not_implemented(page)), } } +} - fn len_when_not_nullable(&self) -> usize { - match self { - Self::Plain(v, size) => v.len() / size, - Self::Dictionary(v) => v.len(), - } - } +pub(crate) struct BinaryDecoder { + pub(crate) size: usize, +} - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - if n == 0 { - 
return Ok(()); - } +pub(crate) enum FSBVec { + Size1(Vec), + Size2(Vec), + Size4(Vec), + Size8(Vec), + Size12(Vec), + Size16(Vec), + Size32(Vec), + Other(Vec, usize), +} - match self { - Self::Plain(v, size) => *v = &v[usize::min(v.len(), n * *size)..], - Self::Dictionary(v) => v.skip_in_place(n)?, +impl FSBVec { + pub fn new(size: usize) -> FSBVec { + match size { + 1 => Self::Size1(Vec::new()), + 2 => Self::Size2(Vec::new()), + 4 => Self::Size4(Vec::new()), + 8 => Self::Size8(Vec::new()), + 12 => Self::Size12(Vec::new()), + 16 => Self::Size16(Vec::new()), + 32 => Self::Size32(Vec::new()), + _ => Self::Other(Vec::new(), size), } - - Ok(()) } - fn extend_from_state( - &mut self, - decoder: &mut BinaryDecoder, - decoded: &mut ::DecodedState, - is_optional: bool, - page_validity: &mut Option>, - dict: Option<&'a ::Dict>, - additional: usize, - ) -> ParquetResult<()> { - use StateTranslation as T; - match self { - T::Plain(page_values, _) => decoder.decode_plain_encoded( - decoded, - page_values, - is_optional, - page_validity.as_mut(), - additional, - )?, - T::Dictionary(page_values) => decoder.decode_dictionary_encoded( - decoded, - page_values, - is_optional, - page_validity.as_mut(), - dict.unwrap(), - additional, - )?, - } - - Ok(()) + pub fn into_bytes_buffer(self) -> Buffer { + Buffer::from_storage(match self { + FSBVec::Size1(vec) => SharedStorage::bytes_from_pod_vec(vec), + FSBVec::Size2(vec) => SharedStorage::bytes_from_pod_vec(vec), + FSBVec::Size4(vec) => SharedStorage::bytes_from_pod_vec(vec), + FSBVec::Size8(vec) => SharedStorage::bytes_from_pod_vec(vec), + FSBVec::Size12(vec) => SharedStorage::bytes_from_pod_vec(vec), + FSBVec::Size16(vec) => SharedStorage::bytes_from_pod_vec(vec), + FSBVec::Size32(vec) => SharedStorage::bytes_from_pod_vec(vec), + FSBVec::Other(vec, _) => SharedStorage::from_vec(vec), + }) } } -pub(crate) struct BinaryDecoder { - pub(crate) size: usize, -} - impl utils::ExactSize for Vec { fn len(&self) -> usize { Vec::len(self) } } 
-impl utils::ExactSize for (FixedSizeBinary, MutableBitmap) { +impl utils::ExactSize for FSBVec { fn len(&self) -> usize { - self.0.values.len() / self.0.size + match self { + FSBVec::Size1(vec) => vec.len(), + FSBVec::Size2(vec) => vec.len(), + FSBVec::Size4(vec) => vec.len(), + FSBVec::Size8(vec) => vec.len(), + FSBVec::Size12(vec) => vec.len(), + FSBVec::Size16(vec) => vec.len(), + FSBVec::Size32(vec) => vec.len(), + FSBVec::Other(vec, size) => vec.len() / size, + } } } -impl Decoder for BinaryDecoder { - type Translation<'a> = StateTranslation<'a>; - type Dict = Vec; - type DecodedState = (FixedSizeBinary, MutableBitmap); - type Output = FixedSizeBinaryArray; - - fn with_capacity(&self, capacity: usize) -> Self::DecodedState { - let size = self.size; - - ( - FixedSizeBinary { - values: Vec::with_capacity(capacity * size), - size, - }, - MutableBitmap::with_capacity(capacity), - ) +impl utils::ExactSize for (FSBVec, MutableBitmap) { + fn len(&self) -> usize { + self.0.len() } +} - fn deserialize_dict(&self, page: DictPage) -> ParquetResult { - Ok(page.buffer.into_vec()) +fn decode_fsb_plain( + size: usize, + values: &[u8], + target: &mut FSBVec, + validity: &mut MutableBitmap, + is_optional: bool, + filter: Option, + page_validity: Option<&Bitmap>, +) -> ParquetResult<()> { + assert_ne!(size, 0); + assert_eq!(values.len() % size, 0); + + macro_rules! 
decode_static_size { + ($target:ident) => {{ + let values = ArrayChunks::new(values).ok_or_else(|| { + ParquetError::oos("Page content does not align with expected element size") + })?; + super::primitive::plain::decode_aligned_bytes_dispatch( + values, + is_optional, + page_validity, + filter, + validity, + $target, + ) + }}; } - fn decode_plain_encoded<'a>( - &mut self, - (values, validity): &mut Self::DecodedState, - page_values: &mut as utils::StateTranslation<'a, Self>>::PlainDecoder, - is_optional: bool, - page_validity: Option<&mut PageValidity<'a>>, - limit: usize, - ) -> ParquetResult<()> { - struct FixedSizeBinaryCollector<'a, 'b> { - slice: &'b mut &'a [u8], - size: usize, - } - - impl BatchableCollector<(), Vec> for FixedSizeBinaryCollector<'_, '_> { - fn reserve(target: &mut Vec, n: usize) { - target.reserve(n); - } - - fn push_n(&mut self, target: &mut Vec, n: usize) -> ParquetResult<()> { - let n = usize::min(n, self.slice.len() / self.size); - target.extend_from_slice(&self.slice[..n * self.size]); - *self.slice = &self.slice[n * self.size..]; - Ok(()) - } - - fn push_n_nulls(&mut self, target: &mut Vec, n: usize) -> ParquetResult<()> { - target.resize(target.len() + n * self.size, 0); - Ok(()) + use FSBVec as T; + match target { + T::Size1(target) => decode_static_size!(target), + T::Size2(target) => decode_static_size!(target), + T::Size4(target) => decode_static_size!(target), + T::Size8(target) => decode_static_size!(target), + T::Size12(target) => decode_static_size!(target), + T::Size16(target) => decode_static_size!(target), + T::Size32(target) => decode_static_size!(target), + T::Other(target, _) => { + // @NOTE: All these kernels are quite slow, but they should be very uncommon and the + // general case requires arbitrary length memcopies anyway. 
+ + if is_optional { + append_validity( + page_validity, + filter.as_ref(), + validity, + values.len() / size, + ); } - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - let n = usize::min(n, self.slice.len() / self.size); - *self.slice = &self.slice[n * self.size..]; - Ok(()) + let page_validity = + constrain_page_validity(values.len() / size, page_validity, filter.as_ref()); + + match (page_validity, filter.as_ref()) { + (None, None) => target.extend_from_slice(values), + (None, Some(filter)) => match filter { + Filter::Range(range) => { + target.extend_from_slice(&values[range.start * size..range.end * size]) + }, + Filter::Mask(bitmap) => { + let mut iter = bitmap.iter(); + let mut offset = 0; + + while iter.num_remaining() > 0 { + let num_selected = iter.take_leading_ones(); + target + .extend_from_slice(&values[offset * size..][..num_selected * size]); + offset += num_selected; + + let num_filtered = iter.take_leading_zeros(); + offset += num_filtered; + } + }, + }, + (Some(validity), None) => { + let mut iter = validity.iter(); + let mut offset = 0; + + while iter.num_remaining() > 0 { + let num_valid = iter.take_leading_ones(); + target.extend_from_slice(&values[offset * size..][..num_valid * size]); + offset += num_valid; + + let num_filtered = iter.take_leading_zeros(); + target.resize(target.len() + num_filtered * size, 0); + } + }, + (Some(validity), Some(filter)) => match filter { + Filter::Range(range) => { + let (skipped, active) = validity.split_at(range.start); + + let active = active.sliced(0, range.len()); + + let mut iter = active.iter(); + let mut offset = skipped.set_bits(); + + while iter.num_remaining() > 0 { + let num_valid = iter.take_leading_ones(); + target.extend_from_slice(&values[offset * size..][..num_valid * size]); + offset += num_valid; + + let num_filtered = iter.take_leading_zeros(); + target.resize(target.len() + num_filtered * size, 0); + } + }, + Filter::Mask(filter) => { + let mut offset = 0; + for (is_selected, 
is_valid) in filter.iter().zip(validity.iter()) { + if is_selected { + if is_valid { + target.extend_from_slice(&values[offset * size..][..size]); + } else { + target.resize(target.len() + size, 0); + } + } + + offset += usize::from(is_valid); + } + }, + }, } - } - let mut collector = FixedSizeBinaryCollector { - slice: page_values, - size: self.size, - }; - - match page_validity { - None => { - collector.push_n(&mut values.values, limit)?; + Ok(()) + }, + } +} - if is_optional { - validity.extend_constant(limit, true); - } - }, - Some(page_validity) => extend_from_decoder( - validity, +#[allow(clippy::too_many_arguments)] +fn decode_fsb_dict( + size: usize, + values: HybridRleDecoder<'_>, + dict: &FSBVec, + target: &mut FSBVec, + validity: &mut MutableBitmap, + is_optional: bool, + filter: Option, + page_validity: Option<&Bitmap>, +) -> ParquetResult<()> { + assert_ne!(size, 0); + + macro_rules! decode_static_size { + ($dict:ident, $target:ident) => {{ + super::utils::dict_encoded::decode_dict_dispatch( + values, + $dict, + is_optional, page_validity, - Some(limit), - &mut values.values, - collector, - )?, - } - - Ok(()) + filter, + validity, + $target, + ) + }}; } - fn decode_dictionary_encoded<'a>( - &mut self, - (values, validity): &mut Self::DecodedState, - page_values: &mut hybrid_rle::HybridRleDecoder<'a>, - is_optional: bool, - page_validity: Option<&mut PageValidity<'a>>, - dict: &Self::Dict, - limit: usize, - ) -> ParquetResult<()> { - struct FixedSizeBinaryGatherer<'a> { - dict: &'a [u8], - size: usize, - } - - impl<'a> HybridRleGatherer<&'a [u8]> for FixedSizeBinaryGatherer<'a> { - type Target = Vec; - - fn target_reserve(&self, target: &mut Self::Target, n: usize) { - target.reserve(n * self.size); + use FSBVec as T; + match (dict, target) { + (T::Size1(dict), T::Size1(target)) => decode_static_size!(dict, target), + (T::Size2(dict), T::Size2(target)) => decode_static_size!(dict, target), + (T::Size4(dict), T::Size4(target)) => 
decode_static_size!(dict, target), + (T::Size8(dict), T::Size8(target)) => decode_static_size!(dict, target), + (T::Size12(dict), T::Size12(target)) => decode_static_size!(dict, target), + (T::Size16(dict), T::Size16(target)) => decode_static_size!(dict, target), + (T::Size32(dict), T::Size32(target)) => decode_static_size!(dict, target), + (T::Other(dict, _), T::Other(target, _)) => { + // @NOTE: All these kernels are quite slow, but they should be very uncommon and the + // general case requires arbitrary length memcopies anyway. + + if is_optional { + append_validity( + page_validity, + filter.as_ref(), + validity, + values.len() / size, + ); } - fn target_num_elements(&self, target: &Self::Target) -> usize { - target.len() / self.size - } + let page_validity = + constrain_page_validity(values.len() / size, page_validity, filter.as_ref()); - fn hybridrle_to_target(&self, value: u32) -> ParquetResult<&'a [u8]> { - let value = value as usize; + let mut indexes = Vec::with_capacity(values.len()); - if value * self.size >= self.dict.len() { - return Err(ParquetError::oos( - "Fixed size binary dictionary index out-of-range", - )); + for chunk in values.into_chunk_iter() { + match chunk? { + HybridRleChunk::Rle(value, repeats) => { + indexes.resize(indexes.len() + repeats, value) + }, + HybridRleChunk::Bitpacked(decoder) => decoder.collect_into(&mut indexes), } - - Ok(&self.dict[value * self.size..(value + 1) * self.size]) } - fn gather_one(&self, target: &mut Self::Target, value: &'a [u8]) -> ParquetResult<()> { - // We make the null value length 0, which allows us to do this. 
- if value.is_empty() { - target.resize(target.len() + self.size, 0); - return Ok(()); - } - - target.extend_from_slice(value); - Ok(()) + match (page_validity, filter.as_ref()) { + (None, None) => target.extend( + indexes + .into_iter() + .flat_map(|v| &dict[(v as usize) * size..][..size]), + ), + (None, Some(filter)) => match filter { + Filter::Range(range) => target.extend( + indexes[range.start..range.end] + .iter() + .flat_map(|v| &dict[(*v as usize) * size..][..size]), + ), + Filter::Mask(bitmap) => { + let mut iter = bitmap.iter(); + let mut offset = 0; + + while iter.num_remaining() > 0 { + let num_selected = iter.take_leading_ones(); + target.extend( + indexes[offset..][..num_selected] + .iter() + .flat_map(|v| &dict[(*v as usize) * size..][..size]), + ); + offset += num_selected; + + let num_filtered = iter.take_leading_zeros(); + offset += num_filtered; + } + }, + }, + (Some(validity), None) => { + let mut iter = validity.iter(); + let mut offset = 0; + + while iter.num_remaining() > 0 { + let num_valid = iter.take_leading_ones(); + target.extend( + indexes[offset..][..num_valid] + .iter() + .flat_map(|v| &dict[(*v as usize) * size..][..size]), + ); + offset += num_valid; + + let num_filtered = iter.take_leading_zeros(); + target.resize(target.len() + num_filtered * size, 0); + } + }, + (Some(validity), Some(filter)) => match filter { + Filter::Range(range) => { + let (skipped, active) = validity.split_at(range.start); + + let active = active.sliced(0, range.len()); + + let mut iter = active.iter(); + let mut offset = skipped.set_bits(); + + while iter.num_remaining() > 0 { + let num_valid = iter.take_leading_ones(); + target.extend( + indexes[offset..][..num_valid] + .iter() + .flat_map(|v| &dict[(*v as usize) * size..][..size]), + ); + offset += num_valid; + + let num_filtered = iter.take_leading_zeros(); + target.resize(target.len() + num_filtered * size, 0); + } + }, + Filter::Mask(filter) => { + let mut offset = 0; + for (is_selected, is_valid) in 
filter.iter().zip(validity.iter()) { + if is_selected { + if is_valid { + target.extend_from_slice( + &dict[(indexes[offset] as usize) * size..][..size], + ); + } else { + target.resize(target.len() + size, 0); + } + } + + offset += usize::from(is_valid); + } + }, + }, } - fn gather_repeated( - &self, - target: &mut Self::Target, - value: &'a [u8], - n: usize, - ) -> ParquetResult<()> { - // We make the null value length 0, which allows us to do this. - if value.is_empty() { - target.resize(target.len() + n * self.size, 0); - return Ok(()); - } + Ok(()) + }, + _ => unreachable!(), + } +} - debug_assert_eq!(value.len(), self.size); - for _ in 0..n { - target.extend(value); - } +impl Decoder for BinaryDecoder { + type Translation<'a> = StateTranslation<'a>; + type Dict = FSBVec; + type DecodedState = (FSBVec, MutableBitmap); + type Output = FixedSizeBinaryArray; - Ok(()) - } - } + fn with_capacity(&self, capacity: usize) -> Self::DecodedState { + let size = self.size; - let gatherer = FixedSizeBinaryGatherer { - dict, - size: self.size, + let values = match size { + 1 => FSBVec::Size1(Vec::with_capacity(capacity)), + 2 => FSBVec::Size2(Vec::with_capacity(capacity)), + 4 => FSBVec::Size4(Vec::with_capacity(capacity)), + 8 => FSBVec::Size8(Vec::with_capacity(capacity)), + 12 => FSBVec::Size12(Vec::with_capacity(capacity)), + 16 => FSBVec::Size16(Vec::with_capacity(capacity)), + 32 => FSBVec::Size32(Vec::with_capacity(capacity)), + _ => FSBVec::Other(Vec::with_capacity(capacity * size), size), }; - // @NOTE: - // This is a special case in our gatherer. If the length of the value is 0, then we just - // resize with the appropriate size. Important is that this also works for FSL with size=0. 
- let null_value = &[]; - - match page_validity { - None => { - page_values.gather_n_into(&mut values.values, limit, &gatherer)?; - - if is_optional { - validity.extend_constant(limit, true); - } - }, - Some(page_validity) => { - let collector = GatheredHybridRle::new(page_values, &gatherer, null_value); - - extend_from_decoder( - validity, - page_validity, - Some(limit), - &mut values.values, - collector, - )?; - }, - } + (values, MutableBitmap::with_capacity(capacity)) + } - Ok(()) + fn deserialize_dict(&mut self, page: DictPage) -> ParquetResult { + let mut target = FSBVec::new(self.size); + decode_fsb_plain( + self.size, + page.buffer.as_ref(), + &mut target, + &mut MutableBitmap::new(), + false, + None, + None, + )?; + Ok(target) } fn finalize( @@ -313,12 +457,42 @@ impl Decoder for BinaryDecoder { (values, validity): Self::DecodedState, ) -> ParquetResult { let validity = freeze_validity(validity); + Ok(FixedSizeBinaryArray::new( dtype, - values.values.into(), + values.into_bytes_buffer(), validity, )) } + + fn extend_filtered_with_state( + &mut self, + state: utils::State<'_, Self>, + decoded: &mut Self::DecodedState, + filter: Option, + ) -> ParquetResult<()> { + match state.translation { + StateTranslation::Plain(values, size) => decode_fsb_plain( + size, + values, + &mut decoded.0, + &mut decoded.1, + state.is_optional, + filter, + state.page_validity.as_ref(), + ), + StateTranslation::Dictionary(values) => decode_fsb_dict( + self.size, + values, + state.dict.unwrap(), + &mut decoded.0, + &mut decoded.1, + state.is_optional, + filter, + state.page_validity.as_ref(), + ), + } + } } impl utils::DictDecodable for BinaryDecoder { @@ -328,29 +502,11 @@ impl utils::DictDecodable for BinaryDecoder { dict: Self::Dict, keys: PrimitiveArray, ) -> ParquetResult> { - let dict = - FixedSizeBinaryArray::new(ArrowDataType::FixedSizeBinary(self.size), dict.into(), None); + let dict = FixedSizeBinaryArray::new( + ArrowDataType::FixedSizeBinary(self.size), + 
dict.into_bytes_buffer(), + None, + ); Ok(DictionaryArray::try_new(dtype, keys, Box::new(dict)).unwrap()) } } - -impl utils::NestedDecoder for BinaryDecoder { - fn validity_extend( - _: &mut utils::State<'_, Self>, - (_, validity): &mut Self::DecodedState, - value: bool, - n: usize, - ) { - validity.extend_constant(n, value); - } - - fn values_extend_nulls( - _: &mut utils::State<'_, Self>, - (values, _): &mut Self::DecodedState, - n: usize, - ) { - values - .values - .resize(values.values.len() + n * values.size, 0); - } -} diff --git a/crates/polars-parquet/src/arrow/read/deserialize/nested_utils.rs b/crates/polars-parquet/src/arrow/read/deserialize/nested_utils.rs index d37a6d4bf3b1..78b4a813693d 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/nested_utils.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/nested_utils.rs @@ -1,15 +1,12 @@ -use arrow::bitmap::MutableBitmap; +use arrow::bitmap::utils::BitmapIter; +use arrow::bitmap::{Bitmap, MutableBitmap}; use arrow::datatypes::ArrowDataType; -use polars_error::PolarsResult; -use super::utils::{self, BatchableCollector}; -use super::{BasicDecompressor, Filter}; -use crate::parquet::encoding::hybrid_rle::gatherer::HybridRleGatherer; -use crate::parquet::encoding::hybrid_rle::HybridRleDecoder; +use super::{utils, BasicDecompressor, Filter}; +use crate::parquet::encoding::hybrid_rle::{HybridRleChunk, HybridRleDecoder}; use crate::parquet::error::ParquetResult; use crate::parquet::page::{split_buffer, DataPage}; use crate::parquet::read::levels::get_bit_width; -use crate::read::deserialize::utils::{hybrid_rle_count_zeros, BatchedCollector}; #[derive(Debug)] pub struct Nested { @@ -210,30 +207,78 @@ impl Nested { } } -pub struct BatchedNestedDecoder<'a, 'b, 'c, D: utils::NestedDecoder> { - state: &'b mut utils::State<'a, D>, - decoder: &'c mut D, +/// Utility structure to create a `Filter` and `Validity` mask for the leaf values. +/// +/// This batches the extending. 
+pub struct BatchedNestedDecoder<'a> { + pub(crate) num_waiting_valids: usize, + pub(crate) num_waiting_invalids: usize, + + filter: &'a mut MutableBitmap, + validity: &'a mut MutableBitmap, } -impl BatchableCollector<(), D::DecodedState> - for BatchedNestedDecoder<'_, '_, '_, D> -{ - fn reserve(_target: &mut D::DecodedState, _n: usize) { - unreachable!() +impl BatchedNestedDecoder<'_> { + fn push_valid(&mut self) -> ParquetResult<()> { + self.push_n_valids(1) + } + + fn push_invalid(&mut self) -> ParquetResult<()> { + self.push_n_invalids(1) } - fn push_n(&mut self, target: &mut D::DecodedState, n: usize) -> ParquetResult<()> { - self.decoder.push_n_valids(self.state, target, n)?; + fn push_n_valids(&mut self, n: usize) -> ParquetResult<()> { + if self.num_waiting_invalids == 0 { + self.num_waiting_valids += n; + return Ok(()); + } + + self.filter.extend_constant(self.num_waiting_valids, true); + self.validity.extend_constant(self.num_waiting_valids, true); + + self.filter.extend_constant(self.num_waiting_invalids, true); + self.validity + .extend_constant(self.num_waiting_invalids, false); + + self.num_waiting_valids = n; + self.num_waiting_invalids = 0; + Ok(()) } - fn push_n_nulls(&mut self, target: &mut D::DecodedState, n: usize) -> ParquetResult<()> { - self.decoder.push_n_nulls(self.state, target, n); + fn push_n_invalids(&mut self, n: usize) -> ParquetResult<()> { + self.num_waiting_invalids += n; Ok(()) } fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - self.state.skip_in_place(n) + if self.num_waiting_valids > 0 { + self.filter.extend_constant(self.num_waiting_valids, true); + self.validity.extend_constant(self.num_waiting_valids, true); + self.num_waiting_valids = 0; + } + if self.num_waiting_invalids > 0 { + self.filter.extend_constant(self.num_waiting_invalids, true); + self.validity + .extend_constant(self.num_waiting_invalids, false); + self.num_waiting_invalids = 0; + } + + self.filter.extend_constant(n, false); + 
self.validity.extend_constant(n, true); + + Ok(()) + } + + fn finalize(self) -> ParquetResult<()> { + self.filter.extend_constant(self.num_waiting_valids, true); + self.validity.extend_constant(self.num_waiting_valids, true); + + self.filter.extend_constant(self.num_waiting_invalids, true); + self.validity + .extend_constant(self.num_waiting_invalids, false); + + Ok(()) } } @@ -321,426 +366,199 @@ impl NestedState { } } -/// Calculate the number of leaf values that are covered by the first `limit` definition level -/// values. -fn limit_to_num_values( - def_iter: &HybridRleDecoder<'_>, - def_levels: &[u16], - limit: usize, -) -> ParquetResult { - struct NumValuesGatherer { - leaf_def_level: u16, - } - struct NumValuesState { - num_values: usize, - length: usize, - } +fn collect_level_values( + target: &mut Vec, + hybrid_rle: HybridRleDecoder<'_>, +) -> ParquetResult<()> { + target.reserve(hybrid_rle.len()); - impl HybridRleGatherer for NumValuesGatherer { - type Target = NumValuesState; + for chunk in hybrid_rle.into_chunk_iter() { + let chunk = chunk?; - fn target_reserve(&self, _target: &mut Self::Target, _n: usize) {} - - fn target_num_elements(&self, target: &Self::Target) -> usize { - target.length - } - - fn hybridrle_to_target(&self, value: u32) -> ParquetResult { - Ok(value) - } - - fn gather_one(&self, target: &mut Self::Target, value: u32) -> ParquetResult<()> { - target.num_values += usize::from(value == self.leaf_def_level as u32); - target.length += 1; - Ok(()) - } - - fn gather_repeated( - &self, - target: &mut Self::Target, - value: u32, - n: usize, - ) -> ParquetResult<()> { - target.num_values += n * usize::from(value == self.leaf_def_level as u32); - target.length += n; - Ok(()) + match chunk { + HybridRleChunk::Rle(value, size) => { + target.resize(target.len() + size, value as u16); + }, + HybridRleChunk::Bitpacked(decoder) => { + decoder.lower_element::()?.collect_into(target); + }, } } - let mut state = NumValuesState { - num_values: 0, - 
length: 0, - }; - def_iter.clone().gather_n_into( - &mut state, - limit, - &NumValuesGatherer { - leaf_def_level: *def_levels.last().unwrap(), - }, - )?; - - Ok(state.num_values) + Ok(()) } -fn idx_to_limit(rep_iter: &HybridRleDecoder<'_>, idx: usize) -> ParquetResult { - struct RowIdxOffsetGatherer; - struct RowIdxOffsetState { - num_elements_seen: usize, - top_level_limit: usize, - found: Option, - } - - impl HybridRleGatherer for RowIdxOffsetGatherer { - type Target = RowIdxOffsetState; - - fn target_reserve(&self, _target: &mut Self::Target, _n: usize) {} - - fn target_num_elements(&self, target: &Self::Target) -> usize { - target.num_elements_seen - } - - fn hybridrle_to_target(&self, value: u32) -> ParquetResult { - Ok(value == 0) - } - - fn gather_one(&self, target: &mut Self::Target, value: bool) -> ParquetResult<()> { - let idx = target.num_elements_seen; - target.num_elements_seen += 1; - - if !value || target.found.is_some() { - return Ok(()); - } - - if target.top_level_limit > 0 { - target.top_level_limit -= 1; - return Ok(()); - } - - target.found = Some(idx); - - Ok(()) - } - - fn gather_repeated( - &self, - target: &mut Self::Target, - value: bool, - n: usize, - ) -> ParquetResult<()> { - let idx = target.num_elements_seen; - target.num_elements_seen += n; - - if !value || target.found.is_some() { - return Ok(()); - } - - if target.top_level_limit >= n { - target.top_level_limit -= n; - return Ok(()); - } - - target.found = Some(idx + target.top_level_limit); - target.top_level_limit = 0; - - Ok(()) - } - - // @TODO: Add specialization for other methods - } - - let mut state = RowIdxOffsetState { - num_elements_seen: 0, - top_level_limit: idx, - found: None, - }; - - const ROW_IDX_BATCH_SIZE: usize = 1024; - - let mut row_idx_iter = rep_iter.clone(); - while row_idx_iter.len() > 0 && state.found.is_none() { - row_idx_iter.gather_n_into(&mut state, ROW_IDX_BATCH_SIZE, &RowIdxOffsetGatherer)?; - } - - Ok(state.found.unwrap_or(rep_iter.len())) +/// 
State to keep track of how many top-level values (i.e. rows) still need to be skipped and +/// collected. +/// +/// This state should be kept between pages because a top-level value / row value may span several +/// pages. +/// +/// - `num_skips = Some(n)` means that it will skip till the `n + 1`-th occurrence of the repetition +/// level of `0` (i.e. the start of a top-level value / row value). +/// - `num_collects = Some(n)` means that it will collect values till the `n + 1`-th occurrence of +/// the repetition level of `0` (i.e. the start of a top-level value / row value). +struct DecodingState { + num_skips: Option, + num_collects: Option, } #[allow(clippy::too_many_arguments)] -fn extend_offsets2<'a, D: utils::NestedDecoder>( - mut def_iter: HybridRleDecoder<'a>, - mut rep_iter: HybridRleDecoder<'a>, - batched_collector: &mut BatchedCollector< - '_, - (), - D::DecodedState, - BatchedNestedDecoder<'a, '_, '_, D>, - >, - nested: &mut [Nested], - filter: Option, - - def_levels: &[u16], - rep_levels: &[u16], -) -> PolarsResult<()> { - debug_assert_eq!(def_iter.len(), rep_iter.len()); - - match filter { - None => { - let limit = def_iter.len(); - - extend_offsets_limited( - &mut def_iter, - &mut rep_iter, - batched_collector, - nested, - limit, - def_levels, - rep_levels, - )?; - - debug_assert_eq!(def_iter.len(), rep_iter.len()); - debug_assert_eq!(def_iter.len(), 0); - - Ok(()) - }, - Some(Filter::Range(range)) => { - let start = range.start; - let end = range.end; - - if start > 0 { - let start_cell = idx_to_limit(&rep_iter, start)?; - - let num_skipped_values = limit_to_num_values(&def_iter, def_levels, start_cell)?; - batched_collector.skip_in_place(num_skipped_values)?; +fn decode_nested( + mut current_def_levels: &[u16], + mut current_rep_levels: &[u16], - rep_iter.skip_in_place(start_cell)?; - def_iter.skip_in_place(start_cell)?; - } - - if end - start > 0 { - let limit = idx_to_limit(&rep_iter, end - start)?; - - extend_offsets_limited( - &mut def_iter, - 
&mut rep_iter, - batched_collector, - nested, - limit, - def_levels, - rep_levels, - )?; - } - - // @NOTE: This is kind of unused - let last_skip = def_iter.len(); - let num_skipped_values = limit_to_num_values(&def_iter, def_levels, last_skip)?; - batched_collector.skip_in_place(num_skipped_values)?; - rep_iter.skip_in_place(last_skip)?; - def_iter.skip_in_place(last_skip)?; - - Ok(()) - }, - Some(Filter::Mask(bitmap)) => { - let mut iter = bitmap.iter(); - while iter.num_remaining() > 0 { - let num_zeros = iter.take_leading_zeros(); - if num_zeros > 0 { - let offset = idx_to_limit(&rep_iter, num_zeros)?; - let num_skipped_values = limit_to_num_values(&def_iter, def_levels, offset)?; - batched_collector.skip_in_place(num_skipped_values)?; - rep_iter.skip_in_place(offset)?; - def_iter.skip_in_place(offset)?; - } - - let num_ones = iter.take_leading_ones(); - if num_ones > 0 { - let limit = idx_to_limit(&rep_iter, num_ones)?; - extend_offsets_limited( - &mut def_iter, - &mut rep_iter, - batched_collector, - nested, - limit, - def_levels, - rep_levels, - )?; - } - } + batched_collector: &mut BatchedNestedDecoder<'_>, + nested: &mut [Nested], - Ok(()) - }, - } -} + state: &mut DecodingState, + top_level_filter: &mut BitmapIter<'_>, -fn extend_offsets_limited<'a, D: utils::NestedDecoder>( - def_iter: &mut HybridRleDecoder<'a>, - rep_iter: &mut HybridRleDecoder<'a>, - batched_collector: &mut BatchedCollector< - '_, - (), - D::DecodedState, - BatchedNestedDecoder<'a, '_, '_, D>, - >, - nested: &mut [Nested], - mut limit: usize, // Amortized allocations def_levels: &[u16], rep_levels: &[u16], -) -> PolarsResult<()> { - #[derive(Default)] - struct LevelGatherer<'a>(std::marker::PhantomData<&'a ()>); - struct LevelGathererState<'a> { - offset: usize, - slice: &'a mut [u16], - } - - impl<'a> HybridRleGatherer for LevelGatherer<'a> { - type Target = LevelGathererState<'a>; - - fn target_reserve(&self, _target: &mut Self::Target, _n: usize) {} - - fn target_num_elements(&self, 
target: &Self::Target) -> usize { - target.offset - } - - fn hybridrle_to_target(&self, value: u32) -> ParquetResult { - debug_assert!(value <= u16::MAX as u32); - Ok(value as u16) - } - - fn gather_one(&self, target: &mut Self::Target, value: u16) -> ParquetResult<()> { - debug_assert!(target.offset < target.slice.len()); - - target.slice[target.offset] = value; - target.offset += 1; - - Ok(()) - } +) -> ParquetResult<()> { + let max_depth = nested.len(); + let leaf_def_level = *def_levels.last().unwrap(); + + while !current_def_levels.is_empty() { + debug_assert_eq!(current_def_levels.len(), current_rep_levels.len()); + + // Handle skips + if let Some(ref mut num_skips) = state.num_skips { + let mut i = 0; + let mut num_skipped_values = 0; + while i < current_def_levels.len() && (*num_skips > 0 || current_rep_levels[i] != 0) { + let def = current_def_levels[i]; + let rep = current_rep_levels[i]; + + *num_skips -= usize::from(rep == 0); + i += 1; + + // @NOTE: + // We don't need to account for higher def-levels that imply extra values, since we + // don't have those higher levels either. 
+ num_skipped_values += usize::from(def == leaf_def_level); + } + batched_collector.skip_in_place(num_skipped_values)?; - fn gather_repeated( - &self, - target: &mut Self::Target, - value: u16, - n: usize, - ) -> ParquetResult<()> { - debug_assert!(target.offset + n <= target.slice.len()); + current_def_levels = &current_def_levels[i..]; + current_rep_levels = &current_rep_levels[i..]; - for i in 0..n { - target.slice[target.offset + i] = value; + if current_def_levels.is_empty() { + break; + } else { + state.num_skips = None; } - target.offset += n; - - Ok(()) } - // @TODO: Add specialization for other methods - } - - let mut def_values = [0u16; DECODE_BATCH_SIZE]; - let mut rep_values = [0u16; DECODE_BATCH_SIZE]; - - let max_depth = nested.len(); - - const DECODE_BATCH_SIZE: usize = 1024; - while def_iter.len() > 0 && limit > 0 { - let additional = usize::min(limit, DECODE_BATCH_SIZE); + // Handle collects + if let Some(ref mut num_collects) = state.num_collects { + let mut i = 0; + while i < current_def_levels.len() && (*num_collects > 0 || current_rep_levels[i] != 0) + { + let def = current_def_levels[i]; + let rep = current_rep_levels[i]; - let mut def_state = LevelGathererState { - offset: 0, - slice: &mut def_values, - }; - let mut rep_state = LevelGathererState { - offset: 0, - slice: &mut rep_values, - }; + *num_collects -= usize::from(rep == 0); + i += 1; - def_iter.gather_n_into(&mut def_state, additional, &LevelGatherer::default())?; - rep_iter.gather_n_into(&mut rep_state, additional, &LevelGatherer::default())?; + let mut is_required = false; - debug_assert_eq!(def_state.offset, rep_state.offset); - debug_assert_eq!(def_state.offset, additional); - - for i in 0..additional { - let def = def_values[i]; - let rep = rep_values[i]; + for depth in 0..max_depth { + // Defines whether this element is defined at `depth` + // + // e.g. [ [ [ 1 ] ] ] is defined at [ ... ], [ [ ... ] ], [ [ [ ... ] ] ] and + // [ [ [ 1 ] ] ]. 
+ let is_defined_at_this_depth = + rep <= rep_levels[depth] && def >= def_levels[depth]; - let mut is_required = false; + let length = nested + .get(depth + 1) + .map(|x| x.len() as i64) + // the last depth is the leaf, which is always increased by 1 + .unwrap_or(1); - for depth in 0..max_depth { - // Defines whether this element is defined at `depth` - // - // e.g. [ [ [ 1 ] ] ] is defined at [ ... ], [ [ ... ] ], [ [ [ ... ] ] ] and - // [ [ [ 1 ] ] ]. - let is_defined_at_this_depth = rep <= rep_levels[depth] && def >= def_levels[depth]; + let nest = &mut nested[depth]; - let length = nested - .get(depth + 1) - .map(|x| x.len() as i64) - // the last depth is the leaf, which is always increased by 1 - .unwrap_or(1); + let is_valid = !nest.is_nullable() || def > def_levels[depth]; - let nest = &mut nested[depth]; + if is_defined_at_this_depth && !is_valid { + let mut num_elements = 1; - let is_valid = !nest.is_nullable() || def > def_levels[depth]; + nest.push(length, is_valid); - if is_defined_at_this_depth && !is_valid { - let mut num_elements = 1; + for embed_depth in depth..max_depth { + let embed_length = nested + .get(embed_depth + 1) + .map(|x| x.len() as i64) + // the last depth is the leaf, which is always increased by 1 + .unwrap_or(1); - nest.push(length, is_valid); + let embed_nest = &mut nested[embed_depth]; - for embed_depth in depth..max_depth { - let embed_length = nested - .get(embed_depth + 1) - .map(|x| x.len() as i64) - // the last depth is the leaf, which is always increased by 1 - .unwrap_or(1); + if embed_depth > depth { + for _ in 0..num_elements { + embed_nest.push_default(embed_length); + } + } - let embed_nest = &mut nested[embed_depth]; + let embed_num_values = embed_nest.invalid_num_values(); + num_elements *= embed_num_values; - if embed_depth > depth { - for _ in 0..num_elements { - embed_nest.push_default(embed_length); + if embed_num_values == 0 { + break; } } - let embed_num_values = embed_nest.invalid_num_values(); - num_elements 
*= embed_num_values; + batched_collector.push_n_invalids(num_elements)?; - if embed_num_values == 0 { - break; - } + break; } - batched_collector.push_n_invalids(num_elements); + if is_required || is_defined_at_this_depth { + nest.push(length, is_valid); - break; - } - - if is_required || is_defined_at_this_depth { - nest.push(length, is_valid); + if depth == max_depth - 1 { + // the leaf / primitive + let is_valid = (def != def_levels[depth]) || !nest.is_nullable(); - if depth == max_depth - 1 { - // the leaf / primitive - let is_valid = (def != def_levels[depth]) || !nest.is_nullable(); - - if is_valid { - batched_collector.push_valid()?; - } else { - batched_collector.push_invalid(); + if is_valid { + batched_collector.push_valid()?; + } else { + batched_collector.push_invalid()?; + } } } + + is_required = (is_required || is_defined_at_this_depth) + && nest.is_required() + && !is_valid; } + } + + current_def_levels = &current_def_levels[i..]; + current_rep_levels = &current_rep_levels[i..]; - is_required = - (is_required || is_defined_at_this_depth) && nest.is_required() && !is_valid; + if current_def_levels.is_empty() { + break; + } else { + state.num_collects = None; } } - limit -= additional; + if top_level_filter.num_remaining() == 0 { + break; + } + + state.num_skips = Some(top_level_filter.take_leading_zeros()).filter(|v| *v != 0); + state.num_collects = Some(top_level_filter.take_leading_ones()).filter(|v| *v != 0); } Ok(()) } -pub struct PageNestedDecoder { +pub struct PageNestedDecoder { pub iter: BasicDecompressor, pub dtype: ArrowDataType, pub dict: Option, @@ -763,11 +581,11 @@ fn level_iters(page: &DataPage) -> ParquetResult<(HybridRleDecoder, HybridRleDec Ok((def_iter, rep_iter)) } -impl PageNestedDecoder { +impl PageNestedDecoder { pub fn new( mut iter: BasicDecompressor, dtype: ArrowDataType, - decoder: D, + mut decoder: D, init: Vec, ) -> ParquetResult { let dict_page = iter.read_dict_page()?; @@ -795,201 +613,92 @@ impl PageNestedDecoder { // Amortize the 
allocations. let (def_levels, rep_levels) = nested_state.levels(); - match filter { - None => { - loop { - let Some(page) = self.iter.next() else { - break; - }; - let page = page?; - let page = page.decompress(&mut self.iter)?; - - let mut state = - utils::State::new_nested(&self.decoder, &page, self.dict.as_ref())?; - let (def_iter, rep_iter) = level_iters(&page)?; - - // @TODO: move this to outside the loop. - let mut batched_collector = BatchedCollector::new( - BatchedNestedDecoder { - state: &mut state, - decoder: &mut self.decoder, - }, - &mut target, - ); - - extend_offsets2( - def_iter, - rep_iter, - &mut batched_collector, - &mut nested_state.nested, - None, - &def_levels, - &rep_levels, - )?; - - batched_collector.finalize()?; - - drop(state); - self.iter.reuse_page_buffer(page); - } - }, - Some(mut filter) => { - enum PageStartAction { - Skip, - Collect, - } + let mut current_def_levels = Vec::::new(); + let mut current_rep_levels = Vec::::new(); + + let (mut decode_state, top_level_filter) = match filter { + None => ( + DecodingState { + num_skips: None, + num_collects: Some(usize::MAX), + }, + Bitmap::new(), + ), + Some(Filter::Range(range)) => ( + DecodingState { + num_skips: Some(range.start), + num_collects: Some(range.len()), + }, + Bitmap::new(), + ), + Some(Filter::Mask(mask)) => ( + DecodingState { + num_skips: None, + num_collects: None, + }, + mask, + ), + }; - // We may have an action (skip / collect) for one row value left over from the - // previous page. Every page may state what the next page needs to do until the - // first of its own row values (rep_lvl = 0). 
- let mut last_row_value_action = PageStartAction::Skip; - let mut num_rows_remaining = filter.num_rows(); + let mut top_level_filter = top_level_filter.iter(); - while num_rows_remaining > 0 - || matches!(last_row_value_action, PageStartAction::Collect) - { - let Some(page) = self.iter.next() else { - break; - }; - let page = page?; - // We cannot lazily decompress because we don't have the number of row values - // at this point. We need repetition levels for that. *sign*. In general, lazy - // decompression is quite difficult with nested values. - // - // @TODO - // Lazy decompression is quite doable in the V2 specification since that does - // not compress the repetition and definition levels. However, not a lot of - // people use the V2 specification. So let us ignore that for now. - let page = page.decompress(&mut self.iter)?; + loop { + let Some(page) = self.iter.next() else { + break; + }; + let page = page?; + let page = page.decompress(&mut self.iter)?; - let (mut def_iter, mut rep_iter) = level_iters(&page)?; + let (mut def_iter, mut rep_iter) = level_iters(&page)?; - let mut state; - let mut batched_collector; + let num_levels = def_iter.len().min(rep_iter.len()); + def_iter.limit_to(num_levels); + rep_iter.limit_to(num_levels); - let start_length = nested_state.len(); + current_def_levels.clear(); + current_rep_levels.clear(); - // rep lvl == 0 ==> row value - let num_row_values = hybrid_rle_count_zeros(&rep_iter)?; + collect_level_values(&mut current_def_levels, def_iter)?; + collect_level_values(&mut current_rep_levels, rep_iter)?; - let state_filter; - (state_filter, filter) = Filter::split_at(&filter, num_row_values); + let mut leaf_filter = MutableBitmap::new(); + let mut leaf_validity = MutableBitmap::new(); - match last_row_value_action { - PageStartAction::Skip => { - // Fast path: skip the whole page. - // No new row values or we don't care about any of the row values. 
- if num_row_values == 0 && state_filter.num_rows() == 0 { - self.iter.reuse_page_buffer(page); - continue; - } + // @TODO: move this to outside the loop. + let mut batched_collector = BatchedNestedDecoder { + num_waiting_valids: 0, + num_waiting_invalids: 0, - let limit = idx_to_limit(&rep_iter, 0)?; - - // We just saw that we had at least one row value. - debug_assert!(limit < rep_iter.len()); - - state = - utils::State::new_nested(&self.decoder, &page, self.dict.as_ref())?; - batched_collector = BatchedCollector::new( - BatchedNestedDecoder { - state: &mut state, - decoder: &mut self.decoder, - }, - &mut target, - ); - - let num_leaf_values = - limit_to_num_values(&def_iter, &def_levels, limit)?; - batched_collector.skip_in_place(num_leaf_values)?; - rep_iter.skip_in_place(limit)?; - def_iter.skip_in_place(limit)?; - }, - PageStartAction::Collect => { - let limit = if num_row_values == 0 { - rep_iter.len() - } else { - idx_to_limit(&rep_iter, 0)? - }; - - // Fast path: we are not interested in any of the row values in this - // page. - if limit == 0 && state_filter.num_rows() == 0 { - self.iter.reuse_page_buffer(page); - last_row_value_action = PageStartAction::Skip; - continue; - } + filter: &mut leaf_filter, + validity: &mut leaf_validity, + }; - state = - utils::State::new_nested(&self.decoder, &page, self.dict.as_ref())?; - batched_collector = BatchedCollector::new( - BatchedNestedDecoder { - state: &mut state, - decoder: &mut self.decoder, - }, - &mut target, - ); - - extend_offsets_limited( - &mut def_iter, - &mut rep_iter, - &mut batched_collector, - &mut nested_state.nested, - limit, - &def_levels, - &rep_levels, - )?; - - // No new row values. Keep collecting. 
- if rep_iter.len() == 0 { - batched_collector.finalize()?; - - let num_done = nested_state.len() - start_length; - debug_assert!(num_done <= num_rows_remaining); - debug_assert!(num_done <= num_row_values); - num_rows_remaining -= num_done; - - drop(state); - self.iter.reuse_page_buffer(page); - - continue; - } - }, - } + decode_nested( + &current_def_levels, + &current_rep_levels, + &mut batched_collector, + &mut nested_state.nested, + &mut decode_state, + &mut top_level_filter, + &def_levels, + &rep_levels, + )?; - // Two cases: - // 1. First page: Must always start with a row value. - // 2. Other pages: If they did not have a row value, they would have been - // handled by the last_row_value_action. - debug_assert!(num_row_values > 0); - - last_row_value_action = if state_filter.do_include_at(num_row_values - 1) { - PageStartAction::Collect - } else { - PageStartAction::Skip - }; - - extend_offsets2( - def_iter, - rep_iter, - &mut batched_collector, - &mut nested_state.nested, - Some(state_filter), - &def_levels, - &rep_levels, - )?; - - batched_collector.finalize()?; - - let num_done = nested_state.len() - start_length; - debug_assert!(num_done <= num_rows_remaining); - debug_assert!(num_done <= num_row_values); - num_rows_remaining -= num_done; - - drop(state); - self.iter.reuse_page_buffer(page); - } - }, + batched_collector.finalize()?; + + let state = utils::State::new_nested( + &self.decoder, + &page, + self.dict.as_ref(), + Some(leaf_validity.freeze()), + )?; + state.decode( + &mut self.decoder, + &mut target, + Some(Filter::Mask(leaf_filter.freeze())), + )?; + + self.iter.reuse_page_buffer(page); } // we pop the primitive off here. diff --git a/crates/polars-parquet/src/arrow/read/deserialize/null.rs b/crates/polars-parquet/src/arrow/read/deserialize/null.rs index e12757fe2e20..7ec884715318 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/null.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/null.rs @@ -3,11 +3,11 @@ //! `DecodedState`. 
use arrow::array::{Array, NullArray}; +use arrow::bitmap::Bitmap; use arrow::datatypes::ArrowDataType; use super::utils; use super::utils::filter::Filter; -use crate::parquet::encoding::hybrid_rle; use crate::parquet::error::ParquetResult; use crate::parquet::page::{DataPage, DictPage}; @@ -30,31 +30,10 @@ impl<'a> utils::StateTranslation<'a, NullDecoder> for () { _decoder: &NullDecoder, _page: &'a DataPage, _dict: Option<&'a ::Dict>, - _page_validity: Option<&utils::PageValidity<'a>>, + _page_validity: Option<&Bitmap>, ) -> ParquetResult { Ok(()) } - - fn len_when_not_nullable(&self) -> usize { - usize::MAX - } - - fn skip_in_place(&mut self, _: usize) -> ParquetResult<()> { - Ok(()) - } - - fn extend_from_state( - &mut self, - _decoder: &mut NullDecoder, - decoded: &mut ::DecodedState, - _is_optional: bool, - _page_validity: &mut Option>, - _: Option<&'a ::Dict>, - additional: usize, - ) -> ParquetResult<()> { - decoded.length += additional; - Ok(()) - } } impl utils::Decoder for NullDecoder { @@ -68,33 +47,10 @@ impl utils::Decoder for NullDecoder { NullArrayLength { length: 0 } } - fn deserialize_dict(&self, _: DictPage) -> ParquetResult { + fn deserialize_dict(&mut self, _: DictPage) -> ParquetResult { Ok(()) } - fn decode_plain_encoded<'a>( - &mut self, - _decoded: &mut Self::DecodedState, - _page_values: &mut as utils::StateTranslation<'a, Self>>::PlainDecoder, - _is_optional: bool, - _page_validity: Option<&mut utils::PageValidity<'a>>, - _limit: usize, - ) -> ParquetResult<()> { - unimplemented!() - } - - fn decode_dictionary_encoded<'a>( - &mut self, - _decoded: &mut Self::DecodedState, - _page_values: &mut hybrid_rle::HybridRleDecoder<'a>, - _is_optional: bool, - _page_validity: Option<&mut utils::PageValidity<'a>>, - _dict: &Self::Dict, - _limit: usize, - ) -> ParquetResult<()> { - unimplemented!() - } - fn finalize( &self, dtype: ArrowDataType, @@ -103,23 +59,17 @@ impl utils::Decoder for NullDecoder { ) -> ParquetResult { Ok(NullArray::new(dtype, 
decoded.length)) } -} - -impl utils::NestedDecoder for NullDecoder { - fn validity_extend( - _: &mut utils::State<'_, Self>, - _: &mut Self::DecodedState, - _value: bool, - _n: usize, - ) { - } - fn values_extend_nulls( - _state: &mut utils::State<'_, Self>, + fn extend_filtered_with_state( + &mut self, + _state: utils::State<'_, Self>, decoded: &mut Self::DecodedState, - n: usize, - ) { - decoded.length += n; + filter: Option, + ) -> ParquetResult<()> { + // @NOTE: This is only used by nested decoders. Those will always supply a mask. + let filter = filter.unwrap(); + decoded.length += filter.num_rows(); + Ok(()) } } @@ -151,12 +101,12 @@ pub fn iter_to_arrays( continue; } - let num_rows = match state_filter { + let num_page_rows = match state_filter { None => page.num_values(), Some(filter) => filter.num_rows(), }; - len = (len + num_rows).min(num_rows); + len = (len + num_page_rows).min(num_rows); } Ok(Box::new(NullArray::new(dtype, len))) diff --git a/crates/polars-parquet/src/arrow/read/deserialize/primitive/float.rs b/crates/polars-parquet/src/arrow/read/deserialize/primitive/float.rs index eb4815b6fbfe..225738b0c1fd 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/primitive/float.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/primitive/float.rs @@ -1,55 +1,53 @@ use arrow::array::{DictionaryArray, DictionaryKey, PrimitiveArray}; -use arrow::bitmap::MutableBitmap; +use arrow::bitmap::{Bitmap, MutableBitmap}; use arrow::datatypes::ArrowDataType; use arrow::types::NativeType; use super::super::utils; use super::{ - deserialize_plain, AsDecoderFunction, ClosureDecoderFunction, DecoderFunction, - PlainDecoderFnCollector, PrimitiveDecoder, UnitDecoderFunction, + AsDecoderFunction, ClosureDecoderFunction, DecoderFunction, PrimitiveDecoder, + UnitDecoderFunction, }; -use crate::parquet::encoding::hybrid_rle::DictionaryTranslator; use crate::parquet::encoding::{byte_stream_split, hybrid_rle, Encoding}; use crate::parquet::error::ParquetResult; 
use crate::parquet::page::{split_buffer, DataPage, DictPage}; use crate::parquet::types::{decode, NativeType as ParquetNativeType}; -use crate::read::deserialize::utils::array_chunks::ArrayChunks; use crate::read::deserialize::utils::{ - dict_indices_decoder, freeze_validity, BatchableCollector, Decoder, PageValidity, - TranslatedHybridRle, + dict_indices_decoder, freeze_validity, unspecialized_decode, }; +use crate::read::Filter; #[allow(clippy::large_enum_variant)] #[derive(Debug)] -pub(crate) enum StateTranslation<'a, P: ParquetNativeType> { - Plain(ArrayChunks<'a, P>), +pub(crate) enum StateTranslation<'a> { + Plain(&'a [u8]), Dictionary(hybrid_rle::HybridRleDecoder<'a>), ByteStreamSplit(byte_stream_split::Decoder<'a>), } -impl<'a, P, T, D> utils::StateTranslation<'a, FloatDecoder> for StateTranslation<'a, P> +impl<'a, P, T, D> utils::StateTranslation<'a, FloatDecoder> for StateTranslation<'a> where T: NativeType, P: ParquetNativeType, D: DecoderFunction, { - type PlainDecoder = ArrayChunks<'a, P>; + type PlainDecoder = &'a [u8]; fn new( _decoder: &FloatDecoder, page: &'a DataPage, dict: Option<&'a as utils::Decoder>::Dict>, - _page_validity: Option<&PageValidity<'a>>, + page_validity: Option<&Bitmap>, ) -> ParquetResult { match (page.encoding(), dict) { (Encoding::PlainDictionary | Encoding::RleDictionary, Some(_)) => { - let values = dict_indices_decoder(page)?; + let values = + dict_indices_decoder(page, page_validity.map_or(0, |bm| bm.unset_bits()))?; Ok(Self::Dictionary(values)) }, (Encoding::Plain, _) => { let values = split_buffer(page)?.values; - let chunks = ArrayChunks::new(values).unwrap(); - Ok(Self::Plain(chunks)) + Ok(Self::Plain(values)) }, (Encoding::ByteStreamSplit, _) => { let values = split_buffer(page)?.values; @@ -61,82 +59,6 @@ where _ => Err(utils::not_implemented(page)), } } - - fn len_when_not_nullable(&self) -> usize { - match self { - Self::Plain(n) => n.len(), - Self::Dictionary(n) => n.len(), - Self::ByteStreamSplit(n) => n.len(), - 
} - } - - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - if n == 0 { - return Ok(()); - } - - match self { - Self::Plain(t) => t.skip_in_place(n), - Self::Dictionary(t) => t.skip_in_place(n)?, - Self::ByteStreamSplit(t) => _ = t.iter_converted(|_| ()).nth(n - 1), - } - - Ok(()) - } - - fn extend_from_state( - &mut self, - decoder: &mut FloatDecoder, - decoded: &mut as utils::Decoder>::DecodedState, - is_optional: bool, - page_validity: &mut Option>, - dict: Option<&'a as utils::Decoder>::Dict>, - additional: usize, - ) -> ParquetResult<()> { - match self { - Self::Plain(page_values) => decoder.decode_plain_encoded( - decoded, - page_values, - is_optional, - page_validity.as_mut(), - additional, - )?, - Self::Dictionary(ref mut page) => decoder.decode_dictionary_encoded( - decoded, - page, - is_optional, - page_validity.as_mut(), - dict.unwrap(), - additional, - )?, - Self::ByteStreamSplit(page_values) => { - let (values, validity) = decoded; - - match page_validity { - None => { - values.extend( - page_values - .iter_converted(|v| decoder.0.decoder.decode(decode(v))) - .take(additional), - ); - - if is_optional { - validity.extend_constant(additional, true); - } - }, - Some(page_validity) => utils::extend_from_decoder( - validity, - page_validity, - Some(additional), - values, - &mut page_values.iter_converted(|v| decoder.0.decoder.decode(decode(v))), - )?, - } - }, - } - - Ok(()) - } } #[derive(Debug)] @@ -202,7 +124,7 @@ where P: ParquetNativeType, D: DecoderFunction, { - type Translation<'a> = StateTranslation<'a, P>; + type Translation<'a> = StateTranslation<'a>; type Dict = Vec; type DecodedState = (Vec, MutableBitmap); type Output = PrimitiveArray; @@ -214,84 +136,64 @@ where ) } - fn deserialize_dict(&self, page: DictPage) -> ParquetResult { - Ok(deserialize_plain::(&page.buffer, self.0.decoder)) - } - - fn decode_plain_encoded<'a>( - &mut self, - (values, validity): &mut Self::DecodedState, - page_values: &mut as utils::StateTranslation<'a, 
Self>>::PlainDecoder, - is_optional: bool, - page_validity: Option<&mut PageValidity<'a>>, - limit: usize, - ) -> ParquetResult<()> { - match page_validity { - None => { - PlainDecoderFnCollector { - chunks: page_values, - decoder: self.0.decoder, - _pd: std::marker::PhantomData, - } - .push_n(values, limit)?; + fn deserialize_dict(&mut self, page: DictPage) -> ParquetResult { + let values = page.buffer.as_ref(); - if is_optional { - validity.extend_constant(limit, true); - } - }, - Some(page_validity) => { - let collector = PlainDecoderFnCollector { - chunks: page_values, - decoder: self.0.decoder, - _pd: std::marker::PhantomData, - }; - - utils::extend_from_decoder( - validity, - page_validity, - Some(limit), - values, - collector, - )?; - }, - } - - Ok(()) + let mut target = Vec::with_capacity(page.num_values); + super::plain::decode( + values, + false, + None, + None, + &mut MutableBitmap::new(), + &mut self.0.intermediate, + &mut target, + self.0.decoder, + )?; + Ok(target) } - fn decode_dictionary_encoded<'a>( + fn extend_filtered_with_state( &mut self, - (values, validity): &mut Self::DecodedState, - page_values: &mut hybrid_rle::HybridRleDecoder<'a>, - is_optional: bool, - page_validity: Option<&mut PageValidity<'a>>, - dict: &Self::Dict, - limit: usize, + mut state: utils::State<'_, Self>, + decoded: &mut Self::DecodedState, + filter: Option, ) -> ParquetResult<()> { - let translator = DictionaryTranslator(dict); - - match page_validity { - None => { - page_values.translate_and_collect_n_into(values, limit, &translator)?; - - if is_optional { - validity.extend_constant(limit, true); - } - }, - Some(page_validity) => { - let translated_hybridrle = TranslatedHybridRle::new(page_values, &translator); - - utils::extend_from_decoder( - validity, - page_validity, - Some(limit), - values, - translated_hybridrle, - )?; + match state.translation { + StateTranslation::Plain(ref mut values) => super::plain::decode( + values, + state.is_optional, + 
state.page_validity.as_ref(), + filter, + &mut decoded.1, + &mut self.0.intermediate, + &mut decoded.0, + self.0.decoder, + ), + StateTranslation::Dictionary(ref mut indexes) => utils::dict_encoded::decode_dict( + indexes.clone(), + state.dict.unwrap(), + state.is_optional, + state.page_validity.as_ref(), + filter, + &mut decoded.1, + &mut decoded.0, + ), + StateTranslation::ByteStreamSplit(mut decoder) => { + let num_rows = decoder.len(); + let mut iter = decoder.iter_converted(|v| self.0.decoder.decode(decode(v))); + + unspecialized_decode( + num_rows, + || Ok(iter.next().unwrap()), + filter, + state.page_validity, + state.is_optional, + &mut decoded.1, + &mut decoded.0, + ) }, } - - Ok(()) } fn finalize( @@ -327,27 +229,3 @@ where Ok(DictionaryArray::try_new(dtype, keys, dict).unwrap()) } } - -impl utils::NestedDecoder for FloatDecoder -where - T: NativeType, - P: ParquetNativeType, - D: DecoderFunction, -{ - fn validity_extend( - _: &mut utils::State<'_, Self>, - (_, validity): &mut Self::DecodedState, - value: bool, - n: usize, - ) { - validity.extend_constant(n, value); - } - - fn values_extend_nulls( - _: &mut utils::State<'_, Self>, - (values, _): &mut Self::DecodedState, - n: usize, - ) { - values.resize(values.len() + n, T::default()); - } -} diff --git a/crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs b/crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs index ff9c0b014b08..087fc1c447d5 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs @@ -1,58 +1,55 @@ use arrow::array::{DictionaryArray, DictionaryKey, PrimitiveArray}; -use arrow::bitmap::MutableBitmap; +use arrow::bitmap::{Bitmap, MutableBitmap}; use arrow::datatypes::ArrowDataType; use arrow::types::NativeType; use super::super::utils; use super::{ - deserialize_plain, AsDecoderFunction, ClosureDecoderFunction, DecoderFunction, DeltaCollector, - 
DeltaTranslator, IntoDecoderFunction, PlainDecoderFnCollector, PrimitiveDecoder, - UnitDecoderFunction, + AsDecoderFunction, ClosureDecoderFunction, DecoderFunction, IntoDecoderFunction, + PrimitiveDecoder, UnitDecoderFunction, }; -use crate::parquet::encoding::hybrid_rle::{self, DictionaryTranslator}; -use crate::parquet::encoding::{byte_stream_split, delta_bitpacked, Encoding}; +use crate::parquet::encoding::{byte_stream_split, delta_bitpacked, hybrid_rle, Encoding}; use crate::parquet::error::ParquetResult; use crate::parquet::page::{split_buffer, DataPage, DictPage}; use crate::parquet::types::{decode, NativeType as ParquetNativeType}; -use crate::read::deserialize::utils::array_chunks::ArrayChunks; use crate::read::deserialize::utils::{ - dict_indices_decoder, freeze_validity, BatchableCollector, Decoder, PageValidity, - TranslatedHybridRle, + dict_indices_decoder, freeze_validity, unspecialized_decode, }; +use crate::read::Filter; #[allow(clippy::large_enum_variant)] #[derive(Debug)] -pub(crate) enum StateTranslation<'a, P: ParquetNativeType> { - Plain(ArrayChunks<'a, P>), +pub(crate) enum StateTranslation<'a> { + Plain(&'a [u8]), Dictionary(hybrid_rle::HybridRleDecoder<'a>), ByteStreamSplit(byte_stream_split::Decoder<'a>), DeltaBinaryPacked(delta_bitpacked::Decoder<'a>), } -impl<'a, P, T, D> utils::StateTranslation<'a, IntDecoder> for StateTranslation<'a, P> +impl<'a, P, T, D> utils::StateTranslation<'a, IntDecoder> for StateTranslation<'a> where T: NativeType, P: ParquetNativeType, i64: num_traits::AsPrimitive

, D: DecoderFunction, { - type PlainDecoder = ArrayChunks<'a, P>; + type PlainDecoder = &'a [u8]; fn new( _decoder: &IntDecoder, page: &'a DataPage, dict: Option<&'a as utils::Decoder>::Dict>, - _page_validity: Option<&PageValidity<'a>>, + page_validity: Option<&Bitmap>, ) -> ParquetResult { match (page.encoding(), dict) { (Encoding::PlainDictionary | Encoding::RleDictionary, Some(_)) => { - let values = dict_indices_decoder(page)?; + let values = + dict_indices_decoder(page, page_validity.map_or(0, |bm| bm.unset_bits()))?; Ok(Self::Dictionary(values)) }, (Encoding::Plain, _) => { let values = split_buffer(page)?.values; - let chunks = ArrayChunks::new(values).unwrap(); - Ok(Self::Plain(chunks)) + Ok(Self::Plain(values)) }, (Encoding::ByteStreamSplit, _) => { let values = split_buffer(page)?.values; @@ -70,115 +67,6 @@ where _ => Err(utils::not_implemented(page)), } } - - fn len_when_not_nullable(&self) -> usize { - match self { - Self::Plain(v) => v.len(), - Self::Dictionary(v) => v.len(), - Self::ByteStreamSplit(v) => v.len(), - Self::DeltaBinaryPacked(v) => v.len(), - } - } - - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - if n == 0 { - return Ok(()); - } - - match self { - Self::Plain(v) => v.skip_in_place(n), - Self::Dictionary(v) => v.skip_in_place(n)?, - Self::ByteStreamSplit(v) => _ = v.iter_converted(|_| ()).nth(n - 1), - Self::DeltaBinaryPacked(v) => v.skip_in_place(n)?, - } - - Ok(()) - } - - fn extend_from_state( - &mut self, - decoder: &mut IntDecoder, - decoded: &mut as utils::Decoder>::DecodedState, - is_optional: bool, - page_validity: &mut Option>, - dict: Option<&'a as utils::Decoder>::Dict>, - additional: usize, - ) -> ParquetResult<()> { - match self { - Self::Plain(page_values) => decoder.decode_plain_encoded( - decoded, - page_values, - is_optional, - page_validity.as_mut(), - additional, - )?, - Self::Dictionary(ref mut page) => decoder.decode_dictionary_encoded( - decoded, - page, - is_optional, - page_validity.as_mut(), - 
dict.unwrap(), - additional, - )?, - Self::ByteStreamSplit(page_values) => { - let (values, validity) = decoded; - - match page_validity { - None => { - values.extend( - page_values - .iter_converted(|v| decoder.0.decoder.decode(decode(v))) - .take(additional), - ); - - if is_optional { - validity.extend_constant(additional, true); - } - }, - Some(page_validity) => { - utils::extend_from_decoder( - validity, - page_validity, - Some(additional), - values, - &mut page_values - .iter_converted(|v| decoder.0.decoder.decode(decode(v))), - )?; - }, - } - }, - Self::DeltaBinaryPacked(page_values) => { - let (values, validity) = decoded; - - let mut gatherer = DeltaTranslator { - dfn: decoder.0.decoder, - _pd: std::marker::PhantomData, - }; - - match page_validity { - None => { - page_values.gather_n_into(values, additional, &mut gatherer)?; - - if is_optional { - validity.extend_constant(additional, true); - } - }, - Some(page_validity) => utils::extend_from_decoder( - validity, - page_validity, - Some(additional), - values, - DeltaCollector { - decoder: page_values, - gatherer, - }, - )?, - } - }, - } - - Ok(()) - } } /// Decoder of integer parquet type @@ -257,7 +145,7 @@ where i64: num_traits::AsPrimitive

, D: DecoderFunction, { - type Translation<'a> = StateTranslation<'a, P>; + type Translation<'a> = StateTranslation<'a>; type Dict = Vec; type DecodedState = (Vec, MutableBitmap); type Output = PrimitiveArray; @@ -269,84 +157,21 @@ where ) } - fn deserialize_dict(&self, page: DictPage) -> ParquetResult { - Ok(deserialize_plain::(&page.buffer, self.0.decoder)) - } - - fn decode_plain_encoded<'a>( - &mut self, - (values, validity): &mut Self::DecodedState, - page_values: &mut as utils::StateTranslation<'a, Self>>::PlainDecoder, - is_optional: bool, - page_validity: Option<&mut PageValidity<'a>>, - limit: usize, - ) -> ParquetResult<()> { - match page_validity { - None => { - PlainDecoderFnCollector { - chunks: page_values, - decoder: self.0.decoder, - _pd: Default::default(), - } - .push_n(values, limit)?; - - if is_optional { - validity.extend_constant(limit, true); - } - }, - Some(page_validity) => { - let collector = PlainDecoderFnCollector { - chunks: page_values, - decoder: self.0.decoder, - _pd: Default::default(), - }; - - utils::extend_from_decoder( - validity, - page_validity, - Some(limit), - values, - collector, - )?; - }, - } - - Ok(()) - } - - fn decode_dictionary_encoded<'a>( - &mut self, - (values, validity): &mut Self::DecodedState, - page_values: &mut hybrid_rle::HybridRleDecoder<'a>, - is_optional: bool, - page_validity: Option<&mut PageValidity<'a>>, - dict: &Self::Dict, - limit: usize, - ) -> ParquetResult<()> { - match page_validity { - None => { - let translator = DictionaryTranslator(dict); - page_values.translate_and_collect_n_into(values, limit, &translator)?; - - if is_optional { - validity.extend_constant(limit, true); - } - }, - Some(page_validity) => { - let translator = DictionaryTranslator(dict); - let translated_hybridrle = TranslatedHybridRle::new(page_values, &translator); - - utils::extend_from_decoder( - validity, - page_validity, - Some(limit), - values, - translated_hybridrle, - )?; - }, - } - - Ok(()) + fn deserialize_dict(&mut 
self, page: DictPage) -> ParquetResult { + let values = page.buffer.as_ref(); + + let mut target = Vec::with_capacity(page.num_values); + super::plain::decode( + values, + false, + None, + None, + &mut MutableBitmap::new(), + &mut self.0.intermediate, + &mut target, + self.0.decoder, + )?; + Ok(target) } fn finalize( @@ -358,6 +183,69 @@ where let validity = freeze_validity(validity); Ok(PrimitiveArray::try_new(dtype, values.into(), validity).unwrap()) } + + fn extend_filtered_with_state( + &mut self, + mut state: utils::State<'_, Self>, + decoded: &mut Self::DecodedState, + filter: Option, + ) -> ParquetResult<()> { + match state.translation { + StateTranslation::Plain(ref mut values) => super::plain::decode( + values, + state.is_optional, + state.page_validity.as_ref(), + filter, + &mut decoded.1, + &mut self.0.intermediate, + &mut decoded.0, + self.0.decoder, + ), + StateTranslation::Dictionary(ref mut indexes) => utils::dict_encoded::decode_dict( + indexes.clone(), + state.dict.unwrap(), + state.is_optional, + state.page_validity.as_ref(), + filter, + &mut decoded.1, + &mut decoded.0, + ), + StateTranslation::ByteStreamSplit(mut decoder) => { + let num_rows = decoder.len(); + let mut iter = decoder.iter_converted(|v| self.0.decoder.decode(decode(v))); + + unspecialized_decode( + num_rows, + || Ok(iter.next().unwrap()), + filter, + state.page_validity, + state.is_optional, + &mut decoded.1, + &mut decoded.0, + ) + }, + StateTranslation::DeltaBinaryPacked(decoder) => { + let num_rows = decoder.len(); + let values = decoder.collect::>()?; + + let mut i = 0; + unspecialized_decode( + num_rows, + || { + use num_traits::AsPrimitive; + let value = values[i]; + i += 1; + Ok(self.0.decoder.decode(value.as_())) + }, + filter, + state.page_validity, + state.is_optional, + &mut decoded.1, + &mut decoded.0, + ) + }, + } + } } impl utils::DictDecodable for IntDecoder @@ -383,28 +271,3 @@ where Ok(DictionaryArray::try_new(dtype, keys, dict).unwrap()) } } - -impl 
utils::NestedDecoder for IntDecoder -where - T: NativeType, - P: ParquetNativeType, - i64: num_traits::AsPrimitive

, - D: DecoderFunction, -{ - fn validity_extend( - _: &mut utils::State<'_, Self>, - (_, validity): &mut Self::DecodedState, - value: bool, - n: usize, - ) { - validity.extend_constant(n, value); - } - - fn values_extend_nulls( - _: &mut utils::State<'_, Self>, - (values, _): &mut Self::DecodedState, - n: usize, - ) { - values.resize(values.len() + n, T::default()); - } -} diff --git a/crates/polars-parquet/src/arrow/read/deserialize/primitive/mod.rs b/crates/polars-parquet/src/arrow/read/deserialize/primitive/mod.rs index 5539595fda48..88b8a55932a7 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/primitive/mod.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/primitive/mod.rs @@ -1,19 +1,14 @@ use arrow::types::NativeType; -use num_traits::AsPrimitive; -use crate::parquet::types::{decode, NativeType as ParquetNativeType}; +use crate::parquet::types::NativeType as ParquetNativeType; mod float; mod integer; +pub(crate) mod plain; pub(crate) use float::FloatDecoder; pub(crate) use integer::IntDecoder; -use super::utils::array_chunks::ArrayChunks; -use super::utils::BatchableCollector; -use super::ParquetResult; -use crate::parquet::encoding::delta_bitpacked::{self, DeltaGatherer}; - #[derive(Debug)] pub(crate) struct PrimitiveDecoder where @@ -22,6 +17,7 @@ where D: DecoderFunction, { pub(crate) decoder: D, + pub(crate) intermediate: Vec

, _pd: std::marker::PhantomData<(P, T)>, } @@ -35,6 +31,7 @@ where pub(crate) fn new(decoder: D) -> Self { Self { decoder, + intermediate: Vec::new(), _pd: std::marker::PhantomData, } } @@ -49,12 +46,22 @@ where T: NativeType, P: ParquetNativeType, { + const NEED_TO_DECODE: bool; + const CAN_TRANSMUTE: bool = { + let has_same_size = size_of::

() == size_of::(); + let has_same_alignment = align_of::

() == align_of::(); + + has_same_size && has_same_alignment + }; + fn decode(self, x: P) -> T; } #[derive(Default, Clone, Copy)] pub(crate) struct UnitDecoderFunction(std::marker::PhantomData); impl DecoderFunction for UnitDecoderFunction { + const NEED_TO_DECODE: bool = false; + #[inline(always)] fn decode(self, x: T) -> T { x @@ -62,11 +69,15 @@ impl DecoderFunction for UnitDecoderFun } #[derive(Default, Clone, Copy)] -pub(crate) struct AsDecoderFunction(std::marker::PhantomData<(P, T)>); +pub(crate) struct AsDecoderFunction( + std::marker::PhantomData<(P, T)>, +); macro_rules! as_decoder_impl { ($($p:ty => $t:ty,)+) => { $( impl DecoderFunction<$p, $t> for AsDecoderFunction<$p, $t> { + const NEED_TO_DECODE: bool = Self::CAN_TRANSMUTE; + #[inline(always)] fn decode(self, x : $p) -> $t { x as $t @@ -94,6 +105,8 @@ where P: ParquetNativeType + Into, T: NativeType, { + const NEED_TO_DECODE: bool = true; + #[inline(always)] fn decode(self, x: P) -> T { x.into() @@ -108,168 +121,10 @@ where T: NativeType, F: Copy + Fn(P) -> T, { + const NEED_TO_DECODE: bool = true; + #[inline(always)] fn decode(self, x: P) -> T { (self.0)(x) } } - -pub(crate) struct PlainDecoderFnCollector<'a, 'b, P, T, D> -where - T: NativeType, - P: ParquetNativeType, - D: DecoderFunction, -{ - pub(crate) chunks: &'b mut ArrayChunks<'a, P>, - pub(crate) decoder: D, - pub(crate) _pd: std::marker::PhantomData, -} - -impl> BatchableCollector<(), Vec> - for PlainDecoderFnCollector<'_, '_, P, T, D> -where - T: NativeType, - P: ParquetNativeType, - D: DecoderFunction, -{ - fn reserve(target: &mut Vec, n: usize) { - target.reserve(n); - } - - fn push_n(&mut self, target: &mut Vec, n: usize) -> ParquetResult<()> { - let n = usize::min(self.chunks.len(), n); - let (items, remainder) = self.chunks.bytes.split_at(n); - let decoder = self.decoder; - target.extend( - items - .iter() - .map(|chunk| decoder.decode(P::from_le_bytes(*chunk))), - ); - self.chunks.bytes = remainder; - Ok(()) - } - - fn 
push_n_nulls(&mut self, target: &mut Vec, n: usize) -> ParquetResult<()> { - target.resize(target.len() + n, T::default()); - Ok(()) - } - - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - self.chunks.skip_in_place(n); - Ok(()) - } -} - -fn deserialize_plain(values: &[u8], decoder: D) -> Vec -where - T: NativeType, - P: ParquetNativeType, - D: DecoderFunction, -{ - values - .chunks_exact(size_of::

()) - .map(decode) - .map(|v| decoder.decode(v)) - .collect::>() -} - -struct DeltaTranslator -where - T: NativeType, - P: ParquetNativeType, - i64: AsPrimitive

, - D: DecoderFunction, -{ - dfn: D, - _pd: std::marker::PhantomData<(P, T)>, -} - -struct DeltaCollector<'a, 'b, P, T, D> -where - T: NativeType, - P: ParquetNativeType, - i64: AsPrimitive

, - D: DecoderFunction, -{ - decoder: &'b mut delta_bitpacked::Decoder<'a>, - gatherer: DeltaTranslator, -} - -impl DeltaGatherer for DeltaTranslator -where - T: NativeType, - P: ParquetNativeType, - i64: AsPrimitive

, - D: DecoderFunction, -{ - type Target = Vec; - - fn target_len(&self, target: &Self::Target) -> usize { - target.len() - } - - fn target_reserve(&self, target: &mut Self::Target, n: usize) { - target.reserve(n); - } - - fn gather_one(&mut self, target: &mut Self::Target, v: i64) -> ParquetResult<()> { - target.push(self.dfn.decode(v.as_())); - Ok(()) - } - - fn gather_constant( - &mut self, - target: &mut Self::Target, - v: i64, - delta: i64, - num_repeats: usize, - ) -> ParquetResult<()> { - target.extend((0..num_repeats).map(|i| self.dfn.decode((v + (i as i64) * delta).as_()))); - Ok(()) - } - - fn gather_slice(&mut self, target: &mut Self::Target, slice: &[i64]) -> ParquetResult<()> { - target.extend(slice.iter().copied().map(|v| self.dfn.decode(v.as_()))); - Ok(()) - } - - fn gather_chunk(&mut self, target: &mut Self::Target, chunk: &[i64; 64]) -> ParquetResult<()> { - target.extend(chunk.iter().copied().map(|v| self.dfn.decode(v.as_()))); - Ok(()) - } -} - -impl BatchableCollector<(), Vec> for DeltaCollector<'_, '_, P, T, D> -where - T: NativeType, - P: ParquetNativeType, - i64: AsPrimitive

, - D: DecoderFunction, -{ - fn reserve(target: &mut Vec, n: usize) { - target.reserve(n); - } - - fn push_n(&mut self, target: &mut Vec, n: usize) -> ParquetResult<()> { - let start_length = target.len(); - let start_num_elems = self.decoder.len(); - - self.decoder.gather_n_into(target, n, &mut self.gatherer)?; - - let consumed_elements = usize::min(n, start_num_elems); - - debug_assert_eq!(self.decoder.len(), start_num_elems - consumed_elements); - debug_assert_eq!(target.len(), start_length + consumed_elements); - - Ok(()) - } - - fn push_n_nulls(&mut self, target: &mut Vec, n: usize) -> ParquetResult<()> { - target.resize(target.len() + n, T::default()); - Ok(()) - } - - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - self.decoder.skip_in_place(n) - } -} diff --git a/crates/polars-parquet/src/arrow/read/deserialize/primitive/plain.rs b/crates/polars-parquet/src/arrow/read/deserialize/primitive/plain.rs new file mode 100644 index 000000000000..9e843f673072 --- /dev/null +++ b/crates/polars-parquet/src/arrow/read/deserialize/primitive/plain.rs @@ -0,0 +1,405 @@ +use arrow::array::Splitable; +use arrow::bitmap::{Bitmap, MutableBitmap}; +use arrow::types::{AlignedBytes, NativeType}; + +use super::DecoderFunction; +use crate::parquet::error::ParquetResult; +use crate::parquet::types::NativeType as ParquetNativeType; +use crate::read::deserialize::utils::array_chunks::ArrayChunks; +use crate::read::deserialize::utils::dict_encoded::{append_validity, constrain_page_validity}; +use crate::read::{Filter, ParquetError}; + +#[allow(clippy::too_many_arguments)] +pub fn decode>( + values: &[u8], + is_optional: bool, + page_validity: Option<&Bitmap>, + filter: Option, + validity: &mut MutableBitmap, + intermediate: &mut Vec

, + target: &mut Vec, + dfn: D, +) -> ParquetResult<()> { + if cfg!(debug_assertions) && is_optional { + assert_eq!(target.len(), validity.len()); + } + + if D::CAN_TRANSMUTE { + let values = ArrayChunks::<'_, T::AlignedBytes>::new(values).ok_or_else(|| { + ParquetError::oos("Page content does not align with expected element size") + })?; + + let start_length = target.len(); + decode_aligned_bytes_dispatch( + values, + is_optional, + page_validity, + filter, + validity, + ::cast_vec_ref_mut(target), + )?; + + if D::NEED_TO_DECODE { + let to_decode: &mut [P] = bytemuck::cast_slice_mut(&mut target[start_length..]); + + for v in to_decode { + *v = bytemuck::cast(dfn.decode(*v)); + } + } + } else { + let values = ArrayChunks::<'_, P::AlignedBytes>::new(values).ok_or_else(|| { + ParquetError::oos("Page content does not align with expected element size") + })?; + + intermediate.clear(); + decode_aligned_bytes_dispatch( + values, + is_optional, + page_validity, + filter, + validity, + ::cast_vec_ref_mut(intermediate), + )?; + + target.extend(intermediate.iter().copied().map(|v| dfn.decode(v))); + } + + if cfg!(debug_assertions) && is_optional { + assert_eq!(target.len(), validity.len()); + } + + Ok(()) +} + +#[inline(never)] +pub fn decode_aligned_bytes_dispatch( + values: ArrayChunks<'_, B>, + is_optional: bool, + page_validity: Option<&Bitmap>, + filter: Option, + validity: &mut MutableBitmap, + target: &mut Vec, +) -> ParquetResult<()> { + if is_optional { + append_validity(page_validity, filter.as_ref(), validity, values.len()); + } + + let page_validity = constrain_page_validity(values.len(), page_validity, filter.as_ref()); + + match (filter, page_validity) { + (None, None) => decode_required(values, target), + (None, Some(page_validity)) => decode_optional(values, &page_validity, target), + + (Some(Filter::Range(rng)), None) => decode_required( + unsafe { values.slice_unchecked(rng.start, rng.end) }, + target, + ), + (Some(Filter::Range(rng)), Some(mut 
page_validity)) => { + let prevalidity; + (prevalidity, page_validity) = page_validity.split_at(rng.start); + + (page_validity, _) = page_validity.split_at(rng.len()); + + let values_start = prevalidity.set_bits(); + + decode_optional( + unsafe { values.slice_unchecked(values_start, values.len()) }, + &page_validity, + target, + ) + }, + + (Some(Filter::Mask(filter)), None) => decode_masked_required(values, &filter, target), + (Some(Filter::Mask(filter)), Some(page_validity)) => { + decode_masked_optional(values, &page_validity, &filter, target) + }, + }?; + + Ok(()) +} + +#[inline(never)] +fn decode_required( + values: ArrayChunks<'_, B>, + target: &mut Vec, +) -> ParquetResult<()> { + if values.is_empty() { + return Ok(()); + } + + target.reserve(values.len()); + + // SAFETY: Vec guarantees if the `capacity != 0` the pointer to valid since we just reserve + // that pointer. + let dst = unsafe { target.as_mut_ptr().add(target.len()) }; + let src = values.as_ptr(); + + // SAFETY: + // - `src` is valid for read of values.len() elements. + // - `dst` is valid for writes of values.len() elements, it was just reserved. 
+ // - B::Unaligned is always aligned, since it has an alignment of 1 + // - The ranges for src and dst do not overlap + unsafe { + std::ptr::copy_nonoverlapping::(src.cast(), dst.cast(), values.len()); + target.set_len(target.len() + values.len()); + }; + + Ok(()) +} + +#[inline(never)] +fn decode_optional( + values: ArrayChunks<'_, B>, + validity: &Bitmap, + target: &mut Vec, +) -> ParquetResult<()> { + let num_values = validity.set_bits(); + + if num_values == validity.len() { + return decode_required(values.truncate(validity.len()), target); + } + + let mut limit = validity.len(); + + assert!(num_values <= values.len()); + + let start_length = target.len(); + let end_length = target.len() + limit; + target.reserve(limit); + let mut target_ptr = unsafe { target.as_mut_ptr().add(start_length) }; + + let mut validity_iter = validity.fast_iter_u56(); + let mut num_values_remaining = num_values; + let mut value_offset = 0; + + let mut iter = |mut v: u64, len: usize| { + debug_assert!(len < 64); + + let num_chunk_values = v.count_ones() as usize; + + if num_values_remaining == num_chunk_values { + for i in 0..len { + let is_valid = v & 1 != 0; + let value = if is_valid { + unsafe { values.get_unchecked(value_offset) } + } else { + B::zeroed() + }; + unsafe { target_ptr.add(i).write(value) }; + + value_offset += (v & 1) as usize; + v >>= 1; + } + } else { + for i in 0..len { + let value = unsafe { values.get_unchecked(value_offset) }; + unsafe { target_ptr.add(i).write(value) }; + + value_offset += (v & 1) as usize; + v >>= 1; + } + } + + num_values_remaining -= num_chunk_values; + unsafe { + target_ptr = target_ptr.add(len); + } + }; + + for v in validity_iter.by_ref() { + if limit < 56 { + iter(v, limit); + } else { + iter(v, 56); + } + limit -= 56; + } + + let (v, vl) = validity_iter.remainder(); + + iter(v, vl.min(limit)); + + unsafe { target.set_len(end_length) }; + + Ok(()) +} + +#[inline(never)] +fn decode_masked_required( + values: ArrayChunks<'_, B>, + mask: 
&Bitmap, + target: &mut Vec, +) -> ParquetResult<()> { + let num_rows = mask.set_bits(); + + if num_rows == mask.len() { + return decode_required(values.truncate(num_rows), target); + } + + assert!(mask.len() <= values.len()); + + let start_length = target.len(); + target.reserve(num_rows); + let mut target_ptr = unsafe { target.as_mut_ptr().add(start_length) }; + + let mut mask_iter = mask.fast_iter_u56(); + let mut num_rows_left = num_rows; + let mut value_offset = 0; + + let mut iter = |mut f: u64, len: usize| { + if num_rows_left == 0 { + return false; + } + + let mut num_read = 0; + let mut num_written = 0; + + while f != 0 { + let offset = f.trailing_zeros() as usize; + + num_read += offset; + + // SAFETY: + // 1. `values_buffer` starts out as only zeros, which we know is in the + // dictionary following the original `dict.is_empty` check. + // 2. Each time we write to `values_buffer`, it is followed by a + // `verify_dict_indices`. + let value = unsafe { values.get_unchecked(value_offset + num_read) }; + unsafe { target_ptr.add(num_written).write(value) }; + + num_written += 1; + num_read += 1; + + f >>= offset + 1; // Clear least significant bit. 
+ } + + unsafe { + target_ptr = target_ptr.add(num_written); + } + value_offset += len; + num_rows_left -= num_written; + + true + }; + + for f in mask_iter.by_ref() { + if !iter(f, 56) { + break; + } + } + + let (f, fl) = mask_iter.remainder(); + + iter(f, fl); + + unsafe { target.set_len(start_length + num_rows) }; + + Ok(()) +} + +#[inline(never)] +fn decode_masked_optional( + values: ArrayChunks<'_, B>, + validity: &Bitmap, + mask: &Bitmap, + target: &mut Vec, +) -> ParquetResult<()> { + let num_rows = mask.set_bits(); + let num_values = validity.set_bits(); + + if num_rows == mask.len() { + return decode_optional(values, validity, target); + } + + if num_values == validity.len() { + return decode_masked_required(values, mask, target); + } + + assert!(num_values <= values.len()); + + let start_length = target.len(); + target.reserve(num_rows); + let mut target_ptr = unsafe { target.as_mut_ptr().add(start_length) }; + + let mut validity_iter = validity.fast_iter_u56(); + let mut mask_iter = mask.fast_iter_u56(); + let mut num_values_left = num_values; + let mut num_rows_left = num_rows; + let mut value_offset = 0; + + let mut iter = |mut f: u64, mut v: u64, len: usize| { + if num_rows_left == 0 { + return false; + } + + let num_chunk_values = v.count_ones() as usize; + + let mut num_read = 0; + let mut num_written = 0; + + if num_chunk_values == num_values_left { + while f != 0 { + let offset = f.trailing_zeros() as usize; + + num_read += (v & (1u64 << offset).wrapping_sub(1)).count_ones() as usize; + v >>= offset; + + let is_valid = v & 1 != 0; + let value = if is_valid { + unsafe { values.get_unchecked(value_offset + num_read) } + } else { + B::zeroed() + }; + unsafe { target_ptr.add(num_written).write(value) }; + + num_written += 1; + num_read += (v & 1) as usize; + + f >>= offset + 1; // Clear least significant bit. 
+ v >>= 1; + } + } else { + while f != 0 { + let offset = f.trailing_zeros() as usize; + + num_read += (v & (1u64 << offset).wrapping_sub(1)).count_ones() as usize; + v >>= offset; + + let value = unsafe { values.get_unchecked(value_offset + num_read) }; + unsafe { target_ptr.add(num_written).write(value) }; + + num_written += 1; + num_read += (v & 1) as usize; + + f >>= offset + 1; // Clear least significant bit. + v >>= 1; + } + } + + unsafe { + target_ptr = target_ptr.add(num_written); + } + value_offset += len; + num_rows_left -= num_written; + num_values_left -= num_chunk_values; + + true + }; + + for (f, v) in mask_iter.by_ref().zip(validity_iter.by_ref()) { + if !iter(f, v, 56) { + break; + } + } + + let (f, fl) = mask_iter.remainder(); + let (v, vl) = validity_iter.remainder(); + + assert_eq!(fl, vl); + + iter(f, v, fl); + + unsafe { target.set_len(start_length + num_rows) }; + + Ok(()) +} diff --git a/crates/polars-parquet/src/arrow/read/deserialize/utils/array_chunks.rs b/crates/polars-parquet/src/arrow/read/deserialize/utils/array_chunks.rs index f66544ee3183..d201db813628 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/utils/array_chunks.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/utils/array_chunks.rs @@ -1,22 +1,22 @@ -use crate::parquet::types::NativeType as ParquetNativeType; +use arrow::types::AlignedBytes; -/// A slice of chunks that fit the `P` type. +/// A slice of chunks that fit an [`AlignedBytes`] type. /// /// This is essentially the equivalent of [`ChunksExact`][std::slice::ChunksExact], but with a size /// and type known at compile-time. This makes the compiler able to reason much more about the /// code. Especially, since the chunk-sizes for this type are almost always powers of 2 and /// bitshifts or special instructions would be much better to use. 
#[derive(Debug, Clone, Copy)] -pub(crate) struct ArrayChunks<'a, P: ParquetNativeType> { - pub(crate) bytes: &'a [P::Bytes], +pub(crate) struct ArrayChunks<'a, B: AlignedBytes> { + pub(crate) bytes: &'a [B::Unaligned], } -impl<'a, P: ParquetNativeType> ArrayChunks<'a, P> { +impl<'a, B: AlignedBytes> ArrayChunks<'a, B> { /// Create a new [`ArrayChunks`] /// /// This returns null if the `bytes` slice's length is not a multiple of the size of `P::Bytes`. pub(crate) fn new(bytes: &'a [u8]) -> Option { - if bytes.len() % size_of::() != 0 { + if bytes.len() % B::SIZE != 0 { return None; } @@ -25,14 +25,38 @@ impl<'a, P: ParquetNativeType> ArrayChunks<'a, P> { Some(Self { bytes }) } - pub(crate) fn skip_in_place(&mut self, n: usize) { - let n = usize::min(self.bytes.len(), n); - self.bytes = &self.bytes[n..]; + pub(crate) unsafe fn get_unchecked(&self, at: usize) -> B { + B::from_unaligned(*unsafe { self.bytes.get_unchecked(at) }) + } + + pub fn truncate(&self, length: usize) -> ArrayChunks<'a, B> { + let length = length.min(self.bytes.len()); + + Self { + bytes: unsafe { self.bytes.get_unchecked(..length) }, + } + } + + pub unsafe fn slice_unchecked(&self, start: usize, end: usize) -> ArrayChunks<'a, B> { + debug_assert!(start <= self.bytes.len()); + debug_assert!(end <= self.bytes.len()); + + Self { + bytes: unsafe { self.bytes.get_unchecked(start..end) }, + } + } + + pub fn as_ptr(&self) -> *const B::Unaligned { + self.bytes.as_ptr() + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 } } -impl<'a, P: ParquetNativeType> Iterator for ArrayChunks<'a, P> { - type Item = &'a P::Bytes; +impl<'a, B: AlignedBytes> Iterator for ArrayChunks<'a, B> { + type Item = &'a B::Unaligned; #[inline(always)] fn next(&mut self) -> Option { @@ -47,4 +71,4 @@ impl<'a, P: ParquetNativeType> Iterator for ArrayChunks<'a, P> { } } -impl ExactSizeIterator for ArrayChunks<'_, P> {} +impl ExactSizeIterator for ArrayChunks<'_, B> {} diff --git 
a/crates/polars-parquet/src/arrow/read/deserialize/utils/dict_encoded.rs b/crates/polars-parquet/src/arrow/read/deserialize/utils/dict_encoded.rs new file mode 100644 index 000000000000..69c1cd6c549d --- /dev/null +++ b/crates/polars-parquet/src/arrow/read/deserialize/utils/dict_encoded.rs @@ -0,0 +1,856 @@ +use arrow::bitmap::bitmask::BitMask; +use arrow::bitmap::{Bitmap, MutableBitmap}; +use arrow::types::{AlignedBytes, NativeType}; +use polars_compute::filter::filter_boolean_kernel; + +use super::filter_from_range; +use crate::parquet::encoding::hybrid_rle::{HybridRleChunk, HybridRleDecoder}; +use crate::parquet::error::ParquetResult; +use crate::read::{Filter, ParquetError}; + +pub fn decode_dict( + values: HybridRleDecoder<'_>, + dict: &[T], + is_optional: bool, + page_validity: Option<&Bitmap>, + filter: Option, + validity: &mut MutableBitmap, + target: &mut Vec, +) -> ParquetResult<()> { + decode_dict_dispatch( + values, + bytemuck::cast_slice(dict), + is_optional, + page_validity, + filter, + validity, + ::cast_vec_ref_mut(target), + ) +} + +pub(crate) fn append_validity( + page_validity: Option<&Bitmap>, + filter: Option<&Filter>, + validity: &mut MutableBitmap, + values_len: usize, +) { + match (page_validity, filter) { + (None, None) => validity.extend_constant(values_len, true), + (None, Some(f)) => validity.extend_constant(f.num_rows(), true), + (Some(page_validity), None) => validity.extend_from_bitmap(page_validity), + (Some(page_validity), Some(Filter::Range(rng))) => { + let page_validity = page_validity.clone(); + validity.extend_from_bitmap(&page_validity.clone().sliced(rng.start, rng.len())) + }, + (Some(page_validity), Some(Filter::Mask(mask))) => { + validity.extend_from_bitmap(&filter_boolean_kernel(page_validity, mask)) + }, + } +} + +pub(crate) fn constrain_page_validity( + values_len: usize, + page_validity: Option<&Bitmap>, + filter: Option<&Filter>, +) -> Option { + let num_unfiltered_rows = match (filter.as_ref(), page_validity) { + 
(None, None) => values_len, + (None, Some(pv)) => { + debug_assert!(pv.len() >= values_len); + pv.len() + }, + (Some(f), v) => { + if cfg!(debug_assertions) { + if let Some(v) = v { + assert!(v.len() >= f.max_offset()); + } + } + + f.max_offset() + }, + }; + + page_validity.map(|pv| { + if pv.len() > num_unfiltered_rows { + pv.clone().sliced(0, num_unfiltered_rows) + } else { + pv.clone() + } + }) +} + +#[inline(never)] +pub fn decode_dict_dispatch( + values: HybridRleDecoder<'_>, + dict: &[B], + is_optional: bool, + page_validity: Option<&Bitmap>, + filter: Option, + validity: &mut MutableBitmap, + target: &mut Vec, +) -> ParquetResult<()> { + if cfg!(debug_assertions) && is_optional { + assert_eq!(target.len(), validity.len()); + } + + if is_optional { + append_validity(page_validity, filter.as_ref(), validity, values.len()); + } + + let page_validity = constrain_page_validity(values.len(), page_validity, filter.as_ref()); + + match (filter, page_validity) { + (None, None) => decode_required_dict(values, dict, None, target), + (Some(Filter::Range(rng)), None) if rng.start == 0 => { + decode_required_dict(values, dict, Some(rng.end), target) + }, + (None, Some(page_validity)) => decode_optional_dict(values, dict, &page_validity, target), + (Some(Filter::Range(rng)), Some(page_validity)) if rng.start == 0 => { + decode_optional_dict(values, dict, &page_validity, target) + }, + (Some(Filter::Mask(filter)), None) => { + decode_masked_required_dict(values, dict, &filter, target) + }, + (Some(Filter::Mask(filter)), Some(page_validity)) => { + decode_masked_optional_dict(values, dict, &filter, &page_validity, target) + }, + (Some(Filter::Range(rng)), None) => { + decode_masked_required_dict(values, dict, &filter_from_range(rng.clone()), target) + }, + (Some(Filter::Range(rng)), Some(page_validity)) => decode_masked_optional_dict( + values, + dict, + &filter_from_range(rng.clone()), + &page_validity, + target, + ), + }?; + + if cfg!(debug_assertions) && is_optional { + 
assert_eq!(target.len(), validity.len()); + } + + Ok(()) +} + +#[cold] +fn oob_dict_idx() -> ParquetError { + ParquetError::oos("Dictionary Index is out-of-bounds") +} + +#[inline(always)] +fn verify_dict_indices(indices: &[u32; 32], dict_size: usize) -> ParquetResult<()> { + let mut is_valid = true; + for &idx in indices { + is_valid &= (idx as usize) < dict_size; + } + + if is_valid { + return Ok(()); + } + + Err(oob_dict_idx()) +} + +#[inline(never)] +pub fn decode_required_dict( + mut values: HybridRleDecoder<'_>, + dict: &[B], + limit: Option, + target: &mut Vec, +) -> ParquetResult<()> { + if dict.is_empty() && values.len() > 0 { + return Err(oob_dict_idx()); + } + + let mut limit = limit.unwrap_or(values.len()); + assert!(limit <= values.len()); + let start_length = target.len(); + let end_length = start_length + limit; + + target.reserve(limit); + let mut target_ptr = unsafe { target.as_mut_ptr().add(start_length) }; + + while limit > 0 { + let chunk = values.next_chunk()?.unwrap(); + + match chunk { + HybridRleChunk::Rle(value, length) => { + let length = length.min(limit); + + let Some(&value) = dict.get(value as usize) else { + return Err(oob_dict_idx()); + }; + let target_slice; + // SAFETY: + // 1. `target_ptr..target_ptr + limit` is allocated + // 2. 
`length <= limit` + unsafe { + target_slice = std::slice::from_raw_parts_mut(target_ptr, length); + target_ptr = target_ptr.add(length); + } + + target_slice.fill(value); + limit -= length; + }, + HybridRleChunk::Bitpacked(mut decoder) => { + let mut chunked = decoder.chunked(); + loop { + if limit < 32 { + break; + } + + let Some(chunk) = chunked.next() else { + break; + }; + + verify_dict_indices(&chunk, dict.len())?; + + for (i, &idx) in chunk.iter().enumerate() { + let value = unsafe { dict.get_unchecked(idx as usize) }; + let value = *value; + unsafe { target_ptr.add(i).write(value) }; + } + + unsafe { + target_ptr = target_ptr.add(32); + } + limit -= 32; + } + + if let Some((chunk, chunk_size)) = chunked.next_inexact() { + let chunk_size = chunk_size.min(limit); + + let highest_idx = chunk[..chunk_size].iter().copied().max().unwrap(); + assert!((highest_idx as usize) < dict.len()); + + for (i, &idx) in chunk[..chunk_size].iter().enumerate() { + let value = unsafe { dict.get_unchecked(idx as usize) }; + let value = *value; + unsafe { target_ptr.add(i).write(value) }; + } + + unsafe { + target_ptr = target_ptr.add(chunk_size); + } + + limit -= chunk_size; + } + }, + } + } + + unsafe { + target.set_len(end_length); + } + + Ok(()) +} + +#[inline(never)] +pub fn decode_optional_dict( + mut values: HybridRleDecoder<'_>, + dict: &[B], + validity: &Bitmap, + target: &mut Vec, +) -> ParquetResult<()> { + let mut limit = validity.len(); + let num_valid_values = validity.set_bits(); + + // Dispatch to the required kernel if all rows are valid anyway. 
+ if num_valid_values == validity.len() { + return decode_required_dict(values, dict, Some(validity.len()), target); + } + + if dict.is_empty() && num_valid_values > 0 { + return Err(oob_dict_idx()); + } + + assert!(num_valid_values <= values.len()); + let start_length = target.len(); + let end_length = start_length + validity.len(); + + target.reserve(validity.len()); + let mut target_ptr = unsafe { target.as_mut_ptr().add(start_length) }; + + let mut validity = BitMask::from_bitmap(validity); + let mut values_buffer = [0u32; 128]; + let values_buffer = &mut values_buffer; + + loop { + if limit == 0 { + break; + } + + let Some(chunk) = values.next_chunk()? else { + break; + }; + + match chunk { + HybridRleChunk::Rle(value, size) => { + // If we know that we have `size` times `value` that we can append, but there might + // be nulls in between those values. + // + // 1. See how many `num_rows = valid + invalid` values `size` would entail. This is + // done with `num_bits_before_nth_one` on the validity mask. + // 2. Fill `num_rows` values into the target buffer. + // 3. Advance the validity mask by `num_rows` values. + + let num_chunk_rows = validity.nth_set_bit_idx(size, 0).unwrap_or(validity.len()); + + (_, validity) = unsafe { validity.split_at_unchecked(num_chunk_rows) }; + + let Some(&value) = dict.get(value as usize) else { + return Err(oob_dict_idx()); + }; + let target_slice; + // SAFETY: + // Given `validity_iter` before the `advance_by_bits` + // + // 1. `target_ptr..target_ptr + validity_iter.bits_left()` is allocated + // 2. 
`num_chunk_rows <= validity_iter.bits_left()` + unsafe { + target_slice = std::slice::from_raw_parts_mut(target_ptr, num_chunk_rows); + target_ptr = target_ptr.add(num_chunk_rows); + } + + target_slice.fill(value); + limit -= num_chunk_rows; + }, + HybridRleChunk::Bitpacked(mut decoder) => { + let mut chunked = decoder.chunked(); + + let mut buffer_part_idx = 0; + let mut values_offset = 0; + let mut num_buffered: usize = 0; + + { + let mut num_done = 0; + let mut validity_iter = validity.fast_iter_u56(); + + 'outer: while limit >= 64 { + let v = validity_iter.next().unwrap(); + + while num_buffered < v.count_ones() as usize { + let buffer_part = <&mut [u32; 32]>::try_from( + &mut values_buffer[buffer_part_idx * 32..][..32], + ) + .unwrap(); + let Some(num_added) = chunked.next_into(buffer_part) else { + break 'outer; + }; + + verify_dict_indices(buffer_part, dict.len())?; + + num_buffered += num_added; + + buffer_part_idx += 1; + buffer_part_idx %= 4; + } + + let mut num_read = 0; + + for i in 0..56 { + let idx = values_buffer[(values_offset + num_read) % 128]; + + // SAFETY: + // 1. `values_buffer` starts out as only zeros, which we know is in the + // dictionary following the original `dict.is_empty` check. + // 2. Each time we write to `values_buffer`, it is followed by a + // `verify_dict_indices`. 
+ let value = unsafe { dict.get_unchecked(idx as usize) }; + let value = *value; + unsafe { target_ptr.add(i).write(value) }; + num_read += ((v >> i) & 1) as usize; + } + + values_offset += num_read; + values_offset %= 128; + num_buffered -= num_read; + unsafe { + target_ptr = target_ptr.add(56); + } + num_done += 56; + limit -= 56; + } + + (_, validity) = unsafe { validity.split_at_unchecked(num_done) }; + } + + let num_decoder_remaining = num_buffered + chunked.decoder.len(); + let decoder_limit = validity + .nth_set_bit_idx(num_decoder_remaining, 0) + .unwrap_or(validity.len()); + + let num_remaining = limit.min(decoder_limit); + let current_validity; + (current_validity, validity) = + unsafe { validity.split_at_unchecked(num_remaining) }; + let (v, _) = current_validity.fast_iter_u56().remainder(); + + while num_buffered < v.count_ones() as usize { + let buffer_part = <&mut [u32; 32]>::try_from( + &mut values_buffer[buffer_part_idx * 32..][..32], + ) + .unwrap(); + let num_added = chunked.next_into(buffer_part).unwrap(); + + verify_dict_indices(buffer_part, dict.len())?; + + num_buffered += num_added; + + buffer_part_idx += 1; + buffer_part_idx %= 4; + } + + let mut num_read = 0; + + for i in 0..num_remaining { + let idx = values_buffer[(values_offset + num_read) % 128]; + let value = unsafe { dict.get_unchecked(idx as usize) }; + let value = *value; + unsafe { *target_ptr.add(i) = value }; + num_read += ((v >> i) & 1) as usize; + } + + unsafe { + target_ptr = target_ptr.add(num_remaining); + } + limit -= num_remaining; + }, + } + } + + if cfg!(debug_assertions) { + assert_eq!(validity.set_bits(), 0); + } + + let target_slice; + unsafe { + target_slice = std::slice::from_raw_parts_mut(target_ptr, limit); + } + + target_slice.fill(B::zeroed()); + + unsafe { + target.set_len(end_length); + } + + Ok(()) +} + +#[inline(never)] +pub fn decode_masked_optional_dict( + mut values: HybridRleDecoder<'_>, + dict: &[B], + filter: &Bitmap, + validity: &Bitmap, + target: 
&mut Vec, +) -> ParquetResult<()> { + let num_rows = filter.set_bits(); + let num_valid_values = validity.set_bits(); + + // Dispatch to the non-filter kernel if all rows are needed anyway. + if num_rows == filter.len() { + return decode_optional_dict(values, dict, validity, target); + } + + // Dispatch to the required kernel if all rows are valid anyway. + if num_valid_values == validity.len() { + return decode_masked_required_dict(values, dict, filter, target); + } + + if dict.is_empty() && num_valid_values > 0 { + return Err(oob_dict_idx()); + } + + debug_assert_eq!(filter.len(), validity.len()); + assert!(num_valid_values <= values.len()); + let start_length = target.len(); + + target.reserve(num_rows); + let mut target_ptr = unsafe { target.as_mut_ptr().add(start_length) }; + + let mut filter = BitMask::from_bitmap(filter); + let mut validity = BitMask::from_bitmap(validity); + + let mut values_buffer = [0u32; 128]; + let values_buffer = &mut values_buffer; + + let mut num_rows_left = num_rows; + + loop { + // Early stop if we have no more rows to load. + if num_rows_left == 0 { + break; + } + + let Some(chunk) = values.next_chunk()? else { + break; + }; + + match chunk { + HybridRleChunk::Rle(value, size) => { + if value as usize >= dict.len() { + return Err(oob_dict_idx()); + } + + // If we know that we have `size` times `value` that we can append, but there might + // be nulls in between those values. + // + // 1. See how many `num_rows = valid + invalid` values `size` would entail. This is + // done with `num_bits_before_nth_one` on the validity mask. + // 2. Fill `num_rows` values into the target buffer. + // 3. Advance the validity mask by `num_rows` values. 
+ + let num_chunk_values = validity.nth_set_bit_idx(size, 0).unwrap_or(validity.len()); + + let current_filter; + (_, validity) = unsafe { validity.split_at_unchecked(num_chunk_values) }; + (current_filter, filter) = unsafe { filter.split_at_unchecked(num_chunk_values) }; + + let num_chunk_rows = current_filter.set_bits(); + + if num_chunk_rows > 0 { + // SAFETY: Bounds check done before. + let value = unsafe { dict.get_unchecked(value as usize) }; + + let target_slice; + // SAFETY: + // Given `filter_iter` before the `advance_by_bits`. + // + // 1. `target_ptr..target_ptr + filter_iter.count_ones()` is allocated + // 2. `num_chunk_rows < filter_iter.count_ones()` + unsafe { + target_slice = std::slice::from_raw_parts_mut(target_ptr, num_chunk_rows); + target_ptr = target_ptr.add(num_chunk_rows); + } + + target_slice.fill(*value); + num_rows_left -= num_chunk_rows; + } + }, + HybridRleChunk::Bitpacked(mut decoder) => { + // For bitpacked we do the following: + // 1. See how many rows are encoded by this `decoder`. + // 2. Go through the filter and validity 56 bits at a time and: + // 0. If filter bits are 0, skip the chunk entirely. + // 1. Buffer enough values so that we can branchlessly decode with the filter + // and validity. + // 2. Decode with filter and validity. + // 3. Decode remainder. + + let size = decoder.len(); + let mut chunked = decoder.chunked(); + + let num_chunk_values = validity.nth_set_bit_idx(size, 0).unwrap_or(validity.len()); + + let mut buffer_part_idx = 0; + let mut values_offset = 0; + let mut num_buffered: usize = 0; + let mut skip_values = 0; + + let current_filter; + let current_validity; + + (current_filter, filter) = unsafe { filter.split_at_unchecked(num_chunk_values) }; + (current_validity, validity) = + unsafe { validity.split_at_unchecked(num_chunk_values) }; + + let mut iter = |mut f: u64, mut v: u64| { + // Skip chunk if we don't any values from here. 
+ if f == 0 { + skip_values += v.count_ones() as usize; + return ParquetResult::Ok(()); + } + + // Skip over already buffered items. + let num_buffered_skipped = skip_values.min(num_buffered); + values_offset += num_buffered_skipped; + num_buffered -= num_buffered_skipped; + skip_values -= num_buffered_skipped; + + // If we skipped plenty already, just skip decoding those chunks instead of + // decoding them and throwing them away. + chunked.decoder.skip_chunks(skip_values / 32); + // The leftovers we have to decode but we can also just skip. + skip_values %= 32; + + while num_buffered < v.count_ones() as usize { + let buffer_part = <&mut [u32; 32]>::try_from( + &mut values_buffer[buffer_part_idx * 32..][..32], + ) + .unwrap(); + let num_added = chunked.next_into(buffer_part).unwrap(); + + verify_dict_indices(buffer_part, dict.len())?; + + let skip_chunk_values = skip_values.min(num_added); + + values_offset += skip_chunk_values; + num_buffered += num_added - skip_chunk_values; + skip_values -= skip_chunk_values; + + buffer_part_idx += 1; + buffer_part_idx %= 4; + } + + let mut num_read = 0; + let mut num_written = 0; + + while f != 0 { + let offset = f.trailing_zeros(); + + num_read += (v & (1u64 << offset).wrapping_sub(1)).count_ones() as usize; + v >>= offset; + + let idx = values_buffer[(values_offset + num_read) % 128]; + // SAFETY: + // 1. `values_buffer` starts out as only zeros, which we know is in the + // dictionary following the original `dict.is_empty` check. + // 2. Each time we write to `values_buffer`, it is followed by a + // `verify_dict_indices`. + let value = unsafe { dict.get_unchecked(idx as usize) }; + let value = *value; + unsafe { target_ptr.add(num_written).write(value) }; + + num_written += 1; + num_read += (v & 1) as usize; + + f >>= offset + 1; // Clear least significant bit. 
+ v >>= 1; + } + + num_read += v.count_ones() as usize; + + values_offset += num_read; + values_offset %= 128; + num_buffered -= num_read; + unsafe { + target_ptr = target_ptr.add(num_written); + } + num_rows_left -= num_written; + + ParquetResult::Ok(()) + }; + + let mut f_iter = current_filter.fast_iter_u56(); + let mut v_iter = current_validity.fast_iter_u56(); + + for (f, v) in f_iter.by_ref().zip(v_iter.by_ref()) { + iter(f, v)?; + } + + let (f, fl) = f_iter.remainder(); + let (v, vl) = v_iter.remainder(); + + assert_eq!(fl, vl); + + iter(f, v)?; + }, + } + } + + if cfg!(debug_assertions) { + assert_eq!(validity.set_bits(), 0); + } + + let target_slice; + unsafe { + target_slice = std::slice::from_raw_parts_mut(target_ptr, num_rows_left); + } + + target_slice.fill(B::zeroed()); + + unsafe { + target.set_len(start_length + num_rows); + } + + Ok(()) +} + +#[inline(never)] +pub fn decode_masked_required_dict( + mut values: HybridRleDecoder<'_>, + dict: &[B], + filter: &Bitmap, + target: &mut Vec, +) -> ParquetResult<()> { + let num_rows = filter.set_bits(); + + // Dispatch to the non-filter kernel if all rows are needed anyway. + if num_rows == filter.len() { + return decode_required_dict(values, dict, Some(filter.len()), target); + } + + if dict.is_empty() && values.len() > 0 { + return Err(oob_dict_idx()); + } + + let start_length = target.len(); + + target.reserve(num_rows); + let mut target_ptr = unsafe { target.as_mut_ptr().add(start_length) }; + + let mut filter = BitMask::from_bitmap(filter); + + let mut values_buffer = [0u32; 128]; + let values_buffer = &mut values_buffer; + + let mut num_rows_left = num_rows; + + loop { + if num_rows_left == 0 { + break; + } + + let Some(chunk) = values.next_chunk()? 
else { + break; + }; + + match chunk { + HybridRleChunk::Rle(value, size) => { + if value as usize >= dict.len() { + return Err(oob_dict_idx()); + } + + let size = size.min(filter.len()); + + // If we know that we have `size` times `value` that we can append, but there might + // be nulls in between those values. + // + // 1. See how many `num_rows = valid + invalid` values `size` would entail. This is + // done with `num_bits_before_nth_one` on the validity mask. + // 2. Fill `num_rows` values into the target buffer. + // 3. Advance the validity mask by `num_rows` values. + + let current_filter; + + (current_filter, filter) = unsafe { filter.split_at_unchecked(size) }; + let num_chunk_rows = current_filter.set_bits(); + + if num_chunk_rows > 0 { + // SAFETY: Bounds check done before. + let value = unsafe { dict.get_unchecked(value as usize) }; + + let target_slice; + // SAFETY: + // Given `filter_iter` before the `advance_by_bits`. + // + // 1. `target_ptr..target_ptr + filter_iter.count_ones()` is allocated + // 2. `num_chunk_rows < filter_iter.count_ones()` + unsafe { + target_slice = std::slice::from_raw_parts_mut(target_ptr, num_chunk_rows); + target_ptr = target_ptr.add(num_chunk_rows); + } + + target_slice.fill(*value); + num_rows_left -= num_chunk_rows; + } + }, + HybridRleChunk::Bitpacked(mut decoder) => { + let size = decoder.len().min(filter.len()); + let mut chunked = decoder.chunked(); + + let mut buffer_part_idx = 0; + let mut values_offset = 0; + let mut num_buffered: usize = 0; + let mut skip_values = 0; + + let current_filter; + + (current_filter, filter) = unsafe { filter.split_at_unchecked(size) }; + + let mut iter = |mut f: u64, len: usize| { + debug_assert!(len <= 64); + + // Skip chunk if we don't any values from here. + if f == 0 { + skip_values += len; + return ParquetResult::Ok(()); + } + + // Skip over already buffered items. 
+ let num_buffered_skipped = skip_values.min(num_buffered); + values_offset += num_buffered_skipped; + num_buffered -= num_buffered_skipped; + skip_values -= num_buffered_skipped; + + // If we skipped plenty already, just skip decoding those chunks instead of + // decoding them and throwing them away. + chunked.decoder.skip_chunks(skip_values / 32); + // The leftovers we have to decode but we can also just skip. + skip_values %= 32; + + while num_buffered < len { + let buffer_part = <&mut [u32; 32]>::try_from( + &mut values_buffer[buffer_part_idx * 32..][..32], + ) + .unwrap(); + let num_added = chunked.next_into(buffer_part).unwrap(); + + verify_dict_indices(buffer_part, dict.len())?; + + let skip_chunk_values = skip_values.min(num_added); + + values_offset += skip_chunk_values; + num_buffered += num_added - skip_chunk_values; + skip_values -= skip_chunk_values; + + buffer_part_idx += 1; + buffer_part_idx %= 4; + } + + let mut num_read = 0; + let mut num_written = 0; + + while f != 0 { + let offset = f.trailing_zeros() as usize; + + num_read += offset; + + let idx = values_buffer[(values_offset + num_read) % 128]; + // SAFETY: + // 1. `values_buffer` starts out as only zeros, which we know is in the + // dictionary following the original `dict.is_empty` check. + // 2. Each time we write to `values_buffer`, it is followed by a + // `verify_dict_indices`. + let value = *unsafe { dict.get_unchecked(idx as usize) }; + unsafe { target_ptr.add(num_written).write(value) }; + + num_written += 1; + num_read += 1; + + f >>= offset + 1; // Clear least significant bit. 
+ } + + values_offset += len; + values_offset %= 128; + num_buffered -= len; + unsafe { + target_ptr = target_ptr.add(num_written); + } + num_rows_left -= num_written; + + ParquetResult::Ok(()) + }; + + let mut f_iter = current_filter.fast_iter_u56(); + + for f in f_iter.by_ref() { + iter(f, 56)?; + } + + let (f, fl) = f_iter.remainder(); + + iter(f, fl)?; + }, + } + } + + unsafe { + target.set_len(start_length + num_rows); + } + + Ok(()) +} diff --git a/crates/polars-parquet/src/arrow/read/deserialize/utils/filter.rs b/crates/polars-parquet/src/arrow/read/deserialize/utils/filter.rs index a9f0f7b3ef87..bc37d79fc868 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/utils/filter.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/utils/filter.rs @@ -36,6 +36,13 @@ impl Filter { } } + pub fn max_offset(&self) -> usize { + match self { + Filter::Range(range) => range.end, + Filter::Mask(bitmap) => bitmap.len(), + } + } + pub(crate) fn split_at(&self, at: usize) -> (Filter, Filter) { use Filter as F; match self { diff --git a/crates/polars-parquet/src/arrow/read/deserialize/utils/mod.rs b/crates/polars-parquet/src/arrow/read/deserialize/utils/mod.rs index 7c6cf840bdce..960c11d75c12 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/utils/mod.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/utils/mod.rs @@ -1,17 +1,17 @@ pub(crate) mod array_chunks; +pub(crate) mod dict_encoded; pub(crate) mod filter; -use arrow::array::{DictionaryArray, DictionaryKey, MutableBinaryViewArray, PrimitiveArray, View}; +use std::ops::Range; + +use arrow::array::{DictionaryArray, DictionaryKey, PrimitiveArray}; use arrow::bitmap::{Bitmap, MutableBitmap}; use arrow::datatypes::ArrowDataType; use arrow::pushable::Pushable; use self::filter::Filter; use super::BasicDecompressor; -use crate::parquet::encoding::hybrid_rle::gatherer::{ - HybridRleGatherer, ZeroCount, ZeroCountGatherer, -}; -use crate::parquet::encoding::hybrid_rle::{self, HybridRleDecoder, 
Translator}; +use crate::parquet::encoding::hybrid_rle::{self, HybridRleChunk, HybridRleDecoder}; use crate::parquet::error::{ParquetError, ParquetResult}; use crate::parquet::page::{split_buffer, DataPage, DictPage}; use crate::parquet::schema::Repetition; @@ -20,7 +20,7 @@ use crate::parquet::schema::Repetition; pub(crate) struct State<'a, D: Decoder> { pub(crate) dict: Option<&'a D::Dict>, pub(crate) is_optional: bool, - pub(crate) page_validity: Option>, + pub(crate) page_validity: Option, pub(crate) translation: D::Translation<'a>, } @@ -31,22 +31,8 @@ pub(crate) trait StateTranslation<'a, D: Decoder>: Sized { decoder: &D, page: &'a DataPage, dict: Option<&'a D::Dict>, - page_validity: Option<&PageValidity<'a>>, + page_validity: Option<&Bitmap>, ) -> ParquetResult; - fn len_when_not_nullable(&self) -> usize; - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()>; - - /// extends [`Self::DecodedState`] by deserializing items in [`Self::State`]. - /// It guarantees that the length of `decoded` is at most `decoded.len() + additional`. 
- fn extend_from_state( - &mut self, - decoder: &mut D, - decoded: &mut D::DecodedState, - is_optional: bool, - page_validity: &mut Option>, - dict: Option<&'a D::Dict>, - additional: usize, - ) -> ParquetResult<()>; } impl<'a, D: Decoder> State<'a, D> { @@ -54,18 +40,16 @@ impl<'a, D: Decoder> State<'a, D> { let is_optional = page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; - let mut page_validity = is_optional - .then(|| page_validity_decoder(page)) - .transpose()?; + let mut page_validity = None; // Make the page_validity None if there are no nulls in the page - let null_count = page - .null_count() - .map(Ok) - .or_else(|| page_validity.as_ref().map(hybrid_rle_count_zeros)) - .transpose()?; - if null_count == Some(0) { - page_validity = None; + if is_optional && page.null_count().is_none_or(|nc| nc != 0) { + let pv = page_validity_decoder(page)?; + let pv = decode_page_validity(pv, None)?; + + if pv.unset_bits() > 0 { + page_validity = Some(pv); + } } let translation = D::Translation::new(decoder, page, dict, page_validity.as_ref())?; @@ -82,124 +66,35 @@ impl<'a, D: Decoder> State<'a, D> { decoder: &D, page: &'a DataPage, dict: Option<&'a D::Dict>, + mut page_validity: Option, ) -> ParquetResult { let translation = D::Translation::new(decoder, page, dict, None)?; + let is_optional = + page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; + + if page_validity + .as_ref() + .is_some_and(|bm| bm.unset_bits() == 0) + { + page_validity = None; + } + Ok(Self { dict, translation, - - // Nested values may be optional, but all that is handled elsewhere. 
- is_optional: false, - page_validity: None, + is_optional, + page_validity, }) } - pub fn len(&self) -> usize { - match &self.page_validity { - Some(v) => v.len(), - None => self.translation.len_when_not_nullable(), - } - } - - pub fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - if n == 0 { - return Ok(()); - } - - let n = self - .page_validity - .as_mut() - .map_or(ParquetResult::Ok(n), |page_validity| { - let mut zc = ZeroCount::default(); - page_validity.gather_n_into(&mut zc, n, &ZeroCountGatherer)?; - Ok(zc.num_nonzero) - })?; - - self.translation.skip_in_place(n) - } - - pub fn extend_from_state( - &mut self, + pub fn decode( + self, decoder: &mut D, decoded: &mut D::DecodedState, filter: Option, ) -> ParquetResult<()> { - match filter { - None => { - let num_rows = self.len(); - - if num_rows == 0 { - return Ok(()); - } - - self.translation.extend_from_state( - decoder, - decoded, - self.is_optional, - &mut self.page_validity, - self.dict, - num_rows, - ) - }, - Some(filter) => match filter { - Filter::Range(range) => { - let start = range.start; - let end = range.end; - - self.skip_in_place(start)?; - debug_assert!(end - start <= self.len()); - - if end - start > 0 { - self.translation.extend_from_state( - decoder, - decoded, - self.is_optional, - &mut self.page_validity, - self.dict, - end - start, - )?; - } - - Ok(()) - }, - Filter::Mask(bitmap) => { - debug_assert!(bitmap.len() == self.len()); - - let mut iter = bitmap.iter(); - while iter.num_remaining() > 0 && self.len() > 0 { - let prev_state_len = self.len(); - - let num_ones = iter.take_leading_ones(); - - if num_ones > 0 { - self.translation.extend_from_state( - decoder, - decoded, - self.is_optional, - &mut self.page_validity, - self.dict, - num_ones, - )?; - } - - if iter.num_remaining() == 0 || self.len() == 0 { - break; - } - - let num_zeros = iter.take_leading_zeros(); - self.skip_in_place(num_zeros)?; - - assert!( - prev_state_len != self.len(), - "No forward progress was 
booked in a filtered parquet file." - ); - } - - Ok(()) - }, - }, - } + decoder.extend_filtered_with_state(self, decoded, filter) } } @@ -213,97 +108,6 @@ pub fn not_implemented(page: &DataPage) -> ParquetError { )) } -pub trait BatchableCollector { - fn reserve(target: &mut T, n: usize); - fn push_n(&mut self, target: &mut T, n: usize) -> ParquetResult<()>; - fn push_n_nulls(&mut self, target: &mut T, n: usize) -> ParquetResult<()>; - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()>; -} - -/// This batches sequential collect operations to try and prevent unnecessary buffering and -/// `Iterator::next` polling. -#[must_use] -pub struct BatchedCollector<'a, I, T, C: BatchableCollector> { - pub(crate) num_waiting_valids: usize, - pub(crate) num_waiting_invalids: usize, - - target: &'a mut T, - collector: C, - _pd: std::marker::PhantomData, -} - -impl<'a, I, T, C: BatchableCollector> BatchedCollector<'a, I, T, C> { - pub fn new(collector: C, target: &'a mut T) -> Self { - Self { - num_waiting_valids: 0, - num_waiting_invalids: 0, - target, - collector, - _pd: Default::default(), - } - } - - #[inline] - pub fn push_valid(&mut self) -> ParquetResult<()> { - self.push_n_valids(1) - } - - #[inline] - pub fn push_invalid(&mut self) { - self.push_n_invalids(1) - } - - #[inline] - pub fn push_n_valids(&mut self, n: usize) -> ParquetResult<()> { - if self.num_waiting_invalids == 0 { - self.num_waiting_valids += n; - return Ok(()); - } - - self.collector - .push_n(self.target, self.num_waiting_valids)?; - self.collector - .push_n_nulls(self.target, self.num_waiting_invalids)?; - - self.num_waiting_valids = n; - self.num_waiting_invalids = 0; - - Ok(()) - } - - #[inline] - pub fn push_n_invalids(&mut self, n: usize) { - self.num_waiting_invalids += n; - } - - #[inline] - pub fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - if self.num_waiting_valids > 0 { - self.collector - .push_n(self.target, self.num_waiting_valids)?; - self.num_waiting_valids = 0; - 
} - if self.num_waiting_invalids > 0 { - self.collector - .push_n_nulls(self.target, self.num_waiting_invalids)?; - self.num_waiting_invalids = 0; - } - - self.collector.skip_in_place(n)?; - - Ok(()) - } - - #[inline] - pub fn finalize(mut self) -> ParquetResult<()> { - self.collector - .push_n(self.target, self.num_waiting_valids)?; - self.collector - .push_n_nulls(self.target, self.num_waiting_invalids)?; - Ok(()) - } -} - pub(crate) type PageValidity<'a> = HybridRleDecoder<'a>; pub(crate) fn page_validity_decoder(page: &DataPage) -> ParquetResult { let validity = split_buffer(page)?.def; @@ -311,277 +115,146 @@ pub(crate) fn page_validity_decoder(page: &DataPage) -> ParquetResult>( - std::marker::PhantomData<&'a (I, T, C)>, -); -impl<'a, I, T, C: BatchableCollector> HybridRleGatherer for BatchGatherer<'a, I, T, C> { - type Target = (&'a mut MutableBitmap, BatchedCollector<'a, I, T, C>); - - fn target_reserve(&self, _target: &mut Self::Target, _n: usize) {} - - fn target_num_elements(&self, target: &Self::Target) -> usize { - target.0.len() - } - - fn hybridrle_to_target(&self, value: u32) -> ParquetResult { - Ok(value) - } - - fn gather_one(&self, (validity, values): &mut Self::Target, value: u32) -> ParquetResult<()> { - if value == 0 { - values.push_invalid(); - validity.extend_constant(1, false); - } else { - values.push_valid()?; - validity.extend_constant(1, true); - } - - Ok(()) - } - - fn gather_repeated( - &self, - (validity, values): &mut Self::Target, - value: u32, - n: usize, - ) -> ParquetResult<()> { - if value == 0 { - values.push_n_invalids(n); - validity.extend_constant(n, false); - } else { - values.push_n_valids(n)?; - validity.extend_constant(n, true); - } - - Ok(()) - } - - fn gather_slice(&self, target: &mut Self::Target, source: &[u32]) -> ParquetResult<()> { - let mut prev = 0u32; - let mut len = 0usize; +pub(crate) fn unspecialized_decode( + mut num_rows: usize, - for v in source { - let v = *v; + mut decode_one: impl FnMut() -> 
ParquetResult, - if v == prev { - len += 1; - } else { - if len != 0 { - self.gather_repeated(target, prev, len)?; - } - prev = v; - len = 1; - } - } + mut filter: Option, + page_validity: Option, - if len != 0 { - self.gather_repeated(target, prev, len)?; - } + is_optional: bool, - Ok(()) - } -} - -/// Extends a [`Pushable`] from an iterator of non-null values and an hybrid-rle decoder -pub(super) fn extend_from_decoder>( validity: &mut MutableBitmap, - page_validity: &mut PageValidity, - limit: Option, - target: &mut T, - collector: C, + target: &mut impl Pushable, ) -> ParquetResult<()> { - let num_elements = limit.map_or(page_validity.len(), |limit| limit.min(page_validity.len())); - - validity.reserve(num_elements); - C::reserve(target, num_elements); - - let batched_collector = BatchedCollector::new(collector, target); - let mut target = (validity, batched_collector); - let gatherer = BatchGatherer(Default::default()); - - page_validity.gather_n_into(&mut target, num_elements, &gatherer)?; - - target.1.finalize()?; - - Ok(()) -} - -/// This translates and collects items from a [`HybridRleDecoder`] into a target [`Vec`]. -/// -/// This batches sequential collect operations to try and prevent unnecessary buffering. 
-pub struct TranslatedHybridRle<'a, 'b, 'c, O, T> -where - O: Clone + Default, - T: Translator, -{ - decoder: &'a mut HybridRleDecoder<'b>, - translator: &'c T, - _pd: std::marker::PhantomData, -} + match &filter { + None => {}, + Some(Filter::Range(range)) => { + match page_validity.as_ref() { + None => { + for _ in 0..range.start { + decode_one()?; + } + }, + Some(pv) => { + for _ in 0..pv.clone().sliced(0, range.start).set_bits() { + decode_one()?; + } + }, + } -impl<'a, 'b, 'c, O, T> TranslatedHybridRle<'a, 'b, 'c, O, T> -where - O: Clone + Default, - T: Translator, -{ - pub fn new(decoder: &'a mut HybridRleDecoder<'b>, translator: &'c T) -> Self { - Self { - decoder, - translator, - _pd: Default::default(), - } - } -} + num_rows = range.len(); + filter = None; + }, + Some(Filter::Mask(mask)) => { + if mask.unset_bits() == 0 { + num_rows = mask.len(); + filter = None; + } + }, + }; + + match (filter, page_validity) { + (None, None) => { + target.reserve(num_rows); + for _ in 0..num_rows { + target.push(decode_one()?); + } -impl BatchableCollector> for TranslatedHybridRle<'_, '_, '_, O, T> -where - O: Clone + Default, - T: Translator, -{ - #[inline] - fn reserve(target: &mut Vec, n: usize) { - target.reserve(n); - } + if is_optional { + validity.extend_constant(num_rows, true); + } + }, + (None, Some(page_validity)) => { + target.reserve(page_validity.len()); + for is_valid in page_validity.iter() { + let v = if is_valid { + decode_one()? 
+ } else { + T::default() + }; + target.push(v); + } - #[inline] - fn push_n(&mut self, target: &mut Vec, n: usize) -> ParquetResult<()> { - self.decoder - .translate_and_collect_n_into(target, n, self.translator) - } + validity.extend_from_bitmap(&page_validity); + }, + (Some(Filter::Range(_)), _) => unreachable!(), + (Some(Filter::Mask(mask)), None) => { + let num_rows = mask.set_bits(); + target.reserve(num_rows); - #[inline] - fn push_n_nulls(&mut self, target: &mut Vec, n: usize) -> ParquetResult<()> { - target.resize(target.len() + n, O::default()); - Ok(()) - } + let mut iter = mask.iter(); + while iter.num_remaining() > 0 { + let num_ones = iter.take_leading_ones(); - #[inline] - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - self.decoder.skip_in_place(n) - } -} + if num_ones > 0 { + for _ in 0..num_rows { + target.push(decode_one()?); + } + } -pub struct GatheredHybridRle<'a, 'b, 'c, O, G> -where - O: Clone, - G: HybridRleGatherer, -{ - decoder: &'a mut HybridRleDecoder<'b>, - gatherer: &'c G, - null_value: O, - _pd: std::marker::PhantomData, -} + let num_zeros = iter.take_leading_zeros(); + for _ in 0..num_zeros { + decode_one()?; + } + } -impl<'a, 'b, 'c, O, G> GatheredHybridRle<'a, 'b, 'c, O, G> -where - O: Clone, - G: HybridRleGatherer, -{ - pub fn new(decoder: &'a mut HybridRleDecoder<'b>, gatherer: &'c G, null_value: O) -> Self { - Self { - decoder, - gatherer, - null_value, - _pd: Default::default(), - } - } -} + if is_optional { + validity.extend_constant(num_rows, true); + } + }, + (Some(Filter::Mask(mask)), Some(page_validity)) => { + assert_eq!(mask.len(), page_validity.len()); -impl BatchableCollector> for GatheredHybridRle<'_, '_, '_, O, G> -where - O: Clone, - G: HybridRleGatherer>, -{ - #[inline] - fn reserve(target: &mut Vec, n: usize) { - target.reserve(n); - } + let num_rows = mask.set_bits(); + target.reserve(num_rows); - #[inline] - fn push_n(&mut self, target: &mut Vec, n: usize) -> ParquetResult<()> { - 
self.decoder.gather_n_into(target, n, self.gatherer)?; - Ok(()) - } + let mut mask_iter = mask.fast_iter_u56(); + let mut validity_iter = page_validity.fast_iter_u56(); - #[inline] - fn push_n_nulls(&mut self, target: &mut Vec, n: usize) -> ParquetResult<()> { - self.gatherer - .gather_repeated(target, self.null_value.clone(), n)?; - Ok(()) - } + let mut iter = |mut f: u64, mut v: u64| { + while f != 0 { + let offset = f.trailing_ones(); - #[inline] - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - self.decoder.skip_in_place(n) - } -} + if (v >> offset) & 1 != 0 { + target.push(decode_one()?); + } else { + target.push(T::default()); + } -impl BatchableCollector> - for TranslatedHybridRle<'_, '_, '_, View, T> -where - T: Translator, -{ - #[inline] - fn reserve(target: &mut MutableBinaryViewArray<[u8]>, n: usize) { - target.reserve(n); - } + let skip = (v & (1u64 << offset).wrapping_sub(1)).count_ones() as usize; + for _ in 0..skip { + decode_one()?; + } - #[inline] - fn push_n(&mut self, target: &mut MutableBinaryViewArray<[u8]>, n: usize) -> ParquetResult<()> { - self.decoder.translate_and_collect_n_into( - unsafe { target.views_mut() }, - n, - self.translator, - )?; + v >>= offset + 1; + f >>= offset + 1; + } - if let Some(validity) = target.validity() { - validity.extend_constant(n, true); - } + for _ in 0..v.count_ones() as usize { + decode_one()?; + } - Ok(()) - } + ParquetResult::Ok(()) + }; - #[inline] - fn push_n_nulls( - &mut self, - target: &mut MutableBinaryViewArray<[u8]>, - n: usize, - ) -> ParquetResult<()> { - target.extend_null(n); - Ok(()) - } + for (f, v) in mask_iter.by_ref().zip(validity_iter.by_ref()) { + iter(f, v)?; + } - #[inline] - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - self.decoder.skip_in_place(n) - } -} + let (f, fl) = mask_iter.remainder(); + let (v, vl) = validity_iter.remainder(); -impl, I: Iterator> BatchableCollector for I { - #[inline] - fn reserve(target: &mut P, n: usize) { - 
target.reserve(n); - } + assert_eq!(fl, vl); - #[inline] - fn push_n(&mut self, target: &mut P, n: usize) -> ParquetResult<()> { - target.extend_n(n, self); - Ok(()) - } + iter(f, v)?; - #[inline] - fn push_n_nulls(&mut self, target: &mut P, n: usize) -> ParquetResult<()> { - target.extend_null_constant(n); - Ok(()) + validity.extend_from_bitmap(&page_validity); + }, } - #[inline] - fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - if n > 0 { - _ = self.nth(n - 1); - } - Ok(()) - } + Ok(()) } /// An item with a known size @@ -605,7 +278,14 @@ pub(super) trait Decoder: Sized { fn with_capacity(&self, capacity: usize) -> Self::DecodedState; /// Deserializes a [`DictPage`] into [`Self::Dict`]. - fn deserialize_dict(&self, page: DictPage) -> ParquetResult; + fn deserialize_dict(&mut self, page: DictPage) -> ParquetResult; + + fn extend_filtered_with_state( + &mut self, + state: State<'_, Self>, + decoded: &mut Self::DecodedState, + filter: Option, + ) -> ParquetResult<()>; fn apply_dictionary( &mut self, @@ -615,24 +295,6 @@ pub(super) trait Decoder: Sized { Ok(()) } - fn decode_plain_encoded<'a>( - &mut self, - decoded: &mut Self::DecodedState, - page_values: &mut as StateTranslation<'a, Self>>::PlainDecoder, - is_optional: bool, - page_validity: Option<&mut PageValidity<'a>>, - limit: usize, - ) -> ParquetResult<()>; - fn decode_dictionary_encoded<'a>( - &mut self, - decoded: &mut Self::DecodedState, - page_values: &mut HybridRleDecoder<'a>, - is_optional: bool, - page_validity: Option<&mut PageValidity<'a>>, - dict: &Self::Dict, - limit: usize, - ) -> ParquetResult<()>; - fn finalize( &self, dtype: ArrowDataType, @@ -641,38 +303,6 @@ pub(super) trait Decoder: Sized { ) -> ParquetResult; } -pub(crate) trait NestedDecoder: Decoder { - fn validity_extend( - state: &mut State<'_, Self>, - decoded: &mut Self::DecodedState, - value: bool, - n: usize, - ); - fn values_extend_nulls(state: &mut State<'_, Self>, decoded: &mut Self::DecodedState, n: usize); - - fn 
push_n_valids( - &mut self, - state: &mut State<'_, Self>, - decoded: &mut Self::DecodedState, - n: usize, - ) -> ParquetResult<()> { - state.extend_from_state(self, decoded, Some(Filter::new_limited(n)))?; - Self::validity_extend(state, decoded, true, n); - - Ok(()) - } - - fn push_n_nulls( - &self, - state: &mut State<'_, Self>, - decoded: &mut Self::DecodedState, - n: usize, - ) { - Self::validity_extend(state, decoded, false, n); - Self::values_extend_nulls(state, decoded, n); - } -} - pub trait DictDecodable: Decoder { fn finalize_dict_array( &self, @@ -693,7 +323,7 @@ impl PageDecoder { pub fn new( mut iter: BasicDecompressor, dtype: ArrowDataType, - decoder: D, + mut decoder: D, ) -> ParquetResult { let dict_page = iter.read_dict_page()?; let dict = dict_page.map(|d| decoder.deserialize_dict(d)).transpose()?; @@ -731,17 +361,14 @@ impl PageDecoder { let page = page.decompress(&mut self.iter)?; - let mut state = State::new(&self.decoder, &page, self.dict.as_ref())?; + let state = State::new(&self.decoder, &page, self.dict.as_ref())?; let start_length = target.len(); - state.extend_from_state(&mut self.decoder, &mut target, state_filter)?; + state.decode(&mut self.decoder, &mut target, state_filter)?; let end_length = target.len(); num_rows_remaining -= end_length - start_length; - debug_assert!(state.len() == 0 || num_rows_remaining == 0); - - drop(state); self.iter.reuse_page_buffer(page); } @@ -750,7 +377,10 @@ impl PageDecoder { } #[inline] -pub(super) fn dict_indices_decoder(page: &DataPage) -> ParquetResult { +pub(super) fn dict_indices_decoder( + page: &DataPage, + null_count: usize, +) -> ParquetResult { let indices_buffer = split_buffer(page)?.values; // SPEC: Data page format: the bit width used to encode the entry ids stored as 1 byte (max bit width = 32), @@ -761,7 +391,7 @@ pub(super) fn dict_indices_decoder(page: &DataPage) -> ParquetResult Option { Some(validity) } -pub(crate) fn hybrid_rle_count_zeros( - decoder: 
&hybrid_rle::HybridRleDecoder<'_>, -) -> ParquetResult { - let mut count = ZeroCount::default(); - decoder - .clone() - .gather_into(&mut count, &ZeroCountGatherer)?; - Ok(count.num_zero) +pub(crate) fn filter_from_range(rng: Range) -> Bitmap { + let mut bm = MutableBitmap::with_capacity(rng.end); + + bm.extend_constant(rng.start, false); + bm.extend_constant(rng.len(), true); + + bm.freeze() +} + +pub(crate) fn decode_hybrid_rle_into_bitmap( + mut page_validity: HybridRleDecoder<'_>, + limit: Option, + bitmap: &mut MutableBitmap, +) -> ParquetResult<()> { + assert!(page_validity.num_bits() <= 1); + + let mut limit = limit.unwrap_or(page_validity.len()); + bitmap.reserve(limit); + + while let Some(chunk) = page_validity.next_chunk()? { + if limit == 0 { + break; + } + + match chunk { + HybridRleChunk::Rle(value, size) => { + let size = size.min(limit); + bitmap.extend_constant(size, value != 0); + limit -= size; + }, + HybridRleChunk::Bitpacked(decoder) => { + let len = decoder.len().min(limit); + bitmap.extend_from_slice(decoder.as_slice(), 0, len); + limit -= len; + }, + } + } + + Ok(()) +} + +pub(crate) fn decode_page_validity( + page_validity: HybridRleDecoder<'_>, + limit: Option, +) -> ParquetResult { + let mut bm = MutableBitmap::new(); + decode_hybrid_rle_into_bitmap(page_validity, limit, &mut bm)?; + Ok(bm.freeze()) } diff --git a/crates/polars-parquet/src/parquet/encoding/bitpacked/decode.rs b/crates/polars-parquet/src/parquet/encoding/bitpacked/decode.rs index b5ea9b815dc1..8c43d2694590 100644 --- a/crates/polars-parquet/src/parquet/encoding/bitpacked/decode.rs +++ b/crates/polars-parquet/src/parquet/encoding/bitpacked/decode.rs @@ -1,3 +1,5 @@ +use polars_utils::chunks::Chunks; + use super::{Packed, Unpackable, Unpacked}; use crate::parquet::error::{ParquetError, ParquetResult}; @@ -6,7 +8,7 @@ use crate::parquet::error::{ParquetError, ParquetResult}; /// This iterator unpacks bytes in chunks and does not allocate. 
#[derive(Debug, Clone)] pub struct Decoder<'a, T: Unpackable> { - packed: std::slice::Chunks<'a, u8>, + packed: Chunks<'a, u8>, num_bits: usize, /// number of items pub(crate) length: usize, @@ -16,7 +18,7 @@ pub struct Decoder<'a, T: Unpackable> { impl Default for Decoder<'_, T> { fn default() -> Self { Self { - packed: [].chunks(1), + packed: Chunks::new(&[], 1), num_bits: 0, length: 0, _pd: std::marker::PhantomData, @@ -66,7 +68,8 @@ impl<'a, T: Unpackable> Decoder<'a, T> { } debug_assert!(num_bits != 0 || packed.is_empty()); - let packed = packed.chunks(block_size.max(1)); + let block_size = block_size.max(1); + let packed = Chunks::new(packed, block_size); Ok(Self { length, @@ -91,7 +94,7 @@ impl<'a, T: Unpackable> Decoder<'a, T> { ))); } - let packed = packed.chunks(block_size); + let packed = Chunks::new(packed, block_size); Ok(Self { length, @@ -104,6 +107,15 @@ impl<'a, T: Unpackable> Decoder<'a, T> { pub fn num_bits(&self) -> usize { self.num_bits } + + pub fn as_slice(&self) -> &[u8] { + self.packed.as_slice() + } + + pub fn lower_element(self) -> ParquetResult> { + let packed = self.packed.as_slice(); + Decoder::try_new(packed, self.num_bits, self.length) + } } /// A iterator over the exact chunks in a [`Decoder`]. 
@@ -124,9 +136,7 @@ impl Iterator for ChunkedDecoder<'_, '_, T> { } let mut unpacked = T::Unpacked::zero(); - let packed = self.decoder.packed.next()?; - decode_pack::(packed, self.decoder.num_bits, &mut unpacked); - self.decoder.length -= T::Unpacked::LENGTH; + self.next_into(&mut unpacked)?; Some(unpacked) } @@ -162,6 +172,19 @@ impl ChunkedDecoder<'_, '_, T> { self.remainder() } } + + pub fn next_into(&mut self, unpacked: &mut T::Unpacked) -> Option { + if self.decoder.len() == 0 { + return None; + } + + let unpacked_len = self.decoder.len().min(T::Unpacked::LENGTH); + let packed = self.decoder.packed.next()?; + decode_pack::(packed, self.decoder.num_bits, unpacked); + self.decoder.length -= unpacked_len; + + Some(unpacked_len) + } } impl<'a, T: Unpackable> Decoder<'a, T> { @@ -181,8 +204,8 @@ impl<'a, T: Unpackable> Decoder<'a, T> { } pub fn take(&mut self) -> Self { - let block_size = size_of::() * self.num_bits; - let packed = std::mem::replace(&mut self.packed, [].chunks(block_size)); + let block_size = self.packed.chunk_size(); + let packed = std::mem::replace(&mut self.packed, Chunks::new(&[], block_size)); let length = self.length; self.length = 0; diff --git a/crates/polars-parquet/src/parquet/encoding/bitpacked/mod.rs b/crates/polars-parquet/src/parquet/encoding/bitpacked/mod.rs index 94f310d28f14..10a1f61af963 100644 --- a/crates/polars-parquet/src/parquet/encoding/bitpacked/mod.rs +++ b/crates/polars-parquet/src/parquet/encoding/bitpacked/mod.rs @@ -1,4 +1,20 @@ macro_rules! 
seq_macro { + ($i:ident in 1..15 $block:block) => { + seq_macro!($i in [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + ] $block) + }; + ($i:ident in 0..16 $block:block) => { + seq_macro!($i in [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + ] $block) + }; + ($i:ident in 0..=16 $block:block) => { + seq_macro!($i in [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, + ] $block) + }; ($i:ident in 1..31 $block:block) => { seq_macro!($i in [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, @@ -57,7 +73,7 @@ mod encode; mod pack; mod unpack; -pub use decode::Decoder; +pub use decode::{ChunkedDecoder, Decoder}; pub use encode::{encode, encode_pack}; /// A byte slice (e.g. `[u8; 8]`) denoting types that represent complete packs. @@ -155,10 +171,26 @@ impl Unpacked for [u64; 64] { pub trait Unpackable: Copy + Sized + Default { type Packed: Packed; type Unpacked: Unpacked; + fn unpack(packed: &[u8], num_bits: usize, unpacked: &mut Self::Unpacked); fn pack(unpacked: &Self::Unpacked, num_bits: usize, packed: &mut [u8]); } +impl Unpackable for u16 { + type Packed = [u8; 16 * 2]; + type Unpacked = [u16; 16]; + + #[inline] + fn unpack(packed: &[u8], num_bits: usize, unpacked: &mut Self::Unpacked) { + unpack::unpack16(packed, unpacked, num_bits) + } + + #[inline] + fn pack(packed: &Self::Unpacked, num_bits: usize, unpacked: &mut [u8]) { + pack::pack16(packed, unpacked, num_bits) + } +} + impl Unpackable for u32 { type Packed = [u8; 32 * 4]; type Unpacked = [u32; 32]; diff --git a/crates/polars-parquet/src/parquet/encoding/bitpacked/pack.rs b/crates/polars-parquet/src/parquet/encoding/bitpacked/pack.rs index c318f42649d3..349d0f34ee87 100644 --- a/crates/polars-parquet/src/parquet/encoding/bitpacked/pack.rs +++ b/crates/polars-parquet/src/parquet/encoding/bitpacked/pack.rs @@ -81,6 +81,7 @@ macro_rules! 
pack { }; } +pack!(pack16, u16, 2, 16, 15); pack!(pack32, u32, 4, 32, 31); pack!(pack64, u64, 8, 64, 63); diff --git a/crates/polars-parquet/src/parquet/encoding/bitpacked/unpack.rs b/crates/polars-parquet/src/parquet/encoding/bitpacked/unpack.rs index 61d59925a39e..c52c17d21681 100644 --- a/crates/polars-parquet/src/parquet/encoding/bitpacked/unpack.rs +++ b/crates/polars-parquet/src/parquet/encoding/bitpacked/unpack.rs @@ -116,6 +116,7 @@ macro_rules! unpack { }; } +unpack!(unpack16, u16, 2, 16); unpack!(unpack32, u32, 4, 32); unpack!(unpack64, u64, 8, 64); diff --git a/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/decoder.rs b/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/decoder.rs index 261e84ce2e23..f176ef9862d4 100644 --- a/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/decoder.rs +++ b/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/decoder.rs @@ -69,7 +69,6 @@ struct MiniBlock<'a> { unpacked_end: usize, } -struct SkipGatherer; pub(crate) struct SumGatherer(pub(crate) usize); pub trait DeltaGatherer { @@ -109,38 +108,6 @@ pub trait DeltaGatherer { } } -impl DeltaGatherer for SkipGatherer { - type Target = usize; - - fn target_len(&self, target: &Self::Target) -> usize { - *target - } - fn target_reserve(&self, _target: &mut Self::Target, _n: usize) {} - - fn gather_one(&mut self, target: &mut Self::Target, _v: i64) -> ParquetResult<()> { - *target += 1; - Ok(()) - } - fn gather_constant( - &mut self, - target: &mut Self::Target, - _v: i64, - _delta: i64, - num_repeats: usize, - ) -> ParquetResult<()> { - *target += num_repeats; - Ok(()) - } - fn gather_chunk(&mut self, target: &mut Self::Target, chunk: &[i64; 64]) -> ParquetResult<()> { - *target += chunk.len(); - Ok(()) - } - fn gather_slice(&mut self, target: &mut Self::Target, slice: &[i64]) -> ParquetResult<()> { - *target += slice.len(); - Ok(()) - } -} - impl DeltaGatherer for SumGatherer { type Target = usize; @@ -749,12 +716,6 @@ impl<'a> Decoder<'a> 
{ Ok(()) } - pub fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - let mut gatherer = SkipGatherer; - self.gather_n_into(&mut 0usize, n, &mut gatherer) - } - - #[cfg(test)] pub(crate) fn collect_n>( &mut self, e: &mut E, @@ -786,7 +747,6 @@ impl<'a> Decoder<'a> { self.gather_n_into(&mut target, n, &mut gatherer) } - #[cfg(test)] pub(crate) fn collect + Default>( mut self, ) -> ParquetResult { diff --git a/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/fuzz.rs b/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/fuzz.rs deleted file mode 100644 index dc16bc8353fd..000000000000 --- a/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/fuzz.rs +++ /dev/null @@ -1,76 +0,0 @@ -#[ignore = "Fuzz test. Takes too long"] -#[test] -fn fuzz_test_delta_encoding() -> Result<(), Box> { - use rand::Rng; - - use super::DeltaGatherer; - use crate::parquet::error::ParquetResult; - - struct SimpleGatherer; - - impl DeltaGatherer for SimpleGatherer { - type Target = Vec; - - fn target_len(&self, target: &Self::Target) -> usize { - target.len() - } - - fn target_reserve(&self, target: &mut Self::Target, n: usize) { - target.reserve(n); - } - - fn gather_one(&mut self, target: &mut Self::Target, v: i64) -> ParquetResult<()> { - target.push(v); - Ok(()) - } - } - - const MIN_VALUES: usize = 1; - const MAX_VALUES: usize = 515; - - const MIN: i64 = i64::MIN; - const MAX: i64 = i64::MAX; - - const NUM_ITERATIONS: usize = 1_000_000; - - let mut values = Vec::with_capacity(MAX_VALUES); - let mut rng = rand::thread_rng(); - - let mut encoded = Vec::with_capacity(MAX_VALUES); - let mut decoded = Vec::with_capacity(MAX_VALUES); - let mut gatherer = SimpleGatherer; - - for i in 0..NUM_ITERATIONS { - values.clear(); - - let num_values = rng.gen_range(MIN_VALUES..=MAX_VALUES); - values.extend(std::iter::from_fn(|| Some(rng.gen_range(MIN..=MAX))).take(num_values)); - - encoded.clear(); - decoded.clear(); - - super::encode( - values.iter().copied(), - &mut 
encoded, - 1 << rng.gen_range(0..=2), - ); - let (mut decoder, rem) = super::Decoder::try_new(&encoded)?; - - assert!(rem.is_empty()); - - let mut num_remaining = num_values; - while num_remaining > 0 { - let n = rng.gen_range(1usize..=num_remaining); - decoder.gather_n_into(&mut decoded, n, &mut gatherer)?; - num_remaining -= n; - } - - assert_eq!(values, decoded); - - if i % 1000 == 999 { - eprintln!("[INFO]: {} iterations done.", i + 1); - } - } - - Ok(()) -} diff --git a/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/mod.rs b/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/mod.rs index 4a32610a302e..040909a336bb 100644 --- a/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/mod.rs +++ b/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/mod.rs @@ -1,8 +1,7 @@ mod decoder; mod encoder; -mod fuzz; -pub(crate) use decoder::{Decoder, DeltaGatherer, SumGatherer}; +pub(crate) use decoder::{Decoder, SumGatherer}; pub(crate) use encoder::encode; /// The sum of `start, start + delta, start + 2 * delta, ... len times`. 
diff --git a/crates/polars-parquet/src/parquet/encoding/delta_byte_array/decoder.rs b/crates/polars-parquet/src/parquet/encoding/delta_byte_array/decoder.rs index deb95f1dd3a2..bcdf9403b7be 100644 --- a/crates/polars-parquet/src/parquet/encoding/delta_byte_array/decoder.rs +++ b/crates/polars-parquet/src/parquet/encoding/delta_byte_array/decoder.rs @@ -52,45 +52,51 @@ impl<'a> Decoder<'a> { } } -#[cfg(test)] -mod tests { - use super::*; +impl Iterator for Decoder<'_> { + type Item = ParquetResult>; - impl Iterator for Decoder<'_> { - type Item = ParquetResult>; + fn next(&mut self) -> Option { + if self.len() == 0 { + return None; + } - fn next(&mut self) -> Option { - if self.len() == 0 { - return None; - } + let mut prefix_length = vec![]; + let mut suffix_length = vec![]; + if let Err(e) = self.prefix_lengths.collect_n(&mut prefix_length, 1) { + return Some(Err(e)); + } + if let Err(e) = self.suffix_lengths.collect_n(&mut suffix_length, 1) { + return Some(Err(e)); + } + let prefix_length = prefix_length[0]; + let suffix_length = suffix_length[0]; - let mut prefix_length = vec![]; - let mut suffix_length = vec![]; - if let Err(e) = self.prefix_lengths.collect_n(&mut prefix_length, 1) { - return Some(Err(e)); - } - if let Err(e) = self.suffix_lengths.collect_n(&mut suffix_length, 1) { - return Some(Err(e)); - } - let prefix_length = prefix_length[0]; - let suffix_length = suffix_length[0]; + let prefix_length = prefix_length as usize; + let suffix_length = suffix_length as usize; - let prefix_length = prefix_length as usize; - let suffix_length = suffix_length as usize; + let mut value = Vec::with_capacity(prefix_length + suffix_length); - let mut value = Vec::with_capacity(prefix_length + suffix_length); + value.extend_from_slice(&self.last[..prefix_length]); + value.extend_from_slice(&self.values[self.offset..self.offset + suffix_length]); - value.extend_from_slice(&self.last[..prefix_length]); - value.extend_from_slice(&self.values[self.offset..self.offset + 
suffix_length]); + self.last.clear(); + self.last.extend_from_slice(&value); - self.last.clear(); - self.last.extend_from_slice(&value); + self.offset += suffix_length; - self.offset += suffix_length; + Some(Ok(value)) + } - Some(Ok(value)) - } + fn size_hint(&self) -> (usize, Option) { + (self.prefix_lengths.len(), Some(self.prefix_lengths.len())) } +} + +impl ExactSizeIterator for Decoder<'_> {} + +#[cfg(test)] +mod tests { + use super::*; #[test] fn test_bla() -> ParquetResult<()> { diff --git a/crates/polars-parquet/src/parquet/encoding/delta_length_byte_array/decoder.rs b/crates/polars-parquet/src/parquet/encoding/delta_length_byte_array/decoder.rs index b3191e0a51ff..3bd46add609e 100644 --- a/crates/polars-parquet/src/parquet/encoding/delta_length_byte_array/decoder.rs +++ b/crates/polars-parquet/src/parquet/encoding/delta_length_byte_array/decoder.rs @@ -1,5 +1,4 @@ use super::super::delta_bitpacked; -use crate::parquet::encoding::delta_bitpacked::SumGatherer; use crate::parquet::error::ParquetResult; /// Decodes [Delta-length byte array](https://github.com/apache/parquet-format/blob/master/Encodings.md#delta-length-byte-array-delta_length_byte_array--6) @@ -10,6 +9,7 @@ use crate::parquet::error::ParquetResult; pub(crate) struct Decoder<'a> { pub(crate) lengths: delta_bitpacked::Decoder<'a>, pub(crate) values: &'a [u8], + #[cfg(test)] pub(crate) offset: usize, } @@ -19,21 +19,10 @@ impl<'a> Decoder<'a> { Ok(Self { lengths, values, + #[cfg(test)] offset: 0, }) } - - pub(crate) fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - let mut sum = 0usize; - self.lengths - .gather_n_into(&mut sum, n, &mut SumGatherer(0))?; - self.offset += sum; - Ok(()) - } - - pub fn len(&self) -> usize { - self.lengths.len() - } } #[cfg(test)] diff --git a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/buffered.rs b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/buffered.rs deleted file mode 100644 index 95d53b2769e4..000000000000 --- 
a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/buffered.rs +++ /dev/null @@ -1,280 +0,0 @@ -use super::gatherer::HybridRleGatherer; -use crate::parquet::encoding::bitpacked::{self, Unpackable, Unpacked}; -use crate::parquet::error::ParquetResult; - -#[derive(Debug, Clone)] -pub struct BufferedBitpacked<'a> { - pub unpacked: [u32; 32], - pub unpacked_start: usize, - pub unpacked_end: usize, - - pub decoder: bitpacked::Decoder<'a, u32>, -} - -#[derive(Debug, Clone)] -pub struct BufferedRle { - pub value: u32, - pub length: usize, -} - -/// A buffered set of items for the [`HybridRleDecoder`]. This can be iterated over and stopped at -/// any time. -#[derive(Debug, Clone)] -pub enum HybridRleBuffered<'a> { - Bitpacked(BufferedBitpacked<'a>), - Rle(BufferedRle), -} - -impl Iterator for BufferedRle { - type Item = u32; - - fn next(&mut self) -> Option { - if self.length > 0 { - self.length -= 1; - Some(self.value) - } else { - None - } - } - - fn size_hint(&self) -> (usize, Option) { - (self.length, Some(self.length)) - } -} - -impl ExactSizeIterator for BufferedRle {} - -impl Iterator for BufferedBitpacked<'_> { - type Item = u32; - - fn next(&mut self) -> Option { - if self.unpacked_start < self.unpacked_end { - let value = self.unpacked[self.unpacked_start]; - self.unpacked_start += 1; - return Some(value); - } - - self.decoder - .chunked() - .next_inexact() - .map(|(unpacked, unpacked_length)| { - debug_assert!(unpacked_length > 0); - let value = unpacked[0]; - self.unpacked = unpacked; - self.unpacked_end = unpacked_length; - self.unpacked_start = 1; - value - }) - } - - fn size_hint(&self) -> (usize, Option) { - let unpacked_num_elements = self.unpacked_end - self.unpacked_start; - let exact = unpacked_num_elements + self.decoder.len(); - (exact, Some(exact)) - } -} - -impl ExactSizeIterator for BufferedBitpacked<'_> {} - -impl Iterator for HybridRleBuffered<'_> { - type Item = u32; - - fn next(&mut self) -> Option { - match self { - 
HybridRleBuffered::Bitpacked(b) => b.next(), - HybridRleBuffered::Rle(b) => b.next(), - } - } - - fn size_hint(&self) -> (usize, Option) { - match self { - HybridRleBuffered::Bitpacked(b) => b.size_hint(), - HybridRleBuffered::Rle(b) => b.size_hint(), - } - } -} - -impl ExactSizeIterator for HybridRleBuffered<'_> {} - -impl BufferedBitpacked<'_> { - fn gather_limited_into>( - &mut self, - target: &mut G::Target, - limit: usize, - gatherer: &G, - ) -> ParquetResult { - let unpacked_num_elements = self.unpacked_end - self.unpacked_start; - if limit <= unpacked_num_elements { - gatherer.gather_slice( - target, - &self.unpacked[self.unpacked_start..self.unpacked_start + limit], - )?; - self.unpacked_start += limit; - return Ok(limit); - } - - gatherer.gather_slice( - target, - &self.unpacked[self.unpacked_start..self.unpacked_end], - )?; - self.unpacked_end = 0; - self.unpacked_start = 0; - let limit = limit - unpacked_num_elements; - - let decoder = self.decoder.take(); - let decoder_len = decoder.len(); - if limit >= decoder_len { - gatherer.gather_bitpacked_all(target, decoder)?; - Ok(unpacked_num_elements + decoder_len) - } else { - let buffered = gatherer.gather_bitpacked_limited(target, decoder, limit)?; - *self = buffered; - Ok(unpacked_num_elements + limit) - } - } - - pub fn gather_into>( - self, - target: &mut G::Target, - gatherer: &G, - ) -> ParquetResult { - let unpacked_num_elements = self.unpacked_end - self.unpacked_start; - gatherer.gather_slice( - target, - &self.unpacked[self.unpacked_start..self.unpacked_end], - )?; - let decoder_len = self.decoder.len(); - gatherer.gather_bitpacked_all(target, self.decoder)?; - Ok(unpacked_num_elements + decoder_len) - } - - pub fn skip_in_place(&mut self, n: usize) -> usize { - let unpacked_num_elements = self.unpacked_end - self.unpacked_start; - - if n < unpacked_num_elements { - self.unpacked_start += n; - return n; - } - - let n = n - unpacked_num_elements; - - if self.decoder.len() > n { - let num_chunks = n 
/ ::Unpacked::LENGTH; - let unpacked_offset = n % ::Unpacked::LENGTH; - self.decoder.skip_chunks(num_chunks); - let (unpacked, unpacked_length) = self.decoder.chunked().next_inexact().unwrap(); - debug_assert!(unpacked_offset < unpacked_length); - - self.unpacked = unpacked; - self.unpacked_start = unpacked_offset; - self.unpacked_end = unpacked_length; - - return unpacked_num_elements + n; - } - - // We skip the entire decoder. Essentially, just zero it out. - let decoder = self.decoder.take(); - self.unpacked_start = 0; - self.unpacked_end = 0; - - decoder.len() + unpacked_num_elements - } -} - -impl BufferedRle { - pub fn gather_limited_into>( - &mut self, - target: &mut G::Target, - limit: usize, - gatherer: &G, - ) -> ParquetResult { - let value = gatherer.hybridrle_to_target(self.value)?; - let num_elements = usize::min(self.length, limit); - self.length -= num_elements; - gatherer.gather_repeated(target, value, num_elements)?; - Ok(num_elements) - } - - pub fn gather_into>( - self, - target: &mut A::Target, - applicator: &A, - ) -> ParquetResult { - let value = applicator.hybridrle_to_target(self.value)?; - applicator.gather_repeated(target, value, self.length)?; - Ok(self.length) - } - - pub fn skip_in_place(&mut self, n: usize) -> usize { - let num_elements = usize::min(self.length, n); - self.length -= num_elements; - num_elements - } -} - -impl HybridRleBuffered<'_> { - pub fn gather_limited_into>( - &mut self, - target: &mut G::Target, - limit: usize, - gatherer: &G, - ) -> ParquetResult { - let start_target_length = gatherer.target_num_elements(target); - let start_length = self.len(); - - let num_processed = match self { - HybridRleBuffered::Bitpacked(b) => b.gather_limited_into(target, limit, gatherer), - HybridRleBuffered::Rle(b) => b.gather_limited_into(target, limit, gatherer), - }?; - - debug_assert!(num_processed <= limit); - debug_assert_eq!( - num_processed, - gatherer.target_num_elements(target) - start_target_length - ); - 
debug_assert_eq!(num_processed, start_length - self.len()); - - Ok(num_processed) - } - - pub fn gather_into>( - self, - target: &mut G::Target, - gatherer: &G, - ) -> ParquetResult { - let start_target_length = gatherer.target_num_elements(target); - let start_length = self.len(); - - let num_processed = match self { - HybridRleBuffered::Bitpacked(b) => b.gather_into(target, gatherer), - HybridRleBuffered::Rle(b) => b.gather_into(target, gatherer), - }?; - - debug_assert_eq!( - num_processed, - gatherer.target_num_elements(target) - start_target_length - ); - debug_assert_eq!(num_processed, start_length); - - Ok(num_processed) - } - - pub fn skip_in_place(&mut self, n: usize) -> usize { - let start_length = self.len(); - - let num_skipped = match self { - HybridRleBuffered::Bitpacked(b) => b.skip_in_place(n), - HybridRleBuffered::Rle(b) => b.skip_in_place(n), - }; - - debug_assert!(num_skipped <= n); - debug_assert_eq!( - num_skipped, - start_length - self.len(), - "{self:?}: {num_skipped} != {start_length} - {}", - self.len() - ); - - num_skipped - } -} diff --git a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/fuzz.rs b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/fuzz.rs deleted file mode 100644 index f4a980fb5062..000000000000 --- a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/fuzz.rs +++ /dev/null @@ -1,390 +0,0 @@ -/// Since the HybridRle decoder is very widely used within the Parquet reader and the code is quite -/// complex to facilitate performance. 
We create this small fuzzer -use std::collections::VecDeque; - -use rand::Rng; - -use super::*; - -fn run_iteration( - bs: &[u32], - collects: impl Iterator, - encoded: &mut Vec, - decoded: &mut Vec, - num_bits: u32, -) -> ParquetResult<()> { - encoded.clear(); - decoded.clear(); - - encoder::encode(encoded, bs.iter().copied(), num_bits).unwrap(); - - let mut decoder = HybridRleDecoder::new(&encoded[..], num_bits, bs.len()); - - for c in collects { - decoder.collect_n_into(decoded, c)?; - } - - Ok(()) -} - -/// Minimizes a failing case -fn minimize_failing_case( - bs: &mut Vec, - collects: &mut VecDeque, - encoded: &mut Vec, - decoded: &mut Vec, - num_bits: u32, -) -> ParquetResult<()> { - loop { - let initial_bs_len = bs.len(); - let initial_collects_len = collects.len(); - - run_iteration(bs, collects.iter().copied(), encoded, decoded, num_bits)?; - - assert_ne!(&bs, &decoded); - - while collects.len() > 2 { - let last = collects.pop_back().unwrap(); - - *collects.back_mut().unwrap() += last; - - run_iteration(bs, collects.iter().copied(), encoded, decoded, num_bits)?; - - if bs == decoded { - *collects.back_mut().unwrap() -= last; - collects.push_back(last); - break; - } - } - - while collects.len() > 2 { - let first = collects.pop_front().unwrap(); - - *collects.front_mut().unwrap() += first; - - run_iteration(bs, collects.iter().copied(), encoded, decoded, num_bits)?; - - if bs == decoded { - *collects.front_mut().unwrap() -= first; - collects.push_front(first); - break; - } - } - - while bs.len() > 1 { - let last = bs.pop().unwrap(); - *collects.back_mut().unwrap() -= 1; - - run_iteration(bs, collects.iter().copied(), encoded, decoded, num_bits)?; - - if bs == decoded { - bs.push(last); - *collects.back_mut().unwrap() += 1; - break; - } - - if *collects.back().unwrap() == 0 { - collects.pop_back().unwrap(); - - run_iteration(bs, collects.iter().copied(), encoded, decoded, num_bits)?; - - if bs == decoded { - collects.push_back(0); - break; - } - } - } - - 
while bs.len() > 1 { - let last = bs.pop().unwrap(); - *collects.front_mut().unwrap() -= 1; - - run_iteration(bs, collects.iter().copied(), encoded, decoded, num_bits)?; - - if bs == decoded { - bs.push(last); - *collects.front_mut().unwrap() += 1; - break; - } - - if *collects.front().unwrap() == 0 { - collects.pop_front().unwrap(); - - run_iteration(bs, collects.iter().copied(), encoded, decoded, num_bits)?; - - if bs == decoded { - collects.push_front(0); - break; - } - } - } - - while bs.len() > 1 { - let first = bs.remove(0); - *collects.back_mut().unwrap() -= 1; - - run_iteration(bs, collects.iter().copied(), encoded, decoded, num_bits)?; - - if bs == decoded { - bs.insert(0, first); - *collects.back_mut().unwrap() += 1; - break; - } - - if *collects.back().unwrap() == 0 { - collects.pop_back().unwrap(); - - run_iteration(bs, collects.iter().copied(), encoded, decoded, num_bits)?; - - if bs == decoded { - collects.push_back(0); - break; - } - } - } - - while bs.len() > 1 { - let first = bs.remove(0); - *collects.front_mut().unwrap() -= 1; - - run_iteration(bs, collects.iter().copied(), encoded, decoded, num_bits)?; - - if bs == decoded { - bs.insert(0, first); - *collects.front_mut().unwrap() += 1; - break; - } - - if *collects.front().unwrap() == 0 { - collects.pop_front().unwrap(); - - run_iteration(bs, collects.iter().copied(), encoded, decoded, num_bits)?; - - if bs == decoded { - collects.push_front(0); - break; - } - } - } - - let mut start_offset = collects[0]; - for i in 1..collects.len() - 1 { - loop { - let start_length = collects[i]; - - while collects[i] > 0 { - collects[i] -= 1; - let item = bs.remove(start_offset); - - run_iteration(bs, collects.iter().copied(), encoded, decoded, num_bits)?; - - if bs == decoded { - bs.insert(start_offset, item); - collects[i] += 1; - break; - } - - if collects[i] == 0 { - collects.remove(i); - - run_iteration(bs, collects.iter().copied(), encoded, decoded, num_bits)?; - - if bs == decoded { - collects.insert(i, 
0); - break; - } - } - } - - while collects[i] > 0 { - collects[i] -= 1; - let end_offset = start_offset + collects[i] - 1; - let item = bs.remove(end_offset); - - run_iteration(bs, collects.iter().copied(), encoded, decoded, num_bits)?; - - if bs == decoded { - bs.insert(end_offset, item); - collects[i] += 1; - break; - } - - if collects[i] == 0 { - collects.remove(i); - - run_iteration(bs, collects.iter().copied(), encoded, decoded, num_bits)?; - - if bs == decoded { - collects.insert(i, 0); - break; - } - } - } - - if collects[i] == start_length { - break; - } - } - - start_offset += collects[i]; - } - - let now_bs_len = bs.len(); - let now_collects_len = collects.len(); - - if initial_bs_len == now_bs_len && initial_collects_len == now_collects_len { - break; - } - } - - run_iteration(bs, collects.iter().copied(), encoded, decoded, num_bits)?; - - Ok(()) -} - -fn fuzz_loops(num_loops: usize) -> ParquetResult<()> { - let mut rng = rand::thread_rng(); - - const MAX_LENGTH: usize = 10_000; - - let mut encoded = Vec::with_capacity(1024); - let mut decoded = Vec::with_capacity(1024); - - let mut bs = Vec::with_capacity(MAX_LENGTH); - let mut collects: VecDeque = VecDeque::with_capacity(2000); - - for i in 0..num_loops { - collects.clear(); - bs.clear(); - - let num_bits = rng.gen_range(0..=32); - let mask = 1u32.wrapping_shl(num_bits).wrapping_sub(1); - - let length = rng.gen_range(1..=MAX_LENGTH); - - unsafe { bs.set_len(length) }; - rng.fill(&mut bs[..]); - - let mut filled = 0; - while filled < bs.len() { - if rng.gen() { - let num_repeats = rng.gen_range(0..=(bs.len() - filled)); - let value = bs[filled] & mask; - for j in 0..num_repeats { - bs[filled + j] = value; - } - filled += num_repeats; - } else { - bs[filled] &= mask; - filled += 1; - } - } - - if rng.gen() { - let mut num_values = bs.len(); - while num_values > 0 { - let n = rng.gen_range(0..=num_values); - collects.push_back(n); - num_values -= n; - } - } else { - collects.resize(1, bs.len()); - } - - 
run_iteration( - &bs, - collects.iter().copied(), - &mut encoded, - &mut decoded, - num_bits, - )?; - - if decoded != bs { - minimize_failing_case(&mut bs, &mut collects, &mut encoded, &mut decoded, num_bits)?; - - eprintln!("Minimized case:"); - eprintln!("Expected: {bs:?}"); - eprintln!("Found: {decoded:?}"); - eprintln!("Collects: {collects:?}"); - eprintln!(); - - panic!("Found a failing case..."); - } - - if i % 512 == 0 { - eprintln!("{i} iterations done."); - } - } - - Ok(()) -} - -#[test] -fn small_fuzz() -> ParquetResult<()> { - fuzz_loops(2048) -} - -#[test] -#[ignore = "Large fuzz test. Too slow"] -fn large_fuzz() -> ParquetResult<()> { - fuzz_loops(1_000_000) -} - -#[test] -#[ignore = "Large fuzz test. Too slow"] -fn skip_fuzz() -> ParquetResult<()> { - let mut rng = rand::thread_rng(); - - const MAX_LENGTH: usize = 10_000; - - let mut encoded = Vec::with_capacity(10000); - - let mut bs: Vec = Vec::with_capacity(MAX_LENGTH); - let mut skips: VecDeque = VecDeque::with_capacity(2000); - - let num_loops = 100_000; - - for _ in 0..num_loops { - skips.clear(); - bs.clear(); - - let num_bits = rng.gen_range(0..=32); - let mask = 1u32.wrapping_shl(num_bits).wrapping_sub(1); - - let length = rng.gen_range(1..=MAX_LENGTH); - - unsafe { bs.set_len(length) }; - rng.fill(&mut bs[..]); - - let mut filled = 0; - while filled < bs.len() { - if rng.gen() { - let num_repeats = rng.gen_range(0..=(bs.len() - filled)); - let value = bs[filled] & mask; - for j in 0..num_repeats { - bs[filled + j] = value; - } - filled += num_repeats; - } else { - bs[filled] &= mask; - filled += 1; - } - } - - let mut num_done = 0; - while num_done < filled { - let num_skip = rng.gen_range(1..=filled - num_done); - num_done += num_skip; - skips.push_back(num_skip); - } - - encoder::encode(&mut encoded, bs.iter().copied(), num_bits).unwrap(); - let mut decoder = HybridRleDecoder::new(&encoded, num_bits, filled); - - for s in &skips { - decoder.skip_in_place(*s).unwrap(); - } - } - - Ok(()) -} 
diff --git a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/gatherer.rs b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/gatherer.rs deleted file mode 100644 index c66ef5873439..000000000000 --- a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/gatherer.rs +++ /dev/null @@ -1,545 +0,0 @@ -use crate::parquet::encoding::bitpacked::{Decoder, Unpackable, Unpacked}; -use crate::parquet::encoding::hybrid_rle::{BufferedBitpacked, HybridRleBuffered}; -use crate::parquet::error::{ParquetError, ParquetResult}; - -/// Trait that describes what to do with consumed Hybrid-RLE Encoded values. -/// -/// This is quite a general trait that provides a lot of open space as to how to handle the -/// Hybrid-RLE encoded values. There is also the [`Translator`] trait that is usually good enough -/// if you want just want to map values to another set of values and collect them into a vector. -/// -/// Although, this trait might seem quite over-engineered (it might be), it is very useful for -/// performance. This allows for usage of the properties that [`HybridRleDecoder`] provides and for -/// definition of efficient procedures for collecting slices, chunks, repeated elements and -/// bit-packed elements. -/// -/// The [`Translator`] doc-comment has a good description of why this trait is needed. 
-/// -/// [`HybridRleDecoder`]: super::HybridRleDecoder -pub trait HybridRleGatherer { - type Target; - - fn target_reserve(&self, target: &mut Self::Target, n: usize); - fn target_num_elements(&self, target: &Self::Target) -> usize; - - fn hybridrle_to_target(&self, value: u32) -> ParquetResult; - fn gather_one(&self, target: &mut Self::Target, value: O) -> ParquetResult<()>; - fn gather_repeated(&self, target: &mut Self::Target, value: O, n: usize) -> ParquetResult<()>; - fn gather_slice(&self, target: &mut Self::Target, source: &[u32]) -> ParquetResult<()> { - self.target_reserve(target, source.len()); - for v in source { - self.gather_one(target, self.hybridrle_to_target(*v)?)?; - } - Ok(()) - } - fn gather_chunk( - &self, - target: &mut Self::Target, - source: &::Unpacked, - ) -> ParquetResult<()> { - self.gather_slice(target, source) - } - fn gather_bitpacked_all( - &self, - target: &mut Self::Target, - mut decoder: Decoder, - ) -> ParquetResult<()> { - self.target_reserve(target, decoder.len()); - - let mut chunked = decoder.chunked(); - - for unpacked in &mut chunked { - self.gather_chunk(target, &unpacked)?; - } - - if let Some((last, last_length)) = chunked.remainder() { - self.gather_slice(target, &last[..last_length])?; - } - - Ok(()) - } - - fn gather_bitpacked_limited<'a>( - &self, - target: &mut Self::Target, - mut decoder: Decoder<'a, u32>, - limit: usize, - ) -> ParquetResult> { - assert!(limit < decoder.len()); - - const CHUNK_SIZE: usize = ::Unpacked::LENGTH; - - let mut chunked = decoder.chunked(); - - let num_full_chunks = limit / CHUNK_SIZE; - for unpacked in (&mut chunked).take(num_full_chunks) { - self.gather_chunk(target, &unpacked)?; - } - - let (unpacked, unpacked_length) = chunked.next_inexact().unwrap(); - let unpacked_offset = limit % CHUNK_SIZE; - debug_assert!(unpacked_offset < unpacked_length); - self.gather_slice(target, &unpacked[..unpacked_offset])?; - - Ok(BufferedBitpacked { - unpacked, - - unpacked_start: unpacked_offset, - 
unpacked_end: unpacked_length, - decoder, - }) - } - fn gather_bitpacked<'a>( - &self, - target: &mut Self::Target, - decoder: Decoder<'a, u32>, - limit: Option, - ) -> ParquetResult<(usize, Option>)> { - let length = decoder.len(); - - match limit { - None => self - .gather_bitpacked_all(target, decoder) - .map(|_| (length, None)), - Some(limit) if limit >= length => self - .gather_bitpacked_all(target, decoder) - .map(|_| (length, None)), - Some(limit) => self - .gather_bitpacked_limited(target, decoder, limit) - .map(|b| (limit, Some(HybridRleBuffered::Bitpacked(b)))), - } - } -} - -#[derive(Default, Clone, Copy)] -pub struct ZeroCount { - pub num_zero: usize, - pub num_nonzero: usize, -} -pub struct ZeroCountGatherer; - -impl HybridRleGatherer for ZeroCountGatherer { - type Target = ZeroCount; - - #[inline(always)] - fn target_reserve(&self, _target: &mut Self::Target, _n: usize) {} - - #[inline] - fn target_num_elements(&self, target: &Self::Target) -> usize { - target.num_zero + target.num_nonzero - } - - #[inline] - fn hybridrle_to_target(&self, value: u32) -> ParquetResult { - Ok(ZeroCount { - num_zero: usize::from(value == 0), - num_nonzero: usize::from(value != 0), - }) - } - - #[inline] - fn gather_one(&self, target: &mut Self::Target, value: ZeroCount) -> ParquetResult<()> { - target.num_zero += value.num_zero; - target.num_nonzero += value.num_nonzero; - Ok(()) - } - - #[inline] - fn gather_repeated( - &self, - target: &mut Self::Target, - value: ZeroCount, - n: usize, - ) -> ParquetResult<()> { - target.num_zero += value.num_zero * n; - target.num_nonzero += value.num_nonzero * n; - Ok(()) - } - - #[inline] - fn gather_slice(&self, target: &mut Self::Target, source: &[u32]) -> ParquetResult<()> { - let mut num_zero = 0; - let mut num_nonzero = 0; - - for v in source { - num_zero += usize::from(*v == 0); - num_nonzero += usize::from(*v != 0); - } - - target.num_zero += num_zero; - target.num_nonzero += num_nonzero; - - Ok(()) - } -} - -/// A trait to 
describe a translation from a HybridRLE encoding to an another format. -/// -/// In essence, this is one method ([`Translator::translate`]) that maps an `u32` to the desired -/// output type `O`. There are several other methods that may provide optimized routines -/// for slices, chunks and decoders. -/// -/// # Motivation -/// -/// The [`HybridRleDecoder`] is used extensively during Parquet decoding because it is used for -/// Dremel decoding and dictionary decoding. We want to perform a transformation from this -/// space-efficient encoding to a buffer. Here, items might be skipped, might be mapped and only a -/// few items might be needed. There are 3 main ways to do this. -/// -/// 1. Element-by-element translation using iterator `map`, `filter`, `skip`, etc. This suffers -/// from the problem that is difficult to SIMD the translation and that a `collect` might need -/// to constantly poll the `next` function. Next to that monomorphization might need to generate -/// many, many variants. -/// 2. Buffer most everything, filter and translate later. This has high memory-consumption and -/// might suffer from cache-eviction problems. This is computationally the most efficient, but -/// probably still has a high runtime. Also, this fails to utilize run-length information and -/// needs to retranslate all repeated elements. -/// 3. Batched operations. Here, we try to utilize the run-length information and utilize SIMD to -/// process many bitpacked items. This can provide the best of both worlds. -/// -/// The [`HybridRleDecoder`][super::HybridRleDecoder] decoders utilizing both run-length encoding -/// and bitpacking. In both processes, this [`Translator`] trait allows for translation with (i) no -/// heap allocations and (ii) cheap buffering and can stop and start at any point. Consequently, -/// the memory consumption while doing these translations can be relatively low while still -/// processing items in batches. 
-/// -/// [`HybridRleDecoder`]: super::HybridRleDecoder -pub trait Translator { - /// Translate from a decoded value to the output format - fn translate(&self, value: u32) -> ParquetResult; - - /// Translate from a slice of decoded values to the output format and write them to a `target`. - /// - /// This can overwritten to be more optimized. - fn translate_slice(&self, target: &mut Vec, source: &[u32]) -> ParquetResult<()> { - target.reserve(source.len()); - for v in source { - target.push(self.translate(*v)?); - } - Ok(()) - } - - /// Translate from a chunk of unpacked items to the output format and write them to a `target`. - /// - /// This is the same as [`Translator::translate_slice`] but with a known slice size. This can - /// allow SIMD routines to better optimize the procedure. - /// - /// This can overwritten to be more optimized. - fn translate_chunk( - &self, - target: &mut Vec, - source: &::Unpacked, - ) -> ParquetResult<()> { - self.translate_slice(target, &source[..]) - } - - /// Translate and collect all the items in a [`Decoder`] to a `target`. - /// - /// This can overwritten to be more optimized. - fn translate_bitpacked_all( - &self, - target: &mut Vec, - mut decoder: Decoder, - ) -> ParquetResult<()> { - target.reserve(decoder.len()); - - let mut chunked = decoder.chunked(); - - for unpacked in &mut chunked { - self.translate_chunk(target, &unpacked)?; - } - - if let Some((last, last_length)) = chunked.remainder() { - self.translate_slice(target, &last[..last_length])?; - } - - Ok(()) - } - - /// Translate and collect a limited number of items in a [`Decoder`] to a `target`. - /// - /// This can overwritten to be more optimized. - /// - /// # Panics - /// - /// This method panics when `limit` is larger than the `decoder` length. 
- fn translate_bitpacked_limited<'a>( - &self, - target: &mut Vec, - mut decoder: Decoder<'a, u32>, - limit: usize, - ) -> ParquetResult> { - assert!(limit < decoder.len()); - - const CHUNK_SIZE: usize = ::Unpacked::LENGTH; - - let mut chunked = decoder.chunked(); - - let num_full_chunks = limit / CHUNK_SIZE; - for unpacked in (&mut chunked).take(num_full_chunks) { - self.translate_chunk(target, &unpacked)?; - } - - let (unpacked, unpacked_length) = chunked.next_inexact().unwrap(); - let unpacked_offset = limit % CHUNK_SIZE; - debug_assert!(unpacked_offset < unpacked_length); - self.translate_slice(target, &unpacked[..unpacked_offset])?; - - Ok(BufferedBitpacked { - unpacked, - - unpacked_start: unpacked_offset, - unpacked_end: unpacked_length, - decoder, - }) - } - - /// Translate and collect items in a [`Decoder`] to a `target`. - /// - /// This can overwritten to be more optimized. - fn translate_bitpacked<'a>( - &self, - target: &mut Vec, - decoder: Decoder<'a, u32>, - limit: Option, - ) -> ParquetResult<(usize, Option>)> { - let length = decoder.len(); - - match limit { - None => self - .translate_bitpacked_all(target, decoder) - .map(|_| (length, None)), - Some(limit) if limit >= length => self - .translate_bitpacked_all(target, decoder) - .map(|_| (length, None)), - Some(limit) => self - .translate_bitpacked_limited(target, decoder, limit) - .map(|b| (limit, Some(HybridRleBuffered::Bitpacked(b)))), - } - } -} - -impl> HybridRleGatherer for T { - type Target = Vec; - - #[inline(always)] - fn target_reserve(&self, target: &mut Self::Target, n: usize) { - target.reserve(n); - } - #[inline(always)] - fn target_num_elements(&self, target: &Self::Target) -> usize { - target.len() - } - - #[inline(always)] - fn hybridrle_to_target(&self, value: u32) -> ParquetResult { - self.translate(value) - } - - #[inline(always)] - fn gather_one(&self, target: &mut Self::Target, value: O) -> ParquetResult<()> { - target.push(value); - Ok(()) - } - - #[inline(always)] - fn 
gather_repeated(&self, target: &mut Self::Target, value: O, n: usize) -> ParquetResult<()> { - target.resize(target.len() + n, value); - Ok(()) - } - - #[inline(always)] - fn gather_slice(&self, target: &mut Self::Target, source: &[u32]) -> ParquetResult<()> { - self.translate_slice(target, source) - } - - #[inline(always)] - fn gather_chunk( - &self, - target: &mut Self::Target, - source: &::Unpacked, - ) -> ParquetResult<()> { - self.translate_chunk(target, source) - } - - #[inline(always)] - fn gather_bitpacked_all( - &self, - target: &mut Self::Target, - decoder: Decoder, - ) -> ParquetResult<()> { - self.translate_bitpacked_all(target, decoder) - } - - #[inline(always)] - fn gather_bitpacked_limited<'a>( - &self, - target: &mut Self::Target, - decoder: Decoder<'a, u32>, - limit: usize, - ) -> ParquetResult> { - self.translate_bitpacked_limited(target, decoder, limit) - } - - #[inline(always)] - fn gather_bitpacked<'a>( - &self, - target: &mut Self::Target, - decoder: Decoder<'a, u32>, - limit: Option, - ) -> ParquetResult<(usize, Option>)> { - self.translate_bitpacked(target, decoder, limit) - } -} - -/// This is a unit translation variant of [`Translator`]. This just maps all encoded values from a -/// [`HybridRleDecoder`] to themselves. -/// -/// [`HybridRleDecoder`]: super::HybridRleDecoder -pub struct UnitTranslator; - -impl Translator for UnitTranslator { - fn translate(&self, value: u32) -> ParquetResult { - Ok(value) - } - - fn translate_slice(&self, target: &mut Vec, source: &[u32]) -> ParquetResult<()> { - target.extend_from_slice(source); - Ok(()) - } - fn translate_chunk( - &self, - target: &mut Vec, - source: &::Unpacked, - ) -> ParquetResult<()> { - target.extend_from_slice(&source[..]); - Ok(()) - } - fn translate_bitpacked_all( - &self, - target: &mut Vec, - decoder: Decoder, - ) -> ParquetResult<()> { - decoder.collect_into(target); - Ok(()) - } -} - -/// This is a dictionary translation variant of [`Translator`]. 
-/// -/// All the [`HybridRleDecoder`] values are regarded as a offset into a dictionary. -/// -/// [`HybridRleDecoder`]: super::HybridRleDecoder -pub struct DictionaryTranslator<'a, T>(pub &'a [T]); - -impl Translator for DictionaryTranslator<'_, T> { - fn translate(&self, value: u32) -> ParquetResult { - self.0 - .get(value as usize) - .cloned() - .ok_or(ParquetError::oos("Dictionary index is out of range")) - } - - fn translate_slice(&self, target: &mut Vec, source: &[u32]) -> ParquetResult<()> { - let Some(source_max) = source.iter().copied().max() else { - return Ok(()); - }; - - if source_max as usize >= self.0.len() { - return Err(ParquetError::oos("Dictionary index is out of range")); - } - - // Safety: We have checked before that source only has indexes that are smaller than the - // dictionary length. - target.extend( - source - .iter() - .map(|&src_idx| unsafe { *self.0.get_unchecked(src_idx as usize) }), - ); - - Ok(()) - } - - fn translate_chunk( - &self, - target: &mut Vec, - source: &::Unpacked, - ) -> ParquetResult<()> { - let source_max: u32 = source.iter().copied().max().unwrap(); - - if source_max as usize >= self.0.len() { - return Err(ParquetError::oos("Dictionary index is out of range")); - } - - // Safety: We have checked before that source only has indexes that are smaller than the - // dictionary length. 
- target.extend( - source - .iter() - .map(|&src_idx| unsafe { *self.0.get_unchecked(src_idx as usize) }), - ); - - Ok(()) - } -} - -/// A closure-based translator -pub struct FnTranslator ParquetResult>(pub F); - -impl ParquetResult> Translator for FnTranslator { - fn translate(&self, value: u32) -> ParquetResult { - (self.0)(value) - } -} - -#[derive(Default)] -pub struct TryFromUsizeTranslator>(std::marker::PhantomData); - -impl> Translator for TryFromUsizeTranslator { - fn translate(&self, value: u32) -> ParquetResult { - O::try_from(value as usize).map_err(|_| ParquetError::oos("Invalid cast in translation")) - } -} - -pub struct SliceDictionaryTranslator<'a, T> { - dict: &'a [T], - size: usize, -} - -impl<'a, T> SliceDictionaryTranslator<'a, T> { - pub fn new(dict: &'a [T], size: usize) -> Self { - debug_assert_eq!(dict.len() % size, 0); - Self { dict, size } - } -} - -impl<'a, T> Translator<&'a [T]> for SliceDictionaryTranslator<'a, T> { - fn translate(&self, value: u32) -> ParquetResult<&'a [T]> { - let idx = value as usize; - - if idx >= self.dict.len() / self.size { - return Err(ParquetError::oos("Dictionary slice index is out of range")); - } - - Ok(&self.dict[idx * self.size..(idx + 1) * self.size]) - } - - fn translate_slice(&self, target: &mut Vec<&'a [T]>, source: &[u32]) -> ParquetResult<()> { - let Some(source_max) = source.iter().copied().max() else { - return Ok(()); - }; - - if source_max as usize >= self.dict.len() / self.size { - return Err(ParquetError::oos("Dictionary index is out of range")); - } - - // Safety: We have checked before that source only has indexes that are smaller than the - // dictionary length. 
- target.extend(source.iter().map(|&src_idx| unsafe { - self.dict - .get_unchecked((src_idx as usize) * self.size..(src_idx as usize + 1) * self.size) - })); - - Ok(()) - } -} diff --git a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/mod.rs b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/mod.rs index a2d8d891c6c4..72e7b82cc4ad 100644 --- a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/mod.rs +++ b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/mod.rs @@ -1,35 +1,12 @@ // See https://github.com/apache/parquet-format/blob/master/Encodings.md#run-length-encoding--bit-packing-hybrid-rle--3 mod bitmap; -mod buffered; mod encoder; -pub mod gatherer; - -#[cfg(test)] -mod fuzz; pub use bitmap::{encode_bool as bitpacked_encode, BitmapIter}; -pub use buffered::BufferedBitpacked; pub use encoder::{encode, Encoder}; -pub use gatherer::{ - DictionaryTranslator, FnTranslator, Translator, TryFromUsizeTranslator, UnitTranslator, -}; - -use self::buffered::HybridRleBuffered; -use self::gatherer::HybridRleGatherer; -use super::{bitpacked, ceil8, uleb128}; -use crate::parquet::encoding::bitpacked::{Unpackable, Unpacked}; -use crate::parquet::encoding::hybrid_rle::buffered::BufferedRle; -use crate::parquet::error::{ParquetError, ParquetResult}; -/// The two possible states of an RLE-encoded run. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum HybridEncoded<'a> { - /// A bitpacked slice. The consumer must know its bit-width to unpack it. - Bitpacked(&'a [u8]), - /// A RLE-encoded slice. The first attribute corresponds to the slice (that can be interpreted) - /// the second attribute corresponds to the number of repetitions. 
- Rle(&'a [u8], usize), -} +use super::{bitpacked, uleb128}; +use crate::parquet::error::{ParquetError, ParquetResult}; /// A [`Iterator`] for Hybrid Run-Length Encoding /// @@ -46,8 +23,24 @@ pub struct HybridRleDecoder<'a> { data: &'a [u8], num_bits: usize, num_values: usize, +} + +pub struct HybridRleChunkIter<'a> { + decoder: HybridRleDecoder<'a>, +} + +#[derive(Debug)] +pub enum HybridRleChunk<'a> { + Rle(u32, usize), + Bitpacked(bitpacked::Decoder<'a, u32>), +} + +impl<'a> Iterator for HybridRleChunkIter<'a> { + type Item = ParquetResult>; - buffered: Option>, + fn next(&mut self) -> Option { + self.decoder.next_chunk().transpose() + } } impl<'a> HybridRleDecoder<'a> { @@ -57,8 +50,6 @@ impl<'a> HybridRleDecoder<'a> { data, num_bits: num_bits as usize, num_values, - - buffered: None, } } @@ -66,344 +57,90 @@ impl<'a> HybridRleDecoder<'a> { self.num_values } - fn gather_limited_once>( - &mut self, - target: &mut G::Target, - limit: Option, - gatherer: &G, - ) -> ParquetResult { - if limit == Some(0) { - return Ok(0); - } + pub fn num_bits(&self) -> usize { + self.num_bits + } - let start_target_length = gatherer.target_num_elements(target); - let start_num_values = self.num_values; + pub fn into_chunk_iter(self) -> HybridRleChunkIter<'a> { + HybridRleChunkIter { decoder: self } + } - // @NOTE: - // This is basically a collapsed version of the `decoder::Decoder`. Any change here - // probably also applies there. In a microbenchmark this collapse did around 3x for this - // specific piece of code, but I think this actually also makes the code more readable. 
+ pub fn next_chunk(&mut self) -> ParquetResult>> { + if self.len() == 0 { + return Ok(None); + } - debug_assert!( - self.num_values > 0, - "{:?}", - gatherer.target_num_elements(target) - ); - debug_assert!(self.num_bits > 0); + if self.num_bits == 0 { + let num_values = self.num_values; + self.num_values = 0; + return Ok(Some(HybridRleChunk::Rle(0, num_values))); + } + + if self.data.is_empty() { + return Ok(None); + } let (indicator, consumed) = uleb128::decode(self.data); self.data = unsafe { self.data.get_unchecked(consumed..) }; - if consumed == 0 { - let step_size = - limit.map_or(self.num_values, |limit| usize::min(self.num_values, limit)); - // In this case, we take the value encoded by `0`. For example, if the HybridRle - // encodes a dictionary. We should take the 0-th value. - let value = gatherer.hybridrle_to_target(0)?; - gatherer.gather_repeated(target, value, step_size)?; - self.num_values -= step_size; - - return Ok(step_size); - } - - let num_processed = if indicator & 1 == 1 { + Ok(Some(if indicator & 1 == 1 { // is bitpacking let bytes = (indicator as usize >> 1) * self.num_bits; let bytes = std::cmp::min(bytes, self.data.len()); - let (packed, remaining) = self.data.split_at(bytes); + let Some((packed, remaining)) = self.data.split_at_checked(bytes) else { + return Err(ParquetError::oos("Not enough bytes for bitpacked data")); + }; self.data = remaining; let length = std::cmp::min(packed.len() * 8 / self.num_bits, self.num_values); let decoder = bitpacked::Decoder::::try_new(packed, self.num_bits, length)?; - let (num_processed, buffered) = gatherer.gather_bitpacked(target, decoder, limit)?; - debug_assert!(limit.map_or(true, |limit| limit >= num_processed)); - self.buffered = buffered; + self.num_values -= length; - num_processed + HybridRleChunk::Bitpacked(decoder) } else { // is rle let run_length = indicator as usize >> 1; // repeated-value := value that is repeated, using a fixed-width of round-up-to-next-byte(bit-width) - let rle_bytes = 
ceil8(self.num_bits); - let (pack, remaining) = self.data.split_at(rle_bytes); + let rle_bytes = self.num_bits.div_ceil(8); + let Some((pack, remaining)) = self.data.split_at_checked(rle_bytes) else { + return Err(ParquetError::oos("Not enough bytes for RLE encoded data")); + }; self.data = remaining; - if run_length == 0 { - 0 - } else { - let mut bytes = [0u8; size_of::()]; - pack.iter().zip(bytes.iter_mut()).for_each(|(src, dst)| { - *dst = *src; - }); - let value = u32::from_le_bytes(bytes); - - let num_elements = limit.map_or(run_length, |limit| usize::min(run_length, limit)); - - // Only translate once. Then, just do a memset. - let translated = gatherer.hybridrle_to_target(value)?; - gatherer.gather_repeated(target, translated, num_elements)?; - - if let Some(limit) = limit { - if run_length > limit { - self.buffered = (run_length != limit).then_some({ - HybridRleBuffered::Rle(BufferedRle { - value, - length: run_length - num_elements, - }) - }); - } - } - - num_elements - } - }; + let mut bytes = [0u8; std::mem::size_of::()]; + pack.iter().zip(bytes.iter_mut()).for_each(|(src, dst)| { + *dst = *src; + }); + let value = u32::from_le_bytes(bytes); - self.num_values -= num_processed; + let length = std::cmp::min(run_length, self.num_values); - debug_assert_eq!(num_processed, start_num_values - self.num_values); - debug_assert_eq!( - num_processed, - gatherer.target_num_elements(target) - start_target_length - ); - debug_assert!(limit.map_or(true, |limit| num_processed <= limit)); + self.num_values -= length; - Ok(num_processed) + HybridRleChunk::Rle(value, length) + })) } - #[inline(always)] - pub fn gather_into>( - mut self, - target: &mut G::Target, - gatherer: &G, - ) -> ParquetResult<()> { - if self.num_values == 0 { - return Ok(()); - } - - gatherer.target_reserve(target, self.num_values); - - if self.num_bits == 0 { - let value = gatherer.hybridrle_to_target(0)?; - gatherer.gather_repeated(target, value, self.num_values)?; - return Ok(()); - } - - if let 
Some(buffered) = self.buffered.take() { - let num_buffered = buffered.gather_into(target, gatherer)?; - self.num_values -= num_buffered; - } - - while self.num_values > 0 { - self.gather_limited_once(target, None, gatherer)?; - } - - Ok(()) + pub fn limit_to(&mut self, length: usize) { + self.num_values = self.num_values.min(length); } - pub fn gather_n_into>( - &mut self, - target: &mut G::Target, - n: usize, - gatherer: &G, - ) -> ParquetResult<()> { - if self.num_values == 0 || n == 0 { - return Ok(()); - } - - if self.num_bits == 0 { - let n = usize::min(n, self.num_values); - - let value = gatherer.hybridrle_to_target(0)?; - gatherer.gather_repeated(target, value, n)?; - self.num_values -= n; - return Ok(()); - } - - let target_length = gatherer.target_num_elements(target) + n; - gatherer.target_reserve(target, n); - - if let Some(buffered) = self.buffered.as_mut() { - let num_buffered = buffered.gather_limited_into(target, n, gatherer)?; - debug_assert!(num_buffered <= n); - self.num_values -= num_buffered; - - if num_buffered < n { - self.buffered = None; - } - } - - while gatherer.target_num_elements(target) < target_length && self.num_values > 0 { - self.gather_limited_once( - target, - Some(target_length - gatherer.target_num_elements(target)), - gatherer, - )?; - } - - Ok(()) - } - - #[inline(always)] - pub fn translate_and_collect_into>( - self, - target: &mut >::Target, - translator: &T, - ) -> ParquetResult<()> { - self.gather_into(target, translator) - } - - pub fn translate_and_collect_n_into>( - &mut self, - target: &mut >::Target, - n: usize, - translator: &T, - ) -> ParquetResult<()> { - self.gather_n_into(target, n, translator) - } - - #[inline(always)] - pub fn translate_and_collect>( - self, - translator: &T, - ) -> ParquetResult<>::Target> { - let mut vec = Vec::new(); - self.translate_and_collect_into(&mut vec, translator)?; - Ok(vec) - } - - #[inline(always)] - pub fn collect_into( - self, - target: &mut >::Target, - ) -> Result<(), 
ParquetError> { - self.translate_and_collect_into(target, &UnitTranslator) - } - - #[inline(always)] - pub fn collect_n_into( - &mut self, - target: &mut >::Target, - n: usize, - ) -> ParquetResult<()> { - self.translate_and_collect_n_into(target, n, &UnitTranslator) - } - - #[inline(always)] - pub fn collect(self) -> ParquetResult<>::Target> { - let mut vec = Vec::new(); - self.collect_into(&mut vec)?; - Ok(vec) - } - - pub fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { - if self.num_values == 0 || n == 0 { - return Ok(()); - } - - if n >= self.num_values { - self.data = &[]; - self.num_values = 0; - self.buffered = None; - return Ok(()); - } - - if self.num_bits == 0 { - self.num_values -= n; - return Ok(()); - } - - let mut n = n; - if let Some(buffered) = self.buffered.as_mut() { - let num_skipped = buffered.skip_in_place(n); - - if num_skipped < n { - self.buffered = None; + pub fn collect(self) -> ParquetResult> { + let mut target = Vec::with_capacity(self.len()); + + for chunk in self.into_chunk_iter() { + match chunk? { + HybridRleChunk::Rle(value, size) => { + target.resize(target.len() + size, value); + }, + HybridRleChunk::Bitpacked(decoder) => { + decoder.collect_into(&mut target); + }, } - - self.num_values -= num_skipped; - n -= num_skipped; - } - - while n > 0 && self.num_values > 0 { - let start_num_values = self.num_values; - - let (indicator, consumed) = uleb128::decode(self.data); - self.data = unsafe { self.data.get_unchecked(consumed..) 
}; - - let num_skipped = if consumed == 0 { - n - } else if indicator & 1 == 1 { - // is bitpacking - let bytes = (indicator as usize >> 1) * self.num_bits; - let bytes = std::cmp::min(bytes, self.data.len()); - let (packed, remaining) = self.data.split_at(bytes); - self.data = remaining; - - let length = std::cmp::min(packed.len() * 8 / self.num_bits, self.num_values); - let mut decoder = - bitpacked::Decoder::::try_new(packed, self.num_bits, length)?; - - // Skip the whole decoder if it is possible - if decoder.len() <= n { - decoder.len() - } else { - const CHUNK_SIZE: usize = ::Unpacked::LENGTH; - - let num_full_chunks = n / CHUNK_SIZE; - decoder.skip_chunks(num_full_chunks); - - let (unpacked, unpacked_length) = decoder.chunked().next_inexact().unwrap(); - let unpacked_offset = n % CHUNK_SIZE; - debug_assert!(unpacked_offset < unpacked_length); - - self.buffered = Some(HybridRleBuffered::Bitpacked(BufferedBitpacked { - unpacked, - - unpacked_start: unpacked_offset, - unpacked_end: unpacked_length, - decoder, - })); - - n - } - } else { - // is rle - let run_length = indicator as usize >> 1; - // repeated-value := value that is repeated, using a fixed-width of round-up-to-next-byte(bit-width) - let rle_bytes = ceil8(self.num_bits); - let (pack, remaining) = self.data.split_at(rle_bytes); - self.data = remaining; - - // Skip the whole run-length encoded value if it is possible - if run_length <= n { - run_length - } else { - let mut bytes = [0u8; size_of::()]; - pack.iter().zip(bytes.iter_mut()).for_each(|(src, dst)| { - *dst = *src; - }); - let value = u32::from_le_bytes(bytes); - - self.buffered = Some(HybridRleBuffered::Rle(BufferedRle { - value, - length: run_length - n, - })); - - n - } - }; - - self.num_values -= num_skipped; - - debug_assert_eq!(num_skipped, start_num_values - self.num_values); - debug_assert!(num_skipped <= n, "{num_skipped} <= {n}"); - debug_assert!(indicator >> 1 == 0 || num_skipped > 0); - - n -= num_skipped; } - Ok(()) + Ok(target) } 
} @@ -543,7 +280,7 @@ mod tests { fn empty_values() -> ParquetResult<()> { let data = []; - let num_bits = 1; + let num_bits = 0; let decoder = HybridRleDecoder::new(&data, num_bits, 100); diff --git a/crates/polars-parquet/src/parquet/types.rs b/crates/polars-parquet/src/parquet/types.rs index 1dd65d0ff622..74c5da2adc2f 100644 --- a/crates/polars-parquet/src/parquet/types.rs +++ b/crates/polars-parquet/src/parquet/types.rs @@ -1,7 +1,13 @@ +use arrow::types::{ + AlignedBytes, AlignedBytesCast, Bytes12Alignment4, Bytes4Alignment4, Bytes8Alignment8, +}; + use crate::parquet::schema::types::PhysicalType; /// A physical native representation of a Parquet fixed-sized type. -pub trait NativeType: std::fmt::Debug + Send + Sync + 'static + Copy + Clone { +pub trait NativeType: + std::fmt::Debug + Send + Sync + 'static + Copy + Clone + AlignedBytesCast +{ type Bytes: AsRef<[u8]> + bytemuck::Pod + IntoIterator @@ -9,6 +15,7 @@ pub trait NativeType: std::fmt::Debug + Send + Sync + 'static + Copy + Clone { + std::fmt::Debug + Clone + Copy; + type AlignedBytes: AlignedBytes + From + Into; fn to_le_bytes(&self) -> Self::Bytes; @@ -20,9 +27,11 @@ pub trait NativeType: std::fmt::Debug + Send + Sync + 'static + Copy + Clone { } macro_rules! native { - ($type:ty, $physical_type:expr) => { + ($type:ty, $unaligned:ty, $physical_type:expr) => { impl NativeType for $type { type Bytes = [u8; size_of::()]; + type AlignedBytes = $unaligned; + #[inline] fn to_le_bytes(&self) -> Self::Bytes { Self::to_le_bytes(*self) @@ -43,15 +52,17 @@ macro_rules! 
native { }; } -native!(i32, PhysicalType::Int32); -native!(i64, PhysicalType::Int64); -native!(f32, PhysicalType::Float); -native!(f64, PhysicalType::Double); +native!(i32, Bytes4Alignment4, PhysicalType::Int32); +native!(i64, Bytes8Alignment8, PhysicalType::Int64); +native!(f32, Bytes4Alignment4, PhysicalType::Float); +native!(f64, Bytes8Alignment8, PhysicalType::Double); impl NativeType for [u32; 3] { const TYPE: PhysicalType = PhysicalType::Int96; type Bytes = [u8; size_of::()]; + type AlignedBytes = Bytes12Alignment4; + #[inline] fn to_le_bytes(&self) -> Self::Bytes { let mut bytes = [0; 12]; diff --git a/crates/polars-utils/src/chunks.rs b/crates/polars-utils/src/chunks.rs new file mode 100644 index 000000000000..80f720eb2fd7 --- /dev/null +++ b/crates/polars-utils/src/chunks.rs @@ -0,0 +1,63 @@ +/// A copy of the [`std::slice::Chunks`] that exposes the inner `slice` and `chunk_size`. +#[derive(Clone, Debug)] +pub struct Chunks<'a, T> { + slice: &'a [T], + chunk_size: usize, +} + +impl<'a, T> Iterator for Chunks<'a, T> { + type Item = &'a [T]; + + fn next(&mut self) -> Option { + if self.slice.is_empty() { + return None; + } + + let item; + (item, self.slice) = self.slice.split_at(self.chunk_size.min(self.slice.len())); + + Some(item) + } + + fn size_hint(&self) -> (usize, Option) { + let len = self.slice.len().div_ceil(self.chunk_size); + (len, Some(len)) + } +} + +impl DoubleEndedIterator for Chunks<'_, T> { + fn next_back(&mut self) -> Option { + if self.slice.is_empty() { + return None; + } + + let rem = self.slice.len() % self.chunk_size; + let offset = if rem == 0 { self.chunk_size } else { rem }; + + let item; + (self.slice, item) = self.slice.split_at(self.slice.len() - offset); + + Some(item) + } +} + +impl ExactSizeIterator for Chunks<'_, T> {} + +impl<'a, T> Chunks<'a, T> { + pub const fn new(slice: &'a [T], chunk_size: usize) -> Self { + Self { slice, chunk_size } + } + + pub const fn as_slice(&self) -> &'a [T] { + self.slice + } + + pub const fn 
chunk_size(&self) -> usize { + self.chunk_size + } + + pub fn skip_in_place(&mut self, n: usize) { + let n = n * self.chunk_size; + self.slice = &self.slice[n.min(self.slice.len())..]; + } +} diff --git a/crates/polars-utils/src/lib.rs b/crates/polars-utils/src/lib.rs index e789b0debfa3..409c18485530 100644 --- a/crates/polars-utils/src/lib.rs +++ b/crates/polars-utils/src/lib.rs @@ -12,6 +12,7 @@ pub mod binary_search; pub mod cache; pub mod cardinality_sketch; pub mod cell; +pub mod chunks; pub mod clmul; pub mod cpuid; mod error; diff --git a/crates/polars/tests/it/io/parquet/read/binary.rs b/crates/polars/tests/it/io/parquet/read/binary.rs index 4fcf27e173b7..086dd06bb9b9 100644 --- a/crates/polars/tests/it/io/parquet/read/binary.rs +++ b/crates/polars/tests/it/io/parquet/read/binary.rs @@ -1,11 +1,10 @@ -use polars_parquet::parquet::encoding::hybrid_rle::FnTranslator; use polars_parquet::parquet::error::ParquetResult; use polars_parquet::parquet::page::DataPage; use super::dictionary::BinaryPageDict; use super::utils::deserialize_optional; -use crate::io::parquet::read::hybrid_rle_iter; use crate::io::parquet::read::utils::FixedLenBinaryPageState; +use crate::io::parquet::read::{hybrid_rle_fn_collect, hybrid_rle_iter}; pub fn page_to_vec( page: &DataPage, @@ -25,9 +24,9 @@ pub fn page_to_vec( .map(|x| x.transpose()) .collect(), FixedLenBinaryPageState::RequiredDictionary(dict) => { - let dictionary = - FnTranslator(|v| dict.dict.value(v as usize).map(|v| Some(v.to_vec()))); - dict.indexes.translate_and_collect(&dictionary) + hybrid_rle_fn_collect(dict.indexes, |x| { + dict.dict.value(x as usize).map(<[u8]>::to_vec).map(Some) + }) }, FixedLenBinaryPageState::OptionalDictionary(validity, dict) => { let values = hybrid_rle_iter(dict.indexes)? 
diff --git a/crates/polars/tests/it/io/parquet/read/mod.rs b/crates/polars/tests/it/io/parquet/read/mod.rs index d671e085c86a..448134a8d955 100644 --- a/crates/polars/tests/it/io/parquet/read/mod.rs +++ b/crates/polars/tests/it/io/parquet/read/mod.rs @@ -15,7 +15,7 @@ mod utils; use std::fs::File; use dictionary::DecodedDictPage; -use polars_parquet::parquet::encoding::hybrid_rle::HybridRleDecoder; +use polars_parquet::parquet::encoding::hybrid_rle::{HybridRleChunk, HybridRleDecoder}; use polars_parquet::parquet::error::{ParquetError, ParquetResult}; use polars_parquet::parquet::metadata::ColumnChunkMetadata; use polars_parquet::parquet::page::DataPage; @@ -32,6 +32,37 @@ pub fn hybrid_rle_iter(d: HybridRleDecoder) -> ParquetResult( + d: HybridRleDecoder, + mut f: impl FnMut(u32) -> ParquetResult, +) -> ParquetResult> { + let mut target = Vec::with_capacity(d.len()); + + for chunk in d.into_chunk_iter() { + match chunk? { + HybridRleChunk::Rle(value, size) => { + target.resize(target.len() + size, f(value)?); + }, + HybridRleChunk::Bitpacked(mut decoder) => { + let mut chunked = decoder.chunked(); + for dchunk in chunked.by_ref() { + for v in dchunk { + target.push(f(v)?); + } + } + + if let Some((dchunk, l)) = chunked.remainder() { + for &v in &dchunk[..l] { + target.push(f(v)?); + } + } + }, + } + } + + Ok(target) +} + pub fn get_path() -> PathBuf { let dir = env!("CARGO_MANIFEST_DIR"); PathBuf::from(dir).join("../../docs/assets/data") diff --git a/crates/polars/tests/it/io/parquet/read/primitive.rs b/crates/polars/tests/it/io/parquet/read/primitive.rs index 960c502fb82d..96ed3f4eac11 100644 --- a/crates/polars/tests/it/io/parquet/read/primitive.rs +++ b/crates/polars/tests/it/io/parquet/read/primitive.rs @@ -1,4 +1,3 @@ -use polars_parquet::parquet::encoding::hybrid_rle::FnTranslator; use polars_parquet::parquet::error::ParquetResult; use polars_parquet::parquet::page::DataPage; use polars_parquet::parquet::types::NativeType; @@ -7,6 +6,7 @@ use 
polars_parquet::read::ParquetError; use super::dictionary::PrimitivePageDict; use super::hybrid_rle_iter; use super::utils::{deserialize_optional, NativePageState}; +use crate::io::parquet::read::hybrid_rle_fn_collect; /// The deserialization state of a `DataPage` of `Primitive` parquet primitive type #[derive(Debug)] @@ -43,10 +43,9 @@ pub fn page_to_vec( deserialize_optional(validity, values.by_ref().map(Ok)) }, NativePageState::Required(values) => Ok(values.map(Some).collect()), - NativePageState::RequiredDictionary(dict) => { - let dictionary = FnTranslator(|x| dict.dict.value(x as usize).copied().map(Some)); - dict.indexes.translate_and_collect(&dictionary) - }, + NativePageState::RequiredDictionary(dict) => hybrid_rle_fn_collect(dict.indexes, |x| { + dict.dict.value(x as usize).copied().map(Some) + }), NativePageState::OptionalDictionary(validity, dict) => { let values = hybrid_rle_iter(dict.indexes)?.map(|x| dict.dict.value(x as usize).copied()); diff --git a/py-polars/tests/unit/io/test_parquet.py b/py-polars/tests/unit/io/test_parquet.py index bf9d8ac4fad8..84e93eb0c5e4 100644 --- a/py-polars/tests/unit/io/test_parquet.py +++ b/py-polars/tests/unit/io/test_parquet.py @@ -1099,6 +1099,7 @@ def test_hybrid_rle() -> None: @pytest.mark.slow def test_roundtrip_parametric(df: pl.DataFrame) -> None: f = io.BytesIO() + print(df) df.write_parquet(f) f.seek(0) result = pl.read_parquet(f) @@ -1579,6 +1580,7 @@ def test_slice_roundtrip(df: pl.DataFrame, offset: int, length: int) -> None: df.write_parquet(f) f.seek(0) + print((offset, length)) scanned = pl.scan_parquet(f).slice(offset, length).collect() assert_frame_equal(scanned, df.slice(offset, length)) @@ -2085,7 +2087,77 @@ def test_conserve_sortedness( ) -def test_f16() -> None: +@pytest.mark.parametrize("use_dictionary", [True, False]) +@pytest.mark.parametrize( + "values", + [ + (size, x) + for size in [1, 2, 3, 4, 8, 12, 15, 16, 32] + for x in [ + [list(range(size)), list(range(7, 7 + size))], + 
[list(range(size)), None], + [list(range(i, i + size)) for i in range(13)], + [list(range(i, i + size)) if i % 3 < 2 else None for i in range(13)], + ] + ], +) +@pytest.mark.parametrize( + "filt", + [ + lambda _: None, + lambda _: pl.col.f > 0, + lambda _: pl.col.f > 1, + lambda _: pl.col.f < 5, + lambda _: pl.col.f % 2 == 0, + lambda _: pl.col.f % 5 < 4, + lambda values: (0, min(1, len(values))), + lambda _: (1, 1), + lambda _: (-2, 1), + lambda values: (1, len(values) - 2), + ], +) +def test_fixed_size_binary( + use_dictionary: bool, + values: tuple[int, list[None | list[int]]], + filt: Callable[[list[None | list[int]]], None | pl.Expr | tuple[int, int]], +) -> None: + size, elems = values + bs = [bytes(v) if v is not None else None for v in elems] + + tbl = pa.table( + { + "a": bs, + "f": range(len(bs)), + }, + schema=pa.schema( + [ + pa.field("a", pa.binary(length=size), nullable=True), + pa.field("f", pa.int32(), nullable=True), + ] + ), + ) + + df = pl.DataFrame(tbl) + + f = io.BytesIO() + pq.write_table(tbl, f, use_dictionary=use_dictionary) + + f.seek(0) + + loaded: pl.DataFrame + if isinstance(filt, pl.Expr): + loaded = pl.scan_parquet(f).filter(filt).collect() + df = df.filter(filt) + elif isinstance(filt, tuple): + loaded = pl.scan_parquet(f).slice(filt[0], filt[1]).collect() + df = df.slice(filt[0], filt[1]) + else: + loaded = pl.read_parquet(f) + + assert_frame_equal(loaded, df) + + +def test_decode_f16() -> None: values = [float("nan"), 0.0, 0.5, 1.0, 1.5] table = pa.Table.from_pydict( @@ -2113,3 +2185,81 @@ def test_f16() -> None: pl.scan_parquet(f).slice(1, 3).collect(), df.slice(1, 3), ) + + +def test_invalid_utf8_binary() -> None: + a = pl.Series("a", [b"\x80"], pl.Binary).to_frame() + f = io.BytesIO() + + a.write_parquet(f) + f.seek(0) + out = pl.read_parquet(f) + + assert_frame_equal(a, out) + + +@pytest.mark.parametrize( + "dtype", + [ + pl.Null, + pl.Int8, + pl.Int32, + pl.Datetime(), + pl.String, + pl.Binary, + pl.Boolean, + pl.Struct({"x": 
pl.Int32}), + pl.List(pl.Int32), + pl.Array(pl.Int32, 0), + pl.Array(pl.Int32, 2), + ], +) +@pytest.mark.parametrize( + "filt", + [ + pl.col.f == 0, + pl.col.f != 0, + pl.col.f == 1, + pl.col.f != 1, + pl.col.f == 2, + pl.col.f != 2, + pl.col.f == 3, + pl.col.f != 3, + ], +) +def test_filter_only_invalid(dtype: pl.DataType, filt: pl.Expr) -> None: + df = pl.DataFrame( + [ + pl.Series("a", [None, None, None], dtype), + pl.Series("f", range(3), pl.Int32), + ] + ) + + f = io.BytesIO() + + df.write_parquet(f) + f.seek(0) + out = pl.scan_parquet(f, parallel="prefiltered").filter(filt).collect() + + assert_frame_equal(df.filter(filt), out) + + +def test_nested_nulls() -> None: + df = pl.Series( + "a", + [ + [None, None], + None, + [None, 1], + [None, None], + [2, None], + ], + pl.Array(pl.Int32, 2), + ).to_frame() + + f = io.BytesIO() + df.write_parquet(f) + + f.seek(0) + out = pl.read_parquet(f) + assert_frame_equal(out, df)