Skip to content

Commit

Permalink
fix: IPC don't write variadic_buffer_counts in blocks, but only dicti…
Browse files Browse the repository at this point in the history
…onaries
  • Loading branch information
ritchie46 committed Sep 27, 2024
1 parent 79fcd53 commit e40032c
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 9 deletions.
10 changes: 3 additions & 7 deletions crates/polars-arrow/src/io/ipc/write/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -254,13 +254,9 @@ fn set_variadic_buffer_counts(counts: &mut Vec<i64>, array: &dyn Array) {
let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
set_variadic_buffer_counts(counts, array.values().as_ref())
},
ArrowDataType::Dictionary(_, _, _) => {
let array = array
.as_any()
.downcast_ref::<DictionaryArray<u32>>()
.unwrap();
set_variadic_buffer_counts(counts, array.values().as_ref())
},
// Don't traverse dictionary values as those are set when the `Dictionary` IPC struct
// is read.
ArrowDataType::Dictionary(_, _, _) => (),
_ => (),
}
}
Expand Down
15 changes: 13 additions & 2 deletions crates/polars-plan/src/plans/ir/scan_sources.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::fmt::{Debug, Formatter};
use std::fs::File;
use std::path::{Path, PathBuf};
use std::sync::Arc;
Expand All @@ -14,10 +15,10 @@ use super::FileScanOptions;

/// Set of sources to scan from
///
/// This is can either be a list of paths to files, opened files or in-memory buffers. Mixing of
/// This can either be a list of paths to files, opened files or in-memory buffers. Mixing of
/// buffers is not currently possible.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone)]
#[derive(Clone)]
pub enum ScanSources {
Paths(Arc<[PathBuf]>),

Expand All @@ -27,6 +28,16 @@ pub enum ScanSources {
Buffers(Arc<[bytes::Bytes]>),
}

impl Debug for ScanSources {
    /// Writes a compact, single-line summary of the sources.
    ///
    /// `Paths` prints the full list of paths; `Files` and `Buffers` print only
    /// how many items they hold, not the items themselves.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Paths(paths) => {
                let listed = paths.as_ref();
                write!(f, "paths: {:?}", listed)
            },
            Self::Files(files) => {
                let count = files.len();
                write!(f, "files: {} files", count)
            },
            Self::Buffers(buffers) => {
                let count = buffers.len();
                write!(f, "buffers: {} in-memory-buffers", count)
            },
        }
    }
}

/// A reference to a single item in [`ScanSources`]
#[derive(Debug, Clone, Copy)]
pub enum ScanSourceRef<'a> {
Expand Down
14 changes: 14 additions & 0 deletions py-polars/tests/unit/io/test_ipc.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,3 +340,17 @@ def test_ipc_decimal_15920(
path = f"{tmp_path}/data"
df.write_ipc(path)
assert_frame_equal(pl.read_ipc(path), df)


def test_ipc_variadic_buffers_categorical_binview_18636() -> None:
    """Round-trip a frame with a Categorical and a String column through IPC.

    The frame is written to an in-memory buffer, read back, and compared
    against the original.
    """
    categorical_col = pl.Series(["Value012"], dtype=pl.Categorical)
    string_col = pl.Series(["Value Two 20032"], dtype=pl.String)
    df = pl.DataFrame({"Test": categorical_col, "Test2": string_col})

    buffer = io.BytesIO()
    df.write_ipc(buffer)
    # Rewind so the reader starts at the beginning of the written bytes.
    buffer.seek(0)

    round_tripped = pl.read_ipc(buffer)
    assert_frame_equal(round_tripped, df)

0 comments on commit e40032c

Please sign in to comment.