Skip to content

Commit

Permalink
fix: ensure ListChunked::full_null uses physical types (#11554)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Oct 6, 2023
1 parent b6dce6b commit 3102e7e
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 14 deletions.
34 changes: 23 additions & 11 deletions crates/polars-core/src/chunked_array/from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,15 +171,13 @@ where
dtype @ DataType::Array(_, _) => from_chunks_list_dtype(&mut chunks, dtype),
dt => dt,
};
// assertions in debug mode
// that check if the data types in the arrays are as expected
#[cfg(debug_assertions)]
{
if !chunks.is_empty() && dtype.is_primitive() {
assert_eq!(chunks[0].data_type(), &dtype.to_physical().to_arrow())
}
}
let field = Arc::new(Field::new(name, dtype));
Self::from_chunks_and_dtype(name, chunks, dtype)
}

/// # Safety
/// The Arrow datatype of all chunks must match the [`PolarsDataType`] `T`.
pub unsafe fn with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
let field = self.field.clone();
let mut out = ChunkedArray {
field,
chunks,
Expand All @@ -191,10 +189,24 @@ where
out
}

/// Create a new [`ChunkedArray`] from existing chunks.
///
/// # Safety
/// The Arrow datatype of all chunks must match the [`PolarsDataType`] `T`.
pub unsafe fn with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
let field = self.field.clone();
pub unsafe fn from_chunks_and_dtype(
name: &str,
chunks: Vec<ArrayRef>,
dtype: DataType,
) -> Self {
// assertions in debug mode
// that check if the data types in the arrays are as expected
#[cfg(debug_assertions)]
{
if !chunks.is_empty() && dtype.is_primitive() {
assert_eq!(chunks[0].data_type(), &dtype.to_physical().to_arrow())
}
}
let field = Arc::new(Field::new(name, dtype));
let mut out = ChunkedArray {
field,
chunks,
Expand Down
13 changes: 10 additions & 3 deletions crates/polars-core/src/chunked_array/ops/full.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,15 +147,22 @@ impl ChunkFullNull for ArrayChunked {

impl ListChunked {
pub fn full_null_with_dtype(name: &str, length: usize, inner_dtype: &DataType) -> ListChunked {
let arr = ListArray::new_null(
let arr: ListArray<i64> = ListArray::new_null(
ArrowDataType::LargeList(Box::new(ArrowField::new(
"item",
inner_dtype.to_arrow(),
inner_dtype.to_physical().to_arrow(),
true,
))),
length,
);
ChunkedArray::with_chunk(name, arr)
// SAFETY: physical type matches the logical.
unsafe {
ChunkedArray::from_chunks_and_dtype(
name,
vec![Box::new(arr)],
DataType::List(Box::new(inner_dtype.clone())),
)
}
}
}
#[cfg(feature = "dtype-struct")]
Expand Down
21 changes: 21 additions & 0 deletions py-polars/tests/unit/datatypes/test_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,3 +544,24 @@ def test_list_amortized_iter_clear_settings_10126() -> None:
)

assert out.to_dict(False) == {"a": [1, 2], "b": [[1, 2, 3], [4]]}


def test_list_inner_cast_physical_11513() -> None:
    """Regression test (see 11513 in the test name).

    Builds a one-row frame whose "struct" column is an empty list typed as a
    deeply nested List/Struct dtype with a Date leaf, then checks that taking
    index 0 of that column yields the empty list back.
    """
    # Innermost level: a list of structs holding a single Date field.
    leaf_dtype = pl.List(pl.Struct({"subsubfield": pl.Date}))
    # Wrap it twice more: Struct("field") -> Struct("subfield") -> leaf list.
    nested_dtype = pl.List(
        pl.Struct({"field": pl.Struct({"subfield": leaf_dtype})})
    )

    frame = pl.DataFrame(
        {"date": ["foo"], "struct": [[]]},
        schema_overrides={"struct": nested_dtype},
    )

    taken = frame.select(pl.col("struct").take(0))
    assert taken.to_dict(False) == {"struct": [[]]}

0 comments on commit 3102e7e

Please sign in to comment.