Skip to content

Commit 21db3bf

Browse files
committed
less
1 parent 1a765fe commit 21db3bf

File tree

1 file changed

+56
-4
lines changed

1 file changed

+56
-4
lines changed

vortex-array/src/arrays/chunked/canonical.rs

+56-4
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1-
use vortex_dtype::{DType, StructDType};
2-
use vortex_error::{VortexExpect, VortexResult};
1+
use vortex_buffer::BufferMut;
2+
use vortex_dtype::{DType, Nullability, PType, StructDType};
3+
use vortex_error::{VortexExpect, VortexResult, vortex_err};
34

45
use super::ChunkedArray;
5-
use crate::arrays::StructArray;
6+
use crate::arrays::{ListArray, PrimitiveArray, StructArray};
67
use crate::builders::{ArrayBuilder, builder_with_capacity};
8+
use crate::compute::{scalar_at, slice, try_cast};
79
use crate::validity::Validity;
8-
use crate::{Array as _, ArrayCanonicalImpl, ArrayRef, Canonical};
10+
use crate::{Array as _, ArrayCanonicalImpl, ArrayRef, Canonical, ToCanonical};
911

1012
impl ArrayCanonicalImpl for ChunkedArray {
1113
fn _to_canonical(&self) -> VortexResult<Canonical> {
@@ -18,6 +20,11 @@ impl ArrayCanonicalImpl for ChunkedArray {
1820
)?;
1921
Ok(Canonical::Struct(struct_array))
2022
}
23+
DType::List(elem, _) => Ok(Canonical::List(pack_lists(
24+
self.chunks(),
25+
Validity::copy_from_array(self)?,
26+
elem,
27+
)?)),
2128
_ => {
2229
let mut builder = builder_with_capacity(self.dtype(), self.len());
2330
self.append_to_builder(builder.as_mut())?;
@@ -61,6 +68,51 @@ fn swizzle_struct_chunks(
6168
StructArray::try_new(struct_dtype.names().clone(), field_arrays, len, validity)
6269
}
6370

71+
fn pack_lists(
72+
chunks: &[ArrayRef],
73+
validity: Validity,
74+
elem_dtype: &DType,
75+
) -> VortexResult<ListArray> {
76+
let len: usize = chunks.iter().map(|c| c.len()).sum();
77+
let mut offsets = BufferMut::<i64>::with_capacity(len + 1);
78+
offsets.push(0);
79+
let mut elements = Vec::new();
80+
81+
for chunk in chunks {
82+
let chunk = chunk.to_list()?;
83+
// TODO: handle i32 offsets if they fit.
84+
let offsets_arr = try_cast(
85+
chunk.offsets(),
86+
&DType::Primitive(PType::I64, Nullability::NonNullable),
87+
)?
88+
.to_primitive()?;
89+
90+
let first_offset_value: usize = usize::try_from(&scalar_at(&offsets_arr, 0)?)?;
91+
let last_offset_value: usize =
92+
usize::try_from(&scalar_at(&offsets_arr, offsets_arr.len() - 1)?)?;
93+
elements.push(slice(
94+
chunk.elements(),
95+
first_offset_value,
96+
last_offset_value,
97+
)?);
98+
99+
let adjustment_from_previous = *offsets
100+
.last()
101+
.ok_or_else(|| vortex_err!("List offsets must have at least one element"))?;
102+
offsets.extend(
103+
offsets_arr
104+
.as_slice::<i64>()
105+
.iter()
106+
.skip(1)
107+
.map(|off| off + adjustment_from_previous - first_offset_value as i64),
108+
);
109+
}
110+
let chunked_elements = ChunkedArray::try_new(elements, elem_dtype.clone())?.into_array();
111+
let offsets = PrimitiveArray::new(offsets.freeze(), Validity::NonNullable);
112+
113+
ListArray::try_new(chunked_elements, offsets.into_array(), validity)
114+
}
115+
64116
#[cfg(test)]
65117
mod tests {
66118
use std::sync::Arc;

0 commit comments

Comments
 (0)