Skip to content

Commit 358bdcf

Browse files
authored
fix: StructLayoutWriter requires unique field names (#2519)
We assume throghput scanning that the column names in expression can be unique identifiers but we never enforce it. This fixes the issue by rejecting such schemas and also avoids running into them in the fuzzer
1 parent 5cd0a96 commit 358bdcf

File tree

4 files changed

+85
-16
lines changed

4 files changed

+85
-16
lines changed

fuzz/fuzz_targets/file_io.rs

+16-5
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use arrow_ord::sort::SortOptions;
66
use bytes::Bytes;
77
use futures_util::TryStreamExt;
88
use libfuzzer_sys::{Corpus, fuzz_target};
9+
use vortex_array::aliases::hash_set::HashSet;
910
use vortex_array::arrays::ChunkedArray;
1011
use vortex_array::arrays::arbitrary::ArbitraryArray;
1112
use vortex_array::arrow::IntoArrowArray;
@@ -18,11 +19,10 @@ use vortex_file::{VortexOpenOptions, VortexWriteOptions};
1819
fuzz_target!(|array_data: ArbitraryArray| -> Corpus {
1920
let array_data = array_data.0;
2021

21-
if !array_data.dtype().is_struct() {
22-
return Corpus::Reject;
23-
}
24-
25-
if has_nullable_struct(array_data.dtype()) {
22+
if !array_data.dtype().is_struct()
23+
|| has_nullable_struct(array_data.dtype())
24+
|| has_duplicate_field_names(array_data.dtype())
25+
{
2626
return Corpus::Reject;
2727
}
2828

@@ -122,3 +122,14 @@ fn has_nullable_struct(dtype: &DType) -> bool {
122122
.map(|sdt| sdt.fields().any(|dtype| has_nullable_struct(&dtype)))
123123
.unwrap_or(false)
124124
}
125+
126+
fn has_duplicate_field_names(dtype: &DType) -> bool {
127+
let Some(struct_dtype) = dtype.as_struct() else {
128+
return false;
129+
};
130+
HashSet::from_iter(struct_dtype.names().iter()).len() != struct_dtype.names().len()
131+
|| dtype
132+
.as_struct()
133+
.map(|sdt| sdt.fields().any(|dtype| has_duplicate_field_names(&dtype)))
134+
.unwrap_or(false)
135+
}

vortex-dtype/src/struct_.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ where
271271
.into_iter()
272272
.map(|(name, dtype)| (name.into(), dtype.into()))
273273
.unzip();
274-
StructDType::from_fields(names.into(), dtypes.into_iter().collect())
274+
StructDType::from_fields(names.into(), dtypes)
275275
}
276276
}
277277

vortex-layout/src/layouts/struct_/eval_expr.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ mod tests {
8585
use vortex_buffer::buffer;
8686
use vortex_dtype::PType::I32;
8787
use vortex_dtype::{DType, Nullability, StructDType};
88+
use vortex_error::VortexUnwrap;
8889
use vortex_expr::{get_item, gt, ident, pack};
8990
use vortex_mask::Mask;
9091

@@ -101,7 +102,7 @@ mod tests {
101102
let ctx = ArrayContext::empty();
102103
let mut segments = TestSegments::default();
103104

104-
let layout = StructLayoutWriter::new(
105+
let layout = StructLayoutWriter::try_new(
105106
DType::Struct(
106107
Arc::new(StructDType::new(
107108
vec!["a".into(), "b".into(), "c".into()].into(),
@@ -127,6 +128,7 @@ mod tests {
127128
)),
128129
],
129130
)
131+
.vortex_unwrap()
130132
.push_all(
131133
&mut segments,
132134
[Ok(StructArray::from_fields(

vortex-layout/src/layouts/struct_/writer.rs

+65-9
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
use itertools::Itertools;
2+
use vortex_array::aliases::hash_set::HashSet;
23
use vortex_array::iter::ArrayIteratorArrayExt;
34
use vortex_array::{ArrayContext, ArrayRef};
45
use vortex_dtype::DType;
5-
use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err, vortex_panic};
6+
use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
67

78
use crate::LayoutVTableRef;
89
use crate::data::Layout;
@@ -19,33 +20,43 @@ pub struct StructLayoutWriter {
1920
}
2021

2122
impl StructLayoutWriter {
22-
pub fn new(dtype: DType, column_layout_writers: Vec<Box<dyn LayoutWriter>>) -> Self {
23-
let struct_dtype = dtype.as_struct().vortex_expect("dtype is not a struct");
23+
pub fn try_new(
24+
dtype: DType,
25+
column_layout_writers: Vec<Box<dyn LayoutWriter>>,
26+
) -> VortexResult<Self> {
27+
let struct_dtype = dtype
28+
.as_struct()
29+
.ok_or_else(|| vortex_err!("expected StructDType"))?;
30+
if HashSet::from_iter(struct_dtype.names().iter()).len() != struct_dtype.names().len() {
31+
vortex_bail!("StructLayout must have unique field names")
32+
}
2433
if struct_dtype.fields().len() != column_layout_writers.len() {
25-
vortex_panic!(
34+
vortex_bail!(
2635
"number of fields in struct dtype does not match number of column layout writers"
2736
);
2837
}
29-
Self {
38+
Ok(Self {
3039
column_strategies: column_layout_writers,
3140
dtype,
3241
row_count: 0,
33-
}
42+
})
3443
}
3544

3645
pub fn try_new_with_factory<F: LayoutStrategy>(
3746
ctx: &ArrayContext,
3847
dtype: &DType,
3948
factory: F,
4049
) -> VortexResult<Self> {
41-
let struct_dtype = dtype.as_struct().vortex_expect("dtype is not a struct");
42-
Ok(Self::new(
50+
let struct_dtype = dtype
51+
.as_struct()
52+
.ok_or_else(|| vortex_err!("expected StructDType"))?;
53+
Self::try_new(
4354
dtype.clone(),
4455
struct_dtype
4556
.fields()
4657
.map(|dtype| factory.new_writer(ctx, &dtype))
4758
.try_collect()?,
48-
))
59+
)
4960
}
5061
}
5162

@@ -98,3 +109,48 @@ impl LayoutWriter for StructLayoutWriter {
98109
))
99110
}
100111
}
112+
113+
#[cfg(test)]
114+
mod tests {
115+
use std::sync::Arc;
116+
117+
use vortex_array::ArrayContext;
118+
use vortex_dtype::{DType, Nullability, PType};
119+
120+
use crate::LayoutWriterExt;
121+
use crate::layouts::flat::writer::{FlatLayoutOptions, FlatLayoutWriter};
122+
use crate::layouts::struct_::writer::StructLayoutWriter;
123+
124+
#[test]
125+
#[should_panic]
126+
fn fails_on_duplicate_field() {
127+
StructLayoutWriter::try_new(
128+
DType::Struct(
129+
Arc::new(
130+
[
131+
("a", DType::Primitive(PType::I32, Nullability::NonNullable)),
132+
("a", DType::Primitive(PType::I32, Nullability::NonNullable)),
133+
]
134+
.into_iter()
135+
.collect(),
136+
),
137+
Nullability::NonNullable,
138+
),
139+
vec![
140+
FlatLayoutWriter::new(
141+
ArrayContext::empty(),
142+
DType::Primitive(PType::I32, Nullability::NonNullable),
143+
FlatLayoutOptions::default(),
144+
)
145+
.boxed(),
146+
FlatLayoutWriter::new(
147+
ArrayContext::empty(),
148+
DType::Primitive(PType::I32, Nullability::NonNullable),
149+
FlatLayoutOptions::default(),
150+
)
151+
.boxed(),
152+
],
153+
)
154+
.unwrap();
155+
}
156+
}

0 commit comments

Comments
 (0)