Skip to content

Commit ec85145

Browse files
authored
Add try_filter_leaves to propagate error from filter closure (#5575)
* Propagate error from filter closure * Add try_filter_leaves instead
1 parent 77a3132 commit ec85145

File tree

1 file changed

+78
-19
lines changed

1 file changed

+78
-19
lines changed

arrow-schema/src/fields.rs

+78-19
Original file line numberDiff line numberDiff line change
@@ -137,10 +137,22 @@ impl Fields {
137137
/// assert_eq!(filtered, expected);
138138
/// ```
139139
pub fn filter_leaves<F: FnMut(usize, &FieldRef) -> bool>(&self, mut filter: F) -> Self {
140-
fn filter_field<F: FnMut(&FieldRef) -> bool>(
140+
self.try_filter_leaves(|idx, field| Ok(filter(idx, field)))
141+
.unwrap()
142+
}
143+
144+
/// Returns a copy of this [`Fields`] containing only those [`FieldRef`] passing a predicate
145+
/// or an error if the predicate fails.
146+
///
147+
/// See [`Fields::filter_leaves`] for more information.
148+
pub fn try_filter_leaves<F: FnMut(usize, &FieldRef) -> Result<bool, ArrowError>>(
149+
&self,
150+
mut filter: F,
151+
) -> Result<Self, ArrowError> {
152+
fn filter_field<F: FnMut(&FieldRef) -> Result<bool, ArrowError>>(
141153
f: &FieldRef,
142154
filter: &mut F,
143-
) -> Option<FieldRef> {
155+
) -> Result<Option<FieldRef>, ArrowError> {
144156
use DataType::*;
145157

146158
let v = match f.data_type() {
@@ -149,35 +161,72 @@ impl Fields {
149161
d => d,
150162
};
151163
let d = match v {
152-
List(child) => List(filter_field(child, filter)?),
153-
LargeList(child) => LargeList(filter_field(child, filter)?),
154-
Map(child, ordered) => Map(filter_field(child, filter)?, *ordered),
155-
FixedSizeList(child, size) => FixedSizeList(filter_field(child, filter)?, *size),
164+
List(child) => {
165+
let fields = filter_field(child, filter)?;
166+
if let Some(fields) = fields {
167+
List(fields)
168+
} else {
169+
return Ok(None);
170+
}
171+
}
172+
LargeList(child) => {
173+
let fields = filter_field(child, filter)?;
174+
if let Some(fields) = fields {
175+
LargeList(fields)
176+
} else {
177+
return Ok(None);
178+
}
179+
}
180+
Map(child, ordered) => {
181+
let fields = filter_field(child, filter)?;
182+
if let Some(fields) = fields {
183+
Map(fields, *ordered)
184+
} else {
185+
return Ok(None);
186+
}
187+
}
188+
FixedSizeList(child, size) => {
189+
let fields = filter_field(child, filter)?;
190+
if let Some(fields) = fields {
191+
FixedSizeList(fields, *size)
192+
} else {
193+
return Ok(None);
194+
}
195+
}
156196
Struct(fields) => {
157-
let filtered: Fields = fields
197+
let filtered: Result<Vec<_>, _> =
198+
fields.iter().map(|f| filter_field(f, filter)).collect();
199+
let filtered: Fields = filtered?
158200
.iter()
159-
.filter_map(|f| filter_field(f, filter))
201+
.filter_map(|f| f.as_ref().cloned())
160202
.collect();
161203

162204
if filtered.is_empty() {
163-
return None;
205+
return Ok(None);
164206
}
165207

166208
Struct(filtered)
167209
}
168210
Union(fields, mode) => {
169-
let filtered: UnionFields = fields
211+
let filtered: Result<Vec<_>, _> = fields
212+
.iter()
213+
.map(|(id, f)| filter_field(f, filter).map(|f| f.map(|f| (id, f))))
214+
.collect();
215+
let filtered: UnionFields = filtered?
170216
.iter()
171-
.filter_map(|(id, f)| Some((id, filter_field(f, filter)?)))
217+
.filter_map(|f| f.as_ref().cloned())
172218
.collect();
173219

174220
if filtered.is_empty() {
175-
return None;
221+
return Ok(None);
176222
}
177223

178224
Union(filtered, *mode)
179225
}
180-
_ => return filter(f).then(|| f.clone()),
226+
_ => {
227+
let filtered = filter(f)?;
228+
return Ok(filtered.then(|| f.clone()));
229+
}
181230
};
182231
let d = match f.data_type() {
183232
Dictionary(k, _) => Dictionary(k.clone(), Box::new(d)),
@@ -186,20 +235,26 @@ impl Fields {
186235
}
187236
_ => d,
188237
};
189-
Some(Arc::new(f.as_ref().clone().with_data_type(d)))
238+
Ok(Some(Arc::new(f.as_ref().clone().with_data_type(d))))
190239
}
191240

192241
let mut leaf_idx = 0;
193242
let mut filter = |f: &FieldRef| {
194-
let t = filter(leaf_idx, f);
243+
let t = filter(leaf_idx, f)?;
195244
leaf_idx += 1;
196-
t
245+
Ok(t)
197246
};
198247

199-
self.0
248+
let filtered: Result<Vec<_>, _> = self
249+
.0
200250
.iter()
201-
.filter_map(|f| filter_field(f, &mut filter))
202-
.collect()
251+
.map(|f| filter_field(f, &mut filter))
252+
.collect();
253+
let filtered = filtered?
254+
.iter()
255+
.filter_map(|f| f.as_ref().cloned())
256+
.collect();
257+
Ok(filtered)
203258
}
204259

205260
/// Remove a field by index and return it.
@@ -531,5 +586,9 @@ mod tests {
531586
let r = fields.filter_leaves(|idx, _| idx == 14 || idx == 15);
532587
assert_eq!(r.len(), 1);
533588
assert_eq!(r[0], fields[9]);
589+
590+
// Propagate error
591+
let r = fields.try_filter_leaves(|_, _| Err(ArrowError::SchemaError("error".to_string())));
592+
assert!(r.is_err());
534593
}
535594
}

0 commit comments

Comments
 (0)