@@ -45,10 +45,16 @@ pub struct FilterExpr {
45
45
selectivity_threshold : f64 ,
46
46
/// The quantile to use from the selectivity histogram of each conjunct.
47
47
selectivity_quantile : f64 ,
48
+ /// When true, fetch every field up front instead of fetching fields lazily per conjunct.
49
+ prefetch_conjuncts : bool ,
48
50
}
49
51
50
52
impl FilterExpr {
51
- pub fn try_new ( scope_dtype : StructDType , expr : ExprRef ) -> VortexResult < Self > {
53
+ pub fn try_new (
54
+ scope_dtype : StructDType ,
55
+ expr : ExprRef ,
56
+ prefetch_conjuncts : bool ,
57
+ ) -> VortexResult < Self > {
52
58
// Find all the fields involved in the expression.
53
59
let fields: Arc < [ FieldName ] > = immediate_scope_access ( & expr, & scope_dtype) ?
54
60
. into_iter ( )
@@ -85,6 +91,7 @@ impl FilterExpr {
85
91
} )
86
92
. take ( nconjuncts)
87
93
. collect ( ) ,
94
+ prefetch_conjuncts,
88
95
// The initial ordering is naive, we could order this by how well we expect each
89
96
// comparison operator to perform. e.g. == might be more selective than <=? Not obvious.
90
97
ordering : RwLock :: new ( ( 0 ..nconjuncts) . collect ( ) ) ,
@@ -199,6 +206,31 @@ pub struct FilterEvaluation {
199
206
}
200
207
201
208
impl FilterEvaluation {
209
+ async fn fetch_fields < E : ExprEvaluator > (
210
+ & mut self ,
211
+ fields_to_read : Vec < usize > ,
212
+ evaluator : & E ,
213
+ ) -> VortexResult < ( ) > {
214
+ // Construct futures to read the *full* field. We don't push down our mask as a
215
+ // selection mask here, perhaps we should?
216
+ let field_readers = fields_to_read
217
+ . iter ( )
218
+ . map ( |& field_idx| self . filter_expr . fields [ field_idx] . clone ( ) )
219
+ . map ( |field_name| {
220
+ evaluator. evaluate_expr (
221
+ RowMask :: new ( Mask :: new_true ( self . mask . len ( ) ) , self . row_offset ) ,
222
+ get_item ( field_name, ident ( ) ) ,
223
+ )
224
+ } )
225
+ . collect :: < Vec < _ > > ( ) ;
226
+
227
+ let field_arrays = try_join_all ( field_readers) . await ?;
228
+ for ( field_idx, field_array) in fields_to_read. iter ( ) . zip_eq ( field_arrays) {
229
+ self . field_arrays [ * field_idx] = Some ( field_array) ;
230
+ }
231
+ Ok ( ( ) )
232
+ }
233
+
202
234
pub async fn evaluate < E : ExprEvaluator > ( & mut self , evaluator : E ) -> VortexResult < RowMask > {
203
235
// First, we run all conjuncts through the evaluators pruning function. This helps trim
204
236
// down the mask based on cheap statistics.
@@ -224,53 +256,43 @@ impl FilterEvaluation {
224
256
return Ok ( RowMask :: new ( self . mask . clone ( ) , self . row_offset ) ) ;
225
257
}
226
258
259
+ if self . filter_expr . prefetch_conjuncts {
260
+ self . fetch_fields ( ( 0 ..self . filter_expr . fields . len ( ) ) . collect ( ) , & evaluator)
261
+ . await ?;
262
+ }
263
+
227
264
// Then we loop over the conjuncts and evaluate them.
228
265
loop {
229
- let Some ( next_conjunct) = self
266
+ let Some ( mut next_conjunct) = self
230
267
. filter_expr
231
268
. next_conjunct ( & self . remaining , & self . field_arrays )
232
269
else {
233
270
// If there are no more conjuncts, then we've finished
234
271
return Ok ( RowMask :: new ( self . mask . clone ( ) , self . row_offset ) ) ;
235
272
} ;
236
273
237
- // Figure out which fields are needed for the next conjunct.
238
- // TODO(ngates): convert this into a conjunct group, where a group should only be
239
- // created if it has been observed to prune away to zero (therefore short-circuiting
240
- // the subsequent conjunct groups).
241
- let fields_to_read = self . filter_expr . conjunct_fields [ next_conjunct]
242
- . iter ( )
243
- . filter ( |& field_idx| self . field_arrays [ * field_idx] . is_none ( ) )
244
- . copied ( )
245
- . collect :: < Vec < usize > > ( ) ;
246
-
247
- // Construct futures to read the *full* field. We don't push down our mask as a
248
- // selection mask here, perhaps we should?
249
- let field_readers = fields_to_read
250
- . iter ( )
251
- . map ( |& field_idx| self . filter_expr . fields [ field_idx] . clone ( ) )
252
- . map ( |field_name| {
253
- evaluator. evaluate_expr (
254
- RowMask :: new ( Mask :: new_true ( self . mask . len ( ) ) , self . row_offset ) ,
255
- get_item ( field_name, ident ( ) ) ,
256
- )
257
- } )
258
- . collect :: < Vec < _ > > ( ) ;
259
-
260
- let field_arrays = try_join_all ( field_readers) . await ?;
261
- for ( field_idx, field_array) in fields_to_read. iter ( ) . zip_eq ( field_arrays) {
262
- self . field_arrays [ * field_idx] = Some ( field_array) ;
274
+ if !self . filter_expr . prefetch_conjuncts {
275
+ // Figure out which fields are needed for the next conjunct.
276
+ // TODO(ngates): convert this into a conjunct group, where a group should only be
277
+ // created if it has been observed to prune away to zero (therefore short-circuiting
278
+ // the subsequent conjunct groups).
279
+ let fields_to_read = self . filter_expr . conjunct_fields [ next_conjunct]
280
+ . iter ( )
281
+ . filter ( |& field_idx| self . field_arrays [ * field_idx] . is_none ( ) )
282
+ . copied ( )
283
+ . collect :: < Vec < usize > > ( ) ;
284
+
285
+ self . fetch_fields ( fields_to_read, & evaluator) . await ?;
286
+ // Now we've fetched some fields, we find the _now_ preferred conjunct to evaluate based
287
+ // on the fields we actually have. This may have changed from before, for example if
288
+ // we have `5 < X <= 10`, then we may have fetched X to evaluate `5 < X`, but now we
289
+ // know that `X <= 10` is more selective and worth running first.
290
+ next_conjunct = self
291
+ . filter_expr
292
+ . next_conjunct ( & self . remaining , & self . field_arrays )
293
+ . vortex_expect ( "we know there is another conjunct" ) ;
263
294
}
264
295
265
- // Now we've fetched some fields, we find the _now_ preferred conjunct to evaluate based
266
- // on the fields we actually have. This may have changed from before, for example if
267
- // we have `5 < X <= 10`, then we may have fetched X to evaluate `5 < X`, but now we
268
- // know that `X <= 10` is more selective and worth running first.
269
- let next_conjunct = self
270
- . filter_expr
271
- . next_conjunct ( & self . remaining , & self . field_arrays )
272
- . vortex_expect ( "we know there is another conjunct" ) ;
273
-
274
296
log:: debug!(
275
297
"Evaluating conjunct {}" ,
276
298
self . filter_expr. conjuncts[ next_conjunct] ,
0 commit comments