@@ -17,7 +17,7 @@ use vortex_mask::Mask;
17
17
18
18
use crate :: scan:: filter:: FilterExpr ;
19
19
use crate :: scan:: unified:: UnifiedDriverStream ;
20
- use crate :: segments:: { AsyncSegmentReader , SegmentRegistry } ;
20
+ use crate :: segments:: { AsyncSegmentReader , RowRangePruner , SegmentCollector , SegmentStream } ;
21
21
use crate :: {
22
22
ExprEvaluator , Layout , LayoutReader , LayoutReaderExt , RowMask , instrument, range_intersection,
23
23
} ;
@@ -39,7 +39,7 @@ pub trait ScanDriver: 'static + Sized {
39
39
/// how frequently this future will be polled, so it should not be used to drive I/O.
40
40
///
41
41
/// TODO(ngates): make this a future
42
- fn io_stream ( self , segments : SegmentRegistry ) -> impl Stream < Item = VortexResult < ( ) > > ;
42
+ fn io_stream ( self , segments : SegmentStream ) -> impl Stream < Item = VortexResult < ( ) > > ;
43
43
}
44
44
45
45
/// A struct for building a scan operation.
@@ -141,12 +141,13 @@ impl<D: ScanDriver> ScanBuilder<D> {
141
141
142
142
let row_indices = self . row_indices . clone ( ) ;
143
143
let splits = self . split_by . splits ( & self . layout , & field_mask) ?;
144
- let mut segments = SegmentRegistry :: default ( ) ;
144
+ let mut collector = SegmentCollector :: default ( ) ;
145
145
let ( filter_mask, projection_mask) = self . filter_and_projection_masks ( ) ?;
146
146
self . layout
147
- . required_segments ( 0 , & filter_mask, & projection_mask, & mut segments) ?;
147
+ . required_segments ( 0 , & filter_mask, & projection_mask, & mut collector) ?;
148
+ let ( mut row_range_pruner, segments) = collector. finish ( ) ?;
148
149
if let Some ( indices) = & row_indices {
149
- segments . retain_matching ( indices. clone ( ) ) ;
150
+ row_range_pruner . retain_matching ( indices. clone ( ) ) ;
150
151
}
151
152
152
153
let row_masks = splits
@@ -189,6 +190,7 @@ impl<D: ScanDriver> ScanBuilder<D> {
189
190
canonicalize : self . canonicalize ,
190
191
concurrency : self . concurrency ,
191
192
prefetch_conjuncts : self . prefetch_conjuncts ,
193
+ row_range_pruner,
192
194
segments,
193
195
} )
194
196
}
@@ -255,7 +257,8 @@ pub struct Scan<D> {
255
257
// TODO(adam): bake this into the executors?
256
258
concurrency : usize ,
257
259
prefetch_conjuncts : bool ,
258
- segments : SegmentRegistry ,
260
+ row_range_pruner : RowRangePruner ,
261
+ segments : SegmentStream ,
259
262
}
260
263
261
264
impl < D : ScanDriver > Scan < D > {
@@ -267,9 +270,7 @@ impl<D: ScanDriver> Scan<D> {
267
270
// Create a single LayoutReader that is reused for the entire scan.
268
271
let segment_reader = self . driver . segment_reader ( ) ;
269
272
let task_executor = self . task_executor . clone ( ) ;
270
- let reader: Arc < dyn LayoutReader > = self
271
- . layout
272
- . reader ( segment_reader. clone ( ) , self . ctx . clone ( ) ) ?;
273
+ let reader: Arc < dyn LayoutReader > = self . layout . reader ( segment_reader, self . ctx . clone ( ) ) ?;
273
274
274
275
let pruning = self
275
276
. filter
@@ -294,13 +295,15 @@ impl<D: ScanDriver> Scan<D> {
294
295
// We start with a stream of row masks
295
296
let row_masks = stream:: iter ( self . row_masks ) ;
296
297
let projection = self . projection . clone ( ) ;
298
+ let row_range_pruner = self . row_range_pruner . clone ( ) ;
297
299
298
300
let exec_stream = row_masks
299
301
. map ( move |row_mask| {
300
302
let reader = reader. clone ( ) ;
301
303
let projection = projection. clone ( ) ;
302
304
let pruning = pruning. clone ( ) ;
303
305
let reader = reader. clone ( ) ;
306
+ let mut row_range_pruner = row_range_pruner. clone ( ) ;
304
307
305
308
// This future is the processing task
306
309
instrument ! ( "process" , async move {
@@ -316,6 +319,9 @@ impl<D: ScanDriver> Scan<D> {
316
319
317
320
// Filter out all-false masks
318
321
if row_mask. filter_mask( ) . all_false( ) {
322
+ row_range_pruner
323
+ . remove( row_mask. begin( ) ..row_mask. end( ) )
324
+ . await ?;
319
325
Ok ( None )
320
326
} else {
321
327
let mut array = reader. evaluate_expr( row_mask, projection) . await ?;
0 commit comments