@@ -109,9 +109,15 @@ use super::metrics::ParquetFileMetrics;
109
109
/// can not contain rows that match the predicate.
110
110
///
111
111
/// # Implementation notes
112
+ ///
113
+ /// Single column predicates are evaluated using the PageIndex information
114
+ /// for that column to determine which row ranges can be skipped.
115
+ ///
116
+ /// The resulting [`RowSelection`]s are combined into a final
117
+ /// row selection that is added to the [`ParquetAccessPlan`].
112
118
#[ derive( Debug ) ]
113
119
pub struct PagePruningAccessPlanFilter {
114
- /// single column predicates (e.g. (`col = 5`) extracted from the overall
120
+ /// single column predicates (e.g. `col = 5`) extracted from the overall
115
121
/// predicate. Must all be true for a row to be included in the result.
116
122
predicates : Vec < PruningPredicate > ,
117
123
}
@@ -175,7 +181,7 @@ impl PagePruningAccessPlanFilter {
175
181
( file_metadata. offset_index ( ) , file_metadata. column_index ( ) )
176
182
else {
177
183
trace ! (
178
- "skip page pruning due to lack of indexes. Have offset: {}, column index: {}" ,
184
+ "Can not prune Parquet pages due to lack of indexes. Have offset: {}, column index: {}" ,
179
185
file_metadata. offset_index( ) . is_some( ) , file_metadata. column_index( ) . is_some( )
180
186
) ;
181
187
return access_plan;
@@ -184,11 +190,13 @@ impl PagePruningAccessPlanFilter {
184
190
// track the total number of rows that should be skipped
185
191
let mut total_skip = 0 ;
186
192
193
+ // for each row group specified in the access plan
187
194
let row_group_indexes = access_plan. row_group_indexes ( ) ;
188
195
for r in row_group_indexes {
189
196
// The selection for this particular row group
190
197
let mut overall_selection = None ;
191
198
for predicate in page_index_predicates {
199
+
192
200
// find column index in the parquet schema
193
201
let col_idx = find_column_index ( predicate, arrow_schema, parquet_schema) ;
194
202
let row_group_metadata = & groups[ r] ;
0 commit comments