Skip to content

Commit 9c34351

Browse files
committed
Comments
1 parent 0d6c71b commit 9c34351

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,15 @@ use super::metrics::ParquetFileMetrics;
109109
/// can not contain rows that match the predicate.
110110
///
111111
/// # Implementation notes
112+
///
113+
/// Single column predicates are evaluated using the PageIndex information
114+
/// for that column to determine which row ranges can be skipped.
115+
///
116+
/// The resulting [`RowSelection`]s are combined into a final
117+
/// row selection that is added to the [`ParquetAccessPlan`].
112118
#[derive(Debug)]
113119
pub struct PagePruningAccessPlanFilter {
114-
/// single column predicates (e.g. (`col = 5`) extracted from the overall
120+
/// single column predicates (e.g. `col = 5`) extracted from the overall
115121
/// predicate. Must all be true for a row to be included in the result.
116122
predicates: Vec<PruningPredicate>,
117123
}
@@ -175,7 +181,7 @@ impl PagePruningAccessPlanFilter {
175181
(file_metadata.offset_index(), file_metadata.column_index())
176182
else {
177183
trace!(
178-
"skip page pruning due to lack of indexes. Have offset: {}, column index: {}",
184+
"Can not prune Parquet pages due to lack of indexes. Have offset: {}, column index: {}",
179185
file_metadata.offset_index().is_some(), file_metadata.column_index().is_some()
180186
);
181187
return access_plan;
@@ -184,11 +190,13 @@ impl PagePruningAccessPlanFilter {
184190
// track the total number of rows that should be skipped
185191
let mut total_skip = 0;
186192

193+
// for each row group specified in the access plan
187194
let row_group_indexes = access_plan.row_group_indexes();
188195
for r in row_group_indexes {
189196
// The selection for this particular row group
190197
let mut overall_selection = None;
191198
for predicate in page_index_predicates {
199+
192200
// find column index in the parquet schema
193201
let col_idx = find_column_index(predicate, arrow_schema, parquet_schema);
194202
let row_group_metadata = &groups[r];

0 commit comments

Comments
 (0)