@@ -134,7 +134,15 @@ use crate::{OptimizerConfig, OptimizerRule};
134
134
#[ derive( Default ) ]
135
135
pub struct PushDownFilter { }
136
136
137
- /// For a given JOIN type, determine whether each side of the join is preserved.
137
+ /// For a given JOIN type, determine whether each input of the join is preserved
138
+ /// for post-join (`WHERE` clause) filters.
139
+ ///
140
+ /// It is only correct to push filters below a join for preserved inputs.
141
+ ///
142
+ /// # Return Value
143
+ /// A tuple of booleans - (left_preserved, right_preserved).
144
+ ///
145
+ /// # "Preserved" input definition
138
146
///
139
147
/// We say a join side is preserved if the join returns all or a subset of the rows from
140
148
/// the relevant side, such that each row of the output table directly maps to a row of
@@ -145,15 +153,11 @@ pub struct PushDownFilter {}
145
153
/// For example:
146
154
/// - In an inner join, both sides are preserved, because each row of the output
147
155
/// maps directly to a row from each side.
148
- /// - In a left join, the left side is preserved and the right is not, because
149
- /// there may be rows in the output that don't directly map to a row in the
150
- /// right input (due to nulls filling where there is no match on the right).
151
- ///
152
- /// This is important because we can always push down post-join filters to a preserved
153
- /// side of the join, assuming the filter only references columns from that side. For the
154
- /// non-preserved side it can be more tricky.
155
156
///
156
- /// Returns a tuple of booleans - (left_preserved, right_preserved).
157
+ /// - In a left join, the left side is preserved (so filters on it can be pushed down), but
158
+ /// the right is not, because there may be rows in the output that don't
159
+ /// directly map to a row in the right input (due to nulls filling where there
160
+ /// is no match on the right).
157
161
fn lr_is_preserved ( join_type : JoinType ) -> Result < ( bool , bool ) > {
158
162
match join_type {
159
163
JoinType :: Inner => Ok ( ( true , true ) ) ,
@@ -169,9 +173,15 @@ fn lr_is_preserved(join_type: JoinType) -> Result<(bool, bool)> {
169
173
}
170
174
}
171
175
172
- /// For a given JOIN logical plan, determine whether each side of the join is preserved
173
- /// in terms on join filtering.
174
- /// Predicates from join filter can only be pushed to preserved join side.
176
+ /// For a given JOIN type, determine whether each input of the join is preserved
177
+ /// for the join condition (`ON` clause filters).
178
+ ///
179
+ /// It is only correct to push filters below a join for preserved inputs.
180
+ ///
181
+ /// # Return Value
182
+ /// A tuple of booleans - (left_preserved, right_preserved).
183
+ ///
184
+ /// See [`lr_is_preserved`] for a definition of "preserved".
175
185
fn on_lr_is_preserved ( join_type : JoinType ) -> Result < ( bool , bool ) > {
176
186
match join_type {
177
187
JoinType :: Inner => Ok ( ( true , true ) ) ,
@@ -184,11 +194,7 @@ fn on_lr_is_preserved(join_type: JoinType) -> Result<(bool, bool)> {
184
194
}
185
195
}
186
196
187
- /// Determine which predicates in state can be pushed down to a given side of a join.
188
- /// To determine this, we need to know the schema of the relevant join side and whether
189
- /// or not the side's rows are preserved when joining. If the side is not preserved, we
190
- /// do not push down anything. Otherwise we can push down predicates where all of the
191
- /// relevant columns are contained on the relevant join side's schema.
197
+ /// Returns true if the predicate references only columns in the specified schema.
192
198
fn can_pushdown_join_predicate ( predicate : & Expr , schema : & DFSchema ) -> Result < bool > {
193
199
let schema_columns = schema
194
200
. iter ( )
0 commit comments