Skip to content

Commit 901a094

Browse files
Update join_selection.rs (#13893)
1 parent 94f08ff commit 901a094

File tree

1 file changed

+13
-6
lines changed

1 file changed

+13
-6
lines changed

datafusion/core/src/physical_optimizer/join_selection.rs

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ impl JoinSelection {
6161
// TODO: We need performance tests for swapping Right Semi/Right Join to Left Semi/Left Join in the case where the right side is smaller, but not much smaller.
6262
// TODO: In PrestoSQL, the optimizer flips join sides only if one side is smaller than the other by more than SIZE_DIFFERENCE_THRESHOLD times (8 by default).
6363
/// Checks statistics for join swap.
64-
fn should_swap_join_order(
64+
pub(crate) fn should_swap_join_order(
6565
left: &dyn ExecutionPlan,
6666
right: &dyn ExecutionPlan,
6767
) -> Result<bool> {
@@ -108,7 +108,7 @@ fn supports_collect_by_thresholds(
108108
}
109109

110110
/// Predicate that checks whether the given join type supports input swapping.
111-
fn supports_swap(join_type: JoinType) -> bool {
111+
pub(crate) fn supports_swap(join_type: JoinType) -> bool {
112112
matches!(
113113
join_type,
114114
JoinType::Inner
@@ -176,7 +176,7 @@ fn swap_join_projection(
176176
/// This function swaps the inputs of the given join operator.
177177
/// This function is public so other downstream projects can use it
178178
/// to construct `HashJoinExec` with right side as the build side.
179-
pub fn swap_hash_join(
179+
pub(crate) fn swap_hash_join(
180180
hash_join: &HashJoinExec,
181181
partition_mode: PartitionMode,
182182
) -> Result<Arc<dyn ExecutionPlan>> {
@@ -222,7 +222,7 @@ pub fn swap_hash_join(
222222
}
223223

224224
/// Swaps inputs of `NestedLoopJoinExec` and wraps it into `ProjectionExec` if required
225-
fn swap_nl_join(join: &NestedLoopJoinExec) -> Result<Arc<dyn ExecutionPlan>> {
225+
pub(crate) fn swap_nl_join(join: &NestedLoopJoinExec) -> Result<Arc<dyn ExecutionPlan>> {
226226
let new_filter = swap_join_filter(join.filter());
227227
let new_join_type = &swap_join_type(*join.join_type());
228228

@@ -359,7 +359,7 @@ impl PhysicalOptimizerRule for JoinSelection {
359359
/// `CollectLeft` mode is applicable. Otherwise, it will try to swap the join sides.
360360
/// When the `ignore_threshold` is false, this function will also check left
361361
/// and right sizes in bytes or rows.
362-
fn try_collect_left(
362+
pub(crate) fn try_collect_left(
363363
hash_join: &HashJoinExec,
364364
ignore_threshold: bool,
365365
threshold_byte_size: usize,
@@ -421,7 +421,14 @@ fn try_collect_left(
421421
}
422422
}
423423

424-
fn partitioned_hash_join(hash_join: &HashJoinExec) -> Result<Arc<dyn ExecutionPlan>> {
424+
/// Creates a partitioned hash join execution plan, swapping inputs if beneficial.
425+
///
426+
/// Checks if the join order should be swapped based on the join type and input statistics.
427+
/// If swapping is optimal and supported, creates a swapped partitioned hash join; otherwise,
428+
/// creates a standard partitioned hash join.
429+
pub(crate) fn partitioned_hash_join(
430+
hash_join: &HashJoinExec,
431+
) -> Result<Arc<dyn ExecutionPlan>> {
425432
let left = hash_join.left();
426433
let right = hash_join.right();
427434
if supports_swap(*hash_join.join_type()) && should_swap_join_order(&**left, &**right)?

0 commit comments

Comments
 (0)