diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java index fa00a4e9343325..dbaaf2a6b32244 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java @@ -113,6 +113,7 @@ import org.apache.doris.nereids.rules.rewrite.PushDownAggThroughJoin; import org.apache.doris.nereids.rules.rewrite.PushDownAggThroughJoinOnPkFk; import org.apache.doris.nereids.rules.rewrite.PushDownAggThroughJoinOneSide; +import org.apache.doris.nereids.rules.rewrite.PushDownAggWithDistinctThroughJoinOneSide; import org.apache.doris.nereids.rules.rewrite.PushDownDistinctThroughJoin; import org.apache.doris.nereids.rules.rewrite.PushDownFilterThroughProject; import org.apache.doris.nereids.rules.rewrite.PushDownLimit; @@ -344,6 +345,7 @@ public class Rewriter extends AbstractBatchJobExecutor { topic("Eager aggregation", costBased(topDown( + new PushDownAggWithDistinctThroughJoinOneSide(), new PushDownAggThroughJoinOneSide(), new PushDownAggThroughJoin() )), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java index c81f56f85b6add..e25050b012dc5f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java @@ -201,6 +201,7 @@ public enum RuleType { ELIMINATE_SORT(RuleTypeClass.REWRITE), PUSH_DOWN_AGG_THROUGH_JOIN_ONE_SIDE(RuleTypeClass.REWRITE), + PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE(RuleTypeClass.REWRITE), PUSH_DOWN_AGG_THROUGH_JOIN(RuleTypeClass.REWRITE), PUSH_DOWN_AGG_THROUGH_JOIN_ON_PKFK(RuleTypeClass.REWRITE), TRANSPOSE_LOGICAL_SEMI_JOIN_LOGICAL_JOIN(RuleTypeClass.REWRITE), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownAggWithDistinctThroughJoinOneSide.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownAggWithDistinctThroughJoinOneSide.java new file mode 100644 index 00000000000000..3f9ad609744e21 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownAggWithDistinctThroughJoinOneSide.java @@ -0,0 +1,176 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; +import org.apache.doris.nereids.trees.expressions.functions.agg.Count; +import org.apache.doris.nereids.trees.expressions.functions.agg.Max; +import org.apache.doris.nereids.trees.expressions.functions.agg.Min; +import org.apache.doris.nereids.trees.expressions.functions.agg.Sum; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableList.Builder; +import com.google.common.collect.Sets; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Push down agg function with distinct through join on only one side. + */ +public class PushDownAggWithDistinctThroughJoinOneSide implements RewriteRuleFactory { + @Override + public List buildRules() { + return ImmutableList.of( + logicalAggregate(logicalProject(innerLogicalJoin())) + .when(agg -> agg.child().isAllSlots()) + .when(agg -> agg.child().child().getOtherJoinConjuncts().isEmpty()) + .when(agg -> !agg.isGenerated()) + .whenNot(agg -> agg.getAggregateFunctions().isEmpty()) + .whenNot(agg -> agg.child() + .child(0).children().stream().anyMatch(p -> p instanceof LogicalAggregate)) + .when(agg -> { + Set funcs = agg.getAggregateFunctions(); + if (funcs.size() > 1) { + return false; + } else { + return funcs.stream() + .allMatch(f -> (f instanceof Min || f instanceof Max || f instanceof Sum + || f instanceof Count) && f.isDistinct() + && f.child(0) instanceof Slot); + } + }) + .thenApply(ctx -> { + LogicalAggregate>> agg = ctx.root; + return pushDownAggWithDistinct(agg, agg.child().child(), agg.child().getProjects()); + }) + .toRule(RuleType.PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE) + ); + } + + private static LogicalAggregate pushDownAggWithDistinct(LogicalAggregate agg, + LogicalJoin join, List projects) { + Plan leftJoin = join.left(); + Plan rightJoin = join.right(); + List leftJoinOutput = leftJoin.getOutput(); + List rightJoinOutput = rightJoin.getOutput(); + + List leftFuncs = new ArrayList<>(); + List rightFuncs = new ArrayList<>(); + Set leftFuncSlotSet = new HashSet<>(); + Set rightFuncSlotSet = new HashSet<>(); + Set newAggOverJoinGroupByKeys = new HashSet<>(); + for (AggregateFunction func : agg.getAggregateFunctions()) { + Slot slot = (Slot) func.child(0); + newAggOverJoinGroupByKeys.add(slot); + if (leftJoinOutput.contains(slot)) { + leftFuncs.add(func); + leftFuncSlotSet.add(slot); + } else if (rightJoinOutput.contains(slot)) { + rightFuncs.add(func); + rightFuncSlotSet.add(slot); + } else { + throw new IllegalStateException("Slot " + slot + " not found in join output"); + } + } + boolean isLeftSideAggDistinct = !leftFuncs.isEmpty() && rightFuncs.isEmpty(); + boolean isRightSideAggDistinct = leftFuncs.isEmpty() && !rightFuncs.isEmpty(); + if (!isLeftSideAggDistinct && !isRightSideAggDistinct) { + return null; + } + + Set leftPushDownGroupBy = new HashSet<>(); + Set rightPushDownGroupBy = new HashSet<>(); + for (Expression e : agg.getGroupByExpressions()) { + Slot slot = (Slot) e; + newAggOverJoinGroupByKeys.add(slot); + if (leftJoinOutput.contains(slot)) { + leftPushDownGroupBy.add(slot); + } else if (rightJoinOutput.contains(slot)) { + rightPushDownGroupBy.add(slot); + } else { + return null; + } + } + join.getHashJoinConjuncts().forEach(e -> e.getInputSlots().forEach(slot -> { + if (leftJoinOutput.contains(slot)) { + leftPushDownGroupBy.add(slot); + } else if (rightJoinOutput.contains(slot)) { + rightPushDownGroupBy.add(slot); + } else { + throw new IllegalStateException("Slot " + slot + " not found in join output"); + } + })); + + if (isLeftSideAggDistinct) { + leftPushDownGroupBy.add((Slot) leftFuncs.get(0).child(0)); + Builder leftAggOutputBuilder = ImmutableList.builder() + .addAll(leftPushDownGroupBy); + leftJoin = new LogicalAggregate<>(ImmutableList.copyOf(leftPushDownGroupBy), + leftAggOutputBuilder.build(), join.left()); + } else { + rightPushDownGroupBy.add((Slot) rightFuncs.get(0).child(0)); + Builder rightAggOutputBuilder = ImmutableList.builder() + .addAll(rightPushDownGroupBy); + rightJoin = new LogicalAggregate<>(ImmutableList.copyOf(rightPushDownGroupBy), + rightAggOutputBuilder.build(), join.right()); + } + + Preconditions.checkState(leftJoin != join.left() || rightJoin != join.right(), + "not pushing down aggr with distinct through join on single side successfully"); + Plan newJoin = join.withChildren(leftJoin, rightJoin); + LogicalAggregate newAggOverJoin = agg.withChildGroupByAndOutput( + ImmutableList.copyOf(newAggOverJoinGroupByKeys), projects, newJoin); + + List newOutputExprs = new ArrayList<>(); + for (NamedExpression ne : agg.getOutputExpressions()) { + if (ne instanceof Alias && ((Alias) ne).child() instanceof AggregateFunction) { + AggregateFunction func = (AggregateFunction) ((Alias) ne).child(); + Slot slot = (Slot) func.child(0); + if (leftFuncSlotSet.contains(slot) || rightFuncSlotSet.contains(slot)) { + Expression newFunc = discardDistinct(func); + newOutputExprs.add((NamedExpression) ne.withChildren(newFunc)); + } else { + throw new IllegalStateException("Slot " + slot + " not found in join output"); + } + } else { + newOutputExprs.add(ne); + } + } + return agg.withAggOutputChild(newOutputExprs, newAggOverJoin); + } + + private static Expression discardDistinct(AggregateFunction func) { + Preconditions.checkState(func.isDistinct(), "current aggregation function is not distinct"); + Set aggChild = Sets.newLinkedHashSet(func.children()); + return func.withDistinctAndChildren(false, ImmutableList.copyOf(aggChild)); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java index 2798b1aef0102b..a06f238f5314ad 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java @@ -271,6 +271,12 @@ public LogicalAggregate withGroupByAndOutput(List groupByExprL hasPushed, sourceRepeat, Optional.empty(), Optional.empty(), child()); } + public LogicalAggregate withChildGroupByAndOutput(List groupByExprList, + List outputExpressionList, Plan newChild) { + return new LogicalAggregate<>(groupByExprList, outputExpressionList, normalized, ordinalIsResolved, generated, + hasPushed, sourceRepeat, Optional.empty(), Optional.empty(), newChild); + } + public LogicalAggregate withChildAndOutput(CHILD_TYPE child, List outputExpressionList) { return new LogicalAggregate<>(groupByExpressions, outputExpressionList, normalized, ordinalIsResolved, diff --git a/regression-test/data/nereids_rules_p0/eager_aggregate/push_down_count_distinct_through_join_one_side.out b/regression-test/data/nereids_rules_p0/eager_aggregate/push_down_count_distinct_through_join_one_side.out new file mode 100644 index 00000000000000..4a84ff5117c008 --- /dev/null +++ b/regression-test/data/nereids_rules_p0/eager_aggregate/push_down_count_distinct_through_join_one_side.out @@ -0,0 +1,237 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !groupby_pushdown_basic -- +1 +1 +1 +3 + +-- !groupby_pushdown_left_join -- +1 +1 +1 +3 + +-- !groupby_pushdown_right_join -- +1 +1 +1 +3 + +-- !groupby_pushdown_full_join -- +1 +1 +1 +3 + +-- !groupby_pushdown_left_semi_join -- +1 +1 +1 +3 + +-- !groupby_pushdown_left_anti_join -- + +-- !groupby_pushdown_complex_conditions -- + +-- !groupby_pushdown_with_aggregate -- +1 1.0 +1 2.0 +1 3.0 +3 2.0 + +-- !groupby_pushdown_subquery -- + +-- !groupby_pushdown_outer_join -- +1 +1 +1 +3 + +-- !groupby_pushdown_deep_subquery -- + +-- !groupby_pushdown_having -- + +-- !groupby_pushdown_mixed_aggregates -- +1 1 +1 2 +1 3 +3 6 + +-- !groupby_pushdown_multi_table_join -- +1 +1 +1 + +-- !groupby_pushdown_with_order_by -- +1 +1 +1 +3 + +-- !groupby_pushdown_multiple_equal_conditions -- +1 +1 +1 + +-- !groupby_pushdown_equal_conditions_with_aggregate -- +1 1 +2 1 +3 1 + +-- !groupby_pushdown_equal_conditions_non_aggregate -- +a 1 +b 1 +c 1 + +-- !groupby_pushdown_equal_conditions_non_aggregate_with_aggregate -- +a 1 1 +b 1 1 +c 1 1 + +-- !groupby_pushdown_with_where_clause -- + +-- !groupby_pushdown_varied_aggregates -- +1 1.5 1 +1 4.5 1 +1 7.5 1 +3 7.0 0 + +-- !groupby_pushdown_with_order_by_limit -- +1 +1 +1 +3 + +-- !groupby_pushdown_alias_multiple_equal_conditions -- +1 +1 +1 + +-- !groupby_pushdown_complex_join_condition -- + +-- !groupby_pushdown_function_processed_columns -- +0 +1 +1 +1 + +-- !groupby_pushdown_nested_queries -- + +-- !with_hint_groupby_pushdown_basic -- +1 +1 +1 +3 + +-- !with_hint_groupby_pushdown_left_join -- +1 +1 +1 +3 + +-- !with_hint_groupby_pushdown_right_join -- +1 +1 +1 +3 + +-- !with_hint_groupby_pushdown_full_join -- +1 +1 +1 +3 + +-- !with_hint_groupby_pushdown_left_semi_join -- +1 +1 +1 +3 + +-- !with_hint_groupby_pushdown_left_anti_join -- + +-- !with_hint_groupby_pushdown_complex_conditions -- + +-- !with_hint_groupby_pushdown_with_aggregate -- +1 1.0 +1 2.0 +1 3.0 +3 2.0 + +-- !with_hint_groupby_pushdown_subquery -- + +-- !with_hint_groupby_pushdown_outer_join -- +1 +1 +1 +3 + +-- !with_hint_groupby_pushdown_deep_subquery -- + +-- !with_hint_groupby_pushdown_having -- + +-- !with_hint_groupby_pushdown_mixed_aggregates -- +1 1 +1 2 +1 3 +3 6 + +-- !with_hint_groupby_pushdown_multi_table_join -- +1 +1 +1 + +-- !with_hint_groupby_pushdown_with_order_by -- +1 +1 +1 +3 + +-- !with_hint_groupby_pushdown_multiple_equal_conditions -- +1 +1 +1 + +-- !with_hint_groupby_pushdown_equal_conditions_with_aggregate -- +1 1 +2 1 +3 1 + +-- !with_hint_groupby_pushdown_equal_conditions_non_aggregate -- +a 1 +b 1 +c 1 + +-- !with_hint_groupby_pushdown_equal_conditions_non_aggregate_with_aggregate -- +a 1 1 +b 1 1 +c 1 1 + +-- !with_hint_groupby_pushdown_with_where_clause -- + +-- !with_hint_groupby_pushdown_varied_aggregates -- +1 1.5 1 +1 4.5 1 +1 7.5 1 +3 7.0 0 + +-- !with_hint_groupby_pushdown_with_order_by_limit -- +1 +1 +1 +3 + +-- !with_hint_groupby_pushdown_alias_multiple_equal_conditions -- +1 +1 +1 + +-- !with_hint_groupby_pushdown_complex_join_condition -- + +-- !with_hint_groupby_pushdown_function_processed_columns -- +0 +1 +1 +1 + +-- !with_hint_groupby_pushdown_nested_queries -- + diff --git a/regression-test/data/nereids_rules_p0/eager_aggregate/push_down_min_distinct_through_join_one_side.out b/regression-test/data/nereids_rules_p0/eager_aggregate/push_down_min_distinct_through_join_one_side.out new file mode 100644 index 00000000000000..db15483c496eb4 --- /dev/null +++ b/regression-test/data/nereids_rules_p0/eager_aggregate/push_down_min_distinct_through_join_one_side.out @@ -0,0 +1,237 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !groupby_pushdown_basic -- +1 +1 +2 +3 + +-- !groupby_pushdown_left_join -- +1 +1 +2 +3 + +-- !groupby_pushdown_right_join -- +1 +1 +2 +3 + +-- !groupby_pushdown_full_join -- +1 +1 +2 +3 + +-- !groupby_pushdown_left_semi_join -- +1 +1 +2 +3 + +-- !groupby_pushdown_left_anti_join -- + +-- !groupby_pushdown_complex_conditions -- + +-- !groupby_pushdown_with_aggregate -- +1 1.0 +1 2.0 +2 2.0 +3 3.0 + +-- !groupby_pushdown_subquery -- + +-- !groupby_pushdown_outer_join -- +1 +1 +2 +3 + +-- !groupby_pushdown_deep_subquery -- + +-- !groupby_pushdown_having -- + +-- !groupby_pushdown_mixed_aggregates -- +1 1 +1 6 +2 2 +3 3 + +-- !groupby_pushdown_multi_table_join -- +1 +2 +3 + +-- !groupby_pushdown_with_order_by -- +1 +1 +2 +3 + +-- !groupby_pushdown_multiple_equal_conditions -- +1 +2 +3 + +-- !groupby_pushdown_equal_conditions_with_aggregate -- +1 1 +2 2 +3 3 + +-- !groupby_pushdown_equal_conditions_non_aggregate -- +a 1 +b 2 +c 3 + +-- !groupby_pushdown_equal_conditions_non_aggregate_with_aggregate -- +a 1 1 +b 2 2 +c 3 3 + +-- !groupby_pushdown_with_where_clause -- + +-- !groupby_pushdown_varied_aggregates -- +1 1.5 a +1 7.0 \N +2 4.5 b +3 7.5 c + +-- !groupby_pushdown_with_order_by_limit -- +1 +1 +2 +3 + +-- !groupby_pushdown_alias_multiple_equal_conditions -- +1 +2 +3 + +-- !groupby_pushdown_complex_join_condition -- + +-- !groupby_pushdown_function_processed_columns -- +\N +1 +1 +1 + +-- !groupby_pushdown_nested_queries -- + +-- !with_hint_groupby_pushdown_basic -- +1 +1 +2 +3 + +-- !with_hint_groupby_pushdown_left_join -- +1 +1 +2 +3 + +-- !with_hint_groupby_pushdown_right_join -- +1 +1 +2 +3 + +-- !with_hint_groupby_pushdown_full_join -- +1 +1 +2 +3 + +-- !with_hint_groupby_pushdown_left_semi_join -- +1 +1 +2 +3 + +-- !with_hint_groupby_pushdown_left_anti_join -- + +-- !with_hint_groupby_pushdown_complex_conditions -- + +-- !with_hint_groupby_pushdown_with_aggregate -- +1 1.0 +1 2.0 +2 2.0 +3 3.0 + +-- !with_hint_groupby_pushdown_subquery -- + +-- !with_hint_groupby_pushdown_outer_join -- +1 +1 +2 +3 + +-- !with_hint_groupby_pushdown_deep_subquery -- + +-- !with_hint_groupby_pushdown_having -- + +-- !with_hint_groupby_pushdown_mixed_aggregates -- +1 1 +1 6 +2 2 +3 3 + +-- !with_hint_groupby_pushdown_multi_table_join -- +1 +2 +3 + +-- !with_hint_groupby_pushdown_with_order_by -- +1 +1 +2 +3 + +-- !with_hint_groupby_pushdown_multiple_equal_conditions -- +1 +2 +3 + +-- !with_hint_groupby_pushdown_equal_conditions_with_aggregate -- +1 1 +2 2 +3 3 + +-- !with_hint_groupby_pushdown_equal_conditions_non_aggregate -- +a 1 +b 2 +c 3 + +-- !with_hint_groupby_pushdown_equal_conditions_non_aggregate_with_aggregate -- +a 1 1 +b 2 2 +c 3 3 + +-- !with_hint_groupby_pushdown_with_where_clause -- + +-- !with_hint_groupby_pushdown_varied_aggregates -- +1 1.5 a +1 7.0 \N +2 4.5 b +3 7.5 c + +-- !with_hint_groupby_pushdown_with_order_by_limit -- +1 +1 +2 +3 + +-- !with_hint_groupby_pushdown_alias_multiple_equal_conditions -- +1 +2 +3 + +-- !with_hint_groupby_pushdown_complex_join_condition -- + +-- !with_hint_groupby_pushdown_function_processed_columns -- +\N +1 +1 +1 + +-- !with_hint_groupby_pushdown_nested_queries -- + diff --git a/regression-test/data/nereids_rules_p0/eager_aggregate/push_down_sum_distinct_through_join_one_side.out b/regression-test/data/nereids_rules_p0/eager_aggregate/push_down_sum_distinct_through_join_one_side.out new file mode 100644 index 00000000000000..bb8366176a7ba6 --- /dev/null +++ b/regression-test/data/nereids_rules_p0/eager_aggregate/push_down_sum_distinct_through_join_one_side.out @@ -0,0 +1,231 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !groupby_pushdown_basic -- +1 +2 +3 +6 + +-- !groupby_pushdown_left_join -- +1 +2 +3 +6 + +-- !groupby_pushdown_right_join -- +1 +2 +3 +6 + +-- !groupby_pushdown_full_join -- +1 +2 +3 +6 + +-- !groupby_pushdown_left_semi_join -- +1 +2 +3 +6 + +-- !groupby_pushdown_left_anti_join -- + +-- !groupby_pushdown_complex_conditions -- + +-- !groupby_pushdown_with_aggregate -- +1 1.0 +2 2.0 +3 3.0 +6 2.0 + +-- !groupby_pushdown_subquery -- + +-- !groupby_pushdown_outer_join -- +1 +2 +3 +6 + +-- !groupby_pushdown_deep_subquery -- + +-- !groupby_pushdown_having -- + +-- !groupby_pushdown_mixed_aggregates -- +1 1 +2 2 +3 3 +6 6 + +-- !groupby_pushdown_multi_table_join -- +1 +2 +3 + +-- !groupby_pushdown_with_order_by -- +1 +2 +3 +6 + +-- !groupby_pushdown_multiple_equal_conditions -- +1 +2 +3 + +-- !groupby_pushdown_equal_conditions_with_aggregate -- +1 1 +2 2 +3 3 + +-- !groupby_pushdown_equal_conditions_non_aggregate -- +a 1 +b 2 +c 3 + +-- !groupby_pushdown_equal_conditions_non_aggregate_with_aggregate -- +a 1 1 +b 2 2 +c 3 3 + +-- !groupby_pushdown_with_where_clause -- + +-- !groupby_pushdown_varied_aggregates -- +1 +2 +3 +6 + +-- !groupby_pushdown_with_order_by_limit -- +1 +2 +3 +6 + +-- !groupby_pushdown_alias_multiple_equal_conditions -- +1 +2 +3 + +-- !groupby_pushdown_complex_join_condition -- + +-- !groupby_pushdown_function_processed_columns -- +1 +2 +3 +6 + +-- !groupby_pushdown_nested_queries -- + +-- !with_hint_groupby_pushdown_basic -- +1 +2 +3 +6 + +-- !with_hint_groupby_pushdown_left_join -- +1 +2 +3 +6 + +-- !with_hint_groupby_pushdown_right_join -- +1 +2 +3 +6 + +-- !with_hint_groupby_pushdown_full_join -- +1 +2 +3 +6 + +-- !with_hint_groupby_pushdown_left_semi_join -- +1 +2 +3 +6 + +-- !with_hint_groupby_pushdown_left_anti_join -- + +-- !with_hint_groupby_pushdown_complex_conditions -- + +-- !with_hint_groupby_pushdown_with_aggregate -- +1 1.0 +2 2.0 +3 3.0 +6 2.0 + +-- !with_hint_groupby_pushdown_subquery -- + +-- !with_hint_groupby_pushdown_outer_join -- +1 +2 +3 +6 + +-- !with_hint_groupby_pushdown_deep_subquery -- + +-- !with_hint_groupby_pushdown_having -- + +-- !with_hint_groupby_pushdown_mixed_aggregates -- +1 1 +2 2 +3 3 +6 6 + +-- !with_hint_groupby_pushdown_multi_table_join -- +1 +2 +3 + +-- !with_hint_groupby_pushdown_with_order_by -- +1 +2 +3 +6 + +-- !with_hint_groupby_pushdown_multiple_equal_conditions -- +1 +2 +3 + +-- !with_hint_groupby_pushdown_equal_conditions_with_aggregate -- +1 1 +2 2 +3 3 + +-- !with_hint_groupby_pushdown_equal_conditions_non_aggregate -- +a 1 +b 2 +c 3 + +-- !with_hint_groupby_pushdown_equal_conditions_non_aggregate_with_aggregate -- +a 1 1 +b 2 2 +c 3 3 + +-- !with_hint_groupby_pushdown_with_where_clause -- + +-- !with_hint_groupby_pushdown_varied_aggregates -- +1 +2 +3 +6 + +-- !with_hint_groupby_pushdown_with_order_by_limit -- +1 +2 +3 +6 + +-- !with_hint_groupby_pushdown_alias_multiple_equal_conditions -- +1 +2 +3 + +-- !with_hint_groupby_pushdown_complex_join_condition -- + +-- !with_hint_groupby_pushdown_nested_queries -- + diff --git a/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_aggr_distinct_through_join_one_side_cust.groovy b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_aggr_distinct_through_join_one_side_cust.groovy new file mode 100644 index 00000000000000..9134d66b76d751 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_aggr_distinct_through_join_one_side_cust.groovy @@ -0,0 +1,125 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("push_down_aggr_distinct_through_join_one_side_cust") { + sql "SET enable_nereids_planner=true" + sql "set runtime_filter_mode=OFF" + sql "SET enable_fallback_to_original_planner=false" + sql "set DISABLE_NEREIDS_RULES='PRUNE_EMPTY_PARTITION, ELIMINATE_GROUP_BY_KEY_BY_UNIFORM'" + + sql """ + DROP TABLE IF EXISTS dwd_com_abtest_result_inc_ymds; + DROP TABLE IF EXISTS dwd_tracking_sensor_init_tmp_ymds; + """ + + sql """ + CREATE TABLE `dwd_com_abtest_result_inc_ymds` ( + `app_name` varchar(255) NULL, + `user_key` text NULL, + `group_name` text NULL, + `dt` date NOT NULL, + ) ENGINE=OLAP + DUPLICATE KEY(`app_name`) + AUTO PARTITION BY RANGE (date_trunc(`dt`, 'day')) + (PARTITION p20240813000000 VALUES [('2024-08-13'), ('2024-08-14')), + PARTITION p20240814000000 VALUES [('2024-08-14'), ('2024-08-15')), + PARTITION p20240815000000 VALUES [('2024-08-15'), ('2024-08-16')), + PARTITION p20240816000000 VALUES [('2024-08-16'), ('2024-08-17')), + PARTITION p20240817000000 VALUES [('2024-08-17'), ('2024-08-18')), + PARTITION p20240818000000 VALUES [('2024-08-18'), ('2024-08-19')), + PARTITION p20240819000000 VALUES [('2024-08-19'), ('2024-08-20'))) + DISTRIBUTED BY HASH(`app_name`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "min_load_replica_num" = "-1", + "is_being_synced" = "false", + "storage_medium" = "hdd", + "storage_format" = "V2", + "inverted_index_storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false", + "group_commit_interval_ms" = "10000", + "group_commit_data_bytes" = "134217728" + ); + + CREATE TABLE `dwd_tracking_sensor_init_tmp_ymds` ( + `ip` varchar(20) NULL, + `gz_user_id` text NULL, + `dt` date NOT NULL + ) ENGINE=OLAP + DUPLICATE KEY(`ip`) + AUTO PARTITION BY RANGE (date_trunc(`dt`, 'day')) + (PARTITION p20240813000000 VALUES [('2024-08-13'), ('2024-08-14')), + PARTITION p20240814000000 VALUES [('2024-08-14'), ('2024-08-15')), + PARTITION p20240815000000 VALUES [('2024-08-15'), ('2024-08-16')), + PARTITION p20240816000000 VALUES [('2024-08-16'), ('2024-08-17')), + PARTITION p20240817000000 VALUES [('2024-08-17'), ('2024-08-18')), + PARTITION p20240818000000 VALUES [('2024-08-18'), ('2024-08-19')), + PARTITION p20240819000000 VALUES [('2024-08-19'), ('2024-08-20'))) + DISTRIBUTED BY HASH(`ip`) BUCKETS 10 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "min_load_replica_num" = "-1", + "is_being_synced" = "false", + "storage_medium" = "hdd", + "storage_format" = "V2", + "inverted_index_storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false", + "group_commit_interval_ms" = "10000", + "group_commit_data_bytes" = "134217728" + ); + """ + + explain { + sql("physical PLAN SELECT /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/" + + "COUNT(DISTINCT dwd_tracking_sensor_init_tmp_ymds.gz_user_id) AS a2c1a830_1," + + "dwd_com_abtest_result_inc_ymds.group_name AS ab1011d6," + + "dwd_tracking_sensor_init_tmp_ymds.dt AS ad466123 " + + "FROM dwd_tracking_sensor_init_tmp_ymds " + + "LEFT JOIN dwd_com_abtest_result_inc_ymds " + + "ON dwd_tracking_sensor_init_tmp_ymds.gz_user_id = dwd_com_abtest_result_inc_ymds.user_key " + + "AND dwd_tracking_sensor_init_tmp_ymds.dt = dwd_com_abtest_result_inc_ymds.dt " + + "WHERE dwd_tracking_sensor_init_tmp_ymds.dt BETWEEN '2024-08-15' AND '2024-08-15' " + + "AND dwd_com_abtest_result_inc_ymds.dt BETWEEN '2024-08-15' AND '2024-08-15' " + + "GROUP BY 2, 3 ORDER BY 3 asc limit 10000;"); + contains"groupByExpr=[gz_user_id#1, dt#2]" + contains"groupByExpr=[gz_user_id#1, dt#2, group_name#5], outputExpr=[gz_user_id#1, dt#2, group_name#5]" + contains"[group_name#5, dt#2]" + contains"groupByExpr=[group_name#5, dt#2], outputExpr=[group_name#5, dt#2, count(partial_count(gz_user_id)#12) AS `a2c1a830_1`#7]" + } + + explain { + sql("physical PLAN SELECT /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/" + + "COUNT(DISTINCT dwd_tracking_sensor_init_tmp_ymds.ip) AS a2c1a830_1," + + "dwd_com_abtest_result_inc_ymds.group_name AS ab1011d6," + + "dwd_tracking_sensor_init_tmp_ymds.dt AS ad466123 " + + "FROM dwd_tracking_sensor_init_tmp_ymds " + + "LEFT JOIN dwd_com_abtest_result_inc_ymds " + + "ON dwd_tracking_sensor_init_tmp_ymds.gz_user_id = dwd_com_abtest_result_inc_ymds.user_key " + + "AND dwd_tracking_sensor_init_tmp_ymds.dt = dwd_com_abtest_result_inc_ymds.dt " + + "WHERE dwd_tracking_sensor_init_tmp_ymds.dt BETWEEN '2024-08-15' AND '2024-08-15' " + + "AND dwd_com_abtest_result_inc_ymds.dt BETWEEN '2024-08-15' AND '2024-08-15' " + + "GROUP BY 2, 3 ORDER BY 3 asc limit 10000;"); + contains"groupByExpr=[ip#0, gz_user_id#1, dt#2], outputExpr=[ip#0, gz_user_id#1, dt#2]" + contains"groupByExpr=[ip#0, dt#2, group_name#5], outputExpr=[ip#0, dt#2, group_name#5]" + contains"groupByExpr=[group_name#5, dt#2], outputExpr=[group_name#5, dt#2, partial_count(ip#0) AS `partial_count(ip)`#12]" + contains"groupByExpr=[group_name#5, dt#2], outputExpr=[group_name#5, dt#2, count(partial_count(ip)#12) AS `a2c1a830_1`#7]" + } +} diff --git a/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_count_distinct_through_join_one_side.groovy b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_count_distinct_through_join_one_side.groovy new file mode 100644 index 00000000000000..65a6b15ceccf73 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_count_distinct_through_join_one_side.groovy @@ -0,0 +1,257 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("push_down_count_distinct_through_join_one_side") { + sql "SET enable_nereids_planner=true" + sql "set runtime_filter_mode=OFF" + sql "SET enable_fallback_to_original_planner=false" + sql """ + DROP TABLE IF EXISTS count_with_distinct_t; + """ + + sql """ + CREATE TABLE IF NOT EXISTS count_with_distinct_t( + `id` int(32), + `score` int(64) NULL, + `name` varchar(64) NULL + ) ENGINE = OLAP + DISTRIBUTED BY HASH(id) BUCKETS 4 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql "insert into count_with_distinct_t values (1, 1, 'a')" + sql "insert into count_with_distinct_t values (2, null, 'a')" + sql "insert into count_with_distinct_t values (3, 1, null)" + sql "insert into count_with_distinct_t values (4, 2, 'b')" + sql "insert into count_with_distinct_t values (5, null, 'b')" + sql "insert into count_with_distinct_t values (6, 2, null)" + sql "insert into count_with_distinct_t values (7, 3, 'c')" + sql "insert into count_with_distinct_t values (8, null, 'c')" + sql "insert into count_with_distinct_t values (9, 3, null)" + sql "insert into count_with_distinct_t values (10, null, null)" + sql "analyze table count_with_distinct_t with full with sync;" + + order_qt_groupby_pushdown_basic """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1, count_with_distinct_t t2 where t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_left_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1 left join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_right_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1 right join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_full_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1 full join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_left_semi_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1 inner join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_left_anti_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1 left anti join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_complex_conditions """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1 join count_with_distinct_t t2 on t1.id = t2.id and t1.name < t2.name group by t1.name; + """ + + order_qt_groupby_pushdown_with_aggregate """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score), avg(t1.score) from count_with_distinct_t t1 join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_subquery """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from (select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ * from count_with_distinct_t where score > 10) t1 join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_outer_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1 left join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_deep_subquery """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from (select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ * from (select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ * from count_with_distinct_t) count_with_distinct_t where score > 10) t1 join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_having """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1, count_with_distinct_t t2 where t1.id = t2.id group by t1.name having count(distinct t1.score) > 100; + """ + + order_qt_groupby_pushdown_mixed_aggregates """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score), sum(distinct t1.score) from count_with_distinct_t t1 join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_multi_table_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1 join count_with_distinct_t t2 on t1.id = t2.id join count_with_distinct_t t3 on t1.name = t3.name group by t1.name; + """ + + order_qt_groupby_pushdown_with_order_by """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1, count_with_distinct_t t2 where t1.id = t2.id group by t1.name order by t1.name; + """ + + order_qt_groupby_pushdown_multiple_equal_conditions """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1, count_with_distinct_t t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_groupby_pushdown_equal_conditions_with_aggregate """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ sum(distinct t1.score), count(distinct t2.score) from count_with_distinct_t t1 join count_with_distinct_t t2 on t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_groupby_pushdown_equal_conditions_non_aggregate """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ t1.name, count(distinct t1.score) from count_with_distinct_t t1, count_with_distinct_t t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_groupby_pushdown_equal_conditions_non_aggregate_with_aggregate """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ t1.name, count(distinct t1.score), count(distinct t2.score) from count_with_distinct_t t1, count_with_distinct_t t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_groupby_pushdown_with_where_clause """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1, count_with_distinct_t t2 where t1.id = t2.id and t1.score > 50 group by t1.name; + """ + + order_qt_groupby_pushdown_varied_aggregates """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score), avg(t1.id), count(distinct t2.name) from count_with_distinct_t t1 join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_with_order_by_limit """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1, count_with_distinct_t t2 where t1.id = t2.id group by t1.name order by count(distinct t1.score) limit 10; + """ + + order_qt_groupby_pushdown_alias_multiple_equal_conditions """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1_alias.score) from count_with_distinct_t t1_alias join count_with_distinct_t t2_alias on t1_alias.id = t2_alias.id and t1_alias.name = t2_alias.name group by t1_alias.name; + """ + + order_qt_groupby_pushdown_complex_join_condition """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1 join count_with_distinct_t t2 on t1.id = t2.id and t1.score = t2.score and t1.name <> t2.name group by t1.name; + """ + + order_qt_groupby_pushdown_function_processed_columns """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct LENGTH(t1.name)) from count_with_distinct_t t1, count_with_distinct_t t2 where t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_nested_queries """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from (select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ * from count_with_distinct_t where score > 20) t1 join (select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ * from count_with_distinct_t where id < 100) t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_basic """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1, count_with_distinct_t t2 where t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_left_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1 left join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_right_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1 right join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_full_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1 full join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_left_semi_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1 inner join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_left_anti_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1 left anti join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_complex_conditions """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1 join count_with_distinct_t t2 on t1.id = t2.id and t1.name < t2.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_with_aggregate """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score), avg(t1.score) from count_with_distinct_t t1 join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_subquery """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from (select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ * from count_with_distinct_t where score > 10) t1 join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_outer_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1 left join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_deep_subquery """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from (select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ * from (select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ * from count_with_distinct_t) count_with_distinct_t where score > 10) t1 join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_having """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1, count_with_distinct_t t2 where t1.id = t2.id group by t1.name having count(distinct t1.score) > 100; + """ + + order_qt_with_hint_groupby_pushdown_mixed_aggregates """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score), sum(distinct t1.score) from count_with_distinct_t t1 join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_multi_table_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1 join count_with_distinct_t t2 on t1.id = t2.id join count_with_distinct_t t3 on t1.name = t3.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_with_order_by """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1, count_with_distinct_t t2 where t1.id = t2.id group by t1.name order by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_multiple_equal_conditions """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1, count_with_distinct_t t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_equal_conditions_with_aggregate """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ sum(distinct t1.score), count(distinct t2.score) from count_with_distinct_t t1 join count_with_distinct_t t2 on t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_equal_conditions_non_aggregate """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ t1.name, count(distinct t1.score) from count_with_distinct_t t1, count_with_distinct_t t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_equal_conditions_non_aggregate_with_aggregate """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ t1.name, count(distinct t1.score), count(distinct t2.score) from count_with_distinct_t t1, count_with_distinct_t t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_with_where_clause """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1, count_with_distinct_t t2 where t1.id = t2.id and t1.score > 50 group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_varied_aggregates """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score), avg(t1.id), count(distinct t2.name) from count_with_distinct_t t1 join count_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_with_order_by_limit """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1, count_with_distinct_t t2 where t1.id = t2.id group by t1.name order by count(distinct t1.score) limit 10; + """ + + order_qt_with_hint_groupby_pushdown_alias_multiple_equal_conditions """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1_alias.score) from count_with_distinct_t t1_alias join count_with_distinct_t t2_alias on t1_alias.id = t2_alias.id and t1_alias.name = t2_alias.name group by t1_alias.name; + """ + + order_qt_with_hint_groupby_pushdown_complex_join_condition """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from count_with_distinct_t t1 join count_with_distinct_t t2 on t1.id = t2.id and t1.score = t2.score and t1.name <> t2.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_function_processed_columns """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct LENGTH(t1.name)) from count_with_distinct_t t1, count_with_distinct_t t2 where t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_nested_queries """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ count(distinct t1.score) from (select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ * from count_with_distinct_t where score > 20) t1 join (select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ * from count_with_distinct_t where id < 100) t2 on t1.id = t2.id group by t1.name; + """ +} diff --git a/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_min_distinct_through_join_one_side.groovy b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_min_distinct_through_join_one_side.groovy new file mode 100644 index 00000000000000..676becbcb6ecfa --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_min_distinct_through_join_one_side.groovy @@ -0,0 +1,256 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("push_down_min_distinct_through_join_one_side") { + sql "SET enable_nereids_planner=true" + sql "set runtime_filter_mode=OFF" + sql "SET enable_fallback_to_original_planner=false" + sql """ + DROP TABLE IF EXISTS min_with_distinct_t; + """ + + sql """ + CREATE TABLE IF NOT EXISTS min_with_distinct_t( + `id` int(32), + `score` int(64) NULL, + `name` varchar(64) NULL + ) ENGINE = OLAP + DISTRIBUTED BY HASH(id) BUCKETS 4 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql "insert into min_with_distinct_t values (1, 1, 'a')" + sql "insert into min_with_distinct_t values (2, null, 'a')" + sql "insert into min_with_distinct_t values (3, 1, null)" + sql "insert into min_with_distinct_t values (4, 2, 'b')" + sql "insert into min_with_distinct_t values (5, null, 'b')" + sql "insert into min_with_distinct_t values (6, 2, null)" + sql "insert into min_with_distinct_t values (7, 3, 'c')" + sql "insert into min_with_distinct_t values (8, null, 'c')" + sql "insert into min_with_distinct_t values (9, 3, null)" + sql "insert into min_with_distinct_t values (10, null, null)" + sql "analyze table min_with_distinct_t with sync;" + order_qt_groupby_pushdown_basic """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1, min_with_distinct_t t2 where t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_left_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1 left join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_right_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1 right join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_full_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1 full join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_left_semi_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1 inner join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_left_anti_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1 left anti join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_complex_conditions """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1 join min_with_distinct_t t2 on t1.id = t2.id and t1.name < t2.name group by t1.name; + """ + + order_qt_groupby_pushdown_with_aggregate """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score), avg(t1.score) from min_with_distinct_t t1 join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_subquery """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from (select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ * from min_with_distinct_t where score > 10) t1 join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_outer_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1 left join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_deep_subquery """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from (select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ * from (select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ * from min_with_distinct_t) min_with_distinct_t where score > 10) t1 join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_having """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1, min_with_distinct_t t2 where t1.id = t2.id group by t1.name having min(distinct t1.score) > 100; + """ + + order_qt_groupby_pushdown_mixed_aggregates """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score), sum(distinct t1.score) from min_with_distinct_t t1 join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_multi_table_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1 join min_with_distinct_t t2 on t1.id = t2.id join min_with_distinct_t t3 on t1.name = t3.name group by t1.name; + """ + + order_qt_groupby_pushdown_with_order_by """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1, min_with_distinct_t t2 where t1.id = t2.id group by t1.name order by t1.name; + """ + + order_qt_groupby_pushdown_multiple_equal_conditions """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1, min_with_distinct_t t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_groupby_pushdown_equal_conditions_with_aggregate """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ sum(distinct t1.score), min(distinct t2.score) from min_with_distinct_t t1 join min_with_distinct_t t2 on t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_groupby_pushdown_equal_conditions_non_aggregate """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ t1.name, min(distinct t1.score) from min_with_distinct_t t1, min_with_distinct_t t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_groupby_pushdown_equal_conditions_non_aggregate_with_aggregate """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ t1.name, min(distinct t1.score), min(distinct t2.score) from min_with_distinct_t t1, min_with_distinct_t t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_groupby_pushdown_with_where_clause """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1, min_with_distinct_t t2 where t1.id = t2.id and t1.score > 50 group by t1.name; + """ + + order_qt_groupby_pushdown_varied_aggregates """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score), avg(t1.id), min(distinct t2.name) from min_with_distinct_t t1 join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_with_order_by_limit """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1, min_with_distinct_t t2 where t1.id = t2.id group by t1.name order by min(distinct t1.score) limit 10; + """ + + order_qt_groupby_pushdown_alias_multiple_equal_conditions """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1_alias.score) from min_with_distinct_t t1_alias join min_with_distinct_t t2_alias on t1_alias.id = t2_alias.id and t1_alias.name = t2_alias.name group by t1_alias.name; + """ + + order_qt_groupby_pushdown_complex_join_condition """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1 join min_with_distinct_t t2 on t1.id = t2.id and t1.score = t2.score and t1.name <> t2.name group by t1.name; + """ + + order_qt_groupby_pushdown_function_processed_columns """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct LENGTH(t1.name)) from min_with_distinct_t t1, min_with_distinct_t t2 where t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_nested_queries """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from (select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ * from min_with_distinct_t where score > 20) t1 join (select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ * from min_with_distinct_t where id < 100) t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_basic """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1, min_with_distinct_t t2 where t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_left_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1 left join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_right_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1 right join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_full_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1 full join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_left_semi_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1 inner join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_left_anti_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1 left anti join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_complex_conditions """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1 join min_with_distinct_t t2 on t1.id = t2.id and t1.name < t2.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_with_aggregate """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score), avg(t1.score) from min_with_distinct_t t1 join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_subquery """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from (select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ * from min_with_distinct_t where score > 10) t1 join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_outer_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1 left join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_deep_subquery """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from (select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ * from (select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ * from min_with_distinct_t) min_with_distinct_t where score > 10) t1 join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_having """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1, min_with_distinct_t t2 where t1.id = t2.id group by t1.name having min(distinct t1.score) > 100; + """ + + order_qt_with_hint_groupby_pushdown_mixed_aggregates """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score), sum(distinct t1.score) from min_with_distinct_t t1 join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_multi_table_join """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1 join min_with_distinct_t t2 on t1.id = t2.id join min_with_distinct_t t3 on t1.name = t3.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_with_order_by """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1, min_with_distinct_t t2 where t1.id = t2.id group by t1.name order by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_multiple_equal_conditions """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1, min_with_distinct_t t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_equal_conditions_with_aggregate """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ sum(distinct t1.score), min(distinct t2.score) from min_with_distinct_t t1 join min_with_distinct_t t2 on t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_equal_conditions_non_aggregate """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ t1.name, min(distinct t1.score) from min_with_distinct_t t1, min_with_distinct_t t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_equal_conditions_non_aggregate_with_aggregate """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ t1.name, min(distinct t1.score), min(distinct t2.score) from min_with_distinct_t t1, min_with_distinct_t t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_with_where_clause """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1, min_with_distinct_t t2 where t1.id = t2.id and t1.score > 50 group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_varied_aggregates """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score), avg(t1.id), min(distinct t2.name) from min_with_distinct_t t1 join min_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_with_order_by_limit """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1, min_with_distinct_t t2 where t1.id = t2.id group by t1.name order by min(distinct t1.score) limit 10; + """ + + order_qt_with_hint_groupby_pushdown_alias_multiple_equal_conditions """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1_alias.score) from min_with_distinct_t t1_alias join min_with_distinct_t t2_alias on t1_alias.id = t2_alias.id and t1_alias.name = t2_alias.name group by t1_alias.name; + """ + + order_qt_with_hint_groupby_pushdown_complex_join_condition """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from min_with_distinct_t t1 join min_with_distinct_t t2 on t1.id = t2.id and t1.score = t2.score and t1.name <> t2.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_function_processed_columns """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct LENGTH(t1.name)) from min_with_distinct_t t1, min_with_distinct_t t2 where t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_nested_queries """ + select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ min(distinct t1.score) from (select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ * from min_with_distinct_t where score > 20) t1 join (select /*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/ * from min_with_distinct_t where id < 100) t2 on t1.id = t2.id group by t1.name; + """ +} diff --git a/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_sum_distinct_through_join_one_side.groovy b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_sum_distinct_through_join_one_side.groovy new file mode 100644 index 00000000000000..799d45ba413fb7 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_sum_distinct_through_join_one_side.groovy @@ -0,0 +1,252 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("push_down_sum_distinct_through_join_one_side") { + sql "SET enable_nereids_planner=true" + sql "set runtime_filter_mode=OFF" + sql "SET enable_fallback_to_original_planner=false" + sql """ + DROP TABLE IF EXISTS sum_with_distinct_t; + """ + + sql """ + CREATE TABLE IF NOT EXISTS sum_with_distinct_t( + `id` int(32), + `score` int(64) NULL, + `name` varchar(64) NULL + ) ENGINE = OLAP + DISTRIBUTED BY HASH(id) BUCKETS 4 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql "insert into sum_with_distinct_t values (1, 1, 'a')" + sql "insert into sum_with_distinct_t values (2, null, 'a')" + sql "insert into sum_with_distinct_t values (3, 1, null)" + sql "insert into sum_with_distinct_t values (4, 2, 'b')" + sql "insert into sum_with_distinct_t values (5, null, 'b')" + sql "insert into sum_with_distinct_t values (6, 2, null)" + sql "insert into sum_with_distinct_t values (7, 3, 'c')" + sql "insert into sum_with_distinct_t values (8, null, 'c')" + sql "insert into sum_with_distinct_t values (9, 3, null)" + sql "insert into sum_with_distinct_t values (10, null, null)" + sql "analyze table sum_with_distinct_t with sync;" + order_qt_groupby_pushdown_basic """ + select sum(distinct t1.score) from sum_with_distinct_t t1, sum_with_distinct_t t2 where t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_left_join """ + select sum(distinct t1.score) from sum_with_distinct_t t1 left join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_right_join """ + select sum(distinct t1.score) from sum_with_distinct_t t1 right join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_full_join """ + select sum(distinct t1.score) from sum_with_distinct_t t1 full join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_left_semi_join """ + select sum(distinct t1.score) from sum_with_distinct_t t1 inner join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_left_anti_join """ + select sum(distinct t1.score) from sum_with_distinct_t t1 left anti join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_complex_conditions """ + select sum(distinct t1.score) from sum_with_distinct_t t1 join sum_with_distinct_t t2 on t1.id = t2.id and t1.name < t2.name group by t1.name; + """ + + order_qt_groupby_pushdown_with_aggregate """ + select sum(distinct t1.score), avg(t1.score) from sum_with_distinct_t t1 join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_subquery """ + select sum(distinct t1.score) from (select * from sum_with_distinct_t where score > 10) t1 join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_outer_join """ + select sum(distinct t1.score) from sum_with_distinct_t t1 left join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_deep_subquery """ + select sum(distinct t1.score) from (select * from (select * from sum_with_distinct_t) sum_with_distinct_t where score > 10) t1 join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_having """ + select sum(distinct t1.score) from sum_with_distinct_t t1, sum_with_distinct_t t2 where t1.id = t2.id group by t1.name having sum(distinct t1.score) > 100; + """ + + order_qt_groupby_pushdown_mixed_aggregates """ + select sum(distinct t1.score), sum(distinct t1.score) from sum_with_distinct_t t1 join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_multi_table_join """ + select sum(distinct t1.score) from sum_with_distinct_t t1 join sum_with_distinct_t t2 on t1.id = t2.id join sum_with_distinct_t t3 on t1.name = t3.name group by t1.name; + """ + + order_qt_groupby_pushdown_with_order_by """ + select sum(distinct t1.score) from sum_with_distinct_t t1, sum_with_distinct_t t2 where t1.id = t2.id group by t1.name order by t1.name; + """ + + order_qt_groupby_pushdown_multiple_equal_conditions """ + select sum(distinct t1.score) from sum_with_distinct_t t1, sum_with_distinct_t t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_groupby_pushdown_equal_conditions_with_aggregate """ + select sum(distinct t1.score), sum(distinct t2.score) from sum_with_distinct_t t1 join sum_with_distinct_t t2 on t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_groupby_pushdown_equal_conditions_non_aggregate """ + select t1.name, sum(distinct t1.score) from sum_with_distinct_t t1, sum_with_distinct_t t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_groupby_pushdown_equal_conditions_non_aggregate_with_aggregate """ + select t1.name, sum(distinct t1.score), sum(distinct t2.score) from sum_with_distinct_t t1, sum_with_distinct_t t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_groupby_pushdown_with_where_clause """ + select sum(distinct t1.score) from sum_with_distinct_t t1, sum_with_distinct_t t2 where t1.id = t2.id and t1.score > 50 group by t1.name; + """ + + order_qt_groupby_pushdown_varied_aggregates """ + select sum(distinct t1.score) from sum_with_distinct_t t1 join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_with_order_by_limit """ + select sum(distinct t1.score) from sum_with_distinct_t t1, sum_with_distinct_t t2 where t1.id = t2.id group by t1.name order by sum(distinct t1.score) limit 10; + """ + + order_qt_groupby_pushdown_alias_multiple_equal_conditions """ + select sum(distinct t1_alias.score) from sum_with_distinct_t t1_alias join sum_with_distinct_t t2_alias on t1_alias.id = t2_alias.id and t1_alias.name = t2_alias.name group by t1_alias.name; + """ + + order_qt_groupby_pushdown_complex_join_condition """ + select sum(distinct t1.score) from sum_with_distinct_t t1 join sum_with_distinct_t t2 on t1.id = t2.id and t1.score = t2.score and t1.name <> t2.name group by t1.name; + """ + + order_qt_groupby_pushdown_function_processed_columns """ + select sum(distinct t1.score) from sum_with_distinct_t t1, sum_with_distinct_t t2 where t1.id = t2.id group by t1.name; + """ + + order_qt_groupby_pushdown_nested_queries """ + select sum(distinct t1.score) from (select * from sum_with_distinct_t where score > 20) t1 join (select * from sum_with_distinct_t where id < 100) t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_basic """ + select sum(distinct t1.score) from sum_with_distinct_t t1, sum_with_distinct_t t2 where t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_left_join """ + select sum(distinct t1.score) from sum_with_distinct_t t1 left join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_right_join """ + select sum(distinct t1.score) from sum_with_distinct_t t1 right join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_full_join """ + select sum(distinct t1.score) from sum_with_distinct_t t1 full join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_left_semi_join """ + select sum(distinct t1.score) from sum_with_distinct_t t1 inner join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_left_anti_join """ + select sum(distinct t1.score) from sum_with_distinct_t t1 left anti join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_complex_conditions """ + select sum(distinct t1.score) from sum_with_distinct_t t1 join sum_with_distinct_t t2 on t1.id = t2.id and t1.name < t2.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_with_aggregate """ + select sum(distinct t1.score), avg(t1.score) from sum_with_distinct_t t1 join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_subquery """ + select sum(distinct t1.score) from (select * from sum_with_distinct_t where score > 10) t1 join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_outer_join """ + select sum(distinct t1.score) from sum_with_distinct_t t1 left join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_deep_subquery """ + select sum(distinct t1.score) from (select * from (select * from sum_with_distinct_t) sum_with_distinct_t where score > 10) t1 join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_having """ + select sum(distinct t1.score) from sum_with_distinct_t t1, sum_with_distinct_t t2 where t1.id = t2.id group by t1.name having sum(distinct t1.score) > 100; + """ + + order_qt_with_hint_groupby_pushdown_mixed_aggregates """ + select sum(distinct t1.score), sum(distinct t1.score) from sum_with_distinct_t t1 join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_multi_table_join """ + select sum(distinct t1.score) from sum_with_distinct_t t1 join sum_with_distinct_t t2 on t1.id = t2.id join sum_with_distinct_t t3 on t1.name = t3.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_with_order_by """ + select sum(distinct t1.score) from sum_with_distinct_t t1, sum_with_distinct_t t2 where t1.id = t2.id group by t1.name order by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_multiple_equal_conditions """ + select sum(distinct t1.score) from sum_with_distinct_t t1, sum_with_distinct_t t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_equal_conditions_with_aggregate """ + select sum(distinct t1.score), sum(distinct t2.score) from sum_with_distinct_t t1 join sum_with_distinct_t t2 on t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_equal_conditions_non_aggregate """ + select t1.name, sum(distinct t1.score) from sum_with_distinct_t t1, sum_with_distinct_t t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_equal_conditions_non_aggregate_with_aggregate """ + select t1.name, sum(distinct t1.score), sum(distinct t2.score) from sum_with_distinct_t t1, sum_with_distinct_t t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_with_where_clause """ + select sum(distinct t1.score) from sum_with_distinct_t t1, sum_with_distinct_t t2 where t1.id = t2.id and t1.score > 50 group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_varied_aggregates """ + select sum(distinct t1.score) from sum_with_distinct_t t1 join sum_with_distinct_t t2 on t1.id = t2.id group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_with_order_by_limit """ + select sum(distinct t1.score) from sum_with_distinct_t t1, sum_with_distinct_t t2 where t1.id = t2.id group by t1.name order by sum(distinct t1.score) limit 10; + """ + + order_qt_with_hint_groupby_pushdown_alias_multiple_equal_conditions """ + select sum(distinct t1_alias.score) from sum_with_distinct_t t1_alias join sum_with_distinct_t t2_alias on t1_alias.id = t2_alias.id and t1_alias.name = t2_alias.name group by t1_alias.name; + """ + + order_qt_with_hint_groupby_pushdown_complex_join_condition """ + select sum(distinct t1.score) from sum_with_distinct_t t1 join sum_with_distinct_t t2 on t1.id = t2.id and t1.score = t2.score and t1.name <> t2.name group by t1.name; + """ + + order_qt_with_hint_groupby_pushdown_nested_queries """ + select sum(distinct t1.score) from (select * from sum_with_distinct_t where score > 20) t1 join (select * from sum_with_distinct_t where id < 100) t2 on t1.id = t2.id group by t1.name; + """ +}