From cb1c15623cbf284a448caf774322d2566ef4f2af Mon Sep 17 00:00:00 2001 From: 924060929 <924060929@qq.com> Date: Thu, 6 Jun 2024 11:27:34 +0800 Subject: [PATCH] [enhancement](Nereids) support 4 phases distinct aggregate with full distribution (#35871) The original implementation of the 4-phase distinct aggregate only supports the pattern that does not contain `group by` and has only one distinct aggregate function, for example: ```sql select count(distinct sex), sum(age) from student ``` This PR complements the 4-phase distinct aggregate with full distribution, to avoid data skew in the `group by`, for example: ```sql select sex, sum(distinct age) from student group by sex; ``` The `sex` column contains only two distinct values, `male` and `female`, while the table stores millions of rows. Shuffling by `sex` causes data skew, and lots of instances process empty rows. The 4-phase aggregate shuffles by `sex, age` to deduplicate rows first, so more instances can do the distinct in parallel. The plan shape will look like this: ``` PhysicalAggregate(groupBy=[sex], output=[sex, sum(partial_sum(age))], mode=BUFFER_TO_RESULT) | PhysicalDistribute(columns=[sex]) | PhysicalAggregate(groupBy=[sex], output=[sex, partial_sum(age)], mode=INPUT_TO_BUFFER) | PhysicalAggregate(groupBy=[sex, age], output=[sex, age], mode=BUFFER_TO_BUFFER) | PhysicalDistribute(columns=[sex, age]) # shuffle by more columns to avoid data skew | PhysicalAggregate(groupBy=[sex, age], output=[sex, age], mode=INPUT_TO_BUFFER) | PhysicalOlapScan(name=student) ``` (cherry picked from commit 03f1cbde7aea4aca9a1a2ac5b3ef43e250d2ca92) --- .../ChildrenPropertiesRegulator.java | 10 -- .../apache/doris/nereids/rules/RuleType.java | 1 + .../implementation/AggregateStrategies.java | 119 ++++++++++++++---- .../trees/plans/algebra/Aggregate.java | 22 ++++ .../data/nereids_p0/aggregate/aggregate.out | 4 + .../nereids_p0/aggregate/aggregate.groovy | 22 ++++ .../nereids_syntax_p0/agg_4_phase.groovy | 4 +- 7 files changed, 144 insertions(+), 38 deletions(-) diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java index 038e2646a6dd996..3beed014aac9109 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java @@ -113,16 +113,6 @@ public Boolean visitPhysicalHashAggregate(PhysicalHashAggregate extends Plan> // this means one stage gather agg, usually bad pattern return false; } - // forbid three or four stage distinct agg inter by distribute - if (agg.getAggMode() == AggMode.BUFFER_TO_BUFFER && children.get(0).getPlan() instanceof PhysicalDistribute) { - // if distinct without group by key, we prefer three or four stage distinct agg - // because the second phase of multi-distinct only have one instance, and it is slow generally. - if (agg.getGroupByExpressions().size() == 1 - && agg.getOutputExpressions().size() == 1) { - return true; - } - return false; - } // forbid TWO_PHASE_AGGREGATE_WITH_DISTINCT after shuffle // TODO: this is forbid good plan after cte reuse by mistake diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java index 45439e4cd51bf55..24ba21c06a35cfa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java @@ -443,6 +443,7 @@ public enum RuleType { TWO_PHASE_AGGREGATE_WITH_MULTI_DISTINCT(RuleTypeClass.IMPLEMENTATION), THREE_PHASE_AGGREGATE_WITH_DISTINCT(RuleTypeClass.IMPLEMENTATION), FOUR_PHASE_AGGREGATE_WITH_DISTINCT(RuleTypeClass.IMPLEMENTATION), + FOUR_PHASE_AGGREGATE_WITH_DISTINCT_WITH_FULL_DISTRIBUTE(RuleTypeClass.IMPLEMENTATION), LOGICAL_UNION_TO_PHYSICAL_UNION(RuleTypeClass.IMPLEMENTATION), 
LOGICAL_EXCEPT_TO_PHYSICAL_EXCEPT(RuleTypeClass.IMPLEMENTATION), LOGICAL_INTERSECT_TO_PHYSICAL_INTERSECT(RuleTypeClass.IMPLEMENTATION), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java index e1095df7bab0b97..9cb7b4d84a47f7f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java @@ -75,6 +75,7 @@ import org.apache.doris.nereids.util.TypeCoercionUtils; import org.apache.doris.qe.ConnectContext; +import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -293,15 +294,89 @@ && couldConvertToMulti(agg)) // .thenApplyMulti(ctx -> twoPhaseAggregateWithDistinct(ctx.root, ctx.connectContext)) // ), RuleType.THREE_PHASE_AGGREGATE_WITH_DISTINCT.build( - basePattern - .when(agg -> agg.getDistinctArguments().size() == 1) - .thenApplyMulti(ctx -> threePhaseAggregateWithDistinct(ctx.root, ctx.connectContext)) + basePattern + .when(agg -> agg.getDistinctArguments().size() == 1) + .thenApplyMulti(ctx -> threePhaseAggregateWithDistinct(ctx.root, ctx.connectContext)) ), + /* + * sql: + * select count(distinct name), sum(age) from student; + *
+ * 4 phase plan
+ * DISTINCT_GLOBAL(BUFFER_TO_RESULT, groupBy(),
+ * output[count(partial_count(name)), sum(partial_sum(partial_sum(age)))],
+ * GATHER)
+ * +--DISTINCT_LOCAL(INPUT_TO_BUFFER, groupBy(),
+ * output(partial_count(name), partial_sum(partial_sum(age))),
+ * hash distribute by name)
+ * +--GLOBAL(BUFFER_TO_BUFFER, groupBy(name),
+ * output(name, partial_sum(age)),
+ * hash_distribute by name)
+ * +--LOCAL(INPUT_TO_BUFFER, groupBy(name), output(name, partial_sum(age)))
+ * +--scan(name, age)
+ */
RuleType.FOUR_PHASE_AGGREGATE_WITH_DISTINCT.build(
- basePattern
- .when(agg -> agg.getDistinctArguments().size() == 1)
- .when(agg -> agg.getGroupByExpressions().isEmpty())
- .thenApplyMulti(ctx -> fourPhaseAggregateWithDistinct(ctx.root, ctx.connectContext))
+ basePattern
+ .when(agg -> agg.getDistinctArguments().size() == 1)
+ .when(agg -> agg.getGroupByExpressions().isEmpty())
+ .thenApplyMulti(ctx -> {
+ Function
+ * 4 phase plan
+ * DISTINCT_GLOBAL(BUFFER_TO_RESULT, groupBy(age),
+ * output[age, sum(partial_count(name))],
+ * hash distribute by age)
+ * +--DISTINCT_LOCAL(INPUT_TO_BUFFER, groupBy(age),
+ * output(age, partial_count(name)),
+ * hash distribute by age, name)
+ * +--GLOBAL(BUFFER_TO_BUFFER, groupBy(age, name),
+ * output(age, name),
+ * hash_distribute by age, name)
+ * +--LOCAL(INPUT_TO_BUFFER, groupBy(age, name), output(age, name))
+ * +--scan(age, name)
+ */
+ RuleType.FOUR_PHASE_AGGREGATE_WITH_DISTINCT_WITH_FULL_DISTRIBUTE.build(
+ basePattern
+ .when(agg -> agg.everyDistinctArgumentNumIsOne() && !agg.getGroupByExpressions().isEmpty())
+ .when(agg ->
+ ImmutableSet.builder()
+ .addAll(agg.getGroupByExpressions())
+ .addAll(agg.getDistinctArguments())
+ .build().size() > agg.getGroupByExpressions().size()
+ )
+ .thenApplyMulti(ctx -> {
+ Function
- * 4 phase plan
- * DISTINCT_GLOBAL, BUFFER_TO_RESULT groupBy(), output[count(name), sum(age#5)], [GATHER]
- * +--DISTINCT_LOCAL, INPUT_TO_BUFFER, groupBy()), output(count(name), partial_sum(age)), hash distribute by name
- * +--GLOBAL, BUFFER_TO_BUFFER, groupBy(name), output(name, partial_sum(age)), hash_distribute by name
- * +--LOCAL, INPUT_TO_BUFFER, groupBy(name), output(name, partial_sum(age))
- * +--scan(name, age)
- */
private List, RequireProperties> secondPhaseRequireDistinctHash =
+ groupByAndDistinct -> RequireProperties.of(
+ PhysicalProperties.createHash(
+ ctx.root.getDistinctArguments(), ShuffleType.REQUIRE
+ )
+ );
+ Function
, RequireProperties> secondPhaseRequireGroupByAndDistinctHash =
+ groupByAndDistinct -> RequireProperties.of(
+ PhysicalProperties.createHash(groupByAndDistinct, ShuffleType.REQUIRE)
+ );
+
+ Function
, RequireProperties> secondPhaseRequireSupplier,
+ Function