From b6f7885569cea1dbf91e8e1990212418a9680143 Mon Sep 17 00:00:00 2001 From: "shuming.li" Date: Thu, 22 Aug 2024 17:44:21 +0800 Subject: [PATCH] Support push down aggregate functions in mv rewrite Signed-off-by: shuming.li --- .../starrocks/sql/analyzer/QueryAnalyzer.java | 20 +- .../AggregateFunctionRollupUtils.java | 13 + .../materialization/EquationRewriter.java | 198 +++++++++ ...aterializedViewAggPushDownRewriteTest.java | 7 +- .../planner/MaterializedViewManualTest.java | 390 +++++++++++++++++- .../com/starrocks/sql/plan/TracerMVTest.java | 4 +- ...est_materialized_view_agg_pushdown_rewrite | 16 +- .../R/test_sync_materialized_view_rewrite | 16 +- ...c_materialized_view_rewrite_with_case_when | 8 +- .../R/test_sync_materialized_view_with_where | 4 +- .../T/test_sync_materialized_view_rewrite | 10 +- ...c_materialized_view_rewrite_with_case_when | 8 +- .../T/test_sync_materialized_view_with_where | 4 +- .../test_mv_rewrite_with_push_down_aggregate | 242 +++++++++++ .../test_mv_rewrite_with_push_down_aggregate | 115 ++++++ 15 files changed, 986 insertions(+), 69 deletions(-) create mode 100644 test/sql/test_materialized_view_rewrite/R/test_mv_rewrite_with_push_down_aggregate create mode 100644 test/sql/test_materialized_view_rewrite/T/test_mv_rewrite_with_push_down_aggregate diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/analyzer/QueryAnalyzer.java b/fe/fe-core/src/main/java/com/starrocks/sql/analyzer/QueryAnalyzer.java index a68b66da26c65..05061d2c194d2 100644 --- a/fe/fe-core/src/main/java/com/starrocks/sql/analyzer/QueryAnalyzer.java +++ b/fe/fe-core/src/main/java/com/starrocks/sql/analyzer/QueryAnalyzer.java @@ -55,8 +55,6 @@ import com.starrocks.common.Pair; import com.starrocks.common.profile.Timer; import com.starrocks.common.profile.Tracers; -import com.starrocks.common.util.concurrent.lock.LockType; -import com.starrocks.common.util.concurrent.lock.Locker; import com.starrocks.privilege.SecurityPolicyRewriteRule; import com.starrocks.qe.ConnectContext; import com.starrocks.server.GlobalStateMgr; @@ -1359,7 +1357,6 @@ public Table resolveTable(TableRelation tableRelation) { } MetaUtils.checkDbNullAndReport(db, dbName); - Locker locker = new Locker(); Table table = null; if (tableRelation.isSyncMVQuery()) { @@ -1370,17 +1367,12 @@ public Table resolveTable(TableRelation tableRelation) { Table mvTable = materializedIndex.first; Preconditions.checkState(mvTable != null); Preconditions.checkState(mvTable instanceof OlapTable); - try { - // Add read lock to avoid concurrent problems. - locker.lockDatabase(db.getId(), LockType.READ); - OlapTable mvOlapTable = new OlapTable(); - ((OlapTable) mvTable).copyOnlyForQuery(mvOlapTable); - // Copy the necessary olap table meta to avoid changing original meta; - mvOlapTable.setBaseIndexId(materializedIndex.second.getIndexId()); - table = mvOlapTable; - } finally { - locker.unLockDatabase(db.getId(), LockType.READ); - } + // Add read lock to avoid concurrent problems. + OlapTable mvOlapTable = new OlapTable(); + ((OlapTable) mvTable).copyOnlyForQuery(mvOlapTable); + // Copy the necessary olap table meta to avoid changing original meta; + mvOlapTable.setBaseIndexId(materializedIndex.second.getIndexId()); + table = mvOlapTable; } } } else { diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/materialization/AggregateFunctionRollupUtils.java b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/materialization/AggregateFunctionRollupUtils.java index c5be262cfd6fd..55ef969c37edb 100644 --- a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/materialization/AggregateFunctionRollupUtils.java +++ b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/materialization/AggregateFunctionRollupUtils.java @@ -63,6 +63,19 @@ public class AggregateFunctionRollupUtils { .put(FunctionSet.ARRAY_AGG_DISTINCT, FunctionSet.ARRAY_UNIQUE_AGG) .build(); + // Functions that can be pushed down to mv union rewrite. + // eg: + // sum(fn(col)) = fn(sum(col)) + // min(fn(col)) = fn(min(col)) + // max(fn(col)) = fn(max(col)) + // if fn is a scalar function, it can be pushed down to mv union rewrite. + public static final Map MV_REWRITE_PUSH_DOWN_FUNCTION_MAP = ImmutableMap.builder() + // Functions and rollup functions are the same. + .put(FunctionSet.SUM, FunctionSet.SUM) + .put(FunctionSet.MAX, FunctionSet.MAX) + .put(FunctionSet.MIN, FunctionSet.MIN) + .build(); + public static final Set NON_CUMULATIVE_ROLLUP_FUNCTION_MAP = ImmutableSet.builder() .add(FunctionSet.MAX) .add(FunctionSet.MIN) diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/materialization/EquationRewriter.java b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/materialization/EquationRewriter.java index 9a4af98024ee8..be4b87973568a 100644 --- a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/materialization/EquationRewriter.java +++ b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/materialization/EquationRewriter.java @@ -22,20 +22,30 @@ import com.starrocks.analysis.Expr; import com.starrocks.catalog.Function; import com.starrocks.catalog.FunctionSet; +import com.starrocks.catalog.Type; import com.starrocks.common.Pair; +import com.starrocks.qe.ConnectContext; +import com.starrocks.sql.analyzer.FunctionAnalyzer; import com.starrocks.sql.optimizer.operator.scalar.BinaryPredicateOperator; import com.starrocks.sql.optimizer.operator.scalar.CallOperator; +import com.starrocks.sql.optimizer.operator.scalar.CaseWhenOperator; import com.starrocks.sql.optimizer.operator.scalar.ColumnRefOperator; import com.starrocks.sql.optimizer.operator.scalar.ScalarOperator; +import com.starrocks.sql.optimizer.operator.scalar.ScalarOperatorUtil; import com.starrocks.sql.optimizer.operator.scalar.ScalarOperatorVisitor; import com.starrocks.sql.optimizer.rewrite.BaseScalarOperatorShuttle; +import com.starrocks.sql.optimizer.rewrite.ReplaceColumnRefRewriter; +import com.starrocks.sql.optimizer.rewrite.ScalarOperatorRewriter; +import com.starrocks.sql.optimizer.rewrite.scalar.ImplicitCastRule; import com.starrocks.sql.optimizer.rule.transformation.materialization.equivalent.EquivalentShuttleContext; import com.starrocks.sql.optimizer.rule.transformation.materialization.equivalent.IRewriteEquivalent; import com.starrocks.sql.optimizer.rule.transformation.materialization.equivalent.RewriteEquivalent; +import com.starrocks.sql.parser.NodePosition; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.stream.Collectors; import static com.starrocks.sql.optimizer.rule.transformation.materialization.equivalent.RewriteEquivalent.EQUIVALENTS; @@ -48,6 +58,9 @@ public class EquationRewriter { private AggregateFunctionRewriter aggregateFunctionRewriter; boolean underAggFunctionRewriteContext; + // Replace the corresponding ColumnRef with ScalarOperator if this call operator can be pushed down. + private Map> aggPushDownOperatorMap = Maps.newHashMap(); + public EquationRewriter() { this.equationMap = ArrayListMultimap.create(); this.rewriteEquivalents = Maps.newHashMap(); @@ -161,6 +174,13 @@ public ScalarOperator visitCall(CallOperator call, Void context) { return rewritten; } } + + // rewrite by pushing down aggregate + rewritten = rewriteByPushDownAggregation(call); + if (rewritten != null) { + return rewritten; + } + // If count(1)/sum(1) cannot be rewritten by mv's defined equivalents, return null directly, // otherwise it may cause a wrong plan. // mv : SELECT 1, count(distinct k1) from tbl1; @@ -169,9 +189,167 @@ public ScalarOperator visitCall(CallOperator call, Void context) { if (call.isAggregate() && call.isConstant()) { return null; } + return super.visitCall(call, context); } + private ScalarOperator rewriteByPushDownAggregation(CallOperator call) { + if (AggregateFunctionRollupUtils.MV_REWRITE_PUSH_DOWN_FUNCTION_MAP.containsKey(call.getFnName()) && + aggPushDownOperatorMap.containsKey(call.getFnName())) { + Map operatorMap = aggPushDownOperatorMap.get(call.getFnName()); + ScalarOperator arg0 = call.getChild(0); + if (call.getChildren().size() != 1) { + return null; + } + // push down aggregate now only supports one child + // it's fine since rewrite will clone argo + ScalarOperator pdCall = pushDownAggregationToArg0(call, arg0, operatorMap); + if (pdCall == null || pdCall.equals(arg0)) { + return null; + } + ScalarOperator rewritten = pdCall.accept(this, null); + // only can be used if pdCall is rewritten + if (rewritten != null && !rewritten.equals(pdCall)) { + shuttleContext.setRewrittenByEquivalent(true); + if (FunctionSet.SUM.equalsIgnoreCase(call.getFnName())) { + Function newFn = ScalarOperatorUtil.findSumFn(new Type[] {rewritten.getType()}); + CallOperator newCall = new CallOperator(call.getFnName(), call.getType(), Lists.newArrayList(rewritten), + newFn); + ScalarOperatorRewriter scalarRewriter = new ScalarOperatorRewriter(); + CallOperator result = (CallOperator) scalarRewriter.rewrite(newCall, + Lists.newArrayList(new ImplicitCastRule())); + return result; + } else { + return new CallOperator(call.getFnName(), call.getType(), Lists.newArrayList(rewritten), + call.getFunction()); + } + } + } + return null; + } + + /** + * Whether the call operator can be pushed down: + * - Only supports to push down min/max/sum aggregate function + * - If the argument is a column ref, and operator map contains the column ref, return true + * - If the argument is a call operator and contains multi-column refs(Ony IF/CaseWhen is supported), + * ensure the aggregate column does not appear in the condition clause. + */ + private ScalarOperator pushDownAggregationToArg0(ScalarOperator call, + ScalarOperator arg0, + Map operatorMap) { + if (arg0 == null || !(arg0 instanceof CallOperator)) { + return null; + } + CallOperator arg0Call = (CallOperator) arg0; + List columnRefs = arg0.getColumnRefs(); + if (columnRefs.size() == 1) { + ColumnRefOperator child0 = columnRefs.get(0); + if (!operatorMap.containsKey(child0)) { + return null; + } + CallOperator aggFunc = operatorMap.get(child0); + // strict mode, only supports col's type and agg(col)'s type are strict equal; otherwise we should + // refresh the call operator's argument/result type recursively. + if (!aggFunc.getType().equals(child0.getType())) { + return null; + } + ReplaceColumnRefRewriter rewriter = new ReplaceColumnRefRewriter(operatorMap); + return rewriter.rewrite(arg0); + } else { + // if there are many column refs in arg0, the agg column must be the same. + if (arg0Call.getFnName().equalsIgnoreCase(FunctionSet.IF)) { + if (arg0Call.getChildren().size() != 3) { + return null; + } + // if the first column ref is in the operatorMap, means the agg column maybe as condition which + // cannot be rewritten + if (isContainAggregateColumn(arg0Call.getChild(0), operatorMap)) { + return null; + } + ScalarOperator child1 = arg0Call.getChild(1); + ScalarOperator rewritten1 = rewriteIfOnlyContianAggregateColumn(child1, operatorMap); + if (rewritten1 == null) { + return null; + } + ScalarOperator child2 = arg0Call.getChild(2); + ScalarOperator rewritten2 = rewriteIfOnlyContianAggregateColumn(child2, operatorMap); + if (rewritten2 == null) { + return null; + } + ConnectContext ctx = ConnectContext.get() == null ? new ConnectContext() : ConnectContext.get(); + List args = Lists.newArrayList(arg0Call.getChild(0), rewritten1, rewritten2); + Type[] argTypes = args.stream().map(x -> x.getType()).collect(Collectors.toList()).toArray(new Type[0]); + Function newFn = FunctionAnalyzer.getAnalyzedBuiltInFunction(ctx, FunctionSet.IF, null, argTypes, + NodePosition.ZERO); + if (newFn == null) { + return null; + } + return new CallOperator(FunctionSet.IF, newFn.getReturnType(), args, newFn); + } else if (arg0Call instanceof CaseWhenOperator) { + CaseWhenOperator caseClause = (CaseWhenOperator) arg0Call; + // if case condition contains any agg column ref, return false + ScalarOperator caseExpr = caseClause.hasCase() ? caseClause.getCaseClause() : null; + if (caseExpr != null && isContainAggregateColumn(caseExpr, operatorMap)) { + return null; + } + List newCaseWhens = Lists.newArrayList(); + for (int i = 0; i < caseClause.getWhenClauseSize(); i++) { + ScalarOperator when = caseClause.getWhenClause(i); + if (isContainAggregateColumn(when, operatorMap)) { + return null; + } + newCaseWhens.add(when); + + // when clause or else clause can only contain aggregate column ref + ScalarOperator then = caseClause.getThenClause(i); + ScalarOperator newThen = rewriteIfOnlyContianAggregateColumn(then, operatorMap); + if (newThen == null) { + return null; + } + newCaseWhens.add(newThen); + } + ScalarOperator elseClause = caseClause.hasElse() ? caseClause.getElseClause() : null; + ScalarOperator newElseClause = elseClause; + if (elseClause != null) { + newElseClause = rewriteIfOnlyContianAggregateColumn(elseClause, operatorMap); + if (newElseClause == null) { + return null; + } + } + // NOTE: use call's result type as its input. + return new CaseWhenOperator(call.getType(), caseExpr, newElseClause, newCaseWhens); + } + } + return null; + } + + private boolean isContainAggregateColumn(ScalarOperator child, + Map operatorMap) { + return child.getColumnRefs().stream().anyMatch(x -> operatorMap.containsKey(x)); + } + + private ScalarOperator rewriteIfOnlyContianAggregateColumn(ScalarOperator child, + Map operatorMap) { + List colRefs = child.getColumnRefs(); + if (colRefs.size() > 1) { + return null; + } + // constant operator + if (colRefs.size() == 0) { + return child; + } + // TODO: only supports column ref now, support common expression later. + if (!(child instanceof ColumnRefOperator)) { + return null; + + } + if (!operatorMap.containsKey(colRefs.get(0))) { + return null; + } + return operatorMap.get(colRefs.get(0)); + } + Optional replace(ScalarOperator scalarOperator) { if (equationMap.containsKey(scalarOperator)) { Optional> mappedColumnAndExprRef = @@ -240,6 +418,7 @@ public void addMapping(ScalarOperator expr, ColumnRefOperator col) { equationMap.put(extendedEntry.first, Pair.create(col, extendedEntry.second)); } + // add into equivalents for (IRewriteEquivalent equivalent : EQUIVALENTS) { IRewriteEquivalent.RewriteEquivalentContext eqContext = equivalent.prepare(expr); if (eqContext != null) { @@ -248,6 +427,25 @@ public void addMapping(ScalarOperator expr, ColumnRefOperator col) { .add(eq); } } + + // add into a push-down operator map + if (expr instanceof CallOperator) { + CallOperator call = (CallOperator) expr; + String fnName = call.getFnName(); + if (AggregateFunctionRollupUtils.REWRITE_ROLLUP_FUNCTION_MAP.containsKey(fnName) && call.getChildren().size() == 1) { + ScalarOperator arg0 = call.getChild(0); + // NOTE: only support push down when the argument is a column ref. + // eg: + // mv: sum(cast(col as tinyint)) + // query: 2 * sum(col) + // query cannot be used to push down, because the argument is not a column ref. + if (arg0 != null && arg0.isColumnRef()) { + aggPushDownOperatorMap + .computeIfAbsent(fnName, x -> Maps.newHashMap()) + .put((ColumnRefOperator) arg0, call); + } + } + } } private static class EquationTransformer extends ScalarOperatorVisitor { diff --git a/fe/fe-core/src/test/java/com/starrocks/planner/MaterializedViewAggPushDownRewriteTest.java b/fe/fe-core/src/test/java/com/starrocks/planner/MaterializedViewAggPushDownRewriteTest.java index e6e6b23cabd2b..3d92bb783acdc 100644 --- a/fe/fe-core/src/test/java/com/starrocks/planner/MaterializedViewAggPushDownRewriteTest.java +++ b/fe/fe-core/src/test/java/com/starrocks/planner/MaterializedViewAggPushDownRewriteTest.java @@ -151,6 +151,7 @@ public void testAggPushDown_RollupFunctions_QueryMV_NoMatch() { // query and mv's agg function is not the same, cannot rewrite. String mvAggArg = "LO_REVENUE"; String queryAggArg = "(LO_REVENUE + 1) * 2"; + Set supportedPushDownAggregateFunctions = Sets.newHashSet("min", "max"); for (Map.Entry e : SAFE_REWRITE_ROLLUP_FUNCTION_MAP.entrySet()) { String funcName = e.getKey(); String mvAggFunc = getAggFunction(funcName, mvAggArg); @@ -162,7 +163,11 @@ public void testAggPushDown_RollupFunctions_QueryMV_NoMatch() { String query = String.format("select LO_ORDERDATE, %s as revenue_sum\n" + " from lineorder l join dates d on l.LO_ORDERDATE = d.d_datekey\n" + " group by LO_ORDERDATE", queryAggFunc); - sql(query).nonMatch("mv0"); + if (supportedPushDownAggregateFunctions.contains(funcName)) { + sql(query).match("mv0"); + } else { + sql(query).nonMatch("mv0"); + } }); } } diff --git a/fe/fe-core/src/test/java/com/starrocks/planner/MaterializedViewManualTest.java b/fe/fe-core/src/test/java/com/starrocks/planner/MaterializedViewManualTest.java index da1dd891c7007..fc322b071a196 100644 --- a/fe/fe-core/src/test/java/com/starrocks/planner/MaterializedViewManualTest.java +++ b/fe/fe-core/src/test/java/com/starrocks/planner/MaterializedViewManualTest.java @@ -14,6 +14,7 @@ package com.starrocks.planner; +import com.starrocks.sql.plan.PlanTestBase; import org.junit.BeforeClass; import org.junit.Test; @@ -328,6 +329,8 @@ public void testDateTruncPartitionColumnExpr2() throws Exception { starRocksAssert.dropTable("test_partition_expr_tbl1"); } + + @Test public void testMvRewriteForColumnReorder() throws Exception { { @@ -353,7 +356,7 @@ public void testRewriteWithCaseWhen() { " as select t1a, t1b, sum(t1f) as total from test.test_all_type group by t1a, t1b;", () -> { { String query = "select t1a, sum(if(t1b=0, t1f, 0)) as total from test.test_all_type group by t1a;"; - sql(query).notContain("mv0"); + sql(query).contains("mv0"); } { String query = "select t1a, sum(if(t1b=0, t1b, 0)) as total from test.test_all_type group by t1a;"; @@ -456,24 +459,24 @@ public void testRewriteWithEliminateJoinsBasic1() { @Test public void testRewriteWithEliminateJoinsBasic2() throws Exception { - starRocksAssert.withTable("CREATE TABLE `tbl1` (\n" + - " `k1` date,\n" + - " `k2` decimal64(18, 2),\n" + - " `k3` varchar(255),\n" + - " `v1` varchar(255)\n" + - ") ENGINE=OLAP \n" + - "DUPLICATE KEY(`k1`, `k2`, `k3`)\n" + - "DISTRIBUTED BY RANDOM\n" + - "PROPERTIES (\n" + - "\"replication_num\" = \"1\"\n" + - ");"); - starRocksAssert.withMaterializedView("CREATE MATERIALIZED VIEW `mv1` \n" + - "DISTRIBUTED BY RANDOM\n" + - "REFRESH ASYNC\n" + - "PROPERTIES (\n" + - "\"replication_num\" = \"1\"\n" + - ")\n" + - "AS SELECT k1, k2, k3, sum(v1) from tbl1 group by k1, k2, k3"); + starRocksAssert.withTable("CREATE TABLE `tbl1` (\n" + + " `k1` date,\n" + + " `k2` decimal64(18, 2),\n" + + " `k3` varchar(255),\n" + + " `v1` varchar(255)\n" + + ") ENGINE=OLAP \n" + + "DUPLICATE KEY(`k1`, `k2`, `k3`)\n" + + "DISTRIBUTED BY RANDOM\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ");"); + starRocksAssert.withMaterializedView("CREATE MATERIALIZED VIEW `mv1` \n" + + "DISTRIBUTED BY RANDOM\n" + + "REFRESH ASYNC\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ")\n" + + "AS SELECT k1, k2, k3, sum(v1) from tbl1 group by k1, k2, k3"); { String sql = "with cte as(" + " select " + @@ -565,4 +568,353 @@ public void testWrongMVRewrite() throws Exception { } starRocksAssert.dropTable("tbl1"); } + + @Test + public void testRewriteWithPushDownEquivalent1() throws Exception { + starRocksAssert.withTable("CREATE TABLE `tbl1` (\n" + + " `k1` date,\n" + + " `k2` decimal64(18, 2),\n" + + " `k3` varchar(255),\n" + + " `v1` bigint \n" + + ") ENGINE=OLAP \n" + + "DUPLICATE KEY(`k1`, `k2`, `k3`)\n" + + "DISTRIBUTED BY RANDOM\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ");"); + starRocksAssert.withMaterializedView("CREATE MATERIALIZED VIEW `mv1` \n" + + "DISTRIBUTED BY RANDOM\n" + + "REFRESH ASYNC\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ")\n" + + "AS SELECT k1, k2, k3, sum(v1) from tbl1 group by k1, k2, k3"); + { + String sql = "select t1.k1, " + + " sum(case when t1.k1 between date_add('2024-07-20', interval -1 month) and " + + " date_add('2024-07-20', interval 1 month) then t1.v1 else 0 end) " + + " from tbl1 t1 group by t1.k1"; + sql(sql).contains("mv1") + .contains(" 1:AGGREGATE (update serialize)\n" + + " | STREAMING\n" + + " | output: sum(if((7: k1 >= '2024-06-20') AND (7: k1 <= '2024-08-20'), 10: sum(v1), 0))\n" + + " | group by: 7: k1\n" + + " | \n" + + " 0:OlapScanNode\n" + + " TABLE: mv1"); + } + { + String sql = "select t1.k1, " + + " 2 * sum(case when t1.k1 between date_add('2024-07-20', interval -1 month) and " + + " date_add('2024-07-20', interval 1 month) then t1.v1 else 0 end) " + + " from tbl1 t1 group by t1.k1"; + sql(sql).contains("mv1") + .contains(" 1:AGGREGATE (update serialize)\n" + + " | STREAMING\n" + + " | output: sum(if((7: k1 >= '2024-06-20') AND (7: k1 <= '2024-08-20'), 10: sum(v1), 0))\n" + + " | group by: 7: k1\n" + + " | \n" + + " 0:OlapScanNode\n" + + " TABLE: mv1") + .contains(" 4:Project\n" + + " | : 8: k1\n" + + " | : 2 * 12: sum"); + } + starRocksAssert.dropMaterializedView("mv1"); + starRocksAssert.dropTable("tbl1"); + } + + @Test + public void testRewriteWithPushDownEquivalent2() throws Exception { + starRocksAssert.withTable("CREATE TABLE `tbl1` (\n" + + " `k1` date,\n" + + " `k2` decimal64(18, 2),\n" + + " `k3` varchar(255),\n" + + " `v1` bigint \n" + + ") ENGINE=OLAP \n" + + "DUPLICATE KEY(`k1`, `k2`, `k3`)\n" + + "DISTRIBUTED BY RANDOM\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ");"); + starRocksAssert.withMaterializedView("CREATE MATERIALIZED VIEW `mv1` \n" + + "DISTRIBUTED BY RANDOM\n" + + "REFRESH ASYNC\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ")\n" + + "AS SELECT k1, k2, k3, sum(2 * v1) from tbl1 group by k1, k2, k3"); + { + // mv's sum doesn't contain column ref which cannot be used for rewrite + String sql = "select t1.k1, " + + " sum(case when t1.k1 between date_add('2024-07-20', interval -1 month) and " + + " date_add('2024-07-20', interval 1 month) then 2 * t1.v1 else 0 end) " + + " from tbl1 t1 group by t1.k1"; + sql(sql).notContain("mv1"); + } + starRocksAssert.dropMaterializedView("mv1"); + starRocksAssert.dropTable("tbl1"); + } + + @Test + public void testRewriteWithPushDownEquivalent3() throws Exception { + starRocksAssert.withTable("CREATE TABLE `tbl1` (\n" + + " `k1` date,\n" + + " `k2` decimal64(18, 2),\n" + + " `k3` varchar(255),\n" + + " `v1` bigint \n" + + ") ENGINE=OLAP \n" + + "DUPLICATE KEY(`k1`, `k2`, `k3`)\n" + + "DISTRIBUTED BY RANDOM\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ");"); + starRocksAssert.withMaterializedView("CREATE MATERIALIZED VIEW `mv1` \n" + + "DISTRIBUTED BY RANDOM\n" + + "REFRESH ASYNC\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ")\n" + + "AS SELECT k1, k2, k3, sum(v1), max(v1) from tbl1 group by k1, k2, k3"); + { + String sql = "select t1.k1, 2 * min(v1 + 1) from tbl1 t1 group by t1.k1"; + sql(sql).notContain("mv1"); + } + { + String sql = "select t1.k1, 2 * sum(case when t1.v1 > 10 then t1.v1 else 0 end) " + + " from tbl1 t1 group by t1.k1"; + sql(sql).notContain("mv1"); + } + { + String sql = "select t1.k1, " + + " 2 * sum(v1 + cast(k3 as int)) " + + " from tbl1 t1 group by t1.k1"; + sql(sql).notContain("mv1"); + } + starRocksAssert.dropMaterializedView("mv1"); + starRocksAssert.dropTable("tbl1"); + } + + @Test + public void testRewriteWithPushDownEquivalent4() throws Exception { + starRocksAssert.withTable("CREATE TABLE `tbl1` (\n" + + " `k1` date,\n" + + " `k2` decimal64(18, 2),\n" + + " `k3` varchar(255),\n" + + " `v1` bigint \n" + + ") ENGINE=OLAP \n" + + "DUPLICATE KEY(`k1`, `k2`, `k3`)\n" + + "DISTRIBUTED BY RANDOM\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ");"); + starRocksAssert.withMaterializedView("CREATE MATERIALIZED VIEW `mv1` \n" + + "DISTRIBUTED BY RANDOM\n" + + "REFRESH ASYNC\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ")\n" + + "AS SELECT k1, k3, sum(k2) from tbl1 group by k1, k3"); + // sum(decimal) + { + String sql = "select t1.k1, " + + " 2 * sum(case when t1.k1 between date_add('2024-07-20', interval -1 month) and " + + " date_add('2024-07-20', interval 1 month) then t1.k2 else 0 end) " + + " from tbl1 t1 group by t1.k1"; + String plan = getVerboseExplain(sql); + PlanTestBase.assertContains(plan, "mv1"); + PlanTestBase.assertContains(plan, " | aggregate: sum[(if[((8: k1 >= '2024-06-20') AND " + + "(8: k1 <= '2024-08-20'), [10: sum(k2), DECIMAL128(38,2), true], cast(0 as DECIMAL128(38,2))); " + + "args: BOOLEAN,DECIMAL128,DECIMAL128; result: DECIMAL128(38,2); args nullable: true; " + + "result nullable: true]); args: DECIMAL128; result: DECIMAL128(38,2); " + + "args nullable: true; result nullable: true]"); + } + { + String sql = "select t1.k1, " + + " 2 * sum(case when t1.k1 between date_add('2024-07-20', interval -1 month) and " + + " date_add('2024-07-20', interval 1 month) then 2 * t1.k2 else 0 end) " + + " from tbl1 t1 group by t1.k1"; + String plan = getCostExplain(sql); + PlanTestBase.assertNotContains(plan, "mv1"); + } + starRocksAssert.dropMaterializedView("mv1"); + starRocksAssert.dropTable("tbl1"); + } + + @Test + public void testRewriteWithPushDownEquivalent5() throws Exception { + starRocksAssert.withTable("CREATE TABLE `tbl1` (\n" + + " `k1` date,\n" + + " `k2` decimal64(18, 2),\n" + + " `k3` varchar(255),\n" + + " `v1` bigint \n" + + ") ENGINE=OLAP \n" + + "DUPLICATE KEY(`k1`, `k2`, `k3`)\n" + + "DISTRIBUTED BY RANDOM\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ");"); + starRocksAssert.withMaterializedView("CREATE MATERIALIZED VIEW `mv1` \n" + + "DISTRIBUTED BY RANDOM\n" + + "REFRESH ASYNC\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ")\n" + + "AS SELECT k1, k3, sum(k2) from tbl1 group by k1, k3"); + // sum(decimal) + { + String sql = "select t1.k1, " + + " 2 * sum(case when t1.k1 between date_add('2024-07-20', interval -1 month) and " + + " date_add('2024-07-20', interval 1 month) then t1.k2 when t1.k3 ='xxxx' then k2 else 0 end) " + + " from tbl1 t1 group by t1.k1"; + String plan = getVerboseExplain(sql); + PlanTestBase.assertContains(plan, "mv1"); + PlanTestBase.assertContains(plan, "| aggregate: sum[(CASE WHEN (8: k1 >= '2024-06-20') AND " + + "(8: k1 <= '2024-08-20') THEN 10: sum(k2) WHEN 9: k3 = 'xxxx' THEN 10: sum(k2) " + + "ELSE CAST(0 AS DECIMAL128(38,2)) END); args: DECIMAL128; result: DECIMAL128(38,2); " + + "args nullable: true; result nullable: true]"); + } + + { + String sql = "select t1.k1, " + + " 2 * sum(case when t1.k1 between date_add('2024-07-20', interval -1 month) and " + + " date_add('2024-07-20', interval 1 month) then t1.k2 when t1.k3 ='xxxx' then k2 + 1 else 0 end) " + + " from tbl1 t1 group by t1.k1"; + String plan = getVerboseExplain(sql); + PlanTestBase.assertNotContains(plan, "mv1"); + } + starRocksAssert.dropMaterializedView("mv1"); + starRocksAssert.dropTable("tbl1"); + } + + @Test + public void testRewriteWithCaseWhen1() throws Exception { + starRocksAssert.withTable("CREATE TABLE `tbl1` (\n" + + " `k1` date,\n" + + " `k2` decimal64(18, 2),\n" + + " `k3` varchar(255),\n" + + " `v1` bigint \n" + + ") ENGINE=OLAP \n" + + "DUPLICATE KEY(`k1`, `k2`, `k3`)\n" + + "DISTRIBUTED BY RANDOM\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ");"); + starRocksAssert.withMaterializedView("CREATE MATERIALIZED VIEW `mv1` \n" + + "DISTRIBUTED BY RANDOM\n" + + "REFRESH ASYNC\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ")\n" + + "AS SELECT k1, k3, " + + " sum(k2) as sum1," + + " sum(case when t1.k1 between date_add('2024-07-20', interval -1 month) " + + "and date_add('2024-07-20', interval 1 month) then t1.k2 + 1 else 0 end ) as sum2 \n" + + " from tbl1 t1 group by k1, k3"); + // sum(decimal) + { + String sql = "select t1.k1, " + + " sum(case when t1.k1 between date_add('2024-07-20', interval -1 month) and " + + " date_add('2024-07-20', interval 1 month) then t1.k2 + 1 else 0 end) " + + " from tbl1 t1 group by t1.k1"; + String plan = getVerboseExplain(sql); + PlanTestBase.assertContains(plan, "mv1"); + PlanTestBase.assertContains(plan, " 1:AGGREGATE (update serialize)\n" + + " | STREAMING\n" + + " | aggregate: sum[([10: sum2, DECIMAL128(38,2), true]); args: DECIMAL128; result: DECIMAL128(38,2); " + + "args nullable: true; result nullable: true]\n" + + " | group by: [7: k1, DATE, true]\n" + + " | cardinality: 1"); + } + starRocksAssert.dropMaterializedView("mv1"); + starRocksAssert.dropTable("tbl1"); + } + + @Test + public void testRewriteWithCaseWhen2() throws Exception { + starRocksAssert.withTable("CREATE TABLE `tbl1` (\n" + + " `k1` date,\n" + + " `k2` decimal64(18, 2),\n" + + " `k3` varchar(255),\n" + + " `v1` bigint \n" + + ") ENGINE=OLAP \n" + + "DUPLICATE KEY(`k1`, `k2`, `k3`)\n" + + "DISTRIBUTED BY RANDOM\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ");"); + starRocksAssert.withMaterializedView("CREATE MATERIALIZED VIEW `mv1` \n" + + "DISTRIBUTED BY RANDOM\n" + + "REFRESH ASYNC\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ")\n" + + "AS SELECT k1, k3, " + + " sum(k2) as sum1," + + " sum(case when t1.k1 between date_add('2024-07-20', interval -1 month) " + + "and date_add('2024-07-20', interval 1 month) then t1.k2 + 1 else 0 end ) as sum2 \n" + + " from tbl1 t1 group by k1, k3"); + // mv: case when case when cond1 then val1 else val2 + // query: if (cond1, val1, val2) + { + String sql = "select t1.k1, " + + " sum(if(t1.k1 between date_add('2024-07-20', interval -1 month) and " + + " date_add('2024-07-20', interval 1 month), t1.k2 + 1, 0)) " + + " from tbl1 t1 group by t1.k1"; + String plan = getVerboseExplain(sql); + PlanTestBase.assertContains(plan, "mv1"); + PlanTestBase.assertContains(plan, " 1:AGGREGATE (update serialize)\n" + + " | STREAMING\n" + + " | aggregate: sum[([10: sum2, DECIMAL128(38,2), true]); args: DECIMAL128; result: DECIMAL128(38,2); " + + "args nullable: true; result nullable: true]\n" + + " | group by: [7: k1, DATE, true]\n" + + " | cardinality: 1"); + } + starRocksAssert.dropMaterializedView("mv1"); + starRocksAssert.dropTable("tbl1"); + } + + @Test + public void testRewriteWithCaseWhen3() throws Exception { + starRocksAssert.withTable("CREATE TABLE `tbl1` (\n" + + " `k1` date,\n" + + " `k2` decimal64(18, 2),\n" + + " `k3` varchar(255),\n" + + " `v1` bigint \n" + + ") ENGINE=OLAP \n" + + "DUPLICATE KEY(`k1`, `k2`, `k3`)\n" + + "DISTRIBUTED BY RANDOM\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ");"); + starRocksAssert.withMaterializedView("CREATE MATERIALIZED VIEW `mv1` \n" + + "DISTRIBUTED BY RANDOM\n" + + "REFRESH ASYNC\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ")\n" + + "AS SELECT k1, k3, " + + " sum(k2) as sum1," + + " sum(if(t1.k1 between date_add('2024-07-20', interval -1 month) and " + + " date_add('2024-07-20', interval 1 month), t1.k2 + 1, 0)) as sum2" + + " from tbl1 t1 group by k1, k3"); + // mv: if (cond1, val1, val2) + // query: case when case when cond1 then val1 else val2 + { + String sql = "select t1.k1, " + + " sum(case when t1.k1 between date_add('2024-07-20', interval -1 month) " + + "and date_add('2024-07-20', interval 1 month) then t1.k2 + 1 else 0 end ) \n" + + " from tbl1 t1 group by t1.k1"; + String plan = getVerboseExplain(sql); + PlanTestBase.assertContains(plan, "mv1"); + PlanTestBase.assertContains(plan, " 1:AGGREGATE (update serialize)\n" + + " | STREAMING\n" + + " | aggregate: sum[([10: sum2, DECIMAL128(38,2), true]); args: DECIMAL128; result: DECIMAL128(38,2); " + + "args nullable: true; result nullable: true]\n" + + " | group by: [7: k1, DATE, true]\n" + + " | cardinality: 1"); + } + starRocksAssert.dropMaterializedView("mv1"); + starRocksAssert.dropTable("tbl1"); + } } diff --git a/fe/fe-core/src/test/java/com/starrocks/sql/plan/TracerMVTest.java b/fe/fe-core/src/test/java/com/starrocks/sql/plan/TracerMVTest.java index 57babceb01260..c680acfbc587d 100644 --- a/fe/fe-core/src/test/java/com/starrocks/sql/plan/TracerMVTest.java +++ b/fe/fe-core/src/test/java/com/starrocks/sql/plan/TracerMVTest.java @@ -148,7 +148,7 @@ public void testTracerLogMV_Fail1() { Tracers.init(connectContext, Tracers.Mode.LOGS, "MV"); String mv = "select locations.locationid, empid, sum(emps.deptno) as col3 from emps " + "join locations on emps.locationid = locations.locationid group by empid,locations.locationid"; - testRewriteFail(mv, "select emps.locationid, empid, sum(emps.deptno + 1) as col3 from emps " + + testRewriteFail(mv, "select emps.locationid, empid, min(emps.deptno + 1) as col3 from emps " + "join locations on emps.locationid = locations.locationid where empid > 10 group by empid,emps.locationid"); String pr = Tracers.printLogs(); Tracers.close(); @@ -163,7 +163,7 @@ public void testTracerLogMV_Fail2() { Tracers.init(connectContext, Tracers.Mode.LOGS, "MV"); String mv = "select locations.locationid, empid, sum(emps.deptno) as col3 from emps " + "join locations on emps.locationid = locations.locationid group by empid,locations.locationid"; - testRewriteFail(mv, "select emps.locationid, empid, sum(emps.deptno + 1) as col3 from emps " + + testRewriteFail(mv, "select emps.locationid, empid, min(emps.deptno + 1) as col3 from emps " + "join locations on emps.locationid = locations.locationid where empid > 10 group by empid,emps.locationid"); String pr = Tracers.printLogs(); Tracers.close(); diff --git a/test/sql/test_materialized_view/R/test_materialized_view_agg_pushdown_rewrite b/test/sql/test_materialized_view/R/test_materialized_view_agg_pushdown_rewrite index edf13ab70ac09..a2e4166939abb 100644 --- a/test/sql/test_materialized_view/R/test_materialized_view_agg_pushdown_rewrite +++ b/test/sql/test_materialized_view/R/test_materialized_view_agg_pushdown_rewrite @@ -71,14 +71,14 @@ COMMENT "OLAP" DISTRIBUTED BY HASH(`d_datekey`) BUCKETS 1; -- result: -- !result - CREATE TABLE IF NOT EXISTS `supplier` ( - `s_suppkey` int(11) NOT NULL COMMENT "", - `s_name` varchar(26) NOT NULL COMMENT "", - `s_address` varchar(26) NOT NULL COMMENT "", - `s_city` varchar(11) NOT NULL COMMENT "", - `s_nation` varchar(16) NOT NULL COMMENT "", - `s_region` varchar(13) NOT NULL COMMENT "", - `s_phone` varchar(16) NOT NULL COMMENT "" +CREATE TABLE IF NOT EXISTS `supplier` ( + `s_suppkey` int(11) NOT NULL COMMENT "", + `s_name` varchar(26) NOT NULL COMMENT "", + `s_address` varchar(26) NOT NULL COMMENT "", + `s_city` varchar(11) NOT NULL COMMENT "", + `s_nation` varchar(16) NOT NULL COMMENT "", + `s_region` varchar(13) NOT NULL COMMENT "", + `s_phone` varchar(16) NOT NULL COMMENT "" ) ENGINE=OLAP DUPLICATE KEY(`s_suppkey`) COMMENT "OLAP" diff --git a/test/sql/test_materialized_view/R/test_sync_materialized_view_rewrite b/test/sql/test_materialized_view/R/test_sync_materialized_view_rewrite index dc5181bfac06c..945f397f0d766 100644 --- a/test/sql/test_materialized_view/R/test_sync_materialized_view_rewrite +++ b/test/sql/test_materialized_view/R/test_sync_materialized_view_rewrite @@ -297,9 +297,9 @@ CREATE MATERIALIZED VIEW mv_test_base_table1 AS SELECT col2,col3,col0,id,col1 FROM test_base_table1 ORDER BY col2,col3,col0; -- result: -- !result -analyze full table test_base_table1 with sync mode; +[UC]analyze full table test_base_table1 with sync mode; -- result: -test_db_f3ff2226c33811eea0bfdbbedfe51b53.test_base_table1 analyze status OK +test_db_b359ee2e7ea640ca89b1b2ec0cf3e504.test_base_table1 analyze status OK -- !result function: wait_materialized_view_finish() -- result: @@ -355,7 +355,7 @@ function: check_no_hit_materialized_view("select * from (select col0, col2, col3 -- result: None -- !result -select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 >='2022-04-30 12:00:00') t; +select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 >='2022-04-30 12:00:00') t order by 1; -- result: 123456789 2022-04-30 12:00:00 Guangdong 1 10001 987654321 2022-04-30 13:00:00 Fujian None None @@ -368,7 +368,7 @@ function: check_no_hit_materialized_view("select * from (select col0, col2, col3 -- result: None -- !result -select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 ='2022-04-30 12:00:00') t; +select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 ='2022-04-30 12:00:00') t order by 1; -- result: 123456789 2022-04-30 12:00:00 Guangdong 1 10001 -- !result @@ -379,16 +379,16 @@ function: check_hit_materialized_view("select * from (select col0, col2, col3, i -- result: None -- !result -select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 >='2022-04-30 12:00:00') t; +select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 >='2022-04-30 12:00:00') t order by 1; -- result: -987654321 2022-04-30 13:00:00 Fujian None None 123456789 2022-04-30 12:00:00 Guangdong 1 10001 +987654321 2022-04-30 13:00:00 Fujian None None -- !result function: check_hit_materialized_view("select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 ='2022-04-30 12:00:00') t;", "rollup: mv_test_base_table1", "rollup: test_base_table1") -- result: None -- !result -select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 ='2022-04-30 12:00:00') t; +select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 ='2022-04-30 12:00:00') t order by 1; -- result: 123456789 2022-04-30 12:00:00 Guangdong 1 10001 -- !result @@ -412,4 +412,4 @@ set enable_sync_materialized_view_rewrite=true; -- !result drop table test_base_table1; -- result: --- !result +-- !result \ No newline at end of file diff --git a/test/sql/test_materialized_view/R/test_sync_materialized_view_rewrite_with_case_when b/test/sql/test_materialized_view/R/test_sync_materialized_view_rewrite_with_case_when index 747521b41e5be..94c3da1ae997e 100644 --- a/test/sql/test_materialized_view/R/test_sync_materialized_view_rewrite_with_case_when +++ b/test/sql/test_materialized_view/R/test_sync_materialized_view_rewrite_with_case_when @@ -113,7 +113,7 @@ function: wait_materialized_view_finish() -- result: None -- !result -function: check_no_hit_materialized_view("SELECT k1, sum(case when k6 > 1 then k9 else 0 end) from t1 group by k1 order by k1;", "test_mv1") +function: check_hit_materialized_view("SELECT k1, sum(case when k6 > 1 then k9 else 0 end) from t1 group by k1 order by k1;", "test_mv1") -- result: None -- !result @@ -121,7 +121,7 @@ function: check_no_hit_materialized_view("SELECT k1, sum(case when k6 > 1 then k -- result: None -- !result -function: check_no_hit_materialized_view("SELECT k1, sum(case when k6 = 1 then k9 else 0 end) from t1 group by k1 order by k1;", "test_mv1") +function: check_hit_materialized_view("SELECT k1, sum(case when k6 = 1 then k9 else 0 end) from t1 group by k1 order by k1;", "test_mv1") -- result: None -- !result @@ -129,11 +129,11 @@ function: check_no_hit_materialized_view("SELECT k1, sum(case when k6 = 1 then k -- result: None -- !result -function: check_no_hit_materialized_view("SELECT k1, sum(k9), sum(if(k6=0, k9, 0)) as cnt0, sum(if(k6=1, k9, 0)) as cnt1, sum(if(k6=2, k9, 0)) as cnt2 from t1 group by k1 order by k1;", "test_mv1") +function: check_hit_materialized_view("SELECT k1, sum(k9), sum(if(k6=0, k9, 0)) as cnt0, sum(if(k6=1, k9, 0)) as cnt1, sum(if(k6=2, k9, 0)) as cnt2 from t1 group by k1 order by k1;", "test_mv1") -- result: None -- !result -function: check_no_hit_materialized_view("SELECT k1, sum(if(k6 > 1, k9, 0)) as cnt0 from t1 group by k1 order by k1;", "test_mv1") +function: check_hit_materialized_view("SELECT k1, sum(if(k6 > 1, k9, 0)) as cnt0 from t1 group by k1 order by k1;", "test_mv1") -- result: None -- !result diff --git a/test/sql/test_materialized_view/R/test_sync_materialized_view_with_where b/test/sql/test_materialized_view/R/test_sync_materialized_view_with_where index 0a6de9813783e..0a12d80584338 100644 --- a/test/sql/test_materialized_view/R/test_sync_materialized_view_with_where +++ b/test/sql/test_materialized_view/R/test_sync_materialized_view_with_where @@ -1,4 +1,4 @@ --- name: test_sync_materialized_view_rewrite +-- name: test_sync_materialized_view_with_where CREATE TABLE `duplicate_tbl` ( `k1` date NULL COMMENT "", `k2` datetime NULL COMMENT "", @@ -50,7 +50,7 @@ function: check_hit_materialized_view("select k1, sum(k6), max(k7) as max from d -- result: None -- !result -function: check_no_hit_materialized_view("select k1, sum(k6), max(k7 + 1) as max from duplicate_tbl where k7 > 2 group by 1", "mv_1") +function: check_hit_materialized_view("select k1, sum(k6), max(k7 + 1) as max from duplicate_tbl where k7 > 2 group by 1", "mv_1") -- result: None -- !result diff --git a/test/sql/test_materialized_view/T/test_sync_materialized_view_rewrite b/test/sql/test_materialized_view/T/test_sync_materialized_view_rewrite index bdda4c9527033..66e0e62115b06 100644 --- a/test/sql/test_materialized_view/T/test_sync_materialized_view_rewrite +++ b/test/sql/test_materialized_view/T/test_sync_materialized_view_rewrite @@ -146,7 +146,7 @@ CREATE MATERIALIZED VIEW mv_test_base_table1 AS SELECT col2,col3,col0,id,col1 FROM test_base_table1 ORDER BY col2,col3,col0; -- analyze table to global dict rewrite -analyze full table test_base_table1 with sync mode; +[UC]analyze full table test_base_table1 with sync mode; -- check rewrite by rollup mv function: wait_materialized_view_finish() function: wait_global_dict_ready("col3", "test_base_table1") @@ -169,17 +169,17 @@ select * from (select col0, col2, col3, id, col1 from test_base_table1 where col set select_ratio_threshold = 0.15; function: check_hit_materialized_view("select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 >='2022-04-30 12:00:00') t;", "rollup: mv_test_base_table1") function: check_no_hit_materialized_view("select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 >='2022-04-30 12:00:00') t;", "rollup: test_base_table1") -select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 >='2022-04-30 12:00:00') t; +select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 >='2022-04-30 12:00:00') t order by 1; function: check_hit_materialized_view("select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 ='2022-04-30 12:00:00') t;", "rollup: mv_test_base_table1") function: check_no_hit_materialized_view("select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 ='2022-04-30 12:00:00') t", "rollup: test_base_table1") -select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 ='2022-04-30 12:00:00') t; +select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 ='2022-04-30 12:00:00') t order by 1; -- if force convert or to union all, chooose mv and base table's short key set select_ratio_threshold=-1; function: check_hit_materialized_view("select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 >='2022-04-30 12:00:00') t;", "rollup: mv_test_base_table1", "rollup: test_base_table1") -select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 >='2022-04-30 12:00:00') t; +select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 >='2022-04-30 12:00:00') t order by 1; function: check_hit_materialized_view("select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 ='2022-04-30 12:00:00') t;", "rollup: mv_test_base_table1", "rollup: test_base_table1") -select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 ='2022-04-30 12:00:00') t; +select * from (select col0, col2, col3, id, col1 from test_base_table1 where col0=123456789 or col2 ='2022-04-30 12:00:00') t order by 1; set select_ratio_threshold = 0.15; set enable_sync_materialized_view_rewrite=false; diff --git a/test/sql/test_materialized_view/T/test_sync_materialized_view_rewrite_with_case_when b/test/sql/test_materialized_view/T/test_sync_materialized_view_rewrite_with_case_when index be6b468f6d0e4..48a7600b93ec3 100644 --- a/test/sql/test_materialized_view/T/test_sync_materialized_view_rewrite_with_case_when +++ b/test/sql/test_materialized_view/T/test_sync_materialized_view_rewrite_with_case_when @@ -57,12 +57,12 @@ CREATE MATERIALIZED VIEW test_mv1 AS SELECT k1, k6, SUM(k9) as sum1, MAX(k10 + 2 * k11) as max1, SUM(2 * k13) as sum2 FROM t1 GROUP BY k1, k6; function: wait_materialized_view_finish() -function: check_no_hit_materialized_view("SELECT k1, sum(case when k6 > 1 then k9 else 0 end) from t1 group by k1 order by k1;", "test_mv1") +function: check_hit_materialized_view("SELECT k1, sum(case when k6 > 1 then k9 else 0 end) from t1 group by k1 order by k1;", "test_mv1") function: check_no_hit_materialized_view("SELECT k1, sum(case when k6 > 1 then k9 + 1 else 0 end) from t1 group by k1 order by k1;", "test_mv1") -function: check_no_hit_materialized_view("SELECT k1, sum(case when k6 = 1 then k9 else 0 end) from t1 group by k1 order by k1;", "test_mv1") +function: check_hit_materialized_view("SELECT k1, sum(case when k6 = 1 then k9 else 0 end) from t1 group by k1 order by k1;", "test_mv1") function: check_no_hit_materialized_view("SELECT k1, sum(case when k6 = 1 then k9 + 1 else 0 end) from t1 group by k1 order by k1;", "test_mv1") -function: check_no_hit_materialized_view("SELECT k1, sum(k9), sum(if(k6=0, k9, 0)) as cnt0, sum(if(k6=1, k9, 0)) as cnt1, sum(if(k6=2, k9, 0)) as cnt2 from t1 group by k1 order by k1;", "test_mv1") -function: check_no_hit_materialized_view("SELECT k1, sum(if(k6 > 1, k9, 0)) as cnt0 from t1 group by k1 order by k1;", "test_mv1") +function: check_hit_materialized_view("SELECT k1, sum(k9), sum(if(k6=0, k9, 0)) as cnt0, sum(if(k6=1, k9, 0)) as cnt1, sum(if(k6=2, k9, 0)) as cnt2 from t1 group by k1 order by k1;", "test_mv1") +function: check_hit_materialized_view("SELECT k1, sum(if(k6 > 1, k9, 0)) as cnt0 from t1 group by k1 order by k1;", "test_mv1") SELECT k1, sum(case when k6 > 1 then k9 else 0 end) from t1 group by k1 order by k1; SELECT k1, sum(case when k6 > 1 then k9 + 1 else 0 end) from t1 group by k1 order by k1; diff --git a/test/sql/test_materialized_view/T/test_sync_materialized_view_with_where b/test/sql/test_materialized_view/T/test_sync_materialized_view_with_where index d33ec3bbd3287..88f3e046dcc92 100644 --- a/test/sql/test_materialized_view/T/test_sync_materialized_view_with_where +++ b/test/sql/test_materialized_view/T/test_sync_materialized_view_with_where @@ -1,4 +1,4 @@ --- name: test_sync_materialized_view_with_where @sequential +-- name: test_sync_materialized_view_with_where CREATE TABLE `duplicate_tbl` ( `k1` date NULL COMMENT "", `k2` datetime NULL COMMENT "", @@ -38,7 +38,7 @@ create materialized view mv_1 as select k1, sum(k6) as sum_k6, max(k7) as max fr function: wait_materialized_view_finish() function: check_hit_materialized_view("select k1, sum(k6), max(k7) as max from duplicate_tbl where k7 > 2 group by 1", "mv_1") function: check_hit_materialized_view("select k1, sum(k6), max(k7) as max from duplicate_tbl where k7 > 2 and k1 > 1 group by 1", "mv_1") -function: check_no_hit_materialized_view("select k1, sum(k6), max(k7 + 1) as max from duplicate_tbl where k7 > 2 group by 1", "mv_1") +function: check_hit_materialized_view("select k1, sum(k6), max(k7 + 1) as max from duplicate_tbl where k7 > 2 group by 1", "mv_1") function: check_no_hit_materialized_view("select k1, sum(k6), max(k7) as max from duplicate_tbl where k7 > 3 group by 1", "mv_1") select k1, sum(k6), max(k7) as max from duplicate_tbl where k7 > 2 group by 1 order by 1; select k1, sum(k6), max(k7) as max from duplicate_tbl where k7 > 2 and k1 > '2023-06-15' group by 1 order by 1; diff --git a/test/sql/test_materialized_view_rewrite/R/test_mv_rewrite_with_push_down_aggregate b/test/sql/test_materialized_view_rewrite/R/test_mv_rewrite_with_push_down_aggregate new file mode 100644 index 0000000000000..c527f86c40b55 --- /dev/null +++ b/test/sql/test_materialized_view_rewrite/R/test_mv_rewrite_with_push_down_aggregate @@ -0,0 +1,242 @@ +-- name: test_mv_rewrite_with_push_down_aggregate +create database db_${uuid0}; +-- result: +-- !result +use db_${uuid0}; +-- result: +-- !result +CREATE TABLE t1 ( + k1 date NULL COMMENT "", + k2 varchar(255) NULL COMMENT "", + k3 varchar(255) NULL COMMENT "", + v1 decimal64(18, 2) NULL COMMENT "" +) ENGINE=OLAP +DUPLICATE KEY(k1, k2, k3) +DISTRIBUTED BY RANDOM +PROPERTIES ( + "replication_num" = "1" +); +-- result: +-- !result +INSERT INTO t1 (k1, v1, k3, k2) VALUES +('2024-07-20', 10000.00, 'LOB1', 'Region1'), +('2024-07-19', 8500.00, 'LOB1', 'Region1'), +('2024-06-20', 9500.00, 'LOB1', 'Region1'), +('2024-06-19', 9000.00, 'LOB2', 'Region2'), +('2024-07-01', 12000.00, 'LOB2', 'Region2'); +-- result: +-- !result +CREATE MATERIALIZED VIEW test_mv1 +DISTRIBUTED BY RANDOM +REFRESH ASYNC +PROPERTIES ( + "replication_num" = "1" +) +AS SELECT t1.k1, t1.k3,t1.k2, sum(t1.v1) AS period_actual_value FROM t1 GROUP BY t1.k1, t1.k3, t1.k2; +-- result: +-- !result +refresh materialized view test_mv1 with sync mode; +[UC]result=EXPLAIN WITH date_cte AS ( + SELECT + '2024-07-20' AS date1, + 'last_month_period' AS date2, + 'LD' AS date3, + '2024-07-20' AS date4, + '2024-07-20' AS date5, + date_add('2024-07-20', interval -1 month) AS date6, + date_add('2024-07-20',interval -1 month) AS date7 +) +SELECT date_cte.date1, date_cte.date2, k3, k2, SUM(v1) +FROM t1 JOIN date_cte ON TRUE +WHERE k1 BETWEEN date_cte.date4 AND date_cte.date5 OR k1 BETWEEN date_cte.date6 AND date_cte.date7 +GROUP BY date_cte.date1, date_cte.date2, k3, k2; +-- result: +PLAN FRAGMENT 0 + OUTPUT EXPRS:10: expr | 7: expr | 3: k3 | 2: k2 | 13: sum + PARTITION: UNPARTITIONED + + RESULT SINK + + 5:EXCHANGE + +PLAN FRAGMENT 1 + OUTPUT EXPRS: + PARTITION: HASH_PARTITIONED: 15: k3, 16: k2 + + STREAM DATA SINK + EXCHANGE ID: 05 + UNPARTITIONED + + 4:Project + | : 16: k2 + | : 15: k3 + | : 'last_month_period' + | : '2024-07-20' + | : 18: sum + | + 3:AGGREGATE (merge finalize) + | output: sum(18: sum) + | group by: 15: k3, 16: k2 + | + 2:EXCHANGE + +PLAN FRAGMENT 2 + OUTPUT EXPRS: + colocate exec groups: ExecGroup{groupId=1, nodeIds=[0, 1]} + PARTITION: RANDOM + + STREAM DATA SINK + EXCHANGE ID: 02 + HASH_PARTITIONED: 15: k3, 16: k2 + + 1:AGGREGATE (update serialize) + | STREAMING + | output: sum(17: period_actual_value) + | group by: 15: k3, 16: k2 + | + 0:OlapScanNode + TABLE: test_mv1 + PREAGGREGATION: ON + PREDICATES: 14: k1 IN ('2024-06-20', '2024-07-20') + partitions=1/1 + rollup: test_mv1 + tabletRatio=2/2 + tabletList=23043,23045 + cardinality=1 + avgRowSize=0.0 + MaterializedView: true +-- !result +function: check_hit_materialized_view_plan("""${result}""", "test_mv1") +-- result: +None +-- !result +WITH date_cte AS ( + SELECT + '2024-07-20' AS date1, + 'last_month_period' AS date2, + 'LD' AS date3, + '2024-07-20' AS date4, + '2024-07-20' AS date5, + date_add('2024-07-20', interval -1 month) AS date6, + date_add('2024-07-20',interval -1 month) AS date7 +) +SELECT date_cte.date1, date_cte.date2, k3, k2, SUM(v1) +FROM t1 JOIN date_cte ON TRUE +WHERE k1 BETWEEN date_cte.date4 AND date_cte.date5 OR k1 BETWEEN date_cte.date6 AND date_cte.date7 +GROUP BY date_cte.date1, date_cte.date2, k3, k2 order by k3, k2; +-- result: +2024-07-20 last_month_period LOB1 Region1 19500.00 +-- !result +[UC]result=EXPLAIN WITH date_cte AS ( + SELECT + '2024-07-20' AS date1, + 'last_month_period' AS date2, + 'LD' AS date3, + '2024-07-20' AS date4, + '2024-07-26' AS date5, + date_add('2024-07-20', interval -1 month) AS date6, + date_add('2024-07-20',interval -1 month) AS date7 +) +SELECT + date_cte.date1, + date_cte.date2, + k2, + SUM(CASE + WHEN k1 BETWEEN date_cte.date6 AND date_cte.date7 + THEN v1 + ELSE 0 + END), + SUM(CASE + WHEN k1 BETWEEN date_cte.date1 AND date_cte.date5 + THEN v1 + ELSE 0 + END) +FROM t1 JOIN date_cte ON TRUE GROUP BY date_cte.date1, date_cte.date2, k2; +-- result: +PLAN FRAGMENT 0 + OUTPUT EXPRS:9: expr | 7: expr | 2: k2 | 15: sum | 16: sum + PARTITION: UNPARTITIONED + + RESULT SINK + + 5:EXCHANGE + +PLAN FRAGMENT 1 + OUTPUT EXPRS: + PARTITION: HASH_PARTITIONED: 23: k2 + + STREAM DATA SINK + EXCHANGE ID: 05 + UNPARTITIONED + + 4:Project + | : DictDecode(23: k2, []) + | : 'last_month_period' + | : '2024-07-20' + | : 21: sum + | : 22: sum + | + 3:AGGREGATE (merge finalize) + | output: sum(21: sum), sum(22: sum) + | group by: 23: k2 + | + 2:EXCHANGE + +PLAN FRAGMENT 2 + OUTPUT EXPRS: + colocate exec groups: ExecGroup{groupId=1, nodeIds=[0, 1]} + PARTITION: RANDOM + + STREAM DATA SINK + EXCHANGE ID: 02 + HASH_PARTITIONED: 23: k2 + + 1:AGGREGATE (update serialize) + | STREAMING + | output: sum(if((17: k1 >= CAST('2024-06-20 00:00:00' AS DATE)) AND (17: k1 <= CAST('2024-06-20 00:00:00' AS DATE)), 20: period_actual_value, CAST(0 AS DECIMAL128(38,2)))), sum(if((17: k1 >= CAST('2024-07-20' AS DATE)) AND (17: k1 <= CAST('2024-07-26' AS DATE)), 20: period_actual_value, CAST(0 AS DECIMAL128(38,2)))) + | group by: 23: k2 + | + 0:OlapScanNode + TABLE: test_mv1 + PREAGGREGATION: ON + partitions=1/1 + rollup: test_mv1 + tabletRatio=2/2 + tabletList=23043,23045 + cardinality=1 + avgRowSize=0.0 + MaterializedView: true +-- !result +function: check_hit_materialized_view_plan("""${result}""", "test_mv1") +-- result: +None +-- !result +WITH date_cte AS ( + SELECT + '2024-07-20' AS date1, + 'last_month_period' AS date2, + 'LD' AS date3, + '2024-07-20' AS date4, + '2024-07-26' AS date5, + date_add('2024-07-20', interval -1 month) AS date6, + date_add('2024-07-20',interval -1 month) AS date7 +) +SELECT + date_cte.date1, + date_cte.date2, + k2, + SUM(CASE + WHEN k1 BETWEEN date_cte.date6 AND date_cte.date7 + THEN v1 + ELSE 0 + END), + SUM(CASE + WHEN k1 BETWEEN date_cte.date1 AND date_cte.date5 + THEN v1 + ELSE 0 + END) +FROM t1 JOIN date_cte ON TRUE GROUP BY date_cte.date1, date_cte.date2, k2 order by k2; +-- result: +2024-07-20 last_month_period Region1 9500.00 10000.00 +2024-07-20 last_month_period Region2 0.00 0.00 +-- !result \ No newline at end of file diff --git a/test/sql/test_materialized_view_rewrite/T/test_mv_rewrite_with_push_down_aggregate b/test/sql/test_materialized_view_rewrite/T/test_mv_rewrite_with_push_down_aggregate new file mode 100644 index 0000000000000..e10d810cd6a26 --- /dev/null +++ b/test/sql/test_materialized_view_rewrite/T/test_mv_rewrite_with_push_down_aggregate @@ -0,0 +1,115 @@ +-- name: test_mv_rewrite_with_push_down_aggregate + +create database db_${uuid0}; +use db_${uuid0}; +CREATE TABLE t1 ( + k1 date NULL COMMENT "", + k2 varchar(255) NULL COMMENT "", + k3 varchar(255) NULL COMMENT "", + v1 decimal64(18, 2) NULL COMMENT "" +) ENGINE=OLAP +DUPLICATE KEY(k1, k2, k3) +DISTRIBUTED BY RANDOM +PROPERTIES ( + "replication_num" = "1" +); +INSERT INTO t1 (k1, v1, k3, k2) VALUES +('2024-07-20', 10000.00, 'LOB1', 'Region1'), +('2024-07-19', 8500.00, 'LOB1', 'Region1'), +('2024-06-20', 9500.00, 'LOB1', 'Region1'), +('2024-06-19', 9000.00, 'LOB2', 'Region2'), +('2024-07-01', 12000.00, 'LOB2', 'Region2'); +--mv +CREATE MATERIALIZED VIEW test_mv1 +DISTRIBUTED BY RANDOM +REFRESH ASYNC +PROPERTIES ( + "replication_num" = "1" +) +AS SELECT t1.k1, t1.k3,t1.k2, sum(t1.v1) AS period_actual_value FROM t1 GROUP BY t1.k1, t1.k3, t1.k2; +refresh materialized view test_mv1 with sync mode; + +-- case1 +[UC]result=EXPLAIN WITH date_cte AS ( + SELECT + '2024-07-20' AS date1, + 'last_month_period' AS date2, + 'LD' AS date3, + '2024-07-20' AS date4, + '2024-07-20' AS date5, + date_add('2024-07-20', interval -1 month) AS date6, + date_add('2024-07-20',interval -1 month) AS date7 +) +SELECT date_cte.date1, date_cte.date2, k3, k2, SUM(v1) +FROM t1 JOIN date_cte ON TRUE +WHERE k1 BETWEEN date_cte.date4 AND date_cte.date5 OR k1 BETWEEN date_cte.date6 AND date_cte.date7 +GROUP BY date_cte.date1, date_cte.date2, k3, k2; +function: check_hit_materialized_view_plan("""${result}""", "test_mv1") +WITH date_cte AS ( + SELECT + '2024-07-20' AS date1, + 'last_month_period' AS date2, + 'LD' AS date3, + '2024-07-20' AS date4, + '2024-07-20' AS date5, + date_add('2024-07-20', interval -1 month) AS date6, + date_add('2024-07-20',interval -1 month) AS date7 +) +SELECT date_cte.date1, date_cte.date2, k3, k2, SUM(v1) +FROM t1 JOIN date_cte ON TRUE +WHERE k1 BETWEEN date_cte.date4 AND date_cte.date5 OR k1 BETWEEN date_cte.date6 AND date_cte.date7 +GROUP BY date_cte.date1, date_cte.date2, k3, k2 order by k3, k2; + +-- case2 +[UC]result=EXPLAIN WITH date_cte AS ( + SELECT + '2024-07-20' AS date1, + 'last_month_period' AS date2, + 'LD' AS date3, + '2024-07-20' AS date4, + '2024-07-26' AS date5, + date_add('2024-07-20', interval -1 month) AS date6, + date_add('2024-07-20',interval -1 month) AS date7 +) +SELECT + date_cte.date1, + date_cte.date2, + k2, + SUM(CASE + WHEN k1 BETWEEN date_cte.date6 AND date_cte.date7 + THEN v1 + ELSE 0 + END), + SUM(CASE + WHEN k1 BETWEEN date_cte.date1 AND date_cte.date5 + THEN v1 + ELSE 0 + END) +FROM t1 JOIN date_cte ON TRUE GROUP BY date_cte.date1, date_cte.date2, k2; +function: check_hit_materialized_view_plan("""${result}""", "test_mv1") + +WITH date_cte AS ( + SELECT + '2024-07-20' AS date1, + 'last_month_period' AS date2, + 'LD' AS date3, + '2024-07-20' AS date4, + '2024-07-26' AS date5, + date_add('2024-07-20', interval -1 month) AS date6, + date_add('2024-07-20',interval -1 month) AS date7 +) +SELECT + date_cte.date1, + date_cte.date2, + k2, + SUM(CASE + WHEN k1 BETWEEN date_cte.date6 AND date_cte.date7 + THEN v1 + ELSE 0 + END), + SUM(CASE + WHEN k1 BETWEEN date_cte.date1 AND date_cte.date5 + THEN v1 + ELSE 0 + END) +FROM t1 JOIN date_cte ON TRUE GROUP BY date_cte.date1, date_cte.date2, k2 order by k2; \ No newline at end of file