From d4add5c4cb295879a2eb58f0eb89c37a5c882f18 Mon Sep 17 00:00:00 2001 From: "shuming.li" Date: Wed, 18 Sep 2024 19:55:17 +0800 Subject: [PATCH] [BugFix] Add enable_active_materialized_view_schema_strict_check config to decide whether to check schema strictlly in activing mv (#50869) Signed-off-by: shuming.li --- .../java/com/starrocks/alter/AlterJobMgr.java | 36 +++++- .../java/com/starrocks/common/Config.java | 3 + .../PartitionBasedMvRefreshProcessor.java | 5 +- .../MVRewriteWithSchemaChangeTest.java | 122 ++++++++++++++++++ test/sql/test_alter_mv/R/test_alter_mv | 97 +++++++++++++- test/sql/test_alter_mv/T/test_alter_mv | 53 +++++++- 6 files changed, 311 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/com/starrocks/alter/AlterJobMgr.java b/fe/fe-core/src/main/java/com/starrocks/alter/AlterJobMgr.java index 3e9922bf5a0e7..9d09f1107db90 100644 --- a/fe/fe-core/src/main/java/com/starrocks/alter/AlterJobMgr.java +++ b/fe/fe-core/src/main/java/com/starrocks/alter/AlterJobMgr.java @@ -50,6 +50,7 @@ import com.starrocks.catalog.Table.TableType; import com.starrocks.catalog.TableProperty; import com.starrocks.catalog.View; +import com.starrocks.common.Config; import com.starrocks.common.DdlException; import com.starrocks.common.ErrorCode; import com.starrocks.common.ErrorReport; @@ -254,10 +255,13 @@ public static QueryStatement recreateMVQuery(MaterializedView materializedView, throw new SemanticException(String.format("number of columns changed: %d != %d", existedColumns.size(), newColumns.size())); } + for (int i = 0; i < existedColumns.size(); i++) { Column existed = existedColumns.get(i); Column created = newColumns.get(i); - if (!existed.isSchemaCompatible(created)) { + if (!isSchemaCompatible(existed, created)) { + LOG.warn("Active materialized view {} failed, column schema changed: {} != {}", + materializedView.getName(), existed.toString(), created.toString()); String message = MaterializedViewExceptions.inactiveReasonForColumnNotCompatible( existed.toString(), created.toString()); materializedView.setInactiveAndReason(message); @@ -268,6 +272,36 @@ public static QueryStatement recreateMVQuery(MaterializedView materializedView, return createStmt.getQueryStatement(); } + /** + * Check if the schema of existed and created column is compatible, if not, return false + * @param existed mv's existed column + * @param created new mv's created column + */ + private static boolean isSchemaCompatible(Column existed, Column created) { + if (Config.enable_active_materialized_view_schema_strict_check) { + return existed.isSchemaCompatible(created); + } else { + return isSchemaCompatibleInLoose(existed, created); + } + } + + /** + * Check if the schema of existed and created column is compatible in loose mode + * @param t1 mv's existed column + * @param t2 new mv's created column + */ + private static boolean isSchemaCompatibleInLoose(Column t1, Column t2) { + // check whether the column name are the same + if (!t1.getName().equalsIgnoreCase(t2.getName())) { + return false; + } + // check whether the column primitive type are the same + if (!t1.getType().getPrimitiveType().equals(t2.getType().getPrimitiveType())) { + return false; + } + return true; + } + public void replayAlterMaterializedViewBaseTableInfos(AlterMaterializedViewBaseTableInfosLog log) { long dbId = log.getDbId(); long mvId = log.getMvId(); diff --git a/fe/fe-core/src/main/java/com/starrocks/common/Config.java b/fe/fe-core/src/main/java/com/starrocks/common/Config.java index a3fa3142baeb3..daa22e69b3e6d 100644 --- a/fe/fe-core/src/main/java/com/starrocks/common/Config.java +++ b/fe/fe-core/src/main/java/com/starrocks/common/Config.java @@ -3016,6 +3016,9 @@ public class Config extends ConfigBase { @ConfField(mutable = true) public static int default_mv_partition_refresh_number = 1; + @ConfField(mutable = true, comment = "Check the schema of materialized view's base table strictly or not") + public static boolean enable_active_materialized_view_schema_strict_check = true; + @ConfField(mutable = true, comment = "The default behavior of whether REFRESH IMMEDIATE or not, " + "which would refresh the materialized view after creating") diff --git a/fe/fe-core/src/main/java/com/starrocks/scheduler/PartitionBasedMvRefreshProcessor.java b/fe/fe-core/src/main/java/com/starrocks/scheduler/PartitionBasedMvRefreshProcessor.java index 6d8e9ac743493..75bb7dc41ddca 100644 --- a/fe/fe-core/src/main/java/com/starrocks/scheduler/PartitionBasedMvRefreshProcessor.java +++ b/fe/fe-core/src/main/java/com/starrocks/scheduler/PartitionBasedMvRefreshProcessor.java @@ -608,7 +608,10 @@ private boolean isMVPropertyContains(String key) { } private void postProcess() { - mvContext.ctx.getSessionVariable().setTransactionVisibleWaitTimeout(oldTransactionVisibleWaitTimeout); + // If mv's not active, mvContext may be null. + if (mvContext != null && mvContext.ctx != null) { + mvContext.ctx.getSessionVariable().setTransactionVisibleWaitTimeout(oldTransactionVisibleWaitTimeout); + } } private boolean isEnableMVRefreshQueryRewrite(ConnectContext ctx, diff --git a/fe/fe-core/src/test/java/com/starrocks/sql/optimizer/rule/transformation/materialization/MVRewriteWithSchemaChangeTest.java b/fe/fe-core/src/test/java/com/starrocks/sql/optimizer/rule/transformation/materialization/MVRewriteWithSchemaChangeTest.java index 6875211d87059..771e2ed5a3b69 100644 --- a/fe/fe-core/src/test/java/com/starrocks/sql/optimizer/rule/transformation/materialization/MVRewriteWithSchemaChangeTest.java +++ b/fe/fe-core/src/test/java/com/starrocks/sql/optimizer/rule/transformation/materialization/MVRewriteWithSchemaChangeTest.java @@ -15,6 +15,7 @@ import com.starrocks.catalog.Database; import com.starrocks.catalog.MaterializedView; +import com.starrocks.common.Config; import com.starrocks.common.FeConstants; import com.starrocks.qe.DDLStmtExecutor; import com.starrocks.server.GlobalStateMgr; @@ -151,6 +152,127 @@ public void testMVCacheInvalidAndReValid() throws Exception { plan = getFragmentPlan(sql); PlanTestBase.assertContains(plan, "test_cache_mv1"); } + starRocksAssert.dropTable("test_base_tbl"); + starRocksAssert.dropMaterializedView("test_cache_mv1"); + } + + @Test + public void testMVWithSchemaChangeInStrictMode() throws Exception { + starRocksAssert.withTable("\n" + + "CREATE TABLE test_base_tbl(\n" + + " `dt` datetime DEFAULT NULL,\n" + + " `col1` int DEFAULT NULL,\n" + + " `col2` bigint DEFAULT NULL,\n" + + " `col3` decimal(32, 2) DEFAULT NULL,\n" + + " `error_code` varchar(100) DEFAULT NULL\n" + + ")\n" + + "DUPLICATE KEY (dt)\n" + + "PARTITION BY date_trunc('day', dt);"); + starRocksAssert.withMaterializedView("CREATE MATERIALIZED VIEW test_mv1\n" + + "DISTRIBUTED BY RANDOM \n" + + "partition by date_trunc('day', dt)\n" + + "AS select dt, col1, col2, col3, error_code from test_base_tbl;"); + refreshMaterializedView("test", "test_mv1"); + + String sql = "SELECT dt, col1, col2, col3 FROM test_base_tbl AS f\n" + + " WHERE (dt >= STR_TO_DATE('2023-08-15 00:00:00', '%Y-%m-%d %H:%i:%s'))\n" + + " AND (dt <= STR_TO_DATE('2023-08-15 00:00:00', '%Y-%m-%d %H:%i:%s'))\n"; + String plan = getFragmentPlan(sql); + PlanTestBase.assertContains(plan, "test_mv1"); + + // active is not supported in strict mode: type's primitive type is the same but length is different + String alterSql = "alter table test_base_tbl modify column error_code varchar(1024);"; + AlterTableStmt alterTableStmt = (AlterTableStmt) UtFrameUtils.parseStmtWithNewParser(alterSql, + connectContext); + DDLStmtExecutor.execute(alterTableStmt, connectContext); + waitForSchemaChangeAlterJobFinish(); + + // check mv invalid + Database testDb = GlobalStateMgr.getCurrentState().getLocalMetastore().getDb("test"); + MaterializedView mv1 = ((MaterializedView) GlobalStateMgr.getCurrentState().getLocalMetastore() + .getTable(testDb.getFullName(), "test_mv1")); + Assert.assertFalse(mv1.isActive()); + try { + cluster.runSql("test", "alter materialized view test_mv1 active;"); + Assert.fail("could not active the mv"); + } catch (Exception e) { + Assert.assertTrue(e.getMessage(), e.getMessage().contains("column schema not compatible")); + } + + plan = getFragmentPlan(sql); + PlanTestBase.assertNotContains(plan, "test_mv1"); + + starRocksAssert.dropTable("test_base_tbl"); + starRocksAssert.dropMaterializedView("test_mv1"); + } + + @Test + public void testMVWithSchemaChangeInLooseMode() throws Exception { + starRocksAssert.withTable("\n" + + "CREATE TABLE test_base_tbl(\n" + + " `dt` datetime DEFAULT NULL,\n" + + " `col1` int DEFAULT NULL,\n" + + " `col2` bigint DEFAULT NULL,\n" + + " `col3` decimal(32, 2) DEFAULT NULL,\n" + + " `error_code` varchar(100) DEFAULT NULL\n" + + ")\n" + + "DUPLICATE KEY (dt)\n" + + "PARTITION BY date_trunc('day', dt);"); + starRocksAssert.withMaterializedView("CREATE MATERIALIZED VIEW test_mv1\n" + + "DISTRIBUTED BY RANDOM \n" + + "partition by date_trunc('day', dt)\n" + + "AS select dt, col1, col2, col3, error_code from test_base_tbl;"); + refreshMaterializedView("test", "test_mv1"); + + String sql = "SELECT dt, col1, col2, col3 FROM test_base_tbl AS f\n" + + " WHERE (dt >= STR_TO_DATE('2023-08-15 00:00:00', '%Y-%m-%d %H:%i:%s'))\n" + + " AND (dt <= STR_TO_DATE('2023-08-15 00:00:00', '%Y-%m-%d %H:%i:%s'))\n"; + String plan = getFragmentPlan(sql); + PlanTestBase.assertContains(plan, "test_mv1"); + + Config.enable_active_materialized_view_schema_strict_check = false; + + { + // active is ok: type's primitive type is the same, but length is different + String alterSql = "alter table test_base_tbl modify column error_code varchar(1024);"; + AlterTableStmt alterTableStmt = (AlterTableStmt) UtFrameUtils.parseStmtWithNewParser(alterSql, + connectContext); + DDLStmtExecutor.execute(alterTableStmt, connectContext); + waitForSchemaChangeAlterJobFinish(); + + cluster.runSql("test", "alter materialized view test_mv1 active;"); + plan = getFragmentPlan(sql); + PlanTestBase.assertContains(plan, "test_mv1"); + } + + { + // invalid base table: type's primitive type is different + String alterSql = "alter table test_base_tbl modify column col1 varchar(30);"; + AlterTableStmt alterTableStmt = (AlterTableStmt) UtFrameUtils.parseStmtWithNewParser(alterSql, + connectContext); + DDLStmtExecutor.execute(alterTableStmt, connectContext); + waitForSchemaChangeAlterJobFinish(); + + // check mv invalid + Database testDb = GlobalStateMgr.getCurrentState().getLocalMetastore().getDb("test"); + MaterializedView mv1 = ((MaterializedView) GlobalStateMgr.getCurrentState().getLocalMetastore() + .getTable(testDb.getFullName(), "test_mv1")); + Assert.assertFalse(mv1.isActive()); + try { + cluster.runSql("test", "alter materialized view test_mv1 active;"); + Assert.fail("could not active the mv"); + } catch (Exception e) { + Assert.assertTrue(e.getMessage(), e.getMessage().contains("column schema not compatible")); + } + + plan = getFragmentPlan(sql); + PlanTestBase.assertNotContains(plan, "test_mv1"); + } + + Config.enable_active_materialized_view_schema_strict_check = true; + + starRocksAssert.dropTable("test_base_tbl"); + starRocksAssert.dropMaterializedView("test_mv1"); } @Test diff --git a/test/sql/test_alter_mv/R/test_alter_mv b/test/sql/test_alter_mv/R/test_alter_mv index fa7ff46dec86a..e578d784a5877 100644 --- a/test/sql/test_alter_mv/R/test_alter_mv +++ b/test/sql/test_alter_mv/R/test_alter_mv @@ -1,4 +1,4 @@ --- name: test_alter_mv_add_index +-- name: test_alter_mv CREATE TABLE t0(c0 INT, c1 INT) DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) BUCKETS 1 PROPERTIES('replication_num'='1'); -- result: -- !result @@ -17,10 +17,103 @@ None -- !result [UC] REFRESH MATERIALIZED VIEW mv1 FORCE WITH SYNC MODE; -- result: -73ce5143-1296-11ef-96c0-469314b28ba2 +a2928638-6f21-11ef-b66a-cac4e30a3e1d -- !result SELECT k, cnt FROM mv1 ORDER BY k; -- result: 1 2 2 2 +-- !result +DROP TABLE t0; +-- result: +-- !result +CREATE TABLE `t1` ( + `k1` date not null, + `k2` datetime not null, + `k3` char(20), + `k4` varchar(20), + `k5` boolean, + `k6` tinyint, + `k7` smallint, + `k8` int, + `k9` bigint, + `k10` largeint, + `k11` float, + `k12` double, + `k13` decimal(27,9) ) +DUPLICATE KEY(`k1`) +PARTITION BY date_trunc('day', `k1`) +DISTRIBUTED BY RANDOM BUCKETS 3 ; +-- result: +-- !result +INSERT INTO t1 VALUES + ('2020-10-11','2020-10-24 12:12:12','k3','k4',0,0,2,3,4,5,1.1,1.12,2.889), + ('2020-10-12','2020-10-25 12:12:12','k3','k4',0,1,2,3,4,5,1.1,1.12,2.889), + ('2020-10-21','2020-10-24 12:12:12','k3','k4',0,0,2,3,4,5,1.1,1.12,2.889); +-- result: +-- !result +CREATE MATERIALIZED VIEW test_mv1 +REFRESH DEFERRED MANUAL +AS SELECT k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11, k12, k13 FROM t1; +-- result: +-- !result +admin set frontend config("enable_active_materialized_view_schema_strict_check"="false"); +-- result: +-- !result +ALTER TABLE t1 MODIFY COLUMN k13 decimal(32, 10); +-- result: +-- !result +function: wait_alter_table_finish() +-- result: +None +-- !result +[UC] ALTER MATERIALIZED VIEW test_mv1 ACTIVE; +-- result: +-- !result +INSERT INTO t1 VALUES + ('2020-10-23','2020-10-24 12:12:12','k3','k4',0,0,2,3,4,5,1.1,1.12,2.889); +-- result: +-- !result +[UC] REFRESH MATERIALIZED VIEW test_mv1 FORCE WITH SYNC MODE; +-- result: +afd4fe2c-6f21-11ef-b66a-cac4e30a3e1d +-- !result +SELECT * FROM test_mv1 ORDER BY k1; +-- result: +2020-10-11 2020-10-24 12:12:12 k3 k4 0 0 2 3 4 5 1.1 1.12 2.889000000 +2020-10-12 2020-10-25 12:12:12 k3 k4 0 1 2 3 4 5 1.1 1.12 2.889000000 +2020-10-21 2020-10-24 12:12:12 k3 k4 0 0 2 3 4 5 1.1 1.12 2.889000000 +2020-10-23 2020-10-24 12:12:12 k3 k4 0 0 2 3 4 5 1.1 1.12 2.889000000 +-- !result +ALTER TABLE t1 MODIFY COLUMN k3 char(32); +-- result: +-- !result +function: wait_alter_table_finish() +-- result: +None +-- !result +[UC] ALTER MATERIALIZED VIEW test_mv1 ACTIVE; +-- result: +-- !result +INSERT INTO t1 VALUES + ('2020-10-24','2020-10-25 12:12:12','k3','k4',0,1,2,3,4,5,1.1,1.12,2.889); +-- result: +-- !result +[UC] REFRESH MATERIALIZED VIEW test_mv1 FORCE WITH SYNC MODE; +-- result: +bb99b09b-6f21-11ef-b66a-cac4e30a3e1d +-- !result +SELECT * FROM test_mv1 ORDER BY k1; +-- result: +2020-10-11 2020-10-24 12:12:12 k3 k4 0 0 2 3 4 5 1.1 1.12 2.889000000 +2020-10-12 2020-10-25 12:12:12 k3 k4 0 1 2 3 4 5 1.1 1.12 2.889000000 +2020-10-21 2020-10-24 12:12:12 k3 k4 0 0 2 3 4 5 1.1 1.12 2.889000000 +2020-10-23 2020-10-24 12:12:12 k3 k4 0 0 2 3 4 5 1.1 1.12 2.889000000 +2020-10-24 2020-10-25 12:12:12 k3 k4 0 1 2 3 4 5 1.1 1.12 2.889000000 +-- !result +DROP TABLE t1; +-- result: +-- !result +admin set frontend config("enable_active_materialized_view_schema_strict_check"="true"); +-- result: -- !result \ No newline at end of file diff --git a/test/sql/test_alter_mv/T/test_alter_mv b/test/sql/test_alter_mv/T/test_alter_mv index eea636fd131e6..68101da956a01 100644 --- a/test/sql/test_alter_mv/T/test_alter_mv +++ b/test/sql/test_alter_mv/T/test_alter_mv @@ -1,4 +1,4 @@ --- name: test_alter_mv_add_index +-- name: test_alter_mv CREATE TABLE t0(c0 INT, c1 INT) DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) BUCKETS 1 PROPERTIES('replication_num'='1'); INSERT INTO t0 VALUES(1,1),(2,2),(1,3),(2,4); CREATE MATERIALIZED VIEW mv1 REFRESH IMMEDIATE MANUAL AS SELECT c0 AS k, count(c1) as cnt FROM t0 GROUP BY c0; @@ -6,3 +6,54 @@ CREATE INDEX index_cnt ON mv1(cnt) USING BITMAP COMMENT 'index1'; function: wait_alter_table_finish() [UC] REFRESH MATERIALIZED VIEW mv1 FORCE WITH SYNC MODE; SELECT k, cnt FROM mv1 ORDER BY k; +DROP TABLE t0; + +-- test mv with schema change +CREATE TABLE `t1` ( + `k1` date not null, + `k2` datetime not null, + `k3` char(20), + `k4` varchar(20), + `k5` boolean, + `k6` tinyint, + `k7` smallint, + `k8` int, + `k9` bigint, + `k10` largeint, + `k11` float, + `k12` double, + `k13` decimal(27,9) ) +DUPLICATE KEY(`k1`) +PARTITION BY date_trunc('day', `k1`) +DISTRIBUTED BY RANDOM BUCKETS 3 ; +INSERT INTO t1 VALUES + ('2020-10-11','2020-10-24 12:12:12','k3','k4',0,0,2,3,4,5,1.1,1.12,2.889), + ('2020-10-12','2020-10-25 12:12:12','k3','k4',0,1,2,3,4,5,1.1,1.12,2.889), + ('2020-10-21','2020-10-24 12:12:12','k3','k4',0,0,2,3,4,5,1.1,1.12,2.889); + +CREATE MATERIALIZED VIEW test_mv1 +REFRESH DEFERRED MANUAL +AS SELECT k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11, k12, k13 FROM t1; + +admin set frontend config("enable_active_materialized_view_schema_strict_check"="false"); + +-- alter column: k13 +ALTER TABLE t1 MODIFY COLUMN k13 decimal(32, 10); +function: wait_alter_table_finish() +[UC] ALTER MATERIALIZED VIEW test_mv1 ACTIVE; +INSERT INTO t1 VALUES + ('2020-10-23','2020-10-24 12:12:12','k3','k4',0,0,2,3,4,5,1.1,1.12,2.889); +[UC] REFRESH MATERIALIZED VIEW test_mv1 FORCE WITH SYNC MODE; +SELECT * FROM test_mv1 ORDER BY k1; + +-- alter column: k3 +ALTER TABLE t1 MODIFY COLUMN k3 char(32); +function: wait_alter_table_finish() +[UC] ALTER MATERIALIZED VIEW test_mv1 ACTIVE; +INSERT INTO t1 VALUES + ('2020-10-24','2020-10-25 12:12:12','k3','k4',0,1,2,3,4,5,1.1,1.12,2.889); +[UC] REFRESH MATERIALIZED VIEW test_mv1 FORCE WITH SYNC MODE; +SELECT * FROM test_mv1 ORDER BY k1; + +DROP TABLE t1; +admin set frontend config("enable_active_materialized_view_schema_strict_check"="true"); \ No newline at end of file