From 9e46918a9e29acb586ce1c0a7b8516516e748406 Mon Sep 17 00:00:00 2001 From: prinz <281208364@qq.com> Date: Mon, 25 Sep 2023 19:46:28 +0800 Subject: [PATCH] doc of partition pruning --- .../1.1-through-partition-pruning.md | 447 ++++++++++++++++++ 1 file changed, 447 insertions(+) create mode 100644 docs/MatrixOne/Performance-Tuning/optimization-concepts/1.1-through-partition-pruning.md diff --git a/docs/MatrixOne/Performance-Tuning/optimization-concepts/1.1-through-partition-pruning.md b/docs/MatrixOne/Performance-Tuning/optimization-concepts/1.1-through-partition-pruning.md new file mode 100644 index 000000000..81983a0ec --- /dev/null +++ b/docs/MatrixOne/Performance-Tuning/optimization-concepts/1.1-through-partition-pruning.md @@ -0,0 +1,447 @@ +## Partition Pruning + +### Overview + +Partition pruning is a method used for query optimization, applicable only when dealing with partitioned tables. By analyzing the filtering conditions in a query statement, partition pruning selects the partitions that may satisfy the conditions, thus avoiding scanning partitions that do not match the criteria and significantly reducing the amount of data that needs to be processed. + +For example: + +```sql +CREATE TABLE t1 ( + col1 INT NOT NULL, + col2 DATE NOT NULL, + col3 INT NOT NULL, + PRIMARY KEY(col1, col3) +) PARTITION BY KEY(col1, col3) PARTITIONS 4; + +INSERT INTO t1 VALUES +(1, '1980-12-17', 7369), +(2, '1981-02-20', 7499), +(3, '1981-02-22', 7521), +(4, '1981-04-02', 7566), +(5, '1981-09-28', 7654), +(6, '1981-05-01', 7698), +(7, '1981-06-09', 7782), +(8, '0087-07-13', 7788), +(9, '1981-11-17', 7839), +(10, '1981-09-08', 7844), +(11, '2007-07-13', 7876), +(12, '1981-12-03', 7900), +(13, '1987-07-13', 7980), +(14, '2001-11-17', 7981), +(15, '1951-11-08', 7982), +(16, '1927-10-13', 7983), +(17, '1671-12-09', 7984), +(18, '1981-11-06', 7985), +(19, '1771-12-06', 7986), +(20, '1985-10-06', 7987), +(21, '1771-10-06', 7988), +(22, '1981-10-05', 7989), +(23, '2001-12-04', 7990), +(24, '1999-08-01', 7991), +(25, '1951-11-08', 7992), +(26, '1927-10-13', 7993), +(27, '1971-12-09', 7994), +(28, '1981-12-09', 7995), +(29, '2001-11-17', 7996), +(30, '1981-12-09', 7997), +(31, '2001-11-17', 7998), +(32, '2001-11-17', 7999); + +mysql> EXPLAIN VERBOSE SELECT * FROM t1 WHERE (col1 = 1 AND col3 = 7369) OR (col1 = 6 AND col3 = 7698); ++-----------------------------------------------------------------------------------------------------+ +| QUERY PLAN | ++-----------------------------------------------------------------------------------------------------+ +| Project (cost=1000.00 outcnt=1000.00 selectivity=1.0000) | +| Output: t1.col1, t1.col2, t1.col3 | +| -> Table Scan on db2.t1 (cost=1000.00 outcnt=1000.00 selectivity=1.0000 blockNum=1) | +| Output: t1.col1, t1.col2, t1.col3 | +| Table: 't1' (0:'col1', 1:'col2', 2:'col3') | +| Hit Partition: p0, p2 | +| Filter Cond: (((t1.col1 = 1) and (t1.col3 = 7369)) or ((t1.col1 = 6) and (t1.col3 = 7698))) | ++-----------------------------------------------------------------------------------------------------+ +7 rows in set (0.00 sec) +``` + +This query prunes the partitions and scans only the p0 and p2 partitions. + +## Usage scenarios of Partition Pruning in KEY Partitioned Tables + +### Applicable Scenarios in KEY Partitioned Tables + +Only equality comparison query conditions that match the partition key can support partition pruning for KEY partitioned tables. + +```sql +CREATE TABLE t1 ( + col1 INT NOT NULL, + col2 DATE NOT NULL, + col3 INT PRIMARY KEY +) PARTITION BY KEY(col3) PARTITIONS 4; + +mysql> EXPLAIN SELECT * FROM t1 WHERE col3 = 7990 OR col3 = 7988; ++-------------------------------------------------------------------+ +| QUERY PLAN | ++-------------------------------------------------------------------+ +| Project | +| -> Table Scan on db1.t1 | +| Hit Partition: p0, p1 | +| Filter Cond: ((t1.col3 = 7990) or (t1.col3 = 7988)) | +| Block Filter Cond: ((t1.col3 = 7990) or (t1.col3 = 7988)) | ++-------------------------------------------------------------------+ +5 rows in set (0.00 sec) +``` + +In this SQL statement, the condition `col3 = 7990` can determine that all results are located in partition p0. The condition `col3 = 7988` can determine that all results are situated in partition p1. Since these two conditions are combined using OR, only partitions p0 and p1 need to be scanned, resulting in partition pruning with the outcome of p0 and p1. + +### Inapplicable Scenarios in KEY Partitioned Tables + +#### Scenario 1 + +Conditions that cannot determine if the query results are within a single partition, such as `between`, `> < >= <=`, and similar conditions, cannot benefit from partition pruning optimization. + +```sql +mysql> EXPLAIN SELECT * FROM t1 WHERE col3 >= 7782; ++----------------------------------------------+ +| QUERY PLAN | ++----------------------------------------------+ +| Project | +| -> Table Scan on db1.t1 | +| Hit Partition: all partitions | +| Filter Cond: (t1.col3 >= 7782) | +| Block Filter Cond: (t1.col3 >= 7782) | ++----------------------------------------------+ +5 rows in set (0.00 sec) +``` + +#### Scenario 2 + +In scenarios where filtering conditions can only be obtained during the execution plan generation phase, partition pruning optimization cannot be utilized. + +```sql +mysql> EXPLAIN SELECT * FROM t1 WHERE col3 = (SELECT col3 FROM t2 WHERE t1.col3 = t2.col3 AND t2.col1 < 5); ++------------------------------------------------------+ +| QUERY PLAN | ++------------------------------------------------------+ +| Project | +| -> Filter | +| Filter Cond: (t1.col3 = t2.col3 + +) | +| -> Join | +| Join Type: SINGLE | +| Join Cond: (t1.col3 = t2.col3) | +| -> Table Scan on db1.t1 | +| Hit Partition: all partitions | +| -> Table Scan on db1.t2 | +| Hit Partition: all partitions | +| Filter Cond: (t2.col1 < 5) | +| Block Filter Cond: (t2.col1 < 5) | ++------------------------------------------------------+ +12 rows in set (0.00 sec) +``` + +This query retrieves results from a subquery and constructs an equality filter condition `col3 = ?` for each row read. However, partition pruning only takes effect during the query plan generation phase, not during execution, making it ineffective in this scenario. + +## Usage scenarios of Partition Pruning in HASH Partitioned Tables + +### Applicable Scenarios in HASH Partitioned Tables + +HASH partitioned tables are similar to KEY partitioned tables, where only equality comparison query conditions can benefit from HASH partition pruning. + +```sql +CREATE TABLE employees ( + id INT NOT NULL, + fname VARCHAR(30), + lname VARCHAR(30), + hired DATE NOT NULL DEFAULT '1970-01-01', + separated DATE NOT NULL DEFAULT '9999-12-31', + job_code INT, + store_id INT +) +PARTITION BY HASH(store_id) PARTITIONS 4; + +INSERT INTO employees VALUES +(10001, 'Georgi', 'Facello', '1953-09-02','1986-06-26',120, 1), +(10002, 'Bezalel', 'Simmel', '1964-06-02','1985-11-21',150, 7), +(10003, 'Parto', 'Bamford', '1959-12-03','1986-08-28',140, 3), +(10004, 'Chirstian', 'Koblick', '1954-05-01','1986-12-01',150, 3), +(10005, 'Kyoichi', 'Maliniak', '1955-01-21','1989-09-12',150, 18), +(10006, 'Anneke', 'Preusig', '1953-04-20','1989-06-02',150, 15), +(10007, 'Tzvetan', 'Zielinski', '1957-05-23','1989-02-10',110, 6), +(10008, 'Saniya', 'Kalloufi', '1958-02-19','1994-09-15',170, 10), +(10009, 'Sumant', 'Peac', '1952-04-19','1985-02-18',110, 13), +(10010, 'Duangkaew', 'Piveteau', '1963-06-01','1989-08-24',160, 10), +(10011, 'Mary', 'Sluis', '1953-11-07','1990-01-22',120, 8), +(10012, 'Patricio', 'Bridgland', '1960-10-04','1992-12-18',120, 7), +(10013, 'Eberhardt', 'Terkki', '1963-06-07','1985-10-20',160, 17), +(10014, 'Berni', 'Genin', '1956-02-12','1987-03-11',120, 15), +(10015, 'Guoxiang', 'Nooteboom', '1959-08-19','1987-07-02',140, 8), +(10016, 'Kazuhito', 'Cappelletti', '1961-05-02','1995-01-27',140, 2), +(10017, 'Cristinel', 'Bouloucos', '1958-07-06','1993-08-03',170, 10), +(10018, 'Kazuhide', 'Peha', '1954-06-19','1987-04-03',170, 2), +(10019, 'Lillian', 'Haddadi', '1953-01-23','1999-04-30',170, 13), +(10020, 'Mayuko', 'Warwick', '1952-12-24','1991-01-26',120, 1), +(10021, 'Ramzi', 'Erde', '1960-02-20','1988-02-10',120, 9), +(10022, 'Shahaf', 'Famili', '1952-07-08','1995-08-22',130, 10), +(10023, 'Bojan', 'Montemayor', '1953-09-29','1989-12-17',120, 5), +(10024, 'Suzette', 'Pettey', '1958-09-05','1997-05-19',130, 4), +(10025, 'Prasadram', 'Heyers', '1958-10-31','1987-08-17',180, 8), +(10026, 'Yongqiao', 'Berztiss', '1953-04-03','1995-03-20',170, 4), +(10027, 'Divier', 'Reistad', '1962-07-10','1989-07-07',180, 10), +(10028, 'Domenick', 'Tempesti', '1963-11-26','1991-10-22',110, 11), +(10029, 'Otmar', 'Herbst', '1956-12-13','1985-11-20',110, 12), +(10030, 'Elvis', 'Demeyer', '1958-07-14','1994-02-17',110, 1), +(10031, 'Karsten', 'Joslin', '1959-01-27','1991-09-01',110, 10), +(10032, 'Jeong', 'Reistad', '1960-08-09','1990-06-20',120, 19), +(10033, 'Arif', 'Merlo', '1956-11-14','1987-03-18',120, 14), +(10034, 'Bader', 'Swan', '1962-12-29','1988-09-21',130, 16), +(10035, 'Alain', 'Chappelet', '1953-02-08','1988-09-05',130, 3), +(10036, 'Adamantios', 'Portugali', '1959-08-10','1992-01-03',130, 14), +(10037, 'Pradeep', 'Makrucki', '1963-07-22','1990-12-05',140, 12), +(10038, 'Huan', 'Lortz', '1960-07-20','1989-09-20',140, 7), +(10039, 'Alejandro', 'Brender', '1959-10-01','1988-01-19',110, 20), +(10040, 'Weiyi', 'Meriste', '1959-09-13','1993-02-14',140, 17); + +mysql> EXPLAIN SELECT * FROM employees WHERE store_id = 10; ++------------------------------------------------+ +| QUERY PLAN | ++------------------------------------------------+ +| Project | +| -> Table Scan on db1.employees | +| Hit Partition: p0 | +| Filter Cond: (employees.store_id = 10) | ++------------------------------------------------+ +4 rows in set (0.00 sec) +``` + +In this SQL statement, since the partition key for the partitioned table is `store_id`, the condition `store_id = 10` can determine that all results are within a single partition. The value 10 is hashed to partition p0. Therefore, it is only necessary to scan partition p0, without accessing partitions p1, p2, and p3, which do not contain relevant results. As seen in the execution plan, there is only one `TableScan` operator, specifying `Hit Partition` as p0, ensuring the effectiveness of partition pruning. + +### Inapplicable Scenarios in HASH Partitioned Tables + +#### Scenario 1 + +Conditions that cannot determine if the query results are within a single partition, such as `between`, `> < >= <=`, and similar conditions, cannot benefit from partition pruning optimization. + +```sql +CREATE TABLE employees ( + id INT NOT NULL, + fname VARCHAR(30), + lname VARCHAR(30), + hired DATE NOT NULL DEFAULT '1970-01-01', + separated DATE NOT NULL DEFAULT '9999-12-31', + job_code INT, + store_id INT +) +PARTITION BY HASH(store_id) PARTITIONS 4; + +mysql> EXPLAIN SELECT * FROM employees WHERE store_id > 15; ++------------------------------------------------+ +| QUERY PLAN | ++------------------------------------------------+ +| Project | +| -> Table Scan on db1.employees | +| Hit Partition: all partitions | +| Filter Cond: (employees.store_id > 15) | ++------------------------------------------------+ +4 rows in set (0.00 sec) + +``` + +In this SQL statement, the condition `store_id > 15` cannot determine the corresponding Hash partition, and therefore, partition pruning optimization cannot be used. + +#### Scenario 2 + +In scenarios where filtering conditions can only be obtained during the execution plan generation phase, partition pruning optimization cannot be utilized. + +```sql +mysql> EXPLAIN SELECT * FROM t1 WHERE col1 = (SELECT store_id FROM employees WHERE employees.store_id = t1.col1 AND employees.id = 10010); ++---------------------------------------------------------+ +| QUERY PLAN | ++---------------------------------------------------------+ +| Project | +| -> Filter | +| Filter Cond: (t1.col1 = employees.store_id) | +| -> Join | +| Join Type: SINGLE | +| Join Cond: (t1.col1 = employees.store_id) | +| -> Table Scan on db1.t1 | +| Hit Partition: all partitions | +| -> Table Scan on db1.employees | +| Hit Partition: all partitions | +| Filter Cond: (employees.id = 10010) | ++---------------------------------------------------------+ +11 rows in set (0.01 sec) + +``` + +This query retrieves results from a subquery and constructs an equality filter condition `col3 = ?` for each row read. However, partition pruning only takes effect during the query plan generation phase, not during execution, making it ineffective in this scenario. + +#### Scenario 3 + +Using function expressions as partitioning expressions for HASH partitioned tables is not supported for partition pruning. + +```sql + +CREATE TABLE t3 ( +col1 INT, +col2 CHAR(10), +col3 DATETIME +) PARTITION BY HASH (YEAR(col3)) PARTITIONS 4; + +INSERT INTO t3 VALUES +(10001, 'Georgi', '1999-04-05 11:01:02'), +(10002, 'Bezalel', '2004-04-03 13:11:10'), +(10003, 'Parto', '1997-04-05 11:01:02'), +(10004, 'Chirstian', '2004-04-03 13:11:10'), +(10005, 'Mary', '1998-04-05 11:01:02'), +(10006, 'Patricio', '2004-04-03 13:11:10'), +(10007, 'Eberhardt', '1953-09-02 13:11:10'), +(10008, 'Kazuhide', '1986-06-26 19:21:10'), +(10009, 'Tempesti', '1956-11-14 08:11:10'), +(10010, 'Nooteboom', '1987-03-18 23:11:10'); + +mysql> EXPLAIN SELECT * FROM t3 WHERE YEAR(col3) = 1999; ++---------------------------------------------+ +| QUERY PLAN | ++---------------------------------------------+ +| Project | +| -> Table Scan on db1.t3 | +| Hit Partition: all partitions | +| Filter Cond: (YEAR(t3.col3) = 1999) | ++---------------------------------------------+ +4 rows in set (0.00 sec) + +mysql> SELECT * FROM t3 WHERE YEAR(col3) = 1999; ++-------+--------+---------------------+ +| col1 | col2 | col3 | ++-------+--------+---------------------+ +| 10001 | Georgi | 1999-04-05 11:01:02 | ++-------+--------+---------------------+ +1 row in set (0.01 sec) +``` + +## Performance Tuning Examples for Partition Pruning + +### Example 1 + +Equality Conditions in KEY Partitioned Table: + +```sql +CREATE TABLE t1 ( + col1 INT NOT NULL, + col2 DATE NOT NULL, + col3 INT NOT NULL, + PRIMARY KEY(col1, col3) +) PARTITION BY KEY(col1, col3) PARTITIONS 4; + +mysql> EXPLAIN SELECT * FROM t1 WHERE col1 = 1 AND col3 = 7369; ++------------------------------------------------+ +| QUERY PLAN | ++------------------------------------------------+ +| Project | +| -> Table Scan on db2.t1 | +| Hit Partition: p0 | +| Filter Cond: ((t1.col1 = 1) and (t1.col3 = 7369)) | ++------------------------------------------------+ +5 rows in set (0.00 sec) +``` + +In this query, only partition p0 is accessed after partition pruning because the query conditions match the data in partition p0. + +### Example 2 + +`OR` Conditions in KEY Partitioned Table: + +```sql +mysql> EXPLAIN SELECT * FROM t1 WHERE (col1 = 1 AND col3 = 7369) OR (col1 = 6 AND col3 = 7698); ++-----------------------------------------------------------------------------------------------------+ +| QUERY PLAN | ++-----------------------------------------------------------------------------------------------------+ +| Project (cost=1000.00 outcnt=1000.00 selectivity=1.0000) | +| Output: t1.col1, t1.col2, t1.col3 | +| -> Table Scan on db2.t1 (cost=1000.00 outcnt=1000.00 selectivity=1.0000 blockNum=1) | +| Output: t1.col1, t1.col2, t1.col3 | +| Table: 't1' (0:'col1', 1:'col2', 2:'col3') | +| Hit Partition: p0, p2 | +| Filter Cond: (((t1.col1 = 1) and (t1.col3 = 7369)) or ((t1.col1 = 6) and (t1.col3 = 7698))) | ++-----------------------------------------------------------------------------------------------------+ +7 rows in set (0.00 sec) +``` + +This query prunes partitions p0 and p2 because the query conditions match the data in these two partitions. + +### Example 3 + +Equality Condition in HASH Partitioned Table: + +```sql +CREATE TABLE employees ( + id INT NOT NULL, + fname VARCHAR(30), + lname VARCHAR(30), + hired DATE NOT NULL DEFAULT '1970-01-01', + separated DATE NOT NULL DEFAULT '9999-12-31', + job_code INT, + store_id INT +) PARTITION BY HASH(store_id) PARTITIONS 4; + +mysql> EXPLAIN SELECT * FROM employees WHERE store_id = 10; ++------------------------------------------------+ +| QUERY PLAN | ++------------------------------------------------+ +| Project | +| -> Table Scan on db1.employees | +| Hit Partition: p0 | +| Filter Cond: (employees.store_id = 10) | ++------------------------------------------------+ +4 rows in set (0.00 sec) +``` + +In this query, only partition p0 contains data matching the condition `store_id = 10`, so only partition p0 is scanned. + +### Example 4 + +Multiple Conditions in HASH Partitioned Table: + +```sql +mysql> EXPLAIN SELECT * FROM employees WHERE store_id = 10 OR store_id = 15; ++---------------------------------------------------+ +| QUERY PLAN | ++---------------------------------------------------+ +| Project | +| -> Table Scan on db1.employees | +| Hit Partition: p0, p3 | +| Filter Cond: ((employees.store_id = 10) or (employees.store_id = 15)) | ++---------------------------------------------------+ +5 rows in set (0.00 sec) +``` + +This query prunes partitions p0 and p3 because these two partitions contain data matching the condition `store_id = 10 OR store_id = 15`. + +### Example 5 + +Scenarios Inapplicable for Partition Pruning: + +```sql +mysql> EXPLAIN SELECT * FROM t1 WHERE col1 > 5; ++------------------------------------------------+ +| QUERY PLAN | ++------------------------------------------------+ +| Project | +| -> Table Scan on db1.t1 | +| Hit Partition: all partitions | +| Filter Cond: (t1.col1 > 5) | +| Block Filter Cond: (t1.col1 > 5) | ++------------------------------------------------+ +5 rows in set (0.00 sec) +``` + +In this query, the condition `col1 > 5` cannot determine the corresponding partition, and therefore, partition pruning cannot be used. + +## Constraints + +MatrixOne supports four partitioning methods: `Key`, `Hash`, `Range`, and `List`: + +- However, partition pruning is currently supported only for `Key` and `Hash` partitioned tables, with pruning for other types planned for future implementation. + +- `List` partitioning and `Range` partitioning currently do not accelerate query performance.