From 0d224301feb8f7c0095e13ffa5e1fd421bf5544d Mon Sep 17 00:00:00 2001 From: Zhou Kunqin <25057648+time-and-fate@users.noreply.github.com> Date: Wed, 27 Mar 2024 19:18:48 +0800 Subject: [PATCH] choose-index: update the docs for supporting more patterns for mv index (#16788) --- choose-index.md | 499 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 354 insertions(+), 145 deletions(-) diff --git a/choose-index.md b/choose-index.md index 6ef39b09e17b0..3f8a4860eda67 100644 --- a/choose-index.md +++ b/choose-index.md @@ -195,197 +195,406 @@ mysql> EXPLAIN SELECT /*+ use_index_merge(t1, idx) */ * FROM t1 WHERE JSON_OVERL The composite multi-valued index can also be accessed through IndexMerge: ```sql -mysql> CREATE TABLE t2 (a INT, j JSON, b INT, INDEX idx(a, (CAST(j->'$.path' AS SIGNED ARRAY)), b)); -Query OK, 0 rows affected (0.04 sec) - -mysql> EXPLAIN SELECT /*+ use_index_merge(t2, idx) */ * FROM t2 WHERE a=1 AND (1 MEMBER OF (j->'$.path')) AND b=2; -+---------------------------------+---------+-----------+-----------------------------------------------------------------------------------+------------------------------------------------------------------------+ -| id | estRows | task | access object | operator info | -+---------------------------------+---------+-----------+-----------------------------------------------------------------------------------+------------------------------------------------------------------------+ -| Selection_5 | 0.01 | root | | json_memberof(cast(1, json BINARY), json_extract(test.t2.j, "$.path")) | -| └─IndexMerge_8 | 0.00 | root | | type: union | -| ├─IndexRangeScan_6(Build) | 0.00 | cop[tikv] | table:t2, index:idx(a, cast(json_extract(`j`, _utf8'$.path') as signed array), b) | range:[1 1 2,1 1 2], keep order:false, stats:pseudo | -| └─TableRowIDScan_7(Probe) | 0.00 | cop[tikv] | table:t2 | keep order:false, stats:pseudo | -+---------------------------------+---------+-----------+-----------------------------------------------------------------------------------+------------------------------------------------------------------------+ -4 rows in set, 1 warning (0.00 sec) +CREATE TABLE t2 (a INT, j JSON, b INT, k JSON, INDEX idx(a, (CAST(j->'$.path' AS SIGNED ARRAY)), b), INDEX idx2(b, (CAST(k->'$.path' AS SIGNED ARRAY)))); +EXPLAIN SELECT /*+ use_index_merge(t2, idx) */ * FROM t2 WHERE a=1 AND (1 MEMBER OF (j->'$.path')) AND b=2; +EXPLAIN SELECT /*+ use_index_merge(t2, idx) */ * FROM t2 WHERE a=1 AND JSON_CONTAINS((j->'$.path'), '[1, 2, 3]'); +EXPLAIN SELECT /*+ use_index_merge(t2, idx) */ * FROM t2 WHERE a=1 AND JSON_OVERLAPS((j->'$.path'), '[1, 2, 3]'); +EXPLAIN SELECT /*+ use_index_merge(t2, idx, idx2) */ * FROM t2 WHERE (a=1 AND 1 member of (j->'$.path')) AND (b=1 AND 2 member of (k->'$.path')); +``` -mysql> EXPLAIN SELECT /*+ use_index_merge(t2, idx) */ * FROM t2 WHERE a=1 AND JSON_CONTAINS((j->'$.path'), '[1, 2, 3]'); +```sql +> EXPLAIN SELECT /*+ use_index_merge(t2, idx) */ * FROM t2 WHERE a=1 AND (1 MEMBER OF (j->'$.path')) AND b=2; ++-------------------------------+---------+-----------+-----------------------------------------------------------------------------------+-----------------------------------------------------+ +| id | estRows | task | access object | operator info | ++-------------------------------+---------+-----------+-----------------------------------------------------------------------------------+-----------------------------------------------------+ +| IndexMerge_7 | 0.00 | root | | type: union | +| ├─IndexRangeScan_5(Build) | 0.00 | cop[tikv] | table:t2, index:idx(a, cast(json_extract(`j`, _utf8'$.path') as signed array), b) | range:[1 1 2,1 1 2], keep order:false, stats:pseudo | +| └─TableRowIDScan_6(Probe) | 0.00 | cop[tikv] | table:t2 | keep order:false, stats:pseudo | ++-------------------------------+---------+-----------+-----------------------------------------------------------------------------------+-----------------------------------------------------+ + +> EXPLAIN SELECT /*+ use_index_merge(t2, idx) */ * FROM t2 WHERE a=1 AND JSON_CONTAINS((j->'$.path'), '[1, 2, 3]'); +-------------------------------+---------+-----------+-----------------------------------------------------------------------------------+-------------------------------------------------+ | id | estRows | task | access object | operator info | +-------------------------------+---------+-----------+-----------------------------------------------------------------------------------+-------------------------------------------------+ -| IndexMerge_9 | 0.10 | root | | type: intersection | +| IndexMerge_9 | 0.00 | root | | type: intersection | | ├─IndexRangeScan_5(Build) | 0.10 | cop[tikv] | table:t2, index:idx(a, cast(json_extract(`j`, _utf8'$.path') as signed array), b) | range:[1 1,1 1], keep order:false, stats:pseudo | | ├─IndexRangeScan_6(Build) | 0.10 | cop[tikv] | table:t2, index:idx(a, cast(json_extract(`j`, _utf8'$.path') as signed array), b) | range:[1 2,1 2], keep order:false, stats:pseudo | | ├─IndexRangeScan_7(Build) | 0.10 | cop[tikv] | table:t2, index:idx(a, cast(json_extract(`j`, _utf8'$.path') as signed array), b) | range:[1 3,1 3], keep order:false, stats:pseudo | -| └─TableRowIDScan_8(Probe) | 0.10 | cop[tikv] | table:t2 | keep order:false, stats:pseudo | +| └─TableRowIDScan_8(Probe) | 0.00 | cop[tikv] | table:t2 | keep order:false, stats:pseudo | +-------------------------------+---------+-----------+-----------------------------------------------------------------------------------+-------------------------------------------------+ -5 rows in set (0.00 sec) -mysql> EXPLAIN SELECT /*+ use_index_merge(t2, idx) */ * FROM t2 WHERE a=1 AND JSON_OVERLAPS((j->'$.path'), '[1, 2, 3]'); +> EXPLAIN SELECT /*+ use_index_merge(t2, idx) */ * FROM t2 WHERE a=1 AND JSON_OVERLAPS((j->'$.path'), '[1, 2, 3]'); +---------------------------------+---------+-----------+-----------------------------------------------------------------------------------+----------------------------------------------------------------------------------+ | id | estRows | task | access object | operator info | +---------------------------------+---------+-----------+-----------------------------------------------------------------------------------+----------------------------------------------------------------------------------+ -| Selection_5 | 8.00 | root | | json_overlaps(json_extract(test.t2.j, "$.path"), cast("[1, 2, 3]", json BINARY)) | -| └─IndexMerge_10 | 0.10 | root | | type: union | +| Selection_5 | 0.24 | root | | json_overlaps(json_extract(test.t2.j, "$.path"), cast("[1, 2, 3]", json BINARY)) | +| └─IndexMerge_10 | 0.30 | root | | type: union | | ├─IndexRangeScan_6(Build) | 0.10 | cop[tikv] | table:t2, index:idx(a, cast(json_extract(`j`, _utf8'$.path') as signed array), b) | range:[1 1,1 1], keep order:false, stats:pseudo | | ├─IndexRangeScan_7(Build) | 0.10 | cop[tikv] | table:t2, index:idx(a, cast(json_extract(`j`, _utf8'$.path') as signed array), b) | range:[1 2,1 2], keep order:false, stats:pseudo | | ├─IndexRangeScan_8(Build) | 0.10 | cop[tikv] | table:t2, index:idx(a, cast(json_extract(`j`, _utf8'$.path') as signed array), b) | range:[1 3,1 3], keep order:false, stats:pseudo | -| └─TableRowIDScan_9(Probe) | 0.10 | cop[tikv] | table:t2 | keep order:false, stats:pseudo | +| └─TableRowIDScan_9(Probe) | 0.30 | cop[tikv] | table:t2 | keep order:false, stats:pseudo | +---------------------------------+---------+-----------+-----------------------------------------------------------------------------------+----------------------------------------------------------------------------------+ -6 rows in set, 1 warning (0.00 sec) + +> EXPLAIN SELECT /*+ use_index_merge(t2, idx, idx2) */ * FROM t2 WHERE (a=1 AND 1 member of (j->'$.path')) AND (b=1 AND 2 member of (k->'$.path')); ++-------------------------------+---------+-----------+-----------------------------------------------------------------------------------+-----------------------------------------------------+ +| id | estRows | task | access object | operator info | ++-------------------------------+---------+-----------+-----------------------------------------------------------------------------------+-----------------------------------------------------+ +| IndexMerge_8 | 0.00 | root | | type: intersection | +| ├─IndexRangeScan_5(Build) | 0.00 | cop[tikv] | table:t2, index:idx(a, cast(json_extract(`j`, _utf8'$.path') as signed array), b) | range:[1 1 1,1 1 1], keep order:false, stats:pseudo | +| ├─IndexRangeScan_6(Build) | 0.10 | cop[tikv] | table:t2, index:idx2(b, cast(json_extract(`k`, _utf8'$.path') as signed array)) | range:[1 2,1 2], keep order:false, stats:pseudo | +| └─TableRowIDScan_7(Probe) | 0.00 | cop[tikv] | table:t2 | keep order:false, stats:pseudo | ++-------------------------------+---------+-----------+-----------------------------------------------------------------------------------+-----------------------------------------------------+ ``` -For `OR`/`AND` conditions composed of multiple `member of` expressions that can access the same multi-valued index or multiple multi-valued indexes, due to the single nature of `member of`, IndexMerge `UNION` or `INTERSECTION` can be used to access the multi-valued index: +TiDB can also use IndexMerge to access both multi-valued indexes and normal indexes. For example: ```sql -mysql> CREATE TABLE t3 (a INT, j JSON, b INT, k JSON, INDEX idx(a, (CAST(j AS SIGNED ARRAY))), INDEX idx2(b,(CAST(k as SIGNED ARRAY)))); -Query OK, 0 rows affected (0.04 sec) +CREATE TABLE t3(j1 JSON, j2 JSON, a INT, INDEX k1((CAST(j1->'$.path' AS SIGNED ARRAY))), INDEX k2((CAST(j2->'$.path' AS SIGNED ARRAY))), INDEX ka(a)); +EXPLAIN SELECT /*+ use_index_merge(t3, k1, k2, ka) */ * FROM t3 WHERE 1 member of (j1->'$.path') OR a = 3; +EXPLAIN SELECT /*+ use_index_merge(t3, k1, k2, ka) */ * FROM t3 WHERE 1 member of (j1->'$.path') AND 2 member of (j2->'$.path') AND (a = 3); ``` -Use `UNION`: - ```sql -mysql> EXPLAIN SELECT /*+ use_index_merge(t3, idx) */ * FROM t3 WHERE ((a=1 AND (1 member of (j)))) OR ((a=2 AND (2 member of (j)))); -+---------------------------------+---------+-----------+---------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+ -| id | estRows | task | access object | operator info | -+---------------------------------+---------+-----------+---------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+ -| Selection_5 | 0.08 | root | | or(and(eq(test.t3.a, 1), json_memberof(cast(1, json BINARY), test.t3.j)), and(eq(test.t3.a, 2), json_memberof(cast(2, json BINARY), test.t3.j))) | -| └─IndexMerge_9 | 0.10 | root | | type: union | -| ├─IndexRangeScan_6(Build) | 0.10 | cop[tikv] | table:t3, index:idx(a, cast(`j` as signed array)) | range:[1 1,1 1], keep order:false, stats:pseudo | -| ├─IndexRangeScan_7(Build) | 0.10 | cop[tikv] | table:t3, index:idx(a, cast(`j` as signed array)) | range:[2 2,2 2], keep order:false, stats:pseudo | -| └─TableRowIDScan_8(Probe) | 0.10 | cop[tikv] | table:t3 | keep order:false, stats:pseudo | -+---------------------------------+---------+-----------+---------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+ +> EXPLAIN SELECT /*+ use_index_merge(t3, k1, k2, ka) */ * FROM t3 WHERE 1 member of (j1->'$.path') OR a = 3; ++-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ +| id | estRows | task | access object | operator info | ++-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ +| IndexMerge_8 | 19.99 | root | | type: union | +| ├─IndexRangeScan_5(Build) | 10.00 | cop[tikv] | table:t3, index:k1(cast(json_extract(`j1`, _utf8'$.path') as signed array)) | range:[1,1], keep order:false, stats:pseudo | +| ├─IndexRangeScan_6(Build) | 10.00 | cop[tikv] | table:t3, index:ka(a) | range:[3,3], keep order:false, stats:pseudo | +| └─TableRowIDScan_7(Probe) | 19.99 | cop[tikv] | table:t3 | keep order:false, stats:pseudo | ++-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ + +> EXPLAIN SELECT /*+ use_index_merge(t3, k1, k2, ka) */ * FROM t3 WHERE 1 member of (j1->'$.path') AND 2 member of (j2->'$.path') AND (a = 3); ++-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ +| id | estRows | task | access object | operator info | ++-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ +| IndexMerge_9 | 0.00 | root | | type: intersection | +| ├─IndexRangeScan_5(Build) | 10.00 | cop[tikv] | table:t3, index:ka(a) | range:[3,3], keep order:false, stats:pseudo | +| ├─IndexRangeScan_6(Build) | 10.00 | cop[tikv] | table:t3, index:k1(cast(json_extract(`j1`, _utf8'$.path') as signed array)) | range:[1,1], keep order:false, stats:pseudo | +| ├─IndexRangeScan_7(Build) | 10.00 | cop[tikv] | table:t3, index:k2(cast(json_extract(`j2`, _utf8'$.path') as signed array)) | range:[2,2], keep order:false, stats:pseudo | +| └─TableRowIDScan_8(Probe) | 0.00 | cop[tikv] | table:t3 | keep order:false, stats:pseudo | ++-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ ``` -Use `INTERSECTION`: +If several `json_member_of`, `json_contains` or `json_overlaps` conditions are connected with `OR` or `AND`, they need to meet the following requirements to access multi-valued indexes with IndexMerge: ```sql -tidb> EXPLAIN SELECT /*+ use_index_merge(t3, idx, idx2) */ * FROM t3 WHERE ((a=1 AND (1 member of (j)))) AND ((b=1 AND (2 member of (k)))); -+-------------------------------+---------+-----------+----------------------------------------------------+-------------------------------------------------+ -| id | estRows | task | access object | operator info | -+-------------------------------+---------+-----------+----------------------------------------------------+-------------------------------------------------+ -| IndexMerge_8 | 0.10 | root | | type: intersection | -| ├─IndexRangeScan_5(Build) | 0.10 | cop[tikv] | table:t3, index:idx(a, cast(`j` as signed array)) | range:[1 1,1 1], keep order:false, stats:pseudo | -| ├─IndexRangeScan_6(Build) | 0.10 | cop[tikv] | table:t3, index:idx2(b, cast(`k` as signed array)) | range:[1 2,1 2], keep order:false, stats:pseudo | -| └─TableRowIDScan_7(Probe) | 0.10 | cop[tikv] | table:t3 | keep order:false, stats:pseudo | -+-------------------------------+---------+-----------+----------------------------------------------------+-------------------------------------------------+ -4 rows in set (0.01 sec) +CREATE TABLE t4(a INT, j JSON, INDEX mvi1((CAST(j->'$.a' AS UNSIGNED ARRAY))), INDEX mvi2((CAST(j->'$.b' AS UNSIGNED ARRAY)))); ``` -### Partially supported scenarios +- For conditions connected with `OR`, each of them needs to be able to be accessed with IndexMerge respectively. For example: + + ```sql + EXPLAIN SELECT /*+ use_index_merge(t4, mvi1) */ * FROM t4 WHERE json_overlaps(j->'$.a', '[1, 2]') OR json_overlaps(j->'$.a', '[3, 4]'); + EXPLAIN SELECT /*+ use_index_merge(t4, mvi1) */ * FROM t4 WHERE json_overlaps(j->'$.a', '[1, 2]') OR json_length(j->'$.a') = 3; + SHOW WARNINGS; + ``` + + ```sql + > EXPLAIN SELECT /*+ use_index_merge(t4, mvi1) */ * FROM t4 WHERE json_overlaps(j->'$.a', '[1, 2]') OR json_overlaps(j->'$.a', '[3, 4]'); + +----------------------------------+---------+-----------+-----------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | id | estRows | task | access object | operator info | + +----------------------------------+---------+-----------+-----------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | Selection_5 | 31.95 | root | | or(json_overlaps(json_extract(test.t4.j, "$.a"), cast("[1, 2]", json BINARY)), json_overlaps(json_extract(test.t4.j, "$.a"), cast("[3, 4]", json BINARY))) | + | └─IndexMerge_11 | 39.94 | root | | type: union | + | ├─IndexRangeScan_6(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[1,1], keep order:false, stats:pseudo | + | ├─IndexRangeScan_7(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[2,2], keep order:false, stats:pseudo | + | ├─IndexRangeScan_8(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[3,3], keep order:false, stats:pseudo | + | ├─IndexRangeScan_9(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[4,4], keep order:false, stats:pseudo | + | └─TableRowIDScan_10(Probe) | 39.94 | cop[tikv] | table:t4 | keep order:false, stats:pseudo | + +----------------------------------+---------+-----------+-----------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + -- json_length(j->'$.a') = 3 cannot be accessed with IndexMerge directly, so TiDB cannot use IndexMerge for this SQL statement. + > EXPLAIN SELECT /*+ use_index_merge(t4, mvi1) */ * FROM t4 WHERE json_overlaps(j->'$.a', '[1, 2]') OR json_length(j->'$.a') = 3; + +-------------------------+----------+-----------+---------------+------------------------------------------------------------------------------------------------------------------------------------+ + | id | estRows | task | access object | operator info | + +-------------------------+----------+-----------+---------------+------------------------------------------------------------------------------------------------------------------------------------+ + | Selection_5 | 8000.00 | root | | or(json_overlaps(json_extract(test.t4.j, "$.a"), cast("[1, 2]", json BINARY)), eq(json_length(json_extract(test.t4.j, "$.a")), 3)) | + | └─TableReader_7 | 10000.00 | root | | data:TableFullScan_6 | + | └─TableFullScan_6 | 10000.00 | cop[tikv] | table:t4 | keep order:false, stats:pseudo | + +-------------------------+----------+-----------+---------------+------------------------------------------------------------------------------------------------------------------------------------+ + + > SHOW WARNINGS; + +---------+------+----------------------------+ + | Level | Code | Message | + +---------+------+----------------------------+ + | Warning | 1105 | IndexMerge is inapplicable | + +---------+------+----------------------------+ + ``` + +- For conditions connected with `AND`, some of them need to be able to be accessed with IndexMerge respectively. TiDB can only use IndexMerge to access multi-valued indexes with these conditions. For example: + + ```sql + EXPLAIN SELECT /*+ use_index_merge(t4, mvi1) */ * FROM t4 WHERE json_contains(j->'$.a', '[1, 2]') AND json_contains(j->'$.a', '[3, 4]'); + EXPLAIN SELECT /*+ use_index_merge(t4, mvi1) */ * FROM t4 WHERE json_contains(j->'$.a', '[1, 2]') AND json_contains(j->'$.a', '[3, 4]') AND json_length(j->'$.a') = 2; + ``` + + ```sql + > EXPLAIN SELECT /*+ use_index_merge(t4, mvi1) */ * FROM t4 WHERE json_contains(j->'$.a', '[1, 2]') AND json_contains(j->'$.a', '[3, 4]'); + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ + | id | estRows | task | access object | operator info | + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ + | IndexMerge_10 | 0.00 | root | | type: intersection | + | ├─IndexRangeScan_5(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[1,1], keep order:false, stats:pseudo | + | ├─IndexRangeScan_6(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[2,2], keep order:false, stats:pseudo | + | ├─IndexRangeScan_7(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[3,3], keep order:false, stats:pseudo | + | ├─IndexRangeScan_8(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[4,4], keep order:false, stats:pseudo | + | └─TableRowIDScan_9(Probe) | 0.00 | cop[tikv] | table:t4 | keep order:false, stats:pseudo | + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ + + -- json_length(j->'$.a') = 3 cannot be accessed with IndexMerge directly, so TiDB uses IndexMerge to access the other two json_contains conditions, and json_length(j->'$.a') = 3 becomes a Selection operator. + > EXPLAIN SELECT /*+ use_index_merge(t4, mvi1) */ * FROM t4 WHERE json_contains(j->'$.a', '[1, 2]') AND json_contains(j->'$.a', '[3, 4]') AND json_length(j->'$.a') = 2; + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+----------------------------------------------------+ + | id | estRows | task | access object | operator info | + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+----------------------------------------------------+ + | IndexMerge_11 | 0.00 | root | | type: intersection | + | ├─IndexRangeScan_5(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[1,1], keep order:false, stats:pseudo | + | ├─IndexRangeScan_6(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[2,2], keep order:false, stats:pseudo | + | ├─IndexRangeScan_7(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[3,3], keep order:false, stats:pseudo | + | ├─IndexRangeScan_8(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[4,4], keep order:false, stats:pseudo | + | └─Selection_10(Probe) | 0.00 | cop[tikv] | | eq(json_length(json_extract(test.t4.j, "$.a")), 2) | + | └─TableRowIDScan_9 | 0.00 | cop[tikv] | table:t4 | keep order:false, stats:pseudo | + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+----------------------------------------------------+ + ``` + +- All the conditions that are used for the IndexMerge must match the semantics of `OR` or `AND` that connects them. + + - If `json_contains` is connected with `AND`, it matches the semantics. For example: + + ```sql + EXPLAIN SELECT /*+ use_index_merge(t4, mvi1, mvi2) */ * FROM t4 WHERE json_contains(j->'$.a', '[1]') AND json_contains(j->'$.b', '[2, 3]'); + EXPLAIN SELECT /*+ use_index_merge(t4, mvi1, mvi2) */ * FROM t4 WHERE json_contains(j->'$.a', '[1]') OR json_contains(j->'$.b', '[2, 3]'); + ``` + + ```sql + > EXPLAIN SELECT /*+ use_index_merge(t4, mvi1, mvi2) */ * FROM t4 WHERE json_contains(j->'$.a', '[1]') AND json_contains(j->'$.b', '[2, 3]'); + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ + | id | estRows | task | access object | operator info | + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ + | IndexMerge_9 | 0.00 | root | | type: intersection | + | ├─IndexRangeScan_5(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[1,1], keep order:false, stats:pseudo | + | ├─IndexRangeScan_6(Build) | 10.00 | cop[tikv] | table:t4, index:mvi2(cast(json_extract(`j`, _utf8'$.b') as unsigned array)) | range:[2,2], keep order:false, stats:pseudo | + | ├─IndexRangeScan_7(Build) | 10.00 | cop[tikv] | table:t4, index:mvi2(cast(json_extract(`j`, _utf8'$.b') as unsigned array)) | range:[3,3], keep order:false, stats:pseudo | + | └─TableRowIDScan_8(Probe) | 0.00 | cop[tikv] | table:t4 | keep order:false, stats:pseudo | + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ + + -- The conditions do not match the semantics, so TiDB cannot use IndexMerge for this SQL statement as explained above. + > EXPLAIN SELECT /*+ use_index_merge(t4, mvi1, mvi2) */ * FROM t4 WHERE json_contains(j->'$.a', '[1]') OR json_contains(j->'$.b', '[2, 3]'); + +-------------------------+----------+-----------+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ + | id | estRows | task | access object | operator info | + +-------------------------+----------+-----------+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ + | TableReader_7 | 10.01 | root | | data:Selection_6 | + | └─Selection_6 | 10.01 | cop[tikv] | | or(json_contains(json_extract(test.t4.j, "$.a"), cast("[1]", json BINARY)), json_contains(json_extract(test.t4.j, "$.b"), cast("[2, 3]", json BINARY))) | + | └─TableFullScan_5 | 10000.00 | cop[tikv] | table:t4 | keep order:false, stats:pseudo | + +-------------------------+----------+-----------+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ + ``` + + - If `json_overlaps` is connected with `OR`, it matches the semantics. For example: + + ```sql + EXPLAIN SELECT /*+ use_index_merge(t4, mvi1, mvi2) */ * FROM t4 WHERE json_overlaps(j->'$.a', '[1]') OR json_overlaps(j->'$.b', '[2, 3]'); + EXPLAIN SELECT /*+ use_index_merge(t4, mvi1, mvi2) */ * FROM t4 WHERE json_overlaps(j->'$.a', '[1]') AND json_overlaps(j->'$.b', '[2, 3]'); + ``` + + ```sql + > EXPLAIN SELECT /*+ use_index_merge(t4, mvi1, mvi2) */ * FROM t4 WHERE json_overlaps(j->'$.a', '[1]') OR json_overlaps(j->'$.b', '[2, 3]'); + +---------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ + | id | estRows | task | access object | operator info | + +---------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ + | Selection_5 | 23.98 | root | | or(json_overlaps(json_extract(test.t4.j, "$.a"), cast("[1]", json BINARY)), json_overlaps(json_extract(test.t4.j, "$.b"), cast("[2, 3]", json BINARY))) | + | └─IndexMerge_10 | 29.97 | root | | type: union | + | ├─IndexRangeScan_6(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[1,1], keep order:false, stats:pseudo | + | ├─IndexRangeScan_7(Build) | 10.00 | cop[tikv] | table:t4, index:mvi2(cast(json_extract(`j`, _utf8'$.b') as unsigned array)) | range:[2,2], keep order:false, stats:pseudo | + | ├─IndexRangeScan_8(Build) | 10.00 | cop[tikv] | table:t4, index:mvi2(cast(json_extract(`j`, _utf8'$.b') as unsigned array)) | range:[3,3], keep order:false, stats:pseudo | + | └─TableRowIDScan_9(Probe) | 29.97 | cop[tikv] | table:t4 | keep order:false, stats:pseudo | + +---------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ + + -- The conditions do not match the semantics, so TiDB can only use IndexMerge for part of the conditions of this SQL statement as explained above. + > EXPLAIN SELECT /*+ use_index_merge(t4, mvi1, mvi2) */ * FROM t4 WHERE json_overlaps(j->'$.a', '[1]') AND json_overlaps(j->'$.b', '[2, 3]'); + +---------------------------------+---------+-----------+-----------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------+ + | id | estRows | task | access object | operator info | + +---------------------------------+---------+-----------+-----------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------+ + | Selection_5 | 15.99 | root | | json_overlaps(json_extract(test.t4.j, "$.a"), cast("[1]", json BINARY)), json_overlaps(json_extract(test.t4.j, "$.b"), cast("[2, 3]", json BINARY)) | + | └─IndexMerge_8 | 10.00 | root | | type: union | + | ├─IndexRangeScan_6(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[1,1], keep order:false, stats:pseudo | + | └─TableRowIDScan_7(Probe) | 10.00 | cop[tikv] | table:t4 | keep order:false, stats:pseudo | + +---------------------------------+---------+-----------+-----------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------+ + ``` + + - If `json_member_of` is connected with `OR` or `AND`, it matches the semantics. For example: + + ```sql + EXPLAIN SELECT /*+ use_index_merge(t4, mvi1, mvi2) */ * FROM t4 WHERE 1 member of (j->'$.a') AND 2 member of (j->'$.b') AND 3 member of (j->'$.a'); + EXPLAIN SELECT /*+ use_index_merge(t4, mvi1, mvi2) */ * FROM t4 WHERE 1 member of (j->'$.a') OR 2 member of (j->'$.b') OR 3 member of (j->'$.a'); + ``` + + ```sql + > EXPLAIN SELECT /*+ use_index_merge(t4, mvi1, mvi2) */ * FROM t4 WHERE 1 member of (j->'$.a') AND 2 member of (j->'$.b') AND 3 member of (j->'$.a'); + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ + | id | estRows | task | access object | operator info | + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ + | IndexMerge_9 | 0.00 | root | | type: intersection | + | ├─IndexRangeScan_5(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[1,1], keep order:false, stats:pseudo | + | ├─IndexRangeScan_6(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[3,3], keep order:false, stats:pseudo | + | ├─IndexRangeScan_7(Build) | 10.00 | cop[tikv] | table:t4, index:mvi2(cast(json_extract(`j`, _utf8'$.b') as unsigned array)) | range:[2,2], keep order:false, stats:pseudo | + | └─TableRowIDScan_8(Probe) | 0.00 | cop[tikv] | table:t4 | keep order:false, stats:pseudo | + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ + + > EXPLAIN SELECT /*+ use_index_merge(t4, mvi1, mvi2) */ * FROM t4 WHERE 1 member of (j->'$.a') OR 2 member of (j->'$.b') OR 3 member of (j->'$.a'); + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ + | id | estRows | task | access object | operator info | + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ + | IndexMerge_9 | 29.97 | root | | type: union | + | ├─IndexRangeScan_5(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[1,1], keep order:false, stats:pseudo | + | ├─IndexRangeScan_6(Build) | 10.00 | cop[tikv] | table:t4, index:mvi2(cast(json_extract(`j`, _utf8'$.b') as unsigned array)) | range:[2,2], keep order:false, stats:pseudo | + | ├─IndexRangeScan_7(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[3,3], keep order:false, stats:pseudo | + | └─TableRowIDScan_8(Probe) | 29.97 | cop[tikv] | table:t4 | keep order:false, stats:pseudo | + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ + ``` + + - If `json_contains` conditions that contain multiple values are connected with `OR`, or `json_overlaps` conditions that contain multiple values are connected with `AND`, they do not match the semantics, but they match the semantics if they only contain one value. For example: + + ```sql + -- Refer to the preceding examples for conditions that do not match the semantics. The following only provides examples of conditions that match the semantics. + EXPLAIN SELECT /*+ use_index_merge(t4, mvi1, mvi2) */ * FROM t4 WHERE json_overlaps(j->'$.a', '[1]') AND json_overlaps(j->'$.b', '[2]'); + EXPLAIN SELECT /*+ use_index_merge(t4, mvi1, mvi2) */ * FROM t4 WHERE json_contains(j->'$.a', '[1]') OR json_contains(j->'$.b', '[2]'); + ``` + + ```sql + > EXPLAIN SELECT /*+ use_index_merge(t4, mvi1, mvi2) */ * FROM t4 WHERE json_overlaps(j->'$.a', '[1]') AND json_overlaps(j->'$.b', '[2]'); + +---------------------------------+---------+-----------+-----------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+ + | id | estRows | task | access object | operator info | + +---------------------------------+---------+-----------+-----------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+ + | Selection_5 | 8.00 | root | | json_overlaps(json_extract(test.t4.j, "$.a"), cast("[1]", json BINARY)), json_overlaps(json_extract(test.t4.j, "$.b"), cast("[2]", json BINARY)) | + | └─IndexMerge_9 | 0.01 | root | | type: intersection | + | ├─IndexRangeScan_6(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[1,1], keep order:false, stats:pseudo | + | ├─IndexRangeScan_7(Build) | 10.00 | cop[tikv] | table:t4, index:mvi2(cast(json_extract(`j`, _utf8'$.b') as unsigned array)) | range:[2,2], keep order:false, stats:pseudo | + | └─TableRowIDScan_8(Probe) | 0.01 | cop[tikv] | table:t4 | keep order:false, stats:pseudo | + +---------------------------------+---------+-----------+-----------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+ + + > EXPLAIN SELECT /*+ use_index_merge(t4, mvi1, mvi2) */ * FROM t4 WHERE json_contains(j->'$.a', '[1]') OR json_contains(j->'$.b', '[2]'); + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ + | id | estRows | task | access object | operator info | + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ + | IndexMerge_8 | 19.99 | root | | type: union | + | ├─IndexRangeScan_5(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[1,1], keep order:false, stats:pseudo | + | ├─IndexRangeScan_6(Build) | 10.00 | cop[tikv] | table:t4, index:mvi2(cast(json_extract(`j`, _utf8'$.b') as unsigned array)) | range:[2,2], keep order:false, stats:pseudo | + | └─TableRowIDScan_7(Probe) | 19.99 | cop[tikv] | table:t4 | keep order:false, stats:pseudo | + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+---------------------------------------------+ + ``` + + - When both `OR` and `AND` are used to connect conditions (essentially nested `OR` and `AND`), the conditions that constitute an IndexMerge must either all match the semantics of `OR` or all match the semantics of `AND`, not partially match the semantics of `OR` and partially match the semantics of `AND`. For example: + + ```sql + EXPLAIN SELECT /*+ use_index_merge(t4, mvi1, mvi2) */ * FROM t4 WHERE 1 member of (j->'$.a') AND (2 member of (j->'$.b') OR 3 member of (j->'$.a')); + EXPLAIN SELECT /*+ use_index_merge(t4, mvi1, mvi2) */ * FROM t4 WHERE 1 member of (j->'$.a') OR (2 member of (j->'$.b') AND 3 member of (j->'$.a')); + ``` + + ```sql + -- Only 2 member of (j->'$.b') and 3 member of (j->'$.a') that match the semantics of OR constitute the IndexMerge. 1 member of (j->'$.a') that matches the semantics of AND is not included. + > EXPLAIN SELECT /*+ use_index_merge(t4, mvi1, mvi2) */ * FROM t4 WHERE 1 member of (j->'$.a') AND (2 member of (j->'$.b') OR 3 member of (j->'$.a')); + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | id | estRows | task | access object | operator info | + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | IndexMerge_9 | 0.00 | root | | type: union | + | ├─IndexRangeScan_5(Build) | 10.00 | cop[tikv] | table:t4, index:mvi2(cast(json_extract(`j`, _utf8'$.b') as unsigned array)) | range:[2,2], keep order:false, stats:pseudo | + | ├─IndexRangeScan_6(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[3,3], keep order:false, stats:pseudo | + | └─Selection_8(Probe) | 0.00 | cop[tikv] | | json_memberof(cast(1, json BINARY), json_extract(test.t4.j, "$.a")), or(json_memberof(cast(2, json BINARY), json_extract(test.t4.j, "$.b")), json_memberof(cast(3, json BINARY), json_extract(test.t4.j, "$.a"))) | + | └─TableRowIDScan_7 | 19.99 | cop[tikv] | table:t4 | keep order:false, stats:pseudo | + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + -- Only 1 member of (j->'$.a') and 2 member of (j->'$.a') that match the semantics of OR constitute the IndexMerge. 2 member of (j->'$.b') that matches the semantics of AND is not included. + > EXPLAIN SELECT /*+ use_index_merge(t4, mvi1, mvi2) */ * FROM t4 WHERE 1 member of (j->'$.a') OR (2 member of (j->'$.b') AND 3 member of (j->'$.a')); + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | id | estRows | task | access object | operator info | + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | IndexMerge_9 | 0.02 | root | | type: union | + | ├─IndexRangeScan_5(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[1,1], keep order:false, stats:pseudo | + | ├─IndexRangeScan_6(Build) | 10.00 | cop[tikv] | table:t4, index:mvi1(cast(json_extract(`j`, _utf8'$.a') as unsigned array)) | range:[3,3], keep order:false, stats:pseudo | + | └─Selection_8(Probe) | 0.02 | cop[tikv] | | or(json_memberof(cast(1, json BINARY), json_extract(test.t4.j, "$.a")), and(json_memberof(cast(2, json BINARY), json_extract(test.t4.j, "$.b")), json_memberof(cast(3, json BINARY), json_extract(test.t4.j, "$.a")))) | + | └─TableRowIDScan_7 | 19.99 | cop[tikv] | table:t4 | keep order:false, stats:pseudo | + +-------------------------------+---------+-----------+-----------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + ``` + +If conditions contain nested `OR`/`AND`, or if conditions correspond to only indexed columns after transformations, such as expansion, TiDB might not be able to use IndexMerge or make full use of all conditions. It is recommended to verify the behavior of each specific case. + +The following are some examples: -For `AND`/`ON` conditions composed of multiple expressions where each `item` condition corresponds to multiple different indexes, the specific index to be used varies: +```sql +CREATE TABLE t5 (a INT, j JSON, b INT, k JSON, INDEX idx(a, (CAST(j AS SIGNED ARRAY))), INDEX idx2(b, (CAST(k as SIGNED ARRAY)))); +CREATE TABLE t6 (a INT, j JSON, b INT, k JSON, INDEX idx(a, (CAST(j AS SIGNED ARRAY)), b), INDEX idx2(a, (CAST(k as SIGNED ARRAY)), b)); +``` -* If the path of a single item is also index merge, as long as its logic is consistent with the logic of the external `AND`/`OR`, it can be merged with the external index merge. -* If the path of a single item is also index merge, and it has only one index partial path, then it can be merged with the external index merge regardless of whether its own index merge logic is `AND`/`OR`. +If `AND` is nested in conditions connected by `OR` and subconditions connected by `AND` correspond to the exact columns of a multi-column index, TiDB can usually make full use of the conditions. For example: ```sql -mysql> CREATE TABLE t(j1 JSON, j2 JSON, a INT, INDEX k1((CAST(j1->'$.path' AS SIGNED ARRAY))), INDEX k2((CAST(j2->'$.path' AS SIGNED ARRAY))), INDEX ka(a)); -Query OK, 0 rows affected (0.02 sec) +EXPLAIN SELECT /*+ use_index_merge(t5, idx, idx2) */ * FROM t5 WHERE (a=1 AND 1 member of (j)) OR (b=2 AND 2 member of (k)); ``` ```sql -tidb> EXPLAIN SELECT /*+ use_index_merge(t, k1, k2, ka) */ * FROM t WHERE (1 member of (j1->'$.path')) AND (2 member of (j2->'$.path')) AND (a = 3); -+-------------------------------+---------+-----------+----------------------------------------------------------------------------+---------------------------------------------+ -| id | estRows | task | access object | operator info | -+-------------------------------+---------+-----------+----------------------------------------------------------------------------+---------------------------------------------+ -| IndexMerge_9 | 10.00 | root | | type: intersection | -| ├─IndexRangeScan_5(Build) | 10.00 | cop[tikv] | table:t, index:ka(a) | range:[3,3], keep order:false, stats:pseudo | -| ├─IndexRangeScan_6(Build) | 10.00 | cop[tikv] | table:t, index:k1(cast(json_extract(`j1`, _utf8'$.path') as signed array)) | range:[1,1], keep order:false, stats:pseudo | -| ├─IndexRangeScan_7(Build) | 10.00 | cop[tikv] | table:t, index:k2(cast(json_extract(`j2`, _utf8'$.path') as signed array)) | range:[2,2], keep order:false, stats:pseudo | -| └─TableRowIDScan_8(Probe) | 10.00 | cop[tikv] | table:t | keep order:false, stats:pseudo | -+-------------------------------+---------+-----------+----------------------------------------------------------------------------+---------------------------------------------+ -5 rows in set (0.00 sec) +> EXPLAIN SELECT /*+ use_index_merge(t5, idx, idx2) */ * FROM t5 WHERE (a=1 AND 1 member of (j)) OR (b=2 AND 2 member of (k)); ++-------------------------------+---------+-----------+----------------------------------------------------+-------------------------------------------------+ +| id | estRows | task | access object | operator info | ++-------------------------------+---------+-----------+----------------------------------------------------+-------------------------------------------------+ +| IndexMerge_8 | 0.20 | root | | type: union | +| ├─IndexRangeScan_5(Build) | 0.10 | cop[tikv] | table:t5, index:idx(a, cast(`j` as signed array)) | range:[1 1,1 1], keep order:false, stats:pseudo | +| ├─IndexRangeScan_6(Build) | 0.10 | cop[tikv] | table:t5, index:idx2(b, cast(`k` as signed array)) | range:[2 2,2 2], keep order:false, stats:pseudo | +| └─TableRowIDScan_7(Probe) | 0.20 | cop[tikv] | table:t5 | keep order:false, stats:pseudo | ++-------------------------------+---------+-----------+----------------------------------------------------+-------------------------------------------------+ ``` -Currently, TiDB only supports using one index to access instead of generating the following plan that uses multiple indexes to access at the same time: +If a single `OR` is nested in conditions connected by `AND` and subconditions connected by `OR` correspond to the indexed columns after expansion, TiDB can usually make full use of the conditions. For example: +```sql +EXPLAIN SELECT /*+ use_index_merge(t6, idx, idx2) */ * FROM t6 WHERE a=1 AND (1 member of (j) OR 2 member of (k)); ``` -Selection -└─IndexMerge - ├─IndexRangeScan(k1) - ├─IndexRangeScan(k2) - ├─IndexRangeScan(ka) - └─Selection - └─TableRowIDScan -``` - -### Unsupported scenarios - -For `OR` conditions composed of multiple expressions that correspond to multiple different indexes, multi-valued indexes cannot be used: ```sql -mysql> create table t(j1 json, j2 json, a int, INDEX k1((CAST(j1->'$.path' AS SIGNED ARRAY))), INDEX k2((CAST(j2->'$.path' AS SIGNED ARRAY))), INDEX ka(a)); -Query OK, 0 rows affected (0.03 sec) - -mysql> explain select /*+ use_index_merge(t, k1, k2, ka) */ * from t where (1 member of (j1->'$.path')) or (2 member of (j2->'$.path')); -+-------------------------+----------+-----------+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------+ -| id | estRows | task | access object | operator info | -+-------------------------+----------+-----------+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------+ -| Selection_5 | 8000.00 | root | | or(json_memberof(cast(1, json BINARY), json_extract(test.t.j1, "$.path")), json_memberof(cast(2, json BINARY), json_extract(test.t.j2, "$.path"))) | -| └─TableReader_7 | 10000.00 | root | | data:TableFullScan_6 | -| └─TableFullScan_6 | 10000.00 | cop[tikv] | table:t | keep order:false, stats:pseudo | -+-------------------------+----------+-----------+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------+ -3 rows in set, 3 warnings (0.00 sec) - -mysql> explain select /*+ use_index_merge(t, k1, k2, ka) */ * from t where (1 member of (j1->'$.path')) or (a = 3); -+-------------------------+----------+-----------+---------------+---------------------------------------------------------------------------------------------+ -| id | estRows | task | access object | operator info | -+-------------------------+----------+-----------+---------------+---------------------------------------------------------------------------------------------+ -| Selection_5 | 8000.00 | root | | or(json_memberof(cast(1, json BINARY), json_extract(test.t.j1, "$.path")), eq(test.t.a, 3)) | -| └─TableReader_7 | 10000.00 | root | | data:TableFullScan_6 | -| └─TableFullScan_6 | 10000.00 | cop[tikv] | table:t | keep order:false, stats:pseudo | -+-------------------------+----------+-----------+---------------+---------------------------------------------------------------------------------------------+ -3 rows in set, 3 warnings (0.00 sec) ++-------------------------------+---------+-----------+-------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------+ +| id | estRows | task | access object | operator info | ++-------------------------------+---------+-----------+-------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------+ +| IndexMerge_9 | 0.20 | root | | type: union | +| ├─IndexRangeScan_5(Build) | 0.10 | cop[tikv] | table:t6, index:idx(a, cast(`j` as signed array), b) | range:[1 1,1 1], keep order:false, stats:pseudo | +| ├─IndexRangeScan_6(Build) | 0.10 | cop[tikv] | table:t6, index:idx2(a, cast(`k` as signed array), b) | range:[1 2,1 2], keep order:false, stats:pseudo | +| └─Selection_8(Probe) | 0.20 | cop[tikv] | | eq(test2.t6.a, 1), or(json_memberof(cast(1, json BINARY), test2.t6.j), json_memberof(cast(2, json BINARY), test2.t6.k)) | +| └─TableRowIDScan_7 | 0.20 | cop[tikv] | table:t6 | keep order:false, stats:pseudo | ++-------------------------------+---------+-----------+-------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------+ ``` -A workaround for the preceding scenario is to rewrite the query using `Union All`: - -The following are some more complex scenarios that are not yet supported. +If multiple `OR` are nested in conditions connected by `AND` and subconditions connected by `OR` need expansion to correspond to the index columns, TiDB might not be able to make full use of all conditions. For example: ```sql -mysql> CREATE TABLE t4 (j JSON, INDEX idx((CAST(j AS SIGNED ARRAY)))); -Query OK, 0 rows affected (0.04 sec) - --- If a query contains the OR condition composed of multiple json_contains expressions, the index cannot be accessed using IndexMerge. -mysql> EXPLAIN SELECT /*+ use_index_merge(t3, idx) */ * FROM t3 WHERE (json_contains(j, '[1, 2]')) OR (json_contains(j, '[3, 4]')); -+-------------------------+----------+-----------+---------------+------------------------------------------------------------------------------------------------------------------+ -| id | estRows | task | access object | operator info | -+-------------------------+----------+-----------+---------------+------------------------------------------------------------------------------------------------------------------+ -| TableReader_7 | 9600.00 | root | | data:Selection_6 | -| └─Selection_6 | 9600.00 | cop[tikv] | | or(json_contains(test.t3.j, cast("[1, 2]", json BINARY)), json_contains(test.t3.j, cast("[3, 4]", json BINARY))) | -| └─TableFullScan_5 | 10000.00 | cop[tikv] | table:t3 | keep order:false, stats:pseudo | -+-------------------------+----------+-----------+---------------+------------------------------------------------------------------------------------------------------------------+ -3 rows in set, 1 warning (0.00 sec) - -mysql> SHOW WARNINGS; -+---------+------+----------------------------+ -| Level | Code | Message | -+---------+------+----------------------------+ -| Warning | 1105 | IndexMerge is inapplicable | -+---------+------+----------------------------+ -1 row in set (0.00 sec) +EXPLAIN SELECT /*+ use_index_merge(t6, idx, idx2) */ * FROM t6 WHERE a=1 AND (1 member of (j) OR 2 member of (k)) and (b = 1 OR b = 2); +EXPLAIN SELECT /*+ use_index_merge(t6, idx, idx2) */ * FROM t6 WHERE a=1 AND ((1 member of (j) AND b = 1) OR (1 member of (j) AND b = 2) OR (2 member of (k) AND b = 1) OR (2 member of (k) AND b = 2)); +``` -mysql> EXPLAIN SELECT /*+ use_index_merge(t3, idx) */ * FROM t3 WHERE (json_contains(j, '[1, 2]')) OR (json_contains(j, '[3, 4]')); -+-------------------------+----------+-----------+---------------+------------------------------------------------------------------------------------------------------------------+ -| id | estRows | task | access object | operator info | -+-------------------------+----------+-----------+---------------+------------------------------------------------------------------------------------------------------------------+ -| TableReader_7 | 9600.00 | root | | data:Selection_6 | -| └─Selection_6 | 9600.00 | cop[tikv] | | or(json_contains(test.t3.j, cast("[1, 2]", json BINARY)), json_contains(test.t3.j, cast("[3, 4]", json BINARY))) | -| └─TableFullScan_5 | 10000.00 | cop[tikv] | table:t3 | keep order:false, stats:pseudo | -+-------------------------+----------+-----------+---------------+------------------------------------------------------------------------------------------------------------------+ -3 rows in set, 1 warning (0.01 sec) - --- If a query contains the more complex expression formed by multi-layer OR/AND nesting, the index cannot be accessed using IndexMerge. -mysql> EXPLAIN SELECT /*+ use_index_merge(t3, idx) */ * FROM t3 WHERE ((1 member of (j)) AND (2 member of (j))) OR ((3 member of (j)) AND (4 member of (j))); -+-------------------------+----------+-----------+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| id | estRows | task | access object | operator info | -+-------------------------+----------+-----------+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Selection_5 | 8000.00 | root | | or(and(json_memberof(cast(1, json BINARY), test.t3.j), json_memberof(cast(2, json BINARY), test.t3.j)), and(json_memberof(cast(3, json BINARY), test.t3.j), json_memberof(cast(4, json BINARY), test.t3.j))) | -| └─TableReader_7 | 10000.00 | root | | data:TableFullScan_6 | -| └─TableFullScan_6 | 10000.00 | cop[tikv] | table:t3 | keep order:false, stats:pseudo | -+-------------------------+----------+-----------+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -3 rows in set, 2 warnings (0.00 sec) +```sql +-- Due to current implementation limitations, (b = 1 or b = 2) does not constitute the IndexMerge, but becomes a Selection operator +> EXPLAIN SELECT /*+ use_index_merge(t6, idx, idx2) */ * FROM t6 WHERE a=1 AND (1 member of (j) OR 2 member of (k)) AND (b = 1 OR b = 2); ++-------------------------------+---------+-----------+-------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| id | estRows | task | access object | operator info | ++-------------------------------+---------+-----------+-------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| IndexMerge_9 | 0.20 | root | | type: union | +| ├─IndexRangeScan_5(Build) | 0.10 | cop[tikv] | table:t6, index:idx(a, cast(`j` as signed array), b) | range:[1 1,1 1], keep order:false, stats:pseudo | +| ├─IndexRangeScan_6(Build) | 0.10 | cop[tikv] | table:t6, index:idx2(a, cast(`k` as signed array), b) | range:[1 2,1 2], keep order:false, stats:pseudo | +| └─Selection_8(Probe) | 0.20 | cop[tikv] | | eq(test.t6.a, 1), or(eq(test.t6.b, 1), eq(test.t6.b, 2)), or(json_memberof(cast(1, json BINARY), test.t6.j), json_memberof(cast(2, json BINARY), test.t6.k)) | +| └─TableRowIDScan_7 | 0.20 | cop[tikv] | table:t6 | keep order:false, stats:pseudo | ++-------------------------------+---------+-----------+-------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +-- If you manually expand the two OR conditions connected with AND, TiDB can make full use of these conditions +> EXPLAIN SELECT /*+ use_index_merge(t6, idx, idx2) */ * FROM t6 WHERE a=1 AND ((1 member of (j) AND b = 1) OR (1 member of (j) AND b = 2) OR (2 member of (k) AND b = 1) OR (2 member of (k) AND b = 2)); ++-------------------------------+---------+-----------+-------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| id | estRows | task | access object | operator info | ++-------------------------------+---------+-----------+-------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| IndexMerge_11 | 0.00 | root | | type: union | +| ├─IndexRangeScan_5(Build) | 0.00 | cop[tikv] | table:t6, index:idx(a, cast(`j` as signed array), b) | range:[1 1 1,1 1 1], keep order:false, stats:pseudo | +| ├─IndexRangeScan_6(Build) | 0.00 | cop[tikv] | table:t6, index:idx(a, cast(`j` as signed array), b) | range:[1 1 2,1 1 2], keep order:false, stats:pseudo | +| ├─IndexRangeScan_7(Build) | 0.00 | cop[tikv] | table:t6, index:idx2(a, cast(`k` as signed array), b) | range:[1 2 1,1 2 1], keep order:false, stats:pseudo | +| ├─IndexRangeScan_8(Build) | 0.00 | cop[tikv] | table:t6, index:idx2(a, cast(`k` as signed array), b) | range:[1 2 2,1 2 2], keep order:false, stats:pseudo | +| └─Selection_10(Probe) | 0.00 | cop[tikv] | | eq(test.t6.a, 1), or(or(and(json_memberof(cast(1, json BINARY), test.t6.j), eq(test.t6.b, 1)), and(json_memberof(cast(1, json BINARY), test.t6.j), eq(test.t6.b, 2))), or(and(json_memberof(cast(2, json BINARY), test.t6.k), eq(test.t6.b, 1)), and(json_memberof(cast(2, json BINARY), test.t6.k), eq(test.t6.b, 2)))) | +| └─TableRowIDScan_9 | 0.00 | cop[tikv] | table:t6 | keep order:false, stats:pseudo | ++-------------------------------+---------+-----------+-------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ``` Limited by the current implementation of multi-valued indexes, using [`use_index`](/optimizer-hints.md#use_indext1_name-idx1_name--idx2_name-) might return the `Can't find a proper physical plan for this query` error while using [`use_index_merge`](/optimizer-hints.md#use_index_merget1_name-idx1_name--idx2_name-) will not return such an error. Therefore, it is recommended to use `use_index_merge` if you want to use multi-valued indexes. @@ -475,4 +684,4 @@ mysql> SHOW WARNINGS; -- cannot hit plan cache since the JSON_CONTAINS predicat | Warning | 1105 | skip prepared plan-cache: json_contains function with immutable parameters can affect index selection | +---------+------+-------------------------------------------------------------------------------------------------------+ 1 row in set (0.01 sec) -``` \ No newline at end of file +```