Skip to content

Commit

Permalink
[BugFix] fix multiple partition column statistics (#50488)
Browse files Browse the repository at this point in the history
Signed-off-by: Murphy <[email protected]>
(cherry picked from commit 7bdc5bf)
  • Loading branch information
murphyatwork authored and mergify[bot] committed Sep 3, 2024
1 parent fb9009b commit 619230b
Show file tree
Hide file tree
Showing 4 changed files with 139 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -710,18 +710,15 @@ private void adjustPartitionColsStatistic(Collection<Long> selectedPartitionId,
}

Map<String, ColumnRefOperator> colNameMap = Maps.newHashMap();
colRefToColumnMetaMap.entrySet().stream()
.forEach(e -> colNameMap.put(e.getValue().getName(), e.getKey()));
List<ColumnRefOperator> partitionCols = Lists.newArrayList();
for (String partitionColName : olapTable.getPartitionColumnNames()) {
if (!colNameMap.containsKey(partitionColName)) {
return;
}
partitionCols.add(colNameMap.get(partitionColName));
}
colRefToColumnMetaMap.entrySet().stream().forEach(e -> colNameMap.put(e.getValue().getName(), e.getKey()));
// May contain null entries when some partition columns are not referenced in the scan
List<ColumnRefOperator> partitionCols =
olapTable.getPartitionColumnNames().stream()
.map(colNameMap::get)
.collect(Collectors.toList());
PartitionInfo partitionInfo = olapTable.getPartitionInfo();
if (partitionInfo instanceof RangePartitionInfo) {
if (partitionCols.size() != 1) {
if (partitionCols.size() != 1 || partitionCols.stream().anyMatch(Objects::isNull)) {
return;
}
if (optimizerContext.getDumpInfo() != null) {
Expand Down Expand Up @@ -769,15 +766,20 @@ private void adjustPartitionColsStatistic(Collection<Long> selectedPartitionId,
} else if (partitionInfo instanceof ListPartitionInfo) {
ListPartitionInfo listPartitionInfo = (ListPartitionInfo) partitionInfo;
for (int i = 0; i < partitionCols.size(); i++) {
ColumnRefOperator columnRef = partitionCols.get(i);
// For multi-column list partition, pruning on any column should adjust the statistics
if (columnRef == null) {
continue;
}
if (optimizerContext.getDumpInfo() != null) {
optimizerContext.getDumpInfo().addTableStatistics(olapTable,
partitionCols.get(i).getName(),
builder.getColumnStatistics(partitionCols.get(i)));
}
long ndv = extractDistinctPartitionValues(listPartitionInfo, selectedPartitionId, i);
ColumnStatistic columnStatistic = ColumnStatistic.buildFrom(builder.getColumnStatistics(partitionCols.get(i)))
ColumnStatistic columnStatistic = ColumnStatistic.buildFrom(builder.getColumnStatistics(columnRef))
.setDistinctValuesCount(ndv).build();
builder.addColumnStatistic(partitionCols.get(i), columnStatistic);
builder.addColumnStatistic(columnRef, columnStatistic);
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion test/lib/sr_sql_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1873,8 +1873,10 @@ def assert_explain_verbose_contains(self, query, *expects):
"""
sql = "explain verbose %s" % (query)
res = self.execute_sql(sql, True)
tools.assert_true(res["status"], res['msg'])
for expect in expects:
tools.assert_true(str(res["result"]).find(expect) > 0, "assert expect %s is not found in plan" % (expect))
plan_string = "\n".join(item[0] for item in res["result"])
tools.assert_true(plan_string.find(expect) > 0, "assert expect %s is not found in plan: %s" % (expect, plan_string))

def assert_trace_values_contains(self, query, *expects):
"""
Expand Down
81 changes: 81 additions & 0 deletions test/sql/test_list_partition/R/test_list_partition_cardinality
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
-- name: test_list_partition_cardinality
DROP DATABASE IF EXISTS test_list_partition_cardinality;
-- result:
-- !result
CREATE DATABASE test_list_partition_cardinality;
-- result:
-- !result
USE test_list_partition_cardinality;
-- result:
-- !result
CREATE TABLE partitions_multi_column_1 (
c1 int NOT NULL,
c2 int NOT NULL,
c3 int
)
PARTITION BY (c1, c2);
-- result:
-- !result
INSERT INTO partitions_multi_column_1 VALUES
(1,1,1),
(1,2,4),
(1,2,4),
(1,2,4),
(2,3,2),
(2,4,5),
(3,5,3),
(3,6,6);
-- result:
-- !result
INSERT INTO partitions_multi_column_1
SELECT 4, 7, generate_series FROM TABLE(generate_series(1, 1000));
-- result:
-- !result
ANALYZE FULL TABLE partitions_multi_column_1 WITH SYNC MODE;
-- result:
test_list_partition_cardinality.partitions_multi_column_1 analyze status OK
-- !result
SELECT count(*) FROM partitions_multi_column_1;
-- result:
1008
-- !result
function: assert_explain_verbose_contains('SELECT COUNT(*) FROM partitions_multi_column_1 WHERE c1=0', 'EMPTYSET')
-- result:
None
-- !result
function: assert_explain_verbose_contains('SELECT COUNT(*) FROM partitions_multi_column_1 WHERE c1=1', 'cardinality: 2')
-- result:
None
-- !result
function: assert_explain_verbose_contains('SELECT COUNT(*) FROM partitions_multi_column_1 WHERE c1=2', 'cardinality: 1')
-- result:
None
-- !result
function: assert_explain_verbose_contains('SELECT COUNT(*) FROM partitions_multi_column_1 WHERE c1=3', 'cardinality: 1')
-- result:
None
-- !result
function: assert_explain_verbose_contains('SELECT COUNT(*) FROM partitions_multi_column_1 WHERE c1=4', 'cardinality: 500')
-- result:
None
-- !result
function: assert_explain_verbose_contains('SELECT COUNT(*) FROM partitions_multi_column_1 WHERE c2=0', 'EMPTYSET')
-- result:
None
-- !result
function: assert_explain_verbose_contains('SELECT COUNT(*) FROM partitions_multi_column_1 WHERE c2=1', 'cardinality: 1')
-- result:
None
-- !result
function: assert_explain_verbose_contains('SELECT COUNT(*) FROM partitions_multi_column_1 WHERE c2=2', 'cardinality: 2')
-- result:
None
-- !result
function: assert_explain_verbose_contains('SELECT COUNT(*) FROM partitions_multi_column_1 WHERE c2=3', 'cardinality: 1')
-- result:
None
-- !result
function: assert_explain_verbose_contains('SELECT COUNT(*) FROM partitions_multi_column_1 WHERE c2=7', 'cardinality: 500')
-- result:
None
-- !result
41 changes: 41 additions & 0 deletions test/sql/test_list_partition/T/test_list_partition_cardinality
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
-- name: test_list_partition_cardinality

DROP DATABASE IF EXISTS test_list_partition_cardinality;
CREATE DATABASE test_list_partition_cardinality;
USE test_list_partition_cardinality;

CREATE TABLE partitions_multi_column_1 (
c1 int NOT NULL,
c2 int NOT NULL,
c3 int
)
PARTITION BY (c1, c2);

INSERT INTO partitions_multi_column_1 VALUES
(1,1,1),
(1,2,4),
(1,2,4),
(1,2,4),
(2,3,2),
(2,4,5),
(3,5,3),
(3,6,6);

INSERT INTO partitions_multi_column_1
SELECT 4, 7, generate_series FROM TABLE(generate_series(1, 1000));

ANALYZE FULL TABLE partitions_multi_column_1 WITH SYNC MODE;

SELECT count(*) FROM partitions_multi_column_1;

function: assert_explain_verbose_contains('SELECT COUNT(*) FROM partitions_multi_column_1 WHERE c1=0', 'EMPTYSET')
function: assert_explain_verbose_contains('SELECT COUNT(*) FROM partitions_multi_column_1 WHERE c1=1', 'cardinality: 2')
function: assert_explain_verbose_contains('SELECT COUNT(*) FROM partitions_multi_column_1 WHERE c1=2', 'cardinality: 1')
function: assert_explain_verbose_contains('SELECT COUNT(*) FROM partitions_multi_column_1 WHERE c1=3', 'cardinality: 1')
function: assert_explain_verbose_contains('SELECT COUNT(*) FROM partitions_multi_column_1 WHERE c1=4', 'cardinality: 500')

function: assert_explain_verbose_contains('SELECT COUNT(*) FROM partitions_multi_column_1 WHERE c2=0', 'EMPTYSET')
function: assert_explain_verbose_contains('SELECT COUNT(*) FROM partitions_multi_column_1 WHERE c2=1', 'cardinality: 1')
function: assert_explain_verbose_contains('SELECT COUNT(*) FROM partitions_multi_column_1 WHERE c2=2', 'cardinality: 2')
function: assert_explain_verbose_contains('SELECT COUNT(*) FROM partitions_multi_column_1 WHERE c2=3', 'cardinality: 1')
function: assert_explain_verbose_contains('SELECT COUNT(*) FROM partitions_multi_column_1 WHERE c2=7', 'cardinality: 500')

0 comments on commit 619230b

Please sign in to comment.