Skip to content

Commit

Permalink
support column-level & table-level stats
Browse files Browse the repository at this point in the history
Signed-off-by: Murphy <[email protected]>
  • Loading branch information
murphyatwork committed Sep 4, 2024
1 parent 8bf333e commit f5e89ff
Show file tree
Hide file tree
Showing 18 changed files with 217 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ private void updateLakeTabletStat() {
}

private void adjustStatUpdateRows(long tableId, long totalRowCount) {
BasicStatsMeta meta = GlobalStateMgr.getCurrentState().getAnalyzeMgr().getBasicStatsMetaMap().get(tableId);
BasicStatsMeta meta = GlobalStateMgr.getCurrentState().getAnalyzeMgr().getTableBasicStatsMeta(tableId);
if (meta != null) {
meta.setUpdateRows(totalRowCount);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1251,6 +1251,7 @@ private void handleQueryStmt(ExecPlan execPlan) throws Exception {
}
}

// TODO: move to DdlExecutor
private void handleAnalyzeStmt() throws IOException {
AnalyzeStmt analyzeStmt = (AnalyzeStmt) parsedStmt;
TableName tableName = analyzeStmt.getTableName();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ public static long getTableRowCount(Table table, Operator node, OptimizerContext
long rowCount = 0;

BasicStatsMeta basicStatsMeta =
GlobalStateMgr.getCurrentState().getAnalyzeMgr().getBasicStatsMetaMap().get(table.getId());
GlobalStateMgr.getCurrentState().getAnalyzeMgr().getTableBasicStatsMeta(table.getId());
StatsConstants.AnalyzeType analyzeType = basicStatsMeta == null ? null : basicStatsMeta.getType();
LocalDateTime lastWorkTimestamp = GlobalStateMgr.getCurrentState().getTabletStatMgr().getLastWorkTimestamp();
if (StatsConstants.AnalyzeType.FULL == analyzeType) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,10 @@ public void replayRemoveBasicStatsMeta(BasicStatsMeta basicStatsMeta) {
basicStatsMetaMap.remove(basicStatsMeta.getTableId());
}

public BasicStatsMeta getTableBasicStatsMeta(long tableId) {
return basicStatsMetaMap.get(tableId);
}

public Map<Long, BasicStatsMeta> getBasicStatsMetaMap() {
return basicStatsMetaMap;
}
Expand Down Expand Up @@ -840,7 +844,8 @@ public void load(SRMetaBlockReader reader) throws IOException, SRMetaBlockExcept
}

private void updateBasicStatsMeta(long dbId, long tableId, long loadedRows) {
BasicStatsMeta basicStatsMeta = GlobalStateMgr.getCurrentState().getAnalyzeMgr().getBasicStatsMetaMap().get(tableId);
BasicStatsMeta basicStatsMeta =
GlobalStateMgr.getCurrentState().getAnalyzeMgr().getTableBasicStatsMeta(tableId);
if (basicStatsMeta == null) {
// first load without analyze op, we need fill a meta with loaded rows for cardinality estimation
BasicStatsMeta meta = new BasicStatsMeta(dbId, tableId, Lists.newArrayList(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

package com.starrocks.statistic;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.gson.annotations.SerializedName;
import com.starrocks.catalog.Database;
import com.starrocks.catalog.OlapTable;
Expand Down Expand Up @@ -61,6 +63,9 @@ public class BasicStatsMeta implements Writable {
@SerializedName("deltaRows")
private long deltaRows;

@SerializedName("columnStats")
private Map<String, ColumnStatsMeta> columnStatsMetaMap = Maps.newConcurrentMap();

public BasicStatsMeta(long dbId, long tableId, List<String> columns,
StatsConstants.AnalyzeType type,
LocalDateTime updateTime,
Expand Down Expand Up @@ -190,4 +195,22 @@ public boolean isUpdatedAfterLoad(LocalDateTime loadTime) {
return updateTime.isAfter(loadTime);
}
}

public void updateStats(StatsConstants.AnalyzeType type, LocalDateTime updateTime, Map<String, String> properties) {
this.type = type;
this.updateTime = updateTime;
this.properties = properties;
}

public Map<String, ColumnStatsMeta> getColumnStatsMetaMap() {
return columnStatsMetaMap;
}

public List<ColumnStatsMeta> getColumnStatsMetaList() {
return Lists.newArrayList(columnStatsMetaMap.values());
}

public void addColumnStatsMeta(ColumnStatsMeta columnStatsMeta) {
this.columnStatsMetaMap.put(columnStatsMeta.getColumnName(), columnStatsMeta);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.starrocks.statistic;

import com.google.gson.annotations.SerializedName;

import java.time.LocalDateTime;
import java.util.Objects;

/**
* Meta of column-level statistics
*/
public class ColumnStatsMeta {

@SerializedName("columnName")
private String columnName;

@SerializedName("type")
private StatsConstants.AnalyzeType type;

@SerializedName("updateTime")
private LocalDateTime updateTime;

public ColumnStatsMeta(String columnName, StatsConstants.AnalyzeType type, LocalDateTime updateTime) {
this.columnName = columnName;
this.type = type;
this.updateTime = updateTime;
}

public String getColumnName() {
return columnName;
}

public void setColumnName(String columnName) {
this.columnName = columnName;
}

public StatsConstants.AnalyzeType getType() {
return type;
}

public void setType(StatsConstants.AnalyzeType type) {
this.type = type;
}

public LocalDateTime getUpdateTime() {
return updateTime;
}

public void setUpdateTime(LocalDateTime updateTime) {
this.updateTime = updateTime;
}

@Override
public String toString() {
final StringBuffer sb = new StringBuffer("ColumnStatsMeta{");
sb.append("columnName='").append(columnName).append('\'');
sb.append(", type=").append(type);
sb.append('}');
return sb.toString();
}

@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
ColumnStatsMeta that = (ColumnStatsMeta) o;
return Objects.equals(columnName, that.columnName) && type == that.type;
}

@Override
public int hashCode() {
return Objects.hash(columnName, type);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ public class ExternalFullStatisticsCollectJob extends StatisticsCollectJob {
public ExternalFullStatisticsCollectJob(String catalogName, Database db, Table table, List<String> partitionNames,
List<String> columnNames, List<Type> columnTypes,
StatsConstants.AnalyzeType type, StatsConstants.ScheduleType scheduleType,
Map<String, String> properties) {
super(db, table, columnNames, columnTypes, type, scheduleType, properties);
Map<String, String> properties, boolean allColumns) {
super(db, table, columnNames, columnTypes, type, scheduleType, properties, allColumns);
this.catalogName = catalogName;
this.partitionNames = partitionNames;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public ExternalHistogramStatisticsCollectJob(String catalogName, Database db, Ta
List<Type> columnTypes, StatsConstants.AnalyzeType type,
StatsConstants.ScheduleType scheduleType,
Map<String, String> properties) {
super(db, table, columnNames, columnTypes, type, scheduleType, properties);
super(db, table, columnNames, columnTypes, type, scheduleType, properties, false);
this.catalogName = catalogName;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,16 @@ public class FullStatisticsCollectJob extends StatisticsCollectJob {

public FullStatisticsCollectJob(Database db, Table table, List<Long> partitionIdList, List<String> columns,
StatsConstants.AnalyzeType type, StatsConstants.ScheduleType scheduleType,
Map<String, String> properties) {
super(db, table, columns, type, scheduleType, properties);
Map<String, String> properties, boolean allColumns) {
super(db, table, columns, type, scheduleType, properties, allColumns);
this.partitionIdList = partitionIdList;
}

public FullStatisticsCollectJob(Database db, Table table, List<Long> partitionIdList, List<String> columnNames,
List<Type> columnTypes, StatsConstants.AnalyzeType type,
StatsConstants.ScheduleType scheduleType, Map<String, String> properties) {
super(db, table, columnNames, columnTypes, type, scheduleType, properties);
StatsConstants.ScheduleType scheduleType, Map<String, String> properties,
boolean allColumns) {
super(db, table, columnNames, columnTypes, type, scheduleType, properties, allColumns);
this.partitionIdList = partitionIdList;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public class HistogramStatisticsCollectJob extends StatisticsCollectJob {
public HistogramStatisticsCollectJob(Database db, Table table, List<String> columnNames, List<Type> columnTypes,
StatsConstants.AnalyzeType type, StatsConstants.ScheduleType scheduleType,
Map<String, String> properties) {
super(db, table, columnNames, columnTypes, type, scheduleType, properties);
super(db, table, columnNames, columnTypes, type, scheduleType, properties, false);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ public class SampleStatisticsCollectJob extends StatisticsCollectJob {

public SampleStatisticsCollectJob(Database db, Table table, List<String> columnNames,
StatsConstants.AnalyzeType type, StatsConstants.ScheduleType scheduleType,
Map<String, String> properties) {
super(db, table, columnNames, type, scheduleType, properties);
Map<String, String> properties, boolean allColumns) {
super(db, table, columnNames, type, scheduleType, properties, allColumns);
}

public SampleStatisticsCollectJob(Database db, Table table, List<String> columnNames, List<Type> columnTypes,
StatsConstants.AnalyzeType type, StatsConstants.ScheduleType scheduleType,
Map<String, String> properties) {
super(db, table, columnNames, columnTypes, type, scheduleType, properties);
Map<String, String> properties, boolean allColumns) {
super(db, table, columnNames, columnTypes, type, scheduleType, properties, allColumns);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import com.starrocks.thrift.TResultBatch;
import com.starrocks.thrift.TResultSinkType;
import com.starrocks.thrift.TStatisticData;
import org.apache.commons.collections.CollectionUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.thrift.TDeserializer;
Expand All @@ -54,6 +55,9 @@
import java.time.LocalDateTime;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;

public class StatisticExecutor {
private static final Logger LOG = LogManager.getLogger(StatisticExecutor.class);
Expand All @@ -76,7 +80,7 @@ public List<TStatisticData> queryStatisticSync(ConnectContext context, String ta
public List<TStatisticData> queryStatisticSync(ConnectContext context, Long dbId, Long tableId,
List<String> columnNames) {
String sql;
BasicStatsMeta meta = GlobalStateMgr.getCurrentState().getAnalyzeMgr().getBasicStatsMetaMap().get(tableId);
BasicStatsMeta meta = GlobalStateMgr.getCurrentState().getAnalyzeMgr().getTableBasicStatsMeta(tableId);
if (meta != null && meta.getType().equals(StatsConstants.AnalyzeType.FULL)) {
Table table = null;
if (dbId == null) {
Expand Down Expand Up @@ -112,8 +116,31 @@ public List<TStatisticData> queryStatisticSync(ConnectContext context, Long dbId
} else {
sql = StatisticSQLBuilder.buildQuerySampleStatisticsSQL(dbId, tableId, columnNames);
}

return executeStatisticDQL(context, sql);
List<TStatisticData> tableStats = executeStatisticDQL(context, sql);

Table table = GlobalStateMgr.getCurrentState().getLocalMetastore().getTable(dbId, tableId);
if (table != null && meta != null) {
List<ColumnStatsMeta> columnStatsMetaList = meta.getColumnStatsMetaList();
if (CollectionUtils.isNotEmpty(columnStatsMetaList)) {
List<String> columnNamesForStats =
columnStatsMetaList.stream().map(ColumnStatsMeta::getColumnName)
.collect(Collectors.toList());
List<Type> columnTypesForStats =
columnNamesForStats.stream()
.map(x -> StatisticUtils.getQueryStatisticsColumnType(table, x))
.collect(Collectors.toList());
String statsSql = StatisticSQLBuilder.buildQueryFullStatisticsSQL(
dbId, tableId, columnNamesForStats, columnTypesForStats);
List<TStatisticData> columnStats = executeStatisticDQL(context, statsSql);

// overwrite table-stats
Map<String, TStatisticData> merged = tableStats.stream()
.collect(Collectors.toMap(TStatisticData::getColumnName, Function.identity()));
columnStats.forEach(x -> merged.put(x.getColumnName(), x));
tableStats = Lists.newArrayList(merged.values());
}
}
return tableStats;
}

public void dropTableStatistics(ConnectContext statsConnectCtx, Long tableIds,
Expand Down Expand Up @@ -336,13 +363,29 @@ public AnalyzeStatus collectStatistics(ConnectContext statsConnectCtx,
}
}
} else {
AnalyzeMgr analyzeMgr = GlobalStateMgr.getCurrentState().getAnalyzeMgr();
if (table.isNativeTableOrMaterializedView()) {
long existUpdateRows = GlobalStateMgr.getCurrentState().getAnalyzeMgr().getExistUpdateRows(table.getId());
BasicStatsMeta basicStatsMeta = new BasicStatsMeta(db.getId(), table.getId(),
statsJob.getColumnNames(), statsJob.getType(), analyzeStatus.getEndTime(),
statsJob.getProperties(), existUpdateRows);
GlobalStateMgr.getCurrentState().getAnalyzeMgr().addBasicStatsMeta(basicStatsMeta);
GlobalStateMgr.getCurrentState().getAnalyzeMgr().refreshBasicStatisticsCache(
BasicStatsMeta basicStatsMeta = analyzeMgr.getTableBasicStatsMeta(table.getId());
if (basicStatsMeta == null) {
long existUpdateRows = analyzeMgr.getExistUpdateRows(table.getId());
basicStatsMeta = new BasicStatsMeta(db.getId(), table.getId(),
statsJob.getColumnNames(), statsJob.getType(), analyzeStatus.getEndTime(),
statsJob.getProperties(), existUpdateRows);
}

if (!statsJob.isAllColumns()) {
for (String column : statsJob.getColumnNames()) {
ColumnStatsMeta meta =
new ColumnStatsMeta(column, statsJob.getType(), analyzeStatus.getEndTime());
basicStatsMeta.addColumnStatsMeta(meta);
}
} else {
basicStatsMeta.updateStats(
statsJob.getType(), analyzeStatus.getEndTime(), statsJob.getProperties());
}

analyzeMgr.addBasicStatsMeta(basicStatsMeta);
analyzeMgr.refreshBasicStatisticsCache(
basicStatsMeta.getDbId(), basicStatsMeta.getTableId(), basicStatsMeta.getColumns(),
refreshAsync);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,33 +54,36 @@ public abstract class StatisticsCollectJob {
protected final Table table;
protected final List<String> columnNames;
protected final List<Type> columnTypes;
protected final boolean allColumns;

protected final StatsConstants.AnalyzeType type;
protected final StatsConstants.ScheduleType scheduleType;
protected final Map<String, String> properties;

protected StatisticsCollectJob(Database db, Table table, List<String> columnNames,
StatsConstants.AnalyzeType type, StatsConstants.ScheduleType scheduleType,
Map<String, String> properties) {
Map<String, String> properties, boolean allColumns) {
this.db = db;
this.table = table;
this.columnNames = columnNames;
this.columnTypes = columnNames.stream().map(table::getColumn).map(Column::getType).collect(Collectors.toList());
this.type = type;
this.scheduleType = scheduleType;
this.properties = properties;
this.allColumns = allColumns;
}

protected StatisticsCollectJob(Database db, Table table, List<String> columnNames, List<Type> columnTypes,
StatsConstants.AnalyzeType type, StatsConstants.ScheduleType scheduleType,
Map<String, String> properties) {
Map<String, String> properties, boolean allColumns) {
this.db = db;
this.table = table;
this.columnNames = columnNames;
this.columnTypes = columnTypes;
this.type = type;
this.scheduleType = scheduleType;
this.properties = properties;
this.allColumns = allColumns;
}

protected static final VelocityEngine DEFAULT_VELOCITY_ENGINE;
Expand Down Expand Up @@ -113,6 +116,10 @@ public List<String> getColumnNames() {
return columnNames;
}

public boolean isAllColumns() {
return allColumns;
}

public StatsConstants.AnalyzeType getType() {
return type;
}
Expand Down
Loading

0 comments on commit f5e89ff

Please sign in to comment.