Skip to content

Commit

Permalink
[Fix](multi catalog)(planner) Fix external table statistic collection bug (#16486)
Browse files Browse the repository at this point in the history

Add index id to column statistic id. Refresh statistic cache after analyze.
  • Loading branch information
Jibing-Li authored and morningman committed Feb 8, 2023
1 parent 204ca0b commit cc11695
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ public Partition getPartition(List<String> partitionValues) {

@Override
public List<Column> initSchema() {
makeSureInitialized();
List<Column> columns;
List<FieldSchema> schema = ((HMSExternalCatalog) catalog).getClient().getSchema(dbName, name);
if (dlaType.equals(DLAType.ICEBERG)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

import com.google.common.base.Joiner;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.json.simple.JSONObject;
Expand All @@ -38,6 +39,7 @@
import java.io.Serializable;
import java.lang.reflect.Field;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.doris.statistics;

import org.apache.doris.catalog.Env;
import org.apache.doris.common.FeConstants;
import org.apache.doris.datasource.HMSExternalCatalog;
import org.apache.doris.qe.AutoCloseConnectContext;
Expand Down Expand Up @@ -50,6 +51,7 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
public static final String NUM_ROWS = "numRows";
public static final String NUM_FILES = "numFiles";
public static final String TIMESTAMP = "transient_lastDdlTime";
public static final String DELIMITER = "-";

public HiveAnalysisTask(AnalysisTaskScheduler analysisTaskScheduler, AnalysisTaskInfo info) {
super(analysisTaskScheduler, info);
Expand Down Expand Up @@ -81,7 +83,7 @@ protected void getColumnStatsByMeta() throws Exception {
Map<String, String> parameters = table.getRemoteTable().getParameters();
// Collect table level row count, null number and timestamp.
setParameterData(parameters, params);
params.put("id", String.valueOf(tbl.getId()) + "-" + String.valueOf(col.getName()));
params.put("id", genColumnStatId(tbl.getId(), -1, col.getName(), null));
List<ColumnStatisticsObj> tableStats = table.getHiveTableColumnStats(columns);
// Collect table level ndv, nulls, min and max. tableStats contains at most 1 item;
for (ColumnStatisticsObj tableStat : tableStats) {
Expand Down Expand Up @@ -113,7 +115,7 @@ protected void getColumnStatsByMeta() throws Exception {
parameters = partition.getParameters();
// Collect row count, null number and timestamp.
setParameterData(parameters, params);
params.put("id", String.valueOf(tbl.getId()) + "-" + String.valueOf(col.getName()) + "-" + partName);
params.put("id", genColumnStatId(tbl.getId(), -1, col.getName(), partName));
params.put("partId", partName);
List<ColumnStatisticsObj> value = entry.getValue();
Preconditions.checkState(value.size() == 1);
Expand All @@ -134,6 +136,7 @@ protected void getColumnStatsByMeta() throws Exception {
this.stmtExecutor.execute();
}
}
Env.getCurrentEnv().getStatisticsCache().refreshSync(tbl.getId(), -1, col.getName());
}

private void getStatData(ColumnStatisticsData data, Map<String, String> params) {
Expand Down Expand Up @@ -201,4 +204,18 @@ private void setParameterData(Map<String, String> parameters, Map<String, String
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
params.put("update_time", sdf.format(new Date(timestamp * 1000)));
}

/**
 * Builds the statistic id for a column in the form
 * {@code tableId-indexId-columnName}, appending {@code -partitionName}
 * when a partition name is supplied.
 *
 * @param tableId       id of the table the column belongs to
 * @param indexId       id of the index ({@code -1} when not applicable)
 * @param columnName    name of the column
 * @param partitionName partition name, or {@code null} for table-level stats
 * @return the delimiter-joined statistic id
 */
private String genColumnStatId(long tableId, long indexId, String columnName, String partitionName) {
    String baseId = tableId + DELIMITER + indexId + DELIMITER + columnName;
    // Partition-level ids carry an extra trailing segment.
    return partitionName == null ? baseId : baseId + DELIMITER + partitionName;
}
}

0 comments on commit cc11695

Please sign in to comment.