Commit 4dd3f0d

fix

wuwenchi committed Dec 23, 2024
1 parent fa2276e

Showing 6 changed files with 402 additions and 14 deletions.
ExternalMetaCacheMgr.java

@@ -25,6 +25,7 @@
import org.apache.doris.datasource.hive.HMSExternalTable;
import org.apache.doris.datasource.hive.HiveMetaStoreCache;
import org.apache.doris.datasource.hudi.source.HudiCachedFsViewProcessor;
+import org.apache.doris.datasource.hudi.source.HudiCachedMetaClientProcessor;
import org.apache.doris.datasource.hudi.source.HudiMetadataCacheMgr;
import org.apache.doris.datasource.hudi.source.HudiPartitionProcessor;
import org.apache.doris.datasource.iceberg.IcebergMetadataCache;
@@ -172,6 +173,10 @@ public HudiCachedFsViewProcessor getFsViewProcessor(ExternalCatalog catalog) {
        return hudiMetadataCacheMgr.getFsViewProcessor(catalog);
    }

+    public HudiCachedMetaClientProcessor getMetaClientProcessor(ExternalCatalog catalog) {
+        return hudiMetadataCacheMgr.getHudiMetaClientProcessor(catalog);
+    }
+
    public HudiMetadataCacheMgr getHudiMetadataCacheMgr() {
        return hudiMetadataCacheMgr;
    }
HMSExternalTable.java

@@ -120,9 +120,6 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableIf

    private static final String USE_HIVE_SYNC_PARTITION = "use_hive_sync_partition";

-    private HoodieTableMetaClient hudiClient = null;
-    private final byte[] hudiClientLock = new byte[0];
-
    static {
        SUPPORTED_HIVE_FILE_FORMATS = Sets.newHashSet();
        SUPPORTED_HIVE_FILE_FORMATS.add("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat");
@@ -1024,16 +1021,13 @@ public void beforeMTMVRefresh(MTMV mtmv) throws DdlException {
    }

    public HoodieTableMetaClient getHudiClient() {
-        if (hudiClient != null) {
-            return hudiClient;
-        }
-        synchronized (hudiClientLock) {
-            if (hudiClient != null) {
-                return hudiClient;
-            }
-            hudiClient = HudiUtils.buildHudiTableMetaClient(
-                    getRemoteTable().getSd().getLocation(), catalog.getConfiguration());
-            return hudiClient;
-        }
+        return Env.getCurrentEnv()
+                .getExtMetaCacheMgr()
+                .getMetaClientProcessor(getCatalog())
+                .getHoodieTableMetaClient(
+                        getDbName(),
+                        getName(),
+                        getRemoteTable().getSd().getLocation(),
+                        getCatalog().getConfiguration());
    }
}
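The removed double-checked locking is no longer needed because the meta client now lives in a shared Caffeine LoadingCache, which computes each key at most once while concurrent callers wait for the same load. A minimal, self-contained sketch of that property (class and key names here are illustrative, not Doris code):

import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.LoadingCache;
import java.util.concurrent.atomic.AtomicInteger;

public class SingleLoadDemo {
    public static void main(String[] args) throws InterruptedException {
        AtomicInteger loads = new AtomicInteger();
        LoadingCache<String, String> cache = Caffeine.newBuilder()
                .maximumSize(100)
                .build(key -> {
                    loads.incrementAndGet();   // count loader invocations
                    Thread.sleep(50);          // simulate an expensive meta client build
                    return "metaClient-for-" + key;
                });

        Runnable lookup = () -> cache.get("db.tbl");   // same key from two threads
        Thread t1 = new Thread(lookup);
        Thread t2 = new Thread(lookup);
        t1.start();
        t2.start();
        t1.join();
        t2.join();
        System.out.println("loader ran " + loads.get() + " time(s)");   // prints 1
    }
}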
HiveMetaStoreClientHelper.java

@@ -807,6 +807,15 @@ public static Schema getHudiTableSchema(HMSExternalTable table) {
        HoodieTableMetaClient metaClient = table.getHudiClient();
        TableSchemaResolver schemaUtil = new TableSchemaResolver(metaClient);
        Schema hudiSchema;
+
+        // The timeline must be reloaded here.
+        // When Hudi resolves the schema in `getTableAvroSchema`, it reads the commit file
+        // recorded in the `metaClient`. Because the `metaClient` comes from a cache, that
+        // commit file may be an old one that the Hudi clean task has already deleted, in
+        // which case an error is reported.
+        // Reloading the active timeline lets us read the latest commit files instead.
+        metaClient.reloadActiveTimeline();
+
        try {
            hudiSchema = HoodieAvroUtils.createHoodieWriteSchema(schemaUtil.getTableAvroSchema());
        } catch (Exception e) {
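For reference, the reload-then-resolve pattern the new comment describes, condensed into one hedged helper; it uses only Hudi APIs that already appear in this diff, and the wrapper class itself is illustrative:

import org.apache.avro.Schema;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.TableSchemaResolver;

public final class HudiSchemaExample {
    // Resolve the table's Avro write schema from a possibly cached meta client.
    public static Schema resolve(HoodieTableMetaClient cachedClient) throws Exception {
        // A cached client can still reference commit files the clean task has removed;
        // re-reading the active timeline points schema resolution at the newest commits.
        cachedClient.reloadActiveTimeline();
        TableSchemaResolver resolver = new TableSchemaResolver(cachedClient);
        return HoodieAvroUtils.createHoodieWriteSchema(resolver.getTableAvroSchema());
    }
}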
HudiCachedMetaClientProcessor.java (new file)

@@ -0,0 +1,152 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.datasource.hudi.source;

import org.apache.doris.common.CacheFactory;
import org.apache.doris.common.Config;
import org.apache.doris.datasource.ExternalMetaCacheMgr;
import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper;

import com.github.benmanes.caffeine.cache.LoadingCache;
import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.storage.hadoop.HadoopStorageConfiguration;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.Map;
import java.util.Objects;
import java.util.OptionalLong;
import java.util.concurrent.ExecutorService;

public class HudiCachedMetaClientProcessor {
    private static final Logger LOG = LogManager.getLogger(HudiCachedMetaClientProcessor.class);

    private final LoadingCache<HudiCachedClientKey, HoodieTableMetaClient> hudiTableMetaClientCache;

    public HudiCachedMetaClientProcessor(ExecutorService executor) {
        CacheFactory partitionCacheFactory = new CacheFactory(
                // 28800 seconds = 8 hours; the config value below is in minutes and is
                // converted to seconds.
                OptionalLong.of(28800L),
                OptionalLong.of(Config.external_cache_expire_time_minutes_after_access * 60),
                Config.max_external_table_cache_num,
                true,
                null);

        this.hudiTableMetaClientCache =
                partitionCacheFactory.buildCache(
                        this::createHoodieTableMetaClient,
                        null,
                        executor);
    }

    private HoodieTableMetaClient createHoodieTableMetaClient(HudiCachedClientKey key) {
        LOG.debug("create hudi table meta client for {}.{}", key.getDbName(), key.getTbName());
        HadoopStorageConfiguration hadoopStorageConfiguration = new HadoopStorageConfiguration(key.getConf());
        return HiveMetaStoreClientHelper.ugiDoAs(
                key.getConf(),
                () -> HoodieTableMetaClient
                        .builder()
                        .setConf(hadoopStorageConfiguration)
                        .setBasePath(key.getHudiBasePath())
                        .build());
    }

    public HoodieTableMetaClient getHoodieTableMetaClient(
            String dbName, String tbName, String hudiBasePath, Configuration conf) {
        return hudiTableMetaClientCache.get(new HudiCachedClientKey(dbName, tbName, hudiBasePath, conf));
    }

    public void cleanUp() {
        hudiTableMetaClientCache.cleanUp();
    }

    public void invalidateAll() {
        hudiTableMetaClientCache.invalidateAll();
    }

    public void invalidateDbCache(String dbName) {
        hudiTableMetaClientCache.asMap().forEach((k, v) -> {
            if (k.getDbName().equals(dbName)) {
                hudiTableMetaClientCache.invalidate(k);
            }
        });
    }

    public void invalidateTableCache(String dbName, String tbName) {
        hudiTableMetaClientCache.asMap().forEach((k, v) -> {
            if (k.getDbName().equals(dbName) && k.getTbName().equals(tbName)) {
                hudiTableMetaClientCache.invalidate(k);
            }
        });
    }

    private static class HudiCachedClientKey {
        String dbName;
        String tbName;
        String hudiBasePath;
        Configuration conf;

        public HudiCachedClientKey(String dbName, String tbName, String hudiBasePath, Configuration conf) {
            this.dbName = dbName;
            this.tbName = tbName;
            this.hudiBasePath = hudiBasePath;
            this.conf = conf;
        }

        public String getDbName() {
            return dbName;
        }

        public String getTbName() {
            return tbName;
        }

        public String getHudiBasePath() {
            return hudiBasePath;
        }

        public Configuration getConf() {
            return conf;
        }

        // Note: `conf` takes no part in equality, so lookups for the same table hit the
        // same cache entry even when callers pass a freshly built Configuration.
        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || getClass() != o.getClass()) {
                return false;
            }
            HudiCachedClientKey that = (HudiCachedClientKey) o;
            return Objects.equals(dbName, that.dbName) && Objects.equals(tbName, that.tbName)
                    && Objects.equals(hudiBasePath, that.hudiBasePath);
        }

        @Override
        public int hashCode() {
            return Objects.hash(dbName, tbName, hudiBasePath);
        }
    }

    public Map<String, Map<String, String>> getCacheStats() {
        Map<String, Map<String, String>> res = Maps.newHashMap();
        res.put("hudi_meta_client_cache", ExternalMetaCacheMgr.getCacheStats(hudiTableMetaClientCache.stats(),
                hudiTableMetaClientCache.estimatedSize()));
        return res;
    }
}
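A usage sketch tying the new processor into the lookup chain the patch wires up in HMSExternalTable.getHudiClient(); the database, table, and base-path literals are placeholders, while every call in the chain comes from this diff:

// `catalog` is an HMS-backed ExternalCatalog.
HoodieTableMetaClient metaClient = Env.getCurrentEnv()
        .getExtMetaCacheMgr()
        .getMetaClientProcessor(catalog)
        .getHoodieTableMetaClient(
                "demo_db",
                "demo_tbl",
                "hdfs://nn:8020/warehouse/demo_db/demo_tbl",   // placeholder base path
                catalog.getConfiguration());

Because the cache key ignores the Configuration, repeated lookups for the same table share one HoodieTableMetaClient even when each call builds a fresh Configuration.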
HudiMetadataCacheMgr.java

@@ -28,6 +28,8 @@
public class HudiMetadataCacheMgr {
    private final Map<Long, HudiPartitionProcessor> partitionProcessors = Maps.newConcurrentMap();
    private final Map<Long, HudiCachedFsViewProcessor> fsViewProcessors = Maps.newConcurrentMap();
+    private final Map<Long, HudiCachedMetaClientProcessor> metaClientProcessors = Maps.newConcurrentMap();
+
    private final ExecutorService executor;

    public HudiMetadataCacheMgr(ExecutorService executor) {
Expand All @@ -54,6 +56,16 @@ public HudiCachedFsViewProcessor getFsViewProcessor(ExternalCatalog catalog) {
        });
    }

+    public HudiCachedMetaClientProcessor getHudiMetaClientProcessor(ExternalCatalog catalog) {
+        return metaClientProcessors.computeIfAbsent(catalog.getId(), catalogId -> {
+            if (catalog instanceof HMSExternalCatalog) {
+                return new HudiCachedMetaClientProcessor(executor);
+            } else {
+                throw new RuntimeException("Hudi only supports hive(or compatible) catalog now");
+            }
+        });
+    }
+
    public void removeCache(long catalogId) {
        HudiPartitionProcessor partitionProcessor = partitionProcessors.remove(catalogId);
        if (partitionProcessor != null) {
@@ -63,6 +75,10 @@
        if (fsViewProcessor != null) {
            fsViewProcessor.cleanUp();
        }
+        HudiCachedMetaClientProcessor metaClientProcessor = metaClientProcessors.remove(catalogId);
+        if (metaClientProcessor != null) {
+            metaClientProcessor.cleanUp();
+        }
    }

    public void invalidateCatalogCache(long catalogId) {
@@ -74,6 +90,10 @@
        if (fsViewProcessor != null) {
            fsViewProcessor.invalidateAll();
        }
+        HudiCachedMetaClientProcessor metaClientProcessor = metaClientProcessors.get(catalogId);
+        if (metaClientProcessor != null) {
+            metaClientProcessor.invalidateAll();
+        }
    }

    public void invalidateDbCache(long catalogId, String dbName) {
@@ -85,6 +105,10 @@
        if (fsViewProcessor != null) {
            fsViewProcessor.invalidateDbCache(dbName);
        }
+        HudiCachedMetaClientProcessor metaClientProcessor = metaClientProcessors.get(catalogId);
+        if (metaClientProcessor != null) {
+            metaClientProcessor.invalidateDbCache(dbName);
+        }
    }

    public void invalidateTableCache(long catalogId, String dbName, String tblName) {
@@ -96,6 +120,10 @@
        if (fsViewProcessor != null) {
            fsViewProcessor.invalidateTableCache(dbName, tblName);
        }
+        HudiCachedMetaClientProcessor metaClientProcessor = metaClientProcessors.get(catalogId);
+        if (metaClientProcessor != null) {
+            metaClientProcessor.invalidateTableCache(dbName, tblName);
+        }
    }

    public Map<String, Map<String, String>> getCacheStats(ExternalCatalog catalog) {
@@ -106,6 +134,10 @@

        HudiCachedFsViewProcessor fsViewProcessor = getFsViewProcessor(catalog);
        res.putAll(fsViewProcessor.getCacheStats());
+
+        HudiCachedMetaClientProcessor metaClientProcessor = getHudiMetaClientProcessor(catalog);
+        res.putAll(metaClientProcessor.getCacheStats());
+
        return res;
    }
}
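A small hedged example of reading the new statistics back out; `mgr` and `catalog` are stand-ins for an existing HudiMetadataCacheMgr and an HMS-backed ExternalCatalog:

// Dump every Hudi cache's stats for one catalog, including the new
// "hudi_meta_client_cache" entry registered above.
Map<String, Map<String, String>> stats = mgr.getCacheStats(catalog);
stats.forEach((cacheName, metrics) -> System.out.println(cacheName + " -> " + metrics));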