Skip to content

Commit

Permalink
[feature](hive)support hive4 acid
Browse files Browse the repository at this point in the history
  • Loading branch information
hubgeter committed Nov 15, 2024
1 parent 88a6268 commit b65c8e1
Show file tree
Hide file tree
Showing 8 changed files with 320 additions and 65 deletions.
31 changes: 28 additions & 3 deletions be/src/vec/exec/format/table/transactional_hive_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "transactional_hive_common.h"
#include "vec/data_types/data_type_factory.hpp"
#include "vec/exec/format/orc/vorc_reader.h"
#include <re2/re2.h>

namespace doris {

Expand Down Expand Up @@ -108,15 +109,39 @@ Status TransactionalHiveReader::init_row_filters(const TFileRangeDesc& range,
int64_t num_delete_files = 0;
std::filesystem::path file_path(data_file_path);


//See https://github.com/apache/hive/commit/ffee30e6267e85f00a22767262192abb9681cfb7#diff-5fe26c36b4e029dcd344fc5d484e7347R165
// bucket_xxx_attemptId => bucket_xxx
// bucket_xxx => bucket_xxx
// Hive may suffix a bucket file name with a task attempt id; see
// https://github.com/apache/hive/commit/ffee30e6267e85f00a22767262192abb9681cfb7#diff-5fe26c36b4e029dcd344fc5d484e7347R165
//   bucket_xxx_attemptId => bucket_xxx
//   bucket_xxx           => bucket_xxx (unchanged)
// Implemented as a plain string scan instead of compiling an RE2 pattern
// ("^bucket_\d+_\d+$") on every invocation: this lambda runs once per
// delete-delta file name in the loop below, and regex construction is
// comparatively expensive.
auto remove_bucket_attemptId = [](const std::string& str) -> std::string {
    static const std::string prefix = "bucket_";
    // True iff `s` is a non-empty run of ASCII digits (the \d+ of the pattern).
    auto all_digits = [](const std::string& s) {
        return !s.empty() && std::all_of(s.begin(), s.end(), [](unsigned char c) {
                   return std::isdigit(c) != 0;
               });
    };
    if (str.compare(0, prefix.size(), prefix) != 0) {
        return str;
    }
    const size_t sep = str.rfind('_');
    if (sep == std::string::npos || sep < prefix.size()) {
        // No '_' after the "bucket_" prefix, so no attempt id to strip.
        return str;
    }
    if (all_digits(str.substr(prefix.size(), sep - prefix.size())) &&
        all_digits(str.substr(sep + 1))) {
        return str.substr(0, sep); // drop "_<attemptId>"
    }
    return str;
};


SCOPED_TIMER(_transactional_orc_profile.delete_files_read_time);
for (auto& delete_delta : range.table_format_params.transactional_hive_params.delete_deltas) {
const std::string file_name = file_path.filename().string();
auto iter = std::find(delete_delta.file_names.begin(), delete_delta.file_names.end(),
file_name);

//need opt.
std::vector<std::string> delete_delta_file_names;
for (const auto& x : delete_delta.file_names){
delete_delta_file_names.emplace_back(remove_bucket_attemptId(x));
}
auto iter = std::find(delete_delta_file_names.begin(), delete_delta_file_names.end(),
remove_bucket_attemptId(file_name));
if (iter == delete_delta.file_names.end()) {
continue;
}
auto delete_file = fmt::format("{}/{}", delete_delta.directory_location, file_name);
auto delete_file = fmt::format("{}/{}", delete_delta.directory_location,
delete_delta.file_names[iter-delete_delta_file_names.begin()]);

TFileRangeDesc delete_range;
// must use __set() method to make sure __isset is true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.doris.datasource.TableMetadata;
import org.apache.doris.datasource.hive.event.MetastoreNotificationFetchException;

import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.common.ValidWriteIdList;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
Expand Down Expand Up @@ -84,6 +85,8 @@ NotificationEventResponse getNextNotification(long lastEventId,

ValidWriteIdList getValidWriteIds(String fullTableName, long currentTransactionId);

ValidTxnList getValidTxns();

void acquireSharedLock(String queryId, long txnId, String user, TableName tblName,
List<String> partitionNames, long timeoutMs);

Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.doris.common.UserException;

import com.google.common.collect.Lists;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.common.ValidWriteIdList;

import java.util.List;
Expand Down Expand Up @@ -61,6 +62,10 @@ public boolean isFullAcid() {
return isFullAcid;
}

/**
 * Returns the currently-valid transaction list as reported by the given client.
 *
 * NOTE(review): unlike {@code getValidWriteIds} below, the result is not
 * cached on this object — every call goes back to the client. Confirm that
 * is intentional for snapshot consistency.
 *
 * @param client the cached HMS client to query
 * @return the valid transaction list
 */
public ValidTxnList getValidTxns(HMSCachedClient client) {
    final ValidTxnList currentValidTxns = client.getValidTxns();
    return currentValidTxns;
}

public ValidWriteIdList getValidWriteIds(HMSCachedClient client) {
if (validWriteIdList == null) {
TableName tableName = new TableName(hiveTable.getCatalog().getName(), hiveTable.getDbName(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import com.google.common.collect.ImmutableList.Builder;
import com.google.common.collect.ImmutableMap;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.common.ValidWriteIdList;
import org.apache.hadoop.hive.metastore.IMetaStoreClient.NotificationFilter;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
Expand Down Expand Up @@ -527,6 +528,11 @@ public ValidWriteIdList getValidWriteIds(String fullTableName, long currentTrans
throw new HMSClientException("Do not support in PostgreSQLJdbcHMSCachedClient.");
}

@Override
public ValidTxnList getValidTxns() {
    // Hive ACID transaction metadata is not exposed through this JDBC-backed
    // client, so this operation always fails fast.
    final String unsupportedMsg = "Do not support in PostgreSQLJdbcHMSCachedClient.";
    throw new HMSClientException(unsupportedMsg);
}

@Override
public void acquireSharedLock(String queryId, long txnId, String user, TableName tblName,
List<String> partitionNames, long timeoutMs) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,21 @@ public ValidWriteIdList getValidWriteIds(String fullTableName, long currentTrans
}
}

/**
 * Fetches the transactions that are currently valid from the metastore via
 * {@code IMetaStoreClient#getValidTxns}, executed under the configured UGI.
 *
 * @return the valid transaction list reported by the metastore
 * @throws HMSClientException if the underlying RPC fails
 */
@Override
public ValidTxnList getValidTxns() {
    try (ThriftHMSClient client = getClient()) {
        try {
            return ugiDoAs(client.client::getValidTxns);
        } catch (Exception e) {
            // Record the failure on the pooled client — presumably so the
            // broken connection can be invalidated; confirm against the pool.
            client.setThrowable(e);
            throw e;
        }
    } catch (Exception e) {
        // Fixed the original, garbled message
        // ("Catalog Get the transactions that are currently valid fail.").
        throw new HMSClientException("Catalog get currently valid transactions failed. "
                + "Exception = {}", e);
    }
}

private LockResponse checkLock(long lockId) {
try (ThriftHMSClient client = getClient()) {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import lombok.Setter;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.common.ValidWriteIdList;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Table;
Expand Down Expand Up @@ -345,8 +346,11 @@ private List<FileCacheValue> getFileSplitByTransaction(HiveMetaStoreCache cache,
}
ValidWriteIdList validWriteIds = hiveTransaction.getValidWriteIds(
((HMSExternalCatalog) hmsTable.getCatalog()).getClient());
return cache.getFilesByTransaction(partitions, validWriteIds,
hiveTransaction.isFullAcid(), skipCheckingAcidVersionFile, hmsTable.getId(), bindBrokerName);
ValidTxnList validTxnList = hiveTransaction.getValidTxns(
((HMSExternalCatalog) hmsTable.getCatalog()).getClient());

return cache.getFilesByTransaction(partitions, validWriteIds, validTxnList,
hiveTransaction.isFullAcid(), skipCheckingAcidVersionFile, hmsTable.getId(), bindBrokerName);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.doris.datasource.hive.event.MetastoreNotificationFetchException;

import com.google.common.collect.ImmutableList;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.common.ValidWriteIdList;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
Expand Down Expand Up @@ -206,6 +207,11 @@ public ValidWriteIdList getValidWriteIds(String fullTableName, long currentTrans
return null;
}

@Override
public ValidTxnList getValidTxns() {
    // Stub client: there is no metastore behind it, so no transaction
    // snapshot is available. NOTE(review): callers receive null directly —
    // confirm downstream code tolerates a null ValidTxnList.
    return null;
}

@Override
public void acquireSharedLock(String queryId, long txnId, String user, TableName tblName, List<String> partitionNames, long timeoutMs) {

Expand Down

0 comments on commit b65c8e1

Please sign in to comment.