diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..9fe95928
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+*.iml
+*.ipr
+*.iws
+.idea/
+target/
\ No newline at end of file
diff --git a/omnidata/omnidata-openlookeng-connector/.gitignore b/omnidata/omnidata-openlookeng-connector/.gitignore
new file mode 100644
index 00000000..9fe95928
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/.gitignore
@@ -0,0 +1,5 @@
+*.iml
+*.ipr
+*.iws
+.idea/
+target/
\ No newline at end of file
diff --git a/omnidata/omnidata-openlookeng-connector/README.md b/omnidata/omnidata-openlookeng-connector/README.md
index 193fc2b5..8b37416e 100644
--- a/omnidata/omnidata-openlookeng-connector/README.md
+++ b/omnidata/omnidata-openlookeng-connector/README.md
@@ -1 +1,44 @@
-# omnidata-openlookeng-connector
\ No newline at end of file
+# OmniData Connector
+
+## Overview
+
+OmniData Connector is a data source connector developed for openLooKeng.
+
+The OmniData connector allows querying data sources on which the OmniData server is deployed. It pushes down operators such as filters to the OmniData service running close to the storage, which improves the performance of storage-compute-separated systems.
+
+## Building OmniData Connector
+
+1. OmniData Connector is built on the openLooKeng architecture, so build openLooKeng first as a non-root user.
+2. Run the following command from the project root directory:
+`mvn clean install -Dos.detected.arch="aarch64"`
+The build produces omnidata-openlookeng-connector-*.zip under the omnidata-openlookeng-connector/connector/target/ directory.
+OmniData Connector has a comprehensive set of unit tests that can take several minutes to run. You can skip the tests when building:
+`mvn clean install -DskipTests -Dos.detected.arch="aarch64"`
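+If you are building on a different architecture, pass that value instead (assuming the matching boostkit-omnidata artifacts are available for it), for example:
+`mvn clean install -DskipTests -Dos.detected.arch="x86_64"`
+The bundled build.sh derives this value from `lscpu`.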
+
+## Deploying OmniData Connector
+
+1. Unzip omnidata-openlookeng-connector-*.zip into the plugin directory of openLooKeng.
+2. Obtain the latest OmniData software package and replace the boostkit-omnidata-client-\*.jar and boostkit-omnidata-core-\*.jar files in the omnidata-openlookeng-connector-\* directory with the ones it provides.
+3. Set "connector.name=omnidata-openlookeng" in the openLooKeng catalog properties file, as shown in the example below.
+
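+For reference, a minimal catalog properties file might look like the following. Only the `connector.name` entry is prescribed by step 3; the file name and the metastore URI are illustrative placeholders.
+
+```properties
+# etc/catalog/omnidata.properties -- example path; adjust to your deployment
+connector.name=omnidata-openlookeng
+# The connector is Hive-based, so the usual Hive connector properties apply,
+# for example the metastore location (placeholder value shown):
+hive.metastore.uri=thrift://example-metastore-host:9083
+```
+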
+## Contribution Guidelines
+
+Track bugs and feature requests via GitHub issues.
+
+## More Information
+
+For further assistance, send an email to kunpengcompute@huawei.com.
+
diff --git a/omnidata/omnidata-openlookeng-connector/build.sh b/omnidata/omnidata-openlookeng-connector/build.sh
new file mode 100644
index 00000000..25ef7e1e
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/build.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+# Build the OmniData openLooKeng Connector packages
+# Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
+
+set -e
+
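+# Detect the CPU architecture (e.g. aarch64 or x86_64) so Maven resolves the architecture-specific
+# boostkit-omnidata artifacts (classified by dep.os.arch in the pom); -T12 builds with 12 parallel threads.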
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+mvn -T12 clean install -Dos.detected.arch="${cpu_name}"
+
diff --git a/omnidata/omnidata-openlookeng-connector/connector/pom.xml b/omnidata/omnidata-openlookeng-connector/connector/pom.xml
new file mode 100644
index 00000000..2e9e819a
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/pom.xml
@@ -0,0 +1,791 @@
+
+
+ 4.0.0
+
+
+ io.hetu.core
+ presto-root
+ 1.4.0
+
+
+ openlookeng-omnidata-connector
+ openLooKeng OmniData Connector
+ hetu-plugin
+
+
+ ${os.detected.arch}
+ 1.35.0
+ 30.1.1-jre
+ 206
+ 9.4.43.v20210629
+
+
+
+
+ com.google.guava
+ guava
+ ${dep.guava.version}
+
+
+ com.google.errorprone
+ error_prone_annotations
+
+
+
+
+
+ org.bouncycastle
+ bcprov-jdk15on
+ 1.68
+ runtime
+
+
+
+
+ io.grpc
+ grpc-api
+ ${dep.grpc.version}
+ runtime
+
+
+ com.google.errorprone
+ error_prone_annotations
+
+
+
+
+ io.grpc
+ grpc-protobuf
+ ${dep.grpc.version}
+ runtime
+
+
+ com.google.errorprone
+ error_prone_annotations
+
+
+
+
+ io.grpc
+ grpc-stub
+ ${dep.grpc.version}
+ runtime
+
+
+ com.google.errorprone
+ error_prone_annotations
+
+
+
+
+ io.grpc
+ grpc-netty-shaded
+ ${dep.grpc.version}
+ runtime
+
+
+ com.google.errorprone
+ error_prone_annotations
+
+
+ com.google.code.gson
+ gson
+
+
+
+
+
+ io.hetu.core
+ presto-plugin-toolkit
+
+
+
+ org.mockito
+ mockito-core
+ test
+
+
+
+ io.hetu.core
+ hetu-common
+
+
+
+ io.hetu.core
+ presto-orc
+
+
+
+ io.hetu.core
+ presto-parquet
+
+
+ org.apache.parquet
+ parquet-encoding
+
+
+
+
+
+ org.apache.hudi
+ hudi-hadoop-mr
+
+
+
+ io.hetu.core
+ presto-expressions
+
+
+
+ io.hetu.core
+ presto-memory-context
+
+
+
+ io.hetu.core
+ presto-rcfile
+
+
+
+ io.prestosql.hadoop
+ hadoop-apache
+
+
+
+ io.prestosql.hive
+ hive-apache
+
+
+
+ org.apache.thrift
+ libthrift
+
+
+
+ io.airlift
+ aircompressor
+
+
+
+ io.airlift
+ stats
+
+
+
+ io.airlift
+ bootstrap
+
+
+
+ io.airlift
+ concurrent
+
+
+
+ io.airlift
+ log
+
+
+
+ io.airlift
+ event
+
+
+
+ io.airlift
+ json
+ ${dep.arilift.version}
+
+
+
+ io.airlift
+ configuration
+
+
+
+ io.airlift
+ parameternames
+ 1.4
+
+
+
+ com.google.inject
+ guice
+ 5.0.1
+
+
+
+ com.google.code.findbugs
+ jsr305
+ true
+
+
+
+ it.unimi.dsi
+ fastutil
+
+
+
+ javax.validation
+ validation-api
+
+
+
+ org.weakref
+ jmxutils
+
+
+
+ joda-time
+ joda-time
+ 2.10.9
+
+
+
+ io.airlift
+ joda-to-java-time-bridge
+ runtime
+
+
+
+ com.amazonaws
+ aws-java-sdk-core
+
+
+
+ com.amazonaws
+ aws-java-sdk-glue
+
+
+
+ com.amazonaws
+ aws-java-sdk-s3
+
+
+
+ com.google.cloud.bigdataoss
+ util
+
+
+
+ com.google.cloud.bigdataoss
+ gcsio
+
+
+
+ com.google.cloud.bigdataoss
+ util-hadoop
+
+
+
+ com.google.cloud.bigdataoss
+ gcs-connector
+
+
+
+ com.amazonaws
+ aws-java-sdk-sts
+
+
+
+ org.xerial.snappy
+ snappy-java
+ runtime
+
+
+
+ javax.inject
+ javax.inject
+
+
+
+
+ io.airlift
+ log-manager
+ runtime
+
+
+
+
+ io.hetu.core
+ presto-spi
+ provided
+
+
+ com.google.code.gson
+ gson
+
+
+
+
+
+ io.hetu.core
+ hetu-cube
+ provided
+
+
+
+ io.airlift
+ slice
+ 0.39
+
+
+
+ io.airlift
+ units
+ provided
+
+
+
+ com.fasterxml.jackson.core
+ jackson-annotations
+ provided
+
+
+
+ org.openjdk.jol
+ jol-core
+ provided
+
+
+
+ com.huawei.boostkit
+ boostkit-omnidata-client
+ 1.0.0
+ ${dep.os.arch}
+
+
+ com.google.code.gson
+ gson
+
+
+ com.google.errorprone
+ error_prone_annotations
+
+
+ org.codehaus.mojo
+ animal-sniffer-annotations
+
+
+ io.airlift
+ configuration
+
+
+ log4j
+ log4j
+
+
+ guava
+ com.google.guava
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+
+
+ com.fasterxml.jackson.core
+ jackson-core
+
+
+ com.fasterxml.jackson.core
+ jackson-annotations
+
+
+ snappy-java
+ org.xerial.snappy
+
+
+ slf4j-api
+ org.slf4j
+
+
+ zookeeper
+ org.apache.zookeeper
+
+
+ jcl-over-slf4j
+ org.slf4j
+
+
+ hive-storage-api
+ org.apache.hive
+
+
+ jersey-common
+ org.glassfish.jersey.core
+
+
+ jersey-server
+ org.glassfish.jersey.core
+
+
+ jersey-container-servlet-core
+ org.glassfish.jersey.containers
+
+
+ jul-to-slf4j
+ org.slf4j
+
+
+ javassist
+ org.javassist
+
+
+ arrow-vector
+ org.apache.arrow
+
+
+ arrow-memory
+ org.apache.arrow
+
+
+ hadoop-hdfs
+ org.apache.hadoop
+
+
+ hadoop-mapreduce-client-core
+ org.apache.hadoop
+
+
+ hadoop-common
+ org.apache.hadoop
+
+
+ slf4j-log4j12
+ org.slf4j
+
+
+ leveldbjni-all
+ org.fusesource.leveldbjni
+
+
+ spark-catalyst_2.12
+ org.apache.spark
+
+
+ spark-core_2.12
+ org.apache.spark
+
+
+ spark-kvstore_2.12
+ org.apache.spark
+
+
+ spark-launcher_2.12
+ org.apache.spark
+
+
+ spark-sketch_2.12
+ org.apache.spark
+
+
+ spark-sql_2.12
+ org.apache.spark
+
+
+ spark-tags_2.12
+ org.apache.spark
+
+
+ unused
+ org.spark-project.spark
+
+
+ com.fasterxml.jackson.datatype
+ jackson-datatype-jdk8
+
+
+ com.fasterxml.jackson.datatype
+ jackson-datatype-jsr310
+
+
+ com.fasterxml.jackson.datatype
+ jackson-datatype-guava
+
+
+ com.fasterxml.jackson.datatype
+ jackson-datatype-joda
+
+
+ com.fasterxml.jackson.module
+ jackson-module-parameter-names
+
+
+
+
+
+ com.huawei.boostkit
+ boostkit-omnidata-core
+ 1.0.0
+ ${dep.os.arch}
+
+
+ guava
+ com.google.guava
+
+
+ com.fasterxml.jackson.core
+ jackson-annotations
+
+
+ slf4j-log4j12
+ org.slf4j
+
+
+ logback-classic
+ ch.qos.logback
+
+
+ io.airlift
+ configuration
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+
+
+ org.slf4j
+ slf4j-api
+
+
+ com.fasterxml.jackson.datatype
+ jackson-datatype-jdk8
+
+
+ com.fasterxml.jackson.datatype
+ jackson-datatype-jsr310
+
+
+ com.fasterxml.jackson.datatype
+ jackson-datatype-guava
+
+
+ com.fasterxml.jackson.datatype
+ jackson-datatype-joda
+
+
+ com.fasterxml.jackson.module
+ jackson-module-parameter-names
+
+
+
+
+
+
+ io.hetu.core
+ presto-spi
+ test-jar
+ test
+
+
+ com.google.code.gson
+ gson
+
+
+
+
+ io.airlift
+ testing-mysql-server
+ test
+
+
+ io.hetu.core
+ hetu-metastore
+ test
+
+
+ io.hetu.core
+ presto-main
+ test
+
+
+ org.slf4j
+ log4j-over-slf4j
+
+
+ com.sun
+ tools
+
+
+ org.bouncycastle
+ bcprov-jdk15on
+
+
+
+
+
+ io.hetu.core
+ presto-main
+ test-jar
+ test
+
+
+ org.bouncycastle
+ bcprov-jdk15on
+
+
+
+
+ io.hetu.core
+ presto-client
+ test
+
+
+
+ io.hetu.core
+ presto-parser
+ test
+
+
+
+ io.hetu.core
+ presto-tests
+ test
+
+
+
+ io.hetu.core
+ presto-tpch
+ test
+
+
+
+ io.airlift.tpch
+ tpch
+ test
+
+
+
+ org.jetbrains
+ annotations
+ provided
+
+
+
+ org.testng
+ testng
+ test
+
+
+
+ io.airlift
+ testing
+ test
+
+
+
+ org.assertj
+ assertj-core
+ test
+
+
+
+ org.anarres.lzo
+ lzo-hadoop
+ test
+
+
+
+
+ io.hetu.core
+ presto-benchmark
+ test
+
+
+
+ org.openjdk.jmh
+ jmh-core
+ test
+
+
+
+ org.openjdk.jmh
+ jmh-generator-annprocess
+ test
+
+
+ io.hetu.core
+ hetu-transport
+
+
+
+ io.hetu.core
+ hetu-startree
+ test
+
+
+ org.objenesis
+ objenesis
+
+
+ org.checkerframework
+ checker-qual
+
+
+ com.google.errorprone
+ error_prone_annotations
+
+
+
+
+ org.eclipse.jetty
+ jetty-util
+ ${dep.jetty.version}
+
+
+ io.airlift
+ discovery
+
+
+ io.airlift
+ http-client
+
+
+ javax.annotation
+ javax.annotation-api
+
+
+
+
+
+ default
+
+ true
+
+
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+
+
+
+ **/TestHiveGlueMetastore.java
+
+
+ **/TestFullParquetReader.java
+
+
+
+
+
+
+
+
+ test-hive-glue
+
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+
+
+ **/TestHiveGlueMetastore.java
+
+
+
+
+
+
+
+
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/BackgroundHiveSplitLoader.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/BackgroundHiveSplitLoader.java
new file mode 100644
index 00000000..ee625032
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/BackgroundHiveSplitLoader.java
@@ -0,0 +1,909 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.google.common.base.Suppliers;
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterators;
+import com.google.common.collect.ListMultimap;
+import com.google.common.collect.Streams;
+import com.google.common.io.CharStreams;
+import com.google.common.util.concurrent.ListenableFuture;
+import io.airlift.log.Logger;
+import io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext;
+import io.prestosql.plugin.hive.HiveBucketing.BucketingVersion;
+import io.prestosql.plugin.hive.HiveSplit.BucketConversion;
+import io.prestosql.plugin.hive.HiveVacuumTableHandle.Range;
+import io.prestosql.plugin.hive.metastore.Column;
+import io.prestosql.plugin.hive.metastore.Partition;
+import io.prestosql.plugin.hive.metastore.Table;
+import io.prestosql.plugin.hive.util.ConfigurationUtils;
+import io.prestosql.plugin.hive.util.HiveFileIterator;
+import io.prestosql.plugin.hive.util.HiveFileIterator.NestedDirectoryNotAllowedException;
+import io.prestosql.plugin.hive.util.InternalHiveSplitFactory;
+import io.prestosql.plugin.hive.util.ResumableTask;
+import io.prestosql.plugin.hive.util.ResumableTasks;
+import io.prestosql.spi.PrestoException;
+import io.prestosql.spi.connector.ColumnHandle;
+import io.prestosql.spi.connector.ConnectorSession;
+import io.prestosql.spi.dynamicfilter.DynamicFilter;
+import io.prestosql.spi.predicate.TupleDomain;
+import io.prestosql.spi.resourcegroups.QueryType;
+import io.prestosql.spi.type.TypeManager;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.hive.common.ValidCompactorWriteIdList;
+import org.apache.hadoop.hive.common.ValidWriteIdList;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcFile;
+import org.apache.hadoop.hive.ql.io.orc.Reader;
+import org.apache.hadoop.hive.shims.HadoopShims;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hive.common.util.Ref;
+import org.apache.hudi.hadoop.HoodieParquetInputFormat;
+import org.apache.hudi.hadoop.HoodieROTablePathFilter;
+import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.lang.annotation.Annotation;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Deque;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.OptionalInt;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentLinkedDeque;
+import java.util.concurrent.Executor;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.function.IntPredicate;
+import java.util.function.Supplier;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkState;
+import static com.google.common.util.concurrent.Futures.immediateFuture;
+import static io.prestosql.plugin.hive.HiveSessionProperties.isDynamicFilteringSplitFilteringEnabled;
+import static io.prestosql.plugin.hive.HiveSessionProperties.isForceLocalScheduling;
+import static io.prestosql.plugin.hive.HiveUtil.checkCondition;
+import static io.prestosql.plugin.hive.HiveUtil.getBucketNumber;
+import static io.prestosql.plugin.hive.HiveUtil.getFooterCount;
+import static io.prestosql.plugin.hive.HiveUtil.getHeaderCount;
+import static io.prestosql.plugin.hive.HiveUtil.getInputFormat;
+import static io.prestosql.plugin.hive.HiveUtil.isPartitionFiltered;
+import static io.prestosql.plugin.hive.S3SelectPushdown.shouldEnablePushdownForTable;
+import static io.prestosql.plugin.hive.metastore.MetastoreUtil.getHiveSchema;
+import static io.prestosql.plugin.hive.metastore.MetastoreUtil.getPartitionLocation;
+import static io.prestosql.plugin.hive.util.HiveFileIterator.NestedDirectoryPolicy.FAIL;
+import static io.prestosql.plugin.hive.util.HiveFileIterator.NestedDirectoryPolicy.IGNORED;
+import static io.prestosql.plugin.hive.util.HiveFileIterator.NestedDirectoryPolicy.RECURSE;
+import static io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED;
+import static java.lang.Math.max;
+import static java.lang.String.format;
+import static java.util.Objects.requireNonNull;
+import static org.apache.hadoop.hive.common.FileUtils.HIDDEN_FILES_PATH_FILTER;
+
+public class BackgroundHiveSplitLoader
+ implements HiveSplitLoader
+{
+ private static final Logger LOG = Logger.get(BackgroundHiveSplitLoader.class);
+
+ private static final Pattern DELETE_DELTA_PATTERN = Pattern.compile("delete_delta_(\\d+)_(\\d+)(_\\d+)?");
+
+ private static final ListenableFuture<?> COMPLETED_FUTURE = immediateFuture(null);
+
+ private final Table table;
+ private final TupleDomain<? extends ColumnHandle> compactEffectivePredicate;
+ private final Optional<BucketSplitInfo> tableBucketInfo;
+ private final HdfsEnvironment hdfsEnvironment;
+ private final HdfsContext hdfsContext;
+ private final NamenodeStats namenodeStats;
+ private final DirectoryLister directoryLister;
+ private final int loaderConcurrency;
+ private final boolean recursiveDirWalkerEnabled;
+ private final Executor executor;
+ private final ConnectorSession session;
+ private final ConcurrentLazyQueue<HivePartitionMetadata> partitions;
+ private final Deque<Iterator<InternalHiveSplit>> fileIterators = new ConcurrentLinkedDeque<>();
+ private final Optional<ValidWriteIdList> validWriteIds;
+ private final Supplier<List<Set<DynamicFilter>>> dynamicFilterSupplier;
+ private final Configuration configuration;
+ private final Supplier<HoodieROTablePathFilter> hoodiePathFilterSupplier;
+
+ // Purpose of this lock:
+ // * Write lock: when you need a consistent view across partitions, fileIterators, and hiveSplitSource.
+ // * Read lock: when you need to modify any of the above.
+ // Make sure the lock is held throughout the period during which they may not be consistent with each other.
+ // Details:
+ // * When write lock is acquired, except the holder, no one can do any of the following:
+ // ** poll from (or check empty) partitions
+ // ** poll from (or check empty) or push to fileIterators
+ // ** push to hiveSplitSource
+ // * When any of the above three operations is carried out, either a read lock or a write lock must be held.
+ // * When a series of operations involving two or more of the above three operations are carried out, the lock
+ // must be continuously held throughout the series of operations.
+ // Implications:
+ // * if you hold a read lock but not a write lock, you can do any of the above three operations, but you may
+ // see a series of operations involving two or more of the operations carried out half way.
+ private final ReentrantReadWriteLock taskExecutionLock = new ReentrantReadWriteLock();
+
+ private HiveSplitSource hiveSplitSource;
+ private volatile boolean stopped;
+ private Optional<QueryType> queryType;
+ private Map<String, Object> queryInfo;
+ private TypeManager typeManager;
+ private JobConf jobConf;
+
+ private final Map cachedDynamicFilters = new ConcurrentHashMap<>();
+
+ public BackgroundHiveSplitLoader(
+ Table table,
+ Iterable<HivePartitionMetadata> partitions,
+ TupleDomain<? extends ColumnHandle> compactEffectivePredicate,
+ Optional<BucketSplitInfo> tableBucketInfo,
+ ConnectorSession session,
+ HdfsEnvironment hdfsEnvironment,
+ NamenodeStats namenodeStats,
+ DirectoryLister directoryLister,
+ Executor executor,
+ int loaderConcurrency,
+ boolean recursiveDirWalkerEnabled,
+ Optional<ValidWriteIdList> validWriteIds,
+ Supplier<List<Set<DynamicFilter>>> dynamicFilterSupplier,
+ Optional<QueryType> queryType,
+ Map<String, Object> queryInfo,
+ TypeManager typeManager)
+ {
+ this.table = table;
+ this.compactEffectivePredicate = compactEffectivePredicate;
+ this.tableBucketInfo = tableBucketInfo;
+ this.loaderConcurrency = loaderConcurrency;
+ this.typeManager = typeManager;
+ this.session = session;
+ this.hdfsEnvironment = hdfsEnvironment;
+ this.namenodeStats = namenodeStats;
+ this.directoryLister = directoryLister;
+ this.recursiveDirWalkerEnabled = recursiveDirWalkerEnabled;
+ this.executor = executor;
+ this.hdfsContext = new HdfsContext(session, table.getDatabaseName(), table.getTableName());
+ this.validWriteIds = requireNonNull(validWriteIds, "validWriteIds is null");
+ this.dynamicFilterSupplier = dynamicFilterSupplier;
+ this.queryType = requireNonNull(queryType, "queryType is null");
+ this.queryInfo = requireNonNull(queryInfo, "queryproperties is null");
+ this.partitions = new ConcurrentLazyQueue<>(getPrunedPartitions(partitions));
+ Path path = new Path(getPartitionLocation(table, getPrunedPartitions(partitions).iterator().next().getPartition()));
+ configuration = hdfsEnvironment.getConfiguration(hdfsContext, path);
+ jobConf = ConfigurationUtils.toJobConf(configuration);
+ this.hoodiePathFilterSupplier = Suppliers.memoize(HoodieROTablePathFilter::new);
+ }
+
+ /**
+ * Get pruned partitions, if applicable.
+ */
+ private Iterable<HivePartitionMetadata> getPrunedPartitions(Iterable<HivePartitionMetadata> partitions)
+ {
+ if (AcidUtils.isTransactionalTable(table.getParameters()) &&
+ (queryType.map(t -> t == QueryType.VACUUM).orElse(false))) {
+ String vacuumPartition = (String) queryInfo.get("partition");
+ if (vacuumPartition != null && !vacuumPartition.isEmpty()) {
+ List<HivePartitionMetadata> list = new ArrayList<>();
+ for (Iterator<HivePartitionMetadata> it = partitions.iterator(); it.hasNext(); ) {
+ HivePartitionMetadata next = it.next();
+ if (vacuumPartition.equals(next.getHivePartition().getPartitionId())) {
+ return ImmutableList.of(next);
+ }
+ }
+ }
+ }
+ return partitions;
+ }
+
+ @Override
+ public void start(HiveSplitSource splitSource)
+ {
+ this.hiveSplitSource = splitSource;
+ for (int i = 0; i < loaderConcurrency; i++) {
+ ResumableTasks.submit(executor, new HiveSplitLoaderTask());
+ }
+ }
+
+ @Override
+ public void stop()
+ {
+ stopped = true;
+ }
+
+ private class HiveSplitLoaderTask
+ implements ResumableTask
+ {
+ @Override
+ public TaskStatus process()
+ {
+ while (true) {
+ if (stopped) {
+ return TaskStatus.finished();
+ }
+ ListenableFuture<?> future;
+ taskExecutionLock.readLock().lock();
+ try {
+ future = loadSplits();
+ }
+ catch (Exception e) {
+ if (e instanceof IOException) {
+ e = new PrestoException(HiveErrorCode.HIVE_FILESYSTEM_ERROR, e);
+ }
+ else if (!(e instanceof PrestoException)) {
+ e = new PrestoException(HiveErrorCode.HIVE_UNKNOWN_ERROR, e);
+ }
+ // Fail the split source before releasing the execution lock
+ // Otherwise, a race could occur where the split source is completed before we fail it.
+ hiveSplitSource.fail(e);
+ checkState(stopped);
+ return TaskStatus.finished();
+ }
+ // For TestBackgroundHiveSplitLoader.testPropagateException
+ catch (Error e) {
+ hiveSplitSource.fail(e);
+ return TaskStatus.finished();
+ }
+ finally {
+ taskExecutionLock.readLock().unlock();
+ }
+ invokeNoMoreSplitsIfNecessary();
+ if (!future.isDone()) {
+ return TaskStatus.continueOn(future);
+ }
+ }
+ }
+ }
+
+ private void invokeNoMoreSplitsIfNecessary()
+ {
+ taskExecutionLock.readLock().lock();
+ try {
+ // This is an opportunistic check to avoid getting the write lock unnecessarily
+ if (!partitions.isEmpty() || !fileIterators.isEmpty()) {
+ return;
+ }
+ }
+ catch (Exception e) {
+ hiveSplitSource.fail(e);
+ checkState(stopped, "Task is not marked as stopped even though it failed");
+ return;
+ }
+ finally {
+ taskExecutionLock.readLock().unlock();
+ }
+
+ taskExecutionLock.writeLock().lock();
+ try {
+ // the write lock guarantees that no one is operating on the partitions, fileIterators, or hiveSplitSource, or half way through doing so.
+ if (partitions.isEmpty() && fileIterators.isEmpty()) {
+ // It is legal to call `noMoreSplits` multiple times or after `stop` was called.
+ // Nothing bad will happen if `noMoreSplits` implementation calls methods that will try to obtain a read lock because the lock is re-entrant.
+ hiveSplitSource.noMoreSplits();
+ }
+ }
+ catch (Exception e) {
+ hiveSplitSource.fail(e);
+ checkState(stopped, "Task is not marked as stopped even though it failed");
+ }
+ finally {
+ taskExecutionLock.writeLock().unlock();
+ }
+ }
+
+ private ListenableFuture<?> loadSplits()
+ throws IOException
+ {
+ Iterator<InternalHiveSplit> splits = fileIterators.poll();
+ if (splits == null) {
+ HivePartitionMetadata partition = partitions.poll();
+ if (partition == null) {
+ return COMPLETED_FUTURE;
+ }
+ return loadPartition(partition);
+ }
+
+ while (splits.hasNext() && !stopped) {
+ ListenableFuture<?> future = hiveSplitSource.addToQueue(splits.next());
+ if (!future.isDone()) {
+ fileIterators.addFirst(splits);
+ return future;
+ }
+ }
+
+ // No need to put the iterator back, since it's either empty or we've stopped
+ return COMPLETED_FUTURE;
+ }
+
+ private ListenableFuture<?> loadPartition(HivePartitionMetadata partition)
+ throws IOException
+ {
+ HivePartition hivePartition = partition.getHivePartition();
+ String partitionName = hivePartition.getPartitionId();
+ Properties schema = getPartitionSchema(table, partition.getPartition());
+ List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition.getPartition());
+ TupleDomain<HiveColumnHandle> effectivePredicate = (TupleDomain<HiveColumnHandle>) compactEffectivePredicate;
+
+ if (dynamicFilterSupplier != null && isDynamicFilteringSplitFilteringEnabled(session)) {
+ if (isPartitionFiltered(partitionKeys, dynamicFilterSupplier.get(), typeManager)) {
+ // Avoid listing files and creating splits from a partition if it has been pruned due to dynamic filters
+ return COMPLETED_FUTURE;
+ }
+ }
+
+ Path path = new Path(getPartitionLocation(table, partition.getPartition()));
+ InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false, jobConf);
+ FileSystem fs = hdfsEnvironment.getFileSystem(hdfsContext, path);
+ boolean s3SelectPushdownEnabled = shouldEnablePushdownForTable(session, table, path.toString(), partition.getPartition());
+
+ if (inputFormat instanceof SymlinkTextInputFormat) {
+ if (tableBucketInfo.isPresent()) {
+ throw new PrestoException(NOT_SUPPORTED, "Bucketed table in SymlinkTextInputFormat is not yet supported");
+ }
+
+ // TODO: This should use an iterator like the HiveFileIterator
+ ListenableFuture<?> lastResult = COMPLETED_FUTURE;
+ for (Path targetPath : getTargetPathsFromSymlink(fs, path)) {
+ // The input should be in TextInputFormat.
+ TextInputFormat targetInputFormat = new TextInputFormat();
+ // the splits must be generated using the file system for the target path
+ // get the configuration for the target path -- it may be a different hdfs instance
+ FileSystem targetFilesystem = hdfsEnvironment.getFileSystem(hdfsContext, targetPath);
+ jobConf.setInputFormat(TextInputFormat.class);
+ targetInputFormat.configure(jobConf);
+ FileInputFormat.setInputPaths(jobConf, targetPath);
+ InputSplit[] targetSplits = targetInputFormat.getSplits(jobConf, 0);
+
+ InternalHiveSplitFactory splitFactory = new InternalHiveSplitFactory(
+ targetFilesystem,
+ partitionName,
+ inputFormat,
+ schema,
+ partitionKeys,
+ effectivePredicate,
+ partition.getColumnCoercions(),
+ Optional.empty(),
+ isForceLocalScheduling(session),
+ s3SelectPushdownEnabled);
+ lastResult = addSplitsToSource(targetSplits, splitFactory);
+ if (stopped) {
+ return COMPLETED_FUTURE;
+ }
+ }
+ return lastResult;
+ }
+
+ Optional<BucketConversion> bucketConversion = Optional.empty();
+ boolean bucketConversionRequiresWorkerParticipation = false;
+ if (partition.getPartition().isPresent()) {
+ Optional<HiveBucketProperty> partitionBucketProperty = partition.getPartition().get().getStorage().getBucketProperty();
+ if (tableBucketInfo.isPresent() && partitionBucketProperty.isPresent()) {
+ int readBucketCount = tableBucketInfo.get().getReadBucketCount();
+ BucketingVersion bucketingVersion = partitionBucketProperty.get().getBucketingVersion(); // TODO can partition's bucketing_version be different from table's?
+ int partitionBucketCount = partitionBucketProperty.get().getBucketCount();
+ // Validation was done in HiveSplitManager#getPartitionMetadata.
+ // Here, it's just trying to see if it needs the BucketConversion.
+ if (readBucketCount != partitionBucketCount) {
+ bucketConversion = Optional.of(new BucketConversion(bucketingVersion, readBucketCount, partitionBucketCount, tableBucketInfo.get().getBucketColumns()));
+ if (readBucketCount > partitionBucketCount) {
+ bucketConversionRequiresWorkerParticipation = true;
+ }
+ }
+ }
+ }
+ InternalHiveSplitFactory splitFactory = new InternalHiveSplitFactory(
+ fs,
+ partitionName,
+ inputFormat,
+ schema,
+ partitionKeys,
+ effectivePredicate,
+ partition.getColumnCoercions(),
+ bucketConversionRequiresWorkerParticipation ? bucketConversion : Optional.empty(),
+ isForceLocalScheduling(session),
+ s3SelectPushdownEnabled);
+
+ // To support custom input formats, we want to call getSplits()
+ // on the input format to obtain file splits.
+ if (!isHudiParquetInputFormat(inputFormat) && shouldUseFileSplitsFromInputFormat(inputFormat)) {
+ if (tableBucketInfo.isPresent()) {
+ throw new PrestoException(NOT_SUPPORTED, "Presto cannot read bucketed partition in an input format with UseFileSplitsFromInputFormat annotation: " + inputFormat.getClass().getSimpleName());
+ }
+
+ if (AcidUtils.isTransactionalTable(table.getParameters())) {
+ throw new PrestoException(NOT_SUPPORTED, "Hive transactional tables in an input format with UseFileSplitsFromInputFormat annotation are not supported: " + inputFormat.getClass().getSimpleName());
+ }
+
+ FileInputFormat.setInputPaths(jobConf, path);
+ InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
+
+ return addSplitsToSource(splits, splitFactory);
+ }
+
+ PathFilter pathFilter = isHudiParquetInputFormat(inputFormat) ? hoodiePathFilterSupplier.get() : path1 -> true;
+
+ // S3 Select pushdown works at the granularity of individual S3 objects,
+ // therefore we must not split files when it is enabled.
+ boolean splittable = getHeaderCount(schema) == 0 && getFooterCount(schema) == 0 && !s3SelectPushdownEnabled;
+
+ List<Path> readPaths;
+ Optional<DeleteDeltaLocations> deleteDeltaLocations;
+ long min = Long.MAX_VALUE;
+ long max = Long.MIN_VALUE;
+ if (AcidUtils.isTransactionalTable(table.getParameters())) {
+ boolean isVacuum = queryType.map(type -> type == QueryType.VACUUM).orElse(false);
+ AcidUtils.Directory directory = hdfsEnvironment.doAs(hdfsContext.getIdentity().getUser(), () -> {
+ ValidWriteIdList writeIdList = validWriteIds.orElseThrow(() -> new IllegalStateException("No validWriteIds present"));
+ if (isVacuum) {
+ writeIdList = new ValidCompactorWriteIdList(writeIdList.writeToString()) {
+ @Override
+ public RangeResponse isWriteIdRangeValid(long minWriteId, long maxWriteId)
+ {
+ //For unknown reasons, ValidCompactorWriteIdList#isWriteIdRangeValid() does not
+ // check for aborted transactions, and AcidUtils.getAcidState() adds aborted transactions to both the aborted and working lists.
+ //Avoid this by overriding.
+ RangeResponse writeIdRangeValid = super.isWriteIdRangeValid(minWriteId, maxWriteId);
+ if (writeIdRangeValid == RangeResponse.NONE) {
+ return RangeResponse.NONE;
+ }
+ else if (super.isWriteIdRangeAborted(minWriteId, maxWriteId) == RangeResponse.ALL) {
+ return RangeResponse.NONE;
+ }
+ return writeIdRangeValid;
+ }
+ };
+ }
+ return AcidUtils.getAcidState(
+ path,
+ configuration,
+ writeIdList,
+ Ref.from(false),
+ true,
+ table.getParameters());
+ });
+
+ if (AcidUtils.isFullAcidTable(table.getParameters())) {
+ // From Hive version >= 3.0, delta/base files will always have file '_orc_acid_version' with value >= '2'.
+ Path baseOrDeltaPath = directory.getBaseDirectory() != null
+ ? directory.getBaseDirectory()
+ : (directory.getCurrentDirectories().size() > 0 ? directory.getCurrentDirectories().get(0).getPath() : null);
+
+ if (baseOrDeltaPath != null && AcidUtils.OrcAcidVersion.getAcidVersionFromMetaFile(baseOrDeltaPath, fs) < 2) {
+ throw new PrestoException(NOT_SUPPORTED, "Hive transactional tables are supported with Hive 3.0 and only after a major compaction has been run");
+ }
+ }
+
+ readPaths = new ArrayList<>();
+
+ boolean isFullVacuum = isVacuum ? Boolean.valueOf(queryInfo.get("FULL").toString()) : false;
+
+ if (isFullVacuum) {
+ //Base will contain everything
+ min = 0;
+ }
+ // base
+ //In case of vacuum, include only in case of Full vacuum.
+ if (directory.getBaseDirectory() != null && (!isVacuum || isFullVacuum)) {
+ readPaths.add(directory.getBaseDirectory());
+ if (isVacuum) {
+ min = 0;
+ max = AcidUtils.parseBase(directory.getBaseDirectory());
+ }
+ }
+
+ // delta directories
+ for (AcidUtils.ParsedDelta delta : directory.getCurrentDirectories()) {
+ if (!delta.isDeleteDelta()) {
+ readPaths.add(delta.getPath());
+ }
+ //In case of Minor compaction, all delete_delta files should be compacted separately,
+ else if (isVacuum && !isFullVacuum) {
+ readPaths.add(delta.getPath());
+ }
+ if (isVacuum) {
+ min = Math.min(delta.getMinWriteId(), min);
+ max = Math.max(delta.getMaxWriteId(), max);
+ }
+ }
+
+ // Create a registry of delete_delta directories for the partition
+ DeleteDeltaLocations.Builder deleteDeltaLocationsBuilder = DeleteDeltaLocations.builder(path);
+ for (AcidUtils.ParsedDelta delta : directory.getCurrentDirectories()) {
+ //In case of minor compaction, delete_delta directories should not be used for masking.
+ if (delta.isDeleteDelta() && (!isVacuum || isFullVacuum)) {
+ //For unknown reasons ParsedDelta.getStatementId() returns 0, though the parsed statement id is -1;
+ //This creates an issue while trying to locate the delete_delta directory.
+ //So parse it again here.
+ OptionalInt statementId = getStatementId(delta.getPath().getName());
+ int stmtId = statementId.orElse(0);
+ deleteDeltaLocationsBuilder.addDeleteDelta(delta.getPath(), delta.getMinWriteId(), delta.getMaxWriteId(), stmtId);
+ }
+ }
+
+ deleteDeltaLocations = deleteDeltaLocationsBuilder.build();
+
+ if (!directory.getOriginalFiles().isEmpty()) {
+ LOG.info("Now supporting read from non-ACID files in ACID reader");
+ // non-ACID file
+ int numberOfBuckets = Integer.parseInt(schema.getProperty("bucket_count"));
+ long[] bucketStartRowOffset = new long[Integer.max(numberOfBuckets, 1)];
+ for (HadoopShims.HdfsFileStatusWithId f : directory.getOriginalFiles()) {
+ Path currFilePath = f.getFileStatus().getPath();
+ int currBucketNumber = getBucketNumber(currFilePath.getName()).getAsInt();
+ fileIterators.addLast(createInternalHiveSplitIterator(currFilePath, fs, splitFactory, splittable, deleteDeltaLocations, Optional.of(bucketStartRowOffset[currBucketNumber]), pathFilter));
+ try {
+ Reader copyReader = OrcFile.createReader(f.getFileStatus().getPath(),
+ OrcFile.readerOptions(configuration));
+ bucketStartRowOffset[currBucketNumber] += copyReader.getNumberOfRows();
+ }
+ catch (Exception e) {
+ throw new PrestoException(NOT_SUPPORTED, e.getMessage());
+ }
+ }
+ }
+
+ if (isVacuum && !readPaths.isEmpty()) {
+ Object vacuumHandle = queryInfo.get("vacuumHandle");
+ if (vacuumHandle != null && vacuumHandle instanceof HiveVacuumTableHandle) {
+ HiveVacuumTableHandle hiveVacuumTableHandle = (HiveVacuumTableHandle) vacuumHandle;
+ hiveVacuumTableHandle.addRange(partitionName, new Range(min, max));
+ }
+ }
+ }
+ else {
+ readPaths = ImmutableList.of(path);
+ deleteDeltaLocations = Optional.empty();
+ }
+
+ // Bucketed partitions are fully loaded immediately since all files must be loaded to determine the file to bucket mapping
+ if (tableBucketInfo.isPresent()) {
+ ListenableFuture<?> lastResult = immediateFuture(null); // TODO document in addToQueue() that it is sufficient to hold on to last returned future
+ for (Path readPath : readPaths) {
+ lastResult = hiveSplitSource.addToQueue(getBucketedSplits(readPath, fs, splitFactory,
+ tableBucketInfo.get(), bucketConversion, getDeleteDeltaLocationFor(readPath, deleteDeltaLocations), pathFilter));
+ }
+ return lastResult;
+ }
+
+ for (Path readPath : readPaths) {
+ fileIterators.addLast(createInternalHiveSplitIterator(readPath, fs, splitFactory, splittable,
+ getDeleteDeltaLocationFor(readPath, deleteDeltaLocations), Optional.empty(), pathFilter));
+ }
+
+ return COMPLETED_FUTURE;
+ }
+
+ private Optional<DeleteDeltaLocations> getDeleteDeltaLocationFor(Path readPath, Optional<DeleteDeltaLocations> allDeleteDeltaLocations)
+ {
+ if (!allDeleteDeltaLocations.isPresent() || allDeleteDeltaLocations.get().getDeleteDeltas().isEmpty()) {
+ return allDeleteDeltaLocations;
+ }
+ /*
+ * Source delta/base files' record can be deleted in only delete_delta directories having greater writeId
+ * than source file's writeId.
+ * Therefore, skipping delta_directories which lesser/same writeId as source will avoid unnecessary
+ * reads and memory.
+ */
+ Long sourceWriteId = AcidUtils.extractWriteId(readPath);
+ sourceWriteId = (sourceWriteId == null) ? 0 : sourceWriteId;
+ if (sourceWriteId == 0) {
+ return allDeleteDeltaLocations;
+ }
+ long sId = sourceWriteId.longValue();
+ DeleteDeltaLocations allLocations = allDeleteDeltaLocations.get();
+ List<WriteIdInfo> filteredWriteIds = allLocations.getDeleteDeltas().stream()
+ .filter(writeIdInfo -> writeIdInfo.getMaxWriteId() > sId).collect(Collectors.toList());
+ if (filteredWriteIds.isEmpty()) {
+ return Optional.empty();
+ }
+ return Optional.of(new DeleteDeltaLocations(allLocations.getPartitionLocation(), filteredWriteIds));
+ }
+
+ private ListenableFuture<?> addSplitsToSource(InputSplit[] targetSplits, InternalHiveSplitFactory splitFactory)
+ throws IOException
+ {
+ ListenableFuture<?> lastResult = COMPLETED_FUTURE;
+ for (InputSplit inputSplit : targetSplits) {
+ Optional<InternalHiveSplit> internalHiveSplit = splitFactory.createInternalHiveSplit((FileSplit) inputSplit);
+ if (internalHiveSplit.isPresent()) {
+ lastResult = hiveSplitSource.addToQueue(internalHiveSplit.get());
+ }
+ if (stopped) {
+ return COMPLETED_FUTURE;
+ }
+ }
+ return lastResult;
+ }
+
+ private static boolean isHudiParquetInputFormat(InputFormat<?, ?> inputFormat)
+ {
+ if (inputFormat instanceof HoodieParquetRealtimeInputFormat) {
+ return false;
+ }
+ return inputFormat instanceof HoodieParquetInputFormat;
+ }
+
+ private static boolean shouldUseFileSplitsFromInputFormat(InputFormat<?, ?> inputFormat)
+ {
+ return Arrays.stream(inputFormat.getClass().getAnnotations())
+ .map(Annotation::annotationType)
+ .map(Class::getSimpleName)
+ .anyMatch(name -> name.equals("UseFileSplitsFromInputFormat"));
+ }
+
+ private Iterator<InternalHiveSplit> createInternalHiveSplitIterator(Path path, FileSystem fileSystem, InternalHiveSplitFactory splitFactory, boolean splittable, Optional<DeleteDeltaLocations> deleteDeltaLocations, Optional<Long> startRowOffsetOfFile, PathFilter pathFilter)
+ {
+ return Streams.stream(new HiveFileIterator(table, path, fileSystem, directoryLister, namenodeStats, recursiveDirWalkerEnabled ? RECURSE : IGNORED, pathFilter))
+ .map(status -> splitFactory.createInternalHiveSplit(status, splittable, deleteDeltaLocations, startRowOffsetOfFile))
+ .filter(Optional::isPresent)
+ .map(Optional::get)
+ .iterator();
+ }
+
+ private List<InternalHiveSplit> getBucketedSplits(Path path, FileSystem fileSystem, InternalHiveSplitFactory splitFactory, BucketSplitInfo bucketSplitInfo, Optional<BucketConversion> bucketConversion, Optional<DeleteDeltaLocations> deleteDeltaLocations, PathFilter pathFilter)
+ {
+ int readBucketCount = bucketSplitInfo.getReadBucketCount();
+ int tableBucketCount = bucketSplitInfo.getTableBucketCount();
+ int partitionBucketCount = bucketConversion.map(BucketConversion::getPartitionBucketCount).orElse(tableBucketCount);
+ int bucketCount = max(readBucketCount, partitionBucketCount);
+
+ // list all files in the partition
+ List<LocatedFileStatus> files = new ArrayList<>(partitionBucketCount);
+ try {
+ Iterators.addAll(files, new HiveFileIterator(table, path, fileSystem, directoryLister, namenodeStats, FAIL, pathFilter));
+ }
+ catch (NestedDirectoryNotAllowedException e) {
+ // Fail here to be on the safe side. This seems to be the same as what Hive does
+ throw new PrestoException(
+ HiveErrorCode.HIVE_INVALID_BUCKET_FILES,
+ format("Hive table '%s' is corrupt. Found sub-directory in bucket directory for partition: %s",
+ table.getSchemaTableName(),
+ splitFactory.getPartitionName()));
+ }
+
+ // build mapping of file name to bucket
+ ListMultimap<Integer, LocatedFileStatus> bucketFiles = ArrayListMultimap.create();
+ for (LocatedFileStatus file : files) {
+ String fileName = file.getPath().getName();
+ OptionalInt bucket = getBucketNumber(fileName);
+ if (bucket.isPresent()) {
+ bucketFiles.put(bucket.getAsInt(), file);
+ continue;
+ }
+
+ // legacy mode requires exactly one file per bucket
+ if (files.size() != partitionBucketCount) {
+ throw new PrestoException(HiveErrorCode.HIVE_INVALID_BUCKET_FILES, format(
+ "Hive table '%s' is corrupt. File '%s' does not match the standard naming pattern, and the number " +
+ "of files in the directory (%s) does not match the declared bucket count (%s) for partition: %s",
+ table.getSchemaTableName(),
+ fileName,
+ files.size(),
+ partitionBucketCount,
+ splitFactory.getPartitionName()));
+ }
+
+ // sort FileStatus objects per `org.apache.hadoop.hive.ql.metadata.Table#getSortedPaths()`
+ files.sort(null);
+
+ // use position in sorted list as the bucket number
+ bucketFiles.clear();
+ for (int i = 0; i < files.size(); i++) {
+ bucketFiles.put(i, files.get(i));
+ }
+ break;
+ }
+
+ // convert files to internal splits
+ List<InternalHiveSplit> splitList = new ArrayList<>();
+ for (int bucketNumber = 0; bucketNumber < bucketCount; bucketNumber++) {
+ // Physical bucket #. This determines the file name. It also determines the order of splits in the result.
+ int partitionBucketNumber = bucketNumber % partitionBucketCount;
+ // Logical bucket #. Each logical bucket corresponds to a "bucket" from engine's perspective.
+ int readBucketNumber = bucketNumber % readBucketCount;
+
+ boolean containsEligibleTableBucket = false;
+ boolean containsIneligibleTableBucket = false;
+ for (int tableBucketNumber = bucketNumber % tableBucketCount; tableBucketNumber < tableBucketCount; tableBucketNumber += bucketCount) {
+ // table bucket number: this is used for evaluating "$bucket" filters.
+ if (bucketSplitInfo.isTableBucketEnabled(tableBucketNumber)) {
+ containsEligibleTableBucket = true;
+ }
+ else {
+ containsIneligibleTableBucket = true;
+ }
+ }
+
+ if (containsEligibleTableBucket && containsIneligibleTableBucket) {
+ throw new PrestoException(
+ NOT_SUPPORTED,
+ "The bucket filter cannot be satisfied. There are restrictions on the bucket filter when all the following is true: " +
+ "1. a table has a different buckets count as at least one of its partitions that is read in this query; " +
+ "2. the table has a different but compatible bucket number with another table in the query; " +
+ "3. some buckets of the table is filtered out from the query, most likely using a filter on \"$bucket\". " +
+ "(table name: " + table.getTableName() + ", table bucket count: " + tableBucketCount + ", " +
+ "partition bucket count: " + partitionBucketCount + ", effective reading bucket count: " + readBucketCount + ")");
+ }
+ if (containsEligibleTableBucket) {
+ for (LocatedFileStatus file : bucketFiles.get(partitionBucketNumber)) {
+ // OrcDeletedRows will load only delete delta files matching current bucket (same file name),
+ // so we can pass all delete delta locations here, without filtering.
+ splitFactory.createInternalHiveSplit(file, readBucketNumber, deleteDeltaLocations)
+ .ifPresent(splitList::add);
+ }
+ }
+ }
+ return splitList;
+ }
+
+ static OptionalInt getStatementId(String deleteDeltaFileName)
+ {
+ Matcher matcher = DELETE_DELTA_PATTERN.matcher(deleteDeltaFileName);
+ if (matcher.matches()) {
+ String statementId = matcher.group(3);
+ if (statementId == null) {
+ return OptionalInt.of(-1);
+ }
+ return OptionalInt.of(Integer.valueOf(statementId.substring(1)));
+ }
+ return OptionalInt.empty();
+ }
+
+ private static List<Path> getTargetPathsFromSymlink(FileSystem fileSystem, Path symlinkDir)
+ {
+ try {
+ FileStatus[] symlinks = fileSystem.listStatus(symlinkDir, HIDDEN_FILES_PATH_FILTER);
+ List<Path> targets = new ArrayList<>();
+
+ for (FileStatus symlink : symlinks) {
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.open(symlink.getPath()), StandardCharsets.UTF_8))) {
+ CharStreams.readLines(reader).stream()
+ .map(Path::new)
+ .forEach(targets::add);
+ }
+ }
+ return targets;
+ }
+ catch (IOException e) {
+ throw new PrestoException(HiveErrorCode.HIVE_BAD_DATA, "Error parsing symlinks from: " + symlinkDir, e);
+ }
+ }
+
+ private static List<HivePartitionKey> getPartitionKeys(Table table, Optional<Partition> partition)
+ {
+ if (!partition.isPresent()) {
+ return ImmutableList.of();
+ }
+ ImmutableList.Builder<HivePartitionKey> partitionKeys = ImmutableList.builder();
+ List<Column> keys = table.getPartitionColumns();
+ List<String> values = partition.get().getValues();
+ checkCondition(keys.size() == values.size(), HiveErrorCode.HIVE_INVALID_METADATA, "Expected %s partition key values, but got %s", keys.size(), values.size());
+ for (int i = 0; i < keys.size(); i++) {
+ String name = keys.get(i).getName();
+ HiveType hiveType = keys.get(i).getType();
+ if (!hiveType.isSupportedType()) {
+ throw new PrestoException(NOT_SUPPORTED, format("Unsupported Hive type %s found in partition keys of table %s.%s", hiveType, table.getDatabaseName(), table.getTableName()));
+ }
+ String value = values.get(i);
+ checkCondition(value != null, HiveErrorCode.HIVE_INVALID_PARTITION_VALUE, "partition key value cannot be null for field: %s", name);
+ partitionKeys.add(new HivePartitionKey(name, value));
+ }
+ return partitionKeys.build();
+ }
+
+ private static Properties getPartitionSchema(Table table, Optional<Partition> partition)
+ {
+ if (!partition.isPresent()) {
+ return getHiveSchema(table);
+ }
+ return getHiveSchema(partition.get(), table);
+ }
+
+ public Table getTable()
+ {
+ return table;
+ }
+
+ public static class BucketSplitInfo
+ {
+ private final List<HiveColumnHandle> bucketColumns;
+ private final int tableBucketCount;
+ private final int readBucketCount;
+ private final IntPredicate bucketFilter;
+
+ public static Optional<BucketSplitInfo> createBucketSplitInfo(Optional<HiveBucketHandle> bucketHandle, Optional<HiveBucketing.HiveBucketFilter> bucketFilter)
+ {
+ requireNonNull(bucketHandle, "bucketHandle is null");
+ requireNonNull(bucketFilter, "buckets is null");
+
+ if (!bucketHandle.isPresent()) {
+ checkArgument(!bucketFilter.isPresent(), "bucketHandle must be present if bucketFilter is present");
+ return Optional.empty();
+ }
+
+ int tableBucketCount = bucketHandle.get().getTableBucketCount();
+ int readBucketCount = bucketHandle.get().getReadBucketCount();
+
+ if (tableBucketCount != readBucketCount && bucketFilter.isPresent()) {
+ // TODO: remove when supported
+ throw new PrestoException(NOT_SUPPORTED, "Filter on \"$bucket\" is not supported when the table has partitions with different bucket counts");
+ }
+
+ List<HiveColumnHandle> bucketColumns = bucketHandle.get().getColumns();
+ IntPredicate predicate = bucketFilter
+ .map(filter -> filter.getBucketsToKeep()::contains)
+ .orElse(bucket -> true);
+ return Optional.of(new BucketSplitInfo(bucketColumns, tableBucketCount, readBucketCount, predicate));
+ }
+
+ private BucketSplitInfo(List<HiveColumnHandle> bucketColumns, int tableBucketCount, int readBucketCount, IntPredicate bucketFilter)
+ {
+ this.bucketColumns = ImmutableList.copyOf(requireNonNull(bucketColumns, "bucketColumns is null"));
+ this.tableBucketCount = tableBucketCount;
+ this.readBucketCount = readBucketCount;
+ this.bucketFilter = requireNonNull(bucketFilter, "bucketFilter is null");
+ }
+
+ public List<HiveColumnHandle> getBucketColumns()
+ {
+ return bucketColumns;
+ }
+
+ public int getTableBucketCount()
+ {
+ return tableBucketCount;
+ }
+
+ public int getReadBucketCount()
+ {
+ return readBucketCount;
+ }
+
+ /**
+ * Evaluates whether the provided table bucket number passes the bucket predicate.
+ * A bucket predicate can be present in two cases:
+ * <ul>
+ * <li>Filter on "$bucket" column. e.g. {@code "$bucket" between 0 and 100}
+ * <li>Single-value equality filter on all bucket columns. e.g. for a table with two bucketing columns,
+ * {@code bucketCol1 = 'a' AND bucketCol2 = 123}
+ * </ul>
+ */
+ public boolean isTableBucketEnabled(int tableBucketNumber)
+ {
+ return bucketFilter.test(tableBucketNumber);
+ }
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/BaseStorageFormat.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/BaseStorageFormat.java
new file mode 100644
index 00000000..a45778c0
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/BaseStorageFormat.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.prestosql.plugin.hive;
+
+public interface BaseStorageFormat
+{
+ String getSerDe();
+
+ String getInputFormat();
+
+ String getOutputFormat();
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/CachingDirectoryLister.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/CachingDirectoryLister.java
new file mode 100644
index 00000000..3bad0400
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/CachingDirectoryLister.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.Weigher;
+import com.google.common.collect.ImmutableList;
+import io.airlift.units.Duration;
+import io.prestosql.plugin.hive.metastore.Table;
+import io.prestosql.spi.connector.SchemaTableName;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.weakref.jmx.Managed;
+
+import javax.inject.Inject;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+public class CachingDirectoryLister
+ implements DirectoryLister
+{
+ private final Cache<Path, List<LocatedFileStatus>> cache;
+ private final Set<SchemaTableName> tableNames;
+
+ @Inject
+ public CachingDirectoryLister(HiveConfig hiveClientConfig)
+ {
+ this(hiveClientConfig.getFileStatusCacheExpireAfterWrite(), hiveClientConfig.getFileStatusCacheMaxSize(), hiveClientConfig.getFileStatusCacheTables());
+ }
+
+ public CachingDirectoryLister(Duration expireAfterWrite, long maxSize, List<String> tables)
+ {
+ this.cache = CacheBuilder.newBuilder()
+ .maximumWeight(maxSize)
+ .weigher((Weigher<Path, List<LocatedFileStatus>>) (key, value) -> value.size())
+ .expireAfterWrite(expireAfterWrite.toMillis(), TimeUnit.MILLISECONDS)
+ .recordStats()
+ .build();
+ this.tableNames = tables.stream()
+ .map(CachingDirectoryLister::parseTableName)
+ .collect(Collectors.toSet());
+ }
+
+ private static SchemaTableName parseTableName(String tableName)
+ {
+ String[] parts = tableName.split("\\.");
+ checkArgument(parts.length == 2, "Invalid schemaTableName: %s", tableName);
+ return new SchemaTableName(parts[0], parts[1]);
+ }
+
+ @Override
+ public RemoteIterator<LocatedFileStatus> list(FileSystem fs, Table table, Path path)
+ throws IOException
+ {
+ List<LocatedFileStatus> files = cache.getIfPresent(path);
+ if (files != null) {
+ return simpleRemoteIterator(files);
+ }
+ RemoteIterator<LocatedFileStatus> iterator = fs.listLocatedStatus(path);
+
+ if (!tableNames.contains(table.getSchemaTableName())) {
+ return iterator;
+ }
+ return cachingRemoteIterator(iterator, path);
+ }
+
+ private RemoteIterator<LocatedFileStatus> cachingRemoteIterator(RemoteIterator<LocatedFileStatus> iterator, Path path)
+ {
+ return new RemoteIterator<LocatedFileStatus>()
+ {
+ private final List<LocatedFileStatus> files = new ArrayList<>();
+
+ @Override
+ public boolean hasNext()
+ throws IOException
+ {
+ boolean hasNext = iterator.hasNext();
+ if (!hasNext) {
+ cache.put(path, ImmutableList.copyOf(files));
+ }
+ return hasNext;
+ }
+
+ @Override
+ public LocatedFileStatus next()
+ throws IOException
+ {
+ LocatedFileStatus next = iterator.next();
+ files.add(next);
+ return next;
+ }
+ };
+ }
+
+ private static RemoteIterator<LocatedFileStatus> simpleRemoteIterator(List<LocatedFileStatus> files)
+ {
+ return new RemoteIterator<LocatedFileStatus>()
+ {
+ private final Iterator<LocatedFileStatus> iterator = ImmutableList.copyOf(files).iterator();
+
+ @Override
+ public boolean hasNext()
+ {
+ return iterator.hasNext();
+ }
+
+ @Override
+ public LocatedFileStatus next()
+ {
+ return iterator.next();
+ }
+ };
+ }
+
+ @Managed
+ public void flushCache()
+ {
+ cache.invalidateAll();
+ }
+
+ @Managed
+ public Double getHitRate()
+ {
+ return cache.stats().hitRate();
+ }
+
+ @Managed
+ public Double getMissRate()
+ {
+ return cache.stats().missRate();
+ }
+
+ @Managed
+ public long getHitCount()
+ {
+ return cache.stats().hitCount();
+ }
+
+ @Managed
+ public long getMissCount()
+ {
+ return cache.stats().missCount();
+ }
+
+ @Managed
+ public long getRequestCount()
+ {
+ return cache.stats().requestCount();
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/CoercionPolicy.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/CoercionPolicy.java
new file mode 100644
index 00000000..1222a26c
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/CoercionPolicy.java
@@ -0,0 +1,19 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+public interface CoercionPolicy
+{
+ boolean canCoerce(HiveType fromType, HiveType toType);
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ConcurrentLazyQueue.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ConcurrentLazyQueue.java
new file mode 100644
index 00000000..a6d2443e
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ConcurrentLazyQueue.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import javax.annotation.concurrent.GuardedBy;
+
+import java.util.Iterator;
+
+public class ConcurrentLazyQueue<E>
+{
+ @GuardedBy("this")
+ private final Iterator<E> iterator;
+
+ public ConcurrentLazyQueue(Iterable<E> iterable)
+ {
+ this.iterator = iterable.iterator();
+ }
+
+ public synchronized boolean isEmpty()
+ {
+ return !iterator.hasNext();
+ }
+
+ public synchronized E poll()
+ {
+ if (!iterator.hasNext()) {
+ return null;
+ }
+ return iterator.next();
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ConnectorObjectNameGeneratorModule.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ConnectorObjectNameGeneratorModule.java
new file mode 100644
index 00000000..f4701c45
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ConnectorObjectNameGeneratorModule.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.inject.Binder;
+import com.google.inject.Module;
+import com.google.inject.Provides;
+import io.airlift.configuration.Config;
+import org.weakref.jmx.ObjectNameBuilder;
+import org.weakref.jmx.ObjectNameGenerator;
+
+import java.util.Map;
+
+import static com.google.common.base.MoreObjects.firstNonNull;
+import static io.airlift.configuration.ConfigBinder.configBinder;
+import static java.util.Objects.requireNonNull;
+
+// Note: There are multiple copies of this class in the codebase. If you change one, you should change them all.
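+// Generated JMX object names combine a domain (the class package rebased from "io.prestosql.plugin.hive" onto
+// "presto.plugin.hive", or onto jmx.base-name when configured) with type and catalog name properties.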
+public class ConnectorObjectNameGeneratorModule
+ implements Module
+{
+ private static final String CONNECTOR_PACKAGE_NAME = "io.prestosql.plugin.hive";
+ private static final String DEFAULT_DOMAIN_BASE = "presto.plugin.hive";
+
+ private final String catalogName;
+
+ public ConnectorObjectNameGeneratorModule(String catalogName)
+ {
+ this.catalogName = requireNonNull(catalogName, "catalogName is null");
+ }
+
+ @Override
+ public void configure(Binder binder)
+ {
+ configBinder(binder).bindConfig(ConnectorObjectNameGeneratorConfig.class);
+ }
+
+ @Provides
+ ObjectNameGenerator createPrefixObjectNameGenerator(ConnectorObjectNameGeneratorConfig config)
+ {
+ String domainBase = firstNonNull(config.getDomainBase(), DEFAULT_DOMAIN_BASE);
+ return new ConnectorObjectNameGenerator(domainBase, catalogName);
+ }
+
+ public static class ConnectorObjectNameGeneratorConfig
+ {
+ private String domainBase;
+
+ public String getDomainBase()
+ {
+ return domainBase;
+ }
+
+ @Config("jmx.base-name")
+ public ConnectorObjectNameGeneratorConfig setDomainBase(String domainBase)
+ {
+ this.domainBase = domainBase;
+ return this;
+ }
+ }
+
+ public static final class ConnectorObjectNameGenerator
+ implements ObjectNameGenerator
+ {
+ private final String domainBase;
+ private final String catalogName;
+
+ public ConnectorObjectNameGenerator(String domainBase, String catalogName)
+ {
+ this.domainBase = domainBase;
+ this.catalogName = catalogName;
+ }
+
+ @Override
+ public String generatedNameOf(Class<?> type)
+ {
+ return new ObjectNameBuilder(toDomain(type))
+ .withProperties(ImmutableMap.<String, String>builder()
+ .put("type", type.getSimpleName())
+ .put("name", catalogName)
+ .build())
+ .build();
+ }
+
+ @Override
+ public String generatedNameOf(Class<?> type, Map<String, String> properties)
+ {
+ return new ObjectNameBuilder(toDomain(type))
+ .withProperties(ImmutableMap.<String, String>builder()
+ .putAll(properties)
+ .put("catalog", catalogName)
+ .build())
+ .build();
+ }
+
+ private String toDomain(Class<?> type)
+ {
+ String domain = type.getPackage().getName();
+ if (domain.startsWith(CONNECTOR_PACKAGE_NAME)) {
+ domain = domainBase + domain.substring(CONNECTOR_PACKAGE_NAME.length());
+ }
+ return domain;
+ }
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/CreateEmptyPartitionProcedure.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/CreateEmptyPartitionProcedure.java
new file mode 100644
index 00000000..a308e255
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/CreateEmptyPartitionProcedure.java
@@ -0,0 +1,142 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.google.common.collect.ImmutableList;
+import io.airlift.json.JsonCodec;
+import io.airlift.slice.Slice;
+import io.airlift.slice.Slices;
+import io.prestosql.plugin.hive.LocationService.WriteInfo;
+import io.prestosql.plugin.hive.PartitionUpdate.UpdateMode;
+import io.prestosql.plugin.hive.authentication.HiveIdentity;
+import io.prestosql.plugin.hive.metastore.HiveMetastore;
+import io.prestosql.spi.PrestoException;
+import io.prestosql.spi.classloader.ThreadContextClassLoader;
+import io.prestosql.spi.connector.ConnectorSession;
+import io.prestosql.spi.connector.ConnectorTableHandle;
+import io.prestosql.spi.connector.SchemaTableName;
+import io.prestosql.spi.procedure.Procedure;
+import io.prestosql.spi.procedure.Procedure.Argument;
+import org.apache.hadoop.hive.common.FileUtils;
+
+import javax.inject.Inject;
+import javax.inject.Provider;
+
+import java.lang.invoke.MethodHandle;
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.function.Supplier;
+
+import static com.google.common.collect.ImmutableList.toImmutableList;
+import static io.prestosql.spi.StandardErrorCode.ALREADY_EXISTS;
+import static io.prestosql.spi.StandardErrorCode.INVALID_PROCEDURE_ARGUMENT;
+import static io.prestosql.spi.block.MethodHandleUtil.methodHandle;
+import static io.prestosql.spi.type.StandardTypes.VARCHAR;
+import static java.util.Objects.requireNonNull;
+
+public class CreateEmptyPartitionProcedure
+ implements Provider<Procedure>
+{
+ private static final MethodHandle CREATE_EMPTY_PARTITION = methodHandle(
+ CreateEmptyPartitionProcedure.class,
+ "createEmptyPartition",
+ ConnectorSession.class,
+ String.class,
+ String.class,
+ List.class,
+ List.class);
+
+ private final Supplier<TransactionalMetadata> hiveMetadataFactory;
+ private final HiveMetastore metastore;
+ private final LocationService locationService;
+ private final JsonCodec<PartitionUpdate> partitionUpdateJsonCodec;
+
+ @Inject
+ public CreateEmptyPartitionProcedure(Supplier<TransactionalMetadata> hiveMetadataFactory, HiveMetastore metastore, LocationService locationService, JsonCodec<PartitionUpdate> partitionUpdateCodec)
+ {
+ this.hiveMetadataFactory = requireNonNull(hiveMetadataFactory, "hiveMetadataFactory is null");
+ this.metastore = requireNonNull(metastore, "metastore is null");
+ this.locationService = requireNonNull(locationService, "locationService is null");
+ this.partitionUpdateJsonCodec = requireNonNull(partitionUpdateCodec, "partitionUpdateCodec is null");
+ }
+
+ @Override
+ public Procedure get()
+ {
+ return new Procedure(
+ "system",
+ "create_empty_partition",
+ ImmutableList.of(
+ new Argument("schema_name", VARCHAR),
+ new Argument("table_name", VARCHAR),
+ new Argument("partition_columns", "array(varchar)"),
+ new Argument("partition_values", "array(varchar)")),
+ CREATE_EMPTY_PARTITION.bindTo(this));
+ }
+
+ public void createEmptyPartition(ConnectorSession session, String schema, String table, List<Object> partitionColumnNames, List<Object> partitionValues)
+ {
+ try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(getClass().getClassLoader())) {
+ doCreateEmptyPartition(session, schema, table, partitionColumnNames, partitionValues);
+ }
+ }
+
+ private void doCreateEmptyPartition(ConnectorSession session, String schema, String table, List<Object> partitionColumnNames, List<Object> partitionValues)
+ {
+ TransactionalMetadata hiveMetadata = hiveMetadataFactory.get();
+
+ ConnectorTableHandle tableHandle = hiveMetadata.getTableHandle(session, new SchemaTableName(schema, table));
+ hiveMetadata.beginQuery(session);
+ HiveInsertTableHandle hiveInsertTableHandle = (HiveInsertTableHandle) hiveMetadata.beginInsert(session, tableHandle);
+
+ List<String> actualPartitionColumnNames = hiveInsertTableHandle.getInputColumns().stream()
+ .filter(HiveColumnHandle::isPartitionKey)
+ .map(HiveColumnHandle::getName)
+ .collect(toImmutableList());
+ if (!Objects.equals(partitionColumnNames, actualPartitionColumnNames)) {
+ throw new PrestoException(INVALID_PROCEDURE_ARGUMENT, "input partition column names doesn't match actual partition column names");
+ }
+
+ List<String> partitionStringValues = partitionValues.stream()
+ .map(String.class::cast)
+ .collect(toImmutableList());
+
+ if (metastore.getPartition(new HiveIdentity(session), schema, table, partitionStringValues).isPresent()) {
+ throw new PrestoException(ALREADY_EXISTS, "Partition already exists");
+ }
+ String partitionName = FileUtils.makePartName(actualPartitionColumnNames, partitionStringValues);
+
+ WriteInfo writeInfo = locationService.getPartitionWriteInfo(hiveInsertTableHandle.getLocationHandle(), Optional.empty(), partitionName);
+ Slice serializedPartitionUpdate = Slices.wrappedBuffer(
+ partitionUpdateJsonCodec.toJsonBytes(
+ new PartitionUpdate(
+ partitionName,
+ UpdateMode.NEW,
+ writeInfo.getWritePath(),
+ writeInfo.getTargetPath(),
+ ImmutableList.of(),
+ 0,
+ 0,
+ 0,
+ ImmutableList.of())));
+
+ hiveMetadata.finishInsert(
+ session,
+ hiveInsertTableHandle,
+ ImmutableList.of(serializedPartitionUpdate),
+ ImmutableList.of());
+ hiveMetadata.commit();
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/DeleteDeltaLocations.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/DeleteDeltaLocations.java
new file mode 100644
index 00000000..46eccad5
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/DeleteDeltaLocations.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.collect.ImmutableList;
+import org.apache.hadoop.fs.Path;
+
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+
+import static com.google.common.base.MoreObjects.toStringHelper;
+import static com.google.common.base.Preconditions.checkArgument;
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Stores information about ACID DELETE_DELTA for a Partition
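+ * Instances are created through {@link #builder(Path)}, which verifies that every delete delta path
+ * is a direct child of the partition location before recording its write ID range.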
+ */
+public class DeleteDeltaLocations
+{
+ private final String partitionLocation;
+ private final List<WriteIdInfo> deleteDeltas;
+
+ @JsonCreator
+ public DeleteDeltaLocations(
+ @JsonProperty("partitionLocation") String partitionLocation,
+ @JsonProperty("deleteDeltas") List deleteDeltas)
+ {
+ this.partitionLocation = requireNonNull(partitionLocation, "partitionLocation is null");
+ this.deleteDeltas = ImmutableList.copyOf(requireNonNull(deleteDeltas, "deleteDeltas is null"));
+ checkArgument(!deleteDeltas.isEmpty(), "deleteDeltas is empty");
+ }
+
+ @JsonProperty
+ public String getPartitionLocation()
+ {
+ return partitionLocation;
+ }
+
+ @JsonProperty
+ public List<WriteIdInfo> getDeleteDeltas()
+ {
+ return deleteDeltas;
+ }
+
+ @Override
+ public boolean equals(Object o)
+ {
+ if (this == o) {
+ return true;
+ }
+
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+
+ DeleteDeltaLocations that = (DeleteDeltaLocations) o;
+ return partitionLocation.equals(that.partitionLocation) &&
+ deleteDeltas.equals(that.deleteDeltas);
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return Objects.hash(partitionLocation, deleteDeltas);
+ }
+
+ @Override
+ public String toString()
+ {
+ return toStringHelper(this)
+ .add("partitionLocation", partitionLocation)
+ .add("deleteDeltas", deleteDeltas)
+ .toString();
+ }
+
+ public static Builder builder(Path partitionPath)
+ {
+ return new Builder(partitionPath);
+ }
+
+ public static class Builder
+ {
+ private final Path partitionLocation;
+ private final ImmutableList.Builder<WriteIdInfo> deleteDeltaInfoBuilder = ImmutableList.builder();
+
+ private Builder(Path partitionPath)
+ {
+ partitionLocation = requireNonNull(partitionPath, "partitionPath is null");
+ }
+
+ public Builder addDeleteDelta(Path deleteDeltaPath, long minWriteId, long maxWriteId, int statementId)
+ {
+ requireNonNull(deleteDeltaPath, "deleteDeltaPath is null");
+ Path partitionPathFromDeleteDelta = deleteDeltaPath.getParent();
+ checkArgument(
+ partitionLocation.equals(partitionPathFromDeleteDelta),
+ "Partition location in DeleteDelta '%s' does not match stored location '%s'",
+ deleteDeltaPath.getParent().toString(),
+ partitionLocation);
+
+ deleteDeltaInfoBuilder.add(new WriteIdInfo(minWriteId, maxWriteId, statementId));
+ return this;
+ }
+
+ public Optional<DeleteDeltaLocations> build()
+ {
+ List<WriteIdInfo> deleteDeltas = deleteDeltaInfoBuilder.build();
+ if (deleteDeltas.isEmpty()) {
+ return Optional.empty();
+ }
+ return Optional.of(new DeleteDeltaLocations(partitionLocation.toString(), deleteDeltas));
+ }
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/DirectoryLister.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/DirectoryLister.java
new file mode 100644
index 00000000..0003fced
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/DirectoryLister.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import io.prestosql.plugin.hive.metastore.Table;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+
+import java.io.IOException;
+
+public interface DirectoryLister
+{
+ RemoteIterator<LocatedFileStatus> list(FileSystem fs, Table table, Path path)
+ throws IOException;
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/DynamicConfigurationProvider.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/DynamicConfigurationProvider.java
new file mode 100644
index 00000000..fdf788b7
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/DynamicConfigurationProvider.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import org.apache.hadoop.conf.Configuration;
+
+import java.net.URI;
+
+import static io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext;
+import static org.apache.hadoop.fs.PrestoFileSystemCache.CACHE_KEY;
+
+public interface DynamicConfigurationProvider
+{
+ void updateConfiguration(Configuration configuration, HdfsContext context, URI uri);
+
+ /**
+ * Set a cache key to invalidate the file system on credential (or other configuration) change.
+ */
+ static void setCacheKey(Configuration configuration, String value)
+ {
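+ // append to any existing key so that each configuration change forces a new cached FileSystem instance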
+ configuration.set(CACHE_KEY, configuration.get(CACHE_KEY, "") + "|" + value);
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/FileFormatDataSourceStats.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/FileFormatDataSourceStats.java
new file mode 100644
index 00000000..6871759e
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/FileFormatDataSourceStats.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import io.airlift.stats.DistributionStat;
+import io.airlift.stats.TimeStat;
+import org.weakref.jmx.Managed;
+import org.weakref.jmx.Nested;
+
+import static java.util.concurrent.TimeUnit.MILLISECONDS;
+import static java.util.concurrent.TimeUnit.NANOSECONDS;
+
+public class FileFormatDataSourceStats
+{
+ private final DistributionStat readBytes = new DistributionStat();
+ private final DistributionStat maxCombinedBytesPerRow = new DistributionStat();
+ private final TimeStat time0Bto100KB = new TimeStat(MILLISECONDS);
+ private final TimeStat time100KBto1MB = new TimeStat(MILLISECONDS);
+ private final TimeStat time1MBto10MB = new TimeStat(MILLISECONDS);
+ private final TimeStat time10MBPlus = new TimeStat(MILLISECONDS);
+
+ @Managed
+ @Nested
+ public DistributionStat getReadBytes()
+ {
+ return readBytes;
+ }
+
+ @Managed
+ @Nested
+ public DistributionStat getMaxCombinedBytesPerRow()
+ {
+ return maxCombinedBytesPerRow;
+ }
+
+ @Managed
+ @Nested
+ public TimeStat get0Bto100KB()
+ {
+ return time0Bto100KB;
+ }
+
+ @Managed
+ @Nested
+ public TimeStat get100KBto1MB()
+ {
+ return time100KBto1MB;
+ }
+
+ @Managed
+ @Nested
+ public TimeStat get1MBto10MB()
+ {
+ return time1MBto10MB;
+ }
+
+ @Managed
+ @Nested
+ public TimeStat get10MBPlus()
+ {
+ return time10MBPlus;
+ }
+
+ public void readDataBytesPerSecond(long bytes, long nanos)
+ {
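+ // record the read size, then bucket the elapsed time by how many bytes the read returned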
+ readBytes.add(bytes);
+ if (bytes < 100 * 1024) {
+ time0Bto100KB.add(nanos, NANOSECONDS);
+ }
+ else if (bytes < 1024 * 1024) {
+ time100KBto1MB.add(nanos, NANOSECONDS);
+ }
+ else if (bytes < 10 * 1024 * 1024) {
+ time1MBto10MB.add(nanos, NANOSECONDS);
+ }
+ else {
+ time10MBPlus.add(nanos, NANOSECONDS);
+ }
+ }
+
+ public void addMaxCombinedBytesPerRow(long bytes)
+ {
+ maxCombinedBytesPerRow.add(bytes);
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForCachingHiveMetastore.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForCachingHiveMetastore.java
new file mode 100644
index 00000000..5b4285cc
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForCachingHiveMetastore.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import javax.inject.Qualifier;
+
+import java.lang.annotation.Retention;
+import java.lang.annotation.Target;
+
+import static java.lang.annotation.ElementType.FIELD;
+import static java.lang.annotation.ElementType.METHOD;
+import static java.lang.annotation.ElementType.PARAMETER;
+import static java.lang.annotation.RetentionPolicy.RUNTIME;
+
+@Retention(RUNTIME)
+@Target({FIELD, PARAMETER, METHOD})
+@Qualifier
+public @interface ForCachingHiveMetastore
+{
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForCachingHiveMetastoreTableRefresh.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForCachingHiveMetastoreTableRefresh.java
new file mode 100644
index 00000000..b43bd0f1
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForCachingHiveMetastoreTableRefresh.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2018-2021. Huawei Technologies Co., Ltd. All rights reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import javax.inject.Qualifier;
+
+import java.lang.annotation.Retention;
+import java.lang.annotation.Target;
+
+import static java.lang.annotation.ElementType.FIELD;
+import static java.lang.annotation.ElementType.METHOD;
+import static java.lang.annotation.ElementType.PARAMETER;
+import static java.lang.annotation.RetentionPolicy.RUNTIME;
+
+@Retention(RUNTIME)
+@Target({FIELD, PARAMETER, METHOD})
+@Qualifier
+public @interface ForCachingHiveMetastoreTableRefresh
+{
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForHdfs.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForHdfs.java
new file mode 100644
index 00000000..c22da3bd
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForHdfs.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import javax.inject.Qualifier;
+
+import java.lang.annotation.Retention;
+import java.lang.annotation.Target;
+
+import static java.lang.annotation.ElementType.FIELD;
+import static java.lang.annotation.ElementType.METHOD;
+import static java.lang.annotation.ElementType.PARAMETER;
+import static java.lang.annotation.RetentionPolicy.RUNTIME;
+
+@Retention(RUNTIME)
+@Target({FIELD, PARAMETER, METHOD})
+@Qualifier
+public @interface ForHdfs
+{
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForHive.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForHive.java
new file mode 100644
index 00000000..1b0dc45d
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForHive.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import javax.inject.Qualifier;
+
+import java.lang.annotation.Retention;
+import java.lang.annotation.Target;
+
+import static java.lang.annotation.ElementType.FIELD;
+import static java.lang.annotation.ElementType.METHOD;
+import static java.lang.annotation.ElementType.PARAMETER;
+import static java.lang.annotation.RetentionPolicy.RUNTIME;
+
+@Retention(RUNTIME)
+@Target({FIELD, PARAMETER, METHOD})
+@Qualifier
+public @interface ForHive {}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForHiveMetastore.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForHiveMetastore.java
new file mode 100644
index 00000000..d8c918dd
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForHiveMetastore.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import javax.inject.Qualifier;
+
+import java.lang.annotation.Retention;
+import java.lang.annotation.Target;
+
+import static java.lang.annotation.ElementType.FIELD;
+import static java.lang.annotation.ElementType.METHOD;
+import static java.lang.annotation.ElementType.PARAMETER;
+import static java.lang.annotation.RetentionPolicy.RUNTIME;
+
+@Retention(RUNTIME)
+@Target({FIELD, PARAMETER, METHOD})
+@Qualifier
+public @interface ForHiveMetastore
+{
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForHiveTransactionHeartbeats.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForHiveTransactionHeartbeats.java
new file mode 100644
index 00000000..ed4bd856
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForHiveTransactionHeartbeats.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import javax.inject.Qualifier;
+
+import java.lang.annotation.Retention;
+import java.lang.annotation.Target;
+
+import static java.lang.annotation.ElementType.FIELD;
+import static java.lang.annotation.ElementType.METHOD;
+import static java.lang.annotation.ElementType.PARAMETER;
+import static java.lang.annotation.RetentionPolicy.RUNTIME;
+
+@Retention(RUNTIME)
+@Target({FIELD, PARAMETER, METHOD})
+@Qualifier
+public @interface ForHiveTransactionHeartbeats {}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForHiveVacuum.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForHiveVacuum.java
new file mode 100644
index 00000000..f23c187a
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForHiveVacuum.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import javax.inject.Qualifier;
+
+import java.lang.annotation.Retention;
+import java.lang.annotation.Target;
+
+import static java.lang.annotation.ElementType.FIELD;
+import static java.lang.annotation.ElementType.METHOD;
+import static java.lang.annotation.ElementType.PARAMETER;
+import static java.lang.annotation.RetentionPolicy.RUNTIME;
+
+@Retention(RUNTIME)
+@Target({FIELD, PARAMETER, METHOD})
+@Qualifier
+public @interface ForHiveVacuum {}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForRecordingHiveMetastore.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForRecordingHiveMetastore.java
new file mode 100644
index 00000000..3971ab5a
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/ForRecordingHiveMetastore.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import javax.inject.Qualifier;
+
+import java.lang.annotation.Retention;
+import java.lang.annotation.Target;
+
+import static java.lang.annotation.ElementType.FIELD;
+import static java.lang.annotation.ElementType.METHOD;
+import static java.lang.annotation.ElementType.PARAMETER;
+import static java.lang.annotation.RetentionPolicy.RUNTIME;
+
+@Retention(RUNTIME)
+@Target({FIELD, PARAMETER, METHOD})
+@Qualifier
+public @interface ForRecordingHiveMetastore
+{
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/GenericHiveRecordCursor.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/GenericHiveRecordCursor.java
new file mode 100644
index 00000000..c78c5d3b
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/GenericHiveRecordCursor.java
@@ -0,0 +1,518 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import io.airlift.slice.Slice;
+import io.airlift.slice.Slices;
+import io.prestosql.hadoop.TextLineLengthLimitExceededException;
+import io.prestosql.plugin.hive.util.SerDeUtils;
+import io.prestosql.spi.PrestoException;
+import io.prestosql.spi.block.Block;
+import io.prestosql.spi.connector.RecordCursor;
+import io.prestosql.spi.type.DecimalType;
+import io.prestosql.spi.type.Decimals;
+import io.prestosql.spi.type.Type;
+import io.prestosql.spi.type.TypeManager;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.io.BinaryComparable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.RecordReader;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.math.BigInteger;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkState;
+import static io.prestosql.plugin.hive.HiveUtil.closeWithSuppression;
+import static io.prestosql.plugin.hive.HiveUtil.getDeserializer;
+import static io.prestosql.plugin.hive.HiveUtil.getTableObjectInspector;
+import static io.prestosql.plugin.hive.HiveUtil.isStructuralType;
+import static io.prestosql.spi.type.BigintType.BIGINT;
+import static io.prestosql.spi.type.BooleanType.BOOLEAN;
+import static io.prestosql.spi.type.Chars.isCharType;
+import static io.prestosql.spi.type.Chars.truncateToLengthAndTrimSpaces;
+import static io.prestosql.spi.type.DateType.DATE;
+import static io.prestosql.spi.type.Decimals.rescale;
+import static io.prestosql.spi.type.DoubleType.DOUBLE;
+import static io.prestosql.spi.type.IntegerType.INTEGER;
+import static io.prestosql.spi.type.RealType.REAL;
+import static io.prestosql.spi.type.SmallintType.SMALLINT;
+import static io.prestosql.spi.type.TimestampType.TIMESTAMP;
+import static io.prestosql.spi.type.TinyintType.TINYINT;
+import static io.prestosql.spi.type.VarbinaryType.VARBINARY;
+import static io.prestosql.spi.type.Varchars.isVarcharType;
+import static io.prestosql.spi.type.Varchars.truncateToLength;
+import static java.lang.Float.floatToRawIntBits;
+import static java.lang.Math.max;
+import static java.lang.Math.min;
+import static java.lang.String.format;
+import static java.util.Objects.requireNonNull;
+
+class GenericHiveRecordCursor<K, V extends Writable>
+ implements RecordCursor
+{
+ private final Path path;
+ private final RecordReader<K, V> recordReader;
+ private final K key;
+ private final V value;
+
+ private final Deserializer deserializer;
+
+ private final Type[] types;
+ private final HiveType[] hiveTypes;
+
+ private final StructObjectInspector rowInspector;
+ private final ObjectInspector[] fieldInspectors;
+ private final StructField[] structFields;
+
+ private final boolean[] loaded;
+ private final boolean[] booleans;
+ private final long[] longs;
+ private final double[] doubles;
+ private final Slice[] slices;
+ private final Object[] objects;
+ private final boolean[] nulls;
+
+ private final long totalBytes;
+
+ private long completedBytes;
+ private Object rowData;
+ private boolean closed;
+
+ public GenericHiveRecordCursor(
+ Configuration configuration,
+ Path path,
+ RecordReader<K, V> recordReader,
+ long totalBytes,
+ Properties splitSchema,
+ List<HiveColumnHandle> columns,
+ TypeManager typeManager)
+ {
+ requireNonNull(path, "path is null");
+ requireNonNull(recordReader, "recordReader is null");
+ checkArgument(totalBytes >= 0, "totalBytes is negative");
+ requireNonNull(splitSchema, "splitSchema is null");
+ requireNonNull(columns, "columns is null");
+
+ this.path = path;
+ this.recordReader = recordReader;
+ this.totalBytes = totalBytes;
+ this.key = recordReader.createKey();
+ this.value = recordReader.createValue();
+
+ this.deserializer = getDeserializer(configuration, splitSchema);
+ this.rowInspector = getTableObjectInspector(deserializer);
+
+ int size = columns.size();
+
+ this.types = new Type[size];
+ this.hiveTypes = new HiveType[size];
+
+ this.structFields = new StructField[size];
+ this.fieldInspectors = new ObjectInspector[size];
+
+ this.loaded = new boolean[size];
+ this.booleans = new boolean[size];
+ this.longs = new long[size];
+ this.doubles = new double[size];
+ this.slices = new Slice[size];
+ this.objects = new Object[size];
+ this.nulls = new boolean[size];
+
+ // initialize data columns
+ for (int i = 0; i < columns.size(); i++) {
+ HiveColumnHandle column = columns.get(i);
+ checkState(column.getColumnType() == HiveColumnHandle.ColumnType.REGULAR, "column type must be regular");
+
+ types[i] = typeManager.getType(column.getTypeSignature());
+ hiveTypes[i] = column.getHiveType();
+
+ StructField field = rowInspector.getStructFieldRef(column.getName());
+ structFields[i] = field;
+ fieldInspectors[i] = field.getFieldObjectInspector();
+ }
+ }
+
+ @Override
+ public long getCompletedBytes()
+ {
+ if (!closed) {
+ updateCompletedBytes();
+ }
+ return completedBytes;
+ }
+
+ @Override
+ public long getReadTimeNanos()
+ {
+ return 0;
+ }
+
+ private void updateCompletedBytes()
+ {
+ try {
+ long newCompletedBytes = (long) (totalBytes * recordReader.getProgress());
+ completedBytes = min(totalBytes, max(completedBytes, newCompletedBytes));
+ }
+ catch (IOException ignored) {
+ }
+ }
+
+ @Override
+ public Type getType(int field)
+ {
+ return types[field];
+ }
+
+ @Override
+ public boolean advanceNextPosition()
+ {
+ try {
+ if (closed || !recordReader.next(key, value)) {
+ close();
+ return false;
+ }
+
+ // reset loaded flags
+ Arrays.fill(loaded, false);
+
+ // decode value
+ rowData = deserializer.deserialize(value);
+
+ return true;
+ }
+ catch (IOException | SerDeException | RuntimeException e) {
+ closeWithSuppression(this, e);
+ if (e instanceof TextLineLengthLimitExceededException) {
+ throw new PrestoException(HiveErrorCode.HIVE_BAD_DATA, "Line too long in text file: " + path, e);
+ }
+ throw new PrestoException(HiveErrorCode.HIVE_CURSOR_ERROR, e);
+ }
+ }
+
+ @Override
+ public boolean getBoolean(int fieldId)
+ {
+ checkState(!closed, "Cursor is closed");
+
+ validateType(fieldId, boolean.class);
+ if (!loaded[fieldId]) {
+ parseBooleanColumn(fieldId);
+ }
+ return booleans[fieldId];
+ }
+
+ private void parseBooleanColumn(int column)
+ {
+ loaded[column] = true;
+
+ Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);
+
+ if (fieldData == null) {
+ nulls[column] = true;
+ }
+ else {
+ Object fieldValue = ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveJavaObject(fieldData);
+ checkState(fieldValue != null, "fieldValue should not be null");
+ booleans[column] = (Boolean) fieldValue;
+ nulls[column] = false;
+ }
+ }
+
+ @Override
+ public long getLong(int fieldId)
+ {
+ checkState(!closed, "Cursor is closed");
+
+ validateType(fieldId, long.class);
+ if (!loaded[fieldId]) {
+ parseLongColumn(fieldId);
+ }
+ return longs[fieldId];
+ }
+
+ private void parseLongColumn(int column)
+ {
+ loaded[column] = true;
+
+ Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);
+
+ if (fieldData == null) {
+ nulls[column] = true;
+ }
+ else {
+ Object fieldValue = ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveJavaObject(fieldData);
+ checkState(fieldValue != null, "fieldValue should not be null");
+ longs[column] = getLongExpressedValue(fieldValue);
+ nulls[column] = false;
+ }
+ }
+
+ private long getLongExpressedValue(Object value)
+ {
+ if (value instanceof Date) {
+ return ((Date) value).toEpochDay();
+ }
+ if (value instanceof Timestamp) {
+ return ((Timestamp) value).toEpochMilli();
+ }
+ if (value instanceof Float) {
+ return floatToRawIntBits(((Float) value));
+ }
+ return ((Number) value).longValue();
+ }
+
+ @Override
+ public double getDouble(int fieldId)
+ {
+ checkState(!closed, "Cursor is closed");
+
+ validateType(fieldId, double.class);
+ if (!loaded[fieldId]) {
+ parseDoubleColumn(fieldId);
+ }
+ return doubles[fieldId];
+ }
+
+ private void parseDoubleColumn(int column)
+ {
+ loaded[column] = true;
+
+ Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);
+
+ if (fieldData == null) {
+ nulls[column] = true;
+ }
+ else {
+ Object fieldValue = ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveJavaObject(fieldData);
+ checkState(fieldValue != null, "fieldValue should not be null");
+ doubles[column] = ((Number) fieldValue).doubleValue();
+ nulls[column] = false;
+ }
+ }
+
+ @Override
+ public Slice getSlice(int fieldId)
+ {
+ checkState(!closed, "Cursor is closed");
+
+ validateType(fieldId, Slice.class);
+ if (!loaded[fieldId]) {
+ parseStringColumn(fieldId);
+ }
+ return slices[fieldId];
+ }
+
+ private void parseStringColumn(int column)
+ {
+ loaded[column] = true;
+
+ Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);
+
+ if (fieldData == null) {
+ nulls[column] = true;
+ }
+ else {
+ Object fieldValue = ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveWritableObject(fieldData);
+ checkState(fieldValue != null, "fieldValue should not be null");
+ BinaryComparable hiveValue;
+ if (fieldValue instanceof Text) {
+ hiveValue = (Text) fieldValue;
+ }
+ else if (fieldValue instanceof BytesWritable) {
+ hiveValue = (BytesWritable) fieldValue;
+ }
+ else if (fieldValue instanceof HiveVarcharWritable) {
+ hiveValue = ((HiveVarcharWritable) fieldValue).getTextValue();
+ }
+ else if (fieldValue instanceof HiveCharWritable) {
+ hiveValue = ((HiveCharWritable) fieldValue).getTextValue();
+ }
+ else {
+ throw new IllegalStateException("unsupported string field type: " + fieldValue.getClass().getName());
+ }
+
+ // create a slice view over the hive value and trim to character limits
+ Slice value = Slices.wrappedBuffer(hiveValue.getBytes(), 0, hiveValue.getLength());
+ Type type = types[column];
+ if (isVarcharType(type)) {
+ value = truncateToLength(value, type);
+ }
+ if (isCharType(type)) {
+ value = truncateToLengthAndTrimSpaces(value, type);
+ }
+
+ // store a copy of the bytes, since the hive reader can reuse the underlying buffer
+ slices[column] = Slices.copyOf(value);
+ nulls[column] = false;
+ }
+ }
+
+ private void parseDecimalColumn(int column)
+ {
+ loaded[column] = true;
+
+ Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);
+
+ if (fieldData == null) {
+ nulls[column] = true;
+ }
+ else {
+ Object fieldValue = ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveJavaObject(fieldData);
+ checkState(fieldValue != null, "fieldValue should not be null");
+
+ HiveDecimal decimal = (HiveDecimal) fieldValue;
+ DecimalType columnType = (DecimalType) types[column];
+ BigInteger unscaledDecimal = rescale(decimal.unscaledValue(), decimal.scale(), columnType.getScale());
+
+ if (columnType.isShort()) {
+ longs[column] = unscaledDecimal.longValue();
+ }
+ else {
+ slices[column] = Decimals.encodeUnscaledValue(unscaledDecimal);
+ }
+ nulls[column] = false;
+ }
+ }
+
+ @Override
+ public Object getObject(int fieldId)
+ {
+ checkState(!closed, "Cursor is closed");
+
+ validateType(fieldId, Block.class);
+ if (!loaded[fieldId]) {
+ parseObjectColumn(fieldId);
+ }
+ return objects[fieldId];
+ }
+
+ private void parseObjectColumn(int column)
+ {
+ loaded[column] = true;
+
+ Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);
+
+ if (fieldData == null) {
+ nulls[column] = true;
+ }
+ else {
+ objects[column] = SerDeUtils.getBlockObject(types[column], fieldData, fieldInspectors[column]);
+ nulls[column] = false;
+ }
+ }
+
+ @Override
+ public boolean isNull(int fieldId)
+ {
+ checkState(!closed, "Cursor is closed");
+
+ if (!loaded[fieldId]) {
+ parseColumn(fieldId);
+ }
+ return nulls[fieldId];
+ }
+
+ private void parseColumn(int column)
+ {
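+ // dispatch on the declared Presto type; REAL, DATE and TIMESTAMP values are all materialized through the long parser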
+ Type type = types[column];
+ if (BOOLEAN.equals(type)) {
+ parseBooleanColumn(column);
+ }
+ else if (BIGINT.equals(type)) {
+ parseLongColumn(column);
+ }
+ else if (INTEGER.equals(type)) {
+ parseLongColumn(column);
+ }
+ else if (SMALLINT.equals(type)) {
+ parseLongColumn(column);
+ }
+ else if (TINYINT.equals(type)) {
+ parseLongColumn(column);
+ }
+ else if (REAL.equals(type)) {
+ parseLongColumn(column);
+ }
+ else if (DOUBLE.equals(type)) {
+ parseDoubleColumn(column);
+ }
+ else if (isVarcharType(type) || VARBINARY.equals(type)) {
+ parseStringColumn(column);
+ }
+ else if (isCharType(type)) {
+ parseStringColumn(column);
+ }
+ else if (isStructuralType(hiveTypes[column])) {
+ parseObjectColumn(column);
+ }
+ else if (DATE.equals(type)) {
+ parseLongColumn(column);
+ }
+ else if (TIMESTAMP.equals(type)) {
+ parseLongColumn(column);
+ }
+ else if (type instanceof DecimalType) {
+ parseDecimalColumn(column);
+ }
+ else {
+ throw new UnsupportedOperationException("Unsupported column type: " + type);
+ }
+ }
+
+ private void validateType(int fieldId, Class<?> type)
+ {
+ if (!types[fieldId].getJavaType().equals(type)) {
+ // we don't use Preconditions.checkArgument because it requires boxing fieldId, which affects inner loop performance
+ throw new IllegalArgumentException(format("Expected field to be %s, actual %s (field %s)", type, types[fieldId], fieldId));
+ }
+ }
+
+ @Override
+ public void close()
+ {
+ // some hive input formats are broken and bad things can happen if you close them multiple times
+ if (closed) {
+ return;
+ }
+ closed = true;
+
+ updateCompletedBytes();
+
+ try {
+ recordReader.close();
+ }
+ catch (IOException e) {
+ throw new UncheckedIOException(e);
+ }
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/GenericHiveRecordCursorProvider.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/GenericHiveRecordCursorProvider.java
new file mode 100644
index 00000000..0b9dcc31
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/GenericHiveRecordCursorProvider.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import io.prestosql.spi.PrestoException;
+import io.prestosql.spi.connector.ConnectorSession;
+import io.prestosql.spi.connector.RecordCursor;
+import io.prestosql.spi.predicate.TupleDomain;
+import io.prestosql.spi.type.TypeManager;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.RecordReader;
+
+import javax.inject.Inject;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Properties;
+
+import static io.prestosql.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR;
+import static java.util.Objects.requireNonNull;
+
+public class GenericHiveRecordCursorProvider
+ implements HiveRecordCursorProvider
+{
+ private final HdfsEnvironment hdfsEnvironment;
+
+ @Inject
+ public GenericHiveRecordCursorProvider(HdfsEnvironment hdfsEnvironment)
+ {
+ this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
+ }
+
+ @Override
+ public Optional<RecordCursor> createRecordCursor(
+ Configuration configuration,
+ ConnectorSession session,
+ Path path,
+ long start,
+ long length,
+ long fileSize,
+ Properties schema,
+ List<HiveColumnHandle> columns,
+ TupleDomain<HiveColumnHandle> effectivePredicate,
+ TypeManager typeManager,
+ boolean s3SelectPushdownEnabled,
+ Map<String, String> customSplitInfo)
+ {
+ // make sure the FileSystem is created with the proper Configuration object
+ try {
+ this.hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
+ }
+ catch (IOException e) {
+ throw new PrestoException(HIVE_FILESYSTEM_ERROR, "Failed getting FileSystem: " + path, e);
+ }
+
+ return hdfsEnvironment.doAs(session.getUser(), () -> {
+ RecordReader<?, ?> recordReader = HiveUtil.createRecordReader(configuration, path, start, length, schema, columns, customSplitInfo);
+
+ return Optional.of(new GenericHiveRecordCursor<>(
+ configuration,
+ path,
+ genericRecordReader(recordReader),
+ length,
+ schema,
+ columns,
+ typeManager));
+ });
+ }
+
+ @SuppressWarnings("unchecked")
+ private static RecordReader<?, ? extends Writable> genericRecordReader(RecordReader<?, ?> recordReader)
+ {
+ return (RecordReader<?, ? extends Writable>) recordReader;
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HdfsConfiguration.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HdfsConfiguration.java
new file mode 100644
index 00000000..f02adc20
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HdfsConfiguration.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext;
+import org.apache.hadoop.conf.Configuration;
+
+import java.net.URI;
+
+public interface HdfsConfiguration
+{
+ Configuration getConfiguration(HdfsContext context, URI uri);
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HdfsConfigurationInitializer.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HdfsConfigurationInitializer.java
new file mode 100644
index 00000000..01fb3e9f
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HdfsConfigurationInitializer.java
@@ -0,0 +1,199 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.net.HostAndPort;
+import io.airlift.units.Duration;
+import io.prestosql.hadoop.SocksSocketFactory;
+import io.prestosql.plugin.hive.s3.ConfigurationInitializer;
+import io.prestosql.plugin.hive.util.ConfigurationUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
+import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.net.DNSToSwitchMapping;
+import org.apache.orc.OrcConf;
+import org.apache.parquet.hadoop.ParquetOutputFormat;
+
+import javax.inject.Inject;
+import javax.net.SocketFactory;
+
+import java.util.List;
+import java.util.Set;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static java.lang.Math.toIntExact;
+import static java.util.Objects.requireNonNull;
+import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_PING_INTERVAL_KEY;
+import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_RPC_PROTECTION;
+import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_RPC_SOCKET_FACTORY_CLASS_DEFAULT_KEY;
+import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SOCKS_SERVER_KEY;
+import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY;
+import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_TIMEOUT_KEY;
+import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_KEY_PROVIDER_CACHE_EXPIRY_MS;
+import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY;
+import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.COMPRESSRESULT;
+import static org.apache.hadoop.io.SequenceFile.CompressionType.BLOCK;
+
+public class HdfsConfigurationInitializer
+{
+ private final HostAndPort socksProxy;
+ private final Duration ipcPingInterval;
+ private final Duration dfsTimeout;
+ private final Duration dfsConnectTimeout;
+ private final int dfsConnectMaxRetries;
+ private final int dfsKeyProviderCacheTtlMillis;
+ private final String domainSocketPath;
+ private final Configuration resourcesConfiguration;
+ private final HiveCompressionCodec compressionCodec;
+ private final int fileSystemMaxCacheSize;
+ private final Set<ConfigurationInitializer> configurationInitializers;
+ private final boolean isHdfsWireEncryptionEnabled;
+ private int textMaxLineLength;
+
+ @VisibleForTesting
+ public HdfsConfigurationInitializer(HiveConfig config)
+ {
+ this(config, ImmutableSet.of());
+ }
+
+ @Inject
+ public HdfsConfigurationInitializer(HiveConfig config, Set<ConfigurationInitializer> configurationInitializers)
+ {
+ requireNonNull(config, "config is null");
+ checkArgument(config.getDfsTimeout().toMillis() >= 1, "dfsTimeout must be at least 1 ms");
+ checkArgument(toIntExact(config.getTextMaxLineLength().toBytes()) >= 1, "textMaxLineLength must be at least 1 byte");
+
+ this.socksProxy = config.getMetastoreSocksProxy();
+ this.ipcPingInterval = config.getIpcPingInterval();
+ this.dfsTimeout = config.getDfsTimeout();
+ this.dfsConnectTimeout = config.getDfsConnectTimeout();
+ this.dfsConnectMaxRetries = config.getDfsConnectMaxRetries();
+ this.dfsKeyProviderCacheTtlMillis = toIntExact(config.getDfsKeyProviderCacheTtl().toMillis());
+ this.domainSocketPath = config.getDomainSocketPath();
+ this.resourcesConfiguration = readConfiguration(config.getResourceConfigFiles());
+ this.compressionCodec = config.getHiveCompressionCodec();
+ this.fileSystemMaxCacheSize = config.getFileSystemMaxCacheSize();
+ this.isHdfsWireEncryptionEnabled = config.isHdfsWireEncryptionEnabled();
+ this.textMaxLineLength = toIntExact(config.getTextMaxLineLength().toBytes());
+
+ this.configurationInitializers = ImmutableSet.copyOf(requireNonNull(configurationInitializers, "configurationInitializers is null"));
+ }
+
+ private static Configuration readConfiguration(List<String> resourcePaths)
+ {
+ Configuration result = new Configuration(false);
+
+ for (String resourcePath : resourcePaths) {
+ Configuration resourceProperties = new Configuration(false);
+ resourceProperties.addResource(new Path(resourcePath));
+ ConfigurationUtils.copy(resourceProperties, result);
+ }
+
+ return result;
+ }
+
+ public void initializeConfiguration(Configuration config)
+ {
+ ConfigurationUtils.copy(resourcesConfiguration, config);
+
+ // this is to prevent dfs client from doing reverse DNS lookups to determine whether nodes are rack local
+ config.setClass(NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, NoOpDNSToSwitchMapping.class, DNSToSwitchMapping.class);
+
+ if (socksProxy != null) {
+ config.setClass(HADOOP_RPC_SOCKET_FACTORY_CLASS_DEFAULT_KEY, SocksSocketFactory.class, SocketFactory.class);
+ config.set(HADOOP_SOCKS_SERVER_KEY, socksProxy.toString());
+ }
+
+ if (domainSocketPath != null) {
+ config.setStrings(DFS_DOMAIN_SOCKET_PATH_KEY, domainSocketPath);
+ }
+
+ // only enable short circuit reads if domain socket path is properly configured
+ if (!config.get(DFS_DOMAIN_SOCKET_PATH_KEY, "").trim().isEmpty()) {
+ config.setBooleanIfUnset(HdfsClientConfigKeys.Read.ShortCircuit.KEY, true);
+ }
+
+ config.setInt(DFS_CLIENT_SOCKET_TIMEOUT_KEY, toIntExact(dfsTimeout.toMillis()));
+ config.setInt(IPC_PING_INTERVAL_KEY, toIntExact(ipcPingInterval.toMillis()));
+ config.setInt(IPC_CLIENT_CONNECT_TIMEOUT_KEY, toIntExact(dfsConnectTimeout.toMillis()));
+ config.setInt(IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, dfsConnectMaxRetries);
+
+ if (isHdfsWireEncryptionEnabled) {
+ config.set(HADOOP_RPC_PROTECTION, "privacy");
+ config.setBoolean("dfs.encrypt.data.transfer", true);
+ }
+
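+ // bound the size of the Hadoop FileSystem cache used by the connector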
+ config.setInt("fs.cache.max-size", fileSystemMaxCacheSize);
+
+ config.setInt(DFS_CLIENT_KEY_PROVIDER_CACHE_EXPIRY_MS, dfsKeyProviderCacheTtlMillis);
+ config.setInt(LineRecordReader.MAX_LINE_LENGTH, textMaxLineLength);
+
+ configureCompression(config, compressionCodec);
+
+ configurationInitializers.forEach(configurationInitializer -> configurationInitializer.initializeConfiguration(config));
+ }
+
+ public static void configureCompression(Configuration config, HiveCompressionCodec compressionCodec)
+ {
+ boolean compression = compressionCodec != HiveCompressionCodec.NONE;
+ config.setBoolean(COMPRESSRESULT.varname, compression);
+ config.setBoolean("mapred.output.compress", compression);
+ config.setBoolean(FileOutputFormat.COMPRESS, compression);
+ // For ORC
+ OrcConf.COMPRESS.setString(config, compressionCodec.getOrcCompressionKind().name());
+ // For RCFile and Text
+ if (compressionCodec.getCodec().isPresent()) {
+ config.set("mapred.output.compression.codec", compressionCodec.getCodec().get().getName());
+ config.set(FileOutputFormat.COMPRESS_CODEC, compressionCodec.getCodec().get().getName());
+ }
+ else {
+ config.unset("mapred.output.compression.codec");
+ config.unset(FileOutputFormat.COMPRESS_CODEC);
+ }
+ // For Parquet
+ config.set(ParquetOutputFormat.COMPRESSION, compressionCodec.getParquetCompressionCodec().name());
+ // For SequenceFile
+ config.set(FileOutputFormat.COMPRESS_TYPE, BLOCK.toString());
+ }
+
+ public static class NoOpDNSToSwitchMapping
+ implements DNSToSwitchMapping
+ {
+ @Override
+ public List<String> resolve(List<String> names)
+ {
+ // dfs client expects an empty list as an indication that the host->switch mapping for the given names are not known
+ return ImmutableList.of();
+ }
+
+ @Override
+ public void reloadCachedMappings()
+ {
+ // no-op
+ }
+
+ @Override
+ public void reloadCachedMappings(List<String> names)
+ {
+ // no-op
+ }
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HdfsEnvironment.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HdfsEnvironment.java
new file mode 100644
index 00000000..2d373139
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HdfsEnvironment.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import io.prestosql.hadoop.HadoopNative;
+import io.prestosql.plugin.hive.authentication.GenericExceptionAction;
+import io.prestosql.plugin.hive.authentication.HdfsAuthentication;
+import io.prestosql.spi.connector.ConnectorSession;
+import io.prestosql.spi.security.ConnectorIdentity;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import javax.inject.Inject;
+
+import java.io.IOException;
+import java.util.Optional;
+
+import static com.google.common.base.MoreObjects.toStringHelper;
+import static java.util.Objects.requireNonNull;
+
+public class HdfsEnvironment
+{
+ static {
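+ // load the native Hadoop libraries once, before any HDFS access through this class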
+ HadoopNative.requireHadoopNative();
+ }
+
+ private final HdfsConfiguration hdfsConfiguration;
+ private final HdfsAuthentication hdfsAuthentication;
+ private final boolean verifyChecksum;
+
+ @Inject
+ public HdfsEnvironment(
+ HdfsConfiguration hdfsConfiguration,
+ HiveConfig config,
+ HdfsAuthentication hdfsAuthentication)
+ {
+ this.hdfsConfiguration = requireNonNull(hdfsConfiguration, "hdfsConfiguration is null");
+ this.verifyChecksum = requireNonNull(config, "config is null").isVerifyChecksum();
+ this.hdfsAuthentication = requireNonNull(hdfsAuthentication, "hdfsAuthentication is null");
+ }
+
+ public Configuration getConfiguration(HdfsContext context, Path path)
+ {
+ return hdfsConfiguration.getConfiguration(context, path.toUri());
+ }
+
+ public FileSystem getFileSystem(HdfsContext context, Path path)
+ throws IOException
+ {
+ return getFileSystem(context.getIdentity().getUser(), path, getConfiguration(context, path));
+ }
+
+ public FileSystem getFileSystem(String user, Path path, Configuration configuration)
+ throws IOException
+ {
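+ // resolve the FileSystem as the given user so that Kerberos/impersonation settings are honored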
+ return hdfsAuthentication.doAs(user, () -> {
+ FileSystem fileSystem = path.getFileSystem(configuration);
+ fileSystem.setVerifyChecksum(verifyChecksum);
+ return fileSystem;
+ });
+ }
+
+ public <R, E extends Exception> R doAs(String user, GenericExceptionAction<R, E> action)
+ throws E
+ {
+ return hdfsAuthentication.doAs(user, action);
+ }
+
+ public void doAs(String user, Runnable action)
+ {
+ hdfsAuthentication.doAs(user, action);
+ }
+
+ public static class HdfsContext
+ {
+ private final ConnectorIdentity identity;
+ private final Optional<String> source;
+ private final Optional<String> queryId;
+ private final Optional<String> schemaName;
+ private final Optional<String> tableName;
+
+ public HdfsContext(ConnectorIdentity identity)
+ {
+ this.identity = requireNonNull(identity, "identity is null");
+ this.source = Optional.empty();
+ this.queryId = Optional.empty();
+ this.schemaName = Optional.empty();
+ this.tableName = Optional.empty();
+ }
+
+ public HdfsContext(ConnectorSession session, String schemaName)
+ {
+ requireNonNull(session, "session is null");
+ requireNonNull(schemaName, "schemaName is null");
+ this.identity = requireNonNull(session.getIdentity(), "session.getIdentity() is null");
+ this.source = requireNonNull(session.getSource(), "session.getSource()");
+ this.queryId = Optional.of(session.getQueryId());
+ this.schemaName = Optional.of(schemaName);
+ this.tableName = Optional.empty();
+ }
+
+ public HdfsContext(ConnectorSession session, String schemaName, String tableName)
+ {
+ requireNonNull(session, "session is null");
+ requireNonNull(schemaName, "schemaName is null");
+ requireNonNull(tableName, "tableName is null");
+ this.identity = requireNonNull(session.getIdentity(), "session.getIdentity() is null");
+ this.source = requireNonNull(session.getSource(), "session.getSource()");
+ this.queryId = Optional.of(session.getQueryId());
+ this.schemaName = Optional.of(schemaName);
+ this.tableName = Optional.of(tableName);
+ }
+
+ public ConnectorIdentity getIdentity()
+ {
+ return identity;
+ }
+
+ public Optional<String> getSource()
+ {
+ return source;
+ }
+
+ public Optional<String> getQueryId()
+ {
+ return queryId;
+ }
+
+ public Optional<String> getSchemaName()
+ {
+ return schemaName;
+ }
+
+ public Optional<String> getTableName()
+ {
+ return tableName;
+ }
+
+ @Override
+ public String toString()
+ {
+ return toStringHelper(this)
+ .omitNullValues()
+ .add("user", identity)
+ .add("source", source.orElse(null))
+ .add("queryId", queryId.orElse(null))
+ .add("schemaName", schemaName.orElse(null))
+ .add("tableName", tableName.orElse(null))
+ .toString();
+ }
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveACIDWriteType.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveACIDWriteType.java
new file mode 100644
index 00000000..72dc8463
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveACIDWriteType.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2018-2020. Huawei Technologies Co., Ltd. All rights reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+public enum HiveACIDWriteType
+{
+ VACUUM(-2),
+ VACUUM_UNIFY(-2),
+ NONE(-1),
+ INSERT(0),
+ INSERT_OVERWRITE(0),
+ UPDATE(0), // Hive ACID semantics in Hive 3.x and later expect the update operation id to be 0.
+ DELETE(2);
+
+ private int operationId;
+
+ HiveACIDWriteType(int operationId)
+ {
+ this.operationId = operationId;
+ }
+
+ public int getOperationId()
+ {
+ return operationId;
+ }
+
+ public static boolean isUpdateOrDelete(HiveACIDWriteType writeType)
+ {
+ return writeType == UPDATE || writeType == DELETE;
+ }
+
+ public static boolean isRowIdNeeded(HiveACIDWriteType writeType)
+ {
+ return isUpdateOrDelete(writeType) || isVacuum(writeType);
+ }
+
+ public static boolean isVacuum(HiveACIDWriteType writeType)
+ {
+ return writeType == VACUUM || writeType == VACUUM_UNIFY;
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveAnalyzeProperties.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveAnalyzeProperties.java
new file mode 100644
index 00000000..52d8f211
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveAnalyzeProperties.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.google.common.collect.ImmutableList;
+import io.prestosql.spi.PrestoException;
+import io.prestosql.spi.session.PropertyMetadata;
+import io.prestosql.spi.type.TypeManager;
+
+import javax.inject.Inject;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+import static com.google.common.base.MoreObjects.firstNonNull;
+import static com.google.common.collect.ImmutableList.toImmutableList;
+import static com.google.common.collect.ImmutableSet.toImmutableSet;
+import static io.prestosql.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY;
+import static io.prestosql.spi.type.TypeSignature.parseTypeSignature;
+
+public class HiveAnalyzeProperties
+{
+ public static final String PARTITIONS_PROPERTY = "partitions";
+
+ private final List<PropertyMetadata<?>> analyzeProperties;
+
+ @Inject
+ public HiveAnalyzeProperties(TypeManager typeManager)
+ {
+ analyzeProperties = ImmutableList.of(
+ new PropertyMetadata<>(
+ PARTITIONS_PROPERTY,
+ "Partitions to be analyzed",
+ typeManager.getType(parseTypeSignature("array(array(varchar))")),
+ List.class,
+ null,
+ false,
+ HiveAnalyzeProperties::decodePartitionLists,
+ value -> value));
+ }
+
+ public List<PropertyMetadata<?>> getAnalyzeProperties()
+ {
+ return analyzeProperties;
+ }
+
+ @SuppressWarnings("unchecked")
+ public static Optional<List<List<String>>> getPartitionList(Map<String, Object> properties)
+ {
+ List<List<String>> partitions = (List<List<String>>) properties.get(PARTITIONS_PROPERTY);
+ return partitions == null ? Optional.empty() : Optional.of(partitions);
+ }
+
+ private static List<List<String>> decodePartitionLists(Object object)
+ {
+ if (object == null) {
+ return null;
+ }
+
+ // replace null partition value with hive default partition
+ return ImmutableList.copyOf(((Collection<?>) object).stream()
+ .peek(HiveAnalyzeProperties::throwIfNull)
+ .map(partition -> ((Collection<?>) partition).stream()
+ .map(name -> firstNonNull((String) name, HivePartitionKey.HIVE_DEFAULT_DYNAMIC_PARTITION))
+ .collect(toImmutableList()))
+ .collect(toImmutableSet()));
+ }
+
+ private static void throwIfNull(Object object)
+ {
+ if (object == null) {
+ throw new PrestoException(INVALID_ANALYZE_PROPERTY, "Invalid null value in analyze partitions property");
+ }
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBasicStatistics.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBasicStatistics.java
new file mode 100644
index 00000000..d7f36b5a
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBasicStatistics.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import javax.annotation.concurrent.Immutable;
+
+import java.util.Objects;
+import java.util.OptionalLong;
+
+import static com.google.common.base.MoreObjects.toStringHelper;
+import static java.util.Objects.requireNonNull;
+
+@Immutable
+public class HiveBasicStatistics
+{
+ private final OptionalLong fileCount;
+ private final OptionalLong rowCount;
+ private final OptionalLong inMemoryDataSizeInBytes;
+ private final OptionalLong onDiskDataSizeInBytes;
+
+ public static HiveBasicStatistics createEmptyStatistics()
+ {
+ return new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty());
+ }
+
+ public static HiveBasicStatistics createZeroStatistics()
+ {
+ return new HiveBasicStatistics(0, 0, 0, 0);
+ }
+
+ public HiveBasicStatistics(long fileCount, long rowCount, long inMemoryDataSizeInBytes, long onDiskDataSizeInBytes)
+ {
+ this(OptionalLong.of(fileCount), OptionalLong.of(rowCount), OptionalLong.of(inMemoryDataSizeInBytes), OptionalLong.of(onDiskDataSizeInBytes));
+ }
+
+ @JsonCreator
+ public HiveBasicStatistics(
+ @JsonProperty("fileCount") OptionalLong fileCount,
+ @JsonProperty("rowCount") OptionalLong rowCount,
+ @JsonProperty("inMemoryDataSizeInBytes") OptionalLong inMemoryDataSizeInBytes,
+ @JsonProperty("onDiskDataSizeInBytes") OptionalLong onDiskDataSizeInBytes)
+ {
+ this.fileCount = requireNonNull(fileCount, "fileCount is null");
+ this.rowCount = requireNonNull(rowCount, "rowCount is null");
+ this.inMemoryDataSizeInBytes = requireNonNull(inMemoryDataSizeInBytes, "inMemoryDataSizeInBytes is null");
+ this.onDiskDataSizeInBytes = requireNonNull(onDiskDataSizeInBytes, "onDiskDataSizeInBytes is null");
+ }
+
+ @JsonProperty
+ public OptionalLong getFileCount()
+ {
+ return fileCount;
+ }
+
+ @JsonProperty
+ public OptionalLong getRowCount()
+ {
+ return rowCount;
+ }
+
+ @JsonProperty
+ public OptionalLong getInMemoryDataSizeInBytes()
+ {
+ return inMemoryDataSizeInBytes;
+ }
+
+ @JsonProperty
+ public OptionalLong getOnDiskDataSizeInBytes()
+ {
+ return onDiskDataSizeInBytes;
+ }
+
+ @Override
+ public boolean equals(Object o)
+ {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ HiveBasicStatistics that = (HiveBasicStatistics) o;
+ return Objects.equals(fileCount, that.fileCount) &&
+ Objects.equals(rowCount, that.rowCount) &&
+ Objects.equals(inMemoryDataSizeInBytes, that.inMemoryDataSizeInBytes) &&
+ Objects.equals(onDiskDataSizeInBytes, that.onDiskDataSizeInBytes);
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return Objects.hash(fileCount, rowCount, inMemoryDataSizeInBytes, onDiskDataSizeInBytes);
+ }
+
+ @Override
+ public String toString()
+ {
+ return toStringHelper(this)
+ .add("fileCount", fileCount)
+ .add("rowCount", rowCount)
+ .add("inMemoryDataSizeInBytes", inMemoryDataSizeInBytes)
+ .add("onDiskDataSizeInBytes", onDiskDataSizeInBytes)
+ .toString();
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBooleanParser.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBooleanParser.java
new file mode 100644
index 00000000..cb0bad73
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBooleanParser.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+public final class HiveBooleanParser
+{
+ private HiveBooleanParser() {}
+
+ public static Boolean parseHiveBoolean(byte[] bytes, int start, int length)
+ {
+ if (isTrue(bytes, start, length)) {
+ return true;
+ }
+ if (isFalse(bytes, start, length)) {
+ return false;
+ }
+ return null;
+ }
+
+ @SuppressWarnings("PointlessArithmeticExpression")
+ public static boolean isFalse(byte[] bytes, int start, int length)
+ {
+ return (length == 5) &&
+ (toUpperCase(bytes[start + 0]) == 'F') &&
+ (toUpperCase(bytes[start + 1]) == 'A') &&
+ (toUpperCase(bytes[start + 2]) == 'L') &&
+ (toUpperCase(bytes[start + 3]) == 'S') &&
+ (toUpperCase(bytes[start + 4]) == 'E');
+ }
+
+ @SuppressWarnings("PointlessArithmeticExpression")
+ public static boolean isTrue(byte[] bytes, int start, int length)
+ {
+ return (length == 4) &&
+ (toUpperCase(bytes[start + 0]) == 'T') &&
+ (toUpperCase(bytes[start + 1]) == 'R') &&
+ (toUpperCase(bytes[start + 2]) == 'U') &&
+ (toUpperCase(bytes[start + 3]) == 'E');
+ }
+
+ private static byte toUpperCase(byte b)
+ {
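+ // ASCII lower-case letters are exactly 32 above their upper-case counterparts, so subtracting 32 upper-cases the byte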
+ return isLowerCase(b) ? ((byte) (b - 32)) : b;
+ }
+
+ private static boolean isLowerCase(byte b)
+ {
+ return (b >= 'a') && (b <= 'z');
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBucketAdapterRecordCursor.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBucketAdapterRecordCursor.java
new file mode 100644
index 00000000..7e758632
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBucketAdapterRecordCursor.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import io.airlift.slice.Slice;
+import io.prestosql.plugin.hive.HiveBucketing.BucketingVersion;
+import io.prestosql.spi.PrestoException;
+import io.prestosql.spi.block.Block;
+import io.prestosql.spi.connector.RecordCursor;
+import io.prestosql.spi.type.Type;
+import io.prestosql.spi.type.TypeManager;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import java.util.List;
+
+import static com.google.common.collect.ImmutableList.toImmutableList;
+import static io.prestosql.plugin.hive.HiveErrorCode.HIVE_INVALID_BUCKET_FILES;
+import static io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR;
+import static java.lang.String.format;
+import static java.util.Objects.requireNonNull;
+
+public class HiveBucketAdapterRecordCursor
+ implements RecordCursor
+{
+ private final RecordCursor delegate;
+ private final int[] bucketColumnIndices;
+ private final List<Class<?>> javaTypeList;
+ private final List<TypeInfo> typeInfoList;
+ private final BucketingVersion bucketingVersion;
+ private final int tableBucketCount;
+ private final int partitionBucketCount;
+ private final int bucketToKeep;
+
+ private final Object[] scratch;
+
+ public HiveBucketAdapterRecordCursor(
+ int[] bucketColumnIndices,
+ List<HiveType> bucketColumnHiveTypes,
+ BucketingVersion bucketingVersion,
+ int tableBucketCount,
+ int partitionBucketCount,
+ int bucketToKeep,
+ TypeManager typeManager,
+ RecordCursor delegate)
+ {
+ this.bucketColumnIndices = requireNonNull(bucketColumnIndices, "bucketColumnIndices is null");
+ this.delegate = requireNonNull(delegate, "delegate is null");
+ requireNonNull(bucketColumnHiveTypes, "bucketColumnHiveTypes is null");
+ this.javaTypeList = bucketColumnHiveTypes.stream()
+ .map(HiveType::getTypeSignature)
+ .map(typeManager::getType)
+ .map(Type::getJavaType)
+ .collect(toImmutableList());
+ this.typeInfoList = bucketColumnHiveTypes.stream()
+ .map(HiveType::getTypeInfo)
+ .collect(toImmutableList());
+ this.bucketingVersion = requireNonNull(bucketingVersion, "bucketingVersion is null");
+ this.tableBucketCount = tableBucketCount;
+ this.partitionBucketCount = partitionBucketCount;
+ this.bucketToKeep = bucketToKeep;
+
+ this.scratch = new Object[bucketColumnHiveTypes.size()];
+ }
+
+ @Override
+ public long getCompletedBytes()
+ {
+ return delegate.getCompletedBytes();
+ }
+
+ @Override
+ public Type getType(int field)
+ {
+ return delegate.getType(field);
+ }
+
+ @Override
+ public boolean advanceNextPosition()
+ {
+ while (true) {
+ if (Thread.interrupted()) {
+ // Stop processing if the query has been destroyed.
+ Thread.currentThread().interrupt();
+ throw new PrestoException(GENERIC_INTERNAL_ERROR, "RecordCursor was interrupted");
+ }
+
+ boolean hasNextPosition = delegate.advanceNextPosition();
+ if (!hasNextPosition) {
+ return false;
+ }
+ for (int i = 0; i < scratch.length; i++) {
+ int index = bucketColumnIndices[i];
+ if (delegate.isNull(index)) {
+ scratch[i] = null;
+ continue;
+ }
+ Class<?> javaType = javaTypeList.get(i);
+ if (javaType == boolean.class) {
+ scratch[i] = delegate.getBoolean(index);
+ }
+ else if (javaType == long.class) {
+ scratch[i] = delegate.getLong(index);
+ }
+ else if (javaType == double.class) {
+ scratch[i] = delegate.getDouble(index);
+ }
+ else if (javaType == Slice.class) {
+ scratch[i] = delegate.getSlice(index);
+ }
+ else if (javaType == Block.class) {
+ scratch[i] = delegate.getObject(index);
+ }
+ else {
+ throw new UnsupportedOperationException("Unknown java type: " + javaType);
+ }
+ }
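+ // recompute the bucket from the bucketing columns and skip rows that do not belong to the bucket being kept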
+ int bucket = HiveBucketing.getHiveBucket(bucketingVersion, tableBucketCount, typeInfoList, scratch);
+ if ((bucket - bucketToKeep) % partitionBucketCount != 0) {
+ throw new PrestoException(HIVE_INVALID_BUCKET_FILES, format(
+ "A row that is supposed to be in bucket %s is encountered. Only rows in bucket %s (modulo %s) are expected",
+ bucket, bucketToKeep % partitionBucketCount, partitionBucketCount));
+ }
+ if (bucket == bucketToKeep) {
+ return true;
+ }
+ }
+ }
+
+ @Override
+ public boolean getBoolean(int field)
+ {
+ return delegate.getBoolean(field);
+ }
+
+ @Override
+ public long getLong(int field)
+ {
+ return delegate.getLong(field);
+ }
+
+ @Override
+ public double getDouble(int field)
+ {
+ return delegate.getDouble(field);
+ }
+
+ @Override
+ public Slice getSlice(int field)
+ {
+ return delegate.getSlice(field);
+ }
+
+ @Override
+ public Object getObject(int field)
+ {
+ return delegate.getObject(field);
+ }
+
+ @Override
+ public boolean isNull(int field)
+ {
+ return delegate.isNull(field);
+ }
+
+ @Override
+ public void close()
+ {
+ delegate.close();
+ }
+
+ @Override
+ public long getReadTimeNanos()
+ {
+ return delegate.getReadTimeNanos();
+ }
+
+ @Override
+ public long getSystemMemoryUsage()
+ {
+ return delegate.getSystemMemoryUsage();
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBucketFunction.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBucketFunction.java
new file mode 100644
index 00000000..64554c3d
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBucketFunction.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.google.common.collect.ImmutableList;
+import io.prestosql.plugin.hive.HiveBucketing.BucketingVersion;
+import io.prestosql.spi.Page;
+import io.prestosql.spi.connector.BucketFunction;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static com.google.common.base.MoreObjects.toStringHelper;
+import static java.util.Objects.requireNonNull;
+
+public class HiveBucketFunction
+ implements BucketFunction
+{
+ private final BucketingVersion bucketingVersion;
+ private final int bucketCount;
+ private final List<TypeInfo> typeInfos;
+ private final List<TypeInfo> typeInfosForUpdate;
+ private final boolean isRowIdPartitioner;
+
+ public HiveBucketFunction(BucketingVersion bucketingVersion, int bucketCount, List<HiveType> hiveTypes)
+ {
+ this(bucketingVersion, bucketCount, hiveTypes, false);
+ }
+
+ public HiveBucketFunction(BucketingVersion bucketingVersion, int bucketCount, List<HiveType> hiveTypes, boolean forUpdate)
+ {
+ this.bucketingVersion = requireNonNull(bucketingVersion, "bucketingVersion is null");
+ this.bucketCount = bucketCount;
+ this.typeInfos = requireNonNull(hiveTypes, "hiveTypes is null").stream()
+ .map(HiveType::getTypeInfo)
+ .collect(Collectors.toList());
+ this.isRowIdPartitioner = forUpdate &&
+ typeInfos.get(typeInfos.size() - 1).getCategory() == Category.STRUCT;
+ if (forUpdate && typeInfos.size() > 1) {
+ typeInfosForUpdate = typeInfos.subList(0, typeInfos.size() - 1);
+ }
+ else {
+ typeInfosForUpdate = ImmutableList.of();
+ }
+ }
+
+ @Override
+ public int getBucket(Page page, int position)
+ {
+ if (isRowIdPartitioner) {
+ int bucketHashCode = 0;
+ if (page.getChannelCount() > 1) {
+ // Include the partitioning columns in the hash during update so that updates of partitioned tables are parallelized.
+ bucketHashCode = HiveBucketing.getBucketHashCode(bucketingVersion, typeInfosForUpdate, page, position, typeInfosForUpdate.size());
+ }
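+ // combine the hash of the partitioning columns with the bucket number encoded in the $rowId column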
+ bucketHashCode = bucketHashCode * 31 + HiveBucketing.extractBucketNumber(page, position);
+ return HiveBucketing.getBucketNumber(bucketHashCode, bucketCount);
+ }
+ return HiveBucketing.getHiveBucket(bucketingVersion, bucketCount, typeInfos, page, position);
+ }
+
+ @Override
+ public String toString()
+ {
+ return toStringHelper(this)
+ .add("version", bucketingVersion)
+ .add("bucketCount", bucketCount)
+ .add("typeInfos", typeInfos)
+ .toString();
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBucketHandle.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBucketHandle.java
new file mode 100644
index 00000000..2040aed4
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBucketHandle.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.collect.ImmutableList;
+import io.prestosql.plugin.hive.HiveBucketing.BucketingVersion;
+
+import java.util.List;
+import java.util.Objects;
+
+import static java.util.Objects.requireNonNull;
+import static java.util.stream.Collectors.toList;
+
+public class HiveBucketHandle
+{
+ private final List<HiveColumnHandle> columns;
+ private final BucketingVersion bucketingVersion;
+ // Number of buckets in the table, as specified in table metadata
+ private final int tableBucketCount;
+ // Number of buckets the table will appear to have when the Hive connector
+ // presents the table to the engine for read.
+ private final int readBucketCount;
+
+ @JsonCreator
+ public HiveBucketHandle(
+ @JsonProperty("columns") List<HiveColumnHandle> columns,
+ @JsonProperty("bucketingVersion") BucketingVersion bucketingVersion,
+ @JsonProperty("tableBucketCount") int tableBucketCount,
+ @JsonProperty("readBucketCount") int readBucketCount)
+ {
+ this.columns = requireNonNull(columns, "columns is null");
+ this.bucketingVersion = requireNonNull(bucketingVersion, "bucketingVersion is null");
+ this.tableBucketCount = tableBucketCount;
+ this.readBucketCount = readBucketCount;
+ }
+
+ @JsonProperty
+ public List<HiveColumnHandle> getColumns()
+ {
+ return columns;
+ }
+
+ @JsonProperty
+ public BucketingVersion getBucketingVersion()
+ {
+ return bucketingVersion;
+ }
+
+ @JsonProperty
+ public int getTableBucketCount()
+ {
+ return tableBucketCount;
+ }
+
+ @JsonProperty
+ public int getReadBucketCount()
+ {
+ return readBucketCount;
+ }
+
+ public HiveBucketProperty toTableBucketProperty()
+ {
+ return new HiveBucketProperty(
+ columns.stream()
+ .map(HiveColumnHandle::getName)
+ .collect(toList()),
+ bucketingVersion,
+ tableBucketCount,
+ ImmutableList.of());
+ }
+
+ @Override
+ public boolean equals(Object o)
+ {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ HiveBucketHandle that = (HiveBucketHandle) o;
+ return Objects.equals(this.columns, that.columns) &&
+ Objects.equals(this.tableBucketCount, that.tableBucketCount) &&
+ Objects.equals(this.readBucketCount, that.readBucketCount) &&
+ Objects.equals(this.bucketingVersion, that.bucketingVersion);
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return Objects.hash(columns, bucketingVersion, tableBucketCount, readBucketCount);
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBucketProperty.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBucketProperty.java
new file mode 100644
index 00000000..a8aced51
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBucketProperty.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.collect.ImmutableList;
+import io.prestosql.plugin.hive.HiveBucketing.BucketingVersion;
+import io.prestosql.plugin.hive.metastore.SortingColumn;
+import io.prestosql.spi.PrestoException;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+
+import static com.google.common.base.MoreObjects.toStringHelper;
+import static com.google.common.collect.ImmutableList.toImmutableList;
+import static io.prestosql.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA;
+import static java.util.Objects.requireNonNull;
+
+public class HiveBucketProperty
+{
+ private final List<String> bucketedBy;
+ private final BucketingVersion bucketingVersion;
+ private final int bucketCount;
+ private final List<SortingColumn> sortedBy;
+
+ @JsonCreator
+ public HiveBucketProperty(
+ @JsonProperty("bucketedBy") List<String> bucketedBy,
+ @JsonProperty("bucketingVersion") BucketingVersion bucketingVersion,
+ @JsonProperty("bucketCount") int bucketCount,
+ @JsonProperty("sortedBy") List<SortingColumn> sortedBy)
+ {
+ this.bucketedBy = ImmutableList.copyOf(requireNonNull(bucketedBy, "bucketedBy is null"));
+ this.bucketingVersion = requireNonNull(bucketingVersion, "bucketingVersion is null");
+ this.bucketCount = bucketCount;
+ this.sortedBy = ImmutableList.copyOf(requireNonNull(sortedBy, "sortedBy is null"));
+ }
+
+ public static Optional<HiveBucketProperty> fromStorageDescriptor(Map<String, String> tableParameters, StorageDescriptor storageDescriptor, String tablePartitionName)
+ {
+ boolean bucketColsSet = storageDescriptor.isSetBucketCols() && !storageDescriptor.getBucketCols().isEmpty();
+ boolean numBucketsSet = storageDescriptor.isSetNumBuckets() && storageDescriptor.getNumBuckets() > 0;
+ if (!numBucketsSet) {
+ // In Hive, a table is considered not bucketed when its bucketCols is set but numBuckets is not.
+ return Optional.empty();
+ }
+ if (!bucketColsSet) {
+ throw new PrestoException(HIVE_INVALID_METADATA, "Table/partition metadata has 'numBuckets' set, but 'bucketCols' is not set: " + tablePartitionName);
+ }
+ List<SortingColumn> sortedBy = ImmutableList.of();
+ if (storageDescriptor.isSetSortCols()) {
+ sortedBy = storageDescriptor.getSortCols().stream()
+ .map(order -> SortingColumn.fromMetastoreApiOrder(order, tablePartitionName))
+ .collect(toImmutableList());
+ }
+ BucketingVersion bucketingVersion = HiveBucketing.getBucketingVersion(tableParameters);
+ return Optional.of(new HiveBucketProperty(storageDescriptor.getBucketCols(), bucketingVersion, storageDescriptor.getNumBuckets(), sortedBy));
+ }
+
+ @JsonProperty
+ public List<String> getBucketedBy()
+ {
+ return bucketedBy;
+ }
+
+ @JsonProperty
+ public BucketingVersion getBucketingVersion()
+ {
+ return bucketingVersion;
+ }
+
+ @JsonProperty
+ public int getBucketCount()
+ {
+ return bucketCount;
+ }
+
+ @JsonProperty
+ public List<SortingColumn> getSortedBy()
+ {
+ return sortedBy;
+ }
+
+ @Override
+ public boolean equals(Object o)
+ {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ HiveBucketProperty that = (HiveBucketProperty) o;
+ return bucketingVersion == that.bucketingVersion &&
+ bucketCount == that.bucketCount &&
+ Objects.equals(bucketedBy, that.bucketedBy) &&
+ Objects.equals(sortedBy, that.sortedBy);
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return Objects.hash(bucketedBy, bucketingVersion, bucketCount, sortedBy);
+ }
+
+ @Override
+ public String toString()
+ {
+ return toStringHelper(this)
+ .add("bucketedBy", bucketedBy)
+ .add("bucketingVersion", bucketingVersion)
+ .add("bucketCount", bucketCount)
+ .add("sortedBy", sortedBy)
+ .toString();
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBucketing.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBucketing.java
new file mode 100644
index 00000000..6325591a
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveBucketing.java
@@ -0,0 +1,325 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import io.prestosql.plugin.hive.metastore.Column;
+import io.prestosql.plugin.hive.metastore.Table;
+import io.prestosql.plugin.hive.util.HiveBucketingV1;
+import io.prestosql.plugin.hive.util.HiveBucketingV2;
+import io.prestosql.spi.Page;
+import io.prestosql.spi.PrestoException;
+import io.prestosql.spi.StandardErrorCode;
+import io.prestosql.spi.block.Block;
+import io.prestosql.spi.block.RowBlock;
+import io.prestosql.spi.connector.ColumnHandle;
+import io.prestosql.spi.predicate.Domain;
+import io.prestosql.spi.predicate.NullableValue;
+import io.prestosql.spi.predicate.TupleDomain;
+import io.prestosql.spi.predicate.ValueSet;
+import org.apache.hadoop.hive.ql.io.BucketCodec;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.OptionalInt;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import static io.prestosql.plugin.hive.HiveBucketing.BucketingVersion.BUCKETING_V1;
+import static io.prestosql.plugin.hive.HiveBucketing.BucketingVersion.BUCKETING_V2;
+import static io.prestosql.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME;
+import static io.prestosql.plugin.hive.HiveUtil.getRegularColumnHandles;
+import static java.lang.String.format;
+import static java.util.Map.Entry;
+import static java.util.function.Function.identity;
+import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_BUCKETING_VERSION;
+import static org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory.TIMESTAMP;
+
+public final class HiveBucketing
+{
+ public enum BucketingVersion
+ {
+ BUCKETING_V1(1),
+ BUCKETING_V2(2),
+ /**/;
+
+ private final int version;
+
+ BucketingVersion(int version)
+ {
+ this.version = version;
+ }
+
+ public int getVersion()
+ {
+ return version;
+ }
+ }
+
+ private static final Set<HiveType> SUPPORTED_TYPES_FOR_BUCKET_FILTER = ImmutableSet.of(
+ HiveType.HIVE_BYTE,
+ HiveType.HIVE_SHORT,
+ HiveType.HIVE_INT,
+ HiveType.HIVE_LONG,
+ HiveType.HIVE_BOOLEAN,
+ HiveType.HIVE_STRING);
+
+ static final int MAX_BUCKET_NUMBER = 1_000_000 - 1;
+
+ private HiveBucketing() {}
+
+ public static int getHiveBucket(BucketingVersion bucketingVersion, int bucketCount, List<TypeInfo> types, Page page, int position)
+ {
+ return getBucketNumber(getBucketHashCode(bucketingVersion, types, page, position), bucketCount);
+ }
+
+ public static int getHiveBucket(BucketingVersion bucketingVersion, int bucketCount, List<TypeInfo> types, Object[] values)
+ {
+ return getBucketNumber(getBucketHashCode(bucketingVersion, types, values), bucketCount);
+ }
+
+ @VisibleForTesting
+ static int getBucketNumber(int hashCode, int bucketCount)
+ {
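+ // mask off the sign bit before the modulo so the resulting bucket index is never negative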
+ return (hashCode & Integer.MAX_VALUE) % bucketCount;
+ }
+
+ @VisibleForTesting
+ static int getBucketHashCode(BucketingVersion bucketingVersion, List<TypeInfo> types, Page page, int position)
+ {
+ int channelCount = page.getChannelCount();
+ return getBucketHashCode(bucketingVersion, types, page, position, channelCount);
+ }
+
+ static int getBucketHashCode(BucketingVersion bucketingVersion, List<TypeInfo> types, Page page, int position, int channelCount)
+ {
+ switch (bucketingVersion) {
+ case BUCKETING_V1:
+ return HiveBucketingV1.getBucketHashCode(types, page, position, channelCount);
+ case BUCKETING_V2:
+ return HiveBucketingV2.getBucketHashCode(types, page, position, channelCount);
+ default:
+ throw new IllegalArgumentException("Unsupported bucketing version: " + bucketingVersion);
+ }
+ }
+
+ @VisibleForTesting
+ static int getBucketHashCode(BucketingVersion bucketingVersion, List<TypeInfo> types, Object[] values)
+ {
+ switch (bucketingVersion) {
+ case BUCKETING_V1:
+ return HiveBucketingV1.getBucketHashCode(types, values);
+ case BUCKETING_V2:
+ return HiveBucketingV2.getBucketHashCode(types, values);
+ default:
+ throw new IllegalArgumentException("Unsupported bucketing version: " + bucketingVersion);
+ }
+ }
+
+ public static Optional<HiveBucketHandle> getHiveBucketHandle(Table table)
+ {
+ Optional<HiveBucketProperty> hiveBucketProperty = table.getStorage().getBucketProperty();
+ if (!hiveBucketProperty.isPresent()) {
+ return Optional.empty();
+ }
+
+ Map<String, HiveColumnHandle> map = getRegularColumnHandles(table).stream()
+ .collect(Collectors.toMap(HiveColumnHandle::getName, identity()));
+
+ ImmutableList.Builder<HiveColumnHandle> bucketColumns = ImmutableList.builder();
+ for (String bucketColumnName : hiveBucketProperty.get().getBucketedBy()) {
+ HiveColumnHandle bucketColumnHandle = map.get(bucketColumnName);
+ if (bucketColumnHandle == null) {
+ return Optional.empty();
+ }
+ bucketColumns.add(bucketColumnHandle);
+ }
+
+ BucketingVersion bucketingVersion = hiveBucketProperty.get().getBucketingVersion();
+ int bucketCount = hiveBucketProperty.get().getBucketCount();
+ return Optional.of(new HiveBucketHandle(bucketColumns.build(), bucketingVersion, bucketCount, bucketCount));
+ }
+
+ public static Optional<HiveBucketFilter> getHiveBucketFilter(Table table, TupleDomain<ColumnHandle> effectivePredicate)
+ {
+ if (!getHiveBucketHandle(table).isPresent()) {
+ return Optional.empty();
+ }
+
+ if (bucketedOnTimestamp(table.getStorage().getBucketProperty().get(), table)) {
+ return Optional.empty();
+ }
+
+ Optional<Map<ColumnHandle, NullableValue>> bindings = TupleDomain.extractFixedValues(effectivePredicate);
+ if (!bindings.isPresent()) {
+ return Optional.empty();
+ }
+ OptionalInt singleBucket = getHiveBucket(table, bindings.get());
+ if (singleBucket.isPresent()) {
+ return Optional.of(new HiveBucketFilter(ImmutableSet.of(singleBucket.getAsInt())));
+ }
+
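+ // no single bucket can be derived from the constant bindings; look for an explicit predicate on the synthetic bucket column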
+ if (!effectivePredicate.getDomains().isPresent()) {
+ return Optional.empty();
+ }
+ Optional<Domain> domain = effectivePredicate.getDomains().get().entrySet().stream()
+ .filter(entry -> ((HiveColumnHandle) entry.getKey()).getName().equals(BUCKET_COLUMN_NAME))
+ .findFirst()
+ .map(Entry::getValue);
+ if (!domain.isPresent()) {
+ return Optional.empty();
+ }
+ ValueSet values = domain.get().getValues();
+ ImmutableSet.Builder<Integer> builder = ImmutableSet.builder();
+ int bucketCount = table.getStorage().getBucketProperty().get().getBucketCount();
+ for (int i = 0; i < bucketCount; i++) {
+ if (values.containsValue((long) i)) {
+ builder.add(i);
+ }
+ }
+ return Optional.of(new HiveBucketFilter(builder.build()));
+ }
+
+ private static OptionalInt getHiveBucket(Table table, Map<ColumnHandle, NullableValue> bindings)
+ {
+ if (bindings.isEmpty()) {
+ return OptionalInt.empty();
+ }
+
+ List<String> bucketColumns = table.getStorage().getBucketProperty().get().getBucketedBy();
+ Map<String, HiveType> hiveTypes = new HashMap<>();
+ for (Column column : table.getDataColumns()) {
+ hiveTypes.put(column.getName(), column.getType());
+ }
+
+ // Verify the bucket column types are supported
+ for (String column : bucketColumns) {
+ if (!SUPPORTED_TYPES_FOR_BUCKET_FILTER.contains(hiveTypes.get(column))) {
+ return OptionalInt.empty();
+ }
+ }
+
+ // Get bindings for bucket columns
+ Map<String, Object> bucketBindings = new HashMap<>();
+ for (Entry<ColumnHandle, NullableValue> entry : bindings.entrySet()) {
+ HiveColumnHandle colHandle = (HiveColumnHandle) entry.getKey();
+ if (!entry.getValue().isNull() && bucketColumns.contains(colHandle.getName())) {
+ bucketBindings.put(colHandle.getName(), entry.getValue().getValue());
+ }
+ }
+
+ // Check that we have bindings for all bucket columns
+ if (bucketBindings.size() != bucketColumns.size()) {
+ return OptionalInt.empty();
+ }
+
+ // Collect the type infos and bound values of the bucket columns
+ ImmutableList.Builder<TypeInfo> typeInfos = ImmutableList.builder();
+ Object[] values = new Object[bucketColumns.size()];
+ for (int i = 0; i < bucketColumns.size(); i++) {
+ String column = bucketColumns.get(i);
+ typeInfos.add(hiveTypes.get(column).getTypeInfo());
+ values[i] = bucketBindings.get(column);
+ }
+
+ BucketingVersion bucketingVersion = getBucketingVersion(table);
+ return OptionalInt.of(getHiveBucket(bucketingVersion, table.getStorage().getBucketProperty().get().getBucketCount(), typeInfos.build(), values));
+ }
+
+ public static BucketingVersion getBucketingVersion(Table table)
+ {
+ return getBucketingVersion(table.getParameters());
+ }
+
+ public static BucketingVersion getBucketingVersion(Map<String, String> tableProperties)
+ {
+ String bucketingVersion = tableProperties.getOrDefault(TABLE_BUCKETING_VERSION, "1");
+ switch (bucketingVersion) {
+ case "1":
+ return BUCKETING_V1;
+ case "2":
+ return BUCKETING_V2;
+ default:
+ // org.apache.hadoop.hive.ql.exec.Utilities.getBucketingVersion is more permissive and treats any non-number as "1"
+ throw new PrestoException(StandardErrorCode.NOT_SUPPORTED, format("Unsupported bucketing version: '%s'", bucketingVersion));
+ }
+ }
+
+ public static boolean bucketedOnTimestamp(HiveBucketProperty bucketProperty, Table table)
+ {
+ return bucketProperty.getBucketedBy().stream()
+ .map(columnName -> table.getColumn(columnName)
+ .orElseThrow(() -> new IllegalArgumentException(format("Cannot find column '%s' in %s", columnName, table))))
+ .map(Column::getType)
+ .map(HiveType::getTypeInfo)
+ .anyMatch(HiveBucketing::bucketedOnTimestamp);
+ }
+
+ private static boolean bucketedOnTimestamp(TypeInfo type)
+ {
+ switch (type.getCategory()) {
+ case PRIMITIVE:
+ return ((PrimitiveTypeInfo) type).getPrimitiveCategory() == TIMESTAMP;
+ case LIST:
+ return bucketedOnTimestamp(((ListTypeInfo) type).getListElementTypeInfo());
+ case MAP:
+ MapTypeInfo mapTypeInfo = (MapTypeInfo) type;
+ return bucketedOnTimestamp(mapTypeInfo.getMapKeyTypeInfo()) ||
+ bucketedOnTimestamp(mapTypeInfo.getMapValueTypeInfo());
+ default:
+ // TODO: support more types, e.g. ROW
+ throw new UnsupportedOperationException("Computation of Hive bucket hashCode is not supported for Hive category: " + type.getCategory());
+ }
+ }
+
+ public static class HiveBucketFilter
+ {
+ private final Set<Integer> bucketsToKeep;
+
+ @JsonCreator
+ public HiveBucketFilter(@JsonProperty("bucketsToKeep") Set<Integer> bucketsToKeep)
+ {
+ this.bucketsToKeep = bucketsToKeep;
+ }
+
+ @JsonProperty
+ public Set<Integer> getBucketsToKeep()
+ {
+ return bucketsToKeep;
+ }
+ }
+
+ /**
+ * Extracts the bucket number from the page. The page is expected to contain $rowId as its last column.
+ *
+ * @return the bucket number
+ */
+ static int extractBucketNumber(Page page, int position)
+ {
+ Block block = page.getBlock(page.getChannelCount() - 1);
+ RowBlock rowBlock = (RowBlock) block.getSingleValueBlock(position);
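+ // the encoded bucket value sits in the second field of the $rowId struct; BucketCodec recovers the writer (bucket) id from it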
+ int encodedBucketNumber = rowBlock.getRawFieldBlocks()[1].getInt(0, 0);
+ return BucketCodec.determineVersion(encodedBucketNumber).decodeWriterId(encodedBucketNumber);
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveCatalogName.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveCatalogName.java
new file mode 100644
index 00000000..cb2fb016
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveCatalogName.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import static java.util.Objects.requireNonNull;
+
+public class HiveCatalogName
+{
+ private final String catalogName;
+
+ public HiveCatalogName(String catalogName)
+ {
+ this.catalogName = requireNonNull(catalogName, "catalogName is null");
+ }
+
+ @Override
+ public String toString()
+ {
+ return catalogName;
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveCoercionPolicy.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveCoercionPolicy.java
new file mode 100644
index 00000000..738cd6a0
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveCoercionPolicy.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import io.prestosql.spi.type.DecimalType;
+import io.prestosql.spi.type.Type;
+import io.prestosql.spi.type.TypeManager;
+import io.prestosql.spi.type.VarcharType;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+
+import javax.inject.Inject;
+
+import java.util.List;
+
+import static io.prestosql.plugin.hive.HiveUtil.extractStructFieldTypes;
+import static java.lang.Math.min;
+import static java.util.Objects.requireNonNull;
+
+public class HiveCoercionPolicy
+ implements CoercionPolicy
+{
+ private final TypeManager typeManager;
+
+ @Inject
+ public HiveCoercionPolicy(TypeManager typeManager)
+ {
+ this.typeManager = requireNonNull(typeManager, "typeManager is null");
+ }
+
+ @Override
+ public boolean canCoerce(HiveType fromHiveType, HiveType toHiveType)
+ {
+ Type fromType = typeManager.getType(fromHiveType.getTypeSignature());
+ Type toType = typeManager.getType(toHiveType.getTypeSignature());
+ if (fromType instanceof VarcharType && toType instanceof VarcharType) {
+ return true;
+ }
+ if (fromType instanceof VarcharType) {
+ return toHiveType.equals(HiveType.HIVE_BYTE) || toHiveType.equals(HiveType.HIVE_SHORT) || toHiveType.equals(HiveType.HIVE_INT) || toHiveType.equals(HiveType.HIVE_LONG);
+ }
+ if (toType instanceof VarcharType) {
+ return fromHiveType.equals(HiveType.HIVE_BYTE) || fromHiveType.equals(HiveType.HIVE_SHORT) || fromHiveType.equals(HiveType.HIVE_INT) || fromHiveType.equals(HiveType.HIVE_LONG);
+ }
+ if (fromHiveType.equals(HiveType.HIVE_BYTE)) {
+ return toHiveType.equals(HiveType.HIVE_SHORT) || toHiveType.equals(HiveType.HIVE_INT) || toHiveType.equals(HiveType.HIVE_LONG);
+ }
+ if (fromHiveType.equals(HiveType.HIVE_SHORT)) {
+ return toHiveType.equals(HiveType.HIVE_INT) || toHiveType.equals(HiveType.HIVE_LONG);
+ }
+ if (fromHiveType.equals(HiveType.HIVE_INT)) {
+ return toHiveType.equals(HiveType.HIVE_LONG);
+ }
+ if (fromHiveType.equals(HiveType.HIVE_FLOAT)) {
+ return toHiveType.equals(HiveType.HIVE_DOUBLE) || toType instanceof DecimalType;
+ }
+ if (fromHiveType.equals(HiveType.HIVE_DOUBLE)) {
+ return toHiveType.equals(HiveType.HIVE_FLOAT) || toType instanceof DecimalType;
+ }
+ if (fromType instanceof DecimalType) {
+ return toType instanceof DecimalType || toHiveType.equals(HiveType.HIVE_FLOAT) || toHiveType.equals(HiveType.HIVE_DOUBLE);
+ }
+
+ return canCoerceForList(fromHiveType, toHiveType) || canCoerceForMap(fromHiveType, toHiveType) || canCoerceForStruct(fromHiveType, toHiveType);
+ }
+
+ private boolean canCoerceForMap(HiveType fromHiveType, HiveType toHiveType)
+ {
+ if (!fromHiveType.getCategory().equals(Category.MAP) || !toHiveType.getCategory().equals(Category.MAP)) {
+ return false;
+ }
+ HiveType fromKeyType = HiveType.valueOf(((MapTypeInfo) fromHiveType.getTypeInfo()).getMapKeyTypeInfo().getTypeName());
+ HiveType fromValueType = HiveType.valueOf(((MapTypeInfo) fromHiveType.getTypeInfo()).getMapValueTypeInfo().getTypeName());
+ HiveType toKeyType = HiveType.valueOf(((MapTypeInfo) toHiveType.getTypeInfo()).getMapKeyTypeInfo().getTypeName());
+ HiveType toValueType = HiveType.valueOf(((MapTypeInfo) toHiveType.getTypeInfo()).getMapValueTypeInfo().getTypeName());
+ return (fromKeyType.equals(toKeyType) || canCoerce(fromKeyType, toKeyType)) &&
+ (fromValueType.equals(toValueType) || canCoerce(fromValueType, toValueType));
+ }
+
+ private boolean canCoerceForList(HiveType fromHiveType, HiveType toHiveType)
+ {
+ if (!fromHiveType.getCategory().equals(Category.LIST) || !toHiveType.getCategory().equals(Category.LIST)) {
+ return false;
+ }
+ HiveType fromElementType = HiveType.valueOf(((ListTypeInfo) fromHiveType.getTypeInfo()).getListElementTypeInfo().getTypeName());
+ HiveType toElementType = HiveType.valueOf(((ListTypeInfo) toHiveType.getTypeInfo()).getListElementTypeInfo().getTypeName());
+ return fromElementType.equals(toElementType) || canCoerce(fromElementType, toElementType);
+ }
+
+ private boolean canCoerceForStruct(HiveType fromHiveType, HiveType toHiveType)
+ {
+ if (!fromHiveType.getCategory().equals(Category.STRUCT) || !toHiveType.getCategory().equals(Category.STRUCT)) {
+ return false;
+ }
+ List<String> fromFieldNames = ((StructTypeInfo) fromHiveType.getTypeInfo()).getAllStructFieldNames();
+ List<String> toFieldNames = ((StructTypeInfo) toHiveType.getTypeInfo()).getAllStructFieldNames();
+ List<HiveType> fromFieldTypes = extractStructFieldTypes(fromHiveType);
+ List<HiveType> toFieldTypes = extractStructFieldTypes(toHiveType);
+ // Rule:
+ // * Fields may be added or dropped from the end.
+ // * For all other field indices, the corresponding fields must have
+ // the same name, and the type must be coercible.
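+        // For example, struct<a:int, b:string> can coerce to struct<a:bigint> (trailing field dropped,
+        // leading field upscaled), but not to struct<x:int> (field name mismatch at index 0).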
+ for (int i = 0; i < min(fromFieldTypes.size(), toFieldTypes.size()); i++) {
+ if (!fromFieldNames.get(i).equals(toFieldNames.get(i))) {
+ return false;
+ }
+ if (!fromFieldTypes.get(i).equals(toFieldTypes.get(i)) && !canCoerce(fromFieldTypes.get(i), toFieldTypes.get(i))) {
+ return false;
+ }
+ }
+ return true;
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveCoercionRecordCursor.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveCoercionRecordCursor.java
new file mode 100644
index 00000000..8fa356ef
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveCoercionRecordCursor.java
@@ -0,0 +1,696 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableList;
+import io.airlift.slice.Slice;
+import io.prestosql.spi.PageBuilder;
+import io.prestosql.spi.PrestoException;
+import io.prestosql.spi.block.Block;
+import io.prestosql.spi.block.BlockBuilder;
+import io.prestosql.spi.connector.RecordCursor;
+import io.prestosql.spi.type.Type;
+import io.prestosql.spi.type.TypeManager;
+import io.prestosql.spi.type.VarcharType;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+
+import java.util.List;
+
+import static io.airlift.slice.Slices.utf8Slice;
+import static io.prestosql.plugin.hive.HiveUtil.extractStructFieldTypes;
+import static io.prestosql.plugin.hive.HiveUtil.isArrayType;
+import static io.prestosql.plugin.hive.HiveUtil.isMapType;
+import static io.prestosql.plugin.hive.HiveUtil.isRowType;
+import static io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED;
+import static java.lang.Float.intBitsToFloat;
+import static java.lang.Math.min;
+import static java.lang.String.format;
+import static java.util.Objects.requireNonNull;
+
+public class HiveCoercionRecordCursor
+ implements RecordCursor
+{
+ private final RecordCursor delegate;
+    private final List<HivePageSourceProvider.ColumnMapping> columnMappings;
+ private final Coercer[] coercers;
+ private final BridgingRecordCursor bridgingRecordCursor;
+
+ public HiveCoercionRecordCursor(
+            List<HivePageSourceProvider.ColumnMapping> columnMappings,
+ TypeManager typeManager,
+ RecordCursor delegate)
+ {
+ requireNonNull(columnMappings, "columns is null");
+ requireNonNull(typeManager, "typeManager is null");
+ this.bridgingRecordCursor = new BridgingRecordCursor();
+
+ this.delegate = requireNonNull(delegate, "delegate is null");
+ this.columnMappings = ImmutableList.copyOf(columnMappings);
+
+ int size = columnMappings.size();
+
+ this.coercers = new Coercer[size];
+
+ for (int columnIndex = 0; columnIndex < size; columnIndex++) {
+ HivePageSourceProvider.ColumnMapping columnMapping = columnMappings.get(columnIndex);
+
+ if (columnMapping.getCoercionFrom().isPresent()) {
+ coercers[columnIndex] = createCoercer(typeManager, columnMapping.getCoercionFrom().get(), columnMapping.getHiveColumnHandle().getHiveType(), bridgingRecordCursor);
+ }
+ }
+ }
+
+ @Override
+ public long getCompletedBytes()
+ {
+ return delegate.getCompletedBytes();
+ }
+
+ @Override
+ public Type getType(int field)
+ {
+ return delegate.getType(field);
+ }
+
+ @Override
+ public boolean advanceNextPosition()
+ {
+ for (int i = 0; i < columnMappings.size(); i++) {
+ if (coercers[i] != null) {
+ coercers[i].reset();
+ }
+ }
+ return delegate.advanceNextPosition();
+ }
+
+ @Override
+ public boolean getBoolean(int field)
+ {
+ if (coercers[field] == null) {
+ return delegate.getBoolean(field);
+ }
+ return coercers[field].getBoolean(delegate, field);
+ }
+
+ @Override
+ public long getLong(int field)
+ {
+ if (coercers[field] == null) {
+ return delegate.getLong(field);
+ }
+ return coercers[field].getLong(delegate, field);
+ }
+
+ @Override
+ public double getDouble(int field)
+ {
+ if (coercers[field] == null) {
+ return delegate.getDouble(field);
+ }
+ return coercers[field].getDouble(delegate, field);
+ }
+
+ @Override
+ public Slice getSlice(int field)
+ {
+ if (coercers[field] == null) {
+ return delegate.getSlice(field);
+ }
+ return coercers[field].getSlice(delegate, field);
+ }
+
+ @Override
+ public Object getObject(int field)
+ {
+ if (coercers[field] == null) {
+ return delegate.getObject(field);
+ }
+ return coercers[field].getObject(delegate, field);
+ }
+
+ @Override
+ public boolean isNull(int field)
+ {
+ if (coercers[field] == null) {
+ return delegate.isNull(field);
+ }
+ return coercers[field].isNull(delegate, field);
+ }
+
+ @Override
+ public void close()
+ {
+ delegate.close();
+ }
+
+ @Override
+ public long getReadTimeNanos()
+ {
+ return delegate.getReadTimeNanos();
+ }
+
+ @Override
+ public long getSystemMemoryUsage()
+ {
+ return delegate.getSystemMemoryUsage();
+ }
+
+ @VisibleForTesting
+ RecordCursor getRegularColumnRecordCursor()
+ {
+ return delegate;
+ }
+
+ private abstract static class Coercer
+ {
+ private boolean isNull;
+ private boolean loaded;
+
+ private boolean booleanValue;
+ private long longValue;
+ private double doubleValue;
+ private Slice sliceValue;
+ private Object objectValue;
+
+ public void reset()
+ {
+ isNull = false;
+ loaded = false;
+ }
+
+ public boolean isNull(RecordCursor delegate, int field)
+ {
+ assureLoaded(delegate, field);
+ return isNull;
+ }
+
+ public boolean getBoolean(RecordCursor delegate, int field)
+ {
+ assureLoaded(delegate, field);
+ return booleanValue;
+ }
+
+ public long getLong(RecordCursor delegate, int field)
+ {
+ assureLoaded(delegate, field);
+ return longValue;
+ }
+
+ public double getDouble(RecordCursor delegate, int field)
+ {
+ assureLoaded(delegate, field);
+ return doubleValue;
+ }
+
+ public Slice getSlice(RecordCursor delegate, int field)
+ {
+ assureLoaded(delegate, field);
+ return sliceValue;
+ }
+
+ public Object getObject(RecordCursor delegate, int field)
+ {
+ assureLoaded(delegate, field);
+ return objectValue;
+ }
+
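+        // Loads and coerces the delegate's value at most once per position; reset() clears the
+        // cached state when the cursor advances to the next row.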
+ private void assureLoaded(RecordCursor delegate, int field)
+ {
+ if (!loaded) {
+ isNull = delegate.isNull(field);
+ if (!isNull) {
+ coerce(delegate, field);
+ }
+ loaded = true;
+ }
+ }
+
+ protected abstract void coerce(RecordCursor delegate, int field);
+
+ protected void setBoolean(boolean value)
+ {
+ booleanValue = value;
+ }
+
+ protected void setLong(long value)
+ {
+ longValue = value;
+ }
+
+ protected void setDouble(double value)
+ {
+ doubleValue = value;
+ }
+
+ protected void setSlice(Slice value)
+ {
+ sliceValue = value;
+ }
+
+ protected void setObject(Object value)
+ {
+ objectValue = value;
+ }
+
+ protected void setIsNull(boolean isNull)
+ {
+ this.isNull = isNull;
+ }
+ }
+
+ private static Coercer createCoercer(TypeManager typeManager, HiveType fromHiveType, HiveType toHiveType, BridgingRecordCursor bridgingRecordCursor)
+ {
+ Type fromType = typeManager.getType(fromHiveType.getTypeSignature());
+ Type toType = typeManager.getType(toHiveType.getTypeSignature());
+ if (toType instanceof VarcharType && (fromHiveType.equals(HiveType.HIVE_BYTE) || fromHiveType.equals(HiveType.HIVE_SHORT) || fromHiveType.equals(HiveType.HIVE_INT) || fromHiveType.equals(HiveType.HIVE_LONG))) {
+ return new IntegerNumberToVarcharCoercer();
+ }
+ if (fromType instanceof VarcharType && (toHiveType.equals(HiveType.HIVE_BYTE) || toHiveType.equals(HiveType.HIVE_SHORT) || toHiveType.equals(HiveType.HIVE_INT) || toHiveType.equals(HiveType.HIVE_LONG))) {
+ return new VarcharToIntegerNumberCoercer(toHiveType);
+ }
+        if (fromHiveType.equals(HiveType.HIVE_BYTE) && (toHiveType.equals(HiveType.HIVE_SHORT) || toHiveType.equals(HiveType.HIVE_INT) || toHiveType.equals(HiveType.HIVE_LONG))) {
+            return new IntegerNumberUpscaleCoercer();
+        }
+        if (fromHiveType.equals(HiveType.HIVE_SHORT) && (toHiveType.equals(HiveType.HIVE_INT) || toHiveType.equals(HiveType.HIVE_LONG))) {
+            return new IntegerNumberUpscaleCoercer();
+        }
+ if (fromHiveType.equals(HiveType.HIVE_INT) && toHiveType.equals(HiveType.HIVE_LONG)) {
+ return new IntegerNumberUpscaleCoercer();
+ }
+ if (fromHiveType.equals(HiveType.HIVE_FLOAT) && toHiveType.equals(HiveType.HIVE_DOUBLE)) {
+ return new FloatToDoubleCoercer();
+ }
+ if (isArrayType(fromType) && isArrayType(toType)) {
+ return new ListCoercer(typeManager, fromHiveType, toHiveType, bridgingRecordCursor);
+ }
+ if (isMapType(fromType) && isMapType(toType)) {
+ return new MapCoercer(typeManager, fromHiveType, toHiveType, bridgingRecordCursor);
+ }
+ if (isRowType(fromType) && isRowType(toType)) {
+ return new StructCoercer(typeManager, fromHiveType, toHiveType, bridgingRecordCursor);
+ }
+
+ throw new PrestoException(NOT_SUPPORTED, format("Unsupported coercion from %s to %s", fromHiveType, toHiveType));
+ }
+
+ private static class IntegerNumberUpscaleCoercer
+ extends Coercer
+ {
+ @Override
+ public void coerce(RecordCursor delegate, int field)
+ {
+ setLong(delegate.getLong(field));
+ }
+ }
+
+ private static class IntegerNumberToVarcharCoercer
+ extends Coercer
+ {
+ @Override
+ public void coerce(RecordCursor delegate, int field)
+ {
+ setSlice(utf8Slice(String.valueOf(delegate.getLong(field))));
+ }
+ }
+
+ private static class FloatToDoubleCoercer
+ extends Coercer
+ {
+ @Override
+ protected void coerce(RecordCursor delegate, int field)
+ {
+ setDouble(intBitsToFloat((int) delegate.getLong(field)));
+ }
+ }
+
+ private static class VarcharToIntegerNumberCoercer
+ extends Coercer
+ {
+ private final long maxValue;
+ private final long minValue;
+
+ public VarcharToIntegerNumberCoercer(HiveType type)
+ {
+ if (type.equals(HiveType.HIVE_BYTE)) {
+ minValue = Byte.MIN_VALUE;
+ maxValue = Byte.MAX_VALUE;
+ }
+ else if (type.equals(HiveType.HIVE_SHORT)) {
+ minValue = Short.MIN_VALUE;
+ maxValue = Short.MAX_VALUE;
+ }
+ else if (type.equals(HiveType.HIVE_INT)) {
+ minValue = Integer.MIN_VALUE;
+ maxValue = Integer.MAX_VALUE;
+ }
+ else if (type.equals(HiveType.HIVE_LONG)) {
+ minValue = Long.MIN_VALUE;
+ maxValue = Long.MAX_VALUE;
+ }
+ else {
+ throw new PrestoException(NOT_SUPPORTED, format("Could not create Coercer from varchar to %s", type));
+ }
+ }
+
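+        // Strings that are not valid integers, or that overflow the target type's range, are coerced to NULL.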
+ @Override
+ public void coerce(RecordCursor delegate, int field)
+ {
+ try {
+ long value = Long.parseLong(delegate.getSlice(field).toStringUtf8());
+ if (minValue <= value && value <= maxValue) {
+ setLong(value);
+ }
+ else {
+ setIsNull(true);
+ }
+ }
+ catch (NumberFormatException e) {
+ setIsNull(true);
+ }
+ }
+ }
+
+ private static class ListCoercer
+ extends Coercer
+ {
+ private final Type fromElementType;
+ private final Type toType;
+ private final Type toElementType;
+ private final Coercer elementCoercer;
+ private final BridgingRecordCursor bridgingRecordCursor;
+ private final PageBuilder pageBuilder;
+
+ public ListCoercer(TypeManager typeManager, HiveType fromHiveType, HiveType toHiveType, BridgingRecordCursor bridgingRecordCursor)
+ {
+ requireNonNull(typeManager, "typeManage is null");
+ requireNonNull(fromHiveType, "fromHiveType is null");
+ requireNonNull(toHiveType, "toHiveType is null");
+ this.bridgingRecordCursor = requireNonNull(bridgingRecordCursor, "bridgingRecordCursor is null");
+ HiveType fromElementHiveType = HiveType.valueOf(((ListTypeInfo) fromHiveType.getTypeInfo()).getListElementTypeInfo().getTypeName());
+ HiveType toElementHiveType = HiveType.valueOf(((ListTypeInfo) toHiveType.getTypeInfo()).getListElementTypeInfo().getTypeName());
+ this.fromElementType = fromElementHiveType.getType(typeManager);
+ this.toType = toHiveType.getType(typeManager);
+ this.toElementType = toElementHiveType.getType(typeManager);
+ this.elementCoercer = fromElementHiveType.equals(toElementHiveType) ? null : createCoercer(typeManager, fromElementHiveType, toElementHiveType, bridgingRecordCursor);
+ this.pageBuilder = elementCoercer == null ? null : new PageBuilder(ImmutableList.of(toType));
+ }
+
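+        // Rebuilds the list in a scratch PageBuilder, coercing each element, and exposes the last appended position as the coerced value.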
+ @Override
+ public void coerce(RecordCursor delegate, int field)
+ {
+ if (delegate.isNull(field)) {
+ setIsNull(true);
+ return;
+ }
+ Block block = (Block) delegate.getObject(field);
+ if (pageBuilder.isFull()) {
+ pageBuilder.reset();
+ }
+ BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(0);
+ BlockBuilder listBuilder = blockBuilder.beginBlockEntry();
+ for (int i = 0; i < block.getPositionCount(); i++) {
+ if (elementCoercer == null) {
+ toElementType.appendTo(block, i, listBuilder);
+ }
+ else {
+ if (block.isNull(i)) {
+ listBuilder.appendNull();
+ }
+ else {
+ rewriteBlock(fromElementType, toElementType, block, i, listBuilder, elementCoercer, bridgingRecordCursor);
+ }
+ }
+ }
+ blockBuilder.closeEntry();
+ pageBuilder.declarePosition();
+ setObject(toType.getObject(blockBuilder, blockBuilder.getPositionCount() - 1));
+ }
+ }
+
+ private static class MapCoercer
+ extends Coercer
+ {
+        private final List<Type> fromKeyValueTypes;
+        private final Type toType;
+        private final List<Type> toKeyValueTypes;
+ private final Coercer[] coercers;
+ private final BridgingRecordCursor bridgingRecordCursor;
+ private final PageBuilder pageBuilder;
+
+ public MapCoercer(TypeManager typeManager, HiveType fromHiveType, HiveType toHiveType, BridgingRecordCursor bridgingRecordCursor)
+ {
+ requireNonNull(typeManager, "typeManage is null");
+ requireNonNull(fromHiveType, "fromHiveType is null");
+ requireNonNull(toHiveType, "toHiveType is null");
+ this.bridgingRecordCursor = requireNonNull(bridgingRecordCursor, "bridgingRecordCursor is null");
+ HiveType fromKeyHiveType = HiveType.valueOf(((MapTypeInfo) fromHiveType.getTypeInfo()).getMapKeyTypeInfo().getTypeName());
+ HiveType fromValueHiveType = HiveType.valueOf(((MapTypeInfo) fromHiveType.getTypeInfo()).getMapValueTypeInfo().getTypeName());
+ HiveType toKeyHiveType = HiveType.valueOf(((MapTypeInfo) toHiveType.getTypeInfo()).getMapKeyTypeInfo().getTypeName());
+ HiveType toValueHiveType = HiveType.valueOf(((MapTypeInfo) toHiveType.getTypeInfo()).getMapValueTypeInfo().getTypeName());
+ this.fromKeyValueTypes = fromHiveType.getType(typeManager).getTypeParameters();
+ this.toType = toHiveType.getType(typeManager);
+ this.toKeyValueTypes = toType.getTypeParameters();
+ this.coercers = new Coercer[2];
+ coercers[0] = fromKeyHiveType.equals(toKeyHiveType) ? null : createCoercer(typeManager, fromKeyHiveType, toKeyHiveType, bridgingRecordCursor);
+ coercers[1] = fromValueHiveType.equals(toValueHiveType) ? null : createCoercer(typeManager, fromValueHiveType, toValueHiveType, bridgingRecordCursor);
+ this.pageBuilder = coercers[0] == null && coercers[1] == null ? null : new PageBuilder(ImmutableList.of(toType));
+ }
+
+ @Override
+ public void coerce(RecordCursor delegate, int field)
+ {
+ if (delegate.isNull(field)) {
+ setIsNull(true);
+ return;
+ }
+ Block block = (Block) delegate.getObject(field);
+ if (pageBuilder.isFull()) {
+ pageBuilder.reset();
+ }
+ BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(0);
+ BlockBuilder mapBuilder = blockBuilder.beginBlockEntry();
+ for (int i = 0; i < block.getPositionCount(); i++) {
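+                // The map block stores entries flat: even positions hold keys, odd positions hold values.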
+ int k = i % 2;
+ if (coercers[k] == null) {
+ toKeyValueTypes.get(k).appendTo(block, i, mapBuilder);
+ }
+ else {
+ if (block.isNull(i)) {
+ mapBuilder.appendNull();
+ }
+ else {
+ rewriteBlock(fromKeyValueTypes.get(k), toKeyValueTypes.get(k), block, i, mapBuilder, coercers[k], bridgingRecordCursor);
+ }
+ }
+ }
+ blockBuilder.closeEntry();
+ pageBuilder.declarePosition();
+ setObject(toType.getObject(blockBuilder, blockBuilder.getPositionCount() - 1));
+ }
+ }
+
+ private static class StructCoercer
+ extends Coercer
+ {
+ private final Type toType;
+        private final List<Type> fromFieldTypes;
+        private final List<Type> toFieldTypes;
+ private final Coercer[] coercers;
+ private final BridgingRecordCursor bridgingRecordCursor;
+ private final PageBuilder pageBuilder;
+
+ public StructCoercer(TypeManager typeManager, HiveType fromHiveType, HiveType toHiveType, BridgingRecordCursor bridgingRecordCursor)
+ {
+ requireNonNull(typeManager, "typeManage is null");
+ requireNonNull(fromHiveType, "fromHiveType is null");
+ requireNonNull(toHiveType, "toHiveType is null");
+ this.bridgingRecordCursor = requireNonNull(bridgingRecordCursor, "bridgingRecordCursor is null");
+            List<HiveType> fromFieldHiveTypes = extractStructFieldTypes(fromHiveType);
+            List<HiveType> toFieldHiveTypes = extractStructFieldTypes(toHiveType);
+ this.fromFieldTypes = fromHiveType.getType(typeManager).getTypeParameters();
+ this.toType = toHiveType.getType(typeManager);
+ this.toFieldTypes = toType.getTypeParameters();
+ this.coercers = new Coercer[toFieldHiveTypes.size()];
+ for (int i = 0; i < min(fromFieldHiveTypes.size(), toFieldHiveTypes.size()); i++) {
+ if (!fromFieldTypes.get(i).equals(toFieldTypes.get(i))) {
+ coercers[i] = createCoercer(typeManager, fromFieldHiveTypes.get(i), toFieldHiveTypes.get(i), bridgingRecordCursor);
+ }
+ }
+ this.pageBuilder = new PageBuilder(ImmutableList.of(toType));
+ }
+
+ @Override
+ public void coerce(RecordCursor delegate, int field)
+ {
+ if (delegate.isNull(field)) {
+ setIsNull(true);
+ return;
+ }
+ Block block = (Block) delegate.getObject(field);
+ if (pageBuilder.isFull()) {
+ pageBuilder.reset();
+ }
+ BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(0);
+ BlockBuilder rowBuilder = blockBuilder.beginBlockEntry();
+ for (int i = 0; i < toFieldTypes.size(); i++) {
+ if (i >= fromFieldTypes.size() || block.isNull(i)) {
+ rowBuilder.appendNull();
+ }
+ else if (coercers[i] == null) {
+ toFieldTypes.get(i).appendTo(block, i, rowBuilder);
+ }
+ else {
+ rewriteBlock(fromFieldTypes.get(i), toFieldTypes.get(i), block, i, rowBuilder, coercers[i], bridgingRecordCursor);
+ }
+ }
+ blockBuilder.closeEntry();
+ pageBuilder.declarePosition();
+ setObject(toType.getObject(blockBuilder, blockBuilder.getPositionCount() - 1));
+ }
+ }
+
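+    // Applies a scalar Coercer to one nested element: the source value is staged in the single-slot
+    // BridgingRecordCursor, coerced, and the result is written into the target block builder.
+    // If the source Java type is unrecognized, the value is treated as NULL.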
+ private static void rewriteBlock(
+ Type fromType,
+ Type toType,
+ Block block,
+ int position,
+ BlockBuilder blockBuilder,
+ Coercer coercer,
+ BridgingRecordCursor bridgingRecordCursor)
+ {
+        Class<?> fromJavaType = fromType.getJavaType();
+ if (fromJavaType == long.class) {
+ bridgingRecordCursor.setValue(fromType.getLong(block, position));
+ }
+ else if (fromJavaType == double.class) {
+ bridgingRecordCursor.setValue(fromType.getDouble(block, position));
+ }
+ else if (fromJavaType == boolean.class) {
+ bridgingRecordCursor.setValue(fromType.getBoolean(block, position));
+ }
+ else if (fromJavaType == Slice.class) {
+ bridgingRecordCursor.setValue(fromType.getSlice(block, position));
+ }
+ else if (fromJavaType == Block.class) {
+ bridgingRecordCursor.setValue(fromType.getObject(block, position));
+ }
+ else {
+ bridgingRecordCursor.setValue(null);
+ }
+ coercer.reset();
+        Class<?> toJavaType = toType.getJavaType();
+        if (coercer.isNull(bridgingRecordCursor, 0)) {
+            blockBuilder.appendNull();
+        }
+        else if (toJavaType == long.class) {
+            toType.writeLong(blockBuilder, coercer.getLong(bridgingRecordCursor, 0));
+        }
+        else if (toJavaType == double.class) {
+            toType.writeDouble(blockBuilder, coercer.getDouble(bridgingRecordCursor, 0));
+        }
+        else if (toJavaType == boolean.class) {
+            toType.writeBoolean(blockBuilder, coercer.getBoolean(bridgingRecordCursor, 0));
+        }
+        else if (toJavaType == Slice.class) {
+            toType.writeSlice(blockBuilder, coercer.getSlice(bridgingRecordCursor, 0));
+        }
+        else if (toJavaType == Block.class) {
+ toType.writeObject(blockBuilder, coercer.getObject(bridgingRecordCursor, 0));
+ }
+ else {
+ throw new PrestoException(NOT_SUPPORTED, format("Unsupported coercion from %s to %s", fromType.getDisplayName(), toType.getDisplayName()));
+ }
+ coercer.reset();
+ bridgingRecordCursor.close();
+ }
+
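+    // Minimal single-value RecordCursor backing rewriteBlock(): it holds one value at a time and
+    // ignores the field index, so the scalar Coercers can be reused for nested block elements.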
+ private static class BridgingRecordCursor
+ implements RecordCursor
+ {
+ private Object value;
+
+ public void setValue(Object value)
+ {
+ this.value = value;
+ }
+
+ @Override
+ public long getCompletedBytes()
+ {
+ return 0;
+ }
+
+ @Override
+ public long getReadTimeNanos()
+ {
+ return 0;
+ }
+
+ @Override
+ public Type getType(int field)
+ {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean advanceNextPosition()
+ {
+ return true;
+ }
+
+ @Override
+ public boolean getBoolean(int field)
+ {
+ return (Boolean) value;
+ }
+
+ @Override
+ public long getLong(int field)
+ {
+ return (Long) value;
+ }
+
+ @Override
+ public double getDouble(int field)
+ {
+ return (Double) value;
+ }
+
+ @Override
+ public Slice getSlice(int field)
+ {
+ return (Slice) value;
+ }
+
+ @Override
+ public Object getObject(int field)
+ {
+ return value;
+ }
+
+ @Override
+ public boolean isNull(int field)
+ {
+ return value == null;
+ }
+
+ @Override
+ public void close()
+ {
+ this.value = null;
+ }
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveColumnHandle.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveColumnHandle.java
new file mode 100644
index 00000000..5a66146e
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveColumnHandle.java
@@ -0,0 +1,267 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.collect.Lists;
+import io.prestosql.plugin.hive.orc.OrcPageSourceFactory;
+import io.prestosql.spi.connector.ColumnHandle;
+import io.prestosql.spi.connector.ColumnMetadata;
+import io.prestosql.spi.type.TypeManager;
+import io.prestosql.spi.type.TypeSignature;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Objects;
+import java.util.Optional;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.DUMMY_OFFLOADED;
+import static io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY;
+import static io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.REGULAR;
+import static io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.SYNTHESIZED;
+import static io.prestosql.plugin.hive.HiveType.HIVE_INT;
+import static io.prestosql.plugin.hive.HiveType.HIVE_LONG;
+import static io.prestosql.plugin.hive.HiveType.HIVE_STRING;
+import static java.util.Objects.requireNonNull;
+
+public class HiveColumnHandle
+ implements ColumnHandle
+{
+ public static final int PATH_COLUMN_INDEX = -11;
+ public static final String PATH_COLUMN_NAME = "$path";
+ public static final HiveType PATH_HIVE_TYPE = HIVE_STRING;
+ public static final TypeSignature PATH_TYPE_SIGNATURE = PATH_HIVE_TYPE.getTypeSignature();
+
+ public static final int BUCKET_COLUMN_INDEX = -12;
+ public static final String BUCKET_COLUMN_NAME = "$bucket";
+ public static final HiveType BUCKET_HIVE_TYPE = HIVE_INT;
+ public static final TypeSignature BUCKET_TYPE_SIGNATURE = BUCKET_HIVE_TYPE.getTypeSignature();
+
+ public static final int ROW_ID__COLUMN_INDEX = -13;
+ public static final String UPDATE_ROW_ID_COLUMN_NAME = "$rowId";
+
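+    // Placeholder column used when a whole aggregation is offloaded (the "count_star" name suggests
+    // count(*) pushdown to OmniData); inferred from usage, not documented in this file.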
+ public static final int DUMMY_OFFLOADED_COLUMN_INDEX = -20;
+ public static final String DUMMY_OFFLOADED_COLUMN_NAME = "count_star";
+
+    // Ids <= MAX_PARTITION_KEY_COLUMN_INDEX can be used to distinguish between different partition prefilled columns.
+    // NOTE: If any new hidden columns are added, their index should be greater than this value, or this value should be adjusted.
+ public static final int MAX_PARTITION_KEY_COLUMN_INDEX = -14;
+
+ public enum ColumnType
+ {
+ PARTITION_KEY,
+ REGULAR,
+ SYNTHESIZED,
+ DUMMY_OFFLOADED,
+ }
+
+ private final String name;
+ private final HiveType hiveType;
+ private final TypeSignature typeName;
+ private final int hiveColumnIndex;
+ private final ColumnType columnType;
+    private final Optional<String> comment;
+    // If the column is a partition column or a bucketing column, then this field is required
+ private final boolean required;
+
+ public HiveColumnHandle(
+ String name,
+ HiveType hiveType,
+ TypeSignature typeSignature,
+ int hiveColumnIndex,
+ ColumnType columnType,
+            Optional<String> comment)
+ {
+ this(name, hiveType, typeSignature, hiveColumnIndex, columnType, comment, false);
+ }
+
+ @JsonCreator
+ public HiveColumnHandle(
+ @JsonProperty("name") String name,
+ @JsonProperty("hiveType") HiveType hiveType,
+ @JsonProperty("typeSignature") TypeSignature typeSignature,
+ @JsonProperty("hiveColumnIndex") int hiveColumnIndex,
+ @JsonProperty("columnType") ColumnType columnType,
+ @JsonProperty("comment") Optional comment,
+ @JsonProperty("required") boolean required)
+ {
+ this.name = requireNonNull(name, "name is null");
+ checkArgument(hiveColumnIndex >= 0 || columnType == PARTITION_KEY || columnType == SYNTHESIZED || columnType == DUMMY_OFFLOADED, "hiveColumnIndex is negative");
+ this.hiveColumnIndex = hiveColumnIndex;
+ this.hiveType = requireNonNull(hiveType, "hiveType is null");
+ this.typeName = requireNonNull(typeSignature, "type is null");
+ this.columnType = requireNonNull(columnType, "columnType is null");
+ this.comment = requireNonNull(comment, "comment is null");
+ this.required = required;
+ }
+
+ @JsonProperty
+ public String getName()
+ {
+ return name;
+ }
+
+ @Override
+ public String getColumnName()
+ {
+ return name;
+ }
+
+ @JsonProperty
+ public HiveType getHiveType()
+ {
+ return hiveType;
+ }
+
+ @JsonProperty
+ public int getHiveColumnIndex()
+ {
+ return hiveColumnIndex;
+ }
+
+ public boolean isPartitionKey()
+ {
+ return columnType == PARTITION_KEY;
+ }
+
+ public boolean isRegular()
+ {
+ return columnType == REGULAR;
+ }
+
+ public boolean isHidden()
+ {
+ return columnType == SYNTHESIZED;
+ }
+
+ public ColumnMetadata getColumnMetadata(TypeManager typeManager)
+ {
+ return new ColumnMetadata(name, typeManager.getType(typeName), true, null, null, isHidden(), Collections.emptyMap(), required);
+ }
+
+ @JsonProperty
+    public Optional<String> getComment()
+ {
+ return comment;
+ }
+
+ @JsonProperty
+ public TypeSignature getTypeSignature()
+ {
+ return typeName;
+ }
+
+ @JsonProperty
+ public ColumnType getColumnType()
+ {
+ return columnType;
+ }
+
+ @JsonProperty
+ public boolean isRequired()
+ {
+ return required;
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return Objects.hash(name, hiveColumnIndex, hiveType, columnType, comment, required);
+ }
+
+ @Override
+ public boolean equals(Object obj)
+ {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null || getClass() != obj.getClass()) {
+ return false;
+ }
+ HiveColumnHandle other = (HiveColumnHandle) obj;
+ return Objects.equals(this.name, other.name) &&
+ Objects.equals(this.hiveColumnIndex, other.hiveColumnIndex) &&
+ Objects.equals(this.hiveType, other.hiveType) &&
+ Objects.equals(this.columnType, other.columnType) &&
+ Objects.equals(this.comment, other.comment) &&
+ Objects.equals(this.required, other.required);
+ }
+
+ @Override
+ public String toString()
+ {
+ return name + ":" + hiveType + ":" + hiveColumnIndex + ":" + columnType;
+ }
+
+ public static HiveColumnHandle updateRowIdHandle()
+ {
+ // Hive connector only supports metadata delete. It does not support generic row-by-row deletion.
+ // Metadata delete is implemented in Hetu by generating a plan for row-by-row delete first,
+    // and then optimizing it into a metadata delete. As a result, Hive connector must provide partial
+ // plan-time support for row-by-row delete so that planning doesn't fail. This is why we need
+ // rowid handle. Note that in Hive connector, rowid handle is not implemented beyond plan-time.
+        ArrayList<String> acidColumnNames = Lists.newArrayList(
+ OrcPageSourceFactory.ACID_COLUMN_ORIGINAL_TRANSACTION,
+ OrcPageSourceFactory.ACID_COLUMN_BUCKET,
+ OrcPageSourceFactory.ACID_COLUMN_ROW_ID,
+ OrcPageSourceFactory.ACID_COLUMN_CURRENT_TRANSACTION,
+ OrcPageSourceFactory.ACID_COLUMN_OPERATION);
+
+        ArrayList<TypeInfo> acidColumnTypes = Lists.newArrayList(
+ HIVE_LONG.getTypeInfo(),
+ HIVE_INT.getTypeInfo(),
+ HIVE_LONG.getTypeInfo(),
+ HIVE_LONG.getTypeInfo(),
+ HIVE_INT.getTypeInfo());
+ StructTypeInfo structTypeInfo = new StructTypeInfo();
+ structTypeInfo.setAllStructFieldNames(acidColumnNames);
+ structTypeInfo.setAllStructFieldTypeInfos(acidColumnTypes);
+ HiveType rowIdType = HiveType.createHiveType(structTypeInfo);
+ return new HiveColumnHandle(UPDATE_ROW_ID_COLUMN_NAME, rowIdType, rowIdType.getTypeSignature(), ROW_ID__COLUMN_INDEX, SYNTHESIZED, Optional.empty());
+ }
+
+ public static HiveColumnHandle pathColumnHandle()
+ {
+ return new HiveColumnHandle(PATH_COLUMN_NAME, PATH_HIVE_TYPE, PATH_TYPE_SIGNATURE, PATH_COLUMN_INDEX, SYNTHESIZED, Optional.empty());
+ }
+
+ /**
+ * The column indicating the bucket id.
+ * When table bucketing differs from partition bucketing, this column indicates
+ * what bucket the row will fall in under the table bucketing scheme.
+ */
+ public static HiveColumnHandle bucketColumnHandle()
+ {
+ return new HiveColumnHandle(BUCKET_COLUMN_NAME, BUCKET_HIVE_TYPE, BUCKET_TYPE_SIGNATURE, BUCKET_COLUMN_INDEX, SYNTHESIZED, Optional.empty());
+ }
+
+ public static boolean isPathColumnHandle(HiveColumnHandle column)
+ {
+ return column.getHiveColumnIndex() == PATH_COLUMN_INDEX;
+ }
+
+ public static boolean isBucketColumnHandle(HiveColumnHandle column)
+ {
+ return column.getHiveColumnIndex() == BUCKET_COLUMN_INDEX;
+ }
+
+ public static boolean isUpdateColumnHandle(HiveColumnHandle column)
+ {
+ return column.getHiveColumnIndex() == ROW_ID__COLUMN_INDEX;
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveCompressionCodec.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveCompressionCodec.java
new file mode 100644
index 00000000..2f72f64d
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveCompressionCodec.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import io.prestosql.orc.metadata.CompressionKind;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.GzipCodec;
+import org.apache.hadoop.io.compress.Lz4Codec;
+import org.apache.hadoop.io.compress.SnappyCodec;
+import org.apache.hadoop.io.compress.ZStandardCodec;
+import org.apache.parquet.hadoop.metadata.CompressionCodecName;
+
+import java.util.Optional;
+
+import static java.util.Objects.requireNonNull;
+
+public enum HiveCompressionCodec
+{
+ NONE(null, CompressionKind.NONE, CompressionCodecName.UNCOMPRESSED),
+ SNAPPY(SnappyCodec.class, CompressionKind.SNAPPY, CompressionCodecName.SNAPPY),
+ LZ4(Lz4Codec.class, CompressionKind.LZ4, CompressionCodecName.LZ4),
+ ZSTD(ZStandardCodec.class, CompressionKind.ZSTD, CompressionCodecName.ZSTD),
+ GZIP(GzipCodec.class, CompressionKind.ZLIB, CompressionCodecName.GZIP);
+
+    private final Optional<Class<? extends CompressionCodec>> codec;
+ private final CompressionKind orcCompressionKind;
+ private final CompressionCodecName parquetCompressionCodec;
+
+    HiveCompressionCodec(Class<? extends CompressionCodec> codec, CompressionKind orcCompressionKind, CompressionCodecName parquetCompressionCodec)
+ {
+ this.codec = Optional.ofNullable(codec);
+ this.orcCompressionKind = requireNonNull(orcCompressionKind, "orcCompressionKind is null");
+ this.parquetCompressionCodec = requireNonNull(parquetCompressionCodec, "parquetCompressionCodec is null");
+ }
+
+    public Optional<Class<? extends CompressionCodec>> getCodec()
+ {
+ return codec;
+ }
+
+ public CompressionKind getOrcCompressionKind()
+ {
+ return orcCompressionKind;
+ }
+
+ public CompressionCodecName getParquetCompressionCodec()
+ {
+ return parquetCompressionCodec;
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveConfig.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveConfig.java
new file mode 100644
index 00000000..bb9056df
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveConfig.java
@@ -0,0 +1,2099 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.google.common.base.Splitter;
+import com.google.common.collect.ImmutableList;
+import com.google.common.net.HostAndPort;
+import io.airlift.configuration.Config;
+import io.airlift.configuration.ConfigDescription;
+import io.airlift.configuration.DefunctConfig;
+import io.airlift.log.Logger;
+import io.airlift.units.DataSize;
+import io.airlift.units.Duration;
+import io.airlift.units.MaxDataSize;
+import io.airlift.units.MinDataSize;
+import io.airlift.units.MinDuration;
+import io.prestosql.orc.OrcWriteValidation.OrcWriteValidationMode;
+import io.prestosql.plugin.hive.s3.S3FileSystemType;
+import io.prestosql.spi.function.Mandatory;
+import io.prestosql.spi.queryeditorui.PropertyType;
+import org.joda.time.DateTimeZone;
+
+import javax.annotation.Nullable;
+import javax.validation.constraints.DecimalMax;
+import javax.validation.constraints.DecimalMin;
+import javax.validation.constraints.Max;
+import javax.validation.constraints.Min;
+import javax.validation.constraints.NotNull;
+
+import java.io.File;
+import java.io.IOException;
+import java.text.Normalizer;
+import java.util.List;
+import java.util.Optional;
+import java.util.TimeZone;
+import java.util.concurrent.TimeUnit;
+
+import static io.airlift.units.DataSize.Unit.BYTE;
+import static io.airlift.units.DataSize.Unit.GIGABYTE;
+import static io.airlift.units.DataSize.Unit.MEGABYTE;
+import static java.util.concurrent.TimeUnit.HOURS;
+import static java.util.concurrent.TimeUnit.MINUTES;
+
+@DefunctConfig({
+ "dfs.domain-socket-path",
+ "hive.file-system-cache-ttl",
+ "hive.max-global-split-iterator-threads",
+ "hive.max-sort-files-per-bucket",
+ "hive.bucket-writing",
+ "hive.optimized-reader.enabled",
+ "hive.orc.optimized-writer.enabled",
+ "hive.rcfile-optimized-writer.enabled",
+ "hive.time-zone",
+})
+public class HiveConfig
+{
+ private static final Logger log = Logger.get(HiveConfig.class);
+ private static final Splitter SPLITTER = Splitter.on(',').trimResults().omitEmptyStrings();
+ public static final double MIN_OFFLOAD_FACTOR = 0.5;
+ public static final long MIN_OFFLOAD_ROW_NUM = 500;
+
+ private DataSize maxSplitSize = new DataSize(64, MEGABYTE);
+ private int maxPartitionsPerScan = 100_000;
+ private int maxOutstandingSplits = 1_000;
+ private DataSize maxOutstandingSplitsSize = new DataSize(256, MEGABYTE);
+ private int maxSplitIteratorThreads = 1_000;
+ private int minPartitionBatchSize = 10;
+ private int maxPartitionBatchSize = 100;
+ private int maxInitialSplits = 200;
+ private int splitLoaderConcurrency = 4;
+ private Integer maxSplitsPerSecond;
+ private DataSize maxInitialSplitSize;
+ private int domainCompactionThreshold = 100;
+ private DataSize writerSortBufferSize = new DataSize(64, MEGABYTE);
+ private boolean forceLocalScheduling;
+ private boolean recursiveDirWalkerEnabled;
+
+ private int maxConcurrentFileRenames = 20;
+
+ private boolean allowCorruptWritesForTesting;
+
+ private Duration metastoreCacheTtl = new Duration(0, TimeUnit.SECONDS);
+ private Duration metastoreRefreshInterval = new Duration(1, TimeUnit.SECONDS);
+
+ private Duration metastoreDBCacheTtl = new Duration(0, TimeUnit.SECONDS);
+ private Duration metastoreDBRefreshInterval = new Duration(1, TimeUnit.SECONDS);
+
+ private long metastoreCacheMaximumSize = 10000;
+ private long perTransactionMetastoreCacheMaximumSize = 1000;
+ private int maxMetastoreRefreshThreads = 100;
+ private HostAndPort metastoreSocksProxy;
+ private Duration metastoreTimeout = new Duration(10, TimeUnit.SECONDS);
+
+ private Duration ipcPingInterval = new Duration(10, TimeUnit.SECONDS);
+ private Duration dfsTimeout = new Duration(60, TimeUnit.SECONDS);
+ private Duration dfsConnectTimeout = new Duration(500, TimeUnit.MILLISECONDS);
+ private Duration dfsKeyProviderCacheTtl = new Duration(30, TimeUnit.MINUTES);
+ private int dfsConnectMaxRetries = 5;
+ private boolean verifyChecksum = true;
+ private String domainSocketPath;
+
+ private S3FileSystemType s3FileSystemType = S3FileSystemType.PRESTO;
+
+ private HiveStorageFormat hiveStorageFormat = HiveStorageFormat.ORC;
+ private HiveCompressionCodec hiveCompressionCodec = HiveCompressionCodec.GZIP;
+ private boolean respectTableFormat = true;
+ private boolean immutablePartitions;
+ private boolean createEmptyBucketFiles;
+ private int maxPartitionsPerWriter = 100;
+ private int maxOpenSortFiles = 50;
+ private int writeValidationThreads = 16;
+
+    private List<String> resourceConfigFiles = ImmutableList.of();
+
+ private DataSize textMaxLineLength = new DataSize(100, MEGABYTE);
+
+ private String orcLegacyTimeZone = TimeZone.getDefault().getID();
+
+ private String parquetTimeZone = TimeZone.getDefault().getID();
+ private boolean useParquetColumnNames;
+ private boolean failOnCorruptedParquetStatistics = true;
+ private DataSize parquetMaxReadBlockSize = new DataSize(16, MEGABYTE);
+
+ private boolean assumeCanonicalPartitionKeys;
+
+ private boolean useOrcColumnNames;
+ private boolean orcBloomFiltersEnabled;
+ private double orcDefaultBloomFilterFpp = 0.05;
+ private DataSize orcMaxMergeDistance = new DataSize(1, MEGABYTE);
+ private DataSize orcMaxBufferSize = new DataSize(8, MEGABYTE);
+ private DataSize orcTinyStripeThreshold = new DataSize(1, BYTE);
+ private DataSize orcStreamBufferSize = new DataSize(8, MEGABYTE);
+ private DataSize orcMaxReadBlockSize = new DataSize(16, MEGABYTE);
+ private boolean orcLazyReadSmallRanges = true;
+ private boolean orcWriteLegacyVersion;
+ private double orcWriterValidationPercentage;
+ private OrcWriteValidationMode orcWriterValidationMode = OrcWriteValidationMode.BOTH;
+
+ private boolean orcFileTailCacheEnabled;
+ private Duration orcFileTailCacheTtl = new Duration(4, HOURS);
+ private long orcFileTailCacheLimit = 50_000;
+ private boolean orcStripeFooterCacheEnabled;
+ private Duration orcStripeFooterCacheTtl = new Duration(4, HOURS);
+ private long orcStripeFooterCacheLimit = 250_000;
+ private boolean orcRowIndexCacheEnabled;
+ private Duration orcRowIndexCacheTtl = new Duration(4, HOURS);
+ private long orcRowIndexCacheLimit = 250_000;
+ private boolean orcBloomFiltersCacheEnabled;
+ private Duration orcBloomFiltersCacheTtl = new Duration(4, HOURS);
+ private long orcBloomFiltersCacheLimit = 250_000;
+ private boolean orcRowDataCacheEnabled;
+ private Duration orcRowDataCacheTtl = new Duration(4, HOURS);
+ private DataSize orcRowDataCacheMaximumWeight = new DataSize(20, GIGABYTE);
+
+ private String rcfileTimeZone = TimeZone.getDefault().getID();
+ private boolean rcfileWriterValidate;
+
+ private HiveMetastoreAuthenticationType hiveMetastoreAuthenticationType = HiveMetastoreAuthenticationType.NONE;
+ private HdfsAuthenticationType hdfsAuthenticationType = HdfsAuthenticationType.NONE;
+ private boolean hdfsImpersonationEnabled;
+ private boolean hdfsWireEncryptionEnabled;
+
+ private boolean skipDeletionForAlter;
+ private boolean skipTargetCleanupOnRollback;
+
+ private boolean bucketExecutionEnabled = true;
+ private boolean sortedWritingEnabled = true;
+
+ private int fileSystemMaxCacheSize = 1000;
+
+ private boolean optimizeMismatchedBucketCount;
+ private boolean writesToNonManagedTablesEnabled;
+ private boolean createsOfNonManagedTablesEnabled = true;
+
+ private boolean tableStatisticsEnabled = true;
+ private int partitionStatisticsSampleSize = 100;
+ private boolean ignoreCorruptedStatistics;
+ private boolean collectColumnStatisticsOnWrite = true;
+
+ private String recordingPath;
+ private boolean replay;
+ private Duration recordingDuration = new Duration(10, MINUTES);
+ private boolean s3SelectPushdownEnabled;
+ private int s3SelectPushdownMaxConnections = 500;
+
+ private boolean temporaryStagingDirectoryEnabled = true;
+ private String temporaryStagingDirectoryPath = "/tmp/presto-${USER}";
+
+ private Duration fileStatusCacheExpireAfterWrite = new Duration(24, TimeUnit.HOURS);
+ private long fileStatusCacheMaxSize = 1000 * 1000;
+    private List<String> fileStatusCacheTables = ImmutableList.of();
+
+    private Optional<Duration> hiveTransactionHeartbeatInterval = Optional.empty();
+ private int hiveTransactionHeartbeatThreads = 5;
+
+ private boolean tableCreatesWithLocationAllowed = true;
+
+ private boolean dynamicFilterPartitionFilteringEnabled = true;
+ private int dynamicFilteringRowFilteringThreshold = 2000;
+
+ private boolean orcCacheStatsMetricCollectionEnabled;
+
+ private int vacuumDeltaNumThreshold = 10;
+ private double vacuumDeltaPercentThreshold = 0.1;
+ private boolean autoVacuumEnabled;
+ private boolean orcPredicatePushdownEnabled;
+
+ private boolean omniDataSslEnabled;
+    private Optional<String> omniDataSslPkiDir = Optional.empty();
+    private Optional<String> omniDataSslClientCertFilePath = Optional.empty();
+    private Optional<String> omniDataSslPrivateKeyFilePath = Optional.empty();
+    private Optional<String> omniDataSslTrustCertFilePath = Optional.empty();
+    private Optional<String> omniDataSslCrlFilePath = Optional.empty();
+
+ private boolean omniDataEnabled;
+ private boolean filterOffloadEnabled = true;
+ private double minFilterOffloadFactor = MIN_OFFLOAD_FACTOR;
+ private boolean aggregatorOffloadEnabled = true;
+ private double minAggregatorOffloadFactor = MIN_OFFLOAD_FACTOR;
+ private long minOffloadRowNumber = MIN_OFFLOAD_ROW_NUM;
+
+ private int hmsWriteBatchSize = 8;
+
+ public int getMaxInitialSplits()
+ {
+ return maxInitialSplits;
+ }
+
+ private boolean tlsEnabled;
+
+ private Duration vacuumCleanupRecheckInterval = new Duration(5, MINUTES);
+ private int vacuumServiceThreads = 2;
+ private int metastoreClientServiceThreads = 4;
+    private Optional<Duration> vacuumCollectorInterval = Optional.of(new Duration(5, MINUTES));
+
+ private int maxNumbSplitsToGroup = 1;
+
+ private boolean workerMetaStoreCacheEnabled;
+
+ @Config("hive.max-initial-splits")
+ public HiveConfig setMaxInitialSplits(int maxInitialSplits)
+ {
+ this.maxInitialSplits = maxInitialSplits;
+ return this;
+ }
+
+ public DataSize getMaxInitialSplitSize()
+ {
+ if (maxInitialSplitSize == null) {
+ return new DataSize(maxSplitSize.getValue() / 2, maxSplitSize.getUnit());
+ }
+ return maxInitialSplitSize;
+ }
+
+ @Config("hive.max-initial-split-size")
+ public HiveConfig setMaxInitialSplitSize(DataSize maxInitialSplitSize)
+ {
+ this.maxInitialSplitSize = maxInitialSplitSize;
+ return this;
+ }
+
+ @Min(1)
+ public int getSplitLoaderConcurrency()
+ {
+ return splitLoaderConcurrency;
+ }
+
+ @Config("hive.split-loader-concurrency")
+ public HiveConfig setSplitLoaderConcurrency(int splitLoaderConcurrency)
+ {
+ this.splitLoaderConcurrency = splitLoaderConcurrency;
+ return this;
+ }
+
+ @Min(1)
+ @Nullable
+ public Integer getMaxSplitsPerSecond()
+ {
+ return maxSplitsPerSecond;
+ }
+
+ @Config("hive.max-splits-per-second")
+ @ConfigDescription("Throttles the maximum number of splits that can be assigned to tasks per second")
+ public HiveConfig setMaxSplitsPerSecond(Integer maxSplitsPerSecond)
+ {
+ this.maxSplitsPerSecond = maxSplitsPerSecond;
+ return this;
+ }
+
+ @Min(1)
+ public int getDomainCompactionThreshold()
+ {
+ return domainCompactionThreshold;
+ }
+
+ @Config("hive.domain-compaction-threshold")
+ @ConfigDescription("Maximum ranges to allow in a tuple domain without compacting it")
+ public HiveConfig setDomainCompactionThreshold(int domainCompactionThreshold)
+ {
+ this.domainCompactionThreshold = domainCompactionThreshold;
+ return this;
+ }
+
+ @MinDataSize("1MB")
+ @MaxDataSize("1GB")
+ public DataSize getWriterSortBufferSize()
+ {
+ return writerSortBufferSize;
+ }
+
+ @Config("hive.writer-sort-buffer-size")
+ public HiveConfig setWriterSortBufferSize(DataSize writerSortBufferSize)
+ {
+ this.writerSortBufferSize = writerSortBufferSize;
+ return this;
+ }
+
+ public boolean isForceLocalScheduling()
+ {
+ return forceLocalScheduling;
+ }
+
+ @Config("hive.force-local-scheduling")
+ public HiveConfig setForceLocalScheduling(boolean forceLocalScheduling)
+ {
+ this.forceLocalScheduling = forceLocalScheduling;
+ return this;
+ }
+
+ @Min(1)
+ public int getMaxConcurrentFileRenames()
+ {
+ return maxConcurrentFileRenames;
+ }
+
+ @Config("hive.max-concurrent-file-renames")
+ public HiveConfig setMaxConcurrentFileRenames(int maxConcurrentFileRenames)
+ {
+ this.maxConcurrentFileRenames = maxConcurrentFileRenames;
+ return this;
+ }
+
+ @Config("hive.recursive-directories")
+ public HiveConfig setRecursiveDirWalkerEnabled(boolean recursiveDirWalkerEnabled)
+ {
+ this.recursiveDirWalkerEnabled = recursiveDirWalkerEnabled;
+ return this;
+ }
+
+ public boolean getRecursiveDirWalkerEnabled()
+ {
+ return recursiveDirWalkerEnabled;
+ }
+
+ @NotNull
+ public DataSize getMaxSplitSize()
+ {
+ return maxSplitSize;
+ }
+
+ @Config("hive.max-split-size")
+ public HiveConfig setMaxSplitSize(DataSize maxSplitSize)
+ {
+ this.maxSplitSize = maxSplitSize;
+ return this;
+ }
+
+ @Min(1)
+ public int getMaxPartitionsPerScan()
+ {
+ return maxPartitionsPerScan;
+ }
+
+ @Config("hive.max-partitions-per-scan")
+ @ConfigDescription("Maximum allowed partitions for a single table scan")
+ public HiveConfig setMaxPartitionsPerScan(int maxPartitionsPerScan)
+ {
+ this.maxPartitionsPerScan = maxPartitionsPerScan;
+ return this;
+ }
+
+ @Min(1)
+ public int getMaxOutstandingSplits()
+ {
+ return maxOutstandingSplits;
+ }
+
+ @Config("hive.max-outstanding-splits")
+ @ConfigDescription("Target number of buffered splits for each table scan in a query, before the scheduler tries to pause itself")
+ public HiveConfig setMaxOutstandingSplits(int maxOutstandingSplits)
+ {
+ this.maxOutstandingSplits = maxOutstandingSplits;
+ return this;
+ }
+
+ @MinDataSize("1MB")
+ public DataSize getMaxOutstandingSplitsSize()
+ {
+ return maxOutstandingSplitsSize;
+ }
+
+ @Config("hive.max-outstanding-splits-size")
+ @ConfigDescription("Maximum amount of memory allowed for split buffering for each table scan in a query, before the query is failed")
+ public HiveConfig setMaxOutstandingSplitsSize(DataSize maxOutstandingSplits)
+ {
+ this.maxOutstandingSplitsSize = maxOutstandingSplits;
+ return this;
+ }
+
+ @Min(1)
+ public int getMaxSplitIteratorThreads()
+ {
+ return maxSplitIteratorThreads;
+ }
+
+ @Config("hive.max-split-iterator-threads")
+ public HiveConfig setMaxSplitIteratorThreads(int maxSplitIteratorThreads)
+ {
+ this.maxSplitIteratorThreads = maxSplitIteratorThreads;
+ return this;
+ }
+
+ @Deprecated
+ public boolean getAllowCorruptWritesForTesting()
+ {
+ return allowCorruptWritesForTesting;
+ }
+
+ @Deprecated
+ @Config("hive.allow-corrupt-writes-for-testing")
+ @ConfigDescription("Allow Hive connector to write data even when data will likely be corrupt")
+ public HiveConfig setAllowCorruptWritesForTesting(boolean allowCorruptWritesForTesting)
+ {
+ this.allowCorruptWritesForTesting = allowCorruptWritesForTesting;
+ return this;
+ }
+
+ @NotNull
+ public @MinDuration("0ms") Duration getMetastoreCacheTtl()
+ {
+ return metastoreCacheTtl;
+ }
+
+ @Config("hive.metastore-cache-ttl")
+ public HiveConfig setMetastoreCacheTtl(Duration metastoreCacheTtl)
+ {
+ this.metastoreCacheTtl = metastoreCacheTtl;
+ return this;
+ }
+
+ @NotNull
+ public @MinDuration("1ms") Duration getMetastoreRefreshInterval()
+ {
+ return metastoreRefreshInterval;
+ }
+
+ @Config("hive.metastore-refresh-interval")
+ public HiveConfig setMetastoreRefreshInterval(Duration metastoreRefreshInterval)
+ {
+ this.metastoreRefreshInterval = metastoreRefreshInterval;
+ return this;
+ }
+
+ @NotNull
+ public @MinDuration("0ms") Duration getMetastoreDBCacheTtl()
+ {
+ return metastoreDBCacheTtl;
+ }
+
+ @Config("hive.metastore-db-cache-ttl")
+ public HiveConfig setMetastoreDBCacheTtl(Duration metastoreCacheTtl)
+ {
+ this.metastoreDBCacheTtl = metastoreCacheTtl;
+ return this;
+ }
+
+ @NotNull
+ public @MinDuration("1ms") Duration getMetastoreDBRefreshInterval()
+ {
+ return metastoreDBRefreshInterval;
+ }
+
+ @Config("hive.metastore-db-refresh-interval")
+ public HiveConfig setMetastoreDBRefreshInterval(Duration metastoreDBRefreshInterval)
+ {
+ this.metastoreDBRefreshInterval = metastoreDBRefreshInterval;
+ return this;
+ }
+
+ @Min(1)
+ public long getMetastoreCacheMaximumSize()
+ {
+ return metastoreCacheMaximumSize;
+ }
+
+ @Config("hive.metastore-cache-maximum-size")
+ public HiveConfig setMetastoreCacheMaximumSize(long metastoreCacheMaximumSize)
+ {
+ this.metastoreCacheMaximumSize = metastoreCacheMaximumSize;
+ return this;
+ }
+
+ @Min(1)
+ public long getPerTransactionMetastoreCacheMaximumSize()
+ {
+ return perTransactionMetastoreCacheMaximumSize;
+ }
+
+ @Config("hive.per-transaction-metastore-cache-maximum-size")
+ public HiveConfig setPerTransactionMetastoreCacheMaximumSize(long perTransactionMetastoreCacheMaximumSize)
+ {
+ this.perTransactionMetastoreCacheMaximumSize = perTransactionMetastoreCacheMaximumSize;
+ return this;
+ }
+
+ @Min(10)
+ public int getMaxMetastoreRefreshThreads()
+ {
+ return maxMetastoreRefreshThreads;
+ }
+
+ @Config("hive.metastore-refresh-max-threads")
+ public HiveConfig setMaxMetastoreRefreshThreads(int maxMetastoreRefreshThreads)
+ {
+ this.maxMetastoreRefreshThreads = maxMetastoreRefreshThreads;
+ return this;
+ }
+
+ public HostAndPort getMetastoreSocksProxy()
+ {
+ return metastoreSocksProxy;
+ }
+
+ @Config("hive.metastore.thrift.client.socks-proxy")
+ public HiveConfig setMetastoreSocksProxy(HostAndPort metastoreSocksProxy)
+ {
+ this.metastoreSocksProxy = metastoreSocksProxy;
+ return this;
+ }
+
+ @NotNull
+ public Duration getMetastoreTimeout()
+ {
+ return metastoreTimeout;
+ }
+
+ @Config("hive.metastore-timeout")
+ public HiveConfig setMetastoreTimeout(Duration metastoreTimeout)
+ {
+ this.metastoreTimeout = metastoreTimeout;
+ return this;
+ }
+
+ @Min(1)
+ public int getMinPartitionBatchSize()
+ {
+ return minPartitionBatchSize;
+ }
+
+ @Config("hive.metastore.partition-batch-size.min")
+ public HiveConfig setMinPartitionBatchSize(int minPartitionBatchSize)
+ {
+ this.minPartitionBatchSize = minPartitionBatchSize;
+ return this;
+ }
+
+ @Min(1)
+ public int getMaxPartitionBatchSize()
+ {
+ return maxPartitionBatchSize;
+ }
+
+ @Config("hive.metastore.partition-batch-size.max")
+ public HiveConfig setMaxPartitionBatchSize(int maxPartitionBatchSize)
+ {
+ this.maxPartitionBatchSize = maxPartitionBatchSize;
+ return this;
+ }
+
+ @NotNull
+    public List<String> getResourceConfigFiles()
+ {
+ return resourceConfigFiles;
+ }
+
+ @Mandatory(name = "hive.config.resources",
+ description = "An optional comma-separated list of HDFS configuration files. These files must exist on the machines running openLooKeng. Only specify this if absolutely necessary to access HDFS. Ensure to upload these files.",
+ defaultValue = "core-site.xml,hdfs-site.xml",
+ readOnly = true,
+ type = PropertyType.FILES)
+ @Config("hive.config.resources")
+ public HiveConfig setResourceConfigFiles(String files)
+ {
+ this.resourceConfigFiles = Splitter.on(',').trimResults().omitEmptyStrings().splitToList(files);
+ return this;
+ }
+
+    public HiveConfig setResourceConfigFiles(List<String> files)
+ {
+ this.resourceConfigFiles = ImmutableList.copyOf(files);
+ return this;
+ }
+
+ @NotNull
+ @MinDuration("1ms")
+ public Duration getIpcPingInterval()
+ {
+ return ipcPingInterval;
+ }
+
+ @Config("hive.dfs.ipc-ping-interval")
+ public HiveConfig setIpcPingInterval(Duration pingInterval)
+ {
+ this.ipcPingInterval = pingInterval;
+ return this;
+ }
+
+ @NotNull
+ @MinDuration("1ms")
+ public Duration getDfsTimeout()
+ {
+ return dfsTimeout;
+ }
+
+ @Config("hive.dfs-timeout")
+ public HiveConfig setDfsTimeout(Duration dfsTimeout)
+ {
+ this.dfsTimeout = dfsTimeout;
+ return this;
+ }
+
+ @NotNull
+ @MinDuration("0ms")
+ public Duration getDfsKeyProviderCacheTtl()
+ {
+ return dfsKeyProviderCacheTtl;
+ }
+
+ @Config("hive.dfs.key-provider.cache-ttl")
+ public HiveConfig setDfsKeyProviderCacheTtl(Duration dfsClientKeyProviderCacheTtl)
+ {
+ this.dfsKeyProviderCacheTtl = dfsClientKeyProviderCacheTtl;
+ return this;
+ }
+
+ @MinDuration("1ms")
+ @NotNull
+ public Duration getDfsConnectTimeout()
+ {
+ return dfsConnectTimeout;
+ }
+
+ @Config("hive.dfs.connect.timeout")
+ public HiveConfig setDfsConnectTimeout(Duration dfsConnectTimeout)
+ {
+ this.dfsConnectTimeout = dfsConnectTimeout;
+ return this;
+ }
+
+ @Min(0)
+ public int getDfsConnectMaxRetries()
+ {
+ return dfsConnectMaxRetries;
+ }
+
+ @Config("hive.dfs.connect.max-retries")
+ public HiveConfig setDfsConnectMaxRetries(int dfsConnectMaxRetries)
+ {
+ this.dfsConnectMaxRetries = dfsConnectMaxRetries;
+ return this;
+ }
+
+ public HiveStorageFormat getHiveStorageFormat()
+ {
+ return hiveStorageFormat;
+ }
+
+ @Config("hive.storage-format")
+ public HiveConfig setHiveStorageFormat(HiveStorageFormat hiveStorageFormat)
+ {
+ this.hiveStorageFormat = hiveStorageFormat;
+ return this;
+ }
+
+ public HiveCompressionCodec getHiveCompressionCodec()
+ {
+ return hiveCompressionCodec;
+ }
+
+ @Config("hive.compression-codec")
+ public HiveConfig setHiveCompressionCodec(HiveCompressionCodec hiveCompressionCodec)
+ {
+ this.hiveCompressionCodec = hiveCompressionCodec;
+ return this;
+ }
+
+ public boolean isRespectTableFormat()
+ {
+ return respectTableFormat;
+ }
+
+ @Config("hive.respect-table-format")
+ @ConfigDescription("Should new partitions be written using the existing table format or the default Presto format")
+ public HiveConfig setRespectTableFormat(boolean respectTableFormat)
+ {
+ this.respectTableFormat = respectTableFormat;
+ return this;
+ }
+
+ public boolean isImmutablePartitions()
+ {
+ return immutablePartitions;
+ }
+
+ @Config("hive.immutable-partitions")
+ @ConfigDescription("Can new data be inserted into existing partitions or existing unpartitioned tables")
+ public HiveConfig setImmutablePartitions(boolean immutablePartitions)
+ {
+ this.immutablePartitions = immutablePartitions;
+ return this;
+ }
+
+ public boolean isCreateEmptyBucketFiles()
+ {
+ return createEmptyBucketFiles;
+ }
+
+ @Config("hive.create-empty-bucket-files")
+ @ConfigDescription("Create empty files for buckets that have no data")
+ public HiveConfig setCreateEmptyBucketFiles(boolean createEmptyBucketFiles)
+ {
+ this.createEmptyBucketFiles = createEmptyBucketFiles;
+ return this;
+ }
+
+ @Min(1)
+ public int getMaxPartitionsPerWriter()
+ {
+ return maxPartitionsPerWriter;
+ }
+
+ @Config("hive.max-partitions-per-writers")
+ @ConfigDescription("Maximum number of partitions per writer")
+ public HiveConfig setMaxPartitionsPerWriter(int maxPartitionsPerWriter)
+ {
+ this.maxPartitionsPerWriter = maxPartitionsPerWriter;
+ return this;
+ }
+
+ @Min(2)
+ @Max(1000)
+ public int getMaxOpenSortFiles()
+ {
+ return maxOpenSortFiles;
+ }
+
+ @Config("hive.max-open-sort-files")
+ @ConfigDescription("Maximum number of writer temporary files to read in one pass")
+ public HiveConfig setMaxOpenSortFiles(int maxOpenSortFiles)
+ {
+ this.maxOpenSortFiles = maxOpenSortFiles;
+ return this;
+ }
+
+ public int getWriteValidationThreads()
+ {
+ return writeValidationThreads;
+ }
+
+ @Config("hive.write-validation-threads")
+ @ConfigDescription("Number of threads used for verifying data after a write")
+ public HiveConfig setWriteValidationThreads(int writeValidationThreads)
+ {
+ this.writeValidationThreads = writeValidationThreads;
+ return this;
+ }
+
+ public String getDomainSocketPath()
+ {
+ return domainSocketPath;
+ }
+
+ @Config("hive.dfs.domain-socket-path")
+ public HiveConfig setDomainSocketPath(String domainSocketPath)
+ {
+ this.domainSocketPath = domainSocketPath;
+ return this;
+ }
+
+ @NotNull
+ public S3FileSystemType getS3FileSystemType()
+ {
+ return s3FileSystemType;
+ }
+
+ @Config("hive.s3-file-system-type")
+ public HiveConfig setS3FileSystemType(S3FileSystemType s3FileSystemType)
+ {
+ this.s3FileSystemType = s3FileSystemType;
+ return this;
+ }
+
+ public boolean isVerifyChecksum()
+ {
+ return verifyChecksum;
+ }
+
+ @Config("hive.dfs.verify-checksum")
+ public HiveConfig setVerifyChecksum(boolean verifyChecksum)
+ {
+ this.verifyChecksum = verifyChecksum;
+ return this;
+ }
+
+ public boolean isUseOrcColumnNames()
+ {
+ return useOrcColumnNames;
+ }
+
+ @Config("hive.orc.use-column-names")
+ @ConfigDescription("Access ORC columns using names from the file")
+ public HiveConfig setUseOrcColumnNames(boolean useOrcColumnNames)
+ {
+ this.useOrcColumnNames = useOrcColumnNames;
+ return this;
+ }
+
+ @NotNull
+ public DataSize getOrcMaxMergeDistance()
+ {
+ return orcMaxMergeDistance;
+ }
+
+ @Config("hive.orc.max-merge-distance")
+ public HiveConfig setOrcMaxMergeDistance(DataSize orcMaxMergeDistance)
+ {
+ this.orcMaxMergeDistance = orcMaxMergeDistance;
+ return this;
+ }
+
+ @NotNull
+ public DataSize getOrcMaxBufferSize()
+ {
+ return orcMaxBufferSize;
+ }
+
+ @Config("hive.orc.max-buffer-size")
+ public HiveConfig setOrcMaxBufferSize(DataSize orcMaxBufferSize)
+ {
+ this.orcMaxBufferSize = orcMaxBufferSize;
+ return this;
+ }
+
+ @NotNull
+ public DataSize getOrcStreamBufferSize()
+ {
+ return orcStreamBufferSize;
+ }
+
+ @Config("hive.orc.stream-buffer-size")
+ public HiveConfig setOrcStreamBufferSize(DataSize orcStreamBufferSize)
+ {
+ this.orcStreamBufferSize = orcStreamBufferSize;
+ return this;
+ }
+
+ @NotNull
+ public DataSize getOrcTinyStripeThreshold()
+ {
+ return orcTinyStripeThreshold;
+ }
+
+ @Config("hive.orc.tiny-stripe-threshold")
+ public HiveConfig setOrcTinyStripeThreshold(DataSize orcTinyStripeThreshold)
+ {
+ this.orcTinyStripeThreshold = orcTinyStripeThreshold;
+ return this;
+ }
+
+ @NotNull
+ public DataSize getOrcMaxReadBlockSize()
+ {
+ return orcMaxReadBlockSize;
+ }
+
+ @Config("hive.orc.max-read-block-size")
+ public HiveConfig setOrcMaxReadBlockSize(DataSize orcMaxReadBlockSize)
+ {
+ this.orcMaxReadBlockSize = orcMaxReadBlockSize;
+ return this;
+ }
+
+ @Deprecated
+ public boolean isOrcLazyReadSmallRanges()
+ {
+ return orcLazyReadSmallRanges;
+ }
+
+ // TODO remove config option once efficacy is proven
+ @Deprecated
+ @Config("hive.orc.lazy-read-small-ranges")
+ @ConfigDescription("ORC read small disk ranges lazily")
+ public HiveConfig setOrcLazyReadSmallRanges(boolean orcLazyReadSmallRanges)
+ {
+ this.orcLazyReadSmallRanges = orcLazyReadSmallRanges;
+ return this;
+ }
+
+ public boolean isOrcBloomFiltersEnabled()
+ {
+ return orcBloomFiltersEnabled;
+ }
+
+ @Config("hive.orc.bloom-filters.enabled")
+ public HiveConfig setOrcBloomFiltersEnabled(boolean orcBloomFiltersEnabled)
+ {
+ this.orcBloomFiltersEnabled = orcBloomFiltersEnabled;
+ return this;
+ }
+
+ public double getOrcDefaultBloomFilterFpp()
+ {
+ return orcDefaultBloomFilterFpp;
+ }
+
+ @Config("hive.orc.default-bloom-filter-fpp")
+ @ConfigDescription("ORC Bloom filter false positive probability")
+ public HiveConfig setOrcDefaultBloomFilterFpp(double orcDefaultBloomFilterFpp)
+ {
+ this.orcDefaultBloomFilterFpp = orcDefaultBloomFilterFpp;
+ return this;
+ }
+
+ public boolean isOrcWriteLegacyVersion()
+ {
+ return orcWriteLegacyVersion;
+ }
+
+ @Config("hive.orc.writer.use-legacy-version-number")
+ @ConfigDescription("Write ORC files with a version number that is readable by Hive 2.0.0 to 2.2.0")
+ public HiveConfig setOrcWriteLegacyVersion(boolean orcWriteLegacyVersion)
+ {
+ this.orcWriteLegacyVersion = orcWriteLegacyVersion;
+ return this;
+ }
+
+ @DecimalMin("0.0")
+ @DecimalMax("100.0")
+ public double getOrcWriterValidationPercentage()
+ {
+ return orcWriterValidationPercentage;
+ }
+
+ @Config("hive.orc.writer.validation-percentage")
+ @ConfigDescription("Percentage of ORC files to validate after write by re-reading the whole file")
+ public HiveConfig setOrcWriterValidationPercentage(double orcWriterValidationPercentage)
+ {
+ this.orcWriterValidationPercentage = orcWriterValidationPercentage;
+ return this;
+ }
+
+ @NotNull
+ public OrcWriteValidationMode getOrcWriterValidationMode()
+ {
+ return orcWriterValidationMode;
+ }
+
+ @Config("hive.orc.writer.validation-mode")
+ @ConfigDescription("Level of detail in ORC validation. Lower levels require more memory.")
+ public HiveConfig setOrcWriterValidationMode(OrcWriteValidationMode orcWriterValidationMode)
+ {
+ this.orcWriterValidationMode = orcWriterValidationMode;
+ return this;
+ }
+
+ public DateTimeZone getRcfileDateTimeZone()
+ {
+ return DateTimeZone.forTimeZone(TimeZone.getTimeZone(rcfileTimeZone));
+ }
+
+ @NotNull
+ public String getRcfileTimeZone()
+ {
+ return rcfileTimeZone;
+ }
+
+ @Config("hive.rcfile.time-zone")
+ @ConfigDescription("Time zone for RCFile binary read and write")
+ public HiveConfig setRcfileTimeZone(String rcfileTimeZone)
+ {
+ this.rcfileTimeZone = rcfileTimeZone;
+ return this;
+ }
+
+ public boolean isRcfileWriterValidate()
+ {
+ return rcfileWriterValidate;
+ }
+
+ @Config("hive.rcfile.writer.validate")
+ @ConfigDescription("Validate RCFile after write by re-reading the whole file")
+ public HiveConfig setRcfileWriterValidate(boolean rcfileWriterValidate)
+ {
+ this.rcfileWriterValidate = rcfileWriterValidate;
+ return this;
+ }
+
+ public boolean isAssumeCanonicalPartitionKeys()
+ {
+ return assumeCanonicalPartitionKeys;
+ }
+
+ @Config("hive.assume-canonical-partition-keys")
+ public HiveConfig setAssumeCanonicalPartitionKeys(boolean assumeCanonicalPartitionKeys)
+ {
+ this.assumeCanonicalPartitionKeys = assumeCanonicalPartitionKeys;
+ return this;
+ }
+
+ @MinDataSize("1B")
+ @MaxDataSize("1GB")
+ @NotNull
+ public DataSize getTextMaxLineLength()
+ {
+ return textMaxLineLength;
+ }
+
+ @Config("hive.text.max-line-length")
+ @ConfigDescription("Maximum line length for text files")
+ public HiveConfig setTextMaxLineLength(DataSize textMaxLineLength)
+ {
+ this.textMaxLineLength = textMaxLineLength;
+ return this;
+ }
+
+ public DateTimeZone getOrcLegacyDateTimeZone()
+ {
+ return DateTimeZone.forTimeZone(TimeZone.getTimeZone(orcLegacyTimeZone));
+ }
+
+ @NotNull
+ public String getOrcLegacyTimeZone()
+ {
+ return orcLegacyTimeZone;
+ }
+
+ @Config("hive.orc.time-zone")
+ @ConfigDescription("Time zone for legacy ORC files that do not contain a time zone")
+ public HiveConfig setOrcLegacyTimeZone(String orcLegacyTimeZone)
+ {
+ this.orcLegacyTimeZone = orcLegacyTimeZone;
+ return this;
+ }
+
+ public DateTimeZone getParquetDateTimeZone()
+ {
+ return DateTimeZone.forTimeZone(TimeZone.getTimeZone(parquetTimeZone));
+ }
+
+ @NotNull
+ public String getParquetTimeZone()
+ {
+ return parquetTimeZone;
+ }
+
+ @Config("hive.parquet.time-zone")
+ @ConfigDescription("Time zone for Parquet read and write")
+ public HiveConfig setParquetTimeZone(String parquetTimeZone)
+ {
+ this.parquetTimeZone = parquetTimeZone;
+ return this;
+ }
+
+ public boolean isUseParquetColumnNames()
+ {
+ return useParquetColumnNames;
+ }
+
+ @Config("hive.parquet.use-column-names")
+ @ConfigDescription("Access Parquet columns using names from the file")
+ public HiveConfig setUseParquetColumnNames(boolean useParquetColumnNames)
+ {
+ this.useParquetColumnNames = useParquetColumnNames;
+ return this;
+ }
+
+ public boolean isFailOnCorruptedParquetStatistics()
+ {
+ return failOnCorruptedParquetStatistics;
+ }
+
+ @Config("hive.parquet.fail-on-corrupted-statistics")
+ @ConfigDescription("Fail when scanning Parquet files with corrupted statistics")
+ public HiveConfig setFailOnCorruptedParquetStatistics(boolean failOnCorruptedParquetStatistics)
+ {
+ this.failOnCorruptedParquetStatistics = failOnCorruptedParquetStatistics;
+ return this;
+ }
+
+ @NotNull
+ public DataSize getParquetMaxReadBlockSize()
+ {
+ return parquetMaxReadBlockSize;
+ }
+
+ @Config("hive.parquet.max-read-block-size")
+ public HiveConfig setParquetMaxReadBlockSize(DataSize parquetMaxReadBlockSize)
+ {
+ this.parquetMaxReadBlockSize = parquetMaxReadBlockSize;
+ return this;
+ }
+
+ public boolean isOptimizeMismatchedBucketCount()
+ {
+ return optimizeMismatchedBucketCount;
+ }
+
+ @Config("hive.optimize-mismatched-bucket-count")
+ public HiveConfig setOptimizeMismatchedBucketCount(boolean optimizeMismatchedBucketCount)
+ {
+ this.optimizeMismatchedBucketCount = optimizeMismatchedBucketCount;
+ return this;
+ }
+
+ public List<String> getFileStatusCacheTables()
+ {
+ return fileStatusCacheTables;
+ }
+
+ @Config("hive.file-status-cache-tables")
+ public HiveConfig setFileStatusCacheTables(String fileStatusCacheTables)
+ {
+ this.fileStatusCacheTables = SPLITTER.splitToList(fileStatusCacheTables);
+ return this;
+ }
+
+ public long getFileStatusCacheMaxSize()
+ {
+ return fileStatusCacheMaxSize;
+ }
+
+ @Config("hive.file-status-cache-size")
+ public HiveConfig setFileStatusCacheMaxSize(long fileStatusCacheMaxSize)
+ {
+ this.fileStatusCacheMaxSize = fileStatusCacheMaxSize;
+ return this;
+ }
+
+ public Duration getFileStatusCacheExpireAfterWrite()
+ {
+ return fileStatusCacheExpireAfterWrite;
+ }
+
+ @Config("hive.file-status-cache-expire-time")
+ public HiveConfig setFileStatusCacheExpireAfterWrite(Duration fileStatusCacheExpireAfterWrite)
+ {
+ this.fileStatusCacheExpireAfterWrite = fileStatusCacheExpireAfterWrite;
+ return this;
+ }
+
+ public int getMetastoreWriteBatchSize()
+ {
+ return hmsWriteBatchSize;
+ }
+
+ @Config("hive.metastore-write-batch-size")
+ @ConfigDescription("Batch size for writing to hms")
+ public HiveConfig setMetastoreWriteBatchSize(int hmsWriteBatchSize)
+ {
+ this.hmsWriteBatchSize = hmsWriteBatchSize;
+ return this;
+ }
+
+ public enum HiveMetastoreAuthenticationType
+ {
+ NONE,
+ KERBEROS
+ }
+
+ @NotNull
+ public HiveMetastoreAuthenticationType getHiveMetastoreAuthenticationType()
+ {
+ return hiveMetastoreAuthenticationType;
+ }
+
+ @Config("hive.metastore.authentication.type")
+ @ConfigDescription("Hive Metastore authentication type")
+ public HiveConfig setHiveMetastoreAuthenticationType(
+ HiveMetastoreAuthenticationType hiveMetastoreAuthenticationType)
+ {
+ this.hiveMetastoreAuthenticationType = hiveMetastoreAuthenticationType;
+ return this;
+ }
+
+ public enum HdfsAuthenticationType
+ {
+ NONE,
+ KERBEROS,
+ }
+
+ @NotNull
+ public HdfsAuthenticationType getHdfsAuthenticationType()
+ {
+ return hdfsAuthenticationType;
+ }
+
+ @Config("hive.hdfs.authentication.type")
+ @ConfigDescription("HDFS authentication type")
+ public HiveConfig setHdfsAuthenticationType(HdfsAuthenticationType hdfsAuthenticationType)
+ {
+ this.hdfsAuthenticationType = hdfsAuthenticationType;
+ return this;
+ }
+
+ public boolean isHdfsImpersonationEnabled()
+ {
+ return hdfsImpersonationEnabled;
+ }
+
+ @Config("hive.hdfs.impersonation.enabled")
+ @ConfigDescription("Should Presto user be impersonated when communicating with HDFS")
+ public HiveConfig setHdfsImpersonationEnabled(boolean hdfsImpersonationEnabled)
+ {
+ this.hdfsImpersonationEnabled = hdfsImpersonationEnabled;
+ return this;
+ }
+
+ public boolean isHdfsWireEncryptionEnabled()
+ {
+ return hdfsWireEncryptionEnabled;
+ }
+
+ @Config("hive.hdfs.wire-encryption.enabled")
+ @ConfigDescription("Should be turned on when HDFS wire encryption is enabled")
+ public HiveConfig setHdfsWireEncryptionEnabled(boolean hdfsWireEncryptionEnabled)
+ {
+ this.hdfsWireEncryptionEnabled = hdfsWireEncryptionEnabled;
+ return this;
+ }
+
+ public boolean isSkipDeletionForAlter()
+ {
+ return skipDeletionForAlter;
+ }
+
+ @Config("hive.skip-deletion-for-alter")
+ @ConfigDescription("Skip deletion of old partition data when a partition is deleted and then inserted in the same transaction")
+ public HiveConfig setSkipDeletionForAlter(boolean skipDeletionForAlter)
+ {
+ this.skipDeletionForAlter = skipDeletionForAlter;
+ return this;
+ }
+
+ public boolean isSkipTargetCleanupOnRollback()
+ {
+ return skipTargetCleanupOnRollback;
+ }
+
+ @Config("hive.skip-target-cleanup-on-rollback")
+ @ConfigDescription("Skip deletion of target directories when a metastore operation fails")
+ public HiveConfig setSkipTargetCleanupOnRollback(boolean skipTargetCleanupOnRollback)
+ {
+ this.skipTargetCleanupOnRollback = skipTargetCleanupOnRollback;
+ return this;
+ }
+
+ public boolean isBucketExecutionEnabled()
+ {
+ return bucketExecutionEnabled;
+ }
+
+ @Config("hive.bucket-execution")
+ @ConfigDescription("Enable bucket-aware execution: only use a single worker per bucket")
+ public HiveConfig setBucketExecutionEnabled(boolean bucketExecutionEnabled)
+ {
+ this.bucketExecutionEnabled = bucketExecutionEnabled;
+ return this;
+ }
+
+ public boolean isSortedWritingEnabled()
+ {
+ return sortedWritingEnabled;
+ }
+
+ @Config("hive.sorted-writing")
+ @ConfigDescription("Enable writing to bucketed sorted tables")
+ public HiveConfig setSortedWritingEnabled(boolean sortedWritingEnabled)
+ {
+ this.sortedWritingEnabled = sortedWritingEnabled;
+ return this;
+ }
+
+ public int getFileSystemMaxCacheSize()
+ {
+ return fileSystemMaxCacheSize;
+ }
+
+ @Config("hive.fs.cache.max-size")
+ @ConfigDescription("Hadoop FileSystem cache size")
+ public HiveConfig setFileSystemMaxCacheSize(int fileSystemMaxCacheSize)
+ {
+ this.fileSystemMaxCacheSize = fileSystemMaxCacheSize;
+ return this;
+ }
+
+ @Config("hive.non-managed-table-writes-enabled")
+ @ConfigDescription("Enable writes to non-managed (external) tables")
+ public HiveConfig setWritesToNonManagedTablesEnabled(boolean writesToNonManagedTablesEnabled)
+ {
+ this.writesToNonManagedTablesEnabled = writesToNonManagedTablesEnabled;
+ return this;
+ }
+
+ public boolean getWritesToNonManagedTablesEnabled()
+ {
+ return writesToNonManagedTablesEnabled;
+ }
+
+ @Deprecated
+ @Config("hive.non-managed-table-creates-enabled")
+ @ConfigDescription("Enable non-managed (external) table creates")
+ public HiveConfig setCreatesOfNonManagedTablesEnabled(boolean createsOfNonManagedTablesEnabled)
+ {
+ this.createsOfNonManagedTablesEnabled = createsOfNonManagedTablesEnabled;
+ return this;
+ }
+
+ @Deprecated
+ public boolean getCreatesOfNonManagedTablesEnabled()
+ {
+ return createsOfNonManagedTablesEnabled;
+ }
+
+ @Config("hive.table-statistics-enabled")
+ @ConfigDescription("Enable use of table statistics")
+ public HiveConfig setTableStatisticsEnabled(boolean tableStatisticsEnabled)
+ {
+ this.tableStatisticsEnabled = tableStatisticsEnabled;
+ return this;
+ }
+
+ public boolean isTableStatisticsEnabled()
+ {
+ return tableStatisticsEnabled;
+ }
+
+ @Min(1)
+ public int getPartitionStatisticsSampleSize()
+ {
+ return partitionStatisticsSampleSize;
+ }
+
+ @Config("hive.partition-statistics-sample-size")
+ @ConfigDescription("Maximum sample size of the partitions column statistics")
+ public HiveConfig setPartitionStatisticsSampleSize(int partitionStatisticsSampleSize)
+ {
+ this.partitionStatisticsSampleSize = partitionStatisticsSampleSize;
+ return this;
+ }
+
+ public boolean isIgnoreCorruptedStatistics()
+ {
+ return ignoreCorruptedStatistics;
+ }
+
+ @Config("hive.ignore-corrupted-statistics")
+ @ConfigDescription("Ignore corrupted statistics rather than failing")
+ public HiveConfig setIgnoreCorruptedStatistics(boolean ignoreCorruptedStatistics)
+ {
+ this.ignoreCorruptedStatistics = ignoreCorruptedStatistics;
+ return this;
+ }
+
+ public boolean isCollectColumnStatisticsOnWrite()
+ {
+ return collectColumnStatisticsOnWrite;
+ }
+
+ @Config("hive.collect-column-statistics-on-write")
+ @ConfigDescription("Enables automatic column level statistics collection on write")
+ public HiveConfig setCollectColumnStatisticsOnWrite(boolean collectColumnStatisticsOnWrite)
+ {
+ this.collectColumnStatisticsOnWrite = collectColumnStatisticsOnWrite;
+ return this;
+ }
+
+ @Config("hive.metastore-recording-path")
+ public HiveConfig setRecordingPath(String recordingPath)
+ {
+ this.recordingPath = recordingPath;
+ return this;
+ }
+
+ public String getRecordingPath()
+ {
+ return recordingPath;
+ }
+
+ @Config("hive.replay-metastore-recording")
+ public HiveConfig setReplay(boolean replay)
+ {
+ this.replay = replay;
+ return this;
+ }
+
+ public boolean isReplay()
+ {
+ return replay;
+ }
+
+ @Config("hive.metastore-recording-duration")
+ public HiveConfig setRecordingDuration(Duration recordingDuration)
+ {
+ this.recordingDuration = recordingDuration;
+ return this;
+ }
+
+ @NotNull
+ public Duration getRecordingDuration()
+ {
+ return recordingDuration;
+ }
+
+ public boolean isS3SelectPushdownEnabled()
+ {
+ return s3SelectPushdownEnabled;
+ }
+
+ @Config("hive.s3select-pushdown.enabled")
+ @ConfigDescription("Enable query pushdown to AWS S3 Select service")
+ public HiveConfig setS3SelectPushdownEnabled(boolean s3SelectPushdownEnabled)
+ {
+ this.s3SelectPushdownEnabled = s3SelectPushdownEnabled;
+ return this;
+ }
+
+ @Min(1)
+ public int getS3SelectPushdownMaxConnections()
+ {
+ return s3SelectPushdownMaxConnections;
+ }
+
+ @Config("hive.s3select-pushdown.max-connections")
+ public HiveConfig setS3SelectPushdownMaxConnections(int s3SelectPushdownMaxConnections)
+ {
+ this.s3SelectPushdownMaxConnections = s3SelectPushdownMaxConnections;
+ return this;
+ }
+
+ @Config("hive.temporary-staging-directory-enabled")
+ @ConfigDescription("Should use (if possible) temporary staging directory for write operations")
+ public HiveConfig setTemporaryStagingDirectoryEnabled(boolean temporaryStagingDirectoryEnabled)
+ {
+ this.temporaryStagingDirectoryEnabled = temporaryStagingDirectoryEnabled;
+ return this;
+ }
+
+ public boolean isTemporaryStagingDirectoryEnabled()
+ {
+ return temporaryStagingDirectoryEnabled;
+ }
+
+ @Config("hive.temporary-staging-directory-path")
+ @ConfigDescription("Location of temporary staging directory for write operations. Use ${USER} placeholder to use different location for each user.")
+ public HiveConfig setTemporaryStagingDirectoryPath(String temporaryStagingDirectoryPath)
+ {
+ this.temporaryStagingDirectoryPath = temporaryStagingDirectoryPath;
+ return this;
+ }
+
+ @NotNull
+ public String getTemporaryStagingDirectoryPath()
+ {
+ return temporaryStagingDirectoryPath;
+ }
+
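+ // The ORC metadata caches below (file tail, stripe footer, row-group index, bloom filter,
+ // row data) each expose the same knobs: an enabled flag, a TTL, and a size limit
+ // (a maximum weight, for the row-data block cache).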
+ public boolean isOrcFileTailCacheEnabled()
+ {
+ return orcFileTailCacheEnabled;
+ }
+
+ @Config("hive.orc.file-tail.cache.enabled")
+ @ConfigDescription("Enable caching of Orc file tail.")
+ public HiveConfig setOrcFileTailCacheEnabled(boolean orcFileTailCacheEnabled)
+ {
+ this.orcFileTailCacheEnabled = orcFileTailCacheEnabled;
+ return this;
+ }
+
+ @NotNull
+ @MinDuration("0ms")
+ public Duration getOrcFileTailCacheTtl()
+ {
+ return orcFileTailCacheTtl;
+ }
+
+ @Config("hive.orc.file-tail.cache.ttl")
+ @ConfigDescription("Orc file tail cache TTL.")
+ public HiveConfig setOrcFileTailCacheTtl(Duration orcFileTailCacheTtl)
+ {
+ this.orcFileTailCacheTtl = orcFileTailCacheTtl;
+ return this;
+ }
+
+ public long getOrcFileTailCacheLimit()
+ {
+ return orcFileTailCacheLimit;
+ }
+
+ @Config("hive.orc.file-tail.cache.limit")
+ @ConfigDescription("Orc file tail cache limit.")
+ public HiveConfig setOrcFileTailCacheLimit(long orcFileTailCacheLimit)
+ {
+ this.orcFileTailCacheLimit = orcFileTailCacheLimit;
+ return this;
+ }
+
+ public boolean isOrcStripeFooterCacheEnabled()
+ {
+ return orcStripeFooterCacheEnabled;
+ }
+
+ @Config("hive.orc.stripe-footer.cache.enabled")
+ @ConfigDescription("Enable caching of Orc stripe footer.")
+ public HiveConfig setOrcStripeFooterCacheEnabled(boolean orcStripeFooterCacheEnabled)
+ {
+ this.orcStripeFooterCacheEnabled = orcStripeFooterCacheEnabled;
+ return this;
+ }
+
+ @MinDuration("0ms")
+ public Duration getOrcStripeFooterCacheTtl()
+ {
+ return orcStripeFooterCacheTtl;
+ }
+
+ @Config("hive.orc.stripe-footer.cache.ttl")
+ @ConfigDescription("Orc strip footer cache TTL.")
+ public HiveConfig setOrcStripeFooterCacheTtl(Duration orcStripeFooterCacheTtl)
+ {
+ this.orcStripeFooterCacheTtl = orcStripeFooterCacheTtl;
+ return this;
+ }
+
+ @Min(0)
+ public long getOrcStripeFooterCacheLimit()
+ {
+ return orcStripeFooterCacheLimit;
+ }
+
+ @Config("hive.orc.stripe-footer.cache.limit")
+ @ConfigDescription("Orc stripe footer cache limit.")
+ public HiveConfig setOrcStripeFooterCacheLimit(long orcStripeFooterCacheLimit)
+ {
+ this.orcStripeFooterCacheLimit = orcStripeFooterCacheLimit;
+ return this;
+ }
+
+ public boolean isOrcRowIndexCacheEnabled()
+ {
+ return orcRowIndexCacheEnabled;
+ }
+
+ @Config("hive.orc.row-index.cache.enabled")
+ @ConfigDescription("Enable caching of Orc row group index.")
+ public HiveConfig setOrcRowIndexCacheEnabled(boolean orcRowIndexCacheEnabled)
+ {
+ this.orcRowIndexCacheEnabled = orcRowIndexCacheEnabled;
+ return this;
+ }
+
+ @MinDuration("0ms")
+ public Duration getOrcRowIndexCacheTtl()
+ {
+ return orcRowIndexCacheTtl;
+ }
+
+ @Config("hive.orc.row-index.cache.ttl")
+ public HiveConfig setOrcRowIndexCacheTtl(Duration orcRowIndexCacheTtl)
+ {
+ this.orcRowIndexCacheTtl = orcRowIndexCacheTtl;
+ return this;
+ }
+
+ @Min(0)
+ public long getOrcRowIndexCacheLimit()
+ {
+ return orcRowIndexCacheLimit;
+ }
+
+ @Config("hive.orc.row-index.cache.limit")
+ public HiveConfig setOrcRowIndexCacheLimit(long orcRowIndexCacheLimit)
+ {
+ this.orcRowIndexCacheLimit = orcRowIndexCacheLimit;
+ return this;
+ }
+
+ public boolean isOrcBloomFiltersCacheEnabled()
+ {
+ return orcBloomFiltersCacheEnabled;
+ }
+
+ @Config("hive.orc.bloom-filters.cache.enabled")
+ @ConfigDescription("Enable caching of Orc bloom filters.")
+ public HiveConfig setOrcBloomFiltersCacheEnabled(boolean orcBloomFiltersCacheEnabled)
+ {
+ this.orcBloomFiltersCacheEnabled = orcBloomFiltersCacheEnabled;
+ return this;
+ }
+
+ @MinDuration("0ms")
+ public Duration getOrcBloomFiltersCacheTtl()
+ {
+ return orcBloomFiltersCacheTtl;
+ }
+
+ @Config("hive.orc.bloom-filters.cache.ttl")
+ public HiveConfig setOrcBloomFiltersCacheTtl(Duration orcBloomFiltersCacheTtl)
+ {
+ this.orcBloomFiltersCacheTtl = orcBloomFiltersCacheTtl;
+ return this;
+ }
+
+ @Min(0)
+ public long getOrcBloomFiltersCacheLimit()
+ {
+ return orcBloomFiltersCacheLimit;
+ }
+
+ @Config("hive.orc.bloom-filters.cache.limit")
+ public HiveConfig setOrcBloomFiltersCacheLimit(long orcBloomFiltersCacheLimit)
+ {
+ this.orcBloomFiltersCacheLimit = orcBloomFiltersCacheLimit;
+ return this;
+ }
+
+ public boolean isOrcRowDataCacheEnabled()
+ {
+ return orcRowDataCacheEnabled;
+ }
+
+ @Config("hive.orc.row-data.block.cache.enabled")
+ @ConfigDescription("Flag to enable caching Orc row data as blocks")
+ public HiveConfig setOrcRowDataCacheEnabled(boolean orcRowDataCacheEnabled)
+ {
+ this.orcRowDataCacheEnabled = orcRowDataCacheEnabled;
+ return this;
+ }
+
+ @MinDuration("0ms")
+ public Duration getOrcRowDataCacheTtl()
+ {
+ return orcRowDataCacheTtl;
+ }
+
+ @Config("hive.orc.row-data.block.cache.ttl")
+ @ConfigDescription("Orc Row data block cache TTL.")
+ public HiveConfig setOrcRowDataCacheTtl(Duration orcRowDataCacheTtl)
+ {
+ this.orcRowDataCacheTtl = orcRowDataCacheTtl;
+ return this;
+ }
+
+ public DataSize getOrcRowDataCacheMaximumWeight()
+ {
+ return orcRowDataCacheMaximumWeight;
+ }
+
+ @Config("hive.orc.row-data.block.cache.max.weight")
+ @ConfigDescription("Orc Row data block cache max weight.")
+ public HiveConfig setOrcRowDataCacheMaximumWeight(DataSize orcRowDataCacheMaximumWeight)
+ {
+ this.orcRowDataCacheMaximumWeight = orcRowDataCacheMaximumWeight;
+ return this;
+ }
+
+ @Config("hive.transaction-heartbeat-interval")
+ @ConfigDescription("Interval after which heartbeat is sent for open Hive transaction")
+ public HiveConfig setHiveTransactionHeartbeatInterval(Duration interval)
+ {
+ this.hiveTransactionHeartbeatInterval = Optional.ofNullable(interval);
+ return this;
+ }
+
+ @NotNull
+ public Optional<Duration> getHiveTransactionHeartbeatInterval()
+ {
+ return hiveTransactionHeartbeatInterval;
+ }
+
+ public int getHiveTransactionHeartbeatThreads()
+ {
+ return hiveTransactionHeartbeatThreads;
+ }
+
+ @Config("hive.transaction-heartbeat-threads")
+ @ConfigDescription("Number of threads to run in the Hive transaction heartbeat service")
+ public HiveConfig setHiveTransactionHeartbeatThreads(int hiveTransactionHeartbeatThreads)
+ {
+ this.hiveTransactionHeartbeatThreads = hiveTransactionHeartbeatThreads;
+ return this;
+ }
+
+ @Config("hive.table-creates-with-location-allowed")
+ @ConfigDescription("Allow setting table location in CREATE TABLE and CREATE TABLE AS SELECT statements")
+ public HiveConfig setTableCreatesWithLocationAllowed(boolean tableCreatesWithLocationAllowed)
+ {
+ this.tableCreatesWithLocationAllowed = tableCreatesWithLocationAllowed;
+ return this;
+ }
+
+ public boolean getTableCreatesWithLocationAllowed()
+ {
+ return tableCreatesWithLocationAllowed;
+ }
+
+ public boolean isTlsEnabled()
+ {
+ return tlsEnabled;
+ }
+
+ @Config("hive.metastore.thrift.client.ssl.enabled")
+ @ConfigDescription("Whether TLS security is enabled")
+ public HiveConfig setTlsEnabled(boolean tlsEnabled)
+ {
+ this.tlsEnabled = tlsEnabled;
+ return this;
+ }
+
+ @Config("hive.dynamic-filter-partition-filtering")
+ @ConfigDescription("Filter out hive splits early based on partition value using dynamic filter")
+ public HiveConfig setDynamicFilterPartitionFilteringEnabled(boolean dynamicFilterPartitionFilteringEnabled)
+ {
+ this.dynamicFilterPartitionFilteringEnabled = dynamicFilterPartitionFilteringEnabled;
+ return this;
+ }
+
+ public boolean isDynamicFilterPartitionFilteringEnabled()
+ {
+ return dynamicFilterPartitionFilteringEnabled;
+ }
+
+ @Config("hive.dynamic-filtering-row-filtering-threshold")
+ @ConfigDescription("Filter out hive rows early if the dynamic filter size is below the threshold")
+ public HiveConfig setDynamicFilteringRowFilteringThreshold(int dynamicFilteringRowFilteringThreshold)
+ {
+ this.dynamicFilteringRowFilteringThreshold = dynamicFilteringRowFilteringThreshold;
+ return this;
+ }
+
+ @Min(1)
+ public int getDynamicFilteringRowFilteringThreshold()
+ {
+ return dynamicFilteringRowFilteringThreshold;
+ }
+
+ public boolean isOrcCacheStatsMetricCollectionEnabled()
+ {
+ return orcCacheStatsMetricCollectionEnabled;
+ }
+
+ @Config("hive.orc-cache-stats-metric-collection.enabled")
+ @ConfigDescription("Whether orc cache stats metric collection is enabled")
+ public HiveConfig setOrcCacheStatsMetricCollectionEnabled(boolean orcCacheStatsMetricCollectionEnabled)
+ {
+ this.orcCacheStatsMetricCollectionEnabled = orcCacheStatsMetricCollectionEnabled;
+ return this;
+ }
+
+ @Config("hive.vacuum-cleanup-recheck-interval")
+ @ConfigDescription("Interval after which vacuum cleanup task will be resubmitted")
+ public HiveConfig setVacuumCleanupRecheckInterval(Duration interval)
+ {
+ this.vacuumCleanupRecheckInterval = interval;
+ return this;
+ }
+
+ @NotNull
+ @MinDuration("5m")
+ public Duration getVacuumCleanupRecheckInterval()
+ {
+ return vacuumCleanupRecheckInterval;
+ }
+
+ @Config("hive.vacuum-service-threads")
+ @ConfigDescription("Number of threads to run in the vacuum service")
+ public HiveConfig setVacuumServiceThreads(int vacuumServiceThreads)
+ {
+ this.vacuumServiceThreads = vacuumServiceThreads;
+ return this;
+ }
+
+ public int getVacuumServiceThreads()
+ {
+ return vacuumServiceThreads;
+ }
+
+ @Config("hive.metastore-client-service-threads")
+ @ConfigDescription("Number of threads for metastore client")
+ public HiveConfig setMetastoreClientServiceThreads(int metastoreClientServiceThreads)
+ {
+ this.metastoreClientServiceThreads = metastoreClientServiceThreads;
+ return this;
+ }
+
+ public int getMetastoreClientServiceThreads()
+ {
+ return metastoreClientServiceThreads;
+ }
+
+ @Config("hive.vacuum-delta-num-threshold")
+ @ConfigDescription("Maximum number of delta directories to allow without compacting it")
+ public HiveConfig setVacuumDeltaNumThreshold(int vacuumDeltaNumThreshold)
+ {
+ this.vacuumDeltaNumThreshold = vacuumDeltaNumThreshold;
+ return this;
+ }
+
+ @Min(2)
+ public int getVacuumDeltaNumThreshold()
+ {
+ return vacuumDeltaNumThreshold;
+ }
+
+ @Config("hive.vacuum-delta-percent-threshold")
+ @ConfigDescription("Maximum percent of delta directories to allow without compacting it")
+ public HiveConfig setVacuumDeltaPercentThreshold(double vacuumDeltaPercentThreshold)
+ {
+ this.vacuumDeltaPercentThreshold = vacuumDeltaPercentThreshold;
+ return this;
+ }
+
+ @DecimalMin("0.1")
+ @DecimalMax("1.0")
+ public double getVacuumDeltaPercentThreshold()
+ {
+ return vacuumDeltaPercentThreshold;
+ }
+
+ @Config("hive.auto-vacuum-enabled")
+ @ConfigDescription("Enable auto-vacuum on Hive tables")
+ public HiveConfig setAutoVacuumEnabled(boolean autoVacuumEnabled)
+ {
+ this.autoVacuumEnabled = autoVacuumEnabled;
+ return this;
+ }
+
+ public boolean getAutoVacuumEnabled()
+ {
+ return autoVacuumEnabled;
+ }
+
+ @Config("hive.orc-predicate-pushdown-enabled")
+ @ConfigDescription("Enables processing of predicates within ORC reading")
+ public HiveConfig setOrcPredicatePushdownEnabled(boolean orcPredicatePushdownEnabled)
+ {
+ this.orcPredicatePushdownEnabled = orcPredicatePushdownEnabled;
+ return this;
+ }
+
+ public boolean isOrcPredicatePushdownEnabled()
+ {
+ return orcPredicatePushdownEnabled;
+ }
+
+ @Config("hive.vacuum-collector-interval")
+ @ConfigDescription("Interval after which vacuum collector task will be resubmitted")
+ public HiveConfig setVacuumCollectorInterval(Duration interval)
+ {
+ this.vacuumCollectorInterval = Optional.ofNullable(interval);
+ return this;
+ }
+
+ @NotNull
+ public Optional<Duration> getVacuumCollectorInterval()
+ {
+ return vacuumCollectorInterval;
+ }
+
+ @Min(1)
+ public int getMaxSplitsToGroup()
+ {
+ return maxNumbSplitsToGroup;
+ }
+
+ @Config("hive.max-splits-to-group")
+ @ConfigDescription("max number of small splits can be grouped")
+ public HiveConfig setMaxSplitsToGroup(int maxNumbSplitsToGroup)
+ {
+ this.maxNumbSplitsToGroup = maxNumbSplitsToGroup;
+ return this;
+ }
+
+ @Config("hive.worker-metastore-cache-enabled")
+ public HiveConfig setWorkerMetaStoreCacheEnabled(boolean isEnabled)
+ {
+ this.workerMetaStoreCacheEnabled = isEnabled;
+ return this;
+ }
+
+ public boolean getWorkerMetaStoreCacheEnabled()
+ {
+ return this.workerMetaStoreCacheEnabled;
+ }
+
+ public boolean isOmniDataSslEnabled()
+ {
+ return omniDataSslEnabled;
+ }
+
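+ /**
+ * Normalizes a user-supplied file path (NFKC) and resolves it to its canonical form.
+ * Returns Optional.empty() if the path is null, empty, cannot be resolved, or does not exist.
+ */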
+ private Optional<String> getNormalizedFilePath(String filePath)
+ {
+ if (filePath == null || filePath.isEmpty()) {
+ return Optional.empty();
+ }
+ String outputPath;
+ try {
+ String normalizePath = Normalizer.normalize(filePath, Normalizer.Form.NFKC);
+ outputPath = new File(normalizePath).getCanonicalPath();
+ }
+ catch (IOException | IllegalArgumentException exception) {
+ log.error("File path [%s] is invalid, exception %s", filePath, exception.getMessage());
+ return Optional.empty();
+ }
+ File file = new File(outputPath);
+ if (!file.exists()) {
+ log.error("File [%s] is not exist.", outputPath);
+ return Optional.empty();
+ }
+ return Optional.of(outputPath);
+ }
+
+ @Config("omni-data.ssl.enabled")
+ public HiveConfig setOmniDataSslEnabled(boolean omniDataSslEnabled)
+ {
+ this.omniDataSslEnabled = omniDataSslEnabled;
+ return this;
+ }
+
+ public Optional<String> getOmniDataSslPkiDir()
+ {
+ return omniDataSslPkiDir;
+ }
+
+ @Config("omni-data.ssl.pki.dir")
+ @ConfigDescription("Directory of Public Key Infrastructure.")
+ public HiveConfig setOmniDataSslPkiDir(String omniDataSslPkiDir)
+ {
+ this.omniDataSslPkiDir = getNormalizedFilePath(omniDataSslPkiDir);
+ return this;
+ }
+
+ public Optional<String> getOmniDataSslClientCertFilePath()
+ {
+ return omniDataSslClientCertFilePath;
+ }
+
+ @Config("omni-data.ssl.client.cert.file.path")
+ @ConfigDescription("Path to the SSL client certificate file.")
+ public HiveConfig setOmniDataSslClientCertFilePath(String omniDataSslClientCertFilePath)
+ {
+ this.omniDataSslClientCertFilePath = getNormalizedFilePath(omniDataSslClientCertFilePath);
+ return this;
+ }
+
+ public Optional<String> getOmniDataSslPrivateKeyFilePath()
+ {
+ return omniDataSslPrivateKeyFilePath;
+ }
+
+ @Config("omni-data.ssl.private.key.file.path")
+ @ConfigDescription("Path to the SSL private key file.")
+ public HiveConfig setOmniDataSslPrivateKeyFilePath(String omniDataSslPrivateKeyFilePath)
+ {
+ this.omniDataSslPrivateKeyFilePath = getNormalizedFilePath(omniDataSslPrivateKeyFilePath);
+ return this;
+ }
+
+ public Optional<String> getOmniDataSslTrustCertFilePath()
+ {
+ return omniDataSslTrustCertFilePath;
+ }
+
+ @Config("omni-data.ssl.trust.cert.file.path")
+ @ConfigDescription("Path to the SSL trust certificate file.")
+ public HiveConfig setOmniDataSslTrustCertFilePath(String omniDataSslTrustCertFilePath)
+ {
+ this.omniDataSslTrustCertFilePath = getNormalizedFilePath(omniDataSslTrustCertFilePath);
+ return this;
+ }
+
+ public Optional<String> getOmniDataSslCrlFilePath()
+ {
+ return omniDataSslCrlFilePath;
+ }
+
+ @Config("omni-data.ssl.crl.file.path")
+ @ConfigDescription("Path to the SSL Certificate Revocation List file.")
+ public HiveConfig setOmniDataSslCrlFilePath(String omniDataSslCrlFilePath)
+ {
+ this.omniDataSslCrlFilePath = getNormalizedFilePath(omniDataSslCrlFilePath);
+ return this;
+ }
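+
+ // Illustrative catalog properties for the OmniData SSL settings above; the directory and
+ // file names are hypothetical placeholders, not shipped defaults:
+ //   omni-data.ssl.enabled=true
+ //   omni-data.ssl.pki.dir=/etc/omnidata/pki
+ //   omni-data.ssl.client.cert.file.path=/etc/omnidata/pki/client.crt
+ //   omni-data.ssl.private.key.file.path=/etc/omnidata/pki/client.key
+ //   omni-data.ssl.trust.cert.file.path=/etc/omnidata/pki/ca.crt
+ //   omni-data.ssl.crl.file.path=/etc/omnidata/pki/revocation.crl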
+
+ @Config("hive.filter-offload-enabled")
+ @ConfigDescription("Enables offload filter operators to storage device.")
+ public HiveConfig setFilterOffloadEnabled(boolean filterOffloadEnabled)
+ {
+ this.filterOffloadEnabled = filterOffloadEnabled;
+ return this;
+ }
+
+ public boolean isFilterOffloadEnabled()
+ {
+ return filterOffloadEnabled;
+ }
+
+ @Config("hive.aggregator-offload-enabled")
+ @ConfigDescription("Enables offload aggregator operators to storage device.")
+ public HiveConfig setAggregatorOffloadEnabled(boolean aggregatorOffloadEnabled)
+ {
+ this.aggregatorOffloadEnabled = aggregatorOffloadEnabled;
+ return this;
+ }
+
+ @Config("hive.omnidata-enabled")
+ @ConfigDescription("Enables omnidata feature.")
+ public HiveConfig setOmniDataEnabled(boolean omniDataEnabled)
+ {
+ this.omniDataEnabled = omniDataEnabled;
+ return this;
+ }
+
+ public boolean isOmniDataEnabled()
+ {
+ return omniDataEnabled;
+ }
+
+ public boolean isAggregatorOffloadEnabled()
+ {
+ return aggregatorOffloadEnabled;
+ }
+
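+ // The thresholds below gate operator offload: an expression is offloaded to OmniData only
+ // when its filter/aggregation factor and the table row count clear the configured minimums.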
+ @Config("hive.min-filter-offload-factor")
+ @ConfigDescription("The minimum data filtering threshold for predicate expression offload.")
+ public HiveConfig setMinFilterOffloadFactor(double minFilterOffloadFactor)
+ {
+ this.minFilterOffloadFactor = minFilterOffloadFactor;
+ return this;
+ }
+
+ @DecimalMin("0.0")
+ @DecimalMax("1.0")
+ public double getMinFilterOffloadFactor()
+ {
+ return minFilterOffloadFactor;
+ }
+
+ @Config("hive.min-aggregator-offload-factor")
+ @ConfigDescription("The minimum data aggregation threshold for aggregation expression offload.")
+ public HiveConfig setMinAggregatorOffloadFactor(double minAggregatorOffloadFactor)
+ {
+ this.minAggregatorOffloadFactor = minAggregatorOffloadFactor;
+ return this;
+ }
+
+ @DecimalMin("0.0")
+ @DecimalMax("1.0")
+ public double getMinAggregatorOffloadFactor()
+ {
+ return minAggregatorOffloadFactor;
+ }
+
+ @Config("hive.min-offload-row-number")
+ @ConfigDescription("The minimum table size for operator offload.")
+ public HiveConfig setMinOffloadRowNumber(long filterFactor)
+ {
+ this.minOffloadRowNumber = filterFactor;
+ return this;
+ }
+
+ @Min(1)
+ public long getMinOffloadRowNumber()
+ {
+ return minOffloadRowNumber;
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveConnector.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveConnector.java
new file mode 100644
index 00000000..dbcf916d
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveConnector.java
@@ -0,0 +1,240 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import io.airlift.bootstrap.LifeCycleManager;
+import io.airlift.log.Logger;
+import io.prestosql.spi.classloader.ThreadContextClassLoader;
+import io.prestosql.spi.connector.Connector;
+import io.prestosql.spi.connector.ConnectorAccessControl;
+import io.prestosql.spi.connector.ConnectorMetadata;
+import io.prestosql.spi.connector.ConnectorNodePartitioningProvider;
+import io.prestosql.spi.connector.ConnectorPageSinkProvider;
+import io.prestosql.spi.connector.ConnectorPageSourceProvider;
+import io.prestosql.spi.connector.ConnectorPlanOptimizerProvider;
+import io.prestosql.spi.connector.ConnectorSplitManager;
+import io.prestosql.spi.connector.ConnectorTransactionHandle;
+import io.prestosql.spi.connector.SystemTable;
+import io.prestosql.spi.connector.classloader.ClassLoaderSafeConnectorMetadata;
+import io.prestosql.spi.procedure.Procedure;
+import io.prestosql.spi.session.PropertyMetadata;
+import io.prestosql.spi.transaction.IsolationLevel;
+
+import java.util.List;
+import java.util.Set;
+import java.util.function.Supplier;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static io.prestosql.spi.transaction.IsolationLevel.READ_UNCOMMITTED;
+import static io.prestosql.spi.transaction.IsolationLevel.checkConnectorSupports;
+import static java.util.Objects.requireNonNull;
+
+public class HiveConnector
+ implements Connector
+{
+ private static final Logger log = Logger.get(HiveConnector.class);
+
+ private final LifeCycleManager lifeCycleManager;
+ private final Supplier<TransactionalMetadata> metadataFactory;
+ private final ConnectorSplitManager splitManager;
+ private final ConnectorPageSourceProvider pageSourceProvider;
+ private final ConnectorPageSinkProvider pageSinkProvider;
+ private final ConnectorNodePartitioningProvider nodePartitioningProvider;
+ private final Set<SystemTable> systemTables;
+ private final Set<Procedure> procedures;
+ private final List<PropertyMetadata<?>> sessionProperties;
+ private final List<PropertyMetadata<?>> schemaProperties;
+ private final List<PropertyMetadata<?>> tableProperties;
+ private final List<PropertyMetadata<?>> analyzeProperties;
+
+ private final ConnectorAccessControl accessControl;
+ private final ClassLoader classLoader;
+ private final ConnectorPlanOptimizerProvider planOptimizerProvider;
+
+ private final HiveTransactionManager transactionManager;
+
+ public HiveConnector(
+ LifeCycleManager lifeCycleManager,
+ Supplier<TransactionalMetadata> metadataFactory,
+ HiveTransactionManager transactionManager,
+ ConnectorSplitManager splitManager,
+ ConnectorPageSourceProvider pageSourceProvider,
+ ConnectorPageSinkProvider pageSinkProvider,
+ ConnectorNodePartitioningProvider nodePartitioningProvider,
+ Set<SystemTable> systemTables,
+ Set<Procedure> procedures,
+ List<PropertyMetadata<?>> sessionProperties,
+ List<PropertyMetadata<?>> schemaProperties,
+ List<PropertyMetadata<?>> tableProperties,
+ List<PropertyMetadata<?>> analyzeProperties,
+ ConnectorAccessControl accessControl,
+ ConnectorPlanOptimizerProvider planOptimizerProvider,
+ ClassLoader classLoader)
+ {
+ this.lifeCycleManager = requireNonNull(lifeCycleManager, "lifeCycleManager is null");
+ this.metadataFactory = requireNonNull(metadataFactory, "metadata is null");
+ this.transactionManager = requireNonNull(transactionManager, "transactionManager is null");
+ this.splitManager = requireNonNull(splitManager, "splitManager is null");
+ this.pageSourceProvider = requireNonNull(pageSourceProvider, "pageSourceProvider is null");
+ this.pageSinkProvider = requireNonNull(pageSinkProvider, "pageSinkProvider is null");
+ this.nodePartitioningProvider = requireNonNull(nodePartitioningProvider, "nodePartitioningProvider is null");
+ this.systemTables = ImmutableSet.copyOf(requireNonNull(systemTables, "systemTables is null"));
+ this.procedures = ImmutableSet.copyOf(requireNonNull(procedures, "procedures is null"));
+ this.sessionProperties = ImmutableList.copyOf(requireNonNull(sessionProperties, "sessionProperties is null"));
+ this.schemaProperties = ImmutableList.copyOf(requireNonNull(schemaProperties, "schemaProperties is null"));
+ this.tableProperties = ImmutableList.copyOf(requireNonNull(tableProperties, "tableProperties is null"));
+ this.analyzeProperties = ImmutableList.copyOf(requireNonNull(analyzeProperties, "analyzeProperties is null"));
+ this.accessControl = requireNonNull(accessControl, "accessControl is null");
+ this.classLoader = requireNonNull(classLoader, "classLoader is null");
+ this.planOptimizerProvider = requireNonNull(planOptimizerProvider, "planOptimizerProvider is null");
+ }
+
+ @Override
+ public ConnectorMetadata getMetadata(ConnectorTransactionHandle transaction)
+ {
+ ConnectorMetadata metadata = transactionManager.get(transaction);
+ checkArgument(metadata != null, "no such transaction: %s", transaction);
+ return new ClassLoaderSafeConnectorMetadata(metadata, classLoader);
+ }
+
+ @Override
+ public ConnectorSplitManager getSplitManager()
+ {
+ return splitManager;
+ }
+
+ @Override
+ public ConnectorPageSourceProvider getPageSourceProvider()
+ {
+ return pageSourceProvider;
+ }
+
+ @Override
+ public ConnectorPageSinkProvider getPageSinkProvider()
+ {
+ return pageSinkProvider;
+ }
+
+ @Override
+ public ConnectorNodePartitioningProvider getNodePartitioningProvider()
+ {
+ return nodePartitioningProvider;
+ }
+
+ @Override
+ public ConnectorPlanOptimizerProvider getConnectorPlanOptimizerProvider()
+ {
+ return planOptimizerProvider;
+ }
+
+ @Override
+ public Set<SystemTable> getSystemTables()
+ {
+ return systemTables;
+ }
+
+ @Override
+ public Set<Procedure> getProcedures()
+ {
+ return procedures;
+ }
+
+ @Override
+ public List<PropertyMetadata<?>> getSessionProperties()
+ {
+ return sessionProperties;
+ }
+
+ @Override
+ public List<PropertyMetadata<?>> getSchemaProperties()
+ {
+ return schemaProperties;
+ }
+
+ @Override
+ public List<PropertyMetadata<?>> getAnalyzeProperties()
+ {
+ return analyzeProperties;
+ }
+
+ @Override
+ public List<PropertyMetadata<?>> getTableProperties()
+ {
+ return tableProperties;
+ }
+
+ @Override
+ public ConnectorAccessControl getAccessControl()
+ {
+ return accessControl;
+ }
+
+ @Override
+ public boolean isSingleStatementWritesOnly()
+ {
+ return false;
+ }
+
+ @Override
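+ /**
+ * Hive supports only READ_UNCOMMITTED isolation; transaction metadata is created with the
+ * plugin class loader set as the thread context class loader.
+ */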
+ public ConnectorTransactionHandle beginTransaction(IsolationLevel isolationLevel, boolean readOnly)
+ {
+ checkConnectorSupports(READ_UNCOMMITTED, isolationLevel);
+ ConnectorTransactionHandle transaction = new HiveTransactionHandle();
+ try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
+ transactionManager.put(transaction, metadataFactory.get());
+ }
+ return transaction;
+ }
+
+ @Override
+ public void commit(ConnectorTransactionHandle transaction)
+ {
+ TransactionalMetadata metadata = transactionManager.remove(transaction);
+ checkArgument(metadata != null, "no such transaction: %s", transaction);
+ try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
+ metadata.commit();
+ }
+ }
+
+ @Override
+ public void rollback(ConnectorTransactionHandle transaction)
+ {
+ TransactionalMetadata metadata = transactionManager.remove(transaction);
+ checkArgument(metadata != null, "no such transaction: %s", transaction);
+ try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
+ metadata.rollback();
+ }
+ }
+
+ @Override
+ public final void shutdown()
+ {
+ try {
+ lifeCycleManager.stop();
+ }
+ catch (Exception e) {
+ log.error(e, "Error shutting down connector");
+ }
+ }
+
+ @Override
+ public ConnectorMetadata getConnectorMetadata()
+ {
+ try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
+ return new ClassLoaderSafeConnectorMetadata(metadataFactory.get(), classLoader);
+ }
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveConnectorFactory.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveConnectorFactory.java
new file mode 100644
index 00000000..f4b828c6
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveConnectorFactory.java
@@ -0,0 +1,178 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.google.common.collect.ImmutableSet;
+import com.google.inject.Injector;
+import com.google.inject.Key;
+import com.google.inject.TypeLiteral;
+import io.airlift.bootstrap.Bootstrap;
+import io.airlift.bootstrap.LifeCycleManager;
+import io.airlift.event.client.EventModule;
+import io.airlift.json.JsonModule;
+import io.prestosql.plugin.base.jmx.MBeanServerModule;
+import io.prestosql.plugin.hive.authentication.HiveAuthenticationModule;
+import io.prestosql.plugin.hive.gcs.HiveGcsModule;
+import io.prestosql.plugin.hive.metastore.HiveMetastore;
+import io.prestosql.plugin.hive.metastore.HiveMetastoreModule;
+import io.prestosql.plugin.hive.omnidata.OmniDataNodeManager;
+import io.prestosql.plugin.hive.rule.HivePushdownUtil;
+import io.prestosql.plugin.hive.s3.HiveS3Module;
+import io.prestosql.plugin.hive.security.HiveSecurityModule;
+import io.prestosql.plugin.hive.security.SystemTableAwareAccessControl;
+import io.prestosql.spi.NodeManager;
+import io.prestosql.spi.PageIndexerFactory;
+import io.prestosql.spi.PageSorter;
+import io.prestosql.spi.VersionEmbedder;
+import io.prestosql.spi.classloader.ThreadContextClassLoader;
+import io.prestosql.spi.connector.Connector;
+import io.prestosql.spi.connector.ConnectorAccessControl;
+import io.prestosql.spi.connector.ConnectorContext;
+import io.prestosql.spi.connector.ConnectorFactory;
+import io.prestosql.spi.connector.ConnectorHandleResolver;
+import io.prestosql.spi.connector.ConnectorNodePartitioningProvider;
+import io.prestosql.spi.connector.ConnectorPageSinkProvider;
+import io.prestosql.spi.connector.ConnectorPageSourceProvider;
+import io.prestosql.spi.connector.ConnectorPlanOptimizerProvider;
+import io.prestosql.spi.connector.ConnectorSplitManager;
+import io.prestosql.spi.connector.classloader.ClassLoaderSafeConnectorPageSinkProvider;
+import io.prestosql.spi.connector.classloader.ClassLoaderSafeConnectorPageSourceProvider;
+import io.prestosql.spi.connector.classloader.ClassLoaderSafeConnectorSplitManager;
+import io.prestosql.spi.connector.classloader.ClassLoaderSafeNodePartitioningProvider;
+import io.prestosql.spi.function.FunctionMetadataManager;
+import io.prestosql.spi.function.StandardFunctionResolution;
+import io.prestosql.spi.heuristicindex.IndexClient;
+import io.prestosql.spi.plan.FilterStatsCalculatorService;
+import io.prestosql.spi.procedure.Procedure;
+import io.prestosql.spi.relation.RowExpressionService;
+import io.prestosql.spi.type.TypeManager;
+import org.weakref.jmx.guice.MBeanModule;
+
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Strings.isNullOrEmpty;
+import static com.google.common.base.Throwables.throwIfUnchecked;
+import static java.util.Objects.requireNonNull;
+
+public class HiveConnectorFactory
+ implements ConnectorFactory
+{
+ private final String name;
+ private final ClassLoader classLoader;
+ private final Optional<HiveMetastore> metastore;
+
+ public HiveConnectorFactory(String name, ClassLoader classLoader, Optional<HiveMetastore> metastore)
+ {
+ checkArgument(!isNullOrEmpty(name), "name is null or empty");
+ this.name = name;
+ this.classLoader = requireNonNull(classLoader, "classLoader is null");
+ this.metastore = requireNonNull(metastore, "metastore is null");
+ }
+
+ @Override
+ public String getName()
+ {
+ return name;
+ }
+
+ @Override
+ public ConnectorHandleResolver getHandleResolver()
+ {
+ return new HiveHandleResolver();
+ }
+
+ @Override
+ public Connector create(String catalogName, Map<String, String> config, ConnectorContext context)
+ {
+ requireNonNull(config, "config is null");
+
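+ // Airlift Bootstrap composes the Guice modules (metastore, S3/GCS, security, authentication,
+ // procedures) and binds engine services obtained from the ConnectorContext.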
+ try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
+ Bootstrap app = new Bootstrap(
+ new EventModule(),
+ new MBeanModule(),
+ new ConnectorObjectNameGeneratorModule(catalogName),
+ new JsonModule(),
+ new HiveModule(),
+ new HiveS3Module(),
+ new HiveGcsModule(),
+ new HiveMetastoreModule(metastore),
+ new HiveSecurityModule(),
+ new HiveAuthenticationModule(),
+ new HiveProcedureModule(),
+ new MBeanServerModule(),
+ binder -> {
+ binder.bind(NodeVersion.class).toInstance(new NodeVersion(context.getNodeManager().getCurrentNode().getVersion()));
+ binder.bind(NodeManager.class).toInstance(context.getNodeManager());
+ binder.bind(VersionEmbedder.class).toInstance(context.getVersionEmbedder());
+ binder.bind(TypeManager.class).toInstance(context.getTypeManager());
+ binder.bind(PageIndexerFactory.class).toInstance(context.getPageIndexerFactory());
+ binder.bind(PageSorter.class).toInstance(context.getPageSorter());
+ binder.bind(HiveCatalogName.class).toInstance(new HiveCatalogName(catalogName));
+ binder.bind(IndexClient.class).toInstance(context.getIndexClient());
+ binder.bind(StandardFunctionResolution.class).toInstance(context.getStandardFunctionResolution());
+ binder.bind(FunctionMetadataManager.class).toInstance(context.getFunctionMetadataManager());
+ binder.bind(FilterStatsCalculatorService.class).toInstance(context.getFilterStatsCalculatorService());
+ binder.bind(RowExpressionService.class).toInstance(context.getRowExpressionService());
+ });
+
+ Injector injector = app
+ .strictConfig()
+ .doNotInitializeLogging()
+ .setRequiredConfigurationProperties(config)
+ .initialize();
+
+ LifeCycleManager lifeCycleManager = injector.getInstance(LifeCycleManager.class);
+ HiveMetadataFactory metadataFactory = injector.getInstance(HiveMetadataFactory.class);
+ HiveTransactionManager transactionManager = injector.getInstance(HiveTransactionManager.class);
+ ConnectorSplitManager splitManager = injector.getInstance(ConnectorSplitManager.class);
+ ConnectorPageSourceProvider connectorPageSource = injector.getInstance(ConnectorPageSourceProvider.class);
+ ConnectorPageSinkProvider pageSinkProvider = injector.getInstance(ConnectorPageSinkProvider.class);
+ ConnectorNodePartitioningProvider connectorDistributionProvider = injector.getInstance(ConnectorNodePartitioningProvider.class);
+ HiveSessionProperties hiveSessionProperties = injector.getInstance(HiveSessionProperties.class);
+ HiveTableProperties hiveTableProperties = injector.getInstance(HiveTableProperties.class);
+ HiveAnalyzeProperties hiveAnalyzeProperties = injector.getInstance(HiveAnalyzeProperties.class);
+ ConnectorAccessControl accessControl = new SystemTableAwareAccessControl(injector.getInstance(ConnectorAccessControl.class));
+ Set<Procedure> procedures = injector.getInstance(Key.get(new TypeLiteral<Set<Procedure>>() {}));
+ ConnectorPlanOptimizerProvider planOptimizerProvider = injector.getInstance(ConnectorPlanOptimizerProvider.class);
+ OmniDataNodeManager nodeManagerInstance = injector.getInstance(OmniDataNodeManager.class);
+ HivePushdownUtil.setOmniDataNodeManager(nodeManagerInstance);
+ nodeManagerInstance.startPollingNodeStates();
+
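+ // Wrap the providers in ClassLoaderSafe* delegates so engine calls into the connector
+ // execute with the plugin class loader.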
+ return new HiveConnector(
+ lifeCycleManager,
+ metadataFactory,
+ transactionManager,
+ new ClassLoaderSafeConnectorSplitManager(splitManager, classLoader),
+ new ClassLoaderSafeConnectorPageSourceProvider(connectorPageSource, classLoader),
+ new ClassLoaderSafeConnectorPageSinkProvider(pageSinkProvider, classLoader),
+ new ClassLoaderSafeNodePartitioningProvider(connectorDistributionProvider, classLoader),
+ ImmutableSet.of(),
+ procedures,
+ hiveSessionProperties.getSessionProperties(),
+ HiveSchemaProperties.SCHEMA_PROPERTIES,
+ hiveTableProperties.getTableProperties(),
+ hiveAnalyzeProperties.getAnalyzeProperties(),
+ accessControl,
+ planOptimizerProvider,
+ classLoader);
+ }
+ catch (Exception e) {
+ throwIfUnchecked(e);
+ throw new RuntimeException(e);
+ }
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveDecimalParser.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveDecimalParser.java
new file mode 100644
index 00000000..8da87b81
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveDecimalParser.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import io.prestosql.spi.type.DecimalType;
+
+import java.math.BigDecimal;
+
+import static io.prestosql.spi.type.Decimals.rescale;
+import static java.math.RoundingMode.HALF_UP;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+public final class HiveDecimalParser
+{
+ private HiveDecimalParser() {}
+
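+ /**
+ * Parses a decimal from its UTF-8 text form, rounding HALF_UP (as Hive does) when the parsed
+ * scale exceeds the column scale, then rescales the value to the target DecimalType.
+ */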
+ public static BigDecimal parseHiveDecimal(byte[] bytes, int start, int length, DecimalType columnType)
+ {
+ BigDecimal parsed = new BigDecimal(new String(bytes, start, length, UTF_8));
+ if (parsed.scale() > columnType.getScale()) {
+ // Hive rounds HALF_UP too
+ parsed = parsed.setScale(columnType.getScale(), HALF_UP);
+ }
+ return rescale(parsed, columnType);
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveDeleteAsInsertTableHandle.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveDeleteAsInsertTableHandle.java
new file mode 100644
index 00000000..766085bc
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveDeleteAsInsertTableHandle.java
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2018-2020. Huawei Technologies Co., Ltd. All rights reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import io.prestosql.plugin.hive.metastore.HivePageSinkMetadata;
+import io.prestosql.spi.connector.ConnectorDeleteAsInsertTableHandle;
+
+import java.util.List;
+import java.util.Optional;
+
+public class HiveDeleteAsInsertTableHandle
+ extends HiveWritableTableHandle
+ implements ConnectorDeleteAsInsertTableHandle
+{
+ @JsonCreator
+ public HiveDeleteAsInsertTableHandle(
+ @JsonProperty("schemaName") String schemaName,
+ @JsonProperty("tableName") String tableName,
+ @JsonProperty("inputColumns") List inputColumns,
+ @JsonProperty("pageSinkMetadata") HivePageSinkMetadata pageSinkMetadata,
+ @JsonProperty("locationHandle") LocationHandle locationHandle,
+ @JsonProperty("bucketProperty") Optional bucketProperty,
+ @JsonProperty("tableStorageFormat") HiveStorageFormat tableStorageFormat,
+ @JsonProperty("partitionStorageFormat") HiveStorageFormat partitionStorageFormat)
+ {
+ super(
+ schemaName,
+ tableName,
+ inputColumns,
+ pageSinkMetadata,
+ locationHandle,
+ bucketProperty,
+ tableStorageFormat,
+ partitionStorageFormat,
+ false);
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveErrorCode.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveErrorCode.java
new file mode 100644
index 00000000..a1ae6b04
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveErrorCode.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import io.prestosql.spi.ErrorCode;
+import io.prestosql.spi.ErrorCodeSupplier;
+import io.prestosql.spi.ErrorType;
+
+import static io.prestosql.spi.ErrorType.EXTERNAL;
+import static io.prestosql.spi.ErrorType.INTERNAL_ERROR;
+import static io.prestosql.spi.ErrorType.USER_ERROR;
+
+public enum HiveErrorCode
+ implements ErrorCodeSupplier
+{
+ HIVE_METASTORE_ERROR(0, EXTERNAL),
+ HIVE_CURSOR_ERROR(1, EXTERNAL),
+ HIVE_TABLE_OFFLINE(2, USER_ERROR),
+ HIVE_CANNOT_OPEN_SPLIT(3, EXTERNAL),
+ HIVE_FILE_NOT_FOUND(4, EXTERNAL),
+ HIVE_UNKNOWN_ERROR(5, EXTERNAL),
+ HIVE_PARTITION_OFFLINE(6, USER_ERROR),
+ HIVE_BAD_DATA(7, EXTERNAL),
+ HIVE_PARTITION_SCHEMA_MISMATCH(8, EXTERNAL),
+ HIVE_MISSING_DATA(9, EXTERNAL),
+ HIVE_INVALID_PARTITION_VALUE(10, EXTERNAL),
+ HIVE_TIMEZONE_MISMATCH(11, EXTERNAL),
+ HIVE_INVALID_METADATA(12, EXTERNAL),
+ HIVE_INVALID_VIEW_DATA(13, EXTERNAL),
+ HIVE_DATABASE_LOCATION_ERROR(14, EXTERNAL),
+ HIVE_PATH_ALREADY_EXISTS(15, EXTERNAL),
+ HIVE_FILESYSTEM_ERROR(16, EXTERNAL),
+ // code HIVE_WRITER_ERROR(17) is deprecated
+ HIVE_SERDE_NOT_FOUND(18, EXTERNAL),
+ HIVE_UNSUPPORTED_FORMAT(19, EXTERNAL),
+ HIVE_PARTITION_READ_ONLY(20, USER_ERROR),
+ HIVE_TOO_MANY_OPEN_PARTITIONS(21, USER_ERROR),
+ HIVE_CONCURRENT_MODIFICATION_DETECTED(22, EXTERNAL),
+ HIVE_COLUMN_ORDER_MISMATCH(23, USER_ERROR),
+ HIVE_FILE_MISSING_COLUMN_NAMES(24, EXTERNAL),
+ HIVE_WRITER_OPEN_ERROR(25, EXTERNAL),
+ HIVE_WRITER_CLOSE_ERROR(26, EXTERNAL),
+ HIVE_WRITER_DATA_ERROR(27, EXTERNAL),
+ HIVE_INVALID_BUCKET_FILES(28, EXTERNAL),
+ HIVE_EXCEEDED_PARTITION_LIMIT(29, USER_ERROR),
+ HIVE_WRITE_VALIDATION_FAILED(30, INTERNAL_ERROR),
+ HIVE_PARTITION_DROPPED_DURING_QUERY(31, EXTERNAL),
+ HIVE_TABLE_READ_ONLY(32, USER_ERROR),
+ HIVE_PARTITION_NOT_READABLE(33, USER_ERROR),
+ HIVE_TABLE_NOT_READABLE(34, USER_ERROR),
+ HIVE_TABLE_DROPPED_DURING_QUERY(35, EXTERNAL),
+ // HIVE_TOO_MANY_BUCKET_SORT_FILES(36) is deprecated
+ HIVE_CORRUPTED_COLUMN_STATISTICS(37, EXTERNAL),
+ HIVE_EXCEEDED_SPLIT_BUFFERING_LIMIT(38, USER_ERROR),
+ HIVE_UNKNOWN_COLUMN_STATISTIC_TYPE(39, INTERNAL_ERROR),
+ HIVE_TABLE_LOCK_NOT_ACQUIRED(40, EXTERNAL),
+ HIVE_OPERATOR_OFFLOAD_FAIL(41, EXTERNAL)
+ /**/;
+
+ private final ErrorCode errorCode;
+
+ HiveErrorCode(int code, ErrorType type)
+ {
+ errorCode = new ErrorCode(code + 0x0100_0000, name(), type);
+ }
+
+ @Override
+ public ErrorCode toErrorCode()
+ {
+ return errorCode;
+ }
+}
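A quick check of the code-space arithmetic above: each constant's numeric code is offset by the 0x0100_0000 base reserved for this connector. A sketch using only the enum and SPI classes already shown:

    io.prestosql.spi.ErrorCode code = HiveErrorCode.HIVE_CURSOR_ERROR.toErrorCode();
    // declared as HIVE_CURSOR_ERROR(1, EXTERNAL): 1 + 0x0100_0000 = 16777217
    assert code.getCode() == 0x0100_0001;
    assert code.getName().equals("HIVE_CURSOR_ERROR");
    assert code.getType() == io.prestosql.spi.ErrorType.EXTERNAL;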
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveEventClient.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveEventClient.java
new file mode 100644
index 00000000..6c53ce6c
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveEventClient.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import io.airlift.event.client.AbstractEventClient;
+import io.airlift.log.Logger;
+
+public class HiveEventClient
+ extends AbstractEventClient
+{
+ private static final Logger log = Logger.get(HiveEventClient.class);
+
+ @Override
+ public <T> void postEvent(T event)
+ {
+ if (!(event instanceof WriteCompletedEvent)) {
+ return;
+ }
+ WriteCompletedEvent writeCompletedEvent = (WriteCompletedEvent) event;
+ log.debug("File created: query: %s, schema: %s, table: %s, partition: '%s', format: %s, size: %s, path: %s",
+ writeCompletedEvent.getQueryId(),
+ writeCompletedEvent.getSchemaName(),
+ writeCompletedEvent.getTableName(),
+ writeCompletedEvent.getPartitionName(),
+ writeCompletedEvent.getStorageFormat(),
+ writeCompletedEvent.getBytes(),
+ writeCompletedEvent.getPath());
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveFileWriter.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveFileWriter.java
new file mode 100644
index 00000000..ad008884
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveFileWriter.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.google.common.collect.ImmutableList;
+import io.prestosql.spi.Page;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import java.util.Optional;
+
+public interface HiveFileWriter
+{
+ long getWrittenBytes();
+
+ long getSystemMemoryUsage();
+
+ void appendRows(Page dataPage);
+
+ void commit();
+
+ void rollback();
+
+ long getValidationCpuNanos();
+
+ default Optional<Runnable> getVerificationTask()
+ {
+ return Optional.empty();
+ }
+
+ default void initWriter(boolean isAcid, Path path, FileSystem fileSystem)
+ {
+ }
+
+ default ImmutableList<String> getExtraPartitionFiles()
+ {
+ return ImmutableList.of();
+ }
+
+ default ImmutableList<String> getMiscData()
+ {
+ return ImmutableList.of();
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveFileWriterFactory.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveFileWriterFactory.java
new file mode 100644
index 00000000..b4fdbfce
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveFileWriterFactory.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import io.prestosql.plugin.hive.metastore.StorageFormat;
+import io.prestosql.spi.connector.ConnectorSession;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
+import org.apache.hadoop.mapred.JobConf;
+
+import java.util.List;
+import java.util.Optional;
+import java.util.Properties;
+
+public interface HiveFileWriterFactory
+{
+ Optional<HiveFileWriter> createFileWriter(
+ Path path,
+ List<String> inputColumnNames,
+ StorageFormat storageFormat,
+ Properties schema,
+ JobConf conf,
+ ConnectorSession session,
+ Optional<AcidOutputFormat.Options> acidOptions,
+ Optional<HiveACIDWriteType> acidWriteType);
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveHandleResolver.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveHandleResolver.java
new file mode 100644
index 00000000..2fb1d7c1
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveHandleResolver.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import io.prestosql.spi.connector.ColumnHandle;
+import io.prestosql.spi.connector.ConnectorDeleteAsInsertTableHandle;
+import io.prestosql.spi.connector.ConnectorHandleResolver;
+import io.prestosql.spi.connector.ConnectorInsertTableHandle;
+import io.prestosql.spi.connector.ConnectorOutputTableHandle;
+import io.prestosql.spi.connector.ConnectorPartitioningHandle;
+import io.prestosql.spi.connector.ConnectorSplit;
+import io.prestosql.spi.connector.ConnectorTableHandle;
+import io.prestosql.spi.connector.ConnectorTransactionHandle;
+import io.prestosql.spi.connector.ConnectorUpdateTableHandle;
+import io.prestosql.spi.connector.ConnectorVacuumTableHandle;
+
+public class HiveHandleResolver
+ implements ConnectorHandleResolver
+{
+ @Override
+ public Class<? extends ConnectorTableHandle> getTableHandleClass()
+ {
+ return HiveTableHandle.class;
+ }
+
+ @Override
+ public Class<? extends ColumnHandle> getColumnHandleClass()
+ {
+ return HiveColumnHandle.class;
+ }
+
+ @Override
+ public Class<? extends ConnectorSplit> getSplitClass()
+ {
+ return HiveSplitWrapper.class;
+ }
+
+ @Override
+ public Class<? extends ConnectorOutputTableHandle> getOutputTableHandleClass()
+ {
+ return HiveOutputTableHandle.class;
+ }
+
+ @Override
+ public Class<? extends ConnectorInsertTableHandle> getInsertTableHandleClass()
+ {
+ return HiveInsertTableHandle.class;
+ }
+
+ @Override
+ public Class<? extends ConnectorUpdateTableHandle> getUpdateTableHandleClass()
+ {
+ return HiveUpdateTableHandle.class;
+ }
+
+ @Override
+ public Class<? extends ConnectorDeleteAsInsertTableHandle> getDeleteAsInsertTableHandleClass()
+ {
+ return HiveDeleteAsInsertTableHandle.class;
+ }
+
+ @Override
+ public Class<? extends ConnectorVacuumTableHandle> getVacuumTableHandleClass()
+ {
+ return HiveVacuumTableHandle.class;
+ }
+
+ @Override
+ public Class<? extends ConnectorTransactionHandle> getTransactionHandleClass()
+ {
+ return HiveTransactionHandle.class;
+ }
+
+ @Override
+ public Class<? extends ConnectorPartitioningHandle> getPartitioningHandleClass()
+ {
+ return HivePartitioningHandle.class;
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveHdfsConfiguration.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveHdfsConfiguration.java
new file mode 100644
index 00000000..32000095
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveHdfsConfiguration.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.google.common.collect.ImmutableSet;
+import io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext;
+import io.prestosql.plugin.hive.util.ConfigurationUtils;
+import org.apache.hadoop.conf.Configuration;
+
+import javax.inject.Inject;
+
+import java.net.URI;
+import java.util.Set;
+
+import static java.util.Objects.requireNonNull;
+
+public class HiveHdfsConfiguration
+ implements HdfsConfiguration
+{
+ private static final Configuration INITIAL_CONFIGURATION = ConfigurationUtils.getInitialConfiguration();
+
+ @SuppressWarnings("ThreadLocalNotStaticFinal")
+ private final ThreadLocal<Configuration> hadoopConfiguration = new ThreadLocal<Configuration>()
+ {
+ @Override
+ protected Configuration initialValue()
+ {
+ Configuration configuration = new Configuration(false);
+ ConfigurationUtils.copy(INITIAL_CONFIGURATION, configuration);
+ initializer.initializeConfiguration(configuration);
+ return configuration;
+ }
+ };
+
+ private final HdfsConfigurationInitializer initializer;
+ private final Set<DynamicConfigurationProvider> dynamicProviders;
+
+ @Inject
+ public HiveHdfsConfiguration(HdfsConfigurationInitializer initializer, Set<DynamicConfigurationProvider> dynamicProviders)
+ {
+ this.initializer = requireNonNull(initializer, "initializer is null");
+ this.dynamicProviders = ImmutableSet.copyOf(requireNonNull(dynamicProviders, "dynamicProviders is null"));
+ }
+
+ @Override
+ public Configuration getConfiguration(HdfsContext context, URI uri)
+ {
+ if (dynamicProviders.isEmpty()) {
+ // use the same configuration for everything
+ return hadoopConfiguration.get();
+ }
+ Configuration config = ConfigurationUtils.copy(hadoopConfiguration.get());
+ for (DynamicConfigurationProvider provider : dynamicProviders) {
+ provider.updateConfiguration(config, context, uri);
+ }
+ return config;
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveInputInfo.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveInputInfo.java
new file mode 100644
index 00000000..b62b8644
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveInputInfo.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import java.util.List;
+
+public class HiveInputInfo
+{
+ private final List<String> partitionIds;
+ // Code that serializes HiveInputInfo into logs often needs to limit the length of log entries.
+ // This boolean field allows such code to mark the log entry as length-limited.
+ private final boolean truncated;
+
+ @JsonCreator
+ public HiveInputInfo(
+ @JsonProperty("partitionIds") List partitionIds,
+ @JsonProperty("truncated") boolean truncated)
+ {
+ this.partitionIds = partitionIds;
+ this.truncated = truncated;
+ }
+
+ @JsonProperty
+ public List<String> getPartitionIds()
+ {
+ return partitionIds;
+ }
+
+ @JsonProperty
+ public boolean isTruncated()
+ {
+ return truncated;
+ }
+}
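HiveInputInfo is a plain Jackson-bound value object; a round-trip sketch under the assumption that a standard ObjectMapper is used (openLooKeng wires its own JSON codec infrastructure):

    // Hypothetical helper, not part of the connector.
    static HiveInputInfo roundTrip(HiveInputInfo info) throws java.io.IOException
    {
        com.fasterxml.jackson.databind.ObjectMapper mapper = new com.fasterxml.jackson.databind.ObjectMapper();
        // e.g. {"partitionIds":["ds=2021-01-01"],"truncated":false}
        String json = mapper.writeValueAsString(info);
        return mapper.readValue(json, HiveInputInfo.class);
    }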
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveInsertTableHandle.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveInsertTableHandle.java
new file mode 100644
index 00000000..c74642a9
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveInsertTableHandle.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import io.prestosql.plugin.hive.metastore.HivePageSinkMetadata;
+import io.prestosql.spi.connector.ConnectorInsertTableHandle;
+
+import java.util.List;
+import java.util.Optional;
+
+public class HiveInsertTableHandle
+ extends HiveWritableTableHandle
+ implements ConnectorInsertTableHandle
+{
+ @JsonCreator
+ public HiveInsertTableHandle(
+ @JsonProperty("schemaName") String schemaName,
+ @JsonProperty("tableName") String tableName,
+ @JsonProperty("inputColumns") List inputColumns,
+ @JsonProperty("pageSinkMetadata") HivePageSinkMetadata pageSinkMetadata,
+ @JsonProperty("locationHandle") LocationHandle locationHandle,
+ @JsonProperty("bucketProperty") Optional bucketProperty,
+ @JsonProperty("tableStorageFormat") HiveStorageFormat tableStorageFormat,
+ @JsonProperty("partitionStorageFormat") HiveStorageFormat partitionStorageFormat,
+ @JsonProperty("isOverwrite") boolean isOverwrite)
+ {
+ super(
+ schemaName,
+ tableName,
+ inputColumns,
+ pageSinkMetadata,
+ locationHandle,
+ bucketProperty,
+ tableStorageFormat,
+ partitionStorageFormat,
+ isOverwrite);
+ }
+}
diff --git a/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveLocationService.java b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveLocationService.java
new file mode 100644
index 00000000..f415684d
--- /dev/null
+++ b/omnidata/omnidata-openlookeng-connector/connector/src/main/java/io/prestosql/plugin/hive/HiveLocationService.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext;
+import io.prestosql.plugin.hive.LocationHandle.WriteMode;
+import io.prestosql.plugin.hive.metastore.Partition;
+import io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore;
+import io.prestosql.plugin.hive.metastore.Table;
+import io.prestosql.spi.PrestoException;
+import io.prestosql.spi.connector.ConnectorSession;
+import org.apache.hadoop.fs.Path;
+
+import javax.inject.Inject;
+
+import java.util.Optional;
+
+import static io.prestosql.plugin.hive.HiveSessionProperties.isTemporaryStagingDirectoryEnabled;
+import static io.prestosql.plugin.hive.HiveWriteUtils.createTemporaryPath;
+import static io.prestosql.plugin.hive.HiveWriteUtils.getTableDefaultLocation;
+import static io.prestosql.plugin.hive.HiveWriteUtils.isHdfsEncrypted;
+import static io.prestosql.plugin.hive.HiveWriteUtils.isS3FileSystem;
+import static io.prestosql.plugin.hive.HiveWriteUtils.pathExists;
+import static io.prestosql.plugin.hive.LocationHandle.WriteMode.DIRECT_TO_TARGET_EXISTING_DIRECTORY;
+import static io.prestosql.plugin.hive.LocationHandle.WriteMode.DIRECT_TO_TARGET_NEW_DIRECTORY;
+import static io.prestosql.plugin.hive.LocationHandle.WriteMode.STAGE_AND_MOVE_TO_TARGET_DIRECTORY;
+import static io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED;
+import static java.lang.String.format;
+import static java.util.Objects.requireNonNull;
+
+public class HiveLocationService
+ implements LocationService
+{
+ private final HdfsEnvironment hdfsEnvironment;
+
+ @Inject
+ public HiveLocationService(HdfsEnvironment hdfsEnvironment)
+ {
+ this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
+ }
+
+ @Override
+ public LocationHandle forNewTable(SemiTransactionalHiveMetastore metastore, ConnectorSession session, String schemaName, String tableName, Optional