From 32a50ae02771c54b2eb353109bea9dfa44d79d92 Mon Sep 17 00:00:00 2001 From: Bin Fan Date: Tue, 11 Jul 2023 22:03:36 -0700 Subject: [PATCH 1/2] Revert "Revert "Remove Table module"" This commit fully removes the Table module and its related properties and protos. This reverts commit c8d69917cc60569cf1a053e85de294c282f53284. --- common/transport/pom.xml | 2 +- .../proto/grpc/table/layout/hive/hive.proto | 50 -- .../main/proto/grpc/table/table_master.proto | 403 ------------ common/transport/src/main/proto/proto.lock | 22 - .../main/proto/proto/journal/journal.proto | 10 - .../src/main/proto/proto/journal/table.proto | 89 --- .../src/main/java/alluxio/Constants.java | 5 - .../main/java/alluxio/conf/PropertyKey.java | 94 +-- .../ServiceVersionClientServiceHandler.java | 3 - .../journal/JournalEntryAssociation.java | 11 - .../journal/checkpoint/CheckpointName.java | 4 - .../journal/JournalEntryAssociationTest.java | 10 - .../job/plan/transform/CompactConfig.java | 171 ----- .../job/plan/transform/FieldSchema.java | 107 ---- .../alluxio/job/plan/transform/Format.java | 80 --- .../job/plan/transform/HiveConstants.java | 121 ---- .../job/plan/transform/PartitionInfo.java | 160 ----- .../job/plan/transform/HiveConstantsTest.java | 31 - .../composite/CompositeConfigTest.java | 7 - .../job/plan/transform/CompactDefinition.java | 211 ------- .../job/plan/transform/CompactTask.java | 77 --- .../job/plan/transform/compact/Compactor.java | 36 -- .../job/plan/transform/format/JobPath.java | 124 ---- .../transform/format/ReadWriterUtils.java | 70 --- .../format/SchemaConversionUtils.java | 121 ---- .../plan/transform/format/TableReader.java | 69 -- .../job/plan/transform/format/TableRow.java | 33 - .../plan/transform/format/TableSchema.java | 24 - .../plan/transform/format/TableWriter.java | 75 --- .../plan/transform/format/csv/CsvReader.java | 130 ---- .../job/plan/transform/format/csv/CsvRow.java | 139 ---- .../plan/transform/format/csv/CsvSchema.java | 127 ---- 
.../plan/transform/format/csv/CsvUtils.java | 32 - .../plan/transform/format/csv/CsvWriter.java | 44 -- .../plan/transform/format/csv/Decimal.java | 95 --- .../plan/transform/format/orc/OrcReader.java | 103 --- .../job/plan/transform/format/orc/OrcRow.java | 134 ---- .../plan/transform/format/orc/OrcSchema.java | 85 --- .../format/parquet/ParquetReader.java | 102 --- .../transform/format/parquet/ParquetRow.java | 69 -- .../format/parquet/ParquetSchema.java | 60 -- .../format/parquet/ParquetWriter.java | 161 ----- .../format/tables/BytesCommitter.java | 34 - .../transform/format/tables/Committer.java | 27 - .../format/tables/RowsCommitter.java | 34 - .../transform/format/tables/TablesWriter.java | 94 --- .../services/alluxio.job.plan.PlanDefinition | 1 - .../job/plan/transform/BaseTransformTest.java | 47 -- .../CompactDefinitionSelectExecutorsTest.java | 81 --- .../plan/transform/format/JobPathTest.java | 80 --- .../plan/transform/format/ReadWriteTest.java | 77 --- .../transform/format/csv/DecimalTest.java | 50 -- .../transform/format/orc/OrcReaderTest.java | 52 -- dora/pom.xml | 1 - dora/shaded/client-hadoop3/pom.xml | 5 - dora/shaded/client/pom.xml | 5 - dora/table/base/pom.xml | 39 -- .../main/java/alluxio/table/ProtoUtils.java | 92 --- dora/table/client/pom.xml | 69 -- .../table/RetryHandlingTableMasterClient.java | 218 ------- .../client/table/TableMasterClient.java | 200 ------ dora/table/pom.xml | 37 -- dora/table/server/common/pom.xml | 69 -- .../alluxio/master/table/DatabaseInfo.java | 124 ---- .../table/common/BaseConfiguration.java | 100 --- .../alluxio/table/common/BaseProperty.java | 71 --- .../table/common/CatalogPathUtils.java | 62 -- .../table/common/ConfigurationUtils.java | 33 - .../java/alluxio/table/common/Layout.java | 76 --- .../alluxio/table/common/LayoutFactory.java | 29 - .../alluxio/table/common/LayoutRegistry.java | 71 --- .../alluxio/table/common/UdbPartition.java | 28 - .../table/common/layout/HiveLayout.java | 175 ------ 
.../common/transform/TransformContext.java | 58 -- .../common/transform/TransformDefinition.java | 105 ---- .../table/common/transform/TransformPlan.java | 90 --- .../transform/action/CompactAction.java | 98 --- .../transform/action/TransformAction.java | 29 - .../action/TransformActionFactory.java | 42 -- .../action/TransformActionRegistry.java | 81 --- .../action/TransformActionUtils.java | 54 -- .../table/common/udb/PathTranslator.java | 115 ---- .../table/common/udb/UdbBypassSpec.java | 76 --- .../table/common/udb/UdbConfiguration.java | 102 --- .../alluxio/table/common/udb/UdbContext.java | 101 --- .../alluxio/table/common/udb/UdbProperty.java | 44 -- .../alluxio/table/common/udb/UdbTable.java | 68 -- .../alluxio/table/common/udb/UdbUtils.java | 84 --- .../table/common/udb/UnderDatabase.java | 55 -- .../common/udb/UnderDatabaseFactory.java | 32 - .../common/udb/UnderDatabaseRegistry.java | 137 ---- .../alluxio.table.common.LayoutFactory | 12 - ...on.transform.action.TransformActionFactory | 12 - .../table/common/CatalogPathUtilsTest.java | 45 -- .../alluxio/table/common/TableTestUtils.java | 33 - .../table/common/layout/HiveLayoutTest.java | 89 --- .../transform/TransformDefinitionTest.java | 70 --- .../common/transform/TransformPlanTest.java | 45 -- .../transform/action/CompactActionTest.java | 82 --- .../transform/action/EarlyActionFactory.java | 28 - .../transform/action/LateActionFactory.java | 28 - .../action/TransformActionRegistryTest.java | 29 - .../table/common/udb/PathTranslatorTest.java | 205 ------ .../table/common/udb/UdbBypassSpecTest.java | 54 -- .../common/udb/UdbConfigurationTest.java | 106 ---- ...on.transform.action.TransformActionFactory | 2 - dora/table/server/master/pom.xml | 132 ---- .../alluxio/master/table/AlluxioCatalog.java | 478 -------------- .../master/table/BasePartitionScheme.java | 57 -- .../master/table/CatalogConfiguration.java | 45 -- .../alluxio/master/table/CatalogContext.java | 53 -- 
.../alluxio/master/table/CatalogProperty.java | 44 -- .../java/alluxio/master/table/Database.java | 594 ------------------ .../java/alluxio/master/table/DbConfig.java | 206 ------ .../master/table/DefaultTableMaster.java | 210 ------- .../java/alluxio/master/table/Domain.java | 122 ---- .../java/alluxio/master/table/Partition.java | 218 ------- .../alluxio/master/table/PartitionScheme.java | 72 --- .../master/table/PartitionedTableScheme.java | 49 -- .../main/java/alluxio/master/table/Table.java | 354 ----------- .../alluxio/master/table/TableMaster.java | 153 ----- .../TableMasterClientServiceHandler.java | 185 ------ .../master/table/TableMasterFactory.java | 61 -- .../table/UnpartitionedTableScheme.java | 48 -- .../table/transform/TransformJobInfo.java | 150 ----- .../table/transform/TransformManager.java | 478 -------------- .../services/alluxio.master.MasterFactory | 12 - .../master/table/AlluxioCatalogTest.java | 575 ----------------- .../alluxio/master/table/DbConfigTest.java | 115 ---- .../master/table/TableMasterFactoryTest.java | 87 --- .../alluxio/master/table/TestDatabase.java | 152 ----- .../alluxio/master/table/TestUdbFactory.java | 32 - .../alluxio/master/table/TestUdbTable.java | 193 ------ .../table/transform/TransformManagerTest.java | 355 ----------- ...uxio.table.common.udb.UnderDatabaseFactory | 12 - dora/table/server/pom.xml | 36 -- dora/table/server/underdb/glue/pom.xml | 152 ----- .../table/under/glue/GlueDatabase.java | 524 --------------- .../table/under/glue/GlueDatabaseFactory.java | 34 - .../table/under/glue/GluePartition.java | 60 -- .../alluxio/table/under/glue/GlueTable.java | 116 ---- .../alluxio/table/under/glue/GlueUtils.java | 366 ----------- .../alluxio/table/under/glue/Property.java | 257 -------- ...uxio.table.common.udb.UnderDatabaseFactory | 12 - .../table/under/glue/GlueDatabaseTest.java | 66 -- .../table/under/glue/GlueUtilsTest.java | 473 -------------- .../table/under/glue/MockGlueDatabase.java | 130 ---- 
.../table/under/glue/RemoteGlueTest.java | 76 --- dora/table/server/underdb/hive/pom.xml | 202 ------ .../table/under/hive/HiveDatabase.java | 324 ---------- .../table/under/hive/HiveDatabaseFactory.java | 34 - .../table/under/hive/HivePartition.java | 60 -- .../alluxio/table/under/hive/HiveTable.java | 106 ---- .../alluxio/table/under/hive/HiveUtils.java | 287 --------- .../alluxio/table/under/hive/Property.java | 172 ----- .../hive/util/AbstractHiveClientPool.java | 33 - .../hive/util/CompatibleMetastoreClient.java | 80 --- .../hive/util/DefaultHiveClientPool.java | 121 ---- .../under/hive/util/HMSClientFactory.java | 42 -- .../table/under/hive/util/HMSShim.java | 171 ----- .../under/hive/util/HiveClientPoolCache.java | 45 -- .../under/hive/util/HiveCompatibility.java | 40 -- ...uxio.table.common.udb.UnderDatabaseFactory | 12 - .../table/under/hive/HiveDatabaseTest.java | 93 --- .../table/under/hive/HiveUtilsTest.java | 383 ----------- dora/table/server/underdb/pom.xml | 87 --- dora/table/shell/pom.xml | 52 -- .../java/alluxio/cli/table/TableShell.java | 64 -- .../alluxio/cli/table/TableShellUtils.java | 72 --- .../table/command/AbstractTableCommand.java | 57 -- .../table/command/AttachDatabaseCommand.java | 136 ---- .../table/command/DetachDatabaseCommand.java | 77 --- .../table/command/ListDatabasesCommand.java | 129 ---- .../cli/table/command/LoadTableCommand.java | 115 ---- .../table/command/SyncDatabaseCommand.java | 72 --- .../table/command/TransformStatusCommand.java | 98 --- .../table/command/TransformTableCommand.java | 106 ---- .../cli/table/TransformTableCommandTest.java | 62 -- .../format/TableReaderIntegrationTest.java | 55 -- .../format/TableWriterIntegrationTest.java | 91 --- .../format/parquet/ParquetTestUtils.java | 46 -- 181 files changed, 2 insertions(+), 18859 deletions(-) delete mode 100644 common/transport/src/main/proto/grpc/table/layout/hive/hive.proto delete mode 100644 common/transport/src/main/proto/grpc/table/table_master.proto delete 
mode 100644 common/transport/src/main/proto/proto/journal/table.proto delete mode 100644 dora/job/common/src/main/java/alluxio/job/plan/transform/CompactConfig.java delete mode 100644 dora/job/common/src/main/java/alluxio/job/plan/transform/FieldSchema.java delete mode 100644 dora/job/common/src/main/java/alluxio/job/plan/transform/Format.java delete mode 100644 dora/job/common/src/main/java/alluxio/job/plan/transform/HiveConstants.java delete mode 100644 dora/job/common/src/main/java/alluxio/job/plan/transform/PartitionInfo.java delete mode 100644 dora/job/common/src/test/java/alluxio/job/plan/transform/HiveConstantsTest.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/CompactDefinition.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/CompactTask.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/compact/Compactor.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/JobPath.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/ReadWriterUtils.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/SchemaConversionUtils.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/TableReader.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/TableRow.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/TableSchema.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/TableWriter.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/CsvReader.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/CsvRow.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/CsvSchema.java delete mode 100644 
dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/CsvUtils.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/CsvWriter.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/Decimal.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/orc/OrcReader.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/orc/OrcRow.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/orc/OrcSchema.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/parquet/ParquetReader.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/parquet/ParquetRow.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/parquet/ParquetSchema.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/parquet/ParquetWriter.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/tables/BytesCommitter.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/tables/Committer.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/tables/RowsCommitter.java delete mode 100644 dora/job/server/src/main/java/alluxio/job/plan/transform/format/tables/TablesWriter.java delete mode 100644 dora/job/server/src/test/java/alluxio/job/plan/transform/BaseTransformTest.java delete mode 100644 dora/job/server/src/test/java/alluxio/job/plan/transform/CompactDefinitionSelectExecutorsTest.java delete mode 100644 dora/job/server/src/test/java/alluxio/job/plan/transform/format/JobPathTest.java delete mode 100644 dora/job/server/src/test/java/alluxio/job/plan/transform/format/ReadWriteTest.java delete mode 100644 dora/job/server/src/test/java/alluxio/job/plan/transform/format/csv/DecimalTest.java delete mode 100644 
dora/job/server/src/test/java/alluxio/job/plan/transform/format/orc/OrcReaderTest.java delete mode 100644 dora/table/base/pom.xml delete mode 100644 dora/table/base/src/main/java/alluxio/table/ProtoUtils.java delete mode 100644 dora/table/client/pom.xml delete mode 100644 dora/table/client/src/main/java/alluxio/client/table/RetryHandlingTableMasterClient.java delete mode 100644 dora/table/client/src/main/java/alluxio/client/table/TableMasterClient.java delete mode 100644 dora/table/pom.xml delete mode 100644 dora/table/server/common/pom.xml delete mode 100644 dora/table/server/common/src/main/java/alluxio/master/table/DatabaseInfo.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/BaseConfiguration.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/BaseProperty.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/CatalogPathUtils.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/ConfigurationUtils.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/Layout.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/LayoutFactory.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/LayoutRegistry.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/UdbPartition.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/layout/HiveLayout.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/transform/TransformContext.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/transform/TransformDefinition.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/transform/TransformPlan.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/transform/action/CompactAction.java delete mode 
100644 dora/table/server/common/src/main/java/alluxio/table/common/transform/action/TransformAction.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/transform/action/TransformActionFactory.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/transform/action/TransformActionRegistry.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/transform/action/TransformActionUtils.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/udb/PathTranslator.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbBypassSpec.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbConfiguration.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbContext.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbProperty.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbTable.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbUtils.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/udb/UnderDatabase.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/udb/UnderDatabaseFactory.java delete mode 100644 dora/table/server/common/src/main/java/alluxio/table/common/udb/UnderDatabaseRegistry.java delete mode 100644 dora/table/server/common/src/main/resources/META-INF/services/alluxio.table.common.LayoutFactory delete mode 100644 dora/table/server/common/src/main/resources/META-INF/services/alluxio.table.common.transform.action.TransformActionFactory delete mode 100644 dora/table/server/common/src/test/java/alluxio/table/common/CatalogPathUtilsTest.java delete mode 100644 dora/table/server/common/src/test/java/alluxio/table/common/TableTestUtils.java delete mode 100644 
dora/table/server/common/src/test/java/alluxio/table/common/layout/HiveLayoutTest.java delete mode 100644 dora/table/server/common/src/test/java/alluxio/table/common/transform/TransformDefinitionTest.java delete mode 100644 dora/table/server/common/src/test/java/alluxio/table/common/transform/TransformPlanTest.java delete mode 100644 dora/table/server/common/src/test/java/alluxio/table/common/transform/action/CompactActionTest.java delete mode 100644 dora/table/server/common/src/test/java/alluxio/table/common/transform/action/EarlyActionFactory.java delete mode 100644 dora/table/server/common/src/test/java/alluxio/table/common/transform/action/LateActionFactory.java delete mode 100644 dora/table/server/common/src/test/java/alluxio/table/common/transform/action/TransformActionRegistryTest.java delete mode 100644 dora/table/server/common/src/test/java/alluxio/table/common/udb/PathTranslatorTest.java delete mode 100644 dora/table/server/common/src/test/java/alluxio/table/common/udb/UdbBypassSpecTest.java delete mode 100644 dora/table/server/common/src/test/java/alluxio/table/common/udb/UdbConfigurationTest.java delete mode 100644 dora/table/server/common/src/test/resources/META-INF/services/alluxio.table.common.transform.action.TransformActionFactory delete mode 100644 dora/table/server/master/pom.xml delete mode 100644 dora/table/server/master/src/main/java/alluxio/master/table/AlluxioCatalog.java delete mode 100644 dora/table/server/master/src/main/java/alluxio/master/table/BasePartitionScheme.java delete mode 100644 dora/table/server/master/src/main/java/alluxio/master/table/CatalogConfiguration.java delete mode 100644 dora/table/server/master/src/main/java/alluxio/master/table/CatalogContext.java delete mode 100644 dora/table/server/master/src/main/java/alluxio/master/table/CatalogProperty.java delete mode 100644 dora/table/server/master/src/main/java/alluxio/master/table/Database.java delete mode 100644 
dora/table/server/master/src/main/java/alluxio/master/table/DbConfig.java delete mode 100644 dora/table/server/master/src/main/java/alluxio/master/table/DefaultTableMaster.java delete mode 100644 dora/table/server/master/src/main/java/alluxio/master/table/Domain.java delete mode 100644 dora/table/server/master/src/main/java/alluxio/master/table/Partition.java delete mode 100644 dora/table/server/master/src/main/java/alluxio/master/table/PartitionScheme.java delete mode 100644 dora/table/server/master/src/main/java/alluxio/master/table/PartitionedTableScheme.java delete mode 100644 dora/table/server/master/src/main/java/alluxio/master/table/Table.java delete mode 100644 dora/table/server/master/src/main/java/alluxio/master/table/TableMaster.java delete mode 100644 dora/table/server/master/src/main/java/alluxio/master/table/TableMasterClientServiceHandler.java delete mode 100644 dora/table/server/master/src/main/java/alluxio/master/table/TableMasterFactory.java delete mode 100644 dora/table/server/master/src/main/java/alluxio/master/table/UnpartitionedTableScheme.java delete mode 100644 dora/table/server/master/src/main/java/alluxio/master/table/transform/TransformJobInfo.java delete mode 100644 dora/table/server/master/src/main/java/alluxio/master/table/transform/TransformManager.java delete mode 100644 dora/table/server/master/src/main/resources/META-INF/services/alluxio.master.MasterFactory delete mode 100644 dora/table/server/master/src/test/java/alluxio/master/table/AlluxioCatalogTest.java delete mode 100644 dora/table/server/master/src/test/java/alluxio/master/table/DbConfigTest.java delete mode 100644 dora/table/server/master/src/test/java/alluxio/master/table/TableMasterFactoryTest.java delete mode 100644 dora/table/server/master/src/test/java/alluxio/master/table/TestDatabase.java delete mode 100644 dora/table/server/master/src/test/java/alluxio/master/table/TestUdbFactory.java delete mode 100644 
dora/table/server/master/src/test/java/alluxio/master/table/TestUdbTable.java delete mode 100644 dora/table/server/master/src/test/java/alluxio/master/table/transform/TransformManagerTest.java delete mode 100644 dora/table/server/master/src/test/resources/META-INF/services/alluxio.table.common.udb.UnderDatabaseFactory delete mode 100644 dora/table/server/pom.xml delete mode 100644 dora/table/server/underdb/glue/pom.xml delete mode 100644 dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/GlueDatabase.java delete mode 100644 dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/GlueDatabaseFactory.java delete mode 100644 dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/GluePartition.java delete mode 100644 dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/GlueTable.java delete mode 100644 dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/GlueUtils.java delete mode 100644 dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/Property.java delete mode 100644 dora/table/server/underdb/glue/src/main/resources/META-INF/services/alluxio.table.common.udb.UnderDatabaseFactory delete mode 100644 dora/table/server/underdb/glue/src/test/java/alluxio/table/under/glue/GlueDatabaseTest.java delete mode 100644 dora/table/server/underdb/glue/src/test/java/alluxio/table/under/glue/GlueUtilsTest.java delete mode 100644 dora/table/server/underdb/glue/src/test/java/alluxio/table/under/glue/MockGlueDatabase.java delete mode 100644 dora/table/server/underdb/glue/src/test/java/alluxio/table/under/glue/RemoteGlueTest.java delete mode 100644 dora/table/server/underdb/hive/pom.xml delete mode 100644 dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/HiveDatabase.java delete mode 100644 dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/HiveDatabaseFactory.java delete mode 100644 
dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/HivePartition.java delete mode 100644 dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/HiveTable.java delete mode 100644 dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/HiveUtils.java delete mode 100644 dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/Property.java delete mode 100644 dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/AbstractHiveClientPool.java delete mode 100644 dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/CompatibleMetastoreClient.java delete mode 100644 dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/DefaultHiveClientPool.java delete mode 100644 dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/HMSClientFactory.java delete mode 100644 dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/HMSShim.java delete mode 100644 dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/HiveClientPoolCache.java delete mode 100644 dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/HiveCompatibility.java delete mode 100644 dora/table/server/underdb/hive/src/main/resources/META-INF/services/alluxio.table.common.udb.UnderDatabaseFactory delete mode 100644 dora/table/server/underdb/hive/src/test/java/alluxio/table/under/hive/HiveDatabaseTest.java delete mode 100644 dora/table/server/underdb/hive/src/test/java/alluxio/table/under/hive/HiveUtilsTest.java delete mode 100644 dora/table/server/underdb/pom.xml delete mode 100644 dora/table/shell/pom.xml delete mode 100644 dora/table/shell/src/main/java/alluxio/cli/table/TableShell.java delete mode 100644 dora/table/shell/src/main/java/alluxio/cli/table/TableShellUtils.java delete mode 100644 dora/table/shell/src/main/java/alluxio/cli/table/command/AbstractTableCommand.java delete mode 100644 
dora/table/shell/src/main/java/alluxio/cli/table/command/AttachDatabaseCommand.java delete mode 100644 dora/table/shell/src/main/java/alluxio/cli/table/command/DetachDatabaseCommand.java delete mode 100644 dora/table/shell/src/main/java/alluxio/cli/table/command/ListDatabasesCommand.java delete mode 100644 dora/table/shell/src/main/java/alluxio/cli/table/command/LoadTableCommand.java delete mode 100644 dora/table/shell/src/main/java/alluxio/cli/table/command/SyncDatabaseCommand.java delete mode 100644 dora/table/shell/src/main/java/alluxio/cli/table/command/TransformStatusCommand.java delete mode 100644 dora/table/shell/src/main/java/alluxio/cli/table/command/TransformTableCommand.java delete mode 100644 dora/table/shell/src/test/java/alluxio/cli/table/TransformTableCommandTest.java delete mode 100644 dora/tests/src/test/java/alluxio/job/plan/transform/format/TableReaderIntegrationTest.java delete mode 100644 dora/tests/src/test/java/alluxio/job/plan/transform/format/TableWriterIntegrationTest.java delete mode 100644 dora/tests/src/test/java/alluxio/job/plan/transform/format/parquet/ParquetTestUtils.java diff --git a/common/transport/pom.xml b/common/transport/pom.xml index 3e96050b2a24..e86dfd322d91 100644 --- a/common/transport/pom.xml +++ b/common/transport/pom.xml @@ -144,7 +144,7 @@ - + diff --git a/common/transport/src/main/proto/grpc/table/layout/hive/hive.proto b/common/transport/src/main/proto/grpc/table/layout/hive/hive.proto deleted file mode 100644 index 994b4da3d198..000000000000 --- a/common/transport/src/main/proto/grpc/table/layout/hive/hive.proto +++ /dev/null @@ -1,50 +0,0 @@ -syntax = "proto2"; - -option java_multiple_files = true; -option java_package = "alluxio.grpc.table.layout.hive"; -option java_outer_classname = "HiveLayoutProto"; - -package alluxio.grpc.table.layout; - -import "grpc/table/table_master.proto"; - -message StorageFormat { - optional string serde = 1; - optional string input_format = 2; - optional string output_format = 3; - 
map serdelib_parameters = 4; -} - -message HiveBucketProperty { - repeated string bucketed_by = 1; - optional int64 bucket_count = 2; - repeated SortingColumn sorted_by = 3; -} - -message SortingColumn { - required string column_name = 1; - enum SortingOrder { - ASCENDING = 0; - DESCENDING = 1; - } - required SortingOrder order = 2; -} - -message Storage { - optional StorageFormat storage_format = 1; - optional string location = 2; - optional HiveBucketProperty bucket_property = 3; - optional bool skewed = 4; - map serde_parameters = 5; -} - -message PartitionInfo { - repeated string values = 1; // list of string values for each partition column - optional string partition_name = 2; // partition name in the form of "key=value" - - optional string db_name = 3; - optional string table_name = 4; - optional Storage storage = 5; - repeated FieldSchema data_cols = 6; - map parameters = 7; -} diff --git a/common/transport/src/main/proto/grpc/table/table_master.proto b/common/transport/src/main/proto/grpc/table/table_master.proto deleted file mode 100644 index b817eb884a02..000000000000 --- a/common/transport/src/main/proto/grpc/table/table_master.proto +++ /dev/null @@ -1,403 +0,0 @@ -syntax = "proto2"; - -option java_multiple_files = true; -option java_package = "alluxio.grpc.table"; -option java_outer_classname = "TableMasterProto"; - -package alluxio.grpc.table; - -import "grpc/job_master.proto"; - -message FieldSchema { - optional uint32 id = 1; - optional string name = 2; - optional string type = 3; - optional string comment = 4; -} - -message Schema { - repeated FieldSchema cols = 1; -} - -enum PrincipalType { - USER = 0; - ROLE = 1; -} - -message Database { - optional string db_name = 1; - optional string description = 2; - optional string location = 3; - map parameter = 4; - optional string owner_name = 5; - optional PrincipalType owner_type = 6; - optional string comment = 7; -} - -// next available id: 12 -message TableInfo { - optional string db_name = 1; - 
optional string table_name = 2; - enum TableType { - NATIVE = 0; - IMPORTED = 1; - } - optional TableType type = 3; - optional string owner = 4; - optional Schema schema = 5; - optional Layout layout = 6; - map parameters = 7; - - // partitioning scheme - repeated FieldSchema partition_cols = 8; - - optional int64 previous_version = 9; - optional int64 version = 10; - optional int64 version_creation_time = 11; -} - -// TODO(gpang): update -message LayoutSpec { - optional string spec = 1; -} - -// TODO(gpang): update -message PartitionSpec { - optional string spec = 1; -} - -message Layout { - optional string layout_type = 1; - optional LayoutSpec layout_spec = 2; - optional bytes layout_data = 3; - map stats = 4; -} - -message Transformation { - optional Layout layout = 1; - optional string definition = 2; -} - -// next available id: 6 -message Partition { - optional PartitionSpec partition_spec = 1; - optional Layout base_layout = 2; - /** - * The latest transformation is in the back of the list. 
- */ - repeated Transformation transformations = 3; - optional int64 version = 4; - optional int64 version_creation_time = 5; -} - -message ColumnStatisticsInfo { - optional string col_name = 1; - optional string col_type = 2; - optional ColumnStatisticsData data = 3; -} - -message ColumnStatisticsData { - oneof data { - BooleanColumnStatsData boolean_stats = 1; - LongColumnStatsData long_stats = 2; - DoubleColumnStatsData double_stats = 3; - StringColumnStatsData string_stats = 4; - BinaryColumnStatsData binary_stats = 5; - DecimalColumnStatsData decimal_stats = 6; - DateColumnStatsData date_stats = 7; - } -} - -message BooleanColumnStatsData { - optional int64 num_trues = 1; - optional int64 num_falses = 2; - optional int64 num_nulls = 3; - optional string bit_vectors = 4; -} - -message LongColumnStatsData { - optional int64 low_value = 1; - optional int64 high_value = 2; - optional int64 num_nulls = 3; - optional int64 num_distincts = 4; - optional string bit_vectors = 5; -} - -message DoubleColumnStatsData { - optional double low_value = 1; - optional double high_value = 2; - optional int64 num_nulls = 3; - optional int64 num_distincts = 4; - optional string bit_vectors = 5; -} - -message Decimal { - required int32 scale = 1; // force using scale first in Decimal.compareTo - required bytes unscaled = 2; -} -message DecimalColumnStatsData { - optional Decimal low_value = 1; - optional Decimal high_value = 2; - optional int64 num_nulls = 3; - optional int64 num_distincts = 4; - optional string bit_vectors = 5; -} - -message StringColumnStatsData { - optional int64 max_col_len = 1; - optional double avg_col_len = 2; - optional int64 num_nulls = 3; - optional int64 num_distincts = 4; - optional string bit_vectors = 5; -} - -message BinaryColumnStatsData { - optional int64 max_col_len = 1; - optional double avg_col_len = 2; - optional int64 num_nulls = 3; - optional string bit_vectors = 4; -} - -message Date { - required int64 days_since_epoch = 1; -} -message 
DateColumnStatsData { - optional Date low_value = 1; - optional Date high_value = 2; - optional int64 num_nulls = 3; - optional int64 num_distincts = 4; - optional string bit_vectors = 5; -} - -message SyncStatus { - map tables_errors = 1; - repeated string tables_ignored = 2; - repeated string tables_unchanged = 3; - repeated string tables_updated = 4; - repeated string tables_removed = 5; -} - -message GetAllDatabasesPRequest { -} - -message GetAllDatabasesPResponse { - repeated string database = 1; -} - -message GetAllTablesPRequest { - optional string database = 1; -} - -message GetAllTablesPResponse { - repeated string table = 1; -} - -message GetDatabasePRequest { - optional string db_name = 1; -} - -message GetDatabasePResponse { - optional Database db = 1; -} - -message GetTablePRequest { - optional string db_name = 1; - optional string table_name = 2; -} - -message GetTablePResponse { - optional TableInfo table_info = 1; -} - -message AttachDatabasePRequest { - optional string udb_type = 1; - optional string udb_connection_uri = 2; - optional string udb_db_name = 3; - optional string db_name = 4; - map options = 5; - optional bool ignore_sync_errors = 6; -} - -message AttachDatabasePResponse { - // TODO(gpang): remove in favor of status - optional bool success = 1; - optional SyncStatus sync_status = 2; -} - -message DetachDatabasePRequest { - optional string db_name = 1; -} - -message DetachDatabasePResponse { - optional bool success = 1; -} - -message SyncDatabasePRequest { - optional string db_name = 1; -} - -message SyncDatabasePResponse { - // TODO(gpang): remove in favor of status - optional bool success = 1; - optional SyncStatus status = 2; -} - -message FileStatistics { - map column = 1; //map column names to column statistics -} - -message GetTableColumnStatisticsPRequest { - optional string db_name = 1; - optional string table_name = 2; - repeated string col_names = 3; -} - -message GetPartitionColumnStatisticsPRequest { - optional string 
db_name = 1; - optional string table_name = 2; - repeated string col_names = 3; - repeated string part_names = 4; -} - -message GetTableColumnStatisticsPResponse { - repeated ColumnStatisticsInfo statistics = 1; -} - -message ColumnStatisticsList { - repeated ColumnStatisticsInfo statistics = 1; -} -message GetPartitionColumnStatisticsPResponse { - map partition_statistics = 1; -} - -message Value { - oneof value { - int64 long_type = 1; - double double_type = 2; - string string_type = 3; - bool boolean_type = 4; - } -} - -message Range { - optional Value low = 1; - optional Value high = 2; -} - -message RangeSet { - repeated Range ranges = 1; -} - -message EquatableValueSet { - repeated Value candidates = 1; - optional bool white_list = 2; -} - -message AllOrNoneSet { - optional bool all = 1; -} - -message Domain { - oneof value_set { - RangeSet range = 1; - EquatableValueSet equatable = 2; - AllOrNoneSet all_or_none = 3; - } -} - -message Constraint { - map column_constraints = 1; // maps column to constraint, columns not present are not constrained -} - -message ReadTablePRequest { - optional string db_name = 1; - optional string table_name = 2; - optional Constraint constraint = 3; -} - -message ReadTablePResponse { - repeated Partition partitions = 1; -} - -message TransformTablePRequest { - optional string db_name = 1; - optional string table_name = 2; - optional string definition = 3; -} - -message TransformTablePResponse { - optional int64 job_id = 1; -} - -message GetTransformJobInfoPRequest { - optional int64 job_id = 1; -} - -message TransformJobInfo { - optional string db_name = 1; - optional string table_name = 2; - optional string definition = 3; - optional int64 job_id = 4; - optional alluxio.grpc.job.Status job_status = 5; - optional string job_error = 6; -} - -message GetTransformJobInfoPResponse { - repeated TransformJobInfo info = 1; -} - -/** - * This interface contains table master service endpoints for Alluxio clients. 
- */ -service TableMasterClientService { - - /** - * Returns all databases in the catalog - */ - rpc GetAllDatabases(GetAllDatabasesPRequest) returns (GetAllDatabasesPResponse); - - /** - * Returns all tables in the database - */ - rpc GetAllTables(GetAllTablesPRequest) returns (GetAllTablesPResponse); - - /** - * Gets a database by name from the table master - */ - rpc GetDatabase(GetDatabasePRequest) returns (GetDatabasePResponse); - - /** - * Returns a specific table info - */ - rpc GetTable(GetTablePRequest) returns (GetTablePResponse); - - /** - * Attach an existing database into the catalog as a new database name - */ - rpc AttachDatabase(AttachDatabasePRequest) returns (AttachDatabasePResponse); - - /** - * Detach existing database into the catalog, removing any metadata about the table - */ - rpc DetachDatabase(DetachDatabasePRequest) returns (DetachDatabasePResponse); - - /** - * Sync existing database into the catalog - */ - rpc SyncDatabase(SyncDatabasePRequest) returns (SyncDatabasePResponse); - - rpc GetTableColumnStatistics(GetTableColumnStatisticsPRequest) returns (GetTableColumnStatisticsPResponse); - - rpc GetPartitionColumnStatistics(GetPartitionColumnStatisticsPRequest) returns (GetPartitionColumnStatisticsPResponse); - - rpc ReadTable(ReadTablePRequest) returns (ReadTablePResponse); - - rpc TransformTable(TransformTablePRequest) returns (TransformTablePResponse); - - /** - * Gets information of transformation jobs. - * If the job ID exists in the request, the information for that job is returned; - * Otherwise, information of all the jobs kept in table master will be returned. 
- */ - rpc GetTransformJobInfo(GetTransformJobInfoPRequest) returns (GetTransformJobInfoPResponse); -} diff --git a/common/transport/src/main/proto/proto.lock b/common/transport/src/main/proto/proto.lock index 74b2f7e8ff94..7d0b81d2f444 100644 --- a/common/transport/src/main/proto/proto.lock +++ b/common/transport/src/main/proto/proto.lock @@ -7777,28 +7777,6 @@ } ] } - ], - "imports": [ - { - "path": "grpc/table/table_master.proto" - } - ], - "package": { - "name": "alluxio.grpc.table.layout" - }, - "options": [ - { - "name": "java_multiple_files", - "value": "true" - }, - { - "name": "java_package", - "value": "alluxio.grpc.table.layout.hive" - }, - { - "name": "java_outer_classname", - "value": "HiveLayoutProto" - } ] } }, diff --git a/common/transport/src/main/proto/proto/journal/journal.proto b/common/transport/src/main/proto/proto/journal/journal.proto index b4e4bd84e837..6a968feee276 100644 --- a/common/transport/src/main/proto/proto/journal/journal.proto +++ b/common/transport/src/main/proto/proto/journal/journal.proto @@ -6,7 +6,6 @@ option java_package = "alluxio.proto.journal"; import "proto/journal/block.proto"; import "proto/journal/file.proto"; import "proto/journal/meta.proto"; -import "proto/journal/table.proto"; import "proto/journal/job.proto"; import "proto/journal/policy.proto"; // Wraps around all types of Alluxio journal entries. @@ -32,12 +31,9 @@ message JournalEntry { optional JournalOpPId operationId = 52; // action fields. 
optional ActiveSyncTxIdEntry active_sync_tx_id = 34; - optional AddTableEntry add_table = 43; - optional AddTablePartitionsEntry add_table_partitions = 51; optional AddSyncPointEntry add_sync_point = 32; optional AddMountPointEntry add_mount_point = 2; optional AsyncPersistRequestEntry async_persist_request = 16; - optional AttachDbEntry attach_db = 44; optional BlockContainerIdGeneratorEntry block_container_id_generator = 3; optional BlockInfoEntry block_info = 4; optional ClusterInfoEntry cluster_info = 42; @@ -45,7 +41,6 @@ message JournalEntry { optional DeleteBlockEntry delete_block = 29; optional DeleteFileEntry delete_file = 6; optional DeleteMountPointEntry delete_mount_point = 8; - optional DetachDbEntry detach_db = 45; optional InodeDirectoryEntry inode_directory = 9; optional InodeDirectoryIdGeneratorEntry inode_directory_id_generator = 10; optional InodeFileEntry inode_file = 11; @@ -54,15 +49,10 @@ message JournalEntry { optional PathPropertiesEntry path_properties = 40; optional PersistDirectoryEntry persist_directory = 15; optional RemovePathPropertiesEntry remove_path_properties = 41; - optional RemoveTableEntry remove_table = 50; - optional RemoveTransformJobInfoEntry remove_transform_job_info = 47; optional RemoveSyncPointEntry remove_sync_point = 33; optional RenameEntry rename = 19; optional SetAclEntry set_acl = 31; optional SetAttributeEntry set_attribute = 27; - optional AddTransformJobInfoEntry add_transform_job_info = 46; - optional CompleteTransformTableEntry complete_transform_table = 48; - optional UpdateDatabaseInfoEntry update_database_info = 49; optional UpdateUfsModeEntry update_ufs_mode = 30; optional UpdateInodeEntry update_inode = 35; optional UpdateInodeDirectoryEntry update_inode_directory = 36; diff --git a/common/transport/src/main/proto/proto/journal/table.proto b/common/transport/src/main/proto/proto/journal/table.proto deleted file mode 100644 index ed5669191b00..000000000000 --- 
a/common/transport/src/main/proto/proto/journal/table.proto +++ /dev/null @@ -1,89 +0,0 @@ -syntax = "proto2"; - -package alluxio.proto.journal; - -import "grpc/table/table_master.proto"; - -// Journal entry messages for the table master - -// next available id: 6 -message AttachDbEntry { - optional string udb_type = 1; - optional string udb_connection_uri = 2; - optional string udb_db_name = 3; - optional string db_name = 4; - map config = 5; -} - -// next available id: 2 -message DetachDbEntry { - optional string db_name = 1; -} - -// next available id: 13 -message AddTableEntry { - optional string db_name = 1; - optional string table_name = 2; - optional string owner = 3; - optional alluxio.grpc.table.Schema schema = 4; - optional alluxio.grpc.table.Layout layout = 5; - repeated alluxio.grpc.table.ColumnStatisticsInfo table_stats = 6; - map parameters = 7; - - // partitioning scheme - repeated alluxio.grpc.table.FieldSchema partition_cols = 8; - repeated alluxio.grpc.table.Partition partitions = 9; - - optional int64 previous_version = 10; - optional int64 version = 11; - optional int64 version_creation_time = 12; -} - -// next available id: 5 -message AddTablePartitionsEntry { - optional string db_name = 1; - optional string table_name = 2; - optional int64 version = 3; - repeated alluxio.grpc.table.Partition partitions = 4; -} - -// next available id: 5 -message RemoveTableEntry { - optional string db_name = 1; - optional string table_name = 2; - optional int64 version = 4; -} - -// next available id: 5 -message CompleteTransformTableEntry { - optional string db_name = 1; - optional string table_name = 2; - optional string definition = 3; - // Map from partition spec to transformed layout - map transformed_layouts = 4; -} - -// next available id: 6 -message AddTransformJobInfoEntry { - optional string db_name = 1; - optional string table_name = 2; - optional string definition = 3; - optional int64 job_id = 4; - map transformed_layouts = 5; -} - -// next 
available id: 3 -message RemoveTransformJobInfoEntry { - optional string db_name = 1; - optional string table_name = 2; -} - -// next available id: 6 -message UpdateDatabaseInfoEntry { - optional string db_name = 1; - optional string location = 2; - map parameter = 3; - optional string owner_name = 4; - optional alluxio.grpc.table.PrincipalType owner_type = 5; - optional string comment = 6; -} diff --git a/dora/core/common/src/main/java/alluxio/Constants.java b/dora/core/common/src/main/java/alluxio/Constants.java index c63346ee6584..fd3ebbb8f6e7 100644 --- a/dora/core/common/src/main/java/alluxio/Constants.java +++ b/dora/core/common/src/main/java/alluxio/Constants.java @@ -224,11 +224,6 @@ public final class Constants { public static final int PERSISTENCE_INVALID_JOB_ID = -1; public static final String PERSISTENCE_INVALID_UFS_PATH = ""; - // Table service - public static final String TABLE_MASTER_NAME = "TableMaster"; - public static final String TABLE_MASTER_CLIENT_SERVICE_NAME = "TableMasterClient"; - public static final long TABLE_MASTER_CLIENT_SERVICE_VERSION = 1; - // Medium name public static final String MEDIUM_MEM = "MEM"; public static final String MEDIUM_HDD = "HDD"; diff --git a/dora/core/common/src/main/java/alluxio/conf/PropertyKey.java b/dora/core/common/src/main/java/alluxio/conf/PropertyKey.java index ed9136367e89..fff3b4b9fa42 100755 --- a/dora/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/dora/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -2404,8 +2404,7 @@ public String toString() { dataSizeBuilder(Name.MASTER_EMBEDDED_JOURNAL_ENTRY_SIZE_MAX) .setDefaultValue("10MB") .setDescription("The maximum single journal entry size allowed to be flushed. " - + "This value should be smaller than 30MB. 
Set to a larger value to allow larger " - + "journal entries when using the Alluxio Catalog service.") + + "This value should be smaller than 30MB.") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) .build(); @@ -7650,77 +7649,6 @@ public String toString() { .setScope(Scope.WORKER) .build(); - // - // Table service properties - // - public static final PropertyKey TABLE_ENABLED = - booleanBuilder(Name.TABLE_ENABLED) - .setDefaultValue(true) - .setDescription("(Experimental) Enables the table service.") - .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) - .setScope(Scope.MASTER) - .build(); - public static final PropertyKey TABLE_CATALOG_PATH = - stringBuilder(Name.TABLE_CATALOG_PATH) - .setDefaultValue("/catalog") - .setDescription("The Alluxio file path for the table catalog metadata.") - .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) - .setScope(Scope.MASTER) - .build(); - public static final PropertyKey TABLE_CATALOG_UDB_SYNC_TIMEOUT = - durationBuilder(Name.TABLE_CATALOG_UDB_SYNC_TIMEOUT) - .setDefaultValue("1h") - .setDescription("The timeout period for a db sync to finish in the catalog. 
If a sync" - + "takes longer than this timeout, the sync will be terminated.") - .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) - .setScope(Scope.MASTER) - .build(); - public static final PropertyKey TABLE_JOURNAL_PARTITIONS_CHUNK_SIZE = - intBuilder(Name.TABLE_JOURNAL_PARTITIONS_CHUNK_SIZE) - .setDefaultValue(500) - .setDescription("The maximum table partitions number in a single journal entry.") - .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) - .setScope(Scope.MASTER) - .build(); - public static final PropertyKey TABLE_TRANSFORM_MANAGER_JOB_MONITOR_INTERVAL = - durationBuilder(Name.TABLE_TRANSFORM_MANAGER_JOB_MONITOR_INTERVAL) - .setDefaultValue("10s") - .setDescription("Job monitor is a heartbeat thread in the transform manager, " - + "this is the time interval in milliseconds the job monitor heartbeat is run to " - + "check the status of the transformation jobs and update table and partition " - + "locations after transformation.") - .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) - .setScope(Scope.MASTER) - .build(); - public static final PropertyKey TABLE_TRANSFORM_MANAGER_JOB_HISTORY_RETENTION_TIME = - durationBuilder(Name.TABLE_TRANSFORM_MANAGER_JOB_HISTORY_RETENTION_TIME) - .setDefaultValue("300sec") - .setDescription("The length of time the Alluxio Table Master should keep information " - + "about finished transformation jobs before they are discarded.") - .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) - .setScope(Scope.MASTER) - .build(); - public static final PropertyKey TABLE_UDB_HIVE_CLIENTPOOL_MIN = - intBuilder(Name.TABLE_UDB_HIVE_CLIENTPOOL_MIN) - .setDefaultValue(16) - .setDescription("The minimum capacity of the hive client pool per hive metastore") - .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) - .setScope(Scope.MASTER) - .build(); - public static final PropertyKey TABLE_UDB_HIVE_CLIENTPOOL_MAX = - intBuilder(Name.TABLE_UDB_HIVE_CLIENTPOOL_MAX) - .setDefaultValue(256) - .setDescription("The maximum capacity 
of the hive client pool per hive metastore") - .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) - .setScope(Scope.MASTER) - .build(); - public static final PropertyKey TABLE_LOAD_DEFAULT_REPLICATION = - intBuilder(Name.TABLE_LOAD_DEFAULT_REPLICATION) - .setDefaultValue(1) - .setDescription("The default replication number of files under the SDS table after " - + "load option.") - .setScope(Scope.CLIENT) - .build(); public static final PropertyKey HADOOP_SECURITY_AUTHENTICATION = stringBuilder(Name.HADOOP_SECURITY_AUTHENTICATION) .setDescription("HDFS authentication method.") @@ -9518,26 +9446,6 @@ public static final class Name { public static final String STANDALONE_FUSE_JVM_MONITOR_ENABLED = "alluxio.standalone.fuse.jvm.monitor.enabled"; - // - // Table service properties - // - public static final String TABLE_ENABLED = "alluxio.table.enabled"; - public static final String TABLE_CATALOG_PATH = "alluxio.table.catalog.path"; - public static final String TABLE_CATALOG_UDB_SYNC_TIMEOUT = - "alluxio.table.catalog.udb.sync.timeout"; - public static final String TABLE_JOURNAL_PARTITIONS_CHUNK_SIZE = - "alluxio.table.journal.partitions.chunk.size"; - public static final String TABLE_TRANSFORM_MANAGER_JOB_MONITOR_INTERVAL = - "alluxio.table.transform.manager.job.monitor.interval"; - public static final String TABLE_TRANSFORM_MANAGER_JOB_HISTORY_RETENTION_TIME = - "alluxio.table.transform.manager.job.history.retention.time"; - public static final String TABLE_UDB_HIVE_CLIENTPOOL_MIN = - "alluxio.table.udb.hive.clientpool.min"; - public static final String TABLE_UDB_HIVE_CLIENTPOOL_MAX = - "alluxio.table.udb.hive.clientpool.MAX"; - public static final String TABLE_LOAD_DEFAULT_REPLICATION = - "alluxio.table.load.default.replication"; - public static final String HADOOP_SECURITY_AUTHENTICATION = "alluxio.hadoop.security.authentication"; diff --git a/dora/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java 
b/dora/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java index fe9bf7acd1cf..563f5e8c8c30 100644 --- a/dora/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java +++ b/dora/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java @@ -115,9 +115,6 @@ public void getServiceVersion(GetServiceVersionPRequest request, case JOURNAL_MASTER_CLIENT_SERVICE: serviceVersion = Constants.JOURNAL_MASTER_CLIENT_SERVICE_VERSION; break; - case TABLE_MASTER_CLIENT_SERVICE: - serviceVersion = Constants.TABLE_MASTER_CLIENT_SERVICE_VERSION; - break; case RAFT_JOURNAL_SERVICE: serviceVersion = Constants.RAFT_JOURNAL_SERVICE_VERSION; break; diff --git a/dora/core/server/common/src/main/java/alluxio/master/journal/JournalEntryAssociation.java b/dora/core/server/common/src/main/java/alluxio/master/journal/JournalEntryAssociation.java index 9ad936030f20..9c518dcd67d5 100644 --- a/dora/core/server/common/src/main/java/alluxio/master/journal/JournalEntryAssociation.java +++ b/dora/core/server/common/src/main/java/alluxio/master/journal/JournalEntryAssociation.java @@ -64,17 +64,6 @@ public static String getMasterForEntry(JournalEntry entry) { || entry.hasPolicyRemove()) { return Constants.POLICY_ENGINE_NAME; } - if (entry.hasAttachDb() - || entry.hasAddTable() - || entry.hasAddTablePartitions() - || entry.hasRemoveTable() - || entry.hasDetachDb() - || entry.hasUpdateDatabaseInfo() - || entry.hasAddTransformJobInfo() - || entry.hasRemoveTransformJobInfo() - || entry.hasCompleteTransformTable()) { - return Constants.TABLE_MASTER_NAME; - } throw new IllegalStateException("Unrecognized journal entry: " + entry); } diff --git a/dora/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointName.java b/dora/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointName.java index 8870f7ec5d4f..68b90235730f 100644 --- 
a/dora/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointName.java +++ b/dora/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointName.java @@ -34,10 +34,6 @@ public enum CheckpointName { PINNED_INODE_FILE_IDS, REPLICATION_LIMITED_FILE_IDS, ROCKS_INODE_STORE, - TABLE_MASTER, - TABLE_MASTER_DATABASE, - TABLE_MASTER_CATALOG, - TABLE_MASTER_TRANSFORM_MANAGER, TO_BE_PERSISTED_FILE_IDS, TTL_BUCKET_LIST, SCHEDULER, diff --git a/dora/core/server/common/src/test/java/alluxio/master/journal/JournalEntryAssociationTest.java b/dora/core/server/common/src/test/java/alluxio/master/journal/JournalEntryAssociationTest.java index cd669ed46625..0daf30ea51e0 100644 --- a/dora/core/server/common/src/test/java/alluxio/master/journal/JournalEntryAssociationTest.java +++ b/dora/core/server/common/src/test/java/alluxio/master/journal/JournalEntryAssociationTest.java @@ -42,7 +42,6 @@ import alluxio.proto.journal.Meta.ClusterInfoEntry; import alluxio.proto.journal.Meta.PathPropertiesEntry; import alluxio.proto.journal.Meta.RemovePathPropertiesEntry; -import alluxio.proto.journal.Table; import org.junit.Rule; import org.junit.Test; @@ -65,18 +64,14 @@ public class JournalEntryAssociationTest { private static List ENTRIES = Arrays.asList( JournalEntry.newBuilder().setAddMountPoint(AddMountPointEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setAddSyncPoint(AddSyncPointEntry.getDefaultInstance()).build(), - JournalEntry.newBuilder().setAddTable(Table.AddTableEntry.getDefaultInstance()).build(), - JournalEntry.newBuilder().setAddTablePartitions(Table.AddTablePartitionsEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setActiveSyncTxId(File.ActiveSyncTxIdEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setAsyncPersistRequest(AsyncPersistRequestEntry.getDefaultInstance()).build(), - JournalEntry.newBuilder().setAttachDb(Table.AttachDbEntry.getDefaultInstance()).build(), 
JournalEntry.newBuilder().setBlockContainerIdGenerator(BlockContainerIdGeneratorEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setBlockInfo(BlockInfoEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setClusterInfo(ClusterInfoEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setCompleteFile(CompleteFileEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setDeleteBlock(DeleteBlockEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setDeleteFile(DeleteFileEntry.getDefaultInstance()).build(), - JournalEntry.newBuilder().setDetachDb(Table.DetachDbEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setDeleteMountPoint(DeleteMountPointEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setInodeDirectory(InodeDirectoryEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setInodeDirectoryIdGenerator(InodeDirectoryIdGeneratorEntry.getDefaultInstance()).build(), @@ -87,18 +82,13 @@ public class JournalEntryAssociationTest { JournalEntry.newBuilder().setPersistDirectory(PersistDirectoryEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setRemovePathProperties(RemovePathPropertiesEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setRemoveSyncPoint(RemoveSyncPointEntry.getDefaultInstance()).build(), - JournalEntry.newBuilder().setRemoveTable(Table.RemoveTableEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setRename(RenameEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setSetAcl(SetAclEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setSetAttribute(SetAttributeEntry.getDefaultInstance()).build(), - JournalEntry.newBuilder().setUpdateDatabaseInfo(Table.UpdateDatabaseInfoEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setUpdateUfsMode(UpdateUfsModeEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setUpdateInode(UpdateInodeEntry.getDefaultInstance()).build(), 
JournalEntry.newBuilder().setUpdateInodeDirectory(UpdateInodeDirectoryEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setUpdateInodeFile(UpdateInodeFileEntry.getDefaultInstance()).build(), - JournalEntry.newBuilder().setAddTransformJobInfo(Table.AddTransformJobInfoEntry.getDefaultInstance()).build(), - JournalEntry.newBuilder().setRemoveTransformJobInfo(Table.RemoveTransformJobInfoEntry.getDefaultInstance()).build(), - JournalEntry.newBuilder().setCompleteTransformTable(Table.CompleteTransformTableEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setLoadJob(alluxio.proto.journal.Job.LoadJobEntry.newBuilder() .setLoadPath("/test").setState(alluxio.proto.journal.Job.PJobState.CREATED) .setBandwidth(1).setPartialListing(false).setVerify(true).setJobId("1").build()).build(), diff --git a/dora/job/common/src/main/java/alluxio/job/plan/transform/CompactConfig.java b/dora/job/common/src/main/java/alluxio/job/plan/transform/CompactConfig.java deleted file mode 100644 index eeb9f88ba73a..000000000000 --- a/dora/job/common/src/main/java/alluxio/job/plan/transform/CompactConfig.java +++ /dev/null @@ -1,171 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.job.plan.transform; - -import alluxio.job.plan.PlanConfig; - -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.MoreObjects; -import com.google.common.base.Objects; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; - -import java.util.Collection; -import javax.annotation.concurrent.ThreadSafe; - -/** - * Configuration for a job to compact files directly under a directory. - * - * Files will be compacted into a certain number of files, - * if the number of existing files is less than the specified number, then no compaction happens, - * otherwise, assume we want to compact 100 files to 10 files, then every 10 files will be - * compacted into one file. - * The original order of rows is preserved. - */ -@ThreadSafe -public final class CompactConfig implements PlanConfig { - private static final long serialVersionUID = -3434270994964559796L; - - private static final String NAME = "Compact"; - - private final PartitionInfo mInputPartitionInfo; - /** - * Files directly under this directory are compacted. - */ - private final String mInput; - - private final PartitionInfo mOutputPartitionInfo; - /** - * Compacted files are stored under this directory. - */ - private final String mOutput; - /** - * Max number of files after compaction. - */ - private final int mMaxNumFiles; - /** - * Minimum file size for compaction. 
- */ - private final long mMinFileSize; - - /** - * @param inputPartitionInfo the input partition info - * @param input the input directory - * @param outputPartitionInfo the output partition info - * @param output the output directory - * @param maxNumFiles the maximum number of files after compaction - * @param minFileSize the minimum file size for coalescing - */ - public CompactConfig(@JsonProperty("inputPartitionInfo") PartitionInfo inputPartitionInfo, - @JsonProperty("input") String input, - @JsonProperty("outputPartitionInfo") PartitionInfo outputPartitionInfo, - @JsonProperty("output") String output, - @JsonProperty("maxNumFiles") Integer maxNumFiles, - @JsonProperty("minFileSize") Long minFileSize) { - mInputPartitionInfo = inputPartitionInfo; - mInput = Preconditions.checkNotNull(input, "input"); - mOutputPartitionInfo = outputPartitionInfo; - mOutput = Preconditions.checkNotNull(output, "output"); - mMaxNumFiles = Preconditions.checkNotNull(maxNumFiles, "maxNumFiles"); - mMinFileSize = Preconditions.checkNotNull(minFileSize, "minFileSize"); - } - - /** - * @return the input partition info - */ - public PartitionInfo getInputPartitionInfo() { - return mInputPartitionInfo; - } - - /** - * @return the output partition info - */ - public PartitionInfo getOutputPartitionInfo() { - return mOutputPartitionInfo; - } - - /** - * @return the input directory - */ - public String getInput() { - return mInput; - } - - /** - * @return the output directory - */ - public String getOutput() { - return mOutput; - } - - /** - * @return the number of files after compaction - */ - public int getMaxNumFiles() { - return mMaxNumFiles; - } - - /** - * @return the file size - */ - public long getMinFileSize() { - return mMinFileSize; - } - - @Override - public boolean equals(Object obj) { - if (obj == null) { - return false; - } - if (this == obj) { - return true; - } - if (!(obj instanceof CompactConfig)) { - return false; - } - CompactConfig that = (CompactConfig) obj; - 
return mInputPartitionInfo.equals(that.mInputPartitionInfo) - && mOutputPartitionInfo.equals(that.mOutputPartitionInfo) - && mInput.equals(that.mInput) - && mOutput.equals(that.mOutput) - && mMaxNumFiles == that.mMaxNumFiles - && mMinFileSize == that.mMinFileSize; - } - - @Override - public int hashCode() { - return Objects.hashCode(mInputPartitionInfo, mOutputPartitionInfo, mInput, mOutput, - mMaxNumFiles, mMinFileSize); - } - - @Override - public String toString() { - return MoreObjects.toStringHelper(this) - .add("input", mInput) - .add("output", mOutput) - .add("maxNumFiles", mMaxNumFiles) - .add("minFileSize", mMinFileSize) - .add("inputPartitionInfo", mInputPartitionInfo) - .add("outputPartitionInfo", mOutputPartitionInfo) - .toString(); - } - - @Override - public String getName() { - return NAME; - } - - @Override - public Collection affectedPaths() { - return ImmutableList.of(mInput, mOutput); - } -} diff --git a/dora/job/common/src/main/java/alluxio/job/plan/transform/FieldSchema.java b/dora/job/common/src/main/java/alluxio/job/plan/transform/FieldSchema.java deleted file mode 100644 index e37e0caac1ca..000000000000 --- a/dora/job/common/src/main/java/alluxio/job/plan/transform/FieldSchema.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.job.plan.transform; - -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.MoreObjects; -import com.google.common.base.Objects; - -import java.io.Serializable; - -/** - * Metadata of a field in the schema. - */ -public class FieldSchema implements Serializable { - private static final long serialVersionUID = 4573336558464588151L; - - private final int mId; - private final String mName; - private final String mType; - private final String mComment; - - /** - * @param id the id - * @param name the name - * @param type the type - * @param comment the comment - */ - public FieldSchema(@JsonProperty("id") int id, - @JsonProperty("name") String name, - @JsonProperty("type") String type, - @JsonProperty("comment") String comment) { - mId = id; - mName = name; - mType = type; - mComment = comment; - } - - /** - * @return the ID of the field - */ - public int getId() { - return mId; - } - - /** - * @return the name of the field - */ - public String getName() { - return mName; - } - - /** - * @return the type of the field - */ - public String getType() { - return mType; - } - - /** - * @return the comment of the field - */ - public String getComment() { - return mComment; - } - - @Override - public boolean equals(Object obj) { - if (obj == null) { - return false; - } - if (this == obj) { - return true; - } - if (!(obj instanceof FieldSchema)) { - return false; - } - FieldSchema that = (FieldSchema) obj; - return mId == that.mId - && mName.equals(that.mName) - && mType.equals(that.mType) - && mComment.equals(that.mComment); - } - - @Override - public int hashCode() { - return Objects.hashCode(mId, mName, mType, mComment); - } - - @Override - public String toString() { - return MoreObjects.toStringHelper(this) - .add("id", mId) - .add("name", mName) - .add("type", mType) - .add("comment", mComment) - .toString(); - } -} diff --git a/dora/job/common/src/main/java/alluxio/job/plan/transform/Format.java 
b/dora/job/common/src/main/java/alluxio/job/plan/transform/Format.java deleted file mode 100644 index f227c3729d41..000000000000 --- a/dora/job/common/src/main/java/alluxio/job/plan/transform/Format.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform; - -/** - * Supported formats. - */ -public enum Format { - CSV(".csv"), - GZIP_CSV(".csv.gz"), - GZIP(".gz"), - ORC(".orc"), - PARQUET(".parquet"); - - private String mSuffix; - - /** - * @param suffix the suffix of the format - */ - Format(String suffix) { - mSuffix = suffix; - } - - /** - * @return the suffix of filename for the format - */ - public String getSuffix() { - return mSuffix; - } - - /** - * @param path the file path - * @return whether the path points to a (compressed) CSV file - */ - public static boolean isCsv(String path) { - return path.endsWith(CSV.getSuffix()) || path.endsWith(GZIP_CSV.getSuffix()); - } - - /** - * @param path the file path - * @return whether the path points to a parquet file - */ - public static boolean isParquet(String path) { - return path.endsWith(PARQUET.getSuffix()); - } - - /** - * @param path the file path - * @return whether the path points to a gzipped file - */ - public static boolean isGzipped(String path) { - return path.endsWith(GZIP.getSuffix()); - } - - /** - * @param path the file path - * @return the format of the file - */ - public static Format of(String path) { - if (path.endsWith(CSV.getSuffix())) { - return CSV; - } - if 
(path.endsWith(GZIP_CSV.getSuffix())) { - return GZIP_CSV; - } - if (path.endsWith(PARQUET.getSuffix())) { - return PARQUET; - } - throw new RuntimeException("Unsupported file format for " + path); - } -} diff --git a/dora/job/common/src/main/java/alluxio/job/plan/transform/HiveConstants.java b/dora/job/common/src/main/java/alluxio/job/plan/transform/HiveConstants.java deleted file mode 100644 index 196739d2a719..000000000000 --- a/dora/job/common/src/main/java/alluxio/job/plan/transform/HiveConstants.java +++ /dev/null @@ -1,121 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform; - -/** - * Constants related to Hive. - * - * @see SerdeConstants.java - */ -public class HiveConstants { - private HiveConstants() {} // Prevents initialization - - /** - * Number of lines to skip when reading from CSV. - */ - public static final String LINES_TO_SKIP = "skip.header.line.count"; - /** - * Field delimiter for CSV. - */ - public static final String FIELD_DELIM = "field.delim"; - /** - * Serialization format. - */ - public static final String SERIALIZATION_FORMAT = "serialization.format"; - /** - * Parquet serde class. - */ - public static final String PARQUET_SERDE_CLASS = - "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"; - /** - * CSV serde class. - */ - public static final String CSV_SERDE_CLASS = "org.apache.hadoop.hive.serde2.OpenCSVSerde"; - /** - * ORC serde class. 
- */ - public static final String ORC_SERDE_CLASS = "org.apache.hadoop.hive.ql.io.orc.OrcSerde"; - /** - * Text input format. - */ - public static final String TEXT_INPUT_FORMAT_CLASS = "org.apache.hadoop.mapred.TextInputFormat"; - /** - * Parquet input format. - */ - public static final String PARQUET_INPUT_FORMAT_CLASS = - "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"; - /** - * Parquet output format. - */ - public static final String PARQUET_OUTPUT_FORMAT_CLASS = - "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"; - - /** - * Hive Types. - */ - public static final class Types { - private Types() {} // Prevents initialization - - // TODO(cc): how to support the following? - // public static final String VOID = "void"; - // public static final String INTERVAL_YEAR_MONTH = "interval_year_month"; - // public static final String INTERVAL_DAY_TIME = "interval_day_time"; - // public static final String TIMESTAMP_LOCAL = "timestamp with local time zone"; - // public static final String DATETIME = "datetime"; - - /** Hive bool type. */ - public static final String BOOLEAN = "boolean"; - /** Hive tiny int type. */ - public static final String TINYINT = "tinyint"; - /** Hive small int type. */ - public static final String SMALLINT = "smallint"; - /** Hive int type. */ - public static final String INT = "int"; - /** Hive big int type. */ - public static final String BIGINT = "bigint"; - /** Hive float type. */ - public static final String FLOAT = "float"; - /** Hive double type. */ - public static final String DOUBLE = "double"; - /** Hive string type. */ - public static final String STRING = "string"; - /** Hive varchar type. */ - public static final String VARCHAR = "varchar"; - /** Hive char type. */ - public static final String CHAR = "char"; - /** Hive date type. */ - public static final String DATE = "date"; - /** Hive timestamp type. */ - public static final String TIMESTAMP = "timestamp"; - /** Hive decimal type. 
*/ - public static final String DECIMAL = "decimal"; - /** Hive binary type. */ - public static final String BINARY = "binary"; - - /** - * Filters out parts of type information to match the types constant for type checking. - * - * @param type the type - * @return type name matching the types constants - */ - public static String getHiveConstantType(String type) { - // filters out the non hive type information from types like "char(10)" - - int i = type.indexOf('('); - if (i == -1) { - return type; - } - return type.substring(0, i); - } - } -} diff --git a/dora/job/common/src/main/java/alluxio/job/plan/transform/PartitionInfo.java b/dora/job/common/src/main/java/alluxio/job/plan/transform/PartitionInfo.java deleted file mode 100644 index 73ecd87f8f17..000000000000 --- a/dora/job/common/src/main/java/alluxio/job/plan/transform/PartitionInfo.java +++ /dev/null @@ -1,160 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform; - -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.MoreObjects; -import com.google.common.base.Objects; - -import java.io.IOException; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; - -/** - * Metadata about a partition in Alluxio catalog service. 
- */ -public class PartitionInfo implements Serializable { - private static final long serialVersionUID = 6905153658064056381L; - - /** - * Key in Serde Properties to denote parquet compression method. - */ - public static final String PARQUET_COMPRESSION = "file.parquet.compression"; - - private final String mSerdeClass; - private final String mInputFormatClass; - private final HashMap mSerdeProperties; - private final HashMap mTableProperties; - private final ArrayList mFields; - - /** - * @param serdeClass the full serde class name - * @param inputFormatClass the full input format class name - * @param serdeProperties the serde Properties - * @param tableProperties the table Properties - * @param fields the fields - */ - public PartitionInfo(@JsonProperty("serdeClass") String serdeClass, - @JsonProperty("inputFormatClass") String inputFormatClass, - @JsonProperty("serdeProperties") HashMap serdeProperties, - @JsonProperty("tableProperties") HashMap tableProperties, - @JsonProperty("fields") ArrayList fields) { - mSerdeClass = serdeClass; - mInputFormatClass = inputFormatClass; - mSerdeProperties = serdeProperties; - mTableProperties = tableProperties; - mFields = fields; - } - - /** - * @param filename the filename - * @return the format of the files in the partition - * @throws IOException when failed to determine format - */ - @JsonIgnore - public Format getFormat(String filename) throws IOException { - if (mSerdeClass.equals(HiveConstants.PARQUET_SERDE_CLASS)) { - return Format.PARQUET; - } else if (mSerdeClass.equals(HiveConstants.CSV_SERDE_CLASS) - || (mInputFormatClass.equals(HiveConstants.TEXT_INPUT_FORMAT_CLASS) - && mSerdeProperties.containsKey(HiveConstants.SERIALIZATION_FORMAT))) { - if (filename.endsWith(Format.GZIP.getSuffix())) { - return Format.GZIP_CSV; - } - return Format.CSV; - } else if (mSerdeClass.equals(HiveConstants.ORC_SERDE_CLASS)) { - return Format.ORC; - } - // failed to get format from serde info, try to get it from extension - if 
(filename.endsWith(Format.CSV.getSuffix())) { - return Format.CSV; - } - if (filename.endsWith(Format.PARQUET.getSuffix())) { - return Format.PARQUET; - } - // both method failed, throw exception - throw new IOException("Cannot determine format for " + filename); - } - - /** - * @return the input format class name - */ - public String getInputFormatClass() { - return mInputFormatClass; - } - - /** - * @return the serde class name - */ - public String getSerdeClass() { - return mSerdeClass; - } - - /** - * @return the serde properties - */ - public HashMap getSerdeProperties() { - return mSerdeProperties; - } - - /** - * @return the table properties - */ - public HashMap getTableProperties() { - return mTableProperties; - } - - /** - * @return the fields - */ - public ArrayList getFields() { - return mFields; - } - - @Override - public boolean equals(Object obj) { - if (obj == null) { - return false; - } - if (this == obj) { - return true; - } - if (!(obj instanceof PartitionInfo)) { - return false; - } - PartitionInfo that = (PartitionInfo) obj; - return mSerdeClass.equals(that.mSerdeClass) - && mInputFormatClass.equals(that.mInputFormatClass) - && mSerdeProperties.equals(that.mSerdeProperties) - && mTableProperties.equals(that.mTableProperties) - && mFields.equals(that.mFields); - } - - @Override - public int hashCode() { - return Objects.hashCode(mSerdeClass, mInputFormatClass, mSerdeProperties, mTableProperties, - mFields); - } - - @Override - public String toString() { - return MoreObjects.toStringHelper(this) - .add("serdeClass", mSerdeClass) - .add("inputFormatClass", mInputFormatClass) - .add("serdeProperties", mSerdeProperties) - .add("tableProperties", mTableProperties) - .add("fields", mFields) - .toString(); - } -} diff --git a/dora/job/common/src/test/java/alluxio/job/plan/transform/HiveConstantsTest.java b/dora/job/common/src/test/java/alluxio/job/plan/transform/HiveConstantsTest.java deleted file mode 100644 index 7caf7dc89f65..000000000000 --- 
a/dora/job/common/src/test/java/alluxio/job/plan/transform/HiveConstantsTest.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform; - -import static org.junit.Assert.assertEquals; - -import org.junit.Test; - -/** - * Test for {@link HiveConstants}. - */ -public class HiveConstantsTest { - - @Test - public void testGetHiveConstantType() { - assertEquals(HiveConstants.Types.CHAR, HiveConstants.Types.getHiveConstantType("char(20)")); - assertEquals(HiveConstants.Types.VARCHAR, - HiveConstants.Types.getHiveConstantType("varchar(20)")); - assertEquals(HiveConstants.Types.DECIMAL, - HiveConstants.Types.getHiveConstantType("decimal(10, 20)")); - } -} diff --git a/dora/job/common/src/test/java/alluxio/job/workflow/composite/CompositeConfigTest.java b/dora/job/common/src/test/java/alluxio/job/workflow/composite/CompositeConfigTest.java index 32757dfa97b2..d7dd10a3d805 100644 --- a/dora/job/common/src/test/java/alluxio/job/workflow/composite/CompositeConfigTest.java +++ b/dora/job/common/src/test/java/alluxio/job/workflow/composite/CompositeConfigTest.java @@ -16,19 +16,15 @@ import alluxio.job.JobConfig; import alluxio.job.plan.load.LoadConfig; -import alluxio.job.plan.transform.CompactConfig; -import alluxio.job.plan.transform.PartitionInfo; import alluxio.job.util.SerializationUtils; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; -import org.apache.commons.io.FileUtils; import org.junit.Test; 
import java.io.Serializable; import java.util.ArrayList; import java.util.Collections; -import java.util.HashMap; /** * Tests {@link CompositeConfig}. @@ -37,14 +33,11 @@ public final class CompositeConfigTest { private static final CompositeConfig CONFIG; static { - PartitionInfo pInfo = new PartitionInfo("serde", "inputformat", new HashMap<>(), - new HashMap<>(), new ArrayList<>()); ArrayList jobs = new ArrayList<>(); jobs.add(new CompositeConfig(new ArrayList<>(), true)); jobs.add(new CompositeConfig(new ArrayList<>(), false)); jobs.add(new CompositeConfig(Lists.newArrayList(new LoadConfig("/", 1, Collections.EMPTY_SET, Collections.EMPTY_SET, Collections.EMPTY_SET, Collections.EMPTY_SET, true)), true)); - jobs.add(new CompactConfig(pInfo, "/input", pInfo, "/output", 100, FileUtils.ONE_GB)); CONFIG = new CompositeConfig(jobs, true); } diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/CompactDefinition.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/CompactDefinition.java deleted file mode 100644 index 2cf8ddebf2b2..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/CompactDefinition.java +++ /dev/null @@ -1,211 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.job.plan.transform; - -import alluxio.AlluxioURI; -import alluxio.client.file.URIStatus; -import alluxio.collections.Pair; -import alluxio.job.RunTaskContext; -import alluxio.job.SelectExecutorsContext; -import alluxio.job.plan.AbstractVoidPlanDefinition; -import alluxio.job.plan.transform.format.TableReader; -import alluxio.job.plan.transform.format.TableRow; -import alluxio.job.plan.transform.format.TableSchema; -import alluxio.job.plan.transform.format.TableWriter; -import alluxio.job.util.SerializableVoid; -import alluxio.util.CommonUtils; -import alluxio.wire.WorkerInfo; - -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Set; - -/** - * The job definition for compacting files representing a structured table under a directory. - */ -public final class CompactDefinition - extends AbstractVoidPlanDefinition> { - private static final Logger LOG = LoggerFactory.getLogger(CompactDefinition.class); - private static final int TASKS_PER_WORKER = 10; - private static final String COMPACTED_FILE_PATTERN = "part-%d.parquet"; - private static final String SUCCESS_FILENAME = "_SUCCESS"; - private static final String CRC_FILENAME_SUFFIX = ".crc"; - - private static final Map COMPRESSION_RATIO = ImmutableMap.of( - Format.PARQUET, 1.0, - Format.CSV, 5.0, - Format.GZIP_CSV, 2.5, - Format.ORC, 1.0); - - /** - * Constructs a new {@link CompactDefinition}. 
- */ - public CompactDefinition() { - } - - @Override - public Class getJobConfigClass() { - return CompactConfig.class; - } - - private String getOutputPath(AlluxioURI outputDir, int outputIndex) { - return outputDir.join(String.format(COMPACTED_FILE_PATTERN, outputIndex)) - .toString(); - } - - private boolean shouldIgnore(URIStatus status) { - return status.isFolder() - || status.getName().equals(SUCCESS_FILENAME) - || status.getName().endsWith(CRC_FILENAME_SUFFIX); - } - - @Override - public Set>> selectExecutors(CompactConfig config, - List jobWorkers, SelectExecutorsContext context) throws Exception { - Preconditions.checkState(!jobWorkers.isEmpty(), "No job worker"); - AlluxioURI inputDir = new AlluxioURI(config.getInput()); - AlluxioURI outputDir = new AlluxioURI(config.getOutput()); - - List files = Lists.newArrayList(); - // use double to prevent overflow - double totalFileSize = 0; - for (URIStatus status : context.getFileSystem().listStatus(inputDir)) { - if (!shouldIgnore(status)) { - files.add(status); - totalFileSize += status.getLength(); - } - } - - Map> assignments = Maps.newHashMap(); - int maxNumFiles = config.getMaxNumFiles(); - long groupMinSize = config.getMinFileSize(); - - if (!files.isEmpty() && config.getInputPartitionInfo() != null) { - // adjust the group minimum size for source compression ratio - groupMinSize *= COMPRESSION_RATIO.get( - config.getInputPartitionInfo().getFormat(files.get(0).getName())); - } - - if (totalFileSize / groupMinSize > maxNumFiles) { - groupMinSize = Math.round(totalFileSize / maxNumFiles); - } - - // Files to be compacted are grouped into different groups, - // each group of files are compacted to one file, - // one task is to compact one group of files, - // different tasks are assigned to different workers in a round robin way. - // We keep adding files to the group, until adding more files makes it too big. 
- ArrayList group = new ArrayList<>(); - int workerIndex = 0; - int outputIndex = 0; - // Number of groups already generated - int groupIndex = 0; - long currentGroupSize = 0; - long halfGroupMinSize = groupMinSize / 2; - for (URIStatus file : files) { - // add the file to the group if - // 1. group is empty - // 2. group is the last group - // 3. group size with the new file is closer to the groupMinSize than group size without it - if (group.isEmpty() || groupIndex == maxNumFiles - 1 - || (currentGroupSize + file.getLength()) <= halfGroupMinSize - || (Math.abs(currentGroupSize + file.getLength() - groupMinSize) - <= Math.abs(currentGroupSize - groupMinSize))) { - group.add(inputDir.join(file.getName()).toString()); - currentGroupSize += file.getLength(); - } else { - WorkerInfo worker = jobWorkers.get(workerIndex++); - if (workerIndex == jobWorkers.size()) { - workerIndex = 0; - } - if (!assignments.containsKey(worker)) { - assignments.put(worker, new ArrayList<>()); - } - ArrayList tasks = assignments.get(worker); - tasks.add(new CompactTask(group, getOutputPath(outputDir, outputIndex++))); - group = new ArrayList<>(); - group.add(inputDir.join(file.getName()).toString()); - currentGroupSize = file.getLength(); - groupIndex++; - } - } - // handle the last group - if (!group.isEmpty()) { - WorkerInfo worker = jobWorkers.get(workerIndex); - if (!assignments.containsKey(worker)) { - assignments.put(worker, new ArrayList<>()); - } - ArrayList tasks = assignments.get(worker); - tasks.add(new CompactTask(group, getOutputPath(outputDir, outputIndex))); - } - - Set>> result = Sets.newHashSet(); - for (Map.Entry> assignment : assignments.entrySet()) { - List> partitioned = CommonUtils.partition( - assignment.getValue(), TASKS_PER_WORKER); - for (List compactTasks : partitioned) { - if (!compactTasks.isEmpty()) { - result.add(new Pair<>(assignment.getKey(), Lists.newArrayList(compactTasks))); - } - } - } - - return result; - } - - @Override - public SerializableVoid 
runTask(CompactConfig config, ArrayList tasks, - RunTaskContext context) throws Exception { - for (CompactTask task : tasks) { - ArrayList inputs = task.getInputs(); - if (inputs.isEmpty()) { - continue; - } - AlluxioURI output = new AlluxioURI(task.getOutput()); - - TableSchema schema; - try (TableReader reader = TableReader.create(new AlluxioURI(inputs.get(0)), - config.getInputPartitionInfo())) { - schema = reader.getSchema(); - } - - try (TableWriter writer = TableWriter.create(schema, output, - config.getOutputPartitionInfo())) { - for (String input : inputs) { - try (TableReader reader = TableReader.create(new AlluxioURI(input), - config.getInputPartitionInfo())) { - for (TableRow row = reader.read(); row != null; row = reader.read()) { - writer.write(row); - } - } - } - } catch (Throwable e) { - try { - context.getFileSystem().delete(output); // outputUri is the output file - } catch (Throwable t) { - e.addSuppressed(t); - } - throw e; - } - } - return null; - } -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/CompactTask.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/CompactTask.java deleted file mode 100644 index 01b39a91629d..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/CompactTask.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.job.plan.transform; - -import com.google.common.base.MoreObjects; -import com.google.common.base.Objects; - -import java.io.Serializable; -import java.util.ArrayList; - -/** - * A task for a job worker to compact files into one file. - */ -public final class CompactTask implements Serializable { - private static final long serialVersionUID = -998740998086570018L; - - private final ArrayList mInputs; - private final String mOutput; - - /** - * @param inputs the input files to be compacted - * @param output the compacted file - */ - public CompactTask(ArrayList inputs, String output) { - mInputs = inputs; - mOutput = output; - } - - /** - * @return the inputs - */ - public ArrayList getInputs() { - return mInputs; - } - - /** - * @return the output - */ - public String getOutput() { - return mOutput; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (!(o instanceof CompactTask)) { - return false; - } - CompactTask that = (CompactTask) o; - return mInputs.equals(that.mInputs) - && mOutput.equals(that.mOutput); - } - - @Override - public int hashCode() { - return Objects.hashCode(mInputs, mOutput); - } - - @Override - public String toString() { - return MoreObjects.toStringHelper(this) - .add("inputs", mInputs) - .add("output", mOutput) - .toString(); - } -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/compact/Compactor.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/compact/Compactor.java deleted file mode 100644 index 5b0855215cc5..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/compact/Compactor.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.compact; - -import alluxio.job.plan.transform.format.TableReader; -import alluxio.job.plan.transform.format.TableWriter; -import alluxio.job.plan.transform.format.tables.TablesWriter; - -import java.io.IOException; -import java.util.List; - -/** - * Compacts a list of inputs to an output. - * The output can either be a single table file, or a group of table files where each file is - * completed once it reaches a certain size. - */ -public interface Compactor { - /** - * Compacts a list of inputs to the output. - * Closing the readers and writer is the responsibility of the caller. - * - * @param inputs a list of table readers - * @param output a table writer, can be a {@link TablesWriter} - * @throws IOException when compaction fails - */ - void compact(List inputs, TableWriter output) throws IOException; -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/JobPath.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/JobPath.java deleted file mode 100644 index 61b08b2e2122..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/JobPath.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format; - -import alluxio.client.ReadType; -import alluxio.client.WriteType; -import alluxio.conf.PropertyKey; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Objects; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.security.UserGroupInformation; - -import java.io.IOException; -import java.net.URI; -import java.util.concurrent.ConcurrentHashMap; - -/** - * Implementation of {@link Path} that has a cache for getting file system. - */ -public class JobPath extends Path { - private static final long serialVersionUID = 1427341926575998813L; - private static final ConcurrentHashMap CACHE = - new ConcurrentHashMap<>(); - - /** - * Calls {@link FileSystem}.get, created to make testing easier. - * @param uri the uri - * @param conf the conf - * @return the file system - * @throws IOException - */ - @VisibleForTesting - public static FileSystem fileSystemGet(URI uri, Configuration conf) throws IOException { - return FileSystem.get(uri, conf); - } - - private static class FileSystemKey { - - final String mScheme; - final String mAuthority; - final UserGroupInformation mUgi; - final ReadType mReadType; - final WriteType mWriteType; - - public FileSystemKey(JobPath path, Configuration conf) throws IOException { - URI uri = path.toUri(); - mScheme = uri.getScheme() == null ? "" : uri.getScheme().toLowerCase(); - mAuthority = uri.getAuthority() == null ? 
"" : uri.getAuthority().toLowerCase(); - mUgi = UserGroupInformation.getCurrentUser(); - mReadType = conf.getEnum(PropertyKey.USER_FILE_READ_TYPE_DEFAULT.getName(), - alluxio.conf.Configuration.getEnum( - PropertyKey.USER_FILE_READ_TYPE_DEFAULT, ReadType.class)); - mWriteType = conf.getEnum(PropertyKey.USER_FILE_WRITE_TYPE_DEFAULT.getName(), - alluxio.conf.Configuration.getEnum( - PropertyKey.USER_FILE_WRITE_TYPE_DEFAULT, WriteType.class)); - } - - @Override - public int hashCode() { - return Objects.hashCode(mScheme, mAuthority, mUgi, mReadType, mWriteType); - } - - @Override - public boolean equals(Object o) { - if (o == null) { - return false; - } - if (this == o) { - return true; - } - if (!(o instanceof FileSystemKey)) { - return false; - } - - FileSystemKey that = (FileSystemKey) o; - return Objects.equal(mScheme, that.mScheme) - && Objects.equal(mAuthority, that.mAuthority) - && Objects.equal(mUgi, that.mUgi) - && Objects.equal(mReadType, that.mReadType) - && Objects.equal(mWriteType, that.mWriteType); - } - } - - /** - * Copy of the constructor in {@link Path}. 
- * @param scheme the scheme - * @param authority the authority - * @param path the path - */ - public JobPath(String scheme, String authority, String path) { - super(scheme, authority, path); - } - - @Override - public FileSystem getFileSystem(Configuration conf) throws IOException { - try { - return CACHE.computeIfAbsent(new FileSystemKey(this, conf), (key) -> { - try { - return fileSystemGet(this.toUri(), conf); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - } catch (RuntimeException e) { - Throwable cause = e.getCause(); - if (cause instanceof IOException) { - throw (IOException) cause; - } - throw e; - } - } -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/ReadWriterUtils.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/ReadWriterUtils.java deleted file mode 100644 index 7cbcaa303657..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/ReadWriterUtils.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format; - -import alluxio.AlluxioURI; -import alluxio.Constants; -import alluxio.client.ReadType; -import alluxio.client.WriteType; -import alluxio.conf.PropertyKey; -import alluxio.exception.ExceptionMessage; -import alluxio.uri.NoAuthority; - -import com.google.common.base.Preconditions; -import org.apache.hadoop.conf.Configuration; - -/** - * Utilities for implementing {@link TableReader} and {@link TableWriter}. 
- */ -public final class ReadWriterUtils { - private static final String ALLUXIO_HADOOP_FILESYSTEM_DISABLE_CACHE = - "fs.alluxio.impl.disable.cache"; - - /** - * Checks preconditions of uri. - * - * @param uri the URI to check - */ - public static void checkUri(AlluxioURI uri) { - Preconditions.checkArgument(uri.getScheme() != null && !uri.getScheme().isEmpty(), - ExceptionMessage.TRANSFORM_TABLE_URI_LACKS_SCHEME.getMessage(uri)); - if (uri.getScheme().equals(Constants.SCHEME)) { - Preconditions.checkArgument(uri.getAuthority() != null - && !uri.getAuthority().equals(NoAuthority.INSTANCE), - ExceptionMessage.TRANSFORM_TABLE_URI_LACKS_AUTHORITY.getMessage(uri)); - } - } - - /** - * @return a new Hadoop conf with alluxio read type set to no cache - */ - public static Configuration readNoCacheConf() { - Configuration conf = new Configuration(); - conf.setEnum(PropertyKey.USER_FILE_READ_TYPE_DEFAULT.getName(), ReadType.NO_CACHE); - // The cached filesystem might not be configured with the above read type. - conf.setBoolean(ALLUXIO_HADOOP_FILESYSTEM_DISABLE_CACHE, true); - return conf; - } - - /** - * @return a new Hadoop conf with alluxio write type set to through - */ - public static Configuration writeThroughConf() { - Configuration conf = new Configuration(); - conf.setEnum(PropertyKey.USER_FILE_WRITE_TYPE_DEFAULT.getName(), WriteType.THROUGH); - // The cached filesystem might not be configured with the above write type. 
- conf.setBoolean(ALLUXIO_HADOOP_FILESYSTEM_DISABLE_CACHE, true); - return conf; - } - - private ReadWriterUtils() {} // Prevent initialization -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/SchemaConversionUtils.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/SchemaConversionUtils.java deleted file mode 100644 index dfcbc7036b12..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/SchemaConversionUtils.java +++ /dev/null @@ -1,121 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format; - -import alluxio.job.plan.transform.FieldSchema; -import alluxio.job.plan.transform.HiveConstants; -import alluxio.job.plan.transform.format.csv.CsvUtils; -import alluxio.job.plan.transform.format.csv.Decimal; - -import org.apache.avro.LogicalTypes; -import org.apache.avro.Schema; -import org.apache.avro.SchemaBuilder; - -import java.io.IOException; -import java.util.Arrays; -import java.util.List; - -/** - * Utility Class for converting schema to Parquet. - */ -public class SchemaConversionUtils { - private static final String JAVA_CLASS_FLAG = "java-class"; - - /** - * Builds write schema. 
- * @param fields the fields - * @return the write schema - */ - public static Schema buildWriteSchema(List fields) - throws IOException { - SchemaBuilder.FieldAssembler assembler = - SchemaBuilder.record(Schema.Type.RECORD.getName()).fields(); - for (FieldSchema field : fields) { - assembler = buildWriteField(assembler, field); - } - return assembler.endRecord(); - } - - private static Schema makeOptional(Schema schema) { - return Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), schema)); - } - - /** - * Builds the fields that are consistent with {@link Schema}. - * @param assembler the field assembler - * @param field field to add to the field assembler - * @return new field assembler with the existing fields + the new field - */ - public static SchemaBuilder.FieldAssembler buildConsistentField( - SchemaBuilder.FieldAssembler assembler, FieldSchema field) throws IOException { - String name = field.getName(); - String type = field.getType(); - - switch (HiveConstants.Types.getHiveConstantType(type)) { - case HiveConstants.Types.BOOLEAN: - return assembler.optionalBoolean(name); - case HiveConstants.Types.TINYINT: - case HiveConstants.Types.SMALLINT: - case HiveConstants.Types.INT: - return assembler.optionalInt(name); - case HiveConstants.Types.DOUBLE: - return assembler.optionalDouble(name); - case HiveConstants.Types.FLOAT: - return assembler.optionalFloat(name); - case HiveConstants.Types.BIGINT: - return assembler.requiredLong(name); - case HiveConstants.Types.STRING: - case HiveConstants.Types.VARCHAR: - return assembler.optionalString(name); - case HiveConstants.Types.CHAR: - Schema schema = SchemaBuilder.builder().stringBuilder().prop(JAVA_CLASS_FLAG, - Character.class.getCanonicalName()) - .endString(); - schema = makeOptional(schema); - return assembler.name(name).type(schema).noDefault(); - default: - throw new IOException("Unsupported type " + type + " for field " + name); - } - } - - private static SchemaBuilder.FieldAssembler 
buildWriteField( - SchemaBuilder.FieldAssembler assembler, FieldSchema field) throws IOException { - if (!CsvUtils.isReadWriteTypeInconsistent(field.getType())) { - return buildConsistentField(assembler, field); - } - - String name = field.getName(); - String type = field.getType(); - Schema schema; - - switch (HiveConstants.Types.getHiveConstantType(type)) { - case HiveConstants.Types.DECIMAL: - Decimal decimal = new Decimal(type); - schema = LogicalTypes.decimal(decimal.getPrecision(), decimal.getScale()) - .addToSchema(Schema.create(Schema.Type.BYTES)); - schema = makeOptional(schema); - return assembler.name(name).type(schema).noDefault(); - case HiveConstants.Types.BINARY: - return assembler.optionalBytes(name); - case HiveConstants.Types.DATE: - schema = LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT)); - schema = makeOptional(schema); - return assembler.name(name).type(schema).noDefault(); - case HiveConstants.Types.TIMESTAMP: - schema = LogicalTypes.timestampMillis().addToSchema(Schema.create(Schema.Type.LONG)); - schema = makeOptional(schema); - return assembler.name(name).type(schema).noDefault(); - default: - throw new IOException("Unsupported type " + type + " for field " + name); - } - } -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/TableReader.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/TableReader.java deleted file mode 100644 index 004005289536..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/TableReader.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format; - -import alluxio.AlluxioURI; -import alluxio.job.plan.transform.Format; -import alluxio.job.plan.transform.PartitionInfo; -import alluxio.job.plan.transform.format.csv.CsvReader; -import alluxio.job.plan.transform.format.orc.OrcReader; -import alluxio.job.plan.transform.format.parquet.ParquetReader; - -import java.io.Closeable; -import java.io.IOException; - -/** - * A reader for reading rows in a table. - */ -public interface TableReader extends Closeable { - /** - * @param uri the URI to the input - * @param pInfo the partition info from catalog service - * @return the reader for the input - * @throws IOException when failed to create the reader - */ - static TableReader create(AlluxioURI uri, PartitionInfo pInfo) throws IOException { - ReadWriterUtils.checkUri(uri); - Format format = pInfo.getFormat(uri.getName()); - switch (format) { - case CSV: - // fall through - case GZIP_CSV: - return CsvReader.create(uri, pInfo); - case PARQUET: - return ParquetReader.create(uri); - case ORC: - return OrcReader.create(uri); - default: - throw new IOException("Unsupported format: " + format); - } - } - - /** - * @return the table schema - * @throws IOException when failed to read the schema - */ - TableSchema getSchema() throws IOException; - - /** - * @return the next row or null if there are no more rows - * @throws IOException when read fails - */ - TableRow read() throws IOException; - - /** - * Closes the reader, which will close the underlying stream. 
- * - * @throws IOException when failing to close the underlying stream - */ - void close() throws IOException; -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/TableRow.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/TableRow.java deleted file mode 100644 index a0b4e7dffbe3..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/TableRow.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format; - -import alluxio.job.plan.transform.format.parquet.ParquetRow; - -import java.io.IOException; - -/** - * A row in a table. - */ -public interface TableRow { - /** - * @return the row in parquet representation - * @throws IOException when failed to transform to parquet - */ - ParquetRow toParquet() throws IOException; - - /** - * @param column the column - * @return the column value - */ - Object getColumn(String column); -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/TableSchema.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/TableSchema.java deleted file mode 100644 index 8057cdc75771..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/TableSchema.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format; - -import alluxio.job.plan.transform.format.parquet.ParquetSchema; - -/** - * The table schema. - */ -public interface TableSchema { - /** - * @return the schema in parquet representation - */ - ParquetSchema toParquet(); -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/TableWriter.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/TableWriter.java deleted file mode 100644 index 0a819b18bb78..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/TableWriter.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format; - -import alluxio.AlluxioURI; -import alluxio.job.plan.transform.PartitionInfo; -import alluxio.job.plan.transform.format.parquet.ParquetWriter; - -import java.io.Closeable; -import java.io.IOException; -import javax.annotation.Nullable; - -/** - * A writer for writing rows to a table. 
- */ -public interface TableWriter extends Closeable { - /** - * @param schema the table schema - * @param uri the URI to the output - * @return the writer for the output - */ - static TableWriter create(TableSchema schema, AlluxioURI uri) throws IOException { - return create(schema, uri, null); - } - - /** - * @param schema the table schema - * @param uri the URI to the output - * @param partitionInfo the partition info (default configuration is used if null) - * @return the writer for the output - */ - static TableWriter create(TableSchema schema, AlluxioURI uri, - @Nullable PartitionInfo partitionInfo) throws IOException { - ReadWriterUtils.checkUri(uri); - return ParquetWriter.create(schema, uri, partitionInfo); - } - - /** - * Writes a row. - * - * @param row a row - * @throws IOException when write fails - */ - void write(TableRow row) throws IOException; - - /** - * Closes a writer, which means the table is complete now. - * - * @throws IOException when failing to close the underlying output stream - */ - void close() throws IOException; - - /** - * @return the number of rows that have been written - */ - int getRows(); - - /** - * Note that the bytes written should take compression and encoding into consideration. - * If the writer writes to a file, the bytes written should be an estimate of the actual bytes - * written to the file. - * - * @return the number of bytes that have been written - */ - long getBytes(); -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/CsvReader.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/CsvReader.java deleted file mode 100644 index 77017a56a109..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/CsvReader.java +++ /dev/null @@ -1,130 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format.csv; - -import alluxio.AlluxioURI; -import alluxio.job.plan.transform.Format; -import alluxio.job.plan.transform.HiveConstants; -import alluxio.job.plan.transform.PartitionInfo; -import alluxio.job.plan.transform.format.JobPath; -import alluxio.job.plan.transform.format.ReadWriterUtils; -import alluxio.job.plan.transform.format.TableReader; -import alluxio.job.plan.transform.format.TableRow; -import alluxio.job.plan.transform.format.TableSchema; - -import com.google.common.io.Closer; -import org.apache.avro.generic.GenericData.Record; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.parquet.cli.csv.AvroCSVReader; -import org.apache.parquet.cli.csv.CSVProperties; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.io.InputStream; -import java.util.Map; -import java.util.zip.GZIPInputStream; - -/** - * A reader for reading {@link CsvRow}. 
- */ -public final class CsvReader implements TableReader { - private static final Logger LOG = LoggerFactory.getLogger(CsvReader.class); - - private final FileSystem mFs; - private final AvroCSVReader mReader; - private final Closer mCloser; - private final CsvSchema mSchema; - - private CsvReader(JobPath inputPath, PartitionInfo pInfo) throws IOException { - mCloser = Closer.create(); - try { - mSchema = new CsvSchema(pInfo.getFields()); - - Configuration conf = ReadWriterUtils.readNoCacheConf(); - mFs = inputPath.getFileSystem(conf); - boolean isGzipped = pInfo.getFormat(inputPath.getName()).equals(Format.GZIP_CSV); - InputStream input = open(mFs, inputPath, isGzipped); - - CSVProperties props = buildProperties(pInfo.getTableProperties(), - pInfo.getSerdeProperties()); - - try { - mReader = mCloser.register(new AvroCSVReader<>(input, props, mSchema.getReadSchema(), - Record.class, false)); - } catch (RuntimeException e) { - throw new IOException("Failed to create CSV reader", e); - } - } catch (IOException e) { - try { - mCloser.close(); - } catch (IOException ioe) { - e.addSuppressed(ioe); - } - throw e; - } - } - - private CSVProperties buildProperties(Map tableProperties, - Map serdeProperties) { - CSVProperties.Builder propsBuilder = new CSVProperties.Builder(); - if (tableProperties.containsKey(HiveConstants.LINES_TO_SKIP)) { - propsBuilder.linesToSkip(Integer.parseInt(tableProperties.get(HiveConstants.LINES_TO_SKIP))); - } - if (serdeProperties.containsKey(HiveConstants.FIELD_DELIM)) { - propsBuilder.delimiter(serdeProperties.get(HiveConstants.FIELD_DELIM)); - } - return propsBuilder.build(); - } - - private static InputStream open(FileSystem fs, Path path, boolean isGzipped) throws IOException { - InputStream stream = fs.open(path); - if (isGzipped) { - stream = new GZIPInputStream(stream); - } - return stream; - } - - /** - * Creates a CSV reader. 
- * - * @param uri the URI to the input - * @param pInfo the partition info - * @return the reader - * @throws IOException when failed to create the reader - */ - public static CsvReader create(AlluxioURI uri, PartitionInfo pInfo) throws IOException { - JobPath path = new JobPath(uri.getScheme(), uri.getAuthority().toString(), uri.getPath()); - return new CsvReader(path, pInfo); - } - - @Override - public TableSchema getSchema() throws IOException { - return mSchema; - } - - @Override - public TableRow read() throws IOException { - try { - return mReader.hasNext() ? new CsvRow(mSchema, mReader.next()) : null; - } catch (Throwable e) { - throw new IOException(e.getMessage(), e.getCause()); - } - } - - @Override - public void close() throws IOException { - mCloser.close(); - } -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/CsvRow.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/CsvRow.java deleted file mode 100644 index a18f020a1035..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/CsvRow.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.job.plan.transform.format.csv; - -import alluxio.job.plan.transform.FieldSchema; -import alluxio.job.plan.transform.HiveConstants; -import alluxio.job.plan.transform.format.TableRow; -import alluxio.job.plan.transform.format.parquet.ParquetRow; - -import com.google.common.base.Objects; -import com.google.common.base.Preconditions; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericData.Record; -import org.apache.avro.generic.GenericRecordBuilder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.sql.Timestamp; -import java.time.LocalDate; -import java.util.Base64; -import javax.validation.constraints.NotNull; - -/** - * A row in a CSV table represented in Avro format. - */ -public final class CsvRow implements TableRow { - private static final Logger LOG = LoggerFactory.getLogger(CsvRow.class); - - private final CsvSchema mSchema; - private final Record mRecord; - - /** - * @param schema the CSV schema - * @param record the representation of a row in a Parquet table in the Avro format - */ - public CsvRow(@NotNull CsvSchema schema, @NotNull Record record) { - mSchema = Preconditions.checkNotNull(schema, "schema"); - mRecord = Preconditions.checkNotNull(record, "record"); - } - - @Override - public Object getColumn(String column) { - return mRecord.get(column); - } - - @Override - public ParquetRow toParquet() throws IOException { - Schema writeSchema = mSchema.getWriteSchema(); - GenericRecordBuilder recordBuilder = new GenericRecordBuilder(writeSchema); - for (FieldSchema field : mSchema.getAlluxioSchema()) { - String name = field.getName(); - String type = field.getType(); - Object value = mRecord.get(name); - value = convert(value, name, type); - recordBuilder.set(writeSchema.getField(name), value); - } - return new ParquetRow(recordBuilder.build()); - } - - /* - * @param value the value read based on the read schema - * 
@param name the name of the field - * @param type the type of the value based on Alluxio table schema - * @return the value in format of the write schema - * @throws IOException when conversion failed - */ - private Object convert(Object value, String name, String type) throws IOException { - if (!CsvUtils.isReadWriteTypeInconsistent(type)) { - return value; - } - - // Value is read from CSV as a string. - String v = (String) value; - - // Interpretation of the string is based on the following documents: - // - // cwiki.apache.org/confluence/display/Hive/LanguageManual+Types#LanguageManualTypes-dates - // github.com/apache/parquet-format/blob/master/LogicalTypes.md - - switch (HiveConstants.Types.getHiveConstantType(type)) { - case HiveConstants.Types.DECIMAL: - // CSV: 12.34, precision=2, scale=4 - // Parquet: byte[] representation of number 123400 - Decimal decimal = new Decimal(type); - return decimal.toParquetBytes(v); - case HiveConstants.Types.BINARY: - // CSV: binary is encoded into base64, then encoded as UTF-8 - // Parquet: the decoded byte array - return Base64.getDecoder().decode(v.getBytes(StandardCharsets.UTF_8)); - case HiveConstants.Types.DATE: - // CSV: 2019-01-02 - // Parquet: days from the Unix epoch - try { - return LocalDate.parse(v).toEpochDay(); - } catch (Throwable e) { - throw new IOException("Failed to parse '" + v + "' as DATE: " + e); - } - case HiveConstants.Types.TIMESTAMP: - // CSV: 2019-10-29 10:17:42.338 - // Parquet: milliseconds from the Unix epoch - try { - return Timestamp.valueOf(v).getTime(); - } catch (Throwable e) { - throw new IOException("Failed to parse '" + v + "' as TIMESTAMP: " + e); - } - default: - throw new IOException("Unsupported type " + type + " for field " + name); - } - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (!(o instanceof CsvRow)) { - return false; - } - CsvRow that = (CsvRow) o; - return Objects.equal(mRecord, that.mRecord) - && Objects.equal(mSchema, 
that.mSchema); - } - - @Override - public int hashCode() { - return Objects.hashCode(mRecord, mSchema); - } -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/CsvSchema.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/CsvSchema.java deleted file mode 100644 index 83fd30d07161..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/CsvSchema.java +++ /dev/null @@ -1,127 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format.csv; - -import alluxio.job.plan.transform.FieldSchema; -import alluxio.job.plan.transform.format.SchemaConversionUtils; -import alluxio.job.plan.transform.format.TableSchema; -import alluxio.job.plan.transform.format.parquet.ParquetSchema; - -import com.google.common.base.Objects; -import org.apache.avro.Schema; -import org.apache.avro.SchemaBuilder; - -import java.io.IOException; -import java.util.ArrayList; -import javax.validation.constraints.NotNull; - -/** - * CSV table schema in Avro format. - */ -public final class CsvSchema implements TableSchema { - - /** The schema from Alluxio table master. */ - private final ArrayList mAlluxioSchema; - /** The schema for reading from CSV. */ - private final Schema mReadSchema; - /** The schema for writing. */ - private final Schema mWriteSchema; - - /** - * {@link CsvReader} uses {@link org.apache.parquet.cli.csv.AvroCSVReader} to read records from - * CSV. 
{@link org.apache.parquet.cli.csv.AvroCSVReader} internally uses - * {@link org.apache.parquet.cli.csv.RecordBuilder}, which does not support BYTES, and some - * logical types such as DECIMAL. So we use readSchema to let AvroCSVReader be able to read the - * record, then when writing out the record, we interpret the record according to - * writeSchema. - * - * For example, if a column is of type DECIMAL, in readSchema, the type will be STRING, - * but in writeSchema, it's logical type DECIMAL backed by BYTES. - * - * @param schema the schema from Alluxio table master - * @throws IOException when failed to initialize schema - */ - public CsvSchema(@NotNull ArrayList schema) throws IOException { - mAlluxioSchema = schema; - mReadSchema = buildReadSchema(Schema.Type.RECORD.getName(), schema); - mWriteSchema = SchemaConversionUtils.buildWriteSchema(schema); - } - - /** - * @return the schema from Alluxio table master - */ - public ArrayList getAlluxioSchema() { - return mAlluxioSchema; - } - - /** - * @return the schema for reading from CSV - */ - public Schema getReadSchema() { - return mReadSchema; - } - - /** - * @return the schema for writing - */ - public Schema getWriteSchema() { - return mWriteSchema; - } - - @Override - public ParquetSchema toParquet() { - return new ParquetSchema(mWriteSchema); - } - - @Override - public int hashCode() { - return Objects.hashCode(mAlluxioSchema, mReadSchema, mWriteSchema); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (!(o instanceof CsvSchema)) { - return false; - } - CsvSchema that = (CsvSchema) o; - return Objects.equal(mAlluxioSchema, that.mAlluxioSchema) - && Objects.equal(mReadSchema, that.mReadSchema) - && Objects.equal(mWriteSchema, that.mWriteSchema); - } - - private Schema buildReadSchema(String name, ArrayList fields) throws IOException { - SchemaBuilder.FieldAssembler assembler = - SchemaBuilder.record(name).fields(); - for (FieldSchema field : fields) { - 
assembler = buildReadField(assembler, field); - } - return assembler.endRecord(); - } - - private SchemaBuilder.FieldAssembler buildReadField( - SchemaBuilder.FieldAssembler assembler, FieldSchema field) throws IOException { - if (!CsvUtils.isReadWriteTypeInconsistent(field.getType())) { - return SchemaConversionUtils.buildConsistentField(assembler, field); - } - - String name = field.getName(); - // 1. Use string for arbitrary precision for decimal. - // 2. Use string for UTF-8 encoded binary values. - // 3. Use string for CSV text format of date and timestamp: - // date: 2019-01-02 - // timestamp: 2019-10-29 10:17:42.338 - return assembler.optionalString(name); - } -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/CsvUtils.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/CsvUtils.java deleted file mode 100644 index 0f41310225ee..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/CsvUtils.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format.csv; - -import alluxio.job.plan.transform.HiveConstants; - -/** - * Utilities for implementing csv reader and writer. 
- */ -public class CsvUtils { - private CsvUtils() {} // Prevents initialization - - /** - * @param type the type of a field - * @return whether the field has different types in read and write schema - */ - public static boolean isReadWriteTypeInconsistent(String type) { - return type.startsWith(HiveConstants.Types.DECIMAL) - || type.equals(HiveConstants.Types.BINARY) - || type.equals(HiveConstants.Types.DATE) - || type.equals(HiveConstants.Types.TIMESTAMP); - } -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/CsvWriter.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/CsvWriter.java deleted file mode 100644 index 6464eb5b9328..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/CsvWriter.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format.csv; - -import alluxio.job.plan.transform.format.TableRow; -import alluxio.job.plan.transform.format.TableWriter; - -import java.io.IOException; - -/** - * A writer for writing {@link CsvRow}. 
- */ -public final class CsvWriter implements TableWriter { - @Override - public void write(TableRow row) throws IOException { - // TODO(cc) - } - - @Override - public void close() throws IOException { - // TODO(cc) - } - - @Override - public int getRows() { - // TODO(cc) - return 0; - } - - @Override - public long getBytes() { - // TODO(cc) - return 0; - } -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/Decimal.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/Decimal.java deleted file mode 100644 index 465a332ba1ec..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/csv/Decimal.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format.csv; - -import alluxio.collections.Pair; - -import org.apache.commons.lang3.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.math.BigDecimal; - -/** - * Logical decimal type in Parquet. 
- */ -public class Decimal { - private static final Logger LOG = LoggerFactory.getLogger(Decimal.class); - - private final int mPrecision; - private final int mScale; - - /** - * @param type the type definition, like "decimal(10, 2)" - */ - public Decimal(String type) { - Pair precisionAndScale = getPrecisionAndScale(type); - mPrecision = precisionAndScale.getFirst(); - mScale = precisionAndScale.getSecond(); - } - - /** - * @return the precision - */ - public int getPrecision() { - return mPrecision; - } - - /** - * @return the scale - */ - public int getScale() { - return mScale; - } - - /** - * Returns the decimal's precision and scale from the type definition. - * - * @param type the type definition, like "decimal(10, 2)" - * @return the decimal's precision and scale as a Pair - */ - public static Pair getPrecisionAndScale(String type) { - type = type.trim(); - String param = type.substring(8, type.length() - 1); - String[] params = param.split(","); - return new Pair<>(Integer.parseInt(params[0].trim()), Integer.parseInt(params[1].trim())); - } - - /** - * @param v the string value - * @return the decimal with the expected scale - */ - public BigDecimal toBigDecimal(String v) { - int pointIndex = v.indexOf('.'); - int fractionLen = 0; - if (pointIndex != -1) { - fractionLen = v.length() - pointIndex - 1; - } else { - v += "."; - } - - if (fractionLen >= mScale) { - v = v.substring(0, v.length() - (fractionLen - mScale)); - } else { - v = StringUtils.rightPad(v, v.length() + (mScale - fractionLen), '0'); - } - return new BigDecimal(v); - } - - /** - * @param v the string value - * @return the encoded bytes to write to parquet - */ - public byte[] toParquetBytes(String v) { - return toBigDecimal(v).unscaledValue().toByteArray(); - } -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/orc/OrcReader.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/orc/OrcReader.java deleted file mode 100644 index 
8cb1c5f39c9e..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/orc/OrcReader.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format.orc; - -import alluxio.AlluxioURI; -import alluxio.job.plan.transform.format.JobPath; -import alluxio.job.plan.transform.format.ReadWriterUtils; -import alluxio.job.plan.transform.format.TableReader; -import alluxio.job.plan.transform.format.TableRow; -import alluxio.job.plan.transform.format.TableSchema; - -import com.google.common.io.Closer; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.orc.OrcFile; -import org.apache.orc.Reader; -import org.apache.orc.RecordReader; - -import java.io.IOException; -import java.util.List; - -/** - * The Orc reader. - */ -public final class OrcReader implements TableReader { - - private final Closer mCloser; - private final OrcSchema mSchema; - private final Reader mReader; - private final RecordReader mRows; - private final List mFieldNames; - - /** - * The current processing batch of the Orc reader for the read() operation. - */ - private VectorizedRowBatch mCurrentBatch; - /** - * The row position inside the vectorized row batch to return next for read(). 
- */ - private int mCurrentBatchPosition; - - private OrcReader(JobPath inputPath) throws IOException { - mCloser = Closer.create(); - try { - - Configuration conf = ReadWriterUtils.readNoCacheConf(); - - mReader = mCloser.register(OrcFile.createReader(inputPath, OrcFile.readerOptions(conf))); - mFieldNames = mReader.getSchema().getFieldNames(); - mRows = mReader.rows(); - - mSchema = new OrcSchema(mReader); - } catch (IOException e) { - try { - mCloser.close(); - } catch (IOException ioe) { - e.addSuppressed(ioe); - } - throw e; - } - } - - /** - * @param uri the alluxio uri of the orc file - * @return new instance of OrcReader - */ - public static OrcReader create(AlluxioURI uri) throws IOException { - JobPath path = new JobPath(uri.getScheme(), uri.getAuthority().toString(), uri.getPath()); - return new OrcReader(path); - } - - @Override - public TableSchema getSchema() { - return mSchema; - } - - @Override - public TableRow read() throws IOException { - if (mCurrentBatch == null || mCurrentBatch.size <= mCurrentBatchPosition) { - mCurrentBatch = mReader.getSchema().createRowBatch(); - mCurrentBatchPosition = 0; - if (!mRows.nextBatch(mCurrentBatch)) { - return null; - } - } - - return new OrcRow(mSchema, mCurrentBatch, mCurrentBatchPosition++, mFieldNames); - } - - @Override - public void close() throws IOException { - mCloser.close(); - } -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/orc/OrcRow.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/orc/OrcRow.java deleted file mode 100644 index 16f0f5ea413d..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/orc/OrcRow.java +++ /dev/null @@ -1,134 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format.orc; - -import alluxio.job.plan.transform.FieldSchema; -import alluxio.job.plan.transform.HiveConstants; -import alluxio.job.plan.transform.format.TableRow; -import alluxio.job.plan.transform.format.csv.Decimal; -import alluxio.job.plan.transform.format.parquet.ParquetRow; - -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecordBuilder; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VoidColumnVector; - -import java.io.IOException; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * A row in a Orc table. - */ -public class OrcRow implements TableRow { - private final VectorizedRowBatch mBatch; - private final int mPosition; - private final Map mColumnNamePosition; - - private final OrcSchema mSchema; - - /** - * Constructor for OrcRow. 
- * @param schema the schema - * @param batch the vectorized row batch - * @param position the row position inside the vectorized row batch - * @param fieldNames ordered list of field names - */ - public OrcRow(OrcSchema schema, VectorizedRowBatch batch, int position, - List fieldNames) { - mSchema = schema; - mBatch = batch; - mPosition = position; - mColumnNamePosition = new HashMap<>(); - - for (int i = 0; i < fieldNames.size(); i++) { - final String fieldName = fieldNames.get(i); - - mColumnNamePosition.put(fieldName, i); - } - } - - @Override - public ParquetRow toParquet() throws IOException { - Schema writeSchema = mSchema.getWriteSchema(); - GenericRecordBuilder recordBuilder = new GenericRecordBuilder(writeSchema); - for (FieldSchema field : mSchema.getAlluxioSchema()) { - String name = field.getName(); - String type = field.getType(); - Object value = getColumn(name); - recordBuilder.set(writeSchema.getField(name), convert(value, name, type)); - } - return new ParquetRow(recordBuilder.build()); - } - - @Override - public Object getColumn(String column) { - final Integer columnPosition = mColumnNamePosition.get(column); - - if (columnPosition == null) { - throw new IllegalArgumentException("Invalid column name: " + column); - } - - final ColumnVector col = mBatch.cols[columnPosition]; - - if (col.isNull[mPosition]) { - return null; - } - - if (col instanceof TimestampColumnVector) { - return ((TimestampColumnVector) col).asScratchTimestamp(mPosition).getTime(); - } else if (col instanceof VoidColumnVector) { - return null; - } else if (col instanceof DecimalColumnVector) { - return ((DecimalColumnVector) col).vector[mPosition] - .getHiveDecimal(); - } else if (col instanceof LongColumnVector) { - return ((LongColumnVector) col).vector[mPosition]; - } else if (col instanceof BytesColumnVector) { - BytesColumnVector bcv = (BytesColumnVector) col; - return Arrays.copyOfRange(bcv.vector[mPosition], bcv.start[mPosition], - bcv.start[mPosition] + 
bcv.length[mPosition]); - } else if (col instanceof DoubleColumnVector) { - return ((DoubleColumnVector) col).vector[mPosition]; - } - - throw new UnsupportedOperationException("Unsupported column vector: " - + col.getClass().getName()); - } - - private Object convert(Object value, String name, String type) { - if (value == null) { - return null; - } - - switch (HiveConstants.Types.getHiveConstantType(type)) { - case HiveConstants.Types.DECIMAL: - final Decimal decimal = new Decimal(type); - return ((HiveDecimal) value).bigIntegerBytesScaled(decimal.getScale()); - case HiveConstants.Types.VARCHAR: - case HiveConstants.Types.CHAR: - case HiveConstants.Types.STRING: - return new String((byte[]) value); - default: - return value; - } - } -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/orc/OrcSchema.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/orc/OrcSchema.java deleted file mode 100644 index f0aaf1b9ba96..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/orc/OrcSchema.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.job.plan.transform.format.orc; - -import alluxio.job.plan.transform.FieldSchema; -import alluxio.job.plan.transform.format.SchemaConversionUtils; -import alluxio.job.plan.transform.format.TableSchema; -import alluxio.job.plan.transform.format.parquet.ParquetSchema; - -import org.apache.avro.Schema; -import org.apache.orc.Reader; -import org.apache.orc.TypeDescription; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -/** - * The Orc Schema. - */ -public class OrcSchema implements TableSchema { - private final ArrayList mAlluxioSchema; - - private final Schema mWriteSchema; - - /** - * Default constructor for OrcSchema. - * @param reader the orc reader - */ - public OrcSchema(Reader reader) throws IOException { - final List fieldNames = reader.getSchema().getFieldNames(); - mAlluxioSchema = new ArrayList<>(fieldNames.size()); - for (int i = 0; i < fieldNames.size(); i++) { - final String fieldName = fieldNames.get(i); - - final String type = getType(reader.getSchema().getChildren().get(i)); - - mAlluxioSchema.add(new FieldSchema(i, fieldName, type, "")); - } - - mWriteSchema = SchemaConversionUtils.buildWriteSchema(mAlluxioSchema); - } - - private String getType(TypeDescription typeDescription) { - final TypeDescription.Category category = typeDescription.getCategory(); - switch (category) { - case DECIMAL: - return String.format("decimal(%d,%d)", typeDescription.getPrecision(), - typeDescription.getScale()); - case CHAR: - case VARCHAR: - return String.format("%s(%d)", category.getName(), typeDescription.getMaxLength()); - default: - return category.getName(); - } - } - - @Override - public ParquetSchema toParquet() { - return new ParquetSchema(mWriteSchema); - } - - /** - * @return the alluxio schema - */ - public ArrayList getAlluxioSchema() { - return mAlluxioSchema; - } - - /** - * @return the write schema - */ - public Schema getWriteSchema() { - return mWriteSchema; - } -} diff --git 
a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/parquet/ParquetReader.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/parquet/ParquetReader.java deleted file mode 100644 index 896ad4c5c4aa..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/parquet/ParquetReader.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format.parquet; - -import alluxio.AlluxioURI; -import alluxio.job.plan.transform.format.JobPath; -import alluxio.job.plan.transform.format.ReadWriterUtils; -import alluxio.job.plan.transform.format.TableReader; -import alluxio.job.plan.transform.format.TableRow; -import alluxio.job.plan.transform.format.TableSchema; - -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericData.Record; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.parquet.ParquetReadOptions; -import org.apache.parquet.avro.AvroParquetReader; -import org.apache.parquet.avro.AvroSchemaConverter; -import org.apache.parquet.hadoop.ParquetFileReader; -import org.apache.parquet.hadoop.metadata.ParquetMetadata; -import org.apache.parquet.hadoop.util.HadoopInputFile; -import org.apache.parquet.io.InputFile; - -import java.io.IOException; - -/** - * A reader for reading {@link ParquetRow}. 
- */ -public final class ParquetReader implements TableReader { - private final org.apache.parquet.hadoop.ParquetReader mReader; - private final ParquetSchema mSchema; - private final ParquetMetadata mMetadata; - - private ParquetReader(org.apache.parquet.hadoop.ParquetReader reader, Schema schema, - ParquetMetadata metadata) { - mReader = reader; - mSchema = new ParquetSchema(schema); - mMetadata = metadata; - } - - /** - * Creates a parquet reader. - * - * @param uri the URI to the input - * @return the reader - * @throws IOException when failed to create the reader - */ - public static ParquetReader create(AlluxioURI uri) throws IOException { - Path inputPath = new JobPath(uri.getScheme(), uri.getAuthority().toString(), uri.getPath()); - Configuration conf = ReadWriterUtils.readNoCacheConf(); - InputFile inputFile = HadoopInputFile.fromPath(inputPath, conf); - org.apache.parquet.hadoop.ParquetReader reader = - AvroParquetReader.builder(inputFile) - .disableCompatibility() - .withDataModel(GenericData.get()) - .withConf(conf) - .build(); - - Schema schema; - ParquetMetadata footer; - try (ParquetFileReader r = new ParquetFileReader(inputFile, - ParquetReadOptions.builder().build())) { - footer = r.getFooter(); - schema = new AvroSchemaConverter().convert(footer.getFileMetaData().getSchema()); - } - - return new ParquetReader(reader, schema, footer); - } - - @Override - public TableSchema getSchema() throws IOException { - return mSchema; - } - - /** - * @return the Parquet metadata - */ - public ParquetMetadata getMetadata() { - return mMetadata; - } - - @Override - public TableRow read() throws IOException { - Record record = mReader.read(); - return record == null ? 
null : new ParquetRow(record); - } - - @Override - public void close() throws IOException { - mReader.close(); - } -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/parquet/ParquetRow.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/parquet/ParquetRow.java deleted file mode 100644 index f5f5afa2fa47..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/parquet/ParquetRow.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format.parquet; - -import alluxio.job.plan.transform.format.TableRow; - -import com.google.common.base.Preconditions; -import org.apache.avro.generic.GenericData.Record; - -import java.io.IOException; -import java.util.Objects; -import javax.validation.constraints.NotNull; - -/** - * A row in a Parquet table represented in Avro format. 
- */ -public final class ParquetRow implements TableRow { - private final Record mRecord; - - /** - * @param record the representation of a row in a Parquet table in the Avro format - */ - public ParquetRow(@NotNull Record record) { - mRecord = Preconditions.checkNotNull(record, "record"); - } - - @Override - public Object getColumn(String column) { - return mRecord.get(column); - } - - /** - * @return the row represented in Avro format - */ - public Record getRecord() { - return mRecord; - } - - @Override - public ParquetRow toParquet() throws IOException { - return this; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (!(o instanceof ParquetRow)) { - return false; - } - ParquetRow that = (ParquetRow) o; - return Objects.equals(mRecord, that.mRecord); - } - - @Override - public int hashCode() { - return mRecord.hashCode(); - } -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/parquet/ParquetSchema.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/parquet/ParquetSchema.java deleted file mode 100644 index 5833c68d643a..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/parquet/ParquetSchema.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.job.plan.transform.format.parquet; - -import alluxio.job.plan.transform.format.TableSchema; - -import com.google.common.base.Objects; -import org.apache.avro.Schema; - -/** - * Parquet table schema in Avro format. - */ -public final class ParquetSchema implements TableSchema { - private final Schema mSchema; - - /** - * @param schema the table schema in Avro format - */ - public ParquetSchema(Schema schema) { - mSchema = schema; - } - - /** - * @return the schema - */ - public Schema getSchema() { - return mSchema; - } - - @Override - public ParquetSchema toParquet() { - return this; - } - - @Override - public int hashCode() { - return mSchema.hashCode(); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (!(o instanceof ParquetSchema)) { - return false; - } - ParquetSchema that = (ParquetSchema) o; - return Objects.equal(mSchema, that.mSchema); - } -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/parquet/ParquetWriter.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/parquet/ParquetWriter.java deleted file mode 100644 index 5c1847c63c81..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/parquet/ParquetWriter.java +++ /dev/null @@ -1,161 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.job.plan.transform.format.parquet; - -import alluxio.AlluxioURI; -import alluxio.job.plan.transform.PartitionInfo; -import alluxio.job.plan.transform.format.JobPath; -import alluxio.job.plan.transform.format.ReadWriterUtils; -import alluxio.job.plan.transform.format.TableRow; -import alluxio.job.plan.transform.format.TableSchema; -import alluxio.job.plan.transform.format.TableWriter; - -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericData.Record; -import org.apache.hadoop.conf.Configuration; -import org.apache.parquet.avro.AvroParquetWriter; -import org.apache.parquet.column.ParquetProperties; -import org.apache.parquet.hadoop.metadata.CompressionCodecName; -import org.apache.parquet.hadoop.util.HadoopOutputFile; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import javax.annotation.Nullable; - -/** - * A writer for writing {@link ParquetRow}. - */ -public final class ParquetWriter implements TableWriter { - private static final Logger LOG = LoggerFactory.getLogger(ParquetWriter.class); - // https://github.com/apache/parquet-mr/blob/master/parquet-hadoop/src/main/java - // /org/apache/parquet/hadoop/InternalParquetRecordWriter.java#L46 - private static final int MAX_IN_MEMORY_RECORDS = 10000; - private static final int ROW_GROUP_SIZE = - org.apache.parquet.hadoop.ParquetWriter.DEFAULT_BLOCK_SIZE; - private static final String DEFAULT_COMPRESSION_CODEC = CompressionCodecName.SNAPPY.name(); - - private final org.apache.parquet.hadoop.ParquetWriter mWriter; - private long mRecordSize; // bytes - private int mRows; - - private ParquetWriter(org.apache.parquet.hadoop.ParquetWriter writer) { - mWriter = writer; - } - - /** - * Creates a Parquet writer. 
- * - * @param schema the schema - * @param uri the URI to the output - * @return the writer - */ - public static ParquetWriter create(TableSchema schema, AlluxioURI uri) - throws IOException { - return ParquetWriter.create(schema, uri, ROW_GROUP_SIZE, true, DEFAULT_COMPRESSION_CODEC); - } - - /** - * Creates a parquet writer based on the partitionInfo. - * - * @param schema the schema - * @param uri the URI to the output - * @param partitionInfo the partitionInfo (default configuration is used if null) - * @return the writer - */ - public static ParquetWriter create(TableSchema schema, AlluxioURI uri, - @Nullable PartitionInfo partitionInfo) throws IOException { - String compressionCodec = DEFAULT_COMPRESSION_CODEC; - if (partitionInfo != null) { - compressionCodec = partitionInfo.getSerdeProperties().getOrDefault( - PartitionInfo.PARQUET_COMPRESSION, DEFAULT_COMPRESSION_CODEC); - } - return ParquetWriter.create(schema, uri, ROW_GROUP_SIZE, true, compressionCodec); - } - - /** - * Creates a Parquet writer specifying a row group size and whether to have dictionary enabled. - * - * @param schema the schema - * @param uri the URI to the output - * @param rowGroupSize the row group size - * @param enableDictionary whether to enable dictionary - * @return the writer - */ - public static ParquetWriter create(TableSchema schema, AlluxioURI uri, int rowGroupSize, - boolean enableDictionary) throws IOException { - return ParquetWriter.create(schema, uri, rowGroupSize, enableDictionary, - DEFAULT_COMPRESSION_CODEC); - } - - /** - * Creates a Parquet writer specifying a row group size. 
- * - * @param schema the schema - * @param uri the URI to the output - * @param rowGroupSize the row group size - * @param enableDictionary whether to enable dictionary - * @param compressionCodec the compression codec name - * @return the writer - */ - public static ParquetWriter create(TableSchema schema, AlluxioURI uri, int rowGroupSize, - boolean enableDictionary, String compressionCodec) - throws IOException { - Configuration conf = ReadWriterUtils.writeThroughConf(); - ParquetSchema parquetSchema = schema.toParquet(); - return new ParquetWriter(AvroParquetWriter.builder( - HadoopOutputFile.fromPath( - new JobPath(uri.getScheme(), uri.getAuthority().toString(), uri.getPath()), conf)) - .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_2_0) - .withConf(conf) - .withCompressionCodec(CompressionCodecName.fromConf(compressionCodec)) - .withRowGroupSize(rowGroupSize) - .withDictionaryPageSize(org.apache.parquet.hadoop.ParquetWriter.DEFAULT_PAGE_SIZE) - .withDictionaryEncoding(enableDictionary) - .withPageSize(org.apache.parquet.hadoop.ParquetWriter.DEFAULT_PAGE_SIZE) - .withDataModel(GenericData.get()) - .withSchema(parquetSchema.getSchema()) - .build()); - } - - @Override - public void write(TableRow row) throws IOException { - ParquetRow parquetRow = row.toParquet(); - mWriter.write(parquetRow.getRecord()); - mRows++; - if (mRows == 1) { - mRecordSize = mWriter.getDataSize(); - } - } - - @Override - public void close() throws IOException { - mWriter.close(); - } - - @Override - public int getRows() { - return mRows; - } - - @Override - public long getBytes() { - // getDataSize returns the on-disk size + in-memory size, - // on-disk size takes compression and encoding into consideration, - // but in-memory size does not. - // This method returns the estimated lower bound of the on-disk size by subtracting an - // estimated upper bound of in-memory size. - // After closing, the on-disk size will be larger due to flushing the in-memory records to disk. 
- return Math.max(0, - mWriter.getDataSize() - Math.max(ROW_GROUP_SIZE, MAX_IN_MEMORY_RECORDS * mRecordSize)); - } -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/tables/BytesCommitter.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/tables/BytesCommitter.java deleted file mode 100644 index 1abfec71235a..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/tables/BytesCommitter.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format.tables; - -import alluxio.job.plan.transform.format.TableWriter; - -/** - * Determines whether to commit a table file based on the size of the file in bytes. - * The last committed table file might have fewer bytes than the specified size. 
- */ -public class BytesCommitter implements Committer { - private final long mBytes; - - /** - * @param bytes the size of a table file in bytes - */ - public BytesCommitter(long bytes) { - mBytes = bytes; - } - - @Override - public boolean shouldCommit(TableWriter writer) { - return writer.getBytes() >= mBytes; - } -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/tables/Committer.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/tables/Committer.java deleted file mode 100644 index 66f2c18add4f..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/tables/Committer.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format.tables; - -import alluxio.job.plan.transform.format.TableWriter; - -/** - * Determines when to commit/complete a table file when writing a stream of rows to a group of - * table files. - * Designed to be used in {@link TablesWriter}. 
- */ -public interface Committer { - /** - * @param writer the current writer containing statistics about the data that has been written - * @return whether the current table file should commit/complete - */ - boolean shouldCommit(TableWriter writer); -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/tables/RowsCommitter.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/tables/RowsCommitter.java deleted file mode 100644 index f2bb5377c9a3..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/tables/RowsCommitter.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format.tables; - -import alluxio.job.plan.transform.format.TableWriter; - -/** - * Determines whether to commit based on the number of rows that have been written. - * Note that the last table file might contain rows fewer than the specified number. 
- */ -public class RowsCommitter implements Committer { - private final int mRows; - - /** - * @param rows the number of rows in one table file - */ - public RowsCommitter(int rows) { - mRows = rows; - } - - @Override - public boolean shouldCommit(TableWriter writer) { - return writer.getRows() >= mRows; - } -} diff --git a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/tables/TablesWriter.java b/dora/job/server/src/main/java/alluxio/job/plan/transform/format/tables/TablesWriter.java deleted file mode 100644 index 9a7059bf006d..000000000000 --- a/dora/job/server/src/main/java/alluxio/job/plan/transform/format/tables/TablesWriter.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format.tables; - -import alluxio.AlluxioURI; -import alluxio.job.plan.transform.format.TableRow; -import alluxio.job.plan.transform.format.TableSchema; -import alluxio.job.plan.transform.format.TableWriter; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; - -/** - * Writes a stream of rows to a list of tables, when {@link Committer} determines that the current - * table should be committed/completed, the table file is completed and a new table file is created. 
- */ -public class TablesWriter implements TableWriter { - private static final Logger LOG = LoggerFactory.getLogger(TablesWriter.class); - private static final String FILE_NAME_PATTERN = "part-%d.parquet"; - - private final Committer mCommitter; - private final TableSchema mSchema; - private final AlluxioURI mOutputDir; - private TableWriter mWriter; - private int mPart; - private int mRows; - private int mBytes; - - private TablesWriter(TableSchema schema, Committer committer, AlluxioURI outputDir, - TableWriter initialWriter) { - mOutputDir = outputDir; - mSchema = schema; - mWriter = initialWriter; - mCommitter = committer; - mPart = 0; - mRows = 0; - mBytes = 0; - } - - /** - * @param schema the table schema - * @param committer the committer - * @param outputDir the output directory - * @return a new writer - * @throws IOException when failed to create an internal table writer - */ - public static TablesWriter create(TableSchema schema, Committer committer, AlluxioURI outputDir) - throws IOException { - return new TablesWriter(schema, committer, outputDir, createWriter(schema, outputDir, 0)); - } - - @Override - public void write(TableRow row) throws IOException { - mWriter.write(row); - if (mCommitter.shouldCommit(mWriter)) { - mRows += mWriter.getRows(); - mBytes += mWriter.getBytes(); - mWriter.close(); - mWriter = createWriter(mSchema, mOutputDir, ++mPart); - } - } - - @Override - public void close() throws IOException { - mWriter.close(); - } - - @Override - public int getRows() { - return mRows + mWriter.getRows(); - } - - @Override - public long getBytes() { - return mBytes + mWriter.getBytes(); - } - - private static TableWriter createWriter(TableSchema schema, AlluxioURI outputDir, int part) - throws IOException { - String filename = String.format(FILE_NAME_PATTERN, part); - return TableWriter.create(schema, outputDir.join(filename)); - } -} diff --git a/dora/job/server/src/main/resources/META-INF/services/alluxio.job.plan.PlanDefinition 
b/dora/job/server/src/main/resources/META-INF/services/alluxio.job.plan.PlanDefinition index 707186084f86..1aaeb15cf654 100644 --- a/dora/job/server/src/main/resources/META-INF/services/alluxio.job.plan.PlanDefinition +++ b/dora/job/server/src/main/resources/META-INF/services/alluxio.job.plan.PlanDefinition @@ -5,5 +5,4 @@ alluxio.job.plan.persist.PersistDefinition alluxio.job.plan.replicate.MoveDefinition alluxio.job.plan.replicate.SetReplicaDefinition alluxio.job.plan.stress.StressBenchDefinition -alluxio.job.plan.transform.CompactDefinition alluxio.job.plan.NoopPlanDefinition diff --git a/dora/job/server/src/test/java/alluxio/job/plan/transform/BaseTransformTest.java b/dora/job/server/src/test/java/alluxio/job/plan/transform/BaseTransformTest.java deleted file mode 100644 index 3e27a2bd4d8e..000000000000 --- a/dora/job/server/src/test/java/alluxio/job/plan/transform/BaseTransformTest.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform; - -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericData.Record; -import org.apache.commons.lang3.StringUtils; - -import java.util.ArrayList; -import java.util.List; - -/** - * Base class for unit testing the transform package. - * Contains predefined table definition. 
- */ -public abstract class BaseTransformTest { - protected static final Schema SCHEMA; - protected static final Record RECORD; - protected static final String[] COLUMNS = new String[]{"a", "b", "c"}; - protected static final Integer[] VALUES = new Integer[]{ - Integer.parseInt("1001", 2), - Integer.parseInt("1100", 2), - Integer.parseInt("0110", 2), - }; - protected static final String ZORDER = StringUtils.leftPad("110011001100", 32 * 3, '0'); - - static { - List fields = new ArrayList<>(COLUMNS.length); - for (String column : COLUMNS) { - fields.add(new Schema.Field(column, Schema.create(Schema.Type.INT), null, null)); - } - SCHEMA = Schema.createRecord("schema", null, null, false, fields); - RECORD = new Record(SCHEMA); - for (int i = 0; i < COLUMNS.length; i++) { - RECORD.put(COLUMNS[i], VALUES[i]); - } - } -} diff --git a/dora/job/server/src/test/java/alluxio/job/plan/transform/CompactDefinitionSelectExecutorsTest.java b/dora/job/server/src/test/java/alluxio/job/plan/transform/CompactDefinitionSelectExecutorsTest.java deleted file mode 100644 index 16cd963f267f..000000000000 --- a/dora/job/server/src/test/java/alluxio/job/plan/transform/CompactDefinitionSelectExecutorsTest.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.job.plan.transform; - -import static org.junit.Assert.assertEquals; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import alluxio.AlluxioURI; -import alluxio.client.file.URIStatus; -import alluxio.collections.Pair; -import alluxio.job.JobServerContext; -import alluxio.job.SelectExecutorsContext; -import alluxio.job.plan.SelectExecutorsTest; -import alluxio.wire.WorkerInfo; - -import org.apache.commons.io.FileUtils; -import org.junit.Test; - -import java.util.ArrayList; -import java.util.List; -import java.util.PrimitiveIterator; -import java.util.Random; -import java.util.Set; - -public class CompactDefinitionSelectExecutorsTest extends SelectExecutorsTest { - - private static final String INPUT_DIR = "/input"; - private static final String OUTPUT_DIR = "/output"; - private static final PrimitiveIterator.OfLong LONG_STREAM - = (new Random()).longs(FileUtils.ONE_GB * 2, FileUtils.ONE_GB * 10).iterator(); - - @Test - public void testExecutorsParallel() throws Exception { - int tasksPerWorker = 10; - int numCompactedFiles = 100; - int totalFiles = 5000; - - PartitionInfo mockPartitionInfo = mock(PartitionInfo.class); - when(mockPartitionInfo.getFormat(any())).thenReturn(Format.CSV); - - CompactConfig config = new CompactConfig(mockPartitionInfo, INPUT_DIR, mockPartitionInfo, - OUTPUT_DIR, numCompactedFiles, 2 * FileUtils.ONE_GB); - - List inputFiles = new ArrayList<>(); - for (int i = 0; i < totalFiles; i++) { - inputFiles.add(newFile(Integer.toString(i))); - } - - when(mMockFileSystem.listStatus(new AlluxioURI(INPUT_DIR))).thenReturn(inputFiles); - - Set>> result = new CompactDefinition().selectExecutors( - config, SelectExecutorsTest.JOB_WORKERS, new SelectExecutorsContext(1, - new JobServerContext(mMockFileSystem, mMockFileSystemContext, mMockUfsManager))); - assertEquals(JOB_WORKERS.size() * tasksPerWorker, result.size()); - - int allCompactTasks = 0; - 
for (Pair> tasks : result) { - allCompactTasks += tasks.getSecond().size(); - } - assertEquals(numCompactedFiles, allCompactTasks); - } - - private URIStatus newFile(String name) { - URIStatus mockFileStatus = mock(URIStatus.class); - when(mockFileStatus.isFolder()).thenReturn(false); - when(mockFileStatus.getName()).thenReturn(name); - when(mockFileStatus.getLength()).thenReturn(LONG_STREAM.next()); - return mockFileStatus; - } -} diff --git a/dora/job/server/src/test/java/alluxio/job/plan/transform/format/JobPathTest.java b/dora/job/server/src/test/java/alluxio/job/plan/transform/format/JobPathTest.java deleted file mode 100644 index 586707648218..000000000000 --- a/dora/job/server/src/test/java/alluxio/job/plan/transform/format/JobPathTest.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.job.plan.transform.format; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotEquals; -import static org.mockito.Mockito.any; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.mockStatic; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; - -import alluxio.client.ReadType; -import alluxio.conf.PropertyKey; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.security.UserGroupInformation; -import org.junit.Test; -import org.mockito.MockedStatic; - -public class JobPathTest { - - @Test - public void testCache() throws Exception { - try (MockedStatic jobPathMocked = mockStatic(JobPath.class); - MockedStatic userGroupInfoMocked = - mockStatic(UserGroupInformation.class)) { - jobPathMocked.when(() -> JobPath.fileSystemGet(any(), any())) - .thenAnswer((p) -> mock(FileSystem.class)); - userGroupInfoMocked.when(UserGroupInformation::getCurrentUser).thenReturn(null); - - Configuration conf = new Configuration(); - JobPath jobPath = new JobPath("foo", "bar", "/baz"); - FileSystem fileSystem = jobPath.getFileSystem(conf); - - verify(JobPath.class, times(1)); - JobPath.fileSystemGet(any(), any()); - - assertEquals(fileSystem, jobPath.getFileSystem(conf)); - verify(JobPath.class, times(1)); - JobPath.fileSystemGet(any(), any()); - - conf.set(PropertyKey.USER_FILE_READ_TYPE_DEFAULT.toString(), ReadType.NO_CACHE.toString()); - FileSystem newFileSystem = jobPath.getFileSystem(conf); - assertNotEquals(fileSystem, newFileSystem); - verify(JobPath.class, times(2)); - JobPath.fileSystemGet(any(), any()); - - conf.set("foo", "bar"); - assertEquals(newFileSystem, jobPath.getFileSystem(conf)); - verify(JobPath.class, times(2)); - JobPath.fileSystemGet(any(), any()); - - jobPath = new JobPath("foo", "bar", "/bar"); - assertEquals(newFileSystem, jobPath.getFileSystem(conf)); - verify(JobPath.class, 
times(2)); - JobPath.fileSystemGet(any(), any()); - - jobPath = new JobPath("foo", "baz", "/bar"); - assertNotEquals(newFileSystem, jobPath.getFileSystem(conf)); - verify(JobPath.class, times(3)); - JobPath.fileSystemGet(any(), any()); - - jobPath = new JobPath("bar", "bar", "/bar"); - assertNotEquals(newFileSystem, jobPath.getFileSystem(conf)); - verify(JobPath.class, times(4)); - JobPath.fileSystemGet(any(), any()); - } - } -} diff --git a/dora/job/server/src/test/java/alluxio/job/plan/transform/format/ReadWriteTest.java b/dora/job/server/src/test/java/alluxio/job/plan/transform/format/ReadWriteTest.java deleted file mode 100644 index 27fd2b1444f6..000000000000 --- a/dora/job/server/src/test/java/alluxio/job/plan/transform/format/ReadWriteTest.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.job.plan.transform.format; - -import static org.junit.Assert.assertEquals; - -import alluxio.AlluxioURI; -import alluxio.job.plan.transform.BaseTransformTest; -import alluxio.job.plan.transform.HiveConstants; -import alluxio.job.plan.transform.PartitionInfo; -import alluxio.job.plan.transform.format.parquet.ParquetRow; -import alluxio.job.plan.transform.format.parquet.ParquetSchema; - -import com.google.common.collect.Lists; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import java.io.File; -import java.nio.file.Files; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; - -/** - * Tests {@link TableReader} and {@link TableWriter}. - */ -public final class ReadWriteTest extends BaseTransformTest { - @Rule - public TemporaryFolder mTempFolder = new TemporaryFolder(); - - private PartitionInfo mPartitionInfo = new PartitionInfo(HiveConstants.PARQUET_SERDE_CLASS, - HiveConstants.PARQUET_INPUT_FORMAT_CLASS, new HashMap<>(), new HashMap<>(), - new ArrayList<>()); - - @Test - public void readWrite() throws Exception { - final File file = mTempFolder.newFile("test.parquet"); - Files.delete(file.toPath()); - final int numRows = 10; - - TableSchema schema = new ParquetSchema(SCHEMA); - TableRow row = new ParquetRow(RECORD); - AlluxioURI uri = new AlluxioURI("file:///" + file.getPath()); - try (TableWriter writer = TableWriter.create(schema, uri, mPartitionInfo)) { - for (int r = 0; r < numRows; r++) { - writer.write(row); - } - } - - List rows = Lists.newArrayList(); - uri = new AlluxioURI("file:///" + file.getPath()); - try (TableReader reader = TableReader.create(uri, mPartitionInfo)) { - assertEquals(schema, reader.getSchema()); - for (TableRow r = reader.read(); r != null; r = reader.read()) { - rows.add(r); - } - } - - assertEquals(numRows, rows.size()); - for (TableRow r : rows) { - assertEquals(row, r); - for (int i = 0; i < COLUMNS.length; i++) { - 
assertEquals(VALUES[i], r.getColumn(COLUMNS[i])); - } - } - } -} diff --git a/dora/job/server/src/test/java/alluxio/job/plan/transform/format/csv/DecimalTest.java b/dora/job/server/src/test/java/alluxio/job/plan/transform/format/csv/DecimalTest.java deleted file mode 100644 index 5ed6d79bf50f..000000000000 --- a/dora/job/server/src/test/java/alluxio/job/plan/transform/format/csv/DecimalTest.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format.csv; - -import static org.junit.Assert.assertEquals; - -import alluxio.collections.Pair; - -import org.junit.Test; - -import java.math.BigInteger; - -public class DecimalTest { - - @Test - public void testBigDecimal() { - Decimal decimal = new Decimal("decimal(10,1)"); - - double v = decimal.toBigDecimal("10.555").doubleValue(); - - assertEquals(10.5, v, 0.0000001); - } - - @Test - public void testParquetBytes() { - Decimal decimal = new Decimal("decimal(10,1)"); - - byte[] bytes = decimal.toParquetBytes("10.555"); - - BigInteger bigInteger = new BigInteger(bytes); - assertEquals(105, bigInteger.intValue()); - } - - @Test - public void testPrecisionAndScale() { - Pair precisionAndScale = Decimal.getPrecisionAndScale("decimal(10 , 2) "); - - assertEquals((Integer) 10, precisionAndScale.getFirst()); - assertEquals((Integer) 2, precisionAndScale.getSecond()); - } -} diff --git a/dora/job/server/src/test/java/alluxio/job/plan/transform/format/orc/OrcReaderTest.java 
b/dora/job/server/src/test/java/alluxio/job/plan/transform/format/orc/OrcReaderTest.java deleted file mode 100644 index 684d3ca3b6dd..000000000000 --- a/dora/job/server/src/test/java/alluxio/job/plan/transform/format/orc/OrcReaderTest.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format.orc; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; - -import alluxio.AlluxioURI; -import alluxio.job.plan.transform.format.TableRow; - -import org.junit.Test; - -import java.io.File; -import java.io.IOException; - -public class OrcReaderTest { - - @Test - public void testOrcFile() throws IOException { - String resourceName = "TestOrcFile.columnProjection.orc"; - final int expectedRows = 21000; - - ClassLoader classLoader = getClass().getClassLoader(); - File file = new File(classLoader.getResource(resourceName).getFile()); - String absolutePath = file.getAbsolutePath(); - - final OrcReader orcReader = OrcReader.create(new AlluxioURI("file://" + absolutePath)); - - final TableRow row0 = orcReader.read(); - assertEquals(-1155869325L, row0.getColumn("int1")); - assertEquals("bb2c72394b1ab9f8", new String((byte[]) row0.getColumn("string1"))); - - final TableRow row1 = orcReader.read(); - assertEquals(431529176L, row1.getColumn("int1")); - assertEquals("e6c5459001105f17", new String((byte[]) row1.getColumn("string1"))); - - for (int i = 0; i < expectedRows 
- 2; i++) { - assertNotNull(orcReader.read()); - } - assertNull(orcReader.read()); - } -} diff --git a/dora/pom.xml b/dora/pom.xml index 5e219e2b8167..b2f599508ee2 100644 --- a/dora/pom.xml +++ b/dora/pom.xml @@ -32,7 +32,6 @@ shaded shell stress - table tests underfs diff --git a/dora/shaded/client-hadoop3/pom.xml b/dora/shaded/client-hadoop3/pom.xml index c0d016b03d0a..1e9a925e0618 100644 --- a/dora/shaded/client-hadoop3/pom.xml +++ b/dora/shaded/client-hadoop3/pom.xml @@ -87,11 +87,6 @@ alluxio-core-client-fs ${project.version} - - org.alluxio - alluxio-table-client - ${project.version} - diff --git a/dora/shaded/client/pom.xml b/dora/shaded/client/pom.xml index 74464079067e..c25131cef2e9 100644 --- a/dora/shaded/client/pom.xml +++ b/dora/shaded/client/pom.xml @@ -87,11 +87,6 @@ alluxio-core-client-fs ${project.version} - - org.alluxio - alluxio-table-client - ${project.version} - diff --git a/dora/table/base/pom.xml b/dora/table/base/pom.xml deleted file mode 100644 index 2b84f378fa44..000000000000 --- a/dora/table/base/pom.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - 4.0.0 - - org.alluxio - alluxio-table - 301-SNAPSHOT - - alluxio-table-base - jar - Alluxio Table - Base Module - Base module in Alluxio Table - - - - - ${project.parent.parent.parent.basedir}/build - false - - - - - org.alluxio - alluxio-core-transport - ${project.version} - - - diff --git a/dora/table/base/src/main/java/alluxio/table/ProtoUtils.java b/dora/table/base/src/main/java/alluxio/table/ProtoUtils.java deleted file mode 100644 index e9dbb2aa890e..000000000000 --- a/dora/table/base/src/main/java/alluxio/table/ProtoUtils.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table; - -import alluxio.grpc.table.Layout; -import alluxio.grpc.table.Partition; -import alluxio.grpc.table.Transformation; -import alluxio.grpc.table.layout.hive.PartitionInfo; - -import com.google.protobuf.InvalidProtocolBufferException; - -import java.util.List; -import java.util.Objects; - -/** - * Protobuf related utils. - */ -public final class ProtoUtils { - /** - * @param partition the partition proto - * @return true if the partition has the hive layout, false otherwise - */ - public static boolean hasHiveLayout(Partition partition) { - if (!partition.hasBaseLayout()) { - return false; - } - Layout layout = partition.getBaseLayout(); - // TODO(gpang): use a layout registry - return Objects.equals(layout.getLayoutType(), "hive"); - } - - /** - * @param layout the layout proto - * @return true if the layout is a hive layout, false otherwise - */ - public static boolean isHiveLayout(Layout layout) { - return Objects.equals(layout.getLayoutType(), "hive"); - } - - private static Layout getCurrentLayout(Partition partition) { - List transformations = partition.getTransformationsList(); - return transformations.isEmpty() - ? 
partition.getBaseLayout() - : transformations.get(transformations.size() - 1).getLayout(); - } - - /** - * @param partition the partition proto - * @return the hive-specific partition proto - */ - public static PartitionInfo extractHiveLayout(Partition partition) - throws InvalidProtocolBufferException { - if (!hasHiveLayout(partition)) { - if (partition.hasBaseLayout()) { - throw new IllegalStateException( - "Cannot parse hive-layout. layoutType: " + partition.getBaseLayout().getLayoutType()); - } else { - throw new IllegalStateException("Cannot parse hive-layout from missing layout"); - } - } - Layout layout = getCurrentLayout(partition); - if (!layout.hasLayoutData()) { - throw new IllegalStateException("Cannot parse hive-layout from empty layout data"); - } - return PartitionInfo.parseFrom(layout.getLayoutData()); - } - - /** - * @param layout the layout proto - * @return the hive-specific partition proto - */ - public static PartitionInfo toHiveLayout(Layout layout) - throws InvalidProtocolBufferException { - if (!isHiveLayout(layout)) { - throw new IllegalStateException( - "Cannot parse hive-layout. 
layoutType: " + layout.getLayoutType()); - } - if (!layout.hasLayoutData()) { - throw new IllegalStateException("Cannot parse hive-layout from empty layout data"); - } - return PartitionInfo.parseFrom(layout.getLayoutData()); - } -} diff --git a/dora/table/client/pom.xml b/dora/table/client/pom.xml deleted file mode 100644 index 8ffbec23efb9..000000000000 --- a/dora/table/client/pom.xml +++ /dev/null @@ -1,69 +0,0 @@ - - - 4.0.0 - - org.alluxio - alluxio-table - 301-SNAPSHOT - - alluxio-table-client - jar - Alluxio Table - Client - Client for Alluxio Table - - - - - ${project.parent.parent.parent.basedir}/build - false - - - - - - com.google.protobuf - protobuf-java - - - io.grpc - grpc-core - - - - - org.alluxio - alluxio-core-common - ${project.version} - - - org.alluxio - alluxio-core-transport - ${project.version} - - - org.alluxio - alluxio-table-base - ${project.version} - - - - - org.alluxio - alluxio-core-common - ${project.version} - test-jar - test - - - diff --git a/dora/table/client/src/main/java/alluxio/client/table/RetryHandlingTableMasterClient.java b/dora/table/client/src/main/java/alluxio/client/table/RetryHandlingTableMasterClient.java deleted file mode 100644 index cf325f2402a9..000000000000 --- a/dora/table/client/src/main/java/alluxio/client/table/RetryHandlingTableMasterClient.java +++ /dev/null @@ -1,218 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.client.table; - -import alluxio.AbstractMasterClient; -import alluxio.Constants; -import alluxio.exception.status.AlluxioStatusException; -import alluxio.grpc.ServiceType; -import alluxio.grpc.table.AttachDatabasePRequest; -import alluxio.grpc.table.ColumnStatisticsInfo; -import alluxio.grpc.table.Constraint; -import alluxio.grpc.table.Database; -import alluxio.grpc.table.DetachDatabasePRequest; -import alluxio.grpc.table.GetAllDatabasesPRequest; -import alluxio.grpc.table.GetAllTablesPRequest; -import alluxio.grpc.table.GetDatabasePRequest; -import alluxio.grpc.table.GetPartitionColumnStatisticsPRequest; -import alluxio.grpc.table.GetTableColumnStatisticsPRequest; -import alluxio.grpc.table.GetTablePRequest; -import alluxio.grpc.table.GetTransformJobInfoPRequest; -import alluxio.grpc.table.Partition; -import alluxio.grpc.table.ReadTablePRequest; -import alluxio.grpc.table.SyncDatabasePRequest; -import alluxio.grpc.table.SyncStatus; -import alluxio.grpc.table.TableInfo; -import alluxio.grpc.table.TableMasterClientServiceGrpc; -import alluxio.grpc.table.TransformJobInfo; -import alluxio.grpc.table.TransformTablePRequest; -import alluxio.master.MasterClientContext; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; -import javax.annotation.concurrent.ThreadSafe; - -/** - * A wrapper for the gRPC client to interact with the table master. - */ -@ThreadSafe -public final class RetryHandlingTableMasterClient extends AbstractMasterClient - implements TableMasterClient { - private static final Logger RPC_LOG = LoggerFactory.getLogger(TableMasterClient.class); - private TableMasterClientServiceGrpc.TableMasterClientServiceBlockingStub mClient = null; - - /** - * Creates a new block master client. 
- * - * @param conf master client configuration - */ - public RetryHandlingTableMasterClient(MasterClientContext conf) { - super(conf); - } - - @Override - protected ServiceType getRemoteServiceType() { - return ServiceType.TABLE_MASTER_CLIENT_SERVICE; - } - - @Override - protected String getServiceName() { - return Constants.TABLE_MASTER_CLIENT_SERVICE_NAME; - } - - @Override - protected long getServiceVersion() { - return Constants.TABLE_MASTER_CLIENT_SERVICE_VERSION; - } - - @Override - protected void afterConnect() { - mClient = TableMasterClientServiceGrpc.newBlockingStub(mChannel); - } - - @Override - public List getAllDatabases() throws AlluxioStatusException { - return retryRPC(() -> mClient.getAllDatabases( - GetAllDatabasesPRequest.newBuilder().build()).getDatabaseList(), - RPC_LOG, "GetAllDatabases", ""); - } - - @Override - public Database getDatabase(String databaseName) throws AlluxioStatusException { - return retryRPC(() -> mClient.getDatabase(GetDatabasePRequest.newBuilder() - .setDbName(databaseName).build()), - RPC_LOG, "GetDatabase", "databaseName=%s", databaseName).getDb(); - } - - @Override - public List getAllTables(String databaseName) throws AlluxioStatusException { - return retryRPC(() -> mClient.getAllTables( - GetAllTablesPRequest.newBuilder().setDatabase(databaseName).build()).getTableList(), - RPC_LOG, "GetAllTables", "databaseName=%s", databaseName); - } - - @Override - public TableInfo getTable(String databaseName, String tableName) throws AlluxioStatusException { - return retryRPC(() -> mClient.getTable( - GetTablePRequest.newBuilder().setDbName(databaseName).setTableName(tableName).build()) - .getTableInfo(), RPC_LOG, "GetTable", "databaseName=%s,tableName=%s", - databaseName, tableName); - } - - @Override - public SyncStatus attachDatabase(String udbType, String udbConnectionUri, String udbDbName, - String dbName, Map configuration, boolean ignoreSyncErrors) - throws AlluxioStatusException { - return retryRPC(() -> 
mClient.attachDatabase( - AttachDatabasePRequest.newBuilder().setUdbType(udbType) - .setUdbConnectionUri(udbConnectionUri).setUdbDbName(udbDbName).setDbName(dbName) - .putAllOptions(configuration).setIgnoreSyncErrors(ignoreSyncErrors).build()) - .getSyncStatus(), - RPC_LOG, "AttachDatabase", "udbType=%s,udbConnectionUri=%s,udbDbName=%s,dbName=%s," - + "configuration=%s,ignoreSyncErrors=%s", - udbType, udbConnectionUri, udbDbName, dbName, configuration, ignoreSyncErrors); - } - - @Override - public boolean detachDatabase(String dbName) - throws AlluxioStatusException { - return retryRPC(() -> mClient.detachDatabase( - DetachDatabasePRequest.newBuilder().setDbName(dbName).build()).getSuccess(), - RPC_LOG, "DetachDatabase", "dbName=%s", dbName); - } - - @Override - public SyncStatus syncDatabase(String dbName) throws AlluxioStatusException { - return retryRPC(() -> mClient.syncDatabase( - SyncDatabasePRequest.newBuilder().setDbName(dbName).build()).getStatus(), - RPC_LOG, "SyncDatabase", "dbName=%s", dbName); - } - - @Override - public List readTable(String databaseName, String tableName, Constraint constraint) - throws AlluxioStatusException { - return retryRPC(() -> mClient.readTable( - ReadTablePRequest.newBuilder().setDbName(databaseName).setTableName(tableName) - .setConstraint(constraint).build()).getPartitionsList(), - RPC_LOG, "ReadTable", "databaseName=%s,tableName=%s,constraint=%s", databaseName, tableName, - constraint); - } - - @Override - public List getTableColumnStatistics( - String databaseName, - String tableName, - List columnNames) throws AlluxioStatusException { - return retryRPC(() -> mClient.getTableColumnStatistics( - GetTableColumnStatisticsPRequest.newBuilder().setDbName(databaseName) - .setTableName(tableName).addAllColNames(columnNames).build()).getStatisticsList(), - RPC_LOG, "GetTableColumnStatistics", - "databaseName=%s,tableName=%s,columnNames=%s", databaseName, tableName, columnNames); - } - - @Override - public List getPartitionNames( 
- String databaseName, - String tableName) throws AlluxioStatusException { - return null; - } - - @Override - public Map> getPartitionColumnStatistics( - String databaseName, - String tableName, - List partitionNames, - List columnNames) throws AlluxioStatusException { - return retryRPC(() -> mClient.getPartitionColumnStatistics( - GetPartitionColumnStatisticsPRequest.newBuilder().setDbName(databaseName) - .setTableName(tableName).addAllColNames(columnNames) - .addAllPartNames(partitionNames).build()).getPartitionStatisticsMap(), - RPC_LOG, "GetPartitionColumnStatistics", - "databaseName=%s,tableName=%s,partitionNames=%s,columnNames=%s", - databaseName, tableName, partitionNames, columnNames) - .entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, - e -> e.getValue().getStatisticsList(), (e1, e2) -> e1)); - } - - @Override - public long transformTable(String dbName, String tableName, String definition) - throws AlluxioStatusException { - return retryRPC(() -> mClient.transformTable( - TransformTablePRequest.newBuilder() - .setDbName(dbName) - .setTableName(tableName) - .setDefinition(definition) - .build()).getJobId(), - RPC_LOG, "TransformTable", "dbName=%s,tableName=%s,definition=%s", - dbName, tableName, definition); - } - - @Override - public TransformJobInfo getTransformJobInfo(long jobId) throws AlluxioStatusException { - return retryRPC(() -> mClient.getTransformJobInfo( - GetTransformJobInfoPRequest.newBuilder() - .setJobId(jobId) - .build()).getInfo(0), - RPC_LOG, "GetTransformJobInfo", "jobId=%d", jobId); - } - - @Override - public List getAllTransformJobInfo() throws AlluxioStatusException { - return retryRPC(() -> mClient.getTransformJobInfo( - GetTransformJobInfoPRequest.newBuilder().build()).getInfoList(), - RPC_LOG, "GetAllTransformJobInfo", ""); - } -} diff --git a/dora/table/client/src/main/java/alluxio/client/table/TableMasterClient.java b/dora/table/client/src/main/java/alluxio/client/table/TableMasterClient.java deleted file mode 
100644 index db3b32e1af93..000000000000 --- a/dora/table/client/src/main/java/alluxio/client/table/TableMasterClient.java +++ /dev/null @@ -1,200 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.client.table; - -import alluxio.Client; -import alluxio.exception.status.AlluxioStatusException; -import alluxio.grpc.table.ColumnStatisticsInfo; -import alluxio.grpc.table.Constraint; -import alluxio.grpc.table.Database; -import alluxio.grpc.table.Partition; -import alluxio.grpc.table.SyncStatus; -import alluxio.grpc.table.TableInfo; -import alluxio.grpc.table.TransformJobInfo; -import alluxio.master.MasterClientContext; - -import java.util.List; -import java.util.Map; -import javax.annotation.concurrent.ThreadSafe; - -/** - * A client to use for interacting with a table master. - */ -@ThreadSafe -public interface TableMasterClient extends Client { - /** - * Factory for {@link TableMasterClient}. - */ - class Factory { - - private Factory() { - } // prevent instantiation - - /** - * Factory method for {@link TableMasterClient}. - * - * @param conf master client configuration - * @return a new {@link TableMasterClient} instance - */ - public static TableMasterClient create(MasterClientContext conf) { - return new RetryHandlingTableMasterClient(conf); - } - } - - /** - * Get a list of all database names. - * - * @return list of database names - * @throws AlluxioStatusException - */ - List getAllDatabases() throws AlluxioStatusException; - - /** - * Get database metadata. 
- * - * @param databaseName database name - * @return database metadata - */ - Database getDatabase(String databaseName) throws AlluxioStatusException; - - /** - * Get a list of all table names. - * - * @param databaseName database name - * @return list of table names - * @throws AlluxioStatusException - */ - List getAllTables(String databaseName) throws AlluxioStatusException; - - /** - * Get table metadata. - * - * @param databaseName database name - * @param tableName table name - * @return table metadata - * @throws AlluxioStatusException - */ - TableInfo getTable(String databaseName, String tableName) throws AlluxioStatusException; - - /** - * Attaches an existing database. - * - * @param udbType the database type - * @param udbConnectionUri the udb connection uri - * @param udbDbName the database name in the udb - * @param dbName the database name in Alluxio - * @param configuration the configuration map - * @param ignoreSyncErrors will ignore sync errors if true - * @return the sync status for the attach - * @throws AlluxioStatusException - */ - SyncStatus attachDatabase(String udbType, String udbConnectionUri, String udbDbName, - String dbName, Map configuration, boolean ignoreSyncErrors) - throws AlluxioStatusException; - - /** - * Detaches an existing database in the catalog master. - * - * @param dbName database name - * @return true if database created successfully - * @throws AlluxioStatusException - */ - boolean detachDatabase(String dbName) - throws AlluxioStatusException; - - /** - * Syncs an existing database in the catalog master. - * - * @param dbName database name - * @return the sync status - */ - SyncStatus syncDatabase(String dbName) throws AlluxioStatusException; - - /** - * Returns metadata for reading a table given constraints. 
- * - * @param databaseName database name - * @param tableName table name - * @param constraint constraint on the columns - * @return list of partitions - * @throws AlluxioStatusException - */ - List readTable(String databaseName, String tableName, Constraint constraint) - throws AlluxioStatusException; - - /** - * Get table column statistics with given database name, - * table name and list of column names. - * - * @param databaseName database name - * @param tableName table name - * @param columnNames column names - * @return list of column statistics - * @throws AlluxioStatusException - */ - List getTableColumnStatistics( - String databaseName, - String tableName, - List columnNames) throws AlluxioStatusException; - - /** - * Get partition names with given database name and table name. - * - * @param databaseName database name - * @param tableName table name - * @return list of partition names - * @throws AlluxioStatusException - */ - List getPartitionNames( - String databaseName, - String tableName) throws AlluxioStatusException; - - /** - * Get column statistics for selected partition and column. - * - * @param databaseName database name - * @param tableName table name - * @param partitionNames partition names - * @param columnNames column names - * @return Map<String partitionName, Map<String columnName, - * columnStatistics>> - * @throws AlluxioStatusException - */ - Map> getPartitionColumnStatistics( - String databaseName, - String tableName, - List partitionNames, - List columnNames) throws AlluxioStatusException; - - /** - * Transforms a table. 
- * - * @param dbName the database name - * @param tableName the table name - * @param definition the transformation definition - * @return job ID which can be used to poll the job status from job service - * @throws AlluxioStatusException - */ - long transformTable(String dbName, String tableName, String definition) - throws AlluxioStatusException; - - /** - * @param jobId the transformation job's ID - * @return the job info - */ - TransformJobInfo getTransformJobInfo(long jobId) throws AlluxioStatusException; - - /** - * @return a list of information for all transformation jobs - */ - List getAllTransformJobInfo() throws AlluxioStatusException; -} diff --git a/dora/table/pom.xml b/dora/table/pom.xml deleted file mode 100644 index 6a977c99f52e..000000000000 --- a/dora/table/pom.xml +++ /dev/null @@ -1,37 +0,0 @@ - - - 4.0.0 - - org.alluxio - alluxio-dora - 301-SNAPSHOT - - alluxio-table - pom - Alluxio Table - Table utility in Alluxio - - - base - client - server - shell - - - - - ${project.parent.parent.basedir}/build - - diff --git a/dora/table/server/common/pom.xml b/dora/table/server/common/pom.xml deleted file mode 100644 index e6a77bfd6e0e..000000000000 --- a/dora/table/server/common/pom.xml +++ /dev/null @@ -1,69 +0,0 @@ - - - 4.0.0 - - org.alluxio - alluxio-table-server - 301-SNAPSHOT - - alluxio-table-server-common - jar - Alluxio Table - Server - Common - Common components for Alluxio Table servers - - - - - ${project.parent.parent.parent.parent.basedir}/build - false - - - - - org.alluxio - alluxio-core-client-fs - ${project.version} - - - org.alluxio - alluxio-core-server-common - ${project.version} - - - org.alluxio - alluxio-table-base - ${project.version} - - - org.alluxio - alluxio-job-common - ${project.version} - - - org.alluxio - alluxio-core-transport - ${project.version} - compile - - - - - org.alluxio - alluxio-core-common - ${project.version} - test-jar - test - - - diff --git 
a/dora/table/server/common/src/main/java/alluxio/master/table/DatabaseInfo.java b/dora/table/server/common/src/main/java/alluxio/master/table/DatabaseInfo.java deleted file mode 100644 index 5a961fab94dc..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/master/table/DatabaseInfo.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.table; - -import alluxio.grpc.table.PrincipalType; - -import com.google.common.base.MoreObjects; - -import java.util.Collections; -import java.util.Map; -import java.util.Objects; -import javax.annotation.Nullable; - -/** - * The database information class. - */ -public class DatabaseInfo { - @Nullable - private final String mLocation; - @Nullable - private final String mOwnerName; - @Nullable - private final PrincipalType mOwnerType; - @Nullable - private final String mComment; - private final Map mParameters; - - /** - * Full constructor for database info. 
- * @param location location - * @param ownerName owner name - * @param ownerType owner type - * @param comment comment - * @param params parameters - */ - public DatabaseInfo(String location, String ownerName, PrincipalType ownerType, String comment, - Map params) { - mLocation = location; - mOwnerName = ownerName; - mOwnerType = ownerType; - mComment = comment; - if (params == null) { - mParameters = Collections.emptyMap(); - } else { - mParameters = params; - } - } - - /** - * @return the location - */ - public String getLocation() { - return mLocation; - } - - /** - * @return the owner name - */ - public String getOwnerName() { - return mOwnerName; - } - - /** - * @return the owner type - */ - public PrincipalType getOwnerType() { - return mOwnerType; - } - - /** - * @return the comment - */ - public String getComment() { - return mComment; - } - - /** - * @return the parameter - */ - public Map getParameters() { - return mParameters; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - DatabaseInfo that = (DatabaseInfo) o; - return Objects.equals(mLocation, that.mLocation) - && Objects.equals(mOwnerName, that.mOwnerName) - && mOwnerType == that.mOwnerType - && Objects.equals(mComment, that.mComment) - && Objects.equals(mParameters, that.mParameters); - } - - @Override - public int hashCode() { - return Objects.hash(mLocation, mOwnerName, mOwnerType, mComment, mParameters); - } - - @Override - public String toString() { - return MoreObjects.toStringHelper(this) - .add("location", mLocation) - .add("ownerName", mOwnerName) - .add("ownerType", mOwnerType) - .add("comment", mComment) - .add("parameters", mParameters) - .toString(); - } -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/BaseConfiguration.java b/dora/table/server/common/src/main/java/alluxio/table/common/BaseConfiguration.java deleted file mode 100644 index 
58d819b93ae3..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/BaseConfiguration.java +++ /dev/null @@ -1,100 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common; - -import alluxio.exception.ExceptionMessage; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; - -/** - * This represents a configuration of the catalog. - * - * @param the type of property that this instance is used for - */ -public abstract class BaseConfiguration { - private static final Logger LOG = LoggerFactory.getLogger(BaseConfiguration.class); - - protected final ConcurrentHashMap mValues; - - protected BaseConfiguration() { - mValues = new ConcurrentHashMap<>(); - } - - /** - * Creates an instance. - * - * @param values the map of values to copy from - */ - public BaseConfiguration(Map values) { - mValues = new ConcurrentHashMap<>(); - mValues.putAll(values); - } - - /** - * Returns the value of this property, or the default value if the property is not defined. - * - * @param property the property to get the value for - * @return the property value - */ - public String get(T property) { - String value = mValues.get(property.getName()); - if (value == null) { - return property.getDefaultValue(); - } - return value; - } - - /** - * Return the int value of this property , or the default value if the property is not defined. 
- * - * @param property the property to get the int value - * @return the int value of property - */ - public int getInt(T property) { - String rawValue = get(property); - - try { - return Integer.parseInt(rawValue); - } catch (NumberFormatException e) { - throw new RuntimeException( - ExceptionMessage.KEY_NOT_INTEGER.getMessage(rawValue, property)); - } - } - - /** - * Return the boolean value of this property. - * - * @param property the property to get the boolean value - * @return the boolean value of property - */ - public boolean getBoolean(T property) { - String rawValue = get(property); - - try { - return Boolean.parseBoolean(rawValue); - } catch (Exception e) { - throw new RuntimeException( - ExceptionMessage.KEY_NOT_BOOLEAN.getMessage(rawValue, property)); - } - } - - /** - * @return the full map of the configuration - */ - public Map getMap() { - return mValues; - } -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/BaseProperty.java b/dora/table/server/common/src/main/java/alluxio/table/common/BaseProperty.java deleted file mode 100644 index 937ec009b2ad..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/BaseProperty.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * This represents a property name and default value for the UDB. 
- */ -public abstract class BaseProperty { - private static final Logger LOG = LoggerFactory.getLogger(BaseProperty.class); - - protected final String mName; - protected final String mDescription; - protected final String mDefaultValue; - - protected BaseProperty(String name, String description, String defaultValue) { - mName = name; - mDescription = description; - mDefaultValue = defaultValue; - } - - /** - * @return the property name - */ - public String getName() { - return mName; - } - - /** - * @return the property description - */ - public String getDescription() { - return mDescription; - } - - /** - * @return the property default value - */ - public String getDefaultValue() { - return mDefaultValue; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - BaseProperty that = (BaseProperty) o; - return mName.equals(that.mName); - } - - @Override - public int hashCode() { - return mName.hashCode(); - } -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/CatalogPathUtils.java b/dora/table/server/common/src/main/java/alluxio/table/common/CatalogPathUtils.java deleted file mode 100644 index d1d2ae97b272..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/CatalogPathUtils.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.common; - -import alluxio.AlluxioURI; -import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; -import alluxio.util.io.PathUtils; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A collection of utility methods for catalog paths. - * - * Catalog paths for tables look like: - * /<catalog base dir>/<dbName1>/tables/<tableName1>/<udbType>/... - * /_internal_/... - * /<tableName2>/<udbType>/... - * /_internal_/... - * /<catalog base dir>/<dbName2>/tables/<tableName3>/<udbType>/... - * /_internal_/... - */ -public class CatalogPathUtils { - private static final Logger LOG = LoggerFactory.getLogger(CatalogPathUtils.class); - private static final String TABLES_ROOT = "tables"; - private static final String INTERNAL_ROOT = "_internal_"; - - private CatalogPathUtils() {} // prevent instantiation - - /** - * @param dbName the database name - * @param tableName the table name - * @param udbType the udb type - * @return the AlluxioURI for the path for the specified table - */ - public static AlluxioURI getTablePathUdb(String dbName, String tableName, String udbType) { - return new AlluxioURI(PathUtils - .concatPath(Configuration.get(PropertyKey.TABLE_CATALOG_PATH), dbName, TABLES_ROOT, - tableName, udbType)); - } - - /** - * @param dbName the database name - * @param tableName the table name - * @return the AlluxioURI for the path for the specified table, for internal data - */ - public static AlluxioURI getTablePathInternal(String dbName, String tableName) { - return new AlluxioURI(PathUtils - .concatPath(Configuration.get(PropertyKey.TABLE_CATALOG_PATH), dbName, TABLES_ROOT, - tableName, INTERNAL_ROOT)); - } -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/ConfigurationUtils.java b/dora/table/server/common/src/main/java/alluxio/table/common/ConfigurationUtils.java deleted file mode 100644 index d6486332d809..000000000000 --- 
a/dora/table/server/common/src/main/java/alluxio/table/common/ConfigurationUtils.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A set of utility methods for configuration. - */ -public class ConfigurationUtils { - public static final String MOUNT_PREFIX = "mount.option."; - private static final Logger LOG = LoggerFactory.getLogger(ConfigurationUtils.class); - - private ConfigurationUtils() {} // prevent instantiation - - /** - * @param udbType the udb type - * @return the prefix of the property name, for a given udb type - */ - public static String getUdbPrefix(String udbType) { - return String.format("udb-%s.", udbType); - } -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/Layout.java b/dora/table/server/common/src/main/java/alluxio/table/common/Layout.java deleted file mode 100644 index 161d3ccffc68..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/Layout.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. 
- * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common; - -import alluxio.AlluxioURI; -import alluxio.grpc.table.ColumnStatisticsInfo; -import alluxio.grpc.table.LayoutSpec; -import alluxio.table.common.transform.TransformContext; -import alluxio.table.common.transform.TransformDefinition; -import alluxio.table.common.transform.TransformPlan; - -import com.google.protobuf.Message; - -import java.io.IOException; -import java.util.Map; - -/** - * An interface for a table/partition layout. - */ -public interface Layout { - /** - * @return the type of table/partition layout - */ - String getType(); - - /** - * @return the layout specification - */ - String getSpec(); - - /** - * @return a proto representing the data for this table/partition layout - */ - Message getData(); - - /** - * @return a map of proto representing the statistics data for this partition - */ - Map getColumnStatsData(); - - /** - * @return the location of the layout - */ - AlluxioURI getLocation(); - - /** - * @param transformContext the {@link TransformContext} - * @param definition the transform definition - * @return a new {@code TransformPlan} representing the layout transformation - */ - TransformPlan getTransformPlan(TransformContext transformContext, TransformDefinition definition) - throws IOException; - - /** - * @return the proto representation - */ - default alluxio.grpc.table.Layout toProto() { - return alluxio.grpc.table.Layout.newBuilder() - .setLayoutType(getType()) - .setLayoutSpec(LayoutSpec.newBuilder() - .setSpec(getSpec()) - .build()) - .setLayoutData(getData().toByteString()) - .putAllStats(getColumnStatsData()) - .build(); - } -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/LayoutFactory.java b/dora/table/server/common/src/main/java/alluxio/table/common/LayoutFactory.java deleted file mode 100644 index 86de308035b3..000000000000 --- 
a/dora/table/server/common/src/main/java/alluxio/table/common/LayoutFactory.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common; - -/** - * The layout factory interface. - */ -public interface LayoutFactory { - - /** - * @return the type of layout for the factory - */ - String getType(); - - /** - * @param layoutProto the proto representation of the layout - * @return a new instance of the layout - */ - Layout create(alluxio.grpc.table.Layout layoutProto); -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/LayoutRegistry.java b/dora/table/server/common/src/main/java/alluxio/table/common/LayoutRegistry.java deleted file mode 100644 index 519bb3e11e06..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/LayoutRegistry.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.common; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.HashMap; -import java.util.Map; -import java.util.ServiceLoader; - -/** - * The registry of layout implementations. - */ -public class LayoutRegistry { - private static final Logger LOG = LoggerFactory.getLogger(LayoutRegistry.class); - - private volatile Map mFactories; - - /** - * Creates an instance. - */ - public LayoutRegistry() { - mFactories = new HashMap<>(); - } - - /** - * Refreshes the registry by service loading classes. - */ - public void refresh() { - Map map = new HashMap<>(); - for (LayoutFactory factory : ServiceLoader - .load(LayoutFactory.class, LayoutRegistry.class.getClassLoader())) { - LayoutFactory existingFactory = map.get(factory.getType()); - if (existingFactory != null) { - LOG.warn( - "Ignoring duplicate layout type '{}' found in factory {}. Existing factory: {}", - factory.getType(), factory.getClass(), existingFactory.getClass()); - } - map.put(factory.getType(), factory); - } - mFactories = map; - LOG.info("Registered Table Layouts: " + String.join(",", mFactories.keySet())); - } - - /** - * Creates a new instance of a {@link Layout}. 
- * - * @param layoutProto the proto representation of the layout - * @return a new instance of the layout - */ - public Layout create(alluxio.grpc.table.Layout layoutProto) { - Map map = mFactories; - String type = layoutProto.getLayoutType(); - LayoutFactory factory = map.get(type); - if (factory == null) { - throw new IllegalStateException( - String.format("LayoutFactory for type '%s' does not exist.", type)); - } - return factory.create(layoutProto); - } -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/UdbPartition.java b/dora/table/server/common/src/main/java/alluxio/table/common/UdbPartition.java deleted file mode 100644 index f24835beef94..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/UdbPartition.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common; - -/** - * A representation of a table partition in the udb. 
- */ -public interface UdbPartition { - /** - * @return the partition specification - */ - // TODO(gpang): update spec api - String getSpec(); - - /** - * @return the partition layout - */ - Layout getLayout(); -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/layout/HiveLayout.java b/dora/table/server/common/src/main/java/alluxio/table/common/layout/HiveLayout.java deleted file mode 100644 index f242c1cef27b..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/layout/HiveLayout.java +++ /dev/null @@ -1,175 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.common.layout; - -import alluxio.AlluxioURI; -import alluxio.conf.Configuration; -import alluxio.grpc.table.ColumnStatisticsInfo; -import alluxio.grpc.table.layout.hive.PartitionInfo; -import alluxio.grpc.table.layout.hive.StorageFormat; -import alluxio.job.plan.transform.HiveConstants; -import alluxio.table.common.Layout; -import alluxio.table.common.LayoutFactory; -import alluxio.table.common.transform.TransformContext; -import alluxio.table.common.transform.TransformDefinition; -import alluxio.table.common.transform.TransformPlan; -import alluxio.util.ConfigurationUtils; - -import com.google.protobuf.InvalidProtocolBufferException; -import org.apache.commons.lang3.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Properties; -import java.util.stream.Collectors; - -/** - * Hive layout implementation. - */ -public class HiveLayout implements Layout { - private static final Logger LOG = LoggerFactory.getLogger(HiveLayout.class); - - public static final String TYPE = "hive"; - - /** - * Factory to create layout implementation. 
- */ - public static class HiveLayoutFactory implements LayoutFactory { - @Override - public String getType() { - return TYPE; - } - - @Override - public Layout create(alluxio.grpc.table.Layout layoutProto) { - if (!TYPE.equals(layoutProto.getLayoutType())) { - throw new IllegalStateException( - "Cannot parse HiveLayout from layout type: " + layoutProto.getLayoutType()); - } - if (!layoutProto.hasLayoutData()) { - throw new IllegalStateException("Cannot parse layout from empty layout data"); - } - - try { - PartitionInfo partitionInfo = PartitionInfo.parseFrom(layoutProto.getLayoutData()); - return new HiveLayout(partitionInfo, new ArrayList<>(layoutProto.getStatsMap().values())); - } catch (InvalidProtocolBufferException e) { - throw new IllegalStateException("Cannot parse HiveLayout from proto layout", e); - } - } - } - - private final PartitionInfo mPartitionInfo; - private final Map mPartitionStatsInfo; - - /** - * Creates an instance. - * - * @param partitionInfo the partition info - * @param stats column statistics - */ - public HiveLayout(PartitionInfo partitionInfo, List stats) { - mPartitionInfo = partitionInfo; - mPartitionStatsInfo = stats.stream().collect(Collectors.toMap( - ColumnStatisticsInfo::getColName, e -> e, (e1, e2) -> e2)); - } - - @Override - public String getType() { - return TYPE; - } - - @Override - public String getSpec() { - return mPartitionInfo.getPartitionName(); - } - - @Override - public PartitionInfo getData() { - return mPartitionInfo; - } - - @Override - public AlluxioURI getLocation() { - return new AlluxioURI(mPartitionInfo.getStorage().getLocation()); - } - - @Override - public Map getColumnStatsData() { - return mPartitionStatsInfo; - } - - private HiveLayout transformLayout(AlluxioURI transformedUri, TransformDefinition definition) { - final Properties properties = definition.getProperties(); - - // TODO(cc): assumption here is the transformed data is in Parquet format. 
- final StorageFormat.Builder storageFormatBuilder = mPartitionInfo.getStorage() - .getStorageFormat().toBuilder() - .setSerde(HiveConstants.PARQUET_SERDE_CLASS) - .setInputFormat(HiveConstants.PARQUET_INPUT_FORMAT_CLASS) - .setOutputFormat(HiveConstants.PARQUET_OUTPUT_FORMAT_CLASS); - - final String compressionKey = alluxio.job.plan.transform.PartitionInfo.PARQUET_COMPRESSION; - final String compression = properties.getProperty(compressionKey); - if (!StringUtils.isEmpty(compression)) { - storageFormatBuilder.putSerdelibParameters(compressionKey, compression); - } - - PartitionInfo info = mPartitionInfo.toBuilder() - .putAllParameters(mPartitionInfo.getParametersMap()) - .setStorage(mPartitionInfo.getStorage().toBuilder() - .setStorageFormat(storageFormatBuilder - .build()) - .setLocation(transformedUri.toString()) - .build()) - .build(); - List stats = new ArrayList<>(mPartitionStatsInfo.values()); - return new HiveLayout(info, stats); - } - - @Override - public TransformPlan getTransformPlan(TransformContext transformContext, - TransformDefinition definition) throws IOException { - AlluxioURI outputPath = transformContext.generateTransformedPath(); - AlluxioURI outputUri = new AlluxioURI( - ConfigurationUtils.getSchemeAuthority(Configuration.global()) - + outputPath.getPath()); - HiveLayout transformedLayout = transformLayout(outputUri, definition); - return new TransformPlan(this, transformedLayout, definition); - } - - @Override - public boolean equals(Object obj) { - if (obj == null) { - return false; - } - if (this == obj) { - return true; - } - if (!(obj instanceof HiveLayout)) { - return false; - } - HiveLayout that = (HiveLayout) obj; - return Objects.equals(mPartitionInfo, that.mPartitionInfo) - && Objects.equals(mPartitionStatsInfo, that.mPartitionStatsInfo); - } - - @Override - public int hashCode() { - return Objects.hash(mPartitionInfo, mPartitionStatsInfo); - } -} diff --git 
a/dora/table/server/common/src/main/java/alluxio/table/common/transform/TransformContext.java b/dora/table/server/common/src/main/java/alluxio/table/common/transform/TransformContext.java deleted file mode 100644 index 2f358958bfa0..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/transform/TransformContext.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.transform; - -import alluxio.AlluxioURI; -import alluxio.table.common.CatalogPathUtils; -import alluxio.util.CommonUtils; - -import org.apache.commons.lang3.time.FastDateFormat; - -import java.util.Date; -import java.util.Locale; -import java.util.TimeZone; - -/** - * The context for generating transformation plans. - */ -public class TransformContext { - private static final FastDateFormat DATE_FORMAT = - FastDateFormat.getInstance("yyyyMMdd-HHmmss-SSS", TimeZone.getDefault(), Locale.getDefault()); - - private final String mDbName; - private final String mTableName; - private final String mIdentifier; - - /** - * Creates an instance. 
- * - * @param dbName the database name - * @param tableName the table name - * @param identifier the identifier for this transformation - */ - public TransformContext(String dbName, String tableName, String identifier) { - mDbName = dbName; - mTableName = tableName; - mIdentifier = identifier; - } - - /** - * @return a newly generated path to a transformed partition - */ - public AlluxioURI generateTransformedPath() { - String random = - String.format("%s-%s", DATE_FORMAT.format(new Date()), CommonUtils.randomAlphaNumString(5)); - // append a random identifier, to avoid collisions - return CatalogPathUtils.getTablePathInternal(mDbName, mTableName).join(mIdentifier) - .join(random); - } -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/transform/TransformDefinition.java b/dora/table/server/common/src/main/java/alluxio/table/common/transform/TransformDefinition.java deleted file mode 100644 index 5de3cef7ff5b..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/transform/TransformDefinition.java +++ /dev/null @@ -1,105 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.transform; - -import alluxio.table.common.transform.action.TransformAction; -import alluxio.table.common.transform.action.TransformActionRegistry; - -import java.io.IOException; -import java.io.StringReader; -import java.util.Collections; -import java.util.List; -import java.util.Properties; - -/** - * The definition of a transformation. 
- */ -public class TransformDefinition { - private final String mDefinition; - private final List mActions; - private final Properties mProperties; - - /** - * The user-provided definition is normalized by: - * 1. trimming whitespaces and semicolon from the beginning and end; - * 2. normalize to lower case. - * @param definition the string definition - * @param actions the list of actions - * @param properties the list of properties extracted from definition - */ - private TransformDefinition(String definition, List actions, - Properties properties) { - // TODO(bradley): derive definition string from properties or vice versa - mDefinition = normalize(definition); - mActions = actions; - mProperties = properties; - } - - private String normalize(String definition) { - definition = definition.trim(); - if (definition.endsWith(";")) { - definition = definition.substring(0, definition.length() - 1); - } - return definition.toLowerCase(); - } - - /** - * @return the normalized user-provided definition - */ - public String getDefinition() { - return mDefinition; - } - - /** - * @return the list of actions for this transformation - */ - public List getActions() { - return mActions; - } - - /** - * @return the list of properties extracted from the user-provided definition - */ - public Properties getProperties() { - return mProperties; - } - - /** - * @param definition the string definition - * @return the {@link TransformDefinition} representation - */ - public static TransformDefinition parse(String definition) { - definition = definition.trim(); - - if (definition.isEmpty()) { - return new TransformDefinition(definition, Collections.emptyList(), new Properties()); - } - - // accept semicolon as new lines for inline definitions - definition = definition.replace(";", "\n"); - - final Properties properties = new Properties(); - - final StringReader reader = new StringReader(definition); - - try { - properties.load(reader); - } catch (IOException e) { - // The only way this 
throws an IOException is if the definition is null which isn't possible. - return new TransformDefinition(definition, Collections.emptyList(), properties); - } - - final List actions = TransformActionRegistry.create(properties); - - return new TransformDefinition(definition, actions, properties); - } -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/transform/TransformPlan.java b/dora/table/server/common/src/main/java/alluxio/table/common/transform/TransformPlan.java deleted file mode 100644 index 2e83e7d76a23..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/transform/TransformPlan.java +++ /dev/null @@ -1,90 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.transform; - -import alluxio.job.JobConfig; -import alluxio.table.common.Layout; -import alluxio.table.common.transform.action.TransformAction; - -import java.util.ArrayList; - -/** - * The plan for a transformation. - */ -public class TransformPlan { - /** - * The base layout to transform from. - */ - private final Layout mBaseLayout; - /** - * The layout to transform to. - */ - private final Layout mTransformedLayout; - /** - * The list of jobs to execute the plan. - */ - private final ArrayList mJobConfigs; - - /** - * A list of jobs will be computed based on the provided transform definition. 
- * - * @param baseLayout the layout to transform from - * @param transformedLayout the layout to transform to - * @param definition the transformation definition - */ - public TransformPlan(Layout baseLayout, Layout transformedLayout, - TransformDefinition definition) { - mBaseLayout = baseLayout; - mTransformedLayout = transformedLayout; - mJobConfigs = computeJobConfigs(definition); - } - - private ArrayList computeJobConfigs(TransformDefinition definition) { - ArrayList actions = new ArrayList<>(); - Layout baseLayout = mBaseLayout; - boolean deleteSrc = false; - - for (TransformAction action : definition.getActions()) { - actions.add(action.generateJobConfig(baseLayout, mTransformedLayout, deleteSrc)); - baseLayout = mTransformedLayout; - deleteSrc = true; - } - - if (actions.isEmpty()) { - throw new IllegalArgumentException( - "At least one action should be defined for the transformation"); - } - - return actions; - } - - /** - * @return the base layout - */ - public Layout getBaseLayout() { - return mBaseLayout; - } - - /** - * @return the transformed layout - */ - public Layout getTransformedLayout() { - return mTransformedLayout; - } - - /** - * @return the list of job configurations to be executed sequentially - */ - public ArrayList getJobConfigs() { - return mJobConfigs; - } -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/transform/action/CompactAction.java b/dora/table/server/common/src/main/java/alluxio/table/common/transform/action/CompactAction.java deleted file mode 100644 index 52a885da81ba..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/transform/action/CompactAction.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.transform.action; - -import alluxio.job.JobConfig; -import alluxio.job.plan.transform.CompactConfig; -import alluxio.table.common.Layout; - -import com.google.common.base.Preconditions; -import org.apache.commons.io.FileUtils; -import org.apache.commons.lang3.StringUtils; - -import java.util.Properties; - -/** - * The definition of the compact action. - */ -public class CompactAction implements TransformAction { - - private static final String NUM_FILES_OPTION = "file.count.max"; - private static final String FILE_SIZE_OPTION = "file.size.min"; - private static final long DEFAULT_FILE_SIZE = FileUtils.ONE_GB * 2; - private static final int DEFAULT_NUM_FILES = 100; - - /** - * Expected number of files after compaction. - */ - private final int mNumFiles; - /** - * Default file size after coalescing. - */ - private final long mFileSize; - - /** - * Factory to create an instance. 
- */ - public static class CompactActionFactory implements TransformActionFactory { - - @Override - public int getOrder() { - return 0; - } - - @Override - public TransformAction create(Properties properties) { - final String numFilesString = properties.getProperty(NUM_FILES_OPTION); - - final String fileSizeString = properties.getProperty(FILE_SIZE_OPTION); - - if (StringUtils.isEmpty(numFilesString) && StringUtils.isEmpty(fileSizeString)) { - return null; - } - - int numFiles = DEFAULT_NUM_FILES; - if (!StringUtils.isEmpty(numFilesString)) { - numFiles = Integer.parseInt(numFilesString); - } - - long fileSize = DEFAULT_FILE_SIZE; - if (!StringUtils.isEmpty(fileSizeString)) { - fileSize = Long.parseLong(fileSizeString); - } - - Preconditions.checkArgument(numFiles > 0, - "Write action must have positive number of files"); - return new CompactAction(numFiles, fileSize); - } - - @Override - public String toString() { - return "CompactActionFactory"; - } - } - - private CompactAction(int numFiles, long fileSize) { - mNumFiles = numFiles; - mFileSize = fileSize; - } - - @Override - public JobConfig generateJobConfig(Layout base, Layout transformed, boolean deleteSrc) { - alluxio.job.plan.transform.PartitionInfo basePartitionInfo = - TransformActionUtils.generatePartitionInfo(base); - alluxio.job.plan.transform.PartitionInfo transformedPartitionInfo = - TransformActionUtils.generatePartitionInfo(transformed); - return new CompactConfig(basePartitionInfo, base.getLocation().toString(), - transformedPartitionInfo, transformed.getLocation().toString(), mNumFiles, mFileSize); - } -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/transform/action/TransformAction.java b/dora/table/server/common/src/main/java/alluxio/table/common/transform/action/TransformAction.java deleted file mode 100644 index 98b36597b123..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/transform/action/TransformAction.java +++ /dev/null @@ -1,29 
+0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.transform.action; - -import alluxio.job.JobConfig; -import alluxio.table.common.Layout; - -/** - * The definition of an individual transformation action. - */ -public interface TransformAction { - - /** - * @param base the layout to transform from - * @param transformed the layout to transform to - * @param deleteSrc whether the src file should be deleted - * @return the job configuration for this action - */ - JobConfig generateJobConfig(Layout base, Layout transformed, boolean deleteSrc); -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/transform/action/TransformActionFactory.java b/dora/table/server/common/src/main/java/alluxio/table/common/transform/action/TransformActionFactory.java deleted file mode 100644 index 9b5f47b9892a..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/transform/action/TransformActionFactory.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. 
- * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.transform.action; - -import java.util.Properties; -import javax.annotation.Nullable; - -/** - * The action factory interface. - */ -public interface TransformActionFactory { - - /** - * Returns the order of the transform action. Transform actions with a lower number - * will be executed before transform actions with a higher number. - * Behavior is undefined for actions with equal number. - * - * @return integer representing order number - */ - default int getOrder() { - return 100; - } - - /** - * Creates a new instance of an action based on the properties. Null should be returned - * when the particular action is not necessary. - * - * @param definition the raw definition of the action - * @return a new instance of an action - */ - @Nullable - TransformAction create(Properties definition); -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/transform/action/TransformActionRegistry.java b/dora/table/server/common/src/main/java/alluxio/table/common/transform/action/TransformActionRegistry.java deleted file mode 100644 index 2decbf2ca531..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/transform/action/TransformActionRegistry.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.common.transform.action; - -import com.google.common.annotations.VisibleForTesting; -import org.apache.commons.lang3.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; -import java.util.Properties; -import java.util.ServiceLoader; - -/** - * The registry of transform actions. - */ -public class TransformActionRegistry { - private static final Logger LOG = LoggerFactory.getLogger(TransformActionRegistry.class); - - // List of TransformActionFactories ordered in the order returned by getOrder - private static final List FACTORIES = new ArrayList<>(); - - static { - refresh(); - } - - private TransformActionRegistry() {} // prevent instantiation - - /** - * Creates a new instance of an ordered list of {@link TransformAction}. - * The ordering here is the order that the Actions should be executed in. - * - * @param definition the raw definition of the action - * @return a new instance of an action - */ - public static List create(Properties definition) { - final ArrayList actions = new ArrayList<>(); - for (TransformActionFactory factory : FACTORIES) { - // TODO(bradyoo): make this more efficient when FACTORIES.size() > 50 - final TransformAction transformAction = factory.create(definition); - if (transformAction != null) { - actions.add(transformAction); - } - } - return actions; - } - - /** - * @return the list of TransformActionFactories - */ - @VisibleForTesting - public static List getFactories() { - return Collections.unmodifiableList(FACTORIES); - } - - /** - * Refreshes the registry by service loading classes. 
- */ - private static void refresh() { - FACTORIES.clear(); - for (TransformActionFactory factory : ServiceLoader - .load(TransformActionFactory.class, TransformActionFactory.class.getClassLoader())) { - FACTORIES.add(factory); - } - FACTORIES.sort(Comparator.comparingInt((factory) -> factory.getOrder())); - - LOG.info("Registered Transform actions: " + StringUtils.join(FACTORIES, ",")); - } -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/transform/action/TransformActionUtils.java b/dora/table/server/common/src/main/java/alluxio/table/common/transform/action/TransformActionUtils.java deleted file mode 100644 index e66912c251fd..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/transform/action/TransformActionUtils.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.transform.action; - -import alluxio.job.plan.transform.FieldSchema; -import alluxio.job.plan.transform.PartitionInfo; -import alluxio.table.ProtoUtils; -import alluxio.table.common.Layout; - -import com.google.protobuf.InvalidProtocolBufferException; - -import java.util.ArrayList; -import java.util.HashMap; - -/** - * Utilities for implementing {@link TransformAction}. 
- */ -public class TransformActionUtils { - private TransformActionUtils() {} // Prevents initialization - - /** - * @param layout the layout to retrieve partition info from - * @return the generated partition info - */ - public static PartitionInfo generatePartitionInfo(Layout layout) { - alluxio.grpc.table.layout.hive.PartitionInfo partitionInfo; - try { - partitionInfo = ProtoUtils.toHiveLayout(layout.toProto()); - } catch (InvalidProtocolBufferException e) { - throw new IllegalStateException(e); - } - String serdeClass = partitionInfo.getStorage().getStorageFormat().getSerde(); - String inputFormat = partitionInfo.getStorage().getStorageFormat().getInputFormat(); - - ArrayList colList = new ArrayList<>(partitionInfo.getDataColsList().size()); - for (alluxio.grpc.table.FieldSchema col : partitionInfo.getDataColsList()) { - colList.add(new FieldSchema(col.getId(), col.getName(), col.getType(), col.getComment())); - } - - return new alluxio.job.plan.transform.PartitionInfo(serdeClass, inputFormat, - new HashMap<>(partitionInfo.getStorage().getStorageFormat().getSerdelibParametersMap()), - new HashMap<>(partitionInfo.getParametersMap()), - colList); - } -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/udb/PathTranslator.java b/dora/table/server/common/src/main/java/alluxio/table/common/udb/PathTranslator.java deleted file mode 100644 index 0e5892145e10..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/udb/PathTranslator.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. 
- * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.udb; - -import alluxio.AlluxioURI; -import alluxio.conf.Configuration; -import alluxio.exception.InvalidPathException; -import alluxio.util.ConfigurationUtils; -import alluxio.util.io.PathUtils; - -import com.google.common.collect.BiMap; -import com.google.common.collect.HashBiMap; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; - -/** - * Utilities to convert to and from ufs paths and alluxio paths. - */ -public class PathTranslator { - private static final Logger LOG = LoggerFactory.getLogger(PathTranslator.class); - private static final String SCHEME_AUTHORITY_PREFIX = - ConfigurationUtils.getSchemeAuthority(Configuration.global()); - private static final AlluxioURI BASE_URI = new AlluxioURI(SCHEME_AUTHORITY_PREFIX); - private final BiMap mPathMap; - - /** - * Construct a path translator. - */ - public PathTranslator() { - mPathMap = HashBiMap.create(); - } - - /** - * Add a mapping to the path translator. - * - * @param alluxioPath the alluxio path - * @param ufsPath the corresponding ufs path - * - * @return PathTranslator object - */ - public PathTranslator addMapping(String alluxioPath, String ufsPath) { - mPathMap.put(new AlluxioURI(alluxioPath), new AlluxioURI(ufsPath)); - return this; - } - - /** - * Returns the corresponding alluxio path, for the specified ufs path. - * - * @param ufsPath the ufs path to translate - * @return the corresponding alluxio path - * @throws IOException if the ufs path is not mounted - */ - public String toAlluxioPath(String ufsPath) throws IOException { - String suffix = ufsPath.endsWith("/") ? 
"/" : ""; - AlluxioURI ufsUri = new AlluxioURI(ufsPath); - // first look for an exact match - if (mPathMap.inverse().containsKey(ufsUri)) { - AlluxioURI match = mPathMap.inverse().get(ufsUri); - if (match.equals(ufsUri)) { - // bypassed UFS path, return as is - return ufsPath; - } - return checkAndAddSchemeAuthority(mPathMap.inverse().get(ufsUri)) + suffix; - } - // otherwise match by longest prefix - BiMap.Entry longestPrefix = null; - int longestPrefixDepth = -1; - for (BiMap.Entry entry : mPathMap.entrySet()) { - try { - AlluxioURI valueUri = entry.getValue(); - if (valueUri.isAncestorOf(ufsUri) && valueUri.getDepth() > longestPrefixDepth) { - longestPrefix = entry; - longestPrefixDepth = valueUri.getDepth(); - } - } catch (InvalidPathException e) { - throw new IOException(e); - } - } - if (longestPrefix == null) { - // TODO(yuzhu): instead of throwing an exception, mount the path? - throw new IOException(String - .format("Failed to translate ufs path (%s). Mapping missing from translator", ufsPath)); - } - if (longestPrefix.getKey().equals(longestPrefix.getValue())) { - // return ufsPath if set the key and value to be same when bypass path. 
- return ufsPath; - } - try { - String difference = PathUtils.subtractPaths(ufsUri.getPath(), - longestPrefix.getValue().getPath()); - AlluxioURI mappedUri = longestPrefix.getKey().join(difference); - return checkAndAddSchemeAuthority(mappedUri) + suffix; - } catch (InvalidPathException e) { - throw new IOException(e); - } - } - - private static AlluxioURI checkAndAddSchemeAuthority(AlluxioURI input) { - if (!input.hasScheme()) { - return new AlluxioURI(BASE_URI, input.getPath(), false); - } - return input; - } -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbBypassSpec.java b/dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbBypassSpec.java deleted file mode 100644 index 6a3a14075184..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbBypassSpec.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.udb; - -import java.util.Map; -import java.util.Set; - -/** - * Tables and partitions bypassing specification. - */ -public final class UdbBypassSpec { - /** - * Map of table name to set of partition names. - * Keyed by a table's name, the value set contains names of partitions in that table. - * An empty set indicates all partitions of that table, if any, should be bypassed. 
- */ - private final Map> mTablePartMap; - - /** - * @param tablePartMap table to partition map - */ - public UdbBypassSpec(Map> tablePartMap) { - mTablePartMap = tablePartMap; - } - - /** - * Checks if a table should be bypassed. - * - * @param tableName the table name - * @return true if the table is configured to be bypassed, false otherwise - * @see UdbBypassSpec#hasFullTable(String) - */ - public boolean hasTable(String tableName) { - return mTablePartMap.containsKey(tableName); - } - - /** - * Checks if all partitions of a table should be bypassed. - * - * @param tableName the table name - * @return true if the table is configured to be fully bypassed, false otherwise - * @see UdbBypassSpec#hasTable(String) - */ - public boolean hasFullTable(String tableName) { - // empty set indicates all partitions should be bypassed - return hasTable(tableName) && mTablePartMap.get(tableName).size() == 0; - } - - /** - * Checks by a partition's name if it should be bypassed. - * - * @param tableName the table name - * @param partitionName the partition name - * @return true if the partition should be bypassed, false otherwise - */ - public boolean hasPartition(String tableName, String partitionName) { - if (!hasTable(tableName)) { - return false; - } - Set parts = mTablePartMap.get(tableName); - if (parts.size() == 0) { - // empty set indicates all partitions should be bypassed - return true; - } - return parts.contains(partitionName); - } -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbConfiguration.java b/dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbConfiguration.java deleted file mode 100644 index 5d9892535521..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbConfiguration.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.udb; - -import alluxio.table.common.BaseConfiguration; -import alluxio.table.common.ConfigurationUtils; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Map.Entry; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * This represents a configuration of the UDB. - */ -public class UdbConfiguration extends BaseConfiguration { - private static final Logger LOG = LoggerFactory.getLogger(UdbConfiguration.class); - - // {...} group the scheme/authority, and are not special to various shells - private static final Pattern CONFIG_PATTERN = Pattern.compile("(\\{.*\\})\\.(.+?)"); - public static final String READ_ONLY_OPTION = "readonly"; - public static final String SHARED_OPTION = "shared"; - public static final String REGEX_PREFIX = "regex:"; - - protected final Map> mMountOptions; - - /** - * Creates an instance. - * - * @param values the map of values - */ - public UdbConfiguration(Map values) { - super(values); - mMountOptions = new HashMap<>(values.size()); - for (Map.Entry entry : values.entrySet()) { - if (entry.getKey().startsWith(ConfigurationUtils.MOUNT_PREFIX)) { - String key = entry.getKey().substring(ConfigurationUtils.MOUNT_PREFIX.length()); - Matcher m = CONFIG_PATTERN.matcher(key); - if (m.matches()) { - // templateSchemeAuthority can be a regex string. 
- String templateSchemeAuthority = m.group(1); - String option = m.group(2); - - // remove the bracket around the scheme://authority - templateSchemeAuthority = - templateSchemeAuthority.substring(1, templateSchemeAuthority.length() - 1); - if (!templateSchemeAuthority.endsWith("/")) { - // include the trailing '/' - templateSchemeAuthority += "/"; - } - - Map optionMap = mMountOptions.get(templateSchemeAuthority); - if (optionMap == null) { - optionMap = new HashMap<>(); - optionMap.put(option, entry.getValue()); - mMountOptions.put(templateSchemeAuthority, optionMap); - } else { - optionMap.put(option, entry.getValue()); - } - } - } - } - } - - /** - * Returns the mount option for a particular scheme and authority URL. - * - * @param concreteSchemeAuthority scheme://authority/ (expected to have a trailing '/') - * @return mount options in a map of or matched given concreteSchemeAuthority - */ - public Map getMountOption(String concreteSchemeAuthority) { - if (!concreteSchemeAuthority.endsWith("/")) { - // include the trailing '/' - concreteSchemeAuthority += "/"; - } - Map map = - mMountOptions.getOrDefault(concreteSchemeAuthority, Collections.emptyMap()); - if (map.equals(Collections.emptyMap())) { - for (Entry> entry : mMountOptions.entrySet()) { - if (entry.getKey().startsWith(REGEX_PREFIX) - && concreteSchemeAuthority.matches(entry.getKey().substring(REGEX_PREFIX.length()))) { - return entry.getValue(); - } - } - } - return map; - } -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbContext.java b/dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbContext.java deleted file mode 100644 index 17ea523b217a..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbContext.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.udb; - -import alluxio.AlluxioURI; -import alluxio.client.file.FileSystem; -import alluxio.table.common.CatalogPathUtils; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * The context for the udb. - */ -public class UdbContext { - private static final Logger LOG = LoggerFactory.getLogger(UdbContext.class); - - private final UnderDatabaseRegistry mUdbRegistry; - private final FileSystem mFileSystem; - - /** The udb type. */ - private final String mType; - /** The connection uri for the udb. */ - private final String mConnectionUri; - /** The name of the database in the udb. */ - private final String mUdbDbName; - /** The name of the database in Alluxio. */ - private final String mDbName; - - /** - * Creates an instance. 
- * - * @param udbRegistry the udb registry - * @param fileSystem the alluxio fs client - * @param type the db type - * @param connectionUri the connection uri for the udb - * @param udbDbName name of the database in the udb - * @param dbName name of the database in Alluxio - */ - public UdbContext(UnderDatabaseRegistry udbRegistry, FileSystem fileSystem, String type, - String connectionUri, String udbDbName, String dbName) { - mUdbRegistry = udbRegistry; - mFileSystem = fileSystem; - mType = type; - mConnectionUri = connectionUri; - mUdbDbName = udbDbName; - mDbName = dbName; - } - - /** - * @return the db name in Alluxio - */ - public String getDbName() { - return mDbName; - } - - /** - * @return the alluxio fs client - */ - public FileSystem getFileSystem() { - return mFileSystem; - } - - /** - * @return the udb registry - */ - public UnderDatabaseRegistry getUdbRegistry() { - return mUdbRegistry; - } - - /** - * @return the connection uri for the udb - */ - public String getConnectionUri() { - return mConnectionUri; - } - - /** - * @return the db name in Udb - */ - public String getUdbDbName() { - return mUdbDbName; - } - - /** - * @param tableName the table name - * @return the AlluxioURI for the table location for the specified table name - */ - public AlluxioURI getTableLocation(String tableName) { - return CatalogPathUtils.getTablePathUdb(mDbName, tableName, mType); - } -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbProperty.java b/dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbProperty.java deleted file mode 100644 index cf28aefdce5e..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbProperty.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.udb; - -import alluxio.table.common.BaseProperty; -import alluxio.table.common.ConfigurationUtils; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * This represents a property name and default value for the UDB. - */ -public class UdbProperty extends BaseProperty { - private static final Logger LOG = LoggerFactory.getLogger(UdbProperty.class); - - /** - * Creates an instance. - * - * @param name the property name - * @param description the property description - * @param defaultValue the default value - */ - public UdbProperty(String name, String description, String defaultValue) { - super(name, description, defaultValue); - } - - /** - * @param udbType the udb type - * @return returns the full name of the property, including the prefix - */ - public String getFullName(String udbType) { - return String.format("%s%s", ConfigurationUtils.getUdbPrefix(udbType), mName); - } -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbTable.java b/dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbTable.java deleted file mode 100644 index 5409eadd3954..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbTable.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.udb; - -import alluxio.grpc.table.ColumnStatisticsInfo; -import alluxio.grpc.table.FieldSchema; -import alluxio.grpc.table.Layout; -import alluxio.grpc.table.Schema; -import alluxio.table.common.UdbPartition; - -import java.util.List; -import java.util.Map; - -/** - * The interface for the underdb table. - */ -public interface UdbTable { - - /** - * @return the table name - */ - String getName(); - - /** - * @return the table schema - */ - Schema getSchema(); - - /** - * @return the table owner - */ - String getOwner(); - - /** - * @return the map of parameters - */ - Map getParameters(); - - /** - * @return the list of partition columns - */ - List getPartitionCols(); - - /** - * @return the layout for the table (could differ from partition layouts) - */ - Layout getLayout(); - - // TODO(gpang): generalize statistics - /** - * @return statistics of the table - */ - List getStatistics(); - - /** - * @return returns partitions for the table - */ - List getPartitions(); -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbUtils.java b/dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbUtils.java deleted file mode 100644 index 0b87325a7813..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/udb/UdbUtils.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.udb; - -import alluxio.AlluxioURI; -import alluxio.Constants; -import alluxio.exception.AlluxioException; -import alluxio.exception.InvalidPathException; -import alluxio.grpc.CreateDirectoryPOptions; -import alluxio.grpc.MountPOptions; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.Map; -import java.util.Objects; - -/** - * Udb related utils. - */ -public class UdbUtils { - private UdbUtils() {} - - private static final Logger LOG = LoggerFactory.getLogger(UdbUtils.class); - - /** - * Mount ufs path to alluxio path. 
- * - * @param tableName Table name - * @param ufsUri the uri of ufs - * @param tableUri the alluxio uri for table - * @param udbContext udb context - * @param udbConfiguration Udb configurations - * @return table uri - * @throws IOException - * @throws AlluxioException - */ - public static String mountAlluxioPath(String tableName, AlluxioURI ufsUri, AlluxioURI tableUri, - UdbContext udbContext, UdbConfiguration udbConfiguration) - throws IOException, AlluxioException { - if (Objects.equals(ufsUri.getScheme(), Constants.SCHEME)) { - // already an alluxio uri, return the alluxio uri - return ufsUri.toString(); - } - try { - tableUri = udbContext.getFileSystem().reverseResolve(ufsUri); - LOG.debug("Trying to mount table {} location {}, but it is already mounted at location {}", - tableName, ufsUri, tableUri); - return tableUri.getPath(); - } catch (InvalidPathException e) { - // ufs path not mounted, continue - } - // make sure the parent exists - udbContext.getFileSystem().createDirectory(tableUri.getParent(), - CreateDirectoryPOptions.newBuilder().setRecursive(true).setAllowExists(true).build()); - Map mountOptionMap = udbConfiguration.getMountOption( - String.format("%s://%s/", ufsUri.getScheme(), ufsUri.getAuthority().toString())); - MountPOptions.Builder option = MountPOptions.newBuilder(); - for (Map.Entry entry : mountOptionMap.entrySet()) { - if (entry.getKey().equals(UdbConfiguration.READ_ONLY_OPTION)) { - option.setReadOnly(Boolean.parseBoolean(entry.getValue())); - } else if (entry.getKey().equals(UdbConfiguration.SHARED_OPTION)) { - option.setShared(Boolean.parseBoolean(entry.getValue())); - } else { - option.putProperties(entry.getKey(), entry.getValue()); - } - } - udbContext.getFileSystem().mount(tableUri, ufsUri, option.build()); - - LOG.info("mounted table {} location {} to Alluxio location {} with mountOption {}", - tableName, ufsUri, tableUri, option.build()); - return tableUri.getPath(); - } -} diff --git 
a/dora/table/server/common/src/main/java/alluxio/table/common/udb/UnderDatabase.java b/dora/table/server/common/src/main/java/alluxio/table/common/udb/UnderDatabase.java deleted file mode 100644 index f89ac19ff587..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/udb/UnderDatabase.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.udb; - -import alluxio.master.table.DatabaseInfo; - -import java.io.IOException; -import java.util.List; - -/** - * The database interface. 
- */ -public interface UnderDatabase { - - /** - * @return the database type - */ - String getType(); - - /** - * @return the database name - */ - String getName(); - - /** - * @return a list of table names - */ - List getTableNames() throws IOException; - - /** - * @param tableName the table name - * @param bypassSpec table and partition bypass specification - * @return the {@link UdbTable} for the specified table name - */ - UdbTable getTable(String tableName, UdbBypassSpec bypassSpec) throws IOException; - - /** - * @return the {@link UdbContext} - */ - UdbContext getUdbContext(); - - /** - * @return get database info - */ - DatabaseInfo getDatabaseInfo() throws IOException; -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/udb/UnderDatabaseFactory.java b/dora/table/server/common/src/main/java/alluxio/table/common/udb/UnderDatabaseFactory.java deleted file mode 100644 index 13505d416b18..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/udb/UnderDatabaseFactory.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.udb; - -/** - * The under database factory interface. - */ -public interface UnderDatabaseFactory { - - /** - * @return the type of under database for the factory - */ - String getType(); - - /** - * Creates a new instance of the udb. Creation must not interact with external services. 
- * - * @param udbContext the db context - * @param configuration configuration values - * @return a new instance of the under database - */ - UnderDatabase create(UdbContext udbContext, UdbConfiguration configuration); -} diff --git a/dora/table/server/common/src/main/java/alluxio/table/common/udb/UnderDatabaseRegistry.java b/dora/table/server/common/src/main/java/alluxio/table/common/udb/UnderDatabaseRegistry.java deleted file mode 100644 index be64e62e6844..000000000000 --- a/dora/table/server/common/src/main/java/alluxio/table/common/udb/UnderDatabaseRegistry.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.udb; - -import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; -import alluxio.extensions.ExtensionsClassLoader; -import alluxio.util.io.PathUtils; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.IOException; -import java.net.URL; -import java.nio.file.DirectoryStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.ServiceLoader; - -/** - * The registry of under database implementations. 
- */ -public class UnderDatabaseRegistry { - private static final Logger LOG = LoggerFactory.getLogger(UnderDatabaseRegistry.class); - private static final String UDB_EXTENSION_PATTERN = "alluxio-table-server-underdb-*.jar"; - - private volatile Map mFactories; - - /** - * Creates an instance. - */ - public UnderDatabaseRegistry() { - mFactories = new HashMap<>(); - } - - /** - * Refreshes the registry by service loading classes. - */ - public void refresh() { - Map map = new HashMap<>(); - - String libDir = PathUtils.concatPath(Configuration.get(PropertyKey.HOME), "lib"); - LOG.info("Loading udb jars from {}", libDir); - List files = new ArrayList<>(); - try (DirectoryStream stream = Files - .newDirectoryStream(Paths.get(libDir), UDB_EXTENSION_PATTERN)) { - for (Path entry : stream) { - if (entry.toFile().isFile()) { - files.add(entry.toFile()); - } - } - } catch (IOException e) { - LOG.warn("Failed to load udb libs from {}. error: {}", libDir, e.toString()); - } - - // Load the UDBs from libraries - for (File jar : files) { - try { - URL extensionURL = jar.toURI().toURL(); - ClassLoader extensionsClassLoader = new ExtensionsClassLoader(new URL[] {extensionURL}, - ClassLoader.getSystemClassLoader()); - - for (UnderDatabaseFactory factory : ServiceLoader - .load(UnderDatabaseFactory.class, extensionsClassLoader)) { - UnderDatabaseFactory existingFactory = map.get(factory.getType()); - if (existingFactory != null) { - LOG.warn( - "Ignoring duplicate under database type '{}' found in {}. 
Existing factory: {}", - factory.getType(), factory.getClass(), existingFactory.getClass()); - } - map.put(factory.getType(), factory); - } - } catch (Throwable t) { - LOG.warn("Failed to load udb jar {}", jar, t); - } - } - - // Load the UDBs from the default classloader - for (UnderDatabaseFactory factory : ServiceLoader - .load(UnderDatabaseFactory.class, UnderDatabaseRegistry.class.getClassLoader())) { - UnderDatabaseFactory existingFactory = map.get(factory.getType()); - if (existingFactory != null) { - LOG.warn("Ignoring duplicate under database type '{}' found in {}. Existing factory: {}", - factory.getType(), factory.getClass(), existingFactory.getClass()); - } - map.put(factory.getType(), factory); - } - - mFactories = map; - LOG.info("Registered UDBs: " + String.join(",", mFactories.keySet())); - } - - /** - * Creates a new instance of an {@link UnderDatabase}. - * - * @param udbContext the db context - * @param type the udb type - * @param configuration the udb configuration - * @return a new udb instance - */ - public UnderDatabase create(UdbContext udbContext, String type, UdbConfiguration configuration) { - Map map = mFactories; - UnderDatabaseFactory factory = map.get(type); - if (factory == null) { - throw new IllegalArgumentException( - String.format("UdbFactory for type '%s' does not exist.", type)); - } - - ClassLoader previousClassLoader = Thread.currentThread().getContextClassLoader(); - try { - // Use the extension class loader of the factory. 
- Thread.currentThread().setContextClassLoader(factory.getClass().getClassLoader()); - return factory.create(udbContext, configuration); - } catch (Throwable e) { - // Catching Throwable rather than Exception to catch service loading errors - throw new IllegalStateException( - String.format("Failed to create UnderDb by factory %s", factory), e); - } finally { - Thread.currentThread().setContextClassLoader(previousClassLoader); - } - } -} diff --git a/dora/table/server/common/src/main/resources/META-INF/services/alluxio.table.common.LayoutFactory b/dora/table/server/common/src/main/resources/META-INF/services/alluxio.table.common.LayoutFactory deleted file mode 100644 index 51314f0b901b..000000000000 --- a/dora/table/server/common/src/main/resources/META-INF/services/alluxio.table.common.LayoutFactory +++ /dev/null @@ -1,12 +0,0 @@ -# -# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 -# (the "License"). You may not use this work except in compliance with the License, which is -# available at www.apache.org/licenses/LICENSE-2.0 -# -# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -# either express or implied, as more fully set forth in the License. -# -# See the NOTICE file distributed with this work for information regarding copyright ownership. 
-# - -alluxio.table.common.layout.HiveLayout$HiveLayoutFactory diff --git a/dora/table/server/common/src/main/resources/META-INF/services/alluxio.table.common.transform.action.TransformActionFactory b/dora/table/server/common/src/main/resources/META-INF/services/alluxio.table.common.transform.action.TransformActionFactory deleted file mode 100644 index 60d5187aea43..000000000000 --- a/dora/table/server/common/src/main/resources/META-INF/services/alluxio.table.common.transform.action.TransformActionFactory +++ /dev/null @@ -1,12 +0,0 @@ -# -# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 -# (the "License"). You may not use this work except in compliance with the License, which is -# available at www.apache.org/licenses/LICENSE-2.0 -# -# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -# either express or implied, as more fully set forth in the License. -# -# See the NOTICE file distributed with this work for information regarding copyright ownership. -# - -alluxio.table.common.transform.action.CompactAction$CompactActionFactory diff --git a/dora/table/server/common/src/test/java/alluxio/table/common/CatalogPathUtilsTest.java b/dora/table/server/common/src/test/java/alluxio/table/common/CatalogPathUtilsTest.java deleted file mode 100644 index 69bd10dfd55c..000000000000 --- a/dora/table/server/common/src/test/java/alluxio/table/common/CatalogPathUtilsTest.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. 
- * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common; - -import static org.junit.Assert.assertEquals; - -import alluxio.AlluxioURI; -import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; -import alluxio.util.io.PathUtils; - -import org.junit.Test; - -public class CatalogPathUtilsTest { - - @Test - public void tablePathUdb() { - String dbName = "dbName"; - String tableName = "tableName"; - String udbType = "udbType"; - AlluxioURI path = CatalogPathUtils.getTablePathUdb(dbName, tableName, udbType); - - assertEquals(path.getPath(), PathUtils.concatPath(Configuration.get( - PropertyKey.TABLE_CATALOG_PATH), dbName, "tables", tableName, udbType)); - } - - @Test - public void tablePathInternal() { - String dbName = "dbName"; - String tableName = "tableName"; - AlluxioURI path = CatalogPathUtils.getTablePathInternal(dbName, tableName); - - assertEquals(path.getPath(), PathUtils.concatPath(Configuration.get( - PropertyKey.TABLE_CATALOG_PATH), dbName, "tables", tableName, "_internal_")); - } -} diff --git a/dora/table/server/common/src/test/java/alluxio/table/common/TableTestUtils.java b/dora/table/server/common/src/test/java/alluxio/table/common/TableTestUtils.java deleted file mode 100644 index 2d146d3d4d8c..000000000000 --- a/dora/table/server/common/src/test/java/alluxio/table/common/TableTestUtils.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.common; - -import alluxio.grpc.table.layout.hive.PartitionInfo; -import alluxio.grpc.table.layout.hive.Storage; -import alluxio.table.common.layout.HiveLayout; - -import java.util.Collections; - -public class TableTestUtils { - /** - * @param location the layout's location - * @return a layout for the location - */ - public static HiveLayout createLayout(String location) { - PartitionInfo partitionInfo = PartitionInfo.newBuilder() - .setStorage(Storage.newBuilder().setLocation(location).build()) - .build(); - return new HiveLayout(partitionInfo, Collections.emptyList()); - } - - private TableTestUtils() {} // Prevent initialization -} diff --git a/dora/table/server/common/src/test/java/alluxio/table/common/layout/HiveLayoutTest.java b/dora/table/server/common/src/test/java/alluxio/table/common/layout/HiveLayoutTest.java deleted file mode 100644 index fb9a9ebcb914..000000000000 --- a/dora/table/server/common/src/test/java/alluxio/table/common/layout/HiveLayoutTest.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.common.layout; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; - -import alluxio.grpc.table.ColumnStatisticsInfo; -import alluxio.grpc.table.FieldSchema; -import alluxio.grpc.table.layout.hive.PartitionInfo; -import alluxio.grpc.table.layout.hive.Storage; -import alluxio.table.common.Layout; -import alluxio.table.common.LayoutRegistry; -import alluxio.util.CommonUtils; - -import org.junit.Test; - -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.ThreadLocalRandom; - -public class HiveLayoutTest { - - @Test - public void toProto() throws Exception { - HiveLayout layout = createRandom(); - assertNotNull(layout.toProto()); - } - - @Test - public void factoryCreate() throws Exception { - HiveLayout layout = createRandom(); - assertNotNull(new HiveLayout.HiveLayoutFactory().create(layout.toProto())); - } - - @Test - public void registryCreate() throws Exception { - HiveLayout layout = createRandom(); - assertNotNull(new HiveLayout.HiveLayoutFactory().create(layout.toProto())); - - LayoutRegistry registry = new LayoutRegistry(); - registry.refresh(); - Layout instance = registry.create(layout.toProto()); - assertNotNull(instance); - assertEquals(layout.toProto(), instance.toProto()); - } - - @Test - public void factoryConversions() throws Exception { - HiveLayout layout = createRandom(); - alluxio.grpc.table.Layout layoutProto = layout.toProto(); - Layout layout2 = new HiveLayout.HiveLayoutFactory().create(layoutProto); - alluxio.grpc.table.Layout layout2Proto = layout2.toProto(); - assertEquals(layoutProto, layout2Proto); - } - - private HiveLayout createRandom() { - PartitionInfo.Builder pib = PartitionInfo.newBuilder(); - - pib.setDbName(CommonUtils.randomAlphaNumString(10)); - pib.setTableName(CommonUtils.randomAlphaNumString(10)); - for (int i = 0; i < ThreadLocalRandom.current().nextInt(1, 5); i++) { - pib.addDataCols( - 
FieldSchema.newBuilder().setName(CommonUtils.randomAlphaNumString(10)).build()); - } - pib.setPartitionName(CommonUtils.randomAlphaNumString(10)); - pib.setStorage(Storage.newBuilder().setLocation(CommonUtils.randomAlphaNumString(10)).build()); - for (int i = 0; i < ThreadLocalRandom.current().nextInt(0, 5); i++) { - pib.addValues(CommonUtils.randomAlphaNumString(10)); - } - - List stats = new ArrayList<>(); - for (int i = 0; i < ThreadLocalRandom.current().nextInt(0, 5); i++) { - stats.add(ColumnStatisticsInfo.newBuilder().setColName(CommonUtils.randomAlphaNumString(10)) - .setColType(CommonUtils.randomAlphaNumString(10)).build()); - } - - return new HiveLayout(pib.build(), stats); - } -} diff --git a/dora/table/server/common/src/test/java/alluxio/table/common/transform/TransformDefinitionTest.java b/dora/table/server/common/src/test/java/alluxio/table/common/transform/TransformDefinitionTest.java deleted file mode 100644 index 2aa6e3a1b652..000000000000 --- a/dora/table/server/common/src/test/java/alluxio/table/common/transform/TransformDefinitionTest.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.common.transform; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.fail; - -import org.junit.Test; - -import java.util.Arrays; -import java.util.List; - -public class TransformDefinitionTest { - - @Test - public void parse() { - List definitions = Arrays.asList( - "file.count.max=2", - " file.count.max=2 ", - " file.count.max:2", - "file.count.max:2;", - "file.count.max:2\nfile.count.max:2" - ); - - parseValidInternal(definitions); - } - - @Test - public void parseInvalid() { - List definitions = Arrays.asList( - "file.count.max:", - "file.does.not.exist:2", - "" - ); - parseInvalidInternal(definitions); - } - - private void parseValidInternal(List definitions) { - for (String definition : definitions) { - final TransformDefinition transformDefinition = TransformDefinition.parse(definition); - assertNotNull("Should be parsable: " + definition, transformDefinition); - assertEquals("Should be parsable: " + definition, - 1, transformDefinition.getActions().size()); - } - } - - private void parseInvalidInternal(List definitions) { - for (String definition : definitions) { - TransformDefinition transformDefinition = null; - try { - transformDefinition = TransformDefinition.parse(definition); - } catch (Exception e) { - // ignore - } - if (transformDefinition != null && !transformDefinition.getActions().isEmpty()) { - fail("Should not be parsable: " + definition); - } - } - } -} diff --git a/dora/table/server/common/src/test/java/alluxio/table/common/transform/TransformPlanTest.java b/dora/table/server/common/src/test/java/alluxio/table/common/transform/TransformPlanTest.java deleted file mode 100644 index eca8cddb053c..000000000000 --- a/dora/table/server/common/src/test/java/alluxio/table/common/transform/TransformPlanTest.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the 
"License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.transform; - -import static org.junit.Assert.assertEquals; - -import alluxio.job.JobConfig; -import alluxio.job.plan.transform.CompactConfig; -import alluxio.table.common.TableTestUtils; -import alluxio.table.common.layout.HiveLayout; - -import org.junit.Test; - -import java.util.ArrayList; - -public class TransformPlanTest { - @Test - public void getJobConfigs() { - HiveLayout from = TableTestUtils.createLayout("/from"); - HiveLayout to = TableTestUtils.createLayout("/to"); - TransformDefinition definition = - TransformDefinition.parse("file.count.max=12"); - - TransformPlan plan = new TransformPlan(from, to, definition); - assertEquals(from, plan.getBaseLayout()); - assertEquals(to, plan.getTransformedLayout()); - ArrayList jobs = plan.getJobConfigs(); - assertEquals(1, jobs.size()); - assertEquals(CompactConfig.class, jobs.get(0).getClass()); - - CompactConfig compact = (CompactConfig) jobs.get(0); - assertEquals("/from", compact.getInput()); - assertEquals("/to", compact.getOutput()); - assertEquals(12, compact.getMaxNumFiles()); - } -} diff --git a/dora/table/server/common/src/test/java/alluxio/table/common/transform/action/CompactActionTest.java b/dora/table/server/common/src/test/java/alluxio/table/common/transform/action/CompactActionTest.java deleted file mode 100644 index 178eb9133ed7..000000000000 --- a/dora/table/server/common/src/test/java/alluxio/table/common/transform/action/CompactActionTest.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache 
License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.transform.action; - -import static org.junit.Assert.assertEquals; - -import alluxio.job.JobConfig; -import alluxio.job.plan.transform.CompactConfig; -import alluxio.table.common.TableTestUtils; -import alluxio.table.common.layout.HiveLayout; -import alluxio.table.common.transform.TransformDefinition; - -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; - -import java.util.List; - -public class CompactActionTest { - @Rule - public ExpectedException mException = ExpectedException.none(); - - public CompactAction parse(String definition) { - TransformDefinition transformDefinition = TransformDefinition.parse(definition); - - final List actions = transformDefinition.getActions(); - assertEquals(1, actions.size()); - final TransformAction action = actions.get(0); - - assertEquals(CompactAction.class, action.getClass()); - return (CompactAction) action; - } - - @Test - public void invalidNumFiles() { - String definition = "file.count.max=0"; - mException.expect(IllegalArgumentException.class); - mException.expectMessage("Write action must have positive number of files"); - - parse(definition); - } - - @Test - public void dynamicNumFiles() { - final CompactAction compactAction = parse("file.count.max=1000;file.size.min=1024"); - - HiveLayout from = TableTestUtils.createLayout("/from"); - HiveLayout to = TableTestUtils.createLayout("/to"); - JobConfig job = compactAction.generateJobConfig(from, to, false); - assertEquals(CompactConfig.class, job.getClass()); - 
- CompactConfig compact = (CompactConfig) job; - assertEquals("/from", compact.getInput()); - assertEquals("/to", compact.getOutput()); - assertEquals(1000, compact.getMaxNumFiles()); - assertEquals(1024, compact.getMinFileSize()); - } - - @Test - public void generateJobConfig() { - final CompactAction compactAction = parse("file.count.max=12"); - - HiveLayout from = TableTestUtils.createLayout("/from"); - HiveLayout to = TableTestUtils.createLayout("/to"); - JobConfig job = compactAction.generateJobConfig(from, to, false); - assertEquals(CompactConfig.class, job.getClass()); - - CompactConfig compact = (CompactConfig) job; - assertEquals("/from", compact.getInput()); - assertEquals("/to", compact.getOutput()); - assertEquals(12, compact.getMaxNumFiles()); - } -} diff --git a/dora/table/server/common/src/test/java/alluxio/table/common/transform/action/EarlyActionFactory.java b/dora/table/server/common/src/test/java/alluxio/table/common/transform/action/EarlyActionFactory.java deleted file mode 100644 index e8c9b2cb0524..000000000000 --- a/dora/table/server/common/src/test/java/alluxio/table/common/transform/action/EarlyActionFactory.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.common.transform.action; - -import java.util.Properties; -import javax.annotation.Nullable; - -public class EarlyActionFactory implements TransformActionFactory { - @Override - public int getOrder() { - return -1000; - } - - @Nullable - @Override - public TransformAction create(Properties definition) { - return null; - } -} diff --git a/dora/table/server/common/src/test/java/alluxio/table/common/transform/action/LateActionFactory.java b/dora/table/server/common/src/test/java/alluxio/table/common/transform/action/LateActionFactory.java deleted file mode 100644 index ba716f0730d8..000000000000 --- a/dora/table/server/common/src/test/java/alluxio/table/common/transform/action/LateActionFactory.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.common.transform.action; - -import java.util.Properties; -import javax.annotation.Nullable; - -public class LateActionFactory implements TransformActionFactory { - @Override - public int getOrder() { - return 1000; - } - - @Nullable - @Override - public TransformAction create(Properties definition) { - return null; - } -} diff --git a/dora/table/server/common/src/test/java/alluxio/table/common/transform/action/TransformActionRegistryTest.java b/dora/table/server/common/src/test/java/alluxio/table/common/transform/action/TransformActionRegistryTest.java deleted file mode 100644 index 31eed709b0c6..000000000000 --- a/dora/table/server/common/src/test/java/alluxio/table/common/transform/action/TransformActionRegistryTest.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.common.transform.action; - -import static org.junit.Assert.assertEquals; - -import org.junit.Test; - -import java.util.List; - -public class TransformActionRegistryTest { - - @Test - public void testOrder() { - final List factories = TransformActionRegistry.getFactories(); - - assertEquals(EarlyActionFactory.class, factories.get(0).getClass()); - assertEquals(LateActionFactory.class, factories.get(factories.size() - 1).getClass()); - } -} diff --git a/dora/table/server/common/src/test/java/alluxio/table/common/udb/PathTranslatorTest.java b/dora/table/server/common/src/test/java/alluxio/table/common/udb/PathTranslatorTest.java deleted file mode 100644 index e6065f26f955..000000000000 --- a/dora/table/server/common/src/test/java/alluxio/table/common/udb/PathTranslatorTest.java +++ /dev/null @@ -1,205 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.common.udb; - -import static org.junit.Assert.assertEquals; - -import alluxio.ConfigurationRule; -import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; -import alluxio.util.io.PathUtils; - -import com.google.common.collect.ImmutableMap; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; - -import java.io.IOException; - -public class PathTranslatorTest { - private static final String MASTER_HOSTNAME = "master"; - private static final int MASTER_RPC_PORT = 11111; - private static final String ALLUXIO_URI_AUTHORITY = "alluxio://" + MASTER_HOSTNAME; - private static final String ALLUXIO_URI_AUTHORITY_WITH_PORT = - ALLUXIO_URI_AUTHORITY + ":" + MASTER_RPC_PORT; - - @Rule - public ExpectedException mException = ExpectedException.none(); - - @Rule - public ConfigurationRule mConfiguration = - new ConfigurationRule( - ImmutableMap.of(PropertyKey.MASTER_HOSTNAME, MASTER_HOSTNAME, - PropertyKey.MASTER_RPC_PORT, MASTER_RPC_PORT), - Configuration.modifiableGlobal()); - - private PathTranslator mTranslator; - - @Before - public void before() throws Exception { - mTranslator = new PathTranslator(); - } - - @Test - public void noMapping() throws Exception { - mTranslator.addMapping("alluxio:///my/table/directory", "ufs://a/the/ufs/location"); - mException.expect(IOException.class); - mTranslator.toAlluxioPath("ufs://b/no/mapping"); - } - - @Test - public void noMappingSingleLevel() throws Exception { - mTranslator.addMapping("alluxio:///my/table/directory", "ufs://a/the/ufs/location"); - mException.expect(IOException.class); - mTranslator.toAlluxioPath("ufs://a/the/ufs/no_loc"); - } - - @Test - public void exactMatch() throws Exception { - String alluxioPath = "alluxio:///my/table/directory"; - String ufsPath = "ufs://a/the/ufs/location"; - mTranslator.addMapping(alluxioPath, ufsPath); - assertEquals(alluxioPath, mTranslator.toAlluxioPath(ufsPath)); - } - - @Test - 
public void singleSubDirectory() throws Exception { - String alluxioPath = "alluxio:///my/table/directory"; - String ufsPath = "ufs://a/the/ufs/location"; - mTranslator.addMapping(alluxioPath, ufsPath); - assertEquals(PathUtils.concatPath(alluxioPath, "subdir"), - mTranslator.toAlluxioPath(PathUtils.concatPath(ufsPath, "subdir"))); - } - - @Test - public void multipleSubdir() throws Exception { - String alluxioPath = "alluxio:///my/table/directory"; - String ufsPath = "ufs://a/the/ufs/location"; - mTranslator.addMapping(alluxioPath, ufsPath); - assertEquals(PathUtils.concatPath(alluxioPath, "subdir/a/b/c"), - mTranslator.toAlluxioPath(PathUtils.concatPath(ufsPath, "subdir/a/b/c"))); - } - - @Test - public void samePathDifferentUfs() throws Exception { - String alluxioPath = "alluxio:///my/table/directory"; - String ufsPath = "/the/ufs/location"; - mTranslator.addMapping(alluxioPath, "ufs-a://" + ufsPath); - mException.expect(IOException.class); - mTranslator.toAlluxioPath("ufs-b://" + ufsPath); - } - - @Test - public void noSchemeUfs() throws Exception { - String alluxioPath = "alluxio:///my/table/directory"; - String ufsPath = "/the/ufs/location"; - mTranslator.addMapping(alluxioPath, ufsPath); - assertEquals(alluxioPath, mTranslator.toAlluxioPath(ufsPath)); - } - - @Test - public void trailingSeparator() throws Exception { - String alluxioPath = "alluxio:///my/table/directory"; - String ufsPath = "/the/ufs/location"; - mTranslator.addMapping(alluxioPath + "/", ufsPath + "/"); - assertEquals(alluxioPath, mTranslator.toAlluxioPath(ufsPath)); - assertEquals(alluxioPath + "/", mTranslator.toAlluxioPath(ufsPath + "/")); - mTranslator.addMapping(alluxioPath, ufsPath); - assertEquals(alluxioPath, mTranslator.toAlluxioPath(ufsPath)); - assertEquals(alluxioPath + "/", mTranslator.toAlluxioPath(ufsPath + "/")); - } - - @Test - public void prefixBoundaryWithinPathComponent() throws Exception { - mTranslator.addMapping("alluxio:///table_a", "ufs://a/table1"); - 
mTranslator.addMapping("alluxio:///table_b", "ufs://a/table11"); - assertEquals("alluxio:///table_b/part1", - mTranslator.toAlluxioPath("ufs://a/table11/part1")); - assertEquals("alluxio:///table_a/part1", - mTranslator.toAlluxioPath("ufs://a/table1/part1")); - } - - @Test - public void deepestMatch() throws Exception { - mTranslator.addMapping("alluxio:///db1/tables/table1", "ufs://a/db1/table1"); - mTranslator.addMapping("alluxio:///db1/fragments/a", "ufs://a/db1"); - mTranslator.addMapping("alluxio:///db1/fragments/b", "ufs://b/db1"); - assertEquals("alluxio:///db1/tables/table1/part1", - mTranslator.toAlluxioPath("ufs://a/db1/table1/part1")); - assertEquals("alluxio:///db1/fragments/b/table1/part1", - mTranslator.toAlluxioPath("ufs://b/db1/table1/part1")); - assertEquals("alluxio:///db1/fragments/a/table2/part1", - mTranslator.toAlluxioPath("ufs://a/db1/table2/part1")); - } - - @Test - public void alluxioUriWithSchemeOnly() throws Exception { - mTranslator.addMapping("alluxio:///db1/tables/table1", "ufs://a/db1/table1"); - // non-exact match - assertEquals("alluxio:///db1/tables/table1/part1", - mTranslator.toAlluxioPath("ufs://a/db1/table1/part1")); - // exact match - assertEquals("alluxio:///db1/tables/table1", - mTranslator.toAlluxioPath("ufs://a/db1/table1")); - // trailing slash is preserved - assertEquals("alluxio:///db1/tables/table1/", - mTranslator.toAlluxioPath("ufs://a/db1/table1/")); - } - - @Test - public void alluxioUriWithSchemeAndAuthority() throws Exception { - mTranslator.addMapping( - ALLUXIO_URI_AUTHORITY + "/db1/tables/table1", "ufs://a/db1/table1"); - // non-exact match - assertEquals(ALLUXIO_URI_AUTHORITY + "/db1/tables/table1/part1", - mTranslator.toAlluxioPath("ufs://a/db1/table1/part1")); - // exact match - assertEquals(ALLUXIO_URI_AUTHORITY + "/db1/tables/table1", - mTranslator.toAlluxioPath("ufs://a/db1/table1")); - // trailing slash is preserved - assertEquals(ALLUXIO_URI_AUTHORITY + "/db1/tables/table1/", - 
mTranslator.toAlluxioPath("ufs://a/db1/table1/")); - } - - @Test - public void alluxioUriPurePath() throws Exception { - mTranslator.addMapping("/db1/tables/table1", "ufs://a/db1/table1"); - // non-exact match - assertEquals(ALLUXIO_URI_AUTHORITY_WITH_PORT + "/db1/tables/table1/part1", - mTranslator.toAlluxioPath("ufs://a/db1/table1/part1")); - // exact match - assertEquals(ALLUXIO_URI_AUTHORITY_WITH_PORT + "/db1/tables/table1", - mTranslator.toAlluxioPath("ufs://a/db1/table1")); - // trailing slash is preserved - assertEquals(ALLUXIO_URI_AUTHORITY_WITH_PORT + "/db1/tables/table1/", - mTranslator.toAlluxioPath("ufs://a/db1/table1/")); - } - - @Test - public void bypassedUfsUri() throws Exception { - mTranslator.addMapping("ufs://a/db1/table1", "ufs://a/db1/table1"); - assertEquals("ufs://a/db1/table1", - mTranslator.toAlluxioPath("ufs://a/db1/table1")); - assertEquals("ufs://a/db1/table1/part1", - mTranslator.toAlluxioPath("ufs://a/db1/table1/part1")); - } - - @Test - public void bypassedUfsPath() throws Exception { - mTranslator.addMapping("/a/db1/table1", "/a/db1/table1"); - assertEquals("/a/db1/table1", - mTranslator.toAlluxioPath("/a/db1/table1")); - assertEquals("/a/db1/table1/part1", - mTranslator.toAlluxioPath("/a/db1/table1/part1")); - } -} diff --git a/dora/table/server/common/src/test/java/alluxio/table/common/udb/UdbBypassSpecTest.java b/dora/table/server/common/src/test/java/alluxio/table/common/udb/UdbBypassSpecTest.java deleted file mode 100644 index c0cae4fff22d..000000000000 --- a/dora/table/server/common/src/test/java/alluxio/table/common/udb/UdbBypassSpecTest.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.udb; - -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import org.junit.Test; - -public class UdbBypassSpecTest { - @Test - public void tableAndPartitionNames() { - UdbBypassSpec spec = new UdbBypassSpec( - ImmutableMap.of("table1", ImmutableSet.of("part1", "part2"))); - assertTrue(spec.hasTable("table1")); - assertFalse(spec.hasFullTable("table1")); - assertTrue(spec.hasPartition("table1", "part1")); - assertTrue(spec.hasPartition("table1", "part2")); - assertFalse(spec.hasPartition("table1", "part3")); - } - - @Test - public void tableNamesOnly() { - UdbBypassSpec spec = new UdbBypassSpec( - ImmutableMap.of("table2", ImmutableSet.of())); - assertTrue(spec.hasTable("table2")); - assertTrue(spec.hasFullTable("table2")); - assertTrue(spec.hasPartition("table2", "part1")); - assertTrue(spec.hasPartition("table2", "part2")); - assertTrue(spec.hasPartition("table2", "part3")); - } - - @Test - public void nonExistentTable() { - UdbBypassSpec spec = new UdbBypassSpec( - ImmutableMap.of("table3", ImmutableSet.of())); - assertFalse(spec.hasTable("table4")); - assertFalse(spec.hasFullTable("table4")); - assertFalse(spec.hasPartition("table4", "part1")); - assertFalse(spec.hasPartition("table4", "part2")); - assertFalse(spec.hasPartition("table4", "part3")); - } -} diff --git a/dora/table/server/common/src/test/java/alluxio/table/common/udb/UdbConfigurationTest.java 
b/dora/table/server/common/src/test/java/alluxio/table/common/udb/UdbConfigurationTest.java deleted file mode 100644 index e43db9814312..000000000000 --- a/dora/table/server/common/src/test/java/alluxio/table/common/udb/UdbConfigurationTest.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.common.udb; - -import static org.junit.Assert.assertEquals; - -import alluxio.table.common.ConfigurationUtils; -import alluxio.util.CommonUtils; - -import com.google.common.collect.ImmutableMap; -import org.junit.Test; -import org.powermock.reflect.Whitebox; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ThreadLocalRandom; - -public class UdbConfigurationTest { - - @Test - public void multipleUfsMountOptions() { - Map opts = new ImmutableMap.Builder() - .put("my.special.key", "myspecialvalue") - .put(ConfigurationUtils.MOUNT_PREFIX + "{ufs://a.a}.key1", "v1") - .put(ConfigurationUtils.MOUNT_PREFIX + "{ufs://a.a}.key2", "v2") - .put(ConfigurationUtils.MOUNT_PREFIX + "{ufs://b.b}.key2", "v3") - .put(ConfigurationUtils.MOUNT_PREFIX + "{file}.key2", "v4") - .build(); - - UdbConfiguration conf = new UdbConfiguration(opts); - assertEquals(3, Whitebox.>getInternalState(conf, "mMountOptions").size()); - assertEquals(0, conf.getMountOption("").size()); - assertEquals(1, conf.getMountOption("ufs://b.b").size()); - assertEquals(2, 
conf.getMountOption("ufs://a.a").size()); - assertEquals(1, conf.getMountOption("file").size()); - } - - @Test - public void mountOptions() { - testMountOptions("SCHEME" + randomString(), true); - testMountOptions("SCHEME" + randomString(), false); - testMountOptions(UdbConfiguration.REGEX_PREFIX + ".*", "SCHEME" + randomString(), - true, true); - testMountOptions(UdbConfiguration.REGEX_PREFIX + ".*", "SCHEME" + randomString(), - false, true); - testMountOptions(UdbConfiguration.REGEX_PREFIX + "SCHE.E.*", "SCHEME" + randomString(), - true, true); - testMountOptions(UdbConfiguration.REGEX_PREFIX + "SCHE.E.*", "SCHEME" + randomString(), - false, true); - testMountOptions(UdbConfiguration.REGEX_PREFIX + "SCHEME1.*", "SCHEME2" + randomString(), - true, false); - testMountOptions(UdbConfiguration.REGEX_PREFIX + "SCHEME1.*", "SCHEME2" + randomString(), - false, false); - } - - private void testMountOptions(String concreteSchemeAuthority, boolean specifyTrailingSlash) { - testMountOptions(concreteSchemeAuthority, concreteSchemeAuthority, specifyTrailingSlash, true); - } - - private void testMountOptions(String templateSchemeAuthority, String concreteSchemeAuthority, - boolean specifyTrailingSlash, boolean expectedExist) { - Map values = new HashMap<>(); - for (int i = 0; i < 20; i++) { - values.put("PROPERTY" + randomString(), "VALUE" + randomString()); - } - - Map properties = new HashMap<>(); - for (Map.Entry entry : values.entrySet()) { - String schemeOption = String.format("{%s}", templateSchemeAuthority); - if (specifyTrailingSlash) { - schemeOption = String.format("{%s/}", templateSchemeAuthority); - } - properties.put( - String.format("%s%s.%s", ConfigurationUtils.MOUNT_PREFIX, schemeOption, entry.getKey()), - entry.getValue()); - } - UdbConfiguration conf = new UdbConfiguration(properties); - - // query for mount options with and without the trailing slash - if (!expectedExist) { - values = Collections.emptyMap(); - } - assertEquals(values, 
conf.getMountOption(concreteSchemeAuthority)); - assertEquals(values, conf.getMountOption(concreteSchemeAuthority + "/")); - } - - private String randomString() { - List parts = new ArrayList<>(); - for (int i = 0; i < ThreadLocalRandom.current().nextInt(1, 4); i++) { - parts.add(CommonUtils.randomAlphaNumString(5)); - } - return String.join(".", parts); - } -} diff --git a/dora/table/server/common/src/test/resources/META-INF/services/alluxio.table.common.transform.action.TransformActionFactory b/dora/table/server/common/src/test/resources/META-INF/services/alluxio.table.common.transform.action.TransformActionFactory deleted file mode 100644 index fb2f4a5e1c79..000000000000 --- a/dora/table/server/common/src/test/resources/META-INF/services/alluxio.table.common.transform.action.TransformActionFactory +++ /dev/null @@ -1,2 +0,0 @@ -alluxio.table.common.transform.action.EarlyActionFactory -alluxio.table.common.transform.action.LateActionFactory diff --git a/dora/table/server/master/pom.xml b/dora/table/server/master/pom.xml deleted file mode 100644 index c78b6a50158c..000000000000 --- a/dora/table/server/master/pom.xml +++ /dev/null @@ -1,132 +0,0 @@ - - - 4.0.0 - - alluxio-table-server - org.alluxio - 301-SNAPSHOT - - alluxio-table-server-master - jar - Alluxio Table - Server - Master - Alluxio Table master server - - - - - ${project.parent.parent.parent.parent.basedir}/build - - - - - - com.google.guava - guava - - - - - org.alluxio - alluxio-core-common - ${project.version} - - - org.alluxio - alluxio-core-transport - ${project.version} - - - org.alluxio - alluxio-core-client-fs - ${project.version} - - - org.alluxio - alluxio-core-server-common - ${project.version} - - - org.alluxio - alluxio-core-server-master - ${project.version} - - - org.alluxio - alluxio-job-client - ${project.version} - - - org.alluxio - alluxio-table-base - ${project.version} - - - org.alluxio - alluxio-table-server-common - ${project.version} - - - - - com.google.guava - guava-testlib - 
test - - - - - org.alluxio - alluxio-core-common - ${project.version} - test-jar - test - - - org.alluxio - alluxio-core-server-master - ${project.version} - test-jar - test - - - org.alluxio - alluxio-underfs-local - ${project.version} - test - - - - - - - src/main/resources - true - - - - - - org.apache.maven.plugins - maven-jar-plugin - - - - test-jar - - - - - - - diff --git a/dora/table/server/master/src/main/java/alluxio/master/table/AlluxioCatalog.java b/dora/table/server/master/src/main/java/alluxio/master/table/AlluxioCatalog.java deleted file mode 100644 index ce97ba0234be..000000000000 --- a/dora/table/server/master/src/main/java/alluxio/master/table/AlluxioCatalog.java +++ /dev/null @@ -1,478 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.master.table; - -import alluxio.client.file.FileSystem; -import alluxio.collections.Pair; -import alluxio.exception.ExceptionMessage; -import alluxio.exception.status.NotFoundException; -import alluxio.grpc.table.ColumnStatisticsInfo; -import alluxio.grpc.table.ColumnStatisticsList; -import alluxio.grpc.table.Constraint; -import alluxio.grpc.table.SyncStatus; -import alluxio.master.journal.JournalContext; -import alluxio.master.journal.JournalEntryIterable; -import alluxio.master.journal.Journaled; -import alluxio.master.journal.checkpoint.CheckpointName; -import alluxio.proto.journal.Journal; -import alluxio.resource.CloseableIterator; -import alluxio.resource.LockResource; -import alluxio.table.common.Layout; -import alluxio.table.common.LayoutRegistry; -import alluxio.table.common.transform.TransformDefinition; -import alluxio.table.common.transform.TransformPlan; -import alluxio.table.common.udb.UdbContext; -import alluxio.table.common.udb.UnderDatabaseRegistry; -import alluxio.util.StreamUtils; - -import com.google.common.collect.Maps; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.locks.ReentrantLock; -import java.util.stream.Collectors; - -/** - * Class representing the Alluxio catalog. - */ -public class AlluxioCatalog implements Journaled { - private static final Logger LOG = LoggerFactory.getLogger(AlluxioCatalog.class); - - private final Map mDBs = new ConcurrentHashMap<>(); - /** - * These locks enforce serialization of updates of the database map, {@link #mDBs}. - * Reads do not need to be serialized w.r.t. the writes, and the data structure - * (ConcurrentHashMap) is thread-safe. 
- */ - private final Map mDbLocks = new ConcurrentHashMap<>(); - private final UnderDatabaseRegistry mUdbRegistry; - private final LayoutRegistry mLayoutRegistry; - private final FileSystem mFileSystem; - - /** - * Creates an instance. - */ - public AlluxioCatalog() { - mFileSystem = FileSystem.Factory.create(); - mUdbRegistry = new UnderDatabaseRegistry(); - mUdbRegistry.refresh(); - mLayoutRegistry = new LayoutRegistry(); - mLayoutRegistry.refresh(); - } - - private LockResource getDbLock(String dbName) { - // TODO(gpang): update concurrency model to OCC for the catalog - ReentrantLock lock = mDbLocks.compute(dbName, - (key, value) -> value == null ? new ReentrantLock() : value); - return new LockResource(lock); - } - - /** - * @return the layout registry - */ - public LayoutRegistry getLayoutRegistry() { - return mLayoutRegistry; - } - - /** - * Attaches an udb database to Alluxio catalog. - * - * @param journalContext journal context - * @param udbType the database type - * @param udbConnectionUri the udb connection uri - * @param udbDbName the database name in the udb - * @param dbName the database name in Alluxio - * @param map the configuration - * @param ignoreSyncErrors if true, will ignore syncing errors during the attach - * @return the sync status for the attach - */ - public SyncStatus attachDatabase(JournalContext journalContext, String udbType, - String udbConnectionUri, String udbDbName, String dbName, Map map, - boolean ignoreSyncErrors) - throws IOException { - try (LockResource l = getDbLock(dbName)) { - if (mDBs.containsKey(dbName)) { - throw new IOException(String - .format("Unable to attach database. 
Database name %s (type: %s) already exists.", - dbName, udbType)); - } - - applyAndJournal(journalContext, Journal.JournalEntry.newBuilder().setAttachDb( - alluxio.proto.journal.Table.AttachDbEntry.newBuilder() - .setUdbType(udbType) - .setUdbConnectionUri(udbConnectionUri) - .setUdbDbName(udbDbName) - .setDbName(dbName) - .putAllConfig(map).build()).build()); - - boolean syncError = false; - try { - SyncStatus status = mDBs.get(dbName).sync(journalContext); - syncError = status.getTablesErrorsCount() > 0; - return status; - } catch (Exception e) { - // Failed to connect to and sync the udb. - syncError = true; - // log the error and stack - LOG.error(String.format("Sync (during attach) failed for db '%s'.", dbName), e); - throw new IOException(String - .format("Failed to connect underDb for Alluxio db '%s': %s", dbName, - e.getMessage()), e); - } finally { - if (syncError && !ignoreSyncErrors) { - applyAndJournal(journalContext, Journal.JournalEntry.newBuilder().setDetachDb( - alluxio.proto.journal.Table.DetachDbEntry.newBuilder().setDbName(dbName).build()) - .build()); - } - } - } - } - - /** - * Syncs a database. - * - * @param journalContext journal context - * @param dbName database name - * @return the resulting sync status - */ - public SyncStatus syncDatabase(JournalContext journalContext, String dbName) throws IOException { - try (LockResource l = getDbLock(dbName)) { - Database db = getDatabaseByName(dbName); - return db.sync(journalContext); - } catch (Exception e) { - // log the error and stack - LOG.error(String.format("Sync failed for db '%s'.", dbName), e); - throw new IOException( - String.format("Sync failed for db '%s'. error: %s", dbName, e.getMessage()), e); - } - } - - /** - * Removes an existing database. 
- * - * @param journalContext journal context - * @param dbName the database name - * @return true if database successfully created - */ - public boolean detachDatabase(JournalContext journalContext, String dbName) - throws IOException { - try (LockResource l = getDbLock(dbName)) { - if (!mDBs.containsKey(dbName)) { - throw new IOException(String - .format("Unable to detach database. Database name %s does not exist", dbName)); - } - applyAndJournal(journalContext, Journal.JournalEntry.newBuilder().setDetachDb( - alluxio.proto.journal.Table.DetachDbEntry.newBuilder() - .setDbName(dbName).build()).build()); - return true; - } - } - - /** - * Get a table object by name. - * - * @param dbName the database name - * @param tableName the table name - * @return a table object - */ - public Table getTable(String dbName, String tableName) throws IOException { - return getTableInternal(dbName, tableName); - } - - private Table getTableInternal(String dbName, String tableName) throws IOException { - Database db = getDatabaseByName(dbName); - return db.getTable(tableName); - } - - /** - * Get all databases. - * - * @return a list of all database names - */ - public List getAllDatabases() throws IOException { - // TODO(gpang): update api to return collection or iterator? - return new ArrayList<>(mDBs.keySet()); - } - - /** - * Get Database by its name. 
- * - * @param dbName database name - * @return a database object - */ - public alluxio.grpc.table.Database getDatabase(String dbName) throws IOException { - Database db = getDatabaseByName(dbName); - DatabaseInfo dbInfo = db.getDatabaseInfo(); - - alluxio.grpc.table.Database.Builder builder = alluxio.grpc.table.Database.newBuilder() - .setDbName(db.getName()) - .putAllParameter(dbInfo.getParameters()); - if (dbInfo.getComment() != null) { - builder.setComment(dbInfo.getComment()); - } - if (dbInfo.getLocation() != null) { - builder.setLocation(dbInfo.getLocation()); - } - if (dbInfo.getOwnerName() != null) { - builder.setOwnerName(dbInfo.getOwnerName()); - } - if (dbInfo.getOwnerType() != null) { - builder.setOwnerType(dbInfo.getOwnerType()); - } - return builder.build(); - } - - private Database getDatabaseByName(String dbName) throws NotFoundException { - Database db = mDBs.get(dbName); - if (db == null) { - throw new NotFoundException(ExceptionMessage.DATABASE_DOES_NOT_EXIST.getMessage(dbName)); - } - return db; - } - - /** - * Get a list of tables in a database. - * - * @param dbName database name - * @return a list of table names in the database - */ - public List getAllTables(String dbName) throws IOException { - Database db = getDatabaseByName(dbName); - return db.getTables().stream().map(Table::getName).collect(Collectors.toList()); - } - - /** - * Returns the statistics for the specified table. - * - * @param dbName the database name - * @param tableName the table name - * @param colNames column names - * @return the statistics for the specified table - */ - public List getTableColumnStatistics(String dbName, String tableName, - List colNames) throws IOException { - Table table = getTableInternal(dbName, tableName); - return table.getStatistics().stream().filter(info -> colNames.contains(info.getColName())) - .collect(Collectors.toList()); - } - - /** - * Returns the statistics for the specified table. 
- * - * @param dbName the database name - * @param tableName the table name - * @param partNames partition names - * @param colNames column names - * @return the statistics for the partitions for a specific table - */ - public Map getPartitionColumnStatistics(String dbName, - String tableName, List partNames, List colNames) throws IOException { - Table table = getTableInternal(dbName, tableName); - List partitions = table.getPartitions(); - return partitions.stream().filter(p -> partNames.contains(p.getSpec())) - .map(p -> new Pair<>(p.getSpec(), - ColumnStatisticsList.newBuilder().addAllStatistics( - p.getLayout().getColumnStatsData().entrySet().stream() - .filter(entry -> colNames.contains(entry.getKey())) - .map(Map.Entry::getValue).collect(Collectors.toList())).build())) - .collect(Collectors.toMap(Pair::getFirst, Pair::getSecond, (e1, e2) -> e2)); - } - - /** - * Returns the partitions based on a constraint for the specified table. - * - * @param dbName the database name - * @param tableName the table name - * @param constraint the column contraint - * @return the partition info for the specified table - */ - public List readTable(String dbName, String tableName, - Constraint constraint) throws IOException { - Table table = getTableInternal(dbName, tableName); - // TODO(david): implement partition pruning - return table.getPartitions().stream().map(Partition::toProto).collect(Collectors.toList()); - } - - /** - * Completes table transformation by updating the layouts of the table's partitions. 
- * - * @param journalContext the journal context - * @param dbName the database name - * @param tableName the table name - * @param definition the transformation definition - * @param transformedLayouts map from partition spec to the transformed layouts - */ - public void completeTransformTable(JournalContext journalContext, String dbName, String tableName, - String definition, Map transformedLayouts) throws IOException { - try (LockResource l = getDbLock(dbName)) { - // Check existence of table. - getTableInternal(dbName, tableName); - alluxio.proto.journal.Table.CompleteTransformTableEntry entry = - alluxio.proto.journal.Table.CompleteTransformTableEntry.newBuilder() - .setDbName(dbName) - .setTableName(tableName) - .setDefinition(definition) - .putAllTransformedLayouts(Maps.transformValues(transformedLayouts, Layout::toProto)) - .build(); - applyAndJournal(journalContext, Journal.JournalEntry.newBuilder() - .setCompleteTransformTable(entry).build()); - } - } - - /** - * @param dbName the database name - * @param tableName the table name - * @param definition the transformation definition - * @return the transformation plan for the table - */ - public List getTransformPlan(String dbName, String tableName, - TransformDefinition definition) throws IOException { - return getTableInternal(dbName, tableName).getTransformPlans(definition); - } - - private void apply(alluxio.proto.journal.Table.AttachDbEntry entry) { - String udbType = entry.getUdbType(); - String udbConnectionUri = entry.getUdbConnectionUri(); - String udbDbName = entry.getUdbDbName(); - String dbName = entry.getDbName(); - - CatalogContext catalogContext = new CatalogContext(mUdbRegistry, mLayoutRegistry); - UdbContext udbContext = - new UdbContext(mUdbRegistry, mFileSystem, udbType, udbConnectionUri, udbDbName, dbName); - - Database db = - Database.create(catalogContext, udbContext, udbType, dbName, entry.getConfigMap()); - mDBs.put(dbName, db); - } - - private void 
apply(alluxio.proto.journal.Table.DetachDbEntry entry) { - String dbName = entry.getDbName(); - mDBs.remove(dbName); - } - - private void apply(alluxio.proto.journal.Table.CompleteTransformTableEntry entry) { - String dbName = entry.getDbName(); - String tableName = entry.getTableName(); - Table table; - try { - table = getTableInternal(dbName, tableName); - } catch (IOException e) { - throw new RuntimeException(e); - } - for (Map.Entry e : entry.getTransformedLayoutsMap() - .entrySet()) { - String spec = e.getKey(); - Layout layout = mLayoutRegistry.create(e.getValue()); - Partition partition = table.getPartition(spec); - partition.transform(entry.getDefinition(), layout); - LOG.debug("Transformed partition {} of table {}.{} to {} with definition {}", - spec, dbName, tableName, layout.getLocation(), entry.getDefinition()); - } - } - - @Override - public boolean processJournalEntry(Journal.JournalEntry entry) { - if (entry.hasAttachDb()) { - apply(entry.getAttachDb()); - return true; - } else if (entry.hasUpdateDatabaseInfo()) { - Database db = mDBs.get(entry.getUpdateDatabaseInfo().getDbName()); - return db.processJournalEntry(entry); - } else if (entry.hasAddTable()) { - Database db = mDBs.get(entry.getAddTable().getDbName()); - return db.processJournalEntry(entry); - } else if (entry.hasAddTablePartitions()) { - Database db = mDBs.get(entry.getAddTablePartitions().getDbName()); - return db.processJournalEntry(entry); - } else if (entry.hasRemoveTable()) { - Database db = mDBs.get(entry.getRemoveTable().getDbName()); - return db.processJournalEntry(entry); - } else if (entry.hasDetachDb()) { - apply(entry.getDetachDb()); - return true; - } else if (entry.hasCompleteTransformTable()) { - apply(entry.getCompleteTransformTable()); - return true; - } - return false; - } - - @Override - public void resetState() { - mDBs.clear(); - } - - private Iterator getDbIterator() { - final Iterator> it = mDBs.entrySet().iterator(); - return new Iterator() { - private Map.Entry 
mEntry = null; - - @Override - public boolean hasNext() { - if (mEntry != null) { - return true; - } - if (it.hasNext()) { - mEntry = it.next(); - return true; - } - return false; - } - - @Override - public Journal.JournalEntry next() { - if (!hasNext()) { - throw new NoSuchElementException(); - } - String dbName = mEntry.getKey(); - Database database = mEntry.getValue(); - UdbContext udbContext = database.getUdb().getUdbContext(); - mEntry = null; - - return Journal.JournalEntry.newBuilder().setAttachDb( - alluxio.proto.journal.Table.AttachDbEntry.newBuilder() - .setUdbType(database.getType()) - .setUdbConnectionUri(udbContext.getConnectionUri()) - .setUdbDbName(udbContext.getUdbDbName()) - .setDbName(dbName) - .putAllConfig(database.getConfig()).build()).build(); - } - - @Override - public void remove() { - throw new UnsupportedOperationException( - "GetDbIteratorr#Iterator#remove is not supported."); - } - }; - } - - @Override - public CloseableIterator getJournalEntryIterator() { - List> componentIters = StreamUtils - .map(JournalEntryIterable::getJournalEntryIterator, mDBs.values()); - - return CloseableIterator.concat( - CloseableIterator.noopCloseable(getDbIterator()), CloseableIterator.concat(componentIters)); - } - - @Override - public CheckpointName getCheckpointName() { - return CheckpointName.TABLE_MASTER_CATALOG; - } -} diff --git a/dora/table/server/master/src/main/java/alluxio/master/table/BasePartitionScheme.java b/dora/table/server/master/src/main/java/alluxio/master/table/BasePartitionScheme.java deleted file mode 100644 index 726f940fad11..000000000000 --- a/dora/table/server/master/src/main/java/alluxio/master/table/BasePartitionScheme.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.table; - -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * Base implementation of PartitionScheme with default implementation of - * {@link PartitionScheme#getPartition(String)} and {@link PartitionScheme#getPartitions()}. - */ -public abstract class BasePartitionScheme implements PartitionScheme { - protected final List mPartitions; - protected final Map mSpecToPartition; - - /** - * A map from partition spec to partitions is computed from the partitions. - * - * @param partitions list of partitions - */ - public BasePartitionScheme(List partitions) { - mPartitions = partitions; - mSpecToPartition = new HashMap<>(); - for (Partition partition : mPartitions) { - mSpecToPartition.put(partition.getSpec(), partition); - } - } - - @Override - public void addPartitions(List partitions) { - mPartitions.addAll(partitions); - for (Partition partition : mPartitions) { - mSpecToPartition.put(partition.getSpec(), partition); - } - } - - @Override - public List getPartitions() { - return Collections.unmodifiableList(mPartitions); - } - - @Override - public Partition getPartition(String spec) { - return mSpecToPartition.get(spec); - } -} diff --git a/dora/table/server/master/src/main/java/alluxio/master/table/CatalogConfiguration.java b/dora/table/server/master/src/main/java/alluxio/master/table/CatalogConfiguration.java deleted file mode 100644 index 58aed1e6a35e..000000000000 --- a/dora/table/server/master/src/main/java/alluxio/master/table/CatalogConfiguration.java +++ /dev/null @@ 
-1,45 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.table; - -import alluxio.table.common.BaseConfiguration; -import alluxio.table.common.ConfigurationUtils; -import alluxio.table.common.udb.UdbConfiguration; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.HashMap; -import java.util.Map; - -/** - * This represents a configuration of the catalog. - */ -public class CatalogConfiguration extends BaseConfiguration { - private static final Logger LOG = LoggerFactory.getLogger(CatalogConfiguration.class); - - CatalogConfiguration(Map values) { - super(values); - } - - UdbConfiguration getUdbConfiguration(String udbType) { - String udbPrefix = ConfigurationUtils.getUdbPrefix(udbType); - HashMap map = new HashMap<>(mValues.size()); - for (Map.Entry entry : mValues.entrySet()) { - if (entry.getKey().startsWith(udbPrefix)) { - String key = entry.getKey().substring(udbPrefix.length()); - map.put(key, entry.getValue()); - } - } - return new UdbConfiguration(map); - } -} diff --git a/dora/table/server/master/src/main/java/alluxio/master/table/CatalogContext.java b/dora/table/server/master/src/main/java/alluxio/master/table/CatalogContext.java deleted file mode 100644 index 179150c0cb0f..000000000000 --- a/dora/table/server/master/src/main/java/alluxio/master/table/CatalogContext.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.table; - -import alluxio.table.common.LayoutRegistry; -import alluxio.table.common.udb.UnderDatabaseRegistry; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * The context for the catalog. - */ -public class CatalogContext { - private static final Logger LOG = LoggerFactory.getLogger(CatalogContext.class); - - private final UnderDatabaseRegistry mUdbRegistry; - private final LayoutRegistry mLayoutRegistry; - - /** - * Creates an instance. - * - * @param udbRegistry the udb registry - * @param layoutRegistry the layout registry - */ - public CatalogContext(UnderDatabaseRegistry udbRegistry, LayoutRegistry layoutRegistry) { - mUdbRegistry = udbRegistry; - mLayoutRegistry = layoutRegistry; - } - - /** - * @return the layout registry - */ - public LayoutRegistry getLayoutRegistry() { - return mLayoutRegistry; - } - - /** - * @return the udb registry - */ - public UnderDatabaseRegistry getUdbRegistry() { - return mUdbRegistry; - } -} diff --git a/dora/table/server/master/src/main/java/alluxio/master/table/CatalogProperty.java b/dora/table/server/master/src/main/java/alluxio/master/table/CatalogProperty.java deleted file mode 100644 index bf155167b74f..000000000000 --- a/dora/table/server/master/src/main/java/alluxio/master/table/CatalogProperty.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.table; - -import alluxio.table.common.BaseProperty; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * This represents a property name and default value for the catalog. - */ -public class CatalogProperty extends BaseProperty { - private static final Logger LOG = LoggerFactory.getLogger(CatalogProperty.class); - - public static final int DEFAULT_DB_SYNC_THREADS = 4; - - private CatalogProperty(String name, String description, String defaultValue) { - super(name, description, defaultValue); - } - - public static final CatalogProperty DB_IGNORE_TABLES = - new CatalogProperty("catalog.db.ignore.udb.tables", - "The comma-separated list of table names to ignore from the UDB.", ""); - public static final CatalogProperty DB_SYNC_THREADS = - new CatalogProperty("catalog.db.sync.threads", - "The maximum number of threads to use when parallel syncing all the tables from the " - + "under database (UDB) to the catalog. 
If this is set too large, the threads may " - + "overload the UDB, and if set too low, syncing a database with many tables may " - + "take a long time.", - Integer.toString(DEFAULT_DB_SYNC_THREADS)); - public static final CatalogProperty DB_CONFIG_FILE = - new CatalogProperty("catalog.db.config.file", - "The config file for the UDB.", ""); -} diff --git a/dora/table/server/master/src/main/java/alluxio/master/table/Database.java b/dora/table/server/master/src/main/java/alluxio/master/table/Database.java deleted file mode 100644 index d9910c0470f1..000000000000 --- a/dora/table/server/master/src/main/java/alluxio/master/table/Database.java +++ /dev/null @@ -1,594 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.master.table; - -import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; -import alluxio.exception.ExceptionMessage; -import alluxio.exception.status.NotFoundException; -import alluxio.grpc.table.FileStatistics; -import alluxio.grpc.table.Schema; -import alluxio.grpc.table.SyncStatus; -import alluxio.master.journal.JournalContext; -import alluxio.master.journal.Journaled; -import alluxio.master.journal.checkpoint.CheckpointName; -import alluxio.proto.journal.Journal; -import alluxio.resource.CloseableIterator; -import alluxio.table.common.udb.UdbContext; -import alluxio.table.common.udb.UdbTable; -import alluxio.table.common.udb.UnderDatabase; -import alluxio.util.CommonUtils; -import alluxio.util.ConfigurationUtils; -import alluxio.util.executor.ExecutorServiceFactories; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.Iterators; -import com.google.common.collect.Sets; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.Set; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.function.Supplier; -import javax.annotation.Nullable; - -/** - * The database implementation that manages a collection of tables. 
- */ -public class Database implements Journaled { - private static final Logger LOG = LoggerFactory.getLogger(Database.class); - - private final CatalogContext mContext; - private final String mType; - private final String mName; - private final Map mTables; - private final UnderDatabase mUdb; - private final CatalogConfiguration mConfig; - private final Set mIgnoreTables; - private final String mConfigPath; - private DbConfig mDbConfig; - private final long mUdbSyncTimeoutMs = - Configuration.getMs(PropertyKey.TABLE_CATALOG_UDB_SYNC_TIMEOUT); - - private DatabaseInfo mDatabaseInfo; - - private Database(CatalogContext context, String type, String name, UnderDatabase udb, - CatalogConfiguration config) { - mContext = context; - mType = type; - mName = name; - mTables = new ConcurrentHashMap<>(); - mUdb = udb; - mConfig = config; - mIgnoreTables = Sets.newHashSet( - ConfigurationUtils.parseAsList(mConfig.get(CatalogProperty.DB_IGNORE_TABLES), ",")); - mConfigPath = mConfig.get(CatalogProperty.DB_CONFIG_FILE); - mDbConfig = DbConfig.empty(); - } - - /** - * Creates an instance of a database. 
- * - * @param catalogContext the catalog context - * @param udbContext the db context - * @param type the database type - * @param name the database name - * @param configMap the configuration - * @return the database instance - */ - public static Database create(CatalogContext catalogContext, UdbContext udbContext, String type, - String name, Map configMap) { - CatalogConfiguration configuration = new CatalogConfiguration(configMap); - try { - UnderDatabase udb = udbContext.getUdbRegistry() - .create(udbContext, type, configuration.getUdbConfiguration(type)); - return new Database(catalogContext, type, name, udb, configuration); - } catch (Exception e) { - throw new IllegalArgumentException("Creating udb failed for database name: " + name, e); - } - } - - /** - * @return the catalog context - */ - public CatalogContext getContext() { - return mContext; - } - - /** - * @return returns the database name - */ - public String getName() { - return mName; - } - - /** - * @return returns database info - */ - public DatabaseInfo getDatabaseInfo() { - return mDatabaseInfo; - } - - /** - * @return returns the database type - */ - public String getType() { - return mType; - } - - /** - * @return the {@link UnderDatabase} - */ - public UnderDatabase getUdb() { - return mUdb; - } - - /** - * @return the list of all tables - */ - public List getTables() { - return new ArrayList<>(mTables.values()); - } - - /** - * @param tableName the table name - * @return the {@link Table} for the specified table name - */ - public Table getTable(String tableName) throws NotFoundException { - Table table = mTables.get(tableName); - if (table == null) { - throw new NotFoundException(ExceptionMessage.TABLE_DOES_NOT_EXIST - .getMessage(tableName, mName)); - } - return table; - } - - /** - * Creates a new table within this database. 
- * - * @param tableName the new table name - * @param schema the schema for the table - * @return the {@link Table} for the newly created table - */ - public Table createTable(String tableName, Schema schema) { - // TODO(gpang): implement - return mTables.get(tableName); - } - - /** - * @param tableName the table name - * @return statistics for the specified table name - */ - public Map getStatistics(String tableName) { - // TODO(gpang): implement - return Collections.emptyMap(); - } - - /** - * - * @return the configuration for the database - */ - public Map getConfig() { - return mConfig.getMap(); - } - - /** - * Syncs the metadata from the under db. To avoid concurrent sync operations, this requires - * external synchronization. - * - * @param context journal context - * @return the resulting sync status - */ - public SyncStatus sync(JournalContext context) throws IOException { - // Keep track of the status of each syncing table. - // Synchronization is necessary if accessed concurrently from multiple threads - SyncStatus.Builder builder = SyncStatus.newBuilder(); - - if (!mConfigPath.equals(CatalogProperty.DB_CONFIG_FILE.getDefaultValue())) { - if (!Files.exists(Paths.get(mConfigPath))) { - throw new FileNotFoundException(mConfigPath); - } - ObjectMapper mapper = new ObjectMapper(); - try { - mDbConfig = mapper.readValue(new File(mConfigPath), DbConfig.class); - } catch (JsonProcessingException e) { - LOG.error("Failed to deserialize UDB config file {}, stays unsynced", mConfigPath, e); - throw e; - } - } - DatabaseInfo newDbInfo = mUdb.getDatabaseInfo(); - if (!newDbInfo.equals(mDatabaseInfo)) { - applyAndJournal(context, Journal.JournalEntry.newBuilder() - .setUpdateDatabaseInfo(toJournalProto(newDbInfo, mName)).build()); - } - - Set udbTableNames = new HashSet<>(mUdb.getTableNames()); - - // keeps track of how many tables have been synced - final AtomicInteger tablesSynced = new AtomicInteger(); - // # of synced tables, after which a log message is printed 
for progress - final int progressBatch = - (udbTableNames.size() < 100) ? udbTableNames.size() : udbTableNames.size() / 10; - - // sync each table in parallel, with the executor service - List> tasks = new ArrayList<>(udbTableNames.size()); - final Database thisDb = this; - for (String tableName : udbTableNames) { - if (mIgnoreTables.contains(tableName)) { - // this table should be ignored. - builder.addTablesIgnored(tableName); - tablesSynced.incrementAndGet(); - continue; - } - tasks.add(() -> { - // Save all exceptions - try { - Table previousTable = mTables.get(tableName); - UdbTable udbTable = mUdb.getTable(tableName, mDbConfig.getUdbBypassSpec()); - Table newTable = Table.create(thisDb, udbTable, previousTable); - - if (newTable != null) { - // table was created or was updated - alluxio.proto.journal.Table.AddTableEntry addTableEntry - = newTable.getTableJournalProto(); - Journal.JournalEntry entry = - Journal.JournalEntry.newBuilder().setAddTable(addTableEntry).build(); - applyAndJournal(context, entry); - // separate the possible big table entry into multiple smaller table partitions entry - newTable.getTablePartitionsJournalProto().forEach((partitionsEntry) -> { - applyAndJournal(context, Journal.JournalEntry - .newBuilder().setAddTablePartitions(partitionsEntry).build()); - }); - synchronized (builder) { - builder.addTablesUpdated(tableName); - } - } else { - synchronized (builder) { - builder.addTablesUnchanged(tableName); - } - } - } catch (Exception e) { - LOG.error(String.format("Sync thread failed for %s.%s", thisDb.mName, tableName), e); - synchronized (builder) { - builder.putTablesErrors(tableName, e.toString()); - } - } finally { - int syncedTables = tablesSynced.incrementAndGet(); - int percentage = -1; - // Only log at regular intervals, or when complete - if (syncedTables % progressBatch == 0) { - // compute percentage, cap at 99% - percentage = Math.min(Math.round(100.0f * syncedTables / udbTableNames.size()), 99); - } - if (syncedTables == 
udbTableNames.size()) { - percentage = 100; - } - if (percentage != -1) { - LOG.info("Syncing db {} progress: completed {} of {} tables ({}%)", mName, syncedTables, - udbTableNames.size(), percentage); - } - } - return null; - }); - } - - // create a thread pool to parallelize the sync - int threads; - try { - threads = Integer.parseInt(mConfig.get(CatalogProperty.DB_SYNC_THREADS)); - } catch (NumberFormatException e) { - LOG.warn("Catalog property {} with value {} cannot be parsed as an int", - CatalogProperty.DB_SYNC_THREADS.getName(), mConfig.get(CatalogProperty.DB_SYNC_THREADS)); - threads = CatalogProperty.DEFAULT_DB_SYNC_THREADS; - } - if (threads < 1) { - // if invalid, set to the default - threads = CatalogProperty.DEFAULT_DB_SYNC_THREADS; - } - ExecutorService service = - ExecutorServiceFactories.fixedThreadPool(String.format("Catalog-Sync-%s", mName), threads) - .create(); - try { - CommonUtils.invokeAll(service, tasks, mUdbSyncTimeoutMs); - } catch (Exception e) { - throw new IOException("Failed to sync database " + mName + ". error: " + e.toString(), e); - } finally { - // shutdown the thread pool - service.shutdownNow(); - String errorMessage = - String.format("waiting for db-sync thread pool to shut down. 
db: %s", mName); - try { - if (!service.awaitTermination(5, TimeUnit.SECONDS)) { - LOG.warn("Timed out " + errorMessage); - } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - LOG.warn("Interrupted while " + errorMessage); - } - } - - for (Table existingTable : mTables.values()) { - if (!udbTableNames.contains(existingTable.getName())) { - // this table no longer exists in udb - alluxio.proto.journal.Table.RemoveTableEntry removeTableEntry = - alluxio.proto.journal.Table.RemoveTableEntry.newBuilder() - .setDbName(mName) - .setTableName(existingTable.getName()) - .setVersion(existingTable.getVersion()) - .build(); - Journal.JournalEntry entry = Journal.JournalEntry.newBuilder() - .setRemoveTable(removeTableEntry) - .build(); - applyAndJournal(context, entry); - builder.addTablesRemoved(existingTable.getName()); - } - } - return builder.build(); - } - - @Override - public void applyAndJournal(Supplier context, Journal.JournalEntry entry) { - // This is journaled differently from others components, since optimistic concurrency control - // is utilized. There are no external locks for the table, so the locking will happen during - // the access of the tables map. 
- processJournalEntryInternal(entry, context.get()); - } - - @Override - public boolean processJournalEntry(Journal.JournalEntry entry) { - // Do not journal when processing journal entries - return processJournalEntryInternal(entry, null); - } - - /** - * @param entry the journal entry to process - * @param context the journal context, will not journal if null - * @return whether the entry type is supported by this journaled object - */ - private boolean processJournalEntryInternal(Journal.JournalEntry entry, - @Nullable JournalContext context) { - if (entry.hasAddTable()) { - return applyAddTable(context, entry); - } - if (entry.hasAddTablePartitions()) { - return applyAddTablePartitions(context, entry); - } - if (entry.hasRemoveTable()) { - return applyRemoveTable(context, entry); - } - if (entry.hasUpdateDatabaseInfo()) { - return applyUpdateDbInfo(context, entry); - } - return false; - } - - private boolean applyUpdateDbInfo(@Nullable JournalContext context, Journal.JournalEntry entry) { - alluxio.proto.journal.Table.UpdateDatabaseInfoEntry updateDb = entry.getUpdateDatabaseInfo(); - if (!updateDb.getDbName().equals(mName)) { - return false; - } - if (context != null) { - context.append(entry); - } - mDatabaseInfo = new DatabaseInfo(updateDb.getLocation(), updateDb.getOwnerName(), - updateDb.getOwnerType(), updateDb.getComment(), updateDb.getParameterMap()); - return true; - } - - private boolean applyAddTable(@Nullable JournalContext context, Journal.JournalEntry entry) { - alluxio.proto.journal.Table.AddTableEntry addTable = entry.getAddTable(); - if (!addTable.getDbName().equals(mName)) { - return false; - } - - Table newTable = Table.create(this, addTable); - mTables.compute(newTable.getName(), (key, existingTable) -> { - boolean writeNewTable = false; - if (existingTable == null && (newTable.getVersion() == Table.FIRST_VERSION)) { - // this table is being newly inserted, and has the expected first version - LOG.info("Adding new table {}.{}", mName, 
newTable.getName()); - writeNewTable = true; - } - - if (existingTable != null && (newTable.getPreviousVersion() == existingTable.getVersion())) { - // Previous table already exists, and matches the new table's previous version - LOG.info("Updating table {}.{} to version {}", mName, newTable.getName(), - newTable.getVersion()); - writeNewTable = true; - } - - if (writeNewTable) { - // The new table has been successfully validated, so update the map with the new table, - // and journal the entry if the journal context exists. - if (context != null) { - context.append(entry); - } - return newTable; - } else { - // The table to add does not validate with the existing table, so another thread must - // have updated the map. Do not modify the map. - return existingTable; - } - }); - - return true; - } - - private boolean applyAddTablePartitions(@Nullable JournalContext context, - Journal.JournalEntry entry) { - alluxio.proto.journal.Table.AddTablePartitionsEntry addTablePartitions - = entry.getAddTablePartitions(); - if (!addTablePartitions.getDbName().equals(mName)) { - return false; - } - - mTables.compute(addTablePartitions.getTableName(), (key, existingTable) -> { - if (existingTable != null) { - if (addTablePartitions.getVersion() == existingTable.getVersion()) { - LOG.info("Adding {} partitions to table {}.{}", addTablePartitions.getPartitionsCount(), - mName, addTablePartitions.getTableName()); - if (context != null) { - context.append(entry); - } - existingTable.addPartitions(addTablePartitions); - return existingTable; - } - LOG.info("Will not add partitions to table {}.{}, because of mismatched versions. 
" - + "version-to-add-partitions: {} existing-version: {}", - mName, addTablePartitions.getTableName(), - addTablePartitions.getVersion(), existingTable.getVersion()); - } - LOG.debug("Cannot add partitions to table {}.{}, because it does not exist.", mName, - addTablePartitions.getTableName()); - return existingTable; - }); - - return true; - } - - private boolean applyRemoveTable(@Nullable JournalContext context, Journal.JournalEntry entry) { - alluxio.proto.journal.Table.RemoveTableEntry removeTable = entry.getRemoveTable(); - if (!removeTable.getDbName().equals(mName)) { - return false; - } - - mTables.compute(removeTable.getTableName(), (key, existingTable) -> { - if (existingTable != null) { - if (removeTable.getVersion() == existingTable.getVersion()) { - // this table is being removed, and has the expected version - LOG.info("Removing table {}.{}", mName, removeTable.getTableName()); - if (context != null) { - context.append(entry); - } - return null; - } - LOG.info("Will not remove table {}.{}, because of mismatched versions. " - + "version-to-delete: {} existing-version: {}", mName, removeTable.getTableName(), - removeTable.getVersion(), existingTable.getVersion()); - } - LOG.debug("Cannot remove table {}.{}, because it does not exist.", mName, - removeTable.getTableName()); - return existingTable; - }); - - return true; - } - - @Override - public void resetState() { - mTables.clear(); - } - - private Iterator getTableIterator() { - final Iterator
it = getTables().iterator(); - return new Iterator() { - private Table mEntry = null; - private Iterator mPartitionIterator; - - @Override - public boolean hasNext() { - if (mEntry != null) { - return true; - } - if (mPartitionIterator != null && mPartitionIterator.hasNext()) { - return true; - } - if (it.hasNext()) { - mEntry = it.next(); - mPartitionIterator = mEntry.getTablePartitionsJournalProto().iterator(); - return true; - } - return false; - } - - @Override - public Journal.JournalEntry next() { - if (!hasNext()) { - throw new NoSuchElementException(); - } - if (mEntry != null) { - Table table = mEntry; - mEntry = null; - alluxio.proto.journal.Table.AddTableEntry addTableEntry = table.getTableJournalProto(); - return Journal.JournalEntry.newBuilder().setAddTable(addTableEntry).build(); - } - if (mPartitionIterator != null && mPartitionIterator.hasNext()) { - return Journal.JournalEntry.newBuilder() - .setAddTablePartitions(mPartitionIterator.next()).build(); - } - // should not reach here - throw new NoSuchElementException(); - } - - @Override - public void remove() { - throw new UnsupportedOperationException( - "GetTableIterator#Iterator#remove is not supported."); - } - }; - } - - @Override - public CloseableIterator getJournalEntryIterator() { - Journal.JournalEntry entry = Journal.JournalEntry.newBuilder().setUpdateDatabaseInfo( - toJournalProto(getDatabaseInfo(), mName)).build(); - return CloseableIterator.noopCloseable( - Iterators.concat(Iterators.singletonIterator(entry), getTableIterator())); - } - - @Override - public CheckpointName getCheckpointName() { - return CheckpointName.TABLE_MASTER_DATABASE; - } - - /** - * @param dbInfo database info object - * @param dbName database name - * @return the journal proto representation - */ - public static alluxio.proto.journal.Table.UpdateDatabaseInfoEntry toJournalProto( - DatabaseInfo dbInfo, String dbName) { - alluxio.proto.journal.Table.UpdateDatabaseInfoEntry.Builder builder = - 
alluxio.proto.journal.Table.UpdateDatabaseInfoEntry.newBuilder() - .setDbName(dbName).putAllParameter(dbInfo.getParameters()); - if (dbInfo.getComment() != null) { - builder.setComment(dbInfo.getComment()); - } - if (dbInfo.getLocation() != null) { - builder.setLocation(dbInfo.getLocation()); - } - if (dbInfo.getOwnerName() != null) { - builder.setOwnerName(dbInfo.getOwnerName()); - } - if (dbInfo.getOwnerType() != null) { - builder.setOwnerType(dbInfo.getOwnerType()); - } - return builder.build(); - } -} diff --git a/dora/table/server/master/src/main/java/alluxio/master/table/DbConfig.java b/dora/table/server/master/src/main/java/alluxio/master/table/DbConfig.java deleted file mode 100644 index bc9478989a55..000000000000 --- a/dora/table/server/master/src/main/java/alluxio/master/table/DbConfig.java +++ /dev/null @@ -1,206 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.master.table; - -import alluxio.table.common.udb.UdbBypassSpec; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.core.JsonParseException; -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.DeserializationContext; -import com.fasterxml.jackson.databind.JsonDeserializer; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.annotation.JsonDeserialize; -import com.google.common.base.Preconditions; - -import java.io.IOException; -import java.util.Collections; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import java.util.stream.Collectors; -import javax.annotation.Nullable; - -/** - * The Alluxio db config information. - */ -public final class DbConfig { - private final BypassEntry mBypassEntry; - - /** - * @param bypassEntry bypass entry - */ - @JsonCreator - public DbConfig(@JsonProperty("bypass") @Nullable BypassEntry bypassEntry) { - mBypassEntry = bypassEntry == null ? new BypassEntry(Collections.emptySet()) : bypassEntry; - } - - /** - * Returns an empty configuration. - * - * @return an empty config instance - */ - public static DbConfig empty() { - return new DbConfig(new BypassEntry(Collections.emptySet())); - } - - /** - * @return the {@link BypassEntry} from config file - */ - public BypassEntry getBypassEntry() { - return mBypassEntry; - } - - /** - * @return the {@link UdbBypassSpec} object - */ - public UdbBypassSpec getUdbBypassSpec() { - return mBypassEntry.toUdbBypassSpec(); - } - - /** - * Bypass configuration entry from config file. 
- */ - public static final class BypassEntry { - @JsonProperty("tables") - private final Set mEntries; - - /** - * @param entries set of {@link BypassTableEntry}s - */ - @JsonCreator - public BypassEntry(@JsonProperty("tables") @Nullable Set entries) { - mEntries = entries == null ? Collections.emptySet() : entries; - } - - /** - * Converts to a {@link UdbBypassSpec} object. - * - * @return the {@link UdbBypassSpec} object - */ - public UdbBypassSpec toUdbBypassSpec() { - Map> map = mEntries.stream().collect( - Collectors.toMap(BypassTableEntry::getTable, BypassTableEntry::getPartitions)); - return new UdbBypassSpec(map); - } - - /** - * @return tables bypassed - */ - public Set getBypassedTables() { - return mEntries.stream().map(BypassTableEntry::getTable).collect(Collectors.toSet()); - } - - /** - * @return {@link BypassTableEntry}s - */ - public Set getBypassTableEntries() { - return mEntries; - } - } - - /** - * Table to partitions mapping. - */ - @JsonDeserialize(using = BypassTableEntryDeserializer.class) - public static class BypassTableEntry { - private final String mTableName; - private final Set mPartitions; - - /** - * @param tableName table name - * @param partitions partition names - */ - @JsonCreator - public BypassTableEntry(@JsonProperty("table") String tableName, - @JsonProperty("partitions") Set partitions) { - Preconditions.checkArgument(!tableName.isEmpty(), "empty table name"); - mTableName = tableName; - mPartitions = partitions; - } - - /** - * @return table name - */ - public String getTable() { - return mTableName; - } - - /** - * @return partition names - */ - public Set getPartitions() { - return mPartitions; - } - - @Override - public boolean equals(Object other) { - if (this == other) { - return true; - } else if (other == null) { - return false; - } else if (getClass() != other.getClass()) { - return false; - } - BypassTableEntry entry = (BypassTableEntry) other; - return Objects.equals(mTableName, entry.mTableName); - } - - @Override 
- public int hashCode() { - return Objects.hashCode(mTableName); - } - } - - /** - * Deserializer of BypassTableEntry - * - * Enables flexible syntax: either a single table name can be specified, and all belonging - * partitions will be bypassed; - * or an object of form - * {"table": "tableName", "partitions": ["part1", "part2"]} - * can be used, and individual partitions can be specified. - */ - public static class BypassTableEntryDeserializer extends JsonDeserializer { - @Override - public BypassTableEntry deserialize(JsonParser jp, DeserializationContext cxt) - throws IOException, JsonProcessingException { - ObjectMapper mapper = (ObjectMapper) jp.getCodec(); - JsonNode node = mapper.readTree(jp); - String tableName; - Set partitions; - if (!node.isTextual() && !node.isObject()) { - throw new JsonParseException(mapper.treeAsTokens(node), "invalid syntax"); - } else if (node.isTextual()) { - // single table name, all partitions are bypassed - tableName = node.asText(); - partitions = Collections.emptySet(); - } else { - // a {"table": "table", "partitions": ["part1", "part2"]} object - if (!node.hasNonNull("table")) { - throw new JsonParseException(mapper.treeAsTokens(node), "missing table name"); - } - tableName = node.get("table").asText(); - JsonNode partitionsList = node.get("partitions"); - partitions = mapper.convertValue(partitionsList, new TypeReference>() {}); - if (partitions == null) { - partitions = Collections.emptySet(); - } - } - return new BypassTableEntry(tableName, partitions); - } - } -} diff --git a/dora/table/server/master/src/main/java/alluxio/master/table/DefaultTableMaster.java b/dora/table/server/master/src/main/java/alluxio/master/table/DefaultTableMaster.java deleted file mode 100644 index f3267710144f..000000000000 --- a/dora/table/server/master/src/main/java/alluxio/master/table/DefaultTableMaster.java +++ /dev/null @@ -1,210 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the 
"License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.table; - -import alluxio.Constants; -import alluxio.Server; -import alluxio.client.job.JobMasterClient; -import alluxio.clock.SystemClock; -import alluxio.exception.ExceptionMessage; -import alluxio.grpc.GrpcService; -import alluxio.grpc.ServiceType; -import alluxio.grpc.table.ColumnStatisticsInfo; -import alluxio.grpc.table.ColumnStatisticsList; -import alluxio.grpc.table.Constraint; -import alluxio.grpc.table.Database; -import alluxio.grpc.table.Partition; -import alluxio.grpc.table.SyncStatus; -import alluxio.master.AbstractMaster; -import alluxio.master.MasterContext; -import alluxio.master.journal.DelegatingJournaled; -import alluxio.master.journal.JournalContext; -import alluxio.master.journal.Journaled; -import alluxio.master.journal.JournaledGroup; -import alluxio.master.journal.checkpoint.CheckpointName; -import alluxio.master.table.transform.TransformJobInfo; -import alluxio.master.table.transform.TransformManager; -import alluxio.security.authentication.ClientContextServerInjector; -import alluxio.table.common.transform.TransformDefinition; -import alluxio.util.executor.ExecutorServiceFactories; - -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Lists; -import io.grpc.ServerInterceptors; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; - -/** - * This table master manages catalogs metadata information. 
- */ -public class DefaultTableMaster extends AbstractMaster - implements TableMaster, DelegatingJournaled { - private static final Logger LOG = LoggerFactory.getLogger(DefaultTableMaster.class); - private static final Set> DEPS = ImmutableSet.of(); - public static final String DEFAULT_TRANSFORMATION = "file.count.max=100"; - - private final AlluxioCatalog mCatalog; - private final TransformManager mTransformManager; - private final JournaledGroup mJournaledComponents; - - /** - * Constructor for DefaultTableMaster. - * - * @param context core master context - * @param jobMasterClient the job master client for transformation - */ - public DefaultTableMaster(MasterContext context, JobMasterClient jobMasterClient) { - super(context, new SystemClock(), - ExecutorServiceFactories.cachedThreadPool(Constants.TABLE_MASTER_NAME)); - mCatalog = new AlluxioCatalog(); - mTransformManager = new TransformManager(this::createJournalContext, mCatalog, jobMasterClient); - mJournaledComponents = new JournaledGroup(Lists.newArrayList(mCatalog, mTransformManager), - CheckpointName.TABLE_MASTER); - } - - @Override - public SyncStatus attachDatabase(String udbType, String udbConnectionUri, - String udbDbName, String dbName, Map configuration, boolean ignoreSyncErrors) - throws IOException { - try (JournalContext journalContext = createJournalContext()) { - return mCatalog.attachDatabase(journalContext, udbType, udbConnectionUri, udbDbName, dbName, - configuration, ignoreSyncErrors); - } - } - - @Override - public boolean detachDatabase(String dbName) - throws IOException { - try (JournalContext journalContext = createJournalContext()) { - return mCatalog.detachDatabase(journalContext, dbName); - } - } - - @Override - public List getAllDatabases() throws IOException { - return mCatalog.getAllDatabases(); - } - - @Override - public List getAllTables(String databaseName) throws IOException { - return mCatalog.getAllTables(databaseName); - } - - @Override - public Table getTable(String 
dbName, String tableName) throws IOException { - return mCatalog.getTable(dbName, tableName); - } - - @Override - public List getTableColumnStatistics(String dbName, String tableName, - List colNames) throws IOException { - return mCatalog.getTableColumnStatistics(dbName, tableName, colNames); - } - - @Override - public List readTable(String dbName, String tableName, - Constraint constraint) throws IOException { - return mCatalog.readTable(dbName, tableName, constraint); - } - - @Override - public Map getPartitionColumnStatistics(String dbName, - String tableName, List partNamesList, List colNamesList) throws IOException { - return mCatalog.getPartitionColumnStatistics(dbName, tableName, partNamesList, colNamesList); - } - - @Override - public long transformTable(String dbName, String tableName, String definition) - throws IOException { - if (definition == null || definition.trim().isEmpty()) { - definition = DEFAULT_TRANSFORMATION; - } - TransformDefinition transformDefinition = TransformDefinition.parse(definition); - return mTransformManager.execute(dbName, tableName, transformDefinition); - } - - @Override - public TransformJobInfo getTransformJobInfo(long jobId) throws IOException { - Optional info = mTransformManager.getTransformJobInfo(jobId); - if (!info.isPresent()) { - throw new IOException(ExceptionMessage.TRANSFORM_JOB_DOES_NOT_EXIST.getMessage(jobId)); - } - return info.get(); - } - - @Override - public List getAllTransformJobInfo() throws IOException { - return mTransformManager.getAllTransformJobInfo(); - } - - @Override - public SyncStatus syncDatabase(String dbName) throws IOException { - try (JournalContext journalContext = createJournalContext()) { - return mCatalog.syncDatabase(journalContext, dbName); - } - } - - @Override - public Database getDatabase(String dbName) throws IOException { - return mCatalog.getDatabase(dbName); - } - - @Override - public Set> getDependencies() { - return DEPS; - } - - @Override - public String getName() { - 
return Constants.TABLE_MASTER_NAME; - } - - @Override - public Map getServices() { - Map services = new HashMap<>(); - services.put(ServiceType.TABLE_MASTER_CLIENT_SERVICE, - new GrpcService(ServerInterceptors.intercept( - new TableMasterClientServiceHandler(this), - new ClientContextServerInjector()))); - return services; - } - - @Override - public void start(Boolean isLeader) throws IOException { - super.start(isLeader); - if (isLeader) { - mTransformManager.start(getExecutorService(), mMasterContext.getUserState()); - } - } - - @Override - public void stop() throws IOException { - super.stop(); - } - - @Override - public void close() throws IOException { - super.close(); - } - - @Override - public Journaled getDelegate() { - return mJournaledComponents; - } -} diff --git a/dora/table/server/master/src/main/java/alluxio/master/table/Domain.java b/dora/table/server/master/src/main/java/alluxio/master/table/Domain.java deleted file mode 100644 index ed14fc9b3468..000000000000 --- a/dora/table/server/master/src/main/java/alluxio/master/table/Domain.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.table; - -import alluxio.collections.Pair; -import alluxio.grpc.table.Range; -import alluxio.grpc.table.Value; - -import java.util.ArrayList; -import java.util.List; - -// TODO(david): make this class templated - -/** - * Domain represetation to check values. 
- * - * @param type of the element - */ -public abstract class Domain { - abstract boolean isInDomain(T obj); - - /** - * Parse from protobuf domain to Domain class. - * - * @param domain proto representation - * @return a Domain object - */ - public static Domain parseFrom(alluxio.grpc.table.Domain domain) { - if (domain.hasAllOrNone()) { - return new AllOrNoneDomain(domain.getAllOrNone().getAll()); - } - if (domain.hasEquatable()) { - return new EquatableDomain(domain.getEquatable().getWhiteList(), - domain.getEquatable().getCandidatesList()); - } - if (domain.hasRange()) { - return new RangeDomain(domain.getRange().getRangesList()); - } - return new AllOrNoneDomain(false); - } - - private static Comparable convert(Value candidate) { - if (candidate.hasStringType()) { - return candidate.getStringType(); - } - if (candidate.hasBooleanType()) { - return candidate.getBooleanType(); - } - if (candidate.hasDoubleType()) { - return candidate.getDoubleType(); - } - if (candidate.hasLongType()) { - return candidate.getLongType(); - } - return null; - } - - private static class AllOrNoneDomain extends Domain { - private boolean mAll; - - public AllOrNoneDomain(boolean all) { - super(); - mAll = all; - } - - @Override - boolean isInDomain(Object obj) { - return mAll; - } - } - - private static class EquatableDomain extends Domain { - private boolean mWhiteList; - private List mObjects; - - public EquatableDomain(boolean whiteList, List candidatesList) { - super(); - mWhiteList = whiteList; - mObjects = new ArrayList<>(); - for (Value candidate: candidatesList) { - mObjects.add(convert(candidate)); - } - } - - @Override - boolean isInDomain(Object obj) { - return mWhiteList == mObjects.contains(obj); - } - } - - private static class RangeDomain extends Domain { - private List> mRanges; - - public RangeDomain(List rangesList) { - super(); - mRanges = new ArrayList<>(); - for (Range range : rangesList) { - mRanges.add(new Pair<>(convert(range.getLow()), 
convert(range.getHigh()))); - } - } - - @Override - boolean isInDomain(Object obj) { - for (Pair pair : mRanges) { - if ((pair.getFirst() == null || pair.getFirst().compareTo(obj) <= 0) - && (pair.getSecond() == null || pair.getSecond().compareTo(obj) >= 0)) { - return true; - } - } - return false; - } - } -} diff --git a/dora/table/server/master/src/main/java/alluxio/master/table/Partition.java b/dora/table/server/master/src/main/java/alluxio/master/table/Partition.java deleted file mode 100644 index 3b2042e5cd51..000000000000 --- a/dora/table/server/master/src/main/java/alluxio/master/table/Partition.java +++ /dev/null @@ -1,218 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.table; - -import alluxio.grpc.table.PartitionSpec; -import alluxio.table.common.Layout; -import alluxio.table.common.LayoutRegistry; -import alluxio.table.common.UdbPartition; -import alluxio.table.common.transform.TransformContext; -import alluxio.table.common.transform.TransformDefinition; -import alluxio.table.common.transform.TransformPlan; -import alluxio.util.CommonUtils; - -import java.io.IOException; -import java.util.List; - -/** - * The table partition class. 
- */ -public class Partition { - private static final long FIRST_VERSION = 1; - - private final String mPartitionSpec; - private final Layout mBaseLayout; - private final long mVersion; - private final long mVersionCreationTime; - private volatile Transformation mTransformation; - - /** - * Information kept for the latest transformation on the partition. - */ - private static final class Transformation { - /** The definition of the transformation. */ - private String mDefinition; - /** The transformed layout. */ - private Layout mLayout; - - /** - * @param definition the transformation definition - * @param layout the transformed layout - */ - public Transformation(String definition, Layout layout) { - mDefinition = definition; - mLayout = layout; - } - - /** - * @return the transformation definition - */ - public String getDefinition() { - return mDefinition; - } - - /** - * @return the transformed layout - */ - public Layout getLayout() { - return mLayout; - } - - /** - * @return the proto representation - */ - public alluxio.grpc.table.Transformation toProto() { - return alluxio.grpc.table.Transformation.newBuilder() - .setDefinition(mDefinition) - .setLayout(mLayout.toProto()) - .build(); - } - - /** - * @param layoutRegistry the layout registry - * @param proto the proto representation - * @return the java representation - */ - public static Transformation fromProto(LayoutRegistry layoutRegistry, - alluxio.grpc.table.Transformation proto) { - return new Transformation(proto.getDefinition(), layoutRegistry.create(proto.getLayout())); - } - } - - /** - * Creates an instance. 
- * - * @param partitionSpec the partition spec - * @param baseLayout the partition layout - * @param version the version - * @param versionCreationTime the version creation time - */ - private Partition(String partitionSpec, Layout baseLayout, long version, - long versionCreationTime) { - mPartitionSpec = partitionSpec; - mBaseLayout = baseLayout; - mVersion = version; - mVersionCreationTime = versionCreationTime; - } - - /** - * Creates an instance from a udb partition. - * - * @param udbPartition the udb partition - */ - public Partition(UdbPartition udbPartition) { - this(udbPartition.getSpec(), udbPartition.getLayout(), FIRST_VERSION, - CommonUtils.getCurrentMs()); - } - - /** - * Creates the next version of an existing partition. - * - * @param udbPartition the udb partition - * @return a new Partition instance representing the next version of this partition - */ - public Partition createNext(UdbPartition udbPartition) { - return new Partition(udbPartition.getSpec(), udbPartition.getLayout(), getVersion() + 1, - CommonUtils.getCurrentMs()); - } - - /** - * @return the current layout - */ - public Layout getLayout() { - return mTransformation == null ? mBaseLayout : mTransformation.getLayout(); - } - - /** - * @return the base layout - */ - public Layout getBaseLayout() { - return mBaseLayout; - } - - /** - * @return the version - */ - public long getVersion() { - return mVersion; - } - - /** - * Transform the partition. 
- * - * @param definition the transformation definition - * @param layout the transformed layout - */ - public void transform(String definition, Layout layout) { - mTransformation = new Transformation(definition, layout); - } - - /** - * @param definition the transformation definition - * @return whether the latest transformation of Partition has the same definition - */ - public boolean isTransformed(String definition) { - return mTransformation != null - && mTransformation.getDefinition().equals(definition); - } - - /** - * @return the partition speck - */ - public String getSpec() { - return mPartitionSpec; - } - - /** - * Returns a plan to transform this partition. - * - * @param transformContext the {@link TransformContext} - * @param definition the transformation definition - * @return the transformation plan - */ - public TransformPlan getTransformPlan(TransformContext transformContext, - TransformDefinition definition) throws IOException { - return mBaseLayout.getTransformPlan(transformContext, definition); - } - - /** - * @return the proto representation - */ - public alluxio.grpc.table.Partition toProto() { - alluxio.grpc.table.Partition.Builder builder = alluxio.grpc.table.Partition.newBuilder() - .setPartitionSpec(PartitionSpec.newBuilder().setSpec(mPartitionSpec).build()) - .setBaseLayout(mBaseLayout.toProto()) - .setVersion(mVersion) - .setVersionCreationTime(mVersionCreationTime); - if (mTransformation != null) { - builder.addTransformations(mTransformation.toProto()); - } - return builder.build(); - } - - /** - * @param layoutRegistry the layout registry - * @param proto the proto representation - * @return the java representation - */ - public static Partition fromProto(LayoutRegistry layoutRegistry, - alluxio.grpc.table.Partition proto) { - Partition partition = new Partition(proto.getPartitionSpec().getSpec(), - layoutRegistry.create(proto.getBaseLayout()), proto.getVersion(), - proto.getVersionCreationTime()); - List transformations = 
proto.getTransformationsList(); - if (!transformations.isEmpty()) { - partition.mTransformation = Transformation.fromProto(layoutRegistry, transformations.get(0)); - } - return partition; - } -} diff --git a/dora/table/server/master/src/main/java/alluxio/master/table/PartitionScheme.java b/dora/table/server/master/src/main/java/alluxio/master/table/PartitionScheme.java deleted file mode 100644 index 962756a9b5e1..000000000000 --- a/dora/table/server/master/src/main/java/alluxio/master/table/PartitionScheme.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.table; - -import alluxio.grpc.table.FieldSchema; -import alluxio.grpc.table.Layout; - -import java.util.List; - -/** - * Interface of the partition scheme. - */ -public interface PartitionScheme { - /** - * Add a list of partitions. - * @param partitions partitions to add - */ - void addPartitions(List partitions); - - /** - * Get a list of partitions. - * - * @return a list of partitions - */ - List getPartitions(); - - /** - * @param spec the partition spec - * @return the corresponding partition, or null if spec does not exist - */ - Partition getPartition(String spec); - - /** - * Get table layout. - * - * @return table info - */ - Layout getTableLayout(); - - /** - * Get partition columns. - * - * @return partition columns - */ - List getPartitionCols(); - - /** - * create a partition scheme object. 
- * - * @param partitions partitions - * @param layout table layout - * @param partCols table partition columns - * @return a partition scheme object - */ - static PartitionScheme create(List partitions, Layout layout, - List partCols) { - if (partCols.isEmpty()) { - return new UnpartitionedTableScheme(partitions); - } else { - return new PartitionedTableScheme(partitions, layout, partCols); - } - } -} diff --git a/dora/table/server/master/src/main/java/alluxio/master/table/PartitionedTableScheme.java b/dora/table/server/master/src/main/java/alluxio/master/table/PartitionedTableScheme.java deleted file mode 100644 index 3a78c69cc1f6..000000000000 --- a/dora/table/server/master/src/main/java/alluxio/master/table/PartitionedTableScheme.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.table; - -import alluxio.grpc.table.FieldSchema; -import alluxio.grpc.table.Layout; - -import java.util.List; - -/** - * Partitioned Table Scheme. - */ -public class PartitionedTableScheme extends BasePartitionScheme { - private final Layout mTableInfo; - private final List mPartCols; - - /** - * constructor for PartitionedTableScheme. 
- * - * @param partitions list of partitions - * @param tableInfo table info - * @param partCols partition columns - */ - PartitionedTableScheme(List partitions, Layout tableInfo, - List partCols) { - super(partitions); - mTableInfo = tableInfo; - mPartCols = partCols; - } - - @Override - public Layout getTableLayout() { - return mTableInfo; - } - - @Override - public List getPartitionCols() { - return mPartCols; - } -} diff --git a/dora/table/server/master/src/main/java/alluxio/master/table/Table.java b/dora/table/server/master/src/main/java/alluxio/master/table/Table.java deleted file mode 100644 index 7777f43e7e6f..000000000000 --- a/dora/table/server/master/src/main/java/alluxio/master/table/Table.java +++ /dev/null @@ -1,354 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.master.table; - -import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; -import alluxio.grpc.table.ColumnStatisticsInfo; -import alluxio.grpc.table.Schema; -import alluxio.grpc.table.TableInfo; -import alluxio.proto.journal.Table.AddTableEntry; -import alluxio.proto.journal.Table.AddTablePartitionsEntry; -import alluxio.table.common.UdbPartition; -import alluxio.table.common.transform.TransformContext; -import alluxio.table.common.transform.TransformDefinition; -import alluxio.table.common.transform.TransformPlan; -import alluxio.table.common.udb.UdbTable; -import alluxio.util.CommonUtils; - -import com.google.common.collect.Lists; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.function.Function; -import java.util.stream.Collectors; -import javax.annotation.Nullable; -import javax.annotation.concurrent.NotThreadSafe; - -/** - * The table implementation which manages all the versions of the table. 
- */ -@NotThreadSafe -public class Table { - private static final Logger LOG = LoggerFactory.getLogger(Table.class); - private static final long UNDEFINED_VERSION = -1; - private static final int PARTITIONS_CHUNK_SIZE = Configuration - .getInt(PropertyKey.TABLE_JOURNAL_PARTITIONS_CHUNK_SIZE); - - public static final long FIRST_VERSION = 1; - - private final Database mDatabase; - private final String mName; - private final long mVersion; - private final long mVersionCreationTime; - private final long mPreviousVersion; - - private final Schema mSchema; - private final PartitionScheme mPartitionScheme; - private final String mOwner; - private final List mStatistics; - private final Map mParameters; - - /** - * @param database the database - * @param udbTable the udb table to sync from - * @param previousTable the previous table, or {@code null} if creating first version of table - */ - private Table(Database database, UdbTable udbTable, @Nullable Table previousTable) { - mDatabase = database; - mVersion = previousTable == null ? FIRST_VERSION : previousTable.mVersion + 1; - mPreviousVersion = previousTable == null ? UNDEFINED_VERSION : previousTable.mVersion; - mVersionCreationTime = CommonUtils.getCurrentMs(); - - mName = udbTable.getName(); - mSchema = udbTable.getSchema(); - mOwner = udbTable.getOwner() == null ? "" : udbTable.getOwner(); - mStatistics = udbTable.getStatistics(); - mParameters = new HashMap<>(udbTable.getParameters()); - - // TODO(gpang): inspect listing of table or partition location? 
- - // Compare udb partitions with the existing partitions - List partitions = new ArrayList<>(udbTable.getPartitions().size()); - if (previousTable != null) { - // spec to existing partition - Map existingPartitions = - previousTable.mPartitionScheme.getPartitions().stream() - .collect(Collectors.toMap(Partition::getSpec, Function.identity())); - for (UdbPartition udbPartition : udbTable.getPartitions()) { - Partition newPartition = existingPartitions.get(udbPartition.getSpec()); - if (newPartition == null) { - // partition does not exist yet - newPartition = new Partition(udbPartition); - if (LOG.isDebugEnabled()) { - LOG.debug("Existing table {}.{} adding UDB partition: {}", - database.getName(), mName, udbPartition); - } - } else if (!newPartition.getBaseLayout().equals(udbPartition.getLayout())) { - // existing partition is updated - newPartition = newPartition.createNext(udbPartition); - if (LOG.isDebugEnabled()) { - LOG.debug("Existing table {}.{} updating UDB partition {}", - database.getName(), mName, udbPartition); - } - } else { - if (LOG.isDebugEnabled()) { - LOG.debug("Existing table {}.{} keeping partition spec: {}", - database.getName(), mName, udbPartition); - } - } - partitions.add(newPartition); - } - LOG.info("Updating existing table {}.{} with {} total partitions.", - database.getName(), mName, partitions.size()); - } else { - // Use all the udb partitions - partitions = - udbTable.getPartitions().stream().map(Partition::new).collect(Collectors.toList()); - LOG.info("Creating new table {}.{} with {} total partitions.", - database.getName(), mName, partitions.size()); - if (LOG.isDebugEnabled()) { - udbTable.getPartitions().stream().forEach(udbPartition -> - LOG.debug("New table {}.{} adding UDB partition: {}.", - database.getName(), mName, udbPartition)); - } - } - mPartitionScheme = - PartitionScheme.create(partitions, udbTable.getLayout(), udbTable.getPartitionCols()); - } - - private Table(Database database, 
alluxio.proto.journal.Table.AddTableEntry entry) { - List partitions = entry.getPartitionsList().stream() - .map(p -> Partition.fromProto(database.getContext().getLayoutRegistry(), p)) - .collect(Collectors.toList()); - - mDatabase = database; - mName = entry.getTableName(); - mPreviousVersion = entry.getPreviousVersion(); - mVersion = entry.getVersion(); - mVersionCreationTime = entry.getVersionCreationTime(); - mSchema = entry.getSchema(); - mPartitionScheme = - PartitionScheme.create(partitions, entry.getLayout(), entry.getPartitionColsList()); - mOwner = entry.getOwner(); - mStatistics = entry.getTableStatsList(); - mParameters = new HashMap<>(entry.getParametersMap()); - } - - /** - * @param database the database - * @param udbTable the udb table - * @param previousTable the previous table, or {@code null} if creating first version of table - * @return a new (or first) version of the table based on the udb table, or {@code null} if there - * no changes in the udb table - */ - public static Table create(Database database, UdbTable udbTable, @Nullable Table previousTable) { - if (previousTable != null && !previousTable.shouldSync(udbTable)) { - // no need for a new version - return null; - } - return new Table(database, udbTable, previousTable); - } - - /** - * @param database the database - * @param entry the add table journal entry - * @return a new instance - */ - public static Table create(Database database, alluxio.proto.journal.Table.AddTableEntry entry) { - return new Table(database, entry); - } - - /** - * Add partitions to the current table. 
- * - * @param entry the add table partitions entry - */ - public void addPartitions(alluxio.proto.journal.Table.AddTablePartitionsEntry entry) { - mPartitionScheme.addPartitions(entry.getPartitionsList().stream() - .map(p -> Partition.fromProto(mDatabase.getContext().getLayoutRegistry(), p)) - .collect(Collectors.toList())); - } - - /** - * @return the table name - */ - public String getName() { - return mName; - } - - /** - * @param spec the partition spec - * @return the corresponding partition - */ - public Partition getPartition(String spec) { - return mPartitionScheme.getPartition(spec); - } - - /** - * @return the list of partitions - */ - public List getPartitions() { - return mPartitionScheme.getPartitions(); - } - - /** - * @return the table schema - */ - public Schema getSchema() { - return mSchema; - } - - /** - * @return the statistics - */ - public List getStatistics() { - return mStatistics; - } - - /** - * @return the table version - */ - public long getVersion() { - return mVersion; - } - - /** - * @return the previous table version - */ - public long getPreviousVersion() { - return mPreviousVersion; - } - - /** - * Returns a list of plans to transform the table, according to the transformation definition. 
- * - * @param definition the transformation definition - * @return a list of {@link TransformPlan} to transform this table - */ - public List getTransformPlans(TransformDefinition definition) throws IOException { - List plans = new ArrayList<>(getPartitions().size()); - for (Partition partition : getPartitions()) { - if (!partition.isTransformed(definition.getDefinition())) { - TransformContext transformContext = - new TransformContext(mDatabase.getName(), mName, partition.getSpec()); - plans.add(partition.getTransformPlan(transformContext, definition)); - } - } - return plans; - } - - /** - * @param udbTable the udb table to check against - * @return true if the table should be synced, because of differences in the udb table - */ - public boolean shouldSync(UdbTable udbTable) { - if (!Objects.equals(mName, udbTable.getName()) - || !Objects.equals(mSchema, udbTable.getSchema()) - || !Objects.equals(mOwner, udbTable.getOwner()) - || !Objects.equals(mStatistics, udbTable.getStatistics()) - || !Objects.equals(mParameters, udbTable.getParameters())) { - // some fields are different - return true; - } - - Map existingPartitions = mPartitionScheme.getPartitions().stream() - .collect(Collectors.toMap(Partition::getSpec, Function.identity())); - if (existingPartitions.size() != udbTable.getPartitions().size()) { - return true; - } - - for (UdbPartition udbPartition : udbTable.getPartitions()) { - Partition newPartition = existingPartitions.get(udbPartition.getSpec()); - if (newPartition == null - || !newPartition.getBaseLayout().equals(udbPartition.getLayout())) { - // mismatch of a partition - return true; - } - } - - return false; - } - - /** - * @return the proto representation - */ - public TableInfo toProto() { - TableInfo.Builder builder = TableInfo.newBuilder() - .setDbName(mDatabase.getName()) - .setTableName(mName) - .setSchema(mSchema) - .setOwner(mOwner) - .putAllParameters(mParameters) - .addAllPartitionCols(mPartitionScheme.getPartitionCols()) - 
.setLayout(mPartitionScheme.getTableLayout()) - .setPreviousVersion(mPreviousVersion) - .setVersion(mVersion) - .setVersionCreationTime(mVersionCreationTime); - - return builder.build(); - } - - /** - * @return the journal proto representation - */ - public AddTableEntry getTableJournalProto() { - AddTableEntry.Builder builder = AddTableEntry.newBuilder() - .setDbName(mDatabase.getName()) - .setTableName(mName) - .addAllTableStats(mStatistics) - .setSchema(mSchema) - .setOwner(mOwner) - .putAllParameters(mParameters) - .addAllPartitionCols(mPartitionScheme.getPartitionCols()) - .setLayout(mPartitionScheme.getTableLayout()) - .setPreviousVersion(mPreviousVersion) - .setVersion(mVersion) - .setVersionCreationTime(mVersionCreationTime); - - List partitions = getPartitions(); - if (partitions.size() <= PARTITIONS_CHUNK_SIZE) { - builder.addAllPartitions(partitions.stream().map(Partition::toProto) - .collect(Collectors.toList())); - } - return builder.build(); - } - - /** - * @return the journal proto representation - */ - public List getTablePartitionsJournalProto() { - List partitionEntries = new ArrayList<>(); - List partitions = getPartitions(); - if (partitions.size() <= PARTITIONS_CHUNK_SIZE) { - return partitionEntries; - } - - for (List partitionChunk : Lists.partition(partitions, PARTITIONS_CHUNK_SIZE)) { - partitionEntries.add(AddTablePartitionsEntry.newBuilder() - .setDbName(mDatabase.getName()) - .setTableName(mName) - .setVersion(mVersion) - .addAllPartitions(partitionChunk.stream().map(Partition::toProto) - .collect(Collectors.toList())) - .build()); - } - - return partitionEntries; - } -} diff --git a/dora/table/server/master/src/main/java/alluxio/master/table/TableMaster.java b/dora/table/server/master/src/main/java/alluxio/master/table/TableMaster.java deleted file mode 100644 index 39305d4e272a..000000000000 --- a/dora/table/server/master/src/main/java/alluxio/master/table/TableMaster.java +++ /dev/null @@ -1,153 +0,0 @@ -/* - * The Alluxio Open 
Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.table; - -import alluxio.grpc.table.ColumnStatisticsInfo; -import alluxio.grpc.table.ColumnStatisticsList; -import alluxio.grpc.table.Constraint; -import alluxio.grpc.table.Database; -import alluxio.grpc.table.Partition; -import alluxio.grpc.table.SyncStatus; -import alluxio.master.Master; -import alluxio.master.table.transform.TransformJobInfo; - -import java.io.IOException; -import java.util.List; -import java.util.Map; - -/** - * Interface of the table master that manages the table service. - */ -public interface TableMaster extends Master { - - /** - * Attach an existing database to the catalog. - * - * @param udbType the database type - * @param udbConnectionUri the udb connection uri - * @param udbDbName the database name in the udb - * @param dbName the database name in Alluxio - * @param configuration the configuration - * @param ignoreSyncErrors if true, will ignore sync errors during the attach - * @return the sync status for the attach - */ - SyncStatus attachDatabase(String udbType, String udbConnectionUri, String udbDbName, - String dbName, Map configuration, boolean ignoreSyncErrors) - throws IOException; - - /** - * Remove an existing database in the catalog. - * - * @param dbName the database name to remove - * @return true if deletion is successful - */ - boolean detachDatabase(String dbName) - throws IOException; - - /** - * Get a listing of all databases. 
- * - * @return a list of database - */ - List getAllDatabases() throws IOException; - - /** - * Get a listing of all tables in a database. - * - * @param databaseName database name - * - * @return a list of tables - */ - List getAllTables(String databaseName) throws IOException; - - /** - * Gets a database object. - * - * @param dbName the database name - * @return a database object - */ - Database getDatabase(String dbName) throws IOException; - - /** - * Get a table. - * - * @param databaseName database name - * @param tableName table name - * - * @return a Table object - */ - Table getTable(String databaseName, String tableName) throws IOException; - - /** - * Get statistics on the table. - * - * @param databaseName database name - * @param tableName table name - * @param colNames column names - * @return a list of column statistics info - */ - List getTableColumnStatistics(String databaseName, String tableName, - List colNames) throws IOException; - - /** - * Returns metadata for reading a table given constraints. - * - * @param dbName database name - * @param tableName table name - * @param constraint constraint - * @return a list of partition information - */ - List readTable(String dbName, String tableName, Constraint constraint) - throws IOException; - - /** - * Get statistics on the partitions. - * - * @param dbName database name - * @param tableName table name - * @param partNamesList partition names - * @param colNamesList column names - * @return a map mapping partition names to a list of column statistics info - */ - Map getPartitionColumnStatistics(String dbName, - String tableName, List partNamesList, List colNamesList) - throws IOException; - - /** - * Transforms a table to a new table. 
- * - * @param dbName the database name - * @param tableName the table name - * @param definition the transformation definition - * @return the job ID - */ - long transformTable(String dbName, String tableName, String definition) throws IOException; - - /** - * @param jobId the job ID - * @return the information for the transformation job - */ - TransformJobInfo getTransformJobInfo(long jobId) throws IOException; - - /** - * @return a list of information for all the transformation jobs - */ - List getAllTransformJobInfo() throws IOException; - - /** - * Syncs a database. - * - * @param dbName the database name - * @return the resulting sync status - */ - SyncStatus syncDatabase(String dbName) throws IOException; -} diff --git a/dora/table/server/master/src/main/java/alluxio/master/table/TableMasterClientServiceHandler.java b/dora/table/server/master/src/main/java/alluxio/master/table/TableMasterClientServiceHandler.java deleted file mode 100644 index aff24951e784..000000000000 --- a/dora/table/server/master/src/main/java/alluxio/master/table/TableMasterClientServiceHandler.java +++ /dev/null @@ -1,185 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.master.table; - -import alluxio.RpcUtils; -import alluxio.grpc.table.AttachDatabasePRequest; -import alluxio.grpc.table.AttachDatabasePResponse; -import alluxio.grpc.table.DetachDatabasePRequest; -import alluxio.grpc.table.DetachDatabasePResponse; -import alluxio.grpc.table.GetAllDatabasesPRequest; -import alluxio.grpc.table.GetAllDatabasesPResponse; -import alluxio.grpc.table.GetAllTablesPRequest; -import alluxio.grpc.table.GetAllTablesPResponse; -import alluxio.grpc.table.GetDatabasePRequest; -import alluxio.grpc.table.GetDatabasePResponse; -import alluxio.grpc.table.GetPartitionColumnStatisticsPRequest; -import alluxio.grpc.table.GetPartitionColumnStatisticsPResponse; -import alluxio.grpc.table.GetTableColumnStatisticsPRequest; -import alluxio.grpc.table.GetTableColumnStatisticsPResponse; -import alluxio.grpc.table.GetTablePRequest; -import alluxio.grpc.table.GetTablePResponse; -import alluxio.grpc.table.GetTransformJobInfoPRequest; -import alluxio.grpc.table.GetTransformJobInfoPResponse; -import alluxio.grpc.table.ReadTablePRequest; -import alluxio.grpc.table.ReadTablePResponse; -import alluxio.grpc.table.SyncDatabasePRequest; -import alluxio.grpc.table.SyncDatabasePResponse; -import alluxio.grpc.table.SyncStatus; -import alluxio.grpc.table.TableMasterClientServiceGrpc; -import alluxio.grpc.table.TransformTablePRequest; -import alluxio.grpc.table.TransformTablePResponse; -import alluxio.master.table.transform.TransformJobInfo; - -import com.google.common.base.Preconditions; -import io.grpc.stub.StreamObserver; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.stream.Collectors; - -/** - * This class is a gRPC handler for table master RPCs. 
- */ -public class TableMasterClientServiceHandler - extends TableMasterClientServiceGrpc.TableMasterClientServiceImplBase { - private static final Logger LOG = LoggerFactory.getLogger(TableMasterClientServiceHandler.class); - - private final TableMaster mTableMaster; - - /** - * Creates a new instance of {@link TableMasterClientServiceHandler}. - * - * @param tableMaster the {@link TableMaster} the handler uses internally - */ - public TableMasterClientServiceHandler(TableMaster tableMaster) { - Preconditions.checkNotNull(tableMaster, "tableMaster"); - mTableMaster = tableMaster; - } - - @Override - public void attachDatabase(AttachDatabasePRequest request, - StreamObserver responseObserver) { - RpcUtils.call(LOG, () -> { - SyncStatus status = mTableMaster - .attachDatabase(request.getUdbType(), request.getUdbConnectionUri(), - request.getUdbDbName(), request.getDbName(), request.getOptionsMap(), - request.getIgnoreSyncErrors()); - return AttachDatabasePResponse.newBuilder().setSuccess(status.getTablesErrorsCount() == 0) - .setSyncStatus(status).build(); - }, "attachDatabase", "", responseObserver); - } - - @Override - public void detachDatabase(DetachDatabasePRequest request, - StreamObserver responseObserver) { - RpcUtils.call(LOG, () -> DetachDatabasePResponse.newBuilder().setSuccess(mTableMaster - .detachDatabase(request.getDbName())).build(), "detachDatabase", "", - responseObserver); - } - - @Override - public void getAllDatabases(GetAllDatabasesPRequest request, - StreamObserver responseObserver) { - RpcUtils.call(LOG, () -> GetAllDatabasesPResponse.newBuilder() - .addAllDatabase(mTableMaster.getAllDatabases()).build(), - "getAllDatabases", "", responseObserver); - } - - @Override - public void getAllTables(GetAllTablesPRequest request, - StreamObserver responseObserver) { - RpcUtils.call(LOG, () -> GetAllTablesPResponse.newBuilder() - .addAllTable(mTableMaster.getAllTables(request.getDatabase())).build(), - "getAllTables", "", responseObserver); - } - - 
@Override - public void getDatabase(GetDatabasePRequest request, - StreamObserver responseObserver) { - RpcUtils.call(LOG, () -> GetDatabasePResponse.newBuilder().setDb( - mTableMaster.getDatabase(request.getDbName())).build(), - "getDatabase", "", responseObserver); - } - - @Override - public void getTable(GetTablePRequest request, - StreamObserver responseObserver) { - RpcUtils.call(LOG, () -> { - Table table = mTableMaster.getTable(request.getDbName(), request.getTableName()); - if (table != null) { - return GetTablePResponse.newBuilder().setTableInfo(table.toProto()).build(); - } - return GetTablePResponse.getDefaultInstance(); - }, "getTable", "", responseObserver); - } - - @Override - public void getTableColumnStatistics(GetTableColumnStatisticsPRequest request, - StreamObserver responseObserver) { - RpcUtils.call(LOG, () -> GetTableColumnStatisticsPResponse.newBuilder().addAllStatistics( - mTableMaster.getTableColumnStatistics(request.getDbName(), - request.getTableName(), request.getColNamesList())).build(), - "getTableColumnStatistics", "", responseObserver); - } - - @Override - public void getPartitionColumnStatistics(GetPartitionColumnStatisticsPRequest request, - StreamObserver responseObserver) { - RpcUtils.call(LOG, () -> GetPartitionColumnStatisticsPResponse.newBuilder() - .putAllPartitionStatistics(mTableMaster.getPartitionColumnStatistics( - request.getDbName(), request.getTableName(), request.getPartNamesList(), - request.getColNamesList())).build(), - "getPartitionColumnStatistics", "", responseObserver); - } - - @Override - public void readTable(ReadTablePRequest request, - StreamObserver responseObserver) { - RpcUtils.call(LOG, () -> ReadTablePResponse.newBuilder().addAllPartitions(mTableMaster - .readTable(request.getDbName(), request.getTableName(), request.getConstraint())) - .build(), "readTable", "", responseObserver); - } - - @Override - public void transformTable(TransformTablePRequest request, - StreamObserver responseObserver) { - 
RpcUtils.call(LOG, () -> TransformTablePResponse.newBuilder().setJobId(mTableMaster - .transformTable(request.getDbName(), request.getTableName(), request.getDefinition())) - .build(), "transformTable", "", responseObserver); - } - - @Override - public void syncDatabase(SyncDatabasePRequest request, - StreamObserver responseObserver) { - RpcUtils.call(LOG, () -> { - SyncStatus status = mTableMaster.syncDatabase(request.getDbName()); - return SyncDatabasePResponse.newBuilder().setSuccess(status.getTablesErrorsCount() == 0) - .setStatus(status).build(); - }, "syncDatabase", "", responseObserver); - } - - @Override - public void getTransformJobInfo(GetTransformJobInfoPRequest request, - StreamObserver responseObserver) { - if (request.hasJobId()) { - RpcUtils.call(LOG, () -> GetTransformJobInfoPResponse.newBuilder().addInfo(mTableMaster - .getTransformJobInfo(request.getJobId()).toProto()).build(), - "getTransformJobInfo", "", responseObserver); - } else { - RpcUtils.call(LOG, () -> GetTransformJobInfoPResponse.newBuilder().addAllInfo(mTableMaster - .getAllTransformJobInfo().stream().map(TransformJobInfo::toProto) - .collect(Collectors.toList())).build(), - "getTransformJobInfo", "", responseObserver); - } - } -} diff --git a/dora/table/server/master/src/main/java/alluxio/master/table/TableMasterFactory.java b/dora/table/server/master/src/main/java/alluxio/master/table/TableMasterFactory.java deleted file mode 100644 index 76d8a62bbd2f..000000000000 --- a/dora/table/server/master/src/main/java/alluxio/master/table/TableMasterFactory.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.table; - -import alluxio.ClientContext; -import alluxio.Constants; -import alluxio.client.job.JobMasterClient; -import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; -import alluxio.master.CoreMasterContext; -import alluxio.master.MasterFactory; -import alluxio.master.MasterRegistry; -import alluxio.worker.job.JobMasterClientContext; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.annotation.concurrent.ThreadSafe; - -/** - * Factory to create a {@link TableMaster} instance. - */ -@ThreadSafe -public final class TableMasterFactory implements MasterFactory { - private static final Logger LOG = LoggerFactory.getLogger(TableMasterFactory.class); - - /** - * Constructs a new {@link TableMasterFactory}. 
- */ - public TableMasterFactory() {} - - @Override - public boolean isEnabled() { - return Configuration.getBoolean(PropertyKey.TABLE_ENABLED); - } - - @Override - public String getName() { - return Constants.TABLE_MASTER_NAME; - } - - @Override - public TableMaster create(MasterRegistry registry, CoreMasterContext context) { - LOG.info("Creating {} ", TableMaster.class.getName()); - - JobMasterClient jobMasterClient = JobMasterClient.Factory.create(JobMasterClientContext - .newBuilder(ClientContext.create(Configuration.global())).build()); - TableMaster master = new DefaultTableMaster(context, jobMasterClient); - registry.add(TableMaster.class, master); - return master; - } -} diff --git a/dora/table/server/master/src/main/java/alluxio/master/table/UnpartitionedTableScheme.java b/dora/table/server/master/src/main/java/alluxio/master/table/UnpartitionedTableScheme.java deleted file mode 100644 index 19c8c23d0e75..000000000000 --- a/dora/table/server/master/src/main/java/alluxio/master/table/UnpartitionedTableScheme.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.table; - -import alluxio.grpc.table.FieldSchema; -import alluxio.grpc.table.Layout; - -import com.google.common.base.Preconditions; - -import java.util.Collections; -import java.util.List; - -/** - * Unpartitoned table scheme. 
- */ -public class UnpartitionedTableScheme extends BasePartitionScheme { - private final Partition mPartition; - - /** - * Constructor for UnpartitionedTableScheme. - * - * @param partitions a list of partitions - */ - public UnpartitionedTableScheme(List partitions) { - super(partitions); - Preconditions.checkArgument(partitions.size() == 1); - mPartition = partitions.get(0); - } - - @Override - public Layout getTableLayout() { - return mPartition.getLayout().toProto(); - } - - @Override - public List getPartitionCols() { - return Collections.emptyList(); - } -} diff --git a/dora/table/server/master/src/main/java/alluxio/master/table/transform/TransformJobInfo.java b/dora/table/server/master/src/main/java/alluxio/master/table/transform/TransformJobInfo.java deleted file mode 100644 index 9fa2d4b60c9a..000000000000 --- a/dora/table/server/master/src/main/java/alluxio/master/table/transform/TransformJobInfo.java +++ /dev/null @@ -1,150 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.table.transform; - -import alluxio.collections.Pair; -import alluxio.job.wire.Status; -import alluxio.table.common.Layout; - -import com.google.common.base.Preconditions; - -import java.util.Collections; -import java.util.Map; -import javax.annotation.concurrent.ThreadSafe; -import javax.validation.constraints.NotNull; - -/** - * Information kept for a transformation job. 
- */ -@ThreadSafe -public final class TransformJobInfo { - private final Pair mDbTable; - private final String mDefinition; - private final Map mTransformedLayouts; - private final long mJobId; - private volatile Status mJobStatus; - private volatile String mJobErrorMessage; - - /** - * The default job status on construction is {@link Status#RUNNING}. - * - * @param db the database name - * @param table the table name - * @param definition the transformation definition - * @param jobId the job ID - * @param transformedLayouts the mapping from a partition spec to its transformed layout - */ - public TransformJobInfo(@NotNull String db, @NotNull String table, @NotNull String definition, - long jobId, @NotNull Map transformedLayouts) { - Preconditions.checkNotNull(db, "db"); - Preconditions.checkNotNull(table, "table"); - Preconditions.checkNotNull(definition, "definition"); - Preconditions.checkNotNull(transformedLayouts, "transformedLayouts"); - - mDbTable = new Pair<>(db, table); - mDefinition = definition; - mTransformedLayouts = Collections.unmodifiableMap(transformedLayouts); - mJobId = jobId; - mJobStatus = Status.RUNNING; - mJobErrorMessage = ""; - } - - /** - * @return the database name - */ - public String getDb() { - return mDbTable.getFirst(); - } - - /** - * @return the table name - */ - public String getTable() { - return mDbTable.getSecond(); - } - - /** - * @return the (db, table) pair - */ - public Pair getDbTable() { - return mDbTable; - } - - /** - * @return the transformation definition - */ - public String getDefinition() { - return mDefinition; - } - - /** - * @return a read-only mapping from a partition spec to its transformed layout - */ - public Map getTransformedLayouts() { - return mTransformedLayouts; - } - - /** - * @return the job ID - */ - public long getJobId() { - return mJobId; - } - - /** - * @return the job status - */ - public Status getJobStatus() { - return mJobStatus; - } - - /** - * Sets the job status. 
- * - * @param status the job status - */ - public void setJobStatus(@NotNull Status status) { - Preconditions.checkNotNull(status, "status"); - mJobStatus = status; - } - - /** - * @return the job error message or empty if there is no error - */ - public String getJobErrorMessage() { - return mJobErrorMessage; - } - - /** - * Sets the job error message. - * - * @param error the error - */ - public void setJobErrorMessage(@NotNull String error) { - Preconditions.checkNotNull(error, "error"); - mJobErrorMessage = error; - } - - /** - * @return the proto representation - */ - public alluxio.grpc.table.TransformJobInfo toProto() { - return alluxio.grpc.table.TransformJobInfo.newBuilder() - .setDbName(getDb()) - .setTableName(getTable()) - .setDefinition(getDefinition()) - .setJobId(getJobId()) - .setJobStatus(getJobStatus().toProto()) - .setJobError(getJobErrorMessage()) - .build(); - } -} diff --git a/dora/table/server/master/src/main/java/alluxio/master/table/transform/TransformManager.java b/dora/table/server/master/src/main/java/alluxio/master/table/transform/TransformManager.java deleted file mode 100644 index ba7b9bab3a65..000000000000 --- a/dora/table/server/master/src/main/java/alluxio/master/table/transform/TransformManager.java +++ /dev/null @@ -1,478 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.master.table.transform; - -import alluxio.client.job.JobMasterClient; -import alluxio.collections.Pair; -import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; -import alluxio.exception.ExceptionMessage; -import alluxio.exception.status.NotFoundException; -import alluxio.exception.status.UnavailableException; -import alluxio.heartbeat.FixedIntervalSupplier; -import alluxio.heartbeat.HeartbeatContext; -import alluxio.heartbeat.HeartbeatExecutor; -import alluxio.heartbeat.HeartbeatThread; -import alluxio.job.JobConfig; -import alluxio.job.wire.JobInfo; -import alluxio.job.wire.Status; -import alluxio.job.workflow.composite.CompositeConfig; -import alluxio.master.journal.DelegatingJournaled; -import alluxio.master.journal.JournalContext; -import alluxio.master.journal.Journaled; -import alluxio.master.journal.checkpoint.CheckpointName; -import alluxio.master.table.AlluxioCatalog; -import alluxio.master.table.Partition; -import alluxio.proto.journal.Journal; -import alluxio.proto.journal.Journal.JournalEntry; -import alluxio.proto.journal.Table.AddTransformJobInfoEntry; -import alluxio.proto.journal.Table.RemoveTransformJobInfoEntry; -import alluxio.resource.CloseableIterator; -import alluxio.security.user.UserState; -import alluxio.table.common.Layout; -import alluxio.table.common.transform.TransformDefinition; -import alluxio.table.common.transform.TransformPlan; - -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; -import com.google.common.collect.Iterators; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.concurrent.ConcurrentHashMap; -import 
java.util.concurrent.ExecutorService; -import java.util.concurrent.TimeUnit; - -/** - * Manages transformations. - * - * It executes a transformation by submitting a job to the job service. - * - * A background thread periodically checks whether the job succeeds, fails, or is still running, - * the period is configurable by {@link PropertyKey#TABLE_TRANSFORM_MANAGER_JOB_MONITOR_INTERVAL}. - * - * It keeps information about all running transformations not only in memory, - * but also in the journal,so even if it is restarted, the information of the previous running - * transformations is not lost. - * - * It keeps history of all succeeded or failed transformations in memory for a time period which is - * configurable by {@link PropertyKey#TABLE_TRANSFORM_MANAGER_JOB_HISTORY_RETENTION_TIME}. - * - * If the job succeeds, it updates the {@link Partition}'s location by - * {@link AlluxioCatalog#completeTransformTable(JournalContext, String, String, String, Map)}. - */ -public class TransformManager implements DelegatingJournaled { - private static final Logger LOG = LoggerFactory.getLogger(TransformManager.class); - private static final long INVALID_JOB_ID = -1; - - /** The function used to create a {@link JournalContext}. */ - private final ThrowingSupplier mCreateJournalContext; - - /** The catalog. */ - private final AlluxioCatalog mCatalog; - - /** The client to talk to job master. */ - private final JobMasterClient mJobMasterClient; - - /** - * A cache from job ID to the job's information. - * It contains history of finished jobs, no matter whether it succeeds or fails. - * Each history is kept for a configurable time period. - * This is not journaled, so after restarting TableMaster, the job history is lost. - */ - private final Cache mJobHistory = CacheBuilder.newBuilder() - .expireAfterWrite(Configuration.getMs( - PropertyKey.TABLE_TRANSFORM_MANAGER_JOB_HISTORY_RETENTION_TIME), - TimeUnit.MILLISECONDS) - .build(); - - /** Journaled state. 
*/ - private final State mState = new State(); - - /** - * An internal job master client will be created. - * - * @param createJournalContext journal context creator - * @param catalog the table catalog - * @param jobMasterClient the job master client - */ - public TransformManager( - ThrowingSupplier createJournalContext, - AlluxioCatalog catalog, JobMasterClient jobMasterClient) { - mCreateJournalContext = createJournalContext; - mCatalog = catalog; - mJobMasterClient = jobMasterClient; - } - - /** - * Starts background heartbeats. - * The heartbeats are stopped when unchecked error happens inside the heartbeats, - * or the hearbeat threads are interrupted, - * or the provided executor service is stopped. - * This class will not stop the executor service. - * - * @param executorService the executor service for executing heartbeat threads - * @param userState the user state for the heartbeat - */ - public void start(ExecutorService executorService, UserState userState) { - executorService.submit( - new HeartbeatThread(HeartbeatContext.MASTER_TABLE_TRANSFORMATION_MONITOR, new JobMonitor(), - () -> new FixedIntervalSupplier( - Configuration.getMs(PropertyKey.TABLE_TRANSFORM_MANAGER_JOB_MONITOR_INTERVAL)), - Configuration.global(), userState)); - } - - /** - * Executes the plans for the table transformation. - * - * This method executes a transformation job with type{@link CompositeConfig}, - * the transformation job concurrently executes the plans, - * each plan has a list of jobs to be executed sequentially. - * - * This method triggers the execution of the transformation job asynchronously without waiting - * for it to finish. The returned job ID can be used to poll the job service for the status of - * this transformation. 
- * - * @param dbName the database name - * @param tableName the table name - * @param definition the parsed transformation definition - * @return the job ID for the transformation job - * @throws IOException when there is an ongoing transformation on the table, or the transformation - * job fails to be started, or all partitions of the table have been transformed with the same - * definition - */ - public long execute(String dbName, String tableName, TransformDefinition definition) - throws IOException { - List plans = mCatalog.getTransformPlan(dbName, tableName, definition); - if (plans.isEmpty()) { - throw new IOException(ExceptionMessage.TABLE_ALREADY_TRANSFORMED.getMessage( - dbName, tableName, definition.getDefinition())); - } - Pair dbTable = new Pair<>(dbName, tableName); - // Atomically try to acquire the permit to execute the transformation job. - // This PUT does not need to be journaled, because if this PUT succeeds and master crashes, - // when master restarts, this temporary placeholder entry will not exist, which is correct - // behavior. - Long existingJobId = mState.acquireJobPermit(dbTable); - if (existingJobId != null) { - if (existingJobId == INVALID_JOB_ID) { - throw new IOException("A concurrent transformation request is going to be executed"); - } else { - throw new IOException(ExceptionMessage.TABLE_BEING_TRANSFORMED - .getMessage(existingJobId.toString(), tableName, dbName)); - } - } - - ArrayList concurrentJobs = new ArrayList<>(plans.size()); - for (TransformPlan plan : plans) { - concurrentJobs.add(new CompositeConfig(plan.getJobConfigs(), true)); - } - CompositeConfig transformJob = new CompositeConfig(concurrentJobs, false); - - long jobId; - try { - jobId = mJobMasterClient.run(transformJob); - } catch (IOException e) { - // The job fails to start, clear the acquired permit for execution. 
- // No need to journal this REMOVE, if master crashes, when it restarts, the permit placeholder - // entry will not exist any more, which is correct behavior. - mState.releaseJobPermit(dbTable); - String error = String.format("Fails to start job to transform table %s in database %s", - tableName, dbName); - LOG.error(error, e); - throw new IOException(error, e); - } - - Map transformedLayouts = new HashMap<>(plans.size()); - for (TransformPlan plan : plans) { - transformedLayouts.put(plan.getBaseLayout().getSpec(), plan.getTransformedLayout()); - } - AddTransformJobInfoEntry journalEntry = AddTransformJobInfoEntry.newBuilder() - .setDbName(dbName) - .setTableName(tableName) - .setDefinition(definition.getDefinition()) - .setJobId(jobId) - .putAllTransformedLayouts(Maps.transformValues(transformedLayouts, Layout::toProto)) - .build(); - try (JournalContext journalContext = mCreateJournalContext.apply()) { - applyAndJournal(journalContext, Journal.JournalEntry.newBuilder() - .setAddTransformJobInfo(journalEntry).build()); - } - return jobId; - } - - /** - * @param jobId the job ID - * @return the job information - */ - public Optional getTransformJobInfo(long jobId) { - TransformJobInfo job = mState.getRunningJob(jobId); - if (job == null) { - job = mJobHistory.getIfPresent(jobId); - } - return job == null ? Optional.empty() : Optional.of(job); - } - - /** - * @return all transformation jobs, including the running jobs and the kept finished jobs, - * sorted by job ID in increasing order - */ - public List getAllTransformJobInfo() { - List jobs = Lists.newArrayList(mJobHistory.asMap().values()); - jobs.addAll(mState.getRunningJobs()); - jobs.sort(Comparator.comparing(TransformJobInfo::getJobId)); - return jobs; - } - - @Override - public Journaled getDelegate() { - return mState; - } - - /** - * Periodically polls the job service to monitor the status of the running transformation jobs, - * if a transformation job succeeds, then update the Table partitions' layouts. 
- */ - private final class JobMonitor implements HeartbeatExecutor { - private void onFinish(TransformJobInfo job) { - mJobHistory.put(job.getJobId(), job); - RemoveTransformJobInfoEntry journalEntry = RemoveTransformJobInfoEntry.newBuilder() - .setDbName(job.getDb()) - .setTableName(job.getTable()) - .build(); - try (JournalContext journalContext = mCreateJournalContext.apply()) { - applyAndJournal(journalContext, Journal.JournalEntry.newBuilder() - .setRemoveTransformJobInfo(journalEntry).build()); - } catch (UnavailableException e) { - LOG.error("Failed to create journal for RemoveTransformJobInfo for database {} table {}", - job.getDb(), job.getTable()); - } - } - - /** - * Handle the cases where a job fails, is cancelled, or job ID not found. - * - * @param job the transformation job - * @param status the job status - * @param error the job error message - */ - private void handleJobError(TransformJobInfo job, Status status, String error) { - job.setJobStatus(status); - job.setJobErrorMessage(error); - onFinish(job); - } - - /** - * Handle the case where a job is completed. 
- * - * @param job the transformation job - */ - private void handleJobSuccess(TransformJobInfo job) { - try (JournalContext journalContext = mCreateJournalContext.apply()) { - mCatalog.completeTransformTable(journalContext, job.getDb(), job.getTable(), - job.getDefinition(), job.getTransformedLayouts()); - job.setJobStatus(Status.COMPLETED); - } catch (IOException e) { - String error = String.format("Failed to update partition layouts for database %s table %s", - job.getDb(), job.getTable()); - LOG.error(error); - job.setJobStatus(Status.FAILED); - job.setJobErrorMessage(error); - } - onFinish(job); - } - - @Override - public void heartbeat(long timeLimitMs) throws InterruptedException { - for (TransformJobInfo job : mState.getRunningJobs()) { - if (Thread.currentThread().isInterrupted()) { - throw new InterruptedException("TransformManager's heartbeat was interrupted"); - } - long jobId = job.getJobId(); - try { - LOG.debug("Polling for status of transformation job {}", jobId); - JobInfo jobInfo = mJobMasterClient.getJobStatus(jobId); - switch (jobInfo.getStatus()) { - case FAILED: // fall through - case CANCELED: // fall through - LOG.warn("Transformation job {} for database {} table {} {}: {}", jobId, - job.getDb(), job.getTable(), - jobInfo.getStatus() == Status.FAILED ? 
"failed" : "canceled", - jobInfo.getErrorMessage()); - handleJobError(job, jobInfo.getStatus(), jobInfo.getErrorMessage()); - break; - case COMPLETED: - LOG.info("Transformation job {} for database {} table {} succeeds", jobId, - job.getDb(), job.getTable()); - handleJobSuccess(job); - break; - case RUNNING: // fall through - case CREATED: - break; - default: - throw new IllegalStateException("Unrecognized job status: " + jobInfo.getStatus()); - } - } catch (NotFoundException e) { - String error = ExceptionMessage.TRANSFORM_JOB_ID_NOT_FOUND_IN_JOB_SERVICE.getMessage( - jobId, job.getDb(), job.getTable(), e.getMessage()); - LOG.warn(error); - handleJobError(job, Status.FAILED, error); - } catch (IOException e) { - LOG.error("Failed to get status for job (id={})", jobId, e); - } - } - } - - @Override - public void close() { - // EMPTY - } - } - - /** - * Journaled state. - * - * The internal data structure should never be exposed outside of the class, - * all changes to the internal state should happen through applying journal entries. - */ - private final class State implements Journaled { - /** - * Map from (db, table) to the ID of the running transformation job. - * When trying to start a transformation on a table, a placeholder ID is put first. - * When removing a job, first remove from {@link #mRunningJobs}, then remove from this map, - * otherwise, there might be concurrent transformations running on the same table. - */ - private final ConcurrentHashMap, Long> mRunningJobIds = - new ConcurrentHashMap<>(); - /** - * Map from job ID to job info. 
- */ - private final ConcurrentHashMap mRunningJobs = - new ConcurrentHashMap<>(); - - /** - * @return all running jobs - */ - public Collection getRunningJobs() { - return mRunningJobs.values(); - } - - /** - * @param jobId the job ID - * @return corresponding job or null if not exists - */ - public TransformJobInfo getRunningJob(long jobId) { - return mRunningJobs.get(jobId); - } - - /** - * Acquires a permit for transforming a table. - * - * @param dbTable a pair of database and table name - * @return the ID of the existing transformation on the table, or null - */ - public Long acquireJobPermit(Pair dbTable) { - return mRunningJobIds.putIfAbsent(dbTable, INVALID_JOB_ID); - } - - /** - * Releases the previously acquired permit for transforming a table. - * - * @param dbTable a pair of database and table name - */ - public void releaseJobPermit(Pair dbTable) { - mRunningJobIds.remove(dbTable); - } - - @Override - public boolean processJournalEntry(JournalEntry entry) { - if (entry.hasAddTransformJobInfo()) { - applyAddTransformJobInfoEntry(entry.getAddTransformJobInfo()); - } else if (entry.hasRemoveTransformJobInfo()) { - applyRemoveTransformJobInfoEntry(entry.getRemoveTransformJobInfo()); - } else { - return false; - } - return true; - } - - private void applyAddTransformJobInfoEntry(AddTransformJobInfoEntry entry) { - Map layouts = entry.getTransformedLayoutsMap(); - Map transformedLayouts = Maps.transformValues(layouts, - layout -> mCatalog.getLayoutRegistry().create(layout)); - TransformJobInfo job = new TransformJobInfo(entry.getDbName(), entry.getTableName(), - entry.getDefinition(), entry.getJobId(), transformedLayouts); - mRunningJobIds.put(job.getDbTable(), job.getJobId()); - mRunningJobs.put(job.getJobId(), job); - } - - private void applyRemoveTransformJobInfoEntry(RemoveTransformJobInfoEntry entry) { - Pair dbTable = new Pair<>(entry.getDbName(), entry.getTableName()); - long jobId = mRunningJobIds.get(dbTable); - mRunningJobs.remove(jobId); - 
mRunningJobIds.remove(dbTable); - } - - @Override - public void resetState() { - mRunningJobs.clear(); - mRunningJobIds.clear(); - mJobHistory.invalidateAll(); - mJobHistory.cleanUp(); - } - - @Override - public CloseableIterator getJournalEntryIterator() { - return CloseableIterator.noopCloseable( - Iterators.transform(mRunningJobs.values().iterator(), job -> { - AddTransformJobInfoEntry journal = AddTransformJobInfoEntry.newBuilder() - .setDbName(job.getDb()) - .setTableName(job.getTable()) - .setDefinition(job.getDefinition()) - .setJobId(job.getJobId()) - .putAllTransformedLayouts(Maps.transformValues( - job.getTransformedLayouts(), Layout::toProto)) - .build(); - return JournalEntry.newBuilder().setAddTransformJobInfo(journal).build(); - })); - } - - @Override - public CheckpointName getCheckpointName() { - return CheckpointName.TABLE_MASTER_TRANSFORM_MANAGER; - } - } - - /** - * A supplier with return type R that might throw exception E. - * - * @param the return type - * @param the exception type - */ - @FunctionalInterface - public interface ThrowingSupplier { - /** - * @return the result - */ - R apply() throws E; - } -} diff --git a/dora/table/server/master/src/main/resources/META-INF/services/alluxio.master.MasterFactory b/dora/table/server/master/src/main/resources/META-INF/services/alluxio.master.MasterFactory deleted file mode 100644 index 26dcf7c749fb..000000000000 --- a/dora/table/server/master/src/main/resources/META-INF/services/alluxio.master.MasterFactory +++ /dev/null @@ -1,12 +0,0 @@ -# -# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 -# (the "License"). You may not use this work except in compliance with the License, which is -# available at www.apache.org/licenses/LICENSE-2.0 -# -# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -# either express or implied, as more fully set forth in the License. 
-# -# See the NOTICE file distributed with this work for information regarding copyright ownership. -# - -alluxio.master.table.TableMasterFactory diff --git a/dora/table/server/master/src/test/java/alluxio/master/table/AlluxioCatalogTest.java b/dora/table/server/master/src/test/java/alluxio/master/table/AlluxioCatalogTest.java deleted file mode 100644 index bed2745b0023..000000000000 --- a/dora/table/server/master/src/test/java/alluxio/master/table/AlluxioCatalogTest.java +++ /dev/null @@ -1,575 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.master.table; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; -import static org.mockito.Mockito.when; - -import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; -import alluxio.exception.ExceptionMessage; -import alluxio.exception.status.NotFoundException; -import alluxio.grpc.table.ColumnStatisticsData; -import alluxio.grpc.table.ColumnStatisticsInfo; -import alluxio.grpc.table.FieldSchema; -import alluxio.grpc.table.Schema; -import alluxio.grpc.table.StringColumnStatsData; -import alluxio.grpc.table.layout.hive.PartitionInfo; -import alluxio.master.journal.NoopJournalContext; -import alluxio.table.common.Layout; -import alluxio.table.common.UdbPartition; -import alluxio.table.common.layout.HiveLayout; -import alluxio.table.common.transform.TransformDefinition; -import alluxio.table.common.transform.TransformPlan; -import alluxio.table.common.transform.action.TransformActionUtils; -import alluxio.table.common.udb.UdbContext; -import alluxio.table.common.udb.UdbTable; -import alluxio.table.common.udb.UnderDatabaseRegistry; - -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.mockito.Mockito; -import org.powermock.reflect.Whitebox; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.ThreadLocalRandom; -import java.util.stream.Collectors; - -public class AlluxioCatalogTest { - private static final TransformDefinition TRANSFORM_DEFINITION = - TransformDefinition.parse("file.count.max=100"); - - private 
AlluxioCatalog mCatalog; - - @Rule - public ExpectedException mException = ExpectedException.none(); - - @Before - public void before() { - mCatalog = new AlluxioCatalog(); - TestDatabase.reset(); - } - - @Test - public void attachDb() throws Exception { - String dbName = "testdb"; - TestDatabase.genTable(1, 2, false); - mCatalog.attachDatabase(NoopJournalContext.INSTANCE, - TestUdbFactory.TYPE, "connect_URI", TestDatabase.TEST_UDB_NAME, dbName, - Collections.emptyMap(), false); - List dbs = mCatalog.getAllDatabases(); - assertEquals(1, dbs.size()); - assertEquals(dbName, dbs.get(0)); - } - - @Test - public void detachNonExistingDb() throws Exception { - mException.expect(IOException.class); - mCatalog.detachDatabase(NoopJournalContext.INSTANCE, "testDb"); - } - - @Test - public void detachDb() throws Exception { - String dbName = "testdb"; - TestDatabase.genTable(1, 2, false); - mCatalog.attachDatabase(NoopJournalContext.INSTANCE, - TestUdbFactory.TYPE, "connect_URI", TestDatabase.TEST_UDB_NAME, dbName, - Collections.emptyMap(), false); - assertEquals(1, mCatalog.getAllDatabases().size()); - assertTrue(mCatalog.detachDatabase(NoopJournalContext.INSTANCE, dbName)); - assertEquals(0, mCatalog.getAllDatabases().size()); - } - - @Test - public void getDb() throws Exception { - String dbName = "testdb"; - TestDatabase.genTable(1, 2, false); - - try { - mCatalog.getDatabase(dbName); - fail(); - } catch (IOException e) { - assertEquals("Database " + dbName + " does not exist", e.getMessage()); - } - - mCatalog.attachDatabase(NoopJournalContext.INSTANCE, - TestUdbFactory.TYPE, "connect_URI", TestDatabase.TEST_UDB_NAME, dbName, - Collections.emptyMap(), false); - assertEquals(dbName, mCatalog.getDatabase(dbName).getDbName()); - assertEquals(TestDatabase.sTestDbInfo.getComment(), - mCatalog.getDatabase(dbName).getComment()); - assertEquals(TestDatabase.sTestDbInfo.getLocation(), - mCatalog.getDatabase(dbName).getLocation()); - 
assertEquals(TestDatabase.sTestDbInfo.getOwnerName(), - mCatalog.getDatabase(dbName).getOwnerName()); - assertEquals(TestDatabase.sTestDbInfo.getOwnerType(), - mCatalog.getDatabase(dbName).getOwnerType()); - assertEquals(TestDatabase.sTestDbInfo.getParameters(), - mCatalog.getDatabase(dbName).getParameterMap()); - } - - @Test - public void testGetAllDatabase() throws Exception { - addMockDbs(); - assertEquals(2, mCatalog.getAllDatabases().size()); - TestDatabase.genTable(1, 2, false); - mCatalog.attachDatabase(NoopJournalContext.INSTANCE, - TestUdbFactory.TYPE, "connect_URI", TestDatabase.TEST_UDB_NAME, "testdb", - Collections.emptyMap(), false); - assertEquals(3, mCatalog.getAllDatabases().size()); - } - - @Test - public void testGetAllTablesNotFound() throws Exception { - mException.expect(NotFoundException.class); - mCatalog.getAllTables("dbs"); - } - - @Test - public void testGetAllTablesFound() throws Exception { - addMockDbs(); - assertTrue(mCatalog.getAllTables("db2").contains("1")); - assertTrue(mCatalog.getAllTables("db2").contains("2")); - assertTrue(mCatalog.getAllTables("db2").contains("3")); - assertTrue(mCatalog.getAllTables("db2").contains("4")); - } - - @Test - public void testGetNotExistentTable() throws Exception { - addMockDbs(); - mException.expect(NotFoundException.class); - mCatalog.getTable("db1", "noop"); - } - - @Test - public void testGetExistingTables() throws Exception { - addMockDbs(); - assertEquals("1", mCatalog.getTable("db2", "1").getName()); - assertEquals("2", mCatalog.getTable("db2", "2").getName()); - assertEquals("3", mCatalog.getTable("db2", "3").getName()); - assertEquals("4", mCatalog.getTable("db2", "4").getName()); - mException.expect(NotFoundException.class); - mCatalog.getTable("db2", "5"); - } - - @Test - public void testGetPartitionUnpartitonedUdbTable() throws Exception { - Schema s = schemaFromColNames("c1", "c2", "c3"); - // setup - UdbTable tbl = createMockUdbTable("test", s); - Database db = 
createMockDatabase("noop", "test", Collections.emptyList()); - addTableToDb(db, Table.create(db, tbl, null)); - addDbToCatalog(db); - assertEquals(1, mCatalog.getTable("test", "test").getPartitions().size()); - } - - @Test - public void testGetPartitionPartitonedUdbTable() throws Exception { - Schema s = schemaFromColNames("c1", "c2", "c3"); - // setup - UdbTable tbl = createMockPartitionedUdbTable("test", s); - Database db = createMockDatabase("noop", "test", Collections.emptyList()); - addTableToDb(db, Table.create(db, tbl, null)); - addDbToCatalog(db); - assertEquals(2, mCatalog.getTable("test", "test").getPartitions().size()); - } - - @Test - public void testGetPartitionColumnStats() throws Exception { - String dbName = "testdb"; - TestDatabase.genTable(1, 2, false); - mCatalog.attachDatabase(NoopJournalContext.INSTANCE, - TestUdbFactory.TYPE, "connect_URI", TestDatabase.TEST_UDB_NAME, dbName, - Collections.emptyMap(), false); - // single partition - assertEquals(1, mCatalog.getPartitionColumnStatistics(dbName, - TestDatabase.getTableName(0), - Arrays.asList(TestUdbTable.getPartName(0)), Arrays.asList("col2")).size()); - - // multiple partitions - assertEquals(2, mCatalog.getPartitionColumnStatistics(dbName, - TestDatabase.getTableName(0), - Arrays.asList(TestUdbTable.getPartName(0), TestUdbTable.getPartName(1)), - Arrays.asList("col2")).size()); - - // unknown column - assertEquals(2, mCatalog.getPartitionColumnStatistics(dbName, - TestDatabase.getTableName(0), - Arrays.asList(TestUdbTable.getPartName(0), TestUdbTable.getPartName(1)), - Arrays.asList("col3")).size()); - - // unknown partition - assertEquals(0, mCatalog.getPartitionColumnStatistics(dbName, - TestDatabase.getTableName(0), - Arrays.asList(TestUdbTable.getPartName(3)), - Arrays.asList("col2")).size()); - } - - @Test - public void testGetColumnStats() throws Exception { - Schema s = schemaFromColNames("c1", "c2", "c3"); - // setup - // Why does this API seem so counter intuitive? 
- UdbTable tbl = createMockUdbTable("test", s); - Database db = createMockDatabase("noop", "test", Collections.emptyList()); - addTableToDb(db, Table.create(db, tbl, null)); - addDbToCatalog(db); - - // basic, filter on each col - assertEquals(1, - mCatalog.getTableColumnStatistics("test", "test", Lists.newArrayList("c1")).size()); - assertEquals(1, - mCatalog.getTableColumnStatistics("test", "test", Lists.newArrayList("c2")).size()); - assertEquals(1, - mCatalog.getTableColumnStatistics("test", "test", Lists.newArrayList("c3")).size()); - - // try two - assertEquals(2, - mCatalog.getTableColumnStatistics("test", "test", Lists.newArrayList("c1", "c2")).size()); - // flip order - assertEquals(2, - mCatalog.getTableColumnStatistics("test", "test", Lists.newArrayList("c2", "c1")).size()); - - // non existing - assertEquals(0, mCatalog.getTableColumnStatistics("test", "test", - Lists.newArrayList("doesnotexist")).size()); - - // empty - assertEquals(0, mCatalog.getTableColumnStatistics("test", "test", - Lists.newArrayList()).size()); - } - - @Test - public void getTransformPlanForNonExistingDatabase() throws IOException { - String dbName = "doesnotexist"; - mException.expect(NotFoundException.class); - mException.expectMessage(ExceptionMessage.DATABASE_DOES_NOT_EXIST.getMessage(dbName)); - mCatalog.getTransformPlan(dbName, "table", TRANSFORM_DEFINITION); - } - - @Test - public void getTransformPlanForNonExistingTable() throws IOException { - String dbName = "existingdb"; - mCatalog.attachDatabase(NoopJournalContext.INSTANCE, - TestUdbFactory.TYPE, "connect_URI", TestDatabase.TEST_UDB_NAME, dbName, - Collections.emptyMap(), false); - assertEquals(1, mCatalog.getAllDatabases().size()); - assertEquals(0, mCatalog.getAllTables(dbName).size()); - String tableName = "doesnotexist"; - mException.expect(NotFoundException.class); - mException.expectMessage(ExceptionMessage.TABLE_DOES_NOT_EXIST.getMessage(tableName, dbName)); - mCatalog.getTransformPlan(dbName, tableName, 
TRANSFORM_DEFINITION); - } - - @Test - public void getTransformPlan() throws Exception { - String dbName = "testdb"; - TestDatabase.genTable(1, 1, false); - mCatalog.attachDatabase(NoopJournalContext.INSTANCE, - TestUdbFactory.TYPE, "connect_URI", TestDatabase.TEST_UDB_NAME, dbName, - Collections.emptyMap(), false); - assertEquals(1, mCatalog.getAllDatabases().size()); - assertEquals(1, mCatalog.getAllTables(dbName).size()); - String tableName = TestDatabase.getTableName(0); - // When generating transform plan, the authority of the output path - // will be determined based on this hostname configuration. - Configuration.set(PropertyKey.MASTER_HOSTNAME, "localhost"); - List plans = mCatalog.getTransformPlan(dbName, tableName, TRANSFORM_DEFINITION); - assertEquals(1, plans.size()); - Table table = mCatalog.getTable(dbName, tableName); - assertEquals(1, table.getPartitions().size()); - assertEquals(table.getPartitions().get(0).getLayout(), plans.get(0).getBaseLayout()); - } - - @Test - public void getTransformPlanOutputUri() throws Exception { - String dbName = "testdb"; - TestDatabase.genTable(1, 1, false); - mCatalog.attachDatabase(NoopJournalContext.INSTANCE, - TestUdbFactory.TYPE, "connect_URI", TestDatabase.TEST_UDB_NAME, dbName, - Collections.emptyMap(), false); - String tableName = TestDatabase.getTableName(0); - Table table = mCatalog.getTable(dbName, tableName); - - Configuration.set(PropertyKey.MASTER_HOSTNAME, "localhost"); - Configuration.set(PropertyKey.MASTER_RPC_PORT, 8080); - List plans = mCatalog.getTransformPlan(dbName, tableName, TRANSFORM_DEFINITION); - assertEquals("alluxio://localhost:8080/", - plans.get(0).getTransformedLayout().getLocation().getRootPath()); - - Configuration.set(PropertyKey.MASTER_RPC_ADDRESSES, "host1:1,host2:2"); - plans = mCatalog.getTransformPlan(dbName, tableName, TRANSFORM_DEFINITION); - assertEquals("alluxio://host1:1,host2:2/", - plans.get(0).getTransformedLayout().getLocation().getRootPath()); - - 
Configuration.set(PropertyKey.ZOOKEEPER_ENABLED, true); - Configuration.set(PropertyKey.ZOOKEEPER_ADDRESS, "host:1000"); - plans = mCatalog.getTransformPlan(dbName, tableName, TRANSFORM_DEFINITION); - assertEquals("alluxio://zk@host:1000/", - plans.get(0).getTransformedLayout().getLocation().getRootPath()); - } - - @Test - public void getTransformPlanTransformedLayout() throws Exception { - String dbName = "testdb"; - TestDatabase.genTable(1, 1, false); - mCatalog.attachDatabase(NoopJournalContext.INSTANCE, - TestUdbFactory.TYPE, "connect_URI", TestDatabase.TEST_UDB_NAME, dbName, - Collections.emptyMap(), false); - String tableName = TestDatabase.getTableName(0); - - Configuration.set(PropertyKey.MASTER_HOSTNAME, "localhost"); - - final TransformDefinition transformDefinition = - TransformDefinition.parse("file.count.max=100;file.parquet.compression=uncompressed"); - - List plans = mCatalog.getTransformPlan(dbName, tableName, transformDefinition); - assertEquals(1, plans.size()); - alluxio.job.plan.transform.PartitionInfo transformedPartitionInfo = - TransformActionUtils.generatePartitionInfo(plans.get(0).getTransformedLayout()); - assertEquals("uncompressed", - transformedPartitionInfo.getSerdeProperties().get("file.parquet.compression")); - } - - @Test - public void completeTransformNonExistingDatabase() throws IOException { - String dbName = "doesnotexist"; - mException.expect(NotFoundException.class); - mException.expectMessage(ExceptionMessage.DATABASE_DOES_NOT_EXIST.getMessage(dbName)); - mCatalog.completeTransformTable(NoopJournalContext.INSTANCE, dbName, "table", - TRANSFORM_DEFINITION.getDefinition(), Collections.emptyMap()); - } - - @Test - public void completeTransformNonExistingTable() throws IOException { - String dbName = "existingdb"; - mCatalog.attachDatabase(NoopJournalContext.INSTANCE, - TestUdbFactory.TYPE, "connect_URI", TestDatabase.TEST_UDB_NAME, dbName, - Collections.emptyMap(), false); - assertEquals(1, mCatalog.getAllDatabases().size()); - 
assertEquals(0, mCatalog.getAllTables(dbName).size()); - String tableName = "doesnotexist"; - mException.expect(NotFoundException.class); - mException.expectMessage(ExceptionMessage.TABLE_DOES_NOT_EXIST.getMessage(tableName, dbName)); - mCatalog.completeTransformTable(NoopJournalContext.INSTANCE, dbName, tableName, - TRANSFORM_DEFINITION.getDefinition(), Collections.emptyMap()); - } - - @Test - public void completeTransformTable() throws IOException { - String dbName = "testdb"; - TestDatabase.genTable(1, 10, false); - mCatalog.attachDatabase(NoopJournalContext.INSTANCE, - TestUdbFactory.TYPE, "connect_URI", TestDatabase.TEST_UDB_NAME, dbName, - Collections.emptyMap(), false); - String tableName = TestDatabase.getTableName(0); - - Table table = mCatalog.getTable(dbName, tableName); - table.getPartitions().forEach(partition -> - assertFalse(partition.isTransformed(TRANSFORM_DEFINITION.getDefinition()))); - - // When generating transform plan, the authority of the output path - // will be determined based on this hostname configuration. 
- Configuration.set(PropertyKey.MASTER_HOSTNAME, "localhost"); - List plans = mCatalog.getTransformPlan(dbName, tableName, TRANSFORM_DEFINITION); - - Map transformedLayouts = Maps.newHashMapWithExpectedSize(plans.size()); - plans.forEach(plan -> - transformedLayouts.put(plan.getBaseLayout().getSpec(), plan.getTransformedLayout())); - mCatalog.completeTransformTable(NoopJournalContext.INSTANCE, dbName, tableName, - TRANSFORM_DEFINITION.getDefinition(), transformedLayouts); - - table.getPartitions().forEach(partition -> { - assertTrue(partition.isTransformed(TRANSFORM_DEFINITION.getDefinition())); - assertEquals(transformedLayouts.get(partition.getSpec()), partition.getLayout()); - }); - } - - @Test - public void parallelSync() throws Exception { - // this should be larger than numThreads, to guarantee all threads are utilized - int numTables = 200; - TestDatabase.genTable(numTables, 2, false); - - testParallelSyncInternal("8", 8); - testParallelSyncInternal("16", 16); - testParallelSyncInternal("1", 1); - testParallelSyncInternal("0", CatalogProperty.DEFAULT_DB_SYNC_THREADS); - testParallelSyncInternal("-1", CatalogProperty.DEFAULT_DB_SYNC_THREADS); - testParallelSyncInternal("", CatalogProperty.DEFAULT_DB_SYNC_THREADS); - testParallelSyncInternal("not an int", CatalogProperty.DEFAULT_DB_SYNC_THREADS); - } - - /** - * @param syncThreads the string value for the sync threads parameter. 
remains unset if null - * @param expectedThreadsUsed the expected number of threads used for the sync - */ - private void testParallelSyncInternal(String syncThreads, int expectedThreadsUsed) - throws Exception { - Map attachConf = Collections.emptyMap(); - if (syncThreads != null) { - attachConf = ImmutableMap.of(CatalogProperty.DB_SYNC_THREADS.getName(), syncThreads); - } - // verify thread count for attach - assertTrue(TestDatabase.getTableThreadNames().isEmpty()); - mCatalog.attachDatabase(NoopJournalContext.INSTANCE, TestUdbFactory.TYPE, "connect_URI", - TestDatabase.TEST_UDB_NAME, TestDatabase.TEST_UDB_NAME, attachConf, false); - Set threadNames = TestDatabase.getTableThreadNames(); - assertEquals("unexpected # threads used for attach for config value: " + syncThreads, - expectedThreadsUsed, threadNames.size()); - - // verify thread count for sync - TestDatabase.resetGetTableThreadNames(); - assertTrue(TestDatabase.getTableThreadNames().isEmpty()); - mCatalog.syncDatabase(NoopJournalContext.INSTANCE, TestDatabase.TEST_UDB_NAME); - threadNames = TestDatabase.getTableThreadNames(); - assertEquals("unexpected # threads used for sync for config value: " + syncThreads, - expectedThreadsUsed, threadNames.size()); - - // reset the state by detaching and resetting the thread name set - mCatalog.detachDatabase(NoopJournalContext.INSTANCE, TestDatabase.TEST_UDB_NAME); - TestDatabase.resetGetTableThreadNames(); - } - - /** - * Add mock database of name "db1" and "db2" to the catalog - * - * db1 has no tables. - * - * db2 has 4 mock tables - * - * @return a map of db names to database objects - */ - private Map addMockDbs() { - Database db1 = createMockDatabase("noop", "db1", Collections.emptyList()); - List
tables = Lists.newArrayList(1, 2, 3, 4).stream().map(i -> { - Table t = Mockito.mock(Table.class); - when(t.getName()).thenReturn(Integer.toString(i)); - return t; - }) - .collect(Collectors.toList()); - Database db2 = createMockDatabase("noop", "db2", tables); - Map dbs = new HashMap<>(); - dbs.put("db1", db1); - dbs.put("db2", db2); - assertEquals(0, dbs.get("db1").getTables().size()); - assertEquals(4, dbs.get("db2").getTables().size()); - Whitebox.setInternalState(mCatalog, "mDBs", dbs); - return dbs; - } - - private void addTableToDb(Database db, Table table) { - Map dbTables = Whitebox.getInternalState(db, "mTables"); - dbTables.put(table.getName(), table); - } - - private Database createMockDatabase(String type, String name, Collection
tables) { - UdbContext udbCtx = Mockito.mock(UdbContext.class); - when(udbCtx.getUdbRegistry()).thenReturn(Mockito.mock(UnderDatabaseRegistry.class)); - Database db = Database.create( - Mockito.mock(CatalogContext.class), - udbCtx, - type, - name, - Collections.emptyMap() - ); - tables.forEach(table -> addTableToDb(db, table)); - return db; - } - - private void addDbToCatalogWithTables(String dbName, Collection
tables) { - Database db = createMockDatabase("noop", dbName, tables); - addDbToCatalog(db); - } - - private void addDbToCatalog(Database db) { - ((Map) Whitebox.getInternalState(mCatalog, "mDBs")).put(db.getName(), db); - } - - UdbTable createMockPartitionedUdbTable(String name, Schema schema) throws IOException { - UdbPartition partition = Mockito.mock(UdbPartition.class); - when(partition.getSpec()).thenReturn(name); - when(partition.getLayout()).thenReturn(new HiveLayout(PartitionInfo.getDefaultInstance(), - Collections.emptyList())); - UdbTable tbl = Mockito.mock(UdbTable.class); - when(tbl.getName()).thenReturn(name); - when(tbl.getSchema()).thenReturn(schema); - when(tbl.getStatistics()).thenReturn(createRandomStatsForSchema(schema)); - when(tbl.getPartitions()).thenReturn(Arrays.asList(partition, partition)); - when(tbl.getPartitionCols()).thenReturn(Arrays.asList(FieldSchema.getDefaultInstance())); - when(tbl.getLayout()).thenReturn(new HiveLayout(PartitionInfo.getDefaultInstance(), - Collections.emptyList()).toProto()); - return tbl; - } - - UdbTable createMockUdbTable(String name, Schema schema) throws IOException { - UdbPartition partition = Mockito.mock(UdbPartition.class); - when(partition.getSpec()).thenReturn(name); - when(partition.getLayout()).thenReturn(new HiveLayout(PartitionInfo.getDefaultInstance(), - Collections.emptyList())); - UdbTable tbl = Mockito.mock(UdbTable.class); - when(tbl.getName()).thenReturn(name); - when(tbl.getSchema()).thenReturn(schema); - when(tbl.getStatistics()).thenReturn(createRandomStatsForSchema(schema)); - when(tbl.getPartitions()).thenReturn(Arrays.asList(partition)); - when(tbl.getPartitionCols()).thenReturn(Collections.emptyList()); - when(tbl.getLayout()).thenReturn(new HiveLayout(PartitionInfo.getDefaultInstance(), - Collections.emptyList()).toProto()); - return tbl; - } - - Schema schemaFromColNames(String... 
names) { - Schema.Builder s = Schema.newBuilder(); - for (int i = 0; i < names.length; i++) { - s.addCols(FieldSchema.newBuilder().setName(names[i]).setType("string").build()); - } - return s.build(); - } - - List createRandomStatsForSchema(Schema s) { - return s.getColsList().stream().map(f -> { - if (!f.getType().equals("string")) { - throw new RuntimeException("can only generate random stats for string columns"); - } - return ColumnStatisticsInfo.newBuilder() - .setColName(f.getName()) - .setColType(f.getType()) - .setData( - ColumnStatisticsData.newBuilder() - .setStringStats( - StringColumnStatsData.newBuilder() - .setAvgColLen(ThreadLocalRandom.current().nextInt() % 1000) - .setMaxColLen((ThreadLocalRandom.current().nextInt() % 1000) + 750) - .setNumNulls(ThreadLocalRandom.current().nextInt() % 100) - .build() - ) - .build() - ) - .build(); - }).collect(Collectors.toList()); - } -} diff --git a/dora/table/server/master/src/test/java/alluxio/master/table/DbConfigTest.java b/dora/table/server/master/src/test/java/alluxio/master/table/DbConfigTest.java deleted file mode 100644 index 3dc1848d94b0..000000000000 --- a/dora/table/server/master/src/test/java/alluxio/master/table/DbConfigTest.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.master.table; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import alluxio.table.common.udb.UdbBypassSpec; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; -import org.junit.Before; -import org.junit.Test; - -import java.util.List; - -public class DbConfigTest { - private ObjectMapper mMapper; - - @Before - public void before() { - mMapper = new ObjectMapper(); - } - - /* BypassTableEntry tests */ - @Test - public void tableNamesOnly() throws Exception { - DbConfig.BypassTableEntry entry = - mMapper.readValue("\"table1\"", DbConfig.BypassTableEntry.class); - assertEquals("table1", entry.getTable()); - assertEquals(ImmutableSet.of(), entry.getPartitions()); - } - - @Test - public void tableNamesAndPartitions() throws Exception { - DbConfig.BypassTableEntry entry = mMapper.readValue( - "{\"table\": \"table2\", \"partitions\": [\"t2p1\", \"t2p2\"]}", - DbConfig.BypassTableEntry.class - ); - assertEquals("table2", entry.getTable()); - assertEquals(ImmutableSet.of("t2p1", "t2p2"), entry.getPartitions()); - } - - @Test - public void missingPartitions() throws Exception { - DbConfig.BypassTableEntry entry = mMapper.readValue( - "{\"table\": \"table3\"}", - DbConfig.BypassTableEntry.class - ); - assertEquals("table3", entry.getTable()); - assertEquals(ImmutableSet.of(), entry.getPartitions()); - } - - @Test - public void equalityRegardlessOfPartitions() throws Exception { - DbConfig.BypassTableEntry entry1 = mMapper.readValue( - "{\"table\": \"table4\", \"partitions\": [\"p1\"]}", - DbConfig.BypassTableEntry.class - ); - DbConfig.BypassTableEntry entry2 = mMapper.readValue( - "{\"table\": \"table4\", \"partitions\": [\"p2\"]}", - DbConfig.BypassTableEntry.class - ); - assertEquals(entry1, entry2); - assertEquals(entry1.hashCode(), entry2.hashCode()); - } - - /* BypassEntry tests */ - @Test - public 
void emptyListOfTables() throws Exception { - DbConfig.BypassEntry entry = mMapper.readValue("{\"tables\": []}", DbConfig.BypassEntry.class); - assertEquals(ImmutableSet.of(), entry.getBypassedTables()); - } - - @Test - public void nullConstructor() throws Exception { - DbConfig.BypassEntry entry1 = mMapper.readValue("{}", DbConfig.BypassEntry.class); - assertEquals(ImmutableSet.of(), entry1.getBypassedTables()); - DbConfig.BypassEntry entry2 = new DbConfig.BypassEntry(null); - assertEquals(ImmutableSet.of(), entry2.getBypassedTables()); - } - - @Test - public void convertToUdbBypassSpec() throws Exception { - DbConfig.BypassEntry entry = - mMapper.readValue("{\"tables\": [\"table1\"]}", DbConfig.BypassEntry.class); - assertEquals(ImmutableSet.of("table1"), entry.getBypassedTables()); - UdbBypassSpec spec = entry.toUdbBypassSpec(); - assertTrue(spec.hasTable("table1")); - } - - /* DbConfig tests */ - @Test - public void emptyConfig() throws Exception { - List src = ImmutableList.of( - "{}", - "{\"bypass\": {}}" - ); - for (String input : src) { - DbConfig config = mMapper.readValue(input, DbConfig.class); - assertEquals(DbConfig.empty().getBypassEntry().getBypassTableEntries(), - config.getBypassEntry().getBypassTableEntries()); - } - } -} diff --git a/dora/table/server/master/src/test/java/alluxio/master/table/TableMasterFactoryTest.java b/dora/table/server/master/src/test/java/alluxio/master/table/TableMasterFactoryTest.java deleted file mode 100644 index ae1c4991901d..000000000000 --- a/dora/table/server/master/src/test/java/alluxio/master/table/TableMasterFactoryTest.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.table; - -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.mock; - -import alluxio.Constants; -import alluxio.Server; -import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; -import alluxio.master.AlwaysStandbyPrimarySelector; -import alluxio.master.BackupManager; -import alluxio.master.CoreMasterContext; -import alluxio.master.MasterRegistry; -import alluxio.master.MasterUtils; -import alluxio.master.TestSafeModeManager; -import alluxio.master.journal.noop.NoopJournalSystem; -import alluxio.master.metastore.heap.HeapBlockMetaStore; -import alluxio.master.metastore.heap.HeapInodeStore; -import alluxio.underfs.MasterUfsManager; - -import org.junit.After; -import org.junit.Before; -import org.junit.ClassRule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import java.util.Set; -import java.util.stream.Collectors; - -public class TableMasterFactoryTest { - - private CoreMasterContext mContext; - - @ClassRule - public static TemporaryFolder sTemp = new TemporaryFolder(); - - @Before - public void before() { - mContext = CoreMasterContext.newBuilder() - .setJournalSystem(new NoopJournalSystem()) - .setPrimarySelector(new AlwaysStandbyPrimarySelector()) - .setSafeModeManager(new TestSafeModeManager()) - .setBackupManager(mock(BackupManager.class)) - .setBlockStoreFactory(HeapBlockMetaStore::new) - .setInodeStoreFactory(x -> new HeapInodeStore()) - .setUfsManager(new MasterUfsManager()) - .build(); - 
Configuration.set(PropertyKey.MASTER_JOURNAL_FOLDER, sTemp.getRoot().getAbsolutePath()); - } - - @After - public void after() { - Configuration.set(PropertyKey.TABLE_ENABLED, true); - } - - @Test - public void enabled() throws Exception { - Configuration.set(PropertyKey.TABLE_ENABLED, true); - MasterRegistry registry = new MasterRegistry(); - MasterUtils.createMasters(registry, mContext); - Set names = - registry.getServers().stream().map(Server::getName).collect(Collectors.toSet()); - assertTrue(names.contains(Constants.TABLE_MASTER_NAME)); - } - - @Test - public void disabled() { - Configuration.set(PropertyKey.TABLE_ENABLED, false); - MasterRegistry registry = new MasterRegistry(); - MasterUtils.createMasters(registry, mContext); - Set names = - registry.getServers().stream().map(Server::getName).collect(Collectors.toSet()); - assertFalse(names.contains(Constants.TABLE_MASTER_NAME)); - } -} diff --git a/dora/table/server/master/src/test/java/alluxio/master/table/TestDatabase.java b/dora/table/server/master/src/test/java/alluxio/master/table/TestDatabase.java deleted file mode 100644 index 755c983be69d..000000000000 --- a/dora/table/server/master/src/test/java/alluxio/master/table/TestDatabase.java +++ /dev/null @@ -1,152 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.master.table; - -import alluxio.client.file.FileSystem; -import alluxio.collections.ConcurrentHashSet; -import alluxio.exception.status.NotFoundException; -import alluxio.grpc.table.PrincipalType; -import alluxio.table.common.udb.UdbBypassSpec; -import alluxio.table.common.udb.UdbConfiguration; -import alluxio.table.common.udb.UdbContext; -import alluxio.table.common.udb.UdbTable; -import alluxio.table.common.udb.UnderDatabase; - -import com.google.common.collect.ImmutableMap; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; - -/** - * A udb implementation which does nothing, used for testing. - */ -public class TestDatabase implements UnderDatabase { - public static final String TEST_UDB_NAME = "test_udb_name"; - public static final String TABLE_NAME_PREFIX = "test_table_name"; - public static DatabaseInfo sTestDbInfo = new DatabaseInfo("test://test", "TestOwner", - PrincipalType.USER, "comment", ImmutableMap.of("testkey", "testvalue")); - - private static final TestDatabase DATABASE = new TestDatabase(); - - private Map mUdbTables; - private UdbContext mUdbContext; - /** - * The names of the threads calling getTable(). This is useful for examining parallel sync - * behavior. - */ - private Set mGetTableThreadNames = new ConcurrentHashSet<>(); - - private TestDatabase() { - mUdbTables = new HashMap<>(); - } - - /** - * Resets the db by clearing out all tables. - */ - public static void reset() { - DATABASE.mUdbTables.clear(); - resetGetTableThreadNames(); - } - - /** - * Resets the set of thread names for getTable. - */ - public static void resetGetTableThreadNames() { - DATABASE.mGetTableThreadNames.clear(); - } - - /** - * @return the set of thread names used to call getTable - */ - public static Set getTableThreadNames() { - return DATABASE.mGetTableThreadNames; - } - - /** - * Creates an instance. 
- * - * @param udbContext the db context - * @param configuration the configuration - * @return the new instance - */ - public static TestDatabase create(UdbContext udbContext, - UdbConfiguration configuration) { - DATABASE.setUdbContext(udbContext); - return DATABASE; - } - - private void checkDbName() throws NotFoundException { - if (!getUdbContext().getUdbDbName().equals(TEST_UDB_NAME)) { - throw new NotFoundException("Database " + getUdbContext().getDbName() + " does not exist."); - } - } - - @Override - public String getType() { - return TestUdbFactory.TYPE; - } - - @Override - public String getName() { - return TEST_UDB_NAME; - } - - @Override - public List getTableNames() throws IOException { - checkDbName(); - return new ArrayList<>(mUdbTables.keySet()); - } - - @Override - public UdbTable getTable(String tableName, UdbBypassSpec bypassSpec) throws IOException { - checkDbName(); - if (!mUdbTables.containsKey(tableName)) { - throw new NotFoundException("Table " + tableName + " does not exist."); - } - mGetTableThreadNames.add(Thread.currentThread().getName()); - return mUdbTables.get(tableName); - } - - public static String getTableName(int i) { - return TABLE_NAME_PREFIX + Integer.toString(i); - } - - public static void genTable(int numOfTable, int numOfPartitions, boolean generateFiles) { - DATABASE.mUdbTables.clear(); - FileSystem fs = null; - if (generateFiles) { - fs = FileSystem.Factory.create(); - } - for (int i = 0; i < numOfTable; i++) { - DATABASE.mUdbTables.put(getTableName(i), - new TestUdbTable(TEST_UDB_NAME, getTableName(i), numOfPartitions, fs)); - } - } - - private void setUdbContext(UdbContext udbContext) { - mUdbContext = udbContext; - } - - @Override - public UdbContext getUdbContext() { - return mUdbContext; - } - - @Override - public DatabaseInfo getDatabaseInfo() { - return sTestDbInfo; - } -} diff --git a/dora/table/server/master/src/test/java/alluxio/master/table/TestUdbFactory.java 
b/dora/table/server/master/src/test/java/alluxio/master/table/TestUdbFactory.java deleted file mode 100644 index 47aa7b23338d..000000000000 --- a/dora/table/server/master/src/test/java/alluxio/master/table/TestUdbFactory.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.table; - -import alluxio.table.common.udb.UdbConfiguration; -import alluxio.table.common.udb.UdbContext; -import alluxio.table.common.udb.UnderDatabase; -import alluxio.table.common.udb.UnderDatabaseFactory; - -public class TestUdbFactory implements UnderDatabaseFactory { - - public static final String TYPE = "testudb"; - - @Override - public String getType() { - return TYPE; - } - - @Override - public UnderDatabase create(UdbContext udbContext, UdbConfiguration configuration) { - return TestDatabase.create(udbContext, configuration); - } -} diff --git a/dora/table/server/master/src/test/java/alluxio/master/table/TestUdbTable.java b/dora/table/server/master/src/test/java/alluxio/master/table/TestUdbTable.java deleted file mode 100644 index ae51b5226702..000000000000 --- a/dora/table/server/master/src/test/java/alluxio/master/table/TestUdbTable.java +++ /dev/null @@ -1,193 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.table; - -import alluxio.AlluxioURI; -import alluxio.Constants; -import alluxio.client.file.FileOutStream; -import alluxio.client.file.FileSystem; -import alluxio.exception.AlluxioException; -import alluxio.grpc.CreateFilePOptions; -import alluxio.grpc.table.ColumnStatisticsData; -import alluxio.grpc.table.ColumnStatisticsInfo; -import alluxio.grpc.table.FieldSchema; -import alluxio.grpc.table.Layout; -import alluxio.grpc.table.LongColumnStatsData; -import alluxio.grpc.table.Schema; -import alluxio.grpc.table.layout.hive.PartitionInfo; -import alluxio.grpc.table.layout.hive.Storage; -import alluxio.table.common.UdbPartition; -import alluxio.table.common.layout.HiveLayout; -import alluxio.table.common.udb.UdbTable; -import alluxio.uri.Authority; -import alluxio.util.CommonUtils; -import alluxio.util.ConfigurationUtils; -import alluxio.util.WaitForOptions; - -import com.google.common.collect.ImmutableList; - -import java.io.IOException; -import java.net.InetSocketAddress; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.concurrent.TimeoutException; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -public class TestUdbTable implements UdbTable { - private List mDataCols; - private String mDbName; - private String mName; - private PartitionInfo mPartitionInfo; - private Layout mTableLayout; - private List mTestPartitions; - private Schema mSchema; - private List mPartitionCols; - private List mStats; - - public TestUdbTable(String dbName, String 
name, int numOfPartitions, FileSystem fs) { - mDbName = dbName; - mName = name; - mPartitionInfo = PartitionInfo.newBuilder() - .setDbName(mDbName) - .setTableName(mName) - .setPartitionName(mName).build(); - mTableLayout = Layout.newBuilder() - .setLayoutType(HiveLayout.TYPE) - .setLayoutData(mPartitionInfo.toByteString()) - .build(); - FieldSchema col = FieldSchema.newBuilder().setName("col1") - .setType("int").setId(1).build(); - FieldSchema col2 = FieldSchema.newBuilder().setName("col2") - .setType("int").setId(2).build(); - mSchema = Schema.newBuilder().addCols(col).addCols(col2).build(); - mPartitionCols = Arrays.asList(col); - mDataCols = Arrays.asList(col2); - ColumnStatisticsInfo stats = ColumnStatisticsInfo.newBuilder().setColName("col2") - .setColType("int").setData(ColumnStatisticsData.newBuilder() - .setLongStats(LongColumnStatsData.getDefaultInstance()).build()).build(); - mStats = Arrays.asList(stats); - - mTestPartitions = Stream.iterate(0, n -> n + 1) - .limit(numOfPartitions).map(i -> { - AlluxioURI location = new AlluxioURI("/udbtable/" - + CommonUtils.randomAlphaNumString(5) + i + "/test.csv"); - if (fs != null) { - location = new AlluxioURI(Constants.SCHEME, - Authority.fromString(String.join(",", - ConfigurationUtils.getMasterRpcAddresses( - fs.getConf()).stream() - .map(InetSocketAddress::toString) - .collect(ImmutableList.toImmutableList()))), - "/udbtable/" + CommonUtils.randomAlphaNumString(5) + i + "/test.csv"); - try (FileOutStream out = fs.createFile(location, - CreateFilePOptions.newBuilder().setRecursive(true).build())) { - out.write("1".getBytes()); - } catch (IOException | AlluxioException e) { - throw new RuntimeException(e); - } - - final AlluxioURI waitLocation = location; - try { - CommonUtils.waitFor("file to be completed", () -> { - try { - return fs.getStatus(waitLocation).isCompleted(); - } catch (Exception e) { - e.printStackTrace(); - return false; - } - }, WaitForOptions.defaults().setTimeoutMs(100)); - } catch 
(InterruptedException | TimeoutException e) { - throw new RuntimeException(e); - } - } - return new TestPartition(new HiveLayout(genPartitionInfo( - mDbName, mName, i, location.getParent().toString(), mDataCols), mStats)); - }) - .collect(Collectors.toList()); - } - - public static String getPartName(int index) { - return "col1=" + index; - } - - private static PartitionInfo genPartitionInfo(String dbName, String tableName, int index, - String location, List dataCols) { - return PartitionInfo.newBuilder() - .setDbName(dbName) - .setTableName(tableName) - .setPartitionName(getPartName(index)) - .addAllDataCols(dataCols) - .setStorage(Storage.newBuilder().setLocation(location).build()) - .build(); - } - - @Override - public String getName() { - return mName; - } - - @Override - public Schema getSchema() { - return mSchema; - } - - @Override - public String getOwner() { - return "testowner"; - } - - @Override - public Map getParameters() { - return Collections.emptyMap(); - } - - @Override - public List getPartitionCols() { - return mPartitionCols; - } - - @Override - public Layout getLayout() { - return mTableLayout; - } - - @Override - public List getStatistics() { - return mStats; - } - - @Override - public List getPartitions() { - return mTestPartitions; - } - - private class TestPartition implements UdbPartition { - private HiveLayout mLayout; - - private TestPartition(HiveLayout hiveLayout) { - mLayout = hiveLayout; - } - - @Override - public String getSpec() { - return mLayout.getSpec(); - } - - @Override - public alluxio.table.common.Layout getLayout() { - return mLayout; - } - } -} diff --git a/dora/table/server/master/src/test/java/alluxio/master/table/transform/TransformManagerTest.java b/dora/table/server/master/src/test/java/alluxio/master/table/transform/TransformManagerTest.java deleted file mode 100644 index a38e72b3ce62..000000000000 --- a/dora/table/server/master/src/test/java/alluxio/master/table/transform/TransformManagerTest.java +++ /dev/null @@ 
-1,355 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.table.transform; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.mockito.ArgumentMatchers.any; - -import alluxio.client.job.JobMasterClient; -import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; -import alluxio.exception.ExceptionMessage; -import alluxio.exception.status.NotFoundException; -import alluxio.heartbeat.HeartbeatContext; -import alluxio.heartbeat.HeartbeatScheduler; -import alluxio.heartbeat.ManuallyScheduleHeartbeat; -import alluxio.job.JobConfig; -import alluxio.job.wire.PlanInfo; -import alluxio.job.wire.Status; -import alluxio.master.CoreMasterContext; -import alluxio.master.MasterTestUtils; -import alluxio.master.PortRegistry; -import alluxio.master.journal.JournalSystem; -import alluxio.master.journal.JournalTestUtils; -import alluxio.master.journal.JournalType; -import alluxio.master.table.DefaultTableMaster; -import alluxio.master.table.Partition; -import alluxio.master.table.TableMaster; -import alluxio.master.table.TestDatabase; -import alluxio.master.table.TestUdbFactory; -import alluxio.table.common.Layout; - -import org.junit.After; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.junit.rules.TemporaryFolder; -import org.mockito.Mockito; - -import java.io.IOException; -import java.util.Collections; -import java.util.Map; -import 
java.util.Random; -import javax.annotation.Nullable; - -/** - * See the design of {@link alluxio.master.table.transform.TransformManager}. - * - * There are two components that might crash: - * 1. the {@link TableMaster} running the manager; - * 2. the job service. - * - * The test verifies that the manager works in the following cases: - * - * 1. transformation can be executed, table partitions can be updated automatically, and - * transformation job history is kept correctly; - * 2. if table master is restarted, and job service completes the transformation jobs during - * restarting, after restarting, the manager still updates the partitions' locations; - * 3. if job master crashes before finishing the transformation jobs, - * or a transformation job fails, the transformation job status and reason of failure are kept. - * - * The test mocks the manager's {@link alluxio.client.job.JobMasterClient} without running a real - * job service. - * - * The test manually controls the manager's heartbeat by {@link HeartbeatScheduler}. 
- */ -public class TransformManagerTest { - private static final int NUM_TABLES = 3; - private static final int NUM_PARTITIONS = 2; - private static final String DB = TestDatabase.TEST_UDB_NAME; - private static final String TABLE1 = TestDatabase.getTableName(0); - private static final String TABLE2 = TestDatabase.getTableName(1); - private static final String TABLE3 = TestDatabase.getTableName(2); - private static final String EMPTY_DEFINITION = ""; - private static final String DEFINITION1 = "file.count.max=1"; - private static final String DEFINITION2 = "file.count.max=2"; - - private JournalSystem mJournalSystem; - private TableMaster mTableMaster; - private JobMasterClient mMockJobMasterClient; - - @Rule - public TemporaryFolder mTemporaryFolder = new TemporaryFolder(); - - @Rule - public ExpectedException mException = ExpectedException.none(); - - @Rule - public ManuallyScheduleHeartbeat mManualScheduler = - new ManuallyScheduleHeartbeat(HeartbeatContext.MASTER_TABLE_TRANSFORMATION_MONITOR); - - @Before - public void before() throws Exception { - Configuration.set(PropertyKey.MASTER_HOSTNAME, "localhost"); - Configuration.set(PropertyKey.MASTER_RPC_PORT, PortRegistry.getFreePort()); - Configuration.set(PropertyKey.MASTER_JOURNAL_TYPE, JournalType.UFS); - Configuration.set(PropertyKey.TABLE_TRANSFORM_MANAGER_JOB_HISTORY_RETENTION_TIME, "1h"); - - mJournalSystem = JournalTestUtils.createJournalSystem(mTemporaryFolder); - mJournalSystem.format(); - - CoreMasterContext context = MasterTestUtils.testMasterContext(mJournalSystem); - mMockJobMasterClient = Mockito.mock(JobMasterClient.class); - mTableMaster = new DefaultTableMaster(context, mMockJobMasterClient); - - start(); - - TestDatabase.genTable(NUM_TABLES, NUM_PARTITIONS, false); - mTableMaster - .attachDatabase(TestUdbFactory.TYPE, "connect", DB, DB, Collections.emptyMap(), false); - } - - @After - public void after() throws Exception { - stop(); - } - - /** - * There should only be at most one running job 
on a table. - */ - @Test - public void noConcurrentJobOnSameTable() throws Exception { - long jobId = transform(TABLE1, DEFINITION1); - - // Try to run another transformation on the same table, since the previous transformation on - // the table hasn't finished, this transformation should not be able to proceed. - expectException(IOException.class, - ExceptionMessage.TABLE_BEING_TRANSFORMED.getMessage(Long.toString(jobId), TABLE1, DB)); - transform(TABLE1, DEFINITION2); - } - - /** - * A job with the same definition as the last transformation on the table is a repeated job, - * a repeated job should not be executed. - */ - @Test - public void noRepeatedJobOnSameTable() throws Exception { - long jobId = transform(TABLE1, DEFINITION1); - mockJobStatus(jobId, Status.COMPLETED, null); - heartbeat(); - - expectException(IOException.class, - ExceptionMessage.TABLE_ALREADY_TRANSFORMED.getMessage(DB, TABLE1, DEFINITION1)); - transform(TABLE1, DEFINITION1); - } - - /** - * When getting job information for a non-existing transformation, exception is thrown. - */ - @Test - public void getInfoForNonExistingTransformJob() throws Exception { - assertTrue(mTableMaster.getAllTransformJobInfo().isEmpty()); - long nonExistingJobId = -1; - - expectException(IOException.class, - ExceptionMessage.TRANSFORM_JOB_DOES_NOT_EXIST.getMessage(nonExistingJobId)); - mTableMaster.getTransformJobInfo(nonExistingJobId); - } - - /** - * This verifies what kind of definition will be used when the default definition is used. - */ - @Test - public void defaultJob() throws Exception { - assertTrue(mTableMaster.getAllTransformJobInfo().isEmpty()); - - // Starts 1 job. - long jobId1 = transform(TABLE1, EMPTY_DEFINITION); - checkTransformJobInfo(mTableMaster.getTransformJobInfo(jobId1), TABLE1, - DefaultTableMaster.DEFAULT_TRANSFORMATION, - jobId1, Status.RUNNING, null); - // Updates job status in heartbeat. 
- mockJobStatus(jobId1, Status.COMPLETED, null); - heartbeat(); - - // Checks that the layout for job1 is the transformed layout. - assertEquals(1, mTableMaster.getAllTransformJobInfo().size()); - TransformJobInfo job1Info = mTableMaster.getTransformJobInfo(jobId1); - checkTransformJobInfo(job1Info, TABLE1, DefaultTableMaster.DEFAULT_TRANSFORMATION, jobId1, - Status.COMPLETED, null); - } - - /** - * 1. when neither table master nor job master restarts, job status and partition locations are - * updated correctly; - * 2. when table master restarts, since information of the previous running jobs are - * journaled, once the jobs finish, their status and partition locations are updated. - * But the history of finished jobs (either succeeded or failed) is lost because the history is - * not journaled. - */ - @Test - public void jobHistory() throws Exception { - assertTrue(mTableMaster.getAllTransformJobInfo().isEmpty()); - - // Starts 3 jobs. - long jobId1 = transform(TABLE1, DEFINITION1); - long jobId2 = transform(TABLE2, DEFINITION2); - long jobId3 = transform(TABLE3, DEFINITION1); - - // Verifies that all jobs are running. - assertEquals(3, mTableMaster.getAllTransformJobInfo().size()); - checkTransformJobInfo(mTableMaster.getTransformJobInfo(jobId1), TABLE1, DEFINITION1, jobId1, - Status.RUNNING, null); - checkTransformJobInfo(mTableMaster.getTransformJobInfo(jobId2), TABLE2, DEFINITION2, jobId2, - Status.RUNNING, null); - checkTransformJobInfo(mTableMaster.getTransformJobInfo(jobId3), TABLE3, DEFINITION1, jobId3, - Status.RUNNING, null); - - // Updates job status in heartbeat. - mockJobStatus(jobId1, Status.COMPLETED, null); - mockJobStatus(jobId2, Status.FAILED, "error"); - mockJobStatus(jobId3, Status.RUNNING, null); - heartbeat(); - - // Verifies that job status has been updated by the heartbeat. 
- assertEquals(3, mTableMaster.getAllTransformJobInfo().size()); - TransformJobInfo job1Info = mTableMaster.getTransformJobInfo(jobId1); - checkTransformJobInfo(job1Info, TABLE1, DEFINITION1, jobId1, - Status.COMPLETED, null); - checkTransformJobInfo(mTableMaster.getTransformJobInfo(jobId2), TABLE2, DEFINITION2, jobId2, - Status.FAILED, "error"); - checkTransformJobInfo(mTableMaster.getTransformJobInfo(jobId3), TABLE3, DEFINITION1, jobId3, - Status.RUNNING, null); - - restart(); - - // Checks that the layout for job1 is the transformed layout. - checkLayout(job1Info, TABLE1); - - // Restarting table master will lose history for finished jobs, - // but history for running jobs are journaled and replayed. - assertEquals(1, mTableMaster.getAllTransformJobInfo().size()); - checkTransformJobInfo(mTableMaster.getTransformJobInfo(jobId3), TABLE3, DEFINITION1, jobId3, - Status.RUNNING, null); - - // Now completes job 3. - mockJobStatus(jobId3, Status.COMPLETED, null); - heartbeat(); - assertEquals(1, mTableMaster.getAllTransformJobInfo().size()); - checkTransformJobInfo(mTableMaster.getTransformJobInfo(jobId3), TABLE3, DEFINITION1, jobId3, - Status.COMPLETED, null); - } - - /** - * If the job master crashes, since job info is not journaled in job master, when it restarts, - * the previous transformation jobs cannot be found, the manager will update the status of these - * jobs as failed with descriptive error message. 
- */ - @Test - public void jobMasterRestart() throws Exception { - long jobId = transform(TABLE1, DEFINITION1); - Mockito.when(mMockJobMasterClient.getJobStatus(jobId)).thenThrow(new NotFoundException("none")); - heartbeat(); - assertEquals(1, mTableMaster.getAllTransformJobInfo().size()); - checkTransformJobInfo(mTableMaster.getTransformJobInfo(jobId), TABLE1, DEFINITION1, jobId, - Status.FAILED, ExceptionMessage.TRANSFORM_JOB_ID_NOT_FOUND_IN_JOB_SERVICE.getMessage( - jobId, DB, TABLE1, "none")); - } - - private void start() throws Exception { - mJournalSystem.start(); - mJournalSystem.gainPrimacy(); - mTableMaster.start(true); - } - - private void stop() throws Exception { - mTableMaster.stop(); - mJournalSystem.stop(); - } - - private void restart() throws Exception { - stop(); - start(); - } - - private long getRandomJobId() { - Random random = new Random(); - return random.nextLong(); - } - - private long transform(String table, String definition) throws Exception { - Mockito.when(mMockJobMasterClient.run(any(JobConfig.class))).thenReturn(getRandomJobId()); - return mTableMaster.transformTable(DB, table, definition); - } - - private void expectException(Class cls, String msg) { - mException.expect(cls); - mException.expectMessage(msg); - } - - /** - * Assert that the transform job information is expected. - * If status is {@link Status#COMPLETED}, assert that the partitions' locations are updated. 
- * - * @param info the transform job info - * @param table the expected table name - * @param definition the expected transform definition - * @param jobId the expected job ID - * @param status the expected job status - * @param error the expected job error - */ - private void checkTransformJobInfo(TransformJobInfo info, String table, String definition, - long jobId, Status status, @Nullable String error) throws Exception { - assertEquals(DB, info.getDb()); - assertEquals(table, info.getTable()); - assertEquals(definition, info.getDefinition()); - assertEquals(jobId, info.getJobId()); - assertEquals(status, info.getJobStatus()); - if (error != null) { - assertEquals(error, info.getJobErrorMessage()); - } else { - assertEquals("", info.getJobErrorMessage()); - } - if (status == Status.COMPLETED) { - checkLayout(info, table); - } - } - - /** - * Checks that the layouts of table partitions are the transformed layouts in info. - * - * @param info the job information - * @param table the table - */ - private void checkLayout(TransformJobInfo info, String table) throws IOException { - for (Map.Entry specLayouts : info.getTransformedLayouts().entrySet()) { - String spec = specLayouts.getKey(); - Layout layout = specLayouts.getValue(); - Partition partition = mTableMaster.getTable(DB, table).getPartition(spec); - assertTrue(partition.isTransformed(info.getDefinition())); - assertEquals(layout, partition.getLayout()); - } - } - - private void mockJobStatus(long jobId, Status status, @Nullable String error) - throws Exception { - // Mock job status. 
- PlanInfo jobInfo = new PlanInfo(jobId, "test", status, 0, error); - Mockito.when(mMockJobMasterClient.getJobStatus(jobId)).thenReturn(jobInfo); - } - - private void heartbeat() throws Exception { - HeartbeatScheduler.execute(HeartbeatContext.MASTER_TABLE_TRANSFORMATION_MONITOR); - } -} diff --git a/dora/table/server/master/src/test/resources/META-INF/services/alluxio.table.common.udb.UnderDatabaseFactory b/dora/table/server/master/src/test/resources/META-INF/services/alluxio.table.common.udb.UnderDatabaseFactory deleted file mode 100644 index 273abbe6f5dc..000000000000 --- a/dora/table/server/master/src/test/resources/META-INF/services/alluxio.table.common.udb.UnderDatabaseFactory +++ /dev/null @@ -1,12 +0,0 @@ -# -# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 -# (the "License"). You may not use this work except in compliance with the License, which is -# available at www.apache.org/licenses/LICENSE-2.0 -# -# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -# either express or implied, as more fully set forth in the License. -# -# See the NOTICE file distributed with this work for information regarding copyright ownership. 
-# - -alluxio.master.table.TestUdbFactory diff --git a/dora/table/server/pom.xml b/dora/table/server/pom.xml deleted file mode 100644 index bda1d34adac7..000000000000 --- a/dora/table/server/pom.xml +++ /dev/null @@ -1,36 +0,0 @@ - - - 4.0.0 - - alluxio-table - org.alluxio - 301-SNAPSHOT - - alluxio-table-server - pom - Alluxio Table - Server - Alluxio Table server components - - - common - master - underdb - - - - - ${project.parent.parent.parent.basedir}/build - - diff --git a/dora/table/server/underdb/glue/pom.xml b/dora/table/server/underdb/glue/pom.xml deleted file mode 100644 index 339367070674..000000000000 --- a/dora/table/server/underdb/glue/pom.xml +++ /dev/null @@ -1,152 +0,0 @@ - - - - alluxio-table-server-underdb - org.alluxio - 301-SNAPSHOT - - 4.0.0 - - alluxio-table-server-underdb-glue - jar - Alluxio Table - Server - UnderDB - Glue - Alluxio table underDB implementation for aws glue - - - - - ${project.parent.parent.parent.parent.parent.basedir}/build - 1.11.820 - 1.11.820 - 2.3.7 - - - - - - com.amazonaws - aws-java-sdk-glue - ${glue.version} - - - - com.amazonaws - aws-java-sdk-core - ${aws.java.jdk.version} - - - - - com.sun.xml.bind - jaxb-impl - runtime - - - - org.apache.hive - hive-common - ${hive-metastore.version} - compile - - - org.apache.logging.log4j - log4j-1.2-api - - - org.apache.logging.log4j - log4j-slf4j-impl - - - com.google.protobuf - protobuf-java - - - - - - - - java11 - - 11 - - - - org.apache.hive - hive-common - ${hive-metastore.version} - compile - - - org.apache.logging.log4j - log4j-1.2-api - - - org.apache.logging.log4j - log4j-slf4j-impl - - - com.google.protobuf - protobuf-java - - - jdk.tools - jdk.tools - - - - - - - - - - - exec-maven-plugin - org.codehaus.mojo - false - - - copy-lib-jars-selectively - install - - exec - - - ${build.path}/lib/copy_jars.sh - - ${project.artifactId} - ${basedir}/target/${project.artifactId}-${project.version}-jar-with-dependencies.jar - 
${build.path}/../lib/${project.artifactId}-${project.version}.jar - - - - - - - maven-clean-plugin - - - - ${build.path}/../lib - - **/${project.artifactId}-*.jar - - - - - - - - diff --git a/dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/GlueDatabase.java b/dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/GlueDatabase.java deleted file mode 100644 index 54172fcaa980..000000000000 --- a/dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/GlueDatabase.java +++ /dev/null @@ -1,524 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.under.glue; - -import alluxio.AlluxioURI; -import alluxio.exception.AlluxioException; -import alluxio.exception.status.NotFoundException; -import alluxio.grpc.table.ColumnStatisticsInfo; -import alluxio.grpc.table.Layout; -import alluxio.grpc.table.layout.hive.PartitionInfo; -import alluxio.master.table.DatabaseInfo; -import alluxio.table.common.UdbPartition; -import alluxio.table.common.layout.HiveLayout; -import alluxio.table.common.udb.PathTranslator; -import alluxio.table.common.udb.UdbBypassSpec; -import alluxio.table.common.udb.UdbConfiguration; -import alluxio.table.common.udb.UdbContext; -import alluxio.table.common.udb.UdbTable; -import alluxio.table.common.udb.UdbUtils; -import alluxio.table.common.udb.UnderDatabase; -import alluxio.util.io.PathUtils; - -import com.amazonaws.AmazonClientException; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.Protocol; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; -import com.amazonaws.services.glue.AWSGlueAsync; -import com.amazonaws.services.glue.AWSGlueAsyncClientBuilder; -import com.amazonaws.services.glue.model.AWSGlueException; -import com.amazonaws.services.glue.model.Column; -import com.amazonaws.services.glue.model.Database; -import com.amazonaws.services.glue.model.EntityNotFoundException; -import com.amazonaws.services.glue.model.GetColumnStatisticsForPartitionRequest; -import com.amazonaws.services.glue.model.GetColumnStatisticsForTableRequest; -import com.amazonaws.services.glue.model.GetDatabaseRequest; -import com.amazonaws.services.glue.model.GetDatabaseResult; -import com.amazonaws.services.glue.model.GetPartitionsRequest; -import com.amazonaws.services.glue.model.GetPartitionsResult; -import com.amazonaws.services.glue.model.GetTableRequest; -import 
com.amazonaws.services.glue.model.GetTablesRequest; -import com.amazonaws.services.glue.model.GetTablesResult; -import com.amazonaws.services.glue.model.GlueEncryptionException; -import com.amazonaws.services.glue.model.Partition; -import com.amazonaws.services.glue.model.Table; -import com.amazonaws.services.glue.model.ValidationException; -import com.google.common.annotations.VisibleForTesting; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -/** - * Glue database implementation. - */ -public class GlueDatabase implements UnderDatabase { - private static final Logger LOG = LoggerFactory.getLogger(GlueDatabase.class); - - private final UdbContext mUdbContext; - private final AWSGlueAsync mGlueClient; - private final UdbConfiguration mGlueConfiguration; - /** the name of the glue db. */ - private final String mGlueDbName; - - /** the owner name of glue database, which is an fake place holder. */ - private final String mOwnerName = "PUBLIC_OWNER"; - private final alluxio.grpc.table.PrincipalType mOwnerType = alluxio.grpc.table.PrincipalType.ROLE; - - @VisibleForTesting - protected GlueDatabase(UdbContext udbContext, UdbConfiguration glueConfig, String glueDbName) { - mUdbContext = udbContext; - mGlueConfiguration = glueConfig; - mGlueClient = createAsyncGlueClient(glueConfig); - mGlueDbName = glueDbName; - } - - /** - * Create an instance of the Glue database UDB. 
- * - * @param udbContext the db context - * @param configuration the configuration - * @return the new instance - */ - public static GlueDatabase create(UdbContext udbContext, UdbConfiguration configuration) { - String glueDbName = udbContext.getUdbDbName(); - if (glueDbName == null || glueDbName.isEmpty()) { - throw new IllegalArgumentException( - "Glue database name cannot be empty: " + glueDbName); - } else if (configuration.get(Property.GLUE_REGION) == null) { - throw new IllegalArgumentException("GlueUdb Error: Please setup aws region."); - } - - return new GlueDatabase(udbContext, configuration, glueDbName); - } - - @Override - public UdbContext getUdbContext() { - return mUdbContext; - } - - @Override - public DatabaseInfo getDatabaseInfo() throws IOException { - try { - GetDatabaseRequest dbRequest = new GetDatabaseRequest() - .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID)) - .withName(mGlueDbName); - GetDatabaseResult dbResult = mGlueClient.getDatabase(dbRequest); - Database glueDatabase = dbResult.getDatabase(); - // Glue database location, description and parameters could be null - String glueDbLocation = glueDatabase.getLocationUri() == null - ? "" : glueDatabase.getLocationUri(); - String glueDbDescription = glueDatabase.getDescription() == null - ? "" : glueDatabase.getDescription(); - Map glueParameters = new HashMap<>(); - if (glueDatabase.getParameters() != null) { - glueParameters.putAll(glueDatabase.getParameters()); - } - return new DatabaseInfo( - glueDbLocation, - mOwnerName, - mOwnerType, - glueDbDescription, - glueParameters); - } catch (EntityNotFoundException e) { - throw new IOException("Cannot find glue database: " + mGlueDbName - + "Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) - + ". " + e.getMessage(), e); - } - } - - /** - * This method allows user to test udb glue client with remote glue server. 
- * - * @param config udbconfiguration - * @return glue async client - */ - protected static AWSGlueAsync createAsyncGlueClient(UdbConfiguration config) { - ClientConfiguration clientConfig = new ClientConfiguration() - .withMaxConnections(config.getInt(Property.MAX_GLUE_CONNECTION)); - - if (!config.get(Property.AWS_PROXY_HOST).isEmpty()) { - clientConfig.withProxyProtocol(getProtocol(config.get(Property.AWS_PROXY_PROTOCOL))) - .withProxyHost(config.get(Property.AWS_PROXY_HOST)) - .withProxyPort(config.getInt(Property.AWS_PROXY_PORT)) - .withProxyUsername(config.get(Property.AWS_PROXY_USER_NAME)) - .withProxyPassword(config.get(Property.AWS_PROXY_PASSWORD)); - } - - AWSGlueAsyncClientBuilder asyncClientBuilder = AWSGlueAsyncClientBuilder - .standard() - .withClientConfiguration(clientConfig); - - if (!config.get(Property.GLUE_REGION).isEmpty()) { - LOG.info("Set Glue region: {}.", config.get(Property.GLUE_REGION)); - asyncClientBuilder.setRegion(config.get(Property.GLUE_REGION)); - } else { - LOG.warn("GlueDatabase: Please setup the AWS region."); - } - - asyncClientBuilder.setCredentials(getAWSCredentialsProvider(config)); - - return asyncClientBuilder.build(); - } - - private static AWSCredentialsProvider getAWSCredentialsProvider(UdbConfiguration config) { - //TODO(shouwei): add compelete authentication method for glue udb - if (!config.get(Property.AWS_GLUE_ACCESS_KEY).isEmpty() - && !config.get(Property.AWS_GLUE_SECRET_KEY).isEmpty()) { - return new AWSStaticCredentialsProvider( - new BasicAWSCredentials( - config.get(Property.AWS_GLUE_ACCESS_KEY), - config.get(Property.AWS_GLUE_SECRET_KEY))); - } - return DefaultAWSCredentialsProviderChain.getInstance(); - } - - private static Protocol getProtocol(String protocol) { - if (protocol.equals("HTTP")) { - return Protocol.HTTP; - } else if (protocol.equals("HTTPS")) { - return Protocol.HTTPS; - } else { - LOG.warn("Invalid protocol type {}." 
- + "Avaiable proxy protocol type HTTP and HTTPS.", protocol); - } - return null; - } - - @Override - public String getType() { - return GlueDatabaseFactory.TYPE; - } - - @Override - public String getName() { - return mGlueDbName; - } - - @Override - public List getTableNames() throws IOException { - try { - String nextToken = null; - List tableNames = new ArrayList<>(); - do { - GetTablesRequest tablesRequest = - new GetTablesRequest() - .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID)) - .withDatabaseName(mGlueDbName) - .withNextToken(nextToken); - GetTablesResult tablesResult = mGlueClient.getTables(tablesRequest); - tablesResult.getTableList().forEach(table -> tableNames.add(table.getName())); - nextToken = tablesResult.getNextToken(); - } while (nextToken != null); - return tableNames; - } catch (EntityNotFoundException e) { - throw new IOException("Failed to get glue tables: " + e.getMessage() - + " in Database: " + mGlueDbName - + "; with Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + ".", e); - } - } - - @VisibleForTesting - private PathTranslator mountAlluxioPaths(Table table, List partitions, - UdbBypassSpec bypassSpec) - throws IOException { - String tableName = table.getName(); - AlluxioURI ufsUri; - AlluxioURI alluxioUri = mUdbContext.getTableLocation(tableName); - String glueUfsUri = table.getStorageDescriptor().getLocation(); - - try { - PathTranslator pathTranslator = new PathTranslator(); - if (bypassSpec.hasFullTable(tableName)) { - pathTranslator.addMapping(glueUfsUri, glueUfsUri); - return pathTranslator; - } - ufsUri = new AlluxioURI(table.getStorageDescriptor().getLocation()); - pathTranslator.addMapping( - UdbUtils.mountAlluxioPath( - tableName, - ufsUri, - alluxioUri, - mUdbContext, - mGlueConfiguration), - glueUfsUri); - - for (Partition partition : partitions) { - AlluxioURI partitionUri; - String partitionName; - if (partition.getStorageDescriptor() != null - && partition.getStorageDescriptor().getLocation() != 
null - && ufsUri.isAncestorOf( - partitionUri = new AlluxioURI( - partition.getStorageDescriptor().getLocation()))) { - glueUfsUri = partition.getStorageDescriptor().getLocation(); - partitionName = partition.getValues().toString(); - try { - partitionName = GlueUtils.makePartitionName( - table.getPartitionKeys(), - partition.getValues()); - } catch (IOException e) { - LOG.warn("Error making partition name for table {}," - + " partition {} in database {} with CatalogID {}.", - tableName, - partition.getValues().toString(), - mGlueDbName, - mGlueConfiguration.get(Property.CATALOG_ID)); - } - if (bypassSpec.hasPartition(tableName, partitionName)) { - pathTranslator.addMapping(partitionUri.getPath(), partitionUri.getPath()); - continue; - } - alluxioUri = new AlluxioURI( - PathUtils.concatPath( - mUdbContext.getTableLocation(tableName).getPath(), - partitionName)); - // mount partition path if it is not already mounted as part of the table path mount - pathTranslator - .addMapping( - UdbUtils.mountAlluxioPath( - tableName, - partitionUri, - alluxioUri, - mUdbContext, - mGlueConfiguration), - glueUfsUri); - } - } - return pathTranslator; - } catch (AlluxioException e) { - throw new IOException( - "Failed to mount table location. 
tableName: " + tableName - + " glueUfsLocation: " + glueUfsUri - + " AlluxioLocation: " + alluxioUri + " error: " + e.getMessage(), e); - } - } - - private List getTableColumnStatistics(String dbName, String tableName, - GetColumnStatisticsForTableRequest getColumnStatisticsForTableRequest) { - // TODO(shouwei): Add Async support for table column statistics - try { - return getClient().getColumnStatisticsForTable(getColumnStatisticsForTableRequest) - .getColumnStatisticsList().stream().map(GlueUtils::toProto).collect(Collectors.toList()); - } catch (AmazonClientException e) { - LOG.warn("Cannot get the table column statistics info for table {}.{} with error {}.", - dbName, tableName, e.toString()); - } - return Collections.emptyList(); - } - - private List getPartitionColumnStatistics(String dbName, String tableName, - GetColumnStatisticsForPartitionRequest getColumnStatisticsForPartitionRequest) { - // TODO(shouwei): Add Async support for partition column statistics - try { - List partColumnStatistic = getClient() - .getColumnStatisticsForPartition(getColumnStatisticsForPartitionRequest) - .getColumnStatisticsList().stream().map(GlueUtils::toProto).collect(Collectors.toList()); - return partColumnStatistic; - } catch (AmazonClientException e) { - LOG.warn("Cannot get the partition column statistics info for table {}.{} with error {}.", - dbName, tableName, e.toString()); - } - return Collections.emptyList(); - } - - @Override - public UdbTable getTable(String tableName, UdbBypassSpec bypassSpec) throws IOException { - Table table; - List partitions; - try { - GetTableRequest tableRequest = new GetTableRequest() - .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID)) - .withDatabaseName(mGlueDbName) - .withName(tableName); - table = getClient().getTable(tableRequest).getTable(); - - partitions = batchGetPartitions(getClient(), tableName); - PathTranslator pathTranslator = mountAlluxioPaths(table, partitions, bypassSpec); - - List partitionColumns; - if 
(table.getPartitionKeys() == null) { - partitionColumns = Collections.emptyList(); - } else { - partitionColumns = table.getPartitionKeys(); - } - - // Get table parameters - Map tableParameters = table.getParameters() == null - ? Collections.emptyMap() : table.getParameters(); - - // Get column statistics info for table - List columnNames = table.getStorageDescriptor() - .getColumns().stream().map(Column::getName).collect(Collectors.toList()); - GetColumnStatisticsForTableRequest getColumnStatisticsForTableRequest = - new GetColumnStatisticsForTableRequest() - .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID)) - .withDatabaseName(mGlueDbName) - .withTableName(tableName) - .withColumnNames(columnNames); - List columnStatisticsTableData = new ArrayList<>(); - if (mGlueConfiguration.getBoolean(Property.TABLE_COLUMN_STATISTICS_ENABLE)) { - columnStatisticsTableData = getTableColumnStatistics( - mGlueDbName, tableName, getColumnStatisticsForTableRequest); - } - - // Get column statistics info for partitions - // potential expensive call - Map> statsMap = new HashMap<>(); - if (mGlueConfiguration.getBoolean(Property.PARTITION_COLUMN_STATISTICS_ENABLE)) { - for (Partition partition : partitions) { - List partitionValue = partition.getValues(); - if (partitionValue != null) { - GetColumnStatisticsForPartitionRequest getColumnStatisticsForPartitionRequest = - new GetColumnStatisticsForPartitionRequest() - .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID)) - .withDatabaseName(mGlueDbName) - .withTableName(tableName) - .withColumnNames(columnNames) - .withPartitionValues(partitionValue); - String partName = GlueUtils.makePartitionName(partitionColumns, partition.getValues()); - statsMap.put(partName, getPartitionColumnStatistics( - mGlueDbName, tableName, getColumnStatisticsForPartitionRequest)); - } - } - } - - PartitionInfo partitionInfo = PartitionInfo.newBuilder() - // Database name is not required for glue table, use mGlueDbName - 
.setDbName(mGlueDbName) - .setTableName(tableName) - .addAllDataCols(GlueUtils.toProto(table.getStorageDescriptor().getColumns())) - .setStorage(GlueUtils.toProto(table.getStorageDescriptor(), pathTranslator)) - .putAllParameters(tableParameters) - .build(); - - Layout layout = Layout.newBuilder() - .setLayoutType(HiveLayout.TYPE) - .setLayoutData(partitionInfo.toByteString()) - .build(); - - List udbPartitions = new ArrayList<>(); - if (partitionColumns.isEmpty()) { - PartitionInfo.Builder partitionInfoBuilder = PartitionInfo.newBuilder() - .setDbName(mGlueDbName) - .setTableName(tableName) - .addAllDataCols(GlueUtils.toProto(table.getStorageDescriptor().getColumns())) - .setStorage(GlueUtils.toProto(table.getStorageDescriptor(), pathTranslator)) - .setPartitionName(tableName) - .putAllParameters(tableParameters); - udbPartitions.add(new GluePartition( - new HiveLayout(partitionInfoBuilder.build(), Collections.emptyList()))); - } else { - for (Partition partition : partitions) { - String partName = GlueUtils.makePartitionName(partitionColumns, partition.getValues()); - PartitionInfo.Builder partitionInfoBuilder = PartitionInfo.newBuilder() - .setDbName(mGlueDbName) - .setTableName(tableName) - .addAllDataCols(GlueUtils.toProto(partition.getStorageDescriptor().getColumns())) - .setStorage(GlueUtils.toProto(partition.getStorageDescriptor(), pathTranslator)) - .setPartitionName(partName) - .putAllParameters(partition.getParameters() == null - ? 
Collections.emptyMap() : partition.getParameters()); - if (partition.getValues() != null) { - partitionInfoBuilder.addAllValues(partition.getValues()); - } - udbPartitions.add(new GluePartition(new HiveLayout(partitionInfoBuilder.build(), - statsMap.getOrDefault(partName, Collections.emptyList())))); - } - } - - return new GlueTable(this, - pathTranslator, - tableName, - GlueUtils.toProtoSchema(table.getStorageDescriptor().getColumns()), - columnStatisticsTableData, - // Glue does not provide FieldSchema from API directly - // Get FieldSchema from partition keys - GlueUtils.toProto(table.getPartitionKeys()), - udbPartitions, - layout, - table); - } catch (EntityNotFoundException e) { - throw new NotFoundException("Table " + tableName - + " does not exist in Database: " + mGlueDbName - + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) - + ".", e); - } catch (ValidationException e) { - throw new IOException("Failed to get table: " + tableName - + " in Database: " + mGlueDbName - + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) - + " with validation error: " + e.getMessage(), e); - } catch (GlueEncryptionException e) { - throw new IOException("Failed to get table: " + tableName - + " in Database: " + mGlueDbName - + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) - + " error: " + e.getMessage(), e); - } - } - - private List batchGetPartitions(AWSGlueAsync glueClient, String tableName) - throws IOException { - // TODO(shouwei): make getPartition multi-thread to accelerate the large table fetching - List partitions = new ArrayList<>(); - String nextToken = null; - try { - do { - GetPartitionsRequest getPartitionsRequest = - new GetPartitionsRequest() - .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID)) - .withDatabaseName(mGlueDbName) - .withTableName(tableName) - .withMaxResults(mGlueConfiguration.getInt(Property.MAX_GLUE_FETCH_PARTITIONS)) - .withNextToken(nextToken); - GetPartitionsResult getPartitionsResult = 
glueClient.getPartitions(getPartitionsRequest); - partitions.addAll(getPartitionsResult.getPartitions()); - nextToken = getPartitionsResult.getNextToken(); - LOG.debug("Glue table {}.{} adding {} batch partitions with total {} partitions.", - mGlueDbName, tableName, getPartitionsResult.getPartitions().size(), partitions.size()); - } while (nextToken != null); - - if (partitions != null) { - LOG.info("Glue table {}.{} has {} partitions.", - mGlueDbName, tableName, partitions.size()); - if (LOG.isDebugEnabled()) { - partitions.stream().forEach(partition -> - LOG.debug("Glue table {}.{} with partition: {}.", - partition.getDatabaseName(), tableName, partition)); - } - } - return partitions; - } catch (AWSGlueException e) { - throw new IOException("Cannot get partition information for table: " + tableName - + " in Database: " + mGlueDbName - + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) - + ". error: " + e.getMessage(), e); - } - } - - /** - * Get Glue Client. - * - * @return async glue client - */ - public AWSGlueAsync getClient() { - return mGlueClient; - } -} diff --git a/dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/GlueDatabaseFactory.java b/dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/GlueDatabaseFactory.java deleted file mode 100644 index e6ecde1211ed..000000000000 --- a/dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/GlueDatabaseFactory.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. 
- * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.under.glue; - -import alluxio.table.common.udb.UdbConfiguration; -import alluxio.table.common.udb.UdbContext; -import alluxio.table.common.udb.UnderDatabase; -import alluxio.table.common.udb.UnderDatabaseFactory; - -/** - * Glue database factory to create database implementation. - */ -public class GlueDatabaseFactory implements UnderDatabaseFactory { - public static final String TYPE = "glue"; - - @Override - public String getType() { - return TYPE; - } - - @Override - public UnderDatabase create(UdbContext udbContext, UdbConfiguration configuration) { - return GlueDatabase.create(udbContext, configuration); - } -} diff --git a/dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/GluePartition.java b/dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/GluePartition.java deleted file mode 100644 index d0069e7ba9ac..000000000000 --- a/dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/GluePartition.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.under.glue; - -import alluxio.table.common.Layout; -import alluxio.table.common.UdbPartition; -import alluxio.table.common.layout.HiveLayout; - -import com.google.common.base.MoreObjects; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Glue partition implementation. 
- */ -public class GluePartition implements UdbPartition { - private static final Logger LOG = LoggerFactory.getLogger(GluePartition.class); - - private final HiveLayout mLayout; - - /** - * Create Glue partition instance. - * - * @param layout glue table layout - */ - public GluePartition(HiveLayout layout) { - mLayout = layout; - } - - @Override - public String getSpec() { - return mLayout.getSpec(); - } - - @Override - public Layout getLayout() { - return mLayout; - } - - @Override - public String toString() { - return MoreObjects.toStringHelper(this) - .add("Spec", getSpec()) - .add("Values", mLayout.getData().getValuesList()) - .add("PartitionName", mLayout.getData().getPartitionName()) - .add("DatabaseName", mLayout.getData().getDbName()) - .add("TableName", mLayout.getData().getTableName()) - .add("Parameters", mLayout.getData().getParametersMap()) - .toString(); - } -} diff --git a/dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/GlueTable.java b/dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/GlueTable.java deleted file mode 100644 index 04d2f905ab3f..000000000000 --- a/dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/GlueTable.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.under.glue; - -import alluxio.grpc.table.ColumnStatisticsInfo; -import alluxio.grpc.table.FieldSchema; -import alluxio.grpc.table.Layout; -import alluxio.grpc.table.Schema; -import alluxio.table.common.UdbPartition; -import alluxio.table.common.udb.PathTranslator; -import alluxio.table.common.udb.UdbTable; - -import com.amazonaws.services.glue.model.Table; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Collections; -import java.util.List; -import java.util.Map; - -/** - * Glue table implementation. - */ -public class GlueTable implements UdbTable { - private static final Logger LOG = LoggerFactory.getLogger(GlueTable.class); - - private final GlueDatabase mGlueDatabase; - private final PathTranslator mPathTranslator; - private final String mName; - private final String mOwner; - private final Table mTable; - private final List mPartitionKeys; - private final Map mParameters; - private final List mUdbPartitions; - private final List mStatistics; - private final Schema mSchema; - private final Layout mLayout; - - /** - * Create a new glue table instance. - * - * @param glueDatabase the glue udb - * @param pathTranslator the glue to alluxio path translator - * @param name the table name - * @param schema the table schema - * @param cols list of partition keys - * @param udbPartitions list of partitions - * @param statistics the table statistics - * @param layout the table layout - * @param table glue table object - */ - public GlueTable(GlueDatabase glueDatabase, PathTranslator pathTranslator, String name, - Schema schema, List statistics, List cols, - List udbPartitions, Layout layout, Table table) { - mGlueDatabase = glueDatabase; - mPathTranslator = pathTranslator; - mTable = table; - mName = name; - mSchema = schema; - mUdbPartitions = udbPartitions; - mPartitionKeys = cols; - mStatistics = statistics; - mOwner = (table.getOwner() != null) ? 
table.getOwner() : null; - mParameters = (table.getParameters() != null) ? table.getParameters() : Collections.emptyMap(); - mLayout = layout; - } - - @Override - public String getName() { - return mName; - } - - @Override - public Schema getSchema() { - return mSchema; - } - - @Override - public String getOwner() { - return mOwner; - } - - @Override - public Map getParameters() { - return mParameters; - } - - @Override - public List getPartitionCols() { - return mPartitionKeys; - } - - @Override - public List getStatistics() { - return mStatistics; - } - - @Override - public Layout getLayout() { - return mLayout; - } - - @Override - public List getPartitions() { - return mUdbPartitions; - } -} diff --git a/dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/GlueUtils.java b/dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/GlueUtils.java deleted file mode 100644 index 21b1d8f9c2ab..000000000000 --- a/dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/GlueUtils.java +++ /dev/null @@ -1,366 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.under.glue; - -import alluxio.grpc.table.BinaryColumnStatsData; -import alluxio.grpc.table.BooleanColumnStatsData; -import alluxio.grpc.table.ColumnStatisticsData; -import alluxio.grpc.table.ColumnStatisticsInfo; -import alluxio.grpc.table.Date; -import alluxio.grpc.table.DateColumnStatsData; -import alluxio.grpc.table.Decimal; -import alluxio.grpc.table.DecimalColumnStatsData; -import alluxio.grpc.table.DoubleColumnStatsData; -import alluxio.grpc.table.LongColumnStatsData; -import alluxio.grpc.table.Schema; -import alluxio.grpc.table.StringColumnStatsData; -import alluxio.grpc.table.layout.hive.HiveBucketProperty; -import alluxio.grpc.table.layout.hive.SortingColumn; -import alluxio.grpc.table.layout.hive.Storage; -import alluxio.grpc.table.layout.hive.StorageFormat; -import alluxio.table.common.udb.PathTranslator; - -import com.amazonaws.services.glue.model.Column; -import com.amazonaws.services.glue.model.ColumnStatistics; -import com.amazonaws.services.glue.model.Order; -import com.amazonaws.services.glue.model.StorageDescriptor; -import com.google.protobuf.ByteString; -import org.apache.hadoop.hive.common.FileUtils; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -/** - * Glue utils. - */ -public class GlueUtils { - - private GlueUtils() {} - - /** - * Convert glue field schema to alluxio proto (Glue do not have filedschema api). - * - * @param glueColumns list of glue columns - * @return alluxio proto of schema - */ - public static Schema toProtoSchema(List glueColumns) { - Schema.Builder schemaBuilder = Schema.newBuilder(); - schemaBuilder.addAllCols(toProto(glueColumns)); - return schemaBuilder.build(); - } - - /** - * Convert the Glue FieldSchema to Alluxio FieldSchema. 
- * - * @param glueCloumns Glue FiledSchema - * @return list of Alluxio FieldSchema - */ - public static List toProto(List glueCloumns) { - if (glueCloumns == null) { - return Collections.emptyList(); - } - List list = new ArrayList<>(); - for (Column column:glueCloumns) { - alluxio.grpc.table.FieldSchema.Builder builder = alluxio.grpc.table.FieldSchema.newBuilder() - .setName(column.getName()) - .setType(column.getType()); - if (column.getComment() != null) { - builder.setComment(column.getComment()); - } - list.add(builder.build()); - } - return list; - } - - /** - * Convert glue ColumnStatistics to Alluxio ColumnStatisticsInfo. - * - * @param glueColumnStatistic glue column statistic info - * @return Alluxio ColumnStatisticsInfo - */ - public static ColumnStatisticsInfo toProto(ColumnStatistics glueColumnStatistic) { - if (glueColumnStatistic == null) { - return ColumnStatisticsInfo.newBuilder().build(); - } - - ColumnStatisticsInfo.Builder columnStatisticsInfoBuilder = ColumnStatisticsInfo.newBuilder(); - columnStatisticsInfoBuilder.setColName(glueColumnStatistic.getColumnName()) - .setColType(glueColumnStatistic.getColumnType()); - - if (glueColumnStatistic.getStatisticsData() != null) { - com.amazonaws.services.glue.model.ColumnStatisticsData glueColumnStatisticsData = - glueColumnStatistic.getStatisticsData(); - String columnType = glueColumnStatistic.getStatisticsData().getType(); - if (columnType != null) { - if (columnType.equals("BOOLEAN") - && glueColumnStatisticsData.getBooleanColumnStatisticsData() != null) { - com.amazonaws.services.glue.model.BooleanColumnStatisticsData booleanData = - glueColumnStatisticsData.getBooleanColumnStatisticsData(); - if (booleanData != null) { - columnStatisticsInfoBuilder.setData( - ColumnStatisticsData.newBuilder().setBooleanStats(toProto(booleanData)).build()); - } - } - if (columnType.equals("DATE") - && glueColumnStatisticsData.getDateColumnStatisticsData() != null) { - 
com.amazonaws.services.glue.model.DateColumnStatisticsData dateData = - glueColumnStatisticsData.getDateColumnStatisticsData(); - if (dateData != null) { - columnStatisticsInfoBuilder.setData( - ColumnStatisticsData.newBuilder().setDateStats(toProto(dateData)).build()); - } - } - if (columnType.equals("DECIMAL") - && glueColumnStatisticsData.getDecimalColumnStatisticsData() != null) { - com.amazonaws.services.glue.model.DecimalColumnStatisticsData decimalData = - glueColumnStatisticsData.getDecimalColumnStatisticsData(); - if (decimalData != null) { - columnStatisticsInfoBuilder.setData( - ColumnStatisticsData.newBuilder().setDecimalStats(toProto(decimalData)).build()); - } - } - if (columnType.equals("DOUBLE") - && glueColumnStatisticsData.getDoubleColumnStatisticsData() != null) { - com.amazonaws.services.glue.model.DoubleColumnStatisticsData doubleData = - glueColumnStatisticsData.getDoubleColumnStatisticsData(); - if (doubleData != null) { - columnStatisticsInfoBuilder.setData( - ColumnStatisticsData.newBuilder().setDoubleStats(toProto(doubleData)).build()); - } - } - if (columnType.equals("LONG") - && glueColumnStatisticsData.getLongColumnStatisticsData() != null) { - com.amazonaws.services.glue.model.LongColumnStatisticsData longData = - glueColumnStatisticsData.getLongColumnStatisticsData(); - if (longData != null) { - columnStatisticsInfoBuilder.setData( - ColumnStatisticsData.newBuilder().setLongStats(toProto(longData)).build()); - } - } - if (columnType.equals("STRING") - && glueColumnStatisticsData.getStringColumnStatisticsData() != null) { - com.amazonaws.services.glue.model.StringColumnStatisticsData stringData = - glueColumnStatisticsData.getStringColumnStatisticsData(); - if (stringData != null) { - columnStatisticsInfoBuilder.setData( - ColumnStatisticsData.newBuilder().setStringStats(toProto(stringData)).build()); - } - } - if (columnType.equals("BINARY") - && glueColumnStatisticsData.getBinaryColumnStatisticsData() != null) { - 
com.amazonaws.services.glue.model.BinaryColumnStatisticsData binaryData = - glueColumnStatisticsData.getBinaryColumnStatisticsData(); - if (binaryData != null) { - columnStatisticsInfoBuilder.setData( - ColumnStatisticsData.newBuilder().setBinaryStats(toProto(binaryData)).build()); - } - } - } - } - - return columnStatisticsInfoBuilder.build(); - } - - private static BooleanColumnStatsData toProto( - com.amazonaws.services.glue.model.BooleanColumnStatisticsData booleanData) { - BooleanColumnStatsData.Builder builder = BooleanColumnStatsData.newBuilder(); - builder.setNumNulls(booleanData.getNumberOfNulls()) - .setNumTrues(booleanData.getNumberOfTrues()) - .setNumFalses(booleanData.getNumberOfFalses()); - return builder.build(); - } - - private static DateColumnStatsData toProto( - com.amazonaws.services.glue.model.DateColumnStatisticsData dateData) { - DateColumnStatsData.Builder builder = DateColumnStatsData.newBuilder(); - builder.setNumNulls(dateData.getNumberOfNulls()) - .setNumDistincts(dateData.getNumberOfDistinctValues()); - if (dateData.getMaximumValue() != null) { - builder.setHighValue(Date.newBuilder() - .setDaysSinceEpoch(dateData.getMaximumValue().getTime()).build()); - } - if (dateData.getMinimumValue() != null) { - builder.setLowValue(Date.newBuilder() - .setDaysSinceEpoch(dateData.getMinimumValue().getTime()).build()); - } - return builder.build(); - } - - private static DecimalColumnStatsData toProto( - com.amazonaws.services.glue.model.DecimalColumnStatisticsData decimalData) { - DecimalColumnStatsData.Builder builder = DecimalColumnStatsData.newBuilder(); - builder.setNumNulls(decimalData.getNumberOfNulls()) - .setNumDistincts(decimalData.getNumberOfDistinctValues()); - if (decimalData.getMaximumValue() != null) { - builder.setHighValue(Decimal.newBuilder().setScale(decimalData.getMaximumValue().getScale()) - .setUnscaled( - ByteString.copyFrom(decimalData.getMaximumValue().getUnscaledValue().array()))); - } - if (decimalData.getMinimumValue() != 
null) { - builder.setLowValue(Decimal.newBuilder().setScale(decimalData.getMinimumValue().getScale()) - .setUnscaled( - ByteString.copyFrom(decimalData.getMinimumValue().getUnscaledValue().array()))); - } - return builder.build(); - } - - private static DoubleColumnStatsData toProto( - com.amazonaws.services.glue.model.DoubleColumnStatisticsData doubleData) { - DoubleColumnStatsData.Builder builder = DoubleColumnStatsData.newBuilder(); - builder.setNumNulls(doubleData.getNumberOfNulls()) - .setNumDistincts(doubleData.getNumberOfDistinctValues()); - if (doubleData.getMaximumValue() != null) { - builder.setHighValue(doubleData.getMaximumValue()); - } - if (doubleData.getMinimumValue() != null) { - builder.setLowValue(doubleData.getMinimumValue()); - } - return builder.build(); - } - - private static LongColumnStatsData toProto( - com.amazonaws.services.glue.model.LongColumnStatisticsData longData) { - LongColumnStatsData.Builder builder = LongColumnStatsData.newBuilder(); - builder.setNumNulls(longData.getNumberOfNulls()) - .setNumDistincts(longData.getNumberOfDistinctValues()); - if (longData.getMaximumValue() != null) { - builder.setHighValue(longData.getMaximumValue()); - } - if (longData.getMinimumValue() != null) { - builder.setLowValue(longData.getMinimumValue()); - } - return builder.build(); - } - - private static StringColumnStatsData toProto( - com.amazonaws.services.glue.model.StringColumnStatisticsData stringData) { - StringColumnStatsData.Builder builder = StringColumnStatsData.newBuilder(); - builder.setNumNulls(stringData.getNumberOfNulls()) - .setNumDistincts(stringData.getNumberOfDistinctValues()); - if (stringData.getAverageLength() != null) { - builder.setAvgColLen(stringData.getAverageLength()); - } - if (stringData.getMaximumLength() != null) { - builder.setMaxColLen(stringData.getMaximumLength().longValue()); - } - return builder.build(); - } - - private static BinaryColumnStatsData toProto( - 
com.amazonaws.services.glue.model.BinaryColumnStatisticsData binaryData) { - BinaryColumnStatsData.Builder builder = BinaryColumnStatsData.newBuilder(); - builder.setNumNulls(binaryData.getNumberOfNulls()); - if (binaryData.getMaximumLength() != null) { - builder.setMaxColLen(binaryData.getMaximumLength()); - } - if (binaryData.getAverageLength() != null) { - builder.setAvgColLen(binaryData.getAverageLength()); - } - return builder.build(); - } - - /** - * Convert the Glue Storage Descriptor and Translator information to Storage. - * - * @param sd the glue storage descriptor - * @param translator the glue translator - * @return storage proto - * @throws IOException - */ - public static Storage toProto(StorageDescriptor sd, PathTranslator translator) - throws IOException { - if (sd == null) { - return Storage.getDefaultInstance(); - } - - String serDe = sd.getSerdeInfo() == null ? null - : sd.getSerdeInfo().getSerializationLibrary(); - Map serdeLibMap = sd.getSerdeInfo() == null ? null - : sd.getSerdeInfo().getParameters(); - StorageFormat.Builder formatBuilder = StorageFormat.newBuilder() - .setInputFormat(sd.getInputFormat()) - .setOutputFormat(sd.getOutputFormat()); - - if (serdeLibMap != null) { - formatBuilder.putAllSerdelibParameters(serdeLibMap); - } - if (serDe != null) { - formatBuilder.setSerde(serDe); // Check SerDe info - } - - alluxio.grpc.table.layout.hive.Storage.Builder storageBuilder = - alluxio.grpc.table.layout.hive.Storage.newBuilder(); - List bucketColumn = sd.getBucketColumns() == null - ? Collections.emptyList() : sd.getBucketColumns(); - List orderList = sd.getSortColumns(); - List sortingColumns; - if (orderList == null) { - sortingColumns = Collections.emptyList(); - } else { - sortingColumns = orderList.stream().map( - order -> SortingColumn.newBuilder().setColumnName(order.getColumn()) - .setOrder(order.getSortOrder() == 1 ? 
SortingColumn.SortingOrder.ASCENDING - : SortingColumn.SortingOrder.DESCENDING).build()) - .collect(Collectors.toList()); - } - return storageBuilder.setStorageFormat(formatBuilder.build()) - .setLocation(translator.toAlluxioPath(sd.getLocation())) - .setBucketProperty(HiveBucketProperty.newBuilder().setBucketCount(sd.getNumberOfBuckets()) - .addAllBucketedBy(bucketColumn).addAllSortedBy(sortingColumns).build()) - .setSkewed(sd.getSkewedInfo() != null && (sd.getSkewedInfo().getSkewedColumnNames()) != null - && !sd.getSkewedInfo().getSkewedColumnNames().isEmpty()) - .putAllSerdeParameters(sd.getParameters()).build(); - } - - /** - * Align to hive makePartName, convert glue partition information to alluxio partition name. - * - * @param columns glue table partition keys - * @param partitionValues glue partition values - * @return partition name - * @throws IOException - */ - public static String makePartitionName(List columns, List partitionValues) - throws IOException { - if ((columns.size() != partitionValues.size()) || columns.size() == 0) { - String errorMesg = "Invalid partition key & values; key ["; - for (Column column : columns) { - errorMesg += (column.getName() + ","); - } - errorMesg += "], values ["; - for (String partitionValue : partitionValues) { - errorMesg += (partitionValue + ", "); - } - throw new IOException(errorMesg + "]"); - } - List columnNames = new ArrayList<>(); - for (Column column : columns) { - columnNames.add(column.getName()); - } - return makePartName(columnNames, partitionValues); - } - - /** - * Make partition name for glue, wrapper of hive makePartName. 
- * - * @param partCols partition columns - * @param vals partition values - * @return partition name - */ - public static String makePartName(List partCols, List vals) { - return FileUtils.makePartName(partCols, vals); - } -} diff --git a/dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/Property.java b/dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/Property.java deleted file mode 100644 index f7964bbad6d4..000000000000 --- a/dora/table/server/underdb/glue/src/main/java/alluxio/table/under/glue/Property.java +++ /dev/null @@ -1,257 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.under.glue; - -import alluxio.table.common.udb.UdbProperty; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import javax.annotation.Nullable; - -/** - * This contains all the properties for Glue UDB. - */ -public class Property extends UdbProperty { - private static final Logger LOG = LoggerFactory.getLogger(Property.class); - /** A map from default property key's string name to the key. */ - private static final Map DEFAULT_KEYS_MAP = new ConcurrentHashMap<>(); - - /** - * Create a alluxio.table.under.glue.Property instance. 
- * - * @param name the property name - * @param description the property description - * @param defaultValue the default value - */ - public Property(String name, String description, String defaultValue) { - super(name, description, defaultValue); - } - - /** - * UDB property builder. - */ - public static final class Builder { - private String mName; - private String mDescription; - private String mDefaultValue; - - /** - * @param name name of property - */ - public Builder(String name) { - mName = name; - } - - /** - * @param name name for the property - * @return the updated builder instance - */ - public Builder setName(String name) { - mName = name; - return this; - } - - /** - * @param defaultValue the property's default value - * @return the updated builder instance - */ - public Builder setDefaultValue(String defaultValue) { - mDefaultValue = defaultValue; - return this; - } - - /** - * @param description of the property - * @return the updated builder instance - */ - public Builder setDescription(String description) { - mDescription = description; - return this; - } - - /** - * Register the unregistered udb property. - * - * @return registered udb property - */ - public Property build() { - Property property = buildUnregistered(); - Preconditions.checkState( - Property.register(property), - "Cannot register existing alluxio.table.under.glue.Property \"%s\"", mName); - return property; - } - - /** - * Creates the Udb alluxio.table.under.glue.Property - * without registering it with default property list. - * - * @return udb property - */ - public Property buildUnregistered() { - Property property = new Property(mName, mDescription, mDefaultValue); - return property; - } - } - - /** - * Registers the given UDB alluxio.table.under.glue.Property to the global map. 
- * - * @param Property the udb property - * @return whether the udb property is successfully registered - */ - @VisibleForTesting - public static boolean register(Property Property) { - String name = Property.getName(); - if (DEFAULT_KEYS_MAP.containsKey(name)) { - return false; - } - - DEFAULT_KEYS_MAP.put(name, Property); - return true; - } - - /** - * Unregisters the given key from the global map. - * - * @param Property the property to unregister - */ - @VisibleForTesting - public static void unregister(Property Property) { - String name = Property.getName(); - DEFAULT_KEYS_MAP.remove(name); - } - - public static final Property MAX_GLUE_CONNECTION = - new Builder(Name.MAX_GLUE_CONNECTION) - .setDefaultValue("5") - .setDescription("The maximum number of connection to glue metastore.") - .build(); - - public static final Property MAX_GLUE_FETCH_PARTITIONS = - new Builder(Name.MAX_GLUE_FETCH_PARTITIONS) - .setDefaultValue("512") - .setDescription("The maximum number of partitions to return in a single response.") - .build(); - - public static final Property GLUE_REGION = - new Builder(Name.GLUE_REGION) - .setDefaultValue("") - .setDescription("The regional endpoint for client service calls.") - .build(); - - // TODO(shouwei): check the necessity of catalogid - public static final Property CATALOG_ID = - new Builder(Name.CATALOG_ID) - .setDefaultValue("") - .setDescription("The catalog id of aws glue.") - .build(); - - public static final Property AWS_GLUE_ACCESS_KEY = - new Builder(Name.AWS_GLUE_ACCESS_KEY) - .setDefaultValue("") - .setDescription("The access key to access the aws glue.") - .build(); - - public static final Property AWS_GLUE_SECRET_KEY = - new Builder(Name.AWS_GLUE_SECRET_KEY) - .setDefaultValue("") - .setDescription("The secret key to access the aws glue.") - .build(); - - public static final Property AWS_PROXY_PROTOCOL = - new Builder(Name.AWS_PROXY_PROTOCOL) - .setDefaultValue("HTTP") - .setDescription("The Protocol to use for connecting to 
the proxy.") - .build(); - - public static final Property AWS_PROXY_HOST = - new Builder(Name.AWS_PROXY_HOST) - .setDefaultValue("") - .setDescription("The proxy host the client will connect through.") - .build(); - - public static final Property AWS_PROXY_PORT = - new Builder(Name.AWS_PROXY_PORT) - .setDefaultValue("") - .setDescription("The proxy port the client will connect through.") - .build(); - - public static final Property AWS_PROXY_USER_NAME = - new Builder(Name.AWS_PROXY_USER_NAME) - .setDefaultValue("") - .setDescription("The proxy user name.") - .build(); - - public static final Property AWS_PROXY_PASSWORD = - new Builder(Name.AWS_PROXY_PASSWORD) - .setDefaultValue("") - .setDescription("The proxy password.") - .build(); - - public static final Property TABLE_COLUMN_STATISTICS_ENABLE = - new Builder(Name.TABLE_COLUMN_STATISTICS_ENABLE) - .setDefaultValue("false") - .setDescription("Enable Glue table column statistics.") - .build(); - - public static final Property PARTITION_COLUMN_STATISTICS_ENABLE = - new Builder(Name.PARTITION_COLUMN_STATISTICS_ENABLE) - .setDefaultValue("false") - .setDescription("Enable Glue partition column statistics.") - .build(); - - @Override - public String getName() { - return mName; - } - - @Override - public String getDescription() { - return mDescription; - } - - /** - * @return the default value of udb property or null if value not set - */ - @Nullable - @Override - public String getDefaultValue() { - Object defaultValue = mDefaultValue; - return defaultValue == null ? null : defaultValue.toString(); - } - - /** - * Corresponding configurations of GLUE configurations. 
- */ - public static final class Name { - // AWS Glue related properties - public static final String MAX_GLUE_CONNECTION = "max.connection"; - public static final String MAX_GLUE_FETCH_PARTITIONS = "partitions.fetch.max"; - public static final String GLUE_REGION = "aws.region"; - public static final String CATALOG_ID = "aws.catalog.id"; - public static final String AWS_GLUE_ACCESS_KEY = "aws.accesskey"; - public static final String AWS_GLUE_SECRET_KEY = "aws.secretkey"; - public static final String AWS_PROXY_PROTOCOL = "aws.proxy.protocol"; - public static final String AWS_PROXY_HOST = "aws.proxy.host"; - public static final String AWS_PROXY_PORT = "aws.proxy.port"; - public static final String AWS_PROXY_USER_NAME = "aws.proxy.username"; - public static final String AWS_PROXY_PASSWORD = "aws.proxy.password"; - public static final String TABLE_COLUMN_STATISTICS_ENABLE = "table.column.statistics"; - public static final String PARTITION_COLUMN_STATISTICS_ENABLE = "partition.column.statistics"; - } -} diff --git a/dora/table/server/underdb/glue/src/main/resources/META-INF/services/alluxio.table.common.udb.UnderDatabaseFactory b/dora/table/server/underdb/glue/src/main/resources/META-INF/services/alluxio.table.common.udb.UnderDatabaseFactory deleted file mode 100644 index 5fcc2d517ba7..000000000000 --- a/dora/table/server/underdb/glue/src/main/resources/META-INF/services/alluxio.table.common.udb.UnderDatabaseFactory +++ /dev/null @@ -1,12 +0,0 @@ -# -# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 -# (the "License"). You may not use this work except in compliance with the License, which is -# available at www.apache.org/licenses/LICENSE-2.0 -# -# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -# either express or implied, as more fully set forth in the License. -# -# See the NOTICE file distributed with this work for information regarding copyright ownership. 
-# - -alluxio.table.under.glue.GlueDatabaseFactory diff --git a/dora/table/server/underdb/glue/src/test/java/alluxio/table/under/glue/GlueDatabaseTest.java b/dora/table/server/underdb/glue/src/test/java/alluxio/table/under/glue/GlueDatabaseTest.java deleted file mode 100644 index da528fdf8279..000000000000 --- a/dora/table/server/underdb/glue/src/test/java/alluxio/table/under/glue/GlueDatabaseTest.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.under.glue; - -import static org.junit.Assert.assertEquals; - -import alluxio.table.common.udb.UdbConfiguration; -import alluxio.table.common.udb.UdbContext; - -import com.google.common.collect.ImmutableMap; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; - -import java.util.Map; - -public class GlueDatabaseTest { - - private static final String DB_NAME = "test"; - - @Rule - public ExpectedException mExpection = ExpectedException.none(); - - private UdbContext mUdbContext; - private UdbConfiguration mUdbConfiguration; - - @Before - public void before() { - Map conf = ImmutableMap.of("aws.region", "us-east-1"); - mUdbConfiguration = new UdbConfiguration(conf); - mUdbContext = new UdbContext(null, null, "glue", "null", DB_NAME, DB_NAME); - } - - @Test - public void create() { - assertEquals(DB_NAME, GlueDatabase.create(mUdbContext, mUdbConfiguration).getName()); - } - - @Test - public void createEmptyName() { - 
mExpection.expect(IllegalArgumentException.class); - UdbContext udbContext = - new UdbContext(null, null, "glue", null, "", DB_NAME); - assertEquals(DB_NAME, - GlueDatabase.create(udbContext, new UdbConfiguration(ImmutableMap.of())).getName()); - } - - @Test - public void createNullName() { - mExpection.expect(IllegalArgumentException.class); - UdbContext udbContext = - new UdbContext(null, null, "glue", null, null, DB_NAME); - assertEquals(DB_NAME, - GlueDatabase.create(udbContext, new UdbConfiguration(ImmutableMap.of())).getName()); - } -} diff --git a/dora/table/server/underdb/glue/src/test/java/alluxio/table/under/glue/GlueUtilsTest.java b/dora/table/server/underdb/glue/src/test/java/alluxio/table/under/glue/GlueUtilsTest.java deleted file mode 100644 index dc866fdfedf8..000000000000 --- a/dora/table/server/underdb/glue/src/test/java/alluxio/table/under/glue/GlueUtilsTest.java +++ /dev/null @@ -1,473 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.under.glue; - -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import alluxio.grpc.table.ColumnStatisticsInfo; -import alluxio.util.CommonUtils; - -import com.amazonaws.services.glue.model.BinaryColumnStatisticsData; -import com.amazonaws.services.glue.model.BooleanColumnStatisticsData; -import com.amazonaws.services.glue.model.Column; -import com.amazonaws.services.glue.model.ColumnStatistics; -import com.amazonaws.services.glue.model.ColumnStatisticsData; -import com.amazonaws.services.glue.model.DateColumnStatisticsData; -import com.amazonaws.services.glue.model.DecimalColumnStatisticsData; -import com.amazonaws.services.glue.model.DecimalNumber; -import com.amazonaws.services.glue.model.DoubleColumnStatisticsData; -import com.amazonaws.services.glue.model.LongColumnStatisticsData; -import com.amazonaws.services.glue.model.StringColumnStatisticsData; -import com.google.common.collect.ImmutableList; -import org.junit.Test; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Date; -import java.util.List; -import java.util.Random; -import java.util.concurrent.ThreadLocalRandom; - -public class GlueUtilsTest { - - private static final String COLUMN_NAME = "testColumn"; - private static final String PARTITION_VALUE_1 = "value1"; - private static final String PARTITION_VALUE_2 = "value2"; - private Random mRandom = ThreadLocalRandom.current(); - - @Test - public void toProtoSchema() { - assertEquals(MockGlueDatabase.alluxioSchema(), - GlueUtils.toProtoSchema(ImmutableList.of(MockGlueDatabase.glueTestColumn(COLUMN_NAME)))); - } - - @Test - public void toProto() { - assertEquals(MockGlueDatabase.alluxioFieldSchema(), - GlueUtils.toProto(ImmutableList.of(MockGlueDatabase.glueTestColumn(COLUMN_NAME)))); - } - - @Test - public void makePartitionName() throws IOException { - List columns = 
ImmutableList.of(MockGlueDatabase.glueTestColumn(COLUMN_NAME)); - List emptyColumns = ImmutableList.of(); - - List partitionValueList1 = ImmutableList.of(PARTITION_VALUE_1); - List partitionValueList2 = ImmutableList.of(PARTITION_VALUE_1, PARTITION_VALUE_2); - - String expectedMessageEmpty = - "Invalid partition key & values; key [], values [value1, ]"; - String expectedMessageUneven = - "Invalid partition key & values; key [testColumn,], values [value1, value2, ]"; - - assertEquals( - GlueUtils.makePartitionName(columns, partitionValueList1), - GlueUtils.makePartName(ImmutableList.of(COLUMN_NAME), partitionValueList1)); - assertMakePartName(emptyColumns, partitionValueList1, expectedMessageEmpty); - assertMakePartName(columns, partitionValueList2, expectedMessageUneven); - } - - private static void assertMakePartName(List columns, - List partitionValues, String expectedException) { - IOException ioException = new IOException(); - try { - GlueUtils.makePartitionName(columns, partitionValues); - } catch (IOException e) { - ioException = e; - } - assertExecption(ioException, expectedException); - } - - private static void assertExecption(IOException ioExecption, String expectedException) { - assertTrue(ioExecption.getMessage().contains(expectedException)); - } - - @Test - public void protoColStatsWithBooleanData() { - // ColumnStatistics required fields: AnalyzedTime, ColumnName, ColumnType, StatisticsData - ColumnStatistics glueColStats = new ColumnStatistics(); - glueColStats.setColumnName("colName"); - glueColStats.setColumnType("colType"); - - ColumnStatisticsData data = new ColumnStatisticsData(); - - // verify empty data - data.setType("BOOLEAN"); - glueColStats.setStatisticsData(data); - verifyColumnStats(glueColStats); - - // verify non-empty data - BooleanColumnStatisticsData booleanData = new BooleanColumnStatisticsData(); - booleanData.setNumberOfFalses(mRandom.nextLong()); - booleanData.setNumberOfTrues(mRandom.nextLong()); - 
booleanData.setNumberOfNulls(mRandom.nextLong()); - data.setBooleanColumnStatisticsData(booleanData); - glueColStats.setStatisticsData(data); - verifyColumnStats(glueColStats); - } - - @Test - public void protoColStatsWithDateData() { - // ColumnStatistics required fields: AnalyzedTime, ColumnName, ColumnType, StatisticsData - ColumnStatistics glueColStats = new ColumnStatistics(); - glueColStats.setColumnName("colName"); - glueColStats.setColumnType("colType"); - - ColumnStatisticsData data = new ColumnStatisticsData(); - - // verify empty data - data.setType("DATE"); - glueColStats.setStatisticsData(data); - verifyColumnStats(glueColStats); - - // verify non-empty data - DateColumnStatisticsData dateData = new DateColumnStatisticsData(); - dateData.setMaximumValue(new Date(mRandom.nextLong())); - dateData.setMinimumValue(new Date(mRandom.nextLong())); - dateData.setNumberOfNulls(mRandom.nextLong()); - dateData.setNumberOfDistinctValues(mRandom.nextLong()); - data.setDateColumnStatisticsData(dateData); - glueColStats.setStatisticsData(data); - verifyColumnStats(glueColStats); - - // verify null column values - dateData.setMaximumValue(null); - dateData.setMinimumValue(null); - data.setDateColumnStatisticsData(dateData); - glueColStats.setStatisticsData(data); - verifyColumnStats(glueColStats); - } - - @Test - public void protoColStatsWithDecimalData() { - // ColumnStatistics required fields: AnalyzedTime, ColumnName, ColumnType, StatisticsData - ColumnStatistics glueColStats = new ColumnStatistics(); - glueColStats.setColumnName("colName"); - glueColStats.setColumnType("colType"); - - ColumnStatisticsData data = new ColumnStatisticsData(); - - // verify empty data - data.setType("DECIMAL"); - glueColStats.setStatisticsData(data); - verifyColumnStats(glueColStats); - - // verify non-empty data - DecimalColumnStatisticsData decimalData = new DecimalColumnStatisticsData(); - DecimalNumber maxDecimalNumber = new DecimalNumber(); - 
maxDecimalNumber.setScale(mRandom.nextInt()); - maxDecimalNumber.setUnscaledValue(ByteBuffer.wrap(CommonUtils.randomBytes(5))); - DecimalNumber minDecimalNumber = new DecimalNumber(); - minDecimalNumber.setScale(mRandom.nextInt()); - minDecimalNumber.setUnscaledValue(ByteBuffer.wrap(CommonUtils.randomBytes(5))); - decimalData.setMaximumValue(maxDecimalNumber); - decimalData.setMinimumValue(minDecimalNumber); - decimalData.setNumberOfNulls(mRandom.nextLong()); - decimalData.setNumberOfDistinctValues(mRandom.nextLong()); - data.setDecimalColumnStatisticsData(decimalData); - glueColStats.setStatisticsData(data); - verifyColumnStats(glueColStats); - - // verify null column values - decimalData.setMaximumValue(null); - decimalData.setMinimumValue(null); - data.setDecimalColumnStatisticsData(decimalData); - glueColStats.setStatisticsData(data); - verifyColumnStats(glueColStats); - } - - @Test - public void protoColStatsWithDoubleData() { - // ColumnStatistics required fields: AnalyzedTime, ColumnName, ColumnType, StatisticsData - ColumnStatistics glueColStats = new ColumnStatistics(); - glueColStats.setColumnName("colName"); - glueColStats.setColumnType("colType"); - - ColumnStatisticsData data = new ColumnStatisticsData(); - - // verify empty data - data.setType("DOUBLE"); - glueColStats.setStatisticsData(data); - verifyColumnStats(glueColStats); - - // verify non-empty data - DoubleColumnStatisticsData doubleData = new DoubleColumnStatisticsData(); - doubleData.setMaximumValue(mRandom.nextDouble()); - doubleData.setMinimumValue(mRandom.nextDouble()); - doubleData.setNumberOfNulls(mRandom.nextLong()); - doubleData.setNumberOfDistinctValues(mRandom.nextLong()); - data.setDoubleColumnStatisticsData(doubleData); - glueColStats.setStatisticsData(data); - verifyColumnStats(glueColStats); - - // verify null column values - doubleData.setMaximumValue(null); - doubleData.setMinimumValue(null); - data.setDoubleColumnStatisticsData(doubleData); - 
glueColStats.setStatisticsData(data); - verifyColumnStats(glueColStats); - } - - @Test - public void protoColStatsWithLongData() { - // ColumnStatistics required fields: AnalyzedTime, ColumnName, ColumnType, StatisticsData - ColumnStatistics glueColStats = new ColumnStatistics(); - glueColStats.setColumnName("colName"); - glueColStats.setColumnType("colType"); - - ColumnStatisticsData data = new ColumnStatisticsData(); - - // verify empty data - data.setType("LONG"); - glueColStats.setStatisticsData(data); - verifyColumnStats(glueColStats); - - // verify non-empty data - LongColumnStatisticsData longData = new LongColumnStatisticsData(); - longData.setMaximumValue(mRandom.nextLong()); - longData.setMinimumValue(mRandom.nextLong()); - longData.setNumberOfNulls(mRandom.nextLong()); - longData.setNumberOfDistinctValues(mRandom.nextLong()); - data.setLongColumnStatisticsData(longData); - glueColStats.setStatisticsData(data); - verifyColumnStats(glueColStats); - - // verify null column values - longData.setMaximumValue(null); - longData.setMinimumValue(null); - data.setLongColumnStatisticsData(longData); - glueColStats.setStatisticsData(data); - verifyColumnStats(glueColStats); - } - - @Test - public void protoColStatsWithStringData() { - // ColumnStatistics required fields: AnalyzedTime, ColumnName, ColumnType, StatisticsData - ColumnStatistics glueColStats = new ColumnStatistics(); - glueColStats.setColumnName("colName"); - glueColStats.setColumnType("colType"); - - ColumnStatisticsData data = new ColumnStatisticsData(); - - // verify empty data - data.setType("STRING"); - glueColStats.setStatisticsData(data); - verifyColumnStats(glueColStats); - - // verify non-empty data - StringColumnStatisticsData stringData = new StringColumnStatisticsData(); - stringData.setMaximumLength(mRandom.nextLong()); - stringData.setAverageLength(mRandom.nextDouble()); - stringData.setNumberOfNulls(mRandom.nextLong()); - stringData.setNumberOfDistinctValues(mRandom.nextLong()); - 
data.setStringColumnStatisticsData(stringData); - glueColStats.setStatisticsData(data); - verifyColumnStats(glueColStats); - } - - @Test - public void protoColStatsWithBinaryData() { - // ColumnStatistics required fields: AnalyzedTime, ColumnName, ColumnType, StatisticsData - ColumnStatistics glueColStats = new ColumnStatistics(); - glueColStats.setColumnName("colName"); - glueColStats.setColumnType("colType"); - - ColumnStatisticsData data = new ColumnStatisticsData(); - - // verify empty data - data.setType("BINARY"); - glueColStats.setStatisticsData(data); - verifyColumnStats(glueColStats); - - // verify non-empty data - BinaryColumnStatisticsData binaryData = new BinaryColumnStatisticsData(); - binaryData.setAverageLength(mRandom.nextDouble()); - binaryData.setMaximumLength(mRandom.nextLong()); - binaryData.setNumberOfNulls(mRandom.nextLong()); - data.setBinaryColumnStatisticsData(binaryData); - glueColStats.setStatisticsData(data); - verifyColumnStats(glueColStats); - } - - private void verifyColumnStats(ColumnStatistics glueColStats) { - ColumnStatisticsInfo colStats = GlueUtils.toProto(glueColStats); - assertEquals(glueColStats.getColumnName(), colStats.getColName()); - assertEquals(glueColStats.getColumnType(), colStats.getColType()); - - // verify empty ColumnStatisticData - if (glueColStats.getStatisticsData() == null) { - assertEquals(glueColStats.getStatisticsData() == null - && glueColStats.getStatisticsData().getType() != null, - colStats.hasData()); - } - - if (glueColStats.getStatisticsData() != null) { - ColumnStatisticsData glueData = glueColStats.getStatisticsData(); - alluxio.grpc.table.ColumnStatisticsData data = colStats.getData(); - - //verify boolean - if (glueData.getBooleanColumnStatisticsData() != null) { - assertEquals(glueData.getType(), "BOOLEAN"); - BooleanColumnStatisticsData glueBoolean = glueData.getBooleanColumnStatisticsData(); - assertEquals(glueBoolean.getNumberOfFalses() != null - && glueBoolean.getNumberOfTrues() != null - && 
glueBoolean.getNumberOfNulls() != null, - data.hasBooleanStats()); - if (data.hasBooleanStats()) { - alluxio.grpc.table.BooleanColumnStatsData boolData = data.getBooleanStats(); - assertEquals(glueBoolean.getNumberOfFalses().longValue(), boolData.getNumFalses()); - assertEquals(glueBoolean.getNumberOfTrues().longValue(), boolData.getNumTrues()); - assertEquals(glueBoolean.getNumberOfNulls().longValue(), boolData.getNumNulls()); - } - } - - //verify date - if (glueData.getDateColumnStatisticsData() != null) { - assertEquals(glueData.getType(), "DATE"); - DateColumnStatisticsData glueDate = glueData.getDateColumnStatisticsData(); - assertEquals(glueDate.getNumberOfDistinctValues() != null - && glueDate.getNumberOfNulls() != null, - data.hasDateStats()); - if (data.hasDateStats()) { - alluxio.grpc.table.DateColumnStatsData date = data.getDateStats(); - assertEquals(glueDate.getNumberOfDistinctValues().longValue(), date.getNumDistincts()); - assertEquals(glueDate.getNumberOfNulls().longValue(), date.getNumNulls()); - assertEquals(glueDate.getMaximumValue() != null, date.hasHighValue()); - if (glueDate.getMaximumValue() != null) { - assertEquals(glueDate.getMaximumValue().getTime(), - date.getHighValue().getDaysSinceEpoch()); - } - assertEquals(glueDate.getMinimumValue() != null, date.hasLowValue()); - if (glueDate.getMinimumValue() != null) { - assertEquals(glueDate.getMinimumValue().getTime(), - date.getLowValue().getDaysSinceEpoch()); - } - } - } - - //verify decimal - if (glueData.getDecimalColumnStatisticsData() != null) { - assertEquals(glueData.getType(), "DECIMAL"); - DecimalColumnStatisticsData glueDecimal = glueData.getDecimalColumnStatisticsData(); - assertEquals(glueDecimal.getNumberOfDistinctValues() != null - && glueDecimal.getNumberOfNulls() != null, - data.hasDecimalStats()); - if (data.hasDecimalStats()) { - alluxio.grpc.table.DecimalColumnStatsData decimal = data.getDecimalStats(); - assertEquals(glueDecimal.getNumberOfDistinctValues().longValue(), - 
decimal.getNumDistincts()); - assertEquals(glueDecimal.getNumberOfNulls().longValue(), decimal.getNumNulls()); - assertEquals(glueDecimal.getMaximumValue() != null, decimal.hasHighValue()); - if (glueDecimal.getMaximumValue() != null) { - assertEquals(glueDecimal.getMaximumValue().getScale().longValue(), - decimal.getHighValue().getScale()); - assertArrayEquals(glueDecimal.getMaximumValue().getUnscaledValue().array(), - decimal.getHighValue().getUnscaled().toByteArray()); - } - assertEquals(glueDecimal.getMinimumValue() != null, decimal.hasLowValue()); - if (glueDecimal.getMinimumValue() != null) { - assertEquals(glueDecimal.getMinimumValue().getScale().longValue(), - decimal.getLowValue().getScale()); - assertArrayEquals(glueDecimal.getMinimumValue().getUnscaledValue().array(), - decimal.getLowValue().getUnscaled().toByteArray()); - } - } - } - - //verify double - if (glueData.getDoubleColumnStatisticsData() != null) { - assertEquals(glueData.getType(), "DOUBLE"); - DoubleColumnStatisticsData glueDouble = glueData.getDoubleColumnStatisticsData(); - assertEquals(glueDouble.getNumberOfDistinctValues() != null - && glueDouble.getNumberOfNulls() != null, - data.hasDoubleStats()); - if (data.hasDoubleStats()) { - alluxio.grpc.table.DoubleColumnStatsData doubleData = data.getDoubleStats(); - assertEquals(glueDouble.getNumberOfDistinctValues().longValue(), - doubleData.getNumDistincts()); - assertEquals(glueDouble.getNumberOfNulls().longValue(), doubleData.getNumNulls()); - assertEquals(glueDouble.getMaximumValue() != null, doubleData.hasHighValue()); - if (glueDouble.getMaximumValue() != null) { - assertEquals(glueDouble.getMaximumValue().doubleValue(), - doubleData.getHighValue(), 0.01); - } - assertEquals(glueDouble.getMinimumValue() != null, doubleData.hasLowValue()); - if (glueDouble.getMinimumValue() != null) { - assertEquals(glueDouble.getMinimumValue().doubleValue(), - doubleData.getLowValue(), 0.01); - } - } - } - - //verify long - if 
(glueData.getLongColumnStatisticsData() != null) { - assertEquals(glueData.getType(), "LONG"); - LongColumnStatisticsData glueLong = glueData.getLongColumnStatisticsData(); - assertEquals(glueLong.getNumberOfDistinctValues() != null - && glueLong.getNumberOfNulls() != null, - data.hasLongStats()); - if (data.hasLongStats()) { - alluxio.grpc.table.LongColumnStatsData longData = data.getLongStats(); - assertEquals(glueLong.getNumberOfDistinctValues().longValue(), - longData.getNumDistincts()); - assertEquals(glueLong.getNumberOfNulls().longValue(), longData.getNumNulls()); - assertEquals(glueLong.getMaximumValue() != null, longData.hasHighValue()); - if (glueLong.getMaximumValue() != null) { - assertEquals(glueLong.getMaximumValue().longValue(), longData.getHighValue()); - } - assertEquals(glueLong.getMinimumValue() != null, longData.hasLowValue()); - if (glueLong.getMinimumValue() != null) { - assertEquals(glueLong.getMinimumValue().longValue(), longData.getLowValue()); - } - } - } - - //verify string - if (glueData.getStringColumnStatisticsData() != null) { - assertEquals(glueData.getType(), "STRING"); - StringColumnStatisticsData glueString = glueData.getStringColumnStatisticsData(); - assertEquals(glueString.getNumberOfDistinctValues() != null - && glueString.getNumberOfNulls() != null - && glueString.getMaximumLength() != null - && glueString.getAverageLength() != null, - data.hasStringStats()); - if (data.hasStringStats()) { - alluxio.grpc.table.StringColumnStatsData stringData = data.getStringStats(); - assertEquals(glueString.getNumberOfDistinctValues().longValue(), - stringData.getNumDistincts()); - assertEquals(glueString.getNumberOfNulls().longValue(), stringData.getNumNulls()); - assertEquals(glueString.getMaximumLength().longValue(), stringData.getMaxColLen()); - assertEquals(glueString.getAverageLength().doubleValue(), - stringData.getAvgColLen(), 0.01); - } - } - - //verify binary - if (glueData.getBinaryColumnStatisticsData() != null) { - 
assertEquals(glueData.getType(), "BINARY"); - BinaryColumnStatisticsData glueBinary = glueData.getBinaryColumnStatisticsData(); - assertEquals(glueBinary.getAverageLength() != null - && glueBinary.getMaximumLength() != null - && glueBinary.getNumberOfNulls() != null, - data.hasBinaryStats()); - if (data.hasBinaryStats()) { - alluxio.grpc.table.BinaryColumnStatsData binary = data.getBinaryStats(); - assertEquals(glueBinary.getAverageLength().doubleValue(), binary.getAvgColLen(), 0.01); - assertEquals(glueBinary.getMaximumLength().longValue(), binary.getMaxColLen()); - assertEquals(glueBinary.getNumberOfNulls().longValue(), binary.getNumNulls()); - } - } - } - } -} diff --git a/dora/table/server/underdb/glue/src/test/java/alluxio/table/under/glue/MockGlueDatabase.java b/dora/table/server/underdb/glue/src/test/java/alluxio/table/under/glue/MockGlueDatabase.java deleted file mode 100644 index b3d827e1606e..000000000000 --- a/dora/table/server/underdb/glue/src/test/java/alluxio/table/under/glue/MockGlueDatabase.java +++ /dev/null @@ -1,130 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.under.glue; - -import alluxio.grpc.table.FieldSchema; -import alluxio.grpc.table.Schema; -import alluxio.grpc.table.layout.hive.HiveBucketProperty; -import alluxio.grpc.table.layout.hive.Storage; -import alluxio.grpc.table.layout.hive.StorageFormat; - -import com.amazonaws.services.glue.model.Column; -import com.amazonaws.services.glue.model.Database; -import com.amazonaws.services.glue.model.Partition; -import com.amazonaws.services.glue.model.SerDeInfo; -import com.amazonaws.services.glue.model.StorageDescriptor; -import com.amazonaws.services.glue.model.Table; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; - -import java.util.Collections; -import java.util.List; - -public class MockGlueDatabase { - - private MockGlueDatabase() {} - - private static final String DB_NAME = "testdb"; - private static final String TABLE_NAME = "testtb"; - private static final String COLUMN_NAME = "testColumn"; - private static final String INPUT_FORMAT = "testInputFormat"; - private static final String OUTPUT_FORMAT = "testOutputFormat"; - private static final String SERDE_LIB = "testSerdeLib"; - private static final String SD_LOCATION = "/testTable"; - private static final Integer NUM_OF_BUCKET = 1; - - public static Database glueTestDatabase() { - return new Database() - .withName(DB_NAME) - .withDescription("test database") - .withLocationUri("/testdb") - .withParameters(Collections.emptyMap()); - } - - public static Table glueTestTable() { - return new Table() - .withDatabaseName(DB_NAME) - .withName(TABLE_NAME) - .withPartitionKeys(glueTestColumn(COLUMN_NAME)) - .withParameters(Collections.emptyMap()) - .withStorageDescriptor(glueTestStorageDescriptor()); - } - - public static Partition glueTestPartition(List values) { - return new Partition() - .withDatabaseName(DB_NAME) - .withTableName(TABLE_NAME) - .withValues(values) - .withParameters(ImmutableMap.of()) - 
.withStorageDescriptor(glueTestStorageDescriptor()); - } - - public static Column glueTestColumn(String columnName) { - return new Column() - .withName(columnName) - .withType("string") - .withComment("test column"); - } - - public static StorageDescriptor glueTestStorageDescriptor() { - return new StorageDescriptor() - .withBucketColumns(ImmutableList.of("testBucket")) - .withColumns(ImmutableList.of(glueTestColumn(COLUMN_NAME))) - .withParameters(ImmutableMap.of()) - .withSerdeInfo(new SerDeInfo() - .withSerializationLibrary(SERDE_LIB) - .withParameters(ImmutableMap.of())) - .withInputFormat(INPUT_FORMAT) - .withOutputFormat(OUTPUT_FORMAT) - .withLocation(SD_LOCATION) - .withNumberOfBuckets(NUM_OF_BUCKET); - } - - public static Schema alluxioSchema() { - Schema.Builder schemaBuilder = Schema.newBuilder(); - schemaBuilder.addAllCols(alluxioFieldSchema()); - return schemaBuilder.build(); - } - - public static List alluxioFieldSchema() { - return ImmutableList.of( - FieldSchema.newBuilder() - .setName(COLUMN_NAME) - .setType("string") - .setComment("test column") - .build() - ); - } - - public static Storage alluxioStorage() { - return Storage - .newBuilder() - .setStorageFormat(alluxioStorageFormat()) - .setLocation(SD_LOCATION) - .setBucketProperty(HiveBucketProperty - .newBuilder() - .setBucketCount(NUM_OF_BUCKET) - .build()) - .putAllSerdeParameters(ImmutableMap.of()) - .build(); - } - - public static StorageFormat alluxioStorageFormat() { - return StorageFormat - .newBuilder() - .setInputFormat(INPUT_FORMAT) - .setOutputFormat(OUTPUT_FORMAT) - .putAllSerdelibParameters(ImmutableMap.of()) - .setSerde(SERDE_LIB) - .build(); - } -} diff --git a/dora/table/server/underdb/glue/src/test/java/alluxio/table/under/glue/RemoteGlueTest.java b/dora/table/server/underdb/glue/src/test/java/alluxio/table/under/glue/RemoteGlueTest.java deleted file mode 100644 index 5b6572cf6727..000000000000 --- 
a/dora/table/server/underdb/glue/src/test/java/alluxio/table/under/glue/RemoteGlueTest.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.under.glue; - -import static org.junit.Assert.assertEquals; - -import alluxio.table.common.udb.UdbConfiguration; -import alluxio.table.common.udb.UdbContext; - -import com.amazonaws.services.glue.AWSGlueAsync; -import com.amazonaws.services.glue.model.GetDatabaseRequest; -import com.google.common.collect.ImmutableMap; -import org.junit.Before; -import org.junit.Ignore; -import org.junit.Test; - -import java.io.IOException; -import java.util.Map; - -@Ignore -public class RemoteGlueTest { - - private static final String DB_NAME = ""; - - private UdbContext mUdbContext; - private UdbConfiguration mUdbConfiguration; - private GlueDatabase mGlueDatabase; - private AWSGlueAsync mGlueClient; - - private static final String AWS_ACCESS_KEY_ID = ""; - private static final String AWS_SECRET_KEY = ""; - private static final String CATALOG_ID = ""; - private static final String AWS_REGION = ""; - - @Before - /** - * Integration test with remote glue service. 
- */ - public void connect() { - Map conf = ImmutableMap.of( - "aws.accesskey", AWS_ACCESS_KEY_ID, - "aws.secretkey", AWS_SECRET_KEY, - "aws.region", AWS_REGION, - "aws.catalog.id", CATALOG_ID - ); - mUdbConfiguration = new UdbConfiguration(conf); - mGlueDatabase = new GlueDatabase(mUdbContext, mUdbConfiguration, DB_NAME); - mGlueClient = mGlueDatabase.getClient(); - } - - @Test - public void getDatabase() { - GetDatabaseRequest dbRequest = new GetDatabaseRequest() - .withCatalogId(CATALOG_ID) - .withName(DB_NAME); - assertEquals(DB_NAME, mGlueDatabase.getClient().getDatabase(dbRequest).getDatabase().getName()); - } - - @Test - public void getTables() throws IOException { - for (String tableName : mGlueDatabase.getTableNames()) { - System.out.println("Table Names: " + tableName + "."); - } - - System.out.println("Table counts: " + mGlueDatabase.getTableNames().size()); - } -} diff --git a/dora/table/server/underdb/hive/pom.xml b/dora/table/server/underdb/hive/pom.xml deleted file mode 100644 index e5d0cd4dc6d7..000000000000 --- a/dora/table/server/underdb/hive/pom.xml +++ /dev/null @@ -1,202 +0,0 @@ - - - 4.0.0 - - alluxio-table-server-underdb - org.alluxio - 301-SNAPSHOT - - alluxio-table-server-underdb-hive - jar - Alluxio Table - Server - UnderDB - Hive - Alluxio table underDB implementation for hive - - - - - ${project.parent.parent.parent.parent.parent.basedir}/build - - 2.3.7 - - - - - - com.google.guava - guava - provided - - - org.apache.hive - hive-metastore - ${hive-metastore.version} - compile - - - org.apache.logging.log4j - log4j-1.2-api - - - org.apache.logging.log4j - log4j-slf4j-impl - - - com.google.protobuf - protobuf-java - - - - - - - org.alluxio - alluxio-core-common - ${project.version} - provided - - - org.alluxio - alluxio-core-transport - ${project.version} - provided - - - org.alluxio - alluxio-core-client-fs - ${project.version} - provided - - - org.alluxio - alluxio-core-server-common - ${project.version} - provided - - - - - 
com.google.guava - guava-testlib - test - - - - - org.alluxio - alluxio-core-common - ${project.version} - test-jar - test - - - org.alluxio - alluxio-underfs-local - ${project.version} - test - - - - - - - src/main/resources - true - - - - - - org.apache.maven.plugins - maven-jar-plugin - - - - test-jar - - - - - - exec-maven-plugin - org.codehaus.mojo - false - - - copy-lib-jars-selectively - install - - exec - - - ${build.path}/lib/copy_jars.sh - - ${project.artifactId} - ${basedir}/target/${project.artifactId}-${project.version}-jar-with-dependencies.jar - ${build.path}/../lib/${project.artifactId}-${project.version}.jar - - - - - - - maven-clean-plugin - - - - ${build.path}/../lib - - **/${project.artifactId}-*.jar - - - - - - - - - - - java11 - - 11 - - - - org.apache.hive - hive-metastore - ${hive-metastore.version} - compile - - - org.apache.logging.log4j - log4j-1.2-api - - - org.apache.logging.log4j - log4j-slf4j-impl - - - com.google.protobuf - protobuf-java - - - jdk.tools - jdk.tools - - - - - - - diff --git a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/HiveDatabase.java b/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/HiveDatabase.java deleted file mode 100644 index 92ec065e35de..000000000000 --- a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/HiveDatabase.java +++ /dev/null @@ -1,324 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.under.hive; - -import alluxio.AlluxioURI; -import alluxio.exception.AlluxioException; -import alluxio.exception.status.NotFoundException; -import alluxio.grpc.table.ColumnStatisticsInfo; -import alluxio.grpc.table.Layout; -import alluxio.grpc.table.layout.hive.PartitionInfo; -import alluxio.master.table.DatabaseInfo; -import alluxio.resource.CloseableResource; -import alluxio.table.common.UdbPartition; -import alluxio.table.common.layout.HiveLayout; -import alluxio.table.common.udb.PathTranslator; -import alluxio.table.common.udb.UdbBypassSpec; -import alluxio.table.common.udb.UdbConfiguration; -import alluxio.table.common.udb.UdbContext; -import alluxio.table.common.udb.UdbTable; -import alluxio.table.common.udb.UdbUtils; -import alluxio.table.common.udb.UnderDatabase; -import alluxio.table.under.hive.util.AbstractHiveClientPool; -import alluxio.table.under.hive.util.HiveClientPoolCache; -import alluxio.util.io.PathUtils; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Lists; -import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.PrincipalType; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.thrift.TException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import 
java.util.stream.Collectors; - -/** - * Hive database implementation. - */ -public class HiveDatabase implements UnderDatabase { - private static final Logger LOG = LoggerFactory.getLogger(HiveDatabase.class); - - private static final int MAX_PARTITION_COLUMN_STATISTICS = 10000; - - private final UdbContext mUdbContext; - private final UdbConfiguration mConfiguration; - /** the connection uri for the hive metastore. */ - private final String mConnectionUri; - /** the name of the hive db. */ - private final String mHiveDbName; - - private static final HiveClientPoolCache CLIENT_POOL_CACHE = new HiveClientPoolCache(); - /** Hive client is not thread-safe, so use a client pool for concurrency. */ - private final AbstractHiveClientPool mClientPool; - - private HiveDatabase(UdbContext udbContext, UdbConfiguration configuration, - String connectionUri, String hiveDbName) { - this(udbContext, - configuration, - connectionUri, - hiveDbName, - CLIENT_POOL_CACHE.getPool(connectionUri)); - } - - @VisibleForTesting - HiveDatabase(UdbContext udbContext, UdbConfiguration configuration, - String connectionUri, String hiveDbName, AbstractHiveClientPool clientPool) { - mUdbContext = udbContext; - mConfiguration = configuration; - mConnectionUri = connectionUri; - mHiveDbName = hiveDbName; - mClientPool = clientPool; - } - - /** - * Creates an instance of the Hive database UDB. 
- * - * @param udbContext the db context - * @param configuration the configuration - * @return the new instance - */ - public static HiveDatabase create(UdbContext udbContext, UdbConfiguration configuration) { - String connectionUri = udbContext.getConnectionUri(); - if (connectionUri == null || connectionUri.isEmpty()) { - throw new IllegalArgumentException( - "Hive udb connection uri cannot be empty: " + connectionUri); - } - String hiveDbName = udbContext.getUdbDbName(); - if (hiveDbName == null || hiveDbName.isEmpty()) { - throw new IllegalArgumentException("Hive database name cannot be empty: " + hiveDbName); - } - - return new HiveDatabase(udbContext, configuration, connectionUri, hiveDbName); - } - - @Override - public UdbContext getUdbContext() { - return mUdbContext; - } - - @Override - public DatabaseInfo getDatabaseInfo() throws IOException { - try (CloseableResource client = mClientPool.acquireClientResource()) { - Database hiveDb = client.get().getDatabase(mHiveDbName); - alluxio.grpc.table.PrincipalType type = alluxio.grpc.table.PrincipalType.USER; - if (Objects.equals(hiveDb.getOwnerType(), PrincipalType.ROLE)) { - type = alluxio.grpc.table.PrincipalType.ROLE; - } - return new DatabaseInfo(hiveDb.getLocationUri(), hiveDb.getOwnerName(), type, - hiveDb.getDescription(), hiveDb.getParameters()); - } catch (TException e) { - throw new IOException("Failed to get hive database " + mHiveDbName - + ". 
" + e.getMessage(), e); - } - } - - @Override - public String getType() { - return HiveDatabaseFactory.TYPE; - } - - @Override - public String getName() { - return mHiveDbName; - } - - @Override - public List getTableNames() throws IOException { - try (CloseableResource client = mClientPool.acquireClientResource()) { - return client.get().getAllTables(mHiveDbName); - } catch (TException e) { - throw new IOException("Failed to get hive tables: " + e.getMessage(), e); - } - } - - private PathTranslator mountAlluxioPaths(Table table, List partitions, - UdbBypassSpec bypassSpec) - throws IOException { - String tableName = table.getTableName(); - AlluxioURI ufsUri; - AlluxioURI alluxioUri = mUdbContext.getTableLocation(tableName); - String hiveUfsUri = table.getSd().getLocation(); - - try { - PathTranslator pathTranslator = new PathTranslator(); - if (bypassSpec.hasFullTable(tableName)) { - pathTranslator.addMapping(hiveUfsUri, hiveUfsUri); - return pathTranslator; - } - ufsUri = new AlluxioURI(table.getSd().getLocation()); - pathTranslator.addMapping( - UdbUtils.mountAlluxioPath(tableName, - ufsUri, - alluxioUri, - mUdbContext, - mConfiguration), - hiveUfsUri); - - for (Partition part : partitions) { - AlluxioURI partitionUri; - if (part.getSd() != null && part.getSd().getLocation() != null) { - partitionUri = new AlluxioURI(part.getSd().getLocation()); - if (!mConfiguration.getBoolean(Property.ALLOW_DIFF_PART_LOC_PREFIX) - && !ufsUri.isAncestorOf(partitionUri)) { - continue; - } - hiveUfsUri = part.getSd().getLocation(); - String partName = part.getValues().toString(); - try { - partName = Warehouse.makePartName(table.getPartitionKeys(), part.getValues()); - } catch (MetaException e) { - LOG.warn("Error making partition name for table {}, partition {}", tableName, - part.getValues().toString()); - } - if (bypassSpec.hasPartition(tableName, partName)) { - pathTranslator.addMapping(partitionUri.getPath(), partitionUri.getPath()); - continue; - } - alluxioUri = new 
AlluxioURI(PathUtils.concatPath( - mUdbContext.getTableLocation(tableName).getPath(), partName)); - - // mount partition path if it is not already mounted as part of the table path mount - pathTranslator.addMapping( - UdbUtils.mountAlluxioPath(tableName, - partitionUri, - alluxioUri, - mUdbContext, - mConfiguration), - hiveUfsUri); - } - } - return pathTranslator; - } catch (AlluxioException e) { - throw new IOException( - "Failed to mount table location. tableName: " + tableName - + " hiveUfsLocation: " + hiveUfsUri - + " AlluxioLocation: " + alluxioUri - + " error: " + e.getMessage(), e); - } - } - - @Override - public UdbTable getTable(String tableName, UdbBypassSpec bypassSpec) throws IOException { - try { - Table table; - List partitions; - List columnStats; - List partitionColumns; - Map> statsMap = new HashMap<>(); - // perform all the hive client operations, and release the client early. - try (CloseableResource client = mClientPool.acquireClientResource()) { - table = client.get().getTable(mHiveDbName, tableName); - - // Potentially expensive call - partitions = client.get().listPartitions(mHiveDbName, table.getTableName(), (short) -1); - - List colNames = table.getSd().getCols().stream().map(FieldSchema::getName) - .collect(Collectors.toList()); - columnStats = client.get().getTableColumnStatistics(mHiveDbName, tableName, colNames); - - // construct the partition statistics - List dataColumns = table.getSd().getCols().stream() - .map(org.apache.hadoop.hive.metastore.api.FieldSchema::getName) - .collect(Collectors.toList()); - partitionColumns = table.getPartitionKeys().stream() - .map(org.apache.hadoop.hive.metastore.api.FieldSchema::getName) - .collect(Collectors.toList()); - List partitionNames = partitions.stream() - .map(partition -> FileUtils.makePartName(partitionColumns, partition.getValues())) - .collect(Collectors.toList()); - - for (List partialPartitionNames : - Lists.partition(partitionNames, MAX_PARTITION_COLUMN_STATISTICS)) { - 
statsMap.putAll(client.get() - .getPartitionColumnStatistics(mHiveDbName, tableName, - partialPartitionNames, dataColumns) - .entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, - e -> e.getValue().stream().map(HiveUtils::toProto).collect(Collectors.toList()), - (e1, e2) -> e2))); - } - } - - PathTranslator pathTranslator = mountAlluxioPaths(table, partitions, bypassSpec); - List colStats = - columnStats.stream().map(HiveUtils::toProto).collect(Collectors.toList()); - // construct table layout - PartitionInfo partitionInfo = PartitionInfo.newBuilder() - .setDbName(getUdbContext().getDbName()) - .setTableName(tableName) - .addAllDataCols(HiveUtils.toProto(table.getSd().getCols())) - .setStorage(HiveUtils.toProto(table.getSd(), pathTranslator)) - .putAllParameters(table.getParameters()) - // ignore partition name - .build(); - Layout layout = Layout.newBuilder() - .setLayoutType(HiveLayout.TYPE) - .setLayoutData(partitionInfo.toByteString()) - // ignore spec and statistics for table layout - .build(); - - // create udb partitions info - List udbPartitions = new ArrayList<>(); - if (partitionColumns.isEmpty()) { - // unpartitioned table, generate a partition - PartitionInfo.Builder pib = PartitionInfo.newBuilder() - .setDbName(getUdbContext().getDbName()) - .setTableName(tableName) - .addAllDataCols(HiveUtils.toProto(table.getSd().getCols())) - .setStorage(HiveUtils.toProto(table.getSd(), pathTranslator)) - .setPartitionName(tableName) - .putAllParameters(table.getParameters()); - udbPartitions.add(new HivePartition( - new HiveLayout(pib.build(), Collections.emptyList()))); - } else { - for (Partition partition : partitions) { - String partName = FileUtils.makePartName(partitionColumns, partition.getValues()); - PartitionInfo.Builder pib = PartitionInfo.newBuilder() - .setDbName(getUdbContext().getDbName()) - .setTableName(tableName) - .addAllDataCols(HiveUtils.toProto(partition.getSd().getCols())) - .setStorage(HiveUtils.toProto(partition.getSd(), 
pathTranslator)) - .setPartitionName(partName) - .putAllParameters(partition.getParameters()); - if (partition.getValues() != null) { - pib.addAllValues(partition.getValues()); - } - udbPartitions.add(new HivePartition(new HiveLayout(pib.build(), - statsMap.getOrDefault(partName, Collections.emptyList())))); - } - } - - return new HiveTable(tableName, HiveUtils.toProtoSchema(table.getSd().getCols()), colStats, - HiveUtils.toProto(table.getPartitionKeys()), udbPartitions, layout, table); - } catch (NoSuchObjectException e) { - throw new NotFoundException("Table " + tableName + " does not exist.", e); - } catch (TException e) { - throw new IOException("Failed to get table: " + tableName + " error: " + e.getMessage(), e); - } - } -} diff --git a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/HiveDatabaseFactory.java b/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/HiveDatabaseFactory.java deleted file mode 100644 index 6784fbd23f80..000000000000 --- a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/HiveDatabaseFactory.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.under.hive; - -import alluxio.table.common.udb.UdbConfiguration; -import alluxio.table.common.udb.UdbContext; -import alluxio.table.common.udb.UnderDatabase; -import alluxio.table.common.udb.UnderDatabaseFactory; - -/** - * Factory to create database implementation. 
- */ -public class HiveDatabaseFactory implements UnderDatabaseFactory { - public static final String TYPE = "hive"; - - @Override - public String getType() { - return TYPE; - } - - @Override - public UnderDatabase create(UdbContext udbContext, UdbConfiguration configuration) { - return HiveDatabase.create(udbContext, configuration); - } -} diff --git a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/HivePartition.java b/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/HivePartition.java deleted file mode 100644 index 799ceb289ce9..000000000000 --- a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/HivePartition.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.under.hive; - -import alluxio.table.common.Layout; -import alluxio.table.common.UdbPartition; -import alluxio.table.common.layout.HiveLayout; - -import com.google.common.base.MoreObjects; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Hive table implementation. - */ -public class HivePartition implements UdbPartition { - private static final Logger LOG = LoggerFactory.getLogger(HivePartition.class); - - private final HiveLayout mLayout; - - /** - * Creates an instance. 
- * - * @param layout the layout - */ - public HivePartition(HiveLayout layout) { - mLayout = layout; - } - - @Override - public String getSpec() { - return mLayout.getSpec(); - } - - @Override - public Layout getLayout() { - return mLayout; - } - - @Override - public String toString() { - return MoreObjects.toStringHelper(this) - .add("Spec", getSpec()) - .add("Values", mLayout.getData().getValuesList()) - .add("PartitionName", mLayout.getData().getPartitionName()) - .add("DatabaseName", mLayout.getData().getDbName()) - .add("TableName", mLayout.getData().getTableName()) - .add("Parameters", mLayout.getData().getParametersMap()) - .toString(); - } -} diff --git a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/HiveTable.java b/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/HiveTable.java deleted file mode 100644 index e645839539f5..000000000000 --- a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/HiveTable.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.under.hive; - -import alluxio.grpc.table.ColumnStatisticsInfo; -import alluxio.grpc.table.FieldSchema; -import alluxio.grpc.table.Layout; -import alluxio.grpc.table.Schema; -import alluxio.table.common.UdbPartition; -import alluxio.table.common.udb.UdbTable; - -import org.apache.hadoop.hive.metastore.api.Table; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Collections; -import java.util.List; -import java.util.Map; - -/** - * Hive table implementation. - */ -public class HiveTable implements UdbTable { - private static final Logger LOG = LoggerFactory.getLogger(HiveTable.class); - - private final String mName; - private final Schema mSchema; - private final String mOwner; - private final List mStatistics; - private final List mPartitionKeys; - private final List mUdbPartitions; - private final Map mParameters; - private final Layout mLayout; - - /** - * Creates a new instance. - * - * @param name the table name - * @param schema the table schema - * @param statistics the table statistics - * @param cols partition keys - * @param udbPartitions udb partition list - * @param layout the table layout - * @param table hive table object - */ - public HiveTable(String name, Schema schema, List statistics, - List cols, List udbPartitions, Layout layout, Table table) { - mUdbPartitions = udbPartitions; - mName = name; - mSchema = schema; - mStatistics = statistics; - mPartitionKeys = cols; - mOwner = table.getOwner(); - mParameters = (table.getParameters() != null) ? 
table.getParameters() : Collections.emptyMap(); - mLayout = layout; - } - - @Override - public String getName() { - return mName; - } - - @Override - public Schema getSchema() { - return mSchema; - } - - @Override - public String getOwner() { - return mOwner; - } - - @Override - public Map getParameters() { - return mParameters; - } - - @Override - public List getPartitionCols() { - return mPartitionKeys; - } - - @Override - public List getStatistics() { - return mStatistics; - } - - @Override - public Layout getLayout() { - return mLayout; - } - - @Override - public List getPartitions() { - return mUdbPartitions; - } -} diff --git a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/HiveUtils.java b/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/HiveUtils.java deleted file mode 100644 index cdbbfbe2b2ac..000000000000 --- a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/HiveUtils.java +++ /dev/null @@ -1,287 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.under.hive; - -import alluxio.grpc.table.BinaryColumnStatsData; -import alluxio.grpc.table.BooleanColumnStatsData; -import alluxio.grpc.table.ColumnStatisticsData; -import alluxio.grpc.table.ColumnStatisticsInfo; -import alluxio.grpc.table.Date; -import alluxio.grpc.table.DateColumnStatsData; -import alluxio.grpc.table.Decimal; -import alluxio.grpc.table.DecimalColumnStatsData; -import alluxio.grpc.table.DoubleColumnStatsData; -import alluxio.grpc.table.LongColumnStatsData; -import alluxio.grpc.table.Schema; -import alluxio.grpc.table.StringColumnStatsData; -import alluxio.grpc.table.layout.hive.HiveBucketProperty; -import alluxio.grpc.table.layout.hive.SortingColumn; -import alluxio.grpc.table.layout.hive.Storage; -import alluxio.grpc.table.layout.hive.StorageFormat; -import alluxio.table.common.udb.PathTranslator; - -import com.google.protobuf.ByteString; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Order; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -/** - * Utilities for hive types. 
- */ -public class HiveUtils { - private HiveUtils() {} // prevent instantiation - - /** - * @param hiveSchema the hive schema - * @return the proto representation - */ - public static Schema toProtoSchema(List hiveSchema) { - Schema.Builder schemaBuilder = Schema.newBuilder(); - schemaBuilder.addAllCols(toProto(hiveSchema)); - return schemaBuilder.build(); - } - - /** - * @param hiveSchema the hive schema - * @return the proto representation - */ - public static List toProto(List hiveSchema) { - List list = new ArrayList<>(); - for (FieldSchema field : hiveSchema) { - alluxio.grpc.table.FieldSchema.Builder builder = alluxio.grpc.table.FieldSchema.newBuilder() - .setName(field.getName()) - .setType(field.getType()); // does not support complex types now - if (field.isSetComment()) { - builder.setComment(field.getComment()); - } - list.add(builder.build()); - } - return list; - } - - /** - * Convert from a StorageDescriptor to a Storage object. - * - * @param sd storage descriptor - * @param translator path translator - * @return storage proto object - */ - public static Storage toProto(StorageDescriptor sd, PathTranslator translator) - throws IOException { - if (sd == null) { - return Storage.getDefaultInstance(); - } - String serDe = sd.getSerdeInfo() == null ? null - : sd.getSerdeInfo().getSerializationLib(); - Map serdeLibMap = sd.getSerdeInfo() == null ? 
null - : sd.getSerdeInfo().getParameters(); - StorageFormat.Builder formatBuilder = StorageFormat.newBuilder() - .setInputFormat(sd.getInputFormat()) - .setOutputFormat(sd.getOutputFormat()); - - if (serdeLibMap != null) { - formatBuilder.putAllSerdelibParameters(serdeLibMap); - } - if (serDe != null) { - formatBuilder.setSerde(serDe); // Check SerDe info - } - Storage.Builder storageBuilder = Storage.newBuilder(); - List orderList = sd.getSortCols(); - List sortingColumns; - if (orderList == null) { - sortingColumns = Collections.emptyList(); - } else { - sortingColumns = orderList.stream().map( - order -> SortingColumn.newBuilder().setColumnName(order.getCol()) - .setOrder(order.getOrder() == 1 ? SortingColumn.SortingOrder.ASCENDING - : SortingColumn.SortingOrder.DESCENDING).build()) - .collect(Collectors.toList()); - } - - return storageBuilder.setStorageFormat(formatBuilder.build()) - .setLocation(translator.toAlluxioPath(sd.getLocation())) - .setBucketProperty(HiveBucketProperty.newBuilder().setBucketCount(sd.getNumBuckets()) - .addAllBucketedBy(sd.getBucketCols()).addAllSortedBy(sortingColumns).build()) - .setSkewed(sd.getSkewedInfo() != null && (sd.getSkewedInfo().getSkewedColNames()) != null - && !sd.getSkewedInfo().getSkewedColNames().isEmpty()) - .putAllSerdeParameters(sd.getParameters()).build(); - } - - /** - * Convert ColumnStatisticsObj to proto definition. 
- * - * @param colStats column statistics - * @return the proto form - */ - public static ColumnStatisticsInfo toProto(ColumnStatisticsObj colStats) { - ColumnStatisticsInfo.Builder builder = ColumnStatisticsInfo.newBuilder(); - builder.setColName(colStats.getColName()).setColType(colStats.getColType()); - if (colStats.isSetStatsData()) { - org.apache.hadoop.hive.metastore.api.ColumnStatisticsData statsData = colStats.getStatsData(); - if (statsData.isSetBooleanStats()) { - org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData data = - statsData.getBooleanStats(); - if (data != null) { - BooleanColumnStatsData.Builder booleanColumnBuilder = BooleanColumnStatsData.newBuilder() - .setNumTrues(data.getNumTrues()).setNumFalses(data.getNumFalses()) - .setNumNulls(data.getNumNulls()); - if (data.isSetBitVectors()) { - booleanColumnBuilder.setBitVectors(data.getBitVectors()); - } - builder.setData(ColumnStatisticsData.newBuilder() - .setBooleanStats(booleanColumnBuilder.build()).build()); - } - } - if (statsData.isSetDoubleStats()) { - org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData doubleStats = - statsData.getDoubleStats(); - if (doubleStats != null) { - builder.setData( - ColumnStatisticsData.newBuilder().setDoubleStats(toProto(doubleStats)).build()); - } - } - if (statsData.isSetLongStats()) { - org.apache.hadoop.hive.metastore.api.LongColumnStatsData longData = - statsData.getLongStats(); - - if (longData != null) { - builder.setData(ColumnStatisticsData.newBuilder() - .setLongStats(toProto(longData)).build()); - } - } - if (statsData.isSetStringStats()) { - org.apache.hadoop.hive.metastore.api.StringColumnStatsData stringData = - statsData.getStringStats(); - if (stringData != null) { - StringColumnStatsData.Builder stringColumnBuilder = StringColumnStatsData.newBuilder() - .setNumDistincts(stringData.getNumDVs()).setAvgColLen(stringData.getAvgColLen()) - .setMaxColLen(stringData.getMaxColLen()) - .setNumNulls(stringData.getNumNulls()); - if 
(stringData.isSetBitVectors()) { - stringColumnBuilder.setBitVectors(stringData.getBitVectors()); - } - builder.setData(ColumnStatisticsData.newBuilder() - .setStringStats(stringColumnBuilder.build()).build()); - } - } - if (statsData.isSetBinaryStats()) { - org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData data = - statsData.getBinaryStats(); - if (data != null) { - BinaryColumnStatsData.Builder binaryColumnBuilder = BinaryColumnStatsData.newBuilder() - .setMaxColLen(data.getMaxColLen()).setAvgColLen(data.getAvgColLen()) - .setNumNulls(data.getNumNulls()); - if (data.isSetBitVectors()) { - binaryColumnBuilder.setBitVectors(data.getBitVectors()); - } - builder.setData(ColumnStatisticsData.newBuilder() - .setBinaryStats(binaryColumnBuilder.build()).build()); - } - } - if (statsData.isSetDateStats()) { - org.apache.hadoop.hive.metastore.api.DateColumnStatsData data = - statsData.getDateStats(); - if (data != null) { - builder.setData(ColumnStatisticsData.newBuilder().setDateStats(toProto(data)).build()); - } - } - if (statsData.isSetDecimalStats()) { - org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData data = - statsData.getDecimalStats(); - if (data != null) { - builder.setData(ColumnStatisticsData.newBuilder().setDecimalStats(toProto(data)).build()); - } - } - } - return builder.build(); - } - - private static DateColumnStatsData toProto( - org.apache.hadoop.hive.metastore.api.DateColumnStatsData dateData) { - DateColumnStatsData.Builder builder = DateColumnStatsData.newBuilder(); - if (dateData.isSetBitVectors()) { - builder.setBitVectors(dateData.getBitVectors()); - } - builder.setNumNulls(dateData.getNumNulls()); - builder.setNumDistincts(dateData.getNumDVs()); - if (dateData.isSetHighValue()) { - builder.setHighValue( - Date.newBuilder().setDaysSinceEpoch(dateData.getHighValue().getDaysSinceEpoch()).build()); - } - if (dateData.isSetLowValue()) { - builder.setLowValue( - 
Date.newBuilder().setDaysSinceEpoch(dateData.getLowValue().getDaysSinceEpoch()).build()); - } - return builder.build(); - } - - private static DecimalColumnStatsData toProto( - org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData decimalData) { - DecimalColumnStatsData.Builder builder = DecimalColumnStatsData.newBuilder(); - if (decimalData.isSetBitVectors()) { - builder.setBitVectors(decimalData.getBitVectors()); - } - builder.setNumNulls(decimalData.getNumNulls()); - builder.setNumDistincts(decimalData.getNumDVs()); - if (decimalData.isSetHighValue() && decimalData.getHighValue().isSetUnscaled()) { - builder.setHighValue(Decimal.newBuilder().setScale(decimalData.getHighValue().getScale()) - .setUnscaled(ByteString.copyFrom(decimalData.getHighValue().getUnscaled())).build()); - } - if (decimalData.isSetLowValue() && decimalData.getLowValue().isSetUnscaled()) { - builder.setLowValue(Decimal.newBuilder().setScale(decimalData.getLowValue().getScale()) - .setUnscaled(ByteString.copyFrom(decimalData.getLowValue().getUnscaled())).build()); - } - return builder.build(); - } - - private static DoubleColumnStatsData toProto( - org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData doubleData) { - DoubleColumnStatsData.Builder builder = DoubleColumnStatsData.newBuilder(); - if (doubleData.isSetBitVectors()) { - builder.setBitVectors(doubleData.getBitVectors()); - } - builder.setNumNulls(doubleData.getNumNulls()); - builder.setNumDistincts(doubleData.getNumDVs()); - if (doubleData.isSetHighValue()) { - builder.setHighValue(doubleData.getHighValue()); - } - if (doubleData.isSetLowValue()) { - builder.setLowValue(doubleData.getLowValue()); - } - return builder.build(); - } - - private static LongColumnStatsData toProto( - org.apache.hadoop.hive.metastore.api.LongColumnStatsData longData) { - LongColumnStatsData.Builder builder = LongColumnStatsData.newBuilder(); - if (longData.isSetBitVectors()) { - builder.setBitVectors(longData.getBitVectors()); - } - 
builder.setNumNulls(longData.getNumNulls()); - builder.setNumDistincts(longData.getNumDVs()); - if (longData.isSetHighValue()) { - builder.setHighValue(longData.getHighValue()); - } - if (longData.isSetLowValue()) { - builder.setLowValue(longData.getLowValue()); - } - return builder.build(); - } -} diff --git a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/Property.java b/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/Property.java deleted file mode 100644 index 503ef38279e7..000000000000 --- a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/Property.java +++ /dev/null @@ -1,172 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.under.hive; - -import alluxio.table.common.udb.UdbProperty; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import javax.annotation.Nullable; - -/** - * This contains all the properties for this UDB. - */ -public final class Property extends UdbProperty { - private static final Logger LOG = LoggerFactory.getLogger(Property.class); - /** A map from default property key's string name to the key. */ - private static final Map DEFAULT_KEYS_MAP = new ConcurrentHashMap<>(); - - /** - * Create a alluxio.table.under.hive.Property instance. 
- * - * @param name the property name - * @param description the property description - * @param defaultValue the default value - */ - public Property(String name, String description, String defaultValue) { - super(name, description, defaultValue); - } - - /** - * UDB property builder. - */ - public static final class Builder { - private String mName; - private String mDescription; - private String mDefaultValue; - - /** - * @param name name of property - */ - public Builder(String name) { - mName = name; - } - - /** - * @param name name for the property - * @return the updated builder instance - */ - public Builder setName(String name) { - mName = name; - return this; - } - - /** - * @param defaultValue the property's default value - * @return the updated builder instance - */ - public Builder setDefaultValue(String defaultValue) { - mDefaultValue = defaultValue; - return this; - } - - /** - * @param description of the property - * @return the updated builder instance - */ - public Builder setDescription(String description) { - mDescription = description; - return this; - } - - /** - * Register the unregistered udb property. - * - * @return registered udb property - */ - public Property build() { - Property property = buildUnregistered(); - Preconditions.checkState( - Property.register(property), - "Cannot register existing alluxio.table.under.hive.Property \"%s\"", mName); - return property; - } - - /** - * Creates the Udb alluxio.table.under.hive.Property - * without registering it with default property list. - * - * @return udb property - */ - public Property buildUnregistered() { - Property property = new Property(mName, mDescription, mDefaultValue); - return property; - } - } - - /** - * Registers the given UDB alluxio.table.under.hive.Property to the global map. 
- * - * @param Property the udb property - * @return whether the udb property is successfully registered - */ - @VisibleForTesting - public static boolean register(Property Property) { - String name = Property.getName(); - if (DEFAULT_KEYS_MAP.containsKey(name)) { - return false; - } - - DEFAULT_KEYS_MAP.put(name, Property); - return true; - } - - /** - * Unregisters the given key from the global map. - * - * @param Property the property to unregister - */ - @VisibleForTesting - public static void unregister(Property Property) { - String name = Property.getName(); - DEFAULT_KEYS_MAP.remove(name); - } - - public static final Property ALLOW_DIFF_PART_LOC_PREFIX = - new Builder(Name.ALLOW_DIFF_PART_LOC_PREFIX) - .setDefaultValue("false") - .setDescription("Whether to mount partitions diff location prefix partitions") - .build(); - - @Override - public String getName() { - return mName; - } - - @Override - public String getDescription() { - return mDescription; - } - - /** - * @return the default value of udb property or null if value not set - */ - @Nullable - @Override - public String getDefaultValue() { - Object defaultValue = mDefaultValue; - return defaultValue == null ? null : defaultValue.toString(); - } - - /** - * Corresponding configurations of HIVE configurations. - */ - public static final class Name { - // Hive related properties - public static final String ALLOW_DIFF_PART_LOC_PREFIX = "allow.diff.partition.location.prefix"; - } -} diff --git a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/AbstractHiveClientPool.java b/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/AbstractHiveClientPool.java deleted file mode 100644 index 718ae1521656..000000000000 --- a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/AbstractHiveClientPool.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.under.hive.util; - -import alluxio.resource.CloseableResource; -import alluxio.resource.DynamicResourcePool; - -import org.apache.hadoop.hive.metastore.IMetaStoreClient; - -import java.io.IOException; - -/** - * A pool for hive clients. - */ -public abstract class AbstractHiveClientPool extends DynamicResourcePool { - protected AbstractHiveClientPool(Options options) { - super(options); - } - - /** - * @return a closeable resource for the hive client - */ - public abstract CloseableResource acquireClientResource() throws IOException; -} diff --git a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/CompatibleMetastoreClient.java b/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/CompatibleMetastoreClient.java deleted file mode 100644 index 6ba69d482b66..000000000000 --- a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/CompatibleMetastoreClient.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.under.hive.util; - -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.thrift.TApplicationException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.lang.reflect.InvocationHandler; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import javax.annotation.Nullable; - -class CompatibleMetastoreClient implements InvocationHandler { - private static final Logger LOG = LoggerFactory.getLogger(CompatibleMetastoreClient.class); - - private final IMetaStoreClient mDelegate; - private final HMSShim mCompat; - - CompatibleMetastoreClient(IMetaStoreClient delegate, @Nullable HMSShim compatibility) { - mDelegate = delegate; - mCompat = compatibility; - } - - @Override - public Object invoke(Object proxy, Method method, Object[] args) throws Throwable { - try { - return method.invoke(mDelegate, args); - } catch (InvocationTargetException delegateException) { - try { - if (mCompat != null - && delegateException.getCause().getClass() - .isAssignableFrom(TApplicationException.class)) { - LOG.debug("Attempting to call hive metastore with compatibility class {}", - mCompat.getClass().getName()); - return invokeCompatibility(method, args); - } - } catch (InvocationTargetException compatibilityException) { - if (compatibilityException.getCause().getClass() - .isAssignableFrom(TApplicationException.class)) { - LOG.warn("Invocation of compatibility for metastore client method {} failed.", - method.getName(), compatibilityException); - } else { - // compatibility worked but threw non TApplicationException, re-throwing cause. 
- throw compatibilityException.getCause(); - } - } catch (Throwable t) { - LOG.warn( - "Unable to invoke compatibility for metastore client method {}.", - method.getName(), t); - } - throw delegateException.getCause(); - } - } - - private Object invokeCompatibility(Method method, Object[] args) throws Throwable { - Class[] argTypes = getTypes(args); - Method compatibilityMethod = mCompat.getClass().getMethod(method.getName(), argTypes); - return compatibilityMethod.invoke(mCompat, args); - } - - private static Class[] getTypes(Object[] args) { - Class[] argTypes = new Class[args.length]; - for (int i = 0; i < args.length; ++i) { - argTypes[i] = args[i].getClass(); - } - return argTypes; - } -} - diff --git a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/DefaultHiveClientPool.java b/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/DefaultHiveClientPool.java deleted file mode 100644 index b27e756e81d7..000000000000 --- a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/DefaultHiveClientPool.java +++ /dev/null @@ -1,121 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.under.hive.util; - -import static alluxio.conf.PropertyKey.TABLE_UDB_HIVE_CLIENTPOOL_MAX; -import static alluxio.conf.PropertyKey.TABLE_UDB_HIVE_CLIENTPOOL_MIN; - -import alluxio.Constants; -import alluxio.conf.Configuration; -import alluxio.metrics.MetricKey; -import alluxio.metrics.MetricsSystem; -import alluxio.resource.CloseableResource; -import alluxio.util.ThreadFactoryUtils; - -import com.codahale.metrics.Counter; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaHookLoader; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.RetryingMetaStoreClient; -import org.apache.thrift.TException; - -import java.io.IOException; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ScheduledThreadPoolExecutor; -import javax.annotation.concurrent.ThreadSafe; - -/** - * A pool for hive clients, since hive clients are not thread safe. - */ -@ThreadSafe -public final class DefaultHiveClientPool extends AbstractHiveClientPool { - private static final ScheduledExecutorService GC_EXECUTOR = - new ScheduledThreadPoolExecutor(1, ThreadFactoryUtils.build("HiveClientPool-GC-%d", true)); - private static final HiveMetaHookLoader NOOP_HOOK = table -> null; - private static final Counter COUNTER = MetricsSystem.counter( - MetricKey.CLIENT_DEFAULT_HIVE_CLIENT_COUNT.getName()); - - private final long mGcThresholdMs; - private final String mConnectionUri; - - /** - * Creates a new hive client client pool. 
- * - * @param connectionUri the connect uri for the hive metastore - */ - public DefaultHiveClientPool(String connectionUri) { - super(Options.defaultOptions() - .setMinCapacity(Configuration.getInt(TABLE_UDB_HIVE_CLIENTPOOL_MIN)) - .setMaxCapacity(Configuration.getInt(TABLE_UDB_HIVE_CLIENTPOOL_MAX)) - .setGcIntervalMs(5L * Constants.MINUTE_MS) - .setGcExecutor(GC_EXECUTOR)); - mConnectionUri = connectionUri; - mGcThresholdMs = 5L * Constants.MINUTE_MS; - } - - @Override - protected void closeResource(IMetaStoreClient client) { - client.close(); - } - - @Override - protected IMetaStoreClient createNewResource() throws IOException { - // Hive uses/saves the thread context class loader. - ClassLoader currentClassLoader = Thread.currentThread().getContextClassLoader(); - try { - // use the extension class loader - Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader()); - HiveConf conf = new HiveConf(); - conf.verifyAndSet("hive.metastore.uris", mConnectionUri); - - IMetaStoreClient client = HMSClientFactory.newInstance( - RetryingMetaStoreClient.getProxy(conf, NOOP_HOOK, HiveMetaStoreClient.class.getName())); - return client; - } catch (NullPointerException | TException e) { - // HiveMetaStoreClient throws a NPE if the uri is not a uri for hive metastore - throw new IOException(String - .format("Failed to create client to hive metastore: %s. error: %s", mConnectionUri, - e.getMessage()), e); - } finally { - Thread.currentThread().setContextClassLoader(currentClassLoader); - } - } - - @Override - protected boolean isHealthy(IMetaStoreClient client) { - // there is no way to check if a hive client is connected. 
- // TODO(gpang): periodically and asynchronously check the health of clients - return true; - } - - @Override - protected Counter getMetricCounter() { - return COUNTER; - } - - @Override - protected boolean shouldGc(ResourceInternal clientResourceInternal) { - return System.currentTimeMillis() - clientResourceInternal - .getLastAccessTimeMs() > mGcThresholdMs; - } - - @Override - public CloseableResource acquireClientResource() throws IOException { - return new CloseableResource(acquire()) { - @Override - public void closeResource() { - release(get()); - } - }; - } -} diff --git a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/HMSClientFactory.java b/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/HMSClientFactory.java deleted file mode 100644 index 9295f2e54845..000000000000 --- a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/HMSClientFactory.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.under.hive.util; - -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.lang.reflect.Proxy; -import javax.annotation.Nullable; - -class HMSClientFactory { - private static final Logger LOG = LoggerFactory.getLogger(HMSClientFactory.class); - - static IMetaStoreClient newInstance(IMetaStoreClient delegate) { - HMSShim compatibility = null; - try { - compatibility = new HMSShim(delegate); - } catch (Throwable t) { - LOG.warn("Unable to initialize hive metastore compatibility client", t); - } - return newInstance(delegate, compatibility); - } - - private static IMetaStoreClient newInstance(IMetaStoreClient delegate, - @Nullable HMSShim compatibility) { - ClassLoader classLoader = IMetaStoreClient.class.getClassLoader(); - Class[] interfaces = new Class[] { IMetaStoreClient.class }; - CompatibleMetastoreClient handler = new CompatibleMetastoreClient(delegate, - compatibility); - return (IMetaStoreClient) Proxy.newProxyInstance(classLoader, interfaces, handler); - } -} diff --git a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/HMSShim.java b/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/HMSShim.java deleted file mode 100644 index 2d2a9e40f3a4..000000000000 --- a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/HMSShim.java +++ /dev/null @@ -1,171 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.under.hive.util; - -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.RetryingMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore; -import org.apache.thrift.TApplicationException; -import org.apache.thrift.TBase; -import org.apache.thrift.TException; -import org.apache.thrift.TServiceClient; - -import java.lang.reflect.Field; -import java.lang.reflect.InvocationHandler; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.lang.reflect.Proxy; - -/** - * Implements a shim layer for hive metastore client. - **/ -public class HMSShim implements HiveCompatibility { - private final TServiceClient mClient; - - /** - * Constructor for HMSShim. - * - * @param client another client as delegate - */ - public HMSShim(IMetaStoreClient client) { - // Because RetryingMetaStoreClient is itself a proxy, we need to get to the base class - while (Proxy.isProxyClass(client.getClass())) { - InvocationHandler handler = Proxy.getInvocationHandler(client); - if (handler.getClass().isAssignableFrom(RetryingMetaStoreClient.class)) { - client = getField(handler, "base"); - continue; - } - // Other handlers can be added here - throw new RuntimeException("Unknown proxy handler for IMetaStoreClient"); - } - mClient = getField(client, "client"); - } - - private static T getField(Object object, String fieldName) { - try { - Field field = object.getClass().getDeclaredField(fieldName); - T result = null; - if (field.isAccessible()) { - result = (T) field.get(object); - } else { - field.setAccessible(true); - result = (T) field.get(object); - field.setAccessible(false); - } - return result; - } catch (SecurityException | NoSuchFieldException - | IllegalArgumentException 
| IllegalAccessException e) { - throw new RuntimeException("Unable to access field " + fieldName + " through reflection", e); - } - } - - private static Table deepCopy(Table table) { - return table.deepCopy(); - } - - private void sendBase(String methodName, TBase args) throws TException { - try { - Method sendBase = TServiceClient.class.getDeclaredMethod( - "sendBase", String.class, TBase.class); - sendBase.setAccessible(true); - sendBase.invoke(mClient, methodName, args); - } catch (NoSuchMethodException | SecurityException - | IllegalAccessException | IllegalArgumentException | InvocationTargetException e) { - throw new RuntimeException("Unable to invoke sendBase", e); - } - } - - private void receiveBase(TBase result, String methodName) throws TException { - try { - Method receiveBase = TServiceClient.class.getDeclaredMethod( - "receiveBase", TBase.class, String.class); - receiveBase.setAccessible(true); - receiveBase.invoke(mClient, result, methodName); - receiveBase.setAccessible(false); - } catch (NoSuchMethodException | SecurityException - | IllegalAccessException | IllegalArgumentException - | InvocationTargetException e) { - throw new RuntimeException("Unable to invoke receiveBase", e); - } - } - - /* - * Based on https://github.com/apache/hive/blob/release-1.2.1/metastore/src - * /java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java#L1206 - */ - @Override - public Table getTable(String dbname, String name) - throws MetaException, TException, NoSuchObjectException { - return deepCopy(get_table(dbname, name)); - } - - @Override - public boolean tableExists(String dbname, String name) - throws MetaException, TException, NoSuchObjectException { - try { - get_table(dbname, name); - } catch (NoSuchObjectException e) { - return false; - } - return true; - } - - /* - * Copied from Hive 1.2.1 ThriftHiveMetastore.Client#get_table(String,String) - see - * https://raw.githubusercontent.com/apache/hive/release-1.2.1 - * 
/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop - * /hive/metastore/metastore/ThriftHiveMetastore.java - */ - private Table get_table(String dbname, String tbl_name) - throws MetaException, NoSuchObjectException, TException { - send_get_table(dbname, tbl_name); - return recv_get_table(); - } - - /* - * Copied from Hive 1.2.1 ThriftHiveMetastore.Client#send_get_table(String,String) - see - * https://raw.githubusercontent.com/apache/hive/release-1.2.1 - * /metastore/src/gen/thrift/gen-javabean/org/apache/hadoop - * /hive/metastore/metastore/ThriftHiveMetastore.java - */ - private void send_get_table(String dbname, String tbl_name) throws TException { - ThriftHiveMetastore.get_table_args args = new ThriftHiveMetastore.get_table_args(); - args.setDbname(dbname); - args.setTbl_name(tbl_name); - sendBase("get_table", args); - } - - /* - * Based on Hive 1.2.1 ThriftHiveMetastore.Client#recv_get_table() - see - * https://raw.githubusercontent.com/apache/hive/release-1.2.1 - * /metastore/src/gen/thrift/gen-javabean/org/apache/hadoop - * /hive/metastore/metastore/ThriftHiveMetastore.java - */ - private Table recv_get_table() - throws MetaException, NoSuchObjectException, TException { - ThriftHiveMetastore.get_table_result result = new ThriftHiveMetastore.get_table_result(); - receiveBase(result, "get_table"); - if (result.isSetSuccess()) { - return result.getSuccess(); - } - if (result.getO1() != null) { - throw result.getO1(); - } - if (result.getO2() != null) { - throw result.getO2(); - } - throw new TApplicationException(5, "get_table failed: unknown result"); - } -} diff --git a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/HiveClientPoolCache.java b/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/HiveClientPoolCache.java deleted file mode 100644 index 05f11750d5a0..000000000000 --- a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/HiveClientPoolCache.java +++ /dev/null @@ -1,45 
+0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.under.hive.util; - -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; - -/** - * This class implements a cache for Client connection pools for Hive Clients. - */ -public class HiveClientPoolCache { - private final Map mClientPools; - - /** - * Constructor for the Client Pool cache. - */ - public HiveClientPoolCache() { - mClientPools = new ConcurrentHashMap<>(); - } - - /** - * Get a hive client pool from the cache. - * - * @param connectionURI connection which serves as key - * @return hive client pool - */ - public AbstractHiveClientPool getPool(String connectionURI) { - return mClientPools.compute(connectionURI, (uri, pool) -> { - if (pool != null) { - return pool; - } else { - return new DefaultHiveClientPool(connectionURI); - } - }); - } -} diff --git a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/HiveCompatibility.java b/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/HiveCompatibility.java deleted file mode 100644 index 49e5927da465..000000000000 --- a/dora/table/server/underdb/hive/src/main/java/alluxio/table/under/hive/util/HiveCompatibility.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.table.under.hive.util; - -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.thrift.TException; - -/** - * Hive compatibility. - */ -public interface HiveCompatibility { - /** - * Get table operation. - * @param dbname database name - * @param name table name - * @return Table object - */ - Table getTable(String dbname, String name) - throws MetaException, TException, NoSuchObjectException; - - /** - * Test if a table exists. - * @param dbname database name - * @param name table name - * @return true if the table exists - */ - boolean tableExists(String dbname, String name) - throws MetaException, TException, NoSuchObjectException; -} diff --git a/dora/table/server/underdb/hive/src/main/resources/META-INF/services/alluxio.table.common.udb.UnderDatabaseFactory b/dora/table/server/underdb/hive/src/main/resources/META-INF/services/alluxio.table.common.udb.UnderDatabaseFactory deleted file mode 100644 index a203e06a1b10..000000000000 --- a/dora/table/server/underdb/hive/src/main/resources/META-INF/services/alluxio.table.common.udb.UnderDatabaseFactory +++ /dev/null @@ -1,12 +0,0 @@ -# -# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 -# (the "License"). 
You may not use this work except in compliance with the License, which is -# available at www.apache.org/licenses/LICENSE-2.0 -# -# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -# either express or implied, as more fully set forth in the License. -# -# See the NOTICE file distributed with this work for information regarding copyright ownership. -# - -alluxio.table.under.hive.HiveDatabaseFactory diff --git a/dora/table/server/underdb/hive/src/test/java/alluxio/table/under/hive/HiveDatabaseTest.java b/dora/table/server/underdb/hive/src/test/java/alluxio/table/under/hive/HiveDatabaseTest.java deleted file mode 100644 index df594d587f91..000000000000 --- a/dora/table/server/underdb/hive/src/test/java/alluxio/table/under/hive/HiveDatabaseTest.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.under.hive; - -import static org.junit.Assert.assertEquals; - -import alluxio.table.common.udb.UdbConfiguration; -import alluxio.table.common.udb.UdbContext; - -import com.google.common.collect.ImmutableMap; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; - -import java.util.HashMap; -import java.util.Map; - -public class HiveDatabaseTest { - - private static final String DB_NAME = "test"; - private static final Map CONF = new HashMap<>(); - - @Rule - public ExpectedException mExpection = ExpectedException.none(); - - private UdbContext mUdbContext; - private UdbConfiguration mUdbConf; - - @Before - public void before() { - mUdbContext = - new UdbContext(null, null, "hive", "thrift://not_running:9083", DB_NAME, DB_NAME); - mUdbConf = new UdbConfiguration(CONF); - } - - @Test - public void create() { - assertEquals(DB_NAME, HiveDatabase.create(mUdbContext, mUdbConf).getName()); - } - - @Test - public void createEmptyName() { - mExpection.expect(IllegalArgumentException.class); - UdbContext udbContext = - new UdbContext(null, null, "hive", "thrift://not_running:9083", "", DB_NAME); - assertEquals(DB_NAME, - HiveDatabase.create(udbContext, new UdbConfiguration(ImmutableMap.of())).getName()); - } - - @Test - public void createNullName() { - mExpection.expect(IllegalArgumentException.class); - UdbContext udbContext = - new UdbContext(null, null, "hive", "thrift://not_running:9083", null, DB_NAME); - assertEquals(DB_NAME, - HiveDatabase.create(udbContext, new UdbConfiguration(ImmutableMap.of())).getName()); - } - - @Test - public void createEmptyConnectionUri() { - mExpection.expect(IllegalArgumentException.class); - UdbContext udbContext = new UdbContext(null, null, "hive", "", DB_NAME, DB_NAME); - assertEquals(DB_NAME, HiveDatabase.create(udbContext, - new UdbConfiguration(ImmutableMap.of())).getName()); - } - - @Test - public void createNullConnectionUri() { - 
mExpection.expect(IllegalArgumentException.class); - UdbContext udbContext = new UdbContext(null, null, "hive", null, DB_NAME, DB_NAME); - assertEquals(DB_NAME, HiveDatabase.create(udbContext, - new UdbConfiguration(ImmutableMap.of())).getName()); - } - - @Test - public void createWithProps() { - Map props = ImmutableMap.of( - "prop1", "value1", - "prop2", "value2" - ); - assertEquals(DB_NAME, HiveDatabase.create(mUdbContext, new UdbConfiguration(props)).getName()); - } -} diff --git a/dora/table/server/underdb/hive/src/test/java/alluxio/table/under/hive/HiveUtilsTest.java b/dora/table/server/underdb/hive/src/test/java/alluxio/table/under/hive/HiveUtilsTest.java deleted file mode 100644 index 4d10d1507fe8..000000000000 --- a/dora/table/server/underdb/hive/src/test/java/alluxio/table/under/hive/HiveUtilsTest.java +++ /dev/null @@ -1,383 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.table.under.hive; - -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; - -import alluxio.grpc.table.ColumnStatisticsInfo; -import alluxio.util.CommonUtils; - -import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; -import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.Date; -import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; -import org.apache.hadoop.hive.metastore.api.Decimal; -import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; -import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; -import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; -import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; -import org.junit.Test; - -import java.nio.ByteBuffer; -import java.util.Random; -import java.util.concurrent.ThreadLocalRandom; - -public class HiveUtilsTest { - - private Random mRandom = ThreadLocalRandom.current(); - - @Test - public void protoColStatsNoData() { - ColumnStatisticsObj hiveColStats = new ColumnStatisticsObj(); - hiveColStats.setColName("colName"); - hiveColStats.setColType("colType"); - verifyColumnStats(hiveColStats); - } - - // data.setStringStats(); - - @Test - public void protoColStatsWithBinaryData() { - ColumnStatisticsObj hiveColStats = new ColumnStatisticsObj(); - hiveColStats.setColName("colName"); - hiveColStats.setColType("colType"); - - ColumnStatisticsData data = new ColumnStatisticsData(); - - // verify empty data - BinaryColumnStatsData binaryData = new BinaryColumnStatsData(); - data.setBinaryStats(binaryData); - hiveColStats.setStatsData(data); - verifyColumnStats(hiveColStats); - - // verify non-empty data - binaryData.setAvgColLen(mRandom.nextDouble()); - 
binaryData.setBitVectors(CommonUtils.randomAlphaNumString(5)); - binaryData.setMaxColLen(mRandom.nextLong()); - binaryData.setNumNulls(mRandom.nextLong()); - data.setBinaryStats(binaryData); - hiveColStats.setStatsData(data); - verifyColumnStats(hiveColStats); - } - - @Test - public void protoColStatsWithBooleanData() { - ColumnStatisticsObj hiveColStats = new ColumnStatisticsObj(); - hiveColStats.setColName("colName"); - hiveColStats.setColType("colType"); - - ColumnStatisticsData data = new ColumnStatisticsData(); - - // verify empty data - BooleanColumnStatsData booleanData = new BooleanColumnStatsData(); - data.setBooleanStats(booleanData); - hiveColStats.setStatsData(data); - verifyColumnStats(hiveColStats); - - // verify non-empty data - booleanData.setBitVectors(CommonUtils.randomAlphaNumString(5)); - booleanData.setNumNulls(mRandom.nextLong()); - booleanData.setNumFalses(mRandom.nextLong()); - booleanData.setNumTrues(mRandom.nextLong()); - data.setBooleanStats(booleanData); - hiveColStats.setStatsData(data); - verifyColumnStats(hiveColStats); - } - - @Test - public void protoColStatsWithDateData() { - ColumnStatisticsObj hiveColStats = new ColumnStatisticsObj(); - hiveColStats.setColName("colName"); - hiveColStats.setColType("colType"); - - ColumnStatisticsData data = new ColumnStatisticsData(); - - // verify empty data - DateColumnStatsData dateData = new DateColumnStatsData(); - data.setDateStats(dateData); - hiveColStats.setStatsData(data); - verifyColumnStats(hiveColStats); - - // verify non-empty data - dateData.setBitVectors(CommonUtils.randomAlphaNumString(5)); - dateData.setNumNulls(mRandom.nextLong()); - dateData.setHighValue(new Date(mRandom.nextLong())); - dateData.setLowValue(new Date(mRandom.nextLong())); - dateData.setNumDVs(mRandom.nextLong()); - data.setDateStats(dateData); - hiveColStats.setStatsData(data); - verifyColumnStats(hiveColStats); - - // verify null column values - dateData.setHighValue(null); - dateData.setLowValue(null); - 
data.setDateStats(dateData); - hiveColStats.setStatsData(data); - verifyColumnStats(hiveColStats); - } - - @Test - public void protoColStatsWithDecimalData() { - ColumnStatisticsObj hiveColStats = new ColumnStatisticsObj(); - hiveColStats.setColName("colName"); - hiveColStats.setColType("colType"); - - ColumnStatisticsData data = new ColumnStatisticsData(); - - // verify empty data - DecimalColumnStatsData decimalData = new DecimalColumnStatsData(); - data.setDecimalStats(decimalData); - hiveColStats.setStatsData(data); - verifyColumnStats(hiveColStats); - - // verify non-empty data - decimalData.setBitVectors(CommonUtils.randomAlphaNumString(5)); - decimalData.setNumNulls(mRandom.nextLong()); - decimalData.setHighValue( - new Decimal(ByteBuffer.wrap(CommonUtils.randomBytes(5)), (short) mRandom.nextInt())); - decimalData.setLowValue( - new Decimal(ByteBuffer.wrap(CommonUtils.randomBytes(5)), (short) mRandom.nextInt())); - decimalData.setNumDVs(mRandom.nextLong()); - data.setDecimalStats(decimalData); - hiveColStats.setStatsData(data); - verifyColumnStats(hiveColStats); - - // verify null column values - decimalData.setHighValue(null); - decimalData.setLowValue(null); - data.setDecimalStats(decimalData); - hiveColStats.setStatsData(data); - verifyColumnStats(hiveColStats); - } - - @Test - public void protoColStatsWithDoubleData() { - ColumnStatisticsObj hiveColStats = new ColumnStatisticsObj(); - hiveColStats.setColName("colName"); - hiveColStats.setColType("colType"); - - ColumnStatisticsData data = new ColumnStatisticsData(); - - // verify empty data - DoubleColumnStatsData doubleData = new DoubleColumnStatsData(); - data.setDoubleStats(doubleData); - hiveColStats.setStatsData(data); - verifyColumnStats(hiveColStats); - - // verify non-empty data - doubleData.setBitVectors(CommonUtils.randomAlphaNumString(5)); - doubleData.setNumNulls(mRandom.nextLong()); - doubleData.setHighValue(mRandom.nextDouble()); - doubleData.setLowValue(mRandom.nextDouble()); - 
doubleData.setNumDVs(mRandom.nextLong()); - data.setDoubleStats(doubleData); - hiveColStats.setStatsData(data); - verifyColumnStats(hiveColStats); - } - - @Test - public void protoColStatsWithLongData() { - ColumnStatisticsObj hiveColStats = new ColumnStatisticsObj(); - hiveColStats.setColName("colName"); - hiveColStats.setColType("colType"); - - ColumnStatisticsData data = new ColumnStatisticsData(); - - // verify empty data - LongColumnStatsData longData = new LongColumnStatsData(); - data.setLongStats(longData); - hiveColStats.setStatsData(data); - verifyColumnStats(hiveColStats); - - // verify non-empty data - longData.setBitVectors(CommonUtils.randomAlphaNumString(5)); - longData.setNumNulls(mRandom.nextLong()); - longData.setHighValue(mRandom.nextLong()); - longData.setLowValue(mRandom.nextLong()); - longData.setNumDVs(mRandom.nextLong()); - data.setLongStats(longData); - hiveColStats.setStatsData(data); - verifyColumnStats(hiveColStats); - } - - @Test - public void protoColStatsWithStringData() { - ColumnStatisticsObj hiveColStats = new ColumnStatisticsObj(); - hiveColStats.setColName("colName"); - hiveColStats.setColType("colType"); - - ColumnStatisticsData data = new ColumnStatisticsData(); - - // verify empty data - StringColumnStatsData stringData = new StringColumnStatsData(); - data.setStringStats(stringData); - hiveColStats.setStatsData(data); - verifyColumnStats(hiveColStats); - - // verify non-empty data - stringData.setBitVectors(CommonUtils.randomAlphaNumString(5)); - stringData.setNumNulls(mRandom.nextLong()); - stringData.setNumDVs(mRandom.nextLong()); - stringData.setAvgColLen(mRandom.nextDouble()); - stringData.setMaxColLen(mRandom.nextLong()); - data.setStringStats(stringData); - hiveColStats.setStatsData(data); - verifyColumnStats(hiveColStats); - } - - private void verifyColumnStats(ColumnStatisticsObj hiveColStats) { - ColumnStatisticsInfo colStats = HiveUtils.toProto(hiveColStats); - assertEquals(hiveColStats.getColName(), 
colStats.getColName()); - assertEquals(hiveColStats.getColType(), colStats.getColType()); - assertEquals(hiveColStats.isSetStatsData(), colStats.hasData()); - if (hiveColStats.isSetStatsData()) { - ColumnStatisticsData hiveData = hiveColStats.getStatsData(); - alluxio.grpc.table.ColumnStatisticsData data = colStats.getData(); - - // verify binary - assertEquals(hiveData.isSetBinaryStats(), data.hasBinaryStats()); - if (hiveData.isSetBinaryStats()) { - BinaryColumnStatsData hiveBinary = hiveData.getBinaryStats(); - alluxio.grpc.table.BinaryColumnStatsData binary = data.getBinaryStats(); - assertEquals(hiveBinary.isSetBitVectors(), binary.hasBitVectors()); - if (hiveBinary.isSetBitVectors()) { - assertEquals(hiveBinary.getBitVectors(), binary.getBitVectors()); - } - assertEquals(hiveBinary.getAvgColLen(), binary.getAvgColLen(), 0.01); - assertEquals(hiveBinary.getMaxColLen(), binary.getMaxColLen()); - assertEquals(hiveBinary.getNumNulls(), binary.getNumNulls()); - } - - // verify boolean - assertEquals(hiveData.isSetBooleanStats(), data.hasBooleanStats()); - if (hiveData.isSetBooleanStats()) { - BooleanColumnStatsData hiveBoolean = hiveData.getBooleanStats(); - alluxio.grpc.table.BooleanColumnStatsData bool = data.getBooleanStats(); - assertEquals(hiveBoolean.isSetBitVectors(), bool.hasBitVectors()); - if (hiveBoolean.isSetBitVectors()) { - assertEquals(hiveBoolean.getBitVectors(), bool.getBitVectors()); - } - assertEquals(hiveBoolean.getNumFalses(), bool.getNumFalses()); - assertEquals(hiveBoolean.getNumTrues(), bool.getNumTrues()); - assertEquals(hiveBoolean.getNumNulls(), bool.getNumNulls()); - } - - // verify date - assertEquals(hiveData.isSetDateStats(), data.hasDateStats()); - if (hiveData.isSetDateStats()) { - DateColumnStatsData hiveDate = hiveData.getDateStats(); - alluxio.grpc.table.DateColumnStatsData date = data.getDateStats(); - assertEquals(hiveDate.isSetBitVectors(), date.hasBitVectors()); - if (hiveDate.isSetBitVectors()) { - 
assertEquals(hiveDate.getBitVectors(), date.getBitVectors()); - } - assertEquals(hiveDate.getNumNulls(), date.getNumNulls()); - assertEquals(hiveDate.getNumDVs(), date.getNumDistincts()); - assertEquals(hiveDate.isSetHighValue(), date.hasHighValue()); - if (hiveDate.isSetHighValue()) { - assertEquals(hiveDate.getHighValue().getDaysSinceEpoch(), - date.getHighValue().getDaysSinceEpoch()); - } - assertEquals(hiveDate.isSetLowValue(), date.hasLowValue()); - if (hiveDate.isSetLowValue()) { - assertEquals(hiveDate.getLowValue().getDaysSinceEpoch(), - date.getLowValue().getDaysSinceEpoch()); - } - } - - // verify decimal - assertEquals(hiveData.isSetDecimalStats(), data.hasDecimalStats()); - if (hiveData.isSetDecimalStats()) { - DecimalColumnStatsData hiveDecimal = hiveData.getDecimalStats(); - alluxio.grpc.table.DecimalColumnStatsData decimal = data.getDecimalStats(); - assertEquals(hiveDecimal.isSetBitVectors(), decimal.hasBitVectors()); - if (hiveDecimal.isSetBitVectors()) { - assertEquals(hiveDecimal.getBitVectors(), decimal.getBitVectors()); - } - assertEquals(hiveDecimal.getNumNulls(), decimal.getNumNulls()); - assertEquals(hiveDecimal.getNumDVs(), decimal.getNumDistincts()); - assertEquals(hiveDecimal.isSetHighValue(), decimal.hasHighValue()); - if (hiveDecimal.isSetHighValue()) { - assertEquals(hiveDecimal.getHighValue().getScale(), decimal.getHighValue().getScale()); - assertArrayEquals(hiveDecimal.getHighValue().getUnscaled(), - decimal.getHighValue().getUnscaled().toByteArray()); - } - assertEquals(hiveDecimal.isSetLowValue(), decimal.hasLowValue()); - if (hiveDecimal.isSetLowValue()) { - assertEquals(hiveDecimal.getLowValue().getScale(), decimal.getLowValue().getScale()); - assertArrayEquals(hiveDecimal.getLowValue().getUnscaled(), - decimal.getLowValue().getUnscaled().toByteArray()); - } - } - - // verify double - assertEquals(hiveData.isSetDoubleStats(), data.hasDoubleStats()); - if (hiveData.isSetDoubleStats()) { - DoubleColumnStatsData hiveDouble = 
hiveData.getDoubleStats(); - alluxio.grpc.table.DoubleColumnStatsData dbl = data.getDoubleStats(); - assertEquals(hiveDouble.isSetBitVectors(), dbl.hasBitVectors()); - if (hiveDouble.isSetBitVectors()) { - assertEquals(hiveDouble.getBitVectors(), dbl.getBitVectors()); - } - assertEquals(hiveDouble.getNumNulls(), dbl.getNumNulls()); - assertEquals(hiveDouble.getNumDVs(), dbl.getNumDistincts()); - assertEquals(hiveDouble.isSetHighValue(), dbl.hasHighValue()); - if (hiveDouble.isSetHighValue()) { - assertEquals(hiveDouble.getHighValue(), dbl.getHighValue(), 0.01); - } - assertEquals(hiveDouble.isSetLowValue(), dbl.hasLowValue()); - if (hiveDouble.isSetLowValue()) { - assertEquals(hiveDouble.getLowValue(), dbl.getLowValue(), 0.01); - } - } - - // verify long - assertEquals(hiveData.isSetLongStats(), data.hasLongStats()); - if (hiveData.isSetLongStats()) { - LongColumnStatsData hiveLong = hiveData.getLongStats(); - alluxio.grpc.table.LongColumnStatsData dbl = data.getLongStats(); - assertEquals(hiveLong.isSetBitVectors(), dbl.hasBitVectors()); - if (hiveLong.isSetBitVectors()) { - assertEquals(hiveLong.getBitVectors(), dbl.getBitVectors()); - } - assertEquals(hiveLong.getNumNulls(), dbl.getNumNulls()); - assertEquals(hiveLong.getNumDVs(), dbl.getNumDistincts()); - assertEquals(hiveLong.isSetHighValue(), dbl.hasHighValue()); - if (hiveLong.isSetHighValue()) { - assertEquals(hiveLong.getHighValue(), dbl.getHighValue()); - } - assertEquals(hiveLong.isSetLowValue(), dbl.hasLowValue()); - if (hiveLong.isSetLowValue()) { - assertEquals(hiveLong.getLowValue(), dbl.getLowValue()); - } - } - - // verify string - assertEquals(hiveData.isSetStringStats(), data.hasStringStats()); - if (hiveData.isSetStringStats()) { - StringColumnStatsData hiveString = hiveData.getStringStats(); - alluxio.grpc.table.StringColumnStatsData string = data.getStringStats(); - assertEquals(hiveString.isSetBitVectors(), string.hasBitVectors()); - if (hiveString.isSetBitVectors()) { - 
assertEquals(hiveString.getBitVectors(), string.getBitVectors()); - } - assertEquals(hiveString.getAvgColLen(), string.getAvgColLen(), 0.01); - assertEquals(hiveString.getMaxColLen(), string.getMaxColLen()); - assertEquals(hiveString.getNumNulls(), string.getNumNulls()); - assertEquals(hiveString.getNumDVs(), string.getNumDistincts()); - } - } - } -} diff --git a/dora/table/server/underdb/pom.xml b/dora/table/server/underdb/pom.xml deleted file mode 100644 index 208663c7e9a9..000000000000 --- a/dora/table/server/underdb/pom.xml +++ /dev/null @@ -1,87 +0,0 @@ - - - 4.0.0 - - alluxio-table-server - org.alluxio - 301-SNAPSHOT - - alluxio-table-server-underdb - pom - Alluxio Table - Server - UnderDB - Alluxio table underDB implementations - - - hive - glue - - - - - ${project.parent.parent.parent.parent.basedir}/build - - - - - - org.alluxio - alluxio-table-base - ${project.version} - provided - - - org.alluxio - alluxio-table-server-common - ${project.version} - provided - - - - - - - org.apache.maven.plugins - maven-shade-plugin - - - shade - package - - shade - - - ${project.artifactId}-${project.version}-jar-with-dependencies - - - - - - *:* - - LICENSE - META-INF/LICENSE - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - - - diff --git a/dora/table/shell/pom.xml b/dora/table/shell/pom.xml deleted file mode 100644 index badb2dd89440..000000000000 --- a/dora/table/shell/pom.xml +++ /dev/null @@ -1,52 +0,0 @@ - - - 4.0.0 - - alluxio-table - org.alluxio - 301-SNAPSHOT - - Alluxio Table - Shell - alluxio-table-shell - jar - - - - ${project.parent.parent.parent.basedir}/build - - - - - org.alluxio - alluxio-table-client - ${project.version} - - - org.alluxio - alluxio-core-common - ${project.version} - - - org.alluxio - alluxio-shell - ${project.version} - - - org.alluxio - alluxio-table-server-common - ${project.version} - - - diff --git a/dora/table/shell/src/main/java/alluxio/cli/table/TableShell.java 
b/dora/table/shell/src/main/java/alluxio/cli/table/TableShell.java deleted file mode 100644 index 6bd8615a2e7a..000000000000 --- a/dora/table/shell/src/main/java/alluxio/cli/table/TableShell.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.cli.table; - -import alluxio.ClientContext; -import alluxio.cli.AbstractShell; -import alluxio.cli.Command; -import alluxio.cli.CommandUtils; -import alluxio.client.file.FileSystemContext; -import alluxio.client.table.TableMasterClient; -import alluxio.conf.AlluxioConfiguration; -import alluxio.conf.Configuration; -import alluxio.master.MasterClientContext; - -import java.util.Map; - -/** - * A shell implementation which is used to load the commands for interacting with the Alluxio - * table service. - */ -public class TableShell extends AbstractShell { - private static final String SHELL_NAME = "table"; - - /** - * Construct a new instance of {@link TableShell}. - */ - public TableShell() { - super(null, null, Configuration.global()); - } - - /** - * Manage Alluxio extensions. 
- * - * @param args array of arguments given by the user's input from the terminal - */ - public static void main(String[] args) { - TableShell tableShell = new TableShell(); - System.exit(tableShell.run(args)); - } - - @Override - public Map loadCommands() { - return CommandUtils.loadCommands(TableShell.class.getPackage().getName(), - new Class[] {AlluxioConfiguration.class, TableMasterClient.class, FileSystemContext.class}, - new Object[] {mConfiguration, mCloser.register(TableMasterClient.Factory.create( - MasterClientContext.newBuilder(ClientContext.create(mConfiguration)).build())), - FileSystemContext.create(mConfiguration)} - ); - } - - @Override - public String getShellName() { - return SHELL_NAME; - } -} diff --git a/dora/table/shell/src/main/java/alluxio/cli/table/TableShellUtils.java b/dora/table/shell/src/main/java/alluxio/cli/table/TableShellUtils.java deleted file mode 100644 index 3640af5e1e3f..000000000000 --- a/dora/table/shell/src/main/java/alluxio/cli/table/TableShellUtils.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.cli.table; - -import alluxio.grpc.table.SyncStatus; - -import org.slf4j.Logger; - -import java.util.Map; - -/** - * A utility class for the table shell. - */ -public class TableShellUtils { - private static final String SHELL_NAME = "table"; - - private TableShellUtils() {} // prevent instantiation - - /** - * Prints out the sync status. 
- * - * @param status the sync status to print - * @param logger the logger to errors log to - * @param maxErrors a max number of errors to print to stdout - */ - public static void printSyncStatus(SyncStatus status, Logger logger, int maxErrors) { - System.out.println("# Tables ignored: " + status.getTablesIgnoredCount()); - System.out.println("# Tables unchanged: " + status.getTablesUnchangedCount()); - System.out.println("# Tables updated: " + status.getTablesUpdatedCount()); - System.out.println("# Tables removed: " + status.getTablesRemovedCount()); - System.out.println("# Tables with errors: " + status.getTablesErrorsCount()); - - // write the tables to the log - for (String table : status.getTablesIgnoredList()) { - logger.info("Table ignored: {}", table); - } - for (String table : status.getTablesUnchangedList()) { - logger.info("Table unchanged: {}", table); - } - for (String table : status.getTablesUpdatedList()) { - logger.info("Table updated: {}", table); - } - for (String table : status.getTablesRemovedList()) { - logger.info("Table removed: {}", table); - } - - if (status.getTablesErrorsCount() > 0) { - System.out.println("\nSync errors: "); - } - int count = 0; - for (Map.Entry entry : status.getTablesErrorsMap().entrySet()) { - String message = - String.format("Table %s failed to sync: %s", entry.getKey(), entry.getValue()); - if (count < maxErrors) { - System.out.println(message); - } else if (count == maxErrors) { - System.out.println("... 
(remaining can be found in the log)"); - } - logger.error(message); - count++; - } - } -} diff --git a/dora/table/shell/src/main/java/alluxio/cli/table/command/AbstractTableCommand.java b/dora/table/shell/src/main/java/alluxio/cli/table/command/AbstractTableCommand.java deleted file mode 100644 index 51ec21ce82ec..000000000000 --- a/dora/table/shell/src/main/java/alluxio/cli/table/command/AbstractTableCommand.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.cli.table.command; - -import alluxio.cli.fs.command.AbstractDistributedJobCommand; -import alluxio.cli.table.TableShell; -import alluxio.client.file.FileSystemContext; -import alluxio.client.table.TableMasterClient; -import alluxio.conf.AlluxioConfiguration; -import alluxio.exception.AlluxioException; -import alluxio.exception.status.InvalidArgumentException; - -import org.apache.commons.cli.CommandLine; - -import java.io.IOException; - -/** - * A class which should be extended when implementing commands for the - * {@link TableShell}. - */ -public abstract class AbstractTableCommand extends AbstractDistributedJobCommand { - - protected final AlluxioConfiguration mConf; - protected final TableMasterClient mClient; - - /** - * Creates a new instance of {@link AbstractTableCommand}. 
- * - * @param conf the alluxio configuration - * @param client the client interface which can be used to make RPCs against the table master - * @param fsContext the filesystem of Alluxio - */ - public AbstractTableCommand(AlluxioConfiguration conf, TableMasterClient client, - FileSystemContext fsContext) { - super(fsContext); - mConf = conf; - mClient = client; - } - - @Override - public abstract String getCommandName(); - - @Override - public abstract void validateArgs(CommandLine cl) throws InvalidArgumentException; - - @Override - public abstract int run(CommandLine cl) throws AlluxioException, IOException; -} diff --git a/dora/table/shell/src/main/java/alluxio/cli/table/command/AttachDatabaseCommand.java b/dora/table/shell/src/main/java/alluxio/cli/table/command/AttachDatabaseCommand.java deleted file mode 100644 index 0219930492f3..000000000000 --- a/dora/table/shell/src/main/java/alluxio/cli/table/command/AttachDatabaseCommand.java +++ /dev/null @@ -1,136 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.cli.table.command; - -import alluxio.cli.CommandUtils; -import alluxio.cli.table.TableShellUtils; -import alluxio.client.file.FileSystemContext; -import alluxio.client.table.TableMasterClient; -import alluxio.conf.AlluxioConfiguration; -import alluxio.exception.status.AlluxioStatusException; -import alluxio.exception.status.InvalidArgumentException; -import alluxio.grpc.table.SyncStatus; - -import com.google.common.collect.Maps; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.Options; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Properties; - -/** - * A command which can be used to attach a UDB to the Alluxio master's table service. - */ -public class AttachDatabaseCommand extends AbstractTableCommand { - private static final Logger LOG = LoggerFactory.getLogger(AttachDatabaseCommand.class); - private static final int PRINT_MAX_ERRORS = 10; - - private static final String COMMAND_NAME = "attachdb"; - - private static final Option OPTION_OPTION = Option.builder("o") - .longOpt("option") - .required(false) - .hasArg(true) - .numberOfArgs(2) - .argName("key=value") - .valueSeparator('=') - .desc("options associated with this database or UDB") - .build(); - private static final Option DB_OPTION = Option.builder() - .longOpt("db") - .required(false) - .hasArg(true) - .numberOfArgs(1) - .argName("alluxio db name") - .desc("The name of the db in Alluxio. If unset, will use the udb db name.") - .build(); - private static final Option IGNORE_SYNC_ERRORS_OPTION = Option.builder() - .longOpt("ignore-sync-errors") - .required(false) - .hasArg(false) - .desc("Ignores sync errors, and keeps the database attached.") - .build(); - - /** - * Creates a new instance of {@link AttachDatabaseCommand}. 
- * - * @param conf alluxio configuration - * @param client the table master client used to make RPCs - * @param fsContext the filesystem of Alluxio - */ - public AttachDatabaseCommand(AlluxioConfiguration conf, TableMasterClient client, - FileSystemContext fsContext) { - super(conf, client, fsContext); - } - - @Override - public Options getOptions() { - return new Options() - .addOption(OPTION_OPTION) - .addOption(DB_OPTION) - .addOption(IGNORE_SYNC_ERRORS_OPTION); - } - - @Override - public void validateArgs(CommandLine cl) throws InvalidArgumentException { - CommandUtils.checkNumOfArgsEquals(this, cl, 3); - } - - @Override - public int run(CommandLine cl) throws AlluxioStatusException { - String[] args = cl.getArgs(); - String udbType = args[0]; - String udbConnectionUri = args[1]; - String udbDbName = args[2]; - String dbName = udbDbName; - - if (cl.hasOption(DB_OPTION.getLongOpt())) { - String optDbName = cl.getOptionValue(DB_OPTION.getLongOpt()); - if (optDbName != null && !optDbName.isEmpty()) { - dbName = optDbName; - } - } - - Properties p = cl.getOptionProperties(OPTION_OPTION.getOpt()); - boolean ignoreSyncErrors = cl.hasOption(IGNORE_SYNC_ERRORS_OPTION.getLongOpt()); - - SyncStatus status = mClient - .attachDatabase(udbType, udbConnectionUri, udbDbName, dbName, Maps.fromProperties(p), - ignoreSyncErrors); - TableShellUtils.printSyncStatus(status, LOG, PRINT_MAX_ERRORS); - if (!ignoreSyncErrors && status.getTablesErrorsCount() > 0) { - System.out.println(String.format( - "%nDatabase is not attached. 
To keep it attached even with errors, please re-run '%s' " - + "with the '--%s' option.", - COMMAND_NAME, IGNORE_SYNC_ERRORS_OPTION.getLongOpt())); - } - return 0; - } - - @Override - public String getCommandName() { - return COMMAND_NAME; - } - - @Override - public String getDescription() { - return "Attaches a database to the Alluxio catalog from an under DB"; - } - - @Override - public String getUsage() { - return "attachdb [-o|--option ] [--db ] [--ignore-sync-errors] " - + " "; - } -} diff --git a/dora/table/shell/src/main/java/alluxio/cli/table/command/DetachDatabaseCommand.java b/dora/table/shell/src/main/java/alluxio/cli/table/command/DetachDatabaseCommand.java deleted file mode 100644 index c969e2aa5e33..000000000000 --- a/dora/table/shell/src/main/java/alluxio/cli/table/command/DetachDatabaseCommand.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.cli.table.command; - -import alluxio.AlluxioURI; -import alluxio.cli.CommandUtils; -import alluxio.client.file.FileSystemContext; -import alluxio.client.table.TableMasterClient; -import alluxio.conf.AlluxioConfiguration; -import alluxio.conf.PropertyKey; -import alluxio.exception.status.AlluxioStatusException; -import alluxio.exception.status.InvalidArgumentException; -import alluxio.util.io.PathUtils; - -import org.apache.commons.cli.CommandLine; - -/** - * This command removes a database from the table master. - * - * After calling this, the database and all table metadata is removed. 
To regain access to this - * table, the corresponding {@link AttachDatabaseCommand} will need to be called again. - */ -public class DetachDatabaseCommand extends AbstractTableCommand { - /** - * Creates a new instance of {@link DetachDatabaseCommand}. - * - * @param conf alluxio configuration - * @param client the table master client - * @param fsContext the filesystem of Alluxio - */ - public DetachDatabaseCommand(AlluxioConfiguration conf, TableMasterClient client, - FileSystemContext fsContext) { - super(conf, client, fsContext); - } - - @Override - public void validateArgs(CommandLine cli) throws InvalidArgumentException { - CommandUtils.checkNumOfArgsEquals(this, cli, 1); - } - - @Override - public String getCommandName() { - return "detachdb"; - } - - @Override - public String getUsage() { - return "detachdb "; - } - - @Override - public String getDescription() { - return "Detach a database with the given name from the Alluxio catalog master's namespace"; - } - - @Override - public int run(CommandLine cli) throws AlluxioStatusException { - String dbName = cli.getArgs()[0]; - if (mClient.detachDatabase(dbName)) { - AlluxioURI path = new AlluxioURI(PathUtils - .concatPath(mConf.get(PropertyKey.TABLE_CATALOG_PATH), dbName)); - System.out.println("Successfully detached db " + dbName - + ". Please unmount and clean up the alluxio location at " + path - + " before attaching another database named " + dbName); - } - return 0; - } -} diff --git a/dora/table/shell/src/main/java/alluxio/cli/table/command/ListDatabasesCommand.java b/dora/table/shell/src/main/java/alluxio/cli/table/command/ListDatabasesCommand.java deleted file mode 100644 index c8fe610ca659..000000000000 --- a/dora/table/shell/src/main/java/alluxio/cli/table/command/ListDatabasesCommand.java +++ /dev/null @@ -1,129 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.cli.table.command; - -import alluxio.cli.CommandUtils; -import alluxio.client.file.FileSystemContext; -import alluxio.client.table.TableMasterClient; -import alluxio.conf.AlluxioConfiguration; -import alluxio.exception.AlluxioException; -import alluxio.exception.status.AlluxioStatusException; -import alluxio.exception.status.InvalidArgumentException; -import alluxio.grpc.table.Constraint; -import alluxio.grpc.table.Partition; -import alluxio.grpc.table.TableInfo; - -import org.apache.commons.cli.CommandLine; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -/** - * List information about attached databases and tables in the Alluxio catalog. - * - * This command has similar function to what a {@code SHOW TABLES} or {@code SHOW DATABASES} query - * would return but makes it easier to query on a local machine what the Alluxio catalog - * currently stores without needing to boot up a presto instance. - */ -public class ListDatabasesCommand extends AbstractTableCommand { - - /** - * Creates a new instance of {@link ListDatabasesCommand}. 
- * - * @param conf alluxio configuration - * @param client the table master client - * @param fsContext the filesystem of Alluxio - */ - public ListDatabasesCommand(AlluxioConfiguration conf, TableMasterClient client, - FileSystemContext fsContext) { - super(conf, client, fsContext); - } - - @Override - public String getCommandName() { - return "ls"; - } - - @Override - public String getDescription() { - return "list information about attached databases"; - } - - @Override - public String getUsage() { - return "ls [ [
]]"; - } - - @Override - public void validateArgs(CommandLine cl) throws InvalidArgumentException { - CommandUtils.checkNumOfArgsNoMoreThan(this, cl, 2); - } - - @Override - public int run(CommandLine cl) throws AlluxioException, IOException { - // Guaranteed to have 0 to 2 args. - String[] args = cl.getArgs(); - switch (args.length) { - case 0: - return listDatabases(); - case 1: - return listTables(args[0]); - case 2: - return listTable(args[0], args[1]); - default: - return 1; - } - } - - /** - * Print attached databases to stdout. - * - * @return 0 on success, any non-zero value otherwise - */ - public int listDatabases() throws AlluxioStatusException, IOException { - sortAndPrint(new ArrayList<>(mClient.getAllDatabases())); - return 0; - } - - /** - * Print list of tables stdout. - * - * @param db the database to list the tables of - * @return 0 on success, any non-zero value otherwise - */ - public int listTables(String db) throws AlluxioStatusException { - sortAndPrint(new ArrayList<>(mClient.getAllTables(db))); - return 0; - } - - private void sortAndPrint(List items) { - items.sort(String::compareTo); - items.forEach(System.out::println); - } - - /** - * Print table information to stdout. 
- * - * @param db the database the table exists in - * @param tableName the name of the table to dump information for - * @return 0 on success, any non-zero value otherwise - */ - public int listTable(String db, String tableName) throws AlluxioStatusException { - TableInfo table = mClient.getTable(db, tableName); - System.out.println(table); - List partitionList = mClient.readTable(db, tableName, - Constraint.getDefaultInstance()); - partitionList.forEach(System.out::println); - return 0; - } -} diff --git a/dora/table/shell/src/main/java/alluxio/cli/table/command/LoadTableCommand.java b/dora/table/shell/src/main/java/alluxio/cli/table/command/LoadTableCommand.java deleted file mode 100644 index 202ad9b60074..000000000000 --- a/dora/table/shell/src/main/java/alluxio/cli/table/command/LoadTableCommand.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.cli.table.command; - -import alluxio.AlluxioURI; -import alluxio.annotation.PublicApi; -import alluxio.cli.CommandUtils; -import alluxio.cli.fs.command.DistributedLoadUtils; -import alluxio.client.file.FileSystemContext; -import alluxio.client.table.TableMasterClient; -import alluxio.conf.AlluxioConfiguration; -import alluxio.exception.AlluxioException; -import alluxio.exception.status.AlluxioStatusException; -import alluxio.exception.status.InvalidArgumentException; -import alluxio.table.common.CatalogPathUtils; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.Options; - -import java.io.IOException; -import javax.annotation.concurrent.ThreadSafe; - -/** - * Loads table into Alluxio space, makes it resident in memory. - */ -@ThreadSafe -@PublicApi -public class LoadTableCommand extends AbstractTableCommand { - - /** - * Constructs a new instance to load table into Alluxio space. - * - * @param conf the alluxio configuration - * @param client the client interface which can be used to make RPCs against the table master - * @param fsContext the filesystem of Alluxio - */ - public LoadTableCommand(AlluxioConfiguration conf, TableMasterClient client, - FileSystemContext fsContext) { - super(conf, client, fsContext); - } - - @Override - public String getUsage() { - return "load " - + DistributedLoadUtils.getDistLoadCommonUsage() - + "
"; - } - - @Override - public String getCommandName() { - return "load"; - } - - @Override - public String getDescription() { - return "Loads table into Alluxio space. Currently only support hive table."; - } - - @Override - public Options getOptions() { - return DistributedLoadUtils.getDistLoadCommonOptions(); - } - - @Override - public void validateArgs(CommandLine cl) throws InvalidArgumentException { - CommandUtils.checkNumOfArgsEquals(this, cl, 2); - } - - @Override - public int run(CommandLine cl) throws AlluxioException, IOException { - System.out.println("***Tips:Load table command only support hive table for now.***"); - String[] args = cl.getArgs(); - String dbName = args[0]; - String tableName = args[1]; - if (!tableExists(dbName, tableName)) { - System.out.printf("Failed to load table %s.%s: table is not exit.%n", dbName, tableName); - return 0; - } - // Only support hive table for now. - String udbType = "hive"; - // To load table into Alluxio space, we get the SDS table's Alluxio parent path first and - // then load data under the path. For now, each table have one single parent path generated - // by CatalogPathUtils#getTablePathUdb. - // The parent path is mounted by SDS, it's mapping of the under table's UFS path in Alluxio. - // e.g. - // attached - // [SDS]default.test <===== [hive]default.test - // mount - // [SDS]alluxio:///catalog/default/tables/test/hive/ <===== [hive]hdfs:///.../default.db/test/ - // PLEASE NOTE: If Alluxio support different parent path, this statement can not guaranteed - // to be correct. - AlluxioURI path = CatalogPathUtils.getTablePathUdb(dbName, tableName, udbType); - System.out.printf("Loading table %s.%s...%n", dbName, tableName); - return DistributedLoadUtils.distributedLoad(this, cl, path.getPath()); - } - - private boolean tableExists(String dbName, String tableName) { - try { - // If getTable method called succeed, the table is exists. 
- mClient.getTable(dbName, tableName); - return true; - } catch (AlluxioStatusException e) { - return false; - } - } -} diff --git a/dora/table/shell/src/main/java/alluxio/cli/table/command/SyncDatabaseCommand.java b/dora/table/shell/src/main/java/alluxio/cli/table/command/SyncDatabaseCommand.java deleted file mode 100644 index 0d6c3cc2448c..000000000000 --- a/dora/table/shell/src/main/java/alluxio/cli/table/command/SyncDatabaseCommand.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.cli.table.command; - -import alluxio.cli.CommandUtils; -import alluxio.cli.table.TableShellUtils; -import alluxio.client.file.FileSystemContext; -import alluxio.client.table.TableMasterClient; -import alluxio.conf.AlluxioConfiguration; -import alluxio.exception.status.AlluxioStatusException; -import alluxio.exception.status.InvalidArgumentException; -import alluxio.grpc.table.SyncStatus; - -import org.apache.commons.cli.CommandLine; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A command which can be used to sync a database with the underlying udb. - */ -public class SyncDatabaseCommand extends AbstractTableCommand { - private static final Logger LOG = LoggerFactory.getLogger(SyncDatabaseCommand.class); - private static final int PRINT_MAX_ERRORS = 10; - - /** - * Creates a new instance of {@link SyncDatabaseCommand}. 
- * - * @param conf alluxio configuration - * @param client the table master client - * @param fsContext the filesystem of Alluxio - */ - public SyncDatabaseCommand(AlluxioConfiguration conf, TableMasterClient client, - FileSystemContext fsContext) { - super(conf, client, fsContext); - } - - @Override - public void validateArgs(CommandLine cli) throws InvalidArgumentException { - CommandUtils.checkNumOfArgsEquals(this, cli, 1); - } - - @Override - public String getCommandName() { - return "sync"; - } - - @Override - public String getUsage() { - return "sync "; - } - - @Override - public String getDescription() { - return "Sync a database with the given name with the UDB"; - } - - @Override - public int run(CommandLine cli) throws AlluxioStatusException { - SyncStatus status = mClient.syncDatabase(cli.getArgs()[0]); - TableShellUtils.printSyncStatus(status, LOG, PRINT_MAX_ERRORS); - return 0; - } -} diff --git a/dora/table/shell/src/main/java/alluxio/cli/table/command/TransformStatusCommand.java b/dora/table/shell/src/main/java/alluxio/cli/table/command/TransformStatusCommand.java deleted file mode 100644 index 5d166a694d8a..000000000000 --- a/dora/table/shell/src/main/java/alluxio/cli/table/command/TransformStatusCommand.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.cli.table.command; - -import alluxio.cli.CommandUtils; -import alluxio.client.file.FileSystemContext; -import alluxio.client.table.TableMasterClient; -import alluxio.conf.AlluxioConfiguration; -import alluxio.exception.AlluxioException; -import alluxio.exception.status.InvalidArgumentException; -import alluxio.grpc.table.TransformJobInfo; - -import org.apache.commons.cli.CommandLine; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import javax.annotation.concurrent.ThreadSafe; - -/** - * Check transformation status. - */ -@ThreadSafe -public final class TransformStatusCommand extends AbstractTableCommand { - private static final Logger LOG = LoggerFactory.getLogger(TransformStatusCommand.class); - - private static final String COMMAND_NAME = "transformStatus"; - - /** - * creates the command. - * - * @param conf alluxio configuration - * @param client the table master client used to make RPCs - * @param fsContext the filesystem of Alluxio - */ - public TransformStatusCommand(AlluxioConfiguration conf, TableMasterClient client, - FileSystemContext fsContext) { - super(conf, client, fsContext); - } - - @Override - public String getCommandName() { - return COMMAND_NAME; - } - - @Override - public String getUsage() { - return COMMAND_NAME + " []"; - } - - @Override - public String getDescription() { - return "Check status of transformations."; - } - - @Override - public void validateArgs(CommandLine cl) throws InvalidArgumentException { - CommandUtils.checkNumOfArgsNoMoreThan(this, cl, 1); - } - - private String toString(TransformJobInfo info) { - StringBuilder sb = new StringBuilder(); - sb.append("database: ").append(info.getDbName()).append("\n"); - sb.append("table: ").append(info.getTableName()).append("\n"); - sb.append("transformation: ").append(info.getDefinition()).append("\n"); - sb.append("job ID: ").append(info.getJobId()).append("\n"); - sb.append("job status: 
").append(info.getJobStatus()).append("\n"); - if (!info.getJobError().isEmpty()) { - sb.append("job error: ").append(info.getJobError()); - } - return sb.toString(); - } - - @Override - public int run(CommandLine cl) throws IOException, AlluxioException { - String[] args = cl.getArgs(); - if (args.length == 0) { - for (TransformJobInfo info : mClient.getAllTransformJobInfo()) { - System.out.println(toString(info)); - System.out.println(); - } - } else { - long jobId = Long.parseLong(args[0]); - TransformJobInfo info = mClient.getTransformJobInfo(jobId); - System.out.println(toString(info)); - } - return 0; - } -} diff --git a/dora/table/shell/src/main/java/alluxio/cli/table/command/TransformTableCommand.java b/dora/table/shell/src/main/java/alluxio/cli/table/command/TransformTableCommand.java deleted file mode 100644 index 6d0775bc6eca..000000000000 --- a/dora/table/shell/src/main/java/alluxio/cli/table/command/TransformTableCommand.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.cli.table.command; - -import alluxio.cli.CommandUtils; -import alluxio.client.file.FileSystemContext; -import alluxio.client.table.TableMasterClient; -import alluxio.conf.AlluxioConfiguration; -import alluxio.exception.AlluxioException; -import alluxio.exception.status.InvalidArgumentException; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.Options; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import javax.annotation.concurrent.ThreadSafe; - -/** - * Transforms a structured table in Alluxio. - */ -@ThreadSafe -public final class TransformTableCommand extends AbstractTableCommand { - private static final Logger LOG = LoggerFactory.getLogger(TransformTableCommand.class); - - private static final String COMMAND_NAME = "transform"; - - private static final Option DEFINITION_OPTION = Option.builder("d") - .longOpt("definition") - .required(false) - .hasArg(true) - .numberOfArgs(1) - .build(); - - /** - * creates the command. - * - * @param conf alluxio configuration - * @param client the table master client used to make RPCs - * @param fsContext the filesystem of Alluxio - */ - public TransformTableCommand(AlluxioConfiguration conf, TableMasterClient client, - FileSystemContext fsContext) { - super(conf, client, fsContext); - } - - @Override - public String getCommandName() { - return COMMAND_NAME; - } - - @Override - public String getUsage() { - return COMMAND_NAME + "
[-d ]"; - } - - @Override - public Options getOptions() { - return new Options().addOption(DEFINITION_OPTION); - } - - @Override - public String getDescription() { - return "Transform files representing a structured table under an Alluxio directory." - + "\n\n" - + "Files are coalesced and converted to parquet format." - + "\n"; - } - - @Override - public void validateArgs(CommandLine cl) throws InvalidArgumentException { - CommandUtils.checkNumOfArgsEquals(this, cl, 2); - } - - @Override - public int run(CommandLine cl) throws IOException, AlluxioException { - String[] args = cl.getArgs(); - String databaseName = args[0]; - String tableName = args[1]; - String definition = ""; - - if (cl.hasOption(DEFINITION_OPTION.getLongOpt())) { - String optDefinition = cl.getOptionValue(DEFINITION_OPTION.getLongOpt()); - if (optDefinition != null && !optDefinition.isEmpty()) { - definition = optDefinition.trim(); - } - } - - long jobId = mClient.transformTable(databaseName, tableName, definition); - System.out.println("Started transformation job with job ID " + jobId + ", " - + "you can monitor the status of the job with " - + "'./bin/alluxio table transformStatus " + jobId + "'."); - return 0; - } -} diff --git a/dora/table/shell/src/test/java/alluxio/cli/table/TransformTableCommandTest.java b/dora/table/shell/src/test/java/alluxio/cli/table/TransformTableCommandTest.java deleted file mode 100644 index 1b9202984507..000000000000 --- a/dora/table/shell/src/test/java/alluxio/cli/table/TransformTableCommandTest.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. 
- * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.cli.table; - -import static org.junit.Assert.assertEquals; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import alluxio.annotation.dora.DoraTestTodoItem; -import alluxio.cli.table.command.TransformTableCommand; -import alluxio.client.table.TableMasterClient; -import alluxio.conf.Configuration; - -import org.junit.Ignore; -import org.junit.Test; -import org.mockito.ArgumentCaptor; -import org.mockito.ArgumentMatchers; - -/** - * Test cases for TransformTableCommand. - */ -@DoraTestTodoItem(action = DoraTestTodoItem.Action.REMOVE, owner = "bowen", - comment = "table module is deprecated") -@Ignore("ignored during dora transition") -public class TransformTableCommandTest { - - @Test - public void transform() throws Exception { - transformInternal(null, ""); - transformInternal("-d abc", "abc"); - } - - private void transformInternal(String definition, String expected) throws Exception { - TableMasterClient client = mock(TableMasterClient.class); - when(client.transformTable(ArgumentMatchers.anyString(), ArgumentMatchers.anyString(), - ArgumentMatchers.anyString())).thenReturn(0L); - TransformTableCommand command = - new TransformTableCommand(Configuration.global(), client, null); - - ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(String.class); - - if (definition != null) { - command.run(command.parseAndValidateArgs("db", "table", definition)); - } else { - command.run(command.parseAndValidateArgs("db", "table")); - } - verify(client) - .transformTable(ArgumentMatchers.anyString(), ArgumentMatchers.anyString(), - argumentCaptor.capture()); - assertEquals(expected, argumentCaptor.getValue()); - } -} diff --git a/dora/tests/src/test/java/alluxio/job/plan/transform/format/TableReaderIntegrationTest.java 
b/dora/tests/src/test/java/alluxio/job/plan/transform/format/TableReaderIntegrationTest.java deleted file mode 100644 index 8f6cd0c16b3b..000000000000 --- a/dora/tests/src/test/java/alluxio/job/plan/transform/format/TableReaderIntegrationTest.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format; - -import alluxio.AlluxioURI; -import alluxio.exception.ExceptionMessage; -import alluxio.job.JobIntegrationTest; -import alluxio.job.plan.transform.PartitionInfo; - -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.junit.rules.TemporaryFolder; - -import java.util.ArrayList; -import java.util.HashMap; - -/** - * Unit tests for {@link TableReader}. 
- */ -public class TableReaderIntegrationTest extends JobIntegrationTest { - @Rule - public TemporaryFolder mTmpFolder = new TemporaryFolder(); - - @Rule - public ExpectedException mThrown = ExpectedException.none(); - - private PartitionInfo mPartitionInfo = new PartitionInfo("serde", "inputformat", - new HashMap<>(), new HashMap<>(), new ArrayList<>()); - - @Test - public void createReaderWithoutScheme() throws Exception { - AlluxioURI uri = new AlluxioURI("/CREATE_READER_WITHOUT_SCHEME"); - mThrown.expect(IllegalArgumentException.class); - mThrown.expectMessage(ExceptionMessage.TRANSFORM_TABLE_URI_LACKS_SCHEME.getMessage(uri)); - TableReader.create(uri, mPartitionInfo).close(); - } - - @Test - public void createAlluxioReaderWithoutAuthority() throws Exception { - AlluxioURI uri = new AlluxioURI("alluxio", null, "/CREATE_ALLUXIO_READER_WITHOUT_AUTHORITY"); - mThrown.expect(IllegalArgumentException.class); - mThrown.expectMessage(ExceptionMessage.TRANSFORM_TABLE_URI_LACKS_AUTHORITY.getMessage(uri)); - TableReader.create(uri, mPartitionInfo).close(); - } -} diff --git a/dora/tests/src/test/java/alluxio/job/plan/transform/format/TableWriterIntegrationTest.java b/dora/tests/src/test/java/alluxio/job/plan/transform/format/TableWriterIntegrationTest.java deleted file mode 100644 index 09cbc73bdb61..000000000000 --- a/dora/tests/src/test/java/alluxio/job/plan/transform/format/TableWriterIntegrationTest.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.job.plan.transform.format; - -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import alluxio.AlluxioURI; -import alluxio.annotation.dora.DoraTestTodoItem; -import alluxio.exception.ExceptionMessage; -import alluxio.job.JobIntegrationTest; -import alluxio.job.plan.transform.format.parquet.ParquetSchema; -import alluxio.job.plan.transform.format.parquet.ParquetTestUtils; -import alluxio.uri.Authority; - -import org.apache.commons.io.FileUtils; -import org.junit.Ignore; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.junit.rules.TemporaryFolder; - -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; - -/** - * Unit tests for {@link TableWriter}. - */ -@DoraTestTodoItem(action = DoraTestTodoItem.Action.REMOVE, owner = "bowen", - comment = "remove if the table module is deprecated") -@Ignore -public class TableWriterIntegrationTest extends JobIntegrationTest { - @Rule - public TemporaryFolder mTmpFolder = new TemporaryFolder(); - - @Rule - public ExpectedException mThrown = ExpectedException.none(); - - private TableWriter createWriter(AlluxioURI uri) throws IOException { - TableSchema schema = new ParquetSchema(ParquetTestUtils.SCHEMA); - return TableWriter.create(schema, uri); - } - - @Test - public void createLocalWriterWithNonExistingParents() throws IOException { - File file = mTmpFolder.newFile(); - File parentDir = file.getParentFile(); - FileUtils.deleteDirectory(parentDir); - AlluxioURI uri = new AlluxioURI("file:///" + file.toString()); - assertFalse(Files.exists(parentDir.toPath())); - createWriter(uri).close(); - assertTrue(Files.exists(file.toPath())); - } - - @Test - public void createAlluxioWriterWithNonExistingParents() throws Exception { - AlluxioURI uri = new AlluxioURI("alluxio", - Authority.fromString(mFsContext.getMasterAddress().toString()), - "/NON_EXISTENT_DIR/NON_EXISTENT_FILE"); - 
assertFalse(mFileSystem.exists(uri.getParent())); - assertFalse(mFileSystem.exists(uri)); - createWriter(uri).close(); - assertTrue(mFileSystem.exists(uri)); - } - - @Test - public void createWriterWithoutScheme() throws Exception { - AlluxioURI uri = new AlluxioURI("/CREATE_WRITER_WITHOUT_SCHEME"); - mThrown.expect(IllegalArgumentException.class); - mThrown.expectMessage(ExceptionMessage.TRANSFORM_TABLE_URI_LACKS_SCHEME.getMessage(uri)); - createWriter(uri).close(); - } - - @Test - public void createAlluxioWriterWithoutAuthority() throws Exception { - AlluxioURI uri = new AlluxioURI("alluxio", null, "/CREATE_ALLUXIO_WRITER_WITHOUT_AUTHORITY"); - mThrown.expect(IllegalArgumentException.class); - mThrown.expectMessage(ExceptionMessage.TRANSFORM_TABLE_URI_LACKS_AUTHORITY.getMessage(uri)); - createWriter(uri).close(); - } -} diff --git a/dora/tests/src/test/java/alluxio/job/plan/transform/format/parquet/ParquetTestUtils.java b/dora/tests/src/test/java/alluxio/job/plan/transform/format/parquet/ParquetTestUtils.java deleted file mode 100644 index cfeeadbb964b..000000000000 --- a/dora/tests/src/test/java/alluxio/job/plan/transform/format/parquet/ParquetTestUtils.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.job.plan.transform.format.parquet; - -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericData.Record; - -import java.util.ArrayList; -import java.util.List; - -/** - * Test utils related to parquet files. 
- */ -public class ParquetTestUtils { - private ParquetTestUtils() {} // Prevent initialization - - /** Parquet Schema. */ - public static final Schema SCHEMA; - /** Parquet record. */ - public static final Record RECORD; - /** Parquet columns. */ - public static final String[] COLUMNS = new String[]{"a", "b", "c"}; - /** Parquet column values. */ - public static final Integer[] VALUES = new Integer[]{1, 2, 3}; - - static { - List fields = new ArrayList<>(COLUMNS.length); - for (String column : COLUMNS) { - fields.add(new Schema.Field(column, Schema.create(Schema.Type.INT), null, null)); - } - SCHEMA = Schema.createRecord("schema", null, null, false, fields); - RECORD = new Record(SCHEMA); - for (int i = 0; i < COLUMNS.length; i++) { - RECORD.put(COLUMNS[i], VALUES[i]); - } - } -} From 1eb0d4835335087c80c750c3d8ac714c10a06a44 Mon Sep 17 00:00:00 2001 From: Bin Fan Date: Tue, 11 Jul 2023 22:18:39 -0700 Subject: [PATCH 2/2] Restore files to keep Trino/Presto build with Alluxio shaded client --- .../proto/grpc/table/layout/hive/hive.proto | 52 +++ .../main/proto/grpc/table/table_master.proto | 405 ++++++++++++++++++ .../table/RetryHandlingTableMasterClient.java | 223 ++++++++++ .../client/table/TableMasterClient.java | 203 +++++++++ .../main/java/alluxio/table/ProtoUtils.java | 95 ++++ 5 files changed, 978 insertions(+) create mode 100644 common/transport/src/main/proto/grpc/table/layout/hive/hive.proto create mode 100644 common/transport/src/main/proto/grpc/table/table_master.proto create mode 100644 dora/core/client/fs/src/main/java/alluxio/client/table/RetryHandlingTableMasterClient.java create mode 100644 dora/core/client/fs/src/main/java/alluxio/client/table/TableMasterClient.java create mode 100644 dora/core/client/fs/src/main/java/alluxio/table/ProtoUtils.java diff --git a/common/transport/src/main/proto/grpc/table/layout/hive/hive.proto b/common/transport/src/main/proto/grpc/table/layout/hive/hive.proto new file mode 100644 index 000000000000..8faf0c73b88b --- 
/dev/null +++ b/common/transport/src/main/proto/grpc/table/layout/hive/hive.proto @@ -0,0 +1,52 @@ +syntax = "proto2"; + +option java_multiple_files = true; +option java_package = "alluxio.grpc.table.layout.hive"; +option java_outer_classname = "HiveLayoutProto"; + +package alluxio.grpc.table.layout; + +import "grpc/table/table_master.proto"; + +// TODO(binfan): remove this proto file. +// it is no longer used but only to keep old Presto/Trino build with Alluxio 3.x client. +message StorageFormat { + optional string serde = 1; + optional string input_format = 2; + optional string output_format = 3; + map serdelib_parameters = 4; +} + +message HiveBucketProperty { + repeated string bucketed_by = 1; + optional int64 bucket_count = 2; + repeated SortingColumn sorted_by = 3; +} + +message SortingColumn { + required string column_name = 1; + enum SortingOrder { + ASCENDING = 0; + DESCENDING = 1; + } + required SortingOrder order = 2; +} + +message Storage { + optional StorageFormat storage_format = 1; + optional string location = 2; + optional HiveBucketProperty bucket_property = 3; + optional bool skewed = 4; + map serde_parameters = 5; +} + +message PartitionInfo { + repeated string values = 1; // list of string values for each partition column + optional string partition_name = 2; // partition name in the form of "key=value" + + optional string db_name = 3; + optional string table_name = 4; + optional Storage storage = 5; + repeated FieldSchema data_cols = 6; + map parameters = 7; +} diff --git a/common/transport/src/main/proto/grpc/table/table_master.proto b/common/transport/src/main/proto/grpc/table/table_master.proto new file mode 100644 index 000000000000..a3d78f42c860 --- /dev/null +++ b/common/transport/src/main/proto/grpc/table/table_master.proto @@ -0,0 +1,405 @@ +syntax = "proto2"; + +option java_multiple_files = true; +option java_package = "alluxio.grpc.table"; +option java_outer_classname = "TableMasterProto"; + +package alluxio.grpc.table; + +import 
"grpc/job_master.proto"; + +// TODO(binfan): remove this proto file. +// it is no longer used but only to keep old Presto/Trino build with Alluxio 3.x client. +message FieldSchema { + optional uint32 id = 1; + optional string name = 2; + optional string type = 3; + optional string comment = 4; +} + +message Schema { + repeated FieldSchema cols = 1; +} + +enum PrincipalType { + USER = 0; + ROLE = 1; +} + +message Database { + optional string db_name = 1; + optional string description = 2; + optional string location = 3; + map parameter = 4; + optional string owner_name = 5; + optional PrincipalType owner_type = 6; + optional string comment = 7; +} + +// next available id: 12 +message TableInfo { + optional string db_name = 1; + optional string table_name = 2; + enum TableType { + NATIVE = 0; + IMPORTED = 1; + } + optional TableType type = 3; + optional string owner = 4; + optional Schema schema = 5; + optional Layout layout = 6; + map parameters = 7; + + // partitioning scheme + repeated FieldSchema partition_cols = 8; + + optional int64 previous_version = 9; + optional int64 version = 10; + optional int64 version_creation_time = 11; +} + +// TODO(gpang): update +message LayoutSpec { + optional string spec = 1; +} + +// TODO(gpang): update +message PartitionSpec { + optional string spec = 1; +} + +message Layout { + optional string layout_type = 1; + optional LayoutSpec layout_spec = 2; + optional bytes layout_data = 3; + map stats = 4; +} + +message Transformation { + optional Layout layout = 1; + optional string definition = 2; +} + +// next available id: 6 +message Partition { + optional PartitionSpec partition_spec = 1; + optional Layout base_layout = 2; + /** + * The latest transformation is in the back of the list. 
+ */ + repeated Transformation transformations = 3; + optional int64 version = 4; + optional int64 version_creation_time = 5; +} + +message ColumnStatisticsInfo { + optional string col_name = 1; + optional string col_type = 2; + optional ColumnStatisticsData data = 3; +} + +message ColumnStatisticsData { + oneof data { + BooleanColumnStatsData boolean_stats = 1; + LongColumnStatsData long_stats = 2; + DoubleColumnStatsData double_stats = 3; + StringColumnStatsData string_stats = 4; + BinaryColumnStatsData binary_stats = 5; + DecimalColumnStatsData decimal_stats = 6; + DateColumnStatsData date_stats = 7; + } +} + +message BooleanColumnStatsData { + optional int64 num_trues = 1; + optional int64 num_falses = 2; + optional int64 num_nulls = 3; + optional string bit_vectors = 4; +} + +message LongColumnStatsData { + optional int64 low_value = 1; + optional int64 high_value = 2; + optional int64 num_nulls = 3; + optional int64 num_distincts = 4; + optional string bit_vectors = 5; +} + +message DoubleColumnStatsData { + optional double low_value = 1; + optional double high_value = 2; + optional int64 num_nulls = 3; + optional int64 num_distincts = 4; + optional string bit_vectors = 5; +} + +message Decimal { + required int32 scale = 1; // force using scale first in Decimal.compareTo + required bytes unscaled = 2; +} +message DecimalColumnStatsData { + optional Decimal low_value = 1; + optional Decimal high_value = 2; + optional int64 num_nulls = 3; + optional int64 num_distincts = 4; + optional string bit_vectors = 5; +} + +message StringColumnStatsData { + optional int64 max_col_len = 1; + optional double avg_col_len = 2; + optional int64 num_nulls = 3; + optional int64 num_distincts = 4; + optional string bit_vectors = 5; +} + +message BinaryColumnStatsData { + optional int64 max_col_len = 1; + optional double avg_col_len = 2; + optional int64 num_nulls = 3; + optional string bit_vectors = 4; +} + +message Date { + required int64 days_since_epoch = 1; +} +message 
DateColumnStatsData { + optional Date low_value = 1; + optional Date high_value = 2; + optional int64 num_nulls = 3; + optional int64 num_distincts = 4; + optional string bit_vectors = 5; +} + +message SyncStatus { + map tables_errors = 1; + repeated string tables_ignored = 2; + repeated string tables_unchanged = 3; + repeated string tables_updated = 4; + repeated string tables_removed = 5; +} + +message GetAllDatabasesPRequest { +} + +message GetAllDatabasesPResponse { + repeated string database = 1; +} + +message GetAllTablesPRequest { + optional string database = 1; +} + +message GetAllTablesPResponse { + repeated string table = 1; +} + +message GetDatabasePRequest { + optional string db_name = 1; +} + +message GetDatabasePResponse { + optional Database db = 1; +} + +message GetTablePRequest { + optional string db_name = 1; + optional string table_name = 2; +} + +message GetTablePResponse { + optional TableInfo table_info = 1; +} + +message AttachDatabasePRequest { + optional string udb_type = 1; + optional string udb_connection_uri = 2; + optional string udb_db_name = 3; + optional string db_name = 4; + map options = 5; + optional bool ignore_sync_errors = 6; +} + +message AttachDatabasePResponse { + // TODO(gpang): remove in favor of status + optional bool success = 1; + optional SyncStatus sync_status = 2; +} + +message DetachDatabasePRequest { + optional string db_name = 1; +} + +message DetachDatabasePResponse { + optional bool success = 1; +} + +message SyncDatabasePRequest { + optional string db_name = 1; +} + +message SyncDatabasePResponse { + // TODO(gpang): remove in favor of status + optional bool success = 1; + optional SyncStatus status = 2; +} + +message FileStatistics { + map column = 1; //map column names to column statistics +} + +message GetTableColumnStatisticsPRequest { + optional string db_name = 1; + optional string table_name = 2; + repeated string col_names = 3; +} + +message GetPartitionColumnStatisticsPRequest { + optional string 
db_name = 1; + optional string table_name = 2; + repeated string col_names = 3; + repeated string part_names = 4; +} + +message GetTableColumnStatisticsPResponse { + repeated ColumnStatisticsInfo statistics = 1; +} + +message ColumnStatisticsList { + repeated ColumnStatisticsInfo statistics = 1; +} +message GetPartitionColumnStatisticsPResponse { + map partition_statistics = 1; +} + +message Value { + oneof value { + int64 long_type = 1; + double double_type = 2; + string string_type = 3; + bool boolean_type = 4; + } +} + +message Range { + optional Value low = 1; + optional Value high = 2; +} + +message RangeSet { + repeated Range ranges = 1; +} + +message EquatableValueSet { + repeated Value candidates = 1; + optional bool white_list = 2; +} + +message AllOrNoneSet { + optional bool all = 1; +} + +message Domain { + oneof value_set { + RangeSet range = 1; + EquatableValueSet equatable = 2; + AllOrNoneSet all_or_none = 3; + } +} + +message Constraint { + map column_constraints = 1; // maps column to constraint, columns not present are not constrained +} + +message ReadTablePRequest { + optional string db_name = 1; + optional string table_name = 2; + optional Constraint constraint = 3; +} + +message ReadTablePResponse { + repeated Partition partitions = 1; +} + +message TransformTablePRequest { + optional string db_name = 1; + optional string table_name = 2; + optional string definition = 3; +} + +message TransformTablePResponse { + optional int64 job_id = 1; +} + +message GetTransformJobInfoPRequest { + optional int64 job_id = 1; +} + +message TransformJobInfo { + optional string db_name = 1; + optional string table_name = 2; + optional string definition = 3; + optional int64 job_id = 4; + optional alluxio.grpc.job.Status job_status = 5; + optional string job_error = 6; +} + +message GetTransformJobInfoPResponse { + repeated TransformJobInfo info = 1; +} + +/** + * This interface contains table master service endpoints for Alluxio clients. 
+ */ +service TableMasterClientService { + + /** + * Returns all databases in the catalog + */ + rpc GetAllDatabases(GetAllDatabasesPRequest) returns (GetAllDatabasesPResponse); + + /** + * Returns all tables in the database + */ + rpc GetAllTables(GetAllTablesPRequest) returns (GetAllTablesPResponse); + + /** + * Gets a database by name from the table master + */ + rpc GetDatabase(GetDatabasePRequest) returns (GetDatabasePResponse); + + /** + * Returns a specific table info + */ + rpc GetTable(GetTablePRequest) returns (GetTablePResponse); + + /** + * Attach an existing database into the catalog as a new database name + */ + rpc AttachDatabase(AttachDatabasePRequest) returns (AttachDatabasePResponse); + + /** + * Detach existing database into the catalog, removing any metadata about the table + */ + rpc DetachDatabase(DetachDatabasePRequest) returns (DetachDatabasePResponse); + + /** + * Sync existing database into the catalog + */ + rpc SyncDatabase(SyncDatabasePRequest) returns (SyncDatabasePResponse); + + rpc GetTableColumnStatistics(GetTableColumnStatisticsPRequest) returns (GetTableColumnStatisticsPResponse); + + rpc GetPartitionColumnStatistics(GetPartitionColumnStatisticsPRequest) returns (GetPartitionColumnStatisticsPResponse); + + rpc ReadTable(ReadTablePRequest) returns (ReadTablePResponse); + + rpc TransformTable(TransformTablePRequest) returns (TransformTablePResponse); + + /** + * Gets information of transformation jobs. + * If the job ID exists in the request, the information for that job is returned; + * Otherwise, information of all the jobs kept in table master will be returned. 
+ */ + rpc GetTransformJobInfo(GetTransformJobInfoPRequest) returns (GetTransformJobInfoPResponse); +} diff --git a/dora/core/client/fs/src/main/java/alluxio/client/table/RetryHandlingTableMasterClient.java b/dora/core/client/fs/src/main/java/alluxio/client/table/RetryHandlingTableMasterClient.java new file mode 100644 index 000000000000..b676a6bb26c8 --- /dev/null +++ b/dora/core/client/fs/src/main/java/alluxio/client/table/RetryHandlingTableMasterClient.java @@ -0,0 +1,223 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.client.table; + +import alluxio.AbstractMasterClient; +import alluxio.exception.status.AlluxioStatusException; +import alluxio.grpc.ServiceType; +import alluxio.grpc.table.AttachDatabasePRequest; +import alluxio.grpc.table.ColumnStatisticsInfo; +import alluxio.grpc.table.Constraint; +import alluxio.grpc.table.Database; +import alluxio.grpc.table.DetachDatabasePRequest; +import alluxio.grpc.table.GetAllDatabasesPRequest; +import alluxio.grpc.table.GetAllTablesPRequest; +import alluxio.grpc.table.GetDatabasePRequest; +import alluxio.grpc.table.GetPartitionColumnStatisticsPRequest; +import alluxio.grpc.table.GetTableColumnStatisticsPRequest; +import alluxio.grpc.table.GetTablePRequest; +import alluxio.grpc.table.GetTransformJobInfoPRequest; +import alluxio.grpc.table.Partition; +import alluxio.grpc.table.ReadTablePRequest; +import alluxio.grpc.table.SyncDatabasePRequest; +import alluxio.grpc.table.SyncStatus; +import alluxio.grpc.table.TableInfo; +import 
alluxio.grpc.table.TableMasterClientServiceGrpc; +import alluxio.grpc.table.TransformJobInfo; +import alluxio.grpc.table.TransformTablePRequest; +import alluxio.master.MasterClientContext; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import javax.annotation.concurrent.ThreadSafe; + +/** + * A wrapper for the gRPC client to interact with the table master. + * + * TODO(binfan): Remove this class. + * it is no longer used but only to keep old Presto/Trino build with Alluxio 3.x client. + */ +@ThreadSafe +public final class RetryHandlingTableMasterClient extends AbstractMasterClient + implements TableMasterClient { + private static final Logger RPC_LOG = LoggerFactory.getLogger(TableMasterClient.class); + private static final String TABLE_MASTER_NAME = "TableMaster"; + private static final String TABLE_MASTER_CLIENT_SERVICE_NAME = "TableMasterClient"; + private static final long TABLE_MASTER_CLIENT_SERVICE_VERSION = 1; + private TableMasterClientServiceGrpc.TableMasterClientServiceBlockingStub mClient = null; + + /** + * Creates a new block master client. 
+ * + * @param conf master client configuration + */ + public RetryHandlingTableMasterClient(MasterClientContext conf) { + super(conf); + } + + @Override + protected ServiceType getRemoteServiceType() { + return ServiceType.TABLE_MASTER_CLIENT_SERVICE; + } + + @Override + protected String getServiceName() { + return TABLE_MASTER_CLIENT_SERVICE_NAME; + } + + @Override + protected long getServiceVersion() { + return TABLE_MASTER_CLIENT_SERVICE_VERSION; + } + + @Override + protected void afterConnect() { + mClient = TableMasterClientServiceGrpc.newBlockingStub(mChannel); + } + + @Override + public List getAllDatabases() throws AlluxioStatusException { + return retryRPC(() -> mClient.getAllDatabases( + GetAllDatabasesPRequest.newBuilder().build()).getDatabaseList(), + RPC_LOG, "GetAllDatabases", ""); + } + + @Override + public Database getDatabase(String databaseName) throws AlluxioStatusException { + return retryRPC(() -> mClient.getDatabase(GetDatabasePRequest.newBuilder() + .setDbName(databaseName).build()), + RPC_LOG, "GetDatabase", "databaseName=%s", databaseName).getDb(); + } + + @Override + public List getAllTables(String databaseName) throws AlluxioStatusException { + return retryRPC(() -> mClient.getAllTables( + GetAllTablesPRequest.newBuilder().setDatabase(databaseName).build()).getTableList(), + RPC_LOG, "GetAllTables", "databaseName=%s", databaseName); + } + + @Override + public TableInfo getTable(String databaseName, String tableName) throws AlluxioStatusException { + return retryRPC(() -> mClient.getTable( + GetTablePRequest.newBuilder().setDbName(databaseName).setTableName(tableName).build()) + .getTableInfo(), RPC_LOG, "GetTable", "databaseName=%s,tableName=%s", + databaseName, tableName); + } + + @Override + public SyncStatus attachDatabase(String udbType, String udbConnectionUri, String udbDbName, + String dbName, Map configuration, boolean ignoreSyncErrors) + throws AlluxioStatusException { + return retryRPC(() -> mClient.attachDatabase( + 
AttachDatabasePRequest.newBuilder().setUdbType(udbType) + .setUdbConnectionUri(udbConnectionUri).setUdbDbName(udbDbName).setDbName(dbName) + .putAllOptions(configuration).setIgnoreSyncErrors(ignoreSyncErrors).build()) + .getSyncStatus(), + RPC_LOG, "AttachDatabase", "udbType=%s,udbConnectionUri=%s,udbDbName=%s,dbName=%s," + + "configuration=%s,ignoreSyncErrors=%s", + udbType, udbConnectionUri, udbDbName, dbName, configuration, ignoreSyncErrors); + } + + @Override + public boolean detachDatabase(String dbName) + throws AlluxioStatusException { + return retryRPC(() -> mClient.detachDatabase( + DetachDatabasePRequest.newBuilder().setDbName(dbName).build()).getSuccess(), + RPC_LOG, "DetachDatabase", "dbName=%s", dbName); + } + + @Override + public SyncStatus syncDatabase(String dbName) throws AlluxioStatusException { + return retryRPC(() -> mClient.syncDatabase( + SyncDatabasePRequest.newBuilder().setDbName(dbName).build()).getStatus(), + RPC_LOG, "SyncDatabase", "dbName=%s", dbName); + } + + @Override + public List readTable(String databaseName, String tableName, Constraint constraint) + throws AlluxioStatusException { + return retryRPC(() -> mClient.readTable( + ReadTablePRequest.newBuilder().setDbName(databaseName).setTableName(tableName) + .setConstraint(constraint).build()).getPartitionsList(), + RPC_LOG, "ReadTable", "databaseName=%s,tableName=%s,constraint=%s", databaseName, tableName, + constraint); + } + + @Override + public List getTableColumnStatistics( + String databaseName, + String tableName, + List columnNames) throws AlluxioStatusException { + return retryRPC(() -> mClient.getTableColumnStatistics( + GetTableColumnStatisticsPRequest.newBuilder().setDbName(databaseName) + .setTableName(tableName).addAllColNames(columnNames).build()).getStatisticsList(), + RPC_LOG, "GetTableColumnStatistics", + "databaseName=%s,tableName=%s,columnNames=%s", databaseName, tableName, columnNames); + } + + @Override + public List getPartitionNames( + String databaseName, + 
String tableName) throws AlluxioStatusException { + return null; + } + + @Override + public Map> getPartitionColumnStatistics( + String databaseName, + String tableName, + List partitionNames, + List columnNames) throws AlluxioStatusException { + return retryRPC(() -> mClient.getPartitionColumnStatistics( + GetPartitionColumnStatisticsPRequest.newBuilder().setDbName(databaseName) + .setTableName(tableName).addAllColNames(columnNames) + .addAllPartNames(partitionNames).build()).getPartitionStatisticsMap(), + RPC_LOG, "GetPartitionColumnStatistics", + "databaseName=%s,tableName=%s,partitionNames=%s,columnNames=%s", + databaseName, tableName, partitionNames, columnNames) + .entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, + e -> e.getValue().getStatisticsList(), (e1, e2) -> e1)); + } + + @Override + public long transformTable(String dbName, String tableName, String definition) + throws AlluxioStatusException { + return retryRPC(() -> mClient.transformTable( + TransformTablePRequest.newBuilder() + .setDbName(dbName) + .setTableName(tableName) + .setDefinition(definition) + .build()).getJobId(), + RPC_LOG, "TransformTable", "dbName=%s,tableName=%s,definition=%s", + dbName, tableName, definition); + } + + @Override + public TransformJobInfo getTransformJobInfo(long jobId) throws AlluxioStatusException { + return retryRPC(() -> mClient.getTransformJobInfo( + GetTransformJobInfoPRequest.newBuilder() + .setJobId(jobId) + .build()).getInfo(0), + RPC_LOG, "GetTransformJobInfo", "jobId=%d", jobId); + } + + @Override + public List getAllTransformJobInfo() throws AlluxioStatusException { + return retryRPC(() -> mClient.getTransformJobInfo( + GetTransformJobInfoPRequest.newBuilder().build()).getInfoList(), + RPC_LOG, "GetAllTransformJobInfo", ""); + } +} diff --git a/dora/core/client/fs/src/main/java/alluxio/client/table/TableMasterClient.java b/dora/core/client/fs/src/main/java/alluxio/client/table/TableMasterClient.java new file mode 100644 index 
000000000000..0db3c1884184
--- /dev/null
+++ b/dora/core/client/fs/src/main/java/alluxio/client/table/TableMasterClient.java
@@ -0,0 +1,203 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License"). You may not use this work except in compliance with the License, which is
+ * available at www.apache.org/licenses/LICENSE-2.0
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied, as more fully set forth in the License.
+ *
+ * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */
+
+package alluxio.client.table;
+
+import alluxio.Client;
+import alluxio.exception.status.AlluxioStatusException;
+import alluxio.grpc.table.ColumnStatisticsInfo;
+import alluxio.grpc.table.Constraint;
+import alluxio.grpc.table.Database;
+import alluxio.grpc.table.Partition;
+import alluxio.grpc.table.SyncStatus;
+import alluxio.grpc.table.TableInfo;
+import alluxio.grpc.table.TransformJobInfo;
+import alluxio.master.MasterClientContext;
+
+import java.util.List;
+import java.util.Map;
+import javax.annotation.concurrent.ThreadSafe;
+
+/**
+ * A client to use for interacting with a table master.
+ *
+ * TODO(binfan): Remove this class.
+ * It is unused and kept only so that old Presto/Trino can build with the Alluxio 3.x client.
+ */
+@ThreadSafe
+public interface TableMasterClient extends Client {
+  /**
+   * Factory for {@link TableMasterClient}.
+   */
+  class Factory {
+
+    private Factory() {
+    } // prevent instantiation
+
+    /**
+     * Factory method for {@link TableMasterClient}.
+     *
+     * @param conf master client configuration
+     * @return a new {@link TableMasterClient} instance
+     */
+    public static TableMasterClient create(MasterClientContext conf) {
+      return new RetryHandlingTableMasterClient(conf);
+    }
+  }
+
+  /**
+   * Get a list of all database names.
+   *
+   * @return list of database names
+   * @throws AlluxioStatusException if the request fails
+   */
+  List<String> getAllDatabases() throws AlluxioStatusException;
+
+  /**
+   * Get database metadata.
+   *
+   * @param databaseName database name
+   * @return database metadata
+   * @throws AlluxioStatusException if the request fails
+   */
+  Database getDatabase(String databaseName) throws AlluxioStatusException;
+
+  /**
+   * Get a list of all table names.
+   *
+   * @param databaseName database name
+   * @return list of table names
+   * @throws AlluxioStatusException if the request fails
+   */
+  List<String> getAllTables(String databaseName) throws AlluxioStatusException;
+
+  /**
+   * Get table metadata.
+   *
+   * @param databaseName database name
+   * @param tableName table name
+   * @return table metadata
+   * @throws AlluxioStatusException if the request fails
+   */
+  TableInfo getTable(String databaseName, String tableName) throws AlluxioStatusException;
+
+  /**
+   * Attaches an existing database.
+   *
+   * @param udbType the database type
+   * @param udbConnectionUri the udb connection uri
+   * @param udbDbName the database name in the udb
+   * @param dbName the database name in Alluxio
+   * @param configuration the configuration map
+   * @param ignoreSyncErrors will ignore sync errors if true
+   * @return the sync status for the attach
+   * @throws AlluxioStatusException if the request fails
+   */
+  SyncStatus attachDatabase(String udbType, String udbConnectionUri, String udbDbName,
+      String dbName, Map<String, String> configuration, boolean ignoreSyncErrors)
+      throws AlluxioStatusException;
+
+  /**
+   * Detaches an existing database in the catalog master.
+   *
+   * @param dbName database name
+   * @return true if the database was detached successfully
+   * @throws AlluxioStatusException if the request fails
+   */
+  boolean detachDatabase(String dbName) throws AlluxioStatusException;
+
+  /**
+   * Syncs an existing database in the catalog master.
+   *
+   * @param dbName database name
+   * @return the sync status
+   * @throws AlluxioStatusException if the request fails
+   */
+  SyncStatus syncDatabase(String dbName) throws AlluxioStatusException;
+
+  /**
+   * Returns metadata for reading a table given constraints.
+   *
+   * @param databaseName database name
+   * @param tableName table name
+   * @param constraint constraint on the columns
+   * @return list of partitions
+   * @throws AlluxioStatusException if the request fails
+   */
+  List<Partition> readTable(String databaseName, String tableName, Constraint constraint)
+      throws AlluxioStatusException;
+
+  /**
+   * Get table column statistics with given database name,
+   * table name and list of column names.
+   *
+   * @param databaseName database name
+   * @param tableName table name
+   * @param columnNames column names
+   * @return list of column statistics
+   * @throws AlluxioStatusException if the request fails
+   */
+  List<ColumnStatisticsInfo> getTableColumnStatistics(String databaseName, String tableName,
+      List<String> columnNames) throws AlluxioStatusException;
+
+  /**
+   * Get partition names with given database name and table name.
+   *
+   * @param databaseName database name
+   * @param tableName table name
+   * @return list of partition names
+   * @throws AlluxioStatusException if the request fails
+   */
+  List<String> getPartitionNames(String databaseName, String tableName)
+      throws AlluxioStatusException;
+
+  /**
+   * Get column statistics for selected partition and column.
+   *
+   * @param databaseName database name
+   * @param tableName table name
+   * @param partitionNames partition names
+   * @param columnNames column names
+   * @return map from partition name to the column statistics of the requested columns
+   *         in that partition
+   * @throws AlluxioStatusException if the request fails
+   */
+  Map<String, List<ColumnStatisticsInfo>> getPartitionColumnStatistics(
+      String databaseName,
+      String tableName,
+      List<String> partitionNames,
+      List<String> columnNames) throws AlluxioStatusException;
+
+  /**
+   * Transforms a table.
+   *
+   * @param dbName the database name
+   * @param tableName the table name
+   * @param definition the transformation definition
+   * @return job ID which can be used to poll the job status from job service
+   * @throws AlluxioStatusException if the request fails
+   */
+  long transformTable(String dbName, String tableName, String definition)
+      throws AlluxioStatusException;
+
+  /**
+   * @param jobId the transformation job's ID
+   * @return the job info
+   * @throws AlluxioStatusException if the request fails
+   */
+  TransformJobInfo getTransformJobInfo(long jobId) throws AlluxioStatusException;
+
+  /**
+   * @return a list of information for all transformation jobs
+   * @throws AlluxioStatusException if the request fails
+   */
+  List<TransformJobInfo> getAllTransformJobInfo() throws AlluxioStatusException;
+}
diff --git a/dora/core/client/fs/src/main/java/alluxio/table/ProtoUtils.java b/dora/core/client/fs/src/main/java/alluxio/table/ProtoUtils.java
new file mode 100644
index 000000000000..f836992879c0
--- /dev/null
+++ b/dora/core/client/fs/src/main/java/alluxio/table/ProtoUtils.java
@@ -0,0 +1,95 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License"). You may not use this work except in compliance with the License, which is
+ * available at www.apache.org/licenses/LICENSE-2.0
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied, as more fully set forth in the License.
+ *
+ * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */
+
+package alluxio.table;
+
+import alluxio.grpc.table.Layout;
+import alluxio.grpc.table.Partition;
+import alluxio.grpc.table.Transformation;
+import alluxio.grpc.table.layout.hive.PartitionInfo;
+
+import com.google.protobuf.InvalidProtocolBufferException;
+
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Protobuf related utils.
+ *
+ * TODO(binfan): Remove this class.
+ * It is unused and kept only so that old Presto/Trino can build with the Alluxio 3.x client.
+ */
+public final class ProtoUtils {
+  /**
+   * @param partition the partition proto
+   * @return true if the partition has a base layout of the hive type, false otherwise
+   */
+  public static boolean hasHiveLayout(Partition partition) {
+    // TODO(gpang): use a layout registry
+    return partition.hasBaseLayout() && isHiveLayout(partition.getBaseLayout());
+  }
+
+  /**
+   * @param layout the layout proto
+   * @return true if the layout is a hive layout, false otherwise
+   */
+  public static boolean isHiveLayout(Layout layout) {
+    return Objects.equals(layout.getLayoutType(), "hive");
+  }
+
+  // Returns the layout of the last listed transformation, or the base layout if there is none.
+  private static Layout getCurrentLayout(Partition partition) {
+    List<Transformation> transformations = partition.getTransformationsList();
+    return transformations.isEmpty()
+        ? partition.getBaseLayout()
+        : transformations.get(transformations.size() - 1).getLayout();
+  }
+
+  /**
+   * @param partition the partition proto
+   * @return the hive-specific partition proto
+   * @throws IllegalStateException if there is no hive base layout, or no layout data to parse
+   * @throws InvalidProtocolBufferException if the layout data cannot be parsed
+   */
+  public static PartitionInfo extractHiveLayout(Partition partition)
+      throws InvalidProtocolBufferException {
+    if (!partition.hasBaseLayout()) {
+      throw new IllegalStateException("Cannot parse hive-layout from missing layout");
+    }
+    if (!isHiveLayout(partition.getBaseLayout())) {
+      throw new IllegalStateException(
+          "Cannot parse hive-layout. layoutType: " + partition.getBaseLayout().getLayoutType());
+    }
+    // Only the base layout's type is checked; the data parsed below is the current layout's.
+    Layout layout = getCurrentLayout(partition);
+    if (!layout.hasLayoutData()) {
+      throw new IllegalStateException("Cannot parse hive-layout from empty layout data");
+    }
+    return PartitionInfo.parseFrom(layout.getLayoutData());
+  }
+
+  /**
+   * @param layout the layout proto
+   * @return the hive-specific partition proto
+   * @throws IllegalStateException if the layout is not a hive layout or has no layout data
+   * @throws InvalidProtocolBufferException if the layout data cannot be parsed
+   */
+  public static PartitionInfo toHiveLayout(Layout layout) throws InvalidProtocolBufferException {
+    if (!isHiveLayout(layout)) {
+      throw new IllegalStateException(
+          "Cannot parse hive-layout. layoutType: " + layout.getLayoutType());
+    }
+    if (!layout.hasLayoutData()) {
+      throw new IllegalStateException("Cannot parse hive-layout from empty layout data");
+    }
+    return PartitionInfo.parseFrom(layout.getLayoutData());
+  }
+}