From c158b754a850365e8fa6749dd97bbd2c50a40dbe Mon Sep 17 00:00:00 2001 From: FANNG Date: Thu, 2 Jan 2025 14:23:30 +0800 Subject: [PATCH 1/7] [#6031] extend S3 credential provider to support S3 fileset operations (#6033) ### What changes were proposed in this pull request? add get file meta permission for fileset operation ### Why are the changes needed? Fix: #6031 ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? pass fileset tests --- .../gravitino/s3/credential/S3TokenProvider.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/bundles/aws/src/main/java/org/apache/gravitino/s3/credential/S3TokenProvider.java b/bundles/aws/src/main/java/org/apache/gravitino/s3/credential/S3TokenProvider.java index 24b88875de9..56d293d046f 100644 --- a/bundles/aws/src/main/java/org/apache/gravitino/s3/credential/S3TokenProvider.java +++ b/bundles/aws/src/main/java/org/apache/gravitino/s3/credential/S3TokenProvider.java @@ -20,6 +20,7 @@ package org.apache.gravitino.s3.credential; import java.net.URI; +import java.util.Arrays; import java.util.HashMap; import java.util.Map; import java.util.Objects; @@ -49,6 +50,7 @@ /** Generates S3 token to access S3 data. */ public class S3TokenProvider implements CredentialProvider { + private StsClient stsClient; private String roleArn; private String externalID; @@ -134,6 +136,7 @@ private IamPolicy createPolicy( allowGetObjectStatementBuilder.addResource( IamResource.create(getS3UriWithArn(arnPrefix, uri))); String bucketArn = arnPrefix + getBucketName(uri); + String rawPath = trimLeadingSlash(uri.getPath()); bucketListStatmentBuilder .computeIfAbsent( bucketArn, @@ -142,10 +145,14 @@ private IamPolicy createPolicy( .effect(IamEffect.ALLOW) .addAction("s3:ListBucket") .addResource(key)) - .addCondition( + .addConditions( IamConditionOperator.STRING_LIKE, "s3:prefix", - concatPathWithSep(trimLeadingSlash(uri.getPath()), "*", "/")); + Arrays.asList( + // Get raw path metadata information for AWS hadoop connector + rawPath, + // Listing objects in raw path + concatPathWithSep(rawPath, "*", "/"))); bucketGetLocationStatmentBuilder.computeIfAbsent( bucketArn, key -> From 6e0bd0d267b60fa8dcb2f9edb4bf5d69d1071489 Mon Sep 17 00:00:00 2001 From: FANNG Date: Thu, 2 Jan 2025 14:35:43 +0800 Subject: [PATCH 2/7] [#6055] feat(core): extend OSS credential provider to support OSS fileset operations (#6029) ### What changes were proposed in this pull request? 1. correct `ListBucket` to `ListObjects` 2. add `oss:GetBucketInfo` action ### Why are the changes needed? Fix: #6055 ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? 1. run pass fileset oss test --- .../oss/credential/OSSTokenProvider.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/OSSTokenProvider.java b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/OSSTokenProvider.java index 04ef0022a10..79d7f51f780 100644 --- a/bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/OSSTokenProvider.java +++ b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/OSSTokenProvider.java @@ -138,9 +138,10 @@ private String createPolicy(Set readLocations, Set writeLocation .effect(Effect.ALLOW) .addAction("oss:GetObject") .addAction("oss:GetObjectVersion"); + // Add support for bucket-level policies Map bucketListStatementBuilder = new HashMap<>(); - Map bucketGetLocationStatementBuilder = new HashMap<>(); + Map bucketMetadataStatementBuilder = new HashMap<>(); String arnPrefix = getArnPrefix(); Stream.concat(readLocations.stream(), writeLocations.stream()) @@ -150,22 +151,24 @@ private String createPolicy(Set readLocations, Set writeLocation URI uri = URI.create(location); allowGetObjectStatementBuilder.addResource(getOssUriWithArn(arnPrefix, uri)); String bucketArn = arnPrefix + getBucketName(uri); - // ListBucket + // OSS use 'oss:ListObjects' to list objects in a bucket while s3 use 's3:ListBucket' bucketListStatementBuilder.computeIfAbsent( bucketArn, key -> Statement.builder() .effect(Effect.ALLOW) - .addAction("oss:ListBucket") + .addAction("oss:ListObjects") .addResource(key) .condition(getCondition(uri))); - // GetBucketLocation - bucketGetLocationStatementBuilder.computeIfAbsent( + // Add get bucket location and bucket info action. + bucketMetadataStatementBuilder.computeIfAbsent( bucketArn, key -> Statement.builder() .effect(Effect.ALLOW) .addAction("oss:GetBucketLocation") + // Required for OSS Hadoop connector to get bucket information + .addAction("oss:GetBucketInfo") .addResource(key)); }); @@ -192,7 +195,7 @@ private String createPolicy(Set readLocations, Set writeLocation policyBuilder.addStatement( Statement.builder().effect(Effect.ALLOW).addAction("oss:ListBucket").build()); } - bucketGetLocationStatementBuilder + bucketMetadataStatementBuilder .values() .forEach(statementBuilder -> policyBuilder.addStatement(statementBuilder.build())); From ece06fada7613dc115e6a8154e48da1f56df878c Mon Sep 17 00:00:00 2001 From: Jerry Shao Date: Thu, 2 Jan 2025 16:49:17 +0800 Subject: [PATCH 3/7] [#5950] feat(catalog-model): Add integration tests for model API (#6051) ### What changes were proposed in this pull request? This PR adds the integration tests for model API to make sure it works as expected. ### Why are the changes needed? To have an end to end test. Fix: #5950 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add ITs. --------- Co-authored-by: Qi Yu --- catalogs/catalog-model/build.gradle.kts | 14 +- .../test/ModelCatalogOperationsIT.java | 364 ++++++++++++++++ .../src/test/resources/log4j2.properties | 73 ++++ .../gravitino/client/generic_model_catalog.py | 2 +- .../gravitino/client/gravitino_metalake.py | 4 +- .../tests/integration/integration_test_env.py | 10 +- .../tests/integration/test_model_catalog.py | 403 ++++++++++++++++++ .../ModelVersionAliasSQLProviderFactory.java | 5 +- .../ModelVersionAliasRelBaseSQLProvider.java | 9 +- .../h2/ModelVersionAliasRelH2SQLProvider.java | 40 ++ ...odelVersionAliasRelPostgreSQLProvider.java | 2 +- .../ModelVersionMetaPostgreSQLProvider.java | 2 +- 12 files changed, 909 insertions(+), 19 deletions(-) create mode 100644 catalogs/catalog-model/src/test/java/org/apache/gravtitino/catalog/model/integration/test/ModelCatalogOperationsIT.java create mode 100644 catalogs/catalog-model/src/test/resources/log4j2.properties create mode 100644 clients/client-python/tests/integration/test_model_catalog.py create mode 100644 core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/h2/ModelVersionAliasRelH2SQLProvider.java diff --git a/catalogs/catalog-model/build.gradle.kts b/catalogs/catalog-model/build.gradle.kts index 95af305fcae..5c125532263 100644 --- a/catalogs/catalog-model/build.gradle.kts +++ b/catalogs/catalog-model/build.gradle.kts @@ -29,17 +29,15 @@ dependencies { exclude(group = "*") } - implementation(project(":core")) { + implementation(project(":catalogs:catalog-common")) { exclude(group = "*") } implementation(project(":common")) { exclude(group = "*") } - - implementation(project(":catalogs:catalog-common")) { + implementation(project(":core")) { exclude(group = "*") } - implementation(libs.guava) implementation(libs.slf4j.api) @@ -47,14 +45,17 @@ dependencies { testImplementation(project(":integration-test-common", "testArtifacts")) testImplementation(project(":server")) testImplementation(project(":server-common")) - testImplementation(libs.bundles.log4j) testImplementation(libs.commons.io) testImplementation(libs.commons.lang3) testImplementation(libs.mockito.core) testImplementation(libs.mockito.inline) + testImplementation(libs.mysql.driver) testImplementation(libs.junit.jupiter.api) testImplementation(libs.junit.jupiter.params) + testImplementation(libs.postgresql.driver) + testImplementation(libs.testcontainers) + testImplementation(libs.testcontainers.mysql) testRuntimeOnly(libs.junit.jupiter.engine) } @@ -68,8 +69,9 @@ tasks { val copyCatalogLibs by registering(Copy::class) { dependsOn("jar", "runtimeJars") from("build/libs") { - exclude("slf4j-*.jar") exclude("guava-*.jar") + exclude("log4j-*.jar") + exclude("slf4j-*.jar") } into("$rootDir/distribution/package/catalogs/model/libs") } diff --git a/catalogs/catalog-model/src/test/java/org/apache/gravtitino/catalog/model/integration/test/ModelCatalogOperationsIT.java b/catalogs/catalog-model/src/test/java/org/apache/gravtitino/catalog/model/integration/test/ModelCatalogOperationsIT.java new file mode 100644 index 00000000000..6e7adac5516 --- /dev/null +++ b/catalogs/catalog-model/src/test/java/org/apache/gravtitino/catalog/model/integration/test/ModelCatalogOperationsIT.java @@ -0,0 +1,364 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravtitino.catalog.model.integration.test; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import java.util.Arrays; +import java.util.Collections; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.gravitino.Catalog; +import org.apache.gravitino.NameIdentifier; +import org.apache.gravitino.Namespace; +import org.apache.gravitino.Schema; +import org.apache.gravitino.client.GravitinoMetalake; +import org.apache.gravitino.exceptions.ModelAlreadyExistsException; +import org.apache.gravitino.exceptions.ModelVersionAliasesAlreadyExistException; +import org.apache.gravitino.exceptions.NoSuchModelException; +import org.apache.gravitino.exceptions.NoSuchModelVersionException; +import org.apache.gravitino.exceptions.NoSuchSchemaException; +import org.apache.gravitino.integration.test.util.BaseIT; +import org.apache.gravitino.model.Model; +import org.apache.gravitino.model.ModelVersion; +import org.apache.gravitino.utils.RandomNameUtils; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class ModelCatalogOperationsIT extends BaseIT { + + private final String metalakeName = RandomNameUtils.genRandomName("model_it_metalake"); + private final String catalogName = RandomNameUtils.genRandomName("model_it_catalog"); + private final String schemaName = RandomNameUtils.genRandomName("model_it_schema"); + + private GravitinoMetalake gravitinoMetalake; + private Catalog gravitinoCatalog; + + @BeforeAll + public void setUp() { + createMetalake(); + createCatalog(); + } + + @AfterAll + public void tearDown() { + gravitinoMetalake.dropCatalog(catalogName, true); + client.dropMetalake(metalakeName, true); + } + + @BeforeEach + public void beforeEach() { + createSchema(); + } + + @AfterEach + public void afterEach() { + dropSchema(); + } + + @Test + public void testRegisterAndGetModel() { + String modelName = RandomNameUtils.genRandomName("model1"); + NameIdentifier modelIdent = NameIdentifier.of(schemaName, modelName); + String comment = "comment"; + Map properties = ImmutableMap.of("key1", "val1", "key2", "val2"); + + Model model = gravitinoCatalog.asModelCatalog().registerModel(modelIdent, comment, properties); + Assertions.assertEquals(modelName, model.name()); + Assertions.assertEquals(comment, model.comment()); + Assertions.assertEquals(properties, model.properties()); + + Model loadModel = gravitinoCatalog.asModelCatalog().getModel(modelIdent); + Assertions.assertEquals(modelName, loadModel.name()); + Assertions.assertEquals(comment, loadModel.comment()); + Assertions.assertEquals(properties, loadModel.properties()); + + Assertions.assertTrue(gravitinoCatalog.asModelCatalog().modelExists(modelIdent)); + + // Test register existing model + Assertions.assertThrows( + ModelAlreadyExistsException.class, + () -> gravitinoCatalog.asModelCatalog().registerModel(modelIdent, comment, properties)); + + // Test register model in a non-existent schema + NameIdentifier nonExistentSchemaIdent = NameIdentifier.of("non_existent_schema", modelName); + Assertions.assertThrows( + NoSuchSchemaException.class, + () -> + gravitinoCatalog + .asModelCatalog() + .registerModel(nonExistentSchemaIdent, comment, properties)); + + // Test get non-existent model + NameIdentifier nonExistentModelIdent = NameIdentifier.of(schemaName, "non_existent_model"); + Assertions.assertThrows( + NoSuchModelException.class, + () -> gravitinoCatalog.asModelCatalog().getModel(nonExistentModelIdent)); + + // Test get model from non-existent schema + NameIdentifier nonExistentModelIdent2 = NameIdentifier.of("non_existent_schema", modelName); + Assertions.assertThrows( + NoSuchModelException.class, + () -> gravitinoCatalog.asModelCatalog().getModel(nonExistentModelIdent2)); + } + + @Test + public void testRegisterAndListModels() { + String modelName1 = RandomNameUtils.genRandomName("model1"); + String modelName2 = RandomNameUtils.genRandomName("model2"); + NameIdentifier modelIdent1 = NameIdentifier.of(schemaName, modelName1); + NameIdentifier modelIdent2 = NameIdentifier.of(schemaName, modelName2); + + gravitinoCatalog.asModelCatalog().registerModel(modelIdent1, null, null); + gravitinoCatalog.asModelCatalog().registerModel(modelIdent2, null, null); + + NameIdentifier[] models = + gravitinoCatalog.asModelCatalog().listModels(Namespace.of(schemaName)); + Set resultSet = Sets.newHashSet(models); + + Assertions.assertEquals(2, resultSet.size()); + Assertions.assertTrue(resultSet.contains(modelIdent1)); + Assertions.assertTrue(resultSet.contains(modelIdent2)); + + // Test delete and list models + Assertions.assertTrue(gravitinoCatalog.asModelCatalog().deleteModel(modelIdent1)); + NameIdentifier[] modelsAfterDelete = + gravitinoCatalog.asModelCatalog().listModels(Namespace.of(schemaName)); + + Assertions.assertEquals(1, modelsAfterDelete.length); + Assertions.assertEquals(modelIdent2, modelsAfterDelete[0]); + + Assertions.assertTrue(gravitinoCatalog.asModelCatalog().deleteModel(modelIdent2)); + NameIdentifier[] modelsAfterDeleteAll = + gravitinoCatalog.asModelCatalog().listModels(Namespace.of(schemaName)); + + Assertions.assertEquals(0, modelsAfterDeleteAll.length); + + // Test list models from non-existent schema + Assertions.assertThrows( + NoSuchSchemaException.class, + () -> gravitinoCatalog.asModelCatalog().listModels(Namespace.of("non_existent_schema"))); + } + + @Test + public void testRegisterAndDeleteModel() { + String modelName = RandomNameUtils.genRandomName("model1"); + NameIdentifier modelIdent = NameIdentifier.of(schemaName, modelName); + gravitinoCatalog.asModelCatalog().registerModel(modelIdent, null, null); + + Assertions.assertTrue(gravitinoCatalog.asModelCatalog().deleteModel(modelIdent)); + Assertions.assertFalse(gravitinoCatalog.asModelCatalog().modelExists(modelIdent)); + Assertions.assertFalse(gravitinoCatalog.asModelCatalog().deleteModel(modelIdent)); + + // Test delete non-existent model + NameIdentifier nonExistentModelIdent = NameIdentifier.of(schemaName, "non_existent_model"); + Assertions.assertFalse(gravitinoCatalog.asModelCatalog().deleteModel(nonExistentModelIdent)); + + // Test delete model from non-existent schema + NameIdentifier nonExistentSchemaIdent = NameIdentifier.of("non_existent_schema", modelName); + Assertions.assertFalse(gravitinoCatalog.asModelCatalog().deleteModel(nonExistentSchemaIdent)); + } + + @Test + public void testLinkAndGerModelVersion() { + String modelName = RandomNameUtils.genRandomName("model1"); + Map properties = ImmutableMap.of("key1", "val1", "key2", "val2"); + NameIdentifier modelIdent = NameIdentifier.of(schemaName, modelName); + gravitinoCatalog.asModelCatalog().registerModel(modelIdent, null, null); + + gravitinoCatalog + .asModelCatalog() + .linkModelVersion(modelIdent, "uri", new String[] {"alias1"}, "comment", properties); + + ModelVersion modelVersion = + gravitinoCatalog.asModelCatalog().getModelVersion(modelIdent, "alias1"); + + Assertions.assertEquals(0, modelVersion.version()); + Assertions.assertEquals("uri", modelVersion.uri()); + Assertions.assertArrayEquals(new String[] {"alias1"}, modelVersion.aliases()); + Assertions.assertEquals("comment", modelVersion.comment()); + Assertions.assertEquals(properties, modelVersion.properties()); + Assertions.assertTrue( + gravitinoCatalog.asModelCatalog().modelVersionExists(modelIdent, "alias1")); + + ModelVersion modelVersion1 = gravitinoCatalog.asModelCatalog().getModelVersion(modelIdent, 0); + + Assertions.assertEquals(0, modelVersion1.version()); + Assertions.assertEquals("uri", modelVersion1.uri()); + Assertions.assertArrayEquals(new String[] {"alias1"}, modelVersion1.aliases()); + Assertions.assertTrue(gravitinoCatalog.asModelCatalog().modelVersionExists(modelIdent, 0)); + + // Test link a version to a non-existent model + NameIdentifier nonExistentModelIdent = NameIdentifier.of(schemaName, "non_existent_model"); + Assertions.assertThrows( + NoSuchModelException.class, + () -> + gravitinoCatalog + .asModelCatalog() + .linkModelVersion( + nonExistentModelIdent, "uri", new String[] {"alias1"}, "comment", properties)); + + // Test link a version using existing alias + Assertions.assertThrows( + ModelVersionAliasesAlreadyExistException.class, + () -> + gravitinoCatalog + .asModelCatalog() + .linkModelVersion( + modelIdent, "uri", new String[] {"alias1"}, "comment", properties)); + + // Test get non-existent model version + Assertions.assertThrows( + NoSuchModelVersionException.class, + () -> gravitinoCatalog.asModelCatalog().getModelVersion(modelIdent, "non_existent_alias")); + Assertions.assertFalse( + gravitinoCatalog.asModelCatalog().modelVersionExists(modelIdent, "non_existent_alias")); + + Assertions.assertThrows( + NoSuchModelVersionException.class, + () -> gravitinoCatalog.asModelCatalog().getModelVersion(modelIdent, 1)); + Assertions.assertFalse(gravitinoCatalog.asModelCatalog().modelVersionExists(modelIdent, 1)); + } + + @Test + public void testLinkAndDeleteModelVersions() { + String modelName = RandomNameUtils.genRandomName("model1"); + NameIdentifier modelIdent = NameIdentifier.of(schemaName, modelName); + gravitinoCatalog.asModelCatalog().registerModel(modelIdent, null, null); + + gravitinoCatalog + .asModelCatalog() + .linkModelVersion(modelIdent, "uri1", new String[] {"alias1"}, "comment1", null); + gravitinoCatalog + .asModelCatalog() + .linkModelVersion(modelIdent, "uri2", new String[] {"alias2"}, "comment2", null); + + Assertions.assertTrue( + gravitinoCatalog.asModelCatalog().deleteModelVersion(modelIdent, "alias1")); + Assertions.assertFalse( + gravitinoCatalog.asModelCatalog().modelVersionExists(modelIdent, "alias1")); + Assertions.assertFalse(gravitinoCatalog.asModelCatalog().deleteModelVersion(modelIdent, 0)); + + Assertions.assertTrue(gravitinoCatalog.asModelCatalog().deleteModelVersion(modelIdent, 1)); + Assertions.assertFalse( + gravitinoCatalog.asModelCatalog().modelVersionExists(modelIdent, "alias2")); + Assertions.assertFalse(gravitinoCatalog.asModelCatalog().deleteModelVersion(modelIdent, 1)); + + // Test delete non-existent model version + Assertions.assertFalse( + gravitinoCatalog.asModelCatalog().deleteModelVersion(modelIdent, "non_existent_alias")); + + // Test delete model version of non-existent model + NameIdentifier nonExistentModelIdent = NameIdentifier.of(schemaName, "non_existent_model"); + Assertions.assertFalse( + gravitinoCatalog.asModelCatalog().deleteModelVersion(nonExistentModelIdent, "alias1")); + + // Test delete model version of non-existent schema + NameIdentifier nonExistentSchemaIdent = NameIdentifier.of("non_existent_schema", modelName); + Assertions.assertFalse( + gravitinoCatalog.asModelCatalog().deleteModelVersion(nonExistentSchemaIdent, "alias1")); + } + + @Test + public void testLinkAndListModelVersions() { + String modelName = RandomNameUtils.genRandomName("model1"); + NameIdentifier modelIdent = NameIdentifier.of(schemaName, modelName); + gravitinoCatalog.asModelCatalog().registerModel(modelIdent, null, null); + + gravitinoCatalog + .asModelCatalog() + .linkModelVersion(modelIdent, "uri1", new String[] {"alias1"}, "comment1", null); + gravitinoCatalog + .asModelCatalog() + .linkModelVersion(modelIdent, "uri2", new String[] {"alias2"}, "comment2", null); + + int[] modelVersions = gravitinoCatalog.asModelCatalog().listModelVersions(modelIdent); + Set resultSet = Arrays.stream(modelVersions).boxed().collect(Collectors.toSet()); + + Assertions.assertEquals(2, resultSet.size()); + Assertions.assertTrue(resultSet.contains(0)); + Assertions.assertTrue(resultSet.contains(1)); + + // Test list model versions of non-existent model + NameIdentifier nonExistentModelIdent = NameIdentifier.of(schemaName, "non_existent_model"); + Assertions.assertThrows( + NoSuchModelException.class, + () -> gravitinoCatalog.asModelCatalog().listModelVersions(nonExistentModelIdent)); + + // Test list model versions of non-existent schema + NameIdentifier nonExistentSchemaIdent = NameIdentifier.of("non_existent_schema", modelName); + Assertions.assertThrows( + NoSuchModelException.class, + () -> gravitinoCatalog.asModelCatalog().listModelVersions(nonExistentSchemaIdent)); + + // Test delete and list model versions + Assertions.assertTrue(gravitinoCatalog.asModelCatalog().deleteModelVersion(modelIdent, 1)); + int[] modelVersionsAfterDelete = + gravitinoCatalog.asModelCatalog().listModelVersions(modelIdent); + + Assertions.assertEquals(1, modelVersionsAfterDelete.length); + Assertions.assertEquals(0, modelVersionsAfterDelete[0]); + + Assertions.assertTrue(gravitinoCatalog.asModelCatalog().deleteModelVersion(modelIdent, 0)); + int[] modelVersionsAfterDeleteAll = + gravitinoCatalog.asModelCatalog().listModelVersions(modelIdent); + + Assertions.assertEquals(0, modelVersionsAfterDeleteAll.length); + } + + private void createMetalake() { + GravitinoMetalake[] gravitinoMetalakes = client.listMetalakes(); + Assertions.assertEquals(0, gravitinoMetalakes.length); + + client.createMetalake(metalakeName, "comment", Collections.emptyMap()); + GravitinoMetalake loadMetalake = client.loadMetalake(metalakeName); + Assertions.assertEquals(metalakeName, loadMetalake.name()); + + gravitinoMetalake = loadMetalake; + } + + private void createCatalog() { + gravitinoMetalake.createCatalog(catalogName, Catalog.Type.MODEL, "comment", ImmutableMap.of()); + gravitinoCatalog = gravitinoMetalake.loadCatalog(catalogName); + } + + private void createSchema() { + Map properties = Maps.newHashMap(); + properties.put("key1", "val1"); + properties.put("key2", "val2"); + String comment = "comment"; + + gravitinoCatalog.asSchemas().createSchema(schemaName, comment, properties); + Schema loadSchema = gravitinoCatalog.asSchemas().loadSchema(schemaName); + Assertions.assertEquals(schemaName, loadSchema.name()); + Assertions.assertEquals(comment, loadSchema.comment()); + Assertions.assertEquals("val1", loadSchema.properties().get("key1")); + Assertions.assertEquals("val2", loadSchema.properties().get("key2")); + } + + private void dropSchema() { + gravitinoCatalog.asSchemas().dropSchema(schemaName, true); + } +} diff --git a/catalogs/catalog-model/src/test/resources/log4j2.properties b/catalogs/catalog-model/src/test/resources/log4j2.properties new file mode 100644 index 00000000000..88da637c15d --- /dev/null +++ b/catalogs/catalog-model/src/test/resources/log4j2.properties @@ -0,0 +1,73 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Set to debug or trace if log4j initialization is failing +status = info + +# Name of the configuration +name = ConsoleLogConfig + +# Console appender configuration +appender.console.type = Console +appender.console.name = consoleLogger +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss} %-5p [%t] %c{1}:%L - %m%n + +# Log files location +property.logPath = ${sys:gravitino.log.path:-build/catalog-model-integration-test.log} + +# File appender configuration +appender.file.type = File +appender.file.name = fileLogger +appender.file.fileName = ${logPath} +appender.file.layout.type = PatternLayout +appender.file.layout.pattern = %d{yyyy-MM-dd HH:mm:ss.SSS} [%t] %-5p %c - %m%n + +# Root logger level +rootLogger.level = info + +# Root logger referring to console and file appenders +rootLogger.appenderRef.stdout.ref = consoleLogger +rootLogger.appenderRef.file.ref = fileLogger + +# File appender configuration for testcontainers +appender.testcontainersFile.type = File +appender.testcontainersFile.name = testcontainersLogger +appender.testcontainersFile.fileName = build/testcontainers.log +appender.testcontainersFile.layout.type = PatternLayout +appender.testcontainersFile.layout.pattern = %d{yyyy-MM-dd HH:mm:ss.SSS} [%t] %-5p %c - %m%n + +# Logger for testcontainers +logger.testcontainers.name = org.testcontainers +logger.testcontainers.level = debug +logger.testcontainers.additivity = false +logger.testcontainers.appenderRef.file.ref = testcontainersLogger + +logger.tc.name = tc +logger.tc.level = debug +logger.tc.additivity = false +logger.tc.appenderRef.file.ref = testcontainersLogger + +logger.docker.name = com.github.dockerjava +logger.docker.level = warn +logger.docker.additivity = false +logger.docker.appenderRef.file.ref = testcontainersLogger + +logger.http.name = com.github.dockerjava.zerodep.shaded.org.apache.hc.client5.http.wire +logger.http.level = off diff --git a/clients/client-python/gravitino/client/generic_model_catalog.py b/clients/client-python/gravitino/client/generic_model_catalog.py index 6077b0b6429..ca6b5cd31fb 100644 --- a/clients/client-python/gravitino/client/generic_model_catalog.py +++ b/clients/client-python/gravitino/client/generic_model_catalog.py @@ -199,7 +199,7 @@ def list_model_versions(self, model_ident: NameIdentifier) -> List[int]: model_full_ident = self._model_full_identifier(model_ident) resp = self.rest_client.get( - self._format_model_version_request_path(model_full_ident), + f"{self._format_model_version_request_path(model_full_ident)}/versions", error_handler=MODEL_ERROR_HANDLER, ) model_version_list_resp = ModelVersionListResponse.from_json( diff --git a/clients/client-python/gravitino/client/gravitino_metalake.py b/clients/client-python/gravitino/client/gravitino_metalake.py index 0f72bfc0749..be502aae705 100644 --- a/clients/client-python/gravitino/client/gravitino_metalake.py +++ b/clients/client-python/gravitino/client/gravitino_metalake.py @@ -128,7 +128,9 @@ def create_catalog( Args: name: The name of the catalog. catalog_type: The type of the catalog. - provider: The provider of the catalog. + provider: The provider of the catalog. This parameter can be None if the catalog + provides a managed implementation. Currently, only the model catalog supports None + provider. For the details, please refer to the Catalog.Type. comment: The comment of the catalog. properties: The properties of the catalog. diff --git a/clients/client-python/tests/integration/integration_test_env.py b/clients/client-python/tests/integration/integration_test_env.py index 9344ff93a26..308303e8a7d 100644 --- a/clients/client-python/tests/integration/integration_test_env.py +++ b/clients/client-python/tests/integration/integration_test_env.py @@ -67,7 +67,10 @@ class IntegrationTestEnv(unittest.TestCase): @classmethod def setUpClass(cls): - if os.environ.get("START_EXTERNAL_GRAVITINO") is not None: + if ( + os.environ.get("START_EXTERNAL_GRAVITINO") is not None + and os.environ.get("START_EXTERNAL_GRAVITINO").lower() == "true" + ): # Maybe Gravitino server already startup by Gradle test command or developer manual startup. if not check_gravitino_server_status(): logger.error("ERROR: Can't find online Gravitino server!") @@ -112,7 +115,10 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): - if os.environ.get("START_EXTERNAL_GRAVITINO") is not None: + if ( + os.environ.get("START_EXTERNAL_GRAVITINO") is not None + and os.environ.get("START_EXTERNAL_GRAVITINO").lower() == "true" + ): return logger.info("Stop integration test environment...") diff --git a/clients/client-python/tests/integration/test_model_catalog.py b/clients/client-python/tests/integration/test_model_catalog.py new file mode 100644 index 00000000000..35ebfdc4726 --- /dev/null +++ b/clients/client-python/tests/integration/test_model_catalog.py @@ -0,0 +1,403 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from random import randint + +from gravitino import GravitinoAdminClient, GravitinoClient, Catalog, NameIdentifier +from gravitino.exceptions.base import ( + ModelAlreadyExistsException, + NoSuchSchemaException, + NoSuchModelException, + ModelVersionAliasesAlreadyExistException, + NoSuchModelVersionException, +) +from gravitino.namespace import Namespace +from tests.integration.integration_test_env import IntegrationTestEnv + + +class TestModelCatalog(IntegrationTestEnv): + + _metalake_name: str = "model_it_metalake" + str(randint(0, 1000)) + _catalog_name: str = "model_it_catalog" + str(randint(0, 1000)) + _schema_name: str = "model_it_schema" + str(randint(0, 1000)) + + _gravitino_admin_client: GravitinoAdminClient = None + _gravitino_client: GravitinoClient = None + _catalog: Catalog = None + + @classmethod + def setUpClass(cls): + super().setUpClass() + + cls._gravitino_admin_client = GravitinoAdminClient(uri="http://localhost:8090") + cls._gravitino_admin_client.create_metalake( + cls._metalake_name, comment="comment", properties={} + ) + + cls._gravitino_client = GravitinoClient( + uri="http://localhost:8090", metalake_name=cls._metalake_name + ) + cls._catalog = cls._gravitino_client.create_catalog( + name=cls._catalog_name, + catalog_type=Catalog.Type.MODEL, + provider=None, + comment="comment", + properties={}, + ) + + @classmethod + def tearDownClass(cls): + cls._gravitino_client.drop_catalog(name=cls._catalog_name, force=True) + cls._gravitino_admin_client.drop_metalake(name=cls._metalake_name, force=True) + + super().tearDownClass() + + def setUp(self): + self._catalog.as_schemas().create_schema(self._schema_name, "comment", {}) + + def tearDown(self): + self._catalog.as_schemas().drop_schema(self._schema_name, True) + + def test_register_get_model(self): + model_name = "model_it_model" + str(randint(0, 1000)) + model_ident = NameIdentifier.of(self._schema_name, model_name) + comment = "comment" + properties = {"k1": "v1", "k2": "v2"} + + model = self._catalog.as_model_catalog().register_model( + model_ident, comment, properties + ) + self.assertEqual(model_name, model.name()) + self.assertEqual(comment, model.comment()) + self.assertEqual(0, model.latest_version()) + self.assertEqual(properties, model.properties()) + + # Test register model without comment and properties + model = self._catalog.as_model_catalog().register_model( + NameIdentifier.of( + self._schema_name, model_name + "_no_comment_no_properties" + ), + comment=None, + properties=None, + ) + self.assertEqual(model_name + "_no_comment_no_properties", model.name()) + self.assertIsNone(model.comment()) + self.assertEqual(0, model.latest_version()) + self.assertEqual({}, model.properties()) + + ## Test register same name model again + with self.assertRaises(ModelAlreadyExistsException): + self._catalog.as_model_catalog().register_model( + model_ident, comment, properties + ) + + # Test register model in a non-existent schema + with self.assertRaises(NoSuchSchemaException): + self._catalog.as_model_catalog().register_model( + NameIdentifier.of("non_existent_schema", model_name), + comment, + properties, + ) + + # Test get model + model = self._catalog.as_model_catalog().get_model(model_ident) + self.assertEqual(model_name, model.name()) + self.assertEqual(comment, model.comment()) + self.assertEqual(0, model.latest_version()) + self.assertEqual(properties, model.properties()) + + # Test get non-existent model + with self.assertRaises(NoSuchModelException): + self._catalog.as_model_catalog().get_model( + NameIdentifier.of(self._schema_name, "non_existent_model") + ) + + # Test get a model for non-existent schema + with self.assertRaises(NoSuchModelException): + self._catalog.as_model_catalog().get_model( + NameIdentifier.of("non_existent_schema", model_name) + ) + + def test_register_list_models(self): + + model_name1 = "model_it_model1" + str(randint(0, 1000)) + model_name2 = "model_it_model2" + str(randint(0, 1000)) + model_ident1 = NameIdentifier.of(self._schema_name, model_name1) + model_ident2 = NameIdentifier.of(self._schema_name, model_name2) + comment = "comment" + properties = {"k1": "v1", "k2": "v2"} + + self._catalog.as_model_catalog().register_model( + model_ident1, comment, properties + ) + self._catalog.as_model_catalog().register_model( + model_ident2, comment, properties + ) + + models = self._catalog.as_model_catalog().list_models( + Namespace.of(self._schema_name) + ) + self.assertEqual(2, len(models)) + self.assertTrue(model_ident1 in models) + self.assertTrue(model_ident2 in models) + + # Test delete and list models + self.assertTrue(self._catalog.as_model_catalog().delete_model(model_ident1)) + models = self._catalog.as_model_catalog().list_models( + Namespace.of(self._schema_name) + ) + self.assertEqual(1, len(models)) + self.assertTrue(model_ident2 in models) + + self.assertTrue(self._catalog.as_model_catalog().delete_model(model_ident2)) + models = self._catalog.as_model_catalog().list_models( + Namespace.of(self._schema_name) + ) + self.assertEqual(0, len(models)) + + # Test list models for non-existent schema + with self.assertRaises(NoSuchSchemaException): + self._catalog.as_model_catalog().list_models( + Namespace.of("non_existent_schema") + ) + + def test_register_delete_model(self): + model_name = "model_it_model" + str(randint(0, 1000)) + model_ident = NameIdentifier.of(self._schema_name, model_name) + comment = "comment" + properties = {"k1": "v1", "k2": "v2"} + + self._catalog.as_model_catalog().register_model( + model_ident, comment, properties + ) + self.assertTrue(self._catalog.as_model_catalog().delete_model(model_ident)) + # delete again will return False + self.assertFalse(self._catalog.as_model_catalog().delete_model(model_ident)) + + # Test delete model in non-existent schema + self.assertFalse( + self._catalog.as_model_catalog().delete_model( + NameIdentifier.of("non_existent_schema", model_name) + ) + ) + + # Test delete non-existent model + self.assertFalse( + self._catalog.as_model_catalog().delete_model( + NameIdentifier.of(self._schema_name, "non_existent_model") + ) + ) + + def test_link_get_model_version(self): + model_name = "model_it_model" + str(randint(0, 1000)) + model_ident = NameIdentifier.of(self._schema_name, model_name) + self._catalog.as_model_catalog().register_model(model_ident, "comment", {}) + + # Test link model version + self._catalog.as_model_catalog().link_model_version( + model_ident, + uri="uri", + aliases=["alias1", "alias2"], + comment="comment", + properties={"k1": "v1", "k2": "v2"}, + ) + + # Test link model version to a non-existent model + with self.assertRaises(NoSuchModelException): + self._catalog.as_model_catalog().link_model_version( + NameIdentifier.of(self._schema_name, "non_existent_model"), + uri="uri", + aliases=["alias1", "alias2"], + comment="comment", + properties={"k1": "v1", "k2": "v2"}, + ) + + # Test link model version with existing aliases + with self.assertRaises(ModelVersionAliasesAlreadyExistException): + self._catalog.as_model_catalog().link_model_version( + model_ident, + uri="uri", + aliases=["alias1", "alias2"], + comment="comment", + properties={"k1": "v1", "k2": "v2"}, + ) + + model_version = self._catalog.as_model_catalog().get_model_version( + model_ident, 0 + ) + self.assertEqual(0, model_version.version()) + self.assertEqual("uri", model_version.uri()) + self.assertEqual(["alias1", "alias2"], model_version.aliases()) + self.assertEqual("comment", model_version.comment()) + self.assertEqual({"k1": "v1", "k2": "v2"}, model_version.properties()) + + model_version = self._catalog.as_model_catalog().get_model_version_by_alias( + model_ident, "alias1" + ) + self.assertEqual(0, model_version.version()) + self.assertEqual("uri", model_version.uri()) + + model_version = self._catalog.as_model_catalog().get_model_version_by_alias( + model_ident, "alias2" + ) + self.assertEqual(0, model_version.version()) + self.assertEqual("uri", model_version.uri()) + + # Test get model version from non-existent model + with self.assertRaises(NoSuchModelVersionException): + self._catalog.as_model_catalog().get_model_version( + NameIdentifier.of(self._schema_name, "non_existent_model"), 0 + ) + + with self.assertRaises(NoSuchModelVersionException): + self._catalog.as_model_catalog().get_model_version_by_alias( + NameIdentifier.of(self._schema_name, "non_existent_model"), "alias1" + ) + + # Test get non-existent model version + with self.assertRaises(NoSuchModelVersionException): + self._catalog.as_model_catalog().get_model_version(model_ident, 1) + + with self.assertRaises(NoSuchModelVersionException): + self._catalog.as_model_catalog().get_model_version_by_alias( + model_ident, "non_existent_alias" + ) + + # Test link model version with None aliases, comment and properties + self._catalog.as_model_catalog().link_model_version( + model_ident, uri="uri", aliases=None, comment=None, properties=None + ) + model_version = self._catalog.as_model_catalog().get_model_version( + model_ident, 1 + ) + self.assertEqual(1, model_version.version()) + self.assertEqual("uri", model_version.uri()) + self.assertEqual([], model_version.aliases()) + self.assertIsNone(model_version.comment()) + self.assertEqual({}, model_version.properties()) + + def test_link_list_model_versions(self): + model_name = "model_it_model" + str(randint(0, 1000)) + model_ident = NameIdentifier.of(self._schema_name, model_name) + self._catalog.as_model_catalog().register_model(model_ident, "comment", {}) + + # Test link model versions + self._catalog.as_model_catalog().link_model_version( + model_ident, + uri="uri1", + aliases=["alias1", "alias2"], + comment="comment", + properties={"k1": "v1", "k2": "v2"}, + ) + + self._catalog.as_model_catalog().link_model_version( + model_ident, + uri="uri2", + aliases=["alias3", "alias4"], + comment="comment", + properties={"k1": "v1", "k2": "v2"}, + ) + + model_versions = self._catalog.as_model_catalog().list_model_versions( + model_ident + ) + self.assertEqual(2, len(model_versions)) + self.assertTrue(0 in model_versions) + self.assertTrue(1 in model_versions) + + # Test delete model version + self.assertTrue( + self._catalog.as_model_catalog().delete_model_version(model_ident, 0) + ) + model_versions = self._catalog.as_model_catalog().list_model_versions( + model_ident + ) + self.assertEqual(1, len(model_versions)) + self.assertTrue(1 in model_versions) + + self.assertTrue( + self._catalog.as_model_catalog().delete_model_version(model_ident, 1) + ) + model_versions = self._catalog.as_model_catalog().list_model_versions( + model_ident + ) + self.assertEqual(0, len(model_versions)) + + # Test list model versions for non-existent model + with self.assertRaises(NoSuchModelException): + self._catalog.as_model_catalog().list_model_versions( + NameIdentifier.of(self._schema_name, "non_existent_model") + ) + + def test_link_delete_model_version(self): + model_name = "model_it_model" + str(randint(0, 1000)) + model_ident = NameIdentifier.of(self._schema_name, model_name) + self._catalog.as_model_catalog().register_model(model_ident, "comment", {}) + + self._catalog.as_model_catalog().link_model_version( + model_ident, + uri="uri", + aliases=["alias1"], + comment="comment", + properties={"k1": "v1", "k2": "v2"}, + ) + + self.assertTrue( + self._catalog.as_model_catalog().delete_model_version(model_ident, 0) + ) + self.assertFalse( + self._catalog.as_model_catalog().delete_model_version(model_ident, 0) + ) + self.assertFalse( + self._catalog.as_model_catalog().delete_model_version_by_alias( + model_ident, "alias1" + ) + ) + + self._catalog.as_model_catalog().link_model_version( + model_ident, + uri="uri", + aliases=["alias2"], + comment="comment", + properties={"k1": "v1", "k2": "v2"}, + ) + + self.assertTrue( + self._catalog.as_model_catalog().delete_model_version_by_alias( + model_ident, "alias2" + ) + ) + self.assertFalse( + self._catalog.as_model_catalog().delete_model_version_by_alias( + model_ident, "alias2" + ) + ) + self.assertFalse( + self._catalog.as_model_catalog().delete_model_version(model_ident, 1) + ) + + # Test delete model version for non-existent model + self.assertFalse( + self._catalog.as_model_catalog().delete_model_version( + NameIdentifier.of(self._schema_name, "non_existent_model"), 0 + ) + ) + + self.assertFalse( + self._catalog.as_model_catalog().delete_model_version_by_alias( + NameIdentifier.of(self._schema_name, "non_existent_model"), "alias1" + ) + ) diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/ModelVersionAliasSQLProviderFactory.java b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/ModelVersionAliasSQLProviderFactory.java index 726e3d0e2b7..c83e9deaa22 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/ModelVersionAliasSQLProviderFactory.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/ModelVersionAliasSQLProviderFactory.java @@ -23,6 +23,7 @@ import java.util.Map; import org.apache.gravitino.storage.relational.JDBCBackend.JDBCBackendType; import org.apache.gravitino.storage.relational.mapper.provider.base.ModelVersionAliasRelBaseSQLProvider; +import org.apache.gravitino.storage.relational.mapper.provider.h2.ModelVersionAliasRelH2SQLProvider; import org.apache.gravitino.storage.relational.mapper.provider.postgresql.ModelVersionAliasRelPostgreSQLProvider; import org.apache.gravitino.storage.relational.po.ModelVersionAliasRelPO; import org.apache.gravitino.storage.relational.session.SqlSessionFactoryHelper; @@ -32,13 +33,11 @@ public class ModelVersionAliasSQLProviderFactory { static class ModelVersionAliasRelMySQLProvider extends ModelVersionAliasRelBaseSQLProvider {} - static class ModelVersionAliasRelH2Provider extends ModelVersionAliasRelBaseSQLProvider {} - private static final Map MODEL_VERSION_META_SQL_PROVIDER_MAP = ImmutableMap.of( JDBCBackendType.MYSQL, new ModelVersionAliasRelMySQLProvider(), - JDBCBackendType.H2, new ModelVersionAliasRelH2Provider(), + JDBCBackendType.H2, new ModelVersionAliasRelH2SQLProvider(), JDBCBackendType.POSTGRESQL, new ModelVersionAliasRelPostgreSQLProvider()); public static ModelVersionAliasRelBaseSQLProvider getProvider() { diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/ModelVersionAliasRelBaseSQLProvider.java b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/ModelVersionAliasRelBaseSQLProvider.java index 5354b888f33..abaaa5a8aed 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/ModelVersionAliasRelBaseSQLProvider.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/ModelVersionAliasRelBaseSQLProvider.java @@ -100,13 +100,14 @@ public String softDeleteModelVersionAliasRelsByModelIdAndAlias( @Param("modelId") Long modelId, @Param("alias") String alias) { return "UPDATE " + ModelVersionAliasRelMapper.TABLE_NAME - + " SET deleted_at = (UNIX_TIMESTAMP() * 1000.0)" - + " + EXTRACT(MICROSECOND FROM CURRENT_TIMESTAMP(3)) / 1000" - + " WHERE model_id = #{modelId} AND model_version = (" + + " mvar JOIN (" + " SELECT model_version FROM " + ModelVersionAliasRelMapper.TABLE_NAME + " WHERE model_id = #{modelId} AND model_version_alias = #{alias} AND deleted_at = 0)" - + " AND deleted_at = 0"; + + " subquery ON mvar.model_version = subquery.model_version" + + " SET mvar.deleted_at = (UNIX_TIMESTAMP() * 1000.0)" + + " + EXTRACT(MICROSECOND FROM CURRENT_TIMESTAMP(3)) / 1000" + + " WHERE mvar.model_id = #{modelId} AND mvar.deleted_at = 0"; } public String softDeleteModelVersionAliasRelsBySchemaId(@Param("schemaId") Long schemaId) { diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/h2/ModelVersionAliasRelH2SQLProvider.java b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/h2/ModelVersionAliasRelH2SQLProvider.java new file mode 100644 index 00000000000..a9ddc01c1e2 --- /dev/null +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/h2/ModelVersionAliasRelH2SQLProvider.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.storage.relational.mapper.provider.h2; + +import org.apache.gravitino.storage.relational.mapper.ModelVersionAliasRelMapper; +import org.apache.gravitino.storage.relational.mapper.provider.base.ModelVersionAliasRelBaseSQLProvider; +import org.apache.ibatis.annotations.Param; + +public class ModelVersionAliasRelH2SQLProvider extends ModelVersionAliasRelBaseSQLProvider { + + @Override + public String softDeleteModelVersionAliasRelsByModelIdAndAlias( + @Param("modelId") Long modelId, @Param("alias") String alias) { + return "UPDATE " + + ModelVersionAliasRelMapper.TABLE_NAME + + " SET deleted_at = (UNIX_TIMESTAMP() * 1000.0)" + + " + EXTRACT(MICROSECOND FROM CURRENT_TIMESTAMP(3)) / 1000" + + " WHERE model_id = #{modelId} AND model_version = (" + + " SELECT model_version FROM " + + ModelVersionAliasRelMapper.TABLE_NAME + + " WHERE model_id = #{modelId} AND model_version_alias = #{alias} AND deleted_at = 0)" + + " AND deleted_at = 0"; + } +} diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/postgresql/ModelVersionAliasRelPostgreSQLProvider.java b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/postgresql/ModelVersionAliasRelPostgreSQLProvider.java index a37f0531258..da23bdca2d4 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/postgresql/ModelVersionAliasRelPostgreSQLProvider.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/postgresql/ModelVersionAliasRelPostgreSQLProvider.java @@ -46,7 +46,7 @@ public String softDeleteModelVersionAliasRelsByModelIdAndVersion( + ModelVersionAliasRelMapper.TABLE_NAME + " SET deleted_at = floor(extract(epoch from((current_timestamp -" + " timestamp '1970-01-01 00:00:00')*1000)))" - + " WHERE model_id = #{modelId} AND model_version = #{version} AND deleted_at = 0"; + + " WHERE model_id = #{modelId} AND model_version = #{modelVersion} AND deleted_at = 0"; } @Override diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/postgresql/ModelVersionMetaPostgreSQLProvider.java b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/postgresql/ModelVersionMetaPostgreSQLProvider.java index 09be14319bd..4183a53617c 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/postgresql/ModelVersionMetaPostgreSQLProvider.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/postgresql/ModelVersionMetaPostgreSQLProvider.java @@ -47,7 +47,7 @@ public String softDeleteModelVersionMetaByModelIdAndVersion( + ModelVersionMetaMapper.TABLE_NAME + " SET deleted_at = floor(extract(epoch from((current_timestamp -" + " timestamp '1970-01-01 00:00:00')*1000)))" - + " WHERE model_id = #{modelId} AND version = #{version} AND deleted_at = 0"; + + " WHERE model_id = #{modelId} AND version = #{modelVersion} AND deleted_at = 0"; } @Override From fb756169805d1f0164641cdd475228f6b7ead730 Mon Sep 17 00:00:00 2001 From: Lord of Abyss <103809695+Abyss-lord@users.noreply.github.com> Date: Fri, 3 Jan 2025 07:58:53 +0800 Subject: [PATCH 4/7] [#6030] fix(CLI): Fix Setting the same tags multiple times in the Gravitino CLi gives unexpected output (#6037) ### What changes were proposed in this pull request? Fix the error information when Setting the same tags multiple times in the Gravitino CLi. now a hint information is given when the tag is set repeatedly ### Why are the changes needed? Fix: #6030 ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? ```bash gcli tag set --metalake demo_metalake --name Hive_catalog --tag tagB tagC # Hive_catalog now tagged with tagB,tagC gcli tag set --metalake demo_metalake --name Hive_catalog --tag tagB tagC # [tagB, tagC] are(is) already associated with Hive_catalog ``` --- .../main/java/org/apache/gravitino/cli/commands/TagEntity.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clients/cli/src/main/java/org/apache/gravitino/cli/commands/TagEntity.java b/clients/cli/src/main/java/org/apache/gravitino/cli/commands/TagEntity.java index d2d1cbbe18f..7bc8ec37649 100644 --- a/clients/cli/src/main/java/org/apache/gravitino/cli/commands/TagEntity.java +++ b/clients/cli/src/main/java/org/apache/gravitino/cli/commands/TagEntity.java @@ -29,6 +29,7 @@ import org.apache.gravitino.exceptions.NoSuchMetalakeException; import org.apache.gravitino.exceptions.NoSuchSchemaException; import org.apache.gravitino.exceptions.NoSuchTableException; +import org.apache.gravitino.exceptions.TagAlreadyAssociatedException; import org.apache.gravitino.rel.Table; public class TagEntity extends Command { @@ -94,6 +95,8 @@ public void handle() { exitWithError(ErrorMessages.UNKNOWN_SCHEMA); } catch (NoSuchTableException err) { exitWithError(ErrorMessages.UNKNOWN_TABLE); + } catch (TagAlreadyAssociatedException err) { + exitWithError("Tags are already associated with " + name.getName()); } catch (Exception exp) { exitWithError(exp.getMessage()); } From c9d124b2016900dde2ad1ee1bfba6a7387bf5ce1 Mon Sep 17 00:00:00 2001 From: Vignesh Suresh Kumar <55813127+VigneshSK17@users.noreply.github.com> Date: Thu, 2 Jan 2025 19:00:29 -0500 Subject: [PATCH 5/7] [#5962] feat(client): added audit cli command model (#6047) ### What changes were proposed in this pull request? The audit command is one of the commands suggested by @justinmclean as part of adding Model entity support for the CLI. Also includes addition of relevant testing code for Model entity CLI commands a whole. ### Why are the changes needed? To add audit functionality for a Model using the CLI Improvement: #5962 ### Does this PR introduce _any_ user-facing change? Yes. The audit command for a model was added. ### How was this patch tested? Unit tests were added for Model CLI support and ran successfully for the audit command. --- .../apache/gravitino/cli/CommandEntities.java | 1 + .../apache/gravitino/cli/ErrorMessages.java | 1 + .../org/apache/gravitino/cli/FullName.java | 2 +- .../gravitino/cli/GravitinoCommandLine.java | 11 ++- .../gravitino/cli/TestableCommandLine.java | 6 ++ .../gravitino/cli/commands/ModelAudit.java | 90 +++++++++++++++++++ clients/cli/src/main/resources/model_help.txt | 8 ++ ...delCommand.java => TestModelCommands.java} | 24 ++++- docs/cli.md | 4 +- 9 files changed, 142 insertions(+), 5 deletions(-) create mode 100644 clients/cli/src/main/java/org/apache/gravitino/cli/commands/ModelAudit.java create mode 100644 clients/cli/src/main/resources/model_help.txt rename clients/cli/src/test/java/org/apache/gravitino/cli/{TestModelCommand.java => TestModelCommands.java} (90%) diff --git a/clients/cli/src/main/java/org/apache/gravitino/cli/CommandEntities.java b/clients/cli/src/main/java/org/apache/gravitino/cli/CommandEntities.java index 2dd50974ea9..47a03b7beb4 100644 --- a/clients/cli/src/main/java/org/apache/gravitino/cli/CommandEntities.java +++ b/clients/cli/src/main/java/org/apache/gravitino/cli/CommandEntities.java @@ -47,6 +47,7 @@ public class CommandEntities { VALID_ENTITIES.add(SCHEMA); VALID_ENTITIES.add(TABLE); VALID_ENTITIES.add(COLUMN); + VALID_ENTITIES.add(MODEL); VALID_ENTITIES.add(USER); VALID_ENTITIES.add(GROUP); VALID_ENTITIES.add(TAG); diff --git a/clients/cli/src/main/java/org/apache/gravitino/cli/ErrorMessages.java b/clients/cli/src/main/java/org/apache/gravitino/cli/ErrorMessages.java index 4bd523ec280..084b5c34c85 100644 --- a/clients/cli/src/main/java/org/apache/gravitino/cli/ErrorMessages.java +++ b/clients/cli/src/main/java/org/apache/gravitino/cli/ErrorMessages.java @@ -57,6 +57,7 @@ public class ErrorMessages { public static final String UNKNOWN_ROLE = "Unknown role."; public static final String ROLE_EXISTS = "Role already exists."; public static final String TABLE_EXISTS = "Table already exists."; + public static final String MODEL_EXISTS = "Model already exists."; public static final String INVALID_SET_COMMAND = "Unsupported combination of options either use --name, --user, --group or --property and --value."; public static final String INVALID_REMOVE_COMMAND = diff --git a/clients/cli/src/main/java/org/apache/gravitino/cli/FullName.java b/clients/cli/src/main/java/org/apache/gravitino/cli/FullName.java index c21d21af483..a3b206dfdd1 100644 --- a/clients/cli/src/main/java/org/apache/gravitino/cli/FullName.java +++ b/clients/cli/src/main/java/org/apache/gravitino/cli/FullName.java @@ -99,7 +99,7 @@ public String getSchemaName() { /** * Retrieves the model name from the second part of the full name option. * - * @return The model name, or null if not found + * @return The model name, or null if not found. */ public String getModelName() { return getNamePart(2); diff --git a/clients/cli/src/main/java/org/apache/gravitino/cli/GravitinoCommandLine.java b/clients/cli/src/main/java/org/apache/gravitino/cli/GravitinoCommandLine.java index 8cd335bebbe..c23fb8b7cd0 100644 --- a/clients/cli/src/main/java/org/apache/gravitino/cli/GravitinoCommandLine.java +++ b/clients/cli/src/main/java/org/apache/gravitino/cli/GravitinoCommandLine.java @@ -140,6 +140,8 @@ private void executeCommand() { handleCatalogCommand(); } else if (entity.equals(CommandEntities.METALAKE)) { handleMetalakeCommand(); + } else if (entity.equals(CommandEntities.MODEL)) { + handleModelCommand(); } else if (entity.equals(CommandEntities.TOPIC)) { handleTopicCommand(); } else if (entity.equals(CommandEntities.FILESET)) { @@ -1152,6 +1154,9 @@ private void handleFilesetCommand() { } } + /** + * Handles the command execution for Models based on command type and the command line options. + */ private void handleModelCommand() { String url = getUrl(); String auth = getAuth(); @@ -1180,7 +1185,11 @@ private void handleModelCommand() { switch (command) { case CommandActions.DETAILS: - newModelDetails(url, ignore, metalake, catalog, schema, model).handle(); + if (line.hasOption(GravitinoOptions.AUDIT)) { + newModelAudit(url, ignore, metalake, catalog, schema, model).handle(); + } else { + newModelDetails(url, ignore, metalake, catalog, schema, model).handle(); + } break; default: diff --git a/clients/cli/src/main/java/org/apache/gravitino/cli/TestableCommandLine.java b/clients/cli/src/main/java/org/apache/gravitino/cli/TestableCommandLine.java index 3cfd84ad83c..6a468749178 100644 --- a/clients/cli/src/main/java/org/apache/gravitino/cli/TestableCommandLine.java +++ b/clients/cli/src/main/java/org/apache/gravitino/cli/TestableCommandLine.java @@ -80,6 +80,7 @@ import org.apache.gravitino.cli.commands.MetalakeDetails; import org.apache.gravitino.cli.commands.MetalakeDisable; import org.apache.gravitino.cli.commands.MetalakeEnable; +import org.apache.gravitino.cli.commands.ModelAudit; import org.apache.gravitino.cli.commands.ModelDetails; import org.apache.gravitino.cli.commands.OwnerDetails; import org.apache.gravitino.cli.commands.RemoveAllTags; @@ -915,6 +916,11 @@ protected ListModel newListModel( return new ListModel(url, ignore, metalake, catalog, schema); } + protected ModelAudit newModelAudit( + String url, boolean ignore, String metalake, String catalog, String schema, String model) { + return new ModelAudit(url, ignore, metalake, catalog, schema, model); + } + protected ModelDetails newModelDetails( String url, boolean ignore, String metalake, String catalog, String schema, String model) { return new ModelDetails(url, ignore, metalake, catalog, schema, model); diff --git a/clients/cli/src/main/java/org/apache/gravitino/cli/commands/ModelAudit.java b/clients/cli/src/main/java/org/apache/gravitino/cli/commands/ModelAudit.java new file mode 100644 index 00000000000..841afd2de9e --- /dev/null +++ b/clients/cli/src/main/java/org/apache/gravitino/cli/commands/ModelAudit.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.cli.commands; + +import org.apache.gravitino.NameIdentifier; +import org.apache.gravitino.cli.ErrorMessages; +import org.apache.gravitino.client.GravitinoClient; +import org.apache.gravitino.exceptions.NoSuchCatalogException; +import org.apache.gravitino.exceptions.NoSuchMetalakeException; +import org.apache.gravitino.exceptions.NoSuchModelException; +import org.apache.gravitino.model.Model; +import org.apache.gravitino.model.ModelCatalog; + +/** Displays the audit information of a model. */ +public class ModelAudit extends AuditCommand { + + protected final String metalake; + protected final String catalog; + protected final String schema; + protected final String model; + + /** + * Displays the audit information of a model. + * + * @param url The URL of the Gravitino server. + * @param ignoreVersions If true don't check the client/server versions match. + * @param metalake The name of the metalake. + * @param catalog The name of the catalog. + * @param schema The name of the schema. + * @param model The name of the model. + */ + public ModelAudit( + String url, + boolean ignoreVersions, + String metalake, + String catalog, + String schema, + String model) { + super(url, ignoreVersions); + this.metalake = metalake; + this.catalog = catalog; + this.schema = schema; + this.model = model; + } + + /** Displays the audit information of a model. */ + @Override + public void handle() { + NameIdentifier name = NameIdentifier.of(schema, model); + Model result; + + try (GravitinoClient client = buildClient(this.metalake)) { + ModelCatalog modelCatalog = client.loadCatalog(catalog).asModelCatalog(); + result = modelCatalog.getModel(name); + } catch (NoSuchMetalakeException err) { + System.err.println(ErrorMessages.UNKNOWN_METALAKE); + return; + } catch (NoSuchCatalogException err) { + System.err.println(ErrorMessages.UNKNOWN_CATALOG); + return; + } catch (NoSuchModelException err) { + System.err.println(ErrorMessages.UNKNOWN_MODEL); + return; + } catch (Exception exp) { + System.err.println(exp.getMessage()); + return; + } + + if (result != null) { + displayAuditInfo(result.auditInfo()); + } + } +} diff --git a/clients/cli/src/main/resources/model_help.txt b/clients/cli/src/main/resources/model_help.txt new file mode 100644 index 00000000000..04e9b8262ef --- /dev/null +++ b/clients/cli/src/main/resources/model_help.txt @@ -0,0 +1,8 @@ +gcli model [details] + +Please set the metalake in the Gravitino configuration file or the environment variable before running any of these commands. + +Example commands + +Show model audit information +gcli model details --name catalog_postgres.hr --audit \ No newline at end of file diff --git a/clients/cli/src/test/java/org/apache/gravitino/cli/TestModelCommand.java b/clients/cli/src/test/java/org/apache/gravitino/cli/TestModelCommands.java similarity index 90% rename from clients/cli/src/test/java/org/apache/gravitino/cli/TestModelCommand.java rename to clients/cli/src/test/java/org/apache/gravitino/cli/TestModelCommands.java index d222655b641..e486c41a9d1 100644 --- a/clients/cli/src/test/java/org/apache/gravitino/cli/TestModelCommand.java +++ b/clients/cli/src/test/java/org/apache/gravitino/cli/TestModelCommands.java @@ -38,13 +38,14 @@ import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.Options; import org.apache.gravitino.cli.commands.ListModel; +import org.apache.gravitino.cli.commands.ModelAudit; import org.apache.gravitino.cli.commands.ModelDetails; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.testcontainers.shaded.com.google.common.base.Joiner; -public class TestModelCommand { +public class TestModelCommands { private final Joiner joiner = Joiner.on(", ").skipNulls(); private CommandLine mockCommandLine; private Options mockOptions; @@ -267,4 +268,25 @@ void testModelDetailsCommandWithoutModel() { + joiner.join(Collections.singletonList(CommandEntities.MODEL)), output); } + + @Test + void testModelAuditCommand() { + ModelAudit mockAudit = mock(ModelAudit.class); + when(mockCommandLine.hasOption(GravitinoOptions.METALAKE)).thenReturn(true); + when(mockCommandLine.getOptionValue(GravitinoOptions.METALAKE)).thenReturn("metalake_demo"); + when(mockCommandLine.hasOption(GravitinoOptions.NAME)).thenReturn(true); + when(mockCommandLine.getOptionValue(GravitinoOptions.NAME)).thenReturn("catalog.schema.model"); + when(mockCommandLine.hasOption(GravitinoOptions.AUDIT)).thenReturn(true); + + GravitinoCommandLine commandLine = + spy( + new GravitinoCommandLine( + mockCommandLine, mockOptions, CommandEntities.MODEL, CommandActions.DETAILS)); + doReturn(mockAudit) + .when(commandLine) + .newModelAudit( + GravitinoCommandLine.DEFAULT_URL, false, "metalake_demo", "catalog", "schema", "model"); + commandLine.handleCommandLine(); + verify(mockAudit).handle(); + } } diff --git a/docs/cli.md b/docs/cli.md index 64d720f2e8a..0cc7dee4af9 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -23,11 +23,11 @@ alias gcli='java -jar ../../cli/build/libs/gravitino-cli-*-incubating-SNAPSHOT.j Or you use the `gcli.sh` script found in the `clients/cli/bin/` directory to run the CLI. ## Usage - +f The general structure for running commands with the Gravitino CLI is `gcli entity command [options]`. ```bash - usage: gcli [metalake|catalog|schema|table|column|user|group|tag|topic|fileset] [list|details|create|delete|update|set|remove|properties|revoke|grant] [options] + usage: gcli [metalake|catalog|schema|model|table|column|user|group|tag|topic|fileset] [list|details|create|delete|update|set|remove|properties|revoke|grant] [options] Options usage: gcli -a,--audit display audit information From 936d0452aa8245a8d5891eec58bf37bfff9f3840 Mon Sep 17 00:00:00 2001 From: FANNG Date: Fri, 3 Jan 2025 08:54:01 +0800 Subject: [PATCH 6/7] [#5991] feat(gcs): unify the GCS server acount path configuration for fileset and GCSCredentialProvider (#5992) ### What changes were proposed in this pull request? fileset use `gcs-service-account-file` while gcsTokenCredentialProvider use `gcs-credential-file-path`, we'd better unify the name, use `gcs-service-account-file` for GCS credential to unify them. ### Why are the changes needed? Fix: #5991 ### Does this PR introduce _any_ user-facing change? yes ### How was this patch tested? existing tests --- .../gcs/fs/GCSFileSystemProvider.java | 3 +- .../gravitino/storage/GCSProperties.java | 2 +- .../integration/test/HadoopGCSCatalogIT.java | 6 +-- .../test/GravitinoVirtualFileSystemGCSIT.java | 2 +- .../config/GCSCredentialConfig.java | 7 +-- .../iceberg-rest-server/rewrite_config.py | 3 +- docs/how-to-use-gvfs.md | 16 +++--- docs/iceberg-rest-service.md | 13 +++-- .../service/CatalogWrapperForREST.java | 53 ++++++++++++------- .../integration/test/IcebergRESTGCSIT.java | 5 +- .../service/TestCatalogWrapperForREST.java | 49 +++++++++++++++++ 11 files changed, 114 insertions(+), 45 deletions(-) create mode 100644 iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/TestCatalogWrapperForREST.java diff --git a/bundles/gcp/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java b/bundles/gcp/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java index 0055e167c49..b79b58ef48d 100644 --- a/bundles/gcp/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java +++ b/bundles/gcp/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java @@ -35,7 +35,8 @@ public class GCSFileSystemProvider implements FileSystemProvider { @VisibleForTesting public static final Map GRAVITINO_KEY_TO_GCS_HADOOP_KEY = - ImmutableMap.of(GCSProperties.GCS_SERVICE_ACCOUNT_JSON_PATH, GCS_SERVICE_ACCOUNT_JSON_FILE); + ImmutableMap.of( + GCSProperties.GRAVITINO_GCS_SERVICE_ACCOUNT_FILE, GCS_SERVICE_ACCOUNT_JSON_FILE); @Override public FileSystem getFileSystem(Path path, Map config) throws IOException { diff --git a/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/GCSProperties.java b/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/GCSProperties.java index ca8599584d1..722c2365a93 100644 --- a/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/GCSProperties.java +++ b/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/GCSProperties.java @@ -22,7 +22,7 @@ public class GCSProperties { // The path of service account JSON file of Google Cloud Storage. - public static final String GCS_SERVICE_ACCOUNT_JSON_PATH = "gcs-service-account-file"; + public static final String GRAVITINO_GCS_SERVICE_ACCOUNT_FILE = "gcs-service-account-file"; private GCSProperties() {} } diff --git a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopGCSCatalogIT.java b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopGCSCatalogIT.java index da056f20d88..2a4c68ce55b 100644 --- a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopGCSCatalogIT.java +++ b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopGCSCatalogIT.java @@ -19,7 +19,7 @@ package org.apache.gravitino.catalog.hadoop.integration.test; import static org.apache.gravitino.catalog.hadoop.HadoopCatalogPropertiesMetadata.FILESYSTEM_PROVIDERS; -import static org.apache.gravitino.storage.GCSProperties.GCS_SERVICE_ACCOUNT_JSON_PATH; +import static org.apache.gravitino.storage.GCSProperties.GRAVITINO_GCS_SERVICE_ACCOUNT_FILE; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; @@ -99,7 +99,7 @@ protected String defaultBaseLocation() { protected void createCatalog() { Map map = Maps.newHashMap(); - map.put(GCS_SERVICE_ACCOUNT_JSON_PATH, SERVICE_ACCOUNT_FILE); + map.put(GRAVITINO_GCS_SERVICE_ACCOUNT_FILE, SERVICE_ACCOUNT_FILE); map.put(FILESYSTEM_PROVIDERS, "gcs"); metalake.createCatalog(catalogName, Catalog.Type.FILESET, provider, "comment", map); @@ -117,7 +117,7 @@ public void testCreateSchemaAndFilesetWithSpecialLocation() { String ossLocation = String.format("gs://%s", BUCKET_NAME); Map catalogProps = Maps.newHashMap(); catalogProps.put("location", ossLocation); - catalogProps.put(GCS_SERVICE_ACCOUNT_JSON_PATH, SERVICE_ACCOUNT_FILE); + catalogProps.put(GRAVITINO_GCS_SERVICE_ACCOUNT_FILE, SERVICE_ACCOUNT_FILE); catalogProps.put(FILESYSTEM_PROVIDERS, "gcs"); Catalog localCatalog = diff --git a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemGCSIT.java b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemGCSIT.java index f273708810c..c7f9b7cf4bd 100644 --- a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemGCSIT.java +++ b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemGCSIT.java @@ -90,7 +90,7 @@ public void startUp() throws Exception { conf.set("fs.gravitino.client.metalake", metalakeName); // Pass this configuration to the real file system - conf.set(GCSProperties.GCS_SERVICE_ACCOUNT_JSON_PATH, SERVICE_ACCOUNT_FILE); + conf.set(GCSProperties.GRAVITINO_GCS_SERVICE_ACCOUNT_FILE, SERVICE_ACCOUNT_FILE); } @AfterAll diff --git a/core/src/main/java/org/apache/gravitino/credential/config/GCSCredentialConfig.java b/core/src/main/java/org/apache/gravitino/credential/config/GCSCredentialConfig.java index 1a2b38ef641..3f8f0292638 100644 --- a/core/src/main/java/org/apache/gravitino/credential/config/GCSCredentialConfig.java +++ b/core/src/main/java/org/apache/gravitino/credential/config/GCSCredentialConfig.java @@ -19,21 +19,18 @@ package org.apache.gravitino.credential.config; -import com.google.common.annotations.VisibleForTesting; import java.util.Map; import javax.annotation.Nullable; import org.apache.gravitino.Config; import org.apache.gravitino.config.ConfigBuilder; import org.apache.gravitino.config.ConfigConstants; import org.apache.gravitino.config.ConfigEntry; +import org.apache.gravitino.storage.GCSProperties; public class GCSCredentialConfig extends Config { - @VisibleForTesting - public static final String GRAVITINO_GCS_CREDENTIAL_FILE_PATH = "gcs-credential-file-path"; - public static final ConfigEntry GCS_CREDENTIAL_FILE_PATH = - new ConfigBuilder(GRAVITINO_GCS_CREDENTIAL_FILE_PATH) + new ConfigBuilder(GCSProperties.GRAVITINO_GCS_SERVICE_ACCOUNT_FILE) .doc("The path of GCS credential file") .version(ConfigConstants.VERSION_0_7_0) .stringConf() diff --git a/dev/docker/iceberg-rest-server/rewrite_config.py b/dev/docker/iceberg-rest-server/rewrite_config.py index d607eb6ab42..b10cdb4bfb7 100755 --- a/dev/docker/iceberg-rest-server/rewrite_config.py +++ b/dev/docker/iceberg-rest-server/rewrite_config.py @@ -24,7 +24,8 @@ "GRAVITINO_WAREHOUSE" : "warehouse", "GRAVITINO_CREDENTIAL_PROVIDER_TYPE" : "credential-providers", "GRAVITINO_CREDENTIAL_PROVIDERS" : "credential-providers", - "GRAVITINO_GCS_CREDENTIAL_FILE_PATH" : "gcs-credential-file-path", + "GRAVITINO_GCS_CREDENTIAL_FILE_PATH" : "gcs-service-account-file", + "GRAVITINO_GCS_SERVICE_ACCOUNT_FILE" : "gcs-service-account-file", "GRAVITINO_S3_ACCESS_KEY" : "s3-access-key-id", "GRAVITINO_S3_SECRET_KEY" : "s3-secret-access-key", "GRAVITINO_S3_REGION" : "s3-region", diff --git a/docs/how-to-use-gvfs.md b/docs/how-to-use-gvfs.md index 4f3515ea9c7..102ec082a76 100644 --- a/docs/how-to-use-gvfs.md +++ b/docs/how-to-use-gvfs.md @@ -68,11 +68,11 @@ Apart from the above properties, to access fileset like S3, GCS, OSS and custom #### S3 fileset -| Configuration item | Description | Default value | Required | Since version | -|--------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|--------------------------|------------------| -| `s3-endpoint` | The endpoint of the AWS S3. | (none) | Yes if it's a S3 fileset.| 0.7.0-incubating | -| `s3-access-key-id` | The access key of the AWS S3. | (none) | Yes if it's a S3 fileset.| 0.7.0-incubating | -| `s3-secret-access-key` | The secret key of the AWS S3. | (none) | Yes if it's a S3 fileset.| 0.7.0-incubating | +| Configuration item | Description | Default value | Required | Since version | +|------------------------|-------------------------------|---------------|---------------------------|------------------| +| `s3-endpoint` | The endpoint of the AWS S3. | (none) | Yes if it's a S3 fileset. | 0.7.0-incubating | +| `s3-access-key-id` | The access key of the AWS S3. | (none) | Yes if it's a S3 fileset. | 0.7.0-incubating | +| `s3-secret-access-key` | The secret key of the AWS S3. | (none) | Yes if it's a S3 fileset. | 0.7.0-incubating | At the same time, you need to add the corresponding bundle jar 1. [`gravitino-aws-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-aws-bundle/) in the classpath if no hadoop environment is available, or @@ -81,9 +81,9 @@ At the same time, you need to add the corresponding bundle jar #### GCS fileset -| Configuration item | Description | Default value | Required | Since version | -|--------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|---------------------------|------------------| -| `gcs-service-account-file` | The path of GCS service account JSON file. | (none) | Yes if it's a GCS fileset.| 0.7.0-incubating | +| Configuration item | Description | Default value | Required | Since version | +|----------------------------|--------------------------------------------|---------------|----------------------------|------------------| +| `gcs-service-account-file` | The path of GCS service account JSON file. | (none) | Yes if it's a GCS fileset. | 0.7.0-incubating | In the meantime, you need to add the corresponding bundle jar 1. [`gravitino-gcp-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-gcp-bundle/) in the classpath if no hadoop environment is available, or diff --git a/docs/iceberg-rest-service.md b/docs/iceberg-rest-service.md index 3c2f27a3d1c..f21ca35a43a 100644 --- a/docs/iceberg-rest-service.md +++ b/docs/iceberg-rest-service.md @@ -162,7 +162,8 @@ Supports using static GCS credential file or generating GCS token to access GCS | `gravitino.iceberg-rest.io-impl` | The io implementation for `FileIO` in Iceberg, use `org.apache.iceberg.gcp.gcs.GCSFileIO` for GCS. | (none) | No | 0.6.0-incubating | | `gravitino.iceberg-rest.credential-provider-type` | Deprecated, please use `gravitino.iceberg-rest.credential-providers` instead. | (none) | No | 0.7.0-incubating | | `gravitino.iceberg-rest.credential-providers` | Supports `gcs-token`, generates a temporary token according to the query data path. | (none) | No | 0.7.0-incubating | -| `gravitino.iceberg-rest.gcs-credential-file-path` | The location of GCS credential file, only used when `credential-providers` is `gcs-token`. | (none) | No | 0.7.0-incubating | +| `gravitino.iceberg-rest.gcs-credential-file-path` | Deprecated, please use `gravitino.iceberg-rest.gcs-service-account-file` instead. | (none) | No | 0.7.0-incubating | +| `gravitino.iceberg-rest.gcs-service-account-file` | The location of GCS credential file, only used when `credential-provider-type` is `gcs-token`. | (none) | No | 0.8.0-incubating | For other Iceberg GCS properties not managed by Gravitino like `gcs.project-id`, you could config it directly by `gravitino.iceberg-rest.gcs.project-id`. @@ -450,9 +451,8 @@ Gravitino Iceberg REST server in docker image could access local storage by defa | `GRAVITINO_IO_IMPL` | `gravitino.iceberg-rest.io-impl` | 0.7.0-incubating | | `GRAVITINO_URI` | `gravitino.iceberg-rest.uri` | 0.7.0-incubating | | `GRAVITINO_WAREHOUSE` | `gravitino.iceberg-rest.warehouse` | 0.7.0-incubating | -| `GRAVITINO_CREDENTIAL_PROVIDER_TYPE` | `gravitino.iceberg-rest.credential-providers` | 0.8.0-incubating | | `GRAVITINO_CREDENTIAL_PROVIDERS` | `gravitino.iceberg-rest.credential-providers` | 0.8.0-incubating | -| `GRAVITINO_GCS_CREDENTIAL_FILE_PATH` | `gravitino.iceberg-rest.gcs-credential-file-path` | 0.7.0-incubating | +| `GRAVITINO_GCS_SERVICE_ACCOUNT_FILE` | `gravitino.iceberg-rest.gcs-service-account-file` | 0.8.0-incubating | | `GRAVITINO_S3_ACCESS_KEY` | `gravitino.iceberg-rest.s3-access-key-id` | 0.7.0-incubating | | `GRAVITINO_S3_SECRET_KEY` | `gravitino.iceberg-rest.s3-secret-access-key` | 0.7.0-incubating | | `GRAVITINO_S3_REGION` | `gravitino.iceberg-rest.s3-region` | 0.7.0-incubating | @@ -465,6 +465,13 @@ Gravitino Iceberg REST server in docker image could access local storage by defa | `GRAVITINO_AZURE_CLIENT_ID` | `gravitino.iceberg-rest.azure-client-id` | 0.8.0-incubating | | `GRAVITINO_AZURE_CLIENT_SECRET` | `gravitino.iceberg-rest.azure-client-secret` | 0.8.0-incubating | +The below environment is deprecated, please use the corresponding configuration items instead. + +| Deprecated Environment variables | New environment variables | Since version | Deprecated version | +|--------------------------------------|--------------------------------------|------------------|--------------------| +| `GRAVITINO_CREDENTIAL_PROVIDER_TYPE` | `GRAVITINO_CREDENTIAL_PROVIDERS` | 0.7.0-incubating | 0.8.0-incubating | +| `GRAVITINO_GCS_CREDENTIAL_FILE_PATH` | `GRAVITINO_GCS_SERVICE_ACCOUNT_FILE` | 0.7.0-incubating | 0.8.0-incubating | + Or build it manually to add custom configuration or logics: ```shell diff --git a/iceberg/iceberg-rest-server/src/main/java/org/apache/gravitino/iceberg/service/CatalogWrapperForREST.java b/iceberg/iceberg-rest-server/src/main/java/org/apache/gravitino/iceberg/service/CatalogWrapperForREST.java index 8ae7bd66ddc..3c86629b522 100644 --- a/iceberg/iceberg-rest-server/src/main/java/org/apache/gravitino/iceberg/service/CatalogWrapperForREST.java +++ b/iceberg/iceberg-rest-server/src/main/java/org/apache/gravitino/iceberg/service/CatalogWrapperForREST.java @@ -19,6 +19,8 @@ package org.apache.gravitino.iceberg.service; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import java.util.Collections; import java.util.HashMap; @@ -34,6 +36,7 @@ import org.apache.gravitino.credential.PathBasedCredentialContext; import org.apache.gravitino.iceberg.common.IcebergConfig; import org.apache.gravitino.iceberg.common.ops.IcebergCatalogWrapper; +import org.apache.gravitino.storage.GCSProperties; import org.apache.gravitino.utils.MapUtils; import org.apache.gravitino.utils.PrincipalUtils; import org.apache.iceberg.TableMetadata; @@ -58,6 +61,14 @@ public class CatalogWrapperForREST extends IcebergCatalogWrapper { IcebergConstants.ICEBERG_S3_ENDPOINT, IcebergConstants.ICEBERG_OSS_ENDPOINT); + @SuppressWarnings("deprecation") + private static Map deprecatedProperties = + ImmutableMap.of( + CredentialConstants.CREDENTIAL_PROVIDER_TYPE, + CredentialConstants.CREDENTIAL_PROVIDERS, + "gcs-credential-file-path", + GCSProperties.GRAVITINO_GCS_SERVICE_ACCOUNT_FILE); + public CatalogWrapperForREST(String catalogName, IcebergConfig config) { super(config); this.catalogConfigToClients = @@ -65,7 +76,8 @@ public CatalogWrapperForREST(String catalogName, IcebergConfig config) { config.getIcebergCatalogProperties(), key -> catalogPropertiesToClientKeys.contains(key)); // To be compatible with old properties - Map catalogProperties = checkForCompatibility(config.getAllConfig()); + Map catalogProperties = + checkForCompatibility(config.getAllConfig(), deprecatedProperties); this.catalogCredentialManager = new CatalogCredentialManager(catalogName, catalogProperties); } @@ -131,27 +143,30 @@ private LoadTableResponse injectCredentialConfig( .build(); } - @SuppressWarnings("deprecation") - private Map checkForCompatibility(Map properties) { - HashMap normalizedProperties = new HashMap<>(properties); - String credentialProviderType = properties.get(CredentialConstants.CREDENTIAL_PROVIDER_TYPE); - String credentialProviders = properties.get(CredentialConstants.CREDENTIAL_PROVIDERS); - if (StringUtils.isNotBlank(credentialProviders) - && StringUtils.isNotBlank(credentialProviderType)) { + @VisibleForTesting + static Map checkForCompatibility( + Map properties, Map deprecatedProperties) { + Map newProperties = new HashMap<>(properties); + deprecatedProperties.forEach( + (deprecatedProperty, newProperty) -> { + replaceDeprecatedProperties(newProperties, deprecatedProperty, newProperty); + }); + return newProperties; + } + + private static void replaceDeprecatedProperties( + Map properties, String deprecatedProperty, String newProperty) { + String deprecatedValue = properties.get(deprecatedProperty); + String newValue = properties.get(newProperty); + if (StringUtils.isNotBlank(deprecatedValue) && StringUtils.isNotBlank(newValue)) { throw new IllegalArgumentException( - String.format( - "Should not set both %s and %s", - CredentialConstants.CREDENTIAL_PROVIDER_TYPE, - CredentialConstants.CREDENTIAL_PROVIDERS)); + String.format("Should not set both %s and %s", deprecatedProperty, newProperty)); } - if (StringUtils.isNotBlank(credentialProviderType)) { - LOG.warn( - "%s is deprecated, please use %s instead.", - CredentialConstants.CREDENTIAL_PROVIDER_TYPE, CredentialConstants.CREDENTIAL_PROVIDERS); - normalizedProperties.put(CredentialConstants.CREDENTIAL_PROVIDERS, credentialProviderType); + if (StringUtils.isNotBlank(deprecatedValue)) { + LOG.warn("%s is deprecated, please use %s instead.", deprecatedProperty, newProperty); + properties.remove(deprecatedProperty); + properties.put(newProperty, deprecatedValue); } - - return normalizedProperties; } } diff --git a/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/IcebergRESTGCSIT.java b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/IcebergRESTGCSIT.java index 523d8773748..3396b60e1fd 100644 --- a/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/IcebergRESTGCSIT.java +++ b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/IcebergRESTGCSIT.java @@ -25,11 +25,11 @@ import org.apache.gravitino.catalog.lakehouse.iceberg.IcebergConstants; import org.apache.gravitino.credential.CredentialConstants; import org.apache.gravitino.credential.GCSTokenCredential; -import org.apache.gravitino.credential.config.GCSCredentialConfig; import org.apache.gravitino.iceberg.common.IcebergConfig; import org.apache.gravitino.integration.test.util.BaseIT; import org.apache.gravitino.integration.test.util.DownloaderUtils; import org.apache.gravitino.integration.test.util.ITUtils; +import org.apache.gravitino.storage.GCSProperties; import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; // You should export GRAVITINO_GCS_BUCKET and GOOGLE_APPLICATION_CREDENTIALS to run the test @@ -76,8 +76,7 @@ private Map getGCSConfig() { IcebergConfig.ICEBERG_CONFIG_PREFIX + CredentialConstants.CREDENTIAL_PROVIDERS, GCSTokenCredential.GCS_TOKEN_CREDENTIAL_TYPE); configMap.put( - IcebergConfig.ICEBERG_CONFIG_PREFIX - + GCSCredentialConfig.GRAVITINO_GCS_CREDENTIAL_FILE_PATH, + IcebergConfig.ICEBERG_CONFIG_PREFIX + GCSProperties.GRAVITINO_GCS_SERVICE_ACCOUNT_FILE, gcsCredentialPath); configMap.put( IcebergConfig.ICEBERG_CONFIG_PREFIX + IcebergConstants.IO_IMPL, diff --git a/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/TestCatalogWrapperForREST.java b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/TestCatalogWrapperForREST.java new file mode 100644 index 00000000000..809f65d0481 --- /dev/null +++ b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/TestCatalogWrapperForREST.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.iceberg.service; + +import java.util.Map; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; + +public class TestCatalogWrapperForREST { + + @Test + void testCheckPropertiesForCompatibility() { + ImmutableMap deprecatedMap = ImmutableMap.of("deprecated", "new"); + ImmutableMap propertiesWithDeprecatedKey = ImmutableMap.of("deprecated", "v"); + Map newProperties = + CatalogWrapperForREST.checkForCompatibility(propertiesWithDeprecatedKey, deprecatedMap); + Assertions.assertEquals(newProperties, ImmutableMap.of("new", "v")); + + ImmutableMap propertiesWithoutDeprecatedKey = ImmutableMap.of("k", "v"); + newProperties = + CatalogWrapperForREST.checkForCompatibility(propertiesWithoutDeprecatedKey, deprecatedMap); + Assertions.assertEquals(newProperties, ImmutableMap.of("k", "v")); + + ImmutableMap propertiesWithBothKey = + ImmutableMap.of("deprecated", "v", "new", "v"); + + Assertions.assertThrowsExactly( + IllegalArgumentException.class, + () -> CatalogWrapperForREST.checkForCompatibility(propertiesWithBothKey, deprecatedMap)); + } +} From 6f54874e486e1898930f72b744986feeb54aa913 Mon Sep 17 00:00:00 2001 From: Qi Yu Date: Fri, 3 Jan 2025 10:16:41 +0800 Subject: [PATCH 7/7] [#5966] improvment(authorization): Add path based securable object and user group mapping interface (#5967) ### What changes were proposed in this pull request? Add the following things: - The interface for user-group mapping between Gravitino and underlying user system. ### Why are the changes needed? It's a need for path-based authorization Fix: #5966 ### Does this PR introduce _any_ user-facing change? N/A. ### How was this patch tested? Existing tests. --- ...AuthorizationUserGroupMappingProvider.java | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 authorizations/authorization-common/src/main/java/org/apache/gravitino/authorization/common/AuthorizationUserGroupMappingProvider.java diff --git a/authorizations/authorization-common/src/main/java/org/apache/gravitino/authorization/common/AuthorizationUserGroupMappingProvider.java b/authorizations/authorization-common/src/main/java/org/apache/gravitino/authorization/common/AuthorizationUserGroupMappingProvider.java new file mode 100644 index 00000000000..08b48dc7850 --- /dev/null +++ b/authorizations/authorization-common/src/main/java/org/apache/gravitino/authorization/common/AuthorizationUserGroupMappingProvider.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.gravitino.authorization.common; + +import java.util.Map; + +/** + * The AuthorizationUserGroupMappingProvider interface defines the public API for mapping Gravitino + * users and groups to the that in underlying data source system. + * + *

Typically, the users and group names in Gravitino are the same as the underlying data source. + * However, in some cases, the user and group names in Gravitino may be different from the + * underlying data source. For instance, in GCP IAM, the username is the email address or the + * service account. So the user group mapping provider can be used to map the Gravitino username to + * the email address or service account. + */ +public interface AuthorizationUserGroupMappingProvider { + + /** + * Initialize the user group mapping provider with the configuration. + * + * @param config The configuration map for the user group mapping provider. + */ + default void initialize(Map config) {} + + /** + * Get the username from the underlying data source based on the Gravitino username For instance, + * in GCP IAM, the username is the email address or the service account. + * + * @param gravitinoUserName The Gravitino username. + * @return The username from the underlying data source. + */ + default String getUserName(String gravitinoUserName) { + return gravitinoUserName; + } + + /** + * Get the group name from the underlying data source based on the Gravitino group name. + * + * @param gravitinoGroupName The Gravitino group name. + * @return The group name from the underlying data source. + */ + default String getGroupName(String gravitinoGroupName) { + return gravitinoGroupName; + } +}