Skip to content

Commit f36a597

Browse files
fix(spring): refactor spring configuration (datahub-project#10290)
1 parent 3ac8778 commit f36a597

File tree

138 files changed

+598
-626
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

138 files changed

+598
-626
lines changed

build.gradle

+6-2
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@ buildscript {
22
ext.jdkVersionDefault = 17
33
ext.javaClassVersionDefault = 11
44

5+
def springModules = ['mae-consumer', 'mce-consumer', 'pe-consumer']
6+
57
ext.jdkVersion = { p ->
68
// If Spring 6 is present, hard dependency on jdk17
79
if (p.configurations.any { it.getDependencies().any{
810
(it.getGroup().equals("org.springframework") && it.getVersion().startsWith("6."))
911
|| (it.getGroup().equals("org.springframework.boot") && it.getVersion().startsWith("3.") && !it.getName().equals("spring-boot-starter-test"))
10-
}}) {
12+
}} || springModules.contains(p.name)) {
1113
return 17
1214
} else {
1315
// otherwise we can use the preferred default which can be overridden with a property: -PjdkVersionDefault
@@ -20,7 +22,7 @@ buildscript {
2022
if (p.configurations.any { it.getDependencies().any {
2123
(it.getGroup().equals("org.springframework") && it.getVersion().startsWith("6."))
2224
|| (it.getGroup().equals("org.springframework.boot") && it.getVersion().startsWith("3.") && !it.getName().equals("spring-boot-starter-test"))
23-
}}) {
25+
}} || springModules.contains(p.name)) {
2426
return 17
2527
} else {
2628
// otherwise we can use the preferred default which can be overridden with a property: -PjavaClassVersionDefault
@@ -158,6 +160,7 @@ project.ext.externalDependency = [
158160
'javatuples': 'org.javatuples:javatuples:1.2',
159161
'javaxInject' : 'javax.inject:javax.inject:1',
160162
'javaxValidation' : 'javax.validation:validation-api:2.0.1.Final',
163+
'jakartaValidation': 'jakarta.validation:jakarta.validation-api:3.1.0-M2',
161164
'jerseyCore': 'org.glassfish.jersey.core:jersey-client:2.41',
162165
'jerseyGuava': 'org.glassfish.jersey.bundles.repackaged:jersey-guava:2.25.1',
163166
'jettyJaas': "org.eclipse.jetty:jetty-jaas:$jettyVersion",
@@ -266,6 +269,7 @@ project.ext.externalDependency = [
266269
'jline':'jline:jline:1.4.1',
267270
'jetbrains':' org.jetbrains.kotlin:kotlin-stdlib:1.6.0',
268271
'annotationApi': 'javax.annotation:javax.annotation-api:1.3.2',
272+
'jakartaAnnotationApi': 'jakarta.annotation:jakarta.annotation-api:3.0.0',
269273
'classGraph': 'io.github.classgraph:classgraph:4.8.168',
270274
]
271275

datahub-frontend/app/config/ConfigurationProvider.java

+4-1
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,19 @@
66
import com.linkedin.metadata.config.kafka.KafkaConfiguration;
77
import com.linkedin.metadata.spring.YamlPropertySourceFactory;
88
import lombok.Data;
9+
import org.springframework.boot.autoconfigure.kafka.KafkaProperties;
910
import org.springframework.boot.context.properties.ConfigurationProperties;
1011
import org.springframework.boot.context.properties.EnableConfigurationProperties;
12+
import org.springframework.context.annotation.Configuration;
1113
import org.springframework.context.annotation.PropertySource;
14+
import org.springframework.stereotype.Component;
1215

1316
/**
1417
* Minimal sharing between metadata-service and frontend Does not use the factories module to avoid
1518
* transitive dependencies.
1619
*/
1720
@EnableConfigurationProperties
18-
@PropertySource(value = "application.yml", factory = YamlPropertySourceFactory.class)
21+
@PropertySource(value = "classpath:/application.yaml", factory = YamlPropertySourceFactory.class)
1922
@ConfigurationProperties
2023
@Data
2124
public class ConfigurationProvider {

datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCliApplication.java

-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import com.linkedin.gms.factory.graphql.GraphQLEngineFactory;
66
import com.linkedin.gms.factory.kafka.KafkaEventConsumerFactory;
77
import com.linkedin.gms.factory.kafka.SimpleKafkaConsumerFactory;
8-
import com.linkedin.gms.factory.kafka.schemaregistry.InternalSchemaRegistryFactory;
98
import com.linkedin.gms.factory.telemetry.ScheduledAnalyticsFactory;
109
import org.springframework.boot.WebApplicationType;
1110
import org.springframework.boot.autoconfigure.SpringBootApplication;
@@ -31,7 +30,6 @@
3130
DataHubAuthorizerFactory.class,
3231
SimpleKafkaConsumerFactory.class,
3332
KafkaEventConsumerFactory.class,
34-
InternalSchemaRegistryFactory.class,
3533
GraphQLEngineFactory.class
3634
})
3735
})

datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java

+11-1
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
import com.linkedin.datahub.upgrade.system.SystemUpdateBlocking;
77
import com.linkedin.datahub.upgrade.system.SystemUpdateNonBlocking;
88
import com.linkedin.datahub.upgrade.system.elasticsearch.steps.DataHubStartupStep;
9-
import com.linkedin.gms.factory.common.TopicConventionFactory;
109
import com.linkedin.gms.factory.config.ConfigurationProvider;
1110
import com.linkedin.gms.factory.kafka.DataHubKafkaProducerFactory;
11+
import com.linkedin.gms.factory.kafka.common.TopicConventionFactory;
1212
import com.linkedin.gms.factory.kafka.schemaregistry.InternalSchemaRegistryFactory;
1313
import com.linkedin.gms.factory.kafka.schemaregistry.SchemaRegistryConfig;
1414
import com.linkedin.metadata.config.kafka.KafkaConfiguration;
@@ -110,4 +110,14 @@ protected KafkaEventProducer kafkaEventProducer(
110110
@Qualifier("duheKafkaEventProducer") KafkaEventProducer kafkaEventProducer) {
111111
return kafkaEventProducer;
112112
}
113+
114+
@Primary
115+
@Bean(name = "schemaRegistryConfig")
116+
@ConditionalOnProperty(
117+
name = "kafka.schemaRegistry.type",
118+
havingValue = InternalSchemaRegistryFactory.TYPE)
119+
protected SchemaRegistryConfig schemaRegistryConfig(
120+
@Qualifier("duheSchemaRegistryConfig") SchemaRegistryConfig duheSchemaRegistryConfig) {
121+
return duheSchemaRegistryConfig;
122+
}
113123
}

datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNoSchemaRegistryTest.java

+28-5
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,22 @@
11
package com.linkedin.datahub.upgrade;
22

3+
import static com.linkedin.metadata.EventUtils.RENAMED_MCL_AVRO_SCHEMA;
4+
import static com.linkedin.metadata.boot.kafka.MockSystemUpdateSerializer.topicToSubjectName;
35
import static org.mockito.Mockito.mock;
4-
import static org.testng.AssertJUnit.assertEquals;
5-
import static org.testng.AssertJUnit.assertNotNull;
6+
import static org.testng.Assert.assertEquals;
7+
import static org.testng.Assert.assertNotNull;
68

79
import com.linkedin.datahub.upgrade.system.SystemUpdate;
10+
import com.linkedin.gms.factory.kafka.schemaregistry.SchemaRegistryConfig;
11+
import com.linkedin.metadata.boot.kafka.MockSystemUpdateDeserializer;
12+
import com.linkedin.metadata.boot.kafka.MockSystemUpdateSerializer;
813
import com.linkedin.metadata.dao.producer.KafkaEventProducer;
914
import com.linkedin.metadata.entity.EntityServiceImpl;
15+
import com.linkedin.mxe.Topics;
16+
import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
17+
import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException;
1018
import io.datahubproject.metadata.context.OperationContext;
19+
import java.io.IOException;
1120
import java.util.List;
1221
import java.util.Map;
1322
import java.util.Optional;
@@ -23,8 +32,8 @@
2332
classes = {UpgradeCliApplication.class, UpgradeCliApplicationTestConfiguration.class},
2433
properties = {
2534
"kafka.schemaRegistry.type=INTERNAL",
26-
"DATAHUB_UPGRADE_HISTORY_TOPIC_NAME=test_due_topic",
27-
"METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=test_mcl_versioned_topic"
35+
"DATAHUB_UPGRADE_HISTORY_TOPIC_NAME=" + Topics.DATAHUB_UPGRADE_HISTORY_TOPIC_NAME,
36+
"METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=" + Topics.METADATA_CHANGE_LOG_VERSIONED,
2837
},
2938
args = {"-u", "SystemUpdate"})
3039
public class DatahubUpgradeNoSchemaRegistryTest extends AbstractTestNGSpringContextTests {
@@ -43,15 +52,29 @@ public class DatahubUpgradeNoSchemaRegistryTest extends AbstractTestNGSpringCont
4352

4453
@Autowired private EntityServiceImpl entityService;
4554

55+
@Autowired
56+
@Named("schemaRegistryConfig")
57+
private SchemaRegistryConfig schemaRegistryConfig;
58+
4659
@Test
4760
public void testSystemUpdateInit() {
4861
assertNotNull(systemUpdate);
4962
}
5063

5164
@Test
52-
public void testSystemUpdateKafkaProducerOverride() {
65+
public void testSystemUpdateKafkaProducerOverride() throws RestClientException, IOException {
66+
assertEquals(schemaRegistryConfig.getDeserializer(), MockSystemUpdateDeserializer.class);
67+
assertEquals(schemaRegistryConfig.getSerializer(), MockSystemUpdateSerializer.class);
5368
assertEquals(kafkaEventProducer, duheKafkaEventProducer);
5469
assertEquals(entityService.getProducer(), duheKafkaEventProducer);
70+
71+
MockSystemUpdateSerializer serializer = new MockSystemUpdateSerializer();
72+
serializer.configure(schemaRegistryConfig.getProperties(), false);
73+
SchemaRegistryClient registry = serializer.getSchemaRegistryClient();
74+
assertEquals(
75+
registry.getId(
76+
topicToSubjectName(Topics.METADATA_CHANGE_LOG_VERSIONED), RENAMED_MCL_AVRO_SCHEMA),
77+
2);
5578
}
5679

5780
@Test

datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java

+28-2
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,21 @@
44
import static org.mockito.Mockito.mock;
55
import static org.mockito.Mockito.times;
66
import static org.mockito.Mockito.verify;
7+
import static org.testng.Assert.assertEquals;
78
import static org.testng.AssertJUnit.assertNotNull;
89

910
import com.linkedin.datahub.upgrade.impl.DefaultUpgradeManager;
1011
import com.linkedin.datahub.upgrade.system.SystemUpdateNonBlocking;
1112
import com.linkedin.datahub.upgrade.system.vianodes.ReindexDataJobViaNodesCLL;
13+
import com.linkedin.gms.factory.kafka.schemaregistry.SchemaRegistryConfig;
14+
import com.linkedin.metadata.boot.kafka.MockSystemUpdateDeserializer;
15+
import com.linkedin.metadata.boot.kafka.MockSystemUpdateSerializer;
16+
import com.linkedin.metadata.dao.producer.KafkaEventProducer;
1217
import com.linkedin.metadata.entity.AspectDao;
1318
import com.linkedin.metadata.entity.EntityService;
19+
import com.linkedin.metadata.entity.EntityServiceImpl;
1420
import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs;
21+
import com.linkedin.mxe.Topics;
1522
import io.datahubproject.test.metadata.context.TestOperationContexts;
1623
import java.util.List;
1724
import javax.inject.Named;
@@ -27,8 +34,8 @@
2734
properties = {
2835
"BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_ENABLED=true",
2936
"kafka.schemaRegistry.type=INTERNAL",
30-
"DATAHUB_UPGRADE_HISTORY_TOPIC_NAME=test_due_topic",
31-
"METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=test_mcl_versioned_topic"
37+
"DATAHUB_UPGRADE_HISTORY_TOPIC_NAME=" + Topics.DATAHUB_UPGRADE_HISTORY_TOPIC_NAME,
38+
"METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=" + Topics.METADATA_CHANGE_LOG_VERSIONED,
3239
},
3340
args = {"-u", "SystemUpdateNonBlocking"})
3441
public class DatahubUpgradeNonBlockingTest extends AbstractTestNGSpringContextTests {
@@ -38,9 +45,28 @@ public class DatahubUpgradeNonBlockingTest extends AbstractTestNGSpringContextTe
3845
private SystemUpdateNonBlocking systemUpdateNonBlocking;
3946

4047
@Autowired
48+
@Named("schemaRegistryConfig")
49+
private SchemaRegistryConfig schemaRegistryConfig;
50+
51+
@Autowired
52+
@Named("duheKafkaEventProducer")
53+
private KafkaEventProducer duheKafkaEventProducer;
54+
55+
@Autowired
56+
@Named("kafkaEventProducer")
57+
private KafkaEventProducer kafkaEventProducer;
58+
59+
@Autowired private EntityServiceImpl entityService;
60+
4161
@Test
4262
public void testSystemUpdateNonBlockingInit() {
4363
assertNotNull(systemUpdateNonBlocking);
64+
65+
// Expected system update configuration and producer
66+
assertEquals(schemaRegistryConfig.getDeserializer(), MockSystemUpdateDeserializer.class);
67+
assertEquals(schemaRegistryConfig.getSerializer(), MockSystemUpdateSerializer.class);
68+
assertEquals(duheKafkaEventProducer, kafkaEventProducer);
69+
assertEquals(entityService.getProducer(), duheKafkaEventProducer);
4470
}
4571

4672
@Test

datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java

+3-8
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,15 @@
11
package com.linkedin.datahub.upgrade;
22

3-
import static org.mockito.ArgumentMatchers.anyString;
4-
import static org.mockito.Mockito.mock;
5-
import static org.mockito.Mockito.when;
6-
73
import com.linkedin.gms.factory.auth.SystemAuthenticationFactory;
84
import com.linkedin.metadata.graph.GraphService;
95
import com.linkedin.metadata.models.registry.ConfigEntityRegistry;
106
import com.linkedin.metadata.models.registry.EntityRegistry;
117
import com.linkedin.metadata.registry.SchemaRegistryService;
8+
import com.linkedin.metadata.registry.SchemaRegistryServiceImpl;
129
import com.linkedin.metadata.search.SearchService;
1310
import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders;
11+
import com.linkedin.mxe.TopicConventionImpl;
1412
import io.ebean.Database;
15-
import java.util.Optional;
1613
import org.springframework.boot.test.context.TestConfiguration;
1714
import org.springframework.boot.test.mock.mockito.MockBean;
1815
import org.springframework.context.annotation.Bean;
@@ -38,8 +35,6 @@ public class UpgradeCliApplicationTestConfiguration {
3835

3936
@Bean
4037
public SchemaRegistryService schemaRegistryService() {
41-
SchemaRegistryService mockService = mock(SchemaRegistryService.class);
42-
when(mockService.getSchemaIdForTopic(anyString())).thenReturn(Optional.of(0));
43-
return mockService;
38+
return new SchemaRegistryServiceImpl(new TopicConventionImpl());
4439
}
4540
}

docker/build.gradle

+14
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ tasks.register('quickstart') {}
4242
tasks.register('quickstartSlim') {}
4343
tasks.register('quickstartDebug') {}
4444
tasks.register('quickstartPg') {}
45+
tasks.register('quickstartStorage') {}
4546

4647
tasks.register('quickstartNuke') {
4748
doFirst {
@@ -133,6 +134,19 @@ dockerCompose {
133134
stopContainers = false
134135
removeVolumes = false
135136
}
137+
138+
quickstartStorage {
139+
isRequiredBy(tasks.named('quickstartStorage'))
140+
composeAdditionalArgs = ['--profile', 'quickstart-storage']
141+
142+
useComposeFiles = ['profiles/docker-compose.yml']
143+
projectName = 'datahub'
144+
projectNamePrefix = ''
145+
buildBeforeUp = false
146+
buildBeforePull = false
147+
stopContainers = false
148+
removeVolumes = false
149+
}
136150
}
137151
tasks.getByName('quickstartComposeUp').dependsOn(
138152
quickstart_modules.collect { it + ':dockerTag' })

docs/authentication/concepts.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ for configuring multiple ways to authenticate a given request, for example via L
7575

7676
Only if each Authenticator within the chain fails to authenticate a request will it be rejected.
7777

78-
The Authenticator Chain can be configured in the `application.yml` file under `authentication.authenticators`:
78+
The Authenticator Chain can be configured in the `application.yaml` file under `authentication.authenticators`:
7979

8080
```
8181
authentication:

docs/authentication/introducing-metadata-service-authentication.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ To enable Metadata Service Authentication:
5353

5454
OR
5555

56-
- change the Metadata Service `application.yml` configuration file to set `authentication.enabled` to "true" AND
56+
- change the Metadata Service `application.yaml` configuration file to set `authentication.enabled` to "true" AND
5757
- change the Frontend Proxy Service `application.config` configuration file to set `metadataService.auth.enabled` to "true"
5858

5959
After setting the configuration flag, simply restart the Metadata Service to start enforcing Authentication.
@@ -116,7 +116,7 @@ These changes represent the first milestone in Metadata Service Authentication.
116116

117117
That's perfectly fine, for now. Metadata Service Authentication is disabled by default, only enabled if you provide the
118118
environment variable `METADATA_SERVICE_AUTH_ENABLED` to the `datahub-gms` container or change the `authentication.enabled` to "true"
119-
inside your DataHub Metadata Service configuration (`application.yml`).
119+
inside your DataHub Metadata Service configuration (`application.yaml`).
120120

121121
That being said, we will be recommending that you enable Authentication for production use cases, to prevent
122122
arbitrary actors from ingesting metadata into DataHub.
@@ -141,7 +141,7 @@ the root "datahub" user account.
141141

142142
### I want to authenticate requests using a custom Authenticator? How do I do this?
143143

144-
You can configure DataHub to add your custom **Authenticator** to the **Authentication Chain** by changing the `application.yml` configuration file for the Metadata Service:
144+
You can configure DataHub to add your custom **Authenticator** to the **Authentication Chain** by changing the `application.yaml` configuration file for the Metadata Service:
145145

146146
```yml
147147
authentication:

docs/deploy/aws.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ and [here](../../metadata-service/factories/src/main/java/com/linkedin/gms/facto
332332
.
333333

334334
A mapping between the property name used in the above two files and the name used in docker/env file can be
335-
found [here](../../metadata-service/configuration/src/main/resources/application.yml).
335+
found [here](../../metadata-service/configuration/src/main/resources/application.yaml).
336336

337337
### Managed Streaming for Apache Kafka (MSK)
338338

docs/how/updating-datahub.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ Helm with `--atomic`: In general, it is recommended to not use the `--atomic` se
328328

329329
- #6243 apache-ranger authorizer is no longer the core part of DataHub GMS, and it is shifted as plugin. Please refer updated documentation [Configuring Authorization with Apache Ranger](./configuring-authorization-with-apache-ranger.md#configuring-your-datahub-deployment) for configuring `apache-ranger-plugin` in DataHub GMS.
330330
- #6243 apache-ranger authorizer as plugin is not supported in DataHub Kubernetes deployment.
331-
- #6243 Authentication and Authorization plugins configuration are removed from [application.yml](../../metadata-service/configuration/src/main/resources/application.yml). Refer documentation [Migration Of Plugins From application.yml](../plugins.md#migration-of-plugins-from-applicationyml) for migrating any existing custom plugins.
331+
- #6243 Authentication and Authorization plugins configuration are removed from [application.yaml](../../metadata-service/configuration/src/main/resources/application.yaml). Refer documentation [Migration Of Plugins From application.yaml](../plugins.md#migration-of-plugins-from-applicationyml) for migrating any existing custom plugins.
332332
- `datahub check graph-consistency` command has been removed. It was a beta API that we had considered but decided there are better solutions for this. So removing this.
333333
- `graphql_url` option of `powerbi-report-server` source deprecated as the options is not used.
334334
- #6789 BigQuery ingestion: If `enable_legacy_sharded_table_support` is set to False, sharded table names will be suffixed with \_yyyymmdd to make sure they don't clash with non-sharded tables. This means if stateful ingestion is enabled then old sharded tables will be recreated with a new id and attached tags/glossary terms/etc will need to be added again. _This behavior is not enabled by default yet, but will be enabled by default in a future release._

docs/plugins.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -252,11 +252,11 @@ All other access are forbidden for the plugin.
252252

253253
> Disclaimer: In the BETA version your plugin can access any port and can read/write to any location on the file system; however, you should implement the plugin as per the above access permissions to keep your plugin compatible with upcoming releases of DataHub.
254254
255-
## Migration Of Plugins From application.yml
256-
If you have any custom Authentication or Authorization plugin define in `authorization` or `authentication` section of [application.yml](../metadata-service/configuration/src/main/resources/application.yml) then migrate them as per below steps.
255+
## Migration Of Plugins From application.yaml
256+
If you have any custom Authentication or Authorization plugin defined in the `authorization` or `authentication` section of [application.yaml](../metadata-service/configuration/src/main/resources/application.yaml), then migrate them as per the steps below.
257257

258258
1. Implement Plugin: For Authentication Plugin follow steps of [Implementing an Authentication Plugin](#implementing-an-authentication-plugin) and for Authorization Plugin follow steps of [Implementing an Authorization Plugin](#implementing-an-authorization-plugin)
259-
2. Install Plugin: Install the plugins as per steps mentioned in [Plugin Installation](#plugin-installation). Here you need to map the configuration from [application.yml](../metadata-service/configuration/src/main/resources/application.yml) to configuration in `config.yml`. This mapping from `application.yml` to `config.yml` is described below
259+
2. Install Plugin: Install the plugins as per the steps mentioned in [Plugin Installation](#plugin-installation). Here you need to map the configuration from [application.yaml](../metadata-service/configuration/src/main/resources/application.yaml) to the configuration in `config.yml`. This mapping from `application.yaml` to `config.yml` is described below
260260

261261
**Mapping for Authenticators**
262262

0 commit comments

Comments
 (0)