Skip to content

Commit

Permalink
Merge branch 'master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
mkamalas authored Sep 19, 2024
2 parents f703c95 + 3e3a05f commit 8b43d10
Show file tree
Hide file tree
Showing 790 changed files with 192,495 additions and 12,058 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/airflow-plugin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,16 @@ jobs:
extra_pip_requirements: "apache-airflow~=2.4.3"
extra_pip_extras: plugin-v2,test-airflow24
- python-version: "3.10"
extra_pip_requirements: 'apache-airflow~=2.6.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.6.3/constraints-3.10.txt'
extra_pip_requirements: "apache-airflow~=2.6.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.6.3/constraints-3.10.txt"
extra_pip_extras: plugin-v2
- python-version: "3.10"
extra_pip_requirements: 'apache-airflow~=2.7.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.7.3/constraints-3.10.txt'
extra_pip_requirements: "apache-airflow~=2.7.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.7.3/constraints-3.10.txt"
extra_pip_extras: plugin-v2
- python-version: "3.10"
extra_pip_requirements: 'apache-airflow~=2.8.1 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.8.1/constraints-3.10.txt'
extra_pip_requirements: "apache-airflow~=2.8.1 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.8.1/constraints-3.10.txt"
extra_pip_extras: plugin-v2
- python-version: "3.11"
extra_pip_requirements: 'apache-airflow~=2.9.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.9.3/constraints-3.11.txt'
extra_pip_requirements: "apache-airflow~=2.9.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.9.3/constraints-3.11.txt"
extra_pip_extras: plugin-v2
fail-fast: false
steps:
Expand All @@ -73,7 +73,7 @@ jobs:
run: ./gradlew -Pextra_pip_requirements='${{ matrix.extra_pip_requirements }}' -Pextra_pip_extras='${{ matrix.extra_pip_extras }}' :metadata-ingestion-modules:airflow-plugin:build
- name: pip freeze show list installed
if: always()
run: source metadata-ingestion-modules/airflow-plugin/venv/bin/activate && pip freeze
run: source metadata-ingestion-modules/airflow-plugin/venv/bin/activate && uv pip freeze
- uses: actions/upload-artifact@v3
if: ${{ always() && matrix.python-version == '3.10' && matrix.extra_pip_requirements == 'apache-airflow>=2.7.0' }}
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/dagster-plugin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ jobs:
run: ./gradlew -Pextra_pip_requirements='${{ matrix.extraPythonRequirement }}' :metadata-ingestion-modules:dagster-plugin:lint :metadata-ingestion-modules:dagster-plugin:testQuick
- name: pip freeze show list installed
if: always()
run: source metadata-ingestion-modules/dagster-plugin/venv/bin/activate && pip freeze
run: source metadata-ingestion-modules/dagster-plugin/venv/bin/activate && uv pip freeze
- uses: actions/upload-artifact@v3
if: ${{ always() && matrix.python-version == '3.10' && matrix.extraPythonRequirement == 'dagster>=1.3.3' }}
with:
Expand Down
9 changes: 8 additions & 1 deletion .github/workflows/docker-unified.yml
Original file line number Diff line number Diff line change
Expand Up @@ -968,6 +968,13 @@ jobs:
docker pull '${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:head'
docker tag '${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:head' '${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:${{ needs.setup.outputs.unique_tag }}'
fi
if [ '${{ needs.setup.outputs.integrations_service_change }}' == 'false' ]; then
echo 'datahub-integration-service head images'
docker pull '${{ env.DATAHUB_INTEGRATIONS_IMAGE }}:head'
docker tag '${{ env.DATAHUB_INTEGRATIONS_IMAGE }}:head' '${{ env.DATAHUB_INTEGRATIONS_IMAGE }}:${{ needs.setup.outputs.unique_tag }}'
fi
- name: CI Slim Head Images
run: |
if [ '${{ needs.setup.outputs.ingestion_change }}' == 'false' ]; then
echo 'datahub-ingestion head-slim images'
docker pull '${{ env.DATAHUB_INGESTION_IMAGE }}:head-slim'
Expand All @@ -983,7 +990,7 @@ jobs:
DATAHUB_VERSION: ${{ needs.setup.outputs.unique_tag }}
DATAHUB_ACTIONS_IMAGE: ${{ env.DATAHUB_INGESTION_IMAGE }}
ACTIONS_VERSION: ${{ needs.datahub_ingestion_slim_build.outputs.tag || 'head-slim' }}
ACTIONS_EXTRA_PACKAGES: "acryl-datahub-actions[executor]==0.0.13 acryl-datahub-actions==0.0.13 acryl-datahub==0.10.5"
ACTIONS_EXTRA_PACKAGES: "acryl-datahub-actions[executor] acryl-datahub-actions"
ACTIONS_CONFIG: "https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml"
run: |
./smoke-test/run-quickstart.sh
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/gx-plugin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:
run: ./gradlew -Pextra_pip_requirements='${{ matrix.extraPythonRequirement }}' :metadata-ingestion-modules:gx-plugin:lint :metadata-ingestion-modules:gx-plugin:testQuick
- name: pip freeze show list installed
if: always()
run: source metadata-ingestion-modules/gx-plugin/venv/bin/activate && pip freeze
run: source metadata-ingestion-modules/gx-plugin/venv/bin/activate && uv pip freeze
- uses: actions/upload-artifact@v3
if: ${{ always() && matrix.python-version == '3.11' && matrix.extraPythonRequirement == 'great-expectations~=0.17.0' }}
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/metadata-ingestion.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ jobs:
- name: Debug info
if: always()
run: |
source metadata-ingestion/venv/bin/activate && pip freeze
source metadata-ingestion/venv/bin/activate && uv pip freeze
set -x
df -hl
docker image ls
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/prefect-plugin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
with:
distribution: "zulu"
java-version: 17
- uses: gradle/gradle-build-action@v2
- uses: gradle/actions/setup-gradle@v3
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
Expand All @@ -55,7 +55,7 @@ jobs:
run: ./gradlew :metadata-ingestion-modules:prefect-plugin:lint :metadata-ingestion-modules:prefect-plugin:testQuick
- name: pip freeze show list installed
if: always()
run: source metadata-ingestion-modules/prefect-plugin/venv/bin/activate && pip freeze
run: source metadata-ingestion-modules/prefect-plugin/venv/bin/activate && uv pip freeze
- uses: actions/upload-artifact@v3
if: ${{ always() && matrix.python-version == '3.10'}}
with:
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ Here are the companies that have officially adopted DataHub. Please feel free to
- [Haibo Technology](https://www.botech.com.cn)
- [hipages](https://hipages.com.au/)
- [inovex](https://www.inovex.de/)
- [Inter&Co](https://inter.co/)
- [IOMED](https://iomed.health)
- [Klarna](https://www.klarna.com)
- [LinkedIn](http://linkedin.com)
Expand Down
11 changes: 8 additions & 3 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ buildscript {
ext.hadoop3Version = '3.3.6'
ext.kafkaVersion = '5.5.15'
ext.hazelcastVersion = '5.3.6'
ext.ebeanVersion = '12.16.1'
ext.ebeanVersion = '15.5.2'
ext.googleJavaFormatVersion = '1.18.1'
ext.openLineageVersion = '1.19.0'
ext.logbackClassicJava8 = '1.2.12'
Expand Down Expand Up @@ -104,8 +104,8 @@ project.ext.spec = [

project.ext.externalDependency = [
'akkaHttp': 'com.typesafe.akka:akka-http-core_2.12:10.2.10',
'antlr4Runtime': 'org.antlr:antlr4-runtime:4.7.2',
'antlr4': 'org.antlr:antlr4:4.7.2',
'antlr4Runtime': 'org.antlr:antlr4-runtime:4.9.3',
'antlr4': 'org.antlr:antlr4:4.9.3',
'assertJ': 'org.assertj:assertj-core:3.11.1',
'avro': 'org.apache.avro:avro:1.11.3',
'avroCompiler': 'org.apache.avro:avro-compiler:1.11.3',
Expand All @@ -129,8 +129,10 @@ project.ext.externalDependency = [
'dropwizardMetricsCore': 'io.dropwizard.metrics:metrics-core:4.2.3',
'dropwizardMetricsJmx': 'io.dropwizard.metrics:metrics-jmx:4.2.3',
'ebean': 'io.ebean:ebean:' + ebeanVersion,
'ebeanTest': 'io.ebean:ebean-test:' + ebeanVersion,
'ebeanAgent': 'io.ebean:ebean-agent:' + ebeanVersion,
'ebeanDdl': 'io.ebean:ebean-ddl-generator:' + ebeanVersion,
'ebeanQueryBean': 'io.ebean:querybean-generator:' + ebeanVersion,
'elasticSearchRest': 'org.opensearch.client:opensearch-rest-high-level-client:' + elasticsearchVersion,
'elasticSearchJava': 'org.opensearch.client:opensearch-java:2.6.0',
'findbugsAnnotations': 'com.google.code.findbugs:annotations:3.0.1',
Expand Down Expand Up @@ -359,6 +361,9 @@ configure(subprojects.findAll {! it.name.startsWith('spark-lineage')}) {
exclude group: "org.slf4j", module: "slf4j-log4j12"
exclude group: "org.slf4j", module: "slf4j-nop"
exclude group: "org.slf4j", module: "slf4j-ext"

resolutionStrategy.force externalDependency.antlr4Runtime
resolutionStrategy.force externalDependency.antlr4
}
}

Expand Down
6 changes: 3 additions & 3 deletions datahub-frontend/app/auth/AuthModule.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import java.util.Collections;

import io.datahubproject.metadata.context.ActorContext;
import io.datahubproject.metadata.context.AuthorizerContext;
import io.datahubproject.metadata.context.AuthorizationContext;
import io.datahubproject.metadata.context.EntityRegistryContext;
import io.datahubproject.metadata.context.OperationContext;
import io.datahubproject.metadata.context.OperationContextConfig;
Expand Down Expand Up @@ -183,10 +183,10 @@ protected OperationContext provideOperationContext(
return OperationContext.builder()
.operationContextConfig(systemConfig)
.systemActorContext(systemActorContext)
// Authorizer.EMPTY is fine since it doesn't actually apply to system auth
.authorizationContext(AuthorizationContext.builder().authorizer(Authorizer.EMPTY).build())
.searchContext(SearchContext.EMPTY)
.entityRegistryContext(EntityRegistryContext.builder().build(EmptyEntityRegistry.EMPTY))
// Authorizer.EMPTY doesn't actually apply to system auth
.authorizerContext(AuthorizerContext.builder().authorizer(Authorizer.EMPTY).build())
.build(systemAuthentication);
}

Expand Down
15 changes: 7 additions & 8 deletions datahub-frontend/app/client/KafkaTrackingProducer.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package client;

import com.linkedin.metadata.config.kafka.KafkaConfiguration;
import com.linkedin.metadata.config.kafka.ProducerConfiguration;
import com.typesafe.config.Config;
import config.ConfigurationProvider;
Expand Down Expand Up @@ -46,7 +47,7 @@ public KafkaTrackingProducer(

if (_isEnabled) {
_logger.debug("Analytics tracking is enabled");
_producer = createKafkaProducer(config, configurationProvider.getKafka().getProducer());
_producer = createKafkaProducer(config, configurationProvider.getKafka());

lifecycle.addStopHook(
() -> {
Expand All @@ -69,7 +70,8 @@ public void send(ProducerRecord<String, String> record) {
}

private static KafkaProducer createKafkaProducer(
Config config, ProducerConfiguration producerConfiguration) {
Config config, KafkaConfiguration kafkaConfiguration) {
final ProducerConfiguration producerConfiguration = kafkaConfiguration.getProducer();
final Properties props = new Properties();
props.put(ProducerConfig.CLIENT_ID_CONFIG, "datahub-frontend");
props.put(
Expand All @@ -78,12 +80,9 @@ private static KafkaProducer createKafkaProducer(
props.put(
ProducerConfig.BOOTSTRAP_SERVERS_CONFIG,
config.getString("analytics.kafka.bootstrap.server"));
props.put(
ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
"org.apache.kafka.common.serialization.StringSerializer"); // Actor urn.
props.put(
ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
"org.apache.kafka.common.serialization.StringSerializer"); // JSON object.
// key: Actor urn.
// value: JSON object.
props.putAll(kafkaConfiguration.getSerde().getUsageEvent().getProducerProperties(null));
props.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG, producerConfiguration.getMaxRequestSize());
props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, producerConfiguration.getCompressionType());

Expand Down
46 changes: 46 additions & 0 deletions datahub-frontend/app/controllers/Application.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,15 @@
import akka.util.ByteString;
import auth.Authenticator;
import com.datahub.authentication.AuthenticationConstants;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.linkedin.util.Pair;
import com.typesafe.config.Config;
import java.io.InputStream;
import java.net.URI;
import java.time.Duration;
import java.time.Instant;
import java.util.List;
import java.util.Map;
import java.util.Optional;
Expand All @@ -33,6 +36,7 @@
import play.libs.ws.StandaloneWSClient;
import play.libs.ws.ahc.StandaloneAhcWSClient;
import play.mvc.Controller;
import play.mvc.Http.Cookie;
import play.mvc.Http;
import play.mvc.ResponseHeader;
import play.mvc.Result;
Expand Down Expand Up @@ -132,6 +136,9 @@ public CompletableFuture<Result> proxy(String path, Http.Request request)
headers.put(Http.HeaderNames.X_FORWARDED_PROTO, List.of(schema));
}

// Get the current time to measure the duration of the request
Instant start = Instant.now();

return _ws.url(
String.format(
"%s://%s:%s%s", protocol, metadataServiceHost, metadataServicePort, resolvedUri))
Expand Down Expand Up @@ -160,6 +167,15 @@ AuthenticationConstants.LEGACY_X_DATAHUB_ACTOR_HEADER, getDataHubActorHeader(req
.execute()
.thenApply(
apiResponse -> {
// Log the query if it takes longer than the configured threshold and verbose logging is enabled
boolean verboseGraphQLLogging = _config.getBoolean("graphql.verbose.logging");
int verboseGraphQLLongQueryMillis = _config.getInt("graphql.verbose.slowQueryMillis");
Instant finish = Instant.now();
long timeElapsed = Duration.between(start, finish).toMillis();
if (verboseGraphQLLogging && timeElapsed >= verboseGraphQLLongQueryMillis) {
logSlowQuery(request, resolvedUri, timeElapsed);
}

final ResponseHeader header =
new ResponseHeader(
apiResponse.getStatus(),
Expand Down Expand Up @@ -359,4 +375,34 @@ private String mapPath(@Nonnull final String path) {
// Otherwise, return original path
return path;
}


/**
* Called if verbose logging is enabled and request takes longer that the slow query milliseconds defined in the config
* @param request GraphQL request that was made
* @param resolvedUri URI that was requested
* @param duration How long the query took to complete
*/
private void logSlowQuery(Http.Request request, String resolvedUri, float duration) {
StringBuilder jsonBody = new StringBuilder();
Optional<Cookie> actorCookie = request.getCookie("actor");
String actorValue = actorCookie.isPresent() ? actorCookie.get().value() : "N/A";

try {
ObjectMapper mapper = new ObjectMapper();
JsonNode jsonNode = request.body().asJson();
((ObjectNode) jsonNode).remove("query");
jsonBody.append(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(jsonNode));
}
catch (Exception e) {
_logger.info("GraphQL Request Received: {}, Unable to parse JSON body", resolvedUri);
}
String jsonBodyStr = jsonBody.toString();
_logger.info("Slow GraphQL Request Received: {}, Request query string: {}, Request actor: {}, Request JSON: {}, Request completed in {} ms",
resolvedUri,
request.queryString(),
actorValue,
jsonBodyStr,
duration);
}
}
2 changes: 1 addition & 1 deletion datahub-frontend/app/utils/SearchUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ private SearchUtil() {
@Nonnull
public static String escapeForwardSlash(@Nonnull String input) {
if (input.contains("/")) {
input = input.replace("/", "\\\\/");
input = input.replace("/", "\\/");
}
return input;
}
Expand Down
8 changes: 7 additions & 1 deletion datahub-frontend/conf/application.conf
Original file line number Diff line number Diff line change
Expand Up @@ -298,4 +298,10 @@ entityClient.numRetries = ${?ENTITY_CLIENT_NUM_RETRIES}
entityClient.restli.get.batchSize = 50
entityClient.restli.get.batchSize = ${?ENTITY_CLIENT_RESTLI_GET_BATCH_SIZE}
entityClient.restli.get.batchConcurrency = 2
entityClient.restli.get.batchConcurrency = ${?ENTITY_CLIENT_RESTLI_GET_BATCH_CONCURRENCY}
entityClient.restli.get.batchConcurrency = ${?ENTITY_CLIENT_RESTLI_GET_BATCH_CONCURRENCY}

# Enable verbose authentication logging
graphql.verbose.logging = false
graphql.verbose.logging = ${?GRAPHQL_VERBOSE_LOGGING}
graphql.verbose.slowQueryMillis = 2500
graphql.verbose.slowQueryMillis = ${?GRAPHQL_VERBOSE_LONG_QUERY_MILLIS}
4 changes: 2 additions & 2 deletions datahub-frontend/test/utils/SearchUtilTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ public class SearchUtilTest {
@Test
public void testEscapeForwardSlash() {
// escape "/"
assertEquals("\\\\/foo\\\\/bar", SearchUtil.escapeForwardSlash("/foo/bar"));
assertEquals("\\/foo\\/bar", SearchUtil.escapeForwardSlash("/foo/bar"));
// "/" is escaped but "*" is not escaped and is treated as regex. Since currently we want to
// retain the regex behaviour with "*"
assertEquals("\\\\/foo\\\\/bar\\\\/*", SearchUtil.escapeForwardSlash("/foo/bar/*"));
assertEquals("\\/foo\\/bar\\/*", SearchUtil.escapeForwardSlash("/foo/bar/*"));
assertEquals("", "");
assertEquals("foo", "foo");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@
import com.linkedin.datahub.graphql.resolvers.step.BatchGetStepStatesResolver;
import com.linkedin.datahub.graphql.resolvers.step.BatchUpdateStepStatesResolver;
import com.linkedin.datahub.graphql.resolvers.structuredproperties.CreateStructuredPropertyResolver;
import com.linkedin.datahub.graphql.resolvers.structuredproperties.DeleteStructuredPropertyResolver;
import com.linkedin.datahub.graphql.resolvers.structuredproperties.RemoveStructuredPropertiesResolver;
import com.linkedin.datahub.graphql.resolvers.structuredproperties.UpdateStructuredPropertyResolver;
import com.linkedin.datahub.graphql.resolvers.structuredproperties.UpsertStructuredPropertiesResolver;
Expand Down Expand Up @@ -1034,6 +1035,7 @@ private void configureQueryResolvers(final RuntimeWiring.Builder builder) {
.dataFetcher("assertion", getResolver(assertionType))
.dataFetcher("form", getResolver(formType))
.dataFetcher("view", getResolver(dataHubViewType))
.dataFetcher("structuredProperty", getResolver(structuredPropertyType))
.dataFetcher("listPolicies", new ListPoliciesResolver(this.entityClient))
.dataFetcher("getGrantedPrivileges", new GetGrantedPrivilegesResolver())
.dataFetcher("listUsers", new ListUsersResolver(this.entityClient))
Expand Down Expand Up @@ -1343,6 +1345,9 @@ private void configureMutationResolvers(final RuntimeWiring.Builder builder) {
.dataFetcher(
"updateStructuredProperty",
new UpdateStructuredPropertyResolver(this.entityClient))
.dataFetcher(
"deleteStructuredProperty",
new DeleteStructuredPropertyResolver(this.entityClient))
.dataFetcher("raiseIncident", new RaiseIncidentResolver(this.entityClient))
.dataFetcher(
"updateIncidentStatus",
Expand Down Expand Up @@ -2116,6 +2121,9 @@ private void configureStructuredPropertyResolvers(final RuntimeWiring.Builder bu
.getAllowedTypes().stream()
.map(entityTypeType.getKeyProvider())
.collect(Collectors.toList()))));
builder.type(
"StructuredPropertyEntity",
typeWiring -> typeWiring.dataFetcher("exists", new EntityExistsResolver(entityService)));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import com.datahub.authentication.Actor;
import com.datahub.authentication.Authentication;
import com.datahub.plugins.auth.authorization.Authorizer;
import com.linkedin.metadata.config.DataHubAppConfiguration;
import io.datahubproject.metadata.context.OperationContext;

/** Provided as input to GraphQL resolvers; used to carry information about GQL request context. */
Expand Down Expand Up @@ -31,4 +32,6 @@ default String getActorUrn() {
* @return Returns the operational context
*/
OperationContext getOperationContext();

DataHubAppConfiguration getDataHubAppConfig();
}
Loading

0 comments on commit 8b43d10

Please sign in to comment.