Skip to content

Commit a46cb08

Browse files
authored
Merge branch 'master' into master
2 parents 9c9392d + 280e82a commit a46cb08

File tree

220 files changed

+10861
-2091
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

220 files changed

+10861
-2091
lines changed

.github/ISSUE_TEMPLATE/--bug-report.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,4 @@ If applicable, add screenshots to help explain your problem.
2929
- Version [e.g. 22]
3030

3131
**Additional context**
32-
Add any other context about the problem here.
32+
Add any other context about the problem here.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
---
2+
name: DataHub v1.0-rc Bug Report
3+
about: Report issues found in DataHub v1.0 Release Candidates
4+
title: "[v1.0-rc/bug] Description of Bug"
5+
labels: bug, datahub-v1.0-rc
6+
assignees: chriscollins3456, david-leifker, maggiehays
7+
8+
---
9+
10+
**Describe the bug**
11+
A clear and concise description of what the bug is.
12+
13+
**To Reproduce**
14+
Steps to reproduce the behavior:
15+
1. Go to '...'
16+
2. Click on '....'
17+
3. Scroll down to '....'
18+
4. See error
19+
20+
**Expected behavior**
21+
A clear and concise description of what you expected to happen.
22+
23+
**Screenshots and/or Screen Recordings**
24+
If applicable, add screenshots and/or screen recordings to help explain the issue.
25+
26+
**System details (please complete the following information):**
27+
- DataHub Version Tag [e.g. v1.0-rc1]
28+
- OS: [e.g. iOS]
29+
- Browser [e.g. chrome, safari]
30+
31+
**Additional context**
32+
Add any other context about the problem here.

.github/workflows/docker-unified.yml

+7-7
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ jobs:
4444
tag: ${{ steps.tag.outputs.tag }}
4545
slim_tag: ${{ steps.tag.outputs.slim_tag }}
4646
full_tag: ${{ steps.tag.outputs.full_tag }}
47+
short_sha: ${{ steps.tag.outputs.short_sha }} # needed for auto-deploy
4748
unique_tag: ${{ steps.tag.outputs.unique_tag }}
4849
unique_slim_tag: ${{ steps.tag.outputs.unique_slim_tag }}
4950
unique_full_tag: ${{ steps.tag.outputs.unique_full_tag }}
@@ -65,6 +66,8 @@ jobs:
6566
postgres_setup_change: ${{ steps.ci-optimize.outputs.postgres-setup-change == 'true' }}
6667
elasticsearch_setup_change: ${{ steps.ci-optimize.outputs.elasticsearch-setup-change == 'true' }}
6768
smoke_test_change: ${{ steps.ci-optimize.outputs.smoke-test-change == 'true' }}
69+
integrations_service_change: "false"
70+
datahub_executor_change: "false"
6871
steps:
6972
- name: Check out the repo
7073
uses: acryldata/sane-checkout-action@v3
@@ -864,7 +867,8 @@ jobs:
864867
context: .
865868
file: ./docker/datahub-ingestion/Dockerfile
866869
platforms: linux/amd64,linux/arm64/v8
867-
depot-project: ${{ vars.DEPOT_PROJECT_ID }}
870+
# Workaround 2025-01-25 - Depot publishing errors
871+
depot-project: ${{ (startsWith(github.ref, 'refs/tags/') || github.event_name == 'release') && '' || vars.DEPOT_PROJECT_ID }}
868872
- name: Compute Tag
869873
id: tag
870874
run: echo "tag=${{ needs.setup.outputs.ingestion_change == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> "$GITHUB_OUTPUT"
@@ -963,7 +967,8 @@ jobs:
963967
context: .
964968
file: ./docker/datahub-ingestion/Dockerfile
965969
platforms: linux/amd64,linux/arm64/v8
966-
depot-project: ${{ vars.DEPOT_PROJECT_ID }}
970+
# Workaround 2025-01-25 - Depot publishing errors
971+
depot-project: ${{ (startsWith(github.ref, 'refs/tags/') || github.event_name == 'release') && '' || vars.DEPOT_PROJECT_ID }}
967972
- name: Compute Tag (Full)
968973
id: tag
969974
run: echo "tag=${{ needs.setup.outputs.ingestion_change == 'true' && needs.setup.outputs.unique_tag || 'head' }}" >> "$GITHUB_OUTPUT"
@@ -1178,11 +1183,6 @@ jobs:
11781183
docker pull '${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:head'
11791184
docker tag '${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:head' '${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:${{ needs.setup.outputs.unique_tag }}'
11801185
fi
1181-
if [ '${{ needs.setup.outputs.integrations_service_change }}' == 'false' ]; then
1182-
echo 'datahub-integration-service head images'
1183-
docker pull '${{ env.DATAHUB_INTEGRATIONS_IMAGE }}:head'
1184-
docker tag '${{ env.DATAHUB_INTEGRATIONS_IMAGE }}:head' '${{ env.DATAHUB_INTEGRATIONS_IMAGE }}:${{ needs.setup.outputs.unique_tag }}'
1185-
fi
11861186
- name: CI Slim Head Images
11871187
run: |
11881188
if [ '${{ needs.setup.outputs.ingestion_change }}' == 'false' ]; then

build.gradle

+5-2
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ buildscript {
3838
ext.springVersion = '6.1.14'
3939
ext.springBootVersion = '3.2.9'
4040
ext.springKafkaVersion = '3.1.6'
41-
ext.openTelemetryVersion = '1.18.0'
41+
ext.openTelemetryVersion = '1.45.0'
4242
ext.neo4jVersion = '5.20.0'
4343
ext.neo4jTestVersion = '5.20.0'
4444
ext.neo4jApocVersion = '5.20.0'
@@ -220,7 +220,10 @@ project.ext.externalDependency = [
220220
'neo4jApocCore': 'org.neo4j.procedure:apoc-core:' + neo4jApocVersion,
221221
'neo4jApocCommon': 'org.neo4j.procedure:apoc-common:' + neo4jApocVersion,
222222
'opentelemetryApi': 'io.opentelemetry:opentelemetry-api:' + openTelemetryVersion,
223-
'opentelemetryAnnotations': 'io.opentelemetry:opentelemetry-extension-annotations:' + openTelemetryVersion,
223+
'opentelemetrySdk': 'io.opentelemetry:opentelemetry-sdk:' + openTelemetryVersion,
224+
'opentelemetrySdkTrace': 'io.opentelemetry:opentelemetry-sdk-trace:' + openTelemetryVersion,
225+
'opentelemetryAutoConfig': 'io.opentelemetry:opentelemetry-sdk-extension-autoconfigure:' + openTelemetryVersion,
226+
'opentelemetryAnnotations': 'io.opentelemetry.instrumentation:opentelemetry-instrumentation-annotations:2.11.0',
224227
'opentracingJdbc':'io.opentracing.contrib:opentracing-jdbc:0.2.15',
225228
'parquet': 'org.apache.parquet:parquet-avro:1.12.3',
226229
'parquetHadoop': 'org.apache.parquet:parquet-hadoop:1.13.1',

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/EntityCountsResolver.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import com.linkedin.metadata.service.ViewService;
1313
import graphql.schema.DataFetcher;
1414
import graphql.schema.DataFetchingEnvironment;
15-
import io.opentelemetry.extension.annotations.WithSpan;
15+
import io.opentelemetry.instrumentation.annotations.WithSpan;
1616
import java.util.List;
1717
import java.util.Map;
1818
import java.util.concurrent.CompletableFuture;

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutableTypeBatchResolver.java

+23-18
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,14 @@
22

33
import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*;
44

5-
import com.codahale.metrics.Timer;
5+
import com.linkedin.datahub.graphql.QueryContext;
66
import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils;
77
import com.linkedin.datahub.graphql.exception.AuthorizationException;
88
import com.linkedin.datahub.graphql.types.BatchMutableType;
99
import com.linkedin.metadata.utils.metrics.MetricUtils;
1010
import graphql.schema.DataFetcher;
1111
import graphql.schema.DataFetchingEnvironment;
12+
import io.datahubproject.metadata.context.OperationContext;
1213
import java.util.List;
1314
import java.util.concurrent.CompletableFuture;
1415
import org.slf4j.Logger;
@@ -33,25 +34,29 @@ public MutableTypeBatchResolver(final BatchMutableType<I, B, T> batchMutableType
3334

3435
@Override
3536
public CompletableFuture<List<T>> get(DataFetchingEnvironment environment) throws Exception {
37+
final QueryContext context = environment.getContext();
38+
final OperationContext opContext = context.getOperationContext();
39+
3640
final B[] input =
3741
bindArgument(environment.getArgument("input"), _batchMutableType.batchInputClass());
3842

39-
return GraphQLConcurrencyUtils.supplyAsync(
40-
() -> {
41-
Timer.Context timer = MetricUtils.timer(this.getClass(), "batchMutate").time();
42-
43-
try {
44-
return _batchMutableType.batchUpdate(input, environment.getContext());
45-
} catch (AuthorizationException e) {
46-
throw e;
47-
} catch (Exception e) {
48-
_logger.error("Failed to perform batchUpdate", e);
49-
throw new IllegalArgumentException(e);
50-
} finally {
51-
timer.stop();
52-
}
53-
},
54-
this.getClass().getSimpleName(),
55-
"get");
43+
return opContext.withSpan(
44+
"batchMutate",
45+
() ->
46+
GraphQLConcurrencyUtils.supplyAsync(
47+
() -> {
48+
try {
49+
return _batchMutableType.batchUpdate(input, environment.getContext());
50+
} catch (AuthorizationException e) {
51+
throw e;
52+
} catch (Exception e) {
53+
_logger.error("Failed to perform batchUpdate", e);
54+
throw new IllegalArgumentException(e);
55+
}
56+
},
57+
this.getClass().getSimpleName(),
58+
"get"),
59+
MetricUtils.DROPWIZARD_METRIC,
60+
"true");
5661
}
5762
}

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
import graphql.schema.DataFetcher;
2828
import graphql.schema.DataFetchingEnvironment;
2929
import io.datahubproject.metadata.context.OperationContext;
30-
import io.opentelemetry.extension.annotations.WithSpan;
30+
import io.opentelemetry.instrumentation.annotations.WithSpan;
3131
import java.net.URISyntaxException;
3232
import java.util.Collections;
3333
import java.util.List;

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import com.linkedin.metadata.query.SearchFlags;
2020
import graphql.schema.DataFetcher;
2121
import graphql.schema.DataFetchingEnvironment;
22-
import io.opentelemetry.extension.annotations.WithSpan;
22+
import io.opentelemetry.instrumentation.annotations.WithSpan;
2323
import java.util.Collections;
2424
import java.util.concurrent.CompletableFuture;
2525
import lombok.RequiredArgsConstructor;

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupPropertiesMapper.java

+33
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,11 @@
33
import com.linkedin.common.urn.Urn;
44
import com.linkedin.datahub.graphql.QueryContext;
55
import com.linkedin.datahub.graphql.generated.MLModelGroupProperties;
6+
import com.linkedin.datahub.graphql.generated.MLModelLineageInfo;
67
import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper;
8+
import com.linkedin.datahub.graphql.types.common.mappers.TimeStampToAuditStampMapper;
79
import com.linkedin.datahub.graphql.types.mappers.EmbeddedModelMapper;
10+
import java.util.stream.Collectors;
811
import javax.annotation.Nonnull;
912
import javax.annotation.Nullable;
1013

@@ -33,10 +36,40 @@ public MLModelGroupProperties apply(
3336
result.setVersion(VersionTagMapper.map(context, mlModelGroupProperties.getVersion()));
3437
}
3538
result.setCreatedAt(mlModelGroupProperties.getCreatedAt());
39+
if (mlModelGroupProperties.hasCreated()) {
40+
result.setCreated(
41+
TimeStampToAuditStampMapper.map(context, mlModelGroupProperties.getCreated()));
42+
}
43+
if (mlModelGroupProperties.getName() != null) {
44+
result.setName(mlModelGroupProperties.getName());
45+
} else {
46+
// backfill name from URN for backwards compatibility
47+
result.setName(entityUrn.getEntityKey().get(1)); // indexed access is safe here
48+
}
49+
50+
if (mlModelGroupProperties.hasLastModified()) {
51+
result.setLastModified(
52+
TimeStampToAuditStampMapper.map(context, mlModelGroupProperties.getLastModified()));
53+
}
3654

3755
result.setCustomProperties(
3856
CustomPropertiesMapper.map(mlModelGroupProperties.getCustomProperties(), entityUrn));
3957

58+
final MLModelLineageInfo lineageInfo = new MLModelLineageInfo();
59+
if (mlModelGroupProperties.hasTrainingJobs()) {
60+
lineageInfo.setTrainingJobs(
61+
mlModelGroupProperties.getTrainingJobs().stream()
62+
.map(urn -> urn.toString())
63+
.collect(Collectors.toList()));
64+
}
65+
if (mlModelGroupProperties.hasDownstreamJobs()) {
66+
lineageInfo.setDownstreamJobs(
67+
mlModelGroupProperties.getDownstreamJobs().stream()
68+
.map(urn -> urn.toString())
69+
.collect(Collectors.toList()));
70+
}
71+
result.setMlModelLineageInfo(lineageInfo);
72+
4073
return result;
4174
}
4275
}

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java

+15
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import com.linkedin.common.urn.Urn;
66
import com.linkedin.datahub.graphql.QueryContext;
77
import com.linkedin.datahub.graphql.generated.MLModelGroup;
8+
import com.linkedin.datahub.graphql.generated.MLModelLineageInfo;
89
import com.linkedin.datahub.graphql.generated.MLModelProperties;
910
import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper;
1011
import com.linkedin.datahub.graphql.types.common.mappers.TimeStampToAuditStampMapper;
@@ -87,6 +88,20 @@ public MLModelProperties apply(
8788
.collect(Collectors.toList()));
8889
}
8990
result.setTags(mlModelProperties.getTags());
91+
final MLModelLineageInfo lineageInfo = new MLModelLineageInfo();
92+
if (mlModelProperties.hasTrainingJobs()) {
93+
lineageInfo.setTrainingJobs(
94+
mlModelProperties.getTrainingJobs().stream()
95+
.map(urn -> urn.toString())
96+
.collect(Collectors.toList()));
97+
}
98+
if (mlModelProperties.hasDownstreamJobs()) {
99+
lineageInfo.setDownstreamJobs(
100+
mlModelProperties.getDownstreamJobs().stream()
101+
.map(urn -> urn.toString())
102+
.collect(Collectors.toList()));
103+
}
104+
result.setMlModelLineageInfo(lineageInfo);
90105

91106
return result;
92107
}

datahub-graphql-core/src/main/resources/lineage.graphql

+29
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,32 @@ input LineageEdge {
2525
"""
2626
upstreamUrn: String!
2727
}
28+
29+
"""
30+
Represents lineage information for ML entities.
31+
"""
32+
type MLModelLineageInfo {
33+
"""
34+
List of jobs or processes used to train the model.
35+
"""
36+
trainingJobs: [String!]
37+
38+
"""
39+
List of jobs or processes that use this model.
40+
"""
41+
downstreamJobs: [String!]
42+
}
43+
44+
extend type MLModelProperties {
45+
"""
46+
Information related to lineage to this model group
47+
"""
48+
mlModelLineageInfo: MLModelLineageInfo
49+
}
50+
51+
extend type MLModelGroupProperties {
52+
"""
53+
Information related to lineage for this model group
54+
"""
55+
mlModelLineageInfo: MLModelLineageInfo
56+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
package com.linkedin.datahub.graphql.types.mlmodel.mappers;
2+
3+
import static org.testng.Assert.assertEquals;
4+
import static org.testng.Assert.assertNotNull;
5+
import static org.testng.Assert.assertNull;
6+
7+
import com.linkedin.common.urn.Urn;
8+
import com.linkedin.ml.metadata.MLModelGroupProperties;
9+
import java.net.URISyntaxException;
10+
import org.testng.annotations.Test;
11+
12+
public class MLModelGroupPropertiesMapperTest {
13+
14+
@Test
15+
public void testMapMLModelGroupProperties() throws URISyntaxException {
16+
// Create backend ML Model Group Properties
17+
MLModelGroupProperties input = new MLModelGroupProperties();
18+
19+
// Set description
20+
input.setDescription("a ml trust model group");
21+
22+
// Set Name
23+
input.setName("ML trust model group");
24+
25+
// Create URN
26+
Urn groupUrn =
27+
Urn.createFromString(
28+
"urn:li:mlModelGroup:(urn:li:dataPlatform:sagemaker,another-group,PROD)");
29+
30+
// Map the properties
31+
com.linkedin.datahub.graphql.generated.MLModelGroupProperties result =
32+
MLModelGroupPropertiesMapper.map(null, input, groupUrn);
33+
34+
// Verify mapped properties
35+
assertNotNull(result);
36+
assertEquals(result.getDescription(), "a ml trust model group");
37+
assertEquals(result.getName(), "ML trust model group");
38+
39+
// Verify lineage info is present but its job lists are unset, matching the mock data
40+
assertNotNull(result.getMlModelLineageInfo());
41+
assertNull(result.getMlModelLineageInfo().getTrainingJobs());
42+
assertNull(result.getMlModelLineageInfo().getDownstreamJobs());
43+
}
44+
45+
@Test
46+
public void testMapWithMinimalProperties() throws URISyntaxException {
47+
// Create backend ML Model Group Properties with minimal information
48+
MLModelGroupProperties input = new MLModelGroupProperties();
49+
50+
// Create URN
51+
Urn groupUrn =
52+
Urn.createFromString(
53+
"urn:li:mlModelGroup:(urn:li:dataPlatform:sagemaker,another-group,PROD)");
54+
55+
// Map the properties
56+
com.linkedin.datahub.graphql.generated.MLModelGroupProperties result =
57+
MLModelGroupPropertiesMapper.map(null, input, groupUrn);
58+
59+
// Verify basic mapping with minimal properties
60+
assertNotNull(result);
61+
assertNull(result.getDescription());
62+
63+
// Verify lineage info is present but its job lists are unset
64+
assertNotNull(result.getMlModelLineageInfo());
65+
assertNull(result.getMlModelLineageInfo().getTrainingJobs());
66+
assertNull(result.getMlModelLineageInfo().getDownstreamJobs());
67+
}
68+
}

0 commit comments

Comments
 (0)