Skip to content

Commit

Permalink
feat(ui,graphql): Finish bringing alchemy UI to OSS (datahub-project#…
Browse files Browse the repository at this point in the history
  • Loading branch information
asikowitz authored Feb 19, 2025
1 parent b8987f2 commit 79fbc6a
Show file tree
Hide file tree
Showing 151 changed files with 4,128 additions and 679 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import com.linkedin.datahub.graphql.types.mappers.TimeSeriesAspectMapper;
import com.linkedin.dataset.DatasetFieldProfile;
import com.linkedin.dataset.DatasetProfile;
import com.linkedin.dataset.Quantile;
import com.linkedin.dataset.ValueFrequency;
import com.linkedin.metadata.aspect.EnvelopedAspect;
import com.linkedin.metadata.utils.GenericRecordUtils;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -66,6 +68,37 @@ private static com.linkedin.datahub.graphql.generated.DatasetFieldProfile mapFie
result.setNullProportion(gmsProfile.getNullProportion());
}
result.setSampleValues(gmsProfile.getSampleValues());
if (gmsProfile.hasQuantiles()) {
result.setQuantiles(
gmsProfile.getQuantiles().stream()
.map(DatasetProfileMapper::mapQuantile)
.collect(Collectors.toList()));
}
if (gmsProfile.hasDistinctValueFrequencies()) {
result.setDistinctValueFrequencies(
gmsProfile.getDistinctValueFrequencies().stream()
.map(DatasetProfileMapper::mapValueFrequency)
.collect(Collectors.toList()));
}
return result;
}

/**
 * Converts a GMS {@link Quantile} record into its GraphQL counterpart.
 *
 * @param quantile the GMS quantile whose label and value are copied across
 * @return a new GraphQL quantile carrying the same quantile label and value
 */
private static com.linkedin.datahub.graphql.generated.Quantile mapQuantile(Quantile quantile) {
  // Generated all-args constructor order follows the schema: (quantile, value).
  return new com.linkedin.datahub.graphql.generated.Quantile(
      quantile.getQuantile(), quantile.getValue());
}

/**
 * Converts a GMS {@link ValueFrequency} record into its GraphQL counterpart.
 *
 * @param frequencies the GMS value/frequency pair to copy across
 * @return a new GraphQL value frequency carrying the same value and frequency
 */
private static com.linkedin.datahub.graphql.generated.ValueFrequency mapValueFrequency(
    ValueFrequency frequencies) {
  // Generated all-args constructor order follows the schema: (value, frequency).
  return new com.linkedin.datahub.graphql.generated.ValueFrequency(
      frequencies.getValue(), frequencies.getFrequency());
}
}
39 changes: 39 additions & 0 deletions datahub-graphql-core/src/main/resources/entity.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -7411,6 +7411,34 @@ type DatasetProfile implements TimeSeriesAspect {
partitionSpec: PartitionSpec
}

"""
A quantile along with its corresponding value
"""
type Quantile {
"""
Quantile. E.g. "0.25" for the 25th percentile
"""
quantile: String!
"""
The value of the quantile
"""
value: String!
}

"""
A frequency distribution of a specific value within a dataset
"""
type ValueFrequency {
"""
Specific value. For numeric columns, the value will contain a stringified value
"""
value: String!
"""
Volume of the value
"""
frequency: Long!
}

"""
An individual Dataset Field Profile
"""
Expand Down Expand Up @@ -7469,6 +7497,17 @@ type DatasetFieldProfile {
A set of sample values for the field
"""
sampleValues: [String!]

"""
Sorted list of quantile cutoffs for the field, in ascending order
Only for numerical columns
"""
quantiles: [Quantile!]

"""
Volume of each column value for a low-cardinality / categorical field
"""
distinctValueFrequencies: [ValueFrequency!]
}

"""
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
package com.linkedin.datahub.graphql.types.dataset.mappers;

import com.google.common.collect.ImmutableList;
import com.linkedin.data.template.SetMode;
import com.linkedin.data.template.StringArray;
import com.linkedin.datahub.graphql.generated.DatasetProfile;
import com.linkedin.dataset.DatasetFieldProfile;
import com.linkedin.dataset.DatasetFieldProfileArray;
import com.linkedin.dataset.Quantile;
import com.linkedin.dataset.QuantileArray;
import com.linkedin.dataset.ValueFrequency;
import com.linkedin.dataset.ValueFrequencyArray;
import com.linkedin.metadata.aspect.EnvelopedAspect;
import com.linkedin.metadata.utils.GenericRecordUtils;
import java.util.ArrayList;
Expand All @@ -19,6 +24,19 @@ public void testMapperFullProfile() {
input.setRowCount(10L);
input.setColumnCount(45L);
input.setSizeInBytes(15L);

ValueFrequency valueFrequency = new ValueFrequency();
valueFrequency.setValue("2");
valueFrequency.setFrequency(10L);

Quantile quantile25 = new Quantile();
quantile25.setQuantile("0.25");
quantile25.setValue("1");

Quantile quantile75 = new Quantile();
quantile75.setQuantile("0.75");
quantile75.setValue("5");

input.setFieldProfiles(
new DatasetFieldProfileArray(
ImmutableList.of(
Expand All @@ -33,7 +51,11 @@ public void testMapperFullProfile() {
.setNullProportion(20.5f)
.setUniqueCount(30L)
.setUniqueProportion(30.5f)
.setSampleValues(new StringArray(ImmutableList.of("val1", "val2"))),
.setSampleValues(new StringArray(ImmutableList.of("val1", "val2")))
.setQuantiles(new QuantileArray(ImmutableList.of(quantile25, quantile75)))
.setDistinctValueFrequencies(
new ValueFrequencyArray(ImmutableList.of(valueFrequency)),
SetMode.IGNORE_NULL),
new DatasetFieldProfile()
.setFieldPath("/field2")
.setMax("2")
Expand All @@ -45,7 +67,11 @@ public void testMapperFullProfile() {
.setNullProportion(30.5f)
.setUniqueCount(40L)
.setUniqueProportion(40.5f)
.setSampleValues(new StringArray(ImmutableList.of("val3", "val4"))))));
.setSampleValues(new StringArray(ImmutableList.of("val3", "val4")))
.setQuantiles(new QuantileArray(ImmutableList.of(quantile25, quantile75)))
.setDistinctValueFrequencies(
new ValueFrequencyArray(ImmutableList.of(valueFrequency)),
SetMode.IGNORE_NULL))));
final EnvelopedAspect inputAspect =
new EnvelopedAspect().setAspect(GenericRecordUtils.serializeAspect(input));
final DatasetProfile actual = DatasetProfileMapper.map(null, inputAspect);
Expand All @@ -68,7 +94,14 @@ public void testMapperFullProfile() {
"2",
"4",
"3",
new ArrayList<>(ImmutableList.of("val1", "val2"))),
new ArrayList<>(ImmutableList.of("val1", "val2")),
new ArrayList<com.linkedin.datahub.graphql.generated.Quantile>(
ImmutableList.of(
new com.linkedin.datahub.graphql.generated.Quantile("0.25", "1"),
new com.linkedin.datahub.graphql.generated.Quantile("0.75", "5"))),
new ArrayList<com.linkedin.datahub.graphql.generated.ValueFrequency>(
ImmutableList.of(
new com.linkedin.datahub.graphql.generated.ValueFrequency("2", 10L)))),
new com.linkedin.datahub.graphql.generated.DatasetFieldProfile(
"/field2",
40L,
Expand All @@ -80,7 +113,15 @@ public void testMapperFullProfile() {
"3",
"5",
"4",
new ArrayList<>(ImmutableList.of("val3", "val4"))))));
new ArrayList<>(ImmutableList.of("val3", "val4")),
new ArrayList<com.linkedin.datahub.graphql.generated.Quantile>(
ImmutableList.of(
new com.linkedin.datahub.graphql.generated.Quantile("0.25", "1"),
new com.linkedin.datahub.graphql.generated.Quantile("0.75", "5"))),
new ArrayList<com.linkedin.datahub.graphql.generated.ValueFrequency>(
ImmutableList.of(
new com.linkedin.datahub.graphql.generated.ValueFrequency(
"2", 10L)))))));
Assert.assertEquals(actual.getTimestampMillis(), expected.getTimestampMillis());
Assert.assertEquals(actual.getRowCount(), expected.getRowCount());
Assert.assertEquals(actual.getColumnCount(), expected.getColumnCount());
Expand Down Expand Up @@ -113,6 +154,24 @@ public void testMapperFullProfile() {
Assert.assertEquals(
actual.getFieldProfiles().get(0).getSampleValues(),
expected.getFieldProfiles().get(0).getSampleValues());
Assert.assertEquals(
actual.getFieldProfiles().get(0).getQuantiles().get(0).getQuantile(),
expected.getFieldProfiles().get(0).getQuantiles().get(0).getQuantile());
Assert.assertEquals(
actual.getFieldProfiles().get(0).getQuantiles().get(0).getValue(),
expected.getFieldProfiles().get(0).getQuantiles().get(0).getValue());
Assert.assertEquals(
actual.getFieldProfiles().get(0).getQuantiles().get(1).getQuantile(),
expected.getFieldProfiles().get(0).getQuantiles().get(1).getQuantile());
Assert.assertEquals(
actual.getFieldProfiles().get(0).getQuantiles().get(1).getValue(),
expected.getFieldProfiles().get(0).getQuantiles().get(1).getValue());
Assert.assertEquals(
actual.getFieldProfiles().get(0).getDistinctValueFrequencies().get(0).getValue(),
expected.getFieldProfiles().get(0).getDistinctValueFrequencies().get(0).getValue());
Assert.assertEquals(
actual.getFieldProfiles().get(0).getDistinctValueFrequencies().get(0).getFrequency(),
expected.getFieldProfiles().get(0).getDistinctValueFrequencies().get(0).getFrequency());

Assert.assertEquals(
actual.getFieldProfiles().get(1).getFieldPath(),
Expand Down Expand Up @@ -141,6 +200,24 @@ public void testMapperFullProfile() {
Assert.assertEquals(
actual.getFieldProfiles().get(1).getSampleValues(),
expected.getFieldProfiles().get(1).getSampleValues());
Assert.assertEquals(
actual.getFieldProfiles().get(1).getQuantiles().get(0).getQuantile(),
expected.getFieldProfiles().get(1).getQuantiles().get(0).getQuantile());
// Second field profile (index 1): value of its first quantile. This previously
// re-checked index 0, leaving the second profile's quantile value unverified.
Assert.assertEquals(
    actual.getFieldProfiles().get(1).getQuantiles().get(0).getValue(),
    expected.getFieldProfiles().get(1).getQuantiles().get(0).getValue());
Assert.assertEquals(
actual.getFieldProfiles().get(1).getQuantiles().get(1).getQuantile(),
expected.getFieldProfiles().get(1).getQuantiles().get(1).getQuantile());
Assert.assertEquals(
actual.getFieldProfiles().get(1).getQuantiles().get(1).getValue(),
expected.getFieldProfiles().get(1).getQuantiles().get(1).getValue());
Assert.assertEquals(
actual.getFieldProfiles().get(1).getDistinctValueFrequencies().get(0).getValue(),
expected.getFieldProfiles().get(1).getDistinctValueFrequencies().get(0).getValue());
Assert.assertEquals(
actual.getFieldProfiles().get(1).getDistinctValueFrequencies().get(0).getFrequency(),
expected.getFieldProfiles().get(1).getDistinctValueFrequencies().get(0).getFrequency());
}

@Test
Expand Down Expand Up @@ -176,9 +253,11 @@ public void testMapperPartialProfile() {
new ArrayList<>(
ImmutableList.of(
new com.linkedin.datahub.graphql.generated.DatasetFieldProfile(
"/field1", 30L, 30.5f, null, null, null, null, null, null, null, null),
"/field1", 30L, 30.5f, null, null, null, null, null, null, null, null, null,
null),
new com.linkedin.datahub.graphql.generated.DatasetFieldProfile(
"/field2", 40L, 40.5f, null, null, "6", "2", "3", "5", "4", null))));
"/field2", 40L, 40.5f, null, null, "6", "2", "3", "5", "4", null, null,
null))));
Assert.assertEquals(actual.getTimestampMillis(), expected.getTimestampMillis());
Assert.assertEquals(actual.getRowCount(), expected.getRowCount());
Assert.assertEquals(actual.getColumnCount(), expected.getColumnCount());
Expand Down Expand Up @@ -211,6 +290,12 @@ public void testMapperPartialProfile() {
Assert.assertEquals(
actual.getFieldProfiles().get(0).getSampleValues(),
expected.getFieldProfiles().get(0).getSampleValues());
Assert.assertEquals(
actual.getFieldProfiles().get(0).getQuantiles(),
expected.getFieldProfiles().get(0).getQuantiles());
Assert.assertEquals(
actual.getFieldProfiles().get(0).getDistinctValueFrequencies(),
expected.getFieldProfiles().get(0).getDistinctValueFrequencies());

Assert.assertEquals(
actual.getFieldProfiles().get(1).getFieldPath(),
Expand Down Expand Up @@ -239,5 +324,11 @@ public void testMapperPartialProfile() {
Assert.assertEquals(
actual.getFieldProfiles().get(1).getSampleValues(),
expected.getFieldProfiles().get(1).getSampleValues());
Assert.assertEquals(
actual.getFieldProfiles().get(1).getQuantiles(),
expected.getFieldProfiles().get(1).getQuantiles());
Assert.assertEquals(
actual.getFieldProfiles().get(1).getDistinctValueFrequencies(),
expected.getFieldProfiles().get(1).getDistinctValueFrequencies());
}
}
2 changes: 2 additions & 0 deletions datahub-web-react/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
"@visx/marker": "^3.5.0",
"@visx/scale": "^3.2.0",
"@visx/shape": "^3.2.0",
"@visx/stats": "^3.12.0",
"@visx/tooltip": "^3.12.0",
"@visx/xychart": "^3.2.0",
"@visx/zoom": "^3.1.1",
"analytics": "^0.8.9",
Expand Down
2 changes: 1 addition & 1 deletion datahub-web-react/src/Mocks.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -1175,7 +1175,7 @@ const glossaryTerm2 = {
__typename: 'GlossaryTerm',
};

const glossaryTerm3 = {
export const glossaryTerm3 = {
urn: 'urn:li:glossaryTerm:example.glossaryterm2',
type: 'GLOSSARY_TERM',
name: 'glossaryterm2',
Expand Down
Loading

0 comments on commit 79fbc6a

Please sign in to comment.