Commit
Merge branch 'master' into master
haon85 authored Jan 29, 2025
2 parents 9c9392d + 280e82a commit a46cb08
Showing 220 changed files with 10,861 additions and 2,091 deletions.
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/--bug-report.md
@@ -29,4 +29,4 @@ If applicable, add screenshots to help explain your problem.
- Version [e.g. 22]

**Additional context**
-Add any other context about the problem here.
+Add any other context about the problem here.
32 changes: 32 additions & 0 deletions .github/ISSUE_TEMPLATE/datahub-v1-0-rc-bug-report.md
@@ -0,0 +1,32 @@
---
name: DataHub v1.0-rc Bug Report
about: Report issues found in DataHub v1.0 Release Candidates
title: "[v1.0-rc/bug] Description of Bug"
labels: bug, datahub-v1.0-rc
assignees: chriscollins3456, david-leifker, maggiehays

---

**Describe the bug**
A clear and concise description of what the bug is.

**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error

**Expected behavior**
A clear and concise description of what you expected to happen.

**Screenshots and/or Screen Recordings**
If applicable, add screenshots and/or screen recordings to help explain the issue.

**System details (please complete the following information):**
- DataHub Version Tag [e.g. v1.0-rc1]
- OS: [e.g. iOS]
- Browser [e.g. chrome, safari]

**Additional context**
Add any other context about the problem here.
14 changes: 7 additions & 7 deletions .github/workflows/docker-unified.yml
@@ -44,6 +44,7 @@ jobs:
tag: ${{ steps.tag.outputs.tag }}
slim_tag: ${{ steps.tag.outputs.slim_tag }}
full_tag: ${{ steps.tag.outputs.full_tag }}
short_sha: ${{ steps.tag.outputs.short_sha }} # needed for auto-deploy
unique_tag: ${{ steps.tag.outputs.unique_tag }}
unique_slim_tag: ${{ steps.tag.outputs.unique_slim_tag }}
unique_full_tag: ${{ steps.tag.outputs.unique_full_tag }}
@@ -65,6 +66,8 @@
postgres_setup_change: ${{ steps.ci-optimize.outputs.postgres-setup-change == 'true' }}
elasticsearch_setup_change: ${{ steps.ci-optimize.outputs.elasticsearch-setup-change == 'true' }}
smoke_test_change: ${{ steps.ci-optimize.outputs.smoke-test-change == 'true' }}
integrations_service_change: "false"
datahub_executor_change: "false"
steps:
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
@@ -864,7 +867,8 @@ jobs:
context: .
file: ./docker/datahub-ingestion/Dockerfile
platforms: linux/amd64,linux/arm64/v8
-depot-project: ${{ vars.DEPOT_PROJECT_ID }}
+# Workaround 2025-01-25 - Depot publishing errors
+depot-project: ${{ (startsWith(github.ref, 'refs/tags/') || github.event_name == 'release') && '' || vars.DEPOT_PROJECT_ID }}
- name: Compute Tag
id: tag
run: echo "tag=${{ needs.setup.outputs.ingestion_change == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> "$GITHUB_OUTPUT"
@@ -963,7 +967,8 @@ jobs:
context: .
file: ./docker/datahub-ingestion/Dockerfile
platforms: linux/amd64,linux/arm64/v8
-depot-project: ${{ vars.DEPOT_PROJECT_ID }}
+# Workaround 2025-01-25 - Depot publishing errors
+depot-project: ${{ (startsWith(github.ref, 'refs/tags/') || github.event_name == 'release') && '' || vars.DEPOT_PROJECT_ID }}
- name: Compute Tag (Full)
id: tag
run: echo "tag=${{ needs.setup.outputs.ingestion_change == 'true' && needs.setup.outputs.unique_tag || 'head' }}" >> "$GITHUB_OUTPUT"
@@ -1178,11 +1183,6 @@ jobs:
docker pull '${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:head'
docker tag '${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:head' '${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:${{ needs.setup.outputs.unique_tag }}'
fi
-if [ '${{ needs.setup.outputs.integrations_service_change }}' == 'false' ]; then
-  echo 'datahub-integration-service head images'
-  docker pull '${{ env.DATAHUB_INTEGRATIONS_IMAGE }}:head'
-  docker tag '${{ env.DATAHUB_INTEGRATIONS_IMAGE }}:head' '${{ env.DATAHUB_INTEGRATIONS_IMAGE }}:${{ needs.setup.outputs.unique_tag }}'
-fi
- name: CI Slim Head Images
run: |
if [ '${{ needs.setup.outputs.ingestion_change }}' == 'false' ]; then
7 changes: 5 additions & 2 deletions build.gradle
@@ -38,7 +38,7 @@ buildscript {
ext.springVersion = '6.1.14'
ext.springBootVersion = '3.2.9'
ext.springKafkaVersion = '3.1.6'
-ext.openTelemetryVersion = '1.18.0'
+ext.openTelemetryVersion = '1.45.0'
ext.neo4jVersion = '5.20.0'
ext.neo4jTestVersion = '5.20.0'
ext.neo4jApocVersion = '5.20.0'
@@ -220,7 +220,10 @@ project.ext.externalDependency = [
'neo4jApocCore': 'org.neo4j.procedure:apoc-core:' + neo4jApocVersion,
'neo4jApocCommon': 'org.neo4j.procedure:apoc-common:' + neo4jApocVersion,
'opentelemetryApi': 'io.opentelemetry:opentelemetry-api:' + openTelemetryVersion,
-'opentelemetryAnnotations': 'io.opentelemetry:opentelemetry-extension-annotations:' + openTelemetryVersion,
+'opentelemetrySdk': 'io.opentelemetry:opentelemetry-sdk:' + openTelemetryVersion,
+'opentelemetrySdkTrace': 'io.opentelemetry:opentelemetry-sdk-trace:' + openTelemetryVersion,
+'opentelemetryAutoConfig': 'io.opentelemetry:opentelemetry-sdk-extension-autoconfigure:' + openTelemetryVersion,
+'opentelemetryAnnotations': 'io.opentelemetry.instrumentation:opentelemetry-instrumentation-annotations:2.11.0',
'opentracingJdbc':'io.opentracing.contrib:opentracing-jdbc:0.2.15',
'parquet': 'org.apache.parquet:parquet-avro:1.12.3',
'parquetHadoop': 'org.apache.parquet:parquet-hadoop:1.13.1',
@@ -12,7 +12,7 @@
import com.linkedin.metadata.service.ViewService;
import graphql.schema.DataFetcher;
import graphql.schema.DataFetchingEnvironment;
-import io.opentelemetry.extension.annotations.WithSpan;
+import io.opentelemetry.instrumentation.annotations.WithSpan;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
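Note: this import swap recurs across several files below. Upstream OpenTelemetry retired the io.opentelemetry.extension.annotations package, and @WithSpan now lives in io.opentelemetry.instrumentation.annotations, shipped by the separate opentelemetry-instrumentation-annotations artifact pinned in build.gradle above. A minimal sketch of the relocated annotation in use; the class and method names here are illustrative, not from the DataHub codebase:

import io.opentelemetry.instrumentation.annotations.WithSpan;
import java.util.List;

public class ViewLookupExample { // hypothetical example class

  // With the OpenTelemetry Java agent (or equivalent instrumentation)
  // attached, each call is recorded as a span named "listViews";
  // without an agent the annotation is inert.
  @WithSpan("listViews")
  public List<String> listViews(String query) {
    return List.of(query); // stand-in for the real lookup
  }
}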
MutableTypeBatchResolver.java
@@ -2,13 +2,14 @@

import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*;

-import com.codahale.metrics.Timer;
import com.linkedin.datahub.graphql.QueryContext;
import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils;
import com.linkedin.datahub.graphql.exception.AuthorizationException;
import com.linkedin.datahub.graphql.types.BatchMutableType;
import com.linkedin.metadata.utils.metrics.MetricUtils;
import graphql.schema.DataFetcher;
import graphql.schema.DataFetchingEnvironment;
+import io.datahubproject.metadata.context.OperationContext;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import org.slf4j.Logger;
@@ -33,25 +34,29 @@ public MutableTypeBatchResolver(final BatchMutableType<I, B, T> batchMutableType

@Override
public CompletableFuture<List<T>> get(DataFetchingEnvironment environment) throws Exception {
+    final QueryContext context = environment.getContext();
+    final OperationContext opContext = context.getOperationContext();

final B[] input =
bindArgument(environment.getArgument("input"), _batchMutableType.batchInputClass());

-    return GraphQLConcurrencyUtils.supplyAsync(
-        () -> {
-          Timer.Context timer = MetricUtils.timer(this.getClass(), "batchMutate").time();
-
-          try {
-            return _batchMutableType.batchUpdate(input, environment.getContext());
-          } catch (AuthorizationException e) {
-            throw e;
-          } catch (Exception e) {
-            _logger.error("Failed to perform batchUpdate", e);
-            throw new IllegalArgumentException(e);
-          } finally {
-            timer.stop();
-          }
-        },
-        this.getClass().getSimpleName(),
-        "get");
+    return opContext.withSpan(
+        "batchMutate",
+        () ->
+            GraphQLConcurrencyUtils.supplyAsync(
+                () -> {
+                  try {
+                    return _batchMutableType.batchUpdate(input, environment.getContext());
+                  } catch (AuthorizationException e) {
+                    throw e;
+                  } catch (Exception e) {
+                    _logger.error("Failed to perform batchUpdate", e);
+                    throw new IllegalArgumentException(e);
+                  }
+                },
+                this.getClass().getSimpleName(),
+                "get"),
+        MetricUtils.DROPWIZARD_METRIC,
+        "true");
}
}
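The change above replaces a hand-rolled Dropwizard Timer with DataHub's OperationContext.withSpan helper; the trailing MetricUtils.DROPWIZARD_METRIC, "true" pair reads as a span attribute asking the helper to keep emitting the legacy timer metric, though that is an inference from this call site. As a rough sketch of what such a wrapper does, expressed with the plain OpenTelemetry API (an illustration, not DataHub's implementation):

import io.opentelemetry.api.GlobalOpenTelemetry;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.api.trace.Tracer;
import io.opentelemetry.context.Scope;
import java.util.function.Supplier;

public final class SpanWrapSketch { // hypothetical helper
  private static final Tracer TRACER = GlobalOpenTelemetry.getTracer("sketch");

  // Runs the supplier inside a span, tagging it with alternating
  // key/value string attribute pairs (mirroring the call shape above).
  public static <T> T withSpan(String name, Supplier<T> work, String... attributes) {
    Span span = TRACER.spanBuilder(name).startSpan();
    for (int i = 0; i + 1 < attributes.length; i += 2) {
      span.setAttribute(attributes[i], attributes[i + 1]);
    }
    try (Scope ignored = span.makeCurrent()) {
      return work.get();
    } catch (RuntimeException e) {
      span.recordException(e); // failures show up on the span
      throw e;
    } finally {
      span.end();
    }
  }
}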
@@ -27,7 +27,7 @@
import graphql.schema.DataFetcher;
import graphql.schema.DataFetchingEnvironment;
import io.datahubproject.metadata.context.OperationContext;
-import io.opentelemetry.extension.annotations.WithSpan;
+import io.opentelemetry.instrumentation.annotations.WithSpan;
import java.net.URISyntaxException;
import java.util.Collections;
import java.util.List;
@@ -19,7 +19,7 @@
import com.linkedin.metadata.query.SearchFlags;
import graphql.schema.DataFetcher;
import graphql.schema.DataFetchingEnvironment;
-import io.opentelemetry.extension.annotations.WithSpan;
+import io.opentelemetry.instrumentation.annotations.WithSpan;
import java.util.Collections;
import java.util.concurrent.CompletableFuture;
import lombok.RequiredArgsConstructor;
MLModelGroupPropertiesMapper.java
@@ -3,8 +3,11 @@
import com.linkedin.common.urn.Urn;
import com.linkedin.datahub.graphql.QueryContext;
import com.linkedin.datahub.graphql.generated.MLModelGroupProperties;
import com.linkedin.datahub.graphql.generated.MLModelLineageInfo;
import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper;
import com.linkedin.datahub.graphql.types.common.mappers.TimeStampToAuditStampMapper;
import com.linkedin.datahub.graphql.types.mappers.EmbeddedModelMapper;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;

@@ -33,10 +36,40 @@ public MLModelGroupProperties apply(
result.setVersion(VersionTagMapper.map(context, mlModelGroupProperties.getVersion()));
}
result.setCreatedAt(mlModelGroupProperties.getCreatedAt());
if (mlModelGroupProperties.hasCreated()) {
result.setCreated(
TimeStampToAuditStampMapper.map(context, mlModelGroupProperties.getCreated()));
}
if (mlModelGroupProperties.getName() != null) {
result.setName(mlModelGroupProperties.getName());
} else {
// backfill name from URN for backwards compatibility
result.setName(entityUrn.getEntityKey().get(1)); // indexed access is safe here
}

if (mlModelGroupProperties.hasLastModified()) {
result.setLastModified(
TimeStampToAuditStampMapper.map(context, mlModelGroupProperties.getLastModified()));
}

result.setCustomProperties(
CustomPropertiesMapper.map(mlModelGroupProperties.getCustomProperties(), entityUrn));

final MLModelLineageInfo lineageInfo = new MLModelLineageInfo();
if (mlModelGroupProperties.hasTrainingJobs()) {
lineageInfo.setTrainingJobs(
mlModelGroupProperties.getTrainingJobs().stream()
.map(urn -> urn.toString())
.collect(Collectors.toList()));
}
if (mlModelGroupProperties.hasDownstreamJobs()) {
lineageInfo.setDownstreamJobs(
mlModelGroupProperties.getDownstreamJobs().stream()
.map(urn -> urn.toString())
.collect(Collectors.toList()));
}
result.setMlModelLineageInfo(lineageInfo);

return result;
}
}
MLModelPropertiesMapper.java
@@ -5,6 +5,7 @@
import com.linkedin.common.urn.Urn;
import com.linkedin.datahub.graphql.QueryContext;
import com.linkedin.datahub.graphql.generated.MLModelGroup;
import com.linkedin.datahub.graphql.generated.MLModelLineageInfo;
import com.linkedin.datahub.graphql.generated.MLModelProperties;
import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper;
import com.linkedin.datahub.graphql.types.common.mappers.TimeStampToAuditStampMapper;
@@ -87,6 +88,20 @@ public MLModelProperties apply(
.collect(Collectors.toList()));
}
result.setTags(mlModelProperties.getTags());
final MLModelLineageInfo lineageInfo = new MLModelLineageInfo();
if (mlModelProperties.hasTrainingJobs()) {
lineageInfo.setTrainingJobs(
mlModelProperties.getTrainingJobs().stream()
.map(urn -> urn.toString())
.collect(Collectors.toList()));
}
if (mlModelProperties.hasDownstreamJobs()) {
lineageInfo.setDownstreamJobs(
mlModelProperties.getDownstreamJobs().stream()
.map(urn -> urn.toString())
.collect(Collectors.toList()));
}
result.setMlModelLineageInfo(lineageInfo);

return result;
}
29 changes: 29 additions & 0 deletions datahub-graphql-core/src/main/resources/lineage.graphql
@@ -25,3 +25,32 @@ input LineageEdge {
"""
upstreamUrn: String!
}

"""
Represents lineage information for ML entities.
"""
type MLModelLineageInfo {
"""
List of jobs or processes used to train the model.
"""
trainingJobs: [String!]

"""
List of jobs or processes that use this model.
"""
downstreamJobs: [String!]
}

extend type MLModelProperties {
"""
Information related to lineage for this model
"""
mlModelLineageInfo: MLModelLineageInfo
}

extend type MLModelGroupProperties {
"""
Information related to lineage for this model group
"""
mlModelLineageInfo: MLModelLineageInfo
}
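Both mappers above attach an MLModelLineageInfo object unconditionally, even when the underlying aspect carries no jobs, so clients see a non-null wrapper whose lists may be null; that is exactly what the unit tests below assert. A small null-safe reader over the generated type, assuming the conventional generated getters that the tests use:

import com.linkedin.datahub.graphql.generated.MLModelLineageInfo;
import java.util.Collections;
import java.util.List;

public final class LineageReadExample { // illustrative helper, not DataHub code

  // Normalizes the nullable trainingJobs list to an empty list so callers
  // can iterate without null checks.
  public static List<String> trainingJobs(MLModelLineageInfo info) {
    if (info == null || info.getTrainingJobs() == null) {
      return Collections.emptyList();
    }
    return info.getTrainingJobs();
  }
}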
MLModelGroupPropertiesMapperTest.java
@@ -0,0 +1,68 @@
package com.linkedin.datahub.graphql.types.mlmodel.mappers;

import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertNull;

import com.linkedin.common.urn.Urn;
import com.linkedin.ml.metadata.MLModelGroupProperties;
import java.net.URISyntaxException;
import org.testng.annotations.Test;

public class MLModelGroupPropertiesMapperTest {

@Test
public void testMapMLModelGroupProperties() throws URISyntaxException {
// Create backend ML Model Group Properties
MLModelGroupProperties input = new MLModelGroupProperties();

// Set description
input.setDescription("a ml trust model group");

// Set Name
input.setName("ML trust model group");

// Create URN
Urn groupUrn =
Urn.createFromString(
"urn:li:mlModelGroup:(urn:li:dataPlatform:sagemaker,another-group,PROD)");

// Map the properties
com.linkedin.datahub.graphql.generated.MLModelGroupProperties result =
MLModelGroupPropertiesMapper.map(null, input, groupUrn);

// Verify mapped properties
assertNotNull(result);
assertEquals(result.getDescription(), "a ml trust model group");
assertEquals(result.getName(), "ML trust model group");

// The lineage info object is always attached; its job lists stay null when unset
assertNotNull(result.getMlModelLineageInfo());
assertNull(result.getMlModelLineageInfo().getTrainingJobs());
assertNull(result.getMlModelLineageInfo().getDownstreamJobs());
}

@Test
public void testMapWithMinimalProperties() throws URISyntaxException {
// Create backend ML Model Group Properties with minimal information
MLModelGroupProperties input = new MLModelGroupProperties();

// Create URN
Urn groupUrn =
Urn.createFromString(
"urn:li:mlModelGroup:(urn:li:dataPlatform:sagemaker,another-group,PROD)");

// Map the properties
com.linkedin.datahub.graphql.generated.MLModelGroupProperties result =
MLModelGroupPropertiesMapper.map(null, input, groupUrn);

// Verify basic mapping with minimal properties
assertNotNull(result);
assertNull(result.getDescription());

// The lineage info object is attached, but its job lists are null
assertNotNull(result.getMlModelLineageInfo());
assertNull(result.getMlModelLineageInfo().getTrainingJobs());
assertNull(result.getMlModelLineageInfo().getDownstreamJobs());
}
}