Skip to content

Commit

Permalink
Merge branch 'master' into teradata-profiling-fix
Browse files Browse the repository at this point in the history
  • Loading branch information
brock-acryl authored Feb 3, 2025
2 parents 5a6266f + 64aaaf1 commit 3fd5b1d
Show file tree
Hide file tree
Showing 74 changed files with 1,478 additions and 435 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/docker-unified.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1039,12 +1039,12 @@ jobs:
cypress_matrix=$(printf "{\"test_strategy\":\"cypress\",\"batch\":\"0\",\"batch_count\":\"$cypress_batch_count\"}"; for ((i=1;i<cypress_batch_count;i++)); do printf ",{\"test_strategy\":\"cypress\", \"batch_count\":\"$cypress_batch_count\",\"batch\":\"%d\"}" $i; done)
includes=''
if [[ "${{ needs.setup.outputs.frontend_only }}" == 'true' ]]; then
includes=$cypress_matrix
elif [ "${{ needs.setup.outputs.ingestion_only }}" == 'true' ]; then
includes=$python_matrix
elif [[ "${{ needs.setup.outputs.backend_change }}" == 'true' || "${{ needs.setup.outputs.smoke_test_change }}" == 'true' ]]; then
if [[ "${{ needs.setup.outputs.backend_change }}" == 'true' || "${{ needs.setup.outputs.smoke_test_change }}" == 'true' || "${{ needs.setup.outputs.publish }}" == 'true' ]]; then
includes="$python_matrix,$cypress_matrix"
elif [[ "${{ needs.setup.outputs.frontend_only }}" == 'true' ]]; then
includes="$cypress_matrix"
elif [[ "${{ needs.setup.outputs.ingestion_only }}" == 'true' ]]; then
includes="$python_matrix"
fi
echo "matrix={\"include\":[$includes] }" >> "$GITHUB_OUTPUT"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package com.linkedin.datahub.upgrade.config.restoreindices;

import com.linkedin.datahub.upgrade.config.SystemUpdateCondition;
import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade;
import com.linkedin.datahub.upgrade.system.restoreindices.dashboardinfo.ReindexDashboardInfo;
import com.linkedin.metadata.entity.AspectDao;
import com.linkedin.metadata.entity.EntityService;
import io.datahubproject.metadata.context.OperationContext;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Conditional;
import org.springframework.context.annotation.Configuration;

/**
 * Spring configuration that registers the {@link ReindexDashboardInfo} non-blocking system
 * upgrade. Only active when the non-blocking system-update condition holds (see
 * {@link SystemUpdateCondition.NonBlockingSystemUpdateCondition}).
 */
@Configuration
@Conditional(SystemUpdateCondition.NonBlockingSystemUpdateCondition.class)
public class ReindexDashboardInfoConfig {

  /**
   * Builds the dashboardInfo reindex upgrade from the {@code systemUpdate.dashboardInfo.*}
   * configuration properties.
   *
   * @param opContext operation context passed through to the upgrade
   * @param entityService entity service passed through to the upgrade
   * @param aspectDao aspect DAO passed through to the upgrade
   * @param enabled when false, the upgrade is constructed with no steps (effectively a no-op)
   * @param batchSize number of records handled per batch
   * @param delayMs delay between batches, in milliseconds
   * @param limit upper bound on records processed (semantics defined by the step implementation
   *     — presumably unlimited when unset; confirm against AbstractMCLStep)
   */
  @Bean
  public NonBlockingSystemUpgrade reindexDashboardInfo(
      final OperationContext opContext,
      final EntityService<?> entityService,
      final AspectDao aspectDao,
      @Value("${systemUpdate.dashboardInfo.enabled}") final boolean enabled,
      @Value("${systemUpdate.dashboardInfo.batchSize}") final Integer batchSize,
      @Value("${systemUpdate.dashboardInfo.delayMs}") final Integer delayMs,
      @Value("${systemUpdate.dashboardInfo.limit}") final Integer limit) {
    return new ReindexDashboardInfo(
        opContext, entityService, aspectDao, enabled, batchSize, delayMs, limit);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package com.linkedin.datahub.upgrade.system.restoreindices.dashboardinfo;

import com.google.common.collect.ImmutableList;
import com.linkedin.datahub.upgrade.UpgradeStep;
import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade;
import com.linkedin.metadata.entity.AspectDao;
import com.linkedin.metadata.entity.EntityService;
import io.datahubproject.metadata.context.OperationContext;
import java.util.List;
import javax.annotation.Nonnull;
import lombok.extern.slf4j.Slf4j;

/**
 * Non-blocking system upgrade that re-processes all dashboard info aspects so that the
 * dashboard-to-dashboard relationships are rebuilt in the indices.
 *
 * <p>When the upgrade is disabled via configuration it contributes no steps and is effectively a
 * no-op.
 */
@Slf4j
public class ReindexDashboardInfo implements NonBlockingSystemUpgrade {

  private final List<UpgradeStep> upgradeSteps;

  public ReindexDashboardInfo(
      @Nonnull OperationContext opContext,
      EntityService<?> entityService,
      AspectDao aspectDao,
      boolean enabled,
      Integer batchSize,
      Integer batchDelayMs,
      Integer limit) {
    // A single reindex step when enabled; otherwise an empty, immutable step list.
    upgradeSteps =
        enabled
            ? ImmutableList.of(
                new ReindexDashboardInfoStep(
                    opContext, entityService, aspectDao, batchSize, batchDelayMs, limit))
            : ImmutableList.of();
  }

  /** Identifies this upgrade by the runtime class name. */
  @Override
  public String id() {
    return this.getClass().getName();
  }

  @Override
  public List<UpgradeStep> steps() {
    return upgradeSteps;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package com.linkedin.datahub.upgrade.system.restoreindices.dashboardinfo;

import static com.linkedin.metadata.Constants.*;

import com.linkedin.datahub.upgrade.system.AbstractMCLStep;
import com.linkedin.metadata.entity.AspectDao;
import com.linkedin.metadata.entity.EntityService;
import io.datahubproject.metadata.context.OperationContext;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import lombok.extern.slf4j.Slf4j;

/**
 * Upgrade step that re-processes every dashboard info aspect (restricted to dashboard URNs) via
 * the {@link AbstractMCLStep} machinery, in order to rebuild dashboard relationships.
 *
 * <p>Note: nullability annotations are consolidated on {@code javax.annotation} to match the rest
 * of the file (the original mixed in {@code org.jetbrains.annotations.Nullable}).
 */
@Slf4j
public class ReindexDashboardInfoStep extends AbstractMCLStep {

  /**
   * @param opContext operation context used to execute the step
   * @param entityService entity service used to read/write aspects
   * @param aspectDao DAO used to stream aspect records
   * @param batchSize number of aspects processed per batch
   * @param batchDelayMs pause between batches, in milliseconds
   * @param limit maximum number of aspects to process (semantics defined by AbstractMCLStep)
   */
  public ReindexDashboardInfoStep(
      OperationContext opContext,
      EntityService<?> entityService,
      AspectDao aspectDao,
      Integer batchSize,
      Integer batchDelayMs,
      Integer limit) {
    super(opContext, entityService, aspectDao, batchSize, batchDelayMs, limit);
  }

  /** Stable step identifier — changing it would alter how prior runs of this step are tracked. */
  @Override
  public String id() {
    return "dashboard-info-v1";
  }

  /** Restricts the step to the dashboardInfo aspect. */
  @Nonnull
  @Override
  protected String getAspectName() {
    return DASHBOARD_INFO_ASPECT_NAME;
  }

  /** SQL LIKE pattern limiting the scan to dashboard entity URNs. */
  @Nullable
  @Override
  protected String getUrnLike() {
    return "urn:li:" + DASHBOARD_ENTITY_NAME + ":%";
  }
}
2 changes: 1 addition & 1 deletion docker/datahub-frontend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then

# Upgrade Alpine and base packages
# PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762
ENV JMX_VERSION=0.18.0
ENV JMX_VERSION=0.20.0
RUN apk --no-cache --update-cache --available upgrade \
&& apk --no-cache add curl sqlite libc6-compat snappy \
&& apk --no-cache add openjdk17-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \
Expand Down
2 changes: 1 addition & 1 deletion docker/datahub-gms/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION

FROM alpine:3.21 AS base

ENV JMX_VERSION=0.18.0
ENV JMX_VERSION=0.20.0

# Re-declaring args from above to make them available in this stage (will inherit default values)
ARG ALPINE_REPO_URL
Expand Down
2 changes: 1 addition & 1 deletion docker/datahub-mae-consumer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ ARG MAVEN_CENTRAL_REPO_URL
RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi

# Upgrade Alpine and base packages
ENV JMX_VERSION=0.18.0
ENV JMX_VERSION=0.20.0
# PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762
RUN apk --no-cache --update-cache --available upgrade \
&& apk --no-cache add curl bash coreutils sqlite libc6-compat snappy \
Expand Down
2 changes: 1 addition & 1 deletion docker/datahub-mce-consumer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ ARG MAVEN_CENTRAL_REPO_URL
RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi

# Upgrade Alpine and base packages
ENV JMX_VERSION=0.18.0
ENV JMX_VERSION=0.20.0
# PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762
RUN apk --no-cache --update-cache --available upgrade \
&& apk --no-cache add curl bash sqlite libc6-compat snappy \
Expand Down
2 changes: 1 addition & 1 deletion docker/datahub-upgrade/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ ARG MAVEN_CENTRAL_REPO_URL
# Optionally set corporate mirror for apk
RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi

ENV JMX_VERSION=0.18.0
ENV JMX_VERSION=0.20.0

# Upgrade Alpine and base packages
# PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762
Expand Down
179 changes: 179 additions & 0 deletions docs/actions/events/entity-change-event.md
Original file line number Diff line number Diff line change
Expand Up @@ -417,3 +417,182 @@ This event is emitted when a new entity has been hard-deleted on DataHub.
}
}
```

## Action Request Events (Proposals)

Action Request events represent proposals for changes to entities that may require approval before being applied. These events have the entity type `actionRequest` and use the `LIFECYCLE` category with the `CREATE` operation.

### Domain Association Request Event

This event is emitted when a domain association is proposed for an entity on DataHub.

#### Sample Event
```json
{
"entityType": "actionRequest",
"entityUrn": "urn:li:actionRequest:abc-123",
"category": "LIFECYCLE",
"operation": "CREATE",
"auditStamp": {
"actor": "urn:li:corpuser:jdoe",
"time": 1234567890
},
"version": 0,
"parameters": {
"domains": "[\"urn:li:domain:marketing\"]",
"actionRequestType": "DOMAIN_ASSOCIATION",
"resourceUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,example.table,PROD)",
"resourceType": "dataset"
}
}
```

### Owner Association Request Event

This event is emitted when an owner association is proposed for an entity on DataHub.

#### Sample Event
```json
{
"entityType": "actionRequest",
"entityUrn": "urn:li:actionRequest:def-456",
"category": "LIFECYCLE",
"operation": "CREATE",
"auditStamp": {
"actor": "urn:li:corpuser:jdoe",
"time": 1234567890
},
"version": 0,
"parameters": {
"owners": "[{\"type\":\"TECHNICAL_OWNER\",\"typeUrn\":\"urn:li:ownershipType:technical_owner\",\"ownerUrn\":\"urn:li:corpuser:jdoe\"}]",
"actionRequestType": "OWNER_ASSOCIATION",
"resourceUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,example.table,PROD)",
"resourceType": "dataset"
}
}
```

### Tag Association Request Event

This event is emitted when a tag association is proposed for an entity on DataHub.

#### Sample Event
```json
{
"entityType": "actionRequest",
"entityUrn": "urn:li:actionRequest:ghi-789",
"category": "LIFECYCLE",
"operation": "CREATE",
"auditStamp": {
"actor": "urn:li:corpuser:jdoe",
"time": 1234567890
},
"version": 0,
"parameters": {
"actionRequestType": "TAG_ASSOCIATION",
"resourceUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,example.table,PROD)",
"tagUrn": "urn:li:tag:pii",
"resourceType": "dataset"
}
}
```

### Create Glossary Term Request Event

This event is emitted when a new glossary term creation is proposed on DataHub.

#### Sample Event
```json
{
"entityType": "actionRequest",
"entityUrn": "urn:li:actionRequest:jkl-101",
"category": "LIFECYCLE",
"operation": "CREATE",
"auditStamp": {
"actor": "urn:li:corpuser:jdoe",
"time": 1234567890
},
"version": 0,
"parameters": {
"parentNodeUrn": "urn:li:glossaryNode:123",
"glossaryEntityName": "ExampleTerm",
"actionRequestType": "CREATE_GLOSSARY_TERM",
"resourceType": "glossaryTerm"
}
}
```

### Term Association Request Event

This event is emitted when a glossary term association is proposed for an entity on DataHub.

#### Sample Event
```json
{
"entityType": "actionRequest",
"entityUrn": "urn:li:actionRequest:mno-102",
"category": "LIFECYCLE",
"operation": "CREATE",
"auditStamp": {
"actor": "urn:li:corpuser:jdoe",
"time": 1234567890
},
"version": 0,
"parameters": {
"glossaryTermUrn": "urn:li:glossaryTerm:123",
"actionRequestType": "TERM_ASSOCIATION",
"resourceUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,example.table,PROD)",
"resourceType": "dataset"
}
}
```

### Update Description Request Event

This event is emitted when an update to an entity's description is proposed on DataHub.

#### Sample Event
```json
{
"entityType": "actionRequest",
"entityUrn": "urn:li:actionRequest:pqr-103",
"category": "LIFECYCLE",
"operation": "CREATE",
"auditStamp": {
"actor": "urn:li:corpuser:jdoe",
"time": 1234567890
},
"version": 0,
"parameters": {
"description": "Example description for a dataset.",
"actionRequestType": "UPDATE_DESCRIPTION",
"resourceUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,example.table,PROD)",
"resourceType": "dataset"
}
}
```

### Structured Property Association Request Event

This event is emitted when a structured property association is proposed for an entity on DataHub.

#### Sample Event
```json
{
"entityType": "actionRequest",
"entityUrn": "urn:li:actionRequest:stu-104",
"category": "LIFECYCLE",
"operation": "CREATE",
"auditStamp": {
"actor": "urn:li:corpuser:jdoe",
"time": 1234567890
},
"version": 0,
"parameters": {
"structuredProperties": "[{\"propertyUrn\":\"urn:li:structuredProperty:123\",\"values\":[\"value1\",\"value2\"]}]",
"actionRequestType": "STRUCTURED_PROPERTY_ASSOCIATION",
"resourceUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,example.table,PROD)",
"resourceType": "dataset"
}
}
```
8 changes: 7 additions & 1 deletion docs/how/updating-datahub.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,16 @@ This file documents any backwards-incompatible changes in DataHub and assists pe

### Other Notable Changes

- #12433: Fixes the searchable annotations in the model supporting `Dashboard` to `Dashboard` lineage within the `DashboardInfo` aspect. Mainly, users of Sigma and PowerBI Apps ingestion may be affected by this adjustment. Consequently, a [reindex](https://datahubproject.io/docs/how/restore-indices/) will be automatically triggered during the system upgrade.

## 0.15.0

- OpenAPI Update: PIT Keep Alive parameter added to scroll endpoints. NOTE: This parameter requires the `pointInTimeCreationEnabled` feature flag to be enabled and the `elasticSearch.implementation` configuration to be `elasticsearch`. This feature is not supported for OpenSearch at this time and the parameter will not be respected without both of these set.
- OpenAPI Update 2: Previously there was an incorrectly marked parameter named `sort` on the generic list entities endpoint for v3. This parameter is deprecated and only supports a single string value while the documentation indicates it supports a list of strings. This documentation error has been fixed and the correct field, `sortCriteria`, is now documented which supports a list of strings.

### Known Issues

- Persistence Exception: No Rows Updated may occur if a transaction does not change any aspect's data.

### Breaking Changes

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,9 @@ In order to update the executor, ie. to deploy a new container version, you'll n

### Deploying on Kubernetes

The Helm chart [datahub-executor-worker](https://github.com/acryldata/datahub-executor-helm/tree/main/charts/datahub-executor-worker) can be used to deploy on a Kubernetes cluster. These instructions also apply for deploying to Amazon Elastic Kubernetes Service (EKS) or Google Kubernetes Engine (GKE).
The Helm chart [datahub-executor-worker](https://executor-helm.acryl.io/index.yaml) can be used to deploy on a Kubernetes cluster. These instructions also apply for deploying to Amazon Elastic Kubernetes Service (EKS) or Google Kubernetes Engine (GKE).

1. **Download Chart**: Download the [latest release](https://github.com/acryldata/datahub-executor-helm/releases) of the chart
1. **Download Chart**: Download the [latest release](https://executor-helm.acryl.io/index.yaml) of the chart
2. **Unpack the release archive**:
```
tar zxvf v0.0.4.tar.gz --strip-components=2
Expand Down
Loading

0 comments on commit 3fd5b1d

Please sign in to comment.