Skip to content

fix(ingestion/s3): groupby group-splitting issue #35557

fix(ingestion/s3): groupby group-splitting issue

fix(ingestion/s3): groupby group-splitting issue #35557

Workflow file for this run

name: build & test
on:
push:
branches:
- master
paths-ignore:
- "docs/**"
- "**.md"
pull_request:
branches:
- "**"
paths-ignore:
- "docs/**"
- "**.md"
workflow_dispatch:
release:
types: [published]
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
setup:
runs-on: ubuntu-latest
outputs:
frontend_change: ${{ steps.ci-optimize.outputs.frontend-change == 'true' }}
ingestion_change: ${{ steps.ci-optimize.outputs.ingestion-change == 'true' }}
backend_change: ${{ steps.ci-optimize.outputs.backend-change == 'true' }}
docker_change: ${{ steps.ci-optimize.outputs.docker-change == 'true' }}
frontend_only: ${{ steps.ci-optimize.outputs.frontend-only == 'true' }}
ingestion_only: ${{ steps.ci-optimize.outputs.ingestion-only == 'true' }}
kafka_setup_change: ${{ steps.ci-optimize.outputs.kafka-setup-change == 'true' }}
mysql_setup_change: ${{ steps.ci-optimize.outputs.mysql-setup-change == 'true' }}
postgres_setup_change: ${{ steps.ci-optimize.outputs.postgres-setup-change == 'true' }}
elasticsearch_setup_change: ${{ steps.ci-optimize.outputs.elasticsearch-setup-change == 'true' }}
steps:
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
- uses: ./.github/actions/ci-optimization
id: ci-optimize
build:
strategy:
fail-fast: false
matrix:
command: [
# metadata-ingestion and airflow-plugin each have dedicated build jobs
"except_metadata_ingestion",
"frontend",
]
timezone: ["UTC"]
include:
# We only need the timezone variation for frontend tests.
- command: "frontend"
timezone: "America/New_York"
runs-on: ubuntu-latest
timeout-minutes: 60
needs: setup
steps:
- name: Free up disk space
run: |
sudo apt-get remove 'dotnet-*' azure-cli || true
sudo rm -rf /usr/local/lib/android/ || true
sudo docker image prune -a -f || true
- uses: szenius/set-timezone@v1.2
with:
timezoneLinux: ${{ matrix.timezone }}
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
- uses: actions/setup-python@v5
with:
python-version: "3.10"
- uses: actions/cache@v4
with:
path: |
~/.cache/uv
key: ${{ runner.os }}-uv-${{ hashFiles('**/requirements.txt') }}
- name: Install dependencies
run: ./metadata-ingestion/scripts/install_deps.sh
- name: Set up JDK 17
uses: actions/setup-java@v4
with:
distribution: "zulu"
java-version: 17
- uses: gradle/actions/setup-gradle@v3
- name: Gradle build (and test) for NOT metadata ingestion
if: ${{ matrix.command == 'except_metadata_ingestion' && needs.setup.outputs.backend_change == 'true' }}
# datahub-schematron:cli excluded due to dependency on metadata-ingestion
run: |
./gradlew build \
-x :metadata-ingestion:build \
-x :metadata-ingestion:check \
-x :docs-website:build \
-x :metadata-integration:java:spark-lineage:test \
-x :metadata-io:test \
-x :metadata-ingestion-modules:airflow-plugin:build \
-x :metadata-ingestion-modules:airflow-plugin:check \
-x :metadata-ingestion-modules:dagster-plugin:build \
-x :metadata-ingestion-modules:dagster-plugin:check \
-x :metadata-ingestion-modules:prefect-plugin:build \
-x :metadata-ingestion-modules:prefect-plugin:check \
-x :metadata-ingestion-modules:gx-plugin:build \
-x :metadata-ingestion-modules:gx-plugin:check \
-x :datahub-frontend:build \
-x :datahub-web-react:build \
-x :metadata-integration:java:datahub-schematron:cli:test \
--parallel
- name: Gradle build (and test) for frontend
if: ${{ matrix.command == 'frontend' && needs.setup.outputs.frontend_change == 'true' }}
run: |
./gradlew :datahub-frontend:build :datahub-web-react:build --parallel
- name: Gradle compile (jdk8) for legacy Spark
if: ${{ matrix.command == 'except_metadata_ingestion' && needs.setup.outputs.backend_change == 'true' }}
run: |
./gradlew -PjavaClassVersionDefault=8 :metadata-integration:java:spark-lineage:compileJava
- name: Gather coverage files
run: |
echo "BACKEND_FILES=`find ./build/coverage-reports/ -type f | grep -E '(metadata-models|entity-registry|datahuyb-graphql-core|metadata-io|metadata-jobs|metadata-utils|metadata-service|medata-dao-impl|metadata-operation|li-utils|metadata-integration|metadata-events|metadata-auth|ingestion-scheduler|notifications|datahub-upgrade)' | xargs | sed 's/ /,/g'`" >> $GITHUB_ENV
echo "FRONTEND_FILES=`find ./build/coverage-reports/ -type f | grep -E '(datahub-frontend|datahub-web-react).*\.(xml|json)$' | xargs | sed 's/ /,/g'`" >> $GITHUB_ENV
- uses: actions/upload-artifact@v4
if: always()
with:
name: Test Results (build) - ${{ matrix.command}}-${{ matrix.timezone }}
path: |
**/build/reports/tests/test/**
**/build/test-results/test/**
**/junit.*.xml
!**/binary/**
- name: Ensure codegen is updated
uses: ./.github/actions/ensure-codegen-updated
- name: Upload backend coverage to Codecov
if: ${{ matrix.command == 'except_metadata_ingestion' && needs.setup.outputs.backend_change == 'true' }}
uses: codecov/codecov-action@v5
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: ${{ env.BACKEND_FILES }}
disable_search: true
#handle_no_reports_found: true
fail_ci_if_error: false
flags: backend
name: ${{ matrix.command }}
verbose: true
- name: Upload frontend coverage to Codecov
if: ${{ matrix.command == 'frontend' && needs.setup.outputs.frontend_change == 'true' }}
uses: codecov/codecov-action@v5
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: ${{ env.FRONTEND_FILES }}
disable_search: true
#handle_no_reports_found: true
fail_ci_if_error: false
flags: frontend
name: ${{ matrix.command }}
verbose: true
- name: Upload test results to Codecov
if: ${{ !cancelled() }}
uses: codecov/test-results-action@v1
with:
token: ${{ secrets.CODECOV_TOKEN }}
quickstart-compose-validation:
runs-on: ubuntu-latest
needs: setup
if: ${{ needs.setup.outputs.docker_change == 'true' }}
steps:
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
- uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Quickstart Compose Validation
run: ./docker/quickstart/generate_and_compare.sh
event-file:
runs-on: ubuntu-latest
steps:
- name: Upload
uses: actions/upload-artifact@v4
with:
name: Event File
path: ${{ github.event_path }}