diff --git a/.github/ISSUE_TEMPLATE/airflow_bug_report.yml b/.github/ISSUE_TEMPLATE/airflow_bug_report.yml index 3509d8a450af2..ea797b2295138 100644 --- a/.github/ISSUE_TEMPLATE/airflow_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/airflow_bug_report.yml @@ -25,7 +25,7 @@ body: the latest release or main to see if the issue is fixed before reporting it. multiple: false options: - - "2.8.2" + - "2.8.3" - "main (development)" - "Other Airflow 2 version (please specify below)" validations: diff --git a/.github/actions/cleanup-docker/action.yml b/.github/actions/cleanup-docker/action.yml new file mode 100644 index 0000000000000..fa64e5b264dec --- /dev/null +++ b/.github/actions/cleanup-docker/action.yml @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +--- +name: 'Cleanup docker' +description: 'Cleans up docker' +runs: + using: "composite" + steps: + - name: "Cleanup docker" + shell: bash + run: docker system prune --all --force --volumes diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml index c1619874a95bc..2b4abed5ee31f 100644 --- a/.github/workflows/build-images.yml +++ b/.github/workflows/build-images.yml @@ -88,6 +88,9 @@ jobs: }}" if: github.repository == 'apache/airflow' steps: + - name: "Cleanup repo" + shell: bash + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: Discover PR merge commit id: discover-pr-merge-commit run: | @@ -113,8 +116,6 @@ jobs: } }' --jq '.data.node.labels.nodes[]' | jq --slurp -c '[.[].name]' >> ${GITHUB_OUTPUT} if: github.event_name == 'pull_request_target' - - name: Cleanup repo - run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - uses: actions/checkout@v4 with: ref: ${{ env.TARGET_COMMIT_SHA }} @@ -135,6 +136,8 @@ jobs: # COMPOSITE ACTIONS. WE CAN RUN ANYTHING THAT IS IN THE TARGET BRANCH AND THERE IS NO RISK THAT # CODE WILL BE RUN FROM THE PR. #################################################################################################### + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Setup python" uses: actions/setup-python@v5 with: @@ -183,7 +186,8 @@ jobs: VERSION_SUFFIX_FOR_PYPI: "dev0" USE_UV: "true" steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - uses: actions/checkout@v4 with: @@ -227,6 +231,8 @@ jobs: # BE RUN SAFELY AS PART OF DOCKER BUILD. BECAUSE IT RUNS INSIDE THE DOCKER CONTAINER AND IT IS # ISOLATED FROM THE RUNNER. 
#################################################################################################### + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: Build CI Image ${{ matrix.python-version }}:${{env.IMAGE_TAG}} uses: ./.github/actions/build-ci-images with: @@ -262,7 +268,8 @@ jobs: INCLUDE_NOT_READY_PROVIDERS: "true" USE_UV: "true" steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - uses: actions/checkout@v4 with: @@ -306,6 +313,8 @@ jobs: # BE RUN SAFELY AS PART OF DOCKER BUILD. BECAUSE IT RUNS INSIDE THE DOCKER CONTAINER AND IT IS # ISOLATED FROM THE RUNNER. #################################################################################################### + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Install Breeze" uses: ./.github/actions/breeze with: diff --git a/.github/workflows/ci-image-build.yml b/.github/workflows/ci-image-build.yml index c0ae69a4b86b0..aafcccacb0c64 100644 --- a/.github/workflows/ci-image-build.yml +++ b/.github/workflows/ci-image-build.yml @@ -102,7 +102,8 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" DEFAULT_CONSTRAINTS_BRANCH: ${{ inputs.constraints-branch }} VERSION_SUFFIX_FOR_PYPI: "dev0" steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" if: inputs.do-build == 'true' - uses: actions/checkout@v4 @@ -110,6 +111,9 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" ref: ${{ needs.build-info.outputs.targetCommitSha }} persist-credentials: false if: inputs.do-build == 'true' + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker + if: inputs.do-build == 'true' - name: "Install Breeze" uses: ./.github/actions/breeze with: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 81368e6d5a6f7..64ecdb93a1f29 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -139,12 +139,15 @@ jobs: canary-run: ${{ steps.source-run-info.outputs.canary-run }} run-coverage: ${{ steps.source-run-info.outputs.run-coverage }} steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: Fetch incoming commit ${{ github.sha }} with its parent uses: actions/checkout@v4 with: @@ -219,12 +222,15 @@ jobs: needs.build-info.outputs.canary-run == 'true' && needs.build-info.outputs.default-branch == 'main' steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Install Breeze" uses: ./.github/actions/breeze - name: "Check that image builds quickly" @@ -262,13 +268,16 @@ jobs: runs-on: ${{fromJSON(needs.build-info.outputs.runs-on)}} needs: [build-info] steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - uses: actions/checkout@v4 with: # Need to fetch all history for selective 
checks tests fetch-depth: 0 persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - uses: actions/setup-python@v5 with: python-version: "${{needs.build-info.outputs.default-python-version}}" @@ -285,12 +294,15 @@ jobs: needs: [build-info] if: needs.build-info.outputs.run-www-tests == 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Setup node" uses: actions/setup-node@v4 with: @@ -313,13 +325,16 @@ jobs: needs: [build-info] if: needs.build-info.outputs.needs-api-codegen == 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: fetch-depth: 2 persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: @@ -407,7 +422,8 @@ jobs: # Force more parallelism for pull even on public images PARALLELISM: 6 steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" if: needs.build-info.outputs.in-workflow-build == 'false' - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" @@ -415,6 +431,9 @@ jobs: with: persist-credentials: false if: needs.build-info.outputs.in-workflow-build == 'false' + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker + if: needs.build-info.outputs.in-workflow-build == 'false' - name: "Install Breeze" uses: ./.github/actions/breeze if: needs.build-info.outputs.in-workflow-build == 'false' @@ -442,12 +461,15 @@ jobs: VERSION_SUFFIX_FOR_PYPI: "dev0" if: needs.build-info.outputs.ci-image-build == 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Install Breeze" uses: ./.github/actions/breeze with: @@ -515,12 +537,15 @@ jobs: needs.build-info.outputs.basic-checks-only == 'false' && needs.build-info.outputs.latest-versions-only != 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: > Prepare breeze & CI image: ${{needs.build-info.outputs.default-python-version}}:${{env.IMAGE_TAG}} uses: ./.github/actions/prepare_breeze_and_image @@ -555,12 +580,15 @@ jobs: needs.build-info.outputs.canary-run == 'true' && needs.build-info.outputs.latest-versions-only != 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: 
persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker # Install python from scratch. No cache used. We always want to have fresh version of everything - uses: actions/setup-python@v5 with: @@ -641,12 +669,15 @@ jobs: RUNS_ON: "${{needs.build-info.outputs.runs-on}}" if: needs.build-info.outputs.basic-checks-only == 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Setup python" uses: actions/setup-python@v5 with: @@ -694,12 +725,15 @@ jobs: RUNS_ON: "${{needs.build-info.outputs.runs-on}}" PYTHON_MAJOR_MINOR_VERSION: "${{needs.build-info.outputs.default-python-version}}" steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: > Prepare breeze & CI image: ${{needs.build-info.outputs.default-python-version}}:${{env.IMAGE_TAG}} uses: ./.github/actions/prepare_breeze_and_image @@ -760,12 +794,15 @@ jobs: RUNS_ON: "${{needs.build-info.outputs.runs-on}}" PYTHON_MAJOR_MINOR_VERSION: "${{needs.build-info.outputs.default-python-version}}" steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: > Prepare breeze & CI image: ${{needs.build-info.outputs.default-python-version}}:${{env.IMAGE_TAG}} uses: ./.github/actions/prepare_breeze_and_image @@ -793,12 +830,15 @@ jobs: needs.build-info.outputs.skip-provider-tests != 'true' && needs.build-info.outputs.latest-versions-only != 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: > Prepare breeze & CI image: ${{needs.build-info.outputs.default-python-version}}:${{env.IMAGE_TAG}} uses: ./.github/actions/prepare_breeze_and_image @@ -856,12 +896,15 @@ jobs: needs.build-info.outputs.canary-run == 'true' && needs.build-info.outputs.skip-provider-tests != 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: > Prepare breeze & CI image: ${{needs.build-info.outputs.default-python-version}}:${{env.IMAGE_TAG}} uses: ./.github/actions/prepare_breeze_and_image @@ -926,13 +969,15 @@ jobs: VERSION_SUFFIX_FOR_PYPI: "dev0" if: needs.build-info.outputs.skip-provider-tests != 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: 
docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - if: contains(fromJson(needs.build-info.outputs.python-versions),matrix.python-version) - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: > Prepare breeze & CI image: ${{matrix.python-version}}:${{env.IMAGE_TAG}} uses: ./.github/actions/prepare_breeze_and_image @@ -978,12 +1023,15 @@ jobs: RUNS_ON: "${{needs.build-info.outputs.runs-on}}" PYTHON_MAJOR_MINOR_VERSION: "${{needs.build-info.outputs.default-python-version}}" steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: > Prepare breeze & CI image: ${{needs.build-info.outputs.default-python-version}}:${{env.IMAGE_TAG}} uses: ./.github/actions/prepare_breeze_and_image @@ -1022,14 +1070,14 @@ jobs: tests-postgres: timeout-minutes: 130 name: > - DB:Postgres${{matrix.postgres-version}},Py${{matrix.python-version}}: + DB:Postgres${{matrix.backend-version}},Py${{matrix.python-version}}: ${{needs.build-info.outputs.parallel-test-types-list-as-string}} runs-on: ${{fromJSON(needs.build-info.outputs.runs-on)}} needs: [build-info, wait-for-ci-images] strategy: matrix: python-version: "${{fromJson(needs.build-info.outputs.python-versions)}}" - postgres-version: "${{fromJson(needs.build-info.outputs.postgres-versions)}}" + backend-version: "${{fromJson(needs.build-info.outputs.postgres-versions)}}" exclude: "${{fromJson(needs.build-info.outputs.postgres-exclude)}}" fail-fast: false env: @@ -1040,19 +1088,20 @@ jobs: DEBUG_RESOURCES: "${{needs.build-info.outputs.debug-resources}}" BACKEND: "postgres" PYTHON_MAJOR_MINOR_VERSION: "${{matrix.python-version}}" - POSTGRES_VERSION: "${{matrix.postgres-version}}" - BACKEND_VERSION: "${{matrix.postgres-version}}" - JOB_ID: "postgres-${{matrix.postgres-version}}-${{matrix.python-version}}" + BACKEND_VERSION: "${{matrix.backend-version}}" + JOB_ID: "postgres-${{matrix.backend-version}}-${{matrix.python-version}}" ENABLE_COVERAGE: "${{needs.build-info.outputs.run-coverage}}" if: needs.build-info.outputs.run-tests == 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Prepare breeze & CI image: ${{matrix.python-version}}:${{env.IMAGE_TAG}}" uses: ./.github/actions/prepare_breeze_and_image - name: > @@ -1066,7 +1115,7 @@ jobs: --parallel-test-types "${{needs.build-info.outputs.parallel-test-types-list-as-string}}" - name: "Tests ARM Pytest collection: ${{matrix.python-version}}" run: breeze testing db-tests --collect-only --remove-arm-packages - if: matrix.postgres-version == needs.build-info.outputs.default-postgres-version + if: matrix.backend-version == needs.build-info.outputs.default-postgres-version - name: > Post Tests success: Postgres" uses: ./.github/actions/post_tests_success @@ -1094,7 +1143,6 @@ jobs: ENABLE_COVERAGE: "${{needs.build-info.outputs.run-coverage}}" PYTHON_MAJOR_MINOR_VERSION: 
"${{needs.build-info.outputs.default-python-version}}" PYTHON_VERSION: "${needs.build-info.outputs.default-python-version}}" - POSTGRES_VERSION: "${{needs.build-info.outputs.default-postgres-version}}" BACKEND_VERSION: "${{needs.build-info.outputs.default-postgres-version}}" DOWNGRADE_SQLALCHEMY: "true" JOB_ID: > @@ -1102,13 +1150,15 @@ jobs: ${{needs.build-info.outputs.default-postgres-version}} if: needs.build-info.outputs.run-tests == 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: > Prepare breeze & CI image: ${{needs.build-info.outputs.default-python-version}}:${{env.IMAGE_TAG}} uses: ./.github/actions/prepare_breeze_and_image @@ -1143,7 +1193,6 @@ jobs: ENABLE_COVERAGE: "${{needs.build-info.outputs.run-coverage}}" PYTHON_MAJOR_MINOR_VERSION: "${{needs.build-info.outputs.default-python-version}}" PYTHON_VERSION: "${needs.build-info.outputs.default-python-version}}" - POSTGRES_VERSION: "${{needs.build-info.outputs.default-postgres-version}}" BACKEND_VERSION: "${{needs.build-info.outputs.default-postgres-version}}" UPGRADE_BOTO: "true" JOB_ID: > @@ -1151,13 +1200,15 @@ jobs: ${{needs.build-info.outputs.default-postgres-version}} if: needs.build-info.outputs.run-tests == 'true' && needs.build-info.outputs.run-amazon-tests == 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: > Prepare breeze & CI image: ${{needs.build-info.outputs.default-python-version}}:${{env.IMAGE_TAG}} uses: ./.github/actions/prepare_breeze_and_image @@ -1195,7 +1246,6 @@ jobs: ENABLE_COVERAGE: "${{needs.build-info.outputs.run-coverage}}" PYTHON_MAJOR_MINOR_VERSION: "${{needs.build-info.outputs.default-python-version}}" PYTHON_VERSION: "${needs.build-info.outputs.default-python-version}}" - POSTGRES_VERSION: "${{needs.build-info.outputs.default-postgres-version}}" BACKEND_VERSION: "${{needs.build-info.outputs.default-postgres-version}}" PYDANTIC: ${{ matrix.pydantic }} JOB_ID: > @@ -1203,13 +1253,15 @@ jobs: ${{needs.build-info.outputs.default-postgres-version}} if: needs.build-info.outputs.run-tests == 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: > Prepare breeze & CI image: ${{needs.build-info.outputs.default-python-version}}:${{env.IMAGE_TAG}} uses: ./.github/actions/prepare_breeze_and_image @@ -1244,7 +1296,6 @@ jobs: ENABLE_COVERAGE: "${{needs.build-info.outputs.run-coverage}}" PYTHON_MAJOR_MINOR_VERSION: "${{needs.build-info.outputs.default-python-version}}" PYTHON_VERSION: "${needs.build-info.outputs.default-python-version}}" - POSTGRES_VERSION: "${{needs.build-info.outputs.default-postgres-version}}" BACKEND_VERSION: "${{needs.build-info.outputs.default-postgres-version}}" DOWNGRADE_PENDULUM: "true" 
JOB_ID: > @@ -1254,13 +1305,15 @@ jobs: needs.build-info.outputs.run-tests == 'true' && needs.build-info.outputs.latest-versions-only != 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: > Prepare breeze & CI image: ${{needs.build-info.outputs.default-python-version}}:${{env.IMAGE_TAG}} uses: ./.github/actions/prepare_breeze_and_image @@ -1295,7 +1348,6 @@ jobs: ENABLE_COVERAGE: "${{needs.build-info.outputs.run-coverage}}" PYTHON_MAJOR_MINOR_VERSION: "${{needs.build-info.outputs.default-python-version}}" PYTHON_VERSION: "${needs.build-info.outputs.default-python-version}}" - POSTGRES_VERSION: "${{needs.build-info.outputs.default-postgres-version}}" BACKEND_VERSION: "${{needs.build-info.outputs.default-postgres-version}}" AIRFLOW_ENABLE_AIP_44: "false" JOB_ID: > @@ -1303,13 +1355,15 @@ jobs: ${{needs.build-info.outputs.default-postgres-version}} if: needs.build-info.outputs.run-tests == 'true' && needs.build-info.outputs.run-amazon-tests == 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: > Prepare breeze & CI image: ${{needs.build-info.outputs.default-python-version}}:${{env.IMAGE_TAG}} uses: ./.github/actions/prepare_breeze_and_image @@ -1330,14 +1384,14 @@ jobs: tests-mysql: timeout-minutes: 130 name: > - DB:MySQL${{matrix.mysql-version}}, Py${{matrix.python-version}}: + DB:MySQL${{matrix.backend-version}}, Py${{matrix.python-version}}: ${{needs.build-info.outputs.parallel-test-types-list-as-string}} runs-on: ${{fromJSON(needs.build-info.outputs.runs-on)}} needs: [build-info, wait-for-ci-images] strategy: matrix: python-version: "${{fromJson(needs.build-info.outputs.python-versions)}}" - mysql-version: "${{fromJson(needs.build-info.outputs.mysql-versions)}}" + backend-version: "${{fromJson(needs.build-info.outputs.mysql-versions)}}" exclude: "${{fromJson(needs.build-info.outputs.mysql-exclude)}}" fail-fast: false env: @@ -1349,18 +1403,19 @@ jobs: BACKEND: "mysql" ENABLE_COVERAGE: "${{needs.build-info.outputs.run-coverage}}" PYTHON_MAJOR_MINOR_VERSION: "${{matrix.python-version}}" - MYSQL_VERSION: "${{matrix.mysql-version}}" - BACKEND_VERSION: "${{matrix.mysql-version}}" - JOB_ID: "mysql-${{matrix.mysql-version}}-${{matrix.python-version}}" + BACKEND_VERSION: "${{matrix.backend-version}}" + JOB_ID: "mysql-${{matrix.backend-version}}-${{matrix.python-version}}" if: needs.build-info.outputs.run-tests == 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Prepare breeze & CI image: ${{matrix.python-version}}:${{env.IMAGE_TAG}}" uses: ./.github/actions/prepare_breeze_and_image - name: > @@ -1406,13 +1461,15 @@ jobs: BACKEND_VERSION: "" JOB_ID: "sqlite-${{matrix.python-version}}" steps: - - 
name: Cleanup repo + - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Prepare breeze & CI image: ${{matrix.python-version}}:${{env.IMAGE_TAG}}" uses: ./.github/actions/prepare_breeze_and_image - name: > @@ -1447,19 +1504,20 @@ jobs: BACKEND: "postgres" ENABLE_COVERAGE: "${{needs.build-info.outputs.run-coverage}}" PYTHON_MAJOR_MINOR_VERSION: "${{needs.build-info.outputs.default-python-version}}" - POSTGRES_VERSION: "${{needs.build-info.outputs.default-postgres-version}}" BACKEND_VERSION: "${{needs.build-info.outputs.default-postgres-version}}" JOB_ID: "integration-postgres" SKIP_PROVIDER_TESTS: "${{needs.build-info.outputs.skip-provider-tests}}" if: needs.build-info.outputs.run-tests == 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Prepare breeze & CI image: ${{env.PYTHON_MAJOR_MINOR_VERSION}}:${{env.IMAGE_TAG}}" uses: ./.github/actions/prepare_breeze_and_image - name: "Integration Tests Postgres: cassandra" @@ -1522,13 +1580,12 @@ jobs: BACKEND: "mysql" ENABLE_COVERAGE: "${{needs.build-info.outputs.run-coverage}}" PYTHON_MAJOR_MINOR_VERSION: "${{needs.build-info.outputs.default-python-version}}" - MYSQL_VERSION: "${{needs.build-info.outputs.default-mysql-version}}" BACKEND_VERSION: "${{needs.build-info.outputs.default-mysql-version}}" JOB_ID: "integration-mysql" SKIP_PROVIDER_TESTS: "${{needs.build-info.outputs.skip-provider-tests}}" if: needs.build-info.outputs.run-tests == 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" if: needs.build-info.outputs.is-airflow-runner == 'true' @@ -1537,6 +1594,9 @@ jobs: with: persist-credentials: false if: needs.build-info.outputs.is-airflow-runner == 'true' + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker + if: needs.build-info.outputs.is-airflow-runner == 'true' - name: "Prepare breeze & CI image: ${{env.PYTHON_MAJOR_MINOR_VERSION}}:${{env.IMAGE_TAG}}" uses: ./.github/actions/prepare_breeze_and_image if: needs.build-info.outputs.is-airflow-runner == 'true' @@ -1569,13 +1629,15 @@ jobs: ENABLE_COVERAGE: "${{needs.build-info.outputs.run-coverage}}" if: needs.build-info.outputs.run-tests == 'true' && needs.build-info.outputs.is-airflow-runner == 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: > Prepare breeze & CI image: ${{needs.build-info.outputs.default-python-version}}:${{env.IMAGE_TAG}} uses: ./.github/actions/prepare_breeze_and_image @@ -1585,14 +1647,12 @@ jobs: env: BACKEND: "postgres" BACKEND_VERSION: ${{needs.build-info.outputs.default-postgres-version}} - POSTGRES_VERSION: ${{needs.build-info.outputs.default-postgres-version}} - name: > Tests: 
mysql:${{needs.build-info.outputs.default-python-version}}:Quarantined run: breeze testing tests || true env: BACKEND: "mysql" BACKEND_VERSION: ${{needs.build-info.outputs.default-mysql-version}} - MYSQL_VERSION: ${{needs.build-info.outputs.default-mysql-version}} - name: > Tests: sqlite:${{needs.build-info.outputs.default-python-version}}:Quarantined run: breeze testing tests || true @@ -1627,13 +1687,15 @@ jobs: ENABLE_COVERAGE: "${{needs.build-info.outputs.run-coverage}}" if: needs.build-info.outputs.run-tests == 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: > Prepare breeze & CI image: ${{needs.build-info.outputs.default-python-version}}:${{env.IMAGE_TAG}} uses: ./.github/actions/prepare_breeze_and_image @@ -1667,13 +1729,15 @@ jobs: needs.build-info.outputs.run-tests == 'true' && needs.build-info.outputs.latest-versions-only != 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: > Prepare breeze & CI image: ${{needs.build-info.outputs.default-python-version}}:${{env.IMAGE_TAG}} uses: ./.github/actions/prepare_breeze_and_image @@ -1704,12 +1768,15 @@ jobs: env: RUNS_ON: "${{needs.build-info.outputs.runs-on}}" steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Download all artifacts from the current build" uses: actions/download-artifact@v4 with: @@ -1797,7 +1864,8 @@ jobs: # Force more parallelism for pull on public images PARALLELISM: 6 steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" if: needs.build-info.outputs.in-workflow-build == 'false' - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" @@ -1805,6 +1873,9 @@ jobs: with: persist-credentials: false if: needs.build-info.outputs.in-workflow-build == 'false' + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker + if: needs.build-info.outputs.in-workflow-build == 'false' - name: "Install Breeze" uses: ./.github/actions/breeze if: needs.build-info.outputs.in-workflow-build == 'false' @@ -1826,13 +1897,16 @@ jobs: needs: [build-info, wait-for-prod-images] if: needs.build-info.outputs.prod-image-build == 'true' steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: fetch-depth: 2 persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Install Breeze" uses: ./.github/actions/breeze - name: > @@ -1867,13 +1941,16 @@ jobs: env: PYTHON_MAJOR_MINOR_VERSION: 
"${{needs.build-info.outputs.default-python-version}}" steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: fetch-depth: 2 persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Install Breeze" uses: ./.github/actions/breeze - name: Pull PROD images ${{ env.PYTHON_VERSIONS }}:${{ needs.build-info.outputs.image-tag }} @@ -1909,12 +1986,15 @@ jobs: ( needs.build-info.outputs.run-kubernetes-tests == 'true' || needs.build-info.outputs.needs-helm-tests == 'true') steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Install Breeze" uses: ./.github/actions/breeze id: breeze @@ -1986,13 +2066,16 @@ jobs: PYTHON_VERSIONS: ${{ needs.build-info.outputs.python-versions-list-as-string }} if: needs.build-info.outputs.upgrade-to-newer-dependencies != 'false' steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: # Needed to perform push action persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Set constraints branch name" id: constraints-branch run: ./scripts/ci/constraints/ci_branch_constraints.sh >> ${GITHUB_OUTPUT} diff --git a/.github/workflows/helm-tests.yml b/.github/workflows/helm-tests.yml index efa6ed89325d9..836999038cb66 100644 --- a/.github/workflows/helm-tests.yml +++ b/.github/workflows/helm-tests.yml @@ -63,12 +63,15 @@ jobs: IMAGE_TAG: "${{ inputs.image-tag }}" GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Prepare breeze & CI image: ${{inputs.default-python-version}}:${{inputs.image-tag}}" uses: ./.github/actions/prepare_breeze_and_image with: @@ -84,12 +87,15 @@ jobs: RUNS_ON: "${{inputs.runs-on}}" PYTHON_MAJOR_MINOR_VERSION: "${{inputs.default-python-version}}" steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Install Breeze" uses: ./.github/actions/breeze with: diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 4e5f98a03d3b2..7fdaec187b275 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -61,7 +61,8 @@ jobs: IMAGE_TAG: "${{ inputs.image-tag }}" GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf 
/workspace/*" if: inputs.needs-mypy == 'true' - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" @@ -69,21 +70,15 @@ jobs: with: persist-credentials: false if: inputs.needs-mypy == 'true' + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker + if: inputs.needs-mypy == 'true' - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}:${{ inputs.image-tag }}" uses: ./.github/actions/prepare_breeze_and_image id: breeze with: python-version: ${{ inputs.breeze-python-version }} if: inputs.needs-mypy == 'true' - - name: Cache pre-commit envs - uses: actions/cache@v4 - with: - path: ~/.cache/pre-commit - # yamllint disable-line rule:line-length - key: "pre-commit-${{steps.breeze.outputs.host-python-version}}-${{ hashFiles('.pre-commit-config.yaml') }}" - restore-keys: | - pre-commit-${{steps.breeze.outputs.host-python-version}}- - if: inputs.needs-mypy == 'true' - name: "MyPy checks for ${{ matrix.mypy-folder }}" run: | pip install pre-commit @@ -94,4 +89,5 @@ jobs: SKIP_GROUP_OUTPUT: "true" DEFAULT_BRANCH: ${{ inputs.default-branch }} RUFF_FORMAT: "github" + INCLUDE_MYPY_VOLUME: "false" if: inputs.needs-mypy == 'true' diff --git a/.github/workflows/prod-image-build.yml b/.github/workflows/prod-image-build.yml index b6f17b1f70680..f9768eca1a9f1 100644 --- a/.github/workflows/prod-image-build.yml +++ b/.github/workflows/prod-image-build.yml @@ -106,7 +106,8 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" DEFAULT_CONSTRAINTS_BRANCH: ${{ inputs.constraints-branch }} VERSION_SUFFIX_FOR_PYPI: "dev0" steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" if: inputs.do-build == 'true' - uses: actions/checkout@v4 @@ -114,6 +115,9 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" ref: ${{ needs.build-info.outputs.targetCommitSha }} persist-credentials: false if: inputs.do-build == 'true' + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker + if: inputs.do-build == 'true' - name: "Install Breeze" uses: ./.github/actions/breeze with: diff --git a/.github/workflows/push-image-cache.yml b/.github/workflows/push-image-cache.yml index 436526694b15e..037a5f9aa6e80 100644 --- a/.github/workflows/push-image-cache.yml +++ b/.github/workflows/push-image-cache.yml @@ -91,12 +91,15 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} COMMIT_SHA: ${{ github.sha }} steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Install Breeze" uses: ./.github/actions/breeze with: @@ -145,5 +148,3 @@ jobs: breeze prod-image build --tag-as-latest --install-packages-from-context --push --python ${{ matrix.python}} --platform "linux/amd64" if: inputs.push-latest-images == 'true' - - name: "Clean docker cache for ${{ matrix.platform }}" - run: docker system prune --all --force diff --git a/.github/workflows/release_dockerhub_image.yml b/.github/workflows/release_dockerhub_image.yml index 3d6d4e065e0a0..aa578d4f1d860 100644 --- a/.github/workflows/release_dockerhub_image.yml +++ b/.github/workflows/release_dockerhub_image.yml @@ -54,12 +54,15 @@ jobs: GITHUB_CONTEXT: ${{ toJson(github) }} VERBOSE: true steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash 
run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Install Breeze" uses: ./.github/actions/breeze - name: Selective checks @@ -90,12 +93,15 @@ jobs: "potiuk", ]'), github.event.sender.login) steps: - - name: Cleanup repo + - name: "Cleanup repo" + shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false + - name: Cleanup docker + uses: ./.github/actions/cleanup-docker - name: "Install Breeze" uses: ./.github/actions/breeze - name: Free space diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ba2707de50fdb..6f5cdc50b84c5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -335,10 +335,8 @@ repos: exclude: | (?x)^( ^airflow\/providers\/google\/cloud\/operators\/mlengine.py$| - ^airflow\/providers\/google\/cloud\/operators\/vertex_ai\/custom_job.py$| ^airflow\/providers\/google\/cloud\/operators\/cloud_storage_transfer_service.py$| ^airflow\/providers\/apache\/spark\/operators\/spark_submit.py\.py$| - ^airflow\/providers\/google\/cloud\/operators\/vertex_ai\/auto_ml\.py$| ^airflow\/providers\/apache\/spark\/operators\/spark_submit\.py$| ^airflow\/providers\/databricks\/operators\/databricks_sql\.py$| )$ diff --git a/Dockerfile b/Dockerfile index 534b0c68a710e..2cc4788a83499 100644 --- a/Dockerfile +++ b/Dockerfile @@ -45,7 +45,7 @@ ARG AIRFLOW_UID="50000" ARG AIRFLOW_USER_HOME_DIR=/home/airflow # latest released version here -ARG AIRFLOW_VERSION="2.8.2" +ARG AIRFLOW_VERSION="2.8.3" ARG PYTHON_BASE_IMAGE="python:3.8-slim-bookworm" diff --git a/Dockerfile.ci b/Dockerfile.ci index 8e048e77795fa..a9e8249d74604 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -855,19 +855,19 @@ function environment_initialization() { if [[ ${SKIP_ENVIRONMENT_INITIALIZATION=} == "true" ]]; then return fi - if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then - if [[ ${BACKEND:=} == "mssql" ]]; then - echo "${COLOR_RED}ARM platform is not supported for ${BACKEND} backend. Exiting.${COLOR_RESET}" - exit 1 - fi - fi - echo echo "${COLOR_BLUE}Running Initialization. Your basic configuration is:${COLOR_RESET}" echo echo " * ${COLOR_BLUE}Airflow home:${COLOR_RESET} ${AIRFLOW_HOME}" echo " * ${COLOR_BLUE}Airflow sources:${COLOR_RESET} ${AIRFLOW_SOURCES}" - echo " * ${COLOR_BLUE}Airflow core SQL connection:${COLOR_RESET} ${AIRFLOW__CORE__SQL_ALCHEMY_CONN:=}" + echo " * ${COLOR_BLUE}Airflow core SQL connection:${COLOR_RESET} ${AIRFLOW__DATABASE__SQL_ALCHEMY_CONN:=}" + if [[ ${BACKEND=} == "postgres" ]]; then + echo " * ${COLOR_BLUE}Airflow backend:${COLOR_RESET} Postgres: ${POSTGRES_VERSION}" + elif [[ ${BACKEND=} == "mysql" ]]; then + echo " * ${COLOR_BLUE}Airflow backend:${COLOR_RESET} MySQL: ${MYSQL_VERSION}" + elif [[ ${BACKEND=} == "sqlite" ]]; then + echo " * ${COLOR_BLUE}Airflow backend:${COLOR_RESET} Sqlite" + fi echo if [[ ${STANDALONE_DAG_PROCESSOR=} == "true" ]]; then diff --git a/INTHEWILD.md b/INTHEWILD.md index 5f6cb7461ace9..18298820db117 100644 --- a/INTHEWILD.md +++ b/INTHEWILD.md @@ -531,3 +531,4 @@ Currently, **officially** using Airflow: 1. [Zynga](https://www.zynga.com) 1. [Ørsted](https://orsted.com/en) [[@arjunanan6](https://github.com/arjunanan6)] 1. 
[好大夫在线](https://www.haodf.com/) [[@leiguorui](https://github.com/leiguorui)] +1. [Prophecy.io](https://www.prophecy.io/) [[@pateash](https://github.com/pateash)] diff --git a/NOTICE b/NOTICE index 33371e44a76a4..f6040a224c826 100644 --- a/NOTICE +++ b/NOTICE @@ -1,5 +1,5 @@ Apache Airflow -Copyright 2016-2023 The Apache Software Foundation +Copyright 2016-2024 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). diff --git a/README.md b/README.md index 7f726cfd4583b..502bf380d4e5d 100644 --- a/README.md +++ b/README.md @@ -98,14 +98,14 @@ Airflow is not a streaming solution, but it is often used to process real-time d Apache Airflow is tested with: -| | Main version (dev) | Stable version (2.8.2) | -|-------------|------------------------------|------------------------| -| Python | 3.8, 3.9, 3.10, 3.11, 3.12 | 3.8, 3.9, 3.10, 3.11 | -| Platform | AMD64/ARM64(\*) | AMD64/ARM64(\*) | -| Kubernetes | 1.25, 1.26, 1.27, 1.28, 1.29 | 1.25, 1.26, 1.27, 1.28 | -| PostgreSQL | 12, 13, 14, 15, 16 | 12, 13, 14, 15, 16 | -| MySQL | 8.0, Innovation | 8.0, Innovation | -| SQLite | 3.15.0+ | 3.15.0+ | +| | Main version (dev) | Stable version (2.8.3) | +|-------------|------------------------------|-----------------------------| +| Python | 3.8, 3.9, 3.10, 3.11, 3.12 | 3.8, 3.9, 3.10, 3.11 | +| Platform | AMD64/ARM64(\*) | AMD64/ARM64(\*) | +| Kubernetes | 1.25, 1.26, 1.27, 1.28, 1.29 | 1.25, 1.26, 1.27, 1.28, 1.29| +| PostgreSQL | 12, 13, 14, 15, 16 | 12, 13, 14, 15, 16 | +| MySQL | 8.0, Innovation | 8.0, Innovation | +| SQLite | 3.15.0+ | 3.15.0+ | \* Experimental @@ -180,15 +180,15 @@ them to the appropriate format and workflow that your tool requires. ```bash -pip install 'apache-airflow==2.8.2' \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.8.2/constraints-3.8.txt" +pip install 'apache-airflow==2.8.3' \ + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.8.3/constraints-3.8.txt" ``` 2. Installing with extras (i.e., postgres, google) ```bash -pip install 'apache-airflow[postgres,google]==2.8.2' \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.8.2/constraints-3.8.txt" +pip install 'apache-airflow[postgres,google]==2.8.3' \ + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.8.3/constraints-3.8.txt" ``` For information on installing provider packages, check @@ -293,7 +293,7 @@ Apache Airflow version life cycle: | Version | Current Patch/Minor | State | First Release | Limited Support | EOL/Terminated | |-----------|-----------------------|-----------|-----------------|-------------------|------------------| -| 2 | 2.8.2 | Supported | Dec 17, 2020 | TBD | TBD | +| 2 | 2.8.3 | Supported | Dec 17, 2020 | TBD | TBD | | 1.10 | 1.10.15 | EOL | Aug 27, 2018 | Dec 17, 2020 | June 17, 2021 | | 1.9 | 1.9.0 | EOL | Jan 03, 2018 | Aug 27, 2018 | Aug 27, 2018 | | 1.8 | 1.8.2 | EOL | Mar 19, 2017 | Jan 03, 2018 | Jan 03, 2018 | diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 0fea7c09025c0..93f3090d81984 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -21,6 +21,43 @@ .. towncrier release notes start +Airflow 2.8.3 (2024-03-11) +-------------------------- + +Significant Changes +^^^^^^^^^^^^^^^^^^^ + +The smtp provider is now pre-installed when you install Airflow. 
(#37713) +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Bug Fixes +""""""""" +- Add "MENU" permission in auth manager (#37881) +- Fix external_executor_id being overwritten (#37784) +- Make more MappedOperator members modifiable (#37828) +- Set parsing context dag_id in dag test command (#37606) + +Miscellaneous +""""""""""""" +- Remove useless methods from security manager (#37889) +- Improve code coverage for TriggerRuleDep (#37680) +- The SMTP provider is now preinstalled when installing Airflow (#37713) +- Bump min versions of openapi validators (#37691) +- Properly include ``airflow_pre_installed_providers.txt`` artifact (#37679) + +Doc Only Changes +"""""""""""""""" +- Clarify lack of sync between workers and scheduler (#37913) +- Simplify some docs around airflow_local_settings (#37835) +- Add section about local settings configuration (#37829) +- Fix docs of ``BranchDayOfWeekOperator`` (#37813) +- Write to secrets store is not supported by design (#37814) +- ``ERD`` generating doc improvement (#37808) +- Update incorrect config value (#37706) +- Update security model to clarify Connection Editing user's capabilities (#37688) +- Fix ImportError on examples dags (#37571) + + Airflow 2.8.2 (2024-02-26) -------------------------- diff --git a/airflow/api_connexion/schemas/dag_schema.py b/airflow/api_connexion/schemas/dag_schema.py index 3b26e0d2770e3..0bffdcf685784 100644 --- a/airflow/api_connexion/schemas/dag_schema.py +++ b/airflow/api_connexion/schemas/dag_schema.py @@ -69,6 +69,7 @@ class Meta: tags = fields.List(fields.Nested(DagTagSchema), dump_only=True) max_active_tasks = auto_field(dump_only=True) max_active_runs = auto_field(dump_only=True) + max_consecutive_failed_dag_runs = auto_field(dump_only=True) has_task_concurrency_limits = auto_field(dump_only=True) has_import_errors = auto_field(dump_only=True) next_dagrun = auto_field(dump_only=True) diff --git a/airflow/cli/commands/dag_command.py b/airflow/cli/commands/dag_command.py index 50e145eda5c60..d307cb45392a5 100644 --- a/airflow/cli/commands/dag_command.py +++ b/airflow/cli/commands/dag_command.py @@ -309,6 +309,7 @@ def _get_dagbag_dag_details(dag: DAG) -> dict: "tags": dag.tags, "max_active_tasks": dag.max_active_tasks, "max_active_runs": dag.max_active_runs, + "max_consecutive_failed_dag_runs": dag.max_consecutive_failed_dag_runs, "has_task_concurrency_limits": any( t.max_active_tis_per_dag is not None or t.max_active_tis_per_dagrun is not None for t in dag.tasks ), diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index 24845b4edd406..545977bd53422 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -116,6 +116,17 @@ core: type: string example: ~ default: "16" + max_consecutive_failed_dag_runs_per_dag: + description: | + (experimental) The maximum number of consecutive DAG failures before DAG is automatically paused. + This is also configurable per DAG level with ``max_consecutive_failed_dag_runs``, + which is defaulted as ``max_consecutive_failed_dag_runs_per_dag``. + If not specified, then the value is considered as 0, + meaning that the dags are never paused out by default. + version_added: 2.9.0 + type: string + example: ~ + default: "0" mp_start_method: description: | The name of the method used in order to start Python processes via the multiprocessing module. 
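
The `[core] max_consecutive_failed_dag_runs_per_dag` option introduced above is also exposed as a per-DAG argument, `max_consecutive_failed_dag_runs`, added to `airflow/models/dag.py` later in this patch. A minimal, illustrative sketch of how a DAG author might opt in follows; the DAG id, task, and threshold value are made up for the example and are not part of the patch:

```python
# Illustrative only: uses the max_consecutive_failed_dag_runs DAG argument added in this patch.
from datetime import datetime

from airflow import DAG
from airflow.operators.bash import BashOperator

with DAG(
    dag_id="example_auto_pause",
    start_date=datetime(2024, 1, 1),
    schedule="@daily",
    # Pause this DAG automatically after 3 consecutive failed DAG runs.
    # The default comes from [core] max_consecutive_failed_dag_runs_per_dag; 0 means never auto-pause.
    max_consecutive_failed_dag_runs=3,
) as dag:
    BashOperator(task_id="flaky_task", bash_command="exit 1")
```
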
diff --git a/airflow/datasets/__init__.py b/airflow/datasets/__init__.py index 1edcbb946dbb1..4f8d587727f14 100644 --- a/airflow/datasets/__init__.py +++ b/airflow/datasets/__init__.py @@ -95,9 +95,6 @@ def __or__(self, other: BaseDatasetEventInput) -> DatasetAny: def __and__(self, other: BaseDatasetEventInput) -> DatasetAll: return DatasetAll(self, other) - def as_expression(self) -> dict[str, Any]: - raise NotImplementedError - def evaluate(self, statuses: dict[str, bool]) -> bool: raise NotImplementedError @@ -129,11 +126,6 @@ def __eq__(self, other: Any) -> bool: def __hash__(self) -> int: return hash(self.uri) - def as_expression(self) -> dict[str, Any]: - if self.extra is None: - return {"uri": self.uri} - return {"uri": self.uri, "extra": self.extra} - def iter_datasets(self) -> Iterator[tuple[str, Dataset]]: yield self.uri, self @@ -149,9 +141,6 @@ class _DatasetBooleanCondition(BaseDatasetEventInput): def __init__(self, *objects: BaseDatasetEventInput) -> None: self.objects = objects - def as_expression(self) -> dict[str, Any]: - return {"objects": [o.as_expression() for o in self.objects]} - def evaluate(self, statuses: dict[str, bool]) -> bool: return self.agg_func(x.evaluate(statuses=statuses) for x in self.objects) diff --git a/airflow/migrations/versions/0137_2_9_0_adding_adding_max_failure_runs_column_.py b/airflow/migrations/versions/0137_2_9_0_adding_adding_max_failure_runs_column_.py new file mode 100644 index 0000000000000..6cb12a3200332 --- /dev/null +++ b/airflow/migrations/versions/0137_2_9_0_adding_adding_max_failure_runs_column_.py @@ -0,0 +1,54 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Adding max_consecutive_failed_dag_runs column to dag_model table + +Revision ID: 8e1c784a4fc7 +Revises: 1fd565369930 +Create Date: 2024-01-18 15:02:24.587206 + +""" + +import sqlalchemy as sa +from alembic import op + + +# revision identifiers, used by Alembic. +revision = '8e1c784a4fc7' +down_revision = 'ab34f260b71c' +branch_labels = None +depends_on = None +airflow_version = '2.9.0' + + +def upgrade(): + """Apply Adding max_consecutive_failed_dag_runs column to dag_model table""" + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('dag', schema=None) as batch_op: + batch_op.add_column(sa.Column('max_consecutive_failed_dag_runs', sa.Integer())) + + # ### end Alembic commands ### + + +def downgrade(): + """Unapply Adding max_consecutive_failed_dag_runs column to dag_model table""" + # ### commands auto generated by Alembic - please adjust! 
### + with op.batch_alter_table('dag', schema=None) as batch_op: + batch_op.drop_column('max_consecutive_failed_dag_runs') + + # ### end Alembic commands ### diff --git a/airflow/models/dag.py b/airflow/models/dag.py index 2ac247d73970e..0a51aa8e10ae4 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -364,6 +364,8 @@ class DAG(LoggingMixin): :param max_active_runs: maximum number of active DAG runs, beyond this number of DAG runs in a running state, the scheduler won't create new active DAG runs + :param max_consecutive_failed_dag_runs: (experimental) maximum number of consecutive failed DAG runs, + beyond this the scheduler will disable the DAG :param dagrun_timeout: specify how long a DagRun should be up before timing out / failing, so that new DagRuns can be created. :param sla_miss_callback: specify a function or list of functions to call when reporting SLA @@ -456,6 +458,9 @@ def __init__( concurrency: int | None = None, max_active_tasks: int = airflow_conf.getint("core", "max_active_tasks_per_dag"), max_active_runs: int = airflow_conf.getint("core", "max_active_runs_per_dag"), + max_consecutive_failed_dag_runs: int = airflow_conf.getint( + "core", "max_consecutive_failed_dag_runs_per_dag" + ), dagrun_timeout: timedelta | None = None, sla_miss_callback: None | SLAMissCallback | list[SLAMissCallback] = None, default_view: str = airflow_conf.get_mandatory_value("webserver", "dag_default_view").lower(), @@ -617,6 +622,16 @@ def __init__( self.last_loaded: datetime = timezone.utcnow() self.safe_dag_id = dag_id.replace(".", "__dot__") self.max_active_runs = max_active_runs + self.max_consecutive_failed_dag_runs = max_consecutive_failed_dag_runs + if self.max_consecutive_failed_dag_runs == 0: + self.max_consecutive_failed_dag_runs = airflow_conf.getint( + "core", "max_consecutive_failed_dag_runs_per_dag" + ) + if self.max_consecutive_failed_dag_runs < 0: + raise AirflowException( + f"Invalid max_consecutive_failed_dag_runs: {self.max_consecutive_failed_dag_runs}." + f"Requires max_consecutive_failed_dag_runs >= 0" + ) if self.timetable.active_runs_limit is not None: if self.timetable.active_runs_limit < self.max_active_runs: raise AirflowException( @@ -3034,6 +3049,16 @@ def bulk_sync_to_db( ) return cls.bulk_write_to_db(dags=dags, session=session) + def simplify_dataset_expression(self, dataset_expression) -> dict | None: + """Simplifies a nested dataset expression into a 'any' or 'all' format with URIs.""" + if dataset_expression is None: + return None + if dataset_expression.get("__type") == "dataset": + return dataset_expression["__var"]["uri"] + + new_key = "any" if dataset_expression["__type"] == "dataset_any" else "all" + return {new_key: [self.simplify_dataset_expression(item) for item in dataset_expression["__var"]]} + @classmethod @provide_session def bulk_write_to_db( @@ -3053,6 +3078,8 @@ def bulk_write_to_db( if not dags: return + from airflow.serialization.serialized_objects import BaseSerialization # Avoid circular import. 
+ log.info("Sync %s DAGs", len(dags)) dag_by_ids = {dag.dag_id: dag for dag in dags} @@ -3111,16 +3138,16 @@ def bulk_write_to_db( orm_dag.description = dag.description orm_dag.max_active_tasks = dag.max_active_tasks orm_dag.max_active_runs = dag.max_active_runs + orm_dag.max_consecutive_failed_dag_runs = dag.max_consecutive_failed_dag_runs orm_dag.has_task_concurrency_limits = any( t.max_active_tis_per_dag is not None or t.max_active_tis_per_dagrun is not None for t in dag.tasks ) orm_dag.schedule_interval = dag.schedule_interval orm_dag.timetable_description = dag.timetable.description - if (dataset_triggers := dag.dataset_triggers) is None: - orm_dag.dataset_expression = None - else: - orm_dag.dataset_expression = dataset_triggers.as_expression() + orm_dag.dataset_expression = dag.simplify_dataset_expression( + BaseSerialization.serialize(dag.dataset_triggers) + ) orm_dag.processor_subdir = processor_subdir @@ -3583,6 +3610,7 @@ class DagModel(Base): max_active_tasks = Column(Integer, nullable=False) max_active_runs = Column(Integer, nullable=True) + max_consecutive_failed_dag_runs = Column(Integer, nullable=False) has_task_concurrency_limits = Column(Boolean, nullable=False) has_import_errors = Column(Boolean(), default=False, server_default="0") @@ -3634,6 +3662,11 @@ def __init__(self, concurrency=None, **kwargs): if self.max_active_runs is None: self.max_active_runs = airflow_conf.getint("core", "max_active_runs_per_dag") + if self.max_consecutive_failed_dag_runs is None: + self.max_consecutive_failed_dag_runs = airflow_conf.getint( + "core", "max_consecutive_failed_dag_runs_per_dag" + ) + if self.has_task_concurrency_limits is None: # Be safe -- this will be updated later once the DAG is parsed self.has_task_concurrency_limits = True @@ -3931,6 +3964,9 @@ def dag( concurrency: int | None = None, max_active_tasks: int = airflow_conf.getint("core", "max_active_tasks_per_dag"), max_active_runs: int = airflow_conf.getint("core", "max_active_runs_per_dag"), + max_consecutive_failed_dag_runs: int = airflow_conf.getint( + "core", "max_consecutive_failed_dag_runs_per_dag" + ), dagrun_timeout: timedelta | None = None, sla_miss_callback: None | SLAMissCallback | list[SLAMissCallback] = None, default_view: str = airflow_conf.get_mandatory_value("webserver", "dag_default_view").lower(), @@ -3985,6 +4021,7 @@ def factory(*args, **kwargs): concurrency=concurrency, max_active_tasks=max_active_tasks, max_active_runs=max_active_runs, + max_consecutive_failed_dag_runs=max_consecutive_failed_dag_runs, dagrun_timeout=dagrun_timeout, sla_miss_callback=sla_miss_callback, default_view=default_view, diff --git a/airflow/models/dagrun.py b/airflow/models/dagrun.py index ec317a6070a7e..40880a644429c 100644 --- a/airflow/models/dagrun.py +++ b/airflow/models/dagrun.py @@ -561,6 +561,45 @@ def fetch_task_instances( tis = tis.where(TI.task_id.in_(task_ids)) return session.scalars(tis).all() + @internal_api_call + def _check_last_n_dagruns_failed(self, dag_id, max_consecutive_failed_dag_runs, session): + """Check if last N dags failed.""" + dag_runs = ( + session.query(DagRun) + .filter(DagRun.dag_id == dag_id) + .order_by(DagRun.execution_date.desc()) + .limit(max_consecutive_failed_dag_runs) + .all() + ) + """ Marking dag as paused, if needed""" + to_be_paused = len(dag_runs) >= max_consecutive_failed_dag_runs and all( + dag_run.state == DagRunState.FAILED for dag_run in dag_runs + ) + + if to_be_paused: + from airflow.models.dag import DagModel + + self.log.info( + "Pausing DAG %s because last %s DAG runs 
failed.", + self.dag_id, + max_consecutive_failed_dag_runs, + ) + filter_query = [ + DagModel.dag_id == self.dag_id, + DagModel.root_dag_id == self.dag_id, # for sub-dags + ] + session.execute( + update(DagModel) + .where(or_(*filter_query)) + .values(is_paused=True) + .execution_options(synchronize_session="fetch") + ) + else: + self.log.debug( + "Limit of consecutive DAG failed dag runs is not reached, DAG %s will not be paused.", + self.dag_id, + ) + @provide_session def get_task_instances( self, @@ -787,6 +826,16 @@ def recalculate(self) -> _UnfinishedStates: msg="task_failure", ) + # Check if the max_consecutive_failed_dag_runs has been provided and not 0 + # and last consecutive failures are more + if dag.max_consecutive_failed_dag_runs > 0: + self.log.debug( + "Checking consecutive failed DAG runs for DAG %s, limit is %s", + self.dag_id, + dag.max_consecutive_failed_dag_runs, + ) + self._check_last_n_dagruns_failed(dag.dag_id, dag.max_consecutive_failed_dag_runs, session) + # if all leaves succeeded and no unfinished tasks, the run succeeded elif not unfinished.tis and all(x.state in State.success_states for x in tis_for_dagrun_state): self.log.info("Marking run %s successful", self) diff --git a/airflow/providers/common/io/provider.yaml b/airflow/providers/common/io/provider.yaml index 022205bc33928..acd175c509b98 100644 --- a/airflow/providers/common/io/provider.yaml +++ b/airflow/providers/common/io/provider.yaml @@ -53,14 +53,14 @@ config: common.io: description: Common IO configuration section options: - xcom_objectstorage_path: + xcom_objectstore_path: description: | Path to a location on object storage where XComs can be stored in url format. version_added: 1.3.0 type: string example: "s3://conn_id@bucket/path" default: "" - xcom_objectstorage_threshold: + xcom_objectstore_threshold: description: | Threshold in bytes for storing XComs in object storage. -1 means always store in the database. 0 means always store in object storage. Any positive number means @@ -69,7 +69,7 @@ config: type: integer example: "1000000" default: "-1" - xcom_objectstorage_compression: + xcom_objectstore_compression: description: | Compression algorithm to use when storing XComs in object storage. Supported algorithms are a.o.: snappy, zip, gzip, bz2, and lzma. If not specified, no compression will be used. diff --git a/airflow/providers/common/io/xcom/backend.py b/airflow/providers/common/io/xcom/backend.py index ce8c7a87314c5..14d1009ead99b 100644 --- a/airflow/providers/common/io/xcom/backend.py +++ b/airflow/providers/common/io/xcom/backend.py @@ -122,6 +122,7 @@ def serialize_value( suffix = "." 
+ _get_compression_suffix(compression) else: suffix = "" + compression = None threshold = conf.getint(SECTION, "xcom_objectstore_threshold", fallback=-1) diff --git a/airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py b/airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py index d7eb3e01f7d5d..5269c4db25537 100644 --- a/airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +++ b/airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py @@ -22,12 +22,14 @@ from typing import TYPE_CHECKING, Sequence +from deprecated import deprecated from google.api_core.exceptions import NotFound from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault from google.cloud.aiplatform import datasets from google.cloud.aiplatform.models import Model from google.cloud.aiplatform_v1.types.training_pipeline import TrainingPipeline +from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.providers.google.cloud.hooks.vertex_ai.auto_ml import AutoMLHook from airflow.providers.google.cloud.links.vertex_ai import ( VertexAIModelLink, @@ -607,7 +609,7 @@ class DeleteAutoMLTrainingJobOperator(GoogleCloudBaseOperator): AutoMLTabularTrainingJob, AutoMLTextTrainingJob, or AutoMLVideoTrainingJob. """ - template_fields = ("training_pipeline", "region", "project_id", "impersonation_chain") + template_fields = ("training_pipeline_id", "region", "project_id", "impersonation_chain") def __init__( self, @@ -623,7 +625,7 @@ def __init__( **kwargs, ) -> None: super().__init__(**kwargs) - self.training_pipeline = training_pipeline_id + self.training_pipeline_id = training_pipeline_id self.region = region self.project_id = project_id self.retry = retry @@ -632,6 +634,16 @@ def __init__( self.gcp_conn_id = gcp_conn_id self.impersonation_chain = impersonation_chain + @property + @deprecated( + reason="`training_pipeline` is deprecated and will be removed in the future. " + "Please use `training_pipeline_id` instead.", + category=AirflowProviderDeprecationWarning, + ) + def training_pipeline(self): + """Alias for ``training_pipeline_id``, used for compatibility (deprecated).""" + return self.training_pipeline_id + def execute(self, context: Context): hook = AutoMLHook( gcp_conn_id=self.gcp_conn_id, diff --git a/airflow/providers/google/cloud/operators/vertex_ai/custom_job.py b/airflow/providers/google/cloud/operators/vertex_ai/custom_job.py index 71c4526e9249b..dcd5acbcad271 100644 --- a/airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +++ b/airflow/providers/google/cloud/operators/vertex_ai/custom_job.py @@ -20,12 +20,14 @@ from typing import TYPE_CHECKING, Sequence +from deprecated import deprecated from google.api_core.exceptions import NotFound from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault from google.cloud.aiplatform.models import Model from google.cloud.aiplatform_v1.types.dataset import Dataset from google.cloud.aiplatform_v1.types.training_pipeline import TrainingPipeline +from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.providers.google.cloud.hooks.vertex_ai.custom_job import CustomJobHook from airflow.providers.google.cloud.links.vertex_ai import ( VertexAIModelLink, @@ -1328,7 +1330,7 @@ class DeleteCustomTrainingJobOperator(GoogleCloudBaseOperator): account from the list granting this role to the originating account (templated). 
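For the ``common.io`` rename earlier in this diff, the backend now resolves the ``xcom_objectstore_*`` options rather than the old ``xcom_objectstorage_*`` names. A small sketch of reading them the same way, assuming the section is ``common.io`` as declared in the provider.yaml; the fallbacks mirror the defaults shown there.

```python
# Sketch only: reading the renamed common.io XCom object-storage options.
from airflow.configuration import conf

path = conf.get("common.io", "xcom_objectstore_path", fallback="")
threshold = conf.getint("common.io", "xcom_objectstore_threshold", fallback=-1)
compression = conf.get("common.io", "xcom_objectstore_compression", fallback=None)

# Threshold semantics per the option description above: -1 always keeps XComs
# in the database, 0 always writes them to object storage, and a positive value
# writes to object storage only when the serialized XCom exceeds that many bytes.
```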
""" - template_fields = ("training_pipeline", "custom_job", "region", "project_id", "impersonation_chain") + template_fields = ("training_pipeline_id", "custom_job_id", "region", "project_id", "impersonation_chain") def __init__( self, @@ -1345,8 +1347,8 @@ def __init__( **kwargs, ) -> None: super().__init__(**kwargs) - self.training_pipeline = training_pipeline_id - self.custom_job = custom_job_id + self.training_pipeline_id = training_pipeline_id + self.custom_job_id = custom_job_id self.region = region self.project_id = project_id self.retry = retry @@ -1355,6 +1357,26 @@ def __init__( self.gcp_conn_id = gcp_conn_id self.impersonation_chain = impersonation_chain + @property + @deprecated( + reason="`training_pipeline` is deprecated and will be removed in the future. " + "Please use `training_pipeline_id` instead.", + category=AirflowProviderDeprecationWarning, + ) + def training_pipeline(self): + """Alias for ``training_pipeline_id``, used for compatibility (deprecated).""" + return self.training_pipeline_id + + @property + @deprecated( + reason="`custom_job` is deprecated and will be removed in the future. " + "Please use `custom_job_id` instead.", + category=AirflowProviderDeprecationWarning, + ) + def custom_job(self): + """Alias for ``custom_job_id``, used for compatibility (deprecated).""" + return self.custom_job_id + def execute(self, context: Context): hook = CustomJobHook( gcp_conn_id=self.gcp_conn_id, diff --git a/airflow/providers/microsoft/azure/operators/adls.py b/airflow/providers/microsoft/azure/operators/adls.py index 345336b2c4cb2..6afb495077963 100644 --- a/airflow/providers/microsoft/azure/operators/adls.py +++ b/airflow/providers/microsoft/azure/operators/adls.py @@ -16,22 +16,74 @@ # under the License. from __future__ import annotations -from typing import TYPE_CHECKING, Any, Sequence +from typing import IO, TYPE_CHECKING, Any, AnyStr, Iterable, Sequence from airflow.models import BaseOperator -from airflow.providers.microsoft.azure.hooks.data_lake import AzureDataLakeHook +from airflow.providers.microsoft.azure.hooks.data_lake import AzureDataLakeHook, AzureDataLakeStorageV2Hook if TYPE_CHECKING: from airflow.utils.context import Context +DEFAULT_AZURE_DATA_LAKE_CONN_ID = "azure_data_lake_default" + + +class ADLSCreateObjectOperator(BaseOperator): + """ + Creates a new object from passed data to Azure Data Lake on specified file. + + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:ADLSCreateObjectOperator` + + :param file_system_name: Name of the file system or instance of FileSystemProperties. + :param file_name: Name of the file which needs to be created in the file system. + :param data: The data that will be uploaded. + :param length: Size of the data in bytes (optional). + :param replace: Whether to forcibly overwrite existing files/directories. + If False and remote path is a directory, will quit regardless if any files + would be overwritten or not. If True, only matching filenames are actually + overwritten. + :param azure_data_lake_conn_id: Reference to the :ref:`Azure Data Lake connection`. 
+ """ + + template_fields: Sequence[str] = ("file_system_name", "file_name", "data") + ui_color = "#e4f0e8" + + def __init__( + self, + *, + file_system_name: str, + file_name: str, + data: bytes | str | Iterable[AnyStr] | IO[AnyStr], + length: int | None = None, + replace: bool = False, + azure_data_lake_conn_id: str = DEFAULT_AZURE_DATA_LAKE_CONN_ID, + **kwargs, + ) -> None: + super().__init__(**kwargs) + + self.file_system_name = file_system_name + self.file_name = file_name + self.replace = replace + self.data = data # type: ignore[var-annotated] + self.length = length + self.azure_data_lake_conn_id = azure_data_lake_conn_id + + def execute(self, context: Context) -> dict[str, Any]: + self.log.debug("Uploading %s to %s", self.data, self.file_name) + hook = AzureDataLakeStorageV2Hook(adls_conn_id=self.azure_data_lake_conn_id) + return hook.create_file(file_system_name=self.file_system_name, file_name=self.file_name).upload_data( + data=self.data, length=self.length, overwrite=self.replace + ) + class ADLSDeleteOperator(BaseOperator): """ Delete files in the specified path. - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:ADLSDeleteOperator` + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:ADLSDeleteOperator` :param path: A directory or file to remove :param recursive: Whether to loop into directories in the location and remove the files @@ -48,7 +100,7 @@ def __init__( path: str, recursive: bool = False, ignore_not_found: bool = True, - azure_data_lake_conn_id: str = "azure_data_lake_default", + azure_data_lake_conn_id: str = DEFAULT_AZURE_DATA_LAKE_CONN_ID, **kwargs, ) -> None: super().__init__(**kwargs) @@ -69,26 +121,19 @@ class ADLSListOperator(BaseOperator): This operator returns a python list with the names of files which can be used by `xcom` in the downstream tasks. - :param path: The Azure Data Lake path to find the objects. Supports glob - strings (templated) - :param azure_data_lake_conn_id: Reference to the :ref:`Azure Data Lake connection`. + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:ADLSListOperator` - **Example**: - The following Operator would list all the Parquet files from ``folder/output/`` - folder in the specified ADLS account :: - - adls_files = ADLSListOperator( - task_id="adls_files", - path="folder/output/*.parquet", - azure_data_lake_conn_id="azure_data_lake_default", - ) + :param path: The Azure Data Lake path to find the objects. Supports glob strings (templated) + :param azure_data_lake_conn_id: Reference to the :ref:`Azure Data Lake connection`. 
""" template_fields: Sequence[str] = ("path",) ui_color = "#901dd2" def __init__( - self, *, path: str, azure_data_lake_conn_id: str = "azure_data_lake_default", **kwargs + self, *, path: str, azure_data_lake_conn_id: str = DEFAULT_AZURE_DATA_LAKE_CONN_ID, **kwargs ) -> None: super().__init__(**kwargs) self.path = path diff --git a/airflow/providers/microsoft/azure/provider.yaml b/airflow/providers/microsoft/azure/provider.yaml index 956fdba293593..3cc8b3c16ccbe 100644 --- a/airflow/providers/microsoft/azure/provider.yaml +++ b/airflow/providers/microsoft/azure/provider.yaml @@ -78,7 +78,8 @@ dependencies: - apache-airflow>=2.6.0 - adlfs>=2023.10.0 - azure-batch>=8.0.0 - - azure-cosmos>=4.0.0 + # azure-cosmos 4.6.0 fail on mypy, limit version till we fix the issue + - azure-cosmos>=4.0.0,<4.6.0 - azure-mgmt-cosmosdb - azure-datalake-store>=0.0.45 - azure-identity>=1.3.1 diff --git a/airflow/reproducible_build.yaml b/airflow/reproducible_build.yaml index 6212974927f17..767edf12eb6e7 100644 --- a/airflow/reproducible_build.yaml +++ b/airflow/reproducible_build.yaml @@ -1,2 +1,2 @@ -release-notes-hash: e9074c5f236fefdaf03cfc8d1a75e23d -source-date-epoch: 1708946510 +release-notes-hash: ba2e7a8d91504ba2db3292dd184cfd5c +source-date-epoch: 1709800692 diff --git a/airflow/serialization/pydantic/dag.py b/airflow/serialization/pydantic/dag.py index 1046f0b16a8e9..fa1cac535f60b 100644 --- a/airflow/serialization/pydantic/dag.py +++ b/airflow/serialization/pydantic/dag.py @@ -131,6 +131,7 @@ class DagModelPydantic(BaseModelPydantic): max_active_tasks: int max_active_runs: Optional[int] + max_consecutive_failed_dag_runs: Optional[int] has_task_concurrency_limits: bool has_import_errors: Optional[bool] = False diff --git a/airflow/serialization/schema.json b/airflow/serialization/schema.json index 6a52ac52d971e..744b66b9b9e4a 100644 --- a/airflow/serialization/schema.json +++ b/airflow/serialization/schema.json @@ -174,6 +174,7 @@ "_concurrency": { "type" : "number"}, "_max_active_tasks": { "type" : "number"}, "max_active_runs": { "type" : "number"}, + "max_consecutive_failed_dag_runs": { "type" : "number"}, "default_args": { "$ref": "#/definitions/dict" }, "start_date": { "$ref": "#/definitions/datetime" }, "end_date": { "$ref": "#/definitions/datetime" }, diff --git a/airflow/www/static/js/datasetUtils.js b/airflow/www/static/js/datasetUtils.js index b8e00cd7838a5..dabc0a91cb42c 100644 --- a/airflow/www/static/js/datasetUtils.js +++ b/airflow/www/static/js/datasetUtils.js @@ -21,19 +21,18 @@ import { getMetaValue } from "./utils"; -export function openDatasetModal( - dagId, - summary = "", - nextDatasets = [], - error = null -) { +export function openDatasetModal(dagId, summary, nextDatasets, error) { + const datasetEvents = nextDatasets.events || []; + const expression = nextDatasets.dataset_expression; const datasetsUrl = getMetaValue("datasets_url"); + $("#dataset_expression").empty(); $("#datasets_tbody").empty(); $("#datasets_error").hide(); $("#dag_id").text(dagId); + $("#dataset_expression").text(JSON.stringify(expression, null, 2)); $("#datasetNextRunModal").modal({}); - $("#next_run_summary").text(summary); - nextDatasets.forEach((d) => { + if (summary) $("#next_run_summary").text(summary); + datasetEvents.forEach((d) => { const row = document.createElement("tr"); const uriCell = document.createElement("td"); @@ -63,11 +62,12 @@ export function getDatasetTooltipInfo(dagId, run, setNextDatasets) { nextRunUrl = nextRunUrl.replace("__DAG_ID__", dagId); } $.get(nextRunUrl) - .done((datasets) => 
{ + .done((nextDatasets) => { + const datasetEvents = nextDatasets.events; + let count = 0; + let title = "Pending datasets:<br>";
- setNextDatasets(datasets); - datasets.forEach((d) => { + setNextDatasets(nextDatasets); + datasetEvents.forEach((d) => { if (!d.created_at) { if (count < 4) title += `${d.uri}<br>`;
count += 1; diff --git a/airflow/www/templates/airflow/dataset_next_run_modal.html b/airflow/www/templates/airflow/dataset_next_run_modal.html index 2d0417ae6089d..5698c9968121d 100644 --- a/airflow/www/templates/airflow/dataset_next_run_modal.html +++ b/airflow/www/templates/airflow/dataset_next_run_modal.html @@ -30,6 +30,8 @@
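The ``dataset_expression`` rendered by the modal is the structure produced by ``DAG.simplify_dataset_expression`` earlier in this diff: serialized dataset trees are collapsed into nested ``{"any": [...]}`` / ``{"all": [...]}`` dictionaries of URIs. A hedged sketch of the shape for a DAG scheduled on something like ``(a | b) & c``; the URIs are invented.

```python
# Illustrative only: the kind of structure stored in DagModel.dataset_expression
# and shown in the "dataset_expression" block of the next-run modal.
dataset_expression = {
    "all": [
        {"any": ["s3://bucket/dataset_a", "s3://bucket/dataset_b"]},
        "s3://bucket/dataset_c",
    ]
}
```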