diff --git a/.github/workflows/model_jobs.yml b/.github/workflows/model_jobs.yml
index 2ba0b917cad9..f88af8e39af2 100644
--- a/.github/workflows/model_jobs.yml
+++ b/.github/workflows/model_jobs.yml
@@ -28,7 +28,7 @@ env:
   CUDA_VISIBLE_DEVICES: 0,1

 jobs:
-  model_job:
+  run_models_gpu:
     name: " "
     strategy:
       fail-fast: false
@@ -80,23 +80,23 @@ jobs:

       - name: Run all tests on GPU
         working-directory: /transformers
-        run: python3 -m pytest -rs -v --make-reports=${{ inputs.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
+        run: python3 -m pytest -rs -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}

       - name: Failure short reports
         if: ${{ failure() }}
         continue-on-error: true
-        run: cat /transformers/reports/${{ inputs.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
+        run: cat /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

       - name: Run test
         shell: bash
         run: |
-          mkdir -p /transformers/reports/${{ inputs.machine_type }}_tests_gpu_${{ matrix.folders }}
-          echo "hello" > /transformers/reports/${{ inputs.machine_type }}_tests_gpu_${{ matrix.folders }}/hello.txt
-          echo "${{ inputs.machine_type }}_tests_gpu_${{ matrix.folders }}"
+          mkdir -p /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
+          echo "hello" > /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
+          echo "${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"

-      - name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
+      - name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ inputs.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
-          path: /transformers/reports/${{ inputs.machine_type }}_tests_gpu_${{ matrix.folders }}
+          name: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
+          path: /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
diff --git a/.github/workflows/self-new-model-pr-caller.yml b/.github/workflows/self-new-model-pr-caller.yml
index e0edf8b7be67..888b4f7a8ce5 100644
--- a/.github/workflows/self-new-model-pr-caller.yml
+++ b/.github/workflows/self-new-model-pr-caller.yml
@@ -36,7 +36,7 @@ jobs:
           python -m pip install GitPython
           echo "new_model=$(python utils/check_if_new_model_added.py | tail -n 1)" >> $GITHUB_OUTPUT

-  run_new_model_tests:
+  run_models_gpu:
     name: Run all tests for the new model
     # Triggered if it is a new model PR and the required label is added
     if: ${{ needs.check_for_new_model.outputs.new_model != '' && contains(github.event.pull_request.labels.*.name, 'single-model-run-slow') }}
@@ -90,23 +90,23 @@ jobs:

       - name: Run all tests on GPU
         working-directory: /transformers
-        run: python3 -m pytest -v -rs --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
+        run: python3 -m pytest -v -rs --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}

       - name: Failure short reports
         if: ${{ failure() }}
         continue-on-error: true
-        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
+        run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

       - name: Make sure report directory exists
         shell: bash
         run: |
-          mkdir -p /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
-          echo "hello" > /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/hello.txt
-          echo "${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}"
+          mkdir -p /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
+          echo "hello" > /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
+          echo "${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"

-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
-          path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
+          name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
diff --git a/.github/workflows/self-nightly-scheduled.yml b/.github/workflows/self-nightly-scheduled.yml
index 7906325e83bb..875e715b068b 100644
--- a/.github/workflows/self-nightly-scheduled.yml
+++ b/.github/workflows/self-nightly-scheduled.yml
@@ -2,7 +2,7 @@ name: Self-hosted runner (nightly-ci)

 # Note that each job's dependencies go into a corresponding docker file.
 #
-# For example for `run_all_tests_torch_cuda_extensions_gpu` the docker image is
+# For example for `run_torch_cuda_extensions_gpu` the docker image is
 # `huggingface/transformers-pytorch-deepspeed-latest-gpu`, which can be found at
 # `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile`

@@ -183,7 +183,7 @@ jobs:
           name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly
           path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}

-  run_all_tests_torch_cuda_extensions_gpu:
+  run_torch_cuda_extensions_gpu:
     name: Torch CUDA extension tests
     strategy:
       fail-fast: false
@@ -231,19 +231,19 @@ jobs:

       - name: Run all tests on GPU
         working-directory: /workspace/transformers
         run: |
-          python -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
+          python -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended

       - name: Failure short reports
         if: ${{ failure() }}
         continue-on-error: true
-        run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt
+        run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt

-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_nightly"
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports_postfix_nightly"
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_nightly
-          path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
+          name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports_postfix_nightly
+          path: /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports

   send_results:
     name: Send results to webhook
@@ -253,7 +253,7 @@ jobs:
       setup,
       run_tests_single_gpu,
       run_tests_multi_gpu,
-      run_all_tests_torch_cuda_extensions_gpu
+      run_torch_cuda_extensions_gpu
     ]
     steps:
       - name: Preliminary job status
diff --git a/.github/workflows/self-past.yml b/.github/workflows/self-past.yml
index 7be658c43202..ca47c454f689 100644
--- a/.github/workflows/self-past.yml
+++ b/.github/workflows/self-past.yml
@@ -2,7 +2,7 @@ name: Self-hosted runner (past-ci)

 # Note that each job's dependencies go into a corresponding docker file.
 #
-# For example for `run_all_tests_torch_cuda_extensions_gpu` the docker image is
+# For example for `run_torch_cuda_extensions_gpu` the docker image is
 # `huggingface/transformers-pytorch-deepspeed-latest-gpu`, which can be found at
 # `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile`

@@ -228,7 +228,7 @@ jobs:
           name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}
           path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}

-  run_all_tests_torch_cuda_extensions_gpu:
+  run_torch_cuda_extensions_gpu:
     name: Torch CUDA extension tests
     if: inputs.framework == 'pytorch'
     strategy:
@@ -286,19 +286,19 @@ jobs:

       - name: Run all tests on GPU
         working-directory: /transformers
         run: |
-          python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
+          python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended

       - name: Failure short reports
         if: ${{ failure() }}
         continue-on-error: true
-        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt
+        run: cat /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt

-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}"
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}"
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}
-          path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
+          name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}
+          path: /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports

   send_results:
     name: Send results to webhook
@@ -308,7 +308,7 @@ jobs:
       setup,
       run_tests_single_gpu,
       run_tests_multi_gpu,
-      run_all_tests_torch_cuda_extensions_gpu
+      run_torch_cuda_extensions_gpu
     ]
     steps:
       - name: Preliminary job status
diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml
index b285a5f8fc0a..8705f398b2b5 100644
--- a/.github/workflows/self-push-amd.yml
+++ b/.github/workflows/self-push-amd.yml
@@ -145,7 +145,7 @@ jobs:
           echo "matrix=$keys" >> $GITHUB_OUTPUT
           echo "test_map=$test_map" >> $GITHUB_OUTPUT

-  run_tests_amdgpu:
+  run_models_gpu:
     name: Model tests
     needs: setup_gpu
     # `dummy` means there is no test to run
@@ -230,19 +230,19 @@ jobs:

       - name: Run all non-slow selected tests on GPU
         working-directory: /transformers
         run: |
-          python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }}
+          python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports ${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }}

       - name: Failure short reports
         if: ${{ failure() }}
         continue-on-error: true
-        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
+        run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
-          path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
+          name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports

   send_results:
     name: Send results to webhook
@@ -252,7 +252,7 @@ jobs:
       check_runner_status,
       check_runners,
      setup_gpu,
-      run_tests_amdgpu,
+      run_models_gpu,
       # run_tests_torch_cuda_extensions_single_gpu,
       # run_tests_torch_cuda_extensions_multi_gpu
     ]
diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml
index 17dff31fa4e3..1bc02ccd826e 100644
--- a/.github/workflows/self-push.yml
+++ b/.github/workflows/self-push.yml
@@ -385,19 +385,19 @@ jobs:
         working-directory: /workspace/transformers
         # TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
         run: |
-          python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
+          python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended

       - name: Failure short reports
         if: ${{ failure() }}
         continue-on-error: true
-        run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt
+        run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt

-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports"
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
-          path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
+          name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
+          path: /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports

   run_tests_torch_cuda_extensions_multi_gpu:
     name: Torch CUDA extension tests
@@ -475,19 +475,19 @@ jobs:
         working-directory: /workspace/transformers
         # TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
         run: |
-          python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
+          python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended

       - name: Failure short reports
         if: ${{ failure() }}
         continue-on-error: true
-        run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt
+        run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt

-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports"
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
-          path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
+          name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
+          path: /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports

   send_results:
     name: Send results to webhook
diff --git a/.github/workflows/self-scheduled-amd.yml b/.github/workflows/self-scheduled-amd.yml
index 09926071802a..d2ab90d13318 100644
--- a/.github/workflows/self-scheduled-amd.yml
+++ b/.github/workflows/self-scheduled-amd.yml
@@ -108,7 +108,7 @@ jobs:
       run: |
         python3 utils/print_env.py

-  run_tests_single_gpu:
+  run_models_gpu_single_gpu:
     name: Single GPU tests
     strategy:
       max-parallel: 1 # For now, not to parallelize. Can change later if it works well.
@@ -162,21 +162,21 @@ jobs:

       - name: Run all tests on GPU
         working-directory: /transformers
-        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
+        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}

       - name: Failure short reports
         if: ${{ failure() }}
         continue-on-error: true
-        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
+        run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
-          path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
+          name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports

-  run_tests_multi_gpu:
+  run_models_gpu_multi_gpu:
     name: Multi GPU tests
     strategy:
       max-parallel: 1
@@ -230,19 +230,19 @@ jobs:

       - name: Run all tests on GPU
         working-directory: /transformers
-        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
+        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}

       - name: Failure short reports
         if: ${{ failure() }}
         continue-on-error: true
-        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
+        run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
-          path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
+          name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports

   run_examples_gpu:
     name: Examples tests
@@ -287,19 +287,19 @@ jobs:
         working-directory: /transformers
         run: |
           pip install -r examples/pytorch/_tests_requirements.txt
-          python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch
+          python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_examples_gpu_test_reports examples/pytorch

       - name: Failure short reports
         if: ${{ failure() }}
         continue-on-error: true
-        run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt
+        run: cat /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports/failures_short.txt

-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu"
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu_test_reports"
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.machine_type }}_run_examples_gpu
-          path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu
+          name: ${{ matrix.machine_type }}_run_examples_gpu_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports

   run_pipelines_torch_gpu:
     name: PyTorch pipelines tests
@@ -343,21 +343,21 @@ jobs:

       - name: Run all pipeline tests on GPU
         working-directory: /transformers
         run: |
-          python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_torch_pipeline_gpu tests/pipelines
+          python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines

       - name: Failure short reports
         if: ${{ failure() }}
         continue-on-error: true
-        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt
+        run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt

-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu"
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports"
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu
-          path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu
+          name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports

-  run_tests_torch_deepspeed_gpu:
+  run_torch_cuda_extensions_gpu:
     name: Torch ROCm deepspeed tests
     strategy:
       fail-fast: false
@@ -400,19 +400,19 @@ jobs:

       - name: Run all tests on GPU
         working-directory: /transformers
-        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_torch_deepspeed_gpu tests/deepspeed tests/extended
+        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended

       - name: Failure short reports
         if: ${{ failure() }}
         continue-on-error: true
-        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_deepspeed_gpu/failures_short.txt
+        run: cat /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt

-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_deepspeed_gpu_test_reports"
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.machine_type }}_run_tests_torch_deepspeed_gpu_test_reports
-          path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_deepspeed_gpu
+          name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports

   run_extract_warnings:
     name: Extract warnings in CI artifacts
@@ -422,11 +422,11 @@ jobs:
       check_runner_status,
       check_runners,
       setup,
-      run_tests_single_gpu,
-      run_tests_multi_gpu,
+      run_models_gpu_single_gpu,
+      run_models_gpu_multi_gpu,
       run_examples_gpu,
       run_pipelines_torch_gpu,
-      run_tests_torch_deepspeed_gpu
+      run_torch_cuda_extensions_gpu
     ]
     steps:
       - name: Checkout transformers
@@ -471,11 +471,11 @@ jobs:
       check_runner_status,
       check_runners,
       setup,
-      run_tests_single_gpu,
-      run_tests_multi_gpu,
+      run_models_gpu_single_gpu,
+      run_models_gpu_multi_gpu,
       run_examples_gpu,
       run_pipelines_torch_gpu,
-      run_tests_torch_deepspeed_gpu,
+      run_torch_cuda_extensions_gpu,
       run_extract_warnings
     ]
     steps:
diff --git a/.github/workflows/self-scheduled-caller.yml b/.github/workflows/self-scheduled-caller.yml
index 59b992bcd250..40689c629a09 100644
--- a/.github/workflows/self-scheduled-caller.yml
+++ b/.github/workflows/self-scheduled-caller.yml
@@ -14,7 +14,7 @@ jobs:
     name: Model CI
     uses: ./.github/workflows/self-scheduled.yml
     with:
-      job: run_tests_gpu
+      job: run_models_gpu
       slack_report_channel: "#transformers-ci-daily-models"
     secrets: inherit

@@ -46,7 +46,7 @@ jobs:
     name: DeepSpeed CI
     uses: ./.github/workflows/self-scheduled.yml
     with:
-      job: run_all_tests_torch_cuda_extensions_gpu
+      job: run_torch_cuda_extensions_gpu
       slack_report_channel: "#transformers-ci-daily-deepspeed"
     secrets: inherit

@@ -54,6 +54,6 @@ jobs:
     name: Quantization CI
     uses: ./.github/workflows/self-scheduled.yml
     with:
-      job: run_tests_quantization_torch_gpu
+      job: run_quantization_torch_gpu
       slack_report_channel: "#transformers-ci-daily-quantization"
     secrets: inherit
diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index fa41bffc0bc8..5911c81bf4f9 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -2,7 +2,7 @@ name: Self-hosted runner (scheduled)

 # Note that each job's dependencies go into a corresponding docker file.
 #
-# For example for `run_all_tests_torch_cuda_extensions_gpu` the docker image is
+# For example for `run_torch_cuda_extensions_gpu` the docker image is
 # `huggingface/transformers-pytorch-deepspeed-latest-gpu`, which can be found at
 # `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile`

@@ -33,7 +33,7 @@ env:

 jobs:
   setup:
-    if: contains(fromJSON('["run_tests_gpu", "run_tests_quantization_torch_gpu"]'), inputs.job)
+    if: contains(fromJSON('["run_models_gpu", "run_quantization_torch_gpu"]'), inputs.job)
     name: Setup
     strategy:
       matrix:
@@ -64,7 +64,7 @@ jobs:
         run: pip freeze

       - id: set-matrix
-        if: ${{ inputs.job == 'run_tests_gpu' }}
+        if: ${{ inputs.job == 'run_models_gpu' }}
        name: Identify models to test
         working-directory: /transformers/tests
         run: |
@@ -72,7 +72,7 @@ jobs:
           echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT

       - id: set-matrix-quantization
-        if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
+        if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
         name: Identify quantization method to test
         working-directory: /transformers/tests
         run: |
@@ -82,8 +82,8 @@ jobs:
         run: |
           nvidia-smi

-  run_tests_gpu:
-    if: ${{ inputs.job == 'run_tests_gpu' }}
+  run_models_gpu:
+    if: ${{ inputs.job == 'run_models_gpu' }}
     name: " "
     needs: setup
     strategy:
@@ -134,19 +134,19 @@ jobs:

       - name: Run all pipeline tests on GPU
         working-directory: /transformers
         run: |
-          python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_torch_pipeline_gpu tests/pipelines
+          python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines

       - name: Failure short reports
         if: ${{ failure() }}
         continue-on-error: true
-        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt
+        run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt

-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu"
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports"
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu
-          path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu
+          name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports

   run_pipelines_tf_gpu:
     if: ${{ inputs.job == 'run_pipelines_tf_gpu' }}
@@ -185,19 +185,19 @@ jobs:

       - name: Run all pipeline tests on GPU
         working-directory: /transformers
         run: |
-          python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_tf_pipeline_gpu tests/pipelines
+          python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports tests/pipelines

       - name: Failure short reports
         if: ${{ always() }}
         run: |
-          cat /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu/failures_short.txt
+          cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports/failures_short.txt

-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu"
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports"
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu
-          path: /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu
+          name: ${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports

   run_examples_gpu:
     if: ${{ inputs.job == 'run_examples_gpu' }}
@@ -236,22 +236,22 @@ jobs:
         working-directory: /transformers
         run: |
           pip install -r examples/pytorch/_tests_requirements.txt
-          python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch
+          python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_examples_gpu_test_reports examples/pytorch

       - name: Failure short reports
         if: ${{ failure() }}
         continue-on-error: true
-        run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt
+        run: cat /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports/failures_short.txt

-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu"
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu_test_reports"
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.machine_type }}_run_examples_gpu
-          path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu
+          name: ${{ matrix.machine_type }}_run_examples_gpu_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports

-  run_all_tests_torch_cuda_extensions_gpu:
-    if: ${{ inputs.job == 'run_all_tests_torch_cuda_extensions_gpu' }}
+  run_torch_cuda_extensions_gpu:
+    if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }}
     name: Torch CUDA extension tests
     strategy:
       fail-fast: false
@@ -296,22 +296,22 @@ jobs:

       - name: Run all tests on GPU
         working-directory: /workspace/transformers
         run: |
-          python -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
+          python -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended

       - name: Failure short reports
         if: ${{ failure() }}
         continue-on-error: true
-        run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt
+        run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt

-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports"
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
-          path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
+          name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
+          path: /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports

-  run_tests_quantization_torch_gpu:
-    if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
+  run_quantization_torch_gpu:
+    if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
     name: " "
     needs: setup
     strategy:
@@ -357,26 +357,26 @@ jobs:

       - name: Run quantization tests on GPU
         working-directory: /transformers
         run: |
-          python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
+          python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}

       - name: Failure short reports
         if: ${{ failure() }}
         continue-on-error: true
-        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }}/failures_short.txt
+        run: cat /transformers/reports/${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu_${{ env.matrix_folders }}"
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports"
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu_${{ env.matrix_folders }}
-          path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }}
+          name: ${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports

   run_extract_warnings:
-    # Let's only do this for the job `run_tests_gpu` to simplify the (already complex) logic.
-    if: ${{ always() && inputs.job == 'run_tests_gpu' }}
+    # Let's only do this for the job `run_models_gpu` to simplify the (already complex) logic.
+    if: ${{ always() && inputs.job == 'run_models_gpu' }}
     name: Extract warnings in CI artifacts
     runs-on: ubuntu-22.04
-    needs: [setup, run_tests_gpu]
+    needs: [setup, run_models_gpu]
     steps:
       - name: Checkout transformers
         uses: actions/checkout@v4
@@ -416,12 +416,12 @@ jobs:
     name: Slack Report
     needs: [
       setup,
-      run_tests_gpu,
+      run_models_gpu,
       run_pipelines_torch_gpu,
       run_pipelines_tf_gpu,
       run_examples_gpu,
-      run_all_tests_torch_cuda_extensions_gpu,
-      run_tests_quantization_torch_gpu,
+      run_torch_cuda_extensions_gpu,
+      run_quantization_torch_gpu,
       run_extract_warnings
     ]
     if: ${{ always() }}
diff --git a/.github/workflows/slack-report.yml b/.github/workflows/slack-report.yml
index 88660914bfdc..75905dde495e 100644
--- a/.github/workflows/slack-report.yml
+++ b/.github/workflows/slack-report.yml
@@ -35,7 +35,7 @@ jobs:
       - uses: actions/checkout@v4
       - uses: actions/download-artifact@v4
       - name: Send message to Slack
-        if: ${{ inputs.job != 'run_tests_quantization_torch_gpu' }}
+        if: ${{ inputs.job != 'run_quantization_torch_gpu' }}
         env:
           CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
           CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
@@ -61,7 +61,7 @@ jobs:
       # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
       - name: Failure table artifacts
         # Only the model testing job is concerned for this step
-        if: ${{ inputs.job == 'run_tests_gpu' }}
+        if: ${{ inputs.job == 'run_models_gpu' }}
         uses: actions/upload-artifact@v4
         with:
           name: prev_ci_results
@@ -70,7 +70,7 @@ jobs:
       - uses: actions/checkout@v4
       - uses: actions/download-artifact@v4
       - name: Send message to Slack for quantization workflow
-        if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
+        if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
         env:
           CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
           ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
diff --git a/utils/notification_service.py b/utils/notification_service.py
index 158e01942b81..ba082b046fce 100644
--- a/utils/notification_service.py
+++ b/utils/notification_service.py
@@ -992,13 +992,13 @@ def prepare_reports(title, header, reports, to_truncate=True):
             "job_link": {},
         }
         for model in models
-        if f"run_all_tests_gpu_{model}_test_reports" in available_artifacts
+        if f"run_models_gpu_{model}_test_reports" in available_artifacts
     }

     unclassified_model_failures = []

     for model in model_results.keys():
-        for artifact_path in available_artifacts[f"run_all_tests_gpu_{model}_test_reports"].paths:
+        for artifact_path in available_artifacts[f"run_models_gpu_{model}_test_reports"].paths:
             artifact = retrieve_artifact(artifact_path["path"], artifact_path["gpu"])
             if "stats" in artifact:
                 # Link to the GitHub Action job
@@ -1052,10 +1052,10 @@ def prepare_reports(title, header, reports, to_truncate=True):

     # Additional runs
     additional_files = {
-        "PyTorch pipelines": "run_tests_torch_pipeline_gpu",
-        "TensorFlow pipelines": "run_tests_tf_pipeline_gpu",
-        "Examples directory": "run_examples_gpu",
-        "Torch CUDA extension tests": "run_tests_torch_cuda_extensions_gpu_test_reports",
+        "PyTorch pipelines": "run_pipelines_torch_gpu_test_reports",
+        "TensorFlow pipelines": "run_pipelines_tf_gpu_test_reports",
+        "Examples directory": "run_examples_gpu_test_reports",
+        "Torch CUDA extension tests": "run_torch_cuda_extensions_gpu_test_reports",
     }

     if ci_event in ["push", "Nightly CI"] or ci_event.startswith("Past CI"):
@@ -1075,7 +1075,7 @@ def prepare_reports(title, header, reports, to_truncate=True):
         "run_pipelines_torch_gpu": "PyTorch pipelines",
         "run_pipelines_tf_gpu": "TensorFlow pipelines",
         "run_examples_gpu": "Examples directory",
-        "run_all_tests_torch_cuda_extensions_gpu": "Torch CUDA extension tests",
+        "run_torch_cuda_extensions_gpu": "Torch CUDA extension tests",
     }

     # Remove some entries in `additional_files` if they are not concerned.
@@ -1133,10 +1133,10 @@ def prepare_reports(title, header, reports, to_truncate=True):
     )

     # Let's only check the warning for the model testing job. Currently, the job `run_extract_warnings` is only run
-    # when `inputs.job` (in the workflow file) is `run_tests_gpu`. The reason is: otherwise we need to save several
+    # when `inputs.job` (in the workflow file) is `run_models_gpu`. The reason is: otherwise we need to save several
     # artifacts with different names which complicates the logic for an insignificant part of the CI workflow reporting.
     selected_warnings = []
-    if job_name == "run_tests_gpu":
+    if job_name == "run_models_gpu":
         if "warnings_in_ci" in available_artifacts:
             directory = available_artifacts["warnings_in_ci"].paths[0]["path"]
             with open(os.path.join(directory, "selected_warnings.json")) as fp:
@@ -1147,7 +1147,7 @@ def prepare_reports(title, header, reports, to_truncate=True):

     # Only the model testing job is concerned: this condition is to avoid other jobs to upload the empty list as
     # results.
-    if job_name == "run_tests_gpu":
+    if job_name == "run_models_gpu":
         with open("prev_ci_results/model_results.json", "w", encoding="UTF-8") as fp:
             json.dump(model_results, fp, indent=4, ensure_ascii=False)
diff --git a/utils/notification_service_quantization.py b/utils/notification_service_quantization.py
index 11bc57e618a7..1687eeaa25f3 100644
--- a/utils/notification_service_quantization.py
+++ b/utils/notification_service_quantization.py
@@ -200,7 +200,7 @@ def post_reply(self):
             "job_link": {},
         }
         for quant in quantization_matrix
-        if f"run_tests_quantization_torch_gpu_{quant}" in available_artifacts
+        if f"run_quantization_torch_gpu_{quant}_test_reports" in available_artifacts
     }

     github_actions_jobs = get_jobs(
@@ -217,7 +217,7 @@ def post_reply(self):
                 break

     for quant in quantization_results.keys():
-        for artifact_path in available_artifacts[f"run_tests_quantization_torch_gpu_{quant}"].paths:
+        for artifact_path in available_artifacts[f"run_quantization_torch_gpu_{quant}_test_reports"].paths:
             artifact = retrieve_artifact(artifact_path["path"], artifact_path["gpu"])
             if "stats" in artifact:
                 # Link to the GitHub Action job
diff --git a/utils/split_model_tests.py b/utils/split_model_tests.py
index fc8800ffcf1c..e5083aaeb46f 100644
--- a/utils/split_model_tests.py
+++ b/utils/split_model_tests.py
@@ -18,7 +18,7 @@
 to split the list of jobs to run into multiple slices each containing a smaller number of jobs. This way, we can
 bypass the maximum of 256 jobs in a matrix.

-See the `setup` and `run_tests_gpu` jobs defined in the workflow file `.github/workflows/self-scheduled.yml` for more
+See the `setup` and `run_models_gpu` jobs defined in the workflow file `.github/workflows/self-scheduled.yml` for more
 details.

 Usage: