From 27742f6e559742ed7918229a3be3ddc45f8bc3ed Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Thu, 12 Dec 2024 07:56:46 -0800 Subject: [PATCH] Deflake some pkgci jobs. (#19472) * Increase real weight test timeouts from 4 minutes to 10 minutes to work around https://github.com/iree-org/iree/actions/runs/12281522213/job/34271200734#step:9:1461 ``` ============================== slowest durations =============================== 240.00s call SHARK-TestSuite/iree_tests/sharktank/punet/int8/test_cases.json::sdxl_unet_int8_export.mlir::gpu_rocm::real_weights 31.44s call SHARK-TestSuite/iree_tests/sharktank/punet/fp16/test_cases.json::sdxl_unet_fp16_export.mlir::gpu_rocm::real_weights 11.22s call SHARK-TestSuite/iree_tests/sharktank/llama/open-llama-3b-v2-f16/test_cases.json::open-llama-3b-v2-f16.mlirbc::gpu_rocm::real_weights_prefill 0.08s call SHARK-TestSuite/iree_tests/pytorch/models/resnet50/test_cases.json::resnet50.mlirbc::gpu_rocm::real_weights 0.07s call SHARK-TestSuite/iree_tests/pytorch/models/opt-125M/test_cases.json::opt-125M.mlirbc::gpu_rocm::real_weights (10 durations < 0.005s hidden. Use -vv to show these durations.) 
=========================== short test summary info ============================ PASSED SHARK-TestSuite/iree_tests/sharktank/llama/open-llama-3b-v2-f16/test_cases.json::open-llama-3b-v2-f16.mlirbc::gpu_rocm::real_weights_prefill PASSED SHARK-TestSuite/iree_tests/sharktank/punet/fp16/test_cases.json::sdxl_unet_fp16_export.mlir::gpu_rocm::real_weights XFAIL SHARK-TestSuite/iree_tests/pytorch/models/opt-125M/test_cases.json::opt-125M.mlirbc::gpu_rocm::real_weights - Expected compilation to fail (included in 'expected_compile_failures') XFAIL SHARK-TestSuite/iree_tests/pytorch/models/resnet50/test_cases.json::resnet50.mlirbc::gpu_rocm::real_weights - Expected compilation to fail (included in 'expected_compile_failures') FAILED SHARK-TestSuite/iree_tests/sharktank/punet/int8/test_cases.json::sdxl_unet_int8_export.mlir::gpu_rocm::real_weights - Failed: Timeout >240.0s ======= 1 failed, 2 passed, 2 deselected, 2 xfailed in 282.99s (0:04:42) ======= ``` * Skip flaky test_gridsample_zeros_padding op test to work around https://github.com/iree-org/iree/actions/runs/12286576807/job/34287344921#step:8:59 ``` _ IREE compile and run: test_gridsample_zeros_padding::model.mlir::model.mlir::cpu_llvm_sync _ [gw3] linux -- Python 3.11.10 /home/runner/work/iree/iree/venv/bin/python Error invoking iree-run-module Error code: 1 Stderr diagnostics: Stdout diagnostics: EXEC @test_gridsample_zeros_padding [FAILED] result[0]: element at index 3 (2.80544E+13) does not match the expected (0); expected that the view is equal to contents of a view of 1x1x2x4xf32 expected: 1x1x2x4xf32=[[[0 0 1.7 0][0 1.7 0 0]]] actual: 1x1x2x4xf32=[[[0 0 1.7 2.80544E+13][2.80544E+13 1.7 0 2.80544E+13]]] ``` and https://github.com/iree-org/iree/actions/runs/12285879922/job/34285283119#step:8:51 ``` _ IREE compile and run: test_gridsample_zeros_padding::model.mlir::model.mlir::cpu_llvm_sync _ [gw3] linux -- Python 3.11.11 /home/runner/work/iree/iree/venv/bin/python Error invoking iree-run-module Error code: 1 
Stderr diagnostics: Stdout diagnostics: EXEC @test_gridsample_zeros_padding [FAILED] result[0]: element at index 3 (39529.7) does not match the expected (0); expected that the view is equal to contents of a view of 1x1x2x4xf32 expected: 1x1x2x4xf32=[[[0 0 1.7 0][0 1.7 0 0]]] actual: 1x1x2x4xf32=[[[0 0 1.7 39529.7][39529.7 1.7 0 39529.7]]] ``` (This test seems to have been failing consistently since https://github.com/iree-org/iree/commit/ea9176ab6f299d5d0fb01b887bc7b4478fad9c4b, but with different outputs on each run, so we could either mark it as an expected failure or skip it; this change skips it.) --- .github/workflows/pkgci_regression_test.yml | 2 +- .../iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync.json | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pkgci_regression_test.yml b/.github/workflows/pkgci_regression_test.yml index 86d6169672b7..1b22e6ee8950 100644 --- a/.github/workflows/pkgci_regression_test.yml +++ b/.github/workflows/pkgci_regression_test.yml @@ -112,7 +112,7 @@ jobs: --no-skip-tests-missing-files \ --capture=no \ --log-cli-level=info \ - --timeout=240 \ + --timeout=600 \ --durations=0 \ --config-files=${MODELS_CONFIG_FILE_PATH} diff --git a/tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync.json b/tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync.json index abbacba76305..351c3420407c 100644 --- a/tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync.json +++ b/tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync.json @@ -13,7 +13,9 @@ "onnx/node/generated/test_group_normalization_epsilon_expanded", "onnx/node/generated/test_group_normalization_example_expanded" ], - "skip_run_tests": [], + "skip_run_tests": [ + "onnx/node/generated/test_gridsample_zeros_padding" + ], "expected_compile_failures": [ "onnx/node/generated/test_affine_grid_2d", "onnx/node/generated/test_affine_grid_2d_align_corners",