diff --git a/release/ray_release/byod/requirements_byod.in b/release/ray_release/byod/requirements_byod.in index 574a6b2c8cec4..f3c798d1fb9da 100644 --- a/release/ray_release/byod/requirements_byod.in +++ b/release/ray_release/byod/requirements_byod.in @@ -2,6 +2,8 @@ ale-py anyscale boto3 +cmake +crc32c cython gcsfs gsutil diff --git a/release/ray_release/byod/requirements_byod.txt b/release/ray_release/byod/requirements_byod.txt index bd81595acbc68..f5e1d91e0e7db 100644 --- a/release/ray_release/byod/requirements_byod.txt +++ b/release/ray_release/byod/requirements_byod.txt @@ -388,6 +388,25 @@ cloudpickle==2.2.1 \ --hash=sha256:61f594d1f4c295fa5cd9014ceb3a1fc4a70b0de1164b94fbc2d854ccba056f9f \ --hash=sha256:d89684b8de9e34a2a43b3460fbca07d09d6e25ce858df4d5a44240403b6178f5 # via gym +cmake==3.26.4 \ + --hash=sha256:05cfd76c637eb22058c95e2dc383cadd4e0615e2643e637bb498a6cc24825790 \ + --hash=sha256:1b92f9f59f48c803106dbdd6750b0f571a0500e25d3a62c42ba84bb7a9240d10 \ + --hash=sha256:1d887be5f1a3f17559a78707a6bc0560f4f8cb93cebb9d823d90a63e68bae09b \ + --hash=sha256:230227bf99f36614de84cdc92ffce3a50eb2803020e946f8da945a08fcf766bf \ + --hash=sha256:235d8eac93a28dcce5a1cd7130412885a2aa53d5735cb2230e0f26f589347b65 \ + --hash=sha256:24110035aff586a04a6a6fcf4609270642e4f503c0620c962dff75b653f81414 \ + --hash=sha256:248a90816abfc10ff6e1109b54b8235c3e62f0ac92da16541753deb3b5ae063d \ + --hash=sha256:3175442985558d5415b97f264a6a1bb0af5ecfe10e3f7510257b1ea66bd33848 \ + --hash=sha256:3e280e81713408987b7053f5b922c9f94e45668ca6efff1f02846309ca0b5b0f \ + --hash=sha256:432837364aa6cab2826a72e8a4cdd3586f5ac9ce495217ccd59aa70f2bba8120 \ + --hash=sha256:93015da6f1c0e1e5f2debf752f1803ea52d742d915ad674043d36e471f937507 \ + --hash=sha256:93a03bad17b9741acaff4a8651f8596496506602fa123e70fe67142f1b21ee2e \ + --hash=sha256:b7a6946c345497c14064e0c9585b30f5aaebbefdfc0b245b6bb5a978eb4fc85f \ + --hash=sha256:c3b0e72750c0f6c0373242c1299bc4ffdbebdd5004966ae6df0b2e9845aa6990 \ + 
--hash=sha256:d45b30b9ce7280829888c78650177ab525df2b6785e1a5b3d82b4c147d828c0e \ + --hash=sha256:d726671ae7ae4aa6989e73d26b9f8f8e6af45163a26ea243949d72246566fdd8 \ + --hash=sha256:e058e59154a1e490fb9425b420f87e28144292397607638d73e323509f7efae6 + # via -r release/ray_release/byod/requirements_byod.in colorama==0.4.6 \ --hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \ --hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6 @@ -395,6 +414,86 @@ colorama==0.4.6 \ # anyscale # halo # log-symbols +crc32c==2.3.post0 \ + --hash=sha256:00de0d157dc17bbf01fef615aff6095a78b3561aad37b4ba4a300b11311aae55 \ + --hash=sha256:01787094f281ae7c8f645d7b3c309a02bac45cb385206eee651aa27d933a87e5 \ + --hash=sha256:04220e1db5567dc234d1e9dc182c5b8241905057ec19967ac3a917bcaf06d70e \ + --hash=sha256:0467261c67377a92ad6665a9590b3820cfb12d59c3c6ccac6326200e032ddda4 \ + --hash=sha256:05ad2f6b6392b2a0af159142e6ec029cddd15d67a76b7762b3316cbb5cc8e22c \ + --hash=sha256:05b69167116680bd40116c8fac847950d1eb170fdd42a814602223b4e002b0bb \ + --hash=sha256:061b084e27d92dc3e1a9efd500e6e3feee9e97e8cefe2fbdcc0011cba7f3242a \ + --hash=sha256:08fdba1351d5cbb428d9ba3ce5c03d43687e7b23c6bc0cf99973306e5549dab9 \ + --hash=sha256:0b5f5a18e9bfe98a273d9618d5c04470fe983dcebcd453fac07c398dfac7db10 \ + --hash=sha256:1101b4e680085ea7c01074d38378610392262bc56936ec17eed61f1372197193 \ + --hash=sha256:1285d33cdeda2d2248994d41706f88f0fe58265ae907d23221c07028e79f9670 \ + --hash=sha256:129b0ac8ee712ce42aae36d7e6e5202ab080f06117f57ba2c894226586e80050 \ + --hash=sha256:176b3c5ff7fa4e2f83c241ab9dc4fd1584d1c9333d4c7295c16c9f6097c29933 \ + --hash=sha256:19d6a505582194ed0b2bb257cf3729c922e7e92f457a9f7f5493cf821cb19afc \ + --hash=sha256:217c1b64be777cf235556066c363f4dec22b29a956a174f6361037b1b2065c63 \ + --hash=sha256:21919937ecac802e436c1a9978e8b27522ca87bf67dc5ce3a5b5622c0b5c3a06 \ + --hash=sha256:21ef9ebdfe4f2e45c94327a1a9a222a899be784d78674065ada6e8e240d3a4a8 \ + 
--hash=sha256:247ada85bd0a54012e910ff46697871f80bcff9018b59f7de23161726a146b2a \ + --hash=sha256:295053584dc3a11d8f02d6ccc6dd3698331e252cd816d7652b0723c516ef3c41 \ + --hash=sha256:305ca4eb8c399081a68ca0274ae176753be8430fc874d1e7397a2cbc95748733 \ + --hash=sha256:31f59b051a7fa4a2ada3f76a79014be38c45e9d3c906eca381e9007677ffe506 \ + --hash=sha256:335f1fcd5fb346be4ac4c982f89bb66b39c93a2c2d4bcdb3e3188d8adcb225b2 \ + --hash=sha256:363b8f2993f07eb8ac665c7227cb2a569fb1f4eb1551a05695bc2f94c23307e4 \ + --hash=sha256:38f4c8eaa77fa9bbe690b58546dd3f2e244c13d5d0a01fa93076d3817a22bc68 \ + --hash=sha256:3bb11668f75a7f4f699b9a125aaf15259687f691beb95e756e3bea80d7163645 \ + --hash=sha256:3c24ebb561e10a5eada2128a7357e41969155cebe7b34656176fc24412d45c8b \ + --hash=sha256:3e547c06a1dda463daf398661af6bda767debe0097630b48c463605e38ade31a \ + --hash=sha256:3ef1b87ac755e20933bc8136a45ca9993a03c0b0ba16dd946ab287108305332b \ + --hash=sha256:42c0363d68a95d133af02803772395b42bc202840ea70a317e2b46beb9e53af0 \ + --hash=sha256:4459462c732232ffb29b58decd246ed5cdb8c16ae141f57f03cb2e3445dc1d2e \ + --hash=sha256:470bb05c224ede904af0278d18ac26f1132a8cfc7a11cfa0406c26ef75765427 \ + --hash=sha256:585ab3307a2aa73b935f0b0358197f0af5306204d646ac321ecf01f2a3725f94 \ + --hash=sha256:5be4ad72c198c4a22515ef2ad728f9829ee3d75e6c7f3e41030c8266e46c0c7c \ + --hash=sha256:5c9e58f96a0e56e60ea683504f605b76c73b7f885837bfc8bd8346b054133045 \ + --hash=sha256:6072cc60102a8ac86f45ab8d29c8679a8ac0445477eed3d0af6d0becceea1392 \ + --hash=sha256:68941ac55632f231120baeeba72690cdf2ec2531fea3ceab4612dbf855411b05 \ + --hash=sha256:69cb66a0c680ae531df7f32833a3d6df26aeeb144c0f7a8899d2d5bb7c9cdc2c \ + --hash=sha256:69cd27f493a6bcbeb1b59eea4a978734ebf3ce2b6f757a99405d6eebd38af551 \ + --hash=sha256:6bfc277c43fa1dd4c4a91a1b56347008e34c8214dd99b1424b5d636272f2922c \ + --hash=sha256:75e9a588e7241d09de9023dc51174cc2c9ac7c453ae0e26a5718e266b48ae392 \ + --hash=sha256:75eb7cc4200745cbc717bb0b83b538e6582be980d4f8c9f9bb0740a23e93a4d8 \ + 
--hash=sha256:771ad4abacb89a14e1f5cbbb291ead652a0c9ed4be2c9b579c869957c0b03e3d \ + --hash=sha256:7a61e6754ee54bbae9253035eabb5658d0ada9162eb1b98feabefb044b95e6c0 \ + --hash=sha256:7d02e45cddebfa82694fc9fc7df2f42366431e90b0abd40c5c63758bc9234123 \ + --hash=sha256:7d4b39ca6791830c4f1c053d2d8983627af702f0445535ff53d3220f35cf6ce6 \ + --hash=sha256:7fb366626bf7ef66e55656c8385fcc94f22f8d3847a7a84c810d2e3f63f54c62 \ + --hash=sha256:80ddf6b0594bb980a635ff4818c0c64927193c1a09e8b5b6986769e94a7ba9ee \ + --hash=sha256:85b3395c476916b098a20cfb8686558865ca3ef71caaf9e6b0a548b2049ee87d \ + --hash=sha256:86837a00716056c29f9d84c980cdac050ba3c7610c9edca1b2ac01192715725d \ + --hash=sha256:89bcf1158c577635bdc61b27d29deb2fe0c1191a54a490f976563a73abe3b2e3 \ + --hash=sha256:8a3ff6b893ab482f0841a2b7e394adb749b1a896c854ce92f72c60e2ea3a3553 \ + --hash=sha256:8af10d0c3752db01dfa77c6c4c8fa070bdefe939eb9ab94b4dec1dcc2cc11fa5 \ + --hash=sha256:9ebbcde06765fcde3d2d440153839a9ac675866fb25aa86219595c370e6d3f7c \ + --hash=sha256:a2f6105a430f4f1f393cc660bf8ac3a4860396fa9b5ac8bf0c7ba1de044a3cbc \ + --hash=sha256:a4d98142f6e40dec28994846a6acafd96ba822d81b3c6671633fb11d41692c32 \ + --hash=sha256:a57d1ec8f1aa45c14a51770b011359b511eb7dcc6ffd7c8fc9e918e2aa009416 \ + --hash=sha256:acb0d4a1cf19fdc2946ab9b1dc5d4f1347e97b356a863fbba2d8a3d3c1cbe815 \ + --hash=sha256:b3628ff77ca4cf3c3f0209d5eb824b79d8e324bbb1feaff3fb6bff8adc23ec08 \ + --hash=sha256:b60dd506177d2ea68fb548caa9cc383f46c024947d1990604c11aa615ec9da62 \ + --hash=sha256:b6a4df4a978dbf43e548a008dc4686f6e24d52defb8c03a79b67aebfeaa2caa6 \ + --hash=sha256:b6ad74ec499a3d6981900c1e2873b1e6a19e2ee3c650a3e611c3076ad9167f3a \ + --hash=sha256:b823aacfefe0001a08667d7a4d7dd87133537e3628ed581fa416829a5dad26fd \ + --hash=sha256:bce06be1d9aa7e4b3e3038fe80facafa3526bec9e484ec089c035b8018727c1b \ + --hash=sha256:c002f55429a12ec87a0b33a073b384d26edd46d89b3cf7cfb6ddf5abb6e2bfab \ + --hash=sha256:c102e8988618e3bd15f4297ab95631c6d9e59326f9af17bf7d71c2ad4639a7f7 \ + 
--hash=sha256:c55e8a45e360aeb3cea2cf9d9fb3771a711ed3c3fce2d91c874d767aae4f5cee \ + --hash=sha256:cb4222a766f59b1cd8cbe56af5dbdfd3a2c0ec40b60c9ee6efe4a5cabc94112d \ + --hash=sha256:cce0c79dbf4d4f2276cb9e32f4d0dfc3ce5d8cc5c3f0a0bc62612505cd779c67 \ + --hash=sha256:cee4275dfb3cfc4a1e4c338089f3223fce878d7151cebb095937c07410371908 \ + --hash=sha256:d18e82f66a0c25c8fb10b21e71cf2f2aa81441fd1a062809249e98a338ef9c81 \ + --hash=sha256:d7bcc07f3a63cc8be7536ca35a4c5b96763b8e0ce5d48f30d9374ec7e381b057 \ + --hash=sha256:d9ea9e6eb2912051a40ac6877646925ee3a058c4aa3868988fe1d8c4577f57d4 \ + --hash=sha256:dbae415e9ec7dfdcdeac981cf4833d9942ce9de175b2be5a21c641c3a88e609b \ + --hash=sha256:de0307898c24a8ae29d4b94ddd3aa81b73d3b2b0e490d226e3a3dd05526dfca1 \ + --hash=sha256:e0925a7a8fcc216744e86c21f1749c22f950f9bdad512cb5c80ee85017625bd5 \ + --hash=sha256:e311f52e24b633e8d588ab9a0e7992bfcfe8284a1655202bdac5aee80254a3fd \ + --hash=sha256:e88bbdeba430dfee6d83192a9e55c8e89884a3c5215d1b7643395ecbbd1b502b \ + --hash=sha256:ec216adbbf7ad1506918c8199a144d26740650b594f79755f5f1affec7e7820b + # via -r release/ray_release/byod/requirements_byod.in crcmod==1.7 \ --hash=sha256:dc7051a0db5f2bd48665a990d3ec1cc305a466a77358ca4492826f41f283601e # via gsutil diff --git a/release/ray_release/command_runner/anyscale_job_runner.py b/release/ray_release/command_runner/anyscale_job_runner.py index 0df6678b1e86c..223ca7f093ea1 100644 --- a/release/ray_release/command_runner/anyscale_job_runner.py +++ b/release/ray_release/command_runner/anyscale_job_runner.py @@ -3,7 +3,7 @@ import re import tempfile import shlex -from typing import TYPE_CHECKING, Any, Dict, Optional +from typing import TYPE_CHECKING, Any, Dict, Optional, List from ray_release.cluster_manager.cluster_manager import ClusterManager from ray_release.command_runner.job_runner import JobRunner @@ -203,6 +203,7 @@ def run_command( env: Optional[Dict] = None, timeout: float = 3600.0, raise_on_timeout: bool = True, + pip: Optional[List[str]] = None, ) 
-> float: prepare_command_strs = [] prepare_command_timeouts = [] @@ -262,6 +263,7 @@ def run_command( working_dir=".", upload_path=self.upload_path, timeout=int(timeout), + pip=pip, ) try: error = self.job_manager.last_job_result.state.error diff --git a/release/ray_release/command_runner/command_runner.py b/release/ray_release/command_runner/command_runner.py index f82f029bb178e..7e63fe9590844 100644 --- a/release/ray_release/command_runner/command_runner.py +++ b/release/ray_release/command_runner/command_runner.py @@ -1,5 +1,5 @@ import abc -from typing import Dict, Any, Optional +from typing import Dict, Any, Optional, List from ray_release.cluster_manager.cluster_manager import ClusterManager from ray_release.file_manager.file_manager import FileManager @@ -100,6 +100,7 @@ def run_command( env: Optional[Dict] = None, timeout: float = 3600.0, raise_on_timeout: bool = True, + pip: Optional[List[str]] = None, ) -> float: """Run command.""" raise NotImplementedError diff --git a/release/ray_release/glue.py b/release/ray_release/glue.py index afbd8f4272542..8b022209106b6 100644 --- a/release/ray_release/glue.py +++ b/release/ray_release/glue.py @@ -324,6 +324,7 @@ def _running_test_script( env=command_env, timeout=command_timeout, raise_on_timeout=not is_long_running, + pip=test.get_byod_pips(), ) except ( TestCommandError, diff --git a/release/ray_release/job_manager/anyscale_job_manager.py b/release/ray_release/job_manager/anyscale_job_manager.py index 11bf883f7a1c5..85d9df1250cc5 100644 --- a/release/ray_release/job_manager/anyscale_job_manager.py +++ b/release/ray_release/job_manager/anyscale_job_manager.py @@ -4,7 +4,7 @@ import tempfile from collections import deque from contextlib import contextmanager -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, Optional, Tuple, List from anyscale.sdk.anyscale_client.models import ( @@ -52,6 +52,7 @@ def _run_job( env_vars: Dict[str, Any], working_dir: Optional[str] = None, upload_path: 
Optional[str] = None, + pip: Optional[List[str]] = None, ) -> None: env = os.environ.copy() env.setdefault("ANYSCALE_HOST", str(ANYSCALE_HOST)) @@ -60,7 +61,10 @@ def _run_job( anyscale_client = self.sdk - runtime_env = {"env_vars": env_vars} + runtime_env = { + "env_vars": env_vars, + "pip": pip or [], + } if working_dir: runtime_env["working_dir"] = working_dir if upload_path: @@ -254,9 +258,14 @@ def run_and_wait( working_dir: Optional[str] = None, timeout: int = 120, upload_path: Optional[str] = None, + pip: Optional[List[str]] = None, ) -> Tuple[int, float]: self._run_job( - cmd_to_run, env_vars, working_dir=working_dir, upload_path=upload_path + cmd_to_run, + env_vars, + working_dir=working_dir, + upload_path=upload_path, + pip=pip, ) return self._wait_job(timeout) diff --git a/release/ray_release/schema.json b/release/ray_release/schema.json index b9b5f038bb852..a812fd709fbff 100644 --- a/release/ray_release/schema.json +++ b/release/ray_release/schema.json @@ -112,6 +112,9 @@ "pre_run_cmds": { "type": "array" }, + "pip": { + "type": "array" + }, "runtime_env": { "type": "array" } diff --git a/release/ray_release/test.py b/release/ray_release/test.py index ec4edde9766a3..b52b1bea85ca7 100644 --- a/release/ray_release/test.py +++ b/release/ray_release/test.py @@ -141,6 +141,14 @@ def get_byod_runtime_env(self) -> Dict[str, str]: return {} return _convert_env_list_to_dict(self["cluster"]["byod"].get("runtime_env", [])) + def get_byod_pips(self) -> List[str]: + """ + Returns the cluster's `byod.pip` list, or [] if unset or not a BYOD cluster. + """ + if not self.is_byod_cluster(): + return [] + return self["cluster"]["byod"].get("pip", []) + def get_name(self) -> str: """ Returns the name of the test. 
diff --git a/release/release_tests.yaml b/release/release_tests.yaml index 1e81a59e47428..3f29b3cdb882c 100644 --- a/release/release_tests.yaml +++ b/release/release_tests.yaml @@ -695,8 +695,14 @@ jailed: true frequency: nightly + python: "3.8" team: data cluster: + byod: + type: gpu + runtime_env: + - RAY_task_oom_retries=50 + - RAY_min_memory_free_bytes=1000000000 cluster_env: app_config_oom.yaml cluster_compute: compute_cpu_16.yaml @@ -722,8 +728,14 @@ jailed: true frequency: nightly + python: "3.8" team: data cluster: + byod: + type: gpu + runtime_env: + - RAY_task_oom_retries=50 + - RAY_min_memory_free_bytes=1000000000 cluster_env: app_config_oom.yaml cluster_compute: compute_cpu_16.yaml @@ -750,7 +762,13 @@ frequency: nightly team: data + python: "3.8" cluster: + byod: + type: gpu + runtime_env: + - RAY_task_oom_retries=50 + - RAY_min_memory_free_bytes=1000000000 cluster_env: app_config_oom.yaml cluster_compute: compute_cpu_16_worker_nodes_2.yaml @@ -4760,6 +4778,7 @@ frequency: nightly team: data + python: "3.8" cluster: cluster_env: dask_on_ray/large_scale_dask_on_ray_app_config.yaml cluster_compute: dask_on_ray/dask_on_ray_stress_compute.yaml @@ -5049,6 +5068,7 @@ frequency: nightly team: core + python: "3.8" cluster: cluster_env: oom/stress_tests_tune_air_oom_app_config.yaml cluster_compute: oom/stress_tests_tune_air_oom_compute.yaml @@ -5484,7 +5504,10 @@ frequency: nightly team: data + python: "3.8" cluster: + byod: + type: gpu cluster_env: app_config.yaml cluster_compute: inference.yaml @@ -5509,7 +5532,10 @@ frequency: nightly team: data + python: "3.8" cluster: + byod: + type: gpu cluster_env: shuffle_app_config.yaml cluster_compute: shuffle_compute.yaml @@ -5533,7 +5559,10 @@ frequency: nightly team: data + python: "3.8" cluster: + byod: + type: gpu cluster_env: app_config.yaml cluster_compute: single_node_benchmark_compute.yaml @@ -5559,7 +5588,12 @@ frequency: nightly team: data + python: "3.8" cluster: + byod: + type: gpu + pip: + - 
git+https://github.com/ray-project/ray_shuffling_data_loader.git@add-embedding-model cluster_env: pipelined_training_app.yaml cluster_compute: pipelined_training_compute.yaml @@ -5584,7 +5618,10 @@ frequency: nightly team: data + python: "3.8" cluster: + byod: + type: gpu cluster_env: app_config.yaml cluster_compute: data_ingest_benchmark_compute.yaml @@ -5609,7 +5646,10 @@ frequency: nightly team: data + python: "3.8" cluster: + byod: + type: gpu cluster_env: app_config.yaml cluster_compute: data_ingest_benchmark_compute.yaml @@ -5634,7 +5674,10 @@ frequency: nightly team: data + python: "3.8" cluster: + byod: + type: gpu cluster_env: app_config.yaml cluster_compute: data_ingest_benchmark_compute_gpu.yaml @@ -5664,7 +5707,10 @@ frequency: nightly team: data + python: "3.8" cluster: + byod: + type: gpu cluster_env: app_config.yaml cluster_compute: data_ingest_benchmark_compute_gpu.yaml @@ -5689,7 +5735,10 @@ frequency: nightly team: data + python: "3.8" cluster: + byod: + type: gpu cluster_env: app_config.yaml cluster_compute: single_node_benchmark_compute.yaml @@ -5712,7 +5761,10 @@ frequency: nightly team: data + python: "3.8" cluster: + byod: + type: gpu cluster_env: app_config.yaml cluster_compute: single_node_benchmark_compute.yaml @@ -5736,7 +5788,10 @@ frequency: nightly team: data + python: "3.8" cluster: + byod: + type: gpu cluster_env: app_config.yaml cluster_compute: single_node_benchmark_compute.yaml @@ -5759,7 +5814,10 @@ frequency: nightly team: data + python: "3.8" cluster: + byod: + type: gpu cluster_env: read_tfrecords_benchmark_app.yaml cluster_compute: single_node_benchmark_compute.yaml @@ -5783,7 +5841,10 @@ frequency: nightly team: data + python: "3.8" cluster: + byod: + type: gpu cluster_env: app_config.yaml cluster_compute: single_node_benchmark_compute.yaml @@ -5807,7 +5868,10 @@ frequency: nightly team: data + python: "3.8" cluster: + byod: + type: gpu cluster_env: app_config.yaml cluster_compute: single_node_benchmark_compute.yaml @@ 
-5831,7 +5895,10 @@ frequency: nightly team: data + python: "3.8" cluster: + byod: + type: gpu cluster_env: app_config.yaml cluster_compute: multi_node_benchmark_compute.yaml @@ -5855,7 +5922,10 @@ frequency: nightly team: data + python: "3.8" cluster: + byod: + type: gpu cluster_env: app_config.yaml cluster_compute: single_node_benchmark_compute.yaml @@ -5958,7 +6028,11 @@ frequency: nightly team: data + python: "3.8" cluster: + byod: + runtime_env: + - RAY_worker_killing_policy=retriable_lifo cluster_env: shuffle/shuffle_app_config.yaml cluster_compute: shuffle/datasets_large_scale_compute_small_instances.yaml @@ -5983,7 +6057,13 @@ frequency: nightly team: data + python: "3.8" cluster: + byod: + runtime_env: + - RAY_worker_killing_policy=retriable_lifo + pip: + - ray[default] cluster_env: shuffle/shuffle_app_config.yaml cluster_compute: shuffle/datasets_large_scale_compute_small_instances.yaml @@ -6008,7 +6088,13 @@ frequency: nightly team: data + python: "3.8" cluster: + byod: + runtime_env: + - RAY_worker_killing_policy=retriable_lifo + pip: + - ray[default] cluster_env: shuffle/shuffle_app_config.yaml cluster_compute: shuffle/datasets_large_scale_compute_small_instances.yaml @@ -6265,8 +6351,14 @@ frequency: nightly team: data + python: "3.8" cluster: # leave oom disabled as test is marked unstable at the moment. + byod: + runtime_env: + - RAY_memory_monitor_refresh_ms=0 + pip: + - ray[default] cluster_env: shuffle/shuffle_app_config_oom_disabled.yaml cluster_compute: shuffle/datasets_large_scale_compute_small_instances.yaml