Skip to content

Commit

Permalink
Merge branch 'master' into prepared-pg
Browse files Browse the repository at this point in the history
  • Loading branch information
rynewang authored Aug 23, 2024
2 parents 15c430a + fd84b9d commit cec311f
Show file tree
Hide file tree
Showing 1,019 changed files with 26,606 additions and 15,115 deletions.
2 changes: 2 additions & 0 deletions .buildkite/_forge.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ steps:
- "3.9"
- "3.10"
- "3.11"
- "3.12"
cuda:
- "11.7.1-cudnn8"
- "11.8.0-cudnn8"
Expand All @@ -41,6 +42,7 @@ steps:
- "3.9"
- "3.10"
- "3.11"
- "3.12"
env:
PYTHON_VERSION: "{{matrix}}"

Expand Down
2 changes: 1 addition & 1 deletion .buildkite/base.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ steps:
label: "wanda: oss-ci-base_gpu-py{{matrix}}"
wanda: ci/docker/base.gpu.wanda.yaml
matrix:
- "3.11"
- "3.12"
env:
PYTHON: "{{matrix}}"

Expand Down
1 change: 1 addition & 0 deletions .buildkite/build.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ steps:
- "3.9"
- "3.10"
- "3.11"
- "3.12"

- label: ":tapioca: build: ray-ml py{{matrix}} docker (x86_64)"
tags:
Expand Down
4 changes: 2 additions & 2 deletions .buildkite/core.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -164,10 +164,10 @@ steps:
commands:
- bazel run //ci/ray_ci:test_in_docker --
python/ray/util/dask/... python/ray/tests/modin/... core
--build-name data16build
--build-name data17build
--parallelism-per-worker 2
depends_on:
- data16build
- data17build
- forge

- label: ":ray: core: dashboard tests"
Expand Down
28 changes: 14 additions & 14 deletions .buildkite/data.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ steps:
- name: data6build
wanda: ci/docker/data6.build.wanda.yaml

- name: data16build
wanda: ci/docker/data16.build.wanda.yaml
- name: data17build
wanda: ci/docker/data17.build.wanda.yaml

- name: databuild-multipy
label: "wanda: databuild-py{{matrix}}"
Expand Down Expand Up @@ -42,7 +42,7 @@ steps:
--except-tags data_integration,doctest
depends_on: data6build

- label: ":database: data: arrow 16 tests"
- label: ":database: data: arrow 17 tests"
tags:
- python
- data
Expand All @@ -52,12 +52,12 @@ steps:
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/data/... //python/ray/air/... data
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}"
--worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3
--build-name data16build
--build-name data17build
--except-tags data_integration,doctest
depends_on: data16build
depends_on: data17build

- label: ":database: data: arrow 16 {{matrix.python}} tests ({{matrix.worker_id}})"
key: data16_python_tests
- label: ":database: data: arrow 17 {{matrix.python}} tests ({{matrix.worker_id}})"
key: data17_python_tests
if: build.pull_request.labels includes "continuous-build" || pipeline.id == "0189e759-8c96-4302-b6b5-b4274406bf89" || pipeline.id == "018f4f1e-1b73-4906-9802-92422e3badaa"
tags:
- python
Expand Down Expand Up @@ -109,17 +109,17 @@ steps:
commands:
# doc tests
- bazel run //ci/ray_ci:test_in_docker -- python/ray/... //doc/... data
--build-name data16build
--build-name data17build
--except-tags gpu
--only-tags doctest
--parallelism-per-worker 2
# doc examples
- bazel run //ci/ray_ci:test_in_docker -- //doc/... data
--build-name data16build
--build-name data17build
--except-tags gpu,post_wheel_build,doctest
--parallelism-per-worker 2
--skip-ray-installation
depends_on: data16build
depends_on: data17build

- label: ":database: data: doc gpu tests"
tags:
Expand Down Expand Up @@ -162,9 +162,9 @@ steps:
instance_type: small
commands:
- bazel run //ci/ray_ci:test_in_docker -- python/ray/dashboard/... data
--build-name data16build
--build-name data17build
--parallelism-per-worker 3
depends_on: data16build
depends_on: data17build

- label: ":database: data: flaky tests"
key: data_flaky_tests
Expand All @@ -177,9 +177,9 @@ steps:
commands:
- bazel run //ci/ray_ci:test_in_docker -- //... data --run-flaky-tests
--parallelism-per-worker 3
--build-name data16build
--build-name data17build
--except-tags gpu_only,gpu
depends_on: data16build
depends_on: data17build

- label: ":database: data: flaky gpu tests"
key: data_flaky_gpu_tests
Expand Down
4 changes: 4 additions & 0 deletions .buildkite/lint.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@ steps:
- ./ci/lint/lint.sh {{matrix}}
matrix:
- api_annotations
- api_policy_check core
- api_policy_check serve
- api_policy_check data
- api_policy_check train
- api_policy_check rllib

- label: ":lint-roller: lint: linkcheck"
instance_type: medium
Expand Down
67 changes: 66 additions & 1 deletion .buildkite/ml.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@ steps:
IMAGE_TO: mlbuild
RAYCI_IS_GPU_BUILD: "false"

- name: mlbuild-multipy
label: "wanda: mlbuild-py{{matrix}}"
wanda: ci/docker/ml.build.wanda.yaml
depends_on: oss-ci-base_ml-multipy
env:
IMAGE_FROM: cr.ray.io/rayproject/oss-ci-base_ml-py{{matrix}}
IMAGE_TO: mlbuild-py{{matrix}}
PYTHON: "{{matrix}}"
RAYCI_IS_GPU_BUILD: "false"
matrix:
- "3.12"

- name: mllightning2gpubuild
wanda: ci/docker/mllightning2gpu.build.wanda.yaml
depends_on: oss-ci-base_gpu
Expand All @@ -29,6 +41,18 @@ steps:
IMAGE_TO: mlgpubuild
RAYCI_IS_GPU_BUILD: "true"

- name: mlgpubuild-multipy
label: "wanda: mlgpubuild-py{{matrix}}"
wanda: ci/docker/ml.build.wanda.yaml
depends_on: oss-ci-base_gpu-multipy
env:
IMAGE_FROM: cr.ray.io/rayproject/oss-ci-base_gpu-py{{matrix}}
IMAGE_TO: mlgpubuild-py{{matrix}}
PYTHON: "{{matrix}}"
RAYCI_IS_GPU_BUILD: "true"
matrix:
- "3.12"

# tests
- label: ":train: ml: train tests"
tags: train
Expand All @@ -40,6 +64,28 @@ steps:
--except-tags gpu_only,gpu,minimal,tune,doctest,needs_credentials
depends_on: [ "mlbuild", "forge" ]

- label: ":train: ml: {{matrix.python}} tests ({{matrix.worker_id}})"
if: build.pull_request.labels includes "continuous-build" || pipeline.id == "0189e759-8c96-4302-b6b5-b4274406bf89" || pipeline.id == "018f4f1e-1b73-4906-9802-92422e3badaa"
tags:
- python
- train
- tune
- ml
instance_type: large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/train/... //python/ray/tune/... //python/ray/air/... ml
--workers 4 --worker-id {{matrix.worker_id}} --parallelism-per-worker 3
--python-version {{matrix.python}}
--except-tags gpu_only,gpu,minimal,doctest,needs_credentials,soft_imports,rllib,multinode
depends_on:
- mlbuild-multipy
- forge
job_env: forge
matrix:
setup:
python: ["3.12"]
worker_id: ["0", "1", "2", "3"]

- label: ":train: ml: train gpu tests"
tags:
- train
Expand All @@ -53,6 +99,25 @@ steps:
--only-tags gpu,gpu_only
depends_on: [ "mlgpubuild", "forge" ]

- label: ":train: ml: train gpu {{matrix.python}} tests ({{matrix.worker_id}})"
if: build.pull_request.labels includes "continuous-build" || pipeline.id == "0189e759-8c96-4302-b6b5-b4274406bf89" || pipeline.id == "018f4f1e-1b73-4906-9802-92422e3badaa"
tags:
- train
- gpu
instance_type: gpu-large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/train/... //python/ray/air/... //doc/... ml
--workers 2 --worker-id {{matrix.worker_id}} --parallelism-per-worker 2
--python-version {{matrix.python}}
--build-name mlgpubuild-py{{matrix.python}}
--only-tags gpu,gpu_only
--except-tags doctest
depends_on: [ "mlgpubuild-multipy", "forge" ]
matrix:
setup:
python: ["3.12"]
worker_id: ["0", "1"]

- label: ":train: ml: train authentication tests"
tags:
- train
Expand Down Expand Up @@ -199,7 +264,7 @@ steps:
tags:
- train
- skip-on-premerge
instance_type: medium
instance_type: large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //... ml --run-flaky-tests
--parallelism-per-worker 2
Expand Down
2 changes: 1 addition & 1 deletion .buildkite/release-automation/pre_release.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ steps:
instance_type: default
commands:
- bazel build --build_python_zip --incompatible_use_python_toolchains=false --python_path=python //ci/ray_ci/automation:update_version
- cp bazel-bin/ci/ray_ci/automation/update_version.zip /artifact-mount/
- cp bazel-bin/ci/ray_ci/automation/update_version /artifact-mount/

- label: "Trigger Postmerge test"
if: build.env("RAYCI_WEEKLY_RELEASE_NIGHTLY") != "1"
Expand Down
1 change: 1 addition & 0 deletions .buildkite/release/build.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ steps:
# release test suite. We don't have release tests for Python 3.10 yet.
- "3.9"
- "3.11"
- "3.12"
platform:
- cu12.3.2-cudnn9
- cpu
Expand Down
46 changes: 6 additions & 40 deletions .buildkite/rllib.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,39 +31,27 @@ steps:
--test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
depends_on: rllibbuild

- label: ":brain: rllib: learning tests tf2-static-graph"
tags: rllib
parallelism: 3
instance_type: large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3
--only-tags fake_gpus,learning_tests_discrete,crashing_cartpole,stateless_cartpole,learning_tests_continuous
--except-tags torch_only,tf2_only,no_tf_static_graph,multi_gpu
--test-arg --framework=tf
depends_on: rllibbuild

- label: ":brain: rllib: learning tests pytorch"
tags: rllib
parallelism: 4
parallelism: 5
instance_type: large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3
--only-tags fake_gpus,learning_tests_discrete,crashing_cartpole,stateless_cartpole,learning_tests_continuous
--except-tags tf_only,tf2_only,multi_gpu,learning_tests_pytorch_use_all_core
--except-tags tf_only,tf2_only,gpu,multi_gpu,learning_tests_pytorch_use_all_core
--test-arg --framework=torch
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}"
--only-tags learning_tests_pytorch_use_all_core
--except-tags tf_only,tf2_only,multi_gpu
--except-tags tf_only,tf2_only,gpu,multi_gpu
--test-arg --framework=torch
--skip-ray-installation
depends_on: rllibbuild

- label: ":brain: rllib: examples"
tags: rllib
parallelism: 3
parallelism: 5
instance_type: large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
Expand All @@ -79,18 +67,6 @@ steps:
--test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
depends_on: rllibbuild

- label: ":brain: rllib: learning tests tf2-eager-tracing"
tags: rllib
parallelism: 2
instance_type: large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3
--only-tags learning_tests_discrete,learning_tests_continuous,crashing_cartpole,stateless_cartpole
--except-tags fake_gpus,torch_only,multi_gpu,no_tf_eager_tracing
--test-arg --framework=tf2
depends_on: rllibbuild

- label: ":brain: rllib: tests dir"
tags: rllib_directly
parallelism: 2
Expand All @@ -107,7 +83,7 @@ steps:
tags:
- rllib_gpu
- gpu
parallelism: 3
parallelism: 5
instance_type: gpu
commands:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
Expand Down Expand Up @@ -153,16 +129,6 @@ steps:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --only-tags torch_2.x_only_benchmark
depends_on: rllibbuild

- label: ":brain: rllib: memory leak tf2-eager-tracing tests"
tags: rllib
instance_type: medium
commands:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
--only-tags memory_leak_tests
--except-tags flaky
--test-arg --framework=tf2
depends_on: rllibbuild

- label: ":brain: rllib: memory leak pytorch tests"
tags: rllib
instance_type: medium
Expand Down Expand Up @@ -199,7 +165,7 @@ steps:
tags:
- rllib_gpu
- gpu
parallelism: 2
parallelism: 5
instance_type: gpu-large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
Expand Down
Loading

0 comments on commit cec311f

Please sign in to comment.