From aa094c560b35c104e12c391fc0d254832536ba7a Mon Sep 17 00:00:00 2001 From: chendong-1998 Date: Tue, 25 Feb 2025 18:45:32 +0800 Subject: [PATCH 1/4] Add simple workflow use npu image. Signed-off-by: chendong-1998 --- .github/workflows/npu_test.yml | 45 ++++++++++++++++++++++++++++++++++ tests/npu/test_npu_worker.py | 31 +++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 .github/workflows/npu_test.yml create mode 100644 tests/npu/test_npu_worker.py diff --git a/.github/workflows/npu_test.yml b/.github/workflows/npu_test.yml new file mode 100644 index 00000000..6be0c30f --- /dev/null +++ b/.github/workflows/npu_test.yml @@ -0,0 +1,45 @@ +name: npu_test + +on: + # Trigger the workflow on push or pull request, + # but only for the main and v0.2.x branches + push: + branches: + - main + - v0.2.x + paths: + - "**/*.py" + - .github/workflows/npu_test.yml + pull_request: + branches: + - main + - v0.2.x + paths: + - "**/*.py" + - .github/workflows/npu_test.yml + +# Declare read-only permission for repository contents. 
+permissions: + contents: read + +jobs: + ray: + runs-on: [self-hosted, npu-0] + env: + HTTP_PROXY: ${{ secrets.PROXY_HTTP }} + HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} + NO_PROXY: "localhost,127.0.0.1" + HF_HUB_ENABLE_HF_TRANSFER: 1 + container: + image: chendong136/verlnpu:ubuntu20.04-py310-cann8.0.0.beta1-ray2.42-torch-npu2.5.1rc1 + options: -v /usr/local/Ascend/driver:/usr/local/Ascend/driver --previleged --shm-size=10g + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + fetch-depth: 0 + - name: Install the current repository + run: | + pip install --upgrade "ray>=2.40.0" + - name: Running ray tests that need NPUs + run: | + pytest -s tests/npu/test_npu_worker.py \ No newline at end of file diff --git a/tests/npu/test_npu_worker.py b/tests/npu/test_npu_worker.py new file mode 100644 index 00000000..dc1c8a1a --- /dev/null +++ b/tests/npu/test_npu_worker.py @@ -0,0 +1,31 @@ +import pytest +import ray +import torch + + +@pytest.fixture +def ray_cluster_with_npus(): + # Assume in the NPU environment. + ray.init() + yield + ray.shutdown() + + +@ray.remote(resources={"NPU": 1}) +class Worker: + def __init__(self): + pass + + def test_torch_npu_avalable(self): + available = False + try: + import torch_npu + available = torch.npu.is_available() + except Exception as e: + pass + return available + + +def test_torch_npu(ray_cluster_with_npus): + worker = Worker.remote() + assert ray.get(worker.test_torch_npu_avalable.remote()) From 2e0ce697d97025e6793f8b07541109bbe2abf1da Mon Sep 17 00:00:00 2001 From: chendong-1998 Date: Wed, 26 Feb 2025 16:23:40 +0800 Subject: [PATCH 2/4] fix lint and copyright Signed-off-by: chendong-1998 --- tests/npu/test_npu_worker.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tests/npu/test_npu_worker.py b/tests/npu/test_npu_worker.py index dc1c8a1a..0550b191 100644 --- a/tests/npu/test_npu_worker.py +++ b/tests/npu/test_npu_worker.py @@ -1,3 +1,17 @@ +# Copyright 2024 Bytedance Ltd. 
and/or its affiliates +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import pytest import ray import torch @@ -13,9 +27,10 @@ def ray_cluster_with_npus(): @ray.remote(resources={"NPU": 1}) class Worker: + def __init__(self): pass - + def test_torch_npu_avalable(self): available = False try: @@ -24,7 +39,7 @@ def test_torch_npu_avalable(self): except Exception as e: pass return available - + def test_torch_npu(ray_cluster_with_npus): worker = Worker.remote() From 84e9d53c431264bb1f6f310a33c32e2d2d87f9ed Mon Sep 17 00:00:00 2001 From: chendong-1998 Date: Wed, 26 Feb 2025 16:27:51 +0800 Subject: [PATCH 3/4] fix typo Signed-off-by: chendong-1998 --- .github/workflows/npu_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/npu_test.yml b/.github/workflows/npu_test.yml index 6be0c30f..8a638bc2 100644 --- a/.github/workflows/npu_test.yml +++ b/.github/workflows/npu_test.yml @@ -32,7 +32,7 @@ jobs: HF_HUB_ENABLE_HF_TRANSFER: 1 container: image: chendong136/verlnpu:ubuntu20.04-py310-cann8.0.0.beta1-ray2.42-torch-npu2.5.1rc1 - options: -v /usr/local/Ascend/driver:/usr/local/Ascend/driver --previleged --shm-size=10g + options: -v /usr/local/Ascend/driver:/usr/local/Ascend/driver --privileged --shm-size=10g steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: From fc2e88af29a61c41cbf76287d53df7d74578bee8 Mon Sep 17 00:00:00 2001 From: chendong-1998 Date: Wed, 26 Feb 2025 17:11:19 +0800 Subject: [PATCH 
4/4] add small job test cpu arch Signed-off-by: chendong-1998 --- .github/workflows/npu_test.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/npu_test.yml b/.github/workflows/npu_test.yml index 8a638bc2..bc101e56 100644 --- a/.github/workflows/npu_test.yml +++ b/.github/workflows/npu_test.yml @@ -23,6 +23,12 @@ permissions: contents: read jobs: + test-cpu-arch: + runs-on: [self-hosted, npu-0] + steps: + - name: verify host cpu architecture. + run: | + lscpu ray: runs-on: [self-hosted, npu-0] env: @@ -32,7 +38,9 @@ jobs: HF_HUB_ENABLE_HF_TRANSFER: 1 container: image: chendong136/verlnpu:ubuntu20.04-py310-cann8.0.0.beta1-ray2.42-torch-npu2.5.1rc1 - options: -v /usr/local/Ascend/driver:/usr/local/Ascend/driver --privileged --shm-size=10g + volumes: + - /usr/local/Ascend/driver:/usr/local/Ascend/driver + options: --privileged --shm-size=10g steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: