Skip to content

Commit

Permalink
Adopt Rapids 25.02 and CUDA 12.8 (#2183)
Browse files Browse the repository at this point in the history
* When building with DOCA in CI restrict the `MORPHEUS_CUDA_ARCHITECTURES` to just those supported by DOCA.
* Install DOCA from an apt repository rather than downloading a deb file.
* Avoid a version conflict for the `mft` package by manually specifying the version of `mft` to ensure the package is installed from the DOCA repo and not the CUDA repo.
* Incorporates changes from #2097 to `cudf_helpers.pyx` to match changes made to cudf.

Closes #2182 

## By Submitting this PR I confirm:
- I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md).
- When the PR is ready for review, new or existing tests cover these changes.
- When the PR is ready for review, the documentation is up to date with these changes.

Authors:
  - David Gardner (https://github.com/dagardner-nv)
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Michael Demoret (https://github.com/mdemoret-nv)
  - Will Killian (https://github.com/willkill07)

URL: #2183
  • Loading branch information
dagardner-nv authored Feb 19, 2025
1 parent 745002d commit e80ba4d
Show file tree
Hide file tree
Showing 55 changed files with 236 additions and 221 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"context": "${localWorkspaceFolder}/.devcontainer",
"dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
"args": {
"CUDA": "12.5",
"CUDA": "12.8",
"PYTHON_PACKAGE_MANAGER": "conda",
"BASE": "rapidsai/devcontainers:24.12-cpp-mambaforge-ubuntu22.04"
}
Expand Down Expand Up @@ -47,7 +47,7 @@
"initializeCommand": [
"/bin/bash",
"-c",
"${localWorkspaceFolder}/.devcontainer/initialize-command.sh && mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.5-envs}"
"${localWorkspaceFolder}/.devcontainer/initialize-command.sh && mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.8-envs}"
],
"postAttachCommand": [
"/bin/bash",
Expand All @@ -66,7 +66,7 @@
"source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.5-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
"source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
Expand Down
45 changes: 26 additions & 19 deletions .devcontainer/docker/optional_deps/doca.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,39 +17,46 @@
set -e

MORPHEUS_SUPPORT_DOCA=${MORPHEUS_SUPPORT_DOCA:-OFF}

LINUX_DISTRO=${LINUX_DISTRO:-ubuntu}
LINUX_VER=${LINUX_VER:-22.04}

DOCA_OS_VERSION=${DOCA_OS_VERSION:-"22.04"}
DOCA_VERSION=${DOCA_VERSION:-2.7.0}
PKG_ARCH=${PKG_ARCH:-$(dpkg --print-architecture)}

REAL_ARCH=${REAL_ARCH:-$(arch)}
if [[ ${REAL_ARCH} == "x86_64" ]]; then
DOCA_ARCH="x86_64"
elif [[ ${REAL_ARCH} == "aarch64" ]]; then
DOCA_ARCH="arm64-sbsa"
else
echo "Unsupported architecture: ${REAL_ARCH}"
exit 1
fi

DOCA_URL="https://linux.mellanox.com/public/repo/doca/${DOCA_VERSION}/${LINUX_DISTRO}${DOCA_OS_VERSION}/${DOCA_ARCH}/"
DOCA_GPG_URL="https://linux.mellanox.com/public/repo/doca/GPG-KEY-Mellanox.pub"

# Exit early if nothing to do
if [[ ${MORPHEUS_SUPPORT_DOCA} != @(TRUE|ON) ]]; then
exit 0
fi

WORKING_DIR=$1

mkdir -p ${WORKING_DIR}
echo "Installing DOCA using directory: ${WORKING_DIR}"

DEB_DIR=${WORKING_DIR}/deb

mkdir -p ${DEB_DIR}

DOCA_OS_VERSION="ubuntu2204"
DOCA_PKG_LINK="https://www.mellanox.com/downloads/DOCA/DOCA_v${DOCA_VERSION}/host/doca-host_${DOCA_VERSION}-204000-24.04-${DOCA_OS_VERSION}_${PKG_ARCH}.deb"

# Upgrade the base packages (diff between image and Canonical upstream repo)
apt update -y
apt upgrade -y
echo "Adding DOCA repo: ${DOCA_URL}"
curl ${DOCA_GPG_URL} | gpg --dearmor > /etc/apt/trusted.gpg.d/GPG-KEY-Mellanox.pub
echo "deb [signed-by=/etc/apt/trusted.gpg.d/GPG-KEY-Mellanox.pub] $DOCA_URL ./" > /etc/apt/sources.list.d/doca.list

# Install wget
apt install -y --no-install-recommends wget

wget -qO - ${DOCA_PKG_LINK} -O doca-host.deb
apt install ./doca-host.deb
apt update
apt install -y doca-all
apt install -y doca-gpu doca-gpu-dev

# Need to explicitly install the version of mft provided by the DOCA repo, overriding the version from the CUDA repo,
# to avoid version conflicts.
# If/when we update the OS, DOCA, or CUDA version, we need to update the mft version here as well by checking
# the output of `apt policy mft`
apt install -y doca-all doca-gpu doca-gpu-dev mft=4.28.0-92

# Now install the gdrcopy library according to: https://github.com/NVIDIA/gdrcopy
GDRCOPY_DIR=${WORKING_DIR}/gdrcopy
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci_pipe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ on:
env:
CHANGE_TARGET: "${{ github.base_ref }}"
CUDA_PATH: "/usr/local/cuda/"
CUDA_VER: "12.5"
CUDA_VER: "12.8"
GH_TOKEN: "${{ github.token }}"
GIT_COMMIT: "${{ github.sha }}"
MORPHEUS_ROOT: "${{ github.workspace }}/morpheus"
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ jobs:
conda_run_build: ${{ !fromJSON(needs.prepare.outputs.is_pr) || fromJSON(needs.prepare.outputs.has_conda_build_label) }}
# Upload morpheus conda packages only for non PR branches. Use 'main' for main branch and 'dev' for all other branches
conda_upload_label: ${{ !fromJSON(needs.prepare.outputs.is_pr) && (fromJSON(needs.prepare.outputs.is_main_branch) && 'main' || 'dev') || '' }}
base_container: rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.10
container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-build-250102
test_container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-test-250102
base_container: rapidsai/ci-conda:cuda12.8.0-ubuntu22.04-py3.10
container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-build-250213
test_container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-test-250213
secrets:
CONDA_TOKEN: ${{ secrets.CONDA_TOKEN }}
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ option(MORPHEUS_USE_IWYU "Enable running include-what-you-use as part of the bui

set(MORPHEUS_PY_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/wheel" CACHE STRING "Location to install the python directory")

set(MORPHEUS_RAPIDS_VERSION "24.10" CACHE STRING "Sets default versions for RAPIDS libraries.")
set(MORPHEUS_RAPIDS_VERSION "25.02" CACHE STRING "Sets default versions for RAPIDS libraries.")
set(MORPHEUS_CACHE_DIR "${CMAKE_SOURCE_DIR}/.cache" CACHE PATH "Directory to contain all CPM and CCache data")
mark_as_advanced(MORPHEUS_CACHE_DIR)

Expand Down
4 changes: 2 additions & 2 deletions ci/conda/recipes/morpheus-libs/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ cuda_compiler:
- cuda-nvcc

cuda_compiler_version:
- 12.5
- 12.8

python:
- 3.10

rapids_version:
- 24.10
- 25.02
4 changes: 2 additions & 2 deletions ci/conda/recipes/morpheus/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ cuda_compiler:
- cuda-nvcc

cuda_compiler_version:
- 12.5
- 12.8

python:
- 3.10

rapids_version:
- 24.10
- 25.02
36 changes: 24 additions & 12 deletions ci/runner/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@

# Args used in FROM commands must come first
ARG FROM_IMAGE="rapidsai/ci-conda"
ARG CUDA_PKG_VER=12-0
ARG CUDA_SHORT_VER=12.5
ARG CUDA_VER=12.5.1
ARG CUDA_SHORT_VER=12.8
ARG CUDA_VER=12.8.0
ARG LINUX_DISTRO=ubuntu
ARG LINUX_VER=22.04
ARG PROJ_NAME=morpheus
Expand All @@ -35,6 +34,9 @@ SHELL ["/bin/bash", "-c"]

ENV REAL_ARCH=${REAL_ARCH}

# https://github.com/rapidsai/ci-imgs/issues/241
RUN rm -rf /tmp/sccache* /root/.cache

# Create conda environment
COPY ./dependencies.yaml /tmp/conda/

Expand All @@ -47,7 +49,9 @@ ARG PROJ_NAME
ARG PYTHON_VER
ARG REAL_ARCH

RUN rapids-dependency-file-generator \
RUN --mount=type=cache,id=conda_pkgs,target=/opt/conda/pkgs,sharing=locked \
--mount=type=cache,id=pip_cache,target=/root/.cache/pip,sharing=locked \
rapids-dependency-file-generator \
--config /tmp/conda/dependencies.yaml \
--output conda \
--file-key build \
Expand All @@ -61,15 +65,20 @@ ENV MORPHEUS_SUPPORT_DOCA=ON

COPY ./.devcontainer/docker/optional_deps/doca.sh /tmp/doca/

RUN apt update && \
RUN --mount=type=cache,id=apt,target=/var/cache/apt \
apt update && \
DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC \
apt upgrade -y && \
apt install --no-install-recommends -y \
automake \
build-essential \
libtool \
automake && \
apt clean && \
PKG_ARCH=${TARGETARCH} /tmp/doca/doca.sh /tmp/doca && \
rm -rf /tmp/doca
libtool

RUN --mount=type=cache,id=apt,target=/var/cache/apt \
DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC \
/tmp/doca/doca.sh /tmp/doca && \
rm -rf /tmp/doca && \
apt clean

# ============ test ==================
FROM base as test
Expand All @@ -81,14 +90,17 @@ ARG CUDA_SHORT_VER
ARG PROJ_NAME
ARG PYTHON_VER

RUN apt update && \
RUN --mount=type=cache,id=apt,target=/var/cache/apt \
apt update && \
DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC \
apt install --no-install-recommends -y \
openjdk-11-jre-headless && \
apt clean && \
rm -rf /var/lib/apt/lists/*

RUN rapids-dependency-file-generator \
RUN --mount=type=cache,id=conda_pkgs,target=/opt/conda/pkgs,sharing=locked \
--mount=type=cache,id=pip_cache,target=/root/.cache/pip,sharing=locked \
rapids-dependency-file-generator \
--config /tmp/conda/dependencies.yaml \
--output conda \
--file-key test \
Expand Down
13 changes: 8 additions & 5 deletions ci/scripts/github/cmake_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ _FLAGS=()
_FLAGS+=("-B" "${BUILD_DIR}")
_FLAGS+=("-G" "Ninja")
_FLAGS+=("-DCMAKE_MESSAGE_CONTEXT_SHOW=ON")
_FLAGS+=("-DMORPHEUS_CUDA_ARCHITECTURES=RAPIDS")
_FLAGS+=("-DMORPHEUS_USE_CCACHE=ON")
_FLAGS+=("-DMORPHEUS_PYTHON_INPLACE_BUILD=OFF")
_FLAGS+=("-DMORPHEUS_PYTHON_BUILD_STUBS=ON")
Expand All @@ -27,12 +26,16 @@ _FLAGS+=("-DMORPHEUS_BUILD_EXAMPLES=ON")
_FLAGS+=("-DMORPHEUS_BUILD_TESTS=ON")
_FLAGS+=("-DMORPHEUS_BUILD_MORPHEUS_LLM=ON")
_FLAGS+=("-DMORPHEUS_BUILD_MORPHEUS_DFP=ON")
if [[ ${MORPHEUS_SUPPORT_DOCA} == @(TRUE|ON) ]]; then
_FLAGS+=("-DMORPHEUS_SUPPORT_DOCA=ON")
# Set MORPHEUS_CUDA_ARCHITECTURES to just 80;86 since that is what DOCA supports for now
_FLAGS+=("-DMORPHEUS_CUDA_ARCHITECTURES=80;86")
else
_FLAGS+=("-DMORPHEUS_CUDA_ARCHITECTURES=RAPIDS")
fi

if [[ "${LOCAL_CI}" == "" ]]; then
_FLAGS+=("-DCCACHE_PROGRAM_PATH=$(which sccache)")
fi
export CMAKE_BUILD_ALL_FEATURES="${_FLAGS[@]}"
unset _FLAGS

if [[ ${MORPHEUS_SUPPORT_DOCA} == @(TRUE|ON) ]]; then
export CMAKE_BUILD_ALL_FEATURES="${CMAKE_BUILD_ALL_FEATURES} -DMORPHEUS_SUPPORT_DOCA=ON"
fi
2 changes: 1 addition & 1 deletion ci/scripts/github/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ source ${WORKSPACE}/ci/scripts/github/morpheus_env.sh
source ${WORKSPACE}/ci/scripts/github/cmake_all.sh
/usr/bin/nvidia-smi

update_conda_env "${WORKSPACE}/conda/environments/all_cuda-125_arch-${REAL_ARCH}.yaml"
update_conda_env "${WORKSPACE}/conda/environments/all_cuda-128_arch-${REAL_ARCH}.yaml"

log_toolchain

Expand Down
6 changes: 3 additions & 3 deletions ci/scripts/run_ci_local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ GIT_BRANCH=$(git branch --show-current)
GIT_COMMIT=$(git log -n 1 --pretty=format:%H)

LOCAL_CI_TMP=${LOCAL_CI_TMP:-${MORPHEUS_ROOT}/.tmp/local_ci_tmp}
CONTAINER_VER=${CONTAINER_VER:-250102}
CUDA_VER=${CUDA_VER:-12.5}
CUDA_FULL_VER=${CUDA_FULL_VER:-12.5.1}
CONTAINER_VER=${CONTAINER_VER:-250213}
CUDA_VER=${CUDA_VER:-12.8}
CUDA_FULL_VER=${CUDA_FULL_VER:-12.8.0}
DOCKER_EXTRA_ARGS=${DOCKER_EXTRA_ARGS:-""}

# Configure the base docker img
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,18 @@ dependencies:
- clangdev=16
- click>=8
- cmake=3.27
- cuda-cudart-dev=12.5
- cuda-cudart=12.5
- cuda-nvcc=12.5
- cuda-nvml-dev=12.5
- cuda-nvrtc-dev=12.5
- cuda-nvrtc=12.5
- cuda-nvtx-dev=12.5
- cuda-nvtx=12.5
- cuda-cudart-dev=12.8
- cuda-cudart=12.8
- cuda-nvcc=12.8
- cuda-nvml-dev=12.8
- cuda-nvrtc-dev=12.8
- cuda-nvrtc=12.8
- cuda-nvtx-dev=12.8
- cuda-nvtx=12.8
- cuda-sanitizer-api
- cuda-version=12.5
- cudf=24.10
- cuml=24.10.*
- cuda-version=12.8
- cudf=25.02
- cuml=25.02.*
- cupy
- cxx-compiler
- cython=3.0
Expand All @@ -59,7 +59,7 @@ dependencies:
- jsonpatch>=1.33
- kfp
- libcublas-dev
- libcudf=24.10
- libcudf=25.02
- libcufft-dev
- libcurand-dev
- libcusolver-dev
Expand Down Expand Up @@ -87,9 +87,9 @@ dependencies:
- pre-commit
- pybind11-stubgen=0.10.5
- pydantic
- pylibcudf=24.10
- pylibcudf=25.02
- pylint=3.0.3
- pynvml=11.4
- pynvml=12
- pypdf=3.17.4
- pytest-asyncio
- pytest-benchmark=4.0
Expand All @@ -100,7 +100,7 @@ dependencies:
- python-graphviz
- python=3.10
- rapidjson=1.1.0
- rapids-dask-dependency=24.10
- rapids-dask-dependency=25.02
- rdma-core>=48
- requests-cache=1.1
- requests-toolbelt=1.0
Expand Down Expand Up @@ -142,4 +142,4 @@ dependencies:
- python-logging-loki
- sentence-transformers==2.7
- torch==2.4.0
name: all_cuda-125_arch-aarch64
name: all_cuda-128_arch-aarch64
Loading

0 comments on commit e80ba4d

Please sign in to comment.