Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adopt Rapids 25.02 and CUDA 12.8 #2183

Merged
merged 35 commits into from
Feb 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
15e4f2d
Move cudf helper dtype_from_column_view import from type to column
mroeschke Jan 9, 2025
cc0bab7
First pass at Rapids 25.02 deps
dagardner-nv Jan 17, 2025
028fc7d
Merge branch 'cudf/dtype_from_column_view' of github.com:mroeschke/Mo…
dagardner-nv Jan 17, 2025
5ba2788
WIP: Updating to 25.02
dagardner-nv Jan 17, 2025
1fde300
Update to match changes in the subword_tokenize function
dagardner-nv Jan 17, 2025
b4719b8
Update to reflect that null_mask was moved to pylibcudf
dagardner-nv Jan 17, 2025
290b243
Update pynvml
dagardner-nv Jan 17, 2025
f03448e
Update stubs
dagardner-nv Jan 17, 2025
521aea0
Formatting
dagardner-nv Jan 17, 2025
2643d4e
Revert temporary changes
dagardner-nv Jan 17, 2025
c9b0afe
Remove temporary changes and template mrc dep
dagardner-nv Jan 17, 2025
89c6206
Merge branch 'branch-25.02' of github.com:nv-morpheus/Morpheus into d…
dagardner-nv Jan 23, 2025
d7cd381
Merge branch 'branch-25.06' of github.com:nv-morpheus/Morpheus into d…
dagardner-nv Feb 4, 2025
b9782e3
Update child conda yamls based on changes in dependency.yaml
dagardner-nv Feb 4, 2025
44e4084
Quick find/replace pass at updating to CUDA 12.8
dagardner-nv Feb 4, 2025
bd7e9e4
more find/replace
dagardner-nv Feb 4, 2025
edac4c7
Merge branch 'branch-25.06' of github.com:nv-morpheus/Morpheus into d…
dagardner-nv Feb 4, 2025
9bceda0
Temporarily add the local channel, remove once MRC #536 is merged
dagardner-nv Feb 11, 2025
f4897b7
Update stubs
dagardner-nv Feb 11, 2025
b53a691
Merge branch 'branch-25.06' of github.com:nv-morpheus/Morpheus into d…
dagardner-nv Feb 11, 2025
5288aeb
Adopt updated branch-25.06 of utilities
dagardner-nv Feb 11, 2025
a2ed3f3
Fix cuda version
dagardner-nv Feb 11, 2025
0a86a33
Revert "Temporarily add the local channel, remove once MRC #536 is me…
dagardner-nv Feb 11, 2025
69de7eb
Temporarily disable DOCA, currently not building
dagardner-nv Feb 12, 2025
164c22d
Avoid bloat in the CI container and speedup builds by using a cached …
dagardner-nv Feb 12, 2025
8c47244
Update container versions
dagardner-nv Feb 12, 2025
5879425
Work-around DOCA mft version conflicts
dagardner-nv Feb 13, 2025
b4592ca
Update CI container versions
dagardner-nv Feb 13, 2025
9ab5ac2
Temp disable doca
dagardner-nv Feb 13, 2025
2ce2d99
Pull in DOCA via an apt repo
dagardner-nv Feb 14, 2025
63fab99
Re-enable DOCA support
dagardner-nv Feb 14, 2025
8568281
When DOCA is enabled restrict CMAKE_CUDA_ARCHITECTURES to those suppo…
dagardner-nv Feb 14, 2025
646a121
Update conda yaml path
dagardner-nv Feb 14, 2025
48dba77
Revert unintentional change to external/morpheus-visualizations
dagardner-nv Feb 14, 2025
2ae5732
Revert unintentional change to external/morpheus-visualizations
dagardner-nv Feb 14, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"context": "${localWorkspaceFolder}/.devcontainer",
"dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
"args": {
"CUDA": "12.5",
"CUDA": "12.8",
"PYTHON_PACKAGE_MANAGER": "conda",
"BASE": "rapidsai/devcontainers:24.12-cpp-mambaforge-ubuntu22.04"
}
Expand Down Expand Up @@ -47,7 +47,7 @@
"initializeCommand": [
"/bin/bash",
"-c",
"${localWorkspaceFolder}/.devcontainer/initialize-command.sh && mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.5-envs}"
"${localWorkspaceFolder}/.devcontainer/initialize-command.sh && mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.8-envs}"
],
"postAttachCommand": [
"/bin/bash",
Expand All @@ -66,7 +66,7 @@
"source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.5-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
"source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
Expand Down
45 changes: 26 additions & 19 deletions .devcontainer/docker/optional_deps/doca.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,39 +17,46 @@
set -e

MORPHEUS_SUPPORT_DOCA=${MORPHEUS_SUPPORT_DOCA:-OFF}

LINUX_DISTRO=${LINUX_DISTRO:-ubuntu}
LINUX_VER=${LINUX_VER:-22.04}

DOCA_OS_VERSION=${DOCA_OS_VERSION:-"22.04"}
DOCA_VERSION=${DOCA_VERSION:-2.7.0}
PKG_ARCH=${PKG_ARCH:-$(dpkg --print-architecture)}

REAL_ARCH=${REAL_ARCH:-$(arch)}
if [[ ${REAL_ARCH} == "x86_64" ]]; then
DOCA_ARCH="x86_64"
elif [[ ${REAL_ARCH} == "aarch64" ]]; then
DOCA_ARCH="arm64-sbsa"
else
echo "Unsupported architecture: ${REAL_ARCH}"
exit 1
fi

DOCA_URL="https://linux.mellanox.com/public/repo/doca/${DOCA_VERSION}/${LINUX_DISTRO}${DOCA_OS_VERSION}/${DOCA_ARCH}/"
DOCA_GPG_URL="https://linux.mellanox.com/public/repo/doca/GPG-KEY-Mellanox.pub"

# Exit early if nothing to do
if [[ ${MORPHEUS_SUPPORT_DOCA} != @(TRUE|ON) ]]; then
exit 0
fi

WORKING_DIR=$1

mkdir -p ${WORKING_DIR}
echo "Installing DOCA using directory: ${WORKING_DIR}"

DEB_DIR=${WORKING_DIR}/deb

mkdir -p ${DEB_DIR}

DOCA_OS_VERSION="ubuntu2204"
DOCA_PKG_LINK="https://www.mellanox.com/downloads/DOCA/DOCA_v${DOCA_VERSION}/host/doca-host_${DOCA_VERSION}-204000-24.04-${DOCA_OS_VERSION}_${PKG_ARCH}.deb"

# Upgrade the base packages (diff between image and Canonical upstream repo)
apt update -y
apt upgrade -y
echo "Adding DOCA repo: ${DOCA_URL}"
curl ${DOCA_GPG_URL} | gpg --dearmor > /etc/apt/trusted.gpg.d/GPG-KEY-Mellanox.pub
echo "deb [signed-by=/etc/apt/trusted.gpg.d/GPG-KEY-Mellanox.pub] $DOCA_URL ./" > /etc/apt/sources.list.d/doca.list

# Install wget
apt install -y --no-install-recommends wget

wget -qO - ${DOCA_PKG_LINK} -O doca-host.deb
apt install ./doca-host.deb
apt update
apt install -y doca-all
apt install -y doca-gpu doca-gpu-dev

# Need to explicitly install the version of mft provided by the DOCA repo, overriding the version from the cuda repo,
# to avoid version conflicts.
# If/when we update either the OS, DOCA or CUDA version, we need to update the mft version here as well by checking
# the output of `apt policy mft`
apt install -y doca-all doca-gpu doca-gpu-dev mft=4.28.0-92

# Now install the gdrcopy library according to: https://github.com/NVIDIA/gdrcopy
GDRCOPY_DIR=${WORKING_DIR}/gdrcopy
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci_pipe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ on:
env:
CHANGE_TARGET: "${{ github.base_ref }}"
CUDA_PATH: "/usr/local/cuda/"
CUDA_VER: "12.5"
CUDA_VER: "12.8"
GH_TOKEN: "${{ github.token }}"
GIT_COMMIT: "${{ github.sha }}"
MORPHEUS_ROOT: "${{ github.workspace }}/morpheus"
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ jobs:
conda_run_build: ${{ !fromJSON(needs.prepare.outputs.is_pr) || fromJSON(needs.prepare.outputs.has_conda_build_label) }}
# Upload morpheus conda packages only for non PR branches. Use 'main' for main branch and 'dev' for all other branches
conda_upload_label: ${{ !fromJSON(needs.prepare.outputs.is_pr) && (fromJSON(needs.prepare.outputs.is_main_branch) && 'main' || 'dev') || '' }}
base_container: rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.10
container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-build-250102
test_container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-test-250102
base_container: rapidsai/ci-conda:cuda12.8.0-ubuntu22.04-py3.10
container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-build-250213
test_container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-test-250213
secrets:
CONDA_TOKEN: ${{ secrets.CONDA_TOKEN }}
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ option(MORPHEUS_USE_IWYU "Enable running include-what-you-use as part of the bui

set(MORPHEUS_PY_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/wheel" CACHE STRING "Location to install the python directory")

set(MORPHEUS_RAPIDS_VERSION "24.10" CACHE STRING "Sets default versions for RAPIDS libraries.")
set(MORPHEUS_RAPIDS_VERSION "25.02" CACHE STRING "Sets default versions for RAPIDS libraries.")
set(MORPHEUS_CACHE_DIR "${CMAKE_SOURCE_DIR}/.cache" CACHE PATH "Directory to contain all CPM and CCache data")
mark_as_advanced(MORPHEUS_CACHE_DIR)

Expand Down
4 changes: 2 additions & 2 deletions ci/conda/recipes/morpheus-libs/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ cuda_compiler:
- cuda-nvcc

cuda_compiler_version:
- 12.5
- 12.8

python:
- 3.10

rapids_version:
- 24.10
- 25.02
4 changes: 2 additions & 2 deletions ci/conda/recipes/morpheus/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ cuda_compiler:
- cuda-nvcc

cuda_compiler_version:
- 12.5
- 12.8

python:
- 3.10

rapids_version:
- 24.10
- 25.02
36 changes: 24 additions & 12 deletions ci/runner/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@

# Args used in FROM commands must come first
ARG FROM_IMAGE="rapidsai/ci-conda"
ARG CUDA_PKG_VER=12-0
ARG CUDA_SHORT_VER=12.5
ARG CUDA_VER=12.5.1
ARG CUDA_SHORT_VER=12.8
ARG CUDA_VER=12.8.0
ARG LINUX_DISTRO=ubuntu
ARG LINUX_VER=22.04
ARG PROJ_NAME=morpheus
Expand All @@ -35,6 +34,9 @@ SHELL ["/bin/bash", "-c"]

ENV REAL_ARCH=${REAL_ARCH}

# https://github.com/rapidsai/ci-imgs/issues/241
RUN rm -rf /tmp/sccache* /root/.cache

# Create conda environment
COPY ./dependencies.yaml /tmp/conda/

Expand All @@ -47,7 +49,9 @@ ARG PROJ_NAME
ARG PYTHON_VER
ARG REAL_ARCH

RUN rapids-dependency-file-generator \
RUN --mount=type=cache,id=conda_pkgs,target=/opt/conda/pkgs,sharing=locked \
--mount=type=cache,id=pip_cache,target=/root/.cache/pip,sharing=locked \
rapids-dependency-file-generator \
--config /tmp/conda/dependencies.yaml \
--output conda \
--file-key build \
Expand All @@ -61,15 +65,20 @@ ENV MORPHEUS_SUPPORT_DOCA=ON

COPY ./.devcontainer/docker/optional_deps/doca.sh /tmp/doca/

RUN apt update && \
RUN --mount=type=cache,id=apt,target=/var/cache/apt \
apt update && \
DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC \
apt upgrade -y && \
apt install --no-install-recommends -y \
automake \
build-essential \
libtool \
automake && \
apt clean && \
PKG_ARCH=${TARGETARCH} /tmp/doca/doca.sh /tmp/doca && \
rm -rf /tmp/doca
libtool

RUN --mount=type=cache,id=apt,target=/var/cache/apt \
DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC \
/tmp/doca/doca.sh /tmp/doca && \
rm -rf /tmp/doca && \
apt clean

# ============ test ==================
FROM base as test
Expand All @@ -81,14 +90,17 @@ ARG CUDA_SHORT_VER
ARG PROJ_NAME
ARG PYTHON_VER

RUN apt update && \
RUN --mount=type=cache,id=apt,target=/var/cache/apt \
apt update && \
DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC \
apt install --no-install-recommends -y \
openjdk-11-jre-headless && \
apt clean && \
rm -rf /var/lib/apt/lists/*

RUN rapids-dependency-file-generator \
RUN --mount=type=cache,id=conda_pkgs,target=/opt/conda/pkgs,sharing=locked \
--mount=type=cache,id=pip_cache,target=/root/.cache/pip,sharing=locked \
rapids-dependency-file-generator \
--config /tmp/conda/dependencies.yaml \
--output conda \
--file-key test \
Expand Down
13 changes: 8 additions & 5 deletions ci/scripts/github/cmake_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ _FLAGS=()
_FLAGS+=("-B" "${BUILD_DIR}")
_FLAGS+=("-G" "Ninja")
_FLAGS+=("-DCMAKE_MESSAGE_CONTEXT_SHOW=ON")
_FLAGS+=("-DMORPHEUS_CUDA_ARCHITECTURES=RAPIDS")
_FLAGS+=("-DMORPHEUS_USE_CCACHE=ON")
_FLAGS+=("-DMORPHEUS_PYTHON_INPLACE_BUILD=OFF")
_FLAGS+=("-DMORPHEUS_PYTHON_BUILD_STUBS=ON")
Expand All @@ -27,12 +26,16 @@ _FLAGS+=("-DMORPHEUS_BUILD_EXAMPLES=ON")
_FLAGS+=("-DMORPHEUS_BUILD_TESTS=ON")
_FLAGS+=("-DMORPHEUS_BUILD_MORPHEUS_LLM=ON")
_FLAGS+=("-DMORPHEUS_BUILD_MORPHEUS_DFP=ON")
if [[ ${MORPHEUS_SUPPORT_DOCA} == @(TRUE|ON) ]]; then
_FLAGS+=("-DMORPHEUS_SUPPORT_DOCA=ON")
# Set the CMAKE_CUDA_ARCHITECTURES to just 80;86 since that is what DOCA supports for now
_FLAGS+=("-DMORPHEUS_CUDA_ARCHITECTURES=80;86")
else
_FLAGS+=("-DMORPHEUS_CUDA_ARCHITECTURES=RAPIDS")
fi

if [[ "${LOCAL_CI}" == "" ]]; then
_FLAGS+=("-DCCACHE_PROGRAM_PATH=$(which sccache)")
fi
export CMAKE_BUILD_ALL_FEATURES="${_FLAGS[@]}"
unset _FLAGS

if [[ ${MORPHEUS_SUPPORT_DOCA} == @(TRUE|ON) ]]; then
export CMAKE_BUILD_ALL_FEATURES="${CMAKE_BUILD_ALL_FEATURES} -DMORPHEUS_SUPPORT_DOCA=ON"
fi
2 changes: 1 addition & 1 deletion ci/scripts/github/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ source ${WORKSPACE}/ci/scripts/github/morpheus_env.sh
source ${WORKSPACE}/ci/scripts/github/cmake_all.sh
/usr/bin/nvidia-smi

update_conda_env "${WORKSPACE}/conda/environments/all_cuda-125_arch-${REAL_ARCH}.yaml"
update_conda_env "${WORKSPACE}/conda/environments/all_cuda-128_arch-${REAL_ARCH}.yaml"

log_toolchain

Expand Down
6 changes: 3 additions & 3 deletions ci/scripts/run_ci_local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ GIT_BRANCH=$(git branch --show-current)
GIT_COMMIT=$(git log -n 1 --pretty=format:%H)

LOCAL_CI_TMP=${LOCAL_CI_TMP:-${MORPHEUS_ROOT}/.tmp/local_ci_tmp}
CONTAINER_VER=${CONTAINER_VER:-250102}
CUDA_VER=${CUDA_VER:-12.5}
CUDA_FULL_VER=${CUDA_FULL_VER:-12.5.1}
CONTAINER_VER=${CONTAINER_VER:-250213}
CUDA_VER=${CUDA_VER:-12.8}
CUDA_FULL_VER=${CUDA_FULL_VER:-12.8.0}
DOCKER_EXTRA_ARGS=${DOCKER_EXTRA_ARGS:-""}

# Configure the base docker img
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,18 @@ dependencies:
- clangdev=16
- click>=8
- cmake=3.27
- cuda-cudart-dev=12.5
- cuda-cudart=12.5
- cuda-nvcc=12.5
- cuda-nvml-dev=12.5
- cuda-nvrtc-dev=12.5
- cuda-nvrtc=12.5
- cuda-nvtx-dev=12.5
- cuda-nvtx=12.5
- cuda-cudart-dev=12.8
- cuda-cudart=12.8
- cuda-nvcc=12.8
- cuda-nvml-dev=12.8
- cuda-nvrtc-dev=12.8
- cuda-nvrtc=12.8
- cuda-nvtx-dev=12.8
- cuda-nvtx=12.8
- cuda-sanitizer-api
- cuda-version=12.5
- cudf=24.10
- cuml=24.10.*
- cuda-version=12.8
- cudf=25.02
- cuml=25.02.*
- cupy
- cxx-compiler
- cython=3.0
Expand All @@ -59,7 +59,7 @@ dependencies:
- jsonpatch>=1.33
- kfp
- libcublas-dev
- libcudf=24.10
- libcudf=25.02
- libcufft-dev
- libcurand-dev
- libcusolver-dev
Expand Down Expand Up @@ -87,9 +87,9 @@ dependencies:
- pre-commit
- pybind11-stubgen=0.10.5
- pydantic
- pylibcudf=24.10
- pylibcudf=25.02
- pylint=3.0.3
- pynvml=11.4
- pynvml=12
- pypdf=3.17.4
- pytest-asyncio
- pytest-benchmark=4.0
Expand All @@ -100,7 +100,7 @@ dependencies:
- python-graphviz
- python=3.10
- rapidjson=1.1.0
- rapids-dask-dependency=24.10
- rapids-dask-dependency=25.02
- rdma-core>=48
- requests-cache=1.1
- requests-toolbelt=1.0
Expand Down Expand Up @@ -142,4 +142,4 @@ dependencies:
- python-logging-loki
- sentence-transformers==2.7
- torch==2.4.0
name: all_cuda-125_arch-aarch64
name: all_cuda-128_arch-aarch64
Loading