Skip to content

Commit

Permalink
Merge branch 'main' into farhadr/ft_refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
farhadrgh authored Jan 13, 2025
2 parents 5d50cfc + db237fc commit bcfdfb4
Show file tree
Hide file tree
Showing 14 changed files with 298 additions and 96 deletions.
26 changes: 0 additions & 26 deletions .github/workflows/pre-commit.yml

This file was deleted.

125 changes: 125 additions & 0 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# CI pipeline: static checks -> container image build -> GPU unit tests -> image clean-up.
name: "[Optional] BioNemo Image Build and Unit Tests"

on:
  pull_request:
    branches: [main]
  push:
    branches: [main]
  merge_group:
    types: [checks_requested]

defaults:
  run:
    # Trace every command and abort on the first error, unset variable, or failed pipe stage.
    shell: bash -x -e -u -o pipefail {0}

concurrency:
  # Runs are grouped per pull request number, falling back to the ref for non-PR events;
  # a newer run in the same group cancels the one still in progress.
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  # Runs the repository's static-check script and a TruffleHog secret scan.
  pre-commit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history plus submodules are fetched for the checks below.
          fetch-depth: 0
          submodules: "recursive"
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
          cache: "pip"
      - run: pip install -r requirements-dev.txt
      - run: ./ci/scripts/static_checks.sh
      # NOTE(review): this action is referenced by the floating `main` ref rather than a
      # release tag or commit SHA — consider pinning it.
      - uses: trufflesecurity/trufflehog@main
        with:
          extra_args: --only-verified

  # Builds the container image and pushes it to the registry, tagged with the run id.
  build-bionemo-image:
    needs: pre-commit
    runs-on: self-hosted-azure-cpu
    # Pull requests labelled SKIP_CI skip the build (and therefore the tests that need it).
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'SKIP_CI') }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Check out into a per-run subdirectory of the workspace.
          path: ${{ github.run_id }}
          submodules: "recursive"

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Docker Metadata
        id: metadata
        uses: docker/metadata-action@v5
        with:
          images: nemoci.azurecr.io/bionemo
          labels: nemo.library=bionemo
          # The raw run-id tag is the one the run-tests and clean-up jobs refer to.
          tags: |
            type=schedule
            type=ref,event=branch
            type=ref,event=tag
            type=ref,event=pr
            type=raw,value=${{ github.run_id }}

      - uses: int128/docker-build-cache-config-action@v1
        id: cache
        with:
          image: nemoci.azurecr.io/bionemo/build-cache
          pull-request-cache: true

      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          file: ${{ github.run_id }}/Dockerfile
          context: ${{ github.run_id }}/
          push: true
          tags: ${{ steps.metadata.outputs.tags }}
          labels: ${{ steps.metadata.outputs.labels }}
          cache-from: ${{ steps.cache.outputs.cache-from }}
          cache-to: ${{ steps.cache.outputs.cache-to }}

  # Runs the pytest suite inside the freshly built image on a GPU runner.
  run-tests:
    needs: build-bionemo-image
    runs-on: self-hosted-nemo-gpus-1
    defaults:
      run:
        # `run` steps execute from the per-run checkout directory.
        working-directory: ./${{ github.run_id }}
    container:
      image: nemoci.azurecr.io/bionemo:${{ github.run_id }}
      options: --gpus all
      volumes:
        - /home/azureuser/actions-runner-bionemo/cache:/github/home/.cache
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          path: ${{ github.run_id }}

      - name: Run tests
        env:
          BIONEMO_DATA_SOURCE: ngc
        run: ./ci/scripts/run_pytest.sh --no-nbval --skip-slow

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          working-directory: ${{ github.run_id }}

      - name: Upload test results to Codecov
        # Report results even when the test step failed, but not when the run was cancelled.
        if: ${{ !cancelled() }}
        uses: codecov/test-results-action@v1
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          working-directory: ${{ github.run_id }}

  # Removes the per-run image from the GPU runner.
  clean-up:
    needs: run-tests
    runs-on: self-hosted-nemo-gpus-1
    # Run regardless of whether the earlier jobs succeeded, failed, or were skipped.
    if: ${{ always() }}
    steps:
      - name: clean up image
        # Because of `always()`, this step also runs when the image never reached this runner
        # (build skipped via SKIP_CI, or an earlier job failed). A missing image must not fail
        # the job under the `-e` default shell, so the error is downgraded to a log message.
        run: |
          docker rmi "nemoci.azurecr.io/bionemo:${{ github.run_id }}" \
            || echo "Image not present on this runner; nothing to clean up."

# TODO: exclude tests from base image; run tests from github workspace mounted in the image.
# TODO: figure out way of cleaning up working directory (requires sudo or for us to fix file ownership from release container)
100 changes: 67 additions & 33 deletions Dockerfile.arm
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Base image with apex and transformer engine, but without NeMo or Megatron-LM.
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.02-py3
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.12-py3

FROM rust:1.82.0 as rust-env

Expand Down Expand Up @@ -55,24 +55,27 @@ RUN CAUSAL_CONV1D_FORCE_BUILD=TRUE pip --disable-pip-version-check --no-cache-di
git+https://github.com/Dao-AILab/causal-conv1d.git@v1.2.2.post1

# Build LLVM and triton
# It's important to select a specific version of LLVM as per triton's README instructions, and
# also important to constrain the build targets to the systems we care about or else there will
# be many strange unlinked symbol issues. Here we assume this dockerfile is built on an aarch64
# target (host), and build for NVIDIA GPUs (NVPTX). Unclear why, but we also need to build for
# AMDGPUs to get triton to properly build or else there are linker issues.
RUN git clone https://github.com/llvm/llvm-project.git && \
pip install ninja && \
cd llvm-project && \
git fetch origin 5e5a22caf88ac1ccfa8dc5720295fdeba0ad9372 && \
git checkout 5e5a22caf88ac1ccfa8dc5720295fdeba0ad9372 && \
git fetch origin 10dc3a8e916d73291269e5e2b82dd22681489aa1 && \
git checkout 10dc3a8e916d73291269e5e2b82dd22681489aa1 && \
mkdir build && cd build && \
cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON ../llvm -DLLVM_ENABLE_PROJECTS="mlir;llvm" && \
cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON -DLLVM_ENABLE_PROJECTS="mlir;llvm" -DLLVM_TARGETS_TO_BUILD="host;NVPTX;AMDGPU" ../llvm && \
ninja && \
export LLVM_BUILD_DIR=${WORKDIR}/llvm-project/build && \

cd ${WORKDIR} && \
git clone https://github.com/triton-lang/triton.git && \
pip install cmake wheel pybind11 && \
cd triton && \
git fetch origin 79c6c9b209a5692b9a895398f4f3a033f8f80415 && \
git checkout 79c6c9b209a5692b9a895398f4f3a033f8f80415 && \
LLVM_INCLUDE_DIRS=$LLVM_BUILD_DIR/include LLVM_LIBRARY_DIR=$LLVM_BUILD_DIR/lib LLVM_SYSPATH=$LLVM_BUILD_DIR pip install python/ && \

git fetch origin release/3.1.x && \
git checkout release/3.1.x && \
LLVM_INCLUDE_DIRS=$LLVM_BUILD_DIR/include LLVM_LIBRARY_DIR=$LLVM_BUILD_DIR/lib LLVM_SYSPATH=$LLVM_BUILD_DIR pip install --verbose python/ && \
cd ${WORKDIR} && \
rm -rf llvm-project && \
rm -rf triton
Expand All @@ -93,25 +96,20 @@ RUN rm -rf /build

# Addressing Security Scan Vulnerabilities
RUN rm -rf /opt/pytorch/pytorch/third_party/onnx
RUN apt-get update && \
apt-get install -y openssh-client=1:8.9p1-3ubuntu0.10 && \
rm -rf /var/lib/apt/lists/*
RUN apt purge -y libslurm37 libpmi2-0 && \
RUN apt purge -y libpmi2-0 && \
apt autoremove -y
RUN source /usr/local/nvm/nvm.sh && \
NODE_VER=$(nvm current) && \
nvm deactivate && \
nvm uninstall $NODE_VER && \
sed -i "/NVM/d" /root/.bashrc && \
sed -i "/nvm.sh/d" /etc/bash.bashrc

# Use UV to install python packages from the workspace. This just installs packages into the system's python
# environment, and does not use the current uv.lock file.
# environment, and does not use the current uv.lock file. Note that with python 3.12, we now need to set
# UV_BREAK_SYSTEM_PACKAGES, since the pytorch base image has made the decision not to use a virtual environment and UV
# does not respect the PIP_BREAK_SYSTEM_PACKAGES environment variable set in the base dockerfile.
COPY --from=ghcr.io/astral-sh/uv:0.4.25 /uv /usr/local/bin/uv
ENV UV_LINK_MODE=copy \
UV_COMPILE_BYTECODE=1 \
UV_PYTHON_DOWNLOADS=never \
UV_SYSTEM_PYTHON=true
UV_SYSTEM_PYTHON=true \
UV_NO_CACHE=1 \
UV_BREAK_SYSTEM_PACKAGES=1

# Install the bionemo-geometric requirements ahead of copying over the rest of the repo, so that we can cache their
# installation. These involve building some torch extensions, so they can take a while to install.
Expand All @@ -133,12 +131,35 @@ COPY --from=rust-env /usr/local/rustup /usr/local/rustup
ENV PATH="/usr/local/cargo/bin:/usr/local/rustup/bin:${PATH}"
ENV RUSTUP_HOME="/usr/local/rustup"

# Build decord
# # Build decord
# This needs a specific version of ffmpeg:
# root@e1fc53d00844:/workspace/bionemo2# ffmpeg -version
# ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
# built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
# configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/aarch64-linux-gnu --incdir=/usr/include/aarch64-linux-gnu --arch=arm64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-pocketsphinx --enable-librsvg --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared
# libavutil 56. 70.100 / 56. 70.100
# libavcodec 58.134.100 / 58.134.100
# libavformat 58. 76.100 / 58. 76.100
# libavdevice 58. 13.100 / 58. 13.100
# libavfilter 7.110.100 / 7.110.100
# libswscale 5. 9.100 / 5. 9.100
# libswresample 3. 9.100 / 3. 9.100
# libpostproc 55. 9.100 / 55. 9.100
#
# Issue link: https://github.com/dmlc/decord/issues/257
# Diff to make it all work https://github.com/dmlc/decord/issues/186#issuecomment-1171882325

# Consider this:
# sudo apt install libnvidia-decode-550
# cp /usr/lib/aarch64-linux-gnu/libnvcuvid* /usr/local/cuda/
# cmake .. -DUSE_CUDA=ON -DCMAKE_BUILD_TYPE=Release

RUN apt-get update && \
apt-get install -y build-essential python3-dev python3-setuptools make cmake && \
apt-get install -y ffmpeg libavcodec-dev libavfilter-dev libavformat-dev libavutil-dev && \
apt-get install -y ffmpeg libavcodec-dev libavfilter-dev libavformat-dev libavutil-dev
RUN --mount=type=bind,source=./arm_build/decord_ffmpeg6_fix.patch,target=/decord_ffmpeg6_fix.patch \
git clone --recursive https://github.com/dmlc/decord && \
cd decord && \
cd decord && git apply /decord_ffmpeg6_fix.patch && \
mkdir build && cd build && \
cmake .. -DUSE_CUDA=0 -DCMAKE_BUILD_TYPE=Release && \
make && \
Expand Down Expand Up @@ -173,20 +194,28 @@ RUN git clone --single-branch --branch 1.15.0rc4 https://github.com/single-cell-

WORKDIR /workspace/bionemo2
# Note, we need to mount the .git folder here so that setuptools-scm is able to fetch git tag for version.
# For some reason, we do not need to do the tensorstore version package hack on arm64, while we do need this for x86 build.
RUN --mount=type=bind,source=./.git,target=./.git \
--mount=type=bind,source=./requirements-test.txt,target=/requirements-test.txt \
--mount=type=bind,source=./requirements-cve.txt,target=/requirements-cve.txt \
<<EOF
set -eo pipefail
uv pip install maturin --no-build-isolation && uv pip install --no-build-isolation \
uv pip install maturin --no-build-isolation --break-system-packages
RUN --mount=type=bind,source=./.git,target=./.git \
--mount=type=bind,source=./requirements-test.txt,target=/requirements-test.txt \
--mount=type=bind,source=./requirements-cve.txt,target=/requirements-cve.txt \
pip install --use-deprecated=legacy-resolver --no-build-isolation \
tensorstore==0.1.45

RUN --mount=type=bind,source=./.git,target=./.git \
--mount=type=bind,source=./requirements-test.txt,target=/requirements-test.txt \
--mount=type=bind,source=./requirements-cve.txt,target=/requirements-cve.txt \
# Comment out mamba install in NeMo as this causes issues.
sed -i "/mamba-ssm/d" ./3rdparty/NeMo/requirements/requirements_nlp.txt && \
uv pip install --no-build-isolation \
./3rdparty/* \
./sub-packages/bionemo-* \
-r /requirements-cve.txt \
-r /requirements-test.txt
rm -rf ./3rdparty
rm -rf /tmp/*
rm -rf ./sub-packages/bionemo-noodles/target
EOF
-r /requirements-test.txt && rm -rf ./3rdparty && rm -rf /tmp/* && rm -rf ./sub-packages/bionemo-noodles/target \
&& rm -rf /root/.cache/*

# In the devcontainer image, we just copy over the finished `dist-packages` folder from the build image back into the
# base pytorch container. We can then set up a non-root user and uninstall the bionemo and 3rd-party packages, so that
Expand Down Expand Up @@ -286,6 +315,12 @@ for sub in ./3rdparty/* ./sub-packages/bionemo-*; do
uv pip install --no-deps --no-build-isolation --editable $sub
done
EOF
# This is needed because faiss is not compatible with ARM at all.
# Bionemo doesn't use faiss, but megatron core does.
# We do not use this codepath at all, therefore we just make is_sve_supported return False
# to circumvent python import issues
RUN sed -i '42i\ # Bionemo hack to fix ARM issues with faiss\n return False' /usr/local/lib/python3.12/dist-packages/faiss/loader.py

# Since the entire repo is owned by root, switching username for development breaks things.
ARG USERNAME=bionemo
RUN chown $USERNAME:$USERNAME -R /workspace/bionemo2/
Expand All @@ -312,7 +347,6 @@ COPY --from=rust-env /usr/local/rustup /usr/local/rustup


# RUN rm -rf /usr/local/cargo /usr/local/rustup
RUN rm -rf /root/.cache/bazel
RUN chmod 777 -R /workspace/bionemo2/

# Transformer engine attention defaults
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

[![Click here to deploy.](https://uohmivykqgnnbiouffke.supabase.co/storage/v1/object/public/landingpage/brevdeploynavy.svg)](https://console.brev.dev/launchable/deploy/now?launchableID=env-2pPDA4sJyTuFf3KsCv5KWRbuVlU)
[![Docs Build](https://img.shields.io/github/actions/workflow/status/NVIDIA/bionemo-framework/pages/pages-build-deployment?label=docs-build)](https://nvidia.github.io/bionemo-framework)
![Latest Tag](https://img.shields.io/github/v/tag/NVIDIA/bionemo-framework?label=latest-version)
[![Latest Tag](https://img.shields.io/github/v/tag/NVIDIA/bionemo-framework?label=latest-version)](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/clara/containers/bionemo-framework/tags)
[![codecov](https://codecov.io/gh/NVIDIA/bionemo-framework/branch/main/graph/badge.svg?token=XqhegdZRqB)](https://codecov.io/gh/NVIDIA/bionemo-framework)

NVIDIA BioNeMo Framework is a collection of programming tools, libraries, and models for computational drug discovery.
It accelerates the most time-consuming and costly stages of building and adapting biomolecular AI models by providing
Expand Down
Loading

0 comments on commit bcfdfb4

Please sign in to comment.