Skip to content

Commit

Permalink
[CI] upgrade torch to 2.3.0 and cuda to 12.1 (#7399)
Browse files Browse the repository at this point in the history
  • Loading branch information
Rhett-Ying committed May 11, 2024
1 parent 9fde953 commit 6475057
Show file tree
Hide file tree
Showing 7 changed files with 19 additions and 15 deletions.
18 changes: 9 additions & 9 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-cpu"
image "dgllib/dgl-ci-cpu:v240227_1200"
image "dgllib/dgl-ci-cpu:v240511_1440"
args "-u root"
alwaysPull true
}
Expand All @@ -338,7 +338,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-cpu"
image "dgllib/dgl-ci-gpu:cu118_v240227_1200"
image "dgllib/dgl-ci-gpu:cu121_v240511_1440"
args "-u root"
alwaysPull true
}
Expand Down Expand Up @@ -393,7 +393,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-cpu"
image "dgllib/dgl-ci-cpu:v240227_1200"
image "dgllib/dgl-ci-cpu:v240511_1440"
args "-u root"
alwaysPull true
}
Expand All @@ -412,7 +412,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-gpu"
image "dgllib/dgl-ci-gpu:cu118_v240227_1200"
image "dgllib/dgl-ci-gpu:cu121_v240511_1440"
args "-u root --runtime nvidia"
alwaysPull true
}
Expand Down Expand Up @@ -467,7 +467,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-gpu"
image "dgllib/dgl-ci-gpu:cu118_v240227_1200"
image "dgllib/dgl-ci-gpu:cu121_v240511_1440"
args "-u root --runtime nvidia"
alwaysPull true
}
Expand All @@ -492,7 +492,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-cpu"
image "dgllib/dgl-ci-cpu:v240227_1200"
image "dgllib/dgl-ci-cpu:v240511_1440"
args "-u root --shm-size=4gb"
alwaysPull true
}
Expand Down Expand Up @@ -545,7 +545,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-gpu"
image "dgllib/dgl-ci-gpu:cu118_v240227_1200"
image "dgllib/dgl-ci-gpu:cu121_v240511_1440"
args "-u root --runtime nvidia --shm-size=8gb"
alwaysPull true
}
Expand Down Expand Up @@ -574,7 +574,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-cpu"
image "dgllib/dgl-ci-cpu:v240227_1200"
image "dgllib/dgl-ci-cpu:v240511_1440"
args "-u root --shm-size=4gb"
alwaysPull true
}
Expand Down Expand Up @@ -621,7 +621,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-cpu"
image "dgllib/dgl-ci-cpu:v240227_1200"
image "dgllib/dgl-ci-cpu:v240511_1440"
args "-u root"
alwaysPull true
}
Expand Down
3 changes: 1 addition & 2 deletions docker/Dockerfile.ci_gpu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# CI docker GPU env
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04

ENV TZ=US
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
Expand Down Expand Up @@ -35,5 +35,4 @@ ENV CPLUS_INCLUDE_PATH=/usr/local/cuda/include:${CPLUS_INCLUDE_PATH}
ENV C_INCLUDE_PATH=/usr/local/cuda/include:${C_INCLUDE_PATH}
ENV LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64:${LIBRARY_PATH}
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH}
ENV CUDA_VISIBLE_DEVICES=0
ENV TF_FORCE_GPU_ALLOW_GROWTH=true
2 changes: 1 addition & 1 deletion docker/install/conda_env/torch_cpu_pip.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ rdflib
requests[security]==2.28
scikit-learn
scipy
torch==2.0.0+cpu
torch==2.3.0+cpu
torchdata
torcheval
torchmetrics
Expand Down
2 changes: 1 addition & 1 deletion docker/install/conda_env/torch_gpu_pip.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ rdflib
requests[security]==2.28
scikit-learn
scipy
torch==2.0.0+cu118
torch==2.3.0+cu121
torchdata
torcheval
torchmetrics
Expand Down
5 changes: 4 additions & 1 deletion tests/scripts/build_dgl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@ if [ $# -ne 1 ]; then
fi

if [[ $1 != "cpu" ]]; then
CMAKE_VARS="$CMAKE_VARS -DUSE_CUDA=ON"
# CI now runs on g4dn instances. Specify the target arch explicitly to avoid the
# following error: Unknown CUDA Architecture Name 9.0a in CUDA_SELECT_NVCC_ARCH_FLAGS
export TORCH_CUDA_ARCH_LIST=7.5 # For dgl_sparse and tensoradapter.
CMAKE_VARS="$CMAKE_VARS -DUSE_CUDA=ON -DCUDA_ARCH_NAME=Turing" # For graphbolt.
fi

# This is a semicolon-separated list of Python interpreters containing PyTorch.
Expand Down
2 changes: 1 addition & 1 deletion tests/scripts/task_unit_test.bat
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ SET DGLBACKEND=!BACKEND!
SET DGL_LIBRARY_PATH=!CD!\build
SET DGL_DOWNLOAD_DIR=!CD!\_download

python -m pip install pytest psutil pandas pyyaml pydantic rdflib torchmetrics || EXIT /B 1
python -m pip install pytest psutil pandas pyyaml pydantic rdflib torchmetrics expecttest || EXIT /B 1
python -m pytest -v --junitxml=pytest_backend.xml --durations=100 tests\python\!DGLBACKEND! || EXIT /B 1
python -m pytest -v --junitxml=pytest_common.xml --durations=100 tests\python\common || EXIT /B 1
ENDLOCAL
Expand Down
2 changes: 2 additions & 0 deletions tests/scripts/task_unit_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ fi

conda activate ${DGLBACKEND}-ci

python3 -m pip install expecttest

if [ $DGLBACKEND == "mxnet" ]
then
python3 -m pytest -v --junitxml=pytest_compute.xml --durations=100 --ignore=tests/python/common/test_ffi.py tests/python/common || fail "common"
Expand Down

0 comments on commit 6475057

Please sign in to comment.