diff --git a/Jenkinsfile b/Jenkinsfile index 24971b469237..5aaf94212316 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -134,6 +134,8 @@ def is_admin(name) { return (name in admins) } +def regression_test_done = false + pipeline { agent any triggers { @@ -196,7 +198,6 @@ pipeline { } when { triggeredBy 'IssueCommentCause' } steps { - // container('dgl-ci-lint') { checkout scm script { def comment = env.GITHUB_COMMENT @@ -229,12 +230,12 @@ pipeline { } pullRequest.comment("Finished the Regression test. Result table is at https://dgl-asv-data.s3-us-west-2.amazonaws.com/${env.GIT_COMMIT}_${instance_type}/results/result.csv. Jenkins job link is ${RUN_DISPLAY_URL}. ") currentBuild.result = 'SUCCESS' - return + regression_test_done = true } - // } } } stage('CI') { + when { expression { !regression_test_done } } stages { stage('Lint Check') { agent { diff --git a/benchmarks/run.sh b/benchmarks/run.sh index dd85fd934e67..a749b1a3cd25 100644 --- a/benchmarks/run.sh +++ b/benchmarks/run.sh @@ -9,7 +9,9 @@ ROOT=/asv/dgl conda activate base pip install --upgrade pip -pip install asv +# Newer asv version like 0.5.1 has different result format, +# so we fix the version here. Or `generate_excel.py` has to be changed. +pip install asv==0.4.2 pip uninstall -y dgl export DGL_BENCH_DEVICE=$DEVICE diff --git a/benchmarks/scripts/build_dgl_asv.sh b/benchmarks/scripts/build_dgl_asv.sh index 48f45773c766..47162dcf0fb1 100644 --- a/benchmarks/scripts/build_dgl_asv.sh +++ b/benchmarks/scripts/build_dgl_asv.sh @@ -2,19 +2,15 @@ set -e -# . /opt/conda/etc/profile.d/conda.sh -# conda activate pytorch-ci # Default building only with cpu DEVICE=${DGL_BENCH_DEVICE:-cpu} pip install -r /asv/torch_gpu_pip.txt -pip install pandas rdflib ogb # build -if [[ $DEVICE == "cpu" ]]; then - CMAKE_VARS="" -else - CMAKE_VARS="-DUSE_CUDA=ON" +CMAKE_VARS="-DUSE_OPENMP=ON -DBUILD_TORCH=ON -DBUILD_SPARSE=ON -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda" +if [[ $DEVICE == "gpu" ]]; then + CMAKE_VARS="-DUSE_CUDA=ON $CMAKE_VARS" fi arch=`uname -m` if [[ $arch == *"x86"* ]]; then @@ -22,8 +18,6 @@ if [[ $arch == *"x86"* ]]; then fi mkdir -p build pushd build -cmake -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda -DBUILD_TORCH=ON $CMAKE_VARS .. -make -j +cmake $CMAKE_VARS .. +make -j8 popd - -# conda deactivate diff --git a/benchmarks/scripts/generate_excel.py b/benchmarks/scripts/generate_excel.py index 4d34c5c1b4bf..c8fe34009547 100644 --- a/benchmarks/scripts/generate_excel.py +++ b/benchmarks/scripts/generate_excel.py @@ -23,13 +23,9 @@ def get_branch_name_from_hash(hash): def main(): results_path = Path("../results") results_path.is_dir() - benchmark_json_path = results_path / "benchmarks.json" - with benchmark_json_path.open() as f: - benchmark_json = json.load(f) machines = [f for f in results_path.glob("*") if f.is_dir()] output_results_dict = {} for machine in machines: - # commit_results_dict = {} per_machine_result = {} commit_results_json_paths = [ f for f in machine.glob("*") if f.name != "machine.json" diff --git a/benchmarks/scripts/install_dgl_asv.sh b/benchmarks/scripts/install_dgl_asv.sh index 5e492c45cd18..b4d26f4a1476 100644 --- a/benchmarks/scripts/install_dgl_asv.sh +++ b/benchmarks/scripts/install_dgl_asv.sh @@ -2,8 +2,6 @@ set -e -# . /opt/conda/etc/profile.d/conda.sh - # install pushd python rm -rf build *.egg-info dist diff --git a/benchmarks/scripts/publish.sh b/benchmarks/scripts/publish.sh index 1cf7d2002843..833df473b4ea 100644 --- a/benchmarks/scripts/publish.sh +++ b/benchmarks/scripts/publish.sh @@ -26,20 +26,20 @@ else fi WS_ROOT=/asv/dgl -docker pull public.ecr.aws/s1o7b3d9/benchmakrk_pyg_dgl:cu111_torch181_pyg170 -if [ -z "$DGL_REG_CONF"]; then +docker pull public.ecr.aws/s1o7b3d9/benchmark_test:cu116 +if [ -z "$DGL_REG_CONF" ]; then DOCKER_ENV_OPT="$DOCKER_ENV_OPT" else DOCKER_ENV_OPT=" -e DGL_REG_CONF=$DGL_REG_CONF $DOCKER_ENV_OPT" fi -if [ -z "$INSTANCE_TYPE"]; then +if [ -z "$INSTANCE_TYPE" ]; then DOCKER_ENV_OPT="$DOCKER_ENV_OPT" else DOCKER_ENV_OPT=" -e INSTANCE_TYPE=$INSTANCE_TYPE $DOCKER_ENV_OPT" fi -if [ -z "$MOUNT_PATH"]; then +if [ -z "$MOUNT_PATH" ]; then DOCKER_MOUNT_OPT="" else DOCKER_MOUNT_OPT="-v ${MOUNT_PATH}:/tmp/dataset -v ${MOUNT_PATH}/dgl_home/:/root/.dgl/" @@ -56,16 +56,18 @@ if [[ $DEVICE == "cpu" ]]; then $DOCKER_MOUNT_OPT \ $DOCKER_ENV_OPT \ --shm-size="16g" \ - --hostname=$MACHINE -dit public.ecr.aws/s1o7b3d9/benchmakrk_pyg_dgl:cu111_torch181_pyg170 /bin/bash + --hostname=$MACHINE -dit public.ecr.aws/s1o7b3d9/benchmark_test:cu116 /bin/bash else docker run --name dgl-reg \ --rm --gpus all \ $DOCKER_MOUNT_OPT \ $DOCKER_ENV_OPT \ --shm-size="16g" \ - --hostname=$MACHINE -dit public.ecr.aws/s1o7b3d9/benchmakrk_pyg_dgl:cu111_torch181_pyg170 /bin/bash + --hostname=$MACHINE -dit public.ecr.aws/s1o7b3d9/benchmark_test:cu116 /bin/bash fi +pwd + docker exec dgl-reg mkdir -p $WS_ROOT docker cp ../../.git dgl-reg:$WS_ROOT docker cp ../ dgl-reg:$WS_ROOT/benchmarks/ diff --git a/benchmarks/scripts/torch_gpu_pip.txt b/benchmarks/scripts/torch_gpu_pip.txt index 2e9fd098060c..64c04dc54a08 100644 --- a/benchmarks/scripts/torch_gpu_pip.txt +++ b/benchmarks/scripts/torch_gpu_pip.txt @@ -1,12 +1,13 @@ ---find-links https://download.pytorch.org/whl/torch -torch==1.9.0+cu111 -torchvision +--find-links https://download.pytorch.org/whl/torch_stable.html +torch==1.13.1+cu116 +torchvision==0.14.1+cu116 +torchmetrics pytest nose numpy cython scipy -networkx==2.5.1 +networkx matplotlib nltk requests[security] @@ -15,5 +16,4 @@ awscli torchtext pandas rdflib -ogb==1.3.1 -torchmetrics \ No newline at end of file +ogb diff --git a/docker/Dockerfile.ci_gpu_cu11 b/docker/Dockerfile.ci_benchmark similarity index 59% rename from docker/Dockerfile.ci_gpu_cu11 rename to docker/Dockerfile.ci_benchmark index 794181445365..af4af27a080e 100644 --- a/docker/Dockerfile.ci_gpu_cu11 +++ b/docker/Dockerfile.ci_benchmark @@ -1,5 +1,8 @@ # CI docker GPU env -FROM nvidia/cuda:11.2.1-cudnn8-devel-ubuntu16.04 +FROM nvidia/cuda:11.6.0-cudnn8-devel-ubuntu20.04 + +ENV TZ=US +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN apt-get update --fix-missing @@ -15,16 +18,6 @@ RUN bash /install/ubuntu_install_conda.sh ENV CONDA_ALWAYS_YES="true" -COPY install/conda_env/torch_gpu.yml /install/conda_env/torch_gpu.yml -COPY install/conda_env/torch_gpu_pip_latest.txt /install/conda_env/torch_gpu_pip.txt -RUN ["/bin/bash", "-i", "-c", "conda env create -f /install/conda_env/torch_gpu.yml"] - -# COPY install/conda_env/tensorflow_gpu.yml /install/conda_env/tensorflow_gpu.yml -# RUN ["/bin/bash", "-i", "-c", "conda env create -f /install/conda_env/tensorflow_gpu.yml"] - -# COPY install/conda_env/mxnet_gpu.yml /install/conda_env/mxnet_gpu.yml -# RUN ["/bin/bash", "-i", "-c", "conda env create -f /install/conda_env/mxnet_gpu.yml"] - ENV CONDA_ALWAYS_YES= # Environment variables diff --git a/docker/install/ubuntu_install_conda.sh b/docker/install/ubuntu_install_conda.sh index 98977a7a6e67..19cf19b261df 100644 --- a/docker/install/ubuntu_install_conda.sh +++ b/docker/install/ubuntu_install_conda.sh @@ -7,7 +7,7 @@ apt-get update --fix-missing && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-4.5.11-Linux-x86_64.sh -O ~/miniconda.sh && \ +wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \ /bin/bash ~/miniconda.sh -b -p /opt/conda && \ rm ~/miniconda.sh && \ /opt/conda/bin/conda clean -tipsy && \