Merge pull request #926 from rapidsai/branch-22.06
raydouglass committed Jun 7, 2022
2 parents 451b3b3 + d400ad1 commit a6b298d
Showing 35 changed files with 350 additions and 183 deletions.
1 change: 1 addition & 0 deletions .coveragerc
@@ -1,4 +1,5 @@
# Configuration file for Python coverage tests
[run]
disable_warnings = include-ignored
include = dask_cuda/*
omit = dask_cuda/tests/*
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -4,7 +4,7 @@ repos:
hooks:
- id: isort
- repo: https://github.com/ambv/black
rev: 19.10b0
rev: 22.3.0
hooks:
- id: black
- repo: https://gitlab.com/pycqa/flake8
36 changes: 36 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,39 @@
# dask-cuda 22.06.00 (7 Jun 2022)

## 🚨 Breaking Changes

- Upgrade `numba` pinning to be in-line with rest of rapids ([#912](https://github.com/rapidsai/dask-cuda/pull/912)) [@galipremsagar](https://github.com/galipremsagar)

## 🐛 Bug Fixes

- Reduce `test_cudf_cluster_device_spill` test and speed it up ([#918](https://github.com/rapidsai/dask-cuda/pull/918)) [@pentschev](https://github.com/pentschev)
- Update ImportError tests with --pre-import ([#914](https://github.com/rapidsai/dask-cuda/pull/914)) [@pentschev](https://github.com/pentschev)
- Add xfail mark to `test_pre_import_not_found` ([#908](https://github.com/rapidsai/dask-cuda/pull/908)) [@pentschev](https://github.com/pentschev)
- Increase spill tests timeout to 30 seconds ([#901](https://github.com/rapidsai/dask-cuda/pull/901)) [@pentschev](https://github.com/pentschev)
- Fix errors related with `distributed.worker.memory.terminate` ([#900](https://github.com/rapidsai/dask-cuda/pull/900)) [@pentschev](https://github.com/pentschev)
- Skip tests on import error for some optional packages ([#899](https://github.com/rapidsai/dask-cuda/pull/899)) [@pentschev](https://github.com/pentschev)
- Update auto host_memory computation when threads per worker > 1 ([#896](https://github.com/rapidsai/dask-cuda/pull/896)) [@ayushdg](https://github.com/ayushdg)
- Update black to 22.3.0 ([#889](https://github.com/rapidsai/dask-cuda/pull/889)) [@charlesbluca](https://github.com/charlesbluca)
- Remove legacy `check_python_3` ([#886](https://github.com/rapidsai/dask-cuda/pull/886)) [@pentschev](https://github.com/pentschev)

## 📖 Documentation

- Add documentation for `RAPIDS_NO_INITIALIZE` ([#898](https://github.com/rapidsai/dask-cuda/pull/898)) [@charlesbluca](https://github.com/charlesbluca)
- Use upstream warning functions for CUDA initialization ([#894](https://github.com/rapidsai/dask-cuda/pull/894)) [@charlesbluca](https://github.com/charlesbluca)

## 🛠️ Improvements

- Pin `dask` and `distributed` for release ([#922](https://github.com/rapidsai/dask-cuda/pull/922)) [@galipremsagar](https://github.com/galipremsagar)
- Pin `dask` & `distributed` for release ([#916](https://github.com/rapidsai/dask-cuda/pull/916)) [@galipremsagar](https://github.com/galipremsagar)
- Upgrade `numba` pinning to be in-line with rest of rapids ([#912](https://github.com/rapidsai/dask-cuda/pull/912)) [@galipremsagar](https://github.com/galipremsagar)
- Removing test of `cudf.merge_sorted()` ([#905](https://github.com/rapidsai/dask-cuda/pull/905)) [@madsbk](https://github.com/madsbk)
- Disable `include-ignored` coverage warnings ([#903](https://github.com/rapidsai/dask-cuda/pull/903)) [@pentschev](https://github.com/pentschev)
- Fix ci/local script ([#902](https://github.com/rapidsai/dask-cuda/pull/902)) [@Ethyling](https://github.com/Ethyling)
- Use conda to build python packages during GPU tests ([#897](https://github.com/rapidsai/dask-cuda/pull/897)) [@Ethyling](https://github.com/Ethyling)
- Pull `requirements.txt` into Conda recipe ([#893](https://github.com/rapidsai/dask-cuda/pull/893)) [@jakirkham](https://github.com/jakirkham)
- Unpin `dask` & `distributed` for development ([#892](https://github.com/rapidsai/dask-cuda/pull/892)) [@galipremsagar](https://github.com/galipremsagar)
- Build packages using mambabuild ([#846](https://github.com/rapidsai/dask-cuda/pull/846)) [@Ethyling](https://github.com/Ethyling)

# dask-cuda 22.04.00 (6 Apr 2022)

## 🚨 Breaking Changes
9 changes: 6 additions & 3 deletions ci/cpu/build.sh
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Copyright (c) 2019, NVIDIA CORPORATION.
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
################################################################################
# dask-cuda cpu build
################################################################################
@@ -68,8 +68,11 @@ pip install git+https://github.com/dask/distributed.git@main
# BUILD - Package builds
################################################################################

gpuci_logger "Build conda pkg for libcudf"
gpuci_conda_retry build conda/recipes/dask-cuda --python=${PYTHON}
# FIXME: Move boa install to gpuci/rapidsai
gpuci_mamba_retry install -c conda-forge boa

gpuci_logger "Build conda pkg for dask-cuda"
gpuci_conda_retry mambabuild conda/recipes/dask-cuda --python=${PYTHON}

rm -rf dist/
python setup.py sdist bdist_wheel
67 changes: 35 additions & 32 deletions ci/gpu/build.sh
@@ -26,15 +26,19 @@ cd "$WORKSPACE"
export GIT_DESCRIBE_TAG=`git describe --tags`
export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
export UCX_PATH=$CONDA_PREFIX
export UCXPY_VERSION=0.25.*
export UCXPY_VERSION=0.26.*
unset GIT_DESCRIBE_TAG

# Enable NumPy's __array_function__ protocol (needed for NumPy 1.16.x,
# will possibly be enabled by default starting on 1.17)
export NUMPY_EXPERIMENTAL_ARRAY_FUNCTION=1

# Install dask and distributed from master branch. Usually needed during
# Install dask and distributed from main branch. Usually needed during
# development time and disabled before a new dask-cuda release.
export INSTALL_DASK_MASTER=0
export INSTALL_DASK_MAIN=0

# Dask version to install when `INSTALL_DASK_MAIN=0`
export DASK_STABLE_VERSION="2022.05.2"

################################################################################
# SETUP - Check environment
@@ -49,43 +53,38 @@ nvidia-smi
gpuci_logger "Activate conda env"
. /opt/conda/etc/profile.d/conda.sh
conda activate rapids

conda info
conda config --show-sources
conda list --show-channel-urls

# Fixing Numpy version to avoid RuntimeWarning: numpy.ufunc size changed, may
# indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject.
# Also installing cucim in order to test GDS spilling
gpuci_mamba_retry install "cudatoolkit=$CUDA_REL" \
"cudf=${MINOR_VERSION}" "dask-cudf=${MINOR_VERSION}" \
"ucx-py=${UCXPY_VERSION}" "ucx-proc=*=gpu" \
"rapids-build-env=$MINOR_VERSION.*" \
"cucim"

# Installing cucim in order to test GDS spilling
# Pin pytest-asyncio because latest versions modify the default asyncio
# `event_loop_policy`. See https://github.com/dask/distributed/pull/4212 .
gpuci_mamba_retry install "pytest-asyncio=<0.14.0"

# https://docs.rapids.ai/maintainers/depmgmt/
# gpuci_mamba_retry remove -f rapids-build-env
# gpuci_mamba_retry install "your-pkg=1.0.0"

conda info
conda config --show-sources
conda list --show-channel-urls

# Install the main version of dask and distributed
if [[ "${INSTALL_DASK_MASTER}" == 1 ]]; then
gpuci_logger "pip install git+https://github.com/dask/distributed.git@main --upgrade"
pip install "git+https://github.com/dask/distributed.git@main" --upgrade
gpuci_logger "pip install git+https://github.com/dask/dask.git@main --upgrade"
pip install "git+https://github.com/dask/dask.git@main" --upgrade
gpuci_mamba_retry install "cudf=${MINOR_VERSION}" \
"dask-cudf=${MINOR_VERSION}" \
"ucx-py=${UCXPY_VERSION}" \
"ucx-proc=*=gpu" \
"cucim" \
"pytest-asyncio=<0.14.0"

# Install latest nightly version for dask and distributed if needed
if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then
gpuci_logger "Installing dask and distributed from dask nightly channel"
gpuci_mamba_retry install -c dask/label/dev \
"dask/label/dev::dask" \
"dask/label/dev::distributed"
else
gpuci_logger "gpuci_mamba_retry install conda-forge::dask==${DASK_STABLE_VERSION} conda-forge::distributed==${DASK_STABLE_VERSION} conda-forge::dask-core==${DASK_STABLE_VERSION} --force-reinstall"
gpuci_mamba_retry install conda-forge::dask==${DASK_STABLE_VERSION} conda-forge::distributed==${DASK_STABLE_VERSION} conda-forge::dask-core==${DASK_STABLE_VERSION} --force-reinstall
fi
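The conditional above switches between dask's nightly channel and the exact `DASK_STABLE_VERSION` pins depending on `INSTALL_DASK_MAIN`. A minimal Python sketch of the same selection logic (the function name is illustrative, not part of the CI scripts; the spec strings mirror the shell branch above):

```python
def dask_install_specs(install_dask_main: bool, stable_version: str):
    # Mirrors the ci/gpu/build.sh branch: nightly channel packages when
    # INSTALL_DASK_MAIN=1, otherwise exact conda-forge pins.
    if install_dask_main:
        return ["dask/label/dev::dask", "dask/label/dev::distributed"]
    return [
        f"conda-forge::dask=={stable_version}",
        f"conda-forge::distributed=={stable_version}",
        f"conda-forge::dask-core=={stable_version}",
    ]

specs = dask_install_specs(False, "2022.05.2")
```

Pinning all three packages (`dask`, `distributed`, `dask-core`) with `--force-reinstall` guards against a previously installed nightly shadowing the release pin.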


gpuci_logger "Check versions"
python --version
$CC --version
$CXX --version

conda info
conda config --show-sources
conda list --show-channel-urls
@@ -94,9 +93,14 @@ conda list --show-channel-urls
# BUILD - Build dask-cuda
################################################################################

gpuci_logger "Build dask-cuda"
cd "$WORKSPACE"
python -m pip install -e .
# TODO: Move boa install to gpuci/rapidsai
gpuci_mamba_retry install boa

gpuci_logger "Build and install dask-cuda"
cd "${WORKSPACE}"
CONDA_BLD_DIR="${WORKSPACE}/.conda-bld"
gpuci_conda_retry mambabuild --croot "${CONDA_BLD_DIR}" conda/recipes/dask-cuda --python="${PYTHON}"
gpuci_mamba_retry install -c "${CONDA_BLD_DIR}" dask-cuda

################################################################################
# TEST - Run pytests for ucx-py
@@ -118,4 +122,3 @@ fi
if [ -n "${CODECOV_TOKEN}" ]; then
codecov -t $CODECOV_TOKEN
fi

2 changes: 1 addition & 1 deletion ci/local/build.sh
@@ -131,7 +131,7 @@ DOCKER_MAJOR=$(docker -v|sed 's/[^[0-9]*\([0-9]*\).*/\1/')
GPU_OPTS="--gpus device=${NVIDIA_VISIBLE_DEVICES}"
if [ "$DOCKER_MAJOR" -lt 19 ]
then
GPU_OPTS="--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES='${NVIDIA_VISIBLE_DEVICES}'"
GPU_OPTS="--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES}"
fi

docker run --rm -it ${GPU_OPTS} \
13 changes: 6 additions & 7 deletions conda/recipes/dask-cuda/meta.yaml
@@ -2,6 +2,8 @@

# Usage:
# conda build -c conda-forge .
{% set data = load_setup_py_data() %}

{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
{% set git_revision_count=environ.get('GIT_DESCRIBE_NUMBER', 0) %}
{% set py_version=environ.get('CONDA_PY', 36) %}
@@ -10,7 +12,7 @@ package:
version: {{ version }}

source:
path: ../../..
git_url: ../../..

build:
number: {{ git_revision_count }}
@@ -27,12 +29,9 @@ requirements:
- setuptools
run:
- python
- dask==2022.03.0
- distributed==2022.03.0
- pynvml>=11.0.0
- numpy>=1.16.0
- numba>=0.53.1
- click==8.0.4
{% for r in data.get("install_requires", []) %}
- {{ r }}
{% endfor %}

test:
imports:
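The recipe change above replaces hard-coded run requirements with a Jinja loop over `install_requires`, loaded from `setup.py` via `load_setup_py_data()`, so the conda recipe and `requirements.txt` can no longer drift apart. A rough Python analogue of what that loop renders (the requirement strings below are illustrative values, not the project's actual pins):

```python
# Example install_requires, as setup.py metadata might expose them
# (illustrative values only).
install_requires = ["pynvml>=11.0.0", "numpy>=1.16.0"]

# Each entry becomes one "  - <requirement>" line in the rendered recipe,
# mirroring the {% for r in data.get("install_requires", []) %} loop.
rendered = "\n".join(f"  - {req}" for req in install_requires)
```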
6 changes: 4 additions & 2 deletions dask_cuda/__init__.py
@@ -19,8 +19,10 @@


# Monkey patching Dask to make use of explicit-comms when `DASK_EXPLICIT_COMMS=True`
dask.dataframe.shuffle.rearrange_by_column_tasks = get_rearrange_by_column_tasks_wrapper(
dask.dataframe.shuffle.rearrange_by_column_tasks
dask.dataframe.shuffle.rearrange_by_column_tasks = (
get_rearrange_by_column_tasks_wrapper(
dask.dataframe.shuffle.rearrange_by_column_tasks
)
)


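The `__init__.py` hunk above only re-wraps an existing monkey patch to satisfy black 22.3.0's formatting; the underlying pattern is wrapping a module-level function and reassigning the attribute so every caller transparently goes through the wrapper. A minimal sketch of that pattern (using `math.sqrt` purely as a stand-in, not dask-cuda's real target):

```python
import math

def count_calls(func, counter):
    # Return a wrapper that records each invocation, then delegates to the
    # original function -- the same shape as the
    # get_rearrange_by_column_tasks_wrapper call in the diff above.
    def wrapper(*args, **kwargs):
        counter["n"] += 1
        return func(*args, **kwargs)
    return wrapper

calls = {"n": 0}
# Reassigning the module attribute makes all later callers of
# math.sqrt go through the wrapper without changing their code.
math.sqrt = count_calls(math.sqrt, calls)
```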
34 changes: 27 additions & 7 deletions dask_cuda/benchmarks/local_cudf_merge.py
@@ -36,7 +36,7 @@ def generate_chunk(i_chunk, local_size, num_chunks, chunk_type, frac_match, gpu)
import numpy as xp
import pandas as xdf

xp.random.seed(2 ** 32 - 1)
xp.random.seed(2**32 - 1)

chunk_type = chunk_type or "build"
frac_match = frac_match or 1.0
@@ -258,7 +258,10 @@ def main(args):
for (w1, w2), v in bandwidths.items()
}
total_nbytes = {
(scheduler_workers[w1].name, scheduler_workers[w2].name,): format_bytes(sum(nb))
(
scheduler_workers[w1].name,
scheduler_workers[w2].name,
): format_bytes(sum(nb))
for (w1, w2), nb in total_nbytes.items()
}

@@ -379,21 +382,30 @@ def main(args):
def parse_args():
special_args = [
{
"name": ["-b", "--backend",],
"name": [
"-b",
"--backend",
],
"choices": ["dask", "explicit-comms"],
"default": "dask",
"type": str,
"help": "The backend to use.",
},
{
"name": ["-t", "--type",],
"name": [
"-t",
"--type",
],
"choices": ["cpu", "gpu"],
"default": "gpu",
"type": str,
"help": "Do merge with GPU or CPU dataframes",
},
{
"name": ["-c", "--chunk-size",],
"name": [
"-c",
"--chunk-size",
],
"default": 1_000_000,
"metavar": "n",
"type": int,
@@ -444,9 +456,17 @@ def parse_args():
"action": "store_true",
"help": "Write output as markdown",
},
{"name": "--runs", "default": 3, "type": int, "help": "Number of runs",},
{
"name": ["-s", "--set-index",],
"name": "--runs",
"default": 3,
"type": int,
"help": "Number of runs",
},
{
"name": [
"-s",
"--set-index",
],
"action": "store_true",
"help": "Call set_index on the key column to sort the joined dataframe.",
},
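The `special_args` hunks reformatted above follow a declarative pattern: each CLI option is a dict that a shared helper feeds to argparse. A miniature sketch of that pattern (the `build_parser` helper and the option subset here are illustrative, not the benchmark's actual `parse_benchmark_args` API):

```python
import argparse

# Each option is declared as data; "name" may be a single flag or a list
# of aliases, and the remaining keys pass straight to add_argument().
special_args = [
    {
        "name": ["-b", "--backend"],
        "choices": ["dask", "explicit-comms"],
        "default": "dask",
        "type": str,
        "help": "The backend to use.",
    },
    {"name": "--runs", "default": 3, "type": int, "help": "Number of runs"},
]

def build_parser(args_list):
    parser = argparse.ArgumentParser()
    for spec in args_list:
        spec = dict(spec)  # copy so the declaration is not mutated
        names = spec.pop("name")
        names = names if isinstance(names, list) else [names]
        parser.add_argument(*names, **spec)
    return parser

ns = build_parser(special_args).parse_args(["-b", "explicit-comms"])
```

Declaring options as data lets several benchmarks share one parsing helper while each contributes only its own option list.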
22 changes: 18 additions & 4 deletions dask_cuda/benchmarks/local_cudf_shuffle.py
@@ -138,7 +138,10 @@ def main(args):
for (w1, w2), v in bandwidths.items()
}
total_nbytes = {
(scheduler_workers[w1].name, scheduler_workers[w2].name,): format_bytes(sum(nb))
(
scheduler_workers[w1].name,
scheduler_workers[w2].name,
): format_bytes(sum(nb))
for (w1, w2), nb in total_nbytes.items()
}

@@ -251,14 +254,20 @@ def parse_args():
"help": "Number of input partitions (default '100')",
},
{
"name": ["-b", "--backend",],
"name": [
"-b",
"--backend",
],
"choices": ["dask", "explicit-comms"],
"default": "dask",
"type": str,
"help": "The backend to use.",
},
{
"name": ["-t", "--type",],
"name": [
"-t",
"--type",
],
"choices": ["cpu", "gpu"],
"default": "gpu",
"type": str,
@@ -276,7 +285,12 @@
"action": "store_true",
"help": "Write output as markdown",
},
{"name": "--runs", "default": 3, "type": int, "help": "Number of runs",},
{
"name": "--runs",
"default": 3,
"type": int,
"help": "Number of runs",
},
]

return parse_benchmark_args(