diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 59e18888..78ce7a05 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -28,27 +28,30 @@ concurrency:
 jobs:
   conda-python-build:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.06
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
   docs-build:
-    if: github.ref_type == 'branch' && github.event_name == 'push'
+    if: github.ref_type == 'branch'
     needs: [conda-python-build]
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06
     with:
-      build_type: branch
-      node_type: "gpu-latest-1"
       arch: "amd64"
+      branch: ${{ inputs.branch }}
+      build_type: ${{ inputs.build_type || 'branch' }}
       container_image: "rapidsai/ci:latest"
+      date: ${{ inputs.date }}
+      node_type: "gpu-v100-latest-1"
       run_script: "ci/build_docs.sh"
+      sha: ${{ inputs.sha }}
   upload-conda:
     needs: [conda-python-build]
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.06
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -67,8 +70,10 @@ jobs:
       fetch-depth: 0
     - name: Build wheel
       run: ci/build_python_pypi.sh
+      env:
+        GH_TOKEN: ${{ github.token }}
     - name: Publish distribution 📦 to PyPI
-      if: inputs.build_type == 'nightly'
       uses: pypa/gh-action-pypi-publish@release/v1
       with:
         password: ${{ secrets.RAPIDSAI_PYPI_TOKEN }}
+        skip-existing: true
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index abcd0c66..7cf94c02 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -18,29 +18,29 @@ jobs:
       - docs-build
       - wheel-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.06
   checks:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.06
   conda-python-build:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.06
     with:
       build_type: pull-request
   conda-python-tests:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06
     with:
       build_type: pull-request
   docs-build:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06
     with:
       build_type: pull-request
-      node_type: "gpu-latest-1"
+      node_type: "gpu-v100-latest-1"
       arch: "amd64"
       container_image: "rapidsai/ci:latest"
       run_script: "ci/build_docs.sh"
@@ -58,3 +58,5 @@ jobs:
       fetch-depth: 0
     - name: Build wheel
       run: ci/build_python_pypi.sh
+      env:
+        GH_TOKEN: ${{ github.token }}
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 3a6641d8..d5c918a2 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -16,7 +16,7 @@ on:
 jobs:
   conda-python-tests:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2d7467bd..3e8d9f8f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,39 @@
+# dask-cuda 23.06.00 (7 Jun 2023)
+
+## 🚨 Breaking Changes
+
+- Update minimum Python version to Python 3.9 ([#1164](https://github.com/rapidsai/dask-cuda/pull/1164)) [@shwina](https://github.com/shwina)
+
+## 🐛 Bug Fixes
+
+- Increase pytest CI timeout ([#1196](https://github.com/rapidsai/dask-cuda/pull/1196)) [@pentschev](https://github.com/pentschev)
+- Increase minimum timeout to wait for workers in CI ([#1193](https://github.com/rapidsai/dask-cuda/pull/1193)) [@pentschev](https://github.com/pentschev)
+- Disable `np.bool` deprecation warning ([#1182](https://github.com/rapidsai/dask-cuda/pull/1182)) [@pentschev](https://github.com/pentschev)
+- Always upload on branch/nightly builds ([#1177](https://github.com/rapidsai/dask-cuda/pull/1177)) [@raydouglass](https://github.com/raydouglass)
+- Workaround for `DeviceHostFile` tests with CuPy>=12.0.0 ([#1175](https://github.com/rapidsai/dask-cuda/pull/1175)) [@pentschev](https://github.com/pentschev)
+- Temporarily relax Python constraint ([#1166](https://github.com/rapidsai/dask-cuda/pull/1166)) [@vyasr](https://github.com/vyasr)
+
+## 📖 Documentation
+
+- [doc] Add document about main guard. ([#1157](https://github.com/rapidsai/dask-cuda/pull/1157)) [@trivialfis](https://github.com/trivialfis)
+
+## 🚀 New Features
+
+- Require Numba 0.57.0+ ([#1185](https://github.com/rapidsai/dask-cuda/pull/1185)) [@jakirkham](https://github.com/jakirkham)
+- Revert "Temporarily relax Python constraint" ([#1171](https://github.com/rapidsai/dask-cuda/pull/1171)) [@vyasr](https://github.com/vyasr)
+- Update to zict 3.0 ([#1160](https://github.com/rapidsai/dask-cuda/pull/1160)) [@pentschev](https://github.com/pentschev)
+
+## 🛠️ Improvements
+
+- Add `__main__` entrypoint to dask-cuda-worker CLI ([#1181](https://github.com/rapidsai/dask-cuda/pull/1181)) [@hmacdope](https://github.com/hmacdope)
+- run docs nightly too ([#1176](https://github.com/rapidsai/dask-cuda/pull/1176)) [@AyodeAwe](https://github.com/AyodeAwe)
+- Fix GHAs Workflows ([#1172](https://github.com/rapidsai/dask-cuda/pull/1172)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Remove `matrix_filter` from workflows ([#1168](https://github.com/rapidsai/dask-cuda/pull/1168)) [@charlesbluca](https://github.com/charlesbluca)
+- Revert to branch-23.06 for shared-action-workflows ([#1167](https://github.com/rapidsai/dask-cuda/pull/1167)) [@shwina](https://github.com/shwina)
+- Update minimum Python version to Python 3.9 ([#1164](https://github.com/rapidsai/dask-cuda/pull/1164)) [@shwina](https://github.com/shwina)
+- Remove usage of rapids-get-rapids-version-from-git ([#1163](https://github.com/rapidsai/dask-cuda/pull/1163)) [@jjacobelli](https://github.com/jjacobelli)
+- Use ARC V2 self-hosted runners for GPU jobs ([#1159](https://github.com/rapidsai/dask-cuda/pull/1159)) [@jjacobelli](https://github.com/jjacobelli)
+
 # dask-cuda 23.04.00 (6 Apr 2023)
 
 ## 🚨 Breaking Changes
diff --git a/ci/build_docs.sh b/ci/build_docs.sh
index 338ff974..eede5b8e 100755
--- a/ci/build_docs.sh
+++ b/ci/build_docs.sh
@@ -18,7 +18,7 @@ rapids-print-env
 rapids-logger "Downloading artifacts from previous jobs"
 
 PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
-VERSION_NUMBER=$(rapids-get-rapids-version-from-git)
+VERSION_NUMBER="23.06"
 
 rapids-mamba-retry install \
   --channel "${PYTHON_CHANNEL}" \
@@ -31,7 +31,7 @@ sphinx-build -b dirhtml ./source _html
 sphinx-build -b text ./source _text
 popd
 
-if [[ "${RAPIDS_BUILD_TYPE}" == "branch" ]]; then
+if [[ "${RAPIDS_BUILD_TYPE}" != "pull-request" ]]; then
   rapids-logger "Upload Docs to S3"
   aws s3 sync --no-progress --delete docs/_html "s3://rapidsai-docs/dask-cuda/${VERSION_NUMBER}/html"
   aws s3 sync --no-progress --delete docs/_text "s3://rapidsai-docs/dask-cuda/${VERSION_NUMBER}/txt"
diff --git a/ci/build_python_pypi.sh b/ci/build_python_pypi.sh
index 5fea926c..bda39160 100755
--- a/ci/build_python_pypi.sh
+++ b/ci/build_python_pypi.sh
@@ -8,7 +8,7 @@ python -m pip install build --user
 export GIT_DESCRIBE_TAG=$(git describe --abbrev=0 --tags)
 export GIT_DESCRIBE_NUMBER=$(git rev-list ${GIT_DESCRIBE_TAG}..HEAD --count)
 
-# Compute/export VERSION_SUFFIX
+# Compute/export RAPIDS_DATE_STRING
 source rapids-env-update
 
 python -m build \
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index b7303795..f03402f4 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -37,6 +37,8 @@ sed_runner "s/dask-cudf=.*/dask-cudf=${NEXT_SHORT_TAG}/g" dependencies.yaml
 sed_runner "s/cucim=.*/cucim=${NEXT_SHORT_TAG}/g" dependencies.yaml
 sed_runner "s/ucx-py=.*/ucx-py=${NEXT_UCXPY_VERSION}/g" dependencies.yaml
 
+# CI files
 for FILE in .github/workflows/*.yaml; do
   sed_runner "/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
 done
+sed_runner "s/VERSION_NUMBER=\".*/VERSION_NUMBER=\"${NEXT_SHORT_TAG}\"/g" ci/build_docs.sh
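Note: the workflow-pinning loop in update-version.sh above is pure sed. For readers less comfortable with sed, a rough Python equivalent is sketched below; it is illustrative only (the NEXT_SHORT_TAG value here is hypothetical — the real script derives it from the new release tag):

    # Rough Python equivalent of the sed loop above: on every line of each
    # workflow file that mentions shared-action-workflows, rewrite everything
    # after the "@" to point at the next release branch.
    import glob
    import re

    NEXT_SHORT_TAG = "23.08"  # hypothetical; computed from the tag in the real script

    for path in glob.glob(".github/workflows/*.yaml"):
        with open(path) as f:
            text = f.read()
        text = re.sub(
            r"(shared-action-workflows\S*)@\S*",
            rf"\1@branch-{NEXT_SHORT_TAG}",
            text,
        )
        with open(path, "w") as f:
            f.write(text)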
"/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" done +sed_runner "s/VERSION_NUMBER=\".*/VERSION_NUMBER=\"${NEXT_SHORT_TAG}\"/g" ci/build_docs.sh diff --git a/ci/test_python.sh b/ci/test_python.sh index b9610bca..73a93fca 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -41,10 +41,11 @@ set +e rapids-logger "pytest dask-cuda" pushd dask_cuda DASK_CUDA_TEST_SINGLE_GPU=1 \ +DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT=20 \ UCXPY_IFNAME=eth0 \ UCX_WARN_UNUSED_ENV_VARS=n \ UCX_MEMTYPE_CACHE=n \ -timeout 30m pytest \ +timeout 40m pytest \ -vv \ --capture=no \ --cache-clear \ diff --git a/dask_cuda/cli.py b/dask_cuda/cli.py index 128da207..5ab74e1f 100644 --- a/dask_cuda/cli.py +++ b/dask_cuda/cli.py @@ -499,3 +499,7 @@ def config( else: client = Client(scheduler, security=security) print_cluster_config(client) + + +if __name__ == "__main__": + worker() diff --git a/dask_cuda/device_host_file.py b/dask_cuda/device_host_file.py index fb31c3dd..a0fe92e8 100644 --- a/dask_cuda/device_host_file.py +++ b/dask_cuda/device_host_file.py @@ -2,7 +2,6 @@ import itertools import logging import os -import sys import time import numpy @@ -240,34 +239,6 @@ def __init__( # Dict of objects that will not be spilled by DeviceHostFile. self.others = {} - if sys.version_info < (3, 9): - - def __new__( - cls, - # So named such that dask will pass in the worker's local - # directory when constructing this through the "data" callback. - worker_local_directory, - *, - device_memory_limit=None, - memory_limit=None, - log_spilling=False, - ): - """ - This is here to support Python 3.8. Right now (to support - 3.8), ZictBase inherits from typing.MutableMapping through - which inspect.signature determines that the signature of - __init__ is just (*args, **kwargs). We need to advertise the - correct signature so that distributed will correctly figure - out that it needs to pass the worker's local directory. In - Python 3.9 and later, typing.MutableMapping is just an alias - for collections.abc.MutableMapping and we don't need to do - anything. - - With this pass-through definition of __new__, the - signature of the constructor is correctly determined. - """ - return super().__new__(cls) - def __setitem__(self, key, value): if key in self.device_buffer: # Make sure we register the removal of an existing key diff --git a/dask_cuda/tests/test_device_host_file.py b/dask_cuda/tests/test_device_host_file.py index 4a480794..17d4055c 100644 --- a/dask_cuda/tests/test_device_host_file.py +++ b/dask_cuda/tests/test_device_host_file.py @@ -2,8 +2,10 @@ import numpy as np import pytest +from packaging import version import dask.array +import distributed from distributed.protocol import ( deserialize, deserialize_bytes, @@ -51,7 +53,16 @@ def test_device_host_file_short( random.shuffle(full) for k, v in full: - dhf[k] = v + try: + dhf[k] = v + except TypeError as e: + # TODO: Remove when pinning to distributed>=2023.5.1 . + # See https://github.com/rapidsai/dask-cuda/issues/1174 and + # https://github.com/dask/distributed/pull/7836 . 
diff --git a/dask_cuda/device_host_file.py b/dask_cuda/device_host_file.py
index fb31c3dd..a0fe92e8 100644
--- a/dask_cuda/device_host_file.py
+++ b/dask_cuda/device_host_file.py
@@ -2,7 +2,6 @@
 import itertools
 import logging
 import os
-import sys
 import time
 
 import numpy
@@ -240,34 +239,6 @@ def __init__(
         # Dict of objects that will not be spilled by DeviceHostFile.
         self.others = {}
 
-    if sys.version_info < (3, 9):
-
-        def __new__(
-            cls,
-            # So named such that dask will pass in the worker's local
-            # directory when constructing this through the "data" callback.
-            worker_local_directory,
-            *,
-            device_memory_limit=None,
-            memory_limit=None,
-            log_spilling=False,
-        ):
-            """
-            This is here to support Python 3.8. Right now (to support
-            3.8), ZictBase inherits from typing.MutableMapping through
-            which inspect.signature determines that the signature of
-            __init__ is just (*args, **kwargs). We need to advertise the
-            correct signature so that distributed will correctly figure
-            out that it needs to pass the worker's local directory. In
-            Python 3.9 and later, typing.MutableMapping is just an alias
-            for collections.abc.MutableMapping and we don't need to do
-            anything.
-
-            With this pass-through definition of __new__, the
-            signature of the constructor is correctly determined.
-            """
-            return super().__new__(cls)
-
     def __setitem__(self, key, value):
         if key in self.device_buffer:
             # Make sure we register the removal of an existing key
diff --git a/dask_cuda/tests/test_device_host_file.py b/dask_cuda/tests/test_device_host_file.py
index 4a480794..17d4055c 100644
--- a/dask_cuda/tests/test_device_host_file.py
+++ b/dask_cuda/tests/test_device_host_file.py
@@ -2,8 +2,10 @@
 
 import numpy as np
 import pytest
+from packaging import version
 
 import dask.array
+import distributed
 from distributed.protocol import (
     deserialize,
     deserialize_bytes,
@@ -51,7 +53,16 @@ def test_device_host_file_short(
     random.shuffle(full)
 
     for k, v in full:
-        dhf[k] = v
+        try:
+            dhf[k] = v
+        except TypeError as e:
+            # TODO: Remove when pinning to distributed>=2023.5.1 .
+            # See https://github.com/rapidsai/dask-cuda/issues/1174 and
+            # https://github.com/dask/distributed/pull/7836 .
+            if version.parse(distributed.__version__) <= version.parse("2023.5.0"):
+                dhf[k] = v
+            else:
+                raise e
 
     random.shuffle(full)
diff --git a/dask_cuda/tests/test_spill.py b/dask_cuda/tests/test_spill.py
index bbd24d5a..d795f8f8 100644
--- a/dask_cuda/tests/test_spill.py
+++ b/dask_cuda/tests/test_spill.py
@@ -2,7 +2,6 @@
 from time import sleep
 
 import pytest
-from zict.file import _safe_key as safe_key
 
 import dask
 from dask import array as da
@@ -31,7 +30,8 @@ def device_host_file_size_matches(
     # `dhf.disk` is only available when Worker's `memory_limit != 0`
     if dhf.disk is not None:
         file_path = [
-            os.path.join(dhf.disk.directory, safe_key(k)) for k in dhf.disk.keys()
+            os.path.join(dhf.disk.directory, fname)
+            for fname in dhf.disk.filenames.values()
         ]
         file_size = [os.path.getsize(f) for f in file_path]
         byte_sum += sum(file_size)
diff --git a/dask_cuda/utils.py b/dask_cuda/utils.py
index 468c37f4..9fe31333 100644
--- a/dask_cuda/utils.py
+++ b/dask_cuda/utils.py
@@ -446,7 +446,9 @@ def wait_workers(
     client: distributed.Client
        Instance of client, used to query for number of workers connected.
     min_timeout: float
-        Minimum number of seconds to wait before timeout.
+        Minimum number of seconds to wait before timeout. This value may be
+        overridden by setting the `DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT`
+        environment variable to a positive integer.
     seconds_per_gpu: float
         Seconds to wait for each GPU on the system. For example, if its
         value is 2 and there is a total of 8 GPUs (workers) being started,
@@ -463,6 +465,8 @@ def wait_workers(
     -------
     True if all workers were started, False if a timeout occurs.
     """
+    min_timeout_env = os.environ.get("DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT", None)
+    min_timeout = min_timeout if min_timeout_env is None else int(min_timeout_env)
     n_gpus = n_gpus or get_n_gpus()
     timeout = max(min_timeout, seconds_per_gpu * n_gpus)
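For clarity, the two lines added to wait_workers above mean that the environment variable, when set, always wins over the min_timeout argument. A minimal standalone sketch of that logic (it mirrors the added lines rather than calling the real function):

    import os

    def effective_min_timeout(min_timeout: float) -> float:
        # An integer value in the environment variable overrides the argument.
        env = os.environ.get("DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT", None)
        return min_timeout if env is None else int(env)

    os.environ["DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT"] = "20"
    print(effective_min_timeout(10))  # prints 20: the environment variable wins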
diff --git a/dependencies.yaml b/dependencies.yaml
index b484afb5..5dc1e0c6 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -76,10 +76,6 @@ dependencies:
   specific:
     - output_types: conda
       matrices:
-        - matrix:
-            py: "3.8"
-          packages:
-            - python=3.8
         - matrix:
             py: "3.9"
           packages:
@@ -90,18 +86,18 @@ dependencies:
             - python=3.10
         - matrix:
           packages:
-            - python>=3.8,<3.11
+            - python>=3.9,<3.11
   run_python:
     common:
       - output_types: [conda, requirements]
         packages:
           - dask==2023.3.2
           - distributed==2023.3.2.1
-          - numba>=0.54
+          - numba>=0.57
           - numpy>=1.21
           - pandas>=1.3,<1.6.0dev0
           - pynvml>=11.0.0,<11.5
-          - zict>=0.1.3
+          - zict>=2.0.0
       - output_types: [conda]
         packages:
           - dask-core==2023.3.2
@@ -109,13 +105,13 @@ dependencies:
   common:
     - output_types: [conda]
       packages:
-        - cucim=23.04
-        - cudf=23.04
-        - dask-cudf=23.04
+        - cucim=23.06
+        - cudf=23.06
+        - dask-cudf=23.06
         - pytest
         - pytest-cov
         - ucx-proc=*=gpu
-        - ucx-py=0.31
+        - ucx-py=0.32
   specific:
     - output_types: conda
       matrices:
diff --git a/docs/source/examples/best-practices.rst b/docs/source/examples/best-practices.rst
index 84cc78b8..2de3809c 100644
--- a/docs/source/examples/best-practices.rst
+++ b/docs/source/examples/best-practices.rst
@@ -9,9 +9,7 @@ When choosing between two multi-GPU setups, it is best to pick the one where mos
 `DGX `_, a cloud instance with `multi-gpu options `_ , a high-density GPU HPC instance, etc. This is done for two reasons:
 
 - Moving data between GPUs is costly and performance decreases when computation stops due to communication overheads, Host-to-Device/Device-to-Host transfers, etc
-- Multi-GPU instances often come with accelerated networking like `NVLink `_. These accelerated
-networking paths usually have much higher throughput/bandwidth compared with traditional networking *and* don't force and Host-to-Device/Device-to-Host transfers. See
-`Accelerated Networking`_ for more discussion
+- Multi-GPU instances often come with accelerated networking like `NVLink `_. These accelerated networking paths usually have much higher throughput/bandwidth compared with traditional networking *and* don't force any Host-to-Device/Device-to-Host transfers. See `Accelerated Networking`_ for more discussion.
 
 .. code-block:: python
diff --git a/docs/source/examples/ucx.rst b/docs/source/examples/ucx.rst
index 6230caf6..18c569ff 100644
--- a/docs/source/examples/ucx.rst
+++ b/docs/source/examples/ucx.rst
@@ -69,7 +69,7 @@ To start a Dask scheduler using UCX with automatic configuration and one GB of R
 .. note::
     The ``interface="ib0"`` is intentionally specified above to ensure RDMACM is used in systems that support InfiniBand. On systems that don't support InfiniBand or where RDMACM isn't required, the ``interface`` argument may be omitted or specified to listen on a different interface.
 
-    We specify ``UCX_MEMTYPE_REG_WHOLE_ALLOC_TYPES=cuda`` above for optimal performance with InfiniBand, see details `here `_. If not using InfiniBand, that option may be omitted. In UCX 1.12 and newer, that option is default and may be omitted as well even when using InfiniBand.
+    We specify ``UCX_MEMTYPE_REG_WHOLE_ALLOC_TYPES=cuda`` above for optimal performance with InfiniBand, see details `here `__. If not using InfiniBand, that option may be omitted. In UCX 1.12 and newer, that option is default and may be omitted as well even when using InfiniBand.
 
 Workers
 ^^^^^^^
@@ -86,7 +86,7 @@ To start workers with automatic UCX configuration and an RMM pool of 14GB per GP
 .. note::
     Analogous to the scheduler setup, the ``interface="ib0"`` is intentionally specified above to ensure RDMACM is used in systems that support InfiniBand. On systems that don't support InfiniBand or where RDMACM isn't required, the ``interface`` argument may be omitted or specified to listen on a different interface.
 
-    We specify ``UCX_MEMTYPE_REG_WHOLE_ALLOC_TYPES=cuda`` above for optimal performance with InfiniBand, see details `here `_. If not using InfiniBand, that option may be omitted. In UCX 1.12 and newer, that option is default and may be omitted as well even when using InfiniBand.
+    We specify ``UCX_MEMTYPE_REG_WHOLE_ALLOC_TYPES=cuda`` above for optimal performance with InfiniBand, see details `here `__. If not using InfiniBand, that option may be omitted. In UCX 1.12 and newer, that option is default and may be omitted as well even when using InfiniBand.
 
 Client
 ^^^^^^
@@ -122,7 +122,7 @@ Alternatively, the ``with dask.config.set`` statement from the example above may
     We specify ``UCX_MEMTYPE_REG_WHOLE_ALLOC_TYPES=cuda`` above for optimal performance with InfiniBand, see details `here `_. If not using InfiniBand, that option may be omitted. In UCX 1.12 and newer, that option is default and may be omitted as well even when using InfiniBand.
 
 ``dask cuda worker`` with Manual Configuration
-------------------------------------------
+----------------------------------------------
 
 When using ``dask cuda worker`` with UCX communication and manual configuration, the scheduler, workers, and client must all be started manually, each using the same UCX configuration.
diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index c5592b43..c42bd483 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -16,6 +16,10 @@ To create a Dask-CUDA cluster using all available GPUs and connect a Dask.distri
     cluster = LocalCUDACluster()
     client = Client(cluster)
 
+.. tip::
+
+    Be sure to include an ``if __name__ == "__main__":`` block when using :py:class:`dask_cuda.LocalCUDACluster` in a standalone Python script. See `standalone Python scripts `_ for more details.
+
 ``dask cuda worker``
 --------------------
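To make the tip added to quickstart.rst concrete, here is a minimal sketch of the recommended pattern for a standalone script (assuming at least one GPU is available; without the guard, worker processes spawned via multiprocessing may re-execute the cluster creation when they import the script):

    from distributed import Client

    from dask_cuda import LocalCUDACluster

    # The guard ensures the cluster is created only in the main process.
    if __name__ == "__main__":
        cluster = LocalCUDACluster()
        client = Client(cluster)
        print(client)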
diff --git a/pyproject.toml b/pyproject.toml
index 6377693b..9fcf0e70 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,15 +17,15 @@ authors = [
     { name = "NVIDIA Corporation" },
 ]
 license = { text = "Apache-2.0" }
-requires-python = ">=3.8"
+requires-python = ">=3.9"
 dependencies = [
     "dask ==2023.3.2",
     "distributed ==2023.3.2.1",
     "pynvml >=11.0.0,<11.5",
     "numpy >=1.21",
-    "numba >=0.54",
+    "numba >=0.57",
     "pandas >=1.3,<1.6.0dev0",
-    "zict >=0.1.3",
+    "zict >=2.0.0",
 ]
 classifiers = [
     "Intended Audience :: Developers",
@@ -33,7 +33,6 @@ classifiers = [
     "Topic :: Scientific/Engineering",
     "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
 ]
@@ -128,6 +127,8 @@ filterwarnings = [
     "ignore:make_current is deprecated:DeprecationWarning:",
     # remove after https://github.com/rapidsai/dask-cuda/issues/1087 is closed
     "ignore:There is no current event loop:DeprecationWarning:tornado",
+    # remove after unpinning Dask/Distributed 2023.3.2
+    "ignore:.*np.bool.*:DeprecationWarning:",
 ]
 
 [tool.setuptools]
diff --git a/setup.py b/setup.py
index 3b72644b..89a56cc0 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@
     # versioneer.get_versions.
     orig_get_versions = versioneer.get_versions
 
-    version = os.environ["GIT_DESCRIBE_TAG"] + os.environ.get("VERSION_SUFFIX", "")
+    version = os.environ["GIT_DESCRIBE_TAG"] + os.environ.get("RAPIDS_DATE_STRING", "")
 
     def get_versions():
        data = orig_get_versions()
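Finally, a small sketch of how the renamed variable flows through the setup.py change above; the values shown are hypothetical (in CI, GIT_DESCRIBE_TAG comes from `git describe` and RAPIDS_DATE_STRING is exported when ci/build_python_pypi.sh sources rapids-env-update):

    import os

    # Hypothetical values standing in for what CI exports.
    os.environ["GIT_DESCRIBE_TAG"] = "v23.06.00"
    os.environ["RAPIDS_DATE_STRING"] = "a230607"

    # Same composition as the new line in setup.py.
    version = os.environ["GIT_DESCRIBE_TAG"] + os.environ.get("RAPIDS_DATE_STRING", "")
    print(version)  # -> v23.06.00a230607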