From 72235220e4792c03767b38e2fac0c34dc769585c Mon Sep 17 00:00:00 2001 From: Raymond Douglass Date: Thu, 23 Mar 2023 14:56:31 -0400 Subject: [PATCH 01/20] DOC --- .github/workflows/build.yaml | 6 +++--- .github/workflows/pr.yaml | 10 +++++----- .github/workflows/test.yaml | 2 +- dependencies.yaml | 8 ++++---- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 59e18888..c86fa102 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: conda-python-build: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -38,7 +38,7 @@ jobs: if: github.ref_type == 'branch' && github.event_name == 'push' needs: [conda-python-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06 with: build_type: branch node_type: "gpu-latest-1" @@ -48,7 +48,7 @@ jobs: upload-conda: needs: [conda-python-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index abcd0c66..c5dca84e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -18,26 +18,26 @@ jobs: - docs-build - wheel-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.04 + uses: 
rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.06 checks: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.06 conda-python-build: needs: checks secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.06 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06 with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06 with: build_type: pull-request node_type: "gpu-latest-1" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 3a6641d8..d5c918a2 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-python-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/dependencies.yaml b/dependencies.yaml index 8b0cbf1c..e52ad896 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -106,13 +106,13 @@ dependencies: common: - output_types: [conda] packages: - - cucim=23.04 - - cudf=23.04 - - dask-cudf=23.04 + - cucim=23.06 + - cudf=23.06 + - dask-cudf=23.06 - pytest - pytest-cov - ucx-proc=*=gpu - - ucx-py=0.31 + - ucx-py=0.32 specific: - 
output_types: conda matrices: From 590d26ab4d61b785789ee6bacae29c37337e6703 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 11 Apr 2023 18:15:55 +0800 Subject: [PATCH 02/20] Add document about main guard. (#1157) Close https://github.com/rapidsai/dask-cuda/issues/1152 . Authors: - Jiaming Yuan (https://github.com/trivialfis) - Lawrence Mitchell (https://github.com/wence-) Approvers: - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/dask-cuda/pull/1157 --- docs/source/examples/best-practices.rst | 4 +--- docs/source/examples/ucx.rst | 6 +++--- docs/source/quickstart.rst | 4 ++++ 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/source/examples/best-practices.rst b/docs/source/examples/best-practices.rst index 84cc78b8..2de3809c 100644 --- a/docs/source/examples/best-practices.rst +++ b/docs/source/examples/best-practices.rst @@ -9,9 +9,7 @@ When choosing between two multi-GPU setups, it is best to pick the one where mos `DGX `_, a cloud instance with `multi-gpu options `_ , a high-density GPU HPC instance, etc. This is done for two reasons: - Moving data between GPUs is costly and performance decreases when computation stops due to communication overheads, Host-to-Device/Device-to-Host transfers, etc -- Multi-GPU instances often come with accelerated networking like `NVLink `_. These accelerated -networking paths usually have much higher throughput/bandwidth compared with traditional networking *and* don't force and Host-to-Device/Device-to-Host transfers. See -`Accelerated Networking`_ for more discussion +- Multi-GPU instances often come with accelerated networking like `NVLink `_. These accelerated networking paths usually have much higher throughput/bandwidth compared with traditional networking *and* don't force and Host-to-Device/Device-to-Host transfers. See `Accelerated Networking`_ for more discussion. .. 
code-block:: python diff --git a/docs/source/examples/ucx.rst b/docs/source/examples/ucx.rst index 6230caf6..18c569ff 100644 --- a/docs/source/examples/ucx.rst +++ b/docs/source/examples/ucx.rst @@ -69,7 +69,7 @@ To start a Dask scheduler using UCX with automatic configuration and one GB of R .. note:: The ``interface="ib0"`` is intentionally specified above to ensure RDMACM is used in systems that support InfiniBand. On systems that don't support InfiniBand or where RDMACM isn't required, the ``interface`` argument may be omitted or specified to listen on a different interface. - We specify ``UCX_MEMTYPE_REG_WHOLE_ALLOC_TYPES=cuda`` above for optimal performance with InfiniBand, see details `here `_. If not using InfiniBand, that option may be omitted. In UCX 1.12 and newer, that option is default and may be omitted as well even when using InfiniBand. + We specify ``UCX_MEMTYPE_REG_WHOLE_ALLOC_TYPES=cuda`` above for optimal performance with InfiniBand, see details `here `__. If not using InfiniBand, that option may be omitted. In UCX 1.12 and newer, that option is default and may be omitted as well even when using InfiniBand. Workers ^^^^^^^ @@ -86,7 +86,7 @@ To start workers with automatic UCX configuration and an RMM pool of 14GB per GP .. note:: Analogous to the scheduler setup, the ``interface="ib0"`` is intentionally specified above to ensure RDMACM is used in systems that support InfiniBand. On systems that don't support InfiniBand or where RDMACM isn't required, the ``interface`` argument may be omitted or specified to listen on a different interface. - We specify ``UCX_MEMTYPE_REG_WHOLE_ALLOC_TYPES=cuda`` above for optimal performance with InfiniBand, see details `here `_. If not using InfiniBand, that option may be omitted. In UCX 1.12 and newer, that option is default and may be omitted as well even when using InfiniBand. + We specify ``UCX_MEMTYPE_REG_WHOLE_ALLOC_TYPES=cuda`` above for optimal performance with InfiniBand, see details `here `__. 
If not using InfiniBand, that option may be omitted. In UCX 1.12 and newer, that option is default and may be omitted as well even when using InfiniBand. Client ^^^^^^ @@ -122,7 +122,7 @@ Alternatively, the ``with dask.config.set`` statement from the example above may We specify ``UCX_MEMTYPE_REG_WHOLE_ALLOC_TYPES=cuda`` above for optimal performance with InfiniBand, see details `here `_. If not using InfiniBand, that option may be omitted. In UCX 1.12 and newer, that option is default and may be omitted as well even when using InfiniBand. ``dask cuda worker`` with Manual Configuration ------------------------------------------- +---------------------------------------------- When using ``dask cuda worker`` with UCX communication and manual configuration, the scheduler, workers, and client must all be started manually, each using the same UCX configuration. diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst index c5592b43..c42bd483 100644 --- a/docs/source/quickstart.rst +++ b/docs/source/quickstart.rst @@ -16,6 +16,10 @@ To create a Dask-CUDA cluster using all available GPUs and connect a Dask.distri cluster = LocalCUDACluster() client = Client(cluster) +.. tip:: + + Be sure to include an ``if __name__ == "__main__":`` block when using :py:class:`dask_cuda.LocalCUDACluster` in a standalone Python script. See `standalone Python scripts `_ for more details. + ``dask cuda worker`` -------------------- From eed3eb6b02951ac2311b81f546b9053184254c4b Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Mon, 17 Apr 2023 17:48:26 +0200 Subject: [PATCH 03/20] Use ARC V2 self-hosted runners for GPU jobs (#1159) This PR is updating the runner labels to use ARC V2 self-hosted runners for GPU jobs. This is needed to resolve the auto-scaling issues. 
Authors: - Jordan Jacobelli (https://github.com/jjacobelli) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/dask-cuda/pull/1159 --- .github/workflows/build.yaml | 2 +- .github/workflows/pr.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index c86fa102..0189fab2 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -41,7 +41,7 @@ jobs: uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06 with: build_type: branch - node_type: "gpu-latest-1" + node_type: "gpu-v100-latest-1" arch: "amd64" container_image: "rapidsai/ci:latest" run_script: "ci/build_docs.sh" diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index c5dca84e..b5408e27 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -40,7 +40,7 @@ jobs: uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06 with: build_type: pull-request - node_type: "gpu-latest-1" + node_type: "gpu-v100-latest-1" arch: "amd64" container_image: "rapidsai/ci:latest" run_script: "ci/build_docs.sh" From 15d448059cdb66ad04fa76fbd0efb41569028a4e Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 18 Apr 2023 16:02:13 +0200 Subject: [PATCH 04/20] Update to zict 3.0 (#1160) With the release of zict 3.0 a few changes were made to resources that were used in spilling tests that are being updated here. 
Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Benjamin Zaitlen (https://github.com/quasiben) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/dask-cuda/pull/1160 --- dask_cuda/tests/test_spill.py | 4 ++-- dependencies.yaml | 2 +- pyproject.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dask_cuda/tests/test_spill.py b/dask_cuda/tests/test_spill.py index bbd24d5a..d795f8f8 100644 --- a/dask_cuda/tests/test_spill.py +++ b/dask_cuda/tests/test_spill.py @@ -2,7 +2,6 @@ from time import sleep import pytest -from zict.file import _safe_key as safe_key import dask from dask import array as da @@ -31,7 +30,8 @@ def device_host_file_size_matches( # `dhf.disk` is only available when Worker's `memory_limit != 0` if dhf.disk is not None: file_path = [ - os.path.join(dhf.disk.directory, safe_key(k)) for k in dhf.disk.keys() + os.path.join(dhf.disk.directory, fname) + for fname in dhf.disk.filenames.values() ] file_size = [os.path.getsize(f) for f in file_path] byte_sum += sum(file_size) diff --git a/dependencies.yaml b/dependencies.yaml index 025d49a0..26bcda98 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -101,7 +101,7 @@ dependencies: - numpy>=1.21 - pandas>=1.3,<1.6.0dev0 - pynvml>=11.0.0,<11.5 - - zict>=0.1.3 + - zict>=2.0.0 - output_types: [conda] packages: - dask-core==2023.3.2 diff --git a/pyproject.toml b/pyproject.toml index 6377693b..4b2eeef5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ dependencies = [ "numpy >=1.21", "numba >=0.54", "pandas >=1.3,<1.6.0dev0", - "zict >=0.1.3", + "zict >=2.0.0", ] classifiers = [ "Intended Audience :: Developers", From 9dbfd1c936f8a60b42bb275bd73d201d71b606b6 Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Thu, 20 Apr 2023 21:48:38 +0200 Subject: [PATCH 05/20] Remove usage of rapids-get-rapids-version-from-git (#1163) Instead of using `rapids-get-rapids-version-from-git` we can just hardcode the version and 
use `update-version.sh` to update it Authors: - Jordan Jacobelli (https://github.com/jjacobelli) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/dask-cuda/pull/1163 --- ci/build_docs.sh | 2 +- ci/release/update-version.sh | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 338ff974..0c285421 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -18,7 +18,7 @@ rapids-print-env rapids-logger "Downloading artifacts from previous jobs" PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) -VERSION_NUMBER=$(rapids-get-rapids-version-from-git) +VERSION_NUMBER="23.06" rapids-mamba-retry install \ --channel "${PYTHON_CHANNEL}" \ diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index b7303795..f03402f4 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -37,6 +37,8 @@ sed_runner "s/dask-cudf=.*/dask-cudf=${NEXT_SHORT_TAG}/g" dependencies.yaml sed_runner "s/cucim=.*/cucim=${NEXT_SHORT_TAG}/g" dependencies.yaml sed_runner "s/ucx-py=.*/ucx-py=${NEXT_UCXPY_VERSION}/g" dependencies.yaml +# CI files for FILE in .github/workflows/*.yaml; do sed_runner "/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" done +sed_runner "s/VERSION_NUMBER=\".*/VERSION_NUMBER=\"${NEXT_SHORT_TAG}\"/g" ci/build_docs.sh From f65a11d8bc78673368fd2520393487c11163f7d6 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath <3190405+shwina@users.noreply.github.com> Date: Mon, 24 Apr 2023 15:07:42 -0400 Subject: [PATCH 06/20] Update minimum Python version to Python 3.9 (#1164) Authors: - Ashwin Srinath (https://github.com/shwina) - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Charles Blackmon-Luca (https://github.com/charlesbluca) - Peter Andreas Entschev (https://github.com/pentschev) URL: https://github.com/rapidsai/dask-cuda/pull/1164 --- .github/workflows/build.yaml | 6 +++--- 
.github/workflows/pr.yaml | 13 ++++++++----- .github/workflows/test.yaml | 5 ++++- dask_cuda/device_host_file.py | 29 ----------------------------- dependencies.yaml | 6 +----- pyproject.toml | 3 +-- 6 files changed, 17 insertions(+), 45 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 0189fab2..4c880602 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: conda-python-build: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.06 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@py-39 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -38,7 +38,7 @@ jobs: if: github.ref_type == 'branch' && github.event_name == 'push' needs: [conda-python-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@py-39 with: build_type: branch node_type: "gpu-v100-latest-1" @@ -48,7 +48,7 @@ jobs: upload-conda: needs: [conda-python-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.06 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@py-39 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index b5408e27..57b44fa6 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -18,26 +18,29 @@ jobs: - docs-build - wheel-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.06 + uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@py-39 checks: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.06 + 
uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@py-39 conda-python-build: needs: checks secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.06 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@py-39 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@py-39 with: + # TODO: remove the `matrix_filter` line after `cudf` is publishing `3.9` + # packages. also remove the line in `test.yaml` + matrix_filter: map(select(.PY_VER == "3.10")) build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@py-39 with: build_type: pull-request node_type: "gpu-v100-latest-1" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index d5c918a2..9e02ec83 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,8 +16,11 @@ on: jobs: conda-python-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@py-39 with: + # TODO: remove the `matrix_filter` line after `cudf` is publishing `3.9` + # packages. 
also remove the line in `pr.yaml` + matrix_filter: map(select(.PY_VER == "3.10")) build_type: nightly branch: ${{ inputs.branch }} date: ${{ inputs.date }} diff --git a/dask_cuda/device_host_file.py b/dask_cuda/device_host_file.py index fb31c3dd..a0fe92e8 100644 --- a/dask_cuda/device_host_file.py +++ b/dask_cuda/device_host_file.py @@ -2,7 +2,6 @@ import itertools import logging import os -import sys import time import numpy @@ -240,34 +239,6 @@ def __init__( # Dict of objects that will not be spilled by DeviceHostFile. self.others = {} - if sys.version_info < (3, 9): - - def __new__( - cls, - # So named such that dask will pass in the worker's local - # directory when constructing this through the "data" callback. - worker_local_directory, - *, - device_memory_limit=None, - memory_limit=None, - log_spilling=False, - ): - """ - This is here to support Python 3.8. Right now (to support - 3.8), ZictBase inherits from typing.MutableMapping through - which inspect.signature determines that the signature of - __init__ is just (*args, **kwargs). We need to advertise the - correct signature so that distributed will correctly figure - out that it needs to pass the worker's local directory. In - Python 3.9 and later, typing.MutableMapping is just an alias - for collections.abc.MutableMapping and we don't need to do - anything. - - With this pass-through definition of __new__, the - signature of the constructor is correctly determined. 
- """ - return super().__new__(cls) - def __setitem__(self, key, value): if key in self.device_buffer: # Make sure we register the removal of an existing key diff --git a/dependencies.yaml b/dependencies.yaml index 26bcda98..613ab230 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -76,10 +76,6 @@ dependencies: specific: - output_types: conda matrices: - - matrix: - py: "3.8" - packages: - - python=3.8 - matrix: py: "3.9" packages: @@ -90,7 +86,7 @@ dependencies: - python=3.10 - matrix: packages: - - python>=3.8,<3.11 + - python>=3.9,<3.11 run_python: common: - output_types: [conda, requirements] diff --git a/pyproject.toml b/pyproject.toml index 4b2eeef5..f6675ccd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache-2.0" } -requires-python = ">=3.8" +requires-python = ">=3.9" dependencies = [ "dask ==2023.3.2", "distributed ==2023.3.2.1", @@ -33,7 +33,6 @@ classifiers = [ "Topic :: Scientific/Engineering", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", ] From 7df68b77a95576850f8ac8122405b7e91dc0d7f0 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 24 Apr 2023 18:34:53 -0700 Subject: [PATCH 07/20] Temporarily relax Python constraint (#1166) This PR unblocks RAPIDS CI since many places attempt to install dask-cuda from source. We can undo this change once the rest of RAPIDS has moved to Python 3.9. We will also want to discuss better strategies for handling dask-cuda in CI as part of our ongoing discussions around improving latest dask usage in CI. 
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Charles Blackmon-Luca (https://github.com/charlesbluca) URL: https://github.com/rapidsai/dask-cuda/pull/1166 --- dask_cuda/device_host_file.py | 29 +++++++++++++++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/dask_cuda/device_host_file.py b/dask_cuda/device_host_file.py index a0fe92e8..fb31c3dd 100644 --- a/dask_cuda/device_host_file.py +++ b/dask_cuda/device_host_file.py @@ -2,6 +2,7 @@ import itertools import logging import os +import sys import time import numpy @@ -239,6 +240,34 @@ def __init__( # Dict of objects that will not be spilled by DeviceHostFile. self.others = {} + if sys.version_info < (3, 9): + + def __new__( + cls, + # So named such that dask will pass in the worker's local + # directory when constructing this through the "data" callback. + worker_local_directory, + *, + device_memory_limit=None, + memory_limit=None, + log_spilling=False, + ): + """ + This is here to support Python 3.8. Right now (to support + 3.8), ZictBase inherits from typing.MutableMapping through + which inspect.signature determines that the signature of + __init__ is just (*args, **kwargs). We need to advertise the + correct signature so that distributed will correctly figure + out that it needs to pass the worker's local directory. In + Python 3.9 and later, typing.MutableMapping is just an alias + for collections.abc.MutableMapping and we don't need to do + anything. + + With this pass-through definition of __new__, the + signature of the constructor is correctly determined. 
+ """ + return super().__new__(cls) + def __setitem__(self, key, value): if key in self.device_buffer: # Make sure we register the removal of an existing key diff --git a/pyproject.toml b/pyproject.toml index f6675ccd..388a3029 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache-2.0" } -requires-python = ">=3.9" +requires-python = ">=3.8" dependencies = [ "dask ==2023.3.2", "distributed ==2023.3.2.1", From 04bff21efb533c09c68c07b41986ea0260eae68f Mon Sep 17 00:00:00 2001 From: Ashwin Srinath <3190405+shwina@users.noreply.github.com> Date: Fri, 28 Apr 2023 14:07:43 -0400 Subject: [PATCH 08/20] Revert to branch-23.06 for shared-action-workflows (#1167) Authors: - Ashwin Srinath (https://github.com/shwina) Approvers: - Peter Andreas Entschev (https://github.com/pentschev) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/dask-cuda/pull/1167 --- .github/workflows/build.yaml | 6 +++--- .github/workflows/pr.yaml | 10 +++++----- .github/workflows/test.yaml | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 4c880602..0189fab2 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: conda-python-build: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@py-39 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -38,7 +38,7 @@ jobs: if: github.ref_type == 'branch' && github.event_name == 'push' needs: [conda-python-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@py-39 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06 with: build_type: branch node_type: 
"gpu-v100-latest-1" @@ -48,7 +48,7 @@ jobs: upload-conda: needs: [conda-python-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@py-39 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 57b44fa6..109561a4 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -18,20 +18,20 @@ jobs: - docs-build - wheel-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@py-39 + uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.06 checks: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@py-39 + uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.06 conda-python-build: needs: checks secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@py-39 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.06 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@py-39 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06 with: # TODO: remove the `matrix_filter` line after `cudf` is publishing `3.9` # packages. 
also remove the line in `test.yaml` @@ -40,7 +40,7 @@ jobs: docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@py-39 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06 with: build_type: pull-request node_type: "gpu-v100-latest-1" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 9e02ec83..ce3692a8 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-python-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@py-39 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06 with: # TODO: remove the `matrix_filter` line after `cudf` is publishing `3.9` # packages. also remove the line in `pr.yaml` From 76936cd8efb0b6937ae212a6486a7efbd6d5506d Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Tue, 2 May 2023 22:32:13 -0400 Subject: [PATCH 09/20] Remove `matrix_filter` from workflows (#1168) Should be safe to do this now that cuDF 3.9 nightlies are being published Authors: - Charles Blackmon-Luca (https://github.com/charlesbluca) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/dask-cuda/pull/1168 --- .github/workflows/pr.yaml | 3 --- .github/workflows/test.yaml | 3 --- 2 files changed, 6 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 109561a4..b5408e27 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -33,9 +33,6 @@ jobs: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06 with: - # TODO: remove the `matrix_filter` line after `cudf` is publishing `3.9` - # packages. 
also remove the line in `test.yaml` - matrix_filter: map(select(.PY_VER == "3.10")) build_type: pull-request docs-build: needs: conda-python-build diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index ce3692a8..d5c918a2 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -18,9 +18,6 @@ jobs: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06 with: - # TODO: remove the `matrix_filter` line after `cudf` is publishing `3.9` - # packages. also remove the line in `pr.yaml` - matrix_filter: map(select(.PY_VER == "3.10")) build_type: nightly branch: ${{ inputs.branch }} date: ${{ inputs.date }} From 1733b51636468e7e62b38700af7db7615c645d91 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 4 May 2023 02:26:56 -0700 Subject: [PATCH 10/20] Revert "Temporarily relax Python constraint" (#1171) We undid the pinning in order to unblock the Python 3.8->3.9 transition in RAPIDS, which is now complete. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Peter Andreas Entschev (https://github.com/pentschev) URL: https://github.com/rapidsai/dask-cuda/pull/1171 --- dask_cuda/device_host_file.py | 29 ----------------------------- pyproject.toml | 2 +- 2 files changed, 1 insertion(+), 30 deletions(-) diff --git a/dask_cuda/device_host_file.py b/dask_cuda/device_host_file.py index fb31c3dd..a0fe92e8 100644 --- a/dask_cuda/device_host_file.py +++ b/dask_cuda/device_host_file.py @@ -2,7 +2,6 @@ import itertools import logging import os -import sys import time import numpy @@ -240,34 +239,6 @@ def __init__( # Dict of objects that will not be spilled by DeviceHostFile. self.others = {} - if sys.version_info < (3, 9): - - def __new__( - cls, - # So named such that dask will pass in the worker's local - # directory when constructing this through the "data" callback. 
- worker_local_directory, - *, - device_memory_limit=None, - memory_limit=None, - log_spilling=False, - ): - """ - This is here to support Python 3.8. Right now (to support - 3.8), ZictBase inherits from typing.MutableMapping through - which inspect.signature determines that the signature of - __init__ is just (*args, **kwargs). We need to advertise the - correct signature so that distributed will correctly figure - out that it needs to pass the worker's local directory. In - Python 3.9 and later, typing.MutableMapping is just an alias - for collections.abc.MutableMapping and we don't need to do - anything. - - With this pass-through definition of __new__, the - signature of the constructor is correctly determined. - """ - return super().__new__(cls) - def __setitem__(self, key, value): if key in self.device_buffer: # Make sure we register the removal of an existing key diff --git a/pyproject.toml b/pyproject.toml index 388a3029..f6675ccd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache-2.0" } -requires-python = ">=3.8" +requires-python = ">=3.9" dependencies = [ "dask ==2023.3.2", "distributed ==2023.3.2.1", From 0a6691ffe45c583dcf0a6f47d0ca89994b4d9052 Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Thu, 11 May 2023 14:52:28 -0400 Subject: [PATCH 11/20] Fix GHAs Workflows (#1172) The `rapids-env-update` command needs a `GH_TOKEN` environment for CI now due to the changes below: - https://github.com/rapidsai/gha-tools/pull/53 Similar to: https://github.com/rapidsai/shared-action-workflows/pull/87 Authors: - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Ray Douglass (https://github.com/raydouglass) --- .github/workflows/build.yaml | 2 ++ .github/workflows/pr.yaml | 2 ++ ci/build_python_pypi.sh | 2 +- setup.py | 2 +- 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 0189fab2..47ea6e79 
100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -67,6 +67,8 @@ jobs: fetch-depth: 0 - name: Build wheel run: ci/build_python_pypi.sh + env: + GH_TOKEN: ${{ github.token }} - name: Publish distribution 📦 to PyPI if: inputs.build_type == 'nightly' uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index b5408e27..7cf94c02 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -58,3 +58,5 @@ jobs: fetch-depth: 0 - name: Build wheel run: ci/build_python_pypi.sh + env: + GH_TOKEN: ${{ github.token }} diff --git a/ci/build_python_pypi.sh b/ci/build_python_pypi.sh index 5fea926c..bda39160 100755 --- a/ci/build_python_pypi.sh +++ b/ci/build_python_pypi.sh @@ -8,7 +8,7 @@ python -m pip install build --user export GIT_DESCRIBE_TAG=$(git describe --abbrev=0 --tags) export GIT_DESCRIBE_NUMBER=$(git rev-list ${GIT_DESCRIBE_TAG}..HEAD --count) -# Compute/export VERSION_SUFFIX +# Compute/export RAPIDS_DATE_STRING source rapids-env-update python -m build \ diff --git a/setup.py b/setup.py index 3b72644b..89a56cc0 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ # versioneer.get_versions. orig_get_versions = versioneer.get_versions - version = os.environ["GIT_DESCRIBE_TAG"] + os.environ.get("VERSION_SUFFIX", "") + version = os.environ["GIT_DESCRIBE_TAG"] + os.environ.get("RAPIDS_DATE_STRING", "") def get_versions(): data = orig_get_versions() From b263f0021ed53262247efdbea1ac4b7dadea88c9 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Mon, 15 May 2023 17:05:12 +0200 Subject: [PATCH 12/20] Workaround for `DeviceHostFile` tests with CuPy>=12.0.0 (#1175) As discussed in https://github.com/rapidsai/dask-cuda/issues/1174, we must workaround test failures until Distributed can be unpinned. 
Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/dask-cuda/pull/1175 --- dask_cuda/tests/test_device_host_file.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/dask_cuda/tests/test_device_host_file.py b/dask_cuda/tests/test_device_host_file.py index 4a480794..17d4055c 100644 --- a/dask_cuda/tests/test_device_host_file.py +++ b/dask_cuda/tests/test_device_host_file.py @@ -2,8 +2,10 @@ import numpy as np import pytest +from packaging import version import dask.array +import distributed from distributed.protocol import ( deserialize, deserialize_bytes, @@ -51,7 +53,16 @@ def test_device_host_file_short( random.shuffle(full) for k, v in full: - dhf[k] = v + try: + dhf[k] = v + except TypeError as e: + # TODO: Remove when pinning to distributed>=2023.5.1 . + # See https://github.com/rapidsai/dask-cuda/issues/1174 and + # https://github.com/dask/distributed/pull/7836 . 
+ if version.parse(distributed.__version__) <= version.parse("2023.5.0"): + dhf[k] = v + else: + raise e random.shuffle(full) From cf6e9fb69b4a2f02d258f469a7c19fb474a3ca75 Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Wed, 17 May 2023 15:12:08 -0500 Subject: [PATCH 13/20] run docs nightly too (#1176) This PR configures `dask-cuda` docs builds to also run nightly (not only on PR merges) Authors: - Jake Awe (https://github.com/AyodeAwe) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Peter Andreas Entschev (https://github.com/pentschev) URL: https://github.com/rapidsai/dask-cuda/pull/1176 --- .github/workflows/build.yaml | 9 ++++++--- ci/build_docs.sh | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 47ea6e79..df62f69f 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -35,16 +35,19 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} docs-build: - if: github.ref_type == 'branch' && github.event_name == 'push' + if: github.ref_type == 'branch' needs: [conda-python-build] secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06 with: - build_type: branch - node_type: "gpu-v100-latest-1" arch: "amd64" + branch: ${{ inputs.branch }} + build_type: ${{ inputs.build_type || 'branch' }} container_image: "rapidsai/ci:latest" + date: ${{ inputs.date }} + node_type: "gpu-v100-latest-1" run_script: "ci/build_docs.sh" + sha: ${{ inputs.sha }} upload-conda: needs: [conda-python-build] secrets: inherit diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 0c285421..eede5b8e 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -31,7 +31,7 @@ sphinx-build -b dirhtml ./source _html sphinx-build -b text ./source _text popd -if [[ "${RAPIDS_BUILD_TYPE}" == "branch" ]]; then +if [[ "${RAPIDS_BUILD_TYPE}" != "pull-request" ]]; then rapids-logger "Upload Docs to
S3" aws s3 sync --no-progress --delete docs/_html "s3://rapidsai-docs/dask-cuda/${VERSION_NUMBER}/html" aws s3 sync --no-progress --delete docs/_text "s3://rapidsai-docs/dask-cuda/${VERSION_NUMBER}/txt" From 16815109410e8709f91623f7712dafa9a2b07777 Mon Sep 17 00:00:00 2001 From: Ray Douglass <3107146+raydouglass@users.noreply.github.com> Date: Thu, 25 May 2023 09:20:22 -0400 Subject: [PATCH 14/20] Always upload on branch/nightly builds (#1177) Since the `build.yaml` workflow only runs on branch pushes, tag pushes, or nightly calls, it should always upload the wheel to PyPI like it does for conda packages. This will fix the missing release uploads like this: https://github.com/rapidsai/dask-cuda/actions/runs/4678841210/jobs/8288889977 Authors: - Ray Douglass (https://github.com/raydouglass) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - AJ Schmidt (https://github.com/ajschmidt8) - Peter Andreas Entschev (https://github.com/pentschev) URL: https://github.com/rapidsai/dask-cuda/pull/1177 --- .github/workflows/build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index df62f69f..78ce7a05 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -73,7 +73,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Publish distribution 📦 to PyPI - if: inputs.build_type == 'nightly' uses: pypa/gh-action-pypi-publish@release/v1 with: password: ${{ secrets.RAPIDSAI_PYPI_TOKEN }} + skip-existing: true From c94b4ae305068573e219cee1f54ff4f94ff0c5f0 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Fri, 26 May 2023 13:25:26 +0200 Subject: [PATCH 15/20] Disable `np.bool` deprecation warning (#1182) Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/dask-cuda/pull/1182 --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml 
b/pyproject.toml index f6675ccd..d29e871a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -127,6 +127,8 @@ filterwarnings = [ "ignore:make_current is deprecated:DeprecationWarning:", # remove after https://github.com/rapidsai/dask-cuda/issues/1087 is closed "ignore:There is no current event loop:DeprecationWarning:tornado", + # remove after unpinning Dask/Distributed 2023.3.2 + "ignore:.*np.bool.*:DeprecationWarning:", ] [tool.setuptools] From 856c4fe60855906f8eddaab286adca0126104cd3 Mon Sep 17 00:00:00 2001 From: Hugo MacDermott-Opeskin Date: Fri, 26 May 2023 22:04:18 +1000 Subject: [PATCH 16/20] Add `__main__` entrypoint to dask-cuda-worker CLI (#1181) Fixes #1180 Making the CLI runnable with `python -m ` so that we can use the same call for both CLIs in `dask-jobqueue` Authors: - Hugo MacDermott-Opeskin (https://github.com/hmacdope) - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Lawrence Mitchell (https://github.com/wence-) - Peter Andreas Entschev (https://github.com/pentschev) URL: https://github.com/rapidsai/dask-cuda/pull/1181 --- dask_cuda/cli.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dask_cuda/cli.py b/dask_cuda/cli.py index 128da207..5ab74e1f 100644 --- a/dask_cuda/cli.py +++ b/dask_cuda/cli.py @@ -499,3 +499,7 @@ def config( else: client = Client(scheduler, security=security) print_cluster_config(client) + + +if __name__ == "__main__": + worker() From 59c1553095e361a262a7a3ee3b47a375b3933aeb Mon Sep 17 00:00:00 2001 From: jakirkham Date: Wed, 31 May 2023 12:43:48 -0700 Subject: [PATCH 17/20] Require Numba 0.57.0+ (#1185) Aligns with the rest of RAPIDS. Also needed for CUDA 12 support. 
Authors: - https://github.com/jakirkham Approvers: - Peter Andreas Entschev (https://github.com/pentschev) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/dask-cuda/pull/1185 --- dependencies.yaml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index 613ab230..5dc1e0c6 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -93,7 +93,7 @@ dependencies: packages: - dask==2023.3.2 - distributed==2023.3.2.1 - - numba>=0.54 + - numba>=0.57 - numpy>=1.21 - pandas>=1.3,<1.6.0dev0 - pynvml>=11.0.0,<11.5 diff --git a/pyproject.toml b/pyproject.toml index d29e871a..9fcf0e70 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ dependencies = [ "distributed ==2023.3.2.1", "pynvml >=11.0.0,<11.5", "numpy >=1.21", - "numba >=0.54", + "numba >=0.57", "pandas >=1.3,<1.6.0dev0", "zict >=2.0.0", ] From cdb38ad025e6e027b99f731112fccba2a484c95a Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 6 Jun 2023 21:57:31 +0200 Subject: [PATCH 18/20] Increase minimum timeout to wait for workers in CI (#1192) (#1193) We have been getting timeouts waiting for workers in CI, those are not reproducible locally. The reason for that is probably some sort of congestion causing spinup to take longer in CI, therefore this change introduces a variable that can be used to control the minimum timeout and the minimum timeout is doubled in CI. 
Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Ray Douglass (https://github.com/raydouglass) --- ci/test_python.sh | 1 + dask_cuda/utils.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index b9610bca..c988ee15 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -41,6 +41,7 @@ set +e rapids-logger "pytest dask-cuda" pushd dask_cuda DASK_CUDA_TEST_SINGLE_GPU=1 \ +DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT=20 \ UCXPY_IFNAME=eth0 \ UCX_WARN_UNUSED_ENV_VARS=n \ UCX_MEMTYPE_CACHE=n \ diff --git a/dask_cuda/utils.py b/dask_cuda/utils.py index 468c37f4..9fe31333 100644 --- a/dask_cuda/utils.py +++ b/dask_cuda/utils.py @@ -446,7 +446,9 @@ def wait_workers( client: distributed.Client Instance of client, used to query for number of workers connected. min_timeout: float - Minimum number of seconds to wait before timeout. + Minimum number of seconds to wait before timeout. This value may be + overridden by setting the `DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT` + environment variable to a positive integer. seconds_per_gpu: float Seconds to wait for each GPU on the system. For example, if its value is 2 and there is a total of 8 GPUs (workers) being started, @@ -463,6 +465,8 @@ def wait_workers( ------- True if all workers were started, False if a timeout occurs.
""" + min_timeout_env = os.environ.get("DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT", None) + min_timeout = min_timeout if min_timeout_env is None else int(min_timeout_env) n_gpus = n_gpus or get_n_gpus() timeout = max(min_timeout, seconds_per_gpu * n_gpus) From 5836cdee34d3278f9c1c6c9eb9f99de1b2e6952b Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 7 Jun 2023 15:03:48 +0200 Subject: [PATCH 19/20] Increase pytest CI timeout (#1196) Rather than individual tests hanging, the primary nightly problem seems to be that the `pytest` timeout is too short, increase it by 10 minutes to check if that is sufficient. Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Ray Douglass (https://github.com/raydouglass) --- ci/test_python.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index c988ee15..73a93fca 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -45,7 +45,7 @@ DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT=20 \ UCXPY_IFNAME=eth0 \ UCX_WARN_UNUSED_ENV_VARS=n \ UCX_MEMTYPE_CACHE=n \ -timeout 30m pytest \ +timeout 40m pytest \ -vv \ --capture=no \ --cache-clear \ From af05c73e989fd83bc87e627712b5c86bb60adaa1 Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Wed, 7 Jun 2023 10:41:08 -0400 Subject: [PATCH 20/20] update changelog --- CHANGELOG.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d7467bd..3e8d9f8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,39 @@ +# dask-cuda 23.06.00 (7 Jun 2023) + +## 🚨 Breaking Changes + +- Update minimum Python version to Python 3.9 ([#1164](https://github.com/rapidsai/dask-cuda/pull/1164)) [@shwina](https://github.com/shwina) + +## 🐛 Bug Fixes + +- Increase pytest CI timeout ([#1196](https://github.com/rapidsai/dask-cuda/pull/1196)) [@pentschev](https://github.com/pentschev) +- Increase minimum timeout to wait for workers in CI 
([#1193](https://github.com/rapidsai/dask-cuda/pull/1193)) [@pentschev](https://github.com/pentschev) +- Disable `np.bool` deprecation warning ([#1182](https://github.com/rapidsai/dask-cuda/pull/1182)) [@pentschev](https://github.com/pentschev) +- Always upload on branch/nightly builds ([#1177](https://github.com/rapidsai/dask-cuda/pull/1177)) [@raydouglass](https://github.com/raydouglass) +- Workaround for `DeviceHostFile` tests with CuPy>=12.0.0 ([#1175](https://github.com/rapidsai/dask-cuda/pull/1175)) [@pentschev](https://github.com/pentschev) +- Temporarily relax Python constraint ([#1166](https://github.com/rapidsai/dask-cuda/pull/1166)) [@vyasr](https://github.com/vyasr) + +## 📖 Documentation + +- [doc] Add document about main guard. ([#1157](https://github.com/rapidsai/dask-cuda/pull/1157)) [@trivialfis](https://github.com/trivialfis) + +## 🚀 New Features + +- Require Numba 0.57.0+ ([#1185](https://github.com/rapidsai/dask-cuda/pull/1185)) [@jakirkham](https://github.com/jakirkham) +- Revert "Temporarily relax Python constraint" ([#1171](https://github.com/rapidsai/dask-cuda/pull/1171)) [@vyasr](https://github.com/vyasr) +- Update to zict 3.0 ([#1160](https://github.com/rapidsai/dask-cuda/pull/1160)) [@pentschev](https://github.com/pentschev) + +## 🛠️ Improvements + +- Add `__main__` entrypoint to dask-cuda-worker CLI ([#1181](https://github.com/rapidsai/dask-cuda/pull/1181)) [@hmacdope](https://github.com/hmacdope) +- run docs nightly too ([#1176](https://github.com/rapidsai/dask-cuda/pull/1176)) [@AyodeAwe](https://github.com/AyodeAwe) +- Fix GHAs Workflows ([#1172](https://github.com/rapidsai/dask-cuda/pull/1172)) [@ajschmidt8](https://github.com/ajschmidt8) +- Remove `matrix_filter` from workflows ([#1168](https://github.com/rapidsai/dask-cuda/pull/1168)) [@charlesbluca](https://github.com/charlesbluca) +- Revert to branch-23.06 for shared-action-workflows ([#1167](https://github.com/rapidsai/dask-cuda/pull/1167)) [@shwina](https://github.com/shwina) +- 
Update minimum Python version to Python 3.9 ([#1164](https://github.com/rapidsai/dask-cuda/pull/1164)) [@shwina](https://github.com/shwina) +- Remove usage of rapids-get-rapids-version-from-git ([#1163](https://github.com/rapidsai/dask-cuda/pull/1163)) [@jjacobelli](https://github.com/jjacobelli) +- Use ARC V2 self-hosted runners for GPU jobs ([#1159](https://github.com/rapidsai/dask-cuda/pull/1159)) [@jjacobelli](https://github.com/jjacobelli) + # dask-cuda 23.04.00 (6 Apr 2023) ## 🚨 Breaking Changes