From 1e59a933bf94392bf0c6c084d2b3f25fc3ef1fcd Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 10 Sep 2024 15:47:47 -0500 Subject: [PATCH] Add script to test nightly environments are solvable and using recent nightlies. (#690) This PR adds tests that run on every `integration` PR and on a nightly basis to ensure that RAPIDS conda environments can be solved with recent packages. This will help us diagnose and react to problems arising from conda environment conflicts, which sometimes force conda to select older nightly builds of RAPIDS packages. A partial solution for https://github.com/rapidsai/ops/issues/2947. Next steps: - [x] Wait for https://github.com/rapidsai/cugraph/pull/4639 to merge. - [x] After this PR is merged, merge https://github.com/rapidsai/workflows/pull/59 as well. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/integration/pull/690 --- .github/workflows/pr.yaml | 16 +++- .github/workflows/test.yaml | 23 ++++++ ci/check_conda_nightly_env.py | 145 ++++++++++++++++++++++++++++++++++ ci/release/update-version.sh | 1 + ci/test_conda_nightly_env.sh | 28 +++++++ 5 files changed, 209 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/test.yaml create mode 100644 ci/check_conda_nightly_env.py create mode 100755 ci/test_conda_nightly_env.sh diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 356955cf..727e5180 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -10,13 +10,21 @@ concurrency: cancel-in-progress: true jobs: + pr-builder: + needs: + - build + - test-conda-nightly-env + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10 build: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 with: build_type: pull-request - pr-builder: - needs: - - build + test-conda-nightly-env: secrets: inherit - uses: 
"""Check that a conda solve selected recent RAPIDS nightly packages.

Usage::

    python ci/check_conda_nightly_env.py <conda-output.json>

The single argument is the path to the JSON document written by
``conda create --dry-run --json``. The script exits non-zero when conda
reported an error, when a ``rapidsai-nightly`` package is older than
``OLD_PACKAGE_THRESHOLD_DAYS`` days, or when such a package has no date in
its build string and is not listed in ``EXCLUDED_PACKAGES``.
"""

import json
import re
import subprocess
import sys
from datetime import datetime, timedelta


# Nightly packages whose build date is older than this many days are errors.
OLD_PACKAGE_THRESHOLD_DAYS = 3

EXCLUDED_PACKAGES = {
    # These packages are not built every night:
    "rapids",
    "rapids-xgboost",
    # These packages do not have date strings:
    "cubinlinker",
    "pynvjitlink",
    "rapids-dask-dependency",
    "libxgboost",
    "py-xgboost",
    "xgboost",
    # TODO: Do we want ucx-proc on rapidsai or from conda-forge?
    "ucx-proc",
}

# ANSI color codes used to highlight lines
FAIL = "\033[31m"
WARNING = "\033[33m"
OKGREEN = "\033[32m"
ENDC = "\033[0m"

# Matches 6 digits starting with "2", which should be YYMMDD, delimited by
# underscores on both sides. Compiled once because it is used per package.
_BUILD_DATE_RE = re.compile(r"_(2\d{5})_")


def is_rapids_nightly_package(package_info):
    """Return True when the package record comes from ``rapidsai-nightly``.

    ``package_info`` is one entry of the conda JSON ``actions.LINK`` list.
    An entry without a ``channel`` key is treated as not being a nightly
    package instead of raising ``KeyError``.
    """
    return package_info.get("channel") == "rapidsai-nightly"


def get_package_date(package):
    """Extract the build date embedded in a package's build string.

    Parameters
    ----------
    package : dict
        A package record with at least ``name`` and ``build_string`` keys.

    Returns
    -------
    datetime or None
        Midnight of the build date, or ``None`` when the package is listed
        in ``EXCLUDED_PACKAGES`` (silently) or when no valid ``_YYMMDD_``
        token is present (after printing a warning).
    """
    if package["name"] in EXCLUDED_PACKAGES:
        return None

    match = _BUILD_DATE_RE.search(package["build_string"])
    if match:
        try:
            return datetime.strptime(match.group(1), "%y%m%d")
        except ValueError:
            # Six digits that are not a calendar date (e.g. "240230"); fall
            # through and report the package as undated rather than crash.
            pass

    print(
        f"{WARNING}Date string not found for {package['name']} "
        f"in the build string '{package['build_string']}'.{ENDC}"
    )
    return None


def _load_conda_json(json_path):
    """Return the parsed conda JSON object, or None after printing the reason."""
    try:
        with open(json_path, encoding="utf-8") as f:
            json_data = json.load(f)
    except OSError as e:
        print("Error: JSON data file from conda could not be read:")
        print(e)
        return None
    except ValueError as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        print("Error: JSON data file from conda failed to load:")
        print(e)
        return None

    if not isinstance(json_data, dict):
        print("Error: JSON data file from conda does not contain a JSON object.")
        return None

    if "error" in json_data:
        print("Error: conda failed:")
        print()
        print(json_data["error"])
        return None

    return json_data


def check_env(json_path, *, today=None):
    """Validate rapids conda environments.

    Parses JSON output of `conda create` and checks the dates on the RAPIDS
    packages to ensure nightlies are relatively new.

    Parameters
    ----------
    json_path : str
        Path to the JSON file produced by conda.
    today : datetime, optional
        Reference "now" used for the age comparison; its time-of-day is
        discarded. Defaults to the current local time. Mainly useful to make
        the check reproducible.

    Returns
    -------
    int
        An exit code value: 0 when every nightly package is recent and
        dated, 1 when the file cannot be read or parsed, conda reported an
        error, the package list is missing, or any nightly package is too
        old or undated.
    """
    json_data = _load_conda_json(json_path)
    if json_data is None:
        return 1

    actions = json_data.get("actions")
    package_data = actions.get("LINK") if isinstance(actions, dict) else None
    if not isinstance(package_data, list):
        print("Error: conda output does not contain a package list in actions.LINK.")
        return 1

    # Dictionary to store the nightly packages and their dates
    rapids_package_dates = {
        package["name"]: get_package_date(package)
        for package in package_data
        if is_rapids_nightly_package(package)
    }

    if today is None:
        today = datetime.now()
    # Build dates carry no time-of-day, so compare against midnight.
    today = today.replace(hour=0, minute=0, second=0, microsecond=0)
    old_threshold = today - timedelta(days=OLD_PACKAGE_THRESHOLD_DAYS)

    exit_code = 0

    # If there are old packages, show an error
    old_packages = {
        package: date
        for package, date in rapids_package_dates.items()
        if date is not None and date < old_threshold
    }
    if old_packages:
        exit_code = 1
        print()
        print(
            f"{FAIL}Error: The following packages are more than "
            f"{OLD_PACKAGE_THRESHOLD_DAYS} days old:{ENDC}"
        )
        for package, date in sorted(old_packages.items()):
            date_string = date.strftime("%Y-%m-%d")
            print(f"{FAIL} - {(package + ':'):<24}\t{date_string}{ENDC}")

    # If there are undated packages, show an error
    undated_packages = sorted(
        package
        for package, date in rapids_package_dates.items()
        if date is None and package not in EXCLUDED_PACKAGES
    )
    if undated_packages:
        exit_code = 1
        print()
        print(
            f"{FAIL}Error: The following packages are missing dates in their build strings:{ENDC}"
        )
        for package in undated_packages:
            print(f"{FAIL} - {package}{ENDC}")

    print()
    print(
        f"The following packages are less than {OLD_PACKAGE_THRESHOLD_DAYS} days old:"
    )
    for package, date in sorted(rapids_package_dates.items()):
        # Packages already reported as too old do not belong under this
        # heading, and undated packages have nothing to show.
        if date is None or package in old_packages:
            continue
        date_string = date.strftime("%Y-%m-%d")
        # Highlight packages that are recent enough but not built today.
        status = WARNING if date < today else OKGREEN
        print(f"{status} - {(package + ':'):<24}\t{date_string}{ENDC}")

    return exit_code


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Provide only one argument, the filepath to a JSON output from conda.")
        sys.exit(1)

    sys.exit(check_env(sys.argv[1]))