From 68e07a21ba05b0c8f34d428e76114519d476f3c3 Mon Sep 17 00:00:00 2001 From: Martin Yeo Date: Fri, 25 Apr 2025 10:18:17 +0100 Subject: [PATCH 1/2] Update Iris benchmarking to align with templating. --- .github/workflows/benchmarks_report.yml | 2 +- .github/workflows/benchmarks_run.yml | 17 +- benchmarks/README.md | 70 ++-- benchmarks/_asv_delegated_abc.py | 249 +++++++++++++ benchmarks/asv.conf.json | 4 +- benchmarks/asv_delegated.py | 331 ++++++------------ benchmarks/asv_delegated_iris.py | 134 ------- benchmarks/benchmarks/__init__.py | 7 +- .../benchmarks/generate_data/__init__.py | 10 +- benchmarks/bm_runner.py | 52 ++- benchmarks/custom_bms/install.py | 16 +- benchmarks/custom_bms/tracemallocbench.py | 4 +- 12 files changed, 473 insertions(+), 423 deletions(-) create mode 100644 benchmarks/_asv_delegated_abc.py delete mode 100644 benchmarks/asv_delegated_iris.py mode change 100644 => 100755 benchmarks/bm_runner.py diff --git a/.github/workflows/benchmarks_report.yml b/.github/workflows/benchmarks_report.yml index 93a2bc1a77..d47ed9b453 100644 --- a/.github/workflows/benchmarks_report.yml +++ b/.github/workflows/benchmarks_report.yml @@ -80,4 +80,4 @@ jobs: - name: Post reports env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: python benchmarks/bm_runner.py _gh_post + run: benchmarks/bm_runner.py _gh_post diff --git a/.github/workflows/benchmarks_run.yml b/.github/workflows/benchmarks_run.yml index 287735c335..f32d42caaf 100644 --- a/.github/workflows/benchmarks_run.yml +++ b/.github/workflows/benchmarks_run.yml @@ -21,6 +21,8 @@ on: jobs: pre-checks: + # This workflow supports two different scenarios (overnight and branch). + # The pre-checks job determines which scenario is being run. runs-on: ubuntu-latest if: github.repository == 'SciTools/iris' outputs: @@ -36,9 +38,11 @@ jobs: # SEE ALSO .github/labeler.yml . 
paths: requirements/locks/*.lock setup.py - id: overnight + name: Check overnight scenario if: github.event_name != 'pull_request' run: echo "check=true" >> "$GITHUB_OUTPUT" - id: branch + name: Check branch scenario if: > github.event_name == 'pull_request' && @@ -67,7 +71,8 @@ jobs: steps: # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - uses: actions/checkout@v4 + - name: Checkout repo + uses: actions/checkout@v4 with: fetch-depth: 0 @@ -107,6 +112,8 @@ jobs: echo "OVERRIDE_TEST_DATA_REPOSITORY=${GITHUB_WORKSPACE}/${IRIS_TEST_DATA_PATH}/test_data" >> $GITHUB_ENV - name: Benchmark this pull request + # If the 'branch' condition(s) are met: use the bm_runner to compare + # the proposed merge with the base branch. if: needs.pre-checks.outputs.branch == 'true' env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -115,10 +122,14 @@ jobs: nox -s benchmarks -- branch origin/${{ github.base_ref }} - name: Run overnight benchmarks + # If the 'overnight' condition(s) are met: use the bm_runner to compare + # each of the last 24 hours' commits to their parents. id: overnight if: needs.pre-checks.outputs.overnight == 'true' env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # The first_commit argument allows a custom starting point - useful + # for manual re-running. run: | first_commit=${{ inputs.first_commit }} if [ "$first_commit" == "" ] @@ -132,6 +143,8 @@ jobs: fi - name: Warn of failure + # The overnight run is not on a pull request, so a failure could go + # unnoticed without being actively advertised. if: > failure() && steps.overnight.outcome == 'failure' @@ -143,6 +156,7 @@ jobs: gh issue create --title "$title" --body "$body" --label "Bot" --label "Type: Performance" --repo $GITHUB_REPOSITORY - name: Upload any benchmark reports + # Uploading enables more downstream processing e.g. posting a PR comment. 
if: success() || steps.overnight.outcome == 'failure' uses: actions/upload-artifact@v4 with: @@ -150,6 +164,7 @@ jobs: path: .github/workflows/benchmark_reports - name: Archive asv results + # Store the raw ASV database(s) to help manual investigations. if: ${{ always() }} uses: actions/upload-artifact@v4 with: diff --git a/benchmarks/README.md b/benchmarks/README.md index 911d5f7833..09ea920176 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -1,6 +1,6 @@ -# Iris Performance Benchmarking +# SciTools Performance Benchmarking -Iris uses an [Airspeed Velocity](https://github.com/airspeed-velocity/asv) +SciTools uses an [Airspeed Velocity](https://github.com/airspeed-velocity/asv) (ASV) setup to benchmark performance. This is primarily designed to check for performance shifts between commits using statistical analysis, but can also be easily repurposed for manual comparative and scalability analyses. @@ -21,25 +21,30 @@ by the PR. (This run is managed by [the aforementioned GitHub Action](../.github/workflows/benchmark.yml)). To run locally: the **benchmark runner** provides conveniences for -common benchmark setup and run tasks, including replicating the automated -overnight run locally. This is accessed via the Nox `benchmarks` session - see -`nox -s benchmarks -- --help` for detail (_see also: -[bm_runner.py](./bm_runner.py)_). Alternatively you can directly run `asv ...` -commands from this directory (you will still need Nox installed - see -[Benchmark environments](#benchmark-environments)). +common benchmark setup and run tasks, including replicating the benchmarking +performed by GitHub Actions workflows. This can be accessed by: + +- The Nox `benchmarks` session - (use + `nox -s benchmarks -- --help` for details). +- `benchmarks/bm_runner.py` (use the `--help` argument for details). 
+- Directly running `asv` commands from the `benchmarks/` directory (check + whether environment setup has any extra dependencies - see + [Benchmark environments](#benchmark-environments)). + +### Reducing run time A significant portion of benchmark run time is environment management. Run-time -can be reduced by placing the benchmark environment on the same file system as -your -[Conda package cache](https://conda.io/projects/conda/en/latest/user-guide/configuration/use-condarc.html#specify-pkg-directories), -if it is not already. You can achieve this by either: - -- Temporarily reconfiguring `ENV_PARENT` in `delegated_env_commands` - in [asv.conf.json](asv.conf.json) to reference a location on the same file - system as the Conda package cache. +can be reduced by co-locating the benchmark environment and your +[Conda package cache](https://docs.conda.io/projects/conda/en/latest/user-guide/configuration/custom-env-and-pkg-locations.html) +on the same [file system](https://en.wikipedia.org/wiki/File_system), if they +are not already. This can be done in several ways: + +- Temporarily reconfiguring `env_parent` in + [`_asv_delegated_abc`](_asv_delegated_abc.py) to reference a location on the same + file system as the Conda package cache. - Using an alternative Conda package cache location during the benchmark run, e.g. via the `$CONDA_PKGS_DIRS` environment variable. -- Moving your Iris repo to the same file system as the Conda package cache. +- Moving your repo checkout to the same file system as the Conda package cache. ### Environment variables @@ -73,7 +78,8 @@ requirements will not be delayed by repeated environment setup - especially relevant given the [benchmark runner](bm_runner.py)'s use of [--interleave-rounds](https://asv.readthedocs.io/en/stable/commands.html?highlight=interleave-rounds#asv-run), or any time you know you will repeatedly benchmark the same commit. **NOTE:** -Iris environments are large so this option can consume a lot of disk space. 
+SciTools environments tend to large so this option can consume a lot of disk +space. ## Writing benchmarks @@ -97,6 +103,7 @@ for manual investigations; and consider committing any useful benchmarks as [on-demand benchmarks](#on-demand-benchmarks) for future developers to use. ### Data generation + **Important:** be sure not to use the benchmarking environment to generate any test objects/files, as this environment changes with each commit being benchmarked, creating inconsistent benchmark 'conditions'. The @@ -106,7 +113,7 @@ solution; read more detail there. ### ASV re-run behaviour Note that ASV re-runs a benchmark multiple times between its `setup()` routine. -This is a problem for benchmarking certain Iris operations such as data +This is a problem for benchmarking certain SciTools operations such as data realisation, since the data will no longer be lazy after the first run. Consider writing extra steps to restore objects' original state _within_ the benchmark itself. @@ -117,10 +124,13 @@ maintain result accuracy this should be accompanied by increasing the number of repeats _between_ `setup()` calls using the `repeat` attribute. `warmup_time = 0` is also advisable since ASV performs independent re-runs to estimate run-time, and these will still be subject to the original problem. +The `@disable_repeat_between_setup` decorator in +[`benchmarks/__init__.py`](benchmarks/__init__.py) offers a convenience for +all this. ### Custom benchmarks -Iris benchmarking implements custom benchmark types, such as a `tracemalloc` +SciTools benchmarking implements custom benchmark types, such as a `tracemalloc` benchmark to measure memory growth. See [custom_bms/](./custom_bms) for more detail. @@ -131,10 +141,10 @@ limited available runtime and risk of false-positives. 
It remains useful for manual investigations).** When comparing performance between commits/file-type/whatever it can be helpful -to know if the differences exist in scaling or non-scaling parts of the Iris -functionality in question. This can be done using a size parameter, setting -one value to be as small as possible (e.g. a scalar `Cube`), and the other to -be significantly larger (e.g. a 1000x1000 `Cube`). Performance differences +to know if the differences exist in scaling or non-scaling parts of the +operation under test. This can be done using a size parameter, setting +one value to be as small as possible (e.g. a scalar value), and the other to +be significantly larger (e.g. a 1000x1000 array). Performance differences might only be seen for the larger value, or the smaller, or both, getting you closer to the root cause. @@ -151,13 +161,15 @@ suites for the UK Met Office NG-VAT project. ## Benchmark environments We have disabled ASV's standard environment management, instead using an -environment built using the same Nox scripts as Iris' test environments. This -is done using ASV's plugin architecture - see -[asv_delegated_conda.py](asv_delegated_conda.py) and the extra config items in -[asv.conf.json](asv.conf.json). +environment built using the same scripts that set up the package test +environments. +This is done using ASV's plugin architecture - see +[`asv_delegated.py`](asv_delegated.py) and associated +references in [`asv.conf.json`](asv.conf.json) (`environment_type` and +`plugins`). (ASV is written to control the environment(s) that benchmarks are run in - minimising external factors and also allowing it to compare between a matrix of dependencies (each in a separate environment). We have chosen to sacrifice these features in favour of testing each commit with its intended dependencies, -controlled by Nox + lock-files). +controlled by the test environment setup script(s)). 
# Copyright SciTools contributors
#
# This file is part of SciTools and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""ASV plug-in providing an alternative :class:`asv.environments.Environment` subclass.

Preps an environment via custom user scripts, then uses that as the
benchmarking environment.

This module is intended as the generic code that can be shared between
repositories. Providing a functional benchmarking environment relies on correct
subclassing of the :class:`_DelegatedABC` class to specialise it for the repo in
question. The parent and subclass are separated into their own dedicated files,
which isolates ALL repo-specific code to a single file, thus simplifying the
templating process.

"""

from abc import ABC, abstractmethod
from contextlib import contextmanager, suppress
from os import environ
from pathlib import Path
import sys

from asv.console import log
from asv.environment import Environment, EnvironmentUnavailable
from asv.repo import Repo


class _DelegatedABC(Environment, ABC):
    """Manage a benchmark environment using custom user scripts, run at each commit.

    Ignores user input variations - ``matrix`` / ``pythons`` /
    ``exclude``, since environment is being managed outside ASV.

    A vanilla :class:`asv.environment.Environment` is created for containing
    the expected ASV configuration files and checked-out project. The actual
    'functional' environment is created/updated using
    :meth:`_prep_env_override`, then the location is recorded via
    a symlink within the ASV environment. The symlink is used as the
    environment path used for any executable calls (e.g.
    ``python my_script.py``).

    Intended as the generic parent class that can be shared between
    repositories. Providing a functional benchmarking environment relies on
    correct subclassing of this class to specialise it for the repo in question.

    Warnings
    --------
    :class:`_DelegatedABC` is an abstract base class. It MUST ONLY be used via
    subclasses implementing their own :meth:`_prep_env_override`, and also
    :attr:`tool_name`, which must be unique.

    """

    tool_name = "delegated-ABC"
    """Required by ASV as a unique identifier of the environment type."""

    DELEGATED_LINK_NAME = "delegated_env"
    """The name of the symlink to the delegated environment."""

    COMMIT_ENVS_VAR = "ASV_COMMIT_ENVS"
    """Env var that instructs a dedicated environment be created per commit."""

    def __init__(self, conf, python, requirements, tagged_env_vars):
        """Get a 'delegated' environment based on the given ASV config object.

        Parameters
        ----------
        conf : dict
            ASV configuration object.

        python : str
            Ignored - environment management is delegated. The value is always
            ``DELEGATED``.

        requirements : dict (str -> str)
            Ignored - environment management is delegated. The value is always
            an empty dict.

        tagged_env_vars : dict (tag, key) -> value
            Ignored - environment management is delegated. The value is always
            an empty dict.

        Raises
        ------
        EnvironmentUnavailable
            The original environment or delegated environment cannot be created.

        """
        ignored = []
        if python:
            ignored.append(f"{python=}")
        if requirements:
            ignored.append(f"{requirements=}")
        if tagged_env_vars:
            ignored.append(f"{tagged_env_vars=}")
        # Only warn when something was actually supplied and ignored -
        # avoids an empty "Ignoring ASV setting(s): ." message.
        if ignored:
            message = (
                f"Ignoring ASV setting(s): {', '.join(ignored)}. Benchmark "
                "environment management is delegated to third party script(s)."
            )
            log.warning(message)
        self._python = "DELEGATED"
        self._requirements = {}
        self._tagged_env_vars = {}
        super().__init__(
            conf,
            self._python,
            self._requirements,
            self._tagged_env_vars,
        )

        self._path_undelegated = Path(self._path)
        """Preserves the 'true' path of the environment so that self._path can
        be safely modified and restored."""

    @property
    def _path_delegated(self) -> Path:
        """The path of the symlink to the delegated environment."""
        return self._path_undelegated / self.DELEGATED_LINK_NAME

    @property
    def _delegated_found(self) -> bool:
        """Whether self._path_delegated successfully resolves to a directory."""
        resolved = None
        with suppress(FileNotFoundError):
            resolved = self._path_delegated.resolve(strict=True)
        result = resolved is not None and resolved.is_dir()
        return result

    def _symlink_to_delegated(self, delegated_env_path: Path) -> None:
        """Create the symlink to the delegated environment.

        Raises
        ------
        EnvironmentUnavailable
            If the freshly created symlink does not resolve to a directory.
        """
        self._path_delegated.unlink(missing_ok=True)
        self._path_delegated.parent.mkdir(parents=True, exist_ok=True)
        self._path_delegated.symlink_to(delegated_env_path, target_is_directory=True)
        # Explicit check (not assert) so the guard survives `python -O`.
        if not self._delegated_found:
            message = (
                f"Symlink {self._path_delegated} does not resolve to a "
                f"directory after linking to: {delegated_env_path}"
            )
            raise EnvironmentUnavailable(message)

    def _setup(self):
        """Temporarily try to set the user's active env as the delegated env.

        Environment prep will be run anyway once ASV starts checking out
        commits, but this step tries to provide a usable environment (with
        python, etc.) at the moment that ASV expects it.

        """
        current_env = Path(sys.executable).parents[1]
        message = (
            "Temporarily using user's active environment as benchmarking "
            f"environment: {current_env} . "
        )
        try:
            self._symlink_to_delegated(current_env)
            _ = self.find_executable("python")
        except Exception:
            # Deliberately broad: ANY failure here means the current
            # environment is unusable, and a fallback message is logged.
            message = (
                f"Delegated environment {self.name} not yet set up (unable to "
                "determine current environment)."
            )
            self._path_delegated.unlink(missing_ok=True)

        message += "Correct environment will be set up at the first commit checkout."
        log.warning(message)

    @abstractmethod
    def _prep_env_override(self, env_parent_dir: Path) -> Path:
        """Run aspects of :meth:`_prep_env` that vary between repos.

        This is the method that is expected to do the preparing
        (:meth:`_prep_env` only performs pre- and post- steps). MUST be
        overridden in any subclass environments before they will work.

        Parameters
        ----------
        env_parent_dir : Path
            The directory that the prepared environment should be placed in.

        Returns
        -------
        Path
            The path to the prepared environment.
        """
        pass

    def _prep_env(self, commit_hash: str) -> None:
        """Prepare the delegated environment for the given commit hash.

        Raises
        ------
        EnvironmentUnavailable
            If the subclass prepared an environment outside the expected
            parent directory.
        """
        message = (
            f"Running delegated environment management for: {self.name} "
            f"at commit: {commit_hash[:8]}"
        )
        log.info(message)

        env_parent = Path(self._env_dir).resolve()
        new_env_per_commit = self.COMMIT_ENVS_VAR in environ
        if new_env_per_commit:
            env_parent = env_parent / commit_hash[:8]

        delegated_env_path = self._prep_env_override(env_parent)
        # Explicit check (not assert) so the guard survives `python -O`.
        if not delegated_env_path.is_relative_to(env_parent):
            message = (
                f"Delegated environment {delegated_env_path} is not within "
                f"the expected parent directory: {env_parent}"
            )
            raise EnvironmentUnavailable(message)

        # Record the environment's path via a symlink within this environment.
        self._symlink_to_delegated(delegated_env_path)

        message = f"Environment {self.name} updated to spec at {commit_hash[:8]}"
        log.info(message)

    def checkout_project(self, repo: Repo, commit_hash: str) -> None:
        """Check out the working tree of the project at given commit hash."""
        super().checkout_project(repo, commit_hash)
        self._prep_env(commit_hash)

    @contextmanager
    def _delegate_path(self):
        """Context manager to use the delegated env path as this env's path."""
        if not self._delegated_found:
            message = f"Delegated environment not found at: {self._path_delegated}"
            log.error(message)
            raise EnvironmentUnavailable(message)

        try:
            self._path = str(self._path_delegated)
            yield
        finally:
            self._path = str(self._path_undelegated)

    def find_executable(self, executable):
        """Find an executable (e.g. python, pip) in the DELEGATED environment.

        Raises
        ------
        OSError
            If the executable is not found in the environment.
        """
        if not self._delegated_found:
            # Expected during early setup, before the first commit checkout
            # has created the delegated environment. Include a message so the
            # failure is diagnosable (the original raised a bare OSError).
            raise OSError(
                f"Delegated environment not found at: {self._path_delegated}"
            )

        with self._delegate_path():
            return super().find_executable(executable)

    def run_executable(self, executable, args, **kwargs):
        """Run a given executable (e.g. python, pip) in the DELEGATED environment."""
        with self._delegate_path():
            return super().run_executable(executable, args, **kwargs)

    def run(self, args, **kwargs):
        # This is not a specialisation - just implementing the abstract method.
        log.debug(f"Running '{' '.join(args)}' in {self.name}")
        return self.run_executable("python", args, **kwargs)
- -""" - -from abc import ABC, abstractmethod -from contextlib import contextmanager, suppress +import ast +import enum from os import environ +from os.path import getmtime from pathlib import Path -import sys - -from asv.console import log -from asv.environment import Environment, EnvironmentUnavailable -from asv.repo import Repo - - -class Delegated(Environment, ABC): - """Manage a benchmark environment using custom user scripts, run at each commit. - - Ignores user input variations - ``matrix`` / ``pythons`` / - ``exclude``, since environment is being managed outside ASV. +import re - A vanilla :class:`asv.environment.Environment` is created for containing - the expected ASV configuration files and checked-out project. The actual - 'functional' environment is created/updated using - :meth:`_prep_env_override`, then the location is recorded via - a symlink within the ASV environment. The symlink is used as the - environment path used for any executable calls (e.g. - ``python my_script.py``). +from asv import util as asv_util - Intended as the generic parent class that can be shared between - repositories. Providing a functional benchmarking environment relies on - correct subclassing of this class to specialise it for the repo in question. +from _asv_delegated_abc import _DelegatedABC - Warnings - -------- - :class:`Delegated` is an abstract base class. It MUST ONLY be used via - subclasses implementing their own :meth:`_prep_env_override`, and also - :attr:`tool_name`, which must be unique. 
- """ +class Delegated(_DelegatedABC): + """Specialism of :class:`_DelegatedABC` for benchmarking this repo.""" tool_name = "delegated" - """Required by ASV as a unique identifier of the environment type.""" - - DELEGATED_LINK_NAME = "delegated_env" - """The name of the symlink to the delegated environment.""" - - COMMIT_ENVS_VAR = "ASV_COMMIT_ENVS" - """Env var that instructs a dedicated environment be created per commit.""" - - def __init__(self, conf, python, requirements, tagged_env_vars): - """Get a 'delegated' environment based on the given ASV config object. - - Parameters - ---------- - conf : dict - ASV configuration object. - python : str - Ignored - environment management is delegated. The value is always - ``DELEGATED``. - - requirements : dict (str -> str) - Ignored - environment management is delegated. The value is always - an empty dict. - - tagged_env_vars : dict (tag, key) -> value - Ignored - environment management is delegated. The value is always - an empty dict. - - Raises - ------ - EnvironmentUnavailable - The original environment or delegated environment cannot be created. - - """ - ignored = [] - if python: - ignored.append(f"{python=}") - if requirements: - ignored.append(f"{requirements=}") - if tagged_env_vars: - ignored.append(f"{tagged_env_vars=}") - message = ( - f"Ignoring ASV setting(s): {', '.join(ignored)}. Benchmark " - "environment management is delegated to third party script(s)." 
- ) - log.warning(message) - self._python = "DELEGATED" - self._requirements = {} - self._tagged_env_vars = {} - super().__init__( - conf, - self._python, - self._requirements, - self._tagged_env_vars, - ) - - self._path_undelegated = Path(self._path) - """Preserves the 'true' path of the environment so that self._path can - be safely modified and restored.""" - - @property - def _path_delegated(self) -> Path: - """The path of the symlink to the delegated environment.""" - return self._path_undelegated / self.DELEGATED_LINK_NAME - - @property - def _delegated_found(self) -> bool: - """Whether self._path_delegated successfully resolves to a directory.""" - resolved = None - with suppress(FileNotFoundError): - resolved = self._path_delegated.resolve(strict=True) - result = resolved is not None and resolved.is_dir() - return result - - def _symlink_to_delegated(self, delegated_env_path: Path) -> None: - """Create the symlink to the delegated environment.""" - self._path_delegated.unlink(missing_ok=True) - self._path_delegated.parent.mkdir(parents=True, exist_ok=True) - self._path_delegated.symlink_to(delegated_env_path, target_is_directory=True) - assert self._delegated_found - - def _setup(self): - """Temporarily try to set the user's active env as the delegated env. - - Environment prep will be run anyway once ASV starts checking out - commits, but this step tries to provide a usable environment (with - python, etc.) at the moment that ASV expects it. - - """ - current_env = Path(sys.executable).parents[1] - message = ( - "Temporarily using user's active environment as benchmarking " - f"environment: {current_env} . " - ) - try: - self._symlink_to_delegated(current_env) - _ = self.find_executable("python") - except Exception: - message = ( - f"Delegated environment {self.name} not yet set up (unable to " - "determine current environment)." 
- ) - self._path_delegated.unlink(missing_ok=True) - - message += "Correct environment will be set up at the first commit checkout." - log.warning(message) - - @abstractmethod def _prep_env_override(self, env_parent_dir: Path) -> Path: - """Run aspects of :meth:`_prep_env` that vary between repos. + """Environment preparation specialised for this repo. - This is the method that is expected to do the preparing - (:meth:`_prep_env` only performs pre- and post- steps). MUST be - overridden in any subclass environments before they will work. + Scans the checked-out commit of Iris to work out the appropriate + preparation command, including gathering any extra information that said + command needs. Parameters ---------- @@ -177,71 +38,97 @@ def _prep_env_override(self, env_parent_dir: Path) -> Path: Path The path to the prepared environment. """ - pass - - def _prep_env(self, commit_hash: str) -> None: - """Prepare the delegated environment for the given commit hash.""" - message = ( - f"Running delegated environment management for: {self.name} " - f"at commit: {commit_hash[:8]}" - ) - log.info(message) - - env_parent = Path(self._env_dir).resolve() - new_env_per_commit = self.COMMIT_ENVS_VAR in environ - if new_env_per_commit: - env_parent = env_parent / commit_hash[:8] - - delegated_env_path = self._prep_env_override(env_parent) - assert delegated_env_path.is_relative_to(env_parent) - - # Record the environment's path via a symlink within this environment. 
- self._symlink_to_delegated(delegated_env_path) - - message = f"Environment {self.name} updated to spec at {commit_hash[:8]}" - log.info(message) - - def checkout_project(self, repo: Repo, commit_hash: str) -> None: - """Check out the working tree of the project at given commit hash.""" - super().checkout_project(repo, commit_hash) - self._prep_env(commit_hash) - - @contextmanager - def _delegate_path(self): - """Context manager to use the delegated env path as this env's path.""" - if not self._delegated_found: - message = f"Delegated environment not found at: {self._path_delegated}" - log.error(message) - raise EnvironmentUnavailable(message) - - try: - self._path = str(self._path_delegated) - yield - finally: - self._path = str(self._path_undelegated) - - def find_executable(self, executable): - """Find an executable (e.g. python, pip) in the DELEGATED environment. - - Raises - ------ - OSError - If the executable is not found in the environment. - """ - if not self._delegated_found: - # Required during environment setup. OSError expected if executable - # not found. - raise OSError - - with self._delegate_path(): - return super().find_executable(executable) - - def run_executable(self, executable, args, **kwargs): - """Run a given executable (e.g. python, pip) in the DELEGATED environment.""" - with self._delegate_path(): - return super().run_executable(executable, args, **kwargs) - - def run(self, args, **kwargs): - # This is not a specialisation - just implementing the abstract method. - log.debug(f"Running '{' '.join(args)}' in {self.name}") - return self.run_executable("python", args, **kwargs) + # The project checkout. 
+ build_dir = Path(self._build_root) / self._repo_subdir + + class Mode(enum.Enum): + """The scenarios where the correct env setup script is known.""" + + NOX = enum.auto() + """``PY_VER=x.xx nox --session=tests --install-only`` is supported.""" + + mode = None + + noxfile = build_dir / "noxfile.py" + if noxfile.is_file(): + # Our noxfile originally did not support `--install-only` - you + # could either run the tests, or run nothing at all. Adding + # `run_always` to `prepare_venv` enabled environment setup without + # running tests. + noxfile_tree = ast.parse(source=noxfile.read_text()) + prep_session = next( + filter( + lambda node: getattr(node, "name", "") == "prepare_venv", + ast.walk(noxfile_tree), + ) + ) + prep_session_code = ast.unparse(prep_session) + if ( + "session.run(" not in prep_session_code + and "session.run_always(" in prep_session_code + ): + mode = Mode.NOX + + match mode: + # Just NOX for now but the architecture is here for future cases. + case Mode.NOX: + # Need to determine a single Python version to run with. + req_dir = build_dir / "requirements" + lockfile_dir = req_dir / "locks" + if not lockfile_dir.is_dir(): + lockfile_dir = req_dir / "ci" / "nox.lock" + + if not lockfile_dir.is_dir(): + message = "No lockfile directory found in the expected locations." + raise FileNotFoundError(message) + + def py_ver_from_lockfiles(lockfile: Path) -> str: + pattern = re.compile(r"py(\d+)-") + search = pattern.search(lockfile.name) + assert search is not None + version = search.group(1) + return f"{version[0]}.{version[1:]}" + + python_versions = [ + py_ver_from_lockfiles(lockfile) + for lockfile in lockfile_dir.glob("*.lock") + ] + python_version = max(python_versions) + + # Construct and run the environment preparation command. + local_envs = dict(environ) + local_envs["PY_VER"] = python_version + # Prevent Nox re-using env with wrong Python version. 
+ env_parent_dir = ( + env_parent_dir / f"nox{python_version.replace('.', '')}" + ) + env_command = [ + "nox", + f"--envdir={env_parent_dir}", + "--session=tests", + "--install-only", + "--no-error-on-external-run", + "--verbose", + ] + _ = asv_util.check_output( + env_command, + timeout=self._install_timeout, + cwd=build_dir, + env=local_envs, + ) + + env_parent_contents = list(env_parent_dir.iterdir()) + if len(env_parent_contents) != 1: + message = ( + f"{env_parent_dir} contains {len(env_parent_contents)} " + "items, expected 1. Cannot determine the environment " + "directory." + ) + raise FileNotFoundError(message) + (delegated_env_path,) = env_parent_contents + + case _: + message = "No environment setup is known for this commit of Iris." + raise NotImplementedError(message) + + return delegated_env_path diff --git a/benchmarks/asv_delegated_iris.py b/benchmarks/asv_delegated_iris.py deleted file mode 100644 index 035afb5a32..0000000000 --- a/benchmarks/asv_delegated_iris.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Iris-specific adaptation of :mod:`asv_delegated`.""" - -import ast -import enum -from os import environ -from os.path import getmtime -from pathlib import Path -import re - -from asv import util as asv_util - -from asv_delegated import Delegated - - -class DelegatedIris(Delegated): - """Specialism of :class:`Delegated` for benchmarking Iris.""" - - tool_name = "delegated-iris" - - def _prep_env_override(self, env_parent_dir: Path) -> Path: - """Iris-specific environment preparation. - - Scans the checked-out commit of Iris to work out the appropriate - preparation command, including gathering any extra information that said - command needs. - - Parameters - ---------- - env_parent_dir : Path - The directory that the prepared environment should be placed in. 
- - Returns - ------- - Path - The path to the prepared environment. - """ - # The project checkout. - build_dir = Path(self._build_root) / self._repo_subdir - - class Mode(enum.Enum): - """The scenarios where the correct env setup script is known.""" - - NOX = enum.auto() - """``PY_VER=x.xx nox --session=tests --install-only`` is supported.""" - - mode = None - - noxfile = build_dir / "noxfile.py" - if noxfile.is_file(): - # Our noxfile originally did not support `--install-only` - you - # could either run the tests, or run nothing at all. Adding - # `run_always` to `prepare_venv` enabled environment setup without - # running tests. - noxfile_tree = ast.parse(source=noxfile.read_text()) - prep_session = next( - filter( - lambda node: getattr(node, "name", "") == "prepare_venv", - ast.walk(noxfile_tree), - ) - ) - prep_session_code = ast.unparse(prep_session) - if ( - "session.run(" not in prep_session_code - and "session.run_always(" in prep_session_code - ): - mode = Mode.NOX - - match mode: - # Just NOX for now but the architecture is here for future cases. - case Mode.NOX: - # Need to determine a single Python version to run with. - req_dir = build_dir / "requirements" - lockfile_dir = req_dir / "locks" - if not lockfile_dir.is_dir(): - lockfile_dir = req_dir / "ci" / "nox.lock" - - if not lockfile_dir.is_dir(): - message = "No lockfile directory found in the expected locations." - raise FileNotFoundError(message) - - def py_ver_from_lockfiles(lockfile: Path) -> str: - pattern = re.compile(r"py(\d+)-") - search = pattern.search(lockfile.name) - assert search is not None - version = search.group(1) - return f"{version[0]}.{version[1:]}" - - python_versions = [ - py_ver_from_lockfiles(lockfile) - for lockfile in lockfile_dir.glob("*.lock") - ] - python_version = max(python_versions) - - # Construct and run the environment preparation command. 
- local_envs = dict(environ) - local_envs["PY_VER"] = python_version - # Prevent Nox re-using env with wrong Python version. - env_parent_dir = ( - env_parent_dir / f"nox{python_version.replace('.', '')}" - ) - env_command = [ - "nox", - f"--envdir={env_parent_dir}", - "--session=tests", - "--install-only", - "--no-error-on-external-run", - "--verbose", - ] - _ = asv_util.check_output( - env_command, - timeout=self._install_timeout, - cwd=build_dir, - env=local_envs, - ) - - env_parent_contents = list(env_parent_dir.iterdir()) - if len(env_parent_contents) != 1: - message = ( - f"{env_parent_dir} contains {len(env_parent_contents)} " - "items, expected 1. Cannot determine the environment " - "directory." - ) - raise FileNotFoundError(message) - (delegated_env_path,) = env_parent_contents - - case _: - message = "No environment setup is known for this commit of Iris." - raise NotImplementedError(message) - - return delegated_env_path diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py index 30a991a879..3c3ec34871 100644 --- a/benchmarks/benchmarks/__init__.py +++ b/benchmarks/benchmarks/__init__.py @@ -1,13 +1,10 @@ -# Copyright Iris contributors +# Copyright SciTools contributors # -# This file is part of Iris and is released under the BSD license. +# This file is part of SciTools and is released under the BSD license. # See LICENSE in the root of the repository for full licensing details. 
"""Common code for benchmarks.""" from os import environ -import tracemalloc - -import numpy as np def disable_repeat_between_setup(benchmark_object): diff --git a/benchmarks/benchmarks/generate_data/__init__.py b/benchmarks/benchmarks/generate_data/__init__.py index 3366bec6e0..9edec550e9 100644 --- a/benchmarks/benchmarks/generate_data/__init__.py +++ b/benchmarks/benchmarks/generate_data/__init__.py @@ -1,11 +1,11 @@ -# Copyright Iris contributors +# Copyright SciTools contributors # -# This file is part of Iris and is released under the BSD license. +# This file is part of SciTools and is released under the BSD license. # See LICENSE in the root of the repository for full licensing details. """Scripts for generating supporting data for benchmarking. -Data generated using Iris should use :func:`run_function_elsewhere`, which -means that data is generated using a fixed version of Iris and a fixed +Data generated using this repo should use :func:`run_function_elsewhere`, which +means that data is generated using a fixed version of this repo and a fixed environment, rather than those that get changed when the benchmarking run checks out a new commit. @@ -28,7 +28,7 @@ #: Python executable used by :func:`run_function_elsewhere`, set via env #: variable of same name. Must be path of Python within an environment that -#: includes Iris (including dependencies and test modules) and Mule. +#: includes this repo (including dependencies and test modules) and Mule. try: DATA_GEN_PYTHON = environ["DATA_GEN_PYTHON"] _ = check_output([DATA_GEN_PYTHON, "-c", "a = True"]) diff --git a/benchmarks/bm_runner.py b/benchmarks/bm_runner.py old mode 100644 new mode 100755 index 42761edc9c..c50de0ed37 --- a/benchmarks/bm_runner.py +++ b/benchmarks/bm_runner.py @@ -1,6 +1,7 @@ -# Copyright Iris contributors +#!/usr/bin/env python3 +# Copyright SciTools contributors # -# This file is part of Iris and is released under the BSD license. 
+# This file is part of SciTools and is released under the BSD license. # See LICENSE in the root of the repository for full licensing details. """Argparse conveniences for executing common types of benchmark runs.""" @@ -85,9 +86,8 @@ def _prep_data_gen_env() -> None: ) # Find the environment built above, set it to be the data generation # environment. - data_gen_python = next( - (ROOT_DIR / ".nox").rglob(f"tests*/bin/python{python_version}") - ).resolve() + env_directory: Path = next((ROOT_DIR / ".nox").rglob(f"tests*")) + data_gen_python = (env_directory / "bin" / "python").resolve() environ[data_gen_var] = str(data_gen_python) def clone_resource(name: str, clone_source: str) -> Path: @@ -133,7 +133,11 @@ def _setup_common() -> None: echo("Setup complete.") -def _asv_compare(*commits: str, overnight_mode: bool = False) -> None: +def _asv_compare( + *commits: str, + overnight_mode: bool = False, + fail_on_regression: bool = False, +) -> None: """Run through a list of commits comparing each one to the next.""" commits = tuple(commit[:8] for commit in commits) for i in range(len(commits) - 1): @@ -151,6 +155,13 @@ def _asv_compare(*commits: str, overnight_mode: bool = False) -> None: # For the overnight run: only post if there are shifts. _gh_create_reports(after, comparison, shifts) + if shifts and fail_on_regression: + # fail_on_regression supports setups that expect CI failures. + message = ( + f"Performance shifts detected between commits {before} and {after}.\n" + ) + raise RuntimeError(message) + def _gh_create_reports(commit_sha: str, results_full: str, results_shifts: str) -> None: """If running under GitHub Actions: record the results in report(s). @@ -396,14 +407,15 @@ def func(args: argparse.Namespace) -> None: class Branch(_SubParserGenerator): name = "branch" description = ( - "Performs the same operations as ``overnight``, but always on two commits " - "only - ``HEAD``, and ``HEAD``'s merge-base with the input " - "**base_branch**. 
If running on GitHub Actions: HEAD will be GitHub's " + "Performs the same operations as ``overnight``, but always on two " + "commits only - ``HEAD``, and ``HEAD``'s merge-base with the input " + "**base_branch**.\n" + "If running on GitHub Actions: HEAD will be GitHub's " "merge commit and merge-base will be the merge target. Performance " "comparisons will be posted in a comment on the relevant pull request.\n" - "Designed " - "for testing if the active branch's changes cause performance shifts - " - "anticipating what would be caught by ``overnight`` once merged.\n\n" + "Designed for testing if the active branch's changes cause performance " + "shifts - anticipating what would be caught by ``overnight`` once " + "merged.\n\n" "**For maximum accuracy, avoid using the machine that is running this " "session. Run time could be >1 hour for the full benchmark suite.**\n" "Uses `asv run`." @@ -631,7 +643,9 @@ def add_asv_arguments(self) -> None: def main(): parser = argparse.ArgumentParser( - description="Run the Iris performance benchmarks (using Airspeed Velocity).", + description=( + "Run the repository performance benchmarks (using Airspeed Velocity)." + ), epilog=( "More help is available within each sub-command." "\n\nNOTE(1): a separate python environment is created to " @@ -649,7 +663,17 @@ def main(): ) subparsers = parser.add_subparsers(required=True) - for gen in (Overnight, Branch, CPerf, SPerf, Custom, TrialRun, GhPost): + parser_generators: tuple[type(_SubParserGenerator), ...] 
= ( + Overnight, + Branch, + CPerf, + SPerf, + Custom, + TrialRun, + GhPost, + ) + + for gen in parser_generators: _ = gen(subparsers).subparser parsed = parser.parse_args() diff --git a/benchmarks/custom_bms/install.py b/benchmarks/custom_bms/install.py index 59d27a0b43..f470a59606 100644 --- a/benchmarks/custom_bms/install.py +++ b/benchmarks/custom_bms/install.py @@ -1,8 +1,8 @@ -# Copyright Iris contributors +# Copyright SciTools contributors # -# This file is part of Iris and is released under the BSD license. +# This file is part of SciTools and is released under the BSD license. # See LICENSE in the root of the repository for full licensing details. -"""Install Iris' custom benchmarks for detection by ASV. +"""Install the SciTools custom benchmarks for detection by ASV. See the requirements for being detected as an ASV plugin: https://asv.readthedocs.io/projects/asv-runner/en/latest/development/benchmark_plugins.html @@ -17,17 +17,17 @@ def package_files(new_dir: Path) -> None: - """Package Iris' custom benchmarks for detection by ASV. + """Package SciTools' custom benchmarks for detection by ASV. Parameters ---------- new_dir : Path The directory to package the custom benchmarks in. 
""" - asv_bench_iris = new_dir / "asv_bench_iris" - benchmarks = asv_bench_iris / "benchmarks" + asv_bench_scitools = new_dir / "asv_bench_scitools" + benchmarks = asv_bench_scitools / "benchmarks" benchmarks.mkdir(parents=True) - (asv_bench_iris / "__init__.py").touch() + (asv_bench_scitools / "__init__.py").touch() for py_file in this_dir.glob("*.py"): if py_file != Path(__file__): @@ -39,7 +39,7 @@ def package_files(new_dir: Path) -> None: py_project.write_text( """ [project] - name = "asv_bench_iris" + name = "asv_bench_scitools" version = "0.1" """ ) diff --git a/benchmarks/custom_bms/tracemallocbench.py b/benchmarks/custom_bms/tracemallocbench.py index 486c67aeb9..357ad8a5b9 100644 --- a/benchmarks/custom_bms/tracemallocbench.py +++ b/benchmarks/custom_bms/tracemallocbench.py @@ -1,6 +1,6 @@ -# Copyright Iris contributors +# Copyright SciTools contributors # -# This file is part of Iris and is released under the BSD license. +# This file is part of SciTools and is released under the BSD license. # See LICENSE in the root of the repository for full licensing details. """Benchmark for growth in process resident memory, repeating for accuracy. From 843878b52aa7a559fb397137e3c977f74675cec2 Mon Sep 17 00:00:00 2001 From: Martin Yeo Date: Fri, 25 Apr 2025 10:34:25 +0100 Subject: [PATCH 2/2] Revert licence header changes - align with test expectations. 
--- benchmarks/_asv_delegated_abc.py | 4 ++-- benchmarks/asv_delegated.py | 4 ++-- benchmarks/benchmarks/__init__.py | 4 ++-- benchmarks/benchmarks/generate_data/__init__.py | 4 ++-- benchmarks/bm_runner.py | 4 ++-- benchmarks/custom_bms/install.py | 4 ++-- benchmarks/custom_bms/tracemallocbench.py | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/benchmarks/_asv_delegated_abc.py b/benchmarks/_asv_delegated_abc.py index a80d2c638e..0546a3c6a2 100644 --- a/benchmarks/_asv_delegated_abc.py +++ b/benchmarks/_asv_delegated_abc.py @@ -1,6 +1,6 @@ -# Copyright SciTools contributors +# Copyright Iris contributors # -# This file is part of SciTools and is released under the BSD license. +# This file is part of Iris and is released under the BSD license. # See LICENSE in the root of the repository for full licensing details. """ASV plug-in providing an alternative :class:`asv.environments.Environment` subclass. diff --git a/benchmarks/asv_delegated.py b/benchmarks/asv_delegated.py index 7490c1328a..06c424f21f 100644 --- a/benchmarks/asv_delegated.py +++ b/benchmarks/asv_delegated.py @@ -1,6 +1,6 @@ -# Copyright SciTools contributors +# Copyright Iris contributors # -# This file is part of SciTools and is released under the BSD license. +# This file is part of Iris and is released under the BSD license. # See LICENSE in the root of the repository for full licensing details. """Repository-specific adaptation of :mod:`_asv_delegated_abc`.""" diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py index 3c3ec34871..ab4b48ca90 100644 --- a/benchmarks/benchmarks/__init__.py +++ b/benchmarks/benchmarks/__init__.py @@ -1,6 +1,6 @@ -# Copyright SciTools contributors +# Copyright Iris contributors # -# This file is part of SciTools and is released under the BSD license. +# This file is part of Iris and is released under the BSD license. # See LICENSE in the root of the repository for full licensing details. 
"""Common code for benchmarks.""" diff --git a/benchmarks/benchmarks/generate_data/__init__.py b/benchmarks/benchmarks/generate_data/__init__.py index 9edec550e9..ca5b8e567e 100644 --- a/benchmarks/benchmarks/generate_data/__init__.py +++ b/benchmarks/benchmarks/generate_data/__init__.py @@ -1,6 +1,6 @@ -# Copyright SciTools contributors +# Copyright Iris contributors # -# This file is part of SciTools and is released under the BSD license. +# This file is part of Iris and is released under the BSD license. # See LICENSE in the root of the repository for full licensing details. """Scripts for generating supporting data for benchmarking. diff --git a/benchmarks/bm_runner.py b/benchmarks/bm_runner.py index c50de0ed37..c3aff84b4d 100755 --- a/benchmarks/bm_runner.py +++ b/benchmarks/bm_runner.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 -# Copyright SciTools contributors +# Copyright Iris contributors # -# This file is part of SciTools and is released under the BSD license. +# This file is part of Iris and is released under the BSD license. # See LICENSE in the root of the repository for full licensing details. """Argparse conveniences for executing common types of benchmark runs.""" diff --git a/benchmarks/custom_bms/install.py b/benchmarks/custom_bms/install.py index f470a59606..bda9f1cc3c 100644 --- a/benchmarks/custom_bms/install.py +++ b/benchmarks/custom_bms/install.py @@ -1,6 +1,6 @@ -# Copyright SciTools contributors +# Copyright Iris contributors # -# This file is part of SciTools and is released under the BSD license. +# This file is part of Iris and is released under the BSD license. # See LICENSE in the root of the repository for full licensing details. """Install the SciTools custom benchmarks for detection by ASV. 
diff --git a/benchmarks/custom_bms/tracemallocbench.py b/benchmarks/custom_bms/tracemallocbench.py index 357ad8a5b9..486c67aeb9 100644 --- a/benchmarks/custom_bms/tracemallocbench.py +++ b/benchmarks/custom_bms/tracemallocbench.py @@ -1,6 +1,6 @@ -# Copyright SciTools contributors +# Copyright Iris contributors # -# This file is part of SciTools and is released under the BSD license. +# This file is part of Iris and is released under the BSD license. # See LICENSE in the root of the repository for full licensing details. """Benchmark for growth in process resident memory, repeating for accuracy.