diff --git a/changelog.d/20240719_150741_danfuchs.md b/changelog.d/20240719_150741_danfuchs.md new file mode 100644 index 00000000..e301f4d6 --- /dev/null +++ b/changelog.d/20240719_150741_danfuchs.md @@ -0,0 +1,5 @@ + + +### New features + +- `NotebookRunner` business will skip notebooks in environments that do not have the services required for them to run. Required services ban be declared by adding [metadata](https://ipython.readthedocs.io/en/3.x/notebook/nbformat.html#metadata) to a notebook. diff --git a/src/mobu/config.py b/src/mobu/config.py index 7d4c1dbc..3cf09b4a 100644 --- a/src/mobu/config.py +++ b/src/mobu/config.py @@ -87,6 +87,19 @@ class Configuration(BaseSettings): examples=["gt-vilSCi1ifK_MyuaQgMD2dQ.d6SIJhowv5Hs3GvujOyUig"], ) + available_services: set[str] = Field( + set(), + title="Available platform services", + description=( + "Names of services available in the current environment. For now," + " this list is manually maintained in the mobu config in Phalanx." + " When we have a service discovery mechanism in place, it should" + " be used here." + ), + validation_alias="MOBU_AVAILABLE_SERVICES", + examples=[{"tap", "ssotap", "butler"}], + ) + name: str = Field( "mobu", title="Name of application", diff --git a/src/mobu/models/business/notebookrunner.py b/src/mobu/models/business/notebookrunner.py index 61ec2be4..3c32e958 100644 --- a/src/mobu/models/business/notebookrunner.py +++ b/src/mobu/models/business/notebookrunner.py @@ -5,7 +5,7 @@ from pathlib import Path from typing import Literal -from pydantic import Field +from pydantic import BaseModel, Field from ...constants import NOTEBOOK_REPO_BRANCH, NOTEBOOK_REPO_URL from .base import BusinessConfig @@ -100,3 +100,18 @@ class NotebookRunnerData(NubladoBusinessData): description="Will not be present if no code is being executed", examples=['import json\nprint(json.dumps({"foo": "bar"})\n'], ) + + +class NotebookMetadata(BaseModel): + """Notebook metadata that we care about.""" + + required_services: set[str] = Field( + set(), + title="Required services", + description=( + "The names of services that the platform is required to provide in" + " order for the notebook to run correctly. Not all environments" + " provide all services." + ), + examples=[{"tap", "ssotap", "butler"}], + ) diff --git a/src/mobu/services/business/notebookrunner.py b/src/mobu/services/business/notebookrunner.py index bf660406..f8b11059 100644 --- a/src/mobu/services/business/notebookrunner.py +++ b/src/mobu/services/business/notebookrunner.py @@ -18,8 +18,10 @@ from httpx import AsyncClient from structlog.stdlib import BoundLogger +from ...config import config from ...exceptions import NotebookRepositoryError from ...models.business.notebookrunner import ( + NotebookMetadata, NotebookRunnerData, NotebookRunnerOptions, ) @@ -110,6 +112,25 @@ def is_excluded(self, notebook: Path) -> bool: # A notebook is excluded if any of its parent directories are excluded return bool(set(notebook.parents) & self._exclude_paths) + def missing_services(self, notebook: Path) -> bool: + """Return True if a notebook declares required services and they are + available. + """ + metadata = self.read_notebook_metadata(notebook) + missing_services = ( + metadata.required_services - config.available_services + ) + if missing_services: + msg = "Environment does not provide required services for notebook" + self.logger.info( + msg, + notebook=notebook, + required_services=metadata.required_services, + missing_services=missing_services, + ) + return True + return False + def find_notebooks(self) -> list[Path]: with self.timings.start("find_notebooks"): if self._repo_dir is None: @@ -119,8 +140,10 @@ def find_notebooks(self) -> list[Path]: notebooks = [ n for n in self._repo_dir.glob("**/*.ipynb") - if not self.is_excluded(n) + if not (self.is_excluded(n) or self.missing_services(n)) ] + + # Filter for explicit notebooks if self.options.notebooks_to_run: requested = [ self._repo_dir / notebook @@ -138,6 +161,7 @@ def find_notebooks(self) -> list[Path]: "Running with explicit list of notebooks", notebooks=notebooks, ) + if not notebooks: msg = "No notebooks found in {self._repo_dir}" raise NotebookRepositoryError(msg, self.user.username) @@ -149,6 +173,20 @@ def next_notebook(self) -> Path: self._notebook_paths = self.find_notebooks() return self._notebook_paths.pop() + def read_notebook_metadata(self, notebook: Path) -> NotebookMetadata: + """Extract mobu-specific metadata from a notebook.""" + with self.timings.start( + "read_notebook_metadata", {"notebook": notebook.name} + ): + try: + notebook_text = notebook.read_text() + notebook_json = json.loads(notebook_text) + metadata = notebook_json["metadata"].get("mobu", {}) + return NotebookMetadata.model_validate(metadata) + except Exception as e: + msg = f"Invalid notebook metadata {notebook.name}: {e!s}" + raise NotebookRepositoryError(msg, self.user.username) from e + def read_notebook(self, notebook: Path) -> list[dict[str, Any]]: with self.timings.start("read_notebook", {"notebook": notebook.name}): try: diff --git a/tests/business/notebookrunner_test.py b/tests/business/notebookrunner_test.py index 52c2484b..300509d7 100644 --- a/tests/business/notebookrunner_test.py +++ b/tests/business/notebookrunner_test.py @@ -209,6 +209,85 @@ async def test_run_recursive( assert "Done with this cycle of notebooks" in r.text +@pytest.mark.asyncio +async def test_run_required_services( + client: AsyncClient, respx_mock: respx.Router, tmp_path: Path +) -> None: + mock_gafaelfawr(respx_mock) + cwd = Path.cwd() + + # Set up a notebook repository. + source_path = TEST_DATA_DIR / "notebooks_services" + repo_path = tmp_path / "notebooks" + + shutil.copytree(str(source_path), str(repo_path)) + + # Set up git repo + await setup_git_repo(repo_path) + + # Start a monkey. We have to do this in a try/finally block since the + # runner will change working directories, which because working + # directories are process-global may mess up future tests. + try: + r = await client.put( + "/mobu/flocks", + json={ + "name": "test", + "count": 1, + "user_spec": {"username_prefix": "bot-mobu-testuser"}, + "scopes": ["exec:notebook"], + "business": { + "type": "NotebookRunner", + "options": { + "spawn_settle_time": 0, + "execution_idle_time": 0, + "max_executions": 1, + "repo_url": str(repo_path), + "repo_ref": "main", + "working_directory": str(repo_path), + }, + }, + }, + ) + assert r.status_code == 201 + + # Wait until we've finished one loop and check the results. + data = await wait_for_business(client, "bot-mobu-testuser1") + assert data == { + "name": "bot-mobu-testuser1", + "business": { + "failure_count": 0, + "name": "NotebookRunner", + "notebook": "test-notebook-has-services.ipynb", + "refreshing": False, + "success_count": 1, + "timings": ANY, + }, + "state": "RUNNING", + "user": { + "scopes": ["exec:notebook"], + "token": ANY, + "username": "bot-mobu-testuser1", + }, + } + finally: + os.chdir(cwd) + + # Get the log and check the cell output. + r = await client.get("/mobu/flocks/test/monkeys/bot-mobu-testuser1/log") + assert r.status_code == 200 + + # Notebook with all services available + assert "Required services are available" in r.text + assert "Final test" in r.text + + # Notebook with missing services + assert "Required services are NOT available" not in r.text + + # Make sure mobu ran all of the notebooks it thinks it should have + assert "Done with this cycle of notebooks" in r.text + + @pytest.mark.asyncio async def test_refresh( client: AsyncClient, diff --git a/tests/conftest.py b/tests/conftest.py index fd61ab82..45aaab3c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -56,9 +56,11 @@ def _configure() -> Iterator[None]: """ config.environment_url = HttpUrl("https://test.example.com") config.gafaelfawr_token = make_gafaelfawr_token() + config.available_services = {"some_service", "some_other_service"} yield config.environment_url = None config.gafaelfawr_token = None + config.available_services = set() @pytest.fixture diff --git a/tests/data/notebooks_services/test-notebook-has-services.ipynb b/tests/data/notebooks_services/test-notebook-has-services.ipynb new file mode 100644 index 00000000..81ffb62c --- /dev/null +++ b/tests/data/notebooks_services/test-notebook-has-services.ipynb @@ -0,0 +1,64 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5cb3e0b7", + "metadata": {}, + "source": [ + "This is a test notebook to check the NotebookRunner business. It contains some Markdown cells and some code cells. Only the code cells should run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f84f0959", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Required services are available\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53a941a4", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Final test\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "823560c6", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "mobu": { + "required_services": ["some_service"] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/data/notebooks_services/test-notebook-missing-service.ipynb b/tests/data/notebooks_services/test-notebook-missing-service.ipynb new file mode 100644 index 00000000..26adbc53 --- /dev/null +++ b/tests/data/notebooks_services/test-notebook-missing-service.ipynb @@ -0,0 +1,64 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5cb3e0b7", + "metadata": {}, + "source": [ + "This is a test notebook to check the NotebookRunner business. It contains some Markdown cells and some code cells. Only the code cells should run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f84f0959", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Required services are NOT available\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53a941a4", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Final test\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "823560c6", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "mobu": { + "required_services": ["nope"] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}