Skip to content

Commit

Permalink
run notebooks in environments that don't have the necessary services
Browse files Browse the repository at this point in the history
* Parse a 'mobu' section out of notebook metadata
* Add a list of available services to app config (to be replaced by some
kind of service discovery at some point
* Don't run the notebook if it declares required services in the
metadata and the services aren't available in the environment.
  • Loading branch information
fajpunk committed Jul 19, 2024
1 parent 58a865c commit 9ca39d3
Show file tree
Hide file tree
Showing 8 changed files with 281 additions and 2 deletions.
5 changes: 5 additions & 0 deletions changelog.d/20240719_150741_danfuchs.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<!-- Delete the sections that don't apply -->

### New features

- `NotebookRunner` business will skip notebooks in environments that do not have the services required for them to run. Required services ban be declared by adding [metadata](https://ipython.readthedocs.io/en/3.x/notebook/nbformat.html#metadata) to a notebook.
12 changes: 12 additions & 0 deletions src/mobu/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,18 @@ class Configuration(BaseSettings):
examples=["gt-vilSCi1ifK_MyuaQgMD2dQ.d6SIJhowv5Hs3GvujOyUig"],
)

available_services: set[str] = Field(
set(),
title="Available platform services",
description=(
"Names of services available in the current environment. For now,"
" this list is manually maintained in the mobu config in Phalanx."
" When we have a service discovery mechanism in place, it should"
" be used here."
),
validation_alias="MOBU_AVAILABLE_SERVICES",
)

name: str = Field(
"mobu",
title="Name of application",
Expand Down
17 changes: 16 additions & 1 deletion src/mobu/models/business/notebookrunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from pathlib import Path
from typing import Literal

from pydantic import Field
from pydantic import BaseModel, Field

from ...constants import NOTEBOOK_REPO_BRANCH, NOTEBOOK_REPO_URL
from .base import BusinessConfig
Expand Down Expand Up @@ -100,3 +100,18 @@ class NotebookRunnerData(NubladoBusinessData):
description="Will not be present if no code is being executed",
examples=['import json\nprint(json.dumps({"foo": "bar"})\n'],
)


class NotebookMetadata(BaseModel):
"""Notebook metadata that we care about."""

required_services: set[str] = Field(
set(),
title="Required services",
description=(
"The names of services that the platform is required to provide in"
" order for the notebook to run correctly. Not all environments"
" provide all services."
),
examples=[{"tap", "ssotap", "butler"}],
)
40 changes: 39 additions & 1 deletion src/mobu/services/business/notebookrunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
from httpx import AsyncClient
from structlog.stdlib import BoundLogger

from ...config import config
from ...exceptions import NotebookRepositoryError
from ...models.business.notebookrunner import (
NotebookMetadata,
NotebookRunnerData,
NotebookRunnerOptions,
)
Expand Down Expand Up @@ -110,6 +112,25 @@ def is_excluded(self, notebook: Path) -> bool:
# A notebook is excluded if any of its parent directories are excluded
return bool(set(notebook.parents) & self._exclude_paths)

def missing_services(self, notebook: Path) -> bool:
"""Return True if a notebook declares required services and they are
available.
"""
metadata = self.read_notebook_metadata(notebook)
missing_services = (
metadata.required_services - config.available_services
)
if missing_services:
msg = "Environment does not provide required services for notebook"
self.logger.info(
msg,
notebook=notebook,
required_services=metadata.required_services,
missing_services=missing_services,
)
return True
return False

def find_notebooks(self) -> list[Path]:
with self.timings.start("find_notebooks"):
if self._repo_dir is None:
Expand All @@ -119,8 +140,10 @@ def find_notebooks(self) -> list[Path]:
notebooks = [
n
for n in self._repo_dir.glob("**/*.ipynb")
if not self.is_excluded(n)
if not (self.is_excluded(n) or self.missing_services(n))
]

# Filter for explicit notebooks
if self.options.notebooks_to_run:
requested = [
self._repo_dir / notebook
Expand All @@ -138,6 +161,7 @@ def find_notebooks(self) -> list[Path]:
"Running with explicit list of notebooks",
notebooks=notebooks,
)

if not notebooks:
msg = "No notebooks found in {self._repo_dir}"
raise NotebookRepositoryError(msg, self.user.username)
Expand All @@ -149,6 +173,20 @@ def next_notebook(self) -> Path:
self._notebook_paths = self.find_notebooks()
return self._notebook_paths.pop()

def read_notebook_metadata(self, notebook: Path) -> NotebookMetadata:
"""Extract mobu-specific metadata from a notebook."""
with self.timings.start(
"read_notebook_metadata", {"notebook": notebook.name}
):
try:
notebook_text = notebook.read_text()
notebook_json = json.loads(notebook_text)
metadata = notebook_json["metadata"].get("mobu", {})
return NotebookMetadata.model_validate(metadata)
except Exception as e:
msg = f"Invalid notebook metadata {notebook.name}: {e!s}"
raise NotebookRepositoryError(msg, self.user.username) from e

def read_notebook(self, notebook: Path) -> list[dict[str, Any]]:
with self.timings.start("read_notebook", {"notebook": notebook.name}):
try:
Expand Down
79 changes: 79 additions & 0 deletions tests/business/notebookrunner_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,85 @@ async def test_run_recursive(
assert "Done with this cycle of notebooks" in r.text


@pytest.mark.asyncio
async def test_run_required_services(
client: AsyncClient, respx_mock: respx.Router, tmp_path: Path
) -> None:
mock_gafaelfawr(respx_mock)
cwd = Path.cwd()

# Set up a notebook repository.
source_path = TEST_DATA_DIR / "notebooks_services"
repo_path = tmp_path / "notebooks"

shutil.copytree(str(source_path), str(repo_path))

# Set up git repo
await setup_git_repo(repo_path)

# Start a monkey. We have to do this in a try/finally block since the
# runner will change working directories, which because working
# directories are process-global may mess up future tests.
try:
r = await client.put(
"/mobu/flocks",
json={
"name": "test",
"count": 1,
"user_spec": {"username_prefix": "bot-mobu-testuser"},
"scopes": ["exec:notebook"],
"business": {
"type": "NotebookRunner",
"options": {
"spawn_settle_time": 0,
"execution_idle_time": 0,
"max_executions": 1,
"repo_url": str(repo_path),
"repo_ref": "main",
"working_directory": str(repo_path),
},
},
},
)
assert r.status_code == 201

# Wait until we've finished one loop and check the results.
data = await wait_for_business(client, "bot-mobu-testuser1")
assert data == {
"name": "bot-mobu-testuser1",
"business": {
"failure_count": 0,
"name": "NotebookRunner",
"notebook": "test-notebook-has-services.ipynb",
"refreshing": False,
"success_count": 1,
"timings": ANY,
},
"state": "RUNNING",
"user": {
"scopes": ["exec:notebook"],
"token": ANY,
"username": "bot-mobu-testuser1",
},
}
finally:
os.chdir(cwd)

# Get the log and check the cell output.
r = await client.get("/mobu/flocks/test/monkeys/bot-mobu-testuser1/log")
assert r.status_code == 200

# Notebook with all services available
assert "Required services are available" in r.text
assert "Final test" in r.text

# Notebook with missing services
assert "Required services are NOT available" not in r.text

# Make sure mobu ran all of the notebooks it thinks it should have
assert "Done with this cycle of notebooks" in r.text


@pytest.mark.asyncio
async def test_refresh(
client: AsyncClient,
Expand Down
2 changes: 2 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,11 @@ def _configure() -> Iterator[None]:
"""
config.environment_url = HttpUrl("https://test.example.com")
config.gafaelfawr_token = make_gafaelfawr_token()
config.available_services = {"some_service", "some_other_service"}
yield
config.environment_url = None
config.gafaelfawr_token = None
config.available_services = set()


@pytest.fixture
Expand Down
64 changes: 64 additions & 0 deletions tests/data/notebooks_services/test-notebook-has-services.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "5cb3e0b7",
"metadata": {},
"source": [
"This is a test notebook to check the NotebookRunner business. It contains some Markdown cells and some code cells. Only the code cells should run."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f84f0959",
"metadata": {},
"outputs": [],
"source": [
"print(\"Required services are available\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "53a941a4",
"metadata": {},
"outputs": [],
"source": [
"print(\"Final test\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "823560c6",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"mobu": {
"required_services": ["some_service"]
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
64 changes: 64 additions & 0 deletions tests/data/notebooks_services/test-notebook-missing-service.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "5cb3e0b7",
"metadata": {},
"source": [
"This is a test notebook to check the NotebookRunner business. It contains some Markdown cells and some code cells. Only the code cells should run."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f84f0959",
"metadata": {},
"outputs": [],
"source": [
"print(\"Required services are NOT available\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "53a941a4",
"metadata": {},
"outputs": [],
"source": [
"print(\"Final test\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "823560c6",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"mobu": {
"required_services": ["nope"]
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit 9ca39d3

Please sign in to comment.