Skip to content

Commit

Permalink
Filter notebooks in a more informative way
Browse files Browse the repository at this point in the history
  • Loading branch information
fajpunk committed Jul 23, 2024
1 parent 42152a0 commit ce682dd
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 20 deletions.
46 changes: 46 additions & 0 deletions src/mobu/models/business/notebookrunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

__all__ = [
"CiNotebookRunnerOptions",
"NotebookFilterResults",
"NotebookRunnerConfig",
"NotebookRunnerData",
"NotebookRunnerOptions",
Expand Down Expand Up @@ -121,3 +122,48 @@ class NotebookMetadata(BaseModel):
),
examples=[{"tap", "ssotap", "butler"}],
)


class NotebookFilterResults(BaseModel):
    """Valid notebooks and categories for invalid notebooks.

    Every field is a set of notebook paths. ``all`` holds every notebook
    that was considered, each ``excluded_by_*`` field holds the notebooks
    rejected for one particular reason, and ``runnable`` holds what is
    left to run after filtering.
    """

    # NOTE(review): pydantic is documented to copy mutable defaults for
    # each instance, so ``default=set()`` should not be shared state
    # between models -- confirm against the pydantic version in use.

    # The name shadows the ``all`` builtin inside the class body only; it
    # is kept because callers construct the model with ``all=...``.
    all: set[Path] = Field(
        default=set(),
        title="All notebooks",
        description="All notebooks in the repository",
    )

    runnable: set[Path] = Field(
        default=set(),
        title="Runnable notebooks",
        description=(
            "These are the notebooks to run after all filtering has been done"
        ),
    )

    excluded_by_dir: set[Path] = Field(
        default=set(),
        title="Excluded by directory",
        description=(
            "These notebooks won't be run because they are in a directory that"
            " is explicitly excluded"
        ),
    )

    excluded_by_service: set[Path] = Field(
        default=set(),
        title="Excluded by service availability",
        description=(
            "These notebooks won't be run because they depend on services"
            " which are not available in this environment"
        ),
    )

    excluded_by_requested: set[Path] = Field(
        default=set(),
        title="Excluded by explicit list",
        description=(
            "These notebooks won't be run because a list of explicitly"
            " requested notebooks was provided, and they weren't in it."
        ),
    )
71 changes: 51 additions & 20 deletions src/mobu/services/business/notebookrunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from ...exceptions import NotebookRepositoryError, RepositoryConfigError
from ...models.business.notebookrunner import (
CiNotebookRunnerOptions,
NotebookFilterResults,
NotebookMetadata,
NotebookRunnerData,
NotebookRunnerOptions,
Expand Down Expand Up @@ -90,8 +91,15 @@ async def startup(self) -> None:
async def cleanup(self) -> None:
    """Remove the cloned repository and forget the cached filter results.

    Safe to call when no repository directory is set: the removal is
    skipped rather than attempted on a path literally named ``"None"``.
    """
    if self._repo_dir is not None:
        shutil.rmtree(str(self._repo_dir))
    self._repo_dir = None
    # NOTE(review): initialize() stores the result of find_notebooks() on
    # ``self._notebooks``; confirm ``_notebook_filter_results`` is the
    # attribute that is meant to be reset here.
    self._notebook_filter_results = None

async def initialize(self) -> None:
"""Prepare to run the business.
* Check out the repository
* Parse the in-repo config
* Filter the notebooks
"""
if self._repo_dir is None:
self._repo_dir = Path(TemporaryDirectory(delete=False).name)
await self.clone_repo()
Expand All @@ -115,6 +123,7 @@ async def initialize(self) -> None:

exclude_dirs = repo_config.exclude_dirs
self._exclude_paths = {self._repo_dir / path for path in exclude_dirs}
self._notebooks = self.find_notebooks()
self.logger.info("Repository cloned and ready")

async def shutdown(self) -> None:
Expand Down Expand Up @@ -158,46 +167,68 @@ def missing_services(self, notebook: Path) -> bool:
return True
return False

def find_notebooks(self) -> NotebookFilterResults:
    """Find every notebook in the repository and record why any are skipped.

    Returns
    -------
    NotebookFilterResults
        All notebooks found under the repository directory, the notebooks
        excluded for each reason, and the notebooks left to run.

    Raises
    ------
    NotebookRepositoryError
        Raised if the repository directory is not set, if the repository
        contains no notebooks at all, or if an explicitly requested
        notebook does not exist in the repository.
    """
    with self.timings.start("find_notebooks"):
        if self._repo_dir is None:
            raise NotebookRepositoryError(
                "Repository directory must be set", self.user.username
            )

        all_notebooks = set(self._repo_dir.glob("**/*.ipynb"))
        if not all_notebooks:
            msg = f"No notebooks found in {self._repo_dir}"
            raise NotebookRepositoryError(msg, self.user.username)

        # Each exclusion category is computed independently from the full
        # set, so one notebook may appear in more than one category.
        filter_results = NotebookFilterResults(all=all_notebooks)
        filter_results.excluded_by_dir = {
            n for n in filter_results.all if self.is_excluded(n)
        }
        filter_results.excluded_by_service = {
            n for n in filter_results.all if self.missing_services(n)
        }

        if self._notebooks_to_run:
            requested = {
                self._repo_dir / notebook
                for notebook in self._notebooks_to_run
            }
            # A requested notebook only has to exist; it can still be
            # filtered out by directory or by missing services below.
            not_found = requested - filter_results.all
            if not_found:
                msg = (
                    "Requested notebooks do not exist in"
                    f" {self._repo_dir}: {not_found}"
                )
                raise NotebookRepositoryError(msg, self.user.username)
            filter_results.excluded_by_requested = (
                filter_results.all - requested
            )

        filter_results.runnable = (
            filter_results.all
            - filter_results.excluded_by_service
            - filter_results.excluded_by_dir
            - filter_results.excluded_by_requested
        )
        # An empty runnable set is reported but is not an error here.
        if filter_results.runnable:
            self.logger.info(
                "Found notebooks to run",
                filter_results=filter_results.model_dump(),
            )
        else:
            self.logger.warning(
                "No notebooks to run after filtering!",
                filter_results=filter_results.model_dump(),
            )

        return filter_results

def next_notebook(self) -> Path:
    """Return the next notebook to run.

    The pending queue is refilled with all runnable notebooks, in random
    order, whenever it is empty.

    Raises
    ------
    IndexError
        Raised by ``list.pop`` if there are no runnable notebooks, since
        the refilled queue is then still empty.
    """
    if not self._notebooks:
        self._notebooks = self.find_notebooks()
    if not self._notebook_paths:
        # random.sample over the whole population yields a shuffled copy
        # and leaves the runnable set itself untouched.
        self._notebook_paths = random.sample(
            list(self._notebooks.runnable), k=len(self._notebooks.runnable)
        )
    return self._notebook_paths.pop()

def read_notebook_metadata(self, notebook: Path) -> NotebookMetadata:
Expand Down
5 changes: 5 additions & 0 deletions src/mobu/services/github_ci/ci_notebook_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ async def run(self, user: User, scopes: list[str]) -> None:
# Run notebooks using a Solitary runner
summary = "Running these notebooks via Mobu:\n" + "\n".join(
[f"* {notebook}" for notebook in self._notebooks]
+ [
"Note that not all of these may run. Some may be exluded based"
" on config in the repo:"
" https://mobu.lsst.io/user_guide/in_repo_config.html"
]
)
await self.check_run.start(summary=summary)
solitary_config = SolitaryConfig(
Expand Down

0 comments on commit ce682dd

Please sign in to comment.