Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add 'final' and 'strip' to hooks, rework 'ensure_lab' and github on demand #11

Merged
merged 3 commits into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/_static/openapi.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"openapi": "3.1.0", "info": {"title": "Ghostwriter", "description": "URL shortener/personalizer for Phalanx\n\n[Return to Ghostwriter documentation](.)", "version": "0.1.2.dev6+ge335638.d20241017"}, "paths": {"/ghostwriter/": {"get": {"summary": "Application metadata", "description": "Document the top-level API here. By default it only returns metadata about the application.", "operationId": "get_index_ghostwriter__get", "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/Index"}}}}}}}, "/ghostwriter/rewrite/{full_path}": {"get": {"summary": "Rewrite", "operationId": "rewrite_ghostwriter_rewrite__full_path__get", "parameters": [{"name": "full_path", "in": "path", "required": true, "schema": {"type": "string", "title": "The URL path to rewrite"}}], "responses": {"307": {"description": "Successful Response"}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}}, "components": {"schemas": {"HTTPValidationError": {"properties": {"detail": {"items": {"$ref": "#/components/schemas/ValidationError"}, "type": "array", "title": "Detail"}}, "type": "object", "title": "HTTPValidationError"}, "Index": {"properties": {"metadata": {"$ref": "#/components/schemas/Metadata", "title": "Package metadata"}}, "type": "object", "required": ["metadata"], "title": "Index", "description": "Metadata returned by the external root URL of the application.\n\nNotes\n-----\nAs written, this is not very useful. Add additional metadata that will be\nhelpful for a user exploring the application, or replace this model with\nsome other model that makes more sense to return from the application API\nroot."}, "Metadata": {"properties": {"name": {"type": "string", "title": "Application name", "examples": ["myapp"]}, "version": {"type": "string", "title": "Version", "examples": ["1.0.0"]}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "title": "Description", "examples": ["Some package description"]}, "repository_url": {"anyOf": [{"type": "string"}, {"type": "null"}], "title": "Repository URL", "examples": ["https://example.com/"]}, "documentation_url": {"anyOf": [{"type": "string"}, {"type": "null"}], "title": "Documentation URL", "examples": ["https://example.com/"]}}, "type": "object", "required": ["name", "version"], "title": "Metadata", "description": "Metadata about a package."}, "ValidationError": {"properties": {"loc": {"items": {"anyOf": [{"type": "string"}, {"type": "integer"}]}, "type": "array", "title": "Location"}, "msg": {"type": "string", "title": "Message"}, "type": {"type": "string", "title": "Error Type"}}, "type": "object", "required": ["loc", "msg", "type"], "title": "ValidationError"}}}}
{"openapi": "3.1.0", "info": {"title": "Ghostwriter", "description": "URL shortener/personalizer for Phalanx\n\n[Return to Ghostwriter documentation](.)", "version": "0.1.2.dev6+ge335638.d20241015"}, "paths": {"/ghostwriter/": {"get": {"summary": "Application metadata", "description": "Document the top-level API here. By default it only returns metadata about the application.", "operationId": "get_index_ghostwriter__get", "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/Index"}}}}}}}, "/ghostwriter/rewrite/{full_path}": {"get": {"summary": "Rewrite", "operationId": "rewrite_ghostwriter_rewrite__full_path__get", "parameters": [{"name": "full_path", "in": "path", "required": true, "schema": {"type": "string", "title": "The URL path to rewrite"}}], "responses": {"307": {"description": "Successful Response"}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}}, "components": {"schemas": {"HTTPValidationError": {"properties": {"detail": {"items": {"$ref": "#/components/schemas/ValidationError"}, "type": "array", "title": "Detail"}}, "type": "object", "title": "HTTPValidationError"}, "Index": {"properties": {"metadata": {"$ref": "#/components/schemas/Metadata", "title": "Package metadata"}}, "type": "object", "required": ["metadata"], "title": "Index", "description": "Metadata returned by the external root URL of the application.\n\nNotes\n-----\nAs written, this is not very useful. Add additional metadata that will be\nhelpful for a user exploring the application, or replace this model with\nsome other model that makes more sense to return from the application API\nroot."}, "Metadata": {"properties": {"name": {"type": "string", "title": "Application name", "examples": ["myapp"]}, "version": {"type": "string", "title": "Version", "examples": ["1.0.0"]}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "title": "Description", "examples": ["Some package description"]}, "repository_url": {"anyOf": [{"type": "string"}, {"type": "null"}], "title": "Repository URL", "examples": ["https://example.com/"]}, "documentation_url": {"anyOf": [{"type": "string"}, {"type": "null"}], "title": "Documentation URL", "examples": ["https://example.com/"]}}, "type": "object", "required": ["name", "version"], "title": "Metadata", "description": "Metadata about a package."}, "ValidationError": {"properties": {"loc": {"items": {"anyOf": [{"type": "string"}, {"type": "integer"}]}, "type": "array", "title": "Location"}, "msg": {"type": "string", "title": "Message"}, "type": {"type": "string", "title": "Error Type"}}, "type": "object", "required": ["loc", "msg", "type"], "title": "ValidationError"}}}}
17 changes: 12 additions & 5 deletions docs/hooks/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ There is a list of hooks (possibly empty) for each route specified in the config
Each hook is run in sequence. If any hook raises an exception, the user will receive an error rather than a redirect.

Because (at least at present) hooks are, by definition, located inside the ``ghostwriter.hooks`` Python module namespace, any additions or modifications to hooks are code and repository changes.
Therefore, any proposed hooks or modififactions to existing ones will need to go through the SQuaRE PR process.
Therefore, any proposed hooks or modifications to existing ones will need to go through the SQuaRE PR process.

Anatomy of a Hook
=================
Expand All @@ -21,7 +21,7 @@ To signal failure, a hook should raise an ``Exception``.
If a hook does not wish to modify the parameters used by future hooks or
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a typo (not in your recent changes) just above:
modififactions -> modifications

route substitution, it should return ``None``.

If it does return an object, that object will first be checked to ensure only the ``target`` and ``unique_id`` fields changed.
If it does return an object, that object will first be checked to ensure only the ``target``, ``unique_id``, and/or ``final`` fields changed.
If any other field changed, an exception will be raised.
The returned ``Parameters`` object will be used as the input to
subsequent hooks and to update the target path that will be substituted.
Expand All @@ -37,16 +37,23 @@ Obviously that begs the question of "what's in a ``Parameters`` object?"

Three of the fields are obvious: they are ``base_url``, ``path``, and ``user``, all used in path construction for the redirect.

Two fields are only for use by the hook itself: those are ``target`` and ``unique_id``.
Four fields are only for use by the hook itself: those are ``target``, ``unique_id``, ``strip``, and ``final``.
The ``target`` field will be injected when the hook is run, and the ``unique_id`` field may be populated.
These two are the only fields a hook is permitted to change if it returns a ``Parameters`` object.
These four are the only fields a hook is permitted to change if it returns a ``Parameters`` object.
The ``target`` may need to be rewritten to accomodate a ``unique_id``.

The motivation here is simply to avoid rewriting existing files: the correct response is context-dependent, and might be to redirect to the existing file, but it equally well might be to create a new file under a different name.
The motivation of ``unique_id`` is simply to avoid rewriting existing files: the correct response is context-dependent, and might be to redirect to the existing file, but it equally well might be to create a new file under a different name.
In this case, the file name stem (that is, the part before the suffix) might need to be appended with a ``unique_id``, which could be (again, the best choice depends on context) a serial number, as in ``Untitled2.ipynb``, or a string representation of the date and time, or simply a UUID.
The ``unique_id`` can be any string legal in a filename, as long as the filename containing it will be distinct from any other filename in the directory.
Guaranteeing that it is unique is the job of the hook writer.

The ``strip`` field controls whether or not the path prefix is stripped from the target path. Sometimes it is useful not to strip it, particularly in conjunction with ``final``.

The ``final`` field is used to assert that hook processing should stop here and that ghostwriter should proceed with a redirect to the current value of ``target``.
This is used in ``ensure_lab`` to redirect the user to a spawner page, forcing them to choose an image and size.
The constructed ``target`` relies on the fact that once spawned, the specified path will be respected by the running lab, and it will have been constructed to hit a proxy endpoint, which will itself issue a redirect to throw the user back via the same redirection they've just come through.
However, this time, ``ensure_lab`` will return ``None`` and therefore hook processing will continue, picking out the correct file inside the user lab.

Given the context, the existing hooks do not worry much about race conditions.
If you are using the date or an incrementing integer...you are still in an RSP context, so it's very unlikely a user will go to the same redirected URL twice in the same microsecond, or even twice within the time it takes to write out a notebook. If you do have some high-frequency use case, a UUID would be a better choice.

Expand Down
2 changes: 2 additions & 0 deletions src/ghostwriter/hooks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from .autostart_lab import ensure_autostart_lab
from .ensure_lab import ensure_running_lab
from .github_notebook import github_notebook
from .portal_query import portal_query
from .vacuous import vacuous_hook

__all__ = [
"ensure_autostart_lab",
"ensure_running_lab",
"github_notebook",
"portal_query",
Expand Down
2 changes: 1 addition & 1 deletion src/ghostwriter/hooks/_github_notebook_payload.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,5 +58,5 @@

# Finally, print the value of ``serial``, which we will capture as
# a notebook stream output to determine whether we need to modify
# the path and unique_id in rewrite parameters.
# the target and unique_id in rewrite parameters.
print(serial)
5 changes: 5 additions & 0 deletions src/ghostwriter/hooks/_logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Logger for hooks."""

import structlog

LOGGER = structlog.getLogger("ghostwriter")
65 changes: 65 additions & 0 deletions src/ghostwriter/hooks/autostart_lab.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""Ensure that the user has a running lab. If they don't, they will be
given `recommended` in `medium` size.
"""

import asyncio

from rubin.nublado.client import NubladoClient
from rubin.nublado.client.models import (
NubladoImage,
NubladoImageByClass,
NubladoImageClass,
NubladoImageSize,
)

from ..models.substitution import Parameters
from ._logger import LOGGER

LAB_SPAWN_TIMEOUT = 90


async def ensure_autostart_lab(params: Parameters) -> None:
athornton marked this conversation as resolved.
Show resolved Hide resolved
"""Start a Lab if one is not present."""
LOGGER.debug(f"Checking for running Lab for {params}")
client = params.client
LOGGER.debug("Logging in to Hub")
await client.auth_to_hub()
stopped = await client.is_lab_stopped()
if stopped:
LOGGER.debug(f"Starting new 'recommended' Lab for {params.user}")
await _spawn_lab(client)
else:
LOGGER.debug(f"{params.user} already has a running Lab")
LOGGER.debug("Lab spawned; proceeding with redirection")


async def _spawn_lab(client: NubladoClient) -> None:
image = _choose_image()
await client.spawn_lab(image)
await _follow_progress(client)


def _choose_image() -> NubladoImage:
"""Because the primary use case of this redirection service is
to launch tutorial notebooks, our "start a new lab" parameters
are going to be the currently recommended lab, and Medium size,
because that's how the tutorial notebooks are generally set up
to run. Maybe we will do something more sophisticated later.
"""
return NubladoImageByClass(
image_class=NubladoImageClass.RECOMMENDED, size=NubladoImageSize.Medium
)


async def _follow_progress(client: NubladoClient) -> None:
LOGGER.debug("Waiting for lab to spawn")
progress = client.watch_spawn_progress()
try:
async with asyncio.timeout(LAB_SPAWN_TIMEOUT):
async for message in progress:
LOGGER.debug(f"Lab spawn message: {message.message}")
if message.ready:
break
except TimeoutError:
LOGGER.exception(f"Lab did not spawn in {LAB_SPAWN_TIMEOUT} seconds")
raise
75 changes: 23 additions & 52 deletions src/ghostwriter/hooks/ensure_lab.py
Original file line number Diff line number Diff line change
@@ -1,64 +1,35 @@
"""Ensure that the user has a running lab."""

import asyncio

import structlog
from rubin.nublado.client import NubladoClient
from rubin.nublado.client.models import (
NubladoImage,
NubladoImageByClass,
NubladoImageClass,
NubladoImageSize,
)
"""Ensure that the user has a running lab by dropping them in the spawner form
if they don't, and then slingshotting them through the lab ghostwriter endpoint
to restart the redirect process. Requires d_2024_10_19 (corresponds to
w_2024_10_43) or later.
"""

from ..models.substitution import Parameters

LAB_SPAWN_TIMEOUT = 90
LOGGER = structlog.get_logger("ghostwriter")
from ._logger import LOGGER


async def ensure_running_lab(params: Parameters) -> None:
async def ensure_running_lab(params: Parameters) -> Parameters | None:
"""Start a Lab if one is not present."""
LOGGER.debug(f"Checking for running Lab for {params}")
client = params.client
LOGGER.debug("Logging in to Hub")
await client.auth_to_hub()
stopped = await client.is_lab_stopped()
if stopped:
LOGGER.debug(f"Starting new 'recommended' Lab for {params.user}")
await _spawn_lab(client)
else:
if not stopped:
LOGGER.debug(f"{params.user} already has a running Lab")
LOGGER.debug("Lab spawned; proceeding with redirection")


async def _spawn_lab(client: NubladoClient) -> None:
image = _choose_image()
await client.spawn_lab(image)
await _follow_progress(client)


def _choose_image() -> NubladoImage:
"""Because the primary use case of this redirection service is
to launch tutorial notebooks, our "start a new lab" parameters
are going to be the currently recommended lab, and Medium size,
because that's how the tutorial notebooks are generally set up
to run. Maybe we will do something more sophisticated later.
"""
return NubladoImageByClass(
image_class=NubladoImageClass.RECOMMENDED, size=NubladoImageSize.Medium
return None
LOGGER.debug(f"Sending {params.user} to spawner")
LOGGER.debug(f"Input parameters {params}")
new_p = Parameters(
user=params.user,
base_url=params.base_url,
path=params.path,
token=params.token,
client=params.client,
target="${base_url}/nb/user/${user}/rubin/ghostwriter/${path}",
unique_id=params.unique_id,
strip=False,
final=True,
)


async def _follow_progress(client: NubladoClient) -> None:
LOGGER.debug("Waiting for lab to spawn")
progress = client.watch_spawn_progress()
try:
async with asyncio.timeout(LAB_SPAWN_TIMEOUT):
async for message in progress:
LOGGER.debug(f"Lab spawn message: {message.message}")
if message.ready:
break
except TimeoutError:
LOGGER.exception(f"Lab did not spawn in {LAB_SPAWN_TIMEOUT} seconds")
raise
LOGGER.debug(f"Output parameters {new_p}")
return new_p
80 changes: 49 additions & 31 deletions src/ghostwriter/hooks/github_notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,11 @@
from string import Template
from urllib.parse import urljoin

import structlog

from ..models.substitution import Parameters

LOGGER = structlog.get_logger("ghostwriter")
from ._logger import LOGGER


async def github_notebook(params: Parameters) -> Parameters | None:
async def github_notebook(params: Parameters) -> Parameters:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps a suggestion for when you have more development effort on this would be splitting the path processing here to its own method and writing some tests for that to make sure it can handle all unusual cases of paths

"""Check out a particular notebook from GitHub."""
client = params.client
LOGGER.debug("Logging in to hub", user=params.user)
Expand All @@ -29,32 +26,53 @@ async def github_notebook(params: Parameters) -> Parameters | None:
# We know the stream output should be the serial number and
# a newline.
serial = (await lab_session.run_python(code)).strip()
if serial and serial != "0" and params.target is not None:
#
# params.target is never None, because it's injected by run_hooks(),
# but mypy doesn't know that.
#
# We had to change the filename and filename schema.
# Return a modified Parameters object.
#
targetcomp = params.target.split("/")
targetcomp[-1] = "${path}-${unique_id}.ipynb"
target = "/".join(targetcomp)
unique_id = str(serial)
LOGGER.debug(
"Continuing to redirect", target=target, unique_id=unique_id
)
return Parameters(
user=params.user,
base_url=params.base_url,
token=params.token,
client=params.client,
path=params.path,
target=target,
unique_id=unique_id,
)
LOGGER.debug("Continuing to redirect; params unchanged")
return None

# Honestly it's easier to just unconditionally rewrite the target
# than to figure out whether it needs rewriting.

return _get_new_params(serial, params)


def _get_new_params(serial: str, params: Parameters) -> Parameters:
# Start with the common fragment
target = (
athornton marked this conversation as resolved.
Show resolved Hide resolved
f"{params.base_url}nb/user/{params.user}/lab/tree/notebooks"
"/on-demand/github.com/"
)

# Remove branch information, if any (the checkout will have handled
# it in the code we ran to get the serial).
path = params.path.split("@")[0]

# Canonicalize path.
prefix = "notebooks/github.com/"
if path.startswith(prefix):
# Needs stripping
path = path[(len(prefix)) :]
if path.endswith(".ipynb"):
# Also needs stripping
path = path[: -(len(".ipynb"))]
athornton marked this conversation as resolved.
Show resolved Hide resolved
Comment on lines +48 to +54
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wondering if this would be simpler:

path = path.removeprefix("notebooks/github.com/").removesuffix(".ipynb")


# Add discriminator if needed
unique_id: str | None = None
if serial and serial != "0":
# Glue in serial if it is nonzero
unique_id = serial
path += f"-{unique_id}"
path += ".ipynb" # And add the extension
target += path

new_param = Parameters(
user=params.user,
base_url=params.base_url,
token=params.token,
client=params.client,
path=params.path,
target=target,
unique_id=unique_id,
)
LOGGER.debug("Continuing to redirect", param=new_param)
return new_param


def _get_user_endpoint(base_url: str, user: str) -> str:
Expand Down
4 changes: 1 addition & 3 deletions src/ghostwriter/hooks/portal_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,11 @@

from urllib.parse import urljoin

import structlog
from rubin.nublado.client import NubladoClient

from ..exceptions import HookError
from ..models.substitution import Parameters

LOGGER = structlog.get_logger("ghostwriter")
from ._logger import LOGGER


async def portal_query(params: Parameters) -> None:
Expand Down
6 changes: 2 additions & 4 deletions src/ghostwriter/hooks/vacuous.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
"""Does nothing, but if it loads, the hook machinery is working."""

import structlog

from ..models.substitution import Parameters
from ._logger import LOGGER


async def vacuous_hook(params: Parameters) -> None:
"""Load and execute a no-op."""
logger = structlog.get_logger("ghostwriter")
logger.debug(f"Vacuous hook called with {params}")
LOGGER.debug(f"Vacuous hook called with {params}")
Loading