From 846e55be4044d6bea57bd6e2eb6e12a9e2048a2b Mon Sep 17 00:00:00 2001 From: adam Date: Mon, 21 Oct 2024 11:42:46 -0700 Subject: [PATCH 1/3] Add 'final' and 'strip' to hooks, rework 'ensure_lab' --- docs/hooks/index.rst | 15 +++-- src/ghostwriter/hooks/__init__.py | 2 + src/ghostwriter/hooks/_logger.py | 5 ++ src/ghostwriter/hooks/autostart_lab.py | 65 ++++++++++++++++++++ src/ghostwriter/hooks/ensure_lab.py | 75 ++++++++---------------- src/ghostwriter/hooks/github_notebook.py | 5 +- src/ghostwriter/hooks/portal_query.py | 4 +- src/ghostwriter/hooks/vacuous.py | 6 +- src/ghostwriter/models/substitution.py | 3 + src/ghostwriter/services/rewrite.py | 9 ++- 10 files changed, 119 insertions(+), 70 deletions(-) create mode 100644 src/ghostwriter/hooks/_logger.py create mode 100644 src/ghostwriter/hooks/autostart_lab.py diff --git a/docs/hooks/index.rst b/docs/hooks/index.rst index 22452ea..bf41077 100644 --- a/docs/hooks/index.rst +++ b/docs/hooks/index.rst @@ -21,7 +21,7 @@ To signal failure, a hook should raise an ``Exception``. If a hook does not wish to modify the parameters used by future hooks or route substitution, it should return ``None``. -If it does return an object, that object will first be checked to ensure only the ``target`` and ``unique_id`` fields changed. +If it does return an object, that object will first be checked to ensure only the ``target``, ``unique_id``, and/or ``final`` fields changed. If any other field changed, an exception will be raised. The returned ``Parameters`` object will be used as the input to subsequent hooks and to update the target path that will be substituted. @@ -37,16 +37,23 @@ Obviously that begs the question of "what's in a ``Parameters`` object?" Three of the fields are obvious: they are ``base_url``, ``path``, and ``user``, all used in path construction for the redirect. -Two fields are only for use by the hook itself: those are ``target`` and ``unique_id``. +Four fields are only for use by the hook itself: those are ``target``, ``unique_id``, ``strip``, and ``final``. The ``target`` field will be injected when the hook is run, and the ``unique_id`` field may be populated. -These two are the only fields a hook is permitted to change if it returns a ``Parameters`` object. +These four are the only fields a hook is permitted to change if it returns a ``Parameters`` object. The ``target`` may need to be rewritten to accomodate a ``unique_id``. -The motivation here is simply to avoid rewriting existing files: the correct response is context-dependent, and might be to redirect to the existing file, but it equally well might be to create a new file under a different name. +The motivation of ``unique_id`` is simply to avoid rewriting existing files: the correct response is context-dependent, and might be to redirect to the existing file, but it equally well might be to create a new file under a different name. In this case, the file name stem (that is, the part before the suffix) might need to be appended with a ``unique_id``, which could be (again, the best choice depends on context) a serial number, as in ``Untitled2.ipynb``, or a string representation of the date and time, or simply a UUID. The ``unique_id`` can be any string legal in a filename, as long as the filename containing it will be distinct from any other filename in the directory. Guaranteeing that it is unique is the job of the hook writer. +The ``strip`` field controls whether or not the path prefix is stripped from the target path. Sometimes it is useful not to strip it, particularly in conjunction with ``final``. + +The ``final`` field is used to assert that hook processing should stop here and that ghostwriter should proceed with a redirect to the current value of ``target``. +This is used in ``ensure_lab`` to redirect the user to a spawner page, forcing them to choose an image and size. +The constructed ``target`` relies on the fact that once spawned, the specified path will be respected by the running lab, and it will have been constructed to hit a proxy endpoint, which will itself issue a redirect to throw the user back via the same redirection they've just come through. +However, this time, ``ensure_lab`` will return ``None`` and therefore hook processing will continue, picking out the correct file inside the user lab. + Given the context, the existing hooks do not worry much about race conditions. If you are using the date or an incrementing integer...you are still in an RSP context, so it's very unlikely a user will go to the same redirected URL twice in the same microsecond, or even twice within the time it takes to write out a notebook. If you do have some high-frequency use case, a UUID would be a better choice. diff --git a/src/ghostwriter/hooks/__init__.py b/src/ghostwriter/hooks/__init__.py index 3a3d002..72725b8 100644 --- a/src/ghostwriter/hooks/__init__.py +++ b/src/ghostwriter/hooks/__init__.py @@ -1,9 +1,11 @@ +from .autostart_lab import ensure_autostart_lab from .ensure_lab import ensure_running_lab from .github_notebook import github_notebook from .portal_query import portal_query from .vacuous import vacuous_hook __all__ = [ + "ensure_autostart_lab", "ensure_running_lab", "github_notebook", "portal_query", diff --git a/src/ghostwriter/hooks/_logger.py b/src/ghostwriter/hooks/_logger.py new file mode 100644 index 0000000..4755fd8 --- /dev/null +++ b/src/ghostwriter/hooks/_logger.py @@ -0,0 +1,5 @@ +"""Logger for hooks.""" + +import structlog + +LOGGER = structlog.getLogger("ghostwriter") diff --git a/src/ghostwriter/hooks/autostart_lab.py b/src/ghostwriter/hooks/autostart_lab.py new file mode 100644 index 0000000..0520451 --- /dev/null +++ b/src/ghostwriter/hooks/autostart_lab.py @@ -0,0 +1,65 @@ +"""Ensure that the user has a running lab. If they don't, they will be +given `recommended` in `medium` size. +""" + +import asyncio + +from rubin.nublado.client import NubladoClient +from rubin.nublado.client.models import ( + NubladoImage, + NubladoImageByClass, + NubladoImageClass, + NubladoImageSize, +) + +from ..models.substitution import Parameters +from ._logger import LOGGER + +LAB_SPAWN_TIMEOUT = 90 + + +async def ensure_autostart_lab(params: Parameters) -> None: + """Start a Lab if one is not present.""" + LOGGER.debug(f"Checking for running Lab for {params}") + client = params.client + LOGGER.debug("Logging in to Hub") + await client.auth_to_hub() + stopped = await client.is_lab_stopped() + if stopped: + LOGGER.debug(f"Starting new 'recommended' Lab for {params.user}") + await _spawn_lab(client) + else: + LOGGER.debug(f"{params.user} already has a running Lab") + LOGGER.debug("Lab spawned; proceeding with redirection") + + +async def _spawn_lab(client: NubladoClient) -> None: + image = _choose_image() + await client.spawn_lab(image) + await _follow_progress(client) + + +def _choose_image() -> NubladoImage: + """Because the primary use case of this redirection service is + to launch tutorial notebooks, our "start a new lab" parameters + are going to be the currently recommended lab, and Medium size, + because that's how the tutorial notebooks are generally set up + to run. Maybe we will do something more sophisticated later. + """ + return NubladoImageByClass( + image_class=NubladoImageClass.RECOMMENDED, size=NubladoImageSize.Medium + ) + + +async def _follow_progress(client: NubladoClient) -> None: + LOGGER.debug("Waiting for lab to spawn") + progress = client.watch_spawn_progress() + try: + async with asyncio.timeout(LAB_SPAWN_TIMEOUT): + async for message in progress: + LOGGER.debug(f"Lab spawn message: {message.message}") + if message.ready: + break + except TimeoutError: + LOGGER.exception(f"Lab did not spawn in {LAB_SPAWN_TIMEOUT} seconds") + raise diff --git a/src/ghostwriter/hooks/ensure_lab.py b/src/ghostwriter/hooks/ensure_lab.py index 7caa1b7..027a41f 100644 --- a/src/ghostwriter/hooks/ensure_lab.py +++ b/src/ghostwriter/hooks/ensure_lab.py @@ -1,64 +1,35 @@ -"""Ensure that the user has a running lab.""" - -import asyncio - -import structlog -from rubin.nublado.client import NubladoClient -from rubin.nublado.client.models import ( - NubladoImage, - NubladoImageByClass, - NubladoImageClass, - NubladoImageSize, -) +"""Ensure that the user has a running lab by dropping them in the spawner form +if they don't, and then slingshotting them through the lab ghostwriter endpoint +to restart the redirect process. Requires d_2024_10_19 (corresponds to +w_2024_10_43) or later. +""" from ..models.substitution import Parameters - -LAB_SPAWN_TIMEOUT = 90 -LOGGER = structlog.get_logger("ghostwriter") +from ._logger import LOGGER -async def ensure_running_lab(params: Parameters) -> None: +async def ensure_running_lab(params: Parameters) -> Parameters | None: """Start a Lab if one is not present.""" LOGGER.debug(f"Checking for running Lab for {params}") client = params.client LOGGER.debug("Logging in to Hub") await client.auth_to_hub() stopped = await client.is_lab_stopped() - if stopped: - LOGGER.debug(f"Starting new 'recommended' Lab for {params.user}") - await _spawn_lab(client) - else: + if not stopped: LOGGER.debug(f"{params.user} already has a running Lab") - LOGGER.debug("Lab spawned; proceeding with redirection") - - -async def _spawn_lab(client: NubladoClient) -> None: - image = _choose_image() - await client.spawn_lab(image) - await _follow_progress(client) - - -def _choose_image() -> NubladoImage: - """Because the primary use case of this redirection service is - to launch tutorial notebooks, our "start a new lab" parameters - are going to be the currently recommended lab, and Medium size, - because that's how the tutorial notebooks are generally set up - to run. Maybe we will do something more sophisticated later. - """ - return NubladoImageByClass( - image_class=NubladoImageClass.RECOMMENDED, size=NubladoImageSize.Medium + return None + LOGGER.debug(f"Sending {params.user} to spawner") + LOGGER.debug(f"Input parameters {params}") + new_p = Parameters( + user=params.user, + base_url=params.base_url, + path=params.path, + token=params.token, + client=params.client, + target="${base_url}/nb/user/${user}/rubin/ghostwriter/${path}", + unique_id=params.unique_id, + strip=False, + final=True, ) - - -async def _follow_progress(client: NubladoClient) -> None: - LOGGER.debug("Waiting for lab to spawn") - progress = client.watch_spawn_progress() - try: - async with asyncio.timeout(LAB_SPAWN_TIMEOUT): - async for message in progress: - LOGGER.debug(f"Lab spawn message: {message.message}") - if message.ready: - break - except TimeoutError: - LOGGER.exception(f"Lab did not spawn in {LAB_SPAWN_TIMEOUT} seconds") - raise + LOGGER.debug(f"Output parameters {new_p}") + return new_p diff --git a/src/ghostwriter/hooks/github_notebook.py b/src/ghostwriter/hooks/github_notebook.py index 6ab805b..f97757b 100644 --- a/src/ghostwriter/hooks/github_notebook.py +++ b/src/ghostwriter/hooks/github_notebook.py @@ -9,11 +9,8 @@ from string import Template from urllib.parse import urljoin -import structlog - from ..models.substitution import Parameters - -LOGGER = structlog.get_logger("ghostwriter") +from ._logger import LOGGER async def github_notebook(params: Parameters) -> Parameters | None: diff --git a/src/ghostwriter/hooks/portal_query.py b/src/ghostwriter/hooks/portal_query.py index 8c985fa..4d4d326 100644 --- a/src/ghostwriter/hooks/portal_query.py +++ b/src/ghostwriter/hooks/portal_query.py @@ -6,13 +6,11 @@ from urllib.parse import urljoin -import structlog from rubin.nublado.client import NubladoClient from ..exceptions import HookError from ..models.substitution import Parameters - -LOGGER = structlog.get_logger("ghostwriter") +from ._logger import LOGGER async def portal_query(params: Parameters) -> None: diff --git a/src/ghostwriter/hooks/vacuous.py b/src/ghostwriter/hooks/vacuous.py index 9f67a48..a66cca1 100644 --- a/src/ghostwriter/hooks/vacuous.py +++ b/src/ghostwriter/hooks/vacuous.py @@ -1,11 +1,9 @@ """Does nothing, but if it loads, the hook machinery is working.""" -import structlog - from ..models.substitution import Parameters +from ._logger import LOGGER async def vacuous_hook(params: Parameters) -> None: """Load and execute a no-op.""" - logger = structlog.get_logger("ghostwriter") - logger.debug(f"Vacuous hook called with {params}") + LOGGER.debug(f"Vacuous hook called with {params}") diff --git a/src/ghostwriter/models/substitution.py b/src/ghostwriter/models/substitution.py index f601fff..00eed46 100644 --- a/src/ghostwriter/models/substitution.py +++ b/src/ghostwriter/models/substitution.py @@ -19,6 +19,8 @@ class Parameters: client: NubladoClient target: str | None = None unique_id: str | None = None + strip: bool = True + final: bool = False def __str__(self) -> str: ret = f"Parameters[User: '{self.user}'; Base URL '{self.base_url}'; " @@ -27,6 +29,7 @@ def __str__(self) -> str: ret += f"Target: '{self.target}'; " if self.unique_id: ret += f"UniqueID: {self.unique_id}; " + ret += f"Final: {self.final}; " ret += "Token and RSP client redacted]" return ret diff --git a/src/ghostwriter/services/rewrite.py b/src/ghostwriter/services/rewrite.py index 78eaa83..a4f3e0c 100644 --- a/src/ghostwriter/services/rewrite.py +++ b/src/ghostwriter/services/rewrite.py @@ -118,8 +118,9 @@ async def rewrite_route( tmpl = Template(params.target) mapping = params.rewrite_mapping() full_path = mapping["path"] - # Strip matched path - mapping["path"] = full_path[(len(route.source_prefix) - 1) :] + # Strip matched path if requested + if params.strip: + mapping["path"] = full_path[(len(route.source_prefix) - 1) :] logger.debug(f"Rewriting '{params.target}' with '{mapping}'") try: # Canonicalize the resulting URL (and throw an error if it's @@ -156,7 +157,7 @@ async def run_hooks( `~ghostwriter.models.substitution.Parameters` Parameters to pass to the URL rewrite. These may not be the same as the ones that we received as our input parameters (specifically, - target and unique_id may have changed). + target, unique_id, and/or final may have changed). """ current_target = route.target params.target = current_target @@ -187,6 +188,8 @@ async def run_hooks( raise RuntimeError(errstr) # Immediately converted if res.target is None: res.target = current_target # No parameters changed + if res.final: + return res if res.target != current_target: # Update current_target with result field if it changed current_target = res.target From dc6ea32a9054e609f0ec521b1526e57eb470af1a Mon Sep 17 00:00:00 2001 From: adam Date: Mon, 21 Oct 2024 15:31:32 -0700 Subject: [PATCH 2/3] Handle branches correctly --- .../hooks/_github_notebook_payload.py | 2 +- src/ghostwriter/hooks/github_notebook.py | 64 +++++++++++-------- src/ghostwriter/models/substitution.py | 1 + 3 files changed, 39 insertions(+), 28 deletions(-) diff --git a/src/ghostwriter/hooks/_github_notebook_payload.py b/src/ghostwriter/hooks/_github_notebook_payload.py index 9f8434d..b56f760 100644 --- a/src/ghostwriter/hooks/_github_notebook_payload.py +++ b/src/ghostwriter/hooks/_github_notebook_payload.py @@ -58,5 +58,5 @@ # Finally, print the value of ``serial``, which we will capture as # a notebook stream output to determine whether we need to modify -# the path and unique_id in rewrite parameters. +# the target and unique_id in rewrite parameters. print(serial) diff --git a/src/ghostwriter/hooks/github_notebook.py b/src/ghostwriter/hooks/github_notebook.py index f97757b..bf72c39 100644 --- a/src/ghostwriter/hooks/github_notebook.py +++ b/src/ghostwriter/hooks/github_notebook.py @@ -13,7 +13,7 @@ from ._logger import LOGGER -async def github_notebook(params: Parameters) -> Parameters | None: +async def github_notebook(params: Parameters) -> Parameters: """Check out a particular notebook from GitHub.""" client = params.client LOGGER.debug("Logging in to hub", user=params.user) @@ -26,32 +26,42 @@ async def github_notebook(params: Parameters) -> Parameters | None: # We know the stream output should be the serial number and # a newline. serial = (await lab_session.run_python(code)).strip() - if serial and serial != "0" and params.target is not None: - # - # params.target is never None, because it's injected by run_hooks(), - # but mypy doesn't know that. - # - # We had to change the filename and filename schema. - # Return a modified Parameters object. - # - targetcomp = params.target.split("/") - targetcomp[-1] = "${path}-${unique_id}.ipynb" - target = "/".join(targetcomp) - unique_id = str(serial) - LOGGER.debug( - "Continuing to redirect", target=target, unique_id=unique_id - ) - return Parameters( - user=params.user, - base_url=params.base_url, - token=params.token, - client=params.client, - path=params.path, - target=target, - unique_id=unique_id, - ) - LOGGER.debug("Continuing to redirect; params unchanged") - return None + + # Honestly it's easier to just unconditionally rewrite the target + # than to figure out whether it needs rewriting. + + # Start with the common fragment + target = ( + f"{params.base_url}nb/user/{params.user}/lab/tree/notebooks" + "/on-demand/github.com/" + ) + # Remove branch information, if any (the checkout will have handled + # it in the code we ran to get the serial). + path = params.path.split("@")[0] + if path.startswith("notebooks/github.com/"): + # Needs stripping + path = path[(len("notebooks/github.com/")) :] + if path.endswith(".ipynb"): + # Also needs stripping + path = path[: -(len(".ipynb"))] + unique_id: str | None = None + if serial and serial != "0": + # Glue in serial if it is nonzero + unique_id = serial + path += f"-{unique_id}" + path += ".ipynb" # And add the extension + target += path + new_param = Parameters( + user=params.user, + base_url=params.base_url, + token=params.token, + client=params.client, + path=params.path, + target=target, + unique_id=unique_id, + ) + LOGGER.debug("Continuing to redirect", param=new_param) + return new_param def _get_user_endpoint(base_url: str, user: str) -> str: diff --git a/src/ghostwriter/models/substitution.py b/src/ghostwriter/models/substitution.py index 00eed46..2abb02d 100644 --- a/src/ghostwriter/models/substitution.py +++ b/src/ghostwriter/models/substitution.py @@ -29,6 +29,7 @@ def __str__(self) -> str: ret += f"Target: '{self.target}'; " if self.unique_id: ret += f"UniqueID: {self.unique_id}; " + ret += f"Strip: {self.strip}; " ret += f"Final: {self.final}; " ret += "Token and RSP client redacted]" return ret From 7644e547c4402f03d99893ed32d886305faf855e Mon Sep 17 00:00:00 2001 From: Adam Thornton Date: Tue, 22 Oct 2024 13:39:21 -0700 Subject: [PATCH 3/3] Refactor github path construction and add tests --- docs/_static/openapi.json | 2 +- docs/hooks/index.rst | 2 +- src/ghostwriter/hooks/github_notebook.py | 15 +++++- tests/hooks/serial_test.py | 69 ++++++++++++++++++++++++ 4 files changed, 84 insertions(+), 4 deletions(-) create mode 100644 tests/hooks/serial_test.py diff --git a/docs/_static/openapi.json b/docs/_static/openapi.json index 978447a..2212f16 100644 --- a/docs/_static/openapi.json +++ b/docs/_static/openapi.json @@ -1 +1 @@ -{"openapi": "3.1.0", "info": {"title": "Ghostwriter", "description": "URL shortener/personalizer for Phalanx\n\n[Return to Ghostwriter documentation](.)", "version": "0.1.2.dev6+ge335638.d20241017"}, "paths": {"/ghostwriter/": {"get": {"summary": "Application metadata", "description": "Document the top-level API here. By default it only returns metadata about the application.", "operationId": "get_index_ghostwriter__get", "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/Index"}}}}}}}, "/ghostwriter/rewrite/{full_path}": {"get": {"summary": "Rewrite", "operationId": "rewrite_ghostwriter_rewrite__full_path__get", "parameters": [{"name": "full_path", "in": "path", "required": true, "schema": {"type": "string", "title": "The URL path to rewrite"}}], "responses": {"307": {"description": "Successful Response"}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}}, "components": {"schemas": {"HTTPValidationError": {"properties": {"detail": {"items": {"$ref": "#/components/schemas/ValidationError"}, "type": "array", "title": "Detail"}}, "type": "object", "title": "HTTPValidationError"}, "Index": {"properties": {"metadata": {"$ref": "#/components/schemas/Metadata", "title": "Package metadata"}}, "type": "object", "required": ["metadata"], "title": "Index", "description": "Metadata returned by the external root URL of the application.\n\nNotes\n-----\nAs written, this is not very useful. Add additional metadata that will be\nhelpful for a user exploring the application, or replace this model with\nsome other model that makes more sense to return from the application API\nroot."}, "Metadata": {"properties": {"name": {"type": "string", "title": "Application name", "examples": ["myapp"]}, "version": {"type": "string", "title": "Version", "examples": ["1.0.0"]}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "title": "Description", "examples": ["Some package description"]}, "repository_url": {"anyOf": [{"type": "string"}, {"type": "null"}], "title": "Repository URL", "examples": ["https://example.com/"]}, "documentation_url": {"anyOf": [{"type": "string"}, {"type": "null"}], "title": "Documentation URL", "examples": ["https://example.com/"]}}, "type": "object", "required": ["name", "version"], "title": "Metadata", "description": "Metadata about a package."}, "ValidationError": {"properties": {"loc": {"items": {"anyOf": [{"type": "string"}, {"type": "integer"}]}, "type": "array", "title": "Location"}, "msg": {"type": "string", "title": "Message"}, "type": {"type": "string", "title": "Error Type"}}, "type": "object", "required": ["loc", "msg", "type"], "title": "ValidationError"}}}} \ No newline at end of file +{"openapi": "3.1.0", "info": {"title": "Ghostwriter", "description": "URL shortener/personalizer for Phalanx\n\n[Return to Ghostwriter documentation](.)", "version": "0.1.2.dev6+ge335638.d20241015"}, "paths": {"/ghostwriter/": {"get": {"summary": "Application metadata", "description": "Document the top-level API here. By default it only returns metadata about the application.", "operationId": "get_index_ghostwriter__get", "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/Index"}}}}}}}, "/ghostwriter/rewrite/{full_path}": {"get": {"summary": "Rewrite", "operationId": "rewrite_ghostwriter_rewrite__full_path__get", "parameters": [{"name": "full_path", "in": "path", "required": true, "schema": {"type": "string", "title": "The URL path to rewrite"}}], "responses": {"307": {"description": "Successful Response"}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}}, "components": {"schemas": {"HTTPValidationError": {"properties": {"detail": {"items": {"$ref": "#/components/schemas/ValidationError"}, "type": "array", "title": "Detail"}}, "type": "object", "title": "HTTPValidationError"}, "Index": {"properties": {"metadata": {"$ref": "#/components/schemas/Metadata", "title": "Package metadata"}}, "type": "object", "required": ["metadata"], "title": "Index", "description": "Metadata returned by the external root URL of the application.\n\nNotes\n-----\nAs written, this is not very useful. Add additional metadata that will be\nhelpful for a user exploring the application, or replace this model with\nsome other model that makes more sense to return from the application API\nroot."}, "Metadata": {"properties": {"name": {"type": "string", "title": "Application name", "examples": ["myapp"]}, "version": {"type": "string", "title": "Version", "examples": ["1.0.0"]}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "title": "Description", "examples": ["Some package description"]}, "repository_url": {"anyOf": [{"type": "string"}, {"type": "null"}], "title": "Repository URL", "examples": ["https://example.com/"]}, "documentation_url": {"anyOf": [{"type": "string"}, {"type": "null"}], "title": "Documentation URL", "examples": ["https://example.com/"]}}, "type": "object", "required": ["name", "version"], "title": "Metadata", "description": "Metadata about a package."}, "ValidationError": {"properties": {"loc": {"items": {"anyOf": [{"type": "string"}, {"type": "integer"}]}, "type": "array", "title": "Location"}, "msg": {"type": "string", "title": "Message"}, "type": {"type": "string", "title": "Error Type"}}, "type": "object", "required": ["loc", "msg", "type"], "title": "ValidationError"}}}} \ No newline at end of file diff --git a/docs/hooks/index.rst b/docs/hooks/index.rst index bf41077..2f83dc8 100644 --- a/docs/hooks/index.rst +++ b/docs/hooks/index.rst @@ -9,7 +9,7 @@ There is a list of hooks (possibly empty) for each route specified in the config Each hook is run in sequence. If any hook raises an exception, the user will receive an error rather than a redirect. Because (at least at present) hooks are, by definition, located inside the ``ghostwriter.hooks`` Python module namespace, any additions or modifications to hooks are code and repository changes. -Therefore, any proposed hooks or modififactions to existing ones will need to go through the SQuaRE PR process. +Therefore, any proposed hooks or modifications to existing ones will need to go through the SQuaRE PR process. Anatomy of a Hook ================= diff --git a/src/ghostwriter/hooks/github_notebook.py b/src/ghostwriter/hooks/github_notebook.py index bf72c39..e83867c 100644 --- a/src/ghostwriter/hooks/github_notebook.py +++ b/src/ghostwriter/hooks/github_notebook.py @@ -30,20 +30,30 @@ async def github_notebook(params: Parameters) -> Parameters: # Honestly it's easier to just unconditionally rewrite the target # than to figure out whether it needs rewriting. + return _get_new_params(serial, params) + + +def _get_new_params(serial: str, params: Parameters) -> Parameters: # Start with the common fragment target = ( f"{params.base_url}nb/user/{params.user}/lab/tree/notebooks" "/on-demand/github.com/" ) + # Remove branch information, if any (the checkout will have handled # it in the code we ran to get the serial). path = params.path.split("@")[0] - if path.startswith("notebooks/github.com/"): + + # Canonicalize path. + prefix = "notebooks/github.com/" + if path.startswith(prefix): # Needs stripping - path = path[(len("notebooks/github.com/")) :] + path = path[(len(prefix)) :] if path.endswith(".ipynb"): # Also needs stripping path = path[: -(len(".ipynb"))] + + # Add discriminator if needed unique_id: str | None = None if serial and serial != "0": # Glue in serial if it is nonzero @@ -51,6 +61,7 @@ async def github_notebook(params: Parameters) -> Parameters: path += f"-{unique_id}" path += ".ipynb" # And add the extension target += path + new_param = Parameters( user=params.user, base_url=params.base_url, diff --git a/tests/hooks/serial_test.py b/tests/hooks/serial_test.py new file mode 100644 index 0000000..63a02e5 --- /dev/null +++ b/tests/hooks/serial_test.py @@ -0,0 +1,69 @@ +"""Test the filename construction from serial bit of the github hook.""" + +import json +from pathlib import Path + +import pytest +import pytest_asyncio +from httpx import AsyncClient +from rubin.nublado.client import NubladoClient +from rubin.nublado.client.models import User + +from ghostwriter.config import Configuration +from ghostwriter.hooks.github_notebook import _get_new_params +from ghostwriter.models.substitution import Parameters + +COMMON_PREFIX = ( + "https://data.example.org/nb/user/rachel/lab/tree/notebooks" + "/on-demand/github.com/lsst-sqre/system-test/Firefly" +) + + +@pytest_asyncio.fixture +def std_params(config: Configuration, client: AsyncClient) -> Parameters: + user_objs = json.loads( + (Path(__file__).parent.parent / "support" / "users.json").read_text() + ) + token = next(iter(user_objs.keys())) + username = user_objs[token]["username"] + user = User(username=username, token=token) + nc = NubladoClient(user=user, base_url=str(config.environment_url)) + return Parameters( + user=user.username, + base_url=str(config.environment_url), + token=token, + client=nc, + path="notebooks/github.com/lsst-sqre/system-test/Firefly", + target="inconsequential", + unique_id=None, + ) + + +@pytest.mark.asyncio +async def test_basic_rewrite(std_params: Parameters) -> None: + new_p = _get_new_params("0", std_params) + expected = f"{COMMON_PREFIX}.ipynb" + assert new_p.target == expected + + +@pytest.mark.asyncio +async def test_serial_rewrite(std_params: Parameters) -> None: + new_p = _get_new_params("3", std_params) + expected = f"{COMMON_PREFIX}-3.ipynb" + assert new_p.target == expected + + +@pytest.mark.asyncio +async def test_strip_branch(std_params: Parameters) -> None: + inp = Parameters( + user=std_params.user, + base_url=std_params.base_url, + token=std_params.token, + client=std_params.client, + path=f"{std_params.path}@prod", + target=std_params.target, + unique_id=std_params.unique_id, + ) + new_p = _get_new_params("0", inp) + expected = f"{COMMON_PREFIX}.ipynb" + assert new_p.target == expected