Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tickets/DM-46157: Move Nublado Client into Nublado monorepo #401

Merged
merged 21 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 92 additions & 18 deletions client/src/rubin/nublado/client/_util.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,22 @@
"""Utility functions for Nublado client."""

import json
from enum import Enum

__all__ = [
"normalize_source",
"normalize_source_by_cell",
"extract_source_by_cell",
"source_string_by_cell",
"source_list_by_cell",
"notebook_to_disk_form",
"notebook_to_api_form",
]


class NotebookForm(Enum):
    """Representation used for the ``source`` entry of notebook cells.

    The two members name the target form that a notebook can be converted
    to: the on-disk form or the Contents API form.
    """

    # On disk, each cell's source is a list of strings, one per line.
    DISK = "disk"
    # The Contents API returns each cell's source as one single string.
    API = "api"


def normalize_source(notebook: str) -> str:
"""Extract and concatenate all the source cells from a notebook.

Expand Down Expand Up @@ -41,13 +49,13 @@ def normalize_source(notebook: str) -> str:
return "\n".join(
[
x.rstrip("\n")
for x in normalize_source_by_cell(notebook)
for x in source_string_by_cell(notebook)
if x.rstrip("\n")
]
)


def normalize_source_by_cell(notebook: str) -> list[str]:
def source_string_by_cell(notebook: str) -> list[str]:
"""Extract each cell source to a single string.

Parameters
Expand All @@ -61,35 +69,101 @@ def normalize_source_by_cell(notebook: str) -> list[str]:
A list of all non-empty source lines in a cell as a single Python
string. Each cell's source lines (with newline as the line separator) will
be a separate item of the returned list.

Notes
-----
This is what the contents API returns, although the text of the notebook
on disk will have each source line as its own entry within a list of
strings. So we will convert it to API form first and then return the
source item from each cell.
"""
notebook = notebook_to_api_form(notebook)
obj = json.loads(notebook)
return [
"\n".join(
[
x.rstrip("\n")
for y in extract_source_by_cell(notebook)
for x in y
if x.rstrip("\n")
]
)
x["source"]
for x in obj["cells"]
if x["cell_type"] == "code" and "source" in x and x["source"]
]


def extract_source_by_cell(notebook: str) -> list[list[str]]:
"""Extract all non-empty "code" cells' "source" lines as a list of strings.
def source_list_by_cell(notebook: str) -> list[list[str]]:
athornton marked this conversation as resolved.
Show resolved Hide resolved
"""Extract all non-empty "code" cells' "source" entry as a list of strings.

Parameters
----------
notebook
The text of the notebook file.
The notebook text, or the results of the Contents API.

Returns
-------
list[list[str]]
Source lines
list[str]
Source entries.

Notes
-----
In the notebook, "source" is a list of strings. In the Contents API, it's
a single string. So we will convert the notebook to disk form, and return
the list of lists.
"""
notebook = notebook_to_disk_form(notebook)
obj = json.loads(notebook)
return [
x["source"]
for x in obj["cells"]
if x["cell_type"] == "code" and x["source"]
if x["cell_type"] == "code" and "source" in x and x["source"]
]


def notebook_to_disk_form(notebook: str) -> str:
    """Convert notebook text so code cell sources are lists of strings.

    Parameters
    ----------
    notebook
        The notebook as JSON text, in either disk or Contents API form.

    Returns
    -------
    str
        The notebook re-serialized as JSON text in disk form.
    """
    target_form = NotebookForm.DISK
    return _transform_notebook(notebook, target_form)


def notebook_to_api_form(notebook: str) -> str:
    """Convert notebook text so each code cell source is a single string.

    Parameters
    ----------
    notebook
        The notebook as JSON text, in either disk or Contents API form.

    Returns
    -------
    str
        The notebook re-serialized as JSON text in Contents API form.
    """
    target_form = NotebookForm.API
    return _transform_notebook(notebook, target_form)


def _transform_notebook(notebook: str, form: NotebookForm) -> str:
    """Rewrite the source of every code cell into the requested form.

    Parameters
    ----------
    notebook
        The notebook as JSON text.
    form
        The form the code cells' ``source`` entries should end up in.

    Returns
    -------
    str
        The notebook re-serialized with `json.dumps`. Cells that are not
        code cells, or that have no (or an empty) source, are left alone.
    """
    parsed = json.loads(notebook)
    for cell in parsed["cells"]:
        # Only code cells with a non-empty source are candidates.
        if cell["cell_type"] != "code" or not cell.get("source"):
            continue
        source = cell["source"]
        if form == NotebookForm.API:
            # The API form wants one newline-separated string; a source
            # that is already a string is left untouched.
            if not isinstance(source, str):
                cell["source"] = _list_to_string(source)
        elif not (isinstance(source, list) and form == NotebookForm.DISK):
            # The disk form wants a list of lines, where all items but the
            # last end in a single newline; an existing list is kept as is.
            cell["source"] = _string_to_list(source)
    return json.dumps(parsed)


def _list_to_string(src: list[str]) -> str:
copy_list: list[str] = []
for src_line in src:
copy_line = src_line.rstrip("\n")
if copy_line:
copy_list.append(copy_line)
return "\n".join(copy_list)


def _string_to_list(src: str) -> list[str]:
src_list = src.split("\n")
copy_list: list[str] = []
for src_line in src_list:
copy_line = src_line.rstrip("\n")
if copy_line:
copy_line += "\n"
copy_list.append(copy_line)
if copy_list:
copy_list[-1].rstrip("\n")
return copy_list
4 changes: 2 additions & 2 deletions client/src/rubin/nublado/client/nubladoclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from websockets.exceptions import WebSocketException

from ._constants import WEBSOCKET_OPEN_TIMEOUT
from ._util import extract_source_by_cell
from ._util import source_list_by_cell
from .exceptions import (
CodeExecutionError,
ExecutionAPIError,
Expand Down Expand Up @@ -444,7 +444,7 @@ async def run_notebook(self, notebook: Path) -> list[str]:
self._logger.debug(f"Getting content from {url}")
resp = await self._client.get(url)
notebook = resp.json()["content"]
sources = extract_source_by_cell(json.dumps(notebook))
sources = source_list_by_cell(json.dumps(notebook))
self._logger.debug(f"Content: {sources}")
retlist: list[str] = []
for cellsrc in sources:
Expand Down
8 changes: 6 additions & 2 deletions client/src/rubin/nublado/client/testing/_jupyter.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from httpx import Request, Response
from safir.datetime import current_datetime

from .._util import normalize_source
from .._util import normalize_source, notebook_to_api_form
from ..models import NotebookExecutionResult


Expand Down Expand Up @@ -373,6 +373,10 @@ def get_content(self, request: Request) -> Response:

This is only enough to provide for the NubladoClient's run_notebook
functionality. We don't even use a real timestamp.

Irritatingly, the real Contents API represents the source of each
cell as a single string, while a notebook on disk represents it as
a list of strings, so we do need to simulate that.
"""
user = request.headers.get("X-Auth-Request-User", None)
if user is None:
Expand All @@ -385,7 +389,7 @@ def get_content(self, request: Request) -> Response:
path = str(request.url)[len(contents_url) :]
try:
filename = self._user_dir / path
content = json.loads(filename.read_text())
content = notebook_to_api_form(json.loads(filename.read_text()))
fn = filename.name
tstamp = "2024-09-12T17:55:05.077220Z"
resp_obj = {
Expand Down
Loading