lsst-sqre · athornton · Aug 6, 2024 · Jul 31, 2024 · Jul 31, 2024 · Jul 31, 2024
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -7,7 +7,7 @@ repos:
       - id: check-toml
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.5.1
+    rev: v0.5.6
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix]

diff --git a/requirements/dev.in b/requirements/dev.in
@@ -25,3 +25,6 @@ types-requests
 
 # Documentation
 scriv[toml]
+
+# Not a dependency on Mac, but is on Linux
+greenlet
diff --git a/requirements/dev.txt b/requirements/dev.txt
diff --git a/requirements/main.in b/requirements/main.in
@@ -22,7 +22,6 @@ pydantic-settings
 pyvo
 pyyaml
 safir>=6.1.0
-# -e /home/danfuchs/src/safir
 shortuuid
 structlog
 websockets

diff --git a/requirements/main.txt b/requirements/main.txt
diff --git a/src/mobu/dependencies/context.py b/src/mobu/dependencies/context.py
@@ -34,7 +34,7 @@ class RequestContext:
     """Incoming request."""
 
     logger: BoundLogger
-    """Request loger, rebound with discovered context."""
+    """Request logger, rebound with discovered context."""
 
     manager: FlockManager
     """Global singleton flock manager."""

diff --git a/src/mobu/models/business/base.py b/src/mobu/models/business/base.py
@@ -4,6 +4,7 @@
 from typing import Annotated
 
 from pydantic import BaseModel, ConfigDict, Field, PlainSerializer
+from safir.logging import LogLevel
 
 from ..timings import StopwatchData
 
@@ -35,12 +36,17 @@ class BusinessOptions(BaseModel):
         timedelta(minutes=1),
         title="How long to wait between business executions",
         description=(
-            "AFter each loop executing monkey business, the monkey will"
+            "After each loop executing monkey business, the monkey will"
             " pause for this long"
         ),
         examples=[60],
     )
 
+    log_level: LogLevel = Field(
+        LogLevel.INFO,
+        title="Log level for this monkey business",
+    )
+
     model_config = ConfigDict(extra="forbid")
 
 

diff --git a/src/mobu/services/business/nublado.py b/src/mobu/services/business/nublado.py
@@ -188,6 +188,7 @@ async def execution_idle(self) -> bool:
             return await self.pause(self.options.execution_idle_time)
 
     async def shutdown(self) -> None:
+        await self.hub_login()
         await self.delete_lab()
 
     async def idle(self) -> None:

diff --git a/src/mobu/services/monkey.py b/src/mobu/services/monkey.py
@@ -72,6 +72,7 @@ def __init__(
         self._name = name
         self._flock = flock
         self._restart = business_config.restart
+        self._log_level = business_config.options.log_level
         self._http_client = http_client
         self._user = user
 
@@ -276,7 +277,7 @@ def _build_logger(self, logfile: _TemporaryFileWrapper) -> BoundLogger:
         file_handler.setFormatter(formatter)
         logger = logging.getLogger(self._name)
         logger.handlers = []
-        logger.setLevel(logging.INFO)
+        logger.setLevel(self._log_level.value)
         logger.addHandler(file_handler)
         logger.propagate = False
         if config.profile == Profile.development:

diff --git a/src/mobu/storage/nublado.py b/src/mobu/storage/nublado.py
@@ -556,19 +556,20 @@ def __init__(
         self._base_url = base_url
         self._logger = logger.bind(user=user.username)
 
-        # Construct a connection pool to use for requets to JupyterHub. We
+        # Construct a connection pool to use for requests to JupyterHub. We
         # have to create a separate connection pool for every monkey, since
         # each will get user-specific cookies set by JupyterHub. If we shared
         # connection pools, monkeys would overwrite each other's cookies and
         # get authentication failures from labs.
         headers = {"Authorization": f"Bearer {user.token}"}
         self._client = AsyncClient(
-            headers=headers,
             follow_redirects=True,
+            headers=headers,
             timeout=timeout.total_seconds(),
         )
         self._hub_xsrf: str | None = None
         self._lab_xsrf: str | None = None
+        self._logger.debug("Created new NubladoClient")
 
     async def close(self) -> None:
         """Close the underlying HTTP connection pool."""
@@ -591,11 +592,36 @@ async def auth_to_hub(self) -> None:
             Raised if no ``_xsrf`` cookie was set in the reply from the lab.
         """
         url = self._url_for("hub/home")
-        r = await self._client.get(url)
+        r = await self._client.get(url, follow_redirects=False)
+        # As with auth_to_lab, manually extract from cookies at each
+        # redirection, because httpx doesn't do that if following redirects
+        # automatically.
+        while r.is_redirect:
+            self._logger.debug(
+                "Following hub redirect looking for _xsrf cookies",
+                method=r.request.method,
+                url=r.url.copy_with(query=None, fragment=None),
+                status_code=r.status_code,
+            )
+            xsrf = self._extract_xsrf(r)
+            if xsrf and xsrf != self._hub_xsrf:
+                self._hub_xsrf = xsrf
+                self._logger.debug(
+                    "Set _hub_xsrf",
+                    url=r.url.copy_with(query=None, fragment=None),
+                    status_code=r.status_code,
+                )
+            next_url = urljoin(url, r.headers["Location"])
+            r = await self._client.get(next_url, follow_redirects=False)
         r.raise_for_status()
         xsrf = self._extract_xsrf(r)
-        if xsrf:
+        if xsrf and xsrf != self._hub_xsrf:
             self._hub_xsrf = xsrf
+            self._logger.debug(
+                "Set _hub_xsrf",
+                url=r.url.copy_with(query=None, fragment=None),
+                status_code=r.status_code,
+            )
         elif not self._hub_xsrf:
             msg = "No _xsrf cookie set in login reply from JupyterHub"
             raise JupyterProtocolError(msg)
@@ -625,17 +651,33 @@ async def auth_to_lab(self) -> None:
             url, headers=headers, follow_redirects=False
         )
         while r.is_redirect:
+            self._logger.debug(
+                "Following lab redirect looking for _xsrf cookies",
+                method=r.request.method,
+                url=r.url.copy_with(query=None, fragment=None),
+                status_code=r.status_code,
+            )
             xsrf = self._extract_xsrf(r)
-            if xsrf and xsrf != self._hub_xsrf:
+            if xsrf and xsrf != self._lab_xsrf:
                 self._lab_xsrf = xsrf
+                self._logger.debug(
+                    "Set _lab_xsrf",
+                    url=r.url.copy_with(query=None, fragment=None),
+                    status_code=r.status_code,
+                )
             next_url = urljoin(url, r.headers["Location"])
             r = await self._client.get(
                 next_url, headers=headers, follow_redirects=False
             )
         r.raise_for_status()
         xsrf = self._extract_xsrf(r)
-        if xsrf and xsrf != self._hub_xsrf:
+        if xsrf and xsrf != self._lab_xsrf:
             self._lab_xsrf = xsrf
+            self._logger.debug(
+                "Set _lab_xsrf",
+                url=r.url.copy_with(query=None, fragment=None),
+                status_code=r.status_code,
+            )
         if not self._lab_xsrf:
             msg = "No _xsrf cookie set in login reply from lab"
             raise JupyterProtocolError(msg)
@@ -681,7 +723,7 @@ def open_lab_session(
 
         Parameters
         ----------
-        nobebook_name
+        notebook_name
             Name of the notebook we will be running, which is passed to the
             session and might influence logging on the lab side. If set, the
             session type will be set to ``notebook``. If not set, the session
@@ -802,7 +844,17 @@ def _extract_xsrf(self, response: Response) -> str | None:
         """
         cookies = Cookies()
         cookies.extract_cookies(response)
-        return cookies.get("_xsrf")
+        xsrf = cookies.get("_xsrf")
+        if xsrf is not None:
+            self._logger.debug(
+                "Extracted _xsrf cookie",
+                method=response.request.method,
+                url=response.url.copy_with(query=None, fragment=None),
+                status_code=response.status_code,
+                # Logging the set-cookie header can be useful here but it
+                # leaks secrets.  Don't put that code in a release build.
+            )
+        return xsrf
 
     def _url_for(self, partial: str) -> str:
         """Construct a JupyterHub or Jupyter lab URL from a partial URL.

diff --git a/tests/business/notebookrunner_test.py b/tests/business/notebookrunner_test.py
@@ -112,6 +112,78 @@ async def test_run(
     assert "Done with this cycle of notebooks" in r.text
 
 
+@pytest.mark.asyncio
+async def test_run_debug_log(
+    client: AsyncClient, respx_mock: respx.Router, tmp_path: Path
+) -> None:
+    mock_gafaelfawr(respx_mock)
+    cwd = Path.cwd()
+
+    # Set up a notebook repository.
+    source_path = TEST_DATA_DIR / "notebooks"
+    repo_path = tmp_path / "notebooks"
+
+    shutil.copytree(str(source_path), str(repo_path))
+
+    # Set up git repo
+    await setup_git_repo(repo_path)
+
+    # Start a monkey. We have to do this in a try/finally block since the
+    # runner will change working directories, which because working
+    # directories are process-global may mess up future tests.
+    try:
+        r = await client.put(
+            "/mobu/flocks",
+            json={
+                "name": "test",
+                "count": 1,
+                "user_spec": {"username_prefix": "bot-mobu-testuser"},
+                "scopes": ["exec:notebook"],
+                "business": {
+                    "type": "NotebookRunner",
+                    "options": {
+                        "log_level": "DEBUG",
+                        "spawn_settle_time": 0,
+                        "execution_idle_time": 0,
+                        "max_executions": 1,
+                        "repo_url": str(repo_path),
+                        "repo_ref": "main",
+                        "working_directory": str(repo_path),
+                    },
+                },
+            },
+        )
+        assert r.status_code == 201
+
+        # Wait until we've finished one loop and check the results.
+        data = await wait_for_business(client, "bot-mobu-testuser1")
+        assert data == {
+            "name": "bot-mobu-testuser1",
+            "business": {
+                "failure_count": 0,
+                "name": "NotebookRunner",
+                "notebook": "test-notebook.ipynb",
+                "refreshing": False,
+                "success_count": 1,
+                "timings": ANY,
+            },
+            "state": "RUNNING",
+            "user": {
+                "scopes": ["exec:notebook"],
+                "token": ANY,
+                "username": "bot-mobu-testuser1",
+            },
+        }
+    finally:
+        os.chdir(cwd)
+
+    # Get the log and check the cell output.
+    r = await client.get("/mobu/flocks/test/monkeys/bot-mobu-testuser1/log")
+    assert r.status_code == 200
+    # Only occurs in the debug log.
+    assert "Set _hub_xsrf" in r.text
+
+
 @pytest.mark.asyncio
 async def test_run_recursive(
     client: AsyncClient, respx_mock: respx.Router, tmp_path: Path

diff --git a/tests/support/jupyter.py b/tests/support/jupyter.py
@@ -249,6 +249,16 @@ def lab_callback(self, request: Request) -> Response:
         to a callback, and then back to the lab), but it's hopefully good
         enough to test handling of cookies during redirect chains for
         capturing the ``_xsrf`` cookie.
+
+        Well...it's not realistic enough, in that the test cases passed but
+        in practice the ``_xsrf`` cookie would expire.  I think that that is
+        fixed now (similar strategy of following redirects manually and
+        grabbing the cookie at each intermediate step), but I don't
+        understand how to test it.
+
+        The actual chain is ``/user/username/lab`` to
+        ``/hub/api/oauth2/authorize`` to ``/user/username/oauth_callback``
+        and it here that the cookie is actually set.
         """
         user = self.get_user(request.headers["Authorization"])
         assert str(request.url).endswith(f"/user/{user}/callback")