tickets/DM-45523: walk redirects for Lab manually, extracting xsrf along the way #365

Merged: 12 commits, Aug 6, 2024
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -7,7 +7,7 @@ repos:
- id: check-toml

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.5.1
rev: v0.5.6
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
10 changes: 10 additions & 0 deletions changelog.d/20240806_123848_athornton_DM_45523.md
@@ -0,0 +1,10 @@
<!-- Delete the sections that don't apply -->

### New features

- Allow specification of log level for individual flocks

### Bug fixes

- Follow redirections by hand for hub logins as well as lab ones to get XSRF
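
The bug-fix entry above is the core of this PR: rather than letting httpx follow redirects during hub login, the client now walks each hop itself and harvests any `_xsrf` cookie set along the way, as it already does for lab logins. A minimal, self-contained sketch of that pattern (the function name and structure are illustrative only; the real implementation is in `src/mobu/storage/nublado.py` further down):

```python
from urllib.parse import urljoin

from httpx import AsyncClient, Cookies


async def fetch_collecting_xsrf(client: AsyncClient, url: str) -> str | None:
    """Follow redirects by hand, harvesting any ``_xsrf`` cookie on the way.

    Illustrative only: mobu's real client also distinguishes hub and lab
    XSRF tokens and logs each extraction.
    """
    xsrf: str | None = None
    r = await client.get(url, follow_redirects=False)
    while r.is_redirect:
        # Inspect each intermediate response ourselves; per this PR, the
        # cookie is not picked up for us when redirects are followed
        # automatically.
        cookies = Cookies()
        cookies.extract_cookies(r)
        xsrf = cookies.get("_xsrf") or xsrf
        next_url = urljoin(url, r.headers["Location"])
        r = await client.get(next_url, follow_redirects=False)
    r.raise_for_status()
    # The final response may set the cookie as well.
    cookies = Cookies()
    cookies.extract_cookies(r)
    return cookies.get("_xsrf") or xsrf
```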

3 changes: 3 additions & 0 deletions requirements/dev.in
@@ -25,3 +25,6 @@ types-requests

# Documentation
scriv[toml]

# Not a dependency on Mac, but is on Linux
greenlet
400 changes: 202 additions & 198 deletions requirements/dev.txt

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion requirements/main.in
@@ -22,7 +22,6 @@ pydantic-settings
pyvo
pyyaml
safir>=6.1.0
# -e /home/danfuchs/src/safir
shortuuid
structlog
websockets
365 changes: 92 additions & 273 deletions requirements/main.txt

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/mobu/dependencies/context.py
@@ -34,7 +34,7 @@ class RequestContext:
"""Incoming request."""

logger: BoundLogger
"""Request loger, rebound with discovered context."""
"""Request logger, rebound with discovered context."""

manager: FlockManager
"""Global singleton flock manager."""
8 changes: 7 additions & 1 deletion src/mobu/models/business/base.py
@@ -4,6 +4,7 @@
from typing import Annotated

from pydantic import BaseModel, ConfigDict, Field, PlainSerializer
from safir.logging import LogLevel

from ..timings import StopwatchData

@@ -35,12 +36,17 @@ class BusinessOptions(BaseModel):
timedelta(minutes=1),
title="How long to wait between business executions",
description=(
"AFter each loop executing monkey business, the monkey will"
"After each loop executing monkey business, the monkey will"
" pause for this long"
),
examples=[60],
)

log_level: LogLevel = Field(
LogLevel.INFO,
title="Log level for this monkey business",
)

model_config = ConfigDict(extra="forbid")


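The new `log_level` field above is set per business, so a single flock can run at debug verbosity without changing the global configuration. A hedged example of a flock payload using it, modeled on the test added later in this PR (the endpoint and option names mirror that test; the flock name and repository URL are placeholders):

```python
# Example flock specification exercising the new per-flock log level.
# Mirrors the payload in tests/business/notebookrunner_test.py below.
flock_spec = {
    "name": "example",
    "count": 1,
    "user_spec": {"username_prefix": "bot-mobu-testuser"},
    "scopes": ["exec:notebook"],
    "business": {
        "type": "NotebookRunner",
        "options": {
            "log_level": "DEBUG",  # new field; defaults to INFO when omitted
            "repo_url": "https://example.org/notebooks.git",  # placeholder
            "repo_ref": "main",
        },
    },
}
# Sent with, e.g.: await client.put("/mobu/flocks", json=flock_spec)
```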
1 change: 1 addition & 0 deletions src/mobu/services/business/nublado.py
@@ -188,6 +188,7 @@ async def execution_idle(self) -> bool:
return await self.pause(self.options.execution_idle_time)

async def shutdown(self) -> None:
await self.hub_login()
Member: What about putting a hub_login call in delete_lab? Tradeoff is that we may call it a few extra times.

Member Author: I think I prefer it be explicit.

await self.delete_lab()

async def idle(self) -> None:
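For reference on the exchange above, a rough sketch of the alternative the reviewer floats, folding the hub login into `delete_lab` so callers cannot forget it; the PR keeps the explicit call instead. The class and helper names here are hypothetical, not mobu's actual code.

```python
# Hypothetical variant only; not what this PR does.
class NubladoBusinessVariant:
    async def hub_login(self) -> None:
        ...  # authenticate to JupyterHub, refreshing the XSRF token

    async def _delete_lab_via_hub(self) -> None:
        ...  # issue the delete request to the hub API

    async def delete_lab(self) -> None:
        # Reviewer's suggestion: always re-authenticate first, at the cost
        # of occasionally logging in a few extra times.
        await self.hub_login()
        await self._delete_lab_via_hub()

    async def shutdown(self) -> None:
        # With the login folded in, shutdown no longer needs its own call.
        await self.delete_lab()
```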
3 changes: 2 additions & 1 deletion src/mobu/services/monkey.py
@@ -72,6 +72,7 @@ def __init__(
self._name = name
self._flock = flock
self._restart = business_config.restart
self._log_level = business_config.options.log_level
self._http_client = http_client
self._user = user

@@ -276,7 +277,7 @@ def _build_logger(self, logfile: _TemporaryFileWrapper) -> BoundLogger:
file_handler.setFormatter(formatter)
logger = logging.getLogger(self._name)
logger.handlers = []
logger.setLevel(logging.INFO)
logger.setLevel(self._log_level.value)
logger.addHandler(file_handler)
logger.propagate = False
if config.profile == Profile.development:
68 changes: 60 additions & 8 deletions src/mobu/storage/nublado.py
@@ -556,19 +556,20 @@ def __init__(
self._base_url = base_url
self._logger = logger.bind(user=user.username)

# Construct a connection pool to use for requets to JupyterHub. We
# Construct a connection pool to use for requests to JupyterHub. We
# have to create a separate connection pool for every monkey, since
# each will get user-specific cookies set by JupyterHub. If we shared
# connection pools, monkeys would overwrite each other's cookies and
# get authentication failures from labs.
headers = {"Authorization": f"Bearer {user.token}"}
self._client = AsyncClient(
headers=headers,
follow_redirects=True,
headers=headers,
timeout=timeout.total_seconds(),
)
self._hub_xsrf: str | None = None
self._lab_xsrf: str | None = None
self._logger.debug("Created new NubladoClient")

async def close(self) -> None:
"""Close the underlying HTTP connection pool."""
@@ -591,11 +592,36 @@ async def auth_to_hub(self) -> None:
Raised if no ``_xsrf`` cookie was set in the reply from the lab.
"""
url = self._url_for("hub/home")
r = await self._client.get(url)
r = await self._client.get(url, follow_redirects=False)
# As with auth_to_lab, manually extract from cookies at each
# redirection, because httpx doesn't do that if following redirects
# automatically.
while r.is_redirect:
self._logger.debug(
"Following hub redirect looking for _xsrf cookies",
method=r.request.method,
url=r.url.copy_with(query=None, fragment=None),
status_code=r.status_code,
)
xsrf = self._extract_xsrf(r)
if xsrf and xsrf != self._hub_xsrf:
self._hub_xsrf = xsrf
self._logger.debug(
"Set _hub_xsrf",
url=r.url.copy_with(query=None, fragment=None),
status_code=r.status_code,
)
next_url = urljoin(url, r.headers["Location"])
r = await self._client.get(next_url, follow_redirects=False)
r.raise_for_status()
xsrf = self._extract_xsrf(r)
if xsrf:
if xsrf and xsrf != self._hub_xsrf:
self._hub_xsrf = xsrf
self._logger.debug(
"Set _hub_xsrf",
url=r.url.copy_with(query=None, fragment=None),
status_code=r.status_code,
)
elif not self._hub_xsrf:
msg = "No _xsrf cookie set in login reply from JupyterHub"
raise JupyterProtocolError(msg)
@@ -625,17 +651,33 @@ async def auth_to_lab(self) -> None:
url, headers=headers, follow_redirects=False
)
while r.is_redirect:
self._logger.debug(
"Following lab redirect looking for _xsrf cookies",
method=r.request.method,
url=r.url.copy_with(query=None, fragment=None),
status_code=r.status_code,
)
xsrf = self._extract_xsrf(r)
if xsrf and xsrf != self._hub_xsrf:
if xsrf and xsrf != self._lab_xsrf:
self._lab_xsrf = xsrf
self._logger.debug(
"Set _lab_xsrf",
url=r.url.copy_with(query=None, fragment=None),
status_code=r.status_code,
)
next_url = urljoin(url, r.headers["Location"])
r = await self._client.get(
next_url, headers=headers, follow_redirects=False
)
r.raise_for_status()
xsrf = self._extract_xsrf(r)
if xsrf and xsrf != self._hub_xsrf:
if xsrf and xsrf != self._lab_xsrf:
self._lab_xsrf = xsrf
self._logger.debug(
"Set _lab_xsrf",
url=r.url.copy_with(query=None, fragment=None),
status_code=r.status_code,
)
if not self._lab_xsrf:
msg = "No _xsrf cookie set in login reply from lab"
raise JupyterProtocolError(msg)
@@ -681,7 +723,7 @@ def open_lab_session(

Parameters
----------
nobebook_name
notebook_name
Name of the notebook we will be running, which is passed to the
session and might influence logging on the lab side. If set, the
session type will be set to ``notebook``. If not set, the session
@@ -802,7 +844,17 @@ def _extract_xsrf(self, response: Response) -> str | None:
"""
cookies = Cookies()
cookies.extract_cookies(response)
return cookies.get("_xsrf")
xsrf = cookies.get("_xsrf")
if xsrf is not None:
self._logger.debug(
"Extracted _xsrf cookie",
method=response.request.method,
url=response.url.copy_with(query=None, fragment=None),
status_code=response.status_code,
# Logging the set-cookie header can be useful here but it
# leaks secrets. Don't put that code in a release build.
)
Comment on lines +848 to +856
Member: Would we want to put this in each auth method so it would be easier to tell if it happened during the hub auth or the lab auth? It would be duplicated, but maybe more clear?

Member Author: Well, if we end up using it, we log that (with whether it is lab or hub as well). I'm not sure we even want to log it in the extraction step. Maybe I'll just leave the comment.

Member Author: Eh, I'm going to leave this alone for now. If it's too chatty when we need to turn on debug for real, we can change it then.

return xsrf

def _url_for(self, partial: str) -> str:
"""Construct a JupyterHub or Jupyter lab URL from a partial URL.
72 changes: 72 additions & 0 deletions tests/business/notebookrunner_test.py
@@ -112,6 +112,78 @@ async def test_run(
assert "Done with this cycle of notebooks" in r.text


@pytest.mark.asyncio
async def test_run_debug_log(
client: AsyncClient, respx_mock: respx.Router, tmp_path: Path
) -> None:
mock_gafaelfawr(respx_mock)
cwd = Path.cwd()

# Set up a notebook repository.
source_path = TEST_DATA_DIR / "notebooks"
repo_path = tmp_path / "notebooks"

shutil.copytree(str(source_path), str(repo_path))

# Set up git repo
await setup_git_repo(repo_path)

# Start a monkey. We have to do this in a try/finally block since the
# runner will change working directories, which, because working
# directories are process-global, may mess up future tests.
try:
r = await client.put(
"/mobu/flocks",
json={
"name": "test",
"count": 1,
"user_spec": {"username_prefix": "bot-mobu-testuser"},
"scopes": ["exec:notebook"],
"business": {
"type": "NotebookRunner",
"options": {
"log_level": "DEBUG",
"spawn_settle_time": 0,
"execution_idle_time": 0,
"max_executions": 1,
"repo_url": str(repo_path),
"repo_ref": "main",
"working_directory": str(repo_path),
},
},
},
)
assert r.status_code == 201

# Wait until we've finished one loop and check the results.
data = await wait_for_business(client, "bot-mobu-testuser1")
assert data == {
"name": "bot-mobu-testuser1",
"business": {
"failure_count": 0,
"name": "NotebookRunner",
"notebook": "test-notebook.ipynb",
"refreshing": False,
"success_count": 1,
"timings": ANY,
},
"state": "RUNNING",
"user": {
"scopes": ["exec:notebook"],
"token": ANY,
"username": "bot-mobu-testuser1",
},
}
finally:
os.chdir(cwd)

# Get the log and check the cell output.
r = await client.get("/mobu/flocks/test/monkeys/bot-mobu-testuser1/log")
assert r.status_code == 200
# Only occurs in the debug log.
assert "Set _hub_xsrf" in r.text


@pytest.mark.asyncio
async def test_run_recursive(
client: AsyncClient, respx_mock: respx.Router, tmp_path: Path
23 changes: 14 additions & 9 deletions tests/support/jupyter.py
@@ -224,12 +224,18 @@ def lab(self, request: Request) -> Response:
return Response(500, request=request)
state = self.state.get(user, JupyterState.LOGGED_OUT)
if state == JupyterState.LAB_RUNNING:
# In real life, there's another redirect to
# /hub/api/oauth2/authorize, which doesn't set a cookie,
# and then redirects to /user/username/oauth_callback.
#
# We're skipping that one since it doesn't change the
# client state at all.
xsrf = f"_xsrf={self._lab_xsrf}"
return Response(
302,
request=request,
headers={
"Location": _url(f"user/{user}/callback"),
"Location": _url(f"user/{user}/oauth_callback"),
"Set-Cookie": xsrf,
},
)
@@ -243,15 +249,14 @@
def lab_callback(self, request: Request) -> Response:
"""Simulate not setting the ``_xsrf`` cookie on first request.

This implements a redirect from ``/user/username/lab`` to
``/user/username/callback``, followed by a 200, which is not how
the lab does it (it has a much more complex redirect to the hub, back
to a callback, and then back to the lab), but it's hopefully good
enough to test handling of cookies during redirect chains for
capturing the ``_xsrf`` cookie.
This happens at the end of a chain from ``/user/username/lab`` to
``/hub/api/oauth2/authorize``, which then issues a redirect to
``/user/username/oauth_callback``. It is in the final redirect
that the ``_xsrf`` cookie is actually set, and then it returns
a 200.
"""
user = self.get_user(request.headers["Authorization"])
assert str(request.url).endswith(f"/user/{user}/callback")
assert str(request.url).endswith(f"/user/{user}/oauth_callback")
return Response(200, request=request)

def delete_lab(self, request: Request) -> Response:
@@ -439,7 +444,7 @@ def mock_jupyter(respx_mock: respx.Router) -> MockJupyter:
respx_mock.delete(url__regex=regex).mock(side_effect=mock.delete_lab)
regex = _url_regex(r"user/[^/]+/lab")
respx_mock.get(url__regex=regex).mock(side_effect=mock.lab)
regex = _url_regex(r"user/[^/]+/callback")
regex = _url_regex(r"user/[^/]+/oauth_callback")
respx_mock.get(url__regex=regex).mock(side_effect=mock.lab_callback)
regex = _url_regex("user/[^/]+/api/sessions")
respx_mock.post(url__regex=regex).mock(side_effect=mock.create_session)