Skip to content

Commit

Permalink
refine error messages about not having a propolis address
Browse files Browse the repository at this point in the history
the chronology here seems to be: Nexus learned to talk directly to
Propolis to communicate with an instance's serial console, that
"instance to Propolis address" translation got outlined into
`propolis_addr_for_instance`, then for region and region snapshot
replacment sagas we needed to also talk to the relevant Propolis. at
that point, `propolis_client_for_instance` used
`propolis_addr_for_instance` too.

this is all fine, but `propolis_addr_for_instance` kept a few serial
console-specific error messages, which would show up in places like
region replacement if an instance is stopped or shut down at an
inopportune time.

in the process, i discovered that a request for an instance's serial
console history with incorrect parameters manifests as a 500 rather than
a 400. Propolis even returns a 400, but the failure to establish a
websocket console is presumed an internal error in Nexus, and so it's
wrapped as a 500 whose contents talk about a 400! fix that to just
return a 400.
  • Loading branch information
iximeow committed Dec 17, 2024
1 parent 7ff5a28 commit 1214a47
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 16 deletions.
41 changes: 25 additions & 16 deletions nexus/src/app/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1610,11 +1610,19 @@ impl super::Nexus {
if let Some(max_bytes) = params.max_bytes {
request = request.max_bytes(max_bytes);
}
if let Some(from_start) = params.from_start {
request = request.from_start(from_start);
}
if let Some(most_recent) = params.most_recent {
request = request.most_recent(most_recent);
match (params.from_start, params.most_recent) {
(Some(from_start), None) => {
request = request.from_start(from_start);
}
(None, Some(most_recent)) => {
request = request.most_recent(most_recent);
}
_ => {
return Err(Error::invalid_request(
"Exactly one of 'from_start' \
or 'most_recent' must be specified.",
));
}
}
let data = request
.send()
Expand Down Expand Up @@ -1714,29 +1722,30 @@ impl super::Nexus {
match vmm.runtime.state {
DbVmmState::Running
| DbVmmState::Rebooting
| DbVmmState::Migrating => {
Ok((vmm.clone(), SocketAddr::new(vmm.propolis_ip.ip(), vmm.propolis_port.into())))
}
| DbVmmState::Migrating => Ok((
vmm.clone(),
SocketAddr::new(
vmm.propolis_ip.ip(),
vmm.propolis_port.into(),
),
)),

DbVmmState::Starting
| DbVmmState::Stopping
| DbVmmState::Stopped
| DbVmmState::Failed
| DbVmmState::Creating => {
| DbVmmState::Creating
| DbVmmState::Destroyed
| DbVmmState::SagaUnwound => {
Err(Error::invalid_request(format!(
"cannot connect to serial console of instance in state \"{}\"",
"cannot administer instance in state \"{}\"",
state.effective_state(),
)))
}

DbVmmState::Destroyed | DbVmmState::SagaUnwound => Err(Error::invalid_request(
"cannot connect to serial console of instance in state \"Stopped\"",
)),
}
} else {
Err(Error::invalid_request(format!(
"instance is {} and has no active serial console \
server",
"instance is {} and cannot be administered",
state.effective_state(),
)))
}
Expand Down
90 changes: 90 additions & 0 deletions nexus/tests/integration_tests/instances.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5430,6 +5430,8 @@ async fn test_instance_serial(cptestctx: &ControlPlaneTestContext) {
// Make sure we get a 404 if we try to access the serial console before creation.
let instance_serial_url =
get_instance_url(format!("{}/serial-console", instance_name).as_str());
let instance_serial_stream_url =
get_instance_url(format!("{}/serial-console", instance_name).as_str());
let error: HttpErrorResponseBody = NexusRequest::expect_failure(
client,
StatusCode::NOT_FOUND,
Expand Down Expand Up @@ -5517,6 +5519,47 @@ async fn test_instance_serial(cptestctx: &ControlPlaneTestContext) {
}
assert_eq!(&actual[..expected.len()], expected);

// Now try querying the serial output history endpoint with bad parameters.
// Check that the errors are indicative here.
let builder =
RequestBuilder::new(client, http::Method::GET, &instance_serial_url)
.expect_status(Some(http::StatusCode::BAD_REQUEST));

let error = NexusRequest::new(builder)
.authn_as(AuthnMode::PrivilegedUser)
.execute()
.await
.unwrap()
.parsed_body::<dropshot::HttpErrorResponseBody>()
.unwrap();

// We provided neither from_start nor most_recent...
assert_eq!(
error.message,
"Exactly one of 'from_start' or 'most_recent' must be specified.",
);

let builder = RequestBuilder::new(
client,
http::Method::GET,
&format!("{}&from_start=0&most_recent=0", instance_serial_url),
)
.expect_status(Some(http::StatusCode::BAD_REQUEST));

let error = NexusRequest::new(builder)
.authn_as(AuthnMode::PrivilegedUser)
.execute()
.await
.unwrap()
.parsed_body::<dropshot::HttpErrorResponseBody>()
.unwrap();

// We provided both from_start and most_recent...
assert_eq!(
error.message,
"Exactly one of 'from_start' or 'most_recent' must be specified.",
);

// Request a halt and verify both the immediate state and the finished state.
let instance = instance_next;
let instance_next =
Expand All @@ -5527,6 +5570,53 @@ async fn test_instance_serial(cptestctx: &ControlPlaneTestContext) {
> instance.runtime.time_run_state_updated
);

// As the instance is now stopping, we can't connect to its serial console
// anymore. We also can't get its cached data; the (simulated) Propolis is
// going away.

let builder = RequestBuilder::new(
client,
http::Method::GET,
&instance_serial_stream_url,
)
.expect_status(Some(http::StatusCode::BAD_REQUEST));

let error = NexusRequest::new(builder)
.authn_as(AuthnMode::PrivilegedUser)
.execute()
.await
.unwrap()
.parsed_body::<dropshot::HttpErrorResponseBody>()
.unwrap();

assert_eq!(
error.message,
"cannot administer instance in state \"stopping\"",
);

// Have to pass some offset for the cached data request otherwise we'll
// error out of Nexus early on while validating parameters, before
// discovering the serial console is unreachable.
let builder = RequestBuilder::new(
client,
http::Method::GET,
&format!("{}&from_start=0", instance_serial_url),
)
.expect_status(Some(http::StatusCode::BAD_REQUEST));

let error = NexusRequest::new(builder)
.authn_as(AuthnMode::PrivilegedUser)
.execute()
.await
.unwrap()
.parsed_body::<dropshot::HttpErrorResponseBody>()
.unwrap();

assert_eq!(
error.message,
"cannot administer instance in state \"stopping\"",
);

let instance = instance_next;
instance_simulate(nexus, &instance_id).await;
let instance_next =
Expand Down

0 comments on commit 1214a47

Please sign in to comment.