Skip to content

[mgs] PendingMgsUpdate type to have a variant for the RoT #8054

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions dev-tools/reconfigurator-sp-updater/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,28 @@ fn cmd_config(
expected_active_version, expected_inactive_version,
)?;
}
PendingMgsUpdateDetails::Rot {
expected_slot_a_version,
expected_slot_b_version,
expected_active_slot,
expected_persistent_boot_preference,
expected_pending_persistent_boot_preference,
expected_transient_boot_preference,
} => {
writeln!(
&mut s,
" preconditions: expected_slot_a_version {:?}
expected_slot_b_version {:?}
expected active_slot {:?}
expected persistent_boot_preference {:?}
expected pending_persistent_boot_preference {:?}
expected transient_boot_preference {:?}",
expected_slot_a_version, expected_slot_b_version,
expected_active_slot, expected_persistent_boot_preference,
expected_pending_persistent_boot_preference,
expected_transient_boot_preference,
)?;
}
}

writeln!(&mut s)?;
Expand Down
16 changes: 16 additions & 0 deletions gateway-types/src/rot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,22 @@ pub enum RotSlot {
B,
}

impl RotSlot {
pub fn to_u16(&self) -> u16 {
match self {
RotSlot::A => 0,
RotSlot::B => 1,
}
}

pub fn toggled(&self) -> Self {
match self {
RotSlot::A => RotSlot::B,
RotSlot::B => RotSlot::A,
}
}
}

impl From<gateway_messages::RotSlotId> for RotSlot {
fn from(slot: gateway_messages::RotSlotId) -> Self {
match slot {
Expand Down
15 changes: 14 additions & 1 deletion nexus/mgs-updates/src/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
//! Drive one or more in-progress MGS-managed updates
use crate::ArtifactCache;
use crate::SpComponentUpdateHelper;
use crate::driver_update::ApplyUpdateError;
use crate::driver_update::PROGRESS_TIMEOUT;
use crate::driver_update::SpComponentUpdate;
use crate::driver_update::apply_update;
use crate::rot_updater::ReconfiguratorRotUpdater;
use crate::sp_updater::ReconfiguratorSpUpdater;
use futures::FutureExt;
use futures::future::BoxFuture;
Expand Down Expand Up @@ -302,13 +304,24 @@ impl MgsUpdateDriver {
));
info!(&log, "begin update attempt for baseboard");

let (sp_update, updater) = match &request.details {
let (sp_update, updater): (
_,
Box<dyn SpComponentUpdateHelper + Send + Sync>,
) = match &request.details {
nexus_types::deployment::PendingMgsUpdateDetails::Sp { .. } => {
let sp_update =
SpComponentUpdate::from_request(&log, &request, update_id);

(sp_update, Box::new(ReconfiguratorSpUpdater {}))
}
nexus_types::deployment::PendingMgsUpdateDetails::Rot {
..
} => {
let sp_update =
SpComponentUpdate::from_request(&log, &request, update_id);

(sp_update, Box::new(ReconfiguratorRotUpdater {}))
}
};

let baseboard_id = baseboard_id.clone();
Expand Down
11 changes: 11 additions & 0 deletions nexus/mgs-updates/src/driver_update.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,17 @@ impl SpComponentUpdate {
firmware_slot: 0,
update_id,
},
PendingMgsUpdateDetails::Rot { expected_active_slot, .. } => {
SpComponentUpdate {
log: log.clone(),
component: SpComponent::ROT,
target_sp_type: request.sp_type,
target_sp_slot: request.slot_id,
// Like the SP, we request an update to the inactive slot
firmware_slot: expected_active_slot.toggled().to_u16(),
update_id,
}
}
}
}
}
Expand Down
31 changes: 31 additions & 0 deletions nexus/mgs-updates/src/rot_updater.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,15 @@ use super::SpComponentUpdateError;
use super::UpdateProgress;
use super::common_sp_update::SpComponentUpdater;
use super::common_sp_update::deliver_update;
use crate::SpComponentUpdateHelper;
use crate::common_sp_update::PrecheckError;
use crate::common_sp_update::PrecheckStatus;
use futures::future::BoxFuture;
use gateway_client::SpComponent;
use gateway_client::types::RotSlot;
use gateway_client::types::SpComponentFirmwareSlot;
use gateway_client::types::SpType;
use nexus_types::deployment::PendingMgsUpdate;
use slog::Logger;
use slog::info;
use tokio::sync::watch;
Expand Down Expand Up @@ -192,3 +197,29 @@ impl SpComponentUpdater for RotUpdater {
&self.log
}
}

pub struct ReconfiguratorRotUpdater;
impl SpComponentUpdateHelper for ReconfiguratorRotUpdater {
/// Checks if the component is already updated or ready for update
///
/// Not yet implemented for the RoT: the body is a `todo!()` placeholder, so
/// calling this currently panics before any future is returned. None of the
/// parameters are read yet, which is why they carry a leading underscore.
fn precheck<'a>(
&'a self,
_log: &'a slog::Logger,
_mgs_clients: &'a mut MgsClients,
_update: &'a PendingMgsUpdate,
) -> BoxFuture<'a, Result<PrecheckStatus, PrecheckError>> {
// TODO-K: fill in the precheck
// NOTE(review): `todo!()` panics when reached; presumably the real check
// lands before RoT updates are actually driven -- confirm.
todo!()
}

/// Attempts once to perform any post-update actions (e.g., reset the
/// device)
///
/// Not yet implemented for the RoT: the body is a `todo!()` placeholder, so
/// calling this currently panics before any future is returned. None of the
/// parameters are read yet, which is why they carry a leading underscore.
fn post_update<'a>(
&'a self,
_log: &'a slog::Logger,
_mgs_clients: &'a mut MgsClients,
_update: &'a PendingMgsUpdate,
) -> BoxFuture<'a, Result<(), GatewayClientError>> {
// TODO-K: fill in the post_update
// NOTE(review): `todo!()` panics when reached; presumably the real
// post-update actions land before RoT updates are actually driven -- confirm.
todo!()
}
Comment on lines +204 to +224
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These will be completed in a follow up PR (step two of #7989)

}
7 changes: 6 additions & 1 deletion nexus/mgs-updates/src/sp_updater.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,12 @@ impl SpComponentUpdateHelper for ReconfiguratorSpUpdater {
let PendingMgsUpdateDetails::Sp {
expected_active_version,
expected_inactive_version,
} = &update.details;
} = &update.details
else {
unreachable!(
"pending MGS update details will always be for the SP"
);
};
if caboose.version != expected_active_version.to_string() {
return Err(PrecheckError::WrongActiveVersion {
expected: expected_active_version.clone(),
Expand Down
1 change: 1 addition & 0 deletions nexus/types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ url = { workspace = true, features = ["serde"] }

api_identity.workspace = true
gateway-client.workspace = true
gateway-types.workspace = true
internal-dns-types.workspace = true
nexus-sled-agent-shared.workspace = true
omicron-common.workspace = true
Expand Down
68 changes: 68 additions & 0 deletions nexus/types/src/deployment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ pub use blueprint_diff::BlueprintDiffSummary;
use blueprint_display::BpPendingMgsUpdates;
pub use clickhouse::ClickhouseClusterConfig;
use gateway_client::types::SpType;
use gateway_types::rot::RotSlot;
pub use network_resources::AddNetworkResourceError;
pub use network_resources::OmicronZoneExternalFloatingAddr;
pub use network_resources::OmicronZoneExternalFloatingIp;
Expand Down Expand Up @@ -1245,6 +1246,36 @@ pub enum PendingMgsUpdateDetails {
/// expected contents of the inactive slot
expected_inactive_version: ExpectedVersion,
},
/// the RoT is being updated
Rot {
// implicit: component = ROT
// implicit: firmware slot id will be the inactive slot
/// expected contents of "A" slot
expected_slot_a_version: ExpectedVersion,
/// expected contents of "B" slot
expected_slot_b_version: ExpectedVersion,
/// the slot of the currently running image
expected_active_slot: RotSlot,
// under normal operation, this should always match the active slot.
// if this field changed without the active slot changing, that might
// reflect a bad update.
//
/// the persistent boot preference written into the current authoritative
/// CFPA page (ping or pong)
expected_persistent_boot_preference: RotSlot,
// if this value changed, but none of the other information did, that could
// reflect an attempt to switch to the other slot.
//
/// the persistent boot preference written into the CFPA scratch page that
/// will become the persistent boot preference in the authoritative CFPA
/// page upon reboot, unless CFPA update of the authoritative page fails
/// for some reason.
expected_pending_persistent_boot_preference: Option<RotSlot>,
// this field is not in use yet.
//
/// override persistent preference selection for a single boot
expected_transient_boot_preference: Option<RotSlot>,
},
}

impl slog::KV for PendingMgsUpdateDetails {
Expand All @@ -1268,6 +1299,43 @@ impl slog::KV for PendingMgsUpdateDetails {
&format!("{:?}", expected_inactive_version),
)
}
PendingMgsUpdateDetails::Rot {
expected_slot_a_version,
expected_slot_b_version,
expected_active_slot,
expected_persistent_boot_preference,
expected_pending_persistent_boot_preference,
expected_transient_boot_preference,
} => {
serializer.emit_str(Key::from("component"), "rot")?;
serializer.emit_str(
Key::from("expected_slot_a_version"),
&format!("{:?}", expected_slot_a_version),
)?;
serializer.emit_str(
Key::from("expected_slot_b_version"),
&format!("{:?}", expected_slot_b_version),
)?;
serializer.emit_str(
Key::from("expected_active_slot"),
&format!("{:?}", expected_active_slot),
)?;
serializer.emit_str(
Key::from("expected_persistent_boot_preference"),
&format!("{:?}", expected_persistent_boot_preference),
)?;
serializer.emit_str(
Key::from("expected_pending_persistent_boot_preference"),
&format!(
"{:?}",
expected_pending_persistent_boot_preference
),
)?;
serializer.emit_str(
Key::from("expected_transient_boot_preference"),
&format!("{:?}", expected_transient_boot_preference),
)
}
}
}
}
Expand Down
101 changes: 101 additions & 0 deletions openapi/nexus-internal.json
Original file line number Diff line number Diff line change
Expand Up @@ -5377,6 +5377,75 @@
"expected_active_version",
"expected_inactive_version"
]
},
{
"description": "the RoT is being updated",
"type": "object",
"properties": {
"component": {
"type": "string",
"enum": [
"rot"
]
},
"expected_active_slot": {
"description": "the slot of the currently running image",
"allOf": [
{
"$ref": "#/components/schemas/RotSlot"
}
]
},
"expected_pending_persistent_boot_preference": {
"nullable": true,
"description": "the persistent boot preference written into the CFPA scratch page that will become the persistent boot preference in the authoritative CFPA page upon reboot, unless CFPA update of the authoritative page fails for some reason.",
"allOf": [
{
"$ref": "#/components/schemas/RotSlot"
}
]
},
"expected_persistent_boot_preference": {
"description": "the persistent boot preference written into the current authoritative CFPA page (ping or pong)",
"allOf": [
{
"$ref": "#/components/schemas/RotSlot"
}
]
},
"expected_slot_a_version": {
"description": "expected contents of \"A\" slot",
"allOf": [
{
"$ref": "#/components/schemas/ExpectedVersion"
}
]
},
"expected_slot_b_version": {
"description": "expected contents of \"B\" slot",
"allOf": [
{
"$ref": "#/components/schemas/ExpectedVersion"
}
]
},
"expected_transient_boot_preference": {
"nullable": true,
"description": "override persistent preference selection for a single boot",
"allOf": [
{
"$ref": "#/components/schemas/RotSlot"
}
]
}
},
"required": [
"component",
"expected_active_slot",
"expected_persistent_boot_preference",
"expected_slot_a_version",
"expected_slot_b_version"
]
}
]
},
Expand Down Expand Up @@ -6013,6 +6082,38 @@
"time"
]
},
"RotSlot": {
"oneOf": [
{
"type": "object",
"properties": {
"slot": {
"type": "string",
"enum": [
"a"
]
}
},
"required": [
"slot"
]
},
{
"type": "object",
"properties": {
"slot": {
"type": "string",
"enum": [
"b"
]
}
},
"required": [
"slot"
]
}
]
},
"RouteConfig": {
"type": "object",
"properties": {
Expand Down
Loading