
wip - mana: save/restore for keepalive support #1033

Draft · wants to merge 29 commits into base: main

Commits (29)
ee98c83
Tests are now using NvmeTestEmulatedDevice instead
gurasinghMS Feb 28, 2025
9e698f2
Emulated Device with a custom allocator is now working
gurasinghMS Mar 3, 2025
5b9ac15
Removing requirement for inspect mut for the Allocator
gurasinghMS Mar 4, 2025
0148c4b
Fixing merge conflicts
gurasinghMS Mar 12, 2025
3b6e57a
Nvme tests can now use the PagePool allocator instead of using Device…
gurasinghMS Mar 7, 2025
bc99323
Some cleanup in the nvme tests
gurasinghMS Mar 7, 2025
5098b57
Fixed the invalid mqes and the save restore tests
gurasinghMS Mar 7, 2025
add871e
Fixing merge conflicts
gurasinghMS Mar 12, 2025
5197971
Fixing merge conflict issues
gurasinghMS Mar 11, 2025
c3b2d52
Getting a better solution for TestBacking
gurasinghMS Mar 11, 2025
a0540e3
allow dma test now working
gurasinghMS Mar 11, 2025
7aa71de
Should now be working for all invocations to the EmulatedDevice
gurasinghMS Mar 11, 2025
af135bb
Updating cargo file to include sparse_mmap everywhere in user_driver
gurasinghMS Mar 12, 2025
d4b76f2
Fixing stuff based on linter
gurasinghMS Mar 12, 2025
8c7df08
Fixing PR build errors
gurasinghMS Mar 12, 2025
02063f9
Fixing unit test build errors
gurasinghMS Mar 12, 2025
38180b9
Fixing bug in build
gurasinghMS Mar 12, 2025
3f56d4c
Appeasing the linter
gurasinghMS Mar 12, 2025
b6efa8a
More appeasement of clippy
gurasinghMS Mar 12, 2025
854fb9f
xtask fmt fix
gurasinghMS Mar 12, 2025
e7ef177
Removed SparseMapBacking and converted backing to a generic instead
gurasinghMS Mar 12, 2025
21f0450
Re-organized emulated.rs file after previous refactor
gurasinghMS Mar 12, 2025
cbf5b1b
Comment updated
gurasinghMS Mar 12, 2025
1ecf2ab
fix build
gurasinghMS Mar 12, 2025
43f6af0
Merge branch 'emu_device' into gdma_save_restore
justus-camp-microsoft Mar 13, 2025
150dcb3
minimized implementation of gdma save/restore, unit test failing
justus-camp-microsoft Mar 13, 2025
0a2972b
e2e enablement code with vmm test
justus-camp-microsoft Mar 14, 2025
88cc6de
resolve doc warnings
justus-camp-microsoft Mar 14, 2025
06a477e
enablement fixes
justus-camp-microsoft Mar 14, 2025
2 changes: 2 additions & 0 deletions Cargo.lock
@@ -4535,9 +4535,11 @@ dependencies = [
"guid",
"inspect",
"inspect_counters",
"memory_range",
"mesh",
"nvme",
"nvme_spec",
"page_pool_alloc",
"pal_async",
"parking_lot",
"pci_core",
4 changes: 4 additions & 0 deletions openhcl/host_fdt_parser/src/lib.rs
@@ -237,6 +237,8 @@ pub struct ParsedDeviceTree<
pub nvme_keepalive: bool,
/// The physical address of the VTL0 alias mapping, if one is configured.
pub vtl0_alias_map: Option<u64>,
/// Indicates that the host supports MANA keep-alive.
pub mana_keepalive: bool,
}

/// The memory allocation mode provided by the host. This determines how OpenHCL
@@ -316,6 +318,7 @@ impl<
entropy: None,
device_dma_page_count: None,
nvme_keepalive: false,
mana_keepalive: false,
vtl0_alias_map: None,
}
}
@@ -735,6 +738,7 @@ impl<
device_dma_page_count: _,
nvme_keepalive: _,
vtl0_alias_map: _,
mana_keepalive: _,
} = storage;

*device_tree_size = parser.total_size;
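Both this hunk and the `dt.rs` change below consume the new field through an exhaustive destructure of the parsed structure. A minimal sketch of that idiom, with a hypothetical two-field struct standing in for `ParsedDeviceTree`:

```rust
// Illustrative only: `Parsed` stands in for ParsedDeviceTree.
struct Parsed {
    nvme_keepalive: bool,
    mana_keepalive: bool,
}

fn validate(storage: &Parsed) {
    // Every field is named and there is no `..` rest pattern, so adding
    // a field to `Parsed` without updating this destructure becomes a
    // compile error rather than a silently unhandled field.
    let Parsed {
        nvme_keepalive: _,
        mana_keepalive: _,
    } = storage;
}

fn main() {
    validate(&Parsed {
        nvme_keepalive: false,
        mana_keepalive: false,
    });
}
```

This is why the PR touches the `= storage;` destructure sites even though they ignore the new field: the compiler forces each consumer to acknowledge `mana_keepalive` explicitly.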
2 changes: 2 additions & 0 deletions openhcl/openhcl_boot/src/host_params/dt.rs
@@ -528,6 +528,7 @@ impl PartitionInfo {
entropy,
vtl0_alias_map: _,
nvme_keepalive,
mana_keepalive,
} = storage;

assert!(!vtl2_used_ranges.is_empty());
@@ -550,6 +551,7 @@
*gic = parsed.gic.clone();
*entropy = parsed.entropy.clone();
*nvme_keepalive = parsed.nvme_keepalive;
*mana_keepalive = parsed.mana_keepalive;

Ok(Some(storage))
}
3 changes: 3 additions & 0 deletions openhcl/openhcl_boot/src/host_params/mod.rs
@@ -94,6 +94,8 @@ pub struct PartitionInfo {
pub vtl0_alias_map: Option<u64>,
/// Host is compatible with DMA preservation / NVMe keep-alive.
pub nvme_keepalive: bool,
/// Host is compatible with MANA keep-alive.
pub mana_keepalive: bool,
}

impl PartitionInfo {
@@ -125,6 +127,7 @@ impl PartitionInfo {
entropy: None,
vtl0_alias_map: None,
nvme_keepalive: false,
mana_keepalive: false,
}
}

5 changes: 5 additions & 0 deletions openhcl/openhcl_boot/src/main.rs
@@ -234,6 +234,10 @@ fn build_kernel_command_line(
write!(cmdline, "OPENHCL_NVME_KEEP_ALIVE=1 ")?;
}

if partition_info.mana_keepalive && !partition_info.vtl2_pool_memory.is_empty() {
write!(cmdline, "OPENHCL_MANA_KEEP_ALIVE=1 ")?;
}

if let Some(sidecar) = sidecar {
write!(cmdline, "{} ", sidecar.kernel_command_line())?;
}
@@ -931,6 +935,7 @@ mod test {
entropy: None,
vtl0_alias_map: None,
nvme_keepalive: false,
mana_keepalive: false,
}
}

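The guard mirrors the existing NVMe keep-alive line: the flag is only forwarded when the host advertises support and a VTL2 private pool exists to preserve DMA memory across the servicing boot. A hedged sketch of how the resulting `OPENHCL_MANA_KEEP_ALIVE=1` setting could be consumed later, assuming it is surfaced to usermode as an environment variable like other `OPENHCL_*` settings (the helper name is illustrative, not the actual OpenHCL API):

```rust
// Hypothetical sketch: openhcl_boot above only appends the flag to the
// kernel command line; this shows one way the usermode side could read
// it back if exposed as an environment variable.
fn mana_keep_alive_requested() -> bool {
    std::env::var("OPENHCL_MANA_KEEP_ALIVE")
        .map(|v| v == "1")
        .unwrap_or(false)
}

fn main() {
    println!("mana keep-alive: {}", mana_keep_alive_requested());
}
```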
2 changes: 1 addition & 1 deletion openhcl/openhcl_dma_manager/src/lib.rs
@@ -369,7 +369,7 @@ impl OpenhclDmaManager {

if let Some(private_pool) = &self.private_pool {
private_pool
- .validate_restore(false)
+ .validate_restore(true)
.context("failed to validate restore for private pool")?
}

68 changes: 62 additions & 6 deletions openhcl/underhill_core/src/dispatch/mod.rs
@@ -33,6 +33,7 @@ use hyperv_ic_resources::shutdown::ShutdownRpc;
use hyperv_ic_resources::shutdown::ShutdownType;
use igvm_defs::MemoryMapEntryType;
use inspect::Inspect;
use mana_driver::save_restore::ManaDeviceSavedState;
use mesh::CancelContext;
use mesh::MeshPayload;
use mesh::error::RemoteError;
@@ -106,6 +107,7 @@ pub trait LoadedVmNetworkSettings: Inspect {
threadpool: &AffinitizedThreadpool,
uevent_listener: &UeventListener,
servicing_netvsp_state: &Option<Vec<crate::emuplat::netvsp::SavedState>>,
servicing_mana_state: &Option<Vec<ManaDeviceSavedState>>,
partition: Arc<UhPartition>,
state_units: &StateUnits,
vmbus_server: &Option<VmbusServerHandle>,
@@ -119,6 +121,11 @@ pub trait LoadedVmNetworkSettings: Inspect {
/// Callback after stopping the VM and all workers, in preparation for a VTL2 reboot.
async fn unload_for_servicing(&mut self);

/// Saves the state of any network devices that support keep-alive
/// (currently MANA), returning per-device results.
async fn save(
&mut self,
mana_keepalive_flag: bool,
) -> Option<Vec<Result<ManaDeviceSavedState, anyhow::Error>>>;

/// Handles packet capture related operations.
async fn packet_capture(
&self,
@@ -181,6 +188,7 @@ pub(crate) struct LoadedVm {
pub _periodic_telemetry_task: Task<()>,

pub nvme_keep_alive: bool,
pub mana_keep_alive: bool,
pub test_configuration: Option<TestScenarioConfig>,
pub dma_manager: OpenhclDmaManager,
}
@@ -268,7 +276,7 @@ impl LoadedVm {
WorkerRpc::Restart(rpc) => {
let state = async {
let running = self.stop().await;
- match self.save(None, false).await {
+ match self.save(None, false, false).await {
Ok(servicing_state) => Some((rpc, servicing_state)),
Err(err) => {
if running {
@@ -330,7 +338,7 @@ impl LoadedVm {
UhVmRpc::Save(rpc) => {
rpc.handle_failable(async |()| {
let running = self.stop().await;
- let r = self.save(None, false).await;
+ let r = self.save(None, false, false).await;
if running {
self.start(None).await;
}
@@ -442,6 +450,11 @@ impl LoadedVm {
std::future::pending::<()>().await;
}

tracing::info!(
"handle_servicing_request mana: {:?}",
capabilities_flags.enable_mana_keepalive()
);

let running = self.state_units.is_running();
let success = match self
.handle_servicing_inner(correlation_id, deadline, capabilities_flags)
@@ -492,9 +505,22 @@
anyhow::bail!("Servicing is not yet supported for isolated VMs");
}

tracing::info!("self.nvme_keepalive: {:?}", self.nvme_keep_alive);
tracing::info!("self.mana_keepalive: {:?}", self.mana_keep_alive);

// NOTE: These are set via the corresponding env args, as these features
// are experimental.
let nvme_keepalive = self.nvme_keep_alive && capabilities_flags.enable_nvme_keepalive();
let mana_keepalive = self.mana_keep_alive && capabilities_flags.enable_mana_keepalive();

tracing::info!(
"handle_servicing_inner nvme_keepalive: {:?}",
nvme_keepalive
);
tracing::info!(
"handle_servicing_inner mana_keepalive: {:?}",
mana_keepalive
);

// Do everything before the log flush under a span.
let r = async {
@@ -509,7 +535,7 @@
anyhow::bail!("cannot service underhill while paused");
}

- let mut state = self.save(Some(deadline), nvme_keepalive).await?;
+ let mut state = self.save(Some(deadline), nvme_keepalive, mana_keepalive).await?;
state.init_state.correlation_id = Some(correlation_id);

// Unload any network devices.
@@ -662,23 +688,51 @@
async fn save(
&mut self,
_deadline: Option<std::time::Instant>,
vf_keepalive_flag: bool,
nvme_keepalive_flag: bool,
mana_keepalive_flag: bool,
) -> anyhow::Result<ServicingState> {
assert!(!self.state_units.is_running());

tracing::info!(
"keepalive flags - nvme: {:?}, mana: {:?}",
nvme_keepalive_flag,
mana_keepalive_flag
);

let emuplat = (self.emuplat_servicing.save()).context("emuplat save failed")?;

// Only save NVMe state when there are NVMe controllers and keep alive
// was enabled.
let nvme_state = if let Some(n) = &self.nvme_manager {
- n.save(vf_keepalive_flag)
+ n.save(nvme_keepalive_flag)
.instrument(tracing::info_span!("nvme_manager_save"))
.await
.map(|s| NvmeSavedState { nvme_state: s })
} else {
None
};

let mana_state = if let Some(network_settings) = &mut self.network_settings {
tracing::info!("saving mana state: {:?}", mana_keepalive_flag);
let results = network_settings.save(mana_keepalive_flag).await;
let mut saved_states = Vec::new();

if let Some(results) = results {
for result in results {
match result {
Ok(state) => saved_states.push(state),
Err(e) => tracing::warn!("Error saving MANA device state: {:#}", e),
}
}
}

Some(saved_states)
} else {
None
};

tracing::info!("saved mana_state: {:?}", mana_state);

let units = self.save_units().await.context("state unit save failed")?;
let vmgs = self
.vmgs_thin_client
@@ -689,7 +743,7 @@
// Only save dma manager state if we are expected to keep VF devices
// alive across save. Otherwise, don't persist the state at all, as
// there should be no live DMA across save.
- let dma_manager_state = if vf_keepalive_flag {
+ let dma_manager_state = if nvme_keepalive_flag || mana_keepalive_flag {
use vmcore::save_restore::SaveRestore;
Some(self.dma_manager.save().context("dma_manager save failed")?)
} else {
Expand All @@ -713,6 +767,7 @@ impl LoadedVm {
vmgs: (vmgs, self.vmgs_disk_metadata.clone()),
overlay_shutdown_device: self.shutdown_relay.is_some(),
nvme_state,
mana_state,
dma_manager_state,
vmbus_client,
},
@@ -776,6 +831,7 @@ impl LoadedVm {
threadpool,
&self.uevent_listener,
&None, // VF getting added; no existing state
+ &None, // VF getting added; no existing state
self.partition.clone(),
&self.state_units,
&self.vmbus_server,
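For reference, a self-contained sketch of what an implementor of the new `LoadedVmNetworkSettings::save` hook could look like, based only on the trait signature added above. `MockDevice`, `MockNetworkSettings`, the `mana_devices` field, and the stand-in `ManaDeviceSavedState` are all hypothetical; the real types live in `mana_driver::save_restore` and the networking settings code.

```rust
// Stand-in for mana_driver::save_restore::ManaDeviceSavedState.
struct ManaDeviceSavedState;

struct MockDevice;
impl MockDevice {
    async fn save(&mut self) -> Result<ManaDeviceSavedState, anyhow::Error> {
        Ok(ManaDeviceSavedState)
    }
}

struct MockNetworkSettings {
    mana_devices: Vec<MockDevice>,
}

impl MockNetworkSettings {
    async fn save(
        &mut self,
        mana_keepalive_flag: bool,
    ) -> Option<Vec<Result<ManaDeviceSavedState, anyhow::Error>>> {
        // Keep-alive disabled: return None so LoadedVm::save records no
        // MANA state at all, rather than an empty list.
        if !mana_keepalive_flag {
            return None;
        }
        // Save each device independently; per the LoadedVm::save loop
        // above, the caller logs and skips individual failures instead
        // of aborting the whole servicing save.
        let mut results = Vec::new();
        for device in self.mana_devices.iter_mut() {
            results.push(device.save().await);
        }
        Some(results)
    }
}
```

This matches how `LoadedVm::save` consumes the hook: `None` means "no network settings or keep-alive off", while per-device `Err` results are logged with `tracing::warn!` and dropped from the saved state.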