diff --git a/Cargo.lock b/Cargo.lock
index 309090278e..07f4c3dc1b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4535,9 +4535,11 @@ dependencies = [
   "guid",
   "inspect",
   "inspect_counters",
+  "memory_range",
   "mesh",
   "nvme",
   "nvme_spec",
+  "page_pool_alloc",
   "pal_async",
   "parking_lot",
   "pci_core",
diff --git a/openhcl/host_fdt_parser/src/lib.rs b/openhcl/host_fdt_parser/src/lib.rs
index 3f2e16183f..15b91e4b53 100644
--- a/openhcl/host_fdt_parser/src/lib.rs
+++ b/openhcl/host_fdt_parser/src/lib.rs
@@ -237,6 +237,8 @@ pub struct ParsedDeviceTree<
     pub nvme_keepalive: bool,
     /// The physical address of the VTL0 alias mapping, if one is configured.
     pub vtl0_alias_map: Option<u64>,
+    /// Indicates that the host supports MANA keep-alive.
+    pub mana_keepalive: bool,
 }

 /// The memory allocation mode provided by the host. This determines how OpenHCL
@@ -316,6 +318,7 @@ impl<
             entropy: None,
             device_dma_page_count: None,
             nvme_keepalive: false,
+            mana_keepalive: false,
             vtl0_alias_map: None,
         }
     }
@@ -735,6 +738,7 @@ impl<
             device_dma_page_count: _,
             nvme_keepalive: _,
             vtl0_alias_map: _,
+            mana_keepalive: _,
         } = storage;

         *device_tree_size = parser.total_size;
diff --git a/openhcl/openhcl_boot/src/host_params/dt.rs b/openhcl/openhcl_boot/src/host_params/dt.rs
index a1a97ccc1b..03a92c1f52 100644
--- a/openhcl/openhcl_boot/src/host_params/dt.rs
+++ b/openhcl/openhcl_boot/src/host_params/dt.rs
@@ -528,6 +528,7 @@ impl PartitionInfo {
             entropy,
             vtl0_alias_map: _,
             nvme_keepalive,
+            mana_keepalive,
         } = storage;

         assert!(!vtl2_used_ranges.is_empty());
@@ -550,6 +551,7 @@ impl PartitionInfo {
         *gic = parsed.gic.clone();
         *entropy = parsed.entropy.clone();
         *nvme_keepalive = parsed.nvme_keepalive;
+        *mana_keepalive = parsed.mana_keepalive;

         Ok(Some(storage))
     }
diff --git a/openhcl/openhcl_boot/src/host_params/mod.rs b/openhcl/openhcl_boot/src/host_params/mod.rs
index ea9ac0d422..aedce63a24 100644
--- a/openhcl/openhcl_boot/src/host_params/mod.rs
+++ b/openhcl/openhcl_boot/src/host_params/mod.rs
@@ -94,6 +94,8 @@ pub struct PartitionInfo {
     pub vtl0_alias_map: Option<u64>,
     /// Host is compatible with DMA preservation / NVMe keep-alive.
     pub nvme_keepalive: bool,
+    /// Host is compatible with MANA keep-alive.
+    pub mana_keepalive: bool,
 }

 impl PartitionInfo {
@@ -125,6 +127,7 @@ impl PartitionInfo {
             entropy: None,
             vtl0_alias_map: None,
             nvme_keepalive: false,
+            mana_keepalive: false,
         }
     }
diff --git a/openhcl/openhcl_boot/src/main.rs b/openhcl/openhcl_boot/src/main.rs
index 4e871421b5..dfb6c84977 100644
--- a/openhcl/openhcl_boot/src/main.rs
+++ b/openhcl/openhcl_boot/src/main.rs
@@ -234,6 +234,10 @@ fn build_kernel_command_line(
         write!(cmdline, "OPENHCL_NVME_KEEP_ALIVE=1 ")?;
     }

+    if partition_info.mana_keepalive && !partition_info.vtl2_pool_memory.is_empty() {
+        write!(cmdline, "OPENHCL_MANA_KEEP_ALIVE=1 ")?;
+    }
+
     if let Some(sidecar) = sidecar {
         write!(cmdline, "{} ", sidecar.kernel_command_line())?;
     }
@@ -931,6 +935,7 @@ mod test {
             entropy: None,
             vtl0_alias_map: None,
             nvme_keepalive: false,
+            mana_keepalive: false,
         }
     }
diff --git a/openhcl/openhcl_dma_manager/src/lib.rs b/openhcl/openhcl_dma_manager/src/lib.rs
index f5e55cc631..0bcfe3cb43 100644
--- a/openhcl/openhcl_dma_manager/src/lib.rs
+++ b/openhcl/openhcl_dma_manager/src/lib.rs
@@ -369,7 +369,7 @@ impl OpenhclDmaManager {
         if let Some(private_pool) = &self.private_pool {
             private_pool
-                .validate_restore(false)
+                .validate_restore(true)
                 .context("failed to validate restore for private pool")?
         }
diff --git a/openhcl/underhill_core/src/dispatch/mod.rs b/openhcl/underhill_core/src/dispatch/mod.rs
index bf3c460597..7e53339707 100644
--- a/openhcl/underhill_core/src/dispatch/mod.rs
+++ b/openhcl/underhill_core/src/dispatch/mod.rs
@@ -33,6 +33,7 @@ use hyperv_ic_resources::shutdown::ShutdownRpc;
 use hyperv_ic_resources::shutdown::ShutdownType;
 use igvm_defs::MemoryMapEntryType;
 use inspect::Inspect;
+use mana_driver::save_restore::ManaDeviceSavedState;
 use mesh::CancelContext;
 use mesh::MeshPayload;
 use mesh::error::RemoteError;
@@ -106,6 +107,7 @@ pub trait LoadedVmNetworkSettings: Inspect {
         threadpool: &AffinitizedThreadpool,
         uevent_listener: &UeventListener,
         servicing_netvsp_state: &Option>,
+        servicing_mana_state: &Option<Vec<ManaDeviceSavedState>>,
         partition: Arc,
         state_units: &StateUnits,
         vmbus_server: &Option,
@@ -119,6 +121,11 @@
     /// Callback after stopping the VM and all workers, in preparation for a VTL2 reboot.
     async fn unload_for_servicing(&mut self);

+    async fn save(
+        &mut self,
+        mana_keepalive_flag: bool,
+    ) -> Option<Vec<anyhow::Result<ManaDeviceSavedState>>>;
+
     /// Handles packet capture related operations.
     async fn packet_capture(
         &self,
@@ -181,6 +188,7 @@ pub(crate) struct LoadedVm {
     pub _periodic_telemetry_task: Task<()>,

     pub nvme_keep_alive: bool,
+    pub mana_keep_alive: bool,
     pub test_configuration: Option,
     pub dma_manager: OpenhclDmaManager,
 }
@@ -268,7 +276,7 @@ impl LoadedVm {
             WorkerRpc::Restart(rpc) => {
                 let state = async {
                     let running = self.stop().await;
-                    match self.save(None, false).await {
+                    match self.save(None, false, false).await {
                         Ok(servicing_state) => Some((rpc, servicing_state)),
                         Err(err) => {
                             if running {
@@ -330,7 +338,7 @@ impl LoadedVm {
                 UhVmRpc::Save(rpc) => {
                     rpc.handle_failable(async |()| {
                         let running = self.stop().await;
-                        let r = self.save(None, false).await;
+                        let r = self.save(None, false, false).await;
                         if running {
                             self.start(None).await;
                         }
@@ -442,6 +450,11 @@ impl LoadedVm {
             std::future::pending::<()>().await;
         }

+        tracing::info!(
+            "handle_servicing_request mana: {:?}",
+            capabilities_flags.enable_mana_keepalive()
+        );
+
         let running = self.state_units.is_running();
         let success = match self
             .handle_servicing_inner(correlation_id, deadline, capabilities_flags)
@@ -492,9 +505,22 @@ impl LoadedVm {
             anyhow::bail!("Servicing is not yet supported for isolated VMs");
         }

+        tracing::info!("self.nvme_keepalive: {:?}", self.nvme_keep_alive);
+        tracing::info!("self.mana_keepalive: {:?}", self.mana_keep_alive);
+
         // NOTE: This is set via the corresponding env arg, as this feature is
         // experimental.
         let nvme_keepalive = self.nvme_keep_alive && capabilities_flags.enable_nvme_keepalive();
+        let mana_keepalive = self.mana_keep_alive && capabilities_flags.enable_mana_keepalive();
+
+        tracing::info!(
+            "handle_servicing_inner nvme_keepalive: {:?}",
+            nvme_keepalive
+        );
+        tracing::info!(
+            "handle_servicing_inner mana_keepalive: {:?}",
+            mana_keepalive
+        );

         // Do everything before the log flush under a span.
         let r = async {
@@ -509,7 +535,7 @@ impl LoadedVm {
                 anyhow::bail!("cannot service underhill while paused");
             }

-            let mut state = self.save(Some(deadline), nvme_keepalive).await?;
+            let mut state = self.save(Some(deadline), nvme_keepalive, mana_keepalive).await?;
             state.init_state.correlation_id = Some(correlation_id);

             // Unload any network devices.
@@ -662,16 +688,23 @@ impl LoadedVm {
     async fn save(
         &mut self,
         _deadline: Option,
-        vf_keepalive_flag: bool,
+        nvme_keepalive_flag: bool,
+        mana_keepalive_flag: bool,
     ) -> anyhow::Result {
         assert!(!self.state_units.is_running());

+        tracing::info!(
+            "keepalive flags - nvme: {:?}, mana: {:?}",
+            nvme_keepalive_flag,
+            mana_keepalive_flag
+        );
+
         let emuplat = (self.emuplat_servicing.save()).context("emuplat save failed")?;

         // Only save NVMe state when there are NVMe controllers and keep alive
         // was enabled.
         let nvme_state = if let Some(n) = &self.nvme_manager {
-            n.save(vf_keepalive_flag)
+            n.save(nvme_keepalive_flag)
                 .instrument(tracing::info_span!("nvme_manager_save"))
                 .await
                 .map(|s| NvmeSavedState { nvme_state: s })
@@ -679,6 +712,27 @@ impl LoadedVm {
         } else {
             None
         };

+        let mana_state = if let Some(network_settings) = &mut self.network_settings {
+            tracing::info!("saving mana state: {:?}", mana_keepalive_flag);
+            let results = network_settings.save(mana_keepalive_flag).await;
+            let mut saved_states = Vec::new();
+
+            if let Some(results) = results {
+                for result in results {
+                    match result {
+                        Ok(state) => saved_states.push(state),
+                        Err(e) => tracing::warn!("Error saving MANA device state: {:#}", e),
+                    }
+                }
+            }
+
+            Some(saved_states)
+        } else {
+            None
+        };
+
+        tracing::info!("saved mana_state: {:?}", mana_state);
+
         let units = self.save_units().await.context("state unit save failed")?;
         let vmgs = self
             .vmgs_thin_client
@@ -689,7 +743,7 @@ impl LoadedVm {
         // Only save dma manager state if we are expected to keep VF devices
         // alive across save. Otherwise, don't persist the state at all, as
         // there should be no live DMA across save.
-        let dma_manager_state = if vf_keepalive_flag {
+        let dma_manager_state = if nvme_keepalive_flag || mana_keepalive_flag {
             use vmcore::save_restore::SaveRestore;
             Some(self.dma_manager.save().context("dma_manager save failed")?)
         } else {
@@ -713,6 +767,7 @@ impl LoadedVm {
                 vmgs: (vmgs, self.vmgs_disk_metadata.clone()),
                 overlay_shutdown_device: self.shutdown_relay.is_some(),
                 nvme_state,
+                mana_state,
                 dma_manager_state,
                 vmbus_client,
             },
@@ -776,6 +831,7 @@ impl LoadedVm {
             threadpool,
             &self.uevent_listener,
             &None, // VF getting added; no existing state
+            &None, // VF getting added; no existing state
             self.partition.clone(),
             &self.state_units,
             &self.vmbus_server,
diff --git a/openhcl/underhill_core/src/emuplat/netvsp.rs b/openhcl/underhill_core/src/emuplat/netvsp.rs
index bf517208fb..2d65a1aa48 100644
--- a/openhcl/underhill_core/src/emuplat/netvsp.rs
+++ b/openhcl/underhill_core/src/emuplat/netvsp.rs
@@ -14,6 +14,7 @@ use guid::Guid;
 use inspect::Inspect;
 use mana_driver::mana::ManaDevice;
 use mana_driver::mana::VportState;
+use mana_driver::save_restore::ManaDeviceSavedState;
 use mesh::rpc::FailableRpc;
 use mesh::rpc::Rpc;
 use mesh::rpc::RpcSend;
@@ -58,6 +59,7 @@ enum HclNetworkVfManagerMessage {
     HideVtl0VF(Rpc),
     Inspect(inspect::Deferred),
     PacketCapture(FailableRpc, PacketCaptureParams>),
+    SaveState(Rpc<(), ManaDeviceSavedState>),
 }

 async fn create_mana_device(
@@ -66,7 +68,26 @@
     vp_count: u32,
     max_sub_channels: u16,
     dma_client: Arc,
+    mana_state: Option<ManaDeviceSavedState>,
 ) -> anyhow::Result<ManaDevice<VfioDevice>> {
+    // Don't do anything to the device in servicing mode
+    if let Some(mana_state) = mana_state {
+        tracing::info!(
+            "Restoring mana device and skipping reset with pci_id {:?}",
+            pci_id
+        );
+
+        return try_create_mana_device(
+            driver_source,
+            pci_id,
+            vp_count,
+            max_sub_channels,
+            dma_client.clone(),
+            Some(mana_state.clone()),
+        )
+        .await;
+    }
+
     // Disable FLR on vfio attach/detach; this allows faster system
     // startup/shutdown with the caveat that the device needs to be properly
     // sent through the shutdown path during servicing operations, as that is
@@ -90,6 +111,7 @@
         vp_count,
         max_sub_channels,
         dma_client.clone(),
+        mana_state.clone(),
     )
     .await
     {
@@ -119,16 +141,28 @@ async fn try_create_mana_device(
     vp_count: u32,
     max_sub_channels: u16,
     dma_client: Arc,
+    mana_state: Option<ManaDeviceSavedState>,
 ) -> anyhow::Result<ManaDevice<VfioDevice>> {
-    let device = VfioDevice::new(driver_source, pci_id, dma_client)
-        .await
-        .context("failed to open device")?;
+    let device = if mana_state.is_some() {
+        tracing::info!("restoring mana vfio device with pci_id {:?}", pci_id);
+        VfioDevice::restore(driver_source, pci_id, true, dma_client)
+            .await
+            .context("failed to restore device")?
+    } else {
+        tracing::info!("creating mana vfio device with pci_id {:?}", pci_id);
+        VfioDevice::new(driver_source, pci_id, dma_client)
+            .await
+            .context("failed to open device")?
+    };
+
+    tracing::info!("successfully got mana vfio device");

     ManaDevice::new(
         &driver_source.simple(),
         device,
         vp_count,
         max_sub_channels + 1,
+        mana_state,
     )
     .instrument(tracing::info_span!("new_mana_device"))
     .await
@@ -661,6 +695,13 @@ impl HclNetworkVFManagerWorker {
                     // Exit worker thread.
                    return;
                }
+                NextWorkItem::ManagerMessage(HclNetworkVfManagerMessage::SaveState(rpc)) => {
+                    rpc.handle(|_| async {
+                        self.mana_device.as_ref().unwrap().save().await.unwrap()
+                    })
+                    .await;
+                    return;
+                }
                 NextWorkItem::ManaDeviceArrived => {
                     assert!(!self.is_shutdown_active);
                     let mut ctx =
@@ -683,6 +724,7 @@ impl HclNetworkVFManagerWorker {
                         self.vp_count,
                         self.max_sub_channels,
                         self.dma_client.clone(),
+                        None,
                     )
                     .await
                     {
@@ -857,6 +899,7 @@ impl HclNetworkVFManager {
         vp_count: u32,
         max_sub_channels: u16,
         netvsp_state: &Option>,
+        mana_state: &Option<Vec<ManaDeviceSavedState>>,
         dma_mode: GuestDmaMode,
         dma_client: Arc,
     ) -> anyhow::Result<(
         Self,
         Vec,
         RuntimeSavedState,
     )> {
+        tracing::info!("creating mana device. mana_state: {:?}", mana_state);
+
+        let mana_state = mana_state.as_ref().map(|mana_state| mana_state[0].clone());
+
         let device = create_mana_device(
             driver_source,
             &vtl2_pci_id,
             vp_count,
             max_sub_channels,
             dma_client.clone(),
+            mana_state,
         )
         .await?;
         let (mut endpoints, endpoint_controls): (Vec<_>, Vec<_>) = (0..device.num_vports())
@@ -969,6 +1017,19 @@ impl HclNetworkVFManager {
         ))
     }

+    pub async fn save(&self) -> anyhow::Result<ManaDeviceSavedState> {
+        let save_state = self
+            .shared_state
+            .worker_channel
+            .call(HclNetworkVfManagerMessage::SaveState, ())
+            .await
+            .map_err(anyhow::Error::from)?;
+
+        tracing::info!("Returned save_state {:?}", save_state);
+
+        Ok(save_state)
+    }
+
     pub async fn packet_capture(
         &self,
         params: PacketCaptureParams,
diff --git a/openhcl/underhill_core/src/lib.rs b/openhcl/underhill_core/src/lib.rs
index b268fa5f11..83f8fef265 100644
--- a/openhcl/underhill_core/src/lib.rs
+++ b/openhcl/underhill_core/src/lib.rs
@@ -317,6 +317,7 @@ async fn launch_workers(
         gdbstub: opt.gdbstub,
         hide_isolation: opt.hide_isolation,
         nvme_keep_alive: opt.nvme_keep_alive,
+        mana_keep_alive: opt.mana_keep_alive,
         test_configuration: opt.test_configuration,
     };
diff --git a/openhcl/underhill_core/src/options.rs b/openhcl/underhill_core/src/options.rs
index fcdde25f9d..dcb04fbd80 100644
--- a/openhcl/underhill_core/src/options.rs
+++ b/openhcl/underhill_core/src/options.rs
@@ -136,6 +136,9 @@ pub struct Options {
     /// (OPENHCL_NVME_KEEP_ALIVE=1) Enable nvme keep alive when servicing.
     pub nvme_keep_alive: bool,

+    /// (OPENHCL_MANA_KEEP_ALIVE=1) Enable mana keep alive when servicing.
+    pub mana_keep_alive: bool,
+
     /// (OPENHCL_TEST_CONFIG=\)
     /// Test configurations are designed to replicate specific behaviors and
     /// conditions in order to simulate various test scenarios.
@@ -227,6 +230,7 @@ impl Options {
         let gdbstub = parse_legacy_env_bool("OPENHCL_GDBSTUB");
         let gdbstub_port = parse_legacy_env_number("OPENHCL_GDBSTUB_PORT")?.map(|x| x as u32);
         let nvme_keep_alive = parse_env_bool("OPENHCL_NVME_KEEP_ALIVE");
+        let mana_keep_alive = parse_env_bool("OPENHCL_MANA_KEEP_ALIVE");
         let test_configuration = parse_env_string("OPENHCL_TEST_CONFIG").and_then(|x| {
             x.to_string_lossy()
                 .parse::()
@@ -291,6 +295,7 @@ impl Options {
             halt_on_guest_halt,
             no_sidecar_hotplug,
             nvme_keep_alive,
+            mana_keep_alive,
             test_configuration,
         })
     }
diff --git a/openhcl/underhill_core/src/servicing.rs b/openhcl/underhill_core/src/servicing.rs
index 0994a27ffd..980cdfde20 100644
--- a/openhcl/underhill_core/src/servicing.rs
+++ b/openhcl/underhill_core/src/servicing.rs
@@ -10,6 +10,7 @@ use anyhow::Context as _;
 use vmcore::save_restore::SavedStateBlob;

 mod state {
+    use mana_driver::save_restore::ManaDeviceSavedState;
     use mesh::payload::Protobuf;
     use openhcl_dma_manager::save_restore::OpenhclDmaManagerState;
     use state_unit::SavedStateUnit;
@@ -84,6 +85,8 @@
         pub dma_manager_state: Option,
         #[mesh(10002)]
         pub vmbus_client: Option,
+        #[mesh(10003)]
+        pub mana_state: Option<Vec<ManaDeviceSavedState>>,
     }

     #[derive(Protobuf)]
@@ -183,6 +186,7 @@ impl From for FirmwareType {
 #[expect(clippy::option_option)]
 pub mod transposed {
     use super::*;
+    use mana_driver::save_restore::ManaDeviceSavedState;
     use openhcl_dma_manager::save_restore::OpenhclDmaManagerState;
     use vmcore::save_restore::SaveRestore;
@@ -193,6 +197,7 @@
         pub firmware_type: Option,
         pub vm_stop_reference_time: Option,
         pub emuplat: OptionEmuplatSavedState,
+        pub mana_state: Option<Vec<ManaDeviceSavedState>>,
         pub flush_logs_result: Option>,
         pub vmgs: Option<(
             vmgs::save_restore::state::SavedVmgsState,
@@ -230,6 +235,7 @@
                 vmgs,
                 overlay_shutdown_device,
                 nvme_state,
+                mana_state,
                 dma_manager_state,
                 vmbus_client,
             } = state;
@@ -246,6 +252,7 @@
                 vmgs: Some(vmgs),
                 overlay_shutdown_device: Some(overlay_shutdown_device),
                 nvme_state: Some(nvme_state),
+                mana_state,
                 dma_manager_state: Some(dma_manager_state),
                 vmbus_client: Some(vmbus_client),
             }
diff --git a/openhcl/underhill_core/src/worker.rs b/openhcl/underhill_core/src/worker.rs
index 015902a0f2..a61dbbbc47 100644
--- a/openhcl/underhill_core/src/worker.rs
+++ b/openhcl/underhill_core/src/worker.rs
@@ -82,6 +82,7 @@ use input_core::InputData;
 use input_core::MultiplexedInputHandle;
 use inspect::Inspect;
 use loader_defs::shim::MemoryVtlType;
+use mana_driver::save_restore::ManaDeviceSavedState;
 use memory_range::MemoryRange;
 use mesh::CancelContext;
 use mesh::MeshPayload;
@@ -279,6 +280,8 @@ pub struct UnderhillEnvCfg {
     pub hide_isolation: bool,
     /// Enable nvme keep alive.
     pub nvme_keep_alive: bool,
+    /// Enable mana keep alive.
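+    /// Set from the OPENHCL_MANA_KEEP_ALIVE environment variable parsed in options.rs.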
+    pub mana_keep_alive: bool,

     /// test configuration
     pub test_configuration: Option,
@@ -726,6 +729,7 @@ impl UhVmNetworkSettings {
         driver_source: &VmTaskDriverSource,
         uevent_listener: &UeventListener,
         servicing_netvsp_state: &Option>,
+        servicing_mana_state: &Option<Vec<ManaDeviceSavedState>>,
         partition: Arc,
         state_units: &StateUnits,
         tp: &AffinitizedThreadpool,
@@ -747,7 +751,7 @@ impl UhVmNetworkSettings {
             } else {
                 AllocationVisibility::Private
             },
-            persistent_allocations: false,
+            persistent_allocations: true,
         })?;

         let (vf_manager, endpoints, save_state) = HclNetworkVFManager::new(
@@ -760,6 +764,7 @@ impl UhVmNetworkSettings {
             vps_count as u32,
             nic_max_sub_channels,
             servicing_netvsp_state,
+            servicing_mana_state,
             self.dma_mode,
             dma_client,
         )
@@ -872,6 +877,7 @@ impl LoadedVmNetworkSettings for UhVmNetworkSettings {
         threadpool: &AffinitizedThreadpool,
         uevent_listener: &UeventListener,
         servicing_netvsp_state: &Option>,
+        servicing_mana_state: &Option<Vec<ManaDeviceSavedState>>,
         partition: Arc,
         state_units: &StateUnits,
         vmbus_server: &Option,
@@ -904,6 +910,7 @@ impl LoadedVmNetworkSettings for UhVmNetworkSettings {
             &driver_source,
             uevent_listener,
             servicing_netvsp_state,
+            servicing_mana_state,
             partition,
             state_units,
             threadpool,
@@ -962,6 +969,24 @@ impl LoadedVmNetworkSettings for UhVmNetworkSettings {
         }
         Ok(params)
     }
+
+    async fn save(
+        &mut self,
+        mana_keepalive_flag: bool,
+    ) -> Option<Vec<anyhow::Result<ManaDeviceSavedState>>> {
+        if mana_keepalive_flag {
+            Some(
+                join_all(
+                    self.vf_managers
+                        .values()
+                        .map(|vf_manager| vf_manager.save()),
+                )
+                .await,
+            )
+        } else {
+            None
+        }
+    }
 }

 /// The final vtl0 memory layout computed from different inputs.
@@ -2790,6 +2815,12 @@ async fn new_underhill_vm(
                 net_mana::GuestDmaMode::DirectDma
             },
         };
+
+        tracing::info!(
+            "mana servicing state on create: {:?}",
+            servicing_state.mana_state
+        );
+
         let mut netvsp_state = Vec::with_capacity(controllers.mana.len());
         if !controllers.mana.is_empty() {
             let _span = tracing::info_span!("network_settings").entered();
@@ -2802,6 +2833,7 @@ async fn new_underhill_vm(
                 tp,
                 &uevent_listener,
                 &servicing_state.emuplat.netvsp_state,
+                &servicing_state.mana_state,
                 partition.clone(),
                 &state_units,
                 &vmbus_server,
@@ -3007,6 +3039,7 @@ async fn new_underhill_vm(
         _periodic_telemetry_task: periodic_telemetry_task,

         nvme_keep_alive: env_cfg.nvme_keep_alive,
+        mana_keep_alive: env_cfg.mana_keep_alive,
         test_configuration: env_cfg.test_configuration,
         dma_manager,
     };
diff --git a/petri/src/vm/mod.rs b/petri/src/vm/mod.rs
index 80fcfb8283..a3d2f1db5f 100644
--- a/petri/src/vm/mod.rs
+++ b/petri/src/vm/mod.rs
@@ -393,4 +393,6 @@ pub enum IsolationType {
 pub struct OpenHclServicingFlags {
     /// Preserve DMA memory for NVMe devices if supported.
     pub enable_nvme_keepalive: bool,
+    /// Preserve DMA memory for MANA devices if supported.
+    pub enable_mana_keepalive: bool,
 }
diff --git a/petri/src/worker.rs b/petri/src/worker.rs
index a3a479f630..531055f762 100644
--- a/petri/src/worker.rs
+++ b/petri/src/worker.rs
@@ -69,6 +69,7 @@ impl Worker {
                 send,
                 GuestServicingFlags {
                     nvme_keepalive: flags.enable_nvme_keepalive,
+                    mana_keepalive: flags.enable_mana_keepalive,
                 },
                 file,
             )
diff --git a/vm/devices/get/get_protocol/src/lib.rs b/vm/devices/get/get_protocol/src/lib.rs
index ccdcb36d71..28e74d3fd4 100644
--- a/vm/devices/get/get_protocol/src/lib.rs
+++ b/vm/devices/get/get_protocol/src/lib.rs
@@ -1185,8 +1185,11 @@ pub struct SaveGuestVtl2StateFlags {
     /// Explicitly allow nvme_keepalive feature when servicing.
     #[bits(1)]
     pub enable_nvme_keepalive: bool,
+    /// Explicitly allow mana_keepalive feature when servicing.
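+    /// Occupies the bit after enable_nvme_keepalive; the reserved field below shrinks from 63 to 62 bits to compensate.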
+    #[bits(1)]
+    pub enable_mana_keepalive: bool,
     /// Reserved, must be zero.
-    #[bits(63)]
+    #[bits(62)]
     _rsvd1: u64,
 }
diff --git a/vm/devices/get/get_resources/src/lib.rs b/vm/devices/get/get_resources/src/lib.rs
index a25ce39dc6..85ef98239d 100644
--- a/vm/devices/get/get_resources/src/lib.rs
+++ b/vm/devices/get/get_resources/src/lib.rs
@@ -153,8 +153,10 @@ pub mod ged {
     /// Define servicing behavior.
     #[derive(MeshPayload, Default)]
     pub struct GuestServicingFlags {
-        /// Retain memory for DMA-attached devices.
+        /// Retain memory for DMA-attached NVMe devices.
         pub nvme_keepalive: bool,
+        /// Retain memory for DMA-attached MANA devices.
+        pub mana_keepalive: bool,
     }

     /// Actions a client can request that the Guest Emulation
diff --git a/vm/devices/get/guest_emulation_device/src/lib.rs b/vm/devices/get/guest_emulation_device/src/lib.rs
index fd856f647c..b3a75bfadd 100644
--- a/vm/devices/get/guest_emulation_device/src/lib.rs
+++ b/vm/devices/get/guest_emulation_device/src/lib.rs
@@ -538,7 +538,8 @@ impl GedChannel {
                 ),
                 correlation_id: Guid::ZERO,
                 capabilities_flags: SaveGuestVtl2StateFlags::new()
-                    .with_enable_nvme_keepalive(rpc.input().nvme_keepalive),
+                    .with_enable_nvme_keepalive(rpc.input().nvme_keepalive)
+                    .with_enable_mana_keepalive(rpc.input().mana_keepalive),
                 timeout_hint_secs: 60,
             };
diff --git a/vm/devices/net/mana_driver/src/gdma_driver.rs b/vm/devices/net/mana_driver/src/gdma_driver.rs
index 60e409c6eb..16983763b8 100644
--- a/vm/devices/net/mana_driver/src/gdma_driver.rs
+++ b/vm/devices/net/mana_driver/src/gdma_driver.rs
@@ -9,6 +9,10 @@ use crate::queues::Eq;
 use crate::queues::Wq;
 use crate::resources::Resource;
 use crate::resources::ResourceArena;
+use crate::save_restore::DoorbellSavedState;
+use crate::save_restore::GdmaDriverSavedState;
+use crate::save_restore::InterruptSavedState;
+use crate::save_restore::SavedMemoryState;
 use anyhow::Context;
 use futures::FutureExt;
 use gdma_defs::Cqe;
@@ -60,7 +64,6 @@ use gdma_defs::HwcTxOob;
 use gdma_defs::HwcTxOobFlags3;
 use gdma_defs::HwcTxOobFlags4;
 use gdma_defs::RegMap;
-use gdma_defs::SMC_MSG_TYPE_DESTROY_HWC_VERSION;
 use gdma_defs::SMC_MSG_TYPE_ESTABLISH_HWC_VERSION;
 use gdma_defs::SMC_MSG_TYPE_REPORT_HWC_TIMEOUT_VERSION;
 use gdma_defs::Sge;
@@ -118,6 +121,13 @@ impl Doorbell for Bar0 {
         safe_intrinsics::store_fence();
         self.mem.write_u64(offset as usize, value);
     }
+
+    fn save(&self, doorbell_id: Option<u64>) -> DoorbellSavedState {
+        DoorbellSavedState {
+            doorbell_id: doorbell_id.unwrap(),
+            page_count: self.page_count(),
+        }
+    }
 }

 #[derive(Inspect)]
@@ -125,7 +135,7 @@ pub struct GdmaDriver {
     device: Option,
     bar0: Arc>,
     #[inspect(skip)]
-    dma_buffer: MemoryBlock,
+    pub dma_buffer: MemoryBlock,
     #[inspect(skip)]
     interrupts: Vec>,
     eq: Eq,
@@ -148,6 +158,7 @@ pub struct GdmaDriver {
     hwc_warning_time_in_ms: u32,
     hwc_timeout_in_ms: u32,
     hwc_failure: bool,
+    db_id: u32,
 }

 const EQ_PAGE: usize = 0;
@@ -161,61 +172,6 @@ const NUM_PAGES: usize = 6;
 // RWQEs have no OOB and one SGL entry so they are always exactly 32 bytes.
 const RWQE_SIZE: u32 = 32;

-impl Drop for GdmaDriver {
-    fn drop(&mut self) {
-        if self.hwc_failure {
-            return;
-        }
-        let data = self
-            .bar0
-            .mem
-            .read_u32(self.bar0.map.vf_gdma_sriov_shared_reg_start as usize + 28);
-        if data == u32::MAX {
-            tracing::error!("Device no longer present");
-            return;
-        }
-
-        let hdr = SmcProtoHdr::new()
-            .with_msg_type(SmcMessageType::SMC_MSG_TYPE_DESTROY_HWC.0)
-            .with_msg_version(SMC_MSG_TYPE_DESTROY_HWC_VERSION);
-
-        let hdr = u32::from_le_bytes(hdr.as_bytes().try_into().expect("known size"));
-        self.bar0.mem.write_u32(
-            self.bar0.map.vf_gdma_sriov_shared_reg_start as usize + 28,
-            hdr,
-        );
-        // Wait for the device to respond.
-        let max_wait_time =
-            std::time::Instant::now() + Duration::from_millis(HWC_POLL_TIMEOUT_IN_MS);
-        let header = loop {
-            let data = self
-                .bar0
-                .mem
-                .read_u32(self.bar0.map.vf_gdma_sriov_shared_reg_start as usize + 28);
-            if data == u32::MAX {
-                tracing::error!("Device no longer present");
-                return;
-            }
-            let header = SmcProtoHdr::from(data);
-            if !header.owner_is_pf() {
-                break header;
-            }
-            if std::time::Instant::now() > max_wait_time {
-                tracing::error!("MANA request timed out. SMC_MSG_TYPE_DESTROY_HWC");
-                return;
-            }
-            std::hint::spin_loop();
-        };
-
-        if !header.is_response() {
-            tracing::error!("expected response");
-        }
-        if header.status() != 0 {
-            tracing::error!("DESTROY_HWC failed: {}", header.status());
-        }
-    }
-}
-
 struct EqeWaitResult {
     eqe_found: bool,
     elapsed: u128,
@@ -475,6 +431,7 @@
             hwc_warning_time_in_ms: HWC_WARNING_TIME_IN_MS,
             hwc_timeout_in_ms: HWC_TIMEOUT_DEFAULT_IN_MS,
             hwc_failure: false,
+            db_id,
         };

         this.push_rqe();
@@ -499,6 +456,214 @@
         Ok(this)
     }

+    pub async fn save(&mut self) -> anyhow::Result<GdmaDriverSavedState> {
+        tracing::info!(
+            "saving GDMA driver state. base_pfn: {}, len: {}",
+            self.dma_buffer.pfns()[0],
+            self.dma_buffer.len()
+        );
+
+        tracing::info!(
+            "saving gdma interrupts state. count: {}, active: {}",
+            self.interrupts.len(),
+            self.interrupts.iter().filter(|i| i.is_some()).count()
+        );
+
+        let doorbell = self.bar0.save(Some(self.db_id as u64));
+
+        let mut interrupt_config = Vec::new();
+        for (index, interrupt) in self.interrupts.iter().enumerate() {
+            if interrupt.is_some() {
+                interrupt_config.push(InterruptSavedState {
+                    msix_index: index as u32,
+                    cpu: index as u32,
+                });
+            }
+        }
+
+        Ok(GdmaDriverSavedState {
+            mem: SavedMemoryState {
+                base_pfn: self.dma_buffer.pfns()[0],
+                len: self.dma_buffer.len(),
+            },
+            eq: self.eq.save(),
+            cq: self.cq.save(),
+            rq: self.rq.save(),
+            sq: self.sq.save(),
+            db_id: doorbell.doorbell_id,
+            gpa_mkey: self.gpa_mkey,
+            pdid: self._pdid,
+            cq_armed: self.cq_armed,
+            eq_armed: self.eq_armed,
+            hwc_subscribed: self.hwc_subscribed,
+            eq_id_msix: self.eq_id_msix.clone(),
+            hwc_activity_id: self.hwc_activity_id,
+            num_msix: self.num_msix,
+            min_queue_avail: self.min_queue_avail,
+            interrupt_config,
+        })
+    }
+
+    pub async fn restore(saved_state: GdmaDriverSavedState, mut device: T) -> anyhow::Result<Self> {
+        tracing::info!("restoring gdma driver from saved state");
+
+        let bar0_mapping = device.map_bar(0)?;
+        let bar0_len = bar0_mapping.len();
+        if bar0_len < size_of::<RegMap>() {
+            anyhow::bail!("bar0 ({} bytes) too small for reg map", bar0_mapping.len());
+        }
+
+        let mut map = RegMap::new_zeroed();
+        for i in 0..size_of_val(&map) / 4 {
+            let v = bar0_mapping.read_u32(i * 4);
+            // Unmapped device memory will return -1 on reads, so check the first 32
+            // bits for this condition to get a clear error message early.
+            if i == 0 && v == !0 {
+                anyhow::bail!("bar0 read returned -1, device is not present");
+            }
+            map.as_mut_bytes()[i * 4..(i + 1) * 4].copy_from_slice(&v.to_ne_bytes());
+        }
+
+        tracing::debug!(?map, "register map on restore");
+
+        // Verify HWC channel is still active by reading shared memory region
+        let shmem_data = bar0_mapping.read_u32(map.vf_gdma_sriov_shared_reg_start as usize + 28);
+        if shmem_data == u32::MAX {
+            anyhow::bail!("Device no longer present");
+        }
+
+        let header = SmcProtoHdr::from(shmem_data);
+        if header.owner_is_pf() {
+            tracing::warn!("HWC channel appears inactive, device owns shared memory");
+            // Consider re-establishing HWC here if needed
+            anyhow::bail!("HWC channel not active");
+        } else {
+            tracing::info!("HWC channel appears active");
+        }
+
+        // Log on unknown major version numbers. This is not necessarily an
+        // error, so continue.
+        if map.major_version_number != 0 && map.major_version_number != 1 {
+            tracing::warn!(
+                major = map.major_version_number,
+                minor = map.minor_version_number,
+                micro = map.micro_version_number,
+                "unrecognized major version"
+            );
+        }
+
+        if map.vf_gdma_sriov_shared_sz != 32 {
+            anyhow::bail!(
+                "unexpected shared memory size: {}",
+                map.vf_gdma_sriov_shared_sz
+            );
+        }
+
+        if (bar0_len as u64).saturating_sub(map.vf_gdma_sriov_shared_reg_start)
+            < map.vf_gdma_sriov_shared_sz as u64
+        {
+            anyhow::bail!(
+                "bar0 ({} bytes) too small for shared memory at {}",
+                bar0_mapping.len(),
+                map.vf_gdma_sriov_shared_reg_start
+            );
+        }
+
+        let dma_client = device.dma_client();
+        tracing::info!("restoring gdma DMA buffer: {:?}", saved_state.mem);
+        let dma_buffer =
+            dma_client.attach_dma_buffer(saved_state.mem.len, saved_state.mem.base_pfn)?;
+
+        let pages = dma_buffer.pfns();
+        tracing::info!("restored pages: {:?}", pages);
+
+        let doorbell_shift = map.vf_db_page_sz.trailing_zeros();
+        let bar0 = Arc::new(Bar0 {
+            mem: bar0_mapping,
+            map,
+            doorbell_shift,
+        });
+
+        tracing::info!("Restoring doorbell state with db_id: {}", saved_state.db_id);
+
+        let eq = Eq::restore(
+            dma_buffer.subblock(0, PAGE_SIZE),
+            saved_state.eq,
+            DoorbellPage::new(bar0.clone(), saved_state.db_id as u32)?,
+        )?;
+
+        let db_id = saved_state.db_id;
+        let cq = Cq::restore(
+            dma_buffer.subblock(CQ_PAGE * PAGE_SIZE, PAGE_SIZE),
+            saved_state.cq,
+            DoorbellPage::new(bar0.clone(), saved_state.db_id as u32)?,
+        )?;
+
+        let rq = Wq::restore_rq(
+            dma_buffer.subblock(RQ_PAGE * PAGE_SIZE, PAGE_SIZE),
+            saved_state.rq,
+            DoorbellPage::new(bar0.clone(), saved_state.db_id as u32)?,
+        )?;
+
+        let sq = Wq::restore_sq(
+            dma_buffer.subblock(SQ_PAGE * PAGE_SIZE, PAGE_SIZE),
+            saved_state.sq,
+            DoorbellPage::new(bar0.clone(), saved_state.db_id as u32)?,
+        )?;
+
+        let mut interrupts = vec![None; saved_state.num_msix as usize];
+        for int_state in &saved_state.interrupt_config {
+            tracing::info!(
+                "Restoring interrupt at index {:?} and on cpu {:?}",
+                int_state.msix_index,
+                int_state.cpu
+            );
+            let interrupt = device.map_interrupt(int_state.msix_index, int_state.cpu)?;

+            interrupts[int_state.msix_index as usize] = Some(interrupt);
+        }
+
+        let mut this = Self {
+            device: Some(device),
+            bar0,
+            dma_buffer,
+            interrupts, // Revisit: is this right?
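+            // Only the msix entries recorded in interrupt_config were re-mapped above; all other slots remain None.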
+            eq,
+            cq,
+            rq,
+            sq,
+            test_events: 0,
+            eq_armed: saved_state.eq_armed,
+            cq_armed: saved_state.cq_armed,
+            gpa_mkey: saved_state.gpa_mkey,
+            _pdid: saved_state.pdid,
+            eq_id_msix: saved_state.eq_id_msix,
+            num_msix: saved_state.num_msix,
+            min_queue_avail: saved_state.min_queue_avail,
+            hwc_activity_id: saved_state.hwc_activity_id,
+            link_toggle: Vec::new(), // Revisit: is this right?
+            hwc_subscribed: saved_state.hwc_subscribed,
+            hwc_warning_time_in_ms: HWC_WARNING_TIME_IN_MS,
+            hwc_timeout_in_ms: HWC_TIMEOUT_DEFAULT_IN_MS,
+            hwc_failure: false,
+            db_id: db_id as u32,
+        };
+
+        if saved_state.hwc_subscribed {
+            this.hwc_subscribe();
+        }
+
+        if saved_state.eq_armed {
+            this.eq.arm();
+        }
+
+        if saved_state.cq_armed {
+            this.cq.arm();
+        }
+
+        Ok(this)
+    }
+
     async fn report_hwc_timeout(
         &mut self,
         last_cmd_failed: bool,
diff --git a/vm/devices/net/mana_driver/src/lib.rs b/vm/devices/net/mana_driver/src/lib.rs
index a510b4b65e..2572de3a1a 100644
--- a/vm/devices/net/mana_driver/src/lib.rs
+++ b/vm/devices/net/mana_driver/src/lib.rs
@@ -10,5 +10,6 @@ mod gdma_driver;
 pub mod mana;
 pub mod queues;
 mod resources;
+pub mod save_restore;
 #[cfg(test)]
 mod tests;
diff --git a/vm/devices/net/mana_driver/src/mana.rs b/vm/devices/net/mana_driver/src/mana.rs
index 8bfa10cd12..a5d343914c 100644
--- a/vm/devices/net/mana_driver/src/mana.rs
+++ b/vm/devices/net/mana_driver/src/mana.rs
@@ -12,6 +12,7 @@ use crate::gdma_driver::GdmaDriver;
 use crate::queues;
 use crate::queues::Doorbell;
 use crate::queues::DoorbellPage;
+use crate::save_restore::ManaDeviceSavedState;
 use anyhow::Context;
 use futures::StreamExt;
 use futures::lock::Mutex;
@@ -76,8 +77,16 @@
         device: T,
         num_vps: u32,
         max_queues_per_vport: u16,
+        mana_state: Option<ManaDeviceSavedState>,
     ) -> anyhow::Result<Self> {
-        let mut gdma = GdmaDriver::new(driver, device, num_vps).await?;
+        let mut gdma = if let Some(ref mana_state) = mana_state {
+            tracing::info!("Restoring gdma driver from saved state");
+            GdmaDriver::restore(mana_state.gdma.clone(), device).await?
+        } else {
+            tracing::info!("Creating a new gdma driver");
+            GdmaDriver::new(driver, device, num_vps).await?
+        };
+
         gdma.test_eq().await?;

         gdma.verify_vf_driver_version().await?;
@@ -90,7 +99,16 @@
             .find(|dev_id| dev_id.ty == GdmaDevType::GDMA_DEVICE_MANA)
             .context("no mana device found")?;

-        let dev_data = gdma.register_device(dev_id).await?;
+        let dev_data = if let Some(mana_state) = mana_state {
+            tracing::info!("restoring device data from saved state");
+            GdmaRegisterDeviceResp {
+                pdid: mana_state.gdma.pdid,
+                gpa_mkey: mana_state.gdma.gpa_mkey,
+                db_id: mana_state.gdma.db_id as u32,
+            }
+        } else {
+            gdma.register_device(dev_id).await?
+        };

         let mut bnic = BnicDriver::new(&mut gdma, dev_id);
         let dev_config = bnic.query_dev_config().await?;
@@ -141,6 +159,18 @@
         Ok(device)
     }

+    /// Saves the device's state for servicing
+    pub async fn save(&self) -> anyhow::Result<ManaDeviceSavedState> {
+        let mut gdma = self.inner.gdma.lock().await;
+        let saved_state = ManaDeviceSavedState {
+            gdma: gdma.save().await?,
+        };
+
+        tracing::info!("Saved gdma driver state: {:?}", saved_state);
+
+        Ok(saved_state)
+    }
+
     /// Returns the number of vports the device supports.
     pub fn num_vports(&self) -> u32 {
         self.inner.dev_config.max_num_vports.into()
     }
diff --git a/vm/devices/net/mana_driver/src/queues.rs b/vm/devices/net/mana_driver/src/queues.rs
index f0c696ab32..ea4415d4d7 100644
--- a/vm/devices/net/mana_driver/src/queues.rs
+++ b/vm/devices/net/mana_driver/src/queues.rs
@@ -3,6 +3,10 @@
 //! Types to access work, completion, and event queues.

+use crate::save_restore::CqEqSavedState;
+use crate::save_restore::DoorbellSavedState;
+use crate::save_restore::MemoryBlockSavedState;
+use crate::save_restore::WqSavedState;
 use gdma_defs::CLIENT_OOB_8;
 use gdma_defs::CLIENT_OOB_24;
 use gdma_defs::CLIENT_OOB_32;
@@ -37,6 +41,8 @@ pub trait Doorbell: Send + Sync {
     fn page_count(&self) -> u32;
     /// Write a doorbell value at page `page`, offset `address`.
     fn write(&self, page: u32, address: u32, value: u64);
+    /// Save the doorbell state.
+    fn save(&self, doorbell_id: Option<u64>) -> DoorbellSavedState;
 }

 struct NullDoorbell;
@@ -47,6 +53,13 @@
     }

     fn write(&self, _page: u32, _address: u32, _value: u64) {}
+
+    fn save(&self, _doorbell_id: Option<u64>) -> DoorbellSavedState {
+        DoorbellSavedState {
+            doorbell_id: 0,
+            page_count: 0,
+        }
+    }
 }

 /// A single GDMA doorbell page.
@@ -114,6 +127,25 @@
     pub fn new_cq(mem: MemoryBlock, doorbell: DoorbellPage, id: u32) -> Self {
         Self::new(GdmaQueueType::GDMA_CQ, DB_CQ, mem, doorbell, id)
     }
+
+    /// Restores an existing completion queue.
+    pub fn restore(
+        mem: MemoryBlock,
+        state: CqEqSavedState,
+        doorbell: DoorbellPage,
+    ) -> anyhow::Result<Self> {
+        Ok(Self {
+            doorbell,
+            doorbell_addr: state.doorbell_addr,
+            queue_type: GdmaQueueType::GDMA_CQ,
+            mem,
+            id: state.id,
+            next: state.next,
+            size: state.size,
+            shift: state.shift,
+            _phantom: PhantomData,
+        })
+    }
 }

 impl CqEq {
@@ -121,6 +153,25 @@
     pub fn new_eq(mem: MemoryBlock, doorbell: DoorbellPage, id: u32) -> Self {
         Self::new(GdmaQueueType::GDMA_EQ, DB_EQ, mem, doorbell, id)
     }
+
+    /// Restores an existing event queue.
+    pub fn restore(
+        mem: MemoryBlock,
+        state: CqEqSavedState,
+        doorbell: DoorbellPage,
+    ) -> anyhow::Result<Self> {
+        Ok(Self {
+            doorbell,
+            doorbell_addr: state.doorbell_addr,
+            queue_type: GdmaQueueType::GDMA_EQ,
+            mem,
+            id: state.id,
+            next: state.next,
+            size: state.size,
+            shift: state.shift,
+            _phantom: PhantomData,
+        })
+    }
 }

 impl CqEq {
@@ -147,6 +198,29 @@
         }
     }

+    /// Save the state of the queue for restoration after servicing.
+    pub fn save(&self) -> CqEqSavedState {
+        let state = CqEqSavedState {
+            doorbell: DoorbellSavedState {
+                doorbell_id: self.doorbell.doorbell_id as u64,
+                page_count: self.doorbell.doorbell.page_count(),
+            },
+            doorbell_addr: self.doorbell_addr,
+            mem: MemoryBlockSavedState {
+                base: self.mem.base() as u64,
+                len: self.mem.len(),
+                pfns: self.mem.pfns().to_vec(),
+                pfn_bias: self.mem.pfn_bias(),
+            },
+            id: self.id,
+            next: self.next,
+            size: self.size,
+            shift: self.shift,
+        };
+
+        state
+    }
+
     /// Updates the queue ID.
     pub(crate) fn set_id(&mut self, id: u32) {
         self.id = id;
@@ -284,6 +358,65 @@ impl Wq {
         }
     }

+    /// Save the state of the Wq for restoration after servicing
+    pub fn save(&self) -> WqSavedState {
+        WqSavedState {
+            doorbell: DoorbellSavedState {
+                doorbell_id: self.doorbell.doorbell_id as u64,
+                page_count: self.doorbell.doorbell.page_count(),
+            },
+            doorbell_addr: self.doorbell_addr,
+            mem: MemoryBlockSavedState {
+                base: self.mem.base() as u64,
+                len: self.mem.len(),
+                pfns: self.mem.pfns().to_vec(),
+                pfn_bias: self.mem.pfn_bias(),
+            },
+            id: self.id,
+            head: self.head,
+            tail: self.tail,
+            mask: self.mask,
+        }
+    }
+
+    /// Restores an existing receive work queue.
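+    /// The ring's backing memory is the preserved DMA subblock recorded at save time;
+    /// only the software cursor state (id/head/tail/mask) is rebuilt here.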
+    pub fn restore_rq(
+        mem: MemoryBlock,
+        state: WqSavedState,
+        doorbell: DoorbellPage,
+    ) -> anyhow::Result<Self> {
+        Ok(Self {
+            doorbell,
+            doorbell_addr: state.doorbell_addr,
+            queue_type: GdmaQueueType::GDMA_RQ,
+            mem,
+            id: state.id,
+            head: state.head,
+            tail: state.tail,
+            mask: state.mask,
+            uncommitted_count: 0,
+        })
+    }
+
+    /// Restores an existing send work queue.
+    pub fn restore_sq(
+        mem: MemoryBlock,
+        state: WqSavedState,
+        doorbell: DoorbellPage,
+    ) -> anyhow::Result<Self> {
+        Ok(Self {
+            doorbell,
+            doorbell_addr: state.doorbell_addr,
+            queue_type: GdmaQueueType::GDMA_SQ,
+            mem,
+            id: state.id,
+            head: state.head,
+            tail: state.tail,
+            mask: state.mask,
+            uncommitted_count: 0,
+        })
+    }
+
     /// Returns the queue ID.
     pub fn id(&self) -> u32 {
         self.id
diff --git a/vm/devices/net/mana_driver/src/save_restore.rs b/vm/devices/net/mana_driver/src/save_restore.rs
new file mode 100644
index 0000000000..d770b7ee42
--- /dev/null
+++ b/vm/devices/net/mana_driver/src/save_restore.rs
@@ -0,0 +1,213 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Types to save and restore the state of a MANA device.
+
+use mesh::payload::Protobuf;
+use std::collections::HashMap;
+
+/// Top-level MANA device driver saved state
+#[derive(Debug, Protobuf, Clone)]
+#[mesh(package = "mana_driver")]
+pub struct ManaDeviceSavedState {
+    /// Saved state for restoration of the GDMA driver
+    #[mesh(1)]
+    pub gdma: GdmaDriverSavedState,
+}
+
+/// Top-level saved state for the GDMA driver
+#[derive(Protobuf, Clone, Debug)]
+#[mesh(package = "mana_driver")]
+pub struct GdmaDriverSavedState {
+    /// Memory to be restored by a DMA client
+    #[mesh(1)]
+    pub mem: SavedMemoryState,
+
+    /// EQ to be restored
+    #[mesh(2)]
+    pub eq: CqEqSavedState,
+
+    /// CQ to be restored
+    #[mesh(3)]
+    pub cq: CqEqSavedState,
+
+    /// RQ to be restored
+    #[mesh(4)]
+    pub rq: WqSavedState,
+
+    /// SQ to be restored
+    #[mesh(5)]
+    pub sq: WqSavedState,
+
+    /// Doorbell id
+    #[mesh(6)]
+    pub db_id: u64,
+
+    /// Guest physical address memory key
+    #[mesh(7)]
+    pub gpa_mkey: u32,
+
+    /// Protection domain id
+    #[mesh(8)]
+    pub pdid: u32,
+
+    /// Whether the driver is subscribed to hwc
+    #[mesh(9)]
+    pub hwc_subscribed: bool,
+
+    /// Whether the eq is armed or not
+    #[mesh(10)]
+    pub eq_armed: bool,
+
+    /// Whether the cq is armed or not
+    #[mesh(11)]
+    pub cq_armed: bool,
+
+    /// Event queue id to msix mapping
+    #[mesh(12)]
+    pub eq_id_msix: HashMap,
+
+    /// The id of the hwc activity
+    #[mesh(13)]
+    pub hwc_activity_id: u32,
+
+    /// How many msix vectors are available
+    #[mesh(14)]
+    pub num_msix: u32,
+
+    /// Minimum number of queues available
+    #[mesh(15)]
+    pub min_queue_avail: u32,
+
+    /// Saved interrupts for restoration
+    #[mesh(16)]
+    pub interrupt_config: Vec<InterruptSavedState>,
+}
+
+/// Saved state for the memory region used by the driver
+/// to be restored by a DMA client during servicing
+#[derive(Debug, Protobuf, Clone)]
+#[mesh(package = "mana_driver")]
+pub struct SavedMemoryState {
+    /// The base page frame number of the memory region
+    #[mesh(1)]
+    pub base_pfn: u64,
+
+    /// How long the memory region is
+    #[mesh(2)]
+    pub len: usize,
+}
+
+/// The saved state of a completion queue or event queue for restoration
+/// during servicing
+#[derive(Clone, Protobuf, Debug)]
+#[mesh(package = "mana_driver")]
+pub struct CqEqSavedState {
+    /// The doorbell state of the queue, which is how the device is notified
+    #[mesh(1)]
+    pub doorbell: DoorbellSavedState,
+
+    /// The address of the doorbell register
+    #[mesh(2)]
+    pub doorbell_addr: u32,
+
+    /// The memory region used by the queue
+    #[mesh(4)]
+    pub mem: MemoryBlockSavedState,
+
+    /// The id of the queue
+    #[mesh(5)]
+    pub id: u32,
+
+    /// The index of the next entry in the queue
+    #[mesh(6)]
+    pub next: u32,
+
+    /// The total size of the queue
+    #[mesh(7)]
+    pub size: u32,
+
+    /// The bit shift value for the queue
+    #[mesh(8)]
+    pub shift: u32,
+}
+
+/// Saved state of a memory region allocated for queues
+#[derive(Protobuf, Clone, Debug)]
+#[mesh(package = "mana_driver")]
+pub struct MemoryBlockSavedState {
+    /// Base address of the block in guest memory
+    #[mesh(1)]
+    pub base: u64,
+
+    /// Length of the memory block
+    #[mesh(2)]
+    pub len: usize,
+
+    /// The page frame numbers comprising the block
+    #[mesh(3)]
+    pub pfns: Vec<u64>,
+
+    /// The page frame offset of the block
+    #[mesh(4)]
+    pub pfn_bias: u64,
+}
+
+/// Saved state of a work queue for restoration during servicing
+#[derive(Debug, Protobuf, Clone)]
+#[mesh(package = "mana_driver")]
+pub struct WqSavedState {
+    /// The doorbell state of the queue, which is how the device is notified
+    #[mesh(1)]
+    pub doorbell: DoorbellSavedState,
+
+    /// The address of the doorbell
+    #[mesh(2)]
+    pub doorbell_addr: u32,
+
+    /// The memory region used by the queue
+    #[mesh(3)]
+    pub mem: MemoryBlockSavedState,
+
+    /// The id of the queue
+    #[mesh(4)]
+    pub id: u32,
+
+    /// The head of the queue
+    #[mesh(5)]
+    pub head: u32,
+
+    /// The tail of the queue
+    #[mesh(6)]
+    pub tail: u32,
+
+    /// The bitmask for wrapping queue indices
+    #[mesh(7)]
+    pub mask: u32,
+}
+
+/// Saved state of a doorbell for restoration during servicing
+#[derive(Clone, Protobuf, Debug)]
+#[mesh(package = "mana_driver")]
+pub struct DoorbellSavedState {
+    /// The doorbell's id
+    #[mesh(1)]
+    pub doorbell_id: u64,
+
+    /// The number of pages allocated for the doorbell
+    #[mesh(2)]
+    pub page_count: u32,
+}
+
+/// Saved state of an interrupt for restoration during servicing
+#[derive(Protobuf, Clone, Debug)]
+#[mesh(package = "mana_driver")]
+pub struct InterruptSavedState {
+    /// The index in the msix table for this interrupt
+    #[mesh(1)]
+    pub msix_index: u32,
+
+    /// Which CPU this interrupt is assigned to
+    #[mesh(2)]
+    pub cpu: u32,
+}
diff --git a/vm/devices/net/mana_driver/src/tests.rs b/vm/devices/net/mana_driver/src/tests.rs
index 597f6ceae1..41781053f2 100644
--- a/vm/devices/net/mana_driver/src/tests.rs
+++ b/vm/devices/net/mana_driver/src/tests.rs
@@ -22,6 +22,7 @@ use test_with_tracing::test;
 use user_driver::DeviceBacking;
 use user_driver::emulated::DeviceSharedMemory;
 use user_driver::emulated::EmulatedDevice;
+use user_driver::emulated::EmulatedDmaAllocator;
 use user_driver::memory::PAGE_SIZE;
 use vmcore::vm_task::SingleDriverBackend;
 use vmcore::vm_task::VmTaskDriverSource;
@@ -40,7 +41,8 @@ async fn test_gdma(driver: DefaultDriver) {
         }],
         &mut ExternallyManagedMmioIntercepts,
     );
-    let device = EmulatedDevice::new(device, msi_set, mem);
+    let allocator = EmulatedDmaAllocator::new(mem.clone());
+    let device = EmulatedDevice::new(device, msi_set, allocator.into());
     let mut gdma = GdmaDriver::new(&driver, device, 1).await.unwrap();

     gdma.test_eq().await.unwrap();
diff --git a/vm/devices/net/net_mana/src/lib.rs b/vm/devices/net/net_mana/src/lib.rs
index 1bd8f77ac8..1ce8e40806 100644
--- a/vm/devices/net/net_mana/src/lib.rs
+++ b/vm/devices/net/net_mana/src/lib.rs
@@ -1299,6 +1299,7 @@ mod tests {
     use test_with_tracing::test;
     use user_driver::emulated::DeviceSharedMemory;
     use user_driver::emulated::EmulatedDevice;
+    use user_driver::emulated::EmulatedDmaAllocator;
     use vmcore::vm_task::SingleDriverBackend;
     use vmcore::vm_task::VmTaskDriverSource;
@@ -1342,7 +1343,8 @@ mod tests {
             }],
             &mut ExternallyManagedMmioIntercepts,
         );
-        let device = EmulatedDevice::new(device, msi_set, mem);
+        let allocator = EmulatedDmaAllocator::new(mem.clone());
+        let device = EmulatedDevice::new(device, msi_set, allocator.into());
         let dev_config = ManaQueryDeviceCfgResp {
             pf_cap_flags1: 0.into(),
             pf_cap_flags2: 0,
@@ -1352,7 +1354,7 @@ mod tests {
             reserved: 0,
             max_num_eqs: 64,
         };
-        let thing = ManaDevice::new(&driver, device, 1, 1).await.unwrap();
+        let thing = ManaDevice::new(&driver, device, 1, 1, None).await.unwrap();
         let vport = thing.new_vport(0, None, &dev_config).await.unwrap();
         let mut endpoint = ManaEndpoint::new(driver.clone(), vport, dma_mode).await;
         let mut queues = Vec::new();
@@ -1429,7 +1431,8 @@ mod tests {
             }],
             &mut ExternallyManagedMmioIntercepts,
         );
-        let device = EmulatedDevice::new(device, msi_set, mem);
+        let allocator = EmulatedDmaAllocator::new(mem.clone());
+        let device = EmulatedDevice::new(device, msi_set, allocator.into());
         let cap_flags1 = gdma_defs::bnic::BasicNicDriverFlags::new().with_query_filter_state(1);
         let dev_config = ManaQueryDeviceCfgResp {
             pf_cap_flags1: cap_flags1,
@@ -1440,7 +1443,7 @@ mod tests {
             reserved: 0,
             max_num_eqs: 64,
         };
-        let thing = ManaDevice::new(&driver, device, 1, 1).await.unwrap();
+        let thing = ManaDevice::new(&driver, device, 1, 1, None).await.unwrap();
         let _ = thing.new_vport(0, None, &dev_config).await.unwrap();
     }
 }
diff --git a/vm/devices/storage/disk_nvme/nvme_driver/Cargo.toml b/vm/devices/storage/disk_nvme/nvme_driver/Cargo.toml
index ce74ac9760..afb52b1ff0 100644
--- a/vm/devices/storage/disk_nvme/nvme_driver/Cargo.toml
+++ b/vm/devices/storage/disk_nvme/nvme_driver/Cargo.toml
@@ -7,36 +7,36 @@ edition.workspace = true
 rust-version.workspace = true

 [dependencies]
+anyhow.workspace = true
+event-listener.workspace = true
+futures.workspace = true
+guestmem.workspace = true
 inspect = { workspace = true, features = ["defer"] }
 inspect_counters.workspace = true
+memory_range.workspace = true
 mesh.workspace = true
-pal_async.workspace = true
 nvme_spec.workspace = true
-task_control.workspace = true
-user_driver.workspace = true
-guestmem.workspace = true
-vmcore.workspace = true
-
-anyhow.workspace = true
-event-listener.workspace = true
-futures.workspace = true
+page_pool_alloc.workspace = true
+pal_async.workspace = true
 parking_lot.workspace = true
+safe_intrinsics.workspace = true
 safeatomic.workspace = true
 slab.workspace = true
+task_control.workspace = true
 thiserror.workspace = true
 tracing.workspace = true
+user_driver.workspace = true
+vmcore.workspace = true
 zerocopy.workspace = true
-safe_intrinsics.workspace = true

 [dev-dependencies]
 chipset_device.workspace = true
 disklayer_ram.workspace = true
+guid.workspace = true
 nvme.workspace = true
 pci_core.workspace = true
 scsi_buffers.workspace = true
 test_with_tracing.workspace = true
-guid.workspace = true
-
 [lints]
 workspace = true
diff --git a/vm/devices/storage/disk_nvme/nvme_driver/fuzz/fuzz_emulated_device.rs b/vm/devices/storage/disk_nvme/nvme_driver/fuzz/fuzz_emulated_device.rs
index 222056e0a8..232ffb120f 100644
--- a/vm/devices/storage/disk_nvme/nvme_driver/fuzz/fuzz_emulated_device.rs
+++ b/vm/devices/storage/disk_nvme/nvme_driver/fuzz/fuzz_emulated_device.rs
@@ -18,21 +18,23 @@ use user_driver::DeviceBacking;
 use user_driver::DmaClient;
 use user_driver::emulated::DeviceSharedMemory;
 use user_driver::emulated::EmulatedDevice;
+use user_driver::emulated::EmulatedDmaAllocator;
 use user_driver::emulated::Mapping;
 use user_driver::interrupt::DeviceInterrupt;

 /// An EmulatedDevice fuzzer that requires a working EmulatedDevice backend.
 #[derive(Inspect)]
 pub struct FuzzEmulatedDevice {
-    device: EmulatedDevice,
+    device: EmulatedDevice,
 }

 impl FuzzEmulatedDevice {
     /// Creates a new emulated device, wrapping `device`, using the provided MSI controller.
     pub fn new(device: T, msi_set: MsiInterruptSet, shared_mem: DeviceSharedMemory) -> Self {
-        Self {
-            device: EmulatedDevice::new(device, msi_set, shared_mem),
-        }
+        let allocator = EmulatedDmaAllocator::new(shared_mem.clone());
+        let device = EmulatedDevice::new(device, msi_set, allocator.into());
+
+        Self { device }
     }
 }
diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs
index 5181af62d4..745c9e959a 100644
--- a/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs
+++ b/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs
@@ -5,12 +5,17 @@ use crate::NvmeDriver;
 use chipset_device::mmio::ExternallyManagedMmioIntercepts;
 use chipset_device::mmio::MmioIntercept;
 use chipset_device::pci::PciConfigSpace;
+use guestmem::GuestMemory;
 use guid::Guid;
 use inspect::Inspect;
 use inspect::InspectMut;
+use memory_range::MemoryRange;
 use nvme::NvmeControllerCaps;
 use nvme_spec::Cap;
 use nvme_spec::nvm::DsmRange;
+use page_pool_alloc::PagePool;
+use page_pool_alloc::PagePoolAllocator;
+use page_pool_alloc::TestMapper;
 use pal_async::DefaultDriver;
 use pal_async::async_test;
 use parking_lot::Mutex;
@@ -21,10 +26,11 @@ use test_with_tracing::test;
 use user_driver::DeviceBacking;
 use user_driver::DeviceRegisterIo;
 use user_driver::DmaClient;
-use user_driver::emulated::DeviceSharedMemory;
 use user_driver::emulated::EmulatedDevice;
 use user_driver::emulated::Mapping;
+use user_driver::emulated::create_guest_memory;
 use user_driver::interrupt::DeviceInterrupt;
+use user_driver::memory::PAGE_SIZE64;
 use vmcore::vm_task::SingleDriverBackend;
 use vmcore::vm_task::VmTaskDriverSource;
 use zerocopy::IntoBytes;
@@ -50,15 +56,16 @@ async fn test_nvme_ioqueue_max_mqes(driver: DefaultDriver) {
     const IO_QUEUE_COUNT: u16 = 64;
     const CPU_COUNT: u32 = 64;

-    let base_len = 64 << 20;
-    let payload_len = 4 << 20;
-    let mem = DeviceSharedMemory::new(base_len, payload_len);
+    // Memory setup
+    let pages = 1000;
+    let (guest_mem, _page_pool, dma_client) = create_test_memory(pages, false);

+    // Controller Driver Setup
     let driver_source = VmTaskDriverSource::new(SingleDriverBackend::new(driver));
     let mut msi_set = MsiInterruptSet::new();
     let nvme = nvme::NvmeController::new(
         &driver_source,
-        mem.guest_memory().clone(),
+        guest_mem,
         &mut msi_set,
         &mut ExternallyManagedMmioIntercepts,
         NvmeControllerCaps {
@@ -68,13 +75,14 @@ async fn test_nvme_ioqueue_max_mqes(driver: DefaultDriver) {
         },
     );

-    let mut device = NvmeTestEmulatedDevice::new(nvme, msi_set, mem);
-    // Setup mock response at offset 0
+    let mut device = NvmeTestEmulatedDevice::new(nvme, msi_set, dma_client.clone());
+
+    // Mock response at offset 0 since that is where Cap will be accessed
     let max_u16: u16 = 65535;
     let cap: Cap = Cap::new().with_mqes_z(max_u16);
     device.set_mock_response_u64(Some((0, cap.into())));
-    let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device).await;
+    let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device).await;

     assert!(driver.is_ok());
 }
@@ -84,15 +92,15 @@ async fn test_nvme_ioqueue_invalid_mqes(driver: DefaultDriver) {
     const IO_QUEUE_COUNT: u16 = 64;
     const CPU_COUNT: u32 = 64;

-    let base_len = 64 << 20;
-    let payload_len = 4 << 20;
-    let mem = DeviceSharedMemory::new(base_len, payload_len);
+    // Memory setup
+    let pages = 1000;
+    let (guest_mem, _page_pool, dma_client) = create_test_memory(pages, false);

     let driver_source = VmTaskDriverSource::new(SingleDriverBackend::new(driver));
     let mut msi_set = MsiInterruptSet::new();
     let nvme = nvme::NvmeController::new(
         &driver_source,
-        mem.guest_memory().clone(),
+        guest_mem,
         &mut msi_set,
         &mut ExternallyManagedMmioIntercepts,
         NvmeControllerCaps {
@@ -102,7 +110,8 @@ async fn test_nvme_ioqueue_invalid_mqes(driver: DefaultDriver) {
         },
     );

-    let mut device = NvmeTestEmulatedDevice::new(nvme, msi_set, mem);
+    let mut device = NvmeTestEmulatedDevice::new(nvme, msi_set, dma_client.clone());
+
     // Setup mock response at offset 0
     let cap: Cap = Cap::new().with_mqes_z(0);
     device.set_mock_response_u64(Some((0, cap.into())));
@@ -116,19 +125,15 @@ async fn test_nvme_driver(driver: DefaultDriver, allow_dma: bool) {
     const IO_QUEUE_COUNT: u16 = 64;
     const CPU_COUNT: u32 = 64;

-    let base_len = 64 << 20;
-    let payload_len = 4 << 20;
-    let mem = DeviceSharedMemory::new(base_len, payload_len);
-    let payload_mem = mem
-        .guest_memory()
-        .subrange(base_len as u64, payload_len as u64, false)
-        .unwrap();
+    // Memory setup
+    let pages = 100000;
+    let (guest_mem, _page_pool, dma_client) = create_test_memory(pages, allow_dma);
+
     let driver_dma_mem = if allow_dma {
-        mem.guest_memory_for_driver_dma()
-            .subrange(base_len as u64, payload_len as u64, false)
-            .unwrap()
+        let range_half = (pages / 2) * PAGE_SIZE64;
+        guest_mem.subrange(0_u64, range_half, false).unwrap()
     } else {
-        payload_mem.clone()
+        guest_mem.clone()
     };

     let buf_range = OwnedRequestBuffers::linear(0, 16384, true);
@@ -137,7 +142,7 @@ async fn test_nvme_driver(driver: DefaultDriver, allow_dma: bool) {
     let mut msi_set = MsiInterruptSet::new();
     let nvme = nvme::NvmeController::new(
         &driver_source,
-        mem.guest_memory().clone(),
+        guest_mem.clone(),
         &mut msi_set,
         &mut ExternallyManagedMmioIntercepts,
         NvmeControllerCaps {
@@ -151,7 +156,7 @@ async fn test_nvme_driver(driver: DefaultDriver, allow_dma: bool) {
         .await
         .unwrap();

-    let device = EmulatedDevice::new(nvme, msi_set, mem);
+    let device = NvmeTestEmulatedDevice::new(nvme, msi_set, dma_client.clone());

     let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device)
         .await
         .unwrap();

     let namespace = driver.namespace(1).await.unwrap();

-    payload_mem.write_at(0, &[0xcc; 8192]).unwrap();
+    guest_mem.write_at(0, &[0xcc; 8192]).unwrap();
     namespace
         .write(
             0,
             1,
             2,
             false,
             &driver_dma_mem,
-            buf_range.buffer(&payload_mem).range(),
+            buf_range.buffer(&guest_mem).range(),
         )
         .await
         .unwrap();
@@ -178,12 +183,12 @@ async fn test_nvme_driver(driver: DefaultDriver, allow_dma: bool) {
             0,
             32,
             &driver_dma_mem,
-            buf_range.buffer(&payload_mem).range(),
+            buf_range.buffer(&guest_mem).range(),
         )
         .await
         .unwrap();
     let mut v = [0; 4096];
-    payload_mem.read_at(0, &mut v).unwrap();
+    guest_mem.read_at(0, &mut v).unwrap();
     assert_eq!(&v[..512], &[0; 512]);
     assert_eq!(&v[512..1536], &[0xcc; 1024]);
     assert!(v[1536..].iter().all(|&x| x == 0));
@@ -216,7 +221,7 @@ async fn test_nvme_driver(driver: DefaultDriver, allow_dma: bool) {
             0,
             32,
             &driver_dma_mem,
-            buf_range.buffer(&payload_mem).range(),
+            buf_range.buffer(&guest_mem).range(),
         )
         .await
         .unwrap();
@@ -224,7 +229,7 @@ async fn test_nvme_driver(driver: DefaultDriver, allow_dma: bool) {

     assert_eq!(driver.fallback_cpu_count(), 1);

     let mut v = [0; 4096];
-    payload_mem.read_at(0, &mut v).unwrap();
+    guest_mem.read_at(0, &mut v).unwrap();
     assert_eq!(&v[..512], &[0; 512]);
     assert_eq!(&v[512..1024], &[0xcc; 512]);
     assert!(v[1024..].iter().all(|&x| x == 0));
@@ -237,14 +242,15 @@ async fn test_nvme_save_restore_inner(driver: DefaultDriver) {
     const IO_QUEUE_COUNT: u16 = 64;
     const CPU_COUNT: u32 = 64;

-    let base_len = 64 * 1024 * 1024;
-    let payload_len = 4 * 1024 * 1024;
-    let mem = DeviceSharedMemory::new(base_len, payload_len);
+    // Memory setup
+    let pages = 1000;
+    let (guest_mem, _page_pool, dma_client) = create_test_memory(pages, false);
+
     let driver_source = VmTaskDriverSource::new(SingleDriverBackend::new(driver.clone()));
     let mut msi_x = MsiInterruptSet::new();
     let nvme_ctrl = nvme::NvmeController::new(
         &driver_source,
-        mem.guest_memory().clone(),
+        guest_mem.clone(),
         &mut msi_x,
         &mut ExternallyManagedMmioIntercepts,
         NvmeControllerCaps {
@@ -261,7 +267,7 @@ async fn test_nvme_save_restore_inner(driver: DefaultDriver) {
         .await
         .unwrap();

-    let device = EmulatedDevice::new(nvme_ctrl, msi_x, mem);
+    let device = NvmeTestEmulatedDevice::new(nvme_ctrl, msi_x, dma_client.clone());
     let mut nvme_driver = NvmeDriver::new(&driver_source, CPU_COUNT, device)
         .await
         .unwrap();
@@ -273,11 +279,10 @@ async fn test_nvme_save_restore_inner(driver: DefaultDriver) {
     assert_eq!(saved_state.namespaces.len(), 0);

     // Create a second set of devices since the ownership has been moved.
-    let new_emu_mem = DeviceSharedMemory::new(base_len, payload_len);
     let mut new_msi_x = MsiInterruptSet::new();
     let mut new_nvme_ctrl = nvme::NvmeController::new(
         &driver_source,
-        new_emu_mem.guest_memory().clone(),
+        guest_mem.clone(),
         &mut new_msi_x,
         &mut ExternallyManagedMmioIntercepts,
         NvmeControllerCaps {
@@ -299,7 +304,7 @@ async fn test_nvme_save_restore_inner(driver: DefaultDriver) {
     // Wait for CSTS.RDY to set.
     backoff.back_off().await;

-    let _new_device = EmulatedDevice::new(new_nvme_ctrl, new_msi_x, new_emu_mem);
+    let _new_device = NvmeTestEmulatedDevice::new(new_nvme_ctrl, new_msi_x, dma_client.clone());
     // TODO: Memory restore is disabled for emulated DMA, uncomment once fixed.
     // let _new_nvme_driver = NvmeDriver::restore(&driver_source, CPU_COUNT, new_device, &saved_state)
     //     .await
     //     .unwrap();
 }

 #[derive(Inspect)]
 pub struct NvmeTestEmulatedDevice {
-    device: EmulatedDevice,
+    device: EmulatedDevice,
     #[inspect(debug)]
     mocked_response_u32: Arc>>,
     #[inspect(debug)]
@@ -326,9 +331,9 @@ pub struct NvmeTestMapping {
 impl NvmeTestEmulatedDevice {
     /// Creates a new emulated device, wrapping `device`, using the provided MSI controller.
-    pub fn new(device: T, msi_set: MsiInterruptSet, shared_mem: DeviceSharedMemory) -> Self {
+    pub fn new(device: T, msi_set: MsiInterruptSet, dma_client: Arc) -> Self {
         Self {
-            device: EmulatedDevice::new(device, msi_set, shared_mem),
+            device: EmulatedDevice::new(device, msi_set, dma_client.clone()),
             mocked_response_u32: Arc::new(Mutex::new(None)),
             mocked_response_u64: Arc::new(Mutex::new(None)),
         }
     }
@@ -409,3 +414,23 @@ impl DeviceRegisterIo for NvmeTestMapping {
         self.mapping.write_u64(offset, data);
     }
 }
+
diff --git a/vm/devices/user_driver/Cargo.toml b/vm/devices/user_driver/Cargo.toml
index 67fbe8ab05..3c8567354f 100644
--- a/vm/devices/user_driver/Cargo.toml
+++ b/vm/devices/user_driver/Cargo.toml
@@ -23,6 +23,7 @@ uevent.workspace = true
 event-listener.workspace = true
 guestmem.workspace = true
 vmcore.workspace = true
+sparse_mmap = { workspace = true, optional = true }
 anyhow.workspace = true
 parking_lot.workspace = true
@@ -35,7 +36,6 @@ futures.workspace = true
 futures-concurrency.workspace = true
 libc.workspace = true
 pal_event.workspace = true
-sparse_mmap = { workspace = true, optional = true }
 vfio_sys = { workspace = true, optional = true }
 
 [lints]
diff --git a/vm/devices/user_driver/src/emulated.rs b/vm/devices/user_driver/src/emulated.rs
index 43e73eb4bd..692cb682cb 100644
--- a/vm/devices/user_driver/src/emulated.rs
+++ b/vm/devices/user_driver/src/emulated.rs
@@ -26,19 +26,20 @@ use pci_core::msi::MsiControl;
 use pci_core::msi::MsiInterruptSet;
 use pci_core::msi::MsiInterruptTarget;
 use safeatomic::AtomicSliceOps;
+use sparse_mmap::SparseMapping;
 use std::ptr::NonNull;
 use std::sync::Arc;
 use std::sync::atomic::AtomicU8;
 
 /// An emulated device.
-pub struct EmulatedDevice<T> {
+pub struct EmulatedDevice<T, U> {
     device: Arc<Mutex<T>>,
     controller: MsiController,
-    shared_mem: DeviceSharedMemory,
+    dma_client: Arc<U>,
     bar0_len: usize,
 }
 
-impl<T: InspectMut> Inspect for EmulatedDevice<T> {
+impl<T: InspectMut, U> Inspect for EmulatedDevice<T, U> {
     fn inspect(&self, req: inspect::Request<'_>) {
         self.device.lock().inspect_mut(req);
     }
@@ -71,9 +72,9 @@ impl MsiInterruptTarget for MsiController {
     }
 }
 
-impl<T: PciConfigSpace + MmioIntercept> EmulatedDevice<T> {
+impl<T: PciConfigSpace + MmioIntercept, U: DmaClient> EmulatedDevice<T, U> {
     /// Creates a new emulated device, wrapping `device`, using the provided MSI controller.
-    pub fn new(mut device: T, msi_set: MsiInterruptSet, shared_mem: DeviceSharedMemory) -> Self {
+    pub fn new(mut device: T, msi_set: MsiInterruptSet, dma_client: Arc<U>) -> Self {
         // Connect an interrupt controller.
         let controller = MsiController::new(msi_set.len());
         msi_set.connect(&controller);
@@ -107,7 +108,7 @@ impl<T: PciConfigSpace + MmioIntercept> EmulatedDevice<T> {
         Self {
             device: Arc::new(Mutex::new(device)),
             controller,
-            shared_mem,
+            dma_client,
             bar0_len,
         }
     }
@@ -139,13 +140,17 @@ pub struct DeviceSharedMemory {
     state: Arc<Mutex<Vec<u64>>>,
 }
 
-struct Backing {
-    mem: Arc<AlignedHeapMemory>,
+/// The Backing struct is meant for testing only. It encapsulates types that already
+/// implement [GuestMemoryAccess] while providing the allow_dma switch regardless of
+/// the underlying type T.
+struct Backing<T> {
+    mem: T,
     allow_dma: bool,
 }
 
-/// SAFETY: passing through to [`AlignedHeapMemory`].
-unsafe impl GuestMemoryAccess for Backing {
+/// SAFETY: Defer to the [GuestMemoryAccess] implementation of T.
+/// Only intercept the base_iova fn, with a naive response of 0, if allow_dma is enabled.
+unsafe impl<T: GuestMemoryAccess> GuestMemoryAccess for Backing<T> {
     fn mapping(&self) -> Option<NonNull<u8>> {
         self.mem.mapping()
     }
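The `base_iova` override that this new SAFETY comment describes falls between the two hunks and is not shown. The idea, as a hedged sketch (the exact body in the patch may differ):

```rust
// Inside `unsafe impl<T: GuestMemoryAccess> GuestMemoryAccess for Backing<T>`:
// report IOVA 0 so emulated driver DMA can treat guest physical addresses as
// device addresses; `None` opts the memory out of driver DMA entirely.
fn base_iova_sketch(allow_dma: bool) -> Option<u64> {
    allow_dma.then_some(0)
}
```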
@@ -159,6 +164,15 @@ unsafe impl GuestMemoryAccess for Backing {
     }
 }
 
+/// Takes a sparse mapping as input and converts it to GuestMemory with the allow_dma switch.
+pub fn create_guest_memory(sparse_mmap: SparseMapping, allow_dma: bool) -> GuestMemory {
+    let test_backing = Backing {
+        mem: sparse_mmap,
+        allow_dma,
+    };
+    GuestMemory::new("test mapper guest memory", test_backing)
+}
+
 impl DeviceSharedMemory {
     pub fn new(size: usize, extra: usize) -> Self {
         assert_eq!(size % PAGE_SIZE, 0);
@@ -272,6 +286,12 @@ pub struct EmulatedDmaAllocator {
     shared_mem: DeviceSharedMemory,
 }
 
+impl EmulatedDmaAllocator {
+    pub fn new(shared_mem: DeviceSharedMemory) -> Self {
+        Self { shared_mem }
+    }
+}
+
 impl DmaClient for EmulatedDmaAllocator {
     fn allocate_dma_buffer(&self, len: usize) -> anyhow::Result<MemoryBlock> {
         let memory = MemoryBlock::new(self.shared_mem.alloc(len).context("out of memory")?);
@@ -284,7 +304,9 @@ impl DmaClient for EmulatedDmaAllocator {
     }
 }
 
-impl<T: 'static + Send + InspectMut + MmioIntercept> DeviceBacking for EmulatedDevice<T> {
+impl<T: 'static + Send + InspectMut + MmioIntercept, U: 'static + DmaClient> DeviceBacking
+    for EmulatedDevice<T, U>
+{
     type Registers = Mapping<T>;
 
     fn id(&self) -> &str {
@@ -303,9 +325,7 @@ impl<T: 'static + Send + InspectMut + MmioIntercept> DeviceBacking for EmulatedD
     }
 
     fn dma_client(&self) -> Arc<dyn DmaClient> {
-        Arc::new(EmulatedDmaAllocator {
-            shared_mem: self.shared_mem.clone(),
-        }) as Arc<dyn DmaClient>
+        self.dma_client.clone()
     }
 
     fn max_interrupt_count(&self) -> u32 {
diff --git a/vm/page_pool_alloc/src/lib.rs b/vm/page_pool_alloc/src/lib.rs
index efa16cd9b9..b2efdb6efb 100644
--- a/vm/page_pool_alloc/src/lib.rs
+++ b/vm/page_pool_alloc/src/lib.rs
@@ -446,6 +446,7 @@ pub trait PoolSource: Inspect + Send + Sync {
 pub struct TestMapper {
     #[inspect(skip)]
     mem: Mappable,
+    len: usize,
 }
 
 impl TestMapper {
@@ -454,7 +455,16 @@ impl TestMapper {
         let len = (size_pages * PAGE_SIZE) as usize;
         let fd = alloc_shared_memory(len).context("creating shared mem")?;
-        Ok(Self { mem: fd })
+        Ok(Self { mem: fd, len })
+    }
+
+    /// Returns sparse memory that maps the entire range used by [TestMapper]. This assumes
+    /// that pages start at 0.
+    pub fn sparse_mapping(&self) -> SparseMapping {
+        let mappable = self.mappable();
+        let mapping = SparseMapping::new(self.len).unwrap();
+        mapping.map_file(0, self.len, mappable, 0, true).unwrap();
+        mapping
     }
 
     fn inspect_extra(&self, resp: &mut Response<'_>) {
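The pool built by `create_test_memory` covers GPNs `num_pages / 2 .. num_pages` of this mapper's range. A quick check of that GPN-to-byte arithmetic (`MemoryRange::start`/`end`/`len` accessors assumed from the memory_range crate):

```rust
use memory_range::MemoryRange;

fn gpn_math() {
    // GPNs 500..1000 of a 1000-page mapping: exactly the second half.
    let range = MemoryRange::from_4k_gpn_range(500..1000);
    assert_eq!(range.start(), 500 * 4096);
    assert_eq!(range.end(), 1000 * 4096);
    assert_eq!(range.len(), 500 * 4096);
}
```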
diff --git a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_servicing.rs b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_servicing.rs
index f2317b2eab..d985a8c4b6 100644
--- a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_servicing.rs
+++ b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_servicing.rs
@@ -28,6 +28,8 @@ async fn openhcl_servicing_core(
 ) -> anyhow::Result<()> {
     let (mut vm, agent) = config
         .with_openhcl_command_line(openhcl_cmdline)
+        .with_vmbus_redirect()
+        .with_nic()
         .run()
         .await?;
@@ -59,9 +61,9 @@ async fn openhcl_servicing(
 }
 
 /// Test servicing an OpenHCL VM from the current version to itself
-/// with VF keepalive support.
+/// with NVMe keepalive support.
 #[openvmm_test(openhcl_linux_direct_x64 [LATEST_LINUX_DIRECT_TEST_X64])]
-async fn openhcl_servicing_keepalive(
+async fn openhcl_servicing_nvme_keepalive(
     config: PetriVmConfigOpenVmm,
     (igvm_file,): (ResolvedArtifact,),
 ) -> Result<(), anyhow::Error> {
@@ -71,6 +73,26 @@ async fn openhcl_servicing_keepalive(
         igvm_file,
         OpenHclServicingFlags {
             enable_nvme_keepalive: true,
+            enable_mana_keepalive: false,
         },
     )
     .await
+}
+
+/// Test servicing an OpenHCL VM from the current version to itself
+/// with MANA keepalive support.
+#[openvmm_test(openhcl_linux_direct_x64 [LATEST_LINUX_DIRECT_TEST_X64])]
+async fn openhcl_servicing_mana_keepalive(
+    config: PetriVmConfigOpenVmm,
+    (igvm_file,): (ResolvedArtifact,),
+) -> Result<(), anyhow::Error> {
+    openhcl_servicing_core(
+        config,
+        "OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_MANA_KEEP_ALIVE=1",
+        igvm_file,
+        OpenHclServicingFlags {
+            enable_nvme_keepalive: false,
+            enable_mana_keepalive: true,
+        },
+    )
+    .await
 }
diff --git a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_uefi.rs b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_uefi.rs
index 354c2cc43f..7b9aabb483 100644
--- a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_uefi.rs
+++ b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_uefi.rs
@@ -101,6 +101,7 @@ async fn nvme_keepalive(
         igvm_file,
         OpenHclServicingFlags {
             enable_nvme_keepalive: true,
+            enable_mana_keepalive: false,
         },
     )
     .await
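For orientation: the `OPENHCL_MANA_KEEP_ALIVE=1` token passed through `with_openhcl_command_line` above is read back inside OpenHCL as an environment-style flag gating the experimental feature. A std-only sketch of the usual gate (the actual parsing in the codebase may differ):

```rust
fn mana_keep_alive_requested() -> bool {
    // Treat only an explicit "1" as enabling the experimental feature.
    std::env::var("OPENHCL_MANA_KEEP_ALIVE")
        .map(|v| v == "1")
        .unwrap_or(false)
}
```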