diff --git a/Cargo.lock b/Cargo.lock index dda653c599..93fd5aed9b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3808,6 +3808,7 @@ dependencies = [ "anyhow", "hvdef", "inspect", + "parking_lot", "user_driver", "virt", ] @@ -4786,6 +4787,7 @@ dependencies = [ "sha2", "sidecar_defs", "tdcall", + "test_with_tracing", "underhill_confidentiality", "x86defs", "zerocopy 0.8.24", @@ -4804,6 +4806,8 @@ dependencies = [ "memory_range", "mesh", "page_pool_alloc", + "parking_lot", + "thiserror 2.0.12", "user_driver", "virt", "vmcore", diff --git a/openhcl/bootloader_fdt_parser/src/lib.rs b/openhcl/bootloader_fdt_parser/src/lib.rs index 2b056ca72f..b6c04f6794 100644 --- a/openhcl/bootloader_fdt_parser/src/lib.rs +++ b/openhcl/bootloader_fdt_parser/src/lib.rs @@ -169,6 +169,8 @@ pub struct ParsedBootDtInfo { /// VTL2 range for private pool memory. #[inspect(iter_by_index)] pub private_pool_ranges: Vec, + /// Source of DMA hint calculation. + pub dma_hint_self: bool, } fn err_to_owned(e: fdt::parser::Error<'_>) -> anyhow::Error { @@ -207,6 +209,7 @@ struct OpenhclInfo { memory_allocation_mode: MemoryAllocationMode, isolation: IsolationType, private_pool_ranges: Vec, + dma_hint_self: bool, } fn parse_memory_openhcl(node: &Node<'_>) -> anyhow::Result { @@ -394,6 +397,11 @@ fn parse_openhcl(node: &Node<'_>) -> anyhow::Result { .transpose() .context("unable to read vtl0-alias-map")?; + let dma_hint_self = matches!( + try_find_property(node, "dma-hint").and_then(|p| p.read_str().ok()), + Some("self") + ); + // Extract vmbus mmio information from the overall memory map. let vtl0_mmio = memory .iter() @@ -416,6 +424,7 @@ fn parse_openhcl(node: &Node<'_>) -> anyhow::Result { memory_allocation_mode, isolation, private_pool_ranges, + dma_hint_self, }) } @@ -509,6 +518,7 @@ impl ParsedBootDtInfo { let mut isolation = IsolationType::None; let mut vtl2_reserved_range = MemoryRange::EMPTY; let mut private_pool_ranges = Vec::new(); + let mut dma_hint_self = false; let parser = Parser::new(raw) .map_err(err_to_owned) @@ -538,6 +548,7 @@ impl ParsedBootDtInfo { memory_allocation_mode: n_memory_allocation_mode, isolation: n_isolation, private_pool_ranges: n_private_pool_ranges, + dma_hint_self: n_dma_hint_self, } = parse_openhcl(&child)?; vtl0_mmio = n_vtl0_mmio; config_ranges = n_config_ranges; @@ -548,6 +559,7 @@ impl ParsedBootDtInfo { isolation = n_isolation; vtl2_reserved_range = n_vtl2_reserved_range; private_pool_ranges = n_private_pool_ranges; + dma_hint_self = n_dma_hint_self; } _ if child.name.starts_with("memory@") => { @@ -580,6 +592,7 @@ impl ParsedBootDtInfo { isolation, vtl2_reserved_range, private_pool_ranges, + dma_hint_self, }) } } @@ -945,6 +958,7 @@ mod tests { range: MemoryRange::new(0x60000..0x70000), vnode: 0, }], + dma_hint_self: false, }; let dt = build_dt(&orig_info).unwrap(); diff --git a/openhcl/lower_vtl_permissions_guard/Cargo.toml b/openhcl/lower_vtl_permissions_guard/Cargo.toml index d4e2b17496..809158724d 100644 --- a/openhcl/lower_vtl_permissions_guard/Cargo.toml +++ b/openhcl/lower_vtl_permissions_guard/Cargo.toml @@ -9,6 +9,7 @@ rust-version.workspace = true [target.'cfg(target_os = "linux")'.dependencies] hvdef.workspace = true inspect.workspace = true +parking_lot.workspace = true user_driver.workspace = true virt.workspace = true diff --git a/openhcl/lower_vtl_permissions_guard/src/lib.rs b/openhcl/lower_vtl_permissions_guard/src/lib.rs index a8678fbc71..616267cbba 100644 --- a/openhcl/lower_vtl_permissions_guard/src/lib.rs +++ b/openhcl/lower_vtl_permissions_guard/src/lib.rs @@ -13,6 +13,7 @@ pub use device_dma::LowerVtlDmaBuffer; use anyhow::Context; use anyhow::Result; use inspect::Inspect; +use parking_lot::Mutex; use std::sync::Arc; use user_driver::DmaClient; use user_driver::memory::MemoryBlock; @@ -79,6 +80,7 @@ pub struct LowerVtlMemorySpawner { spawner: T, #[inspect(skip)] vtl_protect: Arc, + alloc_size: Mutex, } impl LowerVtlMemorySpawner { @@ -88,6 +90,7 @@ impl LowerVtlMemorySpawner { Self { spawner, vtl_protect, + alloc_size: Mutex::new(0), } } } @@ -98,6 +101,7 @@ impl DmaClient for LowerVtlMemorySpawner { let vtl_guard = PagesAccessibleToLowerVtl::new_from_pages(self.vtl_protect.clone(), mem.pfns()) .context("failed to lower VTL permissions on memory block")?; + *self.alloc_size.lock() += len as u64; Ok(MemoryBlock::new(LowerVtlDmaBuffer { block: mem, @@ -108,4 +112,19 @@ impl DmaClient for LowerVtlMemorySpawner { fn attach_pending_buffers(&self) -> Result> { anyhow::bail!("restore is not supported for LowerVtlMemorySpawner") } + + /// Query if this client supports persistent allocations. + fn is_persistent(&self) -> bool { + false + } + + /// How much memory was allocated during session. + fn alloc_size(&self) -> u64 { + *self.alloc_size.lock() + } + + /// Not supported for this allocator. + fn fallback_alloc_size(&self) -> u64 { + 0 + } } diff --git a/openhcl/openhcl_boot/Cargo.toml b/openhcl/openhcl_boot/Cargo.toml index f8d8a70e0a..0e9cbe6b14 100644 --- a/openhcl/openhcl_boot/Cargo.toml +++ b/openhcl/openhcl_boot/Cargo.toml @@ -37,3 +37,6 @@ minimal_rt_build.workspace = true [lints] workspace = true + +[dev-dependencies] +test_with_tracing.workspace = true diff --git a/openhcl/openhcl_boot/src/dt.rs b/openhcl/openhcl_boot/src/dt.rs index 3a85058c9c..40b16536df 100644 --- a/openhcl/openhcl_boot/src/dt.rs +++ b/openhcl/openhcl_boot/src/dt.rs @@ -483,6 +483,11 @@ pub fn write_dt( openhcl_builder = openhcl_builder.add_u64(p_vtl0_alias_map, data)?; } + if partition_info.dma_hint_self { + let p_dma_hint = openhcl_builder.add_string("dma-hint")?; + openhcl_builder = openhcl_builder.add_str(p_dma_hint, "self")?; + } + #[derive(Debug, Copy, Clone, PartialEq, Eq)] struct Vtl2MemoryEntry { range: MemoryRange, diff --git a/openhcl/openhcl_boot/src/host_params/dma_hint.rs b/openhcl/openhcl_boot/src/host_params/dma_hint.rs new file mode 100644 index 0000000000..6208a95602 --- /dev/null +++ b/openhcl/openhcl_boot/src/host_params/dma_hint.rs @@ -0,0 +1,197 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Calculate DMA hint value if not provided by host. + +use super::PartitionInfo; +use igvm_defs::{MemoryMapEntryType, PAGE_SIZE_4K}; + +/// Lookup table for DMA hint calculation. +/// Using tuples instead of structs to keep it readable. +/// Let's keep the table sorted by VP count, then by assigned memory. +/// Using u16 to keep the memory req short. +/// Max VTL2 memory known today is 24838 MiB. +/// (vp_count, vtl2_memory_mb, dma_hint_mb) +const LOOKUP_TABLE: &[(u16, u16, u16)] = &[ + (2, 96, 2), + (2, 98, 4), + (2, 100, 4), + (2, 104, 4), + (4, 108, 2), + (4, 110, 6), + (4, 112, 6), + (4, 118, 8), + (4, 130, 12), + (8, 140, 4), + (8, 148, 10), + (8, 170, 20), + (8, 176, 20), + (16, 234, 12), + (16, 256, 20), // There is another configuration with '18'. + (16, 268, 38), + (16, 282, 54), + (24, 420, 66), + (32, 404, 22), + (32, 516, 36), + (32, 538, 74), // There is another configuration with '52'. + (48, 558, 32), + (48, 718, 52), + (48, 730, 52), + (48, 746, 78), + (64, 712, 42), + (64, 924, 68), + (64, 938, 68), + (96, 1030, 64), + (96, 1042, 114), // Can be '64'. + (96, 1058, 114), // Can be '106'. + (96, 1340, 102), + (96, 1358, 104), + (96, 1382, 120), + (112, 1566, 288), + (128, 1342, 84), + (128, 1360, 84), + (896, 12912, 0), // (516) Needs to be validated as the vNIC number is unknown. +]; + +/// Round up to next 2MiB. +fn round_up_to_2mb(pages_4k: u64) -> u64 { + (pages_4k + 511) & !(511) +} + +/// Returns calculated DMA hint value, in 4k pages. +pub fn vtl2_calculate_dma_hint(vp_count: usize, storage: &PartitionInfo) -> u64 { + let mut dma_hint_4k = 0; + let mem_size = storage + .vtl2_ram + .iter() + .filter(|m| m.mem_type == MemoryMapEntryType::VTL2_PROTECTABLE) + .map(|e| e.range.len()) + .sum::(); + // Sanity check for the calculated memory size. + if mem_size > 0 && mem_size < 0xFFFFFFFF00000 { + let mem_size_mb = (mem_size / 1048576) as u32; + + let mut min_vtl2_memory_mb = 65535; + let mut max_vtl2_memory_mb = 0; + + // To avoid using floats, scale ratios to 1:1000. + let mut min_ratio_1000th = 100000; + let mut max_ratio_1000th = 1000; + + let mut min_vp_count: u16 = 1; + let mut max_vp_count = vp_count as u16; + + for (vp_lookup, vtl2_memory_mb, dma_hint_mb) in LOOKUP_TABLE { + match (*vp_lookup).cmp(&(vp_count as u16)) { + core::cmp::Ordering::Less => { + // Find nearest. + min_vp_count = min_vp_count.max(*vp_lookup); + } + core::cmp::Ordering::Equal => { + if *vtl2_memory_mb == mem_size_mb as u16 { + // Found exact match. + dma_hint_4k = *dma_hint_mb as u64 * 1048576 / PAGE_SIZE_4K; + max_vtl2_memory_mb = *vtl2_memory_mb; + break; + } else { + // Prepare for possible extrapolation. + min_vtl2_memory_mb = min_vtl2_memory_mb.min(*vtl2_memory_mb); + max_vtl2_memory_mb = max_vtl2_memory_mb.max(*vtl2_memory_mb); + min_ratio_1000th = min_ratio_1000th + .min(*vtl2_memory_mb as u32 * 1000 / *dma_hint_mb as u32); + max_ratio_1000th = max_ratio_1000th + .max(*vtl2_memory_mb as u32 * 1000 / *dma_hint_mb as u32); + } + } + core::cmp::Ordering::Greater => { + // Find nearest. + max_vp_count = max_vp_count.min(*vp_lookup); + } + } + } + + // It is possible there were no matching entries in the lookup table. + // (i.e. unexpected VP count). + if max_vtl2_memory_mb == 0 { + LOOKUP_TABLE + .iter() + .filter(|(vp_lookup, _, _)| { + *vp_lookup == min_vp_count || *vp_lookup == max_vp_count + }) + .for_each(|(_, vtl2_memory_mb, dma_hint_mb)| { + min_vtl2_memory_mb = min_vtl2_memory_mb.min(*vtl2_memory_mb); + max_vtl2_memory_mb = max_vtl2_memory_mb.max(*vtl2_memory_mb); + min_ratio_1000th = + min_ratio_1000th.min(*vtl2_memory_mb as u32 * 1000 / *dma_hint_mb as u32); + max_ratio_1000th = + max_ratio_1000th.max(*vtl2_memory_mb as u32 * 1000 / *dma_hint_mb as u32); + }); + } + + if dma_hint_4k == 0 { + // Didn't find an exact match for vp_count, try to extrapolate. + dma_hint_4k = (mem_size_mb as u64 * 1000u64 * (1048576u64 / PAGE_SIZE_4K)) + / ((min_ratio_1000th + max_ratio_1000th) as u64 / 2u64); + + // And then round up to 2MiB. + dma_hint_4k = round_up_to_2mb(dma_hint_4k); + } + } + + dma_hint_4k +} + +#[cfg(test)] +mod test { + use super::*; + use crate::MemoryRange; + use crate::host_params::MemoryEntry; + use test_with_tracing::test; + + #[test] + fn test_vtl2_calculate_dma_hint() { + let mut storage = PartitionInfo::new(); + + storage.vtl2_ram.clear(); + storage.vtl2_ram.push(MemoryEntry { + range: MemoryRange::new(0x0..0x6200000), + mem_type: MemoryMapEntryType::VTL2_PROTECTABLE, + vnode: 0, + }); + assert_eq!(vtl2_calculate_dma_hint(2, &storage), 1024); + + storage.vtl2_ram.clear(); + storage.vtl2_ram.push(MemoryEntry { + range: MemoryRange::new(0x0..0x6E00000), + mem_type: MemoryMapEntryType::VTL2_PROTECTABLE, + vnode: 0, + }); + assert_eq!(vtl2_calculate_dma_hint(4, &storage), 1536); + + // Test VP count higher than max from LOOKUP_TABLE. + storage.vtl2_ram.clear(); + storage.vtl2_ram.push(MemoryEntry { + range: MemoryRange::new(0x0..0x7000000), + mem_type: MemoryMapEntryType::VTL2_PROTECTABLE, + vnode: 0, + }); + assert_eq!(vtl2_calculate_dma_hint(112, &storage), 5632); + + // Test unusual VP count. + storage.vtl2_ram.clear(); + storage.vtl2_ram.push(MemoryEntry { + range: MemoryRange::new(0x0..0x6000000), + mem_type: MemoryMapEntryType::VTL2_PROTECTABLE, + vnode: 0, + }); + assert_eq!(vtl2_calculate_dma_hint(52, &storage), 2048); + + storage.vtl2_ram.clear(); + storage.vtl2_ram.push(MemoryEntry { + range: MemoryRange::new(0x0..0x8000000), + mem_type: MemoryMapEntryType::VTL2_PROTECTABLE, + vnode: 0, + }); + assert_eq!(vtl2_calculate_dma_hint(52, &storage), 2560); + } +} diff --git a/openhcl/openhcl_boot/src/host_params/dt.rs b/openhcl/openhcl_boot/src/host_params/dt.rs index 1ae52e24c0..de72ea9676 100644 --- a/openhcl/openhcl_boot/src/host_params/dt.rs +++ b/openhcl/openhcl_boot/src/host_params/dt.rs @@ -13,6 +13,7 @@ use crate::host_params::MAX_ENTROPY_SIZE; use crate::host_params::MAX_NUMA_NODES; use crate::host_params::MAX_PARTITION_RAM_RANGES; use crate::host_params::MAX_VTL2_USED_RANGES; +use crate::host_params::dma_hint::vtl2_calculate_dma_hint; use crate::single_threaded::OffStackRef; use crate::single_threaded::off_stack; use arrayvec::ArrayVec; @@ -455,6 +456,7 @@ impl PartitionInfo { .vtl2_used_ranges .extend(flatten_ranges(used_ranges.iter().copied())); + let mut vtl2_dma_hint_self = false; // Decide if we will reserve memory for a VTL2 private pool. Parse this // from the final command line, or the host provided device tree value. let vtl2_gpa_pool_size = { @@ -463,7 +465,19 @@ impl PartitionInfo { crate::cmdline::parse_boot_command_line(storage.cmdline.as_str()) .enable_vtl2_gpa_pool; - max(dt_page_count.unwrap_or(0), cmdline_page_count.unwrap_or(0)) + let hostval = max(dt_page_count.unwrap_or(0), cmdline_page_count.unwrap_or(0)); + if hostval == 0 + && parsed.nvme_keepalive + && params.isolation_type == IsolationType::None + && storage.memory_allocation_mode == MemoryAllocationMode::Host + { + // If host did not provide the DMA hint value, re-evaluate + // it internally if conditions satisfy. + vtl2_dma_hint_self = true; + vtl2_calculate_dma_hint(parsed.cpu_count(), storage) + } else { + hostval + } }; if vtl2_gpa_pool_size != 0 { // Reserve the specified number of pages for the pool. Use the used @@ -500,6 +514,7 @@ impl PartitionInfo { .extend(flatten_ranges(used_ranges.iter().copied())); storage.vtl2_pool_memory = pool; + storage.dma_hint_self = vtl2_dma_hint_self; } // If we can trust the host, use the provided alias map @@ -528,6 +543,7 @@ impl PartitionInfo { entropy, vtl0_alias_map: _, nvme_keepalive, + dma_hint_self, } = storage; assert!(!vtl2_used_ranges.is_empty()); @@ -550,6 +566,7 @@ impl PartitionInfo { *gic = parsed.gic.clone(); *entropy = parsed.entropy.clone(); *nvme_keepalive = parsed.nvme_keepalive; + *dma_hint_self = vtl2_dma_hint_self; Ok(Some(storage)) } diff --git a/openhcl/openhcl_boot/src/host_params/mod.rs b/openhcl/openhcl_boot/src/host_params/mod.rs index ea9ac0d422..b53652715a 100644 --- a/openhcl/openhcl_boot/src/host_params/mod.rs +++ b/openhcl/openhcl_boot/src/host_params/mod.rs @@ -15,6 +15,7 @@ use memory_range::MemoryRange; use memory_range::subtract_ranges; use shim_params::IsolationType; +mod dma_hint; mod dt; mod mmio; pub mod shim_params; @@ -94,6 +95,8 @@ pub struct PartitionInfo { pub vtl0_alias_map: Option, /// Host is compatible with DMA preservation / NVMe keep-alive. pub nvme_keepalive: bool, + /// DMA hint was calculated in boot-shim instead of host. + pub dma_hint_self: bool, } impl PartitionInfo { @@ -125,6 +128,7 @@ impl PartitionInfo { entropy: None, vtl0_alias_map: None, nvme_keepalive: false, + dma_hint_self: false, } } diff --git a/openhcl/openhcl_boot/src/main.rs b/openhcl/openhcl_boot/src/main.rs index d2cfce8e74..648f7f5be0 100644 --- a/openhcl/openhcl_boot/src/main.rs +++ b/openhcl/openhcl_boot/src/main.rs @@ -956,6 +956,7 @@ mod test { entropy: None, vtl0_alias_map: None, nvme_keepalive: false, + dma_hint_self: false, } } diff --git a/openhcl/openhcl_dma_manager/Cargo.toml b/openhcl/openhcl_dma_manager/Cargo.toml index 97b9821055..6e61581bf5 100644 --- a/openhcl/openhcl_dma_manager/Cargo.toml +++ b/openhcl/openhcl_dma_manager/Cargo.toml @@ -16,6 +16,8 @@ inspect.workspace = true memory_range.workspace = true mesh.workspace = true page_pool_alloc.workspace = true +parking_lot.workspace = true +thiserror.workspace = true user_driver.workspace = true virt.workspace = true vmcore.workspace = true diff --git a/openhcl/openhcl_dma_manager/src/lib.rs b/openhcl/openhcl_dma_manager/src/lib.rs index 42e79d7297..76ea8c2fff 100644 --- a/openhcl/openhcl_dma_manager/src/lib.rs +++ b/openhcl/openhcl_dma_manager/src/lib.rs @@ -16,10 +16,21 @@ use memory_range::MemoryRange; use page_pool_alloc::PagePool; use page_pool_alloc::PagePoolAllocator; use page_pool_alloc::PagePoolAllocatorSpawner; +use parking_lot::Mutex; use std::sync::Arc; +use thiserror::Error; use user_driver::DmaClient; +use user_driver::DmaClientAllocStats; use user_driver::lockmem::LockedMemorySpawner; +/// DMA manager errors. +#[derive(Debug, Error)] +pub enum DmaManagerError { + /// No memory. + #[error("no memory")] + NoMemory, +} + /// Save restore support for [`OpenhclDmaManager`]. pub mod save_restore { use super::OpenhclDmaManager; @@ -122,7 +133,7 @@ pub struct OpenhclDmaManager { } /// The required VTL permissions on DMA allocations. -#[derive(Inspect)] +#[derive(Clone, Inspect)] pub enum LowerVtlPermissionPolicy { /// No specific permission constraints are required. Any, @@ -189,7 +200,11 @@ impl virt::VtlMemoryProtection for DmaManagerLowerVtl { } impl DmaManagerInner { - fn new_dma_client(&self, params: DmaClientParameters) -> anyhow::Result> { + fn new_dma_client( + &self, + params: DmaClientParameters, + fallback: Option>, + ) -> anyhow::Result> { // Allocate the inner client that actually performs the allocations. let backing = { let DmaClientParameters { @@ -283,13 +298,13 @@ impl DmaManagerInner { LowerVtlPermissionPolicy::Any => { // No persistence needed means the `LockedMemorySpawner` // using normal VTL2 ram is fine. - DmaClientBacking::LockedMemory(LockedMemorySpawner) + DmaClientBacking::LockedMemory(LockedMemorySpawner::new()) } LowerVtlPermissionPolicy::Vtl0 => { // `LockedMemorySpawner` uses private VTL2 ram, so // lowering VTL permissions is required. DmaClientBacking::LockedMemoryLowerVtl(LowerVtlMemorySpawner::new( - LockedMemorySpawner, + LockedMemorySpawner::new(), self.lower_vtl.clone(), )) } @@ -297,7 +312,15 @@ impl DmaManagerInner { } }; - Ok(Arc::new(OpenhclDmaClient { backing, params })) + Ok(Arc::new(OpenhclDmaClient { + backing, + params, + fallback, + inner_stats: Mutex::new(DmaClientAllocStats { + total_alloc: 0, + fallback_alloc: 0, + }), + })) } } @@ -346,8 +369,17 @@ impl OpenhclDmaManager { /// Creates a new DMA client with the given device name and lower VTL /// policy. - pub fn new_client(&self, params: DmaClientParameters) -> anyhow::Result> { - self.inner.new_dma_client(params) + pub fn new_client( + &self, + params: DmaClientParameters, + fallback_params: Option, + ) -> anyhow::Result> { + let fb = if let Some(fb1) = fallback_params { + self.inner.new_dma_client(fb1, None).ok() + } else { + None + }; + self.inner.new_dma_client(params, fb) } /// Returns a [`DmaClientSpawner`] for creating DMA clients. @@ -375,6 +407,20 @@ impl OpenhclDmaManager { Ok(()) } + + /// Return shared pool size in bytes. + pub fn shared_pool_size(&self) -> u64 { + self.shared_pool + .as_ref() + .map_or(0, |pool| pool.total_size()) + } + + /// Return private pool size in bytes. + pub fn private_pool_size(&self) -> u64 { + self.private_pool + .as_ref() + .map_or(0, |pool| pool.total_size()) + } } /// A spawner for creating DMA clients. @@ -385,8 +431,17 @@ pub struct DmaClientSpawner { impl DmaClientSpawner { /// Creates a new DMA client with the given parameters. - pub fn new_client(&self, params: DmaClientParameters) -> anyhow::Result> { - self.inner.new_dma_client(params) + pub fn new_client( + &self, + params: DmaClientParameters, + fallback_params: Option, + ) -> anyhow::Result> { + let fb = if let Some(fb1) = fallback_params { + self.inner.new_dma_client(fb1, None).ok() + } else { + None + }; + self.inner.new_dma_client(params, fb) } } @@ -429,6 +484,16 @@ impl DmaClientBacking { DmaClientBacking::LockedMemoryLowerVtl(spawner) => spawner.attach_pending_buffers(), } } + + fn is_persistent(&self) -> bool { + match self { + DmaClientBacking::SharedPool(_allocator) => false, + DmaClientBacking::PrivatePool(_allocator) => true, + DmaClientBacking::LockedMemory(_spawner) => false, + DmaClientBacking::PrivatePoolLowerVtl(_spawner) => false, + DmaClientBacking::LockedMemoryLowerVtl(_spawner) => false, + } + } } /// An OpenHCL dma client. This client implements inspect to allow seeing what @@ -437,6 +502,10 @@ impl DmaClientBacking { pub struct OpenhclDmaClient { backing: DmaClientBacking, params: DmaClientParameters, + #[inspect(skip)] // TODO: Skip for now + /// Allocation statistics per client. + inner_stats: Mutex, + fallback: Option>, } impl DmaClient for OpenhclDmaClient { @@ -444,10 +513,37 @@ impl DmaClient for OpenhclDmaClient { &self, total_size: usize, ) -> anyhow::Result { - self.backing.allocate_dma_buffer(total_size) + // The stats must be tracked here, not in the backing. + let mut stats = self.inner_stats.lock(); + stats.total_alloc += total_size as u64; + let mem_block = self.backing.allocate_dma_buffer(total_size).or_else(|_| { + stats.fallback_alloc += total_size as u64; + self.fallback + .as_ref() + .map_or(Err(DmaManagerError::NoMemory.into()), |f| { + f.allocate_dma_buffer(total_size) + }) + }); + + mem_block } fn attach_pending_buffers(&self) -> anyhow::Result> { self.backing.attach_pending_buffers() } + + /// Query if this client supports persistent allocations. + fn is_persistent(&self) -> bool { + self.backing.is_persistent() + } + + /// How much memory was allocated during session. + fn alloc_size(&self) -> u64 { + self.inner_stats.lock().total_alloc + } + + /// How much backup memory was allocated during session (fallback). + fn fallback_alloc_size(&self) -> u64 { + self.inner_stats.lock().fallback_alloc + } } diff --git a/openhcl/underhill_core/src/dispatch/mod.rs b/openhcl/underhill_core/src/dispatch/mod.rs index bf3c460597..152637070b 100644 --- a/openhcl/underhill_core/src/dispatch/mod.rs +++ b/openhcl/underhill_core/src/dispatch/mod.rs @@ -180,7 +180,7 @@ pub(crate) struct LoadedVm { pub _periodic_telemetry_task: Task<()>, - pub nvme_keep_alive: bool, + pub nvme_keepalive: bool, pub test_configuration: Option, pub dma_manager: OpenhclDmaManager, } @@ -494,7 +494,21 @@ impl LoadedVm { // NOTE: This is set via the corresponding env arg, as this feature is // experimental. - let nvme_keepalive = self.nvme_keep_alive && capabilities_flags.enable_nvme_keepalive(); + let nvme_keepalive_runtime = if let Some(nvme_manager) = self.nvme_manager.as_ref() { + nvme_manager.query_keepalive_runtime_status().await + } else { + false + }; + // Three sources to determine if keepalive can be enabled: + // 1. Host indicates that it is compatible with keepalive + // by setting device tree property when VM starts. + // 2. During servicing the capabilities_flags is also set + // so OpenHCL knows that it wasn't migrated to an older host. + // 3. If we ran out of dedicated DMA memory and used non-persistent + // fallback allocator, disable keepalive altogether. + let nvme_keepalive = self.nvme_keepalive + && capabilities_flags.enable_nvme_keepalive() + && nvme_keepalive_runtime; // Do everything before the log flush under a span. let r = async { @@ -527,7 +541,7 @@ impl LoadedVm { if let Some(nvme_manager) = self.nvme_manager.take() { nvme_manager .shutdown(nvme_keepalive) - .instrument(tracing::info_span!("shutdown_nvme_vfio", %correlation_id, %nvme_keepalive)) + .instrument(tracing::info_span!("shutdown_nvme_vfio", %correlation_id, %nvme_keepalive, %nvme_keepalive_runtime)) .await; } }; diff --git a/openhcl/underhill_core/src/lib.rs b/openhcl/underhill_core/src/lib.rs index 41038dbd07..5f101eebb9 100644 --- a/openhcl/underhill_core/src/lib.rs +++ b/openhcl/underhill_core/src/lib.rs @@ -318,7 +318,7 @@ async fn launch_workers( no_sidecar_hotplug: opt.no_sidecar_hotplug, gdbstub: opt.gdbstub, hide_isolation: opt.hide_isolation, - nvme_keep_alive: opt.nvme_keep_alive, + nvme_keepalive: opt.nvme_keepalive, test_configuration: opt.test_configuration, disable_uefi_frontpage: opt.disable_uefi_frontpage, }; diff --git a/openhcl/underhill_core/src/nvme_manager.rs b/openhcl/underhill_core/src/nvme_manager.rs index 2580996140..de8037fef4 100644 --- a/openhcl/underhill_core/src/nvme_manager.rs +++ b/openhcl/underhill_core/src/nvme_manager.rs @@ -26,8 +26,11 @@ use pal_async::task::Spawn; use pal_async::task::Task; use std::collections::HashMap; use std::collections::hash_map; +use std::sync::Arc; use thiserror::Error; use tracing::Instrument; +use user_driver::DmaClient; +use user_driver::DmaClientAllocStats; use user_driver::vfio::VfioDevice; use vm_resource::AsyncResolveResource; use vm_resource::ResourceId; @@ -126,6 +129,24 @@ impl NvmeManager { &self.client } + /// Save could have been disabled if fallback allocator was used. + pub async fn query_keepalive_runtime_status(&self) -> bool { + let worker_save_restore = match self.client.sender.call(Request::KeepAliveStatus, ()).await + { + Ok(s) => { + if s.stats.fallback_alloc > 0 { + tracing::warn!( + mem_size = s.stats.fallback_alloc, + "fallback mem allocator was used" + ); + } + s.nvme_keepalive + } + Err(_) => false, + }; + self.save_restore_supported && worker_save_restore + } + pub async fn shutdown(self, nvme_keepalive: bool) { // Early return is faster way to skip shutdown. // but we need to thoroughly test the data integrity. @@ -166,6 +187,13 @@ impl NvmeManager { } } +pub struct NvmeKeepaliveRuntimeStatus { + /// Indicates if keepalive is still enabled. + pub nvme_keepalive: bool, + /// Retrieve statistics from connected DMA client. + pub stats: DmaClientAllocStats, +} + enum Request { Inspect(inspect::Deferred), ForceLoadDriver(inspect::DeferredUpdate), @@ -175,6 +203,7 @@ enum Request { span: tracing::Span, nvme_keepalive: bool, }, + KeepAliveStatus(Rpc<(), NvmeKeepaliveRuntimeStatus>), } #[derive(Debug, Clone)] @@ -265,7 +294,28 @@ impl NvmeManagerWorker { // Prevent devices from originating controller reset in drop(). dev.update_servicing_flags(do_not_reset); } - break (span, nvme_keepalive); + // Use final combined flag to report back. + break (span, do_not_reset); + } + Request::KeepAliveStatus(rpc) => { + let mut stats = DmaClientAllocStats { + total_alloc: 0, + fallback_alloc: 0, + }; + for (_s, dev) in self.devices.iter_mut() { + let dev_stats = dev.get_alloc_stats().await; + stats.total_alloc += dev_stats.total_alloc; + stats.fallback_alloc += dev_stats.fallback_alloc; + } + if stats.fallback_alloc > 0 { + // If any of the attached devices ever used fallback allocator, + // update internal tracking and return the result. + self.save_restore_supported = false; + } + rpc.complete(NvmeKeepaliveRuntimeStatus { + nvme_keepalive: self.save_restore_supported, + stats, + }); } } }; @@ -274,6 +324,7 @@ impl NvmeManagerWorker { // because the Shutdown request is never sent. // // Tear down all the devices if nvme_keepalive is not set. + // TODO: Since the loop above is returning combined flag, this condition can be simplified. if !nvme_keepalive || !self.save_restore_supported { async { join_all(self.devices.drain().map(|(pci_id, driver)| { @@ -295,18 +346,40 @@ impl NvmeManagerWorker { let driver = match self.devices.entry(pci_id.to_owned()) { hash_map::Entry::Occupied(entry) => entry.into_mut(), hash_map::Entry::Vacant(entry) => { + let device_name = format!("nvme_{}", pci_id); + let lower_vtl_policy = LowerVtlPermissionPolicy::Any; + let allocation_visibility = if self.is_isolated { + AllocationVisibility::Shared + } else { + AllocationVisibility::Private + }; + + // Main client parameters. + let main_params = DmaClientParameters { + device_name: device_name.clone(), + lower_vtl_policy: lower_vtl_policy.clone(), + allocation_visibility, + persistent_allocations: self.save_restore_supported, + }; + + // Persistent allocations use fixed size memory. + // Create a fallback allocator which uses heap. + // When fallback allocator is involved, nvme_keepalive + // will be implicitly disabled. + let fallback_params = if self.save_restore_supported && !self.is_isolated { + Some(DmaClientParameters { + device_name, + lower_vtl_policy, + allocation_visibility, + persistent_allocations: false, + }) + } else { + None + }; + let dma_client = self .dma_client_spawner - .new_client(DmaClientParameters { - device_name: format!("nvme_{}", pci_id), - lower_vtl_policy: LowerVtlPermissionPolicy::Any, - allocation_visibility: if self.is_isolated { - AllocationVisibility::Shared - } else { - AllocationVisibility::Private - }, - persistent_allocations: self.save_restore_supported, - }) + .new_client(main_params, fallback_params) .map_err(InnerError::DmaClient)?; let device = VfioDevice::new(&self.driver_source, entry.key(), dma_client) @@ -341,6 +414,45 @@ impl NvmeManagerWorker { .map_err(|source| InnerError::Namespace { nsid, source }) } + /// Copy of the code from get_driver. + fn get_dma_client(&self, pci_id: String) -> Result, InnerError> { + let device_name = format!("nvme_{}", pci_id); + let lower_vtl_policy = LowerVtlPermissionPolicy::Any; + let allocation_visibility = if self.is_isolated { + AllocationVisibility::Shared + } else { + AllocationVisibility::Private + }; + + // Main client parameters. + let main_params = DmaClientParameters { + device_name: device_name.clone(), + lower_vtl_policy: lower_vtl_policy.clone(), + allocation_visibility, + persistent_allocations: self.save_restore_supported, + }; + + // Persistent allocations use fixed size memory. + // Create a fallback allocator which uses heap. + // When fallback allocator is involved, nvme_keepalive + // will be implicitly disabled. + let fallback_params = if self.save_restore_supported && !self.is_isolated { + Some(DmaClientParameters { + device_name, + lower_vtl_policy, + allocation_visibility, + persistent_allocations: false, + }) + } else { + None + }; + + Ok(self + .dma_client_spawner + .new_client(main_params, fallback_params) + .map_err(InnerError::DmaClient)?) + } + /// Saves NVMe device's states into buffer during servicing. pub async fn save(&mut self) -> anyhow::Result { let mut nvme_disks: Vec = Vec::new(); @@ -365,17 +477,7 @@ impl NvmeManagerWorker { self.devices = HashMap::new(); for disk in &saved_state.nvme_disks { let pci_id = disk.pci_id.clone(); - - let dma_client = self.dma_client_spawner.new_client(DmaClientParameters { - device_name: format!("nvme_{}", pci_id), - lower_vtl_policy: LowerVtlPermissionPolicy::Any, - allocation_visibility: if self.is_isolated { - AllocationVisibility::Shared - } else { - AllocationVisibility::Private - }, - persistent_allocations: true, - })?; + let dma_client = self.get_dma_client(pci_id.clone())?; // This code can wait on each VFIO device until it is arrived. // A potential optimization would be to delay VFIO operation diff --git a/openhcl/underhill_core/src/options.rs b/openhcl/underhill_core/src/options.rs index 5dd9254f8c..647f1b8442 100644 --- a/openhcl/underhill_core/src/options.rs +++ b/openhcl/underhill_core/src/options.rs @@ -140,7 +140,7 @@ pub struct Options { pub no_sidecar_hotplug: bool, /// (OPENHCL_NVME_KEEP_ALIVE=1) Enable nvme keep alive when servicing. - pub nvme_keep_alive: bool, + pub nvme_keepalive: bool, /// (OPENHCL_TEST_CONFIG=\) /// Test configurations are designed to replicate specific behaviors and @@ -237,7 +237,7 @@ impl Options { let no_sidecar_hotplug = parse_legacy_env_bool("OPENHCL_NO_SIDECAR_HOTPLUG"); let gdbstub = parse_legacy_env_bool("OPENHCL_GDBSTUB"); let gdbstub_port = parse_legacy_env_number("OPENHCL_GDBSTUB_PORT")?.map(|x| x as u32); - let nvme_keep_alive = parse_env_bool("OPENHCL_NVME_KEEP_ALIVE"); + let nvme_keepalive = parse_env_bool("OPENHCL_NVME_KEEP_ALIVE"); let test_configuration = parse_env_string("OPENHCL_TEST_CONFIG").and_then(|x| { x.to_string_lossy() .parse::() @@ -304,7 +304,7 @@ impl Options { hide_isolation, halt_on_guest_halt, no_sidecar_hotplug, - nvme_keep_alive, + nvme_keepalive, test_configuration, disable_uefi_frontpage, }) diff --git a/openhcl/underhill_core/src/worker.rs b/openhcl/underhill_core/src/worker.rs index 6c668a387f..2c1051941a 100644 --- a/openhcl/underhill_core/src/worker.rs +++ b/openhcl/underhill_core/src/worker.rs @@ -280,8 +280,8 @@ pub struct UnderhillEnvCfg { pub gdbstub: bool, /// Hide the isolation mode from the guest. pub hide_isolation: bool, - /// Enable nvme keep alive. - pub nvme_keep_alive: bool, + /// Enable nvme keep-alive. + pub nvme_keepalive: bool, /// test configuration pub test_configuration: Option, @@ -745,16 +745,19 @@ impl UhVmNetworkSettings { .unwrap_or(MAX_SUBCHANNELS_PER_VNIC) .min(vps_count as u16); - let dma_client = dma_client_spawner.new_client(DmaClientParameters { - device_name: format!("nic_{}", nic_config.pci_id), - lower_vtl_policy: LowerVtlPermissionPolicy::Any, - allocation_visibility: if is_isolated { - AllocationVisibility::Shared - } else { - AllocationVisibility::Private + let dma_client = dma_client_spawner.new_client( + DmaClientParameters { + device_name: format!("nic_{}", nic_config.pci_id), + lower_vtl_policy: LowerVtlPermissionPolicy::Any, + allocation_visibility: if is_isolated { + AllocationVisibility::Shared + } else { + AllocationVisibility::Private + }, + persistent_allocations: false, }, - persistent_allocations: false, - })?; + None, + )?; let (vf_manager, endpoints, save_state) = HclNetworkVFManager::new( nic_config.instance_id, @@ -1515,6 +1518,14 @@ async fn new_underhill_vm( .context("failed to restore global dma manager")?; } + // Print important info about DMA sizes. + tracing::info!( + dma_hint_self = boot_info.dma_hint_self, + shared_pool_size = shared_pool_size, + private_pool_size = dma_manager.private_pool_size(), + "dma pool" + ); + // Test with the highest VTL for which we have a GuestMemory object let highest_vtl_gm = gm.vtl1().unwrap_or(gm.vtl0()); @@ -1556,16 +1567,19 @@ async fn new_underhill_vm( if !matches!(isolation, virt::IsolationType::Vbs) { get_client.set_gpa_allocator( dma_manager - .new_client(DmaClientParameters { - device_name: "get".into(), - lower_vtl_policy: LowerVtlPermissionPolicy::Vtl0, - allocation_visibility: if isolation.is_isolated() { - AllocationVisibility::Shared - } else { - AllocationVisibility::Private + .new_client( + DmaClientParameters { + device_name: "get".into(), + lower_vtl_policy: LowerVtlPermissionPolicy::Vtl0, + allocation_visibility: if isolation.is_isolated() { + AllocationVisibility::Shared + } else { + AllocationVisibility::Private + }, + persistent_allocations: false, }, - persistent_allocations: false, - }) + None, + ) .context("get dma client")?, ); } @@ -1720,18 +1734,24 @@ async fn new_underhill_vm( Some(virt_mshv_vtl::CvmLateParams { shared_gm: cvm_mem.shared_gm.clone(), isolated_memory_protector: cvm_mem.protector.clone(), - shared_dma_client: dma_manager.new_client(DmaClientParameters { - device_name: "partition-shared".into(), - lower_vtl_policy: LowerVtlPermissionPolicy::Any, - allocation_visibility: AllocationVisibility::Shared, - persistent_allocations: false, - })?, - private_dma_client: dma_manager.new_client(DmaClientParameters { - device_name: "partition-private".into(), - lower_vtl_policy: LowerVtlPermissionPolicy::Any, - allocation_visibility: AllocationVisibility::Private, - persistent_allocations: false, - })?, + shared_dma_client: dma_manager.new_client( + DmaClientParameters { + device_name: "partition-shared".into(), + lower_vtl_policy: LowerVtlPermissionPolicy::Any, + allocation_visibility: AllocationVisibility::Shared, + persistent_allocations: false, + }, + None, + )?, + private_dma_client: dma_manager.new_client( + DmaClientParameters { + device_name: "partition-private".into(), + lower_vtl_policy: LowerVtlPermissionPolicy::Any, + allocation_visibility: AllocationVisibility::Private, + persistent_allocations: false, + }, + None, + )?, }) } else { None @@ -1839,7 +1859,10 @@ async fn new_underhill_vm( // TODO: reevaluate enablement of nvme save restore when private pool // save restore to bootshim is available. let private_pool_available = !runtime_params.private_pool_ranges().is_empty(); - let save_restore_supported = env_cfg.nvme_keep_alive && private_pool_available; + // Two separate flags because: + // - private pool alone can be used for other purposes; + // - host must explicitly indicate that keepalive is supported (compatibility). + let save_restore_supported = env_cfg.nvme_keepalive && private_pool_available; let manager = NvmeManager::new( &driver_source, @@ -2857,12 +2880,15 @@ async fn new_underhill_vm( let shutdown_guest = SimpleVmbusClientDeviceWrapper::new( driver_source.simple(), dma_manager - .new_client(DmaClientParameters { - device_name: "shutdown-relay".into(), - lower_vtl_policy: LowerVtlPermissionPolicy::Vtl0, - allocation_visibility: AllocationVisibility::Private, - persistent_allocations: false, - }) + .new_client( + DmaClientParameters { + device_name: "shutdown-relay".into(), + lower_vtl_policy: LowerVtlPermissionPolicy::Vtl0, + allocation_visibility: AllocationVisibility::Private, + persistent_allocations: false, + }, + None, + ) .context("shutdown relay dma client")?, shutdown_guest, )?; @@ -3004,7 +3030,7 @@ async fn new_underhill_vm( control_send, _periodic_telemetry_task: periodic_telemetry_task, - nvme_keep_alive: env_cfg.nvme_keep_alive, + nvme_keepalive: env_cfg.nvme_keepalive, test_configuration: env_cfg.test_configuration, dma_manager, }; diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs index 558be0a035..66bbe00994 100644 --- a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs +++ b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs @@ -34,6 +34,7 @@ use thiserror::Error; use tracing::Instrument; use tracing::info_span; use user_driver::DeviceBacking; +use user_driver::DmaClientAllocStats; use user_driver::backoff::Backoff; use user_driver::interrupt::DeviceInterrupt; use user_driver::memory::MemoryBlock; @@ -161,6 +162,8 @@ enum NvmeWorkerRequest { CreateIssuer(Rpc), /// Save worker state. Save(Rpc<(), anyhow::Result>), + /// Query how much memory was allocated with fallback allocator. + QueryAllocatorStats(Rpc<(), DmaClientAllocStats>), } impl NvmeDriver { @@ -252,7 +255,7 @@ impl NvmeDriver { io_issuers, rescan_event: Default::default(), namespaces: vec![], - nvme_keepalive: false, + nvme_keepalive: false, // In the beginning always assume it's not supported. }) } @@ -581,7 +584,7 @@ impl NvmeDriver { io_issuers, rescan_event: Default::default(), namespaces: vec![], - nvme_keepalive: true, + nvme_keepalive: true, // We know it is supported because we're in restore(). }; let task = &mut this.task.as_mut().unwrap(); @@ -693,6 +696,19 @@ impl NvmeDriver { pub fn update_servicing_flags(&mut self, nvme_keepalive: bool) { self.nvme_keepalive = nvme_keepalive; } + + /// Queries worker task if memory allocator ever fell back. + pub async fn get_alloc_stats(&self) -> DmaClientAllocStats { + let fb = self + .io_issuers + .send + .call(NvmeWorkerRequest::QueryAllocatorStats, ()) + .await; + fb.unwrap_or(DmaClientAllocStats { + total_alloc: 0, + fallback_alloc: 0, + }) + } } async fn handle_asynchronous_events( @@ -792,6 +808,12 @@ impl AsyncRun for DriverWorkerTask { Some(NvmeWorkerRequest::Save(rpc)) => { rpc.handle(async |_| self.save(state).await).await } + Some(NvmeWorkerRequest::QueryAllocatorStats(rpc)) => { + rpc.complete(DmaClientAllocStats { + total_alloc: self.device.dma_client().alloc_size(), + fallback_alloc: self.device.dma_client().fallback_alloc_size(), + }) + } None => break, } } diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/queue_pair.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/queue_pair.rs index 95ee213b1a..8b11e06a57 100644 --- a/vm/devices/storage/disk_nvme/nvme_driver/src/queue_pair.rs +++ b/vm/devices/storage/disk_nvme/nvme_driver/src/queue_pair.rs @@ -181,15 +181,15 @@ impl QueuePair { interrupt: DeviceInterrupt, registers: Arc>, ) -> anyhow::Result { + assert!(sq_entries <= Self::MAX_SQ_ENTRIES); + assert!(cq_entries <= Self::MAX_CQ_ENTRIES); + let total_size = QueuePair::SQ_SIZE + QueuePair::CQ_SIZE + QueuePair::PER_QUEUE_PAGES * PAGE_SIZE; let dma_client = device.dma_client(); let mem = dma_client .allocate_dma_buffer(total_size) - .context("failed to allocate memory for queues")?; - - assert!(sq_entries <= Self::MAX_SQ_ENTRIES); - assert!(cq_entries <= Self::MAX_CQ_ENTRIES); + .context("failed to allocate memory for the queues")?; QueuePair::new_or_restore( spawner, qid, sq_entries, cq_entries, interrupt, registers, mem, None, diff --git a/vm/devices/user_driver/src/lib.rs b/vm/devices/user_driver/src/lib.rs index 2a451bc055..7724fdd2bb 100644 --- a/vm/devices/user_driver/src/lib.rs +++ b/vm/devices/user_driver/src/lib.rs @@ -69,4 +69,21 @@ pub trait DmaClient: Send + Sync + Inspect { /// Attach all previously allocated memory blocks. fn attach_pending_buffers(&self) -> anyhow::Result>; + + /// Query if this client supports persistent allocations. + fn is_persistent(&self) -> bool; + + /// How much memory was allocated during session. + fn alloc_size(&self) -> u64; + + /// How much backup memory was allocated during session (fallback). + fn fallback_alloc_size(&self) -> u64; +} + +/// DMA allocator statistics per client. +pub struct DmaClientAllocStats { + /// How much memory (bytes) was allocated by a main allocator. + pub total_alloc: u64, + /// How much memory (bytes) was allocated by a fallback allocator. + pub fallback_alloc: u64, } diff --git a/vm/devices/user_driver/src/lockmem.rs b/vm/devices/user_driver/src/lockmem.rs index b7cb190630..aa9fee6bf6 100644 --- a/vm/devices/user_driver/src/lockmem.rs +++ b/vm/devices/user_driver/src/lockmem.rs @@ -6,6 +6,7 @@ use crate::memory::MappedDmaTarget; use anyhow::Context; use inspect::Inspect; +use parking_lot::Mutex; use std::ffi::c_void; use std::fs::File; use std::io::Read; @@ -123,15 +124,43 @@ unsafe impl MappedDmaTarget for LockedMemory { } } -#[derive(Clone, Inspect)] -pub struct LockedMemorySpawner; +#[derive(Inspect)] +pub struct LockedMemorySpawner { + alloc_size: Mutex, +} + +impl LockedMemorySpawner { + /// Create a new [`LockedMemorySpawner`]. + pub fn new() -> Self { + Self { + alloc_size: Mutex::new(0), + } + } +} impl crate::DmaClient for LockedMemorySpawner { fn allocate_dma_buffer(&self, len: usize) -> anyhow::Result { - Ok(crate::memory::MemoryBlock::new(LockedMemory::new(len)?)) + let mem_block = crate::memory::MemoryBlock::new(LockedMemory::new(len)?); + *self.alloc_size.lock() += len as u64; + Ok(mem_block) } fn attach_pending_buffers(&self) -> anyhow::Result> { anyhow::bail!("restore not supported for lockmem") } + + /// Query if this client supports persistent allocations. + fn is_persistent(&self) -> bool { + false + } + + /// How much memory was allocated during session. + fn alloc_size(&self) -> u64 { + *self.alloc_size.lock() + } + + /// Not supported for this allocator. + fn fallback_alloc_size(&self) -> u64 { + 0 + } } diff --git a/vm/devices/user_driver/src/vfio.rs b/vm/devices/user_driver/src/vfio.rs index 0bc6a7670b..4151c56079 100644 --- a/vm/devices/user_driver/src/vfio.rs +++ b/vm/devices/user_driver/src/vfio.rs @@ -83,7 +83,7 @@ impl VfioDevice { pub async fn restore( driver_source: &VmTaskDriverSource, pci_id: &str, - keepalive: bool, + vf_keepalive: bool, dma_client: Arc, ) -> anyhow::Result { let path = Path::new("/sys/bus/pci/devices").join(pci_id); @@ -110,7 +110,7 @@ impl VfioDevice { } container.set_iommu(IommuType::NoIommu)?; - if keepalive { + if vf_keepalive { // Prevent physical hardware interaction when restoring. group.set_keep_alive(pci_id)?; } diff --git a/vm/page_pool_alloc/src/lib.rs b/vm/page_pool_alloc/src/lib.rs index c55ba44655..69234c2484 100644 --- a/vm/page_pool_alloc/src/lib.rs +++ b/vm/page_pool_alloc/src/lib.rs @@ -504,6 +504,7 @@ pub struct PagePool { inner: Arc, #[inspect(iter_by_index)] ranges: Vec, + total_len: u64, } impl PagePool { @@ -557,6 +558,7 @@ impl PagePool { mapping, }), ranges: memory.to_vec(), + total_len: total_len as u64, }) } @@ -621,6 +623,11 @@ impl PagePool { Ok(()) } } + + /// Returns the total size of the pool in bytes. + pub fn total_size(&self) -> u64 { + self.total_len + } } /// A spawner for [`PagePoolAllocator`] instances. @@ -657,6 +664,8 @@ pub struct PagePoolAllocator { inner: Arc, #[inspect(skip)] device_id: usize, + /// Total alloc size in bytes for the session duration. + alloc_size: Mutex, } impl PagePoolAllocator { @@ -695,6 +704,7 @@ impl PagePoolAllocator { Ok(Self { inner: inner.clone(), device_id, + alloc_size: Mutex::new(0), }) } @@ -867,7 +877,8 @@ impl user_driver::DmaClient for PagePoolAllocator { let alloc = self .alloc(size_pages, "vfio dma".into()) - .context("failed to allocate shared mem")?; + .context("failed to allocate from page pool")?; + *self.alloc_size.lock() += len as u64; // The VfioDmaBuffer trait requires that newly allocated buffers are // zeroed. @@ -883,6 +894,21 @@ impl user_driver::DmaClient for PagePoolAllocator { .map(|alloc| alloc.into_memory_block()) .collect() } + + /// Query if this client supports persistent allocations. + fn is_persistent(&self) -> bool { + true + } + + /// How much memory was allocated during session. + fn alloc_size(&self) -> u64 { + *self.alloc_size.lock() + } + + /// Not supported for this allocator. + fn fallback_alloc_size(&self) -> u64 { + 0 + } } #[cfg(test)]