Skip to content

DMA hint calculation in OpenHCL bootshim and fallback mem allocator for NVMe #1190

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3808,6 +3808,7 @@ dependencies = [
"anyhow",
"hvdef",
"inspect",
"parking_lot",
"user_driver",
"virt",
]
Expand Down Expand Up @@ -4786,6 +4787,7 @@ dependencies = [
"sha2",
"sidecar_defs",
"tdcall",
"test_with_tracing",
"underhill_confidentiality",
"x86defs",
"zerocopy 0.8.24",
Expand All @@ -4804,6 +4806,8 @@ dependencies = [
"memory_range",
"mesh",
"page_pool_alloc",
"parking_lot",
"thiserror 2.0.12",
"user_driver",
"virt",
"vmcore",
Expand Down
14 changes: 14 additions & 0 deletions openhcl/bootloader_fdt_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ pub struct ParsedBootDtInfo {
/// VTL2 range for private pool memory.
#[inspect(iter_by_index)]
pub private_pool_ranges: Vec<MemoryRangeWithNode>,
/// Source of DMA hint calculation.
pub dma_hint_self: bool,
}

fn err_to_owned(e: fdt::parser::Error<'_>) -> anyhow::Error {
Expand Down Expand Up @@ -207,6 +209,7 @@ struct OpenhclInfo {
memory_allocation_mode: MemoryAllocationMode,
isolation: IsolationType,
private_pool_ranges: Vec<MemoryRangeWithNode>,
dma_hint_self: bool,
}

fn parse_memory_openhcl(node: &Node<'_>) -> anyhow::Result<AddressRange> {
Expand Down Expand Up @@ -394,6 +397,11 @@ fn parse_openhcl(node: &Node<'_>) -> anyhow::Result<OpenhclInfo> {
.transpose()
.context("unable to read vtl0-alias-map")?;

let dma_hint_self = matches!(
try_find_property(node, "dma-hint").and_then(|p| p.read_str().ok()),
Some("self")
);

// Extract vmbus mmio information from the overall memory map.
let vtl0_mmio = memory
.iter()
Expand All @@ -416,6 +424,7 @@ fn parse_openhcl(node: &Node<'_>) -> anyhow::Result<OpenhclInfo> {
memory_allocation_mode,
isolation,
private_pool_ranges,
dma_hint_self,
})
}

Expand Down Expand Up @@ -509,6 +518,7 @@ impl ParsedBootDtInfo {
let mut isolation = IsolationType::None;
let mut vtl2_reserved_range = MemoryRange::EMPTY;
let mut private_pool_ranges = Vec::new();
let mut dma_hint_self = false;

let parser = Parser::new(raw)
.map_err(err_to_owned)
Expand Down Expand Up @@ -538,6 +548,7 @@ impl ParsedBootDtInfo {
memory_allocation_mode: n_memory_allocation_mode,
isolation: n_isolation,
private_pool_ranges: n_private_pool_ranges,
dma_hint_self: n_dma_hint_self,
} = parse_openhcl(&child)?;
vtl0_mmio = n_vtl0_mmio;
config_ranges = n_config_ranges;
Expand All @@ -548,6 +559,7 @@ impl ParsedBootDtInfo {
isolation = n_isolation;
vtl2_reserved_range = n_vtl2_reserved_range;
private_pool_ranges = n_private_pool_ranges;
dma_hint_self = n_dma_hint_self;
}

_ if child.name.starts_with("memory@") => {
Expand Down Expand Up @@ -580,6 +592,7 @@ impl ParsedBootDtInfo {
isolation,
vtl2_reserved_range,
private_pool_ranges,
dma_hint_self,
})
}
}
Expand Down Expand Up @@ -945,6 +958,7 @@ mod tests {
range: MemoryRange::new(0x60000..0x70000),
vnode: 0,
}],
dma_hint_self: false,
};

let dt = build_dt(&orig_info).unwrap();
Expand Down
1 change: 1 addition & 0 deletions openhcl/lower_vtl_permissions_guard/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ rust-version.workspace = true
[target.'cfg(target_os = "linux")'.dependencies]
hvdef.workspace = true
inspect.workspace = true
parking_lot.workspace = true
user_driver.workspace = true
virt.workspace = true

Expand Down
19 changes: 19 additions & 0 deletions openhcl/lower_vtl_permissions_guard/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pub use device_dma::LowerVtlDmaBuffer;
use anyhow::Context;
use anyhow::Result;
use inspect::Inspect;
use parking_lot::Mutex;
use std::sync::Arc;
use user_driver::DmaClient;
use user_driver::memory::MemoryBlock;
Expand Down Expand Up @@ -79,6 +80,7 @@ pub struct LowerVtlMemorySpawner<T: DmaClient> {
spawner: T,
#[inspect(skip)]
vtl_protect: Arc<dyn VtlMemoryProtection + Send + Sync>,
alloc_size: Mutex<u64>,
}

impl<T: DmaClient> LowerVtlMemorySpawner<T> {
Expand All @@ -88,6 +90,7 @@ impl<T: DmaClient> LowerVtlMemorySpawner<T> {
Self {
spawner,
vtl_protect,
alloc_size: Mutex::new(0),
}
}
}
Expand All @@ -98,6 +101,7 @@ impl<T: DmaClient> DmaClient for LowerVtlMemorySpawner<T> {
let vtl_guard =
PagesAccessibleToLowerVtl::new_from_pages(self.vtl_protect.clone(), mem.pfns())
.context("failed to lower VTL permissions on memory block")?;
*self.alloc_size.lock() += len as u64;

Ok(MemoryBlock::new(LowerVtlDmaBuffer {
block: mem,
Expand All @@ -108,4 +112,19 @@ impl<T: DmaClient> DmaClient for LowerVtlMemorySpawner<T> {
fn attach_pending_buffers(&self) -> Result<Vec<MemoryBlock>> {
anyhow::bail!("restore is not supported for LowerVtlMemorySpawner")
}

/// Query if this client supports persistent allocations.
///
/// Always returns `false` for `LowerVtlMemorySpawner`.
fn is_persistent(&self) -> bool {
false
}

/// How much memory was allocated during the session.
///
/// Returns the current value of the `alloc_size` counter, read under its
/// mutex. The allocation path adds the requested `len` to this counter once
/// VTL permissions have been lowered for the new block.
// NOTE(review): nothing visible here ever decrements the counter, so this
// looks like a running total rather than live usage -- confirm.
fn alloc_size(&self) -> u64 {
*self.alloc_size.lock()
}

/// Size of fallback allocations; not supported for this allocator.
///
/// Always returns 0.
fn fallback_alloc_size(&self) -> u64 {
0
}
}
3 changes: 3 additions & 0 deletions openhcl/openhcl_boot/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,6 @@ minimal_rt_build.workspace = true

[lints]
workspace = true

[dev-dependencies]
test_with_tracing.workspace = true
5 changes: 5 additions & 0 deletions openhcl/openhcl_boot/src/dt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,11 @@ pub fn write_dt(
openhcl_builder = openhcl_builder.add_u64(p_vtl0_alias_map, data)?;
}

if partition_info.dma_hint_self {
let p_dma_hint = openhcl_builder.add_string("dma-hint")?;
openhcl_builder = openhcl_builder.add_str(p_dma_hint, "self")?;
}

#[derive(Debug, Copy, Clone, PartialEq, Eq)]
struct Vtl2MemoryEntry {
range: MemoryRange,
Expand Down
197 changes: 197 additions & 0 deletions openhcl/openhcl_boot/src/host_params/dma_hint.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Calculate DMA hint value if not provided by host.

use super::PartitionInfo;
use igvm_defs::{MemoryMapEntryType, PAGE_SIZE_4K};

/// Lookup table for DMA hint calculation.
///
/// Each entry is a `(vp_count, vtl2_memory_mb, dma_hint_mb)` tuple; tuples are
/// used instead of a struct to keep the table readable.
///
/// Keep the table sorted by VP count, then by assigned VTL2 memory.
///
/// Values are stored as `u16` to keep the table small. The largest VTL2
/// memory size known today is 24838 MiB, which fits in a `u16`.
///
/// NOTE(review): the last entry carries a `dma_hint_mb` of 0 because it has
/// not been validated yet. Any consumer that divides by `dma_hint_mb` must
/// skip such entries.
const LOOKUP_TABLE: &[(u16, u16, u16)] = &[
(2, 96, 2),
(2, 98, 4),
(2, 100, 4),
(2, 104, 4),
(4, 108, 2),
(4, 110, 6),
(4, 112, 6),
(4, 118, 8),
(4, 130, 12),
(8, 140, 4),
(8, 148, 10),
(8, 170, 20),
(8, 176, 20),
(16, 234, 12),
(16, 256, 20), // There is another configuration with '18'.
(16, 268, 38),
(16, 282, 54),
(24, 420, 66),
(32, 404, 22),
(32, 516, 36),
(32, 538, 74), // There is another configuration with '52'.
(48, 558, 32),
(48, 718, 52),
(48, 730, 52),
(48, 746, 78),
(64, 712, 42),
(64, 924, 68),
(64, 938, 68),
(96, 1030, 64),
(96, 1042, 114), // Can be '64'.
(96, 1058, 114), // Can be '106'.
(96, 1340, 102),
(96, 1358, 104),
(96, 1382, 120),
(112, 1566, 288),
(128, 1342, 84),
(128, 1360, 84),
(896, 12912, 0), // (516) Needs to be validated as the vNIC number is unknown.
];

/// Rounds a count of 4 KiB pages up to the next 2 MiB boundary.
///
/// 2 MiB is 512 pages of 4 KiB, so the result is the smallest multiple of 512
/// that is greater than or equal to `pages_4k`.
fn round_up_to_2mb(pages_4k: u64) -> u64 {
    const PAGES_PER_2MB: u64 = 512;
    let low_bits = PAGES_PER_2MB - 1;
    let bumped = pages_4k + low_bits;
    bumped & !low_bits
}

/// Returns calculated DMA hint value, in 4k pages.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd probably want some more rationale on what we should do if we don't match one of these lookup tables exactly.

/// Calculates the DMA hint, in 4 KiB pages, from the VP count and the amount
/// of VTL2-protectable memory described by `storage`.
///
/// Resolution order:
/// 1. If `LOOKUP_TABLE` has an entry with exactly this VP count and VTL2
///    memory size (in MiB), its hint is returned as-is.
/// 2. Otherwise, if the table has entries for this VP count, the hint is
///    extrapolated from the memory-to-hint ratios of those entries.
/// 3. Otherwise the ratios of the nearest smaller and nearest larger VP counts
///    in the table are used.
///
/// Extrapolated values are rounded up to a 2 MiB boundary. Table entries whose
/// hint is 0 are unvalidated placeholders and are ignored, since they cannot
/// contribute a ratio.
///
/// Returns 0 when the VTL2 memory size is 0 or implausibly large.
pub fn vtl2_calculate_dma_hint(vp_count: usize, storage: &PartitionInfo) -> u64 {
    const MIB: u64 = 1048576;

    let mem_size = storage
        .vtl2_ram
        .iter()
        .filter(|m| m.mem_type == MemoryMapEntryType::VTL2_PROTECTABLE)
        .map(|e| e.range.len())
        .sum::<u64>();

    // Sanity check for the calculated memory size.
    if mem_size == 0 || mem_size >= 0xFFFFFFFF00000 {
        return 0;
    }
    // The upper bound above guarantees the MiB count fits in a u32.
    let mem_size_mb = (mem_size / MIB) as u32;

    // Saturate instead of wrapping so a huge VP count cannot alias a small one.
    let vp_count = vp_count.min(u16::MAX as usize) as u16;

    // To avoid using floats, ratios are scaled by 1000. The initial values
    // bound the range and act as the default when no table entry applies.
    let mut min_ratio_1000th: u32 = 100000;
    let mut max_ratio_1000th: u32 = 1000;
    // Callers must only pass entries with a non-zero hint.
    let mut track_ratio = |vtl2_memory_mb: u16, dma_hint_mb: u16| {
        let ratio_1000th = u32::from(vtl2_memory_mb) * 1000 / u32::from(dma_hint_mb);
        min_ratio_1000th = min_ratio_1000th.min(ratio_1000th);
        max_ratio_1000th = max_ratio_1000th.max(ratio_1000th);
    };

    let mut nearest_lower_vp: u16 = 1;
    // Start at the maximum so that `min()` below can actually find the
    // nearest larger VP count.
    let mut nearest_upper_vp: u16 = u16::MAX;
    let mut vp_count_in_table = false;

    for &(vp_lookup, vtl2_memory_mb, dma_hint_mb) in LOOKUP_TABLE {
        if dma_hint_mb == 0 {
            // Unvalidated placeholder: no usable hint and no usable ratio.
            continue;
        }
        match vp_lookup.cmp(&vp_count) {
            core::cmp::Ordering::Less => {
                nearest_lower_vp = nearest_lower_vp.max(vp_lookup);
            }
            core::cmp::Ordering::Equal => {
                // Compare at full width; truncating the memory size to u16
                // could produce a false exact match.
                if u32::from(vtl2_memory_mb) == mem_size_mb {
                    // Found exact match.
                    return u64::from(dma_hint_mb) * MIB / PAGE_SIZE_4K;
                }
                // Prepare for possible extrapolation.
                vp_count_in_table = true;
                track_ratio(vtl2_memory_mb, dma_hint_mb);
            }
            core::cmp::Ordering::Greater => {
                nearest_upper_vp = nearest_upper_vp.min(vp_lookup);
            }
        }
    }

    // It is possible there were no entries for this VP count in the lookup
    // table (i.e. unexpected VP count). Fall back to the neighbouring ones.
    if !vp_count_in_table {
        for &(vp_lookup, vtl2_memory_mb, dma_hint_mb) in LOOKUP_TABLE {
            let is_neighbour = vp_lookup == nearest_lower_vp || vp_lookup == nearest_upper_vp;
            if is_neighbour && dma_hint_mb != 0 {
                track_ratio(vtl2_memory_mb, dma_hint_mb);
            }
        }
    }

    // No exact match: extrapolate using the midpoint of the observed ratios.
    let mid_ratio_1000th = (min_ratio_1000th + max_ratio_1000th) as u64 / 2;
    let dma_hint_4k = (mem_size_mb as u64 * 1000 * (MIB / PAGE_SIZE_4K)) / mid_ratio_1000th;

    // And then round up to 2MiB.
    round_up_to_2mb(dma_hint_4k)
}

// Unit tests for `vtl2_calculate_dma_hint`. Expected values are in 4 KiB pages.
#[cfg(test)]
mod test {
use super::*;
use crate::MemoryRange;
use crate::host_params::MemoryEntry;
use test_with_tracing::test;

#[test]
fn test_vtl2_calculate_dma_hint() {
let mut storage = PartitionInfo::new();

// Exact match: 2 VPs with 98 MiB -> 4 MiB hint (1024 pages).
storage.vtl2_ram.clear();
storage.vtl2_ram.push(MemoryEntry {
range: MemoryRange::new(0x0..0x6200000),
mem_type: MemoryMapEntryType::VTL2_PROTECTABLE,
vnode: 0,
});
assert_eq!(vtl2_calculate_dma_hint(2, &storage), 1024);

// Exact match: 4 VPs with 110 MiB -> 6 MiB hint (1536 pages).
storage.vtl2_ram.clear();
storage.vtl2_ram.push(MemoryEntry {
range: MemoryRange::new(0x0..0x6E00000),
mem_type: MemoryMapEntryType::VTL2_PROTECTABLE,
vnode: 0,
});
assert_eq!(vtl2_calculate_dma_hint(4, &storage), 1536);

// VP count present in LOOKUP_TABLE, but the memory size (112 MiB) matches
// no entry for it, so the hint is extrapolated and rounded up to 2 MiB.
storage.vtl2_ram.clear();
storage.vtl2_ram.push(MemoryEntry {
range: MemoryRange::new(0x0..0x7000000),
mem_type: MemoryMapEntryType::VTL2_PROTECTABLE,
vnode: 0,
});
assert_eq!(vtl2_calculate_dma_hint(112, &storage), 5632);

// Test unusual VP count: 52 is not in LOOKUP_TABLE, so the hint is
// extrapolated from nearby table entries (96 MiB case).
storage.vtl2_ram.clear();
storage.vtl2_ram.push(MemoryEntry {
range: MemoryRange::new(0x0..0x6000000),
mem_type: MemoryMapEntryType::VTL2_PROTECTABLE,
vnode: 0,
});
assert_eq!(vtl2_calculate_dma_hint(52, &storage), 2048);

// Same unusual VP count with more memory (128 MiB) yields a larger hint.
storage.vtl2_ram.clear();
storage.vtl2_ram.push(MemoryEntry {
range: MemoryRange::new(0x0..0x8000000),
mem_type: MemoryMapEntryType::VTL2_PROTECTABLE,
vnode: 0,
});
assert_eq!(vtl2_calculate_dma_hint(52, &storage), 2560);
}
}
Loading