
mana: keepalive feature #1136


Draft: wants to merge 56 commits into main

Commits (56):
ee98c83
Tests are now using NvmeTestEmulatedDevice instead
gurasinghMS Feb 28, 2025
9e698f2
Emulated Device with a custom allocator is now working
gurasinghMS Mar 3, 2025
5b9ac15
Removing requirement for inspect mut for the Allocator
gurasinghMS Mar 4, 2025
0148c4b
Fixing merge conflicts
gurasinghMS Mar 12, 2025
3b6e57a
Nvme tests can now use the PagePool allocator instead of using Device…
gurasinghMS Mar 7, 2025
bc99323
Some cleanup in the nvme tests
gurasinghMS Mar 7, 2025
5098b57
Fixed the invalid mqes and the save restore tests
gurasinghMS Mar 7, 2025
add871e
Fixing merge conflicts
gurasinghMS Mar 12, 2025
5197971
Fixing merge conflict issues
gurasinghMS Mar 11, 2025
c3b2d52
Getting a better solution for TestBacking
gurasinghMS Mar 11, 2025
a0540e3
allow dma test now working
gurasinghMS Mar 11, 2025
7aa71de
Should now be working for all invocations to the EmulatedDevice
gurasinghMS Mar 11, 2025
af135bb
Updating cargo file to include sparse_mmap everywhere in user_driver
gurasinghMS Mar 12, 2025
d4b76f2
Fixing stuff based on linter
gurasinghMS Mar 12, 2025
8c7df08
Fixing PR build errors
gurasinghMS Mar 12, 2025
02063f9
Fixing unit test build errors
gurasinghMS Mar 12, 2025
38180b9
Fixing bus in build
gurasinghMS Mar 12, 2025
3f56d4c
Appeasing the linter
gurasinghMS Mar 12, 2025
b6efa8a
More appeasement of clippy
gurasinghMS Mar 12, 2025
854fb9f
xtask fmt fix
gurasinghMS Mar 12, 2025
e7ef177
Removed SparseMapBacking and converted backing to a generic instead
gurasinghMS Mar 12, 2025
21f0450
Re-organized emulated.rs file after previous refactor
gurasinghMS Mar 12, 2025
cbf5b1b
Comment updated
gurasinghMS Mar 12, 2025
1ecf2ab
fix build
gurasinghMS Mar 12, 2025
43f6af0
Merge branch 'emu_device' into gdma_save_restore
justus-camp-microsoft Mar 13, 2025
150dcb3
minimized implementation of gdma save/restore, unit test failing
justus-camp-microsoft Mar 13, 2025
0a2972b
e2e enablement code with vmm test
justus-camp-microsoft Mar 14, 2025
88cc6de
resolve doc warnings
justus-camp-microsoft Mar 14, 2025
06a477e
enablement fixes
justus-camp-microsoft Mar 14, 2025
99fb52a
plumbed through save/restore for endpoints
justus-camp-microsoft Mar 14, 2025
09706d3
working before rpc
justus-camp-microsoft Mar 17, 2025
b1503e7
Merge branch 'main' into mana_wip
justus-camp-microsoft Mar 19, 2025
e297070
move some state to a new module to deal with cyclic dep but prob not …
justus-camp-microsoft Mar 19, 2025
48fa4d5
weird merge issues
justus-camp-microsoft Mar 19, 2025
a452d8f
plumb mana queue state, some dma changes to leak and lazily restore
justus-camp-microsoft Mar 25, 2025
38bd722
more progress
justus-camp-microsoft Mar 27, 2025
e6c9c74
test passing
justus-camp-microsoft Mar 28, 2025
e8dff0e
Merge branch 'main' into mana_wip
justus-camp-microsoft Mar 28, 2025
5685044
pass right flag to dma client spawner
justus-camp-microsoft Mar 28, 2025
b93e8f0
mana vmm tests passing, need to fix for multiple nics
justus-camp-microsoft Mar 31, 2025
d8877af
put servicing keepalive test back with validate_mana_nic
justus-camp-microsoft Mar 31, 2025
e51476f
key on pci id
justus-camp-microsoft Mar 31, 2025
349bb6e
quick once-over
justus-camp-microsoft Mar 31, 2025
ef0bd0b
clean up endpoint saved state since it's not currently being used
justus-camp-microsoft Apr 3, 2025
f72b5fd
copyright header
justus-camp-microsoft Apr 7, 2025
e30e1d9
resolve all outstanding clippy warnings
justus-camp-microsoft Apr 7, 2025
78d84e8
Merge branch 'main' into mana_wip
justus-camp-microsoft Apr 7, 2025
a343ca5
directly save endpoints instead of using vmbus save/restore
justus-camp-microsoft Apr 16, 2025
5cc6a64
Merge branch 'main' into mana_wip
justus-camp-microsoft Apr 17, 2025
cee65e9
random test changes
justus-camp-microsoft Apr 23, 2025
eb06840
cherry-pick gdma changes
justus-camp-microsoft Apr 23, 2025
e9e293f
some cleanup
justus-camp-microsoft Apr 24, 2025
1d5479d
unused import
justus-camp-microsoft Apr 24, 2025
fad0df8
Merge branch 'main' into mana_wip
justus-camp-microsoft Apr 24, 2025
3d4f5db
Merge branch 'gdma' into mana_wip
justus-camp-microsoft Apr 24, 2025
65ca41a
save queue directly - gets most of tests passing
justus-camp-microsoft May 2, 2025

Files changed:

13 changes: 13 additions & 0 deletions Cargo.lock
@@ -3865,6 +3865,7 @@ dependencies = [
"gdma_defs",
"getrandom 0.3.2",
"inspect",
"mana_save_restore",
"mesh",
"net_backend",
"net_backend_resources",
@@ -3880,6 +3881,13 @@ dependencies = [
"zerocopy 0.8.24",
]

[[package]]
name = "mana_save_restore"
version = "0.0.0"
dependencies = [
"mesh",
]

[[package]]
name = "managed"
version = "0.8.0"
@@ -4309,6 +4317,7 @@ dependencies = [
"futures-concurrency",
"guestmem",
"inspect",
"mana_save_restore",
"memory_range",
"mesh",
"net_backend_resources",
@@ -4378,6 +4387,7 @@ dependencies = [
"guestmem",
"inspect",
"mana_driver",
"mana_save_restore",
"mesh",
"net_backend",
"pal_async",
@@ -4403,6 +4413,7 @@ dependencies = [
"futures-concurrency",
"guestmem",
"inspect",
"mana_save_restore",
"mesh",
"net_backend",
"parking_lot",
@@ -4445,6 +4456,7 @@ dependencies = [
"hvdef",
"inspect",
"inspect_counters",
"mana_save_restore",
"mesh",
"net_backend",
"net_backend_resources",
@@ -7698,6 +7710,7 @@ dependencies = [
"pci_core",
"tracing",
"user_driver",
"vmcore",
]

[[package]]
1 change: 1 addition & 0 deletions Cargo.toml
@@ -229,6 +229,7 @@ gdma_defs = { path = "vm/devices/net/gdma_defs" }
gdma_resources = { path = "vm/devices/net/gdma_resources" }
linux_net_bindings = { path = "vm/devices/net/linux_net_bindings" }
mana_driver = { path = "vm/devices/net/mana_driver" }
mana_save_restore = { path = "vm/devices/net/mana_save_restore" }
vfio_sys = { path = "vm/devices/user_driver/vfio_sys" }
net_backend = { path = "vm/devices/net/net_backend" }
net_backend_resources = { path = "vm/devices/net/net_backend_resources" }
4 changes: 4 additions & 0 deletions openhcl/host_fdt_parser/src/lib.rs
@@ -237,6 +237,8 @@ pub struct ParsedDeviceTree<
pub nvme_keepalive: bool,
/// The physical address of the VTL0 alias mapping, if one is configured.
pub vtl0_alias_map: Option<u64>,
/// Indicates that the host supports MANA keep-alive.
pub mana_keepalive: bool,
}

/// The memory allocation mode provided by the host. This determines how OpenHCL
@@ -316,6 +318,7 @@ impl<
entropy: None,
device_dma_page_count: None,
nvme_keepalive: false,
mana_keepalive: false,
vtl0_alias_map: None,
}
}
@@ -735,6 +738,7 @@ impl<
device_dma_page_count: _,
nvme_keepalive: _,
vtl0_alias_map: _,
mana_keepalive: _,
} = storage;

*device_tree_size = parser.total_size;
4 changes: 4 additions & 0 deletions openhcl/lower_vtl_permissions_guard/src/lib.rs
@@ -108,4 +108,8 @@ impl<T: DmaClient> DmaClient for LowerVtlMemorySpawner<T> {
fn attach_pending_buffers(&self) -> Result<Vec<MemoryBlock>> {
anyhow::bail!("restore is not supported for LowerVtlMemorySpawner")
}

fn get_dma_buffer(&self, _len: usize, _base_pfn: u64) -> Result<MemoryBlock> {
anyhow::bail!("get is not supported for LowerVtlMemorySpawner")
}
}
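
This hunk is one of several in the PR threading a new `get_dma_buffer(len, base_pfn)` method through every `DmaClient` implementation, so a driver can reclaim a specific leaked buffer after servicing; backings that cannot restore memory, like `LowerVtlMemorySpawner` above, simply bail. A minimal sketch of the trait shape as implied by the diffs, with the trait's allocation methods elided and `MemoryBlock` standing in for `user_driver::memory::MemoryBlock`:

```rust
use anyhow::Result;

// Stand-in for user_driver::memory::MemoryBlock, only to keep the
// sketch self-contained.
pub struct MemoryBlock;

// Trait shape implied by this PR's hunks; the real trait also has
// allocation methods that are elided here.
pub trait DmaClient {
    /// Attach to all buffers leaked across a servicing operation.
    fn attach_pending_buffers(&self) -> Result<Vec<MemoryBlock>>;

    /// Reclaim one specific leaked buffer by length and base PFN.
    fn get_dma_buffer(&self, len: usize, base_pfn: u64) -> Result<MemoryBlock>;
}
```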
2 changes: 2 additions & 0 deletions openhcl/openhcl_boot/src/host_params/dt.rs
@@ -528,6 +528,7 @@ impl PartitionInfo {
entropy,
vtl0_alias_map: _,
nvme_keepalive,
mana_keepalive,
} = storage;

assert!(!vtl2_used_ranges.is_empty());
@@ -550,6 +551,7 @@
*gic = parsed.gic.clone();
*entropy = parsed.entropy.clone();
*nvme_keepalive = parsed.nvme_keepalive;
*mana_keepalive = parsed.mana_keepalive;

Ok(Some(storage))
}
3 changes: 3 additions & 0 deletions openhcl/openhcl_boot/src/host_params/mod.rs
@@ -94,6 +94,8 @@ pub struct PartitionInfo {
pub vtl0_alias_map: Option<u64>,
/// Host is compatible with DMA preservation / NVMe keep-alive.
pub nvme_keepalive: bool,
/// Host is compatible with MANA keep-alive.
pub mana_keepalive: bool,
}

impl PartitionInfo {
@@ -125,6 +127,7 @@ impl PartitionInfo {
entropy: None,
vtl0_alias_map: None,
nvme_keepalive: false,
mana_keepalive: false,
}
}

5 changes: 5 additions & 0 deletions openhcl/openhcl_boot/src/main.rs
@@ -259,6 +259,10 @@ fn build_kernel_command_line(
write!(cmdline, "OPENHCL_NVME_KEEP_ALIVE=1 ")?;
}

if partition_info.mana_keepalive && !partition_info.vtl2_pool_memory.is_empty() {
write!(cmdline, "OPENHCL_MANA_KEEP_ALIVE=1 ")?;
}

if let Some(sidecar) = sidecar {
write!(cmdline, "{} ", sidecar.kernel_command_line())?;
}
@@ -956,6 +960,7 @@ mod test {
entropy: None,
vtl0_alias_map: None,
nvme_keepalive: false,
mana_keepalive: false,
}
}

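These hunks cover only the producing side: the boot loader appends `OPENHCL_MANA_KEEP_ALIVE=1` to the kernel command line when the host advertises MANA keep-alive and a VTL2 private pool exists. The consuming side is not shown in this diff, though a note in dispatch/mod.rs below says the experimental feature is set via the corresponding env arg; mirroring the existing NVMe flag, a hypothetical reader might look like this:

```rust
// Hypothetical sketch, not from this PR's hunks: how underhill_core
// might derive LoadedVm::mana_keep_alive from the boot-provided
// environment variable.
fn mana_keep_alive_from_env() -> bool {
    std::env::var("OPENHCL_MANA_KEEP_ALIVE")
        .map(|v| v == "1")
        .unwrap_or(false)
}
```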
26 changes: 25 additions & 1 deletion openhcl/openhcl_dma_manager/src/lib.rs
@@ -369,7 +369,7 @@ impl OpenhclDmaManager {

if let Some(private_pool) = &self.private_pool {
private_pool
.validate_restore(false)
.validate_restore(true)
.context("failed to validate restore for private pool")?
}

@@ -429,6 +429,22 @@ impl DmaClientBacking {
DmaClientBacking::LockedMemoryLowerVtl(spawner) => spawner.attach_pending_buffers(),
}
}

fn get_dma_buffer(
&self,
len: usize,
base_pfn: u64,
) -> anyhow::Result<user_driver::memory::MemoryBlock> {
match self {
DmaClientBacking::SharedPool(allocator) => allocator.get_dma_buffer(len, base_pfn),
DmaClientBacking::PrivatePool(allocator) => allocator.get_dma_buffer(len, base_pfn),
DmaClientBacking::LockedMemory(spawner) => spawner.get_dma_buffer(len, base_pfn),
DmaClientBacking::PrivatePoolLowerVtl(spawner) => spawner.get_dma_buffer(len, base_pfn),
DmaClientBacking::LockedMemoryLowerVtl(spawner) => {
spawner.get_dma_buffer(len, base_pfn)
}
}
}
}

/// An OpenHCL dma client. This client implements inspect to allow seeing what
@@ -450,4 +466,12 @@ impl DmaClient for OpenhclDmaClient {
fn attach_pending_buffers(&self) -> anyhow::Result<Vec<user_driver::memory::MemoryBlock>> {
self.backing.attach_pending_buffers()
}

fn get_dma_buffer(
&self,
len: usize,
base_pfn: u64,
) -> anyhow::Result<user_driver::memory::MemoryBlock> {
self.backing.get_dma_buffer(len, base_pfn)
}
}
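
Together with `attach_pending_buffers`, the new method enables a save-by-reference pattern: a queue records its buffer's length and base PFN at save time, the pool leaks the pages across servicing, and the owner asks for the same pages back after restore. An assumed driver-side usage sketch; the `user_driver::DmaClient` trait path is an assumption, and this call sequence is not shown in the hunks:

```rust
use anyhow::Result;
use user_driver::DmaClient; // trait path assumed
use user_driver::memory::MemoryBlock;

// Assumed flow: (saved_len, saved_base_pfn) come from the device's
// saved state; the client returns the same physical pages.
fn restore_queue_memory(
    client: &dyn DmaClient,
    saved_len: usize,
    saved_base_pfn: u64,
) -> Result<MemoryBlock> {
    client.get_dma_buffer(saved_len, saved_base_pfn)
}
```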
71 changes: 56 additions & 15 deletions openhcl/underhill_core/src/dispatch/mod.rs
@@ -33,6 +33,7 @@ use hyperv_ic_resources::shutdown::ShutdownRpc;
use hyperv_ic_resources::shutdown::ShutdownType;
use igvm_defs::MemoryMapEntryType;
use inspect::Inspect;
use mana_driver::save_restore::ManaSavedState;
use mesh::CancelContext;
use mesh::MeshPayload;
use mesh::error::RemoteError;
@@ -106,18 +107,25 @@ pub trait LoadedVmNetworkSettings: Inspect {
threadpool: &AffinitizedThreadpool,
uevent_listener: &UeventListener,
servicing_netvsp_state: &Option<Vec<crate::emuplat::netvsp::SavedState>>,
servicing_mana_state: &Option<ManaSavedState>,
partition: Arc<UhPartition>,
state_units: &StateUnits,
vmbus_server: &Option<VmbusServerHandle>,
dma_client_spawner: DmaClientSpawner,
is_isolated: bool,
mana_keepalive: bool,
) -> anyhow::Result<RuntimeSavedState>;

/// Callback when network is removed externally.
async fn remove_network(&mut self, instance_id: Guid) -> anyhow::Result<()>;

/// Callback after stopping the VM and all workers, in preparation for a VTL2 reboot.
async fn unload_for_servicing(&mut self);
async fn unload_for_servicing(&mut self, mana_keepalive: bool);

async fn save(
&mut self,
mana_keepalive_flag: bool,
) -> Option<Vec<Result<ManaSavedState, anyhow::Error>>>;

/// Handles packet capture related operations.
async fn packet_capture(
@@ -181,6 +189,7 @@ pub(crate) struct LoadedVm {
pub _periodic_telemetry_task: Task<()>,

pub nvme_keep_alive: bool,
pub mana_keep_alive: bool,
pub test_configuration: Option<TestScenarioConfig>,
pub dma_manager: OpenhclDmaManager,
}
@@ -268,7 +277,7 @@ impl LoadedVm {
WorkerRpc::Restart(rpc) => {
let state = async {
let running = self.stop().await;
match self.save(None, false).await {
match self.save(None, false, false).await {
Ok(servicing_state) => Some((rpc, servicing_state)),
Err(err) => {
if running {
@@ -330,7 +339,7 @@
UhVmRpc::Save(rpc) => {
rpc.handle_failable(async |()| {
let running = self.stop().await;
let r = self.save(None, false).await;
let r = self.save(None, false, false).await;
if running {
self.start(None).await;
}
@@ -495,6 +504,7 @@ impl LoadedVm {
// NOTE: This is set via the corresponding env arg, as this feature is
// experimental.
let nvme_keepalive = self.nvme_keep_alive && capabilities_flags.enable_nvme_keepalive();
let mana_keepalive = self.mana_keep_alive && capabilities_flags.enable_mana_keepalive();

// Do everything before the log flush under a span.
let r = async {
@@ -509,14 +519,14 @@
anyhow::bail!("cannot service underhill while paused");
}

let mut state = self.save(Some(deadline), nvme_keepalive).await?;
let mut state = self.save(Some(deadline), nvme_keepalive, mana_keepalive).await?;
state.init_state.correlation_id = Some(correlation_id);

// Unload any network devices.
let shutdown_mana = async {
if let Some(network_settings) = self.network_settings.as_mut() {
network_settings
.unload_for_servicing()
.unload_for_servicing(mana_keepalive)
.instrument(tracing::info_span!("shutdown_mana"))
.await;
}
@@ -662,7 +672,8 @@ impl LoadedVm {
async fn save(
&mut self,
_deadline: Option<std::time::Instant>,
vf_keepalive_flag: bool,
nvme_keepalive_flag: bool,
mana_keepalive_flag: bool,
) -> anyhow::Result<ServicingState> {
assert!(!self.state_units.is_running());

@@ -671,31 +682,58 @@ impl LoadedVm {
// Only save NVMe state when there are NVMe controllers and keep alive
// was enabled.
let nvme_state = if let Some(n) = &self.nvme_manager {
n.save(vf_keepalive_flag)
n.save(nvme_keepalive_flag)
.instrument(tracing::info_span!("nvme_manager_save"))
.await
.map(|s| NvmeSavedState { nvme_state: s })
} else {
None
};

let units = self.save_units().await.context("state unit save failed")?;
let vmgs = self
.vmgs_thin_client
.save()
.await
.context("vmgs save failed")?;

// Only save dma manager state if we are expected to keep VF devices
// alive across save. Otherwise, don't persist the state at all, as
// there should be no live DMA across save.
let dma_manager_state = if vf_keepalive_flag {
let dma_manager_state = if nvme_keepalive_flag || mana_keepalive_flag {
use vmcore::save_restore::SaveRestore;
Some(self.dma_manager.save().context("dma_manager save failed")?)
} else {
None
};

let units = self.save_units().await.context("state unit save failed")?;

let mana_state = if let Some(network_settings) = &mut self.network_settings {
let results = network_settings.save(mana_keepalive_flag).await;
let mut saved_states = Vec::new();

if let Some(results) = results {
for result in results {
match result {
Ok(state) => saved_states.push(state),
Err(e) => tracing::warn!("Error saving MANA device state: {:#}", e),
}
}
}

if !saved_states.is_empty() {
Some(saved_states)
} else {
None
}
} else {
None
};

units.iter().for_each(|unit| {
tracing::info!(unit = unit.name, "saved state unit");
});

let vmgs = self
.vmgs_thin_client
.save()
.await
.context("vmgs save failed")?;

let vmbus_client = if let Some(vmbus_client) = &mut self.vmbus_client {
vmbus_client.stop().await;
Some(vmbus_client.save().await)
Expand All @@ -713,6 +751,7 @@ impl LoadedVm {
vmgs: (vmgs, self.vmgs_disk_metadata.clone()),
overlay_shutdown_device: self.shutdown_relay.is_some(),
nvme_state,
mana_state,
dma_manager_state,
vmbus_client,
},
@@ -776,11 +815,13 @@ impl LoadedVm {
threadpool,
&self.uevent_listener,
&None, // VF getting added; no existing state
&None, // VF getting added; no existing state
self.partition.clone(),
&self.state_units,
&self.vmbus_server,
self.dma_manager.client_spawner(),
self.isolation.is_isolated(),
self.mana_keep_alive,
)
.await?;

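
The new `LoadedVmNetworkSettings::save` method returns per-endpoint results so that one failing NIC does not abort servicing; the caller above logs each failure and keeps the successful states. The PR's implementation of the method lives outside these hunks; a hedged sketch of what an implementor might do, where `self.mana_endpoints` is an assumed field:

```rust
use mana_driver::save_restore::ManaSavedState;

// Sketch only: `mana_endpoints` is an assumed field holding endpoints
// that each expose their own async save(). Endpoints save
// independently, so one failure does not discard the others.
async fn save(
    &mut self,
    mana_keepalive_flag: bool,
) -> Option<Vec<Result<ManaSavedState, anyhow::Error>>> {
    if !mana_keepalive_flag {
        return None;
    }
    let mut results = Vec::new();
    for endpoint in self.mana_endpoints.iter_mut() {
        results.push(endpoint.save().await);
    }
    Some(results)
}
```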