Skip to content

propolis-server: collect and report VSS minus VM mappings #889

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
610 changes: 359 additions & 251 deletions Cargo.lock

Large diffs are not rendered by default.

20 changes: 13 additions & 7 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -180,13 +180,19 @@ zerocopy = "0.7.34"
# It's common during development to use a local copy of various complex
# dependencies. If you want to use those, uncomment one of these blocks.
#
# [patch."https://github.com/oxidecomputer/omicron"]
# internal-dns = { path = "../omicron/internal-dns" }
# nexus-client = { path = "../omicron/clients/nexus-client" }
# omicron-common = { path = "../omicron/common" }
# oximeter-instruments = { path = "../omicron/oximeter/instruments" }
# oximeter-producer = { path = "../omicron/oximeter/producer" }
# oximeter = { path = "../omicron/oximeter/oximeter" }
[patch."https://github.com/oxidecomputer/omicron"]
#internal-dns = { path = "../omicron/internal-dns" }
nexus-client = { git = "https://github.com/oxidecomputer//omicron.git", rev = "da5f7706119b3f39dc5dfb2fde823c2294476b5a" }
omicron-uuid-kinds = { git = "https://github.com/oxidecomputer//omicron.git", rev = "da5f7706119b3f39dc5dfb2fde823c2294476b5a" }
nexus-types = { git = "https://github.com/oxidecomputer//omicron.git", rev = "da5f7706119b3f39dc5dfb2fde823c2294476b5a" }
illumos-utils = { git = "https://github.com/oxidecomputer//omicron.git", rev = "da5f7706119b3f39dc5dfb2fde823c2294476b5a" }
omicron-common = { git = "https://github.com/oxidecomputer//omicron.git", rev = "da5f7706119b3f39dc5dfb2fde823c2294476b5a" }
oximeter-instruments = { git = "https://github.com/oxidecomputer//omicron.git", rev = "da5f7706119b3f39dc5dfb2fde823c2294476b5a" }
oximeter-producer = { git = "https://github.com/oxidecomputer//omicron.git", rev = "da5f7706119b3f39dc5dfb2fde823c2294476b5a" }
oximeter = { git = "https://github.com/oxidecomputer//omicron.git", rev = "da5f7706119b3f39dc5dfb2fde823c2294476b5a" }

# i've just picked what Omicron happens to currently point to.
tufaceous-artifact = { git = "https://github.com/oxidecomputer//tufaceous.git", rev = "04681f26ba09e144e5467dd6bd22c4887692a670" }
# [patch."https://github.com/oxidecomputer/crucible"]
# crucible = { path = "../crucible/upstairs" }
# crucible-client-types = { path = "../crucible/crucible-client-types" }
2 changes: 2 additions & 0 deletions bin/propolis-server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ internal-dns-types.workspace = true
itertools.workspace = true
kstat-rs.workspace = true
lazy_static.workspace = true
libc.workspace = true
nexus-client.workspace = true
omicron-common.workspace = true
oximeter-instruments.workspace = true
Expand Down Expand Up @@ -67,6 +68,7 @@ uuid.workspace = true
usdt.workspace = true
base64.workspace = true
schemars = { workspace = true, features = ["chrono", "uuid1"] }
zerocopy.workspace = true

[dev-dependencies]
hex.workspace = true
Expand Down
97 changes: 87 additions & 10 deletions bin/propolis-server/src/lib/stats/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
//! Methods for starting an Oximeter endpoint and gathering server-level stats.

use std::net::SocketAddr;
use std::sync::{Arc, Mutex};
use std::sync::{Arc, Mutex, Weak};

use omicron_common::api::internal::nexus::{ProducerEndpoint, ProducerKind};
use oximeter::{
Expand All @@ -18,9 +18,13 @@ use slog::Logger;
use uuid::Uuid;

use crate::spec::Spec;
use crate::{server::MetricsEndpointConfig, vm::NetworkInterfaceIds};
use crate::{
server::MetricsEndpointConfig, vm::objects::VmObjects,
vm::NetworkInterfaceIds,
};

mod network_interface;
mod process;
mod pvpanic;
mod virtual_disk;
mod virtual_machine;
Expand Down Expand Up @@ -78,15 +82,80 @@ struct ServerStatsInner {
/// data.
virtual_machine: VirtualMachine,

/// A weak reference to the objects implementing the VM this Propolis server
/// is responsible for. It is critical this is a weak reference: we do not
/// want to prohibit `VmObjects` from being dropped when unneeded. The
/// referece is, however, necessary to measure "Propolis overhead" -
/// measurements of the Propolis process minus the VM it is managing.
vm_objects: Weak<VmObjects>,

/// The reset count for the relevant instance.
run_count: virtual_machine::Reset,

/// Mapped virtual memory for the Propolis managing this instance, excluding
/// VM memory.
vmm_vsz: virtual_machine::VmmVsz,

/// Process RSS is sampled in a standalone task. The `tokio::sync::watch`
/// here is to observe that sampling and update the `vmm_vsz` tracked
/// here.
process_stats_rx: tokio::sync::watch::Receiver<process::ProcessStats>,
}

impl ServerStatsInner {
pub fn new(virtual_machine: VirtualMachine) -> Self {
pub fn new(
virtual_machine: VirtualMachine,
vm_objects: Weak<VmObjects>,
) -> Self {
let rx = process::ProcessStats::new();

ServerStatsInner {
virtual_machine,
vm_objects,
run_count: virtual_machine::Reset { datum: Default::default() },
vmm_vsz: virtual_machine::VmmVsz { datum: Default::default() },
process_stats_rx: rx,
}
}

fn refresh_vmm_vsz(&mut self) {
let last_process_stats = self.process_stats_rx.borrow();

if let Some(vm_objects) = self.vm_objects.upgrade() {
// The mutex guarding VmObjects is async. Since we're not in an
// async context, we have to be a bit more explicit with the future.
// It's possible both for us to block acquiring this lock (if the
// state driver had an exclusive lock to make some change) and for
// us to block the state driver by acquiring the lock (but we
// *should* drop the lock very quickly, here).
let vm_objects = tokio::runtime::Handle::current()
.block_on(vm_objects.lock_shared());

let vm_va_size =
vm_objects.machine().map_physmem.virtual_address_size();

// We have a ref of `VmObjects`, so we know this `propolis-server`'s
// VM is still mapped.
//
// In theory at some point we'd want to handle the VM dimensions
// changing here. Imagine PCIe hotplug (via MMIO mappings), memory
// hotplug, or perhaps a balloon device. In that case we'd probably
// want to collect this information closer to reading `psinfo`, so
// that we can be sure that VM didn't change between reading
// `psinfo` and correcting for VM-specific usage.
self.vmm_vsz.datum = (last_process_stats.vsz - vm_va_size) as u64;
} else {
// We don't have a `VmObjects`, so the VM has been unmapped. The
// process' VSZ is already everything-but-the-guest-VM, so report
// that directly.
//
// This is really just a race as `propolis-server` is shutting down.
// The `ServerStats` that manages this producer will be shut down
// imminently too, we're just measuring right as that's happening.
// We may even be in a cancelled future that hasn't observed it yet,
// with this datapoint never to be sent. But if do get one last
// datapoint out, this is at least the current state of the process.
self.vmm_vsz.datum = last_process_stats.vsz as u64;
}
}
}
Expand All @@ -103,8 +172,10 @@ pub struct ServerStats {

impl ServerStats {
/// Create new server stats, representing the provided instance.
pub fn new(vm: VirtualMachine) -> Self {
Self { inner: Arc::new(Mutex::new(ServerStatsInner::new(vm))) }
pub fn new(vm: VirtualMachine, vm_objects: Weak<VmObjects>) -> Self {
Self {
inner: Arc::new(Mutex::new(ServerStatsInner::new(vm, vm_objects))),
}
}

/// Increments the number of times the managed instance was reset.
Expand All @@ -117,14 +188,20 @@ impl Producer for ServerStats {
fn produce(
&mut self,
) -> Result<Box<dyn Iterator<Item = Sample> + 'static>, MetricsError> {
let run_count = {
let inner = self.inner.lock().unwrap();
std::iter::once(Sample::new(
let samples = {
let mut inner = self.inner.lock().unwrap();
inner.refresh_vmm_vsz();
let run_count = std::iter::once(Sample::new(
&inner.virtual_machine,
&inner.run_count,
)?)
)?);
let vsz = std::iter::once(Sample::new(
&inner.virtual_machine,
&inner.vmm_vsz,
)?);
run_count.chain(vsz)
};
Ok(Box::new(run_count))
Ok(Box::new(samples))
}
}

Expand Down
129 changes: 129 additions & 0 deletions bin/propolis-server/src/lib/stats/process.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use std::io::Read;
use std::time::{Duration, Instant};

use anyhow::Context;
use tokio::sync::watch;
use zerocopy::{AsBytes, FromBytes, FromZeroes};

#[derive(Debug, Default)]
pub(crate) struct ProcessStats {
pub(crate) measurement_time: Duration,
pub(crate) rss: usize,
pub(crate) vsz: usize,
}

impl ProcessStats {
pub fn new() -> watch::Receiver<ProcessStats> {
let (tx, rx) = watch::channel::<ProcessStats>(ProcessStats::default());

tokio::task::spawn(process_stats_task(tx));

rx
}
}

const PRFNSZ: usize = 16;
const PRARGSZ: usize = 80;

/// From `sys/types.h`
#[allow(non_camel_case_types)]
type taskid_t = i32;
/// From `sys/time_impl.h`
#[allow(non_camel_case_types)]
type timespec_t = [i64; 2];
/// From `sys/time_impl.h`
#[allow(non_camel_case_types)]
type timestruc_t = timespec_t;
/// From `sys/types.h`
#[allow(non_camel_case_types)]
type dev_t = u64;

/// `psinfo`'s definition depends on the data model reported by illumos. This is
/// in line with the 64-bit version of the struct, and ignores a few fields at
/// the end of the struct.
#[derive(Copy, Clone, Debug, FromZeroes, AsBytes, FromBytes)]
#[cfg(target_arch = "x86_64")]
#[repr(C)]
struct psinfo {
pr_flag: i32,
pr_nlwp: i32,
pr_pid: u32,
pr_ppid: u32,
pr_pgid: u32,
pr_sid: u32,
pr_uid: u32,
pr_euid: u32,
pr_gid: u32,
pr_egid: u32,
pr_addr: usize,
pr_size: usize,
pr_rssize: usize,
// From `struct psinfo`. This seems to be present to ensure that `pr_ttydev`
// is 64-bit aligned even on 32-bit targets.
pr_pad1: usize,
pr_ttydev: dev_t,
pr_pctcpu: u16,
pr_pctmem: u16,
// This padding is not explicitly present in illumos' `struct procfs`, but
// is none the less there due to C struct layout rules.
_pad2: u32,
pr_start: timestruc_t,
pr_time: timestruc_t,
pr_ctime: timestruc_t,
pr_fname: [u8; PRFNSZ],
pr_psargs: [u8; PRARGSZ],
pr_wstat: u32,
pr_argc: u32,
pr_argv: usize,
pr_envp: usize,
pr_dmodel: u8,
// More padding from `struct psinfo`.
pr_pad2: [u8; 3],
pr_taskid: taskid_t,
}

pub fn process_stats() -> anyhow::Result<ProcessStats> {
let mut psinfo_file = std::fs::File::open("/proc/self/psinfo")?;

let mut stats =
ProcessStats { measurement_time: Duration::ZERO, vsz: 0, rss: 0 };

let mut info: psinfo = FromZeroes::new_zeroed();

let stats_read_start = Instant::now();

psinfo_file
.read(info.as_bytes_mut())
.context("reading struct psinfo from file")?;

stats.measurement_time = stats_read_start.elapsed();

stats.vsz = info.pr_size;
stats.rss = info.pr_rssize;

Ok(stats)
}

async fn process_stats_task(tx: watch::Sender<ProcessStats>) {
while !tx.is_closed() {
let new_process_stats = tokio::task::spawn_blocking(process_stats)
.await
.expect("collecting address space stats does not panic")
.expect("process_stats() does not error");

// [there isn't a nice way to plumb a slog Logger here, so this is an
// eprintln for demonstrative purposes but otherwise should be removed.]
eprintln!(
"Sending new stats at least.. {:?}, took {}ms",
new_process_stats,
new_process_stats.measurement_time.as_millis()
);
tx.send_replace(new_process_stats);

tokio::time::sleep(std::time::Duration::from_millis(15000)).await;
}
}
2 changes: 2 additions & 0 deletions bin/propolis-server/src/lib/stats/virtual_machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ use super::kstat_types::{KstatList, KstatTarget};
// `./omicron/oximeter/oximeter/schema/virtual-machine.toml`.
oximeter::use_timeseries!("virtual-machine.toml");
pub use self::virtual_machine::Reset;
// [this currently exists only due to Cargo.toml patching to a one-off Omicron.]
pub use self::virtual_machine::VmmVsz;
use self::virtual_machine::{
VcpuUsage, VirtualMachine as VirtualMachineTarget,
};
Expand Down
13 changes: 10 additions & 3 deletions bin/propolis-server/src/lib/vm/services.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
//! Services visible to consumers outside this Propolis that depend on
//! functionality supplied by an extant VM.

use std::sync::Arc;
use std::sync::{Arc, Weak};

use oximeter::types::ProducerRegistry;
use propolis_api_types::InstanceProperties;
Expand Down Expand Up @@ -53,10 +53,15 @@ impl VmServices {
/// configuration.
pub(super) async fn new(
log: &slog::Logger,
vm_objects: &VmObjects,
vm_objects: &Arc<VmObjects>,
vm_properties: &InstanceProperties,
ensure_options: &super::EnsureOptions,
) -> Self {
// While we only need this if there's a metrics config present, it's
// simpler plumbing to create this before locking `vm_objects`, which we
// need to do regardless of metrics config.
let vm_objects_weak = Arc::downgrade(vm_objects);

let vm_objects = vm_objects.lock_shared().await;
let oximeter_state = if let Some(cfg) = &ensure_options.metrics_config {
let registry = ensure_options.oximeter_registry.as_ref().expect(
Expand All @@ -67,6 +72,7 @@ impl VmServices {
cfg,
registry,
vm_objects.instance_spec(),
vm_objects_weak,
vm_properties,
)
.await
Expand Down Expand Up @@ -119,6 +125,7 @@ async fn register_oximeter_producer(
cfg: &MetricsEndpointConfig,
registry: &ProducerRegistry,
spec: &Spec,
vm_objects_weak: Weak<VmObjects>,
vm_properties: &InstanceProperties,
) -> OximeterState {
let mut oximeter_state = OximeterState::default();
Expand Down Expand Up @@ -152,7 +159,7 @@ async fn register_oximeter_producer(
// Assign our own metrics production for this VM instance to the
// registry, letting the server actually return them to oximeter when
// polled.
let stats = ServerStats::new(virtual_machine);
let stats = ServerStats::new(virtual_machine, vm_objects_weak);
if let Err(e) = registry.register_producer(stats.clone()) {
error!(
log,
Expand Down
4 changes: 2 additions & 2 deletions lib/propolis/src/util/aspace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,9 @@ fn safe_end(start: usize, len: usize) -> Option<usize> {
// Flatten the K/V nested tuple
fn kv_flatten<'a, T>(i: (&'a usize, &'a (usize, T))) -> SpaceItem<'a, T> {
let start = *i.0;
let end = (i.1).0;
let size = (i.1).0;
let item = &(i.1).1;
(start, end, item)
(start, size, item)
}

/// Iterator for all items in an [ASpace], constructed by [ASpace::iter].
Expand Down
Loading