Skip to content

Commit 1392309

Browse files
committed
vss in 30 microseconds or less
1 parent c24d70f commit 1392309

File tree

8 files changed

+189
-302
lines changed

8 files changed

+189
-302
lines changed

Cargo.lock

+62-167
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+10-7
Original file line numberDiff line numberDiff line change
@@ -180,13 +180,16 @@ zerocopy = "0.7.34"
180180
# It's common during development to use a local copy of various complex
181181
# dependencies. If you want to use those, uncomment one of these blocks.
182182
#
183-
# [patch."https://github.com/oxidecomputer/omicron"]
184-
# internal-dns = { path = "../omicron/internal-dns" }
185-
# nexus-client = { path = "../omicron/clients/nexus-client" }
186-
# omicron-common = { path = "../omicron/common" }
187-
# oximeter-instruments = { path = "../omicron/oximeter/instruments" }
188-
# oximeter-producer = { path = "../omicron/oximeter/producer" }
189-
# oximeter = { path = "../omicron/oximeter/oximeter" }
183+
[patch."https://github.com/oxidecomputer/omicron"]
184+
#internal-dns = { path = "../omicron/internal-dns" }
185+
nexus-client = { path = "../omicron/clients/nexus-client" }
186+
omicron-uuid-kinds = { path = "../omicron/uuid-kinds" }
187+
nexus-types = { path = "../omicron/nexus/types" }
188+
illumos-utils = { path = "../omicron/illumos-utils" }
189+
omicron-common = { path = "../omicron/common" }
190+
oximeter-instruments = { path = "../omicron/oximeter/instruments" }
191+
oximeter-producer = { path = "../omicron/oximeter/producer" }
192+
oximeter = { path = "../omicron/oximeter/oximeter" }
190193
# [patch."https://github.com/oxidecomputer/crucible"]
191194
# crucible = { path = "../crucible/upstairs" }
192195
# crucible-client-types = { path = "../crucible/crucible-client-types" }

bin/propolis-server/src/lib/stats/mod.rs

+26-13
Original file line numberDiff line numberDiff line change
@@ -79,34 +79,47 @@ struct ServerStatsInner {
7979
/// data.
8080
virtual_machine: VirtualMachine,
8181

82+
/// The total amount of virtual address space reserved in support of this
83+
/// virtual machine. This is retained to correct the process-wide virtual
84+
/// address space size down to the "non-VM" amount; mappings not derived
85+
/// from a virtual machine explicitly asking for some fixed size of memory.
86+
vm_va_size: usize,
87+
8288
/// The reset count for the relevant instance.
8389
run_count: virtual_machine::Reset,
8490

85-
/// The resident set size for the Propolis managing this instance, excluding
91+
/// Mapped virtual memory for the Propolis managing this instance, excluding
8692
/// VM memory.
87-
vmm_rss: virtual_machine::VmmMaxRss,
93+
vmm_vss: virtual_machine::VmmVss,
8894

8995
/// Process RSS is sampled in a standalone task. The `tokio::sync::watch`
90-
/// here is to observe that sampling and update the `vmm_rss` tracked
96+
/// here is to observe that sampling and update the `vmm_vss` tracked
9197
/// here.
9298
process_stats_rx: tokio::sync::watch::Receiver<process::ProcessStats>,
9399
}
94100

95101
impl ServerStatsInner {
96-
pub fn new(virtual_machine: VirtualMachine) -> Self {
102+
pub fn new(virtual_machine: VirtualMachine, vm_va_size: usize) -> Self {
97103
let rx = process::ProcessStats::new();
98104

99105
ServerStatsInner {
100106
virtual_machine,
107+
vm_va_size,
101108
run_count: virtual_machine::Reset { datum: Default::default() },
102-
vmm_rss: virtual_machine::VmmMaxRss { datum: Default::default() },
109+
vmm_vss: virtual_machine::VmmVss { datum: Default::default() },
103110
process_stats_rx: rx,
104111
}
105112
}
106113

107-
fn refresh_vmm_rss(&mut self) {
114+
fn refresh_vmm_vss(&mut self) {
108115
let last_process_stats = self.process_stats_rx.borrow();
109-
self.vmm_rss.datum = last_process_stats.unshared_rss;
116+
if last_process_stats.vss < self.vm_va_size {
117+
eprintln!("process VSS is smaller than expected; is the VMM being torn down or already stopped?");
118+
return;
119+
}
120+
121+
self.vmm_vss.datum = (last_process_stats.vss - self.vm_va_size) as u64;
122+
eprintln!("reporting vmm_vss of {}", self.vmm_vss.datum);
110123
}
111124
}
112125

@@ -122,8 +135,8 @@ pub struct ServerStats {
122135

123136
impl ServerStats {
124137
/// Create new server stats, representing the provided instance.
125-
pub fn new(vm: VirtualMachine) -> Self {
126-
Self { inner: Arc::new(Mutex::new(ServerStatsInner::new(vm))) }
138+
pub fn new(vm: VirtualMachine, vm_va_size: usize) -> Self {
139+
Self { inner: Arc::new(Mutex::new(ServerStatsInner::new(vm, vm_va_size))) }
127140
}
128141

129142
/// Increments the number of times the managed instance was reset.
@@ -138,16 +151,16 @@ impl Producer for ServerStats {
138151
) -> Result<Box<dyn Iterator<Item = Sample> + 'static>, MetricsError> {
139152
let samples = {
140153
let mut inner = self.inner.lock().unwrap();
141-
inner.refresh_vmm_rss();
154+
inner.refresh_vmm_vss();
142155
let run_count = std::iter::once(Sample::new(
143156
&inner.virtual_machine,
144157
&inner.run_count,
145158
)?);
146-
let rss = std::iter::once(Sample::new(
159+
let vss = std::iter::once(Sample::new(
147160
&inner.virtual_machine,
148-
&inner.vmm_rss,
161+
&inner.vmm_vss,
149162
)?);
150-
run_count.chain(rss)
163+
run_count.chain(vss)
151164
};
152165
Ok(Box::new(samples))
153166
}

bin/propolis-server/src/lib/stats/process.rs

+72-111
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,8 @@ use zerocopy::{AsBytes, FromBytes, FromZeroes};
1212
#[derive(Debug, Default)]
1313
pub(crate) struct ProcessStats {
1414
pub(crate) measurement_time: Duration,
15-
pub(crate) unshared_rss: u64,
16-
pub(crate) unshared_vss: u64,
17-
pub(crate) shared_rss: u64,
18-
pub(crate) shared_vss: u64,
15+
pub(crate) rss: usize,
16+
pub(crate) vss: usize,
1917
}
2018

2119
impl ProcessStats {
@@ -28,112 +26,88 @@ impl ProcessStats {
2826
}
2927
}
3028

31-
/// There are some arrays with size `PRMAPSZ` holding names in `struct prmap`
32-
/// and `struct prxmap`.
33-
const PRMAPSZ: usize = 64;
34-
35-
/// We're going to query our `/proc/<self>/xmap` here, which contains a series o
36-
/// `struct prxmap` records. So, define it here in the same way it's defined in
37-
/// illumos' `common/sys/procfs.h`
29+
const PRFNSZ: usize = 16;
30+
const PRARGSZ: usize = 80;
31+
32+
/// From `sys/types.h`
33+
#[allow(non_camel_case_types)]
34+
type taskid_t = i32;
35+
/// From `sys/time_impl.h`
36+
#[allow(non_camel_case_types)]
37+
type timespec_t = [i64; 2];
38+
/// From `sys/time_impl.h`
39+
#[allow(non_camel_case_types)]
40+
type timestruc_t = timespec_t;
41+
/// From `sys/types.h`
42+
#[allow(non_camel_case_types)]
43+
type dev_t = u64;
44+
45+
/// `psinfo`'s definition depends on the data model reported by illumos. This is
46+
/// in line with the 64-bit version of the struct, and ignores a few fields at
47+
/// the end of the struct.
3848
#[derive(Copy, Clone, Debug, FromZeroes, AsBytes, FromBytes)]
39-
// Kind of weird to need packed here, but the struct size is 0x
49+
#[cfg(target_arch = "x86_64")]
4050
#[repr(C)]
41-
struct prxmap {
42-
/// virtual address of the mapping
43-
pr_vaddr: usize,
44-
/// size of the mapping in bytes
51+
struct psinfo {
52+
pr_flag: i32,
53+
pr_nlwp: i32,
54+
pr_pid: u32,
55+
pr_ppid: u32,
56+
pr_pgid: u32,
57+
pr_sid: u32,
58+
pr_uid: u32,
59+
pr_euid: u32,
60+
pr_gid: u32,
61+
pr_egid: u32,
62+
pr_addr: usize,
4563
pr_size: usize,
46-
/// name in /proc/<pid>/object
47-
pr_mapname: [u8; PRMAPSZ],
48-
/// offset into mapped object, if any
49-
pr_offset: i64,
50-
/// protection and attribute flags. See procfs.h' `MA_*` flags
51-
pr_mflags: i32,
52-
/// Pagesize (bytes) for this mapping
53-
pr_pagesize: i32,
54-
/// SysV shmid, -1 if not SysV shared memory
55-
pr_shmid: i32,
56-
/// illumos' `struct prxmap` on 64-bit operating systems has implicit
57-
/// internal padding between `pr_shmid` and `pr_dev`. This should always be
58-
/// zero.
59-
// Rust rules are that padding bytes are uninitialized, so transforming
60-
// `&prxmap` to a `&[u8]` is UB if there is internal padding.
61-
// Consequently, `AsBytes` and `FromBytes` require no internal padding, so
62-
// put a small field here to make the "padding" explicit.
63-
_internal_pad: i32,
64-
/// st_dev from stat64() of mapped object, or PRNODEV
65-
pr_dev: i64,
66-
/// st_ino from stat64() of mapped object, if any
67-
pr_ino: u64,
68-
/// pages of resident memory
69-
pr_rss: isize,
70-
/// pages of resident anonymous memory
71-
pr_anon: isize,
72-
/// pages of locked memory
73-
pr_locked: isize,
74-
/// currently unused
75-
pr_pad: isize,
76-
/// pagesize of the hat mapping
77-
pr_hatpagesize: usize,
78-
/// filler for future expansion
79-
// illumos uses `ulong_t` here, which should be 32-bit, but
80-
// sizes only line up if it's u64?
81-
pr_filler: [u64; 7],
64+
pr_rssize: usize,
65+
// From `struct psinfo`. This seems to be present to ensure that `pr_ttydev`
66+
// is 64-bit aligned even on 32-bit targets.
67+
pr_pad1: usize,
68+
pr_ttydev: dev_t,
69+
pr_pctcpu: u16,
70+
pr_pctmem: u16,
71+
// This padding is not explicitly present in illumos' `struct procfs`, but
72+
// is none the less there due to C struct layout rules.
73+
_pad2: u32,
74+
pr_start: timestruc_t,
75+
pr_time: timestruc_t,
76+
pr_ctime: timestruc_t,
77+
pr_fname: [u8; PRFNSZ],
78+
pr_psargs: [u8; PRARGSZ],
79+
pr_wstat: u32,
80+
pr_argc: u32,
81+
pr_argv: usize,
82+
pr_envp: usize,
83+
pr_dmodel: u8,
84+
// More padding from `struct psinfo`.
85+
pr_pad2: [u8; 3],
86+
pr_taskid: taskid_t,
8287
}
8388

84-
const MA_SHARED: i32 = 0x08;
89+
pub fn process_stats() -> anyhow::Result<ProcessStats> {
90+
let mut psinfo_file = std::fs::File::open("/proc/self/psinfo")?;
8591

86-
// Computed here just because it's used in a few places in this file.
87-
const PRXMAPSZ: usize = std::mem::size_of::<prxmap>();
92+
let mut stats = ProcessStats {
93+
measurement_time: Duration::ZERO,
94+
vss: 0,
95+
rss: 0,
96+
};
8897

89-
impl prxmap {
90-
fn is_shared(&self) -> bool {
91-
self.pr_mflags & MA_SHARED == MA_SHARED
92-
}
93-
}
98+
let mut info: psinfo = FromZeroes::new_zeroed();
9499

95-
pub fn process_stats() -> anyhow::Result<ProcessStats> {
96100
let stats_read_start = Instant::now();
97101

98-
// Safety: getpid() does not alter any state, or even read mutable state.
99-
let mypid = unsafe { libc::getpid() };
100-
let xmap_path = format!("/proc/{}/xmap", mypid);
101-
let mut xmap_file = std::fs::File::open(&xmap_path)?;
102+
psinfo_file.read(info.as_bytes_mut())
103+
.context("reading struct psinfo from file")?;
102104

103-
let mut stats = ProcessStats {
104-
measurement_time: Duration::ZERO,
105-
unshared_rss: 0,
106-
unshared_vss: 0,
107-
shared_rss: 0,
108-
shared_vss: 0,
109-
};
105+
stats.measurement_time = stats_read_start.elapsed();
110106

111-
let mut buf: [prxmap; 256] = [FromZeroes::new_zeroed(); 256];
112-
loop {
113-
let nread = xmap_file
114-
.read(buf.as_bytes_mut())
115-
.context("reading struct prxmap from file")?;
116-
if nread == 0 {
117-
// we've read all maps this go around.
118-
stats.measurement_time = stats_read_start.elapsed();
119-
return Ok(stats);
120-
}
121-
122-
assert!(nread % PRXMAPSZ == 0);
123-
let maps_read = nread / PRXMAPSZ;
124-
125-
for buf in buf[0..maps_read].iter() {
126-
let map_rss = buf.pr_rss as u64 * buf.pr_pagesize as u64;
127-
let map_vss = buf.pr_size as u64;
128-
if buf.is_shared() {
129-
stats.shared_rss += map_rss;
130-
stats.shared_vss += map_vss;
131-
} else {
132-
stats.unshared_rss += map_rss;
133-
stats.unshared_vss += map_vss;
134-
}
135-
}
136-
}
107+
stats.vss = info.pr_size;
108+
stats.rss = info.pr_rssize;
109+
110+
Ok(stats)
137111
}
138112

139113
async fn process_stats_task(tx: watch::Sender<ProcessStats>) {
@@ -155,16 +129,3 @@ async fn process_stats_task(tx: watch::Sender<ProcessStats>) {
155129
tokio::time::sleep(std::time::Duration::from_millis(15000)).await;
156130
}
157131
}
158-
159-
#[cfg(test)]
160-
mod test {
161-
use super::prxmap;
162-
use std::mem::size_of;
163-
164-
#[test]
165-
fn prxmap_size() {
166-
// Double check `prxmap` size against that of structures in
167-
// /proc/<pid>/xmap..
168-
assert_eq!(size_of::<prxmap>(), 0xd8);
169-
}
170-
}

bin/propolis-server/src/lib/stats/virtual_machine.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ oximeter::use_timeseries!("virtual-machine.toml");
3131
pub use self::virtual_machine::Reset;
3232
// [this won't exist in CI until the oximeter schema is updated. hacked on this
3333
// locally with Cargo.toml patching.]
34-
pub use self::virtual_machine::VmmMaxRss;
34+
pub use self::virtual_machine::VmmVss;
3535
use self::virtual_machine::{
3636
VcpuUsage, VirtualMachine as VirtualMachineTarget,
3737
};

bin/propolis-server/src/lib/vm/services.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ impl VmServices {
6767
cfg,
6868
registry,
6969
vm_objects.instance_spec(),
70+
vm_objects.machine().map_physmem.virtual_address_size(),
7071
vm_properties,
7172
)
7273
.await
@@ -119,6 +120,7 @@ async fn register_oximeter_producer(
119120
cfg: &MetricsEndpointConfig,
120121
registry: &ProducerRegistry,
121122
spec: &Spec,
123+
vm_va_size: usize,
122124
vm_properties: &InstanceProperties,
123125
) -> OximeterState {
124126
let mut oximeter_state = OximeterState::default();
@@ -152,7 +154,7 @@ async fn register_oximeter_producer(
152154
// Assign our own metrics production for this VM instance to the
153155
// registry, letting the server actually return them to oximeter when
154156
// polled.
155-
let stats = ServerStats::new(virtual_machine);
157+
let stats = ServerStats::new(virtual_machine, vm_va_size);
156158
if let Err(e) = registry.register_producer(stats.clone()) {
157159
error!(
158160
log,

lib/propolis/src/util/aspace.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -184,9 +184,9 @@ fn safe_end(start: usize, len: usize) -> Option<usize> {
184184
// Flatten the K/V nested tuple
185185
fn kv_flatten<'a, T>(i: (&'a usize, &'a (usize, T))) -> SpaceItem<'a, T> {
186186
let start = *i.0;
187-
let end = (i.1).0;
187+
let size = (i.1).0;
188188
let item = &(i.1).1;
189-
(start, end, item)
189+
(start, size, item)
190190
}
191191

192192
/// Iterator for all items in an [ASpace], constructed by [ASpace::iter].

lib/propolis/src/vmm/mem.rs

+13
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,19 @@ impl PhysMap {
197197
let mut guard = self.map.lock().unwrap();
198198
guard.clear();
199199
}
200+
201+
/// Sum the sizes of virtual address ranges supporting segments of memory
202+
/// for this VM.
203+
pub fn virtual_address_size(&self) -> usize {
204+
let guard = self.map.lock().unwrap();
205+
let mut va_size = 0usize;
206+
207+
for (_start, size, _item) in guard.iter() {
208+
va_size += size;
209+
}
210+
211+
va_size
212+
}
200213
}
201214

202215
#[cfg(test)]

0 commit comments

Comments
 (0)