Skip to content

Commit 62ccbc7

Browse files
authored
Support measuring process-wide perf events (#357)
* [WIP] support measuring perf events for mutator and GC * Write again from scratch because I git reset --hard HEAD * Check the total number of threads in gc_init * Rephrase the warning * Conditionally import * Get rid of debug prints * Initialize logging before printing log msg * Fix build error * Adjust process-wide and work-packet perf events separately * Conditionally compile Linux specific code * Differentiate different perf event options * Fix Options type * Fix compilation errors * Cargo fmt * Fix unit tests * cargo fmt
1 parent 1e79646 commit 62ccbc7

File tree

10 files changed

+121
-31
lines changed

10 files changed

+121
-31
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ atomic-traits = "0.2.0"
3333
atomic = "0.4.6"
3434
spin = "0.5.2"
3535
env_logger = "0.8.2"
36-
pfm = {version = "0.0.7", optional = true}
36+
pfm = {version = "0.0.8", optional = true}
3737

3838
[dev-dependencies]
3939
crossbeam = "0.7.3"

src/memory_manager.rs

+17
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,23 @@ pub fn gc_init<VM: VMBinding>(mmtk: &'static mut MMTK<VM>, heap_size: usize) {
5050
"MMTk failed to initialize the logger. Possibly a logger has been initialized by user."
5151
),
5252
}
53+
#[cfg(all(feature = "perf_counter", target_os = "linux"))]
54+
{
55+
use std::fs::File;
56+
use std::io::Read;
57+
let mut status = File::open("/proc/self/status").unwrap();
58+
let mut contents = String::new();
59+
status.read_to_string(&mut contents).unwrap();
60+
for line in contents.lines() {
61+
let split: Vec<&str> = line.split('\t').collect();
62+
if split[0] == "Threads:" {
63+
let threads = split[1].parse::<i32>().unwrap();
64+
if threads != 1 {
65+
warn!("Current process has {} threads, process-wide perf event measurement will only include child threads spawned from this threadas", threads);
66+
}
67+
}
68+
}
69+
}
5370
assert!(heap_size > 0, "Invalid heap size");
5471
mmtk.plan
5572
.gc_init(heap_size, &crate::VM_MAP, &mmtk.scheduler);

src/plan/global.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ impl<VM: VMBinding> BasePlan<VM> {
411411
constraints: &'static PlanConstraints,
412412
global_side_metadata_specs: Vec<SideMetadataSpec>,
413413
) -> BasePlan<VM> {
414-
let stats = Stats::new();
414+
let stats = Stats::new(&options);
415415
// Initializing the analysis manager and routines
416416
#[cfg(feature = "analysis")]
417417
let analysis_manager = AnalysisManager::new(&stats);

src/scheduler/stat.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ impl<VM: VMBinding> HasCounterSet for MMTK<VM> {
242242
fn counter_set(mmtk: &'static Self) -> Vec<Box<dyn WorkCounter>> {
243243
let mut counters: Vec<Box<dyn WorkCounter>> = vec![Box::new(WorkDuration::new())];
244244
#[cfg(feature = "perf_counter")]
245-
for e in &mmtk.options.perf_events.events {
245+
for e in &mmtk.options.work_perf_events.events {
246246
counters.push(box WorkPerfEvent::new(&e.0, e.1, e.2));
247247
}
248248
counters

src/scheduler/work_counter.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ mod perf_event {
163163
/// -1, 0 measures all threads on CPU 0
164164
/// -1, -1 is invalid
165165
pub fn new(name: &str, pid: pid_t, cpu: c_int) -> WorkPerfEvent {
166-
let mut pe = PerfEvent::new(name)
166+
let mut pe = PerfEvent::new(name, false)
167167
.unwrap_or_else(|_| panic!("Failed to create perf event {}", name));
168168
pe.open(pid, cpu)
169169
.unwrap_or_else(|_| panic!("Failed to open perf event {}", name));

src/util/options.rs

+18-8
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,11 @@ options! {
189189
// Perf events to measure
190190
// Semicolons are used to separate events
191191
// Each event is in the format of event_name,pid,cpu (see man perf_event_open for what pid and cpu mean)
192-
perf_events: PerfEventOptions [always_valid] = PerfEventOptions {events: vec![]}
192+
//
193+
// Measuring perf events for work packets
194+
work_perf_events: PerfEventOptions [always_valid] = PerfEventOptions {events: vec![]},
195+
// Measuring perf events for GC and mutators
196+
phase_perf_events: PerfEventOptions [always_valid] = PerfEventOptions {events: vec![]}
193197
}
194198

195199
impl Options {
@@ -313,7 +317,10 @@ mod tests {
313317
fn test_str_option_default() {
314318
serial_test(|| {
315319
let options = Options::default();
316-
assert_eq!(&options.perf_events, &PerfEventOptions { events: vec![] });
320+
assert_eq!(
321+
&options.work_perf_events,
322+
&PerfEventOptions { events: vec![] }
323+
);
317324
})
318325
}
319326

@@ -322,18 +329,18 @@ mod tests {
322329
serial_test(|| {
323330
with_cleanup(
324331
|| {
325-
std::env::set_var("MMTK_PERF_EVENTS", "PERF_COUNT_HW_CPU_CYCLES,0,-1");
332+
std::env::set_var("MMTK_WORK_PERF_EVENTS", "PERF_COUNT_HW_CPU_CYCLES,0,-1");
326333

327334
let options = Options::default();
328335
assert_eq!(
329-
&options.perf_events,
336+
&options.work_perf_events,
330337
&PerfEventOptions {
331338
events: vec![("PERF_COUNT_HW_CPU_CYCLES".into(), 0, -1)]
332339
}
333340
);
334341
},
335342
|| {
336-
std::env::remove_var("MMTK_PERF_EVENTS");
343+
std::env::remove_var("MMTK_WORK_PERF_EVENTS");
337344
},
338345
)
339346
})
@@ -345,14 +352,17 @@ mod tests {
345352
with_cleanup(
346353
|| {
347354
// The option needs to start with "hello", otherwise it is invalid.
348-
std::env::set_var("MMTK_PERF_EVENTS", "PERF_COUNT_HW_CPU_CYCLES");
355+
std::env::set_var("MMTK_WORK_PERF_EVENTS", "PERF_COUNT_HW_CPU_CYCLES");
349356

350357
let options = Options::default();
351358
// invalid value from env var, use default.
352-
assert_eq!(&options.perf_events, &PerfEventOptions { events: vec![] });
359+
assert_eq!(
360+
&options.work_perf_events,
361+
&PerfEventOptions { events: vec![] }
362+
);
353363
},
354364
|| {
355-
std::env::remove_var("MMTK_PERF_EVENTS");
365+
std::env::remove_var("MMTK_WORK_PERF_EVENTS");
356366
},
357367
)
358368
})

src/util/statistics/counter/long_counter.rs

+7-3
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ pub struct LongCounter<T: Diffable> {
88
pub implicitly_start: bool,
99
merge_phases: bool,
1010
count: Box<[u64; MAX_PHASES]>, // FIXME make this resizable
11+
diffable: T,
1112
start_value: Option<T::Val>,
1213
total_count: u64,
1314
running: bool,
@@ -27,7 +28,7 @@ impl<T: Diffable> Counter for LongCounter<T> {
2728
}
2829
debug_assert!(!self.running);
2930
self.running = true;
30-
self.start_value = Some(T::current_value());
31+
self.start_value = Some(self.diffable.current_value());
3132
}
3233

3334
fn stop(&mut self) {
@@ -36,14 +37,15 @@ impl<T: Diffable> Counter for LongCounter<T> {
3637
}
3738
debug_assert!(self.running);
3839
self.running = false;
39-
let delta = T::diff(&T::current_value(), self.start_value.as_ref().unwrap());
40+
let current_value = self.diffable.current_value();
41+
let delta = T::diff(&current_value, self.start_value.as_ref().unwrap());
4042
self.count[self.stats.get_phase()] += delta;
4143
self.total_count += delta;
4244
}
4345

4446
fn phase_change(&mut self, old_phase: usize) {
4547
if self.running {
46-
let now = T::current_value();
48+
let now = self.diffable.current_value();
4749
let delta = T::diff(&now, self.start_value.as_ref().unwrap());
4850
self.count[old_phase] += delta;
4951
self.total_count += delta;
@@ -125,12 +127,14 @@ impl<T: Diffable> LongCounter<T> {
125127
stats: Arc<SharedStats>,
126128
implicitly_start: bool,
127129
merge_phases: bool,
130+
diffable: T,
128131
) -> Self {
129132
LongCounter {
130133
name,
131134
implicitly_start,
132135
merge_phases,
133136
count: box [0; MAX_PHASES],
137+
diffable,
134138
start_value: None,
135139
total_count: 0,
136140
running: false,

src/util/statistics/counter/mod.rs

+8-2
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,14 @@ use std::time::Instant;
22

33
mod event_counter;
44
mod long_counter;
5+
#[cfg(feature = "perf_counter")]
6+
mod perf_event;
57
mod size_counter;
68

79
pub use self::event_counter::EventCounter;
810
pub use self::long_counter::{LongCounter, Timer};
11+
#[cfg(feature = "perf_counter")]
12+
pub use self::perf_event::PerfEventDiffable;
913
pub use self::size_counter::SizeCounter;
1014

1115
pub trait Counter {
@@ -22,9 +26,11 @@ pub trait Counter {
2226
fn name(&self) -> &String;
2327
}
2428

29+
/// A Diffable object could be stateless (e.g. a timer that reads the wall
30+
/// clock), or stateful (e.g. holds reference to a perf event fd)
2531
pub trait Diffable {
2632
type Val;
27-
fn current_value() -> Self::Val;
33+
fn current_value(&mut self) -> Self::Val;
2834
fn diff(current: &Self::Val, earlier: &Self::Val) -> u64;
2935
fn print_diff(val: u64);
3036
}
@@ -34,7 +40,7 @@ pub struct MonotoneNanoTime;
3440
impl Diffable for MonotoneNanoTime {
3541
type Val = Instant;
3642

37-
fn current_value() -> Instant {
43+
fn current_value(&mut self) -> Instant {
3844
Instant::now()
3945
}
4046

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
use super::Diffable;
2+
use pfm::{PerfEvent, PerfEventValue};
3+
4+
/// A [`Diffable`] helper type for measuring overall perf events for mutators
5+
/// and GC
6+
/// This is the process-wide counterpart of [`crate::scheduler::work_counter::WorkPerfEvent`].
7+
pub struct PerfEventDiffable {
8+
pe: PerfEvent,
9+
}
10+
11+
impl PerfEventDiffable {
12+
pub fn new(name: &str) -> Self {
13+
let mut pe = PerfEvent::new(name, true)
14+
.unwrap_or_else(|_| panic!("Failed to create perf event {}", name));
15+
// measures the calling thread (and all child threads) on all CPUs
16+
pe.open(0, -1)
17+
.unwrap_or_else(|_| panic!("Failed to open perf event {}", name));
18+
PerfEventDiffable { pe }
19+
}
20+
}
21+
22+
impl Diffable for PerfEventDiffable {
23+
type Val = PerfEventValue;
24+
25+
fn current_value(&mut self) -> Self::Val {
26+
let val = self.pe.read().unwrap();
27+
self.pe.enable();
28+
self.pe.reset();
29+
val
30+
}
31+
32+
fn diff(current: &Self::Val, _earlier: &Self::Val) -> u64 {
33+
// earlier value is not used as the counter is reset after each use
34+
assert_eq!(current.time_enabled, current.time_running);
35+
current.value as u64
36+
}
37+
38+
fn print_diff(val: u64) {
39+
print!("{}", val);
40+
}
41+
}

src/util/statistics/stats.rs

+26-14
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use crate::mmtk::MMTK;
2+
use crate::util::options::Options;
23
use crate::util::statistics::counter::*;
34
use crate::util::statistics::Timer;
45
use crate::vm::VMBinding;
@@ -51,29 +52,45 @@ pub struct Stats {
5152
}
5253

5354
impl Stats {
54-
pub fn new() -> Self {
55+
#[allow(unused)]
56+
pub fn new(options: &Options) -> Self {
57+
#[cfg(feature = "perf_counter")]
58+
let perfmon = {
59+
let mut perfmon: Perfmon = Default::default();
60+
perfmon.initialize().expect("Perfmon failed to initialize");
61+
perfmon
62+
};
5563
let shared = Arc::new(SharedStats {
5664
phase: AtomicUsize::new(0),
5765
gathering_stats: AtomicBool::new(false),
5866
});
67+
let mut counters: Vec<Arc<Mutex<dyn Counter + Send>>> = vec![];
5968
let t = Arc::new(Mutex::new(LongCounter::new(
6069
"time".to_string(),
6170
shared.clone(),
6271
true,
6372
false,
73+
MonotoneNanoTime {},
6474
)));
75+
counters.push(t.clone());
76+
#[cfg(feature = "perf_counter")]
77+
for e in &options.phase_perf_events.events {
78+
counters.push(Arc::new(Mutex::new(LongCounter::new(
79+
e.0.clone(),
80+
shared.clone(),
81+
true,
82+
false,
83+
PerfEventDiffable::new(&e.0),
84+
))));
85+
}
6586
Stats {
6687
gc_count: AtomicUsize::new(0),
67-
total_time: t.clone(),
88+
total_time: t,
6889
#[cfg(feature = "perf_counter")]
69-
perfmon: {
70-
let mut perfmon: Perfmon = Default::default();
71-
perfmon.initialize().expect("Perfmon failed to initialize");
72-
perfmon
73-
},
90+
perfmon,
7491

7592
shared,
76-
counters: Mutex::new(vec![t]),
93+
counters: Mutex::new(counters),
7794
exceeded_phase_limit: AtomicBool::new(false),
7895
}
7996
}
@@ -118,6 +135,7 @@ impl Stats {
118135
self.shared.clone(),
119136
implicit_start,
120137
merge_phases,
138+
MonotoneNanoTime {},
121139
)));
122140
guard.push(counter.clone());
123141
counter
@@ -237,9 +255,3 @@ impl Stats {
237255
self.shared.get_gathering_stats()
238256
}
239257
}
240-
241-
impl Default for Stats {
242-
fn default() -> Self {
243-
Self::new()
244-
}
245-
}

0 commit comments

Comments
 (0)