From 3f7b752ae6f87172a4a2da49be4f70731c5d2fdd Mon Sep 17 00:00:00 2001
From: Jonathan Perry
Date: Wed, 19 Feb 2025 23:25:14 +0000
Subject: [PATCH] add limbo time to freed RMID

---
 docs/design.md          | 54 +++++++++++++++++++++++++++++++++++++++++
 module/rmid_allocator.c | 17 +++++++++++--
 module/rmid_allocator.h |  1 +
 3 files changed, 70 insertions(+), 2 deletions(-)
 create mode 100644 docs/design.md

diff --git a/docs/design.md b/docs/design.md
new file mode 100644
index 0000000..e725eb3
--- /dev/null
+++ b/docs/design.md
@@ -0,0 +1,54 @@
+# Memory Collector Design
+
+## RMID Allocation Semantics
+
+The Memory Collector uses Resource Monitoring IDs (RMIDs) to track memory usage of processes. To ensure accurate measurement attribution, the RMID allocation system implements the following semantics:
+
+### RMID Lifecycle
+
+1. **Allocation**
+   - RMIDs are allocated to thread group leaders (processes)
+   - All threads within a process share the same RMID
+   - RMID 0 is reserved and considered invalid
+   - Allocation fails if no RMID has been free long enough
+
+2. **Deallocation**
+   - RMIDs are freed when a process terminates
+   - The free timestamp is recorded to enforce the limbo period
+   - Freed RMIDs are added to a FIFO queue for reuse
+
+3. **Limbo Period**
+   - A minimum wait time of 2ms is enforced between RMID deallocation and reallocation
+   - This ensures measurement intervals (1ms) remain unambiguous
+   - Prevents the ABA problem where measurements from different processes could be mixed
+
+### Measurement Guarantees
+
+1. **Temporal Isolation**
+   - Each RMID uniquely identifies a single process during any 1ms measurement window
+   - The 2ms limbo period ensures no overlap between processes using the same RMID
+   - Userspace can safely aggregate measurements using RMID-indexed arrays
+
+2. **Resource Efficiency**
+   - RMIDs are a limited resource (typically 512 maximum)
+   - The FIFO reuse policy aims to let cache footprints associated with freed RMIDs decay before reuse
+   - The limbo period is kept minimal (2ms) to maintain high RMID availability; if we see high jitter in measurement timers, we can increase the limbo period
+
+3. **Hardware Integration**
+   - On systems with hardware RDT support, RMIDs are programmed into MSRs
+   - On systems without RDT support, RMIDs are emulated for consistent behavior
+   - Context switches update RMIDs in hardware when necessary
+
+### Implementation Details
+
+1. **Data Structures**
+   - `struct rmid_info`: Tracks RMID metadata, including process info and the free timestamp
+   - `struct rmid_alloc`: Global allocator with a free list (used as a FIFO queue) and spinlock protection
+
+2. **Concurrency**
+   - Spinlock protection for all RMID operations
+   - Lock-free fast path for thread RMID inheritance
+
+3. **Monitoring**
+   - Tracepoints report RMID allocation and deallocation events to the eBPF collector
+   - Procfs interface for dumping current RMID assignments (so the eBPF collector can see RMIDs of processes that existed before it was loaded)
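
The temporal-isolation guarantee in the design document above is what lets userspace keep per-RMID accounting in a flat, RMID-indexed array. Below is a minimal sketch of that aggregation, not part of this patch, assuming a hypothetical sample record with `rmid` and `bytes` fields and the 512-RMID limit mentioned in the document; the collector's real output format may differ.

```c
/*
 * Illustrative sketch, not module code: userspace aggregation of per-RMID
 * measurements. The sample format and field names are hypothetical; only
 * the RMID semantics (0 is invalid, at most one process per RMID within
 * any 1ms window) come from the design document.
 */
#include <stddef.h>
#include <stdint.h>

#define MAX_RMID 512 /* "typically 512 maximum" per the design document */

struct sample {
    uint32_t rmid;  /* hypothetical: RMID reported by the collector */
    uint64_t bytes; /* hypothetical: bytes attributed in one 1ms window */
};

static uint64_t totals[MAX_RMID];

/* Accumulate a batch of samples into an RMID-indexed array. */
static void aggregate(const struct sample *samples, size_t n)
{
    for (size_t i = 0; i < n; i++) {
        uint32_t rmid = samples[i].rmid;

        if (rmid == 0 || rmid >= MAX_RMID) /* RMID 0 is reserved/invalid */
            continue;
        totals[rmid] += samples[i].bytes;
    }
}
```

Because the 2ms limbo period exceeds the 1ms measurement window, all samples carrying the same RMID within a single window belong to one process, so no re-keying is needed when an RMID is eventually recycled.
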
diff --git a/module/rmid_allocator.c b/module/rmid_allocator.c
index 0efca39..54d7a55 100644
--- a/module/rmid_allocator.c
+++ b/module/rmid_allocator.c
@@ -3,6 +3,10 @@
 #include "tracepoints.h"
 #include "collector.h"
 
+// Minimum time (in nanoseconds) an RMID must remain unused before reallocation
+// Set to 2ms to ensure no overlap during 1ms measurement intervals
+#define RMID_MINIMUM_FREE_TIME_NS (2 * NSEC_PER_MSEC)
+
 // forward declarations
 static u32 _rmid_alloc(const char *comm, pid_t tgid);
 
@@ -17,6 +21,7 @@ static u32 _rmid_alloc(const char *comm, pid_t tgid)
 {
     struct rmid_info *info;
     u32 rmid;
+    u64 now = ktime_get_ns();
 
     // Check if we have any free RMIDs
     if (list_empty(&rmid_allocator.free_list)) {
@@ -25,6 +30,12 @@ static u32 _rmid_alloc(const char *comm, pid_t tgid)
 
     // Get the RMID that was freed the longest time ago
     info = list_first_entry(&rmid_allocator.free_list, struct rmid_info, list);
+
+    // Check if enough time has passed since this RMID was freed
+    if (now - info->last_free_timestamp < RMID_MINIMUM_FREE_TIME_NS) {
+        return 0; // No RMIDs available that have been free long enough
+    }
+
     list_del_init(&info->list);
 
     // Update RMID info
@@ -34,7 +45,7 @@ static u32 _rmid_alloc(const char *comm, pid_t tgid)
     rmid = info->rmid;
 
     // Emit tracepoint for RMID allocation while holding the lock
-    trace_memory_collector_rmid_alloc(rmid, comm, tgid, ktime_get_ns());
+    trace_memory_collector_rmid_alloc(rmid, comm, tgid, now);
 
     return rmid;
 }
@@ -127,6 +138,7 @@ int init_rmid_allocator(u32 max_rmid)
         INIT_LIST_HEAD(&rmid_allocator.rmids[i].list);
         rmid_allocator.rmids[i].rmid = i;
         rmid_allocator.rmids[i].tgid = 0;
+        rmid_allocator.rmids[i].last_free_timestamp = 0; // Initialize to 0 to allow immediate allocation
         if (i != RMID_INVALID) { // Don't add RMID 0 to free list
             list_add_tail(&rmid_allocator.rmids[i].list, &rmid_allocator.free_list);
         }
@@ -163,10 +175,11 @@ void rmid_free(u32 rmid)
 
     info = &rmid_allocator.rmids[rmid];
     info->tgid = 0;
+    info->last_free_timestamp = ktime_get_ns(); // Record free timestamp
    list_add_tail(&info->list, &rmid_allocator.free_list);
 
     // Emit tracepoint for RMID deallocation while holding the lock
-    trace_memory_collector_rmid_free(rmid, ktime_get_ns());
+    trace_memory_collector_rmid_free(rmid, info->last_free_timestamp);
 
     spin_unlock_irqrestore(&rmid_allocator.lock, flags);
 }
diff --git a/module/rmid_allocator.h b/module/rmid_allocator.h
index 32335ce..cb11886 100644
--- a/module/rmid_allocator.h
+++ b/module/rmid_allocator.h
@@ -11,6 +11,7 @@ struct rmid_info {
     u32 rmid;
     char comm[TASK_COMM_LEN]; // Name of task leader
     pid_t tgid; // Thread group ID (process ID)
+    u64 last_free_timestamp; // Timestamp when RMID was last freed
 };
 
 struct rmid_alloc {
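
The reuse policy this patch adds to `_rmid_alloc()` is small enough to model outside the kernel. The sketch below is an illustration under assumptions, not module code: it re-creates the FIFO free list plus the minimum-free-time check using plain pointers and a caller-supplied clock, and every name in it (`model_alloc`, `model_free`, `MIN_FREE_TIME_NS`) is invented for the example.

```c
/*
 * Standalone userspace model of the policy added by this patch: freed
 * RMIDs go to the tail of a FIFO, and the head is handed out again only
 * after it has been free for at least 2ms. All names are illustrative.
 */
#include <stdint.h>
#include <stdio.h>

#define MIN_FREE_TIME_NS (2ULL * 1000 * 1000) /* mirrors RMID_MINIMUM_FREE_TIME_NS */

struct free_entry {
    uint32_t rmid;
    uint64_t freed_at_ns;
    struct free_entry *next;
};

static struct free_entry *head, *tail; /* FIFO: pop at head, push at tail */

/* Returns 0 (the invalid RMID) if nothing has been free long enough. */
static uint32_t model_alloc(uint64_t now_ns)
{
    struct free_entry *e = head;

    if (!e || now_ns - e->freed_at_ns < MIN_FREE_TIME_NS)
        return 0;
    head = e->next;
    if (!head)
        tail = NULL;
    return e->rmid;
}

/* Record the free timestamp and append the entry to the FIFO tail. */
static void model_free(struct free_entry *e, uint32_t rmid, uint64_t now_ns)
{
    e->rmid = rmid;
    e->freed_at_ns = now_ns;
    e->next = NULL;
    if (tail)
        tail->next = e;
    else
        head = e;
    tail = e;
}

int main(void)
{
    struct free_entry slot;
    uint32_t got;

    model_free(&slot, 7, 0);        /* RMID 7 freed at t = 0 */

    got = model_alloc(1000 * 1000); /* t = 1ms: still in limbo */
    printf("at 1ms: got RMID %u (expect 0)\n", (unsigned)got);

    got = model_alloc(2000 * 1000); /* t = 2ms: limbo expired */
    printf("at 2ms: got RMID %u (expect 7)\n", (unsigned)got);
    return 0;
}
```

Checking only the head of the FIFO is sufficient in the kernel path as well: the head is the RMID freed longest ago, so if it is still inside the limbo window, every other free RMID is too, and returning 0 (the invalid RMID) is the correct answer.
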