Skip to content

Fix root scanning #165

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jul 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions mmtk/src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ use mmtk::Mutator;
use mmtk::MutatorContext;
use mmtk::MMTK;
use once_cell::sync;
use std::cell::RefCell;
use std::ffi::{CStr, CString};
use std::sync::atomic::Ordering;

// Supported barriers:
static NO_BARRIER: sync::Lazy<CString> = sync::Lazy::new(|| CString::new("NoBarrier").unwrap());
Expand Down Expand Up @@ -282,3 +284,51 @@ pub extern "C" fn get_finalized_object() -> ObjectReference {
None => unsafe { Address::ZERO.to_object_reference() },
}
}

thread_local! {
    /// Per-thread buffer of root slot addresses reported for the nmethod currently
    /// being scanned. `mmtk_add_nmethod_oop` appends to it, and
    /// `mmtk_register_nmethod` drains it into the global `CODE_CACHE_ROOTS` map.
    static NMETHOD_SLOTS: RefCell<Vec<Address>> = RefCell::new(vec![]);
}

/// Report one pointer (slot) inside an nmethod to mmtk.
///
/// The slot is buffered in the calling thread's local cache until
/// `mmtk_register_nmethod` is called to publish it globally.
#[no_mangle]
pub extern "C" fn mmtk_add_nmethod_oop(addr: Address) {
    NMETHOD_SLOTS.with(|slots| {
        slots.borrow_mut().push(addr);
    })
}

/// Register a nmethod.
///
/// The C++ part of the binding should scan the nmethod and report all its pointers
/// to mmtk (via `mmtk_add_nmethod_oop`) before calling this function.
/// This function transfers all the locally cached pointers of this nmethod to the
/// global storage, keeping `CODE_CACHE_ROOTS_SIZE` in sync with the map contents.
#[no_mangle]
pub extern "C" fn mmtk_register_nmethod(nm: Address) {
    // Drain this thread's cached slots; nothing to do if none were reported.
    let slots = NMETHOD_SLOTS.with(|x| {
        if x.borrow().is_empty() {
            None
        } else {
            // `take` leaves an empty Vec behind for the next nmethod.
            Some(x.take())
        }
    });
    let slots = match slots {
        Some(slots) => slots,
        None => return,
    };
    let new_len = slots.len();
    let mut roots = crate::CODE_CACHE_ROOTS.lock().unwrap();
    // Relaxed ordering is sufficient: the counter is only mutated while holding the
    // CODE_CACHE_ROOTS lock, and is read for sizing during root scanning.
    if let Some(old) = roots.insert(nm, slots) {
        // Re-registering an nmethod replaces its previous slots; subtract their
        // count so the size counter stays exact instead of drifting upwards.
        crate::CODE_CACHE_ROOTS_SIZE.fetch_sub(old.len(), Ordering::Relaxed);
    }
    crate::CODE_CACHE_ROOTS_SIZE.fetch_add(new_len, Ordering::Relaxed);
}

/// Unregister a nmethod, discarding any root slots previously recorded for it
/// and deducting their count from `CODE_CACHE_ROOTS_SIZE`.
#[no_mangle]
pub extern "C" fn mmtk_unregister_nmethod(nm: Address) {
    let mut roots = crate::CODE_CACHE_ROOTS.lock().unwrap();
    if let Some(slots) = roots.remove(&nm) {
        // Relaxed ordering is sufficient: the counter is only mutated while
        // holding the CODE_CACHE_ROOTS lock.
        crate::CODE_CACHE_ROOTS_SIZE.fetch_sub(slots.len(), Ordering::Relaxed);
    }
}
31 changes: 30 additions & 1 deletion mmtk/src/gc_work.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::sync::atomic::Ordering;

use super::{OpenJDK, UPCALLS};
use mmtk::scheduler::*;
use mmtk::vm::RootsWorkFactory;
Expand Down Expand Up @@ -33,11 +35,38 @@ scan_roots_work!(ScanManagementRoots, scan_management_roots);
scan_roots_work!(ScanJvmtiExportRoots, scan_jvmti_export_roots);
scan_roots_work!(ScanAOTLoaderRoots, scan_aot_loader_roots);
scan_roots_work!(ScanSystemDictionaryRoots, scan_system_dictionary_roots);
scan_roots_work!(ScanCodeCacheRoots, scan_code_cache_roots);
scan_roots_work!(ScanStringTableRoots, scan_string_table_roots);
scan_roots_work!(
ScanClassLoaderDataGraphRoots,
scan_class_loader_data_graph_roots
);
scan_roots_work!(ScanWeakProcessorRoots, scan_weak_processor_roots);
scan_roots_work!(ScanVMThreadRoots, scan_vm_thread_roots);

/// GC work packet that flattens the cached CodeCache root slots
/// (`crate::CODE_CACHE_ROOTS`) into a single buffer and hands it to a
/// process-edges work packet.
pub struct ScanCodeCacheRoots<F: RootsWorkFactory> {
    // Factory used to create the downstream root-processing work packet.
    factory: F,
}

impl<F: RootsWorkFactory> ScanCodeCacheRoots<F> {
    /// Create the work packet with the factory that will receive the collected roots.
    pub fn new(factory: F) -> Self {
        Self { factory }
    }
}

impl<F: RootsWorkFactory> GCWork<OpenJDK> for ScanCodeCacheRoots<F> {
    fn do_work(&mut self, _worker: &mut GCWorker<OpenJDK>, _mmtk: &'static MMTK<OpenJDK>) {
        // Flatten all the per-nmethod cached root slots into one buffer.
        // CODE_CACHE_ROOTS_SIZE tracks the total slot count, so the buffer can be
        // allocated at its final size up front.
        let capacity = crate::CODE_CACHE_ROOTS_SIZE.load(Ordering::Relaxed);
        let mut edges = Vec::with_capacity(capacity);
        for slots in crate::CODE_CACHE_ROOTS.lock().unwrap().values() {
            edges.extend(slots.iter().copied());
        }
        // Hand the collected roots to a process-edges work packet.
        self.factory.create_process_edge_roots_work(edges);
        // Use the following code to scan CodeCache directly, instead of scanning the "remembered set".
        // unsafe {
        //     ((*UPCALLS).scan_code_cache_roots)(create_process_edges_work::<E> as _);
        // }
    }
}
18 changes: 13 additions & 5 deletions mmtk/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ extern crate mmtk;
extern crate lazy_static;
extern crate once_cell;

use std::collections::HashMap;
use std::ptr::null_mut;
use std::sync::atomic::AtomicUsize;
use std::sync::Mutex;

use libc::{c_char, c_void, uintptr_t};
use mmtk::util::alloc::AllocationError;
Expand Down Expand Up @@ -58,9 +61,6 @@ pub struct OpenJDK_Upcalls {
pub out_of_memory: extern "C" fn(tls: VMThread, err_kind: AllocationError),
pub get_next_mutator: extern "C" fn() -> *mut Mutator<OpenJDK>,
pub reset_mutator_iterator: extern "C" fn(),
pub compute_static_roots: extern "C" fn(trace: *mut c_void, tls: OpaquePointer),
pub compute_global_roots: extern "C" fn(trace: *mut c_void, tls: OpaquePointer),
pub compute_thread_roots: extern "C" fn(trace: *mut c_void, tls: OpaquePointer),
pub scan_object: extern "C" fn(trace: *mut c_void, object: ObjectReference, tls: OpaquePointer),
pub dump_object: extern "C" fn(object: ObjectReference),
pub get_object_size: extern "C" fn(object: ObjectReference) -> usize,
Expand All @@ -74,8 +74,8 @@ pub struct OpenJDK_Upcalls {
pub referent_offset: extern "C" fn() -> i32,
pub discovered_offset: extern "C" fn() -> i32,
pub dump_object_string: extern "C" fn(object: ObjectReference) -> *const c_char,
pub scan_thread_roots: extern "C" fn(closure: EdgesClosure),
pub scan_thread_root: extern "C" fn(closure: EdgesClosure, tls: VMMutatorThread),
pub scan_all_thread_roots: extern "C" fn(closure: EdgesClosure),
pub scan_thread_roots: extern "C" fn(closure: EdgesClosure, tls: VMMutatorThread),
pub scan_universe_roots: extern "C" fn(closure: EdgesClosure),
pub scan_jni_handle_roots: extern "C" fn(closure: EdgesClosure),
pub scan_object_synchronizer_roots: extern "C" fn(closure: EdgesClosure),
Expand Down Expand Up @@ -142,3 +142,11 @@ lazy_static! {
#[no_mangle]
pub static MMTK_MARK_COMPACT_HEADER_RESERVED_IN_BYTES: usize =
mmtk::util::alloc::MarkCompactAllocator::<OpenJDK>::HEADER_RESERVED_IN_BYTES;

lazy_static! {
    /// A global storage for all the cached CodeCache root pointers, keyed by the
    /// address of the nmethod they belong to. Filled by `mmtk_register_nmethod`,
    /// pruned by `mmtk_unregister_nmethod`, and read during root scanning.
    static ref CODE_CACHE_ROOTS: Mutex<HashMap<Address, Vec<Address>>> = Mutex::new(HashMap::new());
}

/// A counter tracking the total size of the `CODE_CACHE_ROOTS`.
/// It is only mutated while holding the `CODE_CACHE_ROOTS` lock, and is used to
/// pre-size the merged root buffer when the roots are scanned.
static CODE_CACHE_ROOTS_SIZE: AtomicUsize = AtomicUsize::new(0);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need this as a separate global variable. Whenever you change this SIZE, you have to acquire a lock to change the CODE_CACHE_ROOTS. And in the only place where you read the SIZE, you also acquire a lock to read the CODE_CACHE_ROOTS.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also in mmtk_register_nmethod(), you modify the SIZE before acquiring a lock to the ROOTS, which will make the SIZE and the ROOTS inconsistent.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, SIZE is not necessary, it's just an optimization. Because CODE_CACHE_ROOTS is a set of Vec<Address>s, at root scanning time these vectors have to be merged together into a single buffer. Recording the total SIZE ahead of time and using it later for the merged buffer allocation may prevent frequent vector size expansion.

The consistency is ensured by separating the writes and reads. SIZE is only updated during mutator phases, and reads only happen in pauses.


Apart from tracking SIZE, I also tried putting each vector in the cached roots (HashMap<Address, Vec<Address>>) to a different work packet. There's a slight performance drop by doing so, since this may yield too many vector clones, too many work packets, and most of the vectors are small.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, SIZE is not necessary, it's just an optimization. Because CODE_CACHE_ROOTS is a set of Vec<Address>s, at root scanning time these vectors have to be merged together as a single buffer. Record total SIZE ahead of time and use it later for the merged buffer allocation may prevent frequent vector size expansion.

You can just calculate the total size of the Vec<Address> before creating the vec.

let code_cache_roots = CODE_CACHE_ROOTS.lock().unwrap();
let size = code_cache_roots.values().map(|vec| vec.len()).sum();
let mut vec = Vec::with_capacity(size);
for roots in code_cache_roots.values() {
    for r in roots {
        vec.push(*r)
    }
}

My point is that if you have acquired the lock to CODE_CACHE_ROOTS whenever you access CODE_CACHE_ROOTS_SIZE, then there is little point having a separate count.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm afraid calculating the size in a loop may slow down the GC time. If I can calculate the total size cheaply ahead of time, simply using the pre-calculated size here is the fastest way.

This makes me realize that if I move the SIZE increments after the lock, I can do relaxed instead of atomic increments. This solves the consistency issue and improves increment performance, while still keeping a precise pre-calculated size counter.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you always access the CODE_CACHE_ROOTS_SIZE while holding the lock of CODE_CACHE_ROOTS, you can merge them into one Mutex<T>, like this:

struct CodeCacheRoots {
    hash_map: HashMap<Address, Vec<Address>>,
    size: usize,
}

lazy_static! {
    static ref CODE_CACHE_ROOTS: Mutex<CodeCacheRoots> = Mutex::new(CodeCacheRoots {
        hash_map: HashMap::new(),
        size: 0,
    });
}

When using, you lock the mutex and have access to both hash_map and size:

{
    let mut lock_guard = CODE_CACHE_ROOTS.lock().unwrap();
    lock_guard.size += slots.len();
    lock_guard.hash_map.insert(nm, slots);
}

4 changes: 2 additions & 2 deletions mmtk/src/scanning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ impl Scanning<OpenJDK> for VMScanning {

fn scan_thread_roots(_tls: VMWorkerThread, mut factory: impl RootsWorkFactory) {
unsafe {
((*UPCALLS).scan_thread_roots)(to_edges_closure(&mut factory));
((*UPCALLS).scan_all_thread_roots)(to_edges_closure(&mut factory));
}
}

Expand All @@ -71,7 +71,7 @@ impl Scanning<OpenJDK> for VMScanning {
) {
let tls = mutator.get_tls();
unsafe {
((*UPCALLS).scan_thread_root)(to_edges_closure(&mut factory), tls);
((*UPCALLS).scan_thread_roots)(to_edges_closure(&mut factory), tls);
}
}

Expand Down
11 changes: 6 additions & 5 deletions openjdk/mmtk.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ extern void handle_user_collection_request(void *tls);
extern void start_control_collector(void *tls, void *context);
extern void start_worker(void *tls, void* worker);

extern size_t mmtk_add_nmethod_oop(void* object);
extern size_t mmtk_register_nmethod(void* nm);
extern size_t mmtk_unregister_nmethod(void* nm);

/**
* VM Accounting
*/
Expand Down Expand Up @@ -127,9 +131,6 @@ typedef struct {
void (*out_of_memory) (void *tls, MMTkAllocationError err_kind);
void* (*get_next_mutator) ();
void (*reset_mutator_iterator) ();
void (*compute_static_roots) (void* trace, void* tls);
void (*compute_global_roots) (void* trace, void* tls);
void (*compute_thread_roots) (void* trace, void* tls);
void (*scan_object) (void* trace, void* object, void* tls);
void (*dump_object) (void* object);
size_t (*get_object_size) (void* object);
Expand All @@ -143,8 +144,8 @@ typedef struct {
int (*referent_offset) ();
int (*discovered_offset) ();
char* (*dump_object_string) (void* object);
void (*scan_thread_roots)(EdgesClosure closure);
void (*scan_thread_root)(EdgesClosure closure, void* tls);
void (*scan_all_thread_roots)(EdgesClosure closure);
void (*scan_thread_roots)(EdgesClosure closure, void* tls);
void (*scan_universe_roots) (EdgesClosure closure);
void (*scan_jni_handle_roots) (EdgesClosure closure);
void (*scan_object_synchronizer_roots) (EdgesClosure closure);
Expand Down
101 changes: 21 additions & 80 deletions openjdk/mmtkHeap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,135 +335,76 @@ void MMTkHeap::print_tracing_info() const {
}


// An object is scavengable if its location may move during a scavenge.
// (A scavenge is a GC which is not a full GC.)
bool MMTkHeap::is_scavengable(oop obj) {return false;}
// Registering and unregistering an nmethod (compiled code) with the heap.
// Override with specific mechanism for each specialized heap type.

// Heap verification
void MMTkHeap::verify(VerifyOption option) {}

template<int MAX_TASKS = 12>
struct MMTkRootScanWorkScope {
int* _num_root_scan_tasks;
int _current_task_ordinal;
MMTkRootScanWorkScope(int* num_root_scan_tasks): _num_root_scan_tasks(num_root_scan_tasks), _current_task_ordinal(0) {
_current_task_ordinal = Atomic::add(1, _num_root_scan_tasks);
if (_current_task_ordinal == 1) {
nmethod::oops_do_marking_prologue();
}
}
~MMTkRootScanWorkScope() {
if (_current_task_ordinal == MAX_TASKS) {
_current_task_ordinal = 0;
nmethod::oops_do_marking_epilogue();
}
class MMTkRegisterNMethodOopClosure: public OopClosure {
template <class T> void do_oop_work(T* p) {
mmtk_add_nmethod_oop((void*) p);
}
public:
void do_oop(oop* p) { do_oop_work(p); }
void do_oop(narrowOop* p) { do_oop_work(p); }
};

void MMTkHeap::scan_static_roots(OopClosure& cl) {

void MMTkHeap::register_nmethod(nmethod* nm) {
// Scan and report pointers in this nmethod
MMTkRegisterNMethodOopClosure reg_cl;
nm->oops_do(&reg_cl);
// Register the nmethod
mmtk_register_nmethod((void*) nm);
}

void MMTkHeap::unregister_nmethod(nmethod* nm) {
mmtk_unregister_nmethod((void*) nm);
}

// Heap verification
void MMTkHeap::verify(VerifyOption option) {}

void MMTkHeap::scan_universe_roots(OopClosure& cl) {
ResourceMark rm;
MMTkRootScanWorkScope<> root_scan_work(&_num_root_scan_tasks);
Universe::oops_do(&cl);
}
void MMTkHeap::scan_jni_handle_roots(OopClosure& cl) {
ResourceMark rm;
MMTkRootScanWorkScope<> root_scan_work(&_num_root_scan_tasks);
JNIHandles::oops_do(&cl);
}
void MMTkHeap::scan_object_synchronizer_roots(OopClosure& cl) {
ResourceMark rm;
MMTkRootScanWorkScope<> root_scan_work(&_num_root_scan_tasks);
ObjectSynchronizer::oops_do(&cl);
}
void MMTkHeap::scan_management_roots(OopClosure& cl) {
ResourceMark rm;
MMTkRootScanWorkScope<> root_scan_work(&_num_root_scan_tasks);
Management::oops_do(&cl);
}
void MMTkHeap::scan_jvmti_export_roots(OopClosure& cl) {
ResourceMark rm;
MMTkRootScanWorkScope<> root_scan_work(&_num_root_scan_tasks);
JvmtiExport::oops_do(&cl);
}
void MMTkHeap::scan_aot_loader_roots(OopClosure& cl) {
ResourceMark rm;
MMTkRootScanWorkScope<> root_scan_work(&_num_root_scan_tasks);
AOTLoader::oops_do(&cl);
}
void MMTkHeap::scan_system_dictionary_roots(OopClosure& cl) {
ResourceMark rm;
MMTkRootScanWorkScope<> root_scan_work(&_num_root_scan_tasks);
SystemDictionary::oops_do(&cl);
}
void MMTkHeap::scan_code_cache_roots(OopClosure& cl) {
ResourceMark rm;
MMTkRootScanWorkScope<> root_scan_work(&_num_root_scan_tasks);
CodeBlobToOopClosure cb_cl(&cl, true);
{
MutexLockerEx lock(CodeCache_lock, Mutex::_no_safepoint_check_flag);
CodeCache::scavenge_root_nmethods_do(&cb_cl);
CodeCache::blobs_do(&cb_cl);
}
MarkingCodeBlobClosure cb_cl(&cl, false);
CodeCache::blobs_do(&cb_cl);
}
void MMTkHeap::scan_string_table_roots(OopClosure& cl) {
ResourceMark rm;
MMTkRootScanWorkScope<> root_scan_work(&_num_root_scan_tasks);
StringTable::oops_do(&cl);
}
void MMTkHeap::scan_class_loader_data_graph_roots(OopClosure& cl) {
ResourceMark rm;
MMTkRootScanWorkScope<> root_scan_work(&_num_root_scan_tasks);
CLDToOopClosure cld_cl(&cl, false);
ClassLoaderDataGraph::cld_do(&cld_cl);
}
void MMTkHeap::scan_weak_processor_roots(OopClosure& cl) {
ResourceMark rm;
MMTkRootScanWorkScope<> root_scan_work(&_num_root_scan_tasks);
WeakProcessor::oops_do(&cl); // (really needed???)
}
void MMTkHeap::scan_vm_thread_roots(OopClosure& cl) {
ResourceMark rm;
MMTkRootScanWorkScope<> root_scan_work(&_num_root_scan_tasks);
VMThread::vm_thread()->oops_do(&cl, NULL);
}

void MMTkHeap::scan_global_roots(OopClosure& cl) {
ResourceMark rm;
MMTkRootScanWorkScope<> root_scan_work(&_num_root_scan_tasks);

CodeBlobToOopClosure cb_cl(&cl, true);
CLDToOopClosure cld_cl(&cl, false);

Universe::oops_do(&cl);
JNIHandles::oops_do(&cl);
ObjectSynchronizer::oops_do(&cl);
Management::oops_do(&cl);
JvmtiExport::oops_do(&cl);
AOTLoader::oops_do(&cl);
SystemDictionary::oops_do(&cl);
{
MutexLockerEx lock(CodeCache_lock, Mutex::_no_safepoint_check_flag);
CodeCache::blobs_do(&cb_cl);
}

OopStorage::ParState<false, false> _par_state_string(StringTable::weak_storage());
StringTable::possibly_parallel_oops_do(&_par_state_string, &cl);

// if (!_root_tasks->is_task_claimed(MMTk_ClassLoaderDataGraph_oops_do)) ClassLoaderDataGraph::roots_cld_do(&cld_cl, &cld_cl);
ClassLoaderDataGraph::cld_do(&cld_cl);

WeakProcessor::oops_do(&cl); // (really needed???)
}

void MMTkHeap::scan_thread_roots(OopClosure& cl) {
ResourceMark rm;
MMTkRootScanWorkScope<> root_scan_work(&_num_root_scan_tasks);
Threads::possibly_parallel_oops_do(false, &cl, NULL);
}

Expand Down
6 changes: 3 additions & 3 deletions openjdk/mmtkHeap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,9 +181,11 @@ class MMTkHeap : public CollectedHeap {

// An object is scavengable if its location may move during a scavenge.
// (A scavenge is a GC which is not a full GC.)
bool is_scavengable(oop obj);
inline bool is_scavengable(oop obj) { return true; }
// Registering and unregistering an nmethod (compiled code) with the heap.
// Override with specific mechanism for each specialized heap type.
virtual void register_nmethod(nmethod* nm);
virtual void unregister_nmethod(nmethod* nm);

// Heap verification
void verify(VerifyOption option);
Expand All @@ -192,8 +194,6 @@ class MMTkHeap : public CollectedHeap {

void scan_roots(OopClosure& cl);

void scan_static_roots(OopClosure& cl);
void scan_global_roots(OopClosure& cl);
void scan_thread_roots(OopClosure& cl);

void scan_universe_roots(OopClosure& cl);
Expand Down
Loading