Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

No longer use pool size in MMTk allocation #17

Draft
wants to merge 1 commit into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions src/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -496,9 +496,8 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
// the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
s = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize);
#else
int pool_id = jl_gc_szclass_align8(allocsz);
int osize = jl_gc_sizeclasses[pool_id];
s = jl_mmtk_gc_alloc_default(ptls, pool_id, osize, jl_string_type);
size_t osize = mmtk_align_alloc_size_8(allocsz);
s = jl_mmtk_gc_alloc_default(ptls, osize, jl_string_type);
#endif
}
else {
Expand Down
21 changes: 21 additions & 0 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -2414,6 +2414,27 @@ STATIC_INLINE void mmtk_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOI
{
mmtk_gc_wb_fast(parent, ptr);
}

// Granularity (in bytes) that mmtk_align_alloc_size rounds allocation sizes up to.
#define MMTK_MIN_ALIGNMENT 4
/**
 * Round `sz` up to the next multiple of MMTK_MIN_ALIGNMENT.
 *
 * @param sz  requested size in bytes.
 * @return    the smallest multiple of MMTK_MIN_ALIGNMENT that is >= `sz`.
 *
 * If rounding up would wrap around `size_t`, a diagnostic is written to stderr
 * and the process exits with status 1.
 */
STATIC_INLINE size_t mmtk_align_alloc_size(size_t sz) JL_NOTSAFEPOINT
{
    // Build the mask in size_t so it is full-width regardless of how the
    // alignment constant happens to be typed (an unsigned 32-bit mask would
    // silently clear the upper bits of large sizes).
    const size_t mask = (size_t)MMTK_MIN_ALIGNMENT - 1;
    size_t ret = (sz + mask) & ~mask;
    if (ret < sz) {
        // `sz + mask` wrapped around: the aligned size is not representable.
        fprintf(stderr, "mmtk_align_alloc_size: size %zu overflows when aligned to %d bytes\n",
                sz, MMTK_MIN_ALIGNMENT);
        exit(1);
    }
    return ret;
}
/**
 * Round `sz` up to the next multiple of 8 bytes.
 *
 * @param sz  requested size in bytes.
 * @return    the smallest multiple of 8 that is >= `sz`.
 *
 * If rounding up would wrap around `size_t`, a diagnostic is written to stderr
 * and the process exits with status 1.
 */
STATIC_INLINE size_t mmtk_align_alloc_size_8(size_t sz) JL_NOTSAFEPOINT
{
    // Keep the mask in size_t so the complement below is full-width instead
    // of relying on sign-extension of an int expression.
    const size_t mask = (size_t)8 - 1;
    size_t ret = (sz + mask) & ~mask;
    if (ret < sz) {
        // `sz + mask` wrapped around: the aligned size is not representable.
        fprintf(stderr, "mmtk_align_alloc_size_8: size %zu overflows when aligned to 8 bytes\n", sz);
        exit(1);
    }
    return ret;
}

#endif

#ifdef __cplusplus
Expand Down
7 changes: 3 additions & 4 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset,
int osize);
jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz);
#ifdef MMTK_GC
JL_DLLIMPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int pool_offset, int osize, void* ty);
JL_DLLIMPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, size_t osize, void* ty);
JL_DLLIMPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t allocsz);
JL_DLLIMPORT extern void mmtk_post_alloc(void* mutator, void* obj, size_t bytes, int allocator);
JL_DLLIMPORT extern void mmtk_initialize_collection(void* tls);
Expand Down Expand Up @@ -494,9 +494,8 @@ STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty)
jl_value_t *v;
const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
if (sz <= GC_MAX_SZCLASS) {
int pool_id = jl_gc_szclass(allocsz);
int osize = jl_gc_sizeclasses[pool_id];
v = jl_mmtk_gc_alloc_default(ptls, pool_id, osize, ty);
int osize = mmtk_align_alloc_size(allocsz);
v = jl_mmtk_gc_alloc_default(ptls, osize, ty);
}
else {
if (allocsz < sz) // overflow in adding offs, size was "negative"
Expand Down
145 changes: 77 additions & 68 deletions src/llvm-final-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -278,77 +278,86 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize });
derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), osize);
#else // MMTK_GC
osize = mmtk_align_alloc_size(sz + sizeof(jl_taggedvalue_t));
auto pool_osize_i32 = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize);
auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize);

// Assuming we use the first immix allocator.
// FIXME: We should get the allocator index and type from MMTk.
auto allocator_offset = offsetof(jl_tls_states_t, mmtk_mutator) + offsetof(MMTkMutatorContext, allocators) + offsetof(Allocators, immix);

auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, cursor));
auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, limit));

auto cursor_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, cursor_pos);
auto cursor_ptr = builder.CreateBitCast(cursor_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "cursor_ptr");
auto cursor = builder.CreateLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, "cursor");

// offset = 8
auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8));
auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor);
auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor);
// alignment 16 (15 = 16 - 1)
auto delta = builder.CreateAnd(delta_op, ConstantInt::get(Type::getInt64Ty(target->getContext()), 15), "delta");
auto result = builder.CreateNSWAdd(cursor, delta, "result");

auto new_cursor = builder.CreateNSWAdd(result, pool_osize);

auto limit_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, limit_pos);
auto limit_ptr = builder.CreateBitCast(limit_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "limit_ptr");
auto limit = builder.CreateLoad(Type::getInt64Ty(target->getContext()), limit_ptr, "limit");

auto gt_limit = builder.CreateICmpSGT(new_cursor, limit);

auto current_block = target->getParent();
builder.SetInsertPoint(target->getNextNode());
auto phiNode = builder.CreatePHI(poolAllocFunc->getReturnType(), 2, "phi_fast_slow");
auto top_cont = current_block->splitBasicBlock(target->getNextNode(), "top_cont");

auto slowpath = BasicBlock::Create(target->getContext(), "slowpath", target->getFunction());
auto fastpath = BasicBlock::Create(target->getContext(), "fastpath", target->getFunction(), top_cont);

auto next_br = current_block->getTerminator();
next_br->eraseFromParent();
builder.SetInsertPoint(current_block);
builder.CreateCondBr(gt_limit, slowpath, fastpath);

// slowpath
builder.SetInsertPoint(slowpath);
auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
auto new_call = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32 });
new_call->setAttributes(new_call->getCalledFunction()->getAttributes());
builder.CreateBr(top_cont);

// // fastpath
builder.SetInsertPoint(fastpath);
builder.CreateStore(new_cursor, cursor_ptr);

// ptls->gc_num.allocd += osize;
auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_num));
auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos);
auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc");
auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls);
auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize);
builder.CreateStore(pool_allocd_total, pool_alloc_tls);

auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t)));
auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType());
builder.CreateBr(top_cont);

phiNode->addIncoming(new_call, slowpath);
phiNode->addIncoming(v_as_ptr, fastpath);
phiNode->takeName(target);

return phiNode;
const bool INLINE_FASTPATH_ALLOCATION = true;

if (INLINE_FASTPATH_ALLOCATION) {
// Assuming we use the first immix allocator.
// FIXME: We should get the allocator index and type from MMTk.
auto allocator_offset = offsetof(jl_tls_states_t, mmtk_mutator) + offsetof(MMTkMutatorContext, allocators) + offsetof(Allocators, immix);

auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, cursor));
auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, limit));

auto cursor_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, cursor_pos);
auto cursor_ptr = builder.CreateBitCast(cursor_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "cursor_ptr");
auto cursor = builder.CreateLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, "cursor");

// offset = 8
auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8));
auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor);
auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor);
// alignment 16 (15 = 16 - 1)
auto delta = builder.CreateAnd(delta_op, ConstantInt::get(Type::getInt64Ty(target->getContext()), 15), "delta");
auto result = builder.CreateNSWAdd(cursor, delta, "result");

auto new_cursor = builder.CreateNSWAdd(result, pool_osize);

auto limit_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, limit_pos);
auto limit_ptr = builder.CreateBitCast(limit_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "limit_ptr");
auto limit = builder.CreateLoad(Type::getInt64Ty(target->getContext()), limit_ptr, "limit");

auto gt_limit = builder.CreateICmpSGT(new_cursor, limit);

auto current_block = target->getParent();
builder.SetInsertPoint(target->getNextNode());
auto phiNode = builder.CreatePHI(poolAllocFunc->getReturnType(), 2, "phi_fast_slow");
auto top_cont = current_block->splitBasicBlock(target->getNextNode(), "top_cont");

auto slowpath = BasicBlock::Create(target->getContext(), "slowpath", target->getFunction());
auto fastpath = BasicBlock::Create(target->getContext(), "fastpath", target->getFunction(), top_cont);

auto next_br = current_block->getTerminator();
next_br->eraseFromParent();
builder.SetInsertPoint(current_block);
builder.CreateCondBr(gt_limit, slowpath, fastpath);

// slowpath
builder.SetInsertPoint(slowpath);
auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
auto new_call = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32 });
new_call->setAttributes(new_call->getCalledFunction()->getAttributes());
builder.CreateBr(top_cont);

// // fastpath
builder.SetInsertPoint(fastpath);
builder.CreateStore(new_cursor, cursor_ptr);

// ptls->gc_num.allocd += osize;
auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_num));
auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos);
auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc");
auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls);
auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize);
builder.CreateStore(pool_allocd_total, pool_alloc_tls);

auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t)));
auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType());
builder.CreateBr(top_cont);

phiNode->addIncoming(new_call, slowpath);
phiNode->addIncoming(v_as_ptr, fastpath);
phiNode->takeName(target);

return phiNode;
} else {
auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32 });
derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), osize);
}
#endif // MMTK_GC
}
} else {
Expand Down
5 changes: 3 additions & 2 deletions src/mmtk-gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int o
// TODO: drop this okay?
// maybe_collect(ptls);

jl_value_t *v = jl_mmtk_gc_alloc_default(ptls, pool_offset, osize, NULL);
jl_value_t *v = jl_mmtk_gc_alloc_default(ptls, (size_t) osize, NULL);
// TODO: this is done (without atomic operations) in jl_mmtk_gc_alloc_default; enable
// here when that's edited?
/*
Expand Down Expand Up @@ -546,7 +546,8 @@ JL_DLLEXPORT void jl_gc_wb2_slow(const void *parent, const void* ptr) JL_NOTSAFE
// Allocates memory through mmtk_alloc using the MMTk mutator of the current task's
// thread-local state and returns the address it yields, unchanged.
// `align` and `offset` are forwarded to mmtk_alloc as-is; `zero` is not read anywhere
// in this body.
// NOTE(review): this listing looks like a diff rendered without +/- markers, so the two
// `addr` declarations below are presumably the removed and the added form of one line;
// confirm against the real file.
void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset)
{
jl_ptls_t ptls = jl_current_task->ptls;
// Variant that hands the caller's size straight to mmtk_alloc.
void* addr = mmtk_alloc(&ptls->mmtk_mutator, sz, align, offset, 1);
// Variant that first passes the size through mmtk_align_alloc_size.
size_t allocsz = mmtk_align_alloc_size(sz);
// NOTE(review): the trailing 1 is presumably an allocator selector; confirm against the mmtk_alloc declaration.
void* addr = mmtk_alloc(&ptls->mmtk_mutator, allocsz, align, offset, 1);
return addr;
}

Expand Down