Skip to content

Commit c0c60e8

Browse files
Pool more JIT resources to reduce memory usage/contention (#44912)
1 parent 4c858f8 commit c0c60e8

File tree

3 files changed

+140
-113
lines changed

3 files changed

+140
-113
lines changed

doc/src/devdocs/locks.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,10 @@ The following are definitely leaf locks (level 1), and must not try to acquire a
2828
> * gc_perm_lock
2929
> * flisp
3030
> * jl_in_stackwalk (Win32)
31-
> * PM_mutex[i]
32-
> * ContextPool::mutex
31+
> * ResourcePool<?>::mutex
3332
>
3433
> > flisp itself is already threadsafe; this lock only protects the `jl_ast_context_list_t` pool
35-
> > likewise, orc::ThreadSafeContexts carry their own lock, the ContextPool::mutex just protects the pool
34+
> > likewise, each ResourcePool<?>::mutex just protects its associated resource pool
3635
3736
The following is a leaf lock (level 2), and only acquires level 1 locks (safepoint) internally:
3837

src/jitlayers.cpp

Lines changed: 112 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -494,58 +494,6 @@ static auto countBasicBlocks(const Function &F)
494494
return std::distance(F.begin(), F.end());
495495
}
496496

497-
OptimizerResultT JuliaOJIT::OptimizerT::operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) {
498-
TSM.withModuleDo([&](Module &M) {
499-
uint64_t start_time = 0;
500-
if (dump_llvm_opt_stream != NULL) {
501-
// Print LLVM function statistics _before_ optimization
502-
// Print all the information about this invocation as a YAML object
503-
jl_printf(dump_llvm_opt_stream, "- \n");
504-
// We print the name and some statistics for each function in the module, both
505-
// before optimization and again afterwards.
506-
jl_printf(dump_llvm_opt_stream, " before: \n");
507-
for (auto &F : M.functions()) {
508-
if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
509-
continue;
510-
}
511-
// Each function is printed as a YAML object with several attributes
512-
jl_printf(dump_llvm_opt_stream, " \"%s\":\n", F.getName().str().c_str());
513-
jl_printf(dump_llvm_opt_stream, " instructions: %u\n", F.getInstructionCount());
514-
jl_printf(dump_llvm_opt_stream, " basicblocks: %lu\n", countBasicBlocks(F));
515-
}
516-
517-
start_time = jl_hrtime();
518-
}
519-
520-
JL_TIMING(LLVM_OPT);
521-
522-
{
523-
//Lock around our pass manager
524-
std::lock_guard<std::mutex> lock(this->mutex);
525-
PM.run(M);
526-
}
527-
528-
uint64_t end_time = 0;
529-
if (dump_llvm_opt_stream != NULL) {
530-
end_time = jl_hrtime();
531-
jl_printf(dump_llvm_opt_stream, " time_ns: %" PRIu64 "\n", end_time - start_time);
532-
jl_printf(dump_llvm_opt_stream, " optlevel: %d\n", optlevel);
533-
534-
// Print LLVM function statistics _after_ optimization
535-
jl_printf(dump_llvm_opt_stream, " after: \n");
536-
for (auto &F : M.functions()) {
537-
if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
538-
continue;
539-
}
540-
jl_printf(dump_llvm_opt_stream, " \"%s\":\n", F.getName().str().c_str());
541-
jl_printf(dump_llvm_opt_stream, " instructions: %u\n", F.getInstructionCount());
542-
jl_printf(dump_llvm_opt_stream, " basicblocks: %lu\n", countBasicBlocks(F));
543-
}
544-
}
545-
});
546-
return Expected<orc::ThreadSafeModule>{std::move(TSM)};
547-
}
548-
549497
void JuliaOJIT::OptSelLayerT::emit(std::unique_ptr<orc::MaterializationResponsibility> R, orc::ThreadSafeModule TSM) {
550498
size_t optlevel = ~0ull;
551499
TSM.withModuleDo([&](Module &M) {
@@ -570,7 +518,7 @@ void JuliaOJIT::OptSelLayerT::emit(std::unique_ptr<orc::MaterializationResponsib
570518
}
571519
});
572520
assert(optlevel != ~0ull && "Failed to select a valid optimization level!");
573-
this->optimizers[optlevel].emit(std::move(R), std::move(TSM));
521+
this->optimizers[optlevel]->OptimizeLayer.emit(std::move(R), std::move(TSM));
574522
}
575523

576524
void jl_register_jit_object(const object::ObjectFile &debugObj,
@@ -911,6 +859,106 @@ namespace {
911859
.setCodeModel(TM.getCodeModel())
912860
.setCodeGenOptLevel(CodeGenOptLevelFor(optlevel));
913861
}
862+
863+
// Factory functor that produces a fresh TargetMachine on every call.
// It is handed to a ResourcePool as its creator (see CompilerT below).
struct TMCreator {
864+
// Builder derived once from the source TargetMachine and optlevel; reused for every call.
orc::JITTargetMachineBuilder JTMB;
865+
866+
// Captures the configuration of `TM` at `optlevel` via createJTMBFromTM.
TMCreator(TargetMachine &TM, int optlevel) : JTMB(createJTMBFromTM(TM, optlevel)) {}
867+
868+
// Creates a new TargetMachine from the stored builder.
// The Expected result is unwrapped with cantFail, so a creation error is not propagated to the caller.
std::unique_ptr<TargetMachine> operator()() {
869+
return cantFail(JTMB.createTargetMachine());
870+
}
871+
};
872+
873+
// Factory functor that produces a fresh legacy::PassManager on every call,
// populated for a fixed optlevel. It owns the TargetMachine the passes are built against.
// It is handed to a ResourcePool as its creator (see OptimizerT below).
struct PMCreator {
874+
// TargetMachine owned by this creator; passed to addPassesForOptLevel on each call.
std::unique_ptr<TargetMachine> TM;
875+
// Optimization level forwarded to addPassesForOptLevel.
int optlevel;
876+
// Builds a private TargetMachine from `TM` at `optlevel` (creation errors are unwrapped with cantFail).
PMCreator(TargetMachine &TM, int optlevel) : TM(cantFail(createJTMBFromTM(TM, optlevel).createTargetMachine())), optlevel(optlevel) {}
877+
// Copying does not share the TargetMachine: it delegates to the main constructor,
// which creates a new TargetMachine from the other creator's one.
// NOTE(review): copyability is presumably needed because the pool stores its creator
// in a std::function — confirm against ResourcePool in jitlayers.h.
PMCreator(const PMCreator &other) : PMCreator(*other.TM, other.optlevel) {}
878+
// Moving transfers ownership of the TargetMachine; `other.TM` is left null.
PMCreator(PMCreator &&other) : TM(std::move(other.TM)), optlevel(other.optlevel) {}
879+
// Member-wise swap, found by ADL; used by operator= below.
friend void swap(PMCreator &self, PMCreator &other) {
880+
using std::swap;
881+
swap(self.TM, other.TM);
882+
swap(self.optlevel, other.optlevel);
883+
}
884+
// Takes its argument by value (copy- or move-constructed by the caller) and swaps it in,
// so one operator serves both copy and move assignment.
PMCreator &operator=(PMCreator other) {
885+
swap(*this, other);
886+
return *this;
887+
}
888+
// Creates a new PassManager and fills it with the passes for `optlevel`, built against `*TM`.
std::unique_ptr<legacy::PassManager> operator()() {
889+
auto PM = std::make_unique<legacy::PassManager>();
890+
addPassesForOptLevel(*PM, *TM, optlevel);
891+
return PM;
892+
}
893+
};
894+
895+
// Module transform for one optimization level: runs a pooled legacy::PassManager
// over the module and, when dump_llvm_opt_stream is set, logs per-function statistics
// before and after the run.
struct OptimizerT {
896+
// Sets up a pool of PassManagers whose creator is a PMCreator for `TM` at `optlevel`.
OptimizerT(TargetMachine &TM, int optlevel) : optlevel(optlevel), PMs(PMCreator(TM, optlevel)) {}
897+
898+
// Optimizes the module held by `TSM` in place and returns `TSM` wrapped in an Expected.
// `R` is not used in this body.
OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) {
899+
TSM.withModuleDo([&](Module &M) {
900+
// Stays 0 unless statistics dumping is enabled.
uint64_t start_time = 0;
901+
if (dump_llvm_opt_stream != NULL) {
902+
// Print LLVM function statistics _before_ optimization
903+
// Print all the information about this invocation as a YAML object
904+
jl_printf(dump_llvm_opt_stream, "- \n");
905+
// We print the name and some statistics for each function in the module, both
906+
// before optimization and again afterwards.
907+
jl_printf(dump_llvm_opt_stream, " before: \n");
908+
for (auto &F : M.functions()) {
909+
// Skip declarations and functions whose name starts with "jfptr_".
if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
910+
continue;
911+
}
912+
// Each function is printed as a YAML object with several attributes
913+
jl_printf(dump_llvm_opt_stream, " \"%s\":\n", F.getName().str().c_str());
914+
jl_printf(dump_llvm_opt_stream, " instructions: %u\n", F.getInstructionCount());
915+
// NOTE(review): countBasicBlocks returns the result of std::distance (a signed
// difference type); %lu assumes it is the size of unsigned long — confirm on LLP64 targets.
jl_printf(dump_llvm_opt_stream, " basicblocks: %lu\n", countBasicBlocks(F));
916+
}
917+
918+
// Timestamp taken after the "before" dump so the dump itself is not counted.
start_time = jl_hrtime();
919+
}
920+
921+
JL_TIMING(LLVM_OPT);
922+
923+
// Run the optimization with a PassManager taken from the pool.
// The first `*` is ResourcePool::operator*, which acquires an OwningResource; the remaining
// dereferences presumably unwrap it to the unique_ptr and then the PassManager
// (OwningResource's operators are not visible here — confirm in jitlayers.h).
// The OwningResource is a temporary, so it is destroyed at the end of this statement,
// and its destructor releases the resource back to the pool.
924+
(***PMs).run(M);
925+
926+
uint64_t end_time = 0;
927+
if (dump_llvm_opt_stream != NULL) {
928+
end_time = jl_hrtime();
929+
jl_printf(dump_llvm_opt_stream, " time_ns: %" PRIu64 "\n", end_time - start_time);
930+
jl_printf(dump_llvm_opt_stream, " optlevel: %d\n", optlevel);
931+
932+
// Print LLVM function statistics _after_ optimization
933+
jl_printf(dump_llvm_opt_stream, " after: \n");
934+
for (auto &F : M.functions()) {
935+
// Same filter as the "before" section.
if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
936+
continue;
937+
}
938+
jl_printf(dump_llvm_opt_stream, " \"%s\":\n", F.getName().str().c_str());
939+
jl_printf(dump_llvm_opt_stream, " instructions: %u\n", F.getInstructionCount());
940+
jl_printf(dump_llvm_opt_stream, " basicblocks: %lu\n", countBasicBlocks(F));
941+
}
942+
}
943+
});
944+
return Expected<orc::ThreadSafeModule>{std::move(TSM)};
945+
}
946+
private:
947+
// Optimization level; only used for the statistics dump in operator().
int optlevel;
948+
// Pool of PassManagers for this optlevel, created on demand by a PMCreator.
JuliaOJIT::ResourcePool<std::unique_ptr<legacy::PassManager>> PMs;
949+
};
950+
951+
// IRCompiler implementation that compiles each module with a TargetMachine
// taken from a pool rather than a single shared one.
struct CompilerT : orc::IRCompileLayer::IRCompiler {
952+
953+
// `MO` is forwarded to the IRCompiler base; the pool's creator is a TMCreator for `TM` at `optlevel`.
CompilerT(orc::IRSymbolMapper::ManglingOptions MO, TargetMachine &TM, int optlevel)
954+
: orc::IRCompileLayer::IRCompiler(MO), TMs(TMCreator(TM, optlevel)) {}
955+
956+
// Compiles `M` by constructing an orc::SimpleCompiler around a pooled TargetMachine and invoking it.
// The first `*` is ResourcePool::operator*, which acquires an OwningResource; the remaining
// dereferences presumably unwrap it down to the TargetMachine (confirm in jitlayers.h).
// The OwningResource temporary lives until the end of the return expression, so the
// TargetMachine is held for the whole compile and released to the pool afterwards.
Expected<std::unique_ptr<MemoryBuffer>> operator()(Module &M) override {
957+
return orc::SimpleCompiler(***TMs)(M);
958+
}
959+
960+
// Pool of TargetMachines, created on demand by a TMCreator.
JuliaOJIT::ResourcePool<std::unique_ptr<TargetMachine>> TMs;
961+
};
914962
}
915963

916964
llvm::DataLayout jl_create_datalayout(TargetMachine &TM) {
@@ -920,15 +968,14 @@ llvm::DataLayout jl_create_datalayout(TargetMachine &TM) {
920968
return jl_data_layout;
921969
}
922970

971+
// Builds the compile + optimize layer pair for one optimization level.
// CompileLayer emits into `BaseLayer` using a CompilerT; OptimizeLayer applies an
// OptimizerT and then hands the module to CompileLayer.
// OptimizeLayer's initializer reads CompileLayer, which relies on CompileLayer being
// declared (and therefore initialized) first in PipelineT.
JuliaOJIT::PipelineT::PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel)
972+
: CompileLayer(BaseLayer.getExecutionSession(), BaseLayer,
973+
std::make_unique<CompilerT>(orc::irManglingOptionsFromTargetOptions(TM.Options), TM, optlevel)),
974+
OptimizeLayer(CompileLayer.getExecutionSession(), CompileLayer, OptimizerT(TM, optlevel)) {}
975+
923976
JuliaOJIT::JuliaOJIT()
924977
: TM(createTargetMachine()),
925978
DL(jl_create_datalayout(*TM)),
926-
TMs{
927-
cantFail(createJTMBFromTM(*TM, 0).createTargetMachine()),
928-
cantFail(createJTMBFromTM(*TM, 1).createTargetMachine()),
929-
cantFail(createJTMBFromTM(*TM, 2).createTargetMachine()),
930-
cantFail(createJTMBFromTM(*TM, 3).createTargetMachine())
931-
},
932979
#if JL_LLVM_VERSION >= 130000
933980
ES(cantFail(orc::SelfExecutorProcessControl::Create())),
934981
#else
@@ -955,17 +1002,13 @@ JuliaOJIT::JuliaOJIT()
9551002
}
9561003
),
9571004
#endif
958-
CompileLayer0(ES, ObjectLayer, std::make_unique<orc::ConcurrentIRCompiler>(createJTMBFromTM(*TM, 0))),
959-
CompileLayer1(ES, ObjectLayer, std::make_unique<orc::ConcurrentIRCompiler>(createJTMBFromTM(*TM, 1))),
960-
CompileLayer2(ES, ObjectLayer, std::make_unique<orc::ConcurrentIRCompiler>(createJTMBFromTM(*TM, 2))),
961-
CompileLayer3(ES, ObjectLayer, std::make_unique<orc::ConcurrentIRCompiler>(createJTMBFromTM(*TM, 3))),
962-
OptimizeLayers{
963-
{ES, CompileLayer0, OptimizerT(PM0, PM_mutexes[0], 0)},
964-
{ES, CompileLayer1, OptimizerT(PM1, PM_mutexes[1], 1)},
965-
{ES, CompileLayer2, OptimizerT(PM2, PM_mutexes[2], 2)},
966-
{ES, CompileLayer3, OptimizerT(PM3, PM_mutexes[3], 3)},
1005+
Pipelines{
1006+
std::make_unique<PipelineT>(ObjectLayer, *TM, 0),
1007+
std::make_unique<PipelineT>(ObjectLayer, *TM, 1),
1008+
std::make_unique<PipelineT>(ObjectLayer, *TM, 2),
1009+
std::make_unique<PipelineT>(ObjectLayer, *TM, 3),
9671010
},
968-
OptSelLayer(OptimizeLayers)
1011+
OptSelLayer(Pipelines)
9691012
{
9701013
#ifdef JL_USE_JITLINK
9711014
# if defined(_OS_DARWIN_) && defined(LLVM_SHLIB)
@@ -987,10 +1030,6 @@ JuliaOJIT::JuliaOJIT()
9871030
registerRTDyldJITObject(Object, LO, MemMgr);
9881031
});
9891032
#endif
990-
addPassesForOptLevel(PM0, *TMs[0], 0);
991-
addPassesForOptLevel(PM1, *TMs[1], 1);
992-
addPassesForOptLevel(PM2, *TMs[2], 2);
993-
addPassesForOptLevel(PM3, *TMs[3], 3);
9941033

9951034
// Make sure SectionMemoryManager::getSymbolAddressInProcess can resolve
9961035
// symbols in the program as well. The nullptr argument to the function

src/jitlayers.h

Lines changed: 26 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -193,11 +193,10 @@ class JuliaOJIT {
193193
typedef orc::IRCompileLayer CompileLayerT;
194194
typedef orc::IRTransformLayer OptimizeLayerT;
195195
typedef object::OwningBinary<object::ObjectFile> OwningObj;
196-
private:
197196
template<typename ResourceT, size_t max = 0>
198197
struct ResourcePool {
199198
public:
200-
ResourcePool(function_ref<ResourceT()> creator) : creator(std::move(creator)), mutex(std::make_unique<WNMutex>()) {}
199+
ResourcePool(std::function<ResourceT()> creator) : creator(std::move(creator)), mutex(std::make_unique<WNMutex>()) {}
201200
class OwningResource {
202201
public:
203202
OwningResource(ResourcePool &pool, ResourceT resource) : pool(pool), resource(std::move(resource)) {}
@@ -206,7 +205,7 @@ class JuliaOJIT {
206205
OwningResource(OwningResource &&) = default;
207206
OwningResource &operator=(OwningResource &&) = default;
208207
~OwningResource() {
209-
if (resource) pool.release_(std::move(*resource));
208+
if (resource) pool.release(std::move(*resource));
210209
}
211210
ResourceT release() {
212211
ResourceT res(std::move(*resource));
@@ -242,11 +241,15 @@ class JuliaOJIT {
242241
llvm::Optional<ResourceT> resource;
243242
};
244243

245-
OwningResource acquire() {
246-
return OwningResource(*this, acquire_());
244+
OwningResource operator*() {
245+
return OwningResource(*this, acquire());
246+
}
247+
248+
OwningResource get() {
249+
return **this;
247250
}
248251

249-
ResourceT acquire_() {
252+
ResourceT acquire() {
250253
std::unique_lock<std::mutex> lock(mutex->mutex);
251254
if (!pool.empty()) {
252255
return pool.pop_back_val();
@@ -259,13 +262,13 @@ class JuliaOJIT {
259262
assert(!pool.empty() && "Expected resource pool to have a value!");
260263
return pool.pop_back_val();
261264
}
262-
void release_(ResourceT &&resource) {
265+
void release(ResourceT &&resource) {
263266
std::lock_guard<std::mutex> lock(mutex->mutex);
264267
pool.push_back(std::move(resource));
265268
mutex->empty.notify_one();
266269
}
267270
private:
268-
llvm::function_ref<ResourceT()> creator;
271+
std::function<ResourceT()> creator;
269272
size_t created = 0;
270273
llvm::SmallVector<ResourceT, max == 0 ? 8 : max> pool;
271274
struct WNMutex {
@@ -275,33 +278,31 @@ class JuliaOJIT {
275278

276279
std::unique_ptr<WNMutex> mutex;
277280
};
278-
struct OptimizerT {
279-
OptimizerT(legacy::PassManager &PM, std::mutex &mutex, int optlevel) : optlevel(optlevel), PM(PM), mutex(mutex) {}
280-
281-
OptimizerResultT operator()(orc::ThreadSafeModule M, orc::MaterializationResponsibility &R);
282-
private:
283-
int optlevel;
284-
legacy::PassManager &PM;
285-
std::mutex &mutex;
281+
// Bundles the compile layer and the optimize layer for a single optimization level.
struct PipelineT {
282+
// Defined in jitlayers.cpp; constructs both layers on top of `BaseLayer`.
PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel);
283+
// Must stay declared before OptimizeLayer: the constructor initializes OptimizeLayer from it.
CompileLayerT CompileLayer;
284+
// Transform layer that feeds into CompileLayer.
OptimizeLayerT OptimizeLayer;
286285
};
287-
// Custom object emission notification handler for the JuliaOJIT
288-
template <typename ObjT, typename LoadResult>
289-
void registerObject(const ObjT &Obj, const LoadResult &LO);
290286

291287
struct OptSelLayerT : orc::IRLayer {
292288

293289
template<size_t N>
294-
OptSelLayerT(OptimizeLayerT (&optimizers)[N]) : orc::IRLayer(optimizers[0].getExecutionSession(), optimizers[0].getManglingOptions()), optimizers(optimizers), count(N) {
290+
OptSelLayerT(std::unique_ptr<PipelineT> (&optimizers)[N]) : orc::IRLayer(optimizers[0]->OptimizeLayer.getExecutionSession(), optimizers[0]->OptimizeLayer.getManglingOptions()), optimizers(optimizers), count(N) {
295291
static_assert(N > 0, "Expected array with at least one optimizer!");
296292
}
297293

298294
void emit(std::unique_ptr<orc::MaterializationResponsibility> R, orc::ThreadSafeModule TSM) override;
299295

300296
private:
301-
OptimizeLayerT *optimizers;
297+
std::unique_ptr<PipelineT> *optimizers;
302298
size_t count;
303299
};
304300

301+
private:
302+
// Custom object emission notification handler for the JuliaOJIT
303+
template <typename ObjT, typename LoadResult>
304+
void registerObject(const ObjT &Obj, const LoadResult &LO);
305+
305306
public:
306307

307308
JuliaOJIT();
@@ -321,13 +322,13 @@ class JuliaOJIT {
321322
uint64_t getFunctionAddress(StringRef Name);
322323
StringRef getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *codeinst);
323324
auto getContext() {
324-
return ContextPool.acquire();
325+
return *ContextPool;
325326
}
326327
orc::ThreadSafeContext acquireContext() {
327-
return ContextPool.acquire_();
328+
return ContextPool.acquire();
328329
}
329330
void releaseContext(orc::ThreadSafeContext &&ctx) {
330-
ContextPool.release_(std::move(ctx));
331+
ContextPool.release(std::move(ctx));
331332
}
332333
const DataLayout& getDataLayout() const;
333334
TargetMachine &getTargetMachine();
@@ -340,14 +341,6 @@ class JuliaOJIT {
340341

341342
std::unique_ptr<TargetMachine> TM;
342343
DataLayout DL;
343-
// Should be big enough that in the common case, The
344-
// object fits in its entirety
345-
legacy::PassManager PM0; // per-optlevel pass managers
346-
legacy::PassManager PM1;
347-
legacy::PassManager PM2;
348-
legacy::PassManager PM3;
349-
std::mutex PM_mutexes[4];
350-
std::unique_ptr<TargetMachine> TMs[4];
351344

352345
orc::ExecutionSession ES;
353346
orc::JITDylib &GlobalJD;
@@ -359,11 +352,7 @@ class JuliaOJIT {
359352
std::shared_ptr<RTDyldMemoryManager> MemMgr;
360353
#endif
361354
ObjLayerT ObjectLayer;
362-
CompileLayerT CompileLayer0;
363-
CompileLayerT CompileLayer1;
364-
CompileLayerT CompileLayer2;
365-
CompileLayerT CompileLayer3;
366-
OptimizeLayerT OptimizeLayers[4];
355+
std::unique_ptr<PipelineT> Pipelines[4];
367356
OptSelLayerT OptSelLayer;
368357

369358
DenseMap<void*, std::string> ReverseLocalSymbolTable;

0 commit comments

Comments
 (0)