From 0d3c8df48734891bb9a8f3cef6ec70de6cf616ca Mon Sep 17 00:00:00 2001 From: William Moses Date: Thu, 30 Jan 2025 11:55:05 +0100 Subject: [PATCH] Add IR dumping (#638) * Add IR dumping * fix * fix * fix * jll bump * Update Project.toml --- ext/ReactantCUDAExt.jl | 6 ++++++ src/Compiler.jl | 7 ++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/ext/ReactantCUDAExt.jl b/ext/ReactantCUDAExt.jl index 7cf2a61f3..55ac2a3dc 100644 --- a/ext/ReactantCUDAExt.jl +++ b/ext/ReactantCUDAExt.jl @@ -358,6 +358,9 @@ function compile(job) end entryname = LLVM.name(meta.entry) + if Reactant.Compiler.DUMP_LLVMIR[] + println("cuda.jl immediate IR\n", string(mod)) + end opt_level = 2 tm = GPUCompiler.llvm_machine(job.config.target) LLVM.@dispose pb = LLVM.NewPMPassBuilder() begin @@ -401,6 +404,9 @@ function compile(job) end return true end + if Reactant.Compiler.DUMP_LLVMIR[] + println("cuda.jl postopt IR\n", string(mod)) + end if !isempty(errors) throw(GPUCompiler.InvalidIRError(job, errors)) end diff --git a/src/Compiler.jl b/src/Compiler.jl index 42f1c53f6..781b9b621 100644 --- a/src/Compiler.jl +++ b/src/Compiler.jl @@ -318,7 +318,11 @@ function optimization_passes(; no_nan::Bool=false, sroa::Bool=false) passes = ["inline{default-pipeline=canonicalize max-iterations=4}"] if sroa push!(passes, "propagate-constant-bounds") - push!(passes, "sroa-wrappers") + if DUMP_LLVMIR[] + push!(passes, "sroa-wrappers{dump_prellvm=true dump_postllvm=true}") + else + push!(passes, "sroa-wrappers") + end push!(passes, "libdevice-funcs-raise") push!(passes, "canonicalize") push!(passes, "remove-duplicate-func-def") @@ -428,6 +432,7 @@ function cubinFeatures() end const DEBUG_KERNEL = Ref{Bool}(false) +const DUMP_LLVMIR = Ref{Bool}(false) function compile_mlir!( mod, f, args; optimize::Union{Bool,Symbol}=true, no_nan::Bool=false, backend="gpu"