@@ -248,3 +248,121 @@ function run_and_collect(cmd)
     return proc, log
 end
+
+
+
+## opaque closures
+
+# TODO: once stabilised, move bits of this into GPUCompiler.jl
+
+using Core.Compiler: IRCode
+using Core: CodeInfo, MethodInstance, CodeInstance, LineNumberNode
+
+struct OpaqueClosure{F, E, A, R}    # func, env, args, ret
+    env::E
+end
+
+# XXX: because we can't call functions from other CUDA modules, we effectively need to
+#      recompile when the target function changes. this, together with how GPUCompiler's
+#      deferred compilation mechanism currently works, is why we have `F` as a type param.
+
+# XXX: because of GPU code requiring specialized signatures, we also need to recompile
+#      when the environment or argument types change. together with the above, this
+#      negates much of the benefit of opaque closures.
+
+# TODO: support for constructing an opaque closure from source code
+
+# TODO: complete support for passing an environment. this probably requires a split into
+#       host and device structures to, e.g., root a CuArray and pass a CuDeviceArray.
+
+function compute_ir_rettype(ir::IRCode)
+    rt = Union{}
+    for i = 1:length(ir.stmts)
+        stmt = ir.stmts[i][:inst]
+        if isa(stmt, Core.Compiler.ReturnNode) && isdefined(stmt, :val)
+            rt = Core.Compiler.tmerge(Core.Compiler.argextype(stmt.val, ir), rt)
+        end
+    end
+    return Core.Compiler.widenconst(rt)
+end
+
+function compute_oc_signature(ir::IRCode, nargs::Int, isva::Bool)
+    argtypes = Vector{Any}(undef, nargs)
+    for i = 1:nargs
+        argtypes[i] = Core.Compiler.widenconst(ir.argtypes[i+1])
+    end
+    if isva
+        lastarg = pop!(argtypes)
+        if lastarg <: Tuple
+            append!(argtypes, lastarg.parameters)
+        else
+            push!(argtypes, Vararg{Any})
+        end
+    end
+    return Tuple{argtypes...}
+end
+
+function OpaqueClosure(ir::IRCode, @nospecialize env...; isva::Bool=false)
+    # NOTE: we need ir.argtypes[1] == typeof(env)
+    ir = Core.Compiler.copy(ir)
+    nargs = length(ir.argtypes)-1
+    sig = compute_oc_signature(ir, nargs, isva)
+    rt = compute_ir_rettype(ir)
+    src = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ())
+    src.slotnames = Base.fill(:none, nargs+1)
+    src.slotflags = Base.fill(zero(UInt8), length(ir.argtypes))
+    src.slottypes = copy(ir.argtypes)
+    src.rettype = rt
+    src = Core.Compiler.ir_to_codeinf!(src, ir)
+    config = compiler_config(device(); kernel=false)
+    return generate_opaque_closure(config, src, sig, rt, nargs, isva, env...)
+end
+
+function OpaqueGPUClosure(src::CodeInfo, @nospecialize env...)
+    src.inferred || throw(ArgumentError("Expected inferred src::CodeInfo"))
+    mi = src.parent::Core.MethodInstance
+    sig = Base.tuple_type_tail(mi.specTypes)
+    method = mi.def::Method
+    nargs = method.nargs-1
+    isva = method.isva
+    config = compiler_config(device(); kernel=false)
+    return generate_opaque_closure(config, src, sig, src.rettype, nargs, isva, env...)
+end
+
+function generate_opaque_closure(config::CompilerConfig, src::CodeInfo,
+                                 @nospecialize(sig), @nospecialize(rt),
+                                 nargs::Int, isva::Bool, @nospecialize env...;
+                                 mod::Module=@__MODULE__,
+                                 file::Union{Nothing,Symbol}=nothing, line::Int=0)
+    # create a method (like `jl_make_opaque_closure_method`)
+    meth = ccall(:jl_new_method_uninit, Ref{Method}, (Any,), Main)
+    meth.sig = Tuple
+    meth.isva = isva                    # XXX: probably not supported?
+    meth.is_for_opaque_closure = 0      # XXX: do we want this?
+    meth.name = Symbol("opaque gpu closure")
+    meth.nargs = nargs + 1
+    meth.file = something(file, Symbol())
+    meth.line = line
+    ccall(:jl_method_set_source, Nothing, (Any, Any), meth, src)
+
+    # look up a method instance and create a compiler job
+    full_sig = Tuple{typeof(env), sig.parameters...}
+    mi = ccall(:jl_specializations_get_linfo, Ref{MethodInstance},
+               (Any, Any, Any), meth, full_sig, Core.svec())
+    job = CompilerJob(mi, config)   # this captures the current world age
+
+    # create a code instance and store it in the cache
+    ci = CodeInstance(mi, rt, C_NULL, src, Int32(0), meth.primary_world, typemax(UInt),
+                      UInt32(0), UInt32(0), nothing, UInt8(0))
+    Core.Compiler.setindex!(GPUCompiler.ci_cache(job), ci, mi)
+
+    id = length(GPUCompiler.deferred_codegen_jobs) + 1
+    GPUCompiler.deferred_codegen_jobs[id] = job
+    return OpaqueClosure{id, typeof(env), sig, rt}(env)
+end
+
+# device-side call to an opaque closure
+function (oc::OpaqueClosure{F})(a, b) where F
+    ptr = ccall("extern deferred_codegen", llvmcall, Ptr{Cvoid}, (Int,), F)
+    assume(ptr != C_NULL)
+    return ccall(ptr, Int, (Int, Int), a, b)
+end
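
A minimal usage sketch, not part of the hunk above: it assumes the definitions from this diff are loaded alongside CUDA.jl on a recent Julia where `Base.code_ircode` is available, and it wraps `+` on two `Int`s so that the result matches the `(Int, Int) -> Int` signature hard-coded in the device-side call above.

    using CUDA

    # lower `+` for two Ints to IRCode; Base.code_ircode returns `ir => rettype` pairs
    ir, _ = only(Base.code_ircode(+, (Int, Int)))

    # wrap it in an opaque GPU closure; nothing is captured, so the resulting object
    # is isbits and can be passed straight to a kernel
    oc = OpaqueClosure(ir)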
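Continuing that sketch, a kernel can then call the closure; the call is resolved through GPUCompiler's deferred_codegen hook when the kernel itself is compiled. The `kernel` and `results` names are illustrative.

    function kernel(oc, results)
        results[1] = oc(1, 2)   # compiled on demand via deferred_codegen
        return
    end

    results = CuArray{Int}(undef, 1)
    @cuda kernel(oc, results)
    @assert Array(results) == [3]

Because the deferred-codegen id, environment type, argument types, and return type are all type parameters of `OpaqueClosure`, changing any of them yields a different closure type and a fresh compilation job, which is exactly the recompilation cost the XXX notes above point out.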