Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use rocprof_v2 to implement Profiler for AMDGPU in Julia #695

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions gen/rocprof/generator.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
using Clang.Generators
using JuliaFormatter

# Generate Julia bindings for the rocprofiler v2 C headers with Clang.jl.
# Both the options file and the output file are given as relative paths, so
# they are resolved against the directory this script is run from.
rocm_include = normpath("/opt/rocm/include")
rocprof_include = joinpath(rocm_include, "rocprofiler/v2")
options = load_options("rocprof/rocprof-generator.toml")

# Default Clang arguments plus the ROCm include root.
args = get_default_args()
push!(args, "-I$rocm_include")

# Full paths of every `.h` file directly inside the rocprofiler v2 directory.
headers = filter(endswith(".h"), readdir(rocprof_include; join=true))

ctx = create_context(headers, args, options)
build!(ctx)

# Run the formatter over the file named by the options.
format_file(options["general"]["output_file_path"], YASStyle())
7 changes: 7 additions & 0 deletions gen/rocprof/rocprof-generator.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Options loaded by gen/rocprof/generator.jl through `load_options`.
[general]
# NOTE(review): presumably the library name the generated bindings call into — confirm against the Clang.jl generator docs.
library_name = "librocprofiler64v2"
# generator.jl reads this key back and runs the formatter on the file.
# Relative path: resolved against the directory the generator is run from.
output_file_path = "./librocprof.jl"
export_symbol_prefixes = []

[codegen]
# NOTE(review): presumably makes the generator emit `@ccall` rather than `ccall(...)` — confirm.
use_ccall_macro = true
149 changes: 149 additions & 0 deletions prof.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# ENV["HSA_TOOLS_LIB"] = "/opt/rocm/lib/librocprofiler64v2.so"
# ENV["ROCPROFILER_METRICS_PATH"] = "/opt/rocm/libexec/rocprofiler/counters/derived_counters.xml"

using AMDGPU
import AMDGPU: Profiler, @check

@show Profiler.version()

# Application tracing

## Initialize the profiler tooling
@check Profiler.rocprofiler_initialize()

## Create a session (using ROCPROFILER_NONE_REPLAY_MODE) and unwrap its id
session_ref = Ref{Profiler.rocprofiler_session_id_t}()
@check Profiler.rocprofiler_create_session(
    Profiler.ROCPROFILER_NONE_REPLAY_MODE, session_ref
)
id = session_ref[]

## Create output buffer for the data
"""
    output_callback(record, end_record, session_id, buffer_id)

Log the four arguments it receives and return `nothing`.

Passed to `rocprofiler_create_buffer` (via `@cfunction`) as the buffer callback.
"""
function output_callback(record, end_record, session_id, buffer_id)
    @info("Output callback", (record, end_record, session_id, buffer_id))
    return nothing
end

# C-callable pointer to `output_callback`, handed to rocprofiler when the
# buffer is created.
output_callback_ptr = @cfunction(
    output_callback,
    Cvoid,
    (
        Ptr{Profiler.rocprofiler_record_header_t},
        Ptr{Profiler.rocprofiler_record_header_t},
        Profiler.rocprofiler_session_id_t,
        Profiler.rocprofiler_buffer_id_t,
    )
)

# Create the buffer for session `id` and unwrap the id written by the call.
# NOTE(review): `0x9999` is presumably the buffer size in bytes — confirm.
buffer_ref = Ref{Profiler.rocprofiler_buffer_id_t}()
@check Profiler.rocprofiler_create_buffer(id, output_callback_ptr, 0x9999, buffer_ref)
buffer_id = buffer_ref[]

# Activity domains (APIs) that should be traced.
apis_requested = Profiler.rocprofiler_tracer_activity_domain_t[
    Profiler.ACTIVITY_DOMAIN_HIP_API,
    Profiler.ACTIVITY_DOMAIN_HIP_OPS,
    Profiler.ACTIVITY_DOMAIN_HSA_API,
    Profiler.ACTIVITY_DOMAIN_HSA_OPS,
    Profiler.ACTIVITY_DOMAIN_ROCTX,
]

# `filter_data` stores a raw pointer into `apis_requested`, so the vector is
# kept rooted for the whole block. Looking at the rocprofiler code, the pointer
# can be released once `rocprofiler_create_filter` has returned.
GC.@preserve apis_requested begin
    # Zero-filled filter data whose `trace_apis` field points at the vector.
    filter_data = Ref{Profiler.rocprofiler_filter_data_t}()
    GC.@preserve filter_data begin
        ptr = Base.unsafe_convert(Ptr{Profiler.rocprofiler_filter_data_t}, filter_data)
        Base.memset(ptr, UInt8(0), sizeof(Profiler.rocprofiler_filter_data_t))

        ptr.trace_apis = pointer(apis_requested)
    end

    # Zero-filled filter property.
    filter_property = Ref{Profiler.rocprofiler_filter_property_t}()
    GC.@preserve filter_property begin
        ptr = Base.unsafe_convert(
            Ptr{Profiler.rocprofiler_filter_property_t}, filter_property
        )
        Base.memset(ptr, UInt8(0), sizeof(Profiler.rocprofiler_filter_property_t))
    end

    # Create the API-tracing filter; the call writes the new id into the Ref.
    r_api_tracing_filter_id = Ref{Profiler.rocprofiler_filter_id_t}()
    @check Profiler.rocprofiler_create_filter(
        id,
        Profiler.ROCPROFILER_API_TRACE,
        filter_data[],
        length(apis_requested),
        r_api_tracing_filter_id,
        filter_property[],
    )
    api_tracing_filter_id = r_api_tracing_filter_id[]
end
api_tracing_filter_id

"""
    timestamp()

Query rocprofiler for a timestamp and return the `value` field of the result.

The status returned by `rocprofiler_get_timestamp` is passed through `@check`,
so a failed query raises instead of returning the contents of an
uninitialised `Ref`.
"""
function timestamp()
    r = Ref{Profiler.rocprofiler_timestamp_t}()
    @check Profiler.rocprofiler_get_timestamp(r)
    return r[].value
end

"""
    trace_sync_callback(record::Profiler.rocprofiler_record_tracer_t, session_id)

Synchronous API-trace callback: log one `"trace_sync"` message per record and
return `nothing`.

For each record the message contains:
- `domain`, `phase` and the correlation id value taken from `record`;
- `ts_begin` / `ts_end`: for the ENTER phase only `ts_begin` is set (to the
  current `timestamp()`), for the EXIT phase only `ts_end` is set, and for any
  other phase both are copied from `record.timestamps`;
- `fn_name`: the operation name looked up through
  `rocprofiler_query_tracer_operation_name` for HSA/HIP API records,
  `nothing` for all other domains;
- `name`: `record.name` decoded to a `String`, or `nothing` when it is `C_NULL`.

`session_id` is accepted to match the callback signature but is not used.
"""
function trace_sync_callback(record::Profiler.rocprofiler_record_tracer_t, session_id)
    if record.domain == Profiler.ACTIVITY_DOMAIN_HSA_API ||
        record.domain == Profiler.ACTIVITY_DOMAIN_HIP_API
        # Start from a null pointer so the Ref never holds garbage.
        r_fn_name = Ref{Ptr{Cchar}}(C_NULL)

        @check Profiler.rocprofiler_query_tracer_operation_name(
            record.domain, record.operation_id, r_fn_name
        )
        fn_name = Base.unsafe_string(r_fn_name[])
    else
        fn_name = nothing
    end

    if record.phase == Profiler.ROCPROFILER_PHASE_ENTER
        ts_begin = timestamp()
        ts_end = nothing
    elseif record.phase == Profiler.ROCPROFILER_PHASE_EXIT
        ts_begin = nothing
        ts_end = timestamp()
    else
        ts_begin = record.timestamps._begin.value
        ts_end = record.timestamps._end.value
    end

    if record.name != C_NULL
        name = Base.unsafe_string(record.name)
    else
        name = nothing
    end

    # Log the decoded `name`; previously the raw `record.name` pointer was
    # logged and the decoded string was computed but never used.
    @info "trace_sync" domain=record.domain phase=record.phase correlation=record.correlation_id.value ts_begin ts_end fn_name name
    return nothing
end

# Associate the API-tracing filter with the output buffer.
@check Profiler.rocprofiler_set_filter_buffer(id, api_tracing_filter_id, buffer_id)

# C-callable pointer to `trace_sync_callback`; the record is passed by value.
trace_sync_callback_ptr = @cfunction(
    trace_sync_callback,
    Cvoid,
    (Profiler.rocprofiler_record_tracer_t, Profiler.rocprofiler_session_id_t)
)

# Register it as the synchronous API-trace callback for the filter.
@check Profiler.rocprofiler_set_api_trace_sync_callback(
    id, api_tracing_filter_id, trace_sync_callback_ptr
)

# Kernel tracing
# Zero-filled filter data: this filter kind is created with no extra data
# (the data count passed below is 0).
filter_data = Ref{Profiler.rocprofiler_filter_data_t}()
GC.@preserve filter_data begin
    ptr = Base.unsafe_convert(Ptr{Profiler.rocprofiler_filter_data_t}, filter_data)
    Base.memset(ptr, UInt8(0), sizeof(Profiler.rocprofiler_filter_data_t))
end

# Reuses the zero-filled `filter_property` created for the API-tracing filter.
r_kernel_tracing_filter_id = Ref{Profiler.rocprofiler_filter_id_t}()
@check Profiler.rocprofiler_create_filter(
    id, Profiler.ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION,
    filter_data[], 0, r_kernel_tracing_filter_id, filter_property[]
)
kernel_tracing_filter_id = r_kernel_tracing_filter_id[]

# Attach the kernel-tracing filter to the output buffer, mirroring what is done
# for the API-tracing filter; without this the filter has no buffer assigned.
@check Profiler.rocprofiler_set_filter_buffer(id, kernel_tracing_filter_id, buffer_id)

# Empty GPU kernel launched with `@roc` to give the profiler something to trace.
kernel() = nothing

# First launch happens before the session is started.
# NOTE(review): presumably a warm-up so compilation is not traced — confirm.
@roc kernel()

# Every profiler call below returns a status; check it like the setup code
# does instead of silently discarding failures.
@check Profiler.rocprofiler_start_session(id)

@roc kernel()
AMDGPU.synchronize()

@check Profiler.rocprofiler_terminate_session(id)
@check Profiler.rocprofiler_flush_data(id, buffer_id)
@check Profiler.rocprofiler_destroy_session(id)
@check Profiler.rocprofiler_finalize()
3 changes: 3 additions & 0 deletions src/AMDGPU.jl
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,9 @@ include("ROCKernels.jl")
import .ROCKernels: ROCBackend
export ROCBackend

include("profiler/profiler.jl")


function __init__()
# Used to shutdown hostcalls if any is running.
atexit(() -> begin Runtime.RT_EXITING[] = true end)
Expand Down
8 changes: 6 additions & 2 deletions src/discovery/discovery.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ end

export lld_artifact, lld_path, libhsaruntime, libdevice_libs, libhip
export librocblas, librocsparse, librocsolver, librocalution
export librocrand, librocfft, libMIOpen_path
export librocrand, librocfft, libMIOpen_path, librocprofiler64v2
export julia_exeflags

function _hip_runtime_version()
Expand Down Expand Up @@ -106,10 +106,14 @@ function __init__()
global librocblas = get_library(lib_prefix * "rocblas"; rocm_path)
global librocsparse = get_library(lib_prefix * "rocsparse"; rocm_path)
global librocsolver = get_library(lib_prefix * "rocsolver"; rocm_path)
global librocalution = get_library(lib_prefix * "rocalution"; rocm_path)
# XXX: librocalution is not used by AMDGPU, but it depends on MPI, which
# opens up various issues: https://juliaparallel.org/MPI.jl/stable/knownissues/#Known-issues
# The fix would be to provide librocalution through a JLL; for now we just use "librocalution_hip".
global librocalution = get_library(lib_prefix * "rocalution_hip"; rocm_path)
global librocrand = get_library(lib_prefix * "rocrand"; rocm_path)
global librocfft = get_library(lib_prefix * "rocfft"; rocm_path)
global libMIOpen_path = get_library(lib_prefix * "MIOpen"; rocm_path)
global librocprofiler64v2 = get_library(lib_prefix * "rocprofiler64v2"; rocm_path)
catch err
@error """ROCm discovery failed!
Discovered ROCm path: $rocm_path.
Expand Down
Loading