Skip to content

Commit

Permalink
replace rocprof with rocprofv2 for the tune gemm script (#613)
Browse files Browse the repository at this point in the history
* replace rocprof with rocprofv2

* remove comment

* add link for rocprofv2 output header bug
  • Loading branch information
xiaohuguo2023 authored Jul 17, 2024
1 parent 2ddcac6 commit 59d6be1
Showing 1 changed file with 11 additions and 5 deletions.
16 changes: 11 additions & 5 deletions scripts/amd/gemm/tune_gemm.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,9 +392,15 @@ def main():


def extract_kernel_time(M, N, K, config, df, bias_size):
    """Return ``(config, meanTime)`` for one tuning config from a kernel trace.

    Args:
        M, N, K: GEMM problem sizes, forwarded to
            ``gen_kernel_and_configStr_from_config`` to build the config string.
        config: The tuning configuration being measured; returned unchanged.
        df: DataFrame holding the rocprofv2 kernel-trace rows. It must have
            exactly 19 columns; its column labels are overwritten in place.
        bias_size: Forwarded to ``gen_kernel_and_configStr_from_config``.

    Returns:
        A ``(config, meanTime)`` tuple where ``meanTime`` is the mean of
        ``EndNs - BeginNs`` over the last 100 rows whose ``KernelName``
        matches the config string (NaN when no row matches).
    """
    # Correct the header by removing 'sig' and 'obj', which reduces the number
    # of column names from 21 to 19. Once the rocprofv2 header bug
    # (https://github.com/ROCm/rocprofiler/issues/144) is fixed, the two lines
    # below should no longer be needed.
    cols = ['Index','KernelName','gpu-id','queue-id','queue-index','pid','tid','grd','wgr','lds','scr','arch_vgpr','accum_vgpr','sgpr','wave_size','DispatchNs','BeginNs','EndNs','CompleteNs']
    df.columns = cols
    configStr, _ = gen_kernel_and_configStr_from_config(M, N, K, config, None, None, None, bias_size)
    # na=False treats rows with a missing kernel name as non-matching instead
    # of propagating NaN into the boolean mask; .copy() lets us add a column
    # without writing into a slice of the caller's frame.
    filtered_df = df[df['KernelName'].str.contains(configStr, na=False)].copy()
    # The trace has no duration column, so derive it from the begin/end
    # timestamps.
    filtered_df['DurationNs'] = filtered_df['EndNs'] - filtered_df['BeginNs']
    meanTime = filtered_df['DurationNs'].tail(100).mean()
    return config, meanTime


Expand All @@ -409,7 +415,7 @@ def profile_batch_kernels(M, N, K, gpuid, gpus, jobs, verbose):
kernel_name = generated_kernel_name(M, N, K, jobId)
if verbose:
print(f"profiling {kernel_name} on GPU {gpuid}")
run_bash_command_wrapper(f"rocprof --stats -o results-{jobId}.csv python {kernel_name}", capture=(verbose < 2))
run_bash_command_wrapper(f"rocprofv2 --plugin file --plugin-version 1 --kernel-trace -o {jobId} python {generated_kernel_name(M, N, K, jobId)}", capture=(verbose < 2))
jobId += ngpus


Expand Down Expand Up @@ -451,7 +457,7 @@ def tune_gemm_config(M, N, K, col_a, col_b, dtype_a, dtype_b, dtype_c, init_type
thread_pool = multiprocessing.Pool(processes=num_threads)
tasks = []
idx = 0
df_prof = [pd.read_csv(f"results-{i}.csv") for i in range(jobs)]
df_prof = [pd.read_csv(f"results_{i}.csv", skiprows=1, header=None, delimiter=',', quotechar='"', escapechar='\\') for i in range(jobs)]
for config in configs:
file_idx = idx % jobs
tasks += [thread_pool.apply_async(extract_kernel_time, args=(M, N, K, config, df_prof[file_idx], bias_size))]
Expand Down Expand Up @@ -899,7 +905,7 @@ def main():
os.remove(generated_script)
if not skipWarmup:
os.remove(generated_script + ".failed_configs")
for f in glob.glob(f"results-{i}.*"):
for f in glob.glob(f"results_{i}.*"):
os.remove(f)

# Check correctness if asked to
Expand Down

0 comments on commit 59d6be1

Please sign in to comment.