Skip to content

Commit

Permalink
improve post processing of rocprof results
Browse files Browse the repository at this point in the history
- Set --iters=200 as the default. This is enough since the time is stable
after the first few runs.
- Filter out kernel times that are too large. We use the first kernel
time as the threshold. There must be something wrong with a kernel run
if its elapsedTime is larger than that of the first run. We need to
investigate the reason. For now, just filter them out.
  • Loading branch information
zhanglx13 committed Aug 17, 2024
1 parent 05aead8 commit e21d43c
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions python/perf-kernels/tune_gemm/tune_gemm.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,16 @@ def extract_kernel_time(M, N, K, config, df):
configStr = gen_configStr(config)
df = df[df['KernelName'].str.contains(configStr)]
meanTime = df['DurationNs'].tail(100).mean()
return config, meanTime

first_value = df['DurationNs'].iloc[0]
filtered_data = df['DurationNs'][df['DurationNs'] <= first_value]
new_meanTime = filtered_data.tail(100).mean()

maxTime = df['DurationNs'].max()
maxTimeID = df['DurationNs'].idxmax()
#print(f"{maxTime=} {maxTimeID=} {meanTime=} {new_meanTime=} {first_value=}")
df['DurationNs'].to_csv(f"{M}-{N}-{K}.csv", index=False)
return config, new_meanTime


def profile_batch_kernels(M, N, K, gpuid, gpus, jobs, verbose):
Expand Down Expand Up @@ -429,7 +438,7 @@ def parse_args():
parser.add_argument("--num_threads", type=int, default=32,
help="number of threads to use for kernel compilation and post processing")
parser.add_argument("--jobs", type=int, default=1, help="number of tasks during the profiling process")
parser.add_argument("--iters", type=int, default=1000, help="number of iterations used in --benchmark mode")
parser.add_argument("--iters", type=int, default=200, help="number of iterations used in --benchmark mode")
parser.add_argument("--init_type", type=str, default='randn', choices=['randn', 'hpl', 'trig_float', 'zeros'],
help="Input tensor initialization (default normal distribution)")
parser.add_argument(
Expand Down

0 comments on commit e21d43c

Please sign in to comment.