-
Notifications
You must be signed in to change notification settings - Fork 40
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
quickTunerPreproc.py, preprocessor script for quick tuner scripts #1575
base: develop
Are you sure you want to change the base?
Changes from 4 commits
0489788
6a5a43a
e2d92bf
1cd8bda
5995293
d44114d
c7b9486
30999f4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
#!/usr/bin/env python3 | ||
|
||
""" | ||
quickTuner preprocessor script to combine .debug output files from tuningRunner.py or tuna-script.sh | ||
|
||
Usage: quickTunerPreprocess.py [-h] --input-dir INPUT_DIR --output OUTPUT [--op {gemm,conv}] [-d] [--file-ext FILE_EXT] | ||
|
||
Example Usage: | ||
|
||
python3 quickTunerPreprocess.py --input-dir /path/to/debug/files --output combined_data | ||
|
||
|
||
Note: | ||
If using MITuna, edit MITuna/tuna/rocmlir/rocmlir_worker.py, changing: | ||
|
||
|
||
cmd = env_str + f" python3 ./bin/tuningRunner.py -q {special_args} \ | ||
--config='{config_string}' --mlir-build-dir `pwd` \ | ||
--output=- --tflops \ | ||
--rocmlir_gen_flags='--device={self.gpu_id}' 2>/dev/null" | ||
|
||
|
||
to: | ||
|
||
import uuid | ||
|
||
if not os.path.exists("./run"): | ||
os.makedirs("./run") | ||
|
||
unique_file_id = uuid.uuid4().hex | ||
|
||
file_id = os.path.join("./run", unique_file_id) | ||
|
||
cmd = env_str + f" python3 ./bin/tuningRunner.py -q {special_args} \ | ||
--config='{config_string}' --mlir-build-dir `pwd` \ | ||
--output={file_id} --tflops --debug \ | ||
--rocmlir_gen_flags='--device={self.gpu_id}' 2>/dev/null" | ||
|
||
""" | ||
|
||
import os | ||
import sys | ||
import argparse | ||
import pandas as pd | ||
import glob | ||
from sklearn.preprocessing import MinMaxScaler | ||
|
||
class qtPreprocessor:
    """
    Combine per-run tuning output files (``*.debug`` by default) into a
    single table.

    The work is exposed through static methods so that other scripts can
    call ``qtPreprocessor.process(...)`` directly without holding an
    instance of this class.
    """

    def __init__(self, pargs):
        # Only ``input_dir`` is read from the parsed-arguments object.
        self.input_dir = pargs.input_dir

    @staticmethod
    def __get_stats_gemm(df, ct):
        """
        Print summary statistics for a combined gemm dataframe.

        Args:
            df: dataframe with at least the columns 'DataType', 'TransA',
                'TransB', 'G', 'M', 'N' and 'K'.
            ct: number of input files that contributed rows to ``df``.

        Prints the file count, every distinct 'DataType' value, the number
        of unique gemm configurations, and one comma-separated line per
        unique configuration.
        """
        print(f"Files processed: {ct}")

        # Group on a scalar key (not a one-element list) so each group key
        # is the plain dtype value on every pandas version; with a list the
        # key may be a tuple or a scalar depending on the version.
        print("Types found:")
        for dtype_name, _ in df.groupby('DataType'):
            print(f"\t{dtype_name}")

        # A gemm configuration is identified by these six columns.
        cols = ['TransA', 'TransB', 'G', 'M', 'N', 'K']
        unique_gemms = df[cols].drop_duplicates()

        print(f"Number of unique Gemms: {len(unique_gemms)}")
        # itertuples keeps each column's own dtype; iterrows would coerce a
        # whole row to one common dtype before it is printed.
        for config in unique_gemms.itertuples(index=False, name=None):
            print(",".join(str(value) for value in config))

    @staticmethod
    def __get_stats_conv(df, ct):
        """
        Print summary statistics for a combined conv dataframe.

        Not implemented yet; always raises NotImplementedError.
        """
        raise NotImplementedError()

    @staticmethod
    def process(input_dir, output_name=None, op='gemm', file_ext="debug", debug=False, normalize=True):
        """
        Read every ``*.<file_ext>`` file in ``input_dir`` as a tab-separated
        table, concatenate them, and optionally save the result.

        Args:
            input_dir: directory that is searched (non-recursively).
            output_name: if truthy, path the combined table is written to
                as a tab-separated file.
            op: 'gemm' or 'conv'; selects which statistics are printed when
                ``debug`` is set.
            file_ext: extension (without the dot) of the files to collect.
            debug: print the saved path and summary statistics.
            normalize: add a 'NormalizedTFlops' column holding the min-max
                scaled 'TFlops' values, computed separately for each file.

        Returns:
            The combined dataframe, or None when no file contributed rows.

        Raises:
            NotImplementedError: if ``debug`` is set and ``op`` is 'conv'.
        """
        pattern = os.path.join(input_dir, f"*.{file_ext}")
        print(pattern)

        frames = []
        # glob returns paths in arbitrary order; sort so the combined row
        # order is reproducible between runs and machines.
        for path in sorted(glob.glob(pattern)):
            try:
                df = pd.read_csv(path, sep='\t')
            except pd.errors.EmptyDataError:
                # Zero-byte file: nothing to contribute.
                if debug:
                    print(f"Skipping empty file {path}")
                continue
            if df.empty:
                # Header-only file: the scaler cannot be fitted on zero rows.
                if debug:
                    print(f"Skipping empty file {path}")
                continue
            if normalize:
                # A fresh scaler per file keeps the normalization per-file.
                scaler = MinMaxScaler()
                df['NormalizedTFlops'] = scaler.fit_transform(df[['TFlops']])
            frames.append(df)

        if not frames:
            return None
        new_df = pd.concat(frames, ignore_index=True)

        if output_name:
            new_df.to_csv(output_name, sep='\t')
            if debug:
                print(f"Saved to {output_name}")

        if debug:
            # Summary statistics about the files that were combined.
            if op == 'gemm':
                qtPreprocessor.__get_stats_gemm(new_df, len(frames))
            elif op == 'conv':
                qtPreprocessor.__get_stats_conv(new_df, len(frames))

        return new_df
|
||
|
||
def main(args=None):
    """
    Command-line entry point: parse the options and run the preprocessor.

    Args:
        args: list of command-line tokens (without the program name).
            When None, ``sys.argv[1:]`` is used.
    """
    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser(
        prog='quickTunerPreprocess.py',
        description='Collect *.debug files from tuningRunner.py into a single file to be used in quickTunerGen.py')

    parser.add_argument('--input-dir',
                        required=True,
                        type=str,
                        help='Input directory where files are saved')

    parser.add_argument('--output',
                        required=True,
                        type=str,
                        help='File to save data to')

    parser.add_argument('--op',
                        choices=['gemm', 'conv'],
                        default='gemm',
                        help='Formats debug print info')

    parser.add_argument('-d', '--debug',
                        action='store_true',
                        help='Prints debug information')

    parser.add_argument('--file-ext',
                        default='debug',
                        type=str,
                        help='File extension')

    # Normalization stays on by default; --normalize is kept so existing
    # command lines still parse, and --no-normalize lets it be switched off.
    parser.add_argument('--normalize',
                        dest='normalize',
                        default=True,
                        action='store_true',
                        help='Normalize on a per-file basis, necessary for quickTunerGen to work')

    parser.add_argument('--no-normalize',
                        dest='normalize',
                        action='store_false',
                        help='Do not add the per-file normalized column')

    # Parse the tokens that were handed in, not the process-wide sys.argv,
    # so main() can be driven programmatically.
    pargs = parser.parse_args(args)

    qtPreprocessor.process(pargs.input_dir,
                           pargs.output,
                           pargs.op,
                           pargs.file_ext,
                           pargs.debug,
                           pargs.normalize)
|
||
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main(sys.argv[1:])
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could this instead be a patch to
rocmlir_worker
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not in its current form; it cuts off the usual functionality of passing results to stdout and we don't have code to read results from the files and send it on. I do have an open issue about collecting all the results into the database and this may become part of it.