From 1b80bd895d78a0c68af9f421de5377be48020422 Mon Sep 17 00:00:00 2001
From: "Morkovkin, Andrey"
Date: Thu, 3 Mar 2022 00:03:30 +0300
Subject: [PATCH] Add capability to collect performance data with VTune, emon,
 psrecord

---
 bench.py                          |  98 ++++++++++-
 runner.py                         | 261 ++++++++++++++++++++++++++----
 sklearn_bench/dbscan.py           |   2 +-
 sklearn_bench/df_clsf.py          |   4 +-
 sklearn_bench/df_regr.py          |   4 +-
 sklearn_bench/distances.py        |   2 +-
 sklearn_bench/elasticnet.py       |   4 +-
 sklearn_bench/kmeans.py           |   4 +-
 sklearn_bench/knn_clsf.py         |   6 +-
 sklearn_bench/knn_regr.py         |   6 +-
 sklearn_bench/lasso.py            |   4 +-
 sklearn_bench/linear.py           |   4 +-
 sklearn_bench/log_reg.py          |   4 +-
 sklearn_bench/nusvc.py            |   4 +-
 sklearn_bench/nusvr.py            |   4 +-
 sklearn_bench/pca.py              |   4 +-
 sklearn_bench/ridge.py            |   4 +-
 sklearn_bench/svm.py              |   6 +-
 sklearn_bench/svr.py              |   4 +-
 sklearn_bench/train_test_split.py |   2 +-
 sklearn_bench/tsne.py             |   2 +-
 tools/emon/edp_config.txt         |  74 +++++++++
 utils.py                          |  25 +--
 xgboost_bench/gbt.py              |   5 +-
 24 files changed, 448 insertions(+), 89 deletions(-)
 create mode 100644 tools/emon/edp_config.txt

diff --git a/bench.py b/bench.py
index 5ed513271..31c804398 100644
--- a/bench.py
+++ b/bench.py
@@ -20,10 +20,18 @@
 import sys
 import timeit
 import re
+import platform
+import hashlib
+import os
+import subprocess
 
 import numpy as np
 import sklearn
-
+try:
+    import itt
+    itt_module_installed = True
+except ImportError:
+    itt_module_installed = False
 
 def get_dtype(data):
     '''
@@ -159,6 +167,8 @@ def parse_args(parser, size=None, loop_types=(),
     parser.add_argument('--time-method', type=str, default='box_filter',
                         choices=('box_filter'),
                         help='Method used for time mesurements')
+    parser.add_argument('--box-filter-measurements-analysis', type=int, default=100,
+                        help='Maximum number of measurements in box filter (for analyzed stage)')
     parser.add_argument('--box-filter-measurements', type=int, default=100,
                         help='Maximum number of measurements in box filter')
     parser.add_argument('--inner-loops', default=100, type=int,
@@ -167,6 +177,8 @@ def parse_args(parser, size=None, loop_types=(),
     parser.add_argument('--outer-loops', default=100, type=int,
                         help='Maximum outer loop iterations '
                              '(we take the min over outer iterations)')
+    parser.add_argument('--time-limit-analysis', default=10., type=float,
+                        help='Target time to spend to benchmark (for analyzed stage)')
     parser.add_argument('--time-limit', default=10., type=float,
                         help='Target time to spend to benchmark')
     parser.add_argument('--goal-outer-loops', default=10,
@@ -186,6 +198,25 @@ def parse_args(parser, size=None, loop_types=(),
     parser.add_argument('--device', default='none', type=str,
                         choices=('host', 'cpu', 'gpu', 'none'),
                         help='Execution context device')
+    parser.add_argument('--emon', default=False,
+                        action='store_true',
+                        help='Should emon profiling be started')
+    parser.add_argument('--vtune', default=False,
+                        action='store_true',
+                        help='Should vtune profiling be started')
+    parser.add_argument('--psrecord', default=False,
+                        action='store_true',
+                        help='Should psrecord profiling be started')
+    parser.add_argument('--ittpy', default=False,
+                        action='store_true',
+                        help='Should ittpy domains be integrated')
+    parser.add_argument('--sgx-gramine', default=False,
+                        action='store_true',
+                        help='Should benchmark run with Gramine & Intel(R) SGX')
+    parser.add_argument('--flush-caches', default=False,
+                        action='store_true',
+                        help='Should benchmark flush CPU caches after each run during measuring')
+    parser.add_argument('--target-stage', type=str, default='default', help='Select target stage for analysis.')
 
     for data in ['X', 'y']:
         for stage in ['train', 'test']:
@@ -201,6 +232,9 @@ def parse_args(parser, size=None, loop_types=(),
 
     params = parser.parse_args()
 
+    if params.ittpy and itt_module_installed:
+        itt.pause()
+
     if not params.no_intel_optimized:
         try:
             from sklearnex import patch_sklearn
@@ -272,18 +306,68 @@ def prepare_daal_threads(num_threads=-1):
     return num_threads
 
 
-def measure_function_time(func, *args, params, **kwargs):
-    return time_box_filter(func, *args,
-                           n_meas=params.box_filter_measurements,
-                           time_limit=params.time_limit, **kwargs)
+def measure_function_time(func, *args, params, stage, **kwargs):
+    results = time_box_filter(func, *args, params=params, stage=stage, **kwargs)
+    return results
 
 
-def time_box_filter(func, *args, n_meas, time_limit, **kwargs):
+def detect_LLC_size():
+    # Linux sysfs reports the L3 (LLC) size as a string such as '36864K'
+    with open('/sys/devices/system/cpu/cpu0/cache/index3/size', 'r') as f:
+        llc_size_str = f.readline().strip()
+        llc_size = int(llc_size_str[:-1]) * 1024
+    return llc_size
+
+
+def flush_caches():
+    flush_datafile = 'data/flush_data.npy'
+    if os.path.exists(flush_datafile):
+        with open(flush_datafile, 'rb') as f:
+            data = np.load(f).astype(np.double)
+    else:
+        data_size = detect_LLC_size() // 8 * 8  # eight LLC volumes, counted in doubles
+        columns_number = 100
+        rows_number = data_size // columns_number
+        data = np.random.rand(rows_number, columns_number).astype(np.double)
+        with open(flush_datafile, 'wb') as f:
+            np.save(f, data)
+
+    iterations_to_flush = 3
+    try:
+        from sklearnex.cluster import KMeans
+    except ImportError:
+        from sklearn.cluster import KMeans
+    for _ in range(iterations_to_flush):
+        KMeans(max_iter=3, tol=1e-7).fit(data)
+
+
+def time_box_filter(func, *args, params, stage, **kwargs):
+    flush_caches_flag = params.flush_caches
+    if params.target_stage != 'default':
+        if params.target_stage == stage:
+            time_limit = params.time_limit_analysis
+            n_meas = params.box_filter_measurements_analysis
+            is_the_target_stage = True
+        else:
+            time_limit = 0
+            n_meas = 1
+            is_the_target_stage = False
+    else:
+        time_limit = params.time_limit
+        n_meas = params.box_filter_measurements
+        is_the_target_stage = True
+
     times = []
     while len(times) < n_meas:
+        if flush_caches_flag:
+            flush_caches()
+
+        if params.ittpy and is_the_target_stage and itt_module_installed:
+            itt.resume()
         t0 = timeit.default_timer()
         val = func(*args, **kwargs)
         t1 = timeit.default_timer()
+        if params.ittpy and is_the_target_stage and itt_module_installed:
+            itt.pause()
         times.append(t1 - t0)
         if sum(times) > time_limit:
             break
@@ -564,7 +648,9 @@ def print_output(library, algorithm, stages, params, functions,
             result['algorithm_parameters']['init'] = 'random'
             result['algorithm_parameters'].pop('handle', None)
         output.append(result)
+    print('# Intel(R) Extension for Scikit-learn case result:')
     print(json.dumps(output, indent=4))
+    print('# Intel(R) Extension for Scikit-learn case finished.')
 
 
 def run_with_context(params, function):
diff --git a/runner.py b/runner.py
index 9a9d527c5..7d76385a9 100755
--- a/runner.py
+++ b/runner.py
@@ -20,10 +20,14 @@
 import os
 import socket
 import sys
+import datetime
+import shutil
+import subprocess
 
 from typing import Any, Dict, List, Union
 
 import utils
 from pathlib import Path
+import hashlib
 
 
def get_configs(path: Path) -> List[str]:
@@ -36,6 +40,125 @@ def get_configs(path: Path) -> List[str]:
             result += get_configs(new_path)
     return result
 
+
+allowed_analysis_types = ['vtune', 'emon', 'psrecord', 'ittpy']
+
+
+def check_additional_orders(args, common_params):
+    result = {}
+    if args.sgx_gramine:
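+        # forward the Gramine/Intel(R) SGX request to the benchmark command line
+        # (picked up later by get_program_name and get_benchmark_extra_args)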
+        result['sgx_gramine'] = args.sgx_gramine
+
+    analysis_config = {}
+    if 'analysis' in common_params.keys():
+        for analyse_type in allowed_analysis_types:
+            if analyse_type in common_params['analysis'].keys():
+                analysis_config[analyse_type] = common_params['analysis'][analyse_type]
+
+    result.update(analysis_config)
+    return result
+
+
+def get_program_name(analysis_config):
+    program_name = 'python'
+    if analysis_config.get('sgx_gramine'):
+        program_name = 'gramine-sgx ./sklearnex'
+    return program_name
+
+
+def dict_to_cmd_args(dictionary):
+    results = []
+    for key, item in dictionary.items():
+        if isinstance(item, list):
+            for subitem in item:
+                results.append(f'{key} {subitem}')
+        else:
+            results.append(f'{key} {item}')
+    return " ".join(results)
+
+
+def get_analyse_prefix(analysis_config):
+    for key in allowed_analysis_types:
+        if key in analysis_config.keys():
+            return dict_to_cmd_args(analysis_config[key])
+    return None
+
+
+def get_benchmark_extra_args(analysis_config):
+    result = []
+    for key in analysis_config.keys():
+        result.append(f'--{key}'.replace('_', '-'))
+    return ' '.join(result)
+
+
+emon_dat_file_name, emon_xlsx_file_name = None, None
+
+
+def vtune_postproc(analysis_config, analysis_folder):
+    vtune_foldername = os.path.join(analysis_folder, 'vtune_' + analysis_folder.split('/')[-1])
+    if '-r' in analysis_config['vtune'].keys():
+        shutil.move(analysis_config['vtune']['-r'], vtune_foldername)
+    else:
+        for file in os.listdir('.'):
+            if 'r00' in file:
+                shutil.move(file, vtune_foldername)
+
+
+def fetch_expected_emon_filename(edp_config_path):
+    # defaults used when the EDP config does not override them
+    emon_dat_file_name, emon_xlsx_file_name = 'emon.dat', 'summary.xlsx'
+    with open(edp_config_path, 'r') as edp_config:
+        for line in edp_config:
+            stripped = line.strip()
+            if stripped.startswith('EMON_DATA='):
+                emon_dat_file_name = stripped[len('EMON_DATA='):]
+            elif stripped.startswith('OUTPUT='):
+                emon_xlsx_file_name = stripped[len('OUTPUT='):]
+    return emon_dat_file_name, emon_xlsx_file_name
+
+
+def emon_postproc(analysis_config, analysis_folder):
+    global emon_dat_file_name, emon_xlsx_file_name
+    if emon_dat_file_name is None:
+        emon_dat_file_name, emon_xlsx_file_name = \
+            fetch_expected_emon_filename('tools/emon/edp_config.txt')
+    if '-f' in analysis_config['emon'].keys():
+        shutil.move(analysis_config['emon']['-f'], emon_dat_file_name)
+    else:
+        shutil.move('emon.dat', emon_dat_file_name)
+    emon_processing_command = 'emon -process-edp ./tools/emon/edp_config.txt'
+    res = subprocess.run(emon_processing_command.split(' '), stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE, encoding='utf-8')
+    if res.stderr.strip() != '':
+        logging.error(f'EMON error message: {res.stderr.strip()}')
+    shutil.move(emon_dat_file_name, analysis_folder)
+    shutil.move(emon_xlsx_file_name, analysis_folder)
+
+
+def psrecord_postproc(analysis_config, analysis_folder):
+    if '--log' in analysis_config['psrecord'].keys():
+        shutil.move(analysis_config['psrecord']['--log'], analysis_folder)
+    if '--plot' in analysis_config['psrecord'].keys():
+        shutil.move(analysis_config['psrecord']['--plot'], analysis_folder)
+
+
+def postproc_analysis_result(analysis_config, analysis_folder):
+    if 'vtune' in analysis_config.keys():
+        vtune_postproc(analysis_config, analysis_folder)
+    elif 'emon' in analysis_config.keys():
+        emon_postproc(analysis_config, analysis_folder)
+    elif 'psrecord' in analysis_config.keys():
+        psrecord_postproc(analysis_config, analysis_folder)
+
+
+def emon_preproc(analysis_config, command_line):
+    emon_sh = 'emon_runner.sh'
+    subcommand = '#!/bin/bash\n' \
+                 f'{command_line}\n'
+    with open(emon_sh, 'w') as f:
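+        # emon is pointed at this small wrapper script rather than at the
+        # benchmark command itself (see preproc_analysis below)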
+        f.write(subcommand)
+    os.chmod(emon_sh, 0o755)
+    return emon_sh, subcommand
+
+
+def preproc_analysis(analysis_config, analysis_prefix, bench_command_line):
+    subcommand = ''
+    if 'emon' in analysis_config.keys():
+        emon_sh, subcommand = emon_preproc(analysis_config, bench_command_line)
+        command = f'emon {analysis_prefix} ./{emon_sh}'
+    elif 'psrecord' in analysis_config.keys():
+        command = f'psrecord {analysis_prefix} "{bench_command_line}"'
+    elif 'vtune' in analysis_config.keys():
+        command = f'vtune {analysis_prefix} -- {bench_command_line}'
+    else:
+        command = bench_command_line
+    return command, subcommand
+
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
@@ -64,6 +187,34 @@
                              'unmarked workloads will be launched anyway')
     parser.add_argument('--no-intel-optimized', default=False, action='store_true',
                         help='Use Scikit-learn without Intel optimizations')
+    parser.add_argument('--sgx-gramine', default=False, action='store_true',
+                        help='Run benchmarks with Gramine & Intel(R) SGX.')
+    parser.add_argument('--vtune', default=False, action='store_true',
+                        help='Profile benchmarks with VTune.')
+    parser.add_argument('--emon', default=False, action='store_true',
+                        help='Profile benchmarks with EMON.')
+    parser.add_argument('--psrecord', default=False, action='store_true',
+                        help='Analyze memory consumption with psrecord.')
+    parser.add_argument('--box-filter-measurements-analysis', default=500, type=int,
+                        help='Maximum number of measurements in box filter '
+                             '(for the analyzed stage). When a benchmark uses this '
+                             'parameter to choose the number of runs for the target '
+                             'stage, the other stage is run only to obtain a '
+                             'trained model. Ignored unless analysis is enabled.')
+    parser.add_argument('--time-limit-analysis', default=100., type=float,
+                        help='Time to spend on the currently analyzed stage. '
+                             'When a benchmark uses this parameter to compute the '
+                             'time budget for the target stage, the other stage '
+                             'is run only to obtain a trained model. '
+                             'Ignored unless analysis is enabled.')
+    parser.add_argument('--box-filter-measurements', type=int, default=100,
+                        help='Maximum number of measurements in box filter.')
+    parser.add_argument('--time-limit', default=10., type=float,
+                        help='Target time to spend to benchmark.')
+    parser.add_argument('--flush-caches', default=False,
+                        action='store_true',
+                        help='Flush CPU caches after each run during measurement. '
+                             'Recommended for default runs and VTune profiling.')
     parser.add_argument('--output-file', default='results.json',
                         type=argparse.FileType('w'),
                         help='Output file of benchmarks to use with their runner')
@@ -78,6 +229,7 @@
                              'the default config will be used. '
                              'Need "openpyxl" library')
     args = parser.parse_args()
+    timestamp = str(datetime.datetime.now()).replace(' ', '--').replace(':', '-').replace('.', '-')
     logging.basicConfig(
         stream=sys.stdout, format='%(levelname)s: %(message)s', level=args.verbose)
 
@@ -95,10 +247,18 @@
         logging.info('Datasets folder is not set, using local folder')
 
     json_result: Dict[str, Union[Dict[str, Any], List[Any]]] = {
+        'common': {},
         'hardware': utils.get_hw_parameters(),
         'software': utils.get_sw_parameters(),
         'results': []
     }
+    json_result['common']['timestamp'] = timestamp
+
+    path_to_analysis_dir = 'analysis_' + timestamp
+    if os.path.exists(path_to_analysis_dir):
+        shutil.rmtree(path_to_analysis_dir)
+    os.makedirs(path_to_analysis_dir)
+
     is_successful = True
     # getting jsons from folders
     paths_to_configs: List[str] = list()
@@ -119,6 +279,15 @@
         common_params = config['common']
         for params_set in config['cases']:
             params = common_params.copy()
+            analysis_config = check_additional_orders(args, common_params)
+            if 'analysis' in params.keys():
+                del params['analysis']
+            program_name = get_program_name(analysis_config)
+            analysis_prefix = get_analyse_prefix(analysis_config)
+            bench_extra_args = get_benchmark_extra_args(analysis_config)
             params.update(params_set.copy())
 
             if 'workload-size' in params:
@@ -278,37 +447,67 @@ class GenerationArgs:
         '--no-intel-optimized ' if args.no_intel_optimized else ''
     for lib in libs:
         for i, case in enumerate(cases):
-            command = f'python {lib}_bench/{algorithm}.py ' \
-                + no_intel_optimize \
-                + f'--arch {hostname} {case} {paths} ' \
-                + f'--dataset-name {dataset_name}'
-            command = ' '.join(command.split())
-            logging.info(command)
-            if not args.dummy_run:
-                case = f'{lib},{algorithm} ' + case
-                stdout, stderr = utils.read_output_from_command(
-                    command, env=os.environ.copy())
-                stdout, extra_stdout = utils.filter_stdout(stdout)
-                stderr = utils.filter_stderr(stderr)
-
-                print(stdout, end='\n')
-
-                if extra_stdout != '':
-                    stderr += f'CASE {case} EXTRA OUTPUT:\n' \
-                        + f'{extra_stdout}\n'
-                try:
-                    if isinstance(json_result['results'], list):
-                        json_result['results'].extend(
-                            json.loads(stdout))
-                except json.JSONDecodeError as decoding_exception:
-                    stderr += f'CASE {case} JSON DECODING ERROR:\n' \
-                        + f'{decoding_exception}\n{stdout}\n'
-
-                if stderr != '':
-                    if 'daal4py' not in stderr:
-                        is_successful = False
-                        logging.warning(
-                            'Error in benchmark: \n' + stderr)
+            analysis_stage_collection = ['default']
+            if analysis_prefix is not None:
+                analysis_stage_collection.extend(['fit', 'infer'])
+            for analysis_stage in analysis_stage_collection:
+                bench_command_line = f'{program_name} {lib}_bench/{algorithm}.py ' \
+                    + no_intel_optimize \
+                    + f'--arch {hostname} {case} {paths} ' \
+                    + f'--dataset-name {dataset_name} ' \
+                    + f'--box-filter-measurements-analysis {args.box_filter_measurements_analysis} ' \
+                    + f'--box-filter-measurements {args.box_filter_measurements} ' \
+                    + f'--time-limit-analysis {args.time_limit_analysis} ' \
+                    + f'--time-limit {args.time_limit} ' \
+                    + f'--target-stage {analysis_stage} '
+                if args.flush_caches:
+                    bench_command_line += ' --flush-caches '
+                hash_of_case = hashlib.sha256(bench_command_line.encode('utf-8')).hexdigest()
+                if analysis_stage == 'default':
+                    command = bench_command_line
+                    subcommand = None
+                else:
+                    bench_command_line += f' {bench_extra_args} '
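+                    # wrap the benchmark command line with the selected
+                    # profiler (vtune / emon / psrecord)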
+                    command, subcommand = preproc_analysis(
+                        analysis_config, analysis_prefix, bench_command_line)
+
+                command = ' '.join(command.split())
+
+                logging.info(command)
+                if 'emon' in analysis_config.keys() and subcommand is not None:
+                    logging.info(f'Subcommand: {subcommand}')
+                if not args.dummy_run:
+                    case_result = f'{lib},{algorithm} ' + case
+                    stdout, stderr = utils.read_output_from_command(
+                        command, env=os.environ.copy())
+                    stdout, extra_stdout = utils.filter_stdout(stdout)
+                    stderr = utils.filter_stderr(stderr)
+                    if extra_stdout != '':
+                        stderr += f'CASE {case_result} EXTRA OUTPUT:\n' \
+                            + f'{extra_stdout}\n'
+                    try:
+                        output_json = json.loads(stdout)
+                        json_decoding_ok = True
+                    except json.JSONDecodeError as decoding_exception:
+                        stderr += f'CASE {case_result} JSON DECODING ERROR:\n' \
+                            + f'{decoding_exception}\n{stdout}\n'
+                        json_decoding_ok = False
+                    if stderr != '' and 'daal4py' not in stderr:
+                        is_successful = False
+                        logging.warning('Error in benchmark: \n' + stderr)
+                    if analysis_stage != 'default' and json_decoding_ok:
+                        actual_config = None
+                        for cfg in output_json:
+                            if cfg['stage'] == 'training' and analysis_stage == 'fit':
+                                actual_config = cfg
+                            elif cfg['stage'] != 'training' and analysis_stage != 'fit':
+                                actual_config = cfg
+                        current_rows_number = actual_config['input_data']['rows']
+                        current_columns_number = actual_config['input_data']['columns']
+                        case_folder = f"{lib}_{algorithm}_{analysis_stage}_{dataset_name}_" \
+                            f"{current_rows_number}x{current_columns_number}_{hash_of_case[:6]}"
+                        analysis_folder = os.path.join(path_to_analysis_dir, case_folder)
+                        os.makedirs(analysis_folder)
+                        postproc_analysis_result(analysis_config, analysis_folder)
+
+                    if json_decoding_ok and analysis_prefix is not None:
+                        for item in output_json:
+                            item['hash_prefix'] = hash_of_case[:6]
+                            item['analysis'] = analysis_config
+
+                    if json_decoding_ok and analysis_stage == 'default':
+                        json_result['results'].extend(output_json)
 
     json.dump(json_result, args.output_file, indent=4)
     name_result_file = args.output_file.name
diff --git a/sklearn_bench/dbscan.py b/sklearn_bench/dbscan.py
index 94a55bafa..b1c449ad4 100644
--- a/sklearn_bench/dbscan.py
+++ b/sklearn_bench/dbscan.py
@@ -35,7 +35,7 @@ def main():
     # 'kdtree' when running unpatched scikit-learn.
 
     # Time fit
-    time, _ = bench.measure_function_time(dbscan.fit, X, params=params)
+    time, _ = bench.measure_function_time(dbscan.fit, X, params=params, stage='fit')
     labels = dbscan.labels_
 
     params.n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
diff --git a/sklearn_bench/df_clsf.py b/sklearn_bench/df_clsf.py
index 5d35ef02d..b63dbf10d 100644
--- a/sklearn_bench/df_clsf.py
+++ b/sklearn_bench/df_clsf.py
@@ -40,7 +40,7 @@ def main():
 
     params.n_classes = len(np.unique(y_train))
 
-    fit_time, _ = bench.measure_function_time(clf.fit, X_train, y_train, params=params)
+    fit_time, _ = bench.measure_function_time(clf.fit, X_train, y_train, params=params, stage='fit')
     y_pred = clf.predict(X_train)
     y_proba = clf.predict_proba(X_train)
     train_acc = bench.accuracy_score(y_train, y_pred)
@@ -48,7 +48,7 @@ def main():
     train_roc_auc = bench.roc_auc_score(y_train, y_proba)
 
     predict_time, y_pred = bench.measure_function_time(
-        clf.predict, X_test, params=params)
+        clf.predict, X_test, params=params, stage='infer')
     y_proba = clf.predict_proba(X_test)
     test_acc = bench.accuracy_score(y_test, y_pred)
     test_log_loss = bench.log_loss(y_test, y_proba)
diff --git a/sklearn_bench/df_regr.py b/sklearn_bench/df_regr.py
index 4c7491af3..84971f982 100644
--- a/sklearn_bench/df_regr.py
+++ b/sklearn_bench/df_regr.py
@@ -38,14 +38,14 @@ def main():
         random_state=params.seed,
         n_jobs=params.n_jobs)
 
-    fit_time, _ = bench.measure_function_time(regr.fit, X_train, y_train, params=params)
+    fit_time, _ = bench.measure_function_time(regr.fit, X_train, y_train, params=params, stage='fit')
     y_pred = regr.predict(X_train)
     train_rmse = bench.rmse_score(y_train, y_pred)
     train_r2 = bench.r2_score(y_train, y_pred)
 
     predict_time, y_pred = bench.measure_function_time(
-        regr.predict, X_test, params=params)
+        regr.predict, X_test, params=params, stage='infer')
 
     test_rmse = bench.rmse_score(y_test, y_pred)
     test_r2 = bench.r2_score(y_test, y_pred)
diff --git a/sklearn_bench/distances.py b/sklearn_bench/distances.py
index c708513d1..1d5e82fe7 100644
--- a/sklearn_bench/distances.py
+++ b/sklearn_bench/distances.py
@@ -26,7 +26,7 @@ def main():
     X, _, _, _ = bench.load_data(params, generated_data=['X_train'], add_dtype=True)
 
     time, _ = bench.measure_function_time(pairwise_distances, X, metric=params.metric,
-                                          n_jobs=params.n_jobs, params=params)
+                                          n_jobs=params.n_jobs, params=params, stage='fit')
 
     bench.print_output(library='sklearn', algorithm='distances',
                        stages=['computation'], params=params,
                        functions=[params.metric.capitalize()],
diff --git a/sklearn_bench/elasticnet.py b/sklearn_bench/elasticnet.py
index 3467e0dda..88bf9dc4b 100755
--- a/sklearn_bench/elasticnet.py
+++ b/sklearn_bench/elasticnet.py
@@ -30,11 +30,11 @@ def main():
                                  alpha=params.alpha, tol=params.tol, max_iter=params.maxiter)
 
     # Time fit
-    fit_time, _ = bench.measure_function_time(regr.fit, X_train, y_train, params=params)
+    fit_time, _ = bench.measure_function_time(regr.fit, X_train, y_train, params=params, stage='fit')
 
     # Time predict
     predict_time, y_pred = bench.measure_function_time(regr.predict,
-                                                       X_train, params=params)
+                                                       X_train, params=params, stage='infer')
 
     train_rmse = bench.rmse_score(y_train, y_pred)
     train_r2 = bench.r2_score(y_train, y_pred)
diff --git a/sklearn_bench/kmeans.py b/sklearn_bench/kmeans.py
index b522a0e92..5a01b8430 100644
--- a/sklearn_bench/kmeans.py
+++ b/sklearn_bench/kmeans.py
@@ -55,14 +55,14 @@ def fit_kmeans(X, X_init):
 
     # Time fit
     fit_time, kmeans = bench.measure_function_time(fit_kmeans, X_train,
-                                                   X_init, params=params)
+                                                   X_init, params=params, stage='fit')
 
     train_predict = kmeans.predict(X_train)
     acc_train = davies_bouldin_score(X_train, train_predict)
 
     # Time predict
     predict_time, test_predict = bench.measure_function_time(
-        kmeans.predict, X_test, params=params)
+        kmeans.predict, X_test, params=params, stage='infer')
 
     acc_test = davies_bouldin_score(X_test, test_predict)
 
diff --git a/sklearn_bench/knn_clsf.py b/sklearn_bench/knn_clsf.py
index f58be1650..fbb1de503 100755
--- a/sklearn_bench/knn_clsf.py
+++ b/sklearn_bench/knn_clsf.py
@@ -36,7 +36,7 @@ def main():
 
     # Measure time and accuracy on fitting
     train_time, _ = bench.measure_function_time(
-        knn_clsf.fit, X_train, y_train, params=params)
+        knn_clsf.fit, X_train, y_train, params=params, stage='fit')
     if params.task == 'classification':
         y_pred = knn_clsf.predict(X_train)
         y_proba = knn_clsf.predict_proba(X_train)
@@ -47,14 +47,14 @@ def main():
     # Measure time and accuracy on prediction
     if params.task == 'classification':
         predict_time, yp = bench.measure_function_time(knn_clsf.predict, X_test,
-                                                       params=params)
+                                                       params=params, stage='infer')
         y_proba = knn_clsf.predict_proba(X_test)
         test_acc = bench.accuracy_score(y_test, yp)
         test_log_loss = bench.log_loss(y_test, y_proba)
         test_roc_auc = bench.roc_auc_score(y_test, y_proba)
     else:
         predict_time, _ = bench.measure_function_time(knn_clsf.kneighbors, X_test,
-                                                      params=params)
+                                                      params=params, stage='infer')
 
     if params.task == 'classification':
         bench.print_output(
diff --git a/sklearn_bench/knn_regr.py b/sklearn_bench/knn_regr.py
index c2048e3f4..c1e9d14cc 100644
--- a/sklearn_bench/knn_regr.py
+++ b/sklearn_bench/knn_regr.py
@@ -36,7 +36,7 @@ def main():
 
     # Measure time and accuracy on fitting
     train_time, _ = bench.measure_function_time(
-        knn_regr.fit, X_train, y_train, params=params)
+        knn_regr.fit, X_train, y_train, params=params, stage='fit')
     if params.task == 'regression':
         y_pred = knn_regr.predict(X_train)
         train_rmse = bench.rmse_score(y_train, y_pred)
@@ -45,12 +45,12 @@ def main():
     # Measure time and accuracy on prediction
     if params.task == 'regression':
         predict_time, yp = bench.measure_function_time(knn_regr.predict, X_test,
-                                                       params=params)
+                                                       params=params, stage='infer')
         test_rmse = bench.rmse_score(y_test, yp)
         test_r2 = bench.r2_score(y_test, yp)
     else:
         predict_time, _ = bench.measure_function_time(knn_regr.kneighbors, X_test,
-                                                      params=params)
+                                                      params=params, stage='infer')
 
     if params.task == 'regression':
         bench.print_output(
diff --git a/sklearn_bench/lasso.py b/sklearn_bench/lasso.py
index c167bc359..b24ef1dd0 100755
--- a/sklearn_bench/lasso.py
+++ b/sklearn_bench/lasso.py
@@ -30,11 +30,11 @@ def main():
                       tol=params.tol, max_iter=params.maxiter)
 
     # Time fit
-    fit_time, _ = bench.measure_function_time(regr.fit, X_train, y_train, params=params)
+    fit_time, _ = bench.measure_function_time(regr.fit, X_train, y_train, params=params, stage='fit')
 
     # Time predict
     predict_time, yp = bench.measure_function_time(
-        regr.predict, X_train, params=params)
+        regr.predict, X_train, params=params, stage='infer')
 
     train_rmse = bench.rmse_score(y_train, yp)
     train_r2 = bench.r2_score(y_train, yp)
diff --git a/sklearn_bench/linear.py b/sklearn_bench/linear.py
index 7da0dba45..3b8a83a5b 100644
--- a/sklearn_bench/linear.py
+++ b/sklearn_bench/linear.py
@@ -30,10 +30,10 @@ def main():
                              n_jobs=params.n_jobs)
 
     # Time fit
-    fit_time, _ = bench.measure_function_time(regr.fit, X_train, y_train, params=params)
+    fit_time, _ = bench.measure_function_time(regr.fit, X_train, y_train, params=params, stage='fit')
 
     # Time predict
-    predict_time, yp = bench.measure_function_time(regr.predict, X_test, params=params)
+    predict_time, yp = bench.measure_function_time(regr.predict, X_test, params=params, stage='infer')
 
     test_rmse = bench.rmse_score(y_test, yp)
     test_r2 = bench.r2_score(y_test, yp)
diff --git a/sklearn_bench/log_reg.py b/sklearn_bench/log_reg.py
index 733ee5765..dc3eba08d 100644
--- a/sklearn_bench/log_reg.py
+++ b/sklearn_bench/log_reg.py
@@ -41,7 +41,7 @@ def main():
                              tol=params.tol, max_iter=params.maxiter,
                              solver=params.solver, multi_class=params.multiclass)
 
     # Time fit and predict
-    fit_time, _ = bench.measure_function_time(clf.fit, X_train, y_train, params=params)
+    fit_time, _ = bench.measure_function_time(clf.fit, X_train, y_train, params=params, stage='fit')
     y_pred = clf.predict(X_train)
     y_proba = clf.predict_proba(X_train)
@@ -50,7 +50,7 @@ def main():
     train_roc_auc = bench.roc_auc_score(y_train, y_proba)
 
     predict_time, y_pred = bench.measure_function_time(
-        clf.predict, X_test, params=params)
+        clf.predict, X_test, params=params, stage='infer')
     y_proba = clf.predict_proba(X_test)
     test_acc = bench.accuracy_score(y_test, y_pred)
     test_log_loss = bench.log_loss(y_test, y_proba)
diff --git a/sklearn_bench/nusvc.py b/sklearn_bench/nusvc.py
index d3e6eeece..029c86af3 100644
--- a/sklearn_bench/nusvc.py
+++ b/sklearn_bench/nusvc.py
@@ -38,7 +38,7 @@ def main():
                   tol=params.tol, gamma=params.gamma, probability=params.probability,
                   random_state=43, degree=params.degree)
 
-    fit_time, _ = bench.measure_function_time(clf.fit, X_train, y_train, params=params)
+    fit_time, _ = bench.measure_function_time(clf.fit, X_train, y_train, params=params, stage='fit')
     params.sv_len = clf.support_.shape[0]
 
     if params.probability:
@@ -63,7 +63,7 @@ def main():
         train_acc = bench.accuracy_score(y_train, y_pred)
 
         _, y_pred = bench.measure_function_time(
-            clf_predict, X_test, params=params)
+            clf_predict, X_test, params=params, stage='infer')
         test_acc = bench.accuracy_score(y_test, y_pred)
 
     bench.print_output(
diff --git a/sklearn_bench/nusvr.py b/sklearn_bench/nusvr.py
index ccfe519ba..9837396b1 100644
--- a/sklearn_bench/nusvr.py
+++ b/sklearn_bench/nusvr.py
@@ -38,7 +38,7 @@ def main():
                    cache_size=params.cache_size_mb, tol=params.tol,
                    gamma=params.gamma, degree=params.degree)
 
-    fit_time, _ = bench.measure_function_time(regr.fit, X_train, y_train, params=params)
+    fit_time, _ = bench.measure_function_time(regr.fit, X_train, y_train, params=params, stage='fit')
     params.sv_len = regr.support_.shape[0]
 
     predict_train_time, y_pred = bench.measure_function_time(
@@ -47,7 +47,7 @@ def main():
     train_r2 = bench.r2_score(y_train, y_pred)
 
     _, y_pred = bench.measure_function_time(
-        regr.predict, X_test, params=params)
+        regr.predict, X_test, params=params, stage='infer')
     test_rmse = bench.rmse_score(y_test, y_pred)
     test_r2 = bench.r2_score(y_test, y_pred)
 
diff --git a/sklearn_bench/pca.py b/sklearn_bench/pca.py
index 31d7bffc2..512000109 100644
--- a/sklearn_bench/pca.py
+++ b/sklearn_bench/pca.py
@@ -34,11 +34,11 @@ def main():
               n_components=params.n_components)
 
     # Time fit
-    fit_time, _ = bench.measure_function_time(pca.fit, X_train, params=params)
+    fit_time, _ = bench.measure_function_time(pca.fit, X_train, params=params, stage='fit')
 
     # Time transform
     transform_time, _ = bench.measure_function_time(
-        pca.transform, X_train, params=params)
+        pca.transform, X_train, params=params, stage='infer')
 
     bench.print_output(
         library='sklearn',
diff --git a/sklearn_bench/ridge.py b/sklearn_bench/ridge.py
index 19718a4e7..314d53143 100644
--- a/sklearn_bench/ridge.py
+++ b/sklearn_bench/ridge.py
@@ -31,10 +31,10 @@ def main():
                        solver=params.solver)
 
     # Time fit
-    fit_time, _ = bench.measure_function_time(regr.fit, X_train, y_train, params=params)
+    fit_time, _ = bench.measure_function_time(regr.fit, X_train, y_train, params=params, stage='fit')
 
     # Time predict
-    predict_time, yp = bench.measure_function_time(regr.predict, X_test, params=params)
+    predict_time, yp = bench.measure_function_time(regr.predict, X_test, params=params, stage='infer')
 
     test_rmse = bench.rmse_score(y_test, yp)
     test_r2 = bench.r2_score(y_test, yp)
diff --git a/sklearn_bench/svm.py b/sklearn_bench/svm.py
index 5ac4c939c..6aaaaa32d 100644
--- a/sklearn_bench/svm.py
+++ b/sklearn_bench/svm.py
@@ -38,7 +38,7 @@ def main():
                  tol=params.tol, gamma=params.gamma, probability=params.probability,
                  random_state=43, degree=params.degree)
 
-    fit_time, _ = bench.measure_function_time(clf.fit, X_train, y_train, params=params)
+    fit_time, _ = bench.measure_function_time(clf.fit, X_train, y_train, params=params, stage='fit')
     params.sv_len = clf.support_.shape[0]
 
     if params.probability:
@@ -53,7 +53,7 @@ def main():
         train_roc_auc = bench.roc_auc_score(y_train, y_pred)
 
         _, y_pred = bench.measure_function_time(
-            clf_predict, X_test, params=params)
+            clf_predict, X_test, params=params, stage='infer')
         test_log_loss = bench.log_loss(y_test, y_pred)
         test_roc_auc = bench.roc_auc_score(y_test, y_pred)
     else:
@@ -69,7 +69,7 @@ def main():
         train_acc = bench.accuracy_score(y_train, y_pred)
 
         _, y_pred = bench.measure_function_time(
-            clf_predict, X_test, params=params)
+            clf_predict, X_test, params=params, stage='infer')
         test_acc = bench.accuracy_score(y_test, y_pred)
 
     bench.print_output(
diff --git a/sklearn_bench/svr.py b/sklearn_bench/svr.py
index 7e9dc2c8d..9b31cbc85 100644
--- a/sklearn_bench/svr.py
+++ b/sklearn_bench/svr.py
@@ -38,7 +38,7 @@ def main():
                    cache_size=params.cache_size_mb, tol=params.tol,
                    gamma=params.gamma, degree=params.degree)
 
-    fit_time, _ = bench.measure_function_time(regr.fit, X_train, y_train, params=params)
+    fit_time, _ = bench.measure_function_time(regr.fit, X_train, y_train, params=params, stage='fit')
     params.sv_len = regr.support_.shape[0]
 
     predict_train_time, y_pred = bench.measure_function_time(
@@ -47,7 +47,7 @@ def main():
     train_r2 = bench.r2_score(y_train, y_pred)
 
     _, y_pred = bench.measure_function_time(
-        regr.predict, X_test, params=params)
+        regr.predict, X_test, params=params, stage='infer')
     test_rmse = bench.rmse_score(y_test, y_pred)
     test_r2 = bench.r2_score(y_test, y_pred)
 
diff --git a/sklearn_bench/train_test_split.py b/sklearn_bench/train_test_split.py
index 046719b48..cce2ebb95 100644
--- a/sklearn_bench/train_test_split.py
+++ b/sklearn_bench/train_test_split.py
@@ -43,7 +43,7 @@ def main():
         tts_params['rng'] = params.rng
 
     time, _ = bench.measure_function_time(
-        train_test_split, *data_args, params=params, **tts_params)
+        train_test_split, *data_args, params=params, stage='fit', **tts_params)
 
     bench.print_output(library='sklearn', algorithm='train_test_split',
                        stages=['training'], params=params,
diff --git a/sklearn_bench/tsne.py b/sklearn_bench/tsne.py
index 2d9f2d0aa..4418d6d97 100644
--- a/sklearn_bench/tsne.py
+++ b/sklearn_bench/tsne.py
@@ -31,7 +31,7 @@ def main():
                 learning_rate=params.learning_rate, angle=params.angle,
                 min_grad_norm=params.min_grad_norm,
                 random_state=params.random_state)
 
-    fit_time, _ = bench.measure_function_time(tsne.fit, X, params=params)
+    fit_time, _ = bench.measure_function_time(tsne.fit, X, params=params, stage='fit')
     divergence = tsne.kl_divergence_
 
     bench.print_output(
diff --git a/tools/emon/edp_config.txt b/tools/emon/edp_config.txt
new file mode 100644
index 000000000..2e6ce7ec2
--- /dev/null
+++ b/tools/emon/edp_config.txt
@@ -0,0 +1,74 @@
+##################################################################################
+#                                                                                #
+#           EDP RUBY SCRIPT CONFIGURATION FILE TEMPLATE                          #
+#           Use this file as a template for creating your configuration file    #
+#           emon -process-edp                                                   #
+#                                                                                #
+#                                                                                #
+##################################################################################
+
+#ruby interpreter, change it according to the path where ruby is installed in your system
+#RUBY_PATH="c:\Ruby192\bin\ruby.exe"
+RUBY_PATH="ruby"
+
+#JVM options for JRuby.
+#--server means using server JVM, which means better GC efficiency
+#5g is the heap size, a larger heap size is always helpful.
+#RUBY_OPTIONS=--server -J-Xmx5g -J-Xms5g --1.8
+
+#Number of threads to process data in parallel. Typically it should equal the number of logical CPUs in your processing system.
+PARALLELISM=24
+
+#input file names, you may need to change them
+EMON_DATA=emon.dat
+
+#output file name, you may want to change it
+OUTPUT=summary.xlsx
+
+#The metrics definition file - HW platform specific file
+#You don't need to specify this, EMON will use the correct file by default
+#Uncomment this line only if you want to use a custom metric file
+#METRICS=skylake_server-2s.xml
+#Use the following settings for processing data from Hybrid platforms
+#EMON will use the correct files by default and use the settings only to override
+#with custom metric files
+#The separate metric files for big core and small core type platforms can be specified
+#and will be used instead of the METRICS setting above
+#SC_METRICS=
+#BC_METRICS=
+
+#Excel chart format file - HW platform specific file
+#You don't need to specify this, EMON will use the correct file by default
+#Uncomment this line only if you want to use a custom chart format file
+#CHART_FORMAT=chart_format_clx_2s.txt
+#Use the following settings for processing data from Hybrid platforms
+#EMON will use the correct files by default and use the settings only to override
+#with custom chart format files
+#The separate chart format files for big core and small core type platforms can be specified
+#and will be used instead of the CHART_FORMAT setting above
+#SC_CHART_FORMAT=
+#BC_CHART_FORMAT=
+
+#the average value will be calculated from the %BEGIN% sample to %END% sample.
+#setting %END% to a negative value means the last available sample.
+#BEGIN=1
+#END=100000
+#The BEGIN/END could also be a wall clock time in the format of mm/dd/yyyy hh:mm:ss.mmm
+#EMON data must be collected with timestamps (-c parameter) in this case.
+#BEGIN="08/24/2012 17:53:20.885"
+#END="08/24/2012 17:53:35.885"
+
+#by default only the system view will be output
+#there are 3 optional views to be selected
+#you can select one or more of them
+VIEW=--socket-view --core-view --thread-view
+
+#set the throughput (Transaction per Second)
+#TPS=--tps 10
+
+#if timestamps are present in EMON data, the charts can be plotted with time as the x-axis.
+#by default the sample number is used as the x-axis.
+#Don't enable this if timestamp data is not present in the EMON data.
+TIMESTAMP_IN_CHART="--timestamp-in-chart"
+
+# QPI frequency
+#QPI=
diff --git a/utils.py b/utils.py
index 192a5f421..c5967ef94 100755
--- a/utils.py
+++ b/utils.py
@@ -35,21 +35,22 @@ def filter_stderr(text: str) -> str:
 
 
 def filter_stdout(text: str) -> Tuple[str, str]:
-    verbosity_letters = 'EWIDT'
-    filtered, extra = '', ''
+    filtered, extra = [], []
+    inside_result_section = False
     for line in text.split('\n'):
-        if line == '':
+        stripped_line = line.strip()
+        if stripped_line == '# Intel(R) Extension for Scikit-learn case result:':
+            inside_result_section = True
             continue
-        to_remove = False
-        for letter in verbosity_letters:
-            if line.startswith(f'[{letter}]'):
-                to_remove = True
-                break
-        if to_remove:
-            extra += line + '\n'
+        if stripped_line == '# Intel(R) Extension for Scikit-learn case finished.':
+            inside_result_section = False
+            continue
+        if inside_result_section:
+            filtered.append(line)
         else:
-            filtered += line + '\n'
-    return filtered, extra
+            extra.append(line)
+
+    return '\n'.join(filtered), '\n'.join(extra)
 
 
 def files_in_folder(folder: str, files: Iterable[str]) -> bool:
diff --git a/xgboost_bench/gbt.py b/xgboost_bench/gbt.py
index 8540f4f5a..13ea24899 100644
--- a/xgboost_bench/gbt.py
+++ b/xgboost_bench/gbt.py
@@ -32,7 +32,6 @@ def convert_xgb_predictions(y_pred, objective):
         y_pred = y_pred.astype(np.int32)
     return y_pred
 
-
 parser = argparse.ArgumentParser(description='xgboost gradient boosted trees '
                                              'benchmark')
 
@@ -162,7 +161,7 @@ def predict(dmatrix):  # type: ignore
 
     fit_time, booster = bench.measure_function_time(
-        fit, None if params.count_dmatrix else dtrain, params=params)
+        fit, None if params.count_dmatrix else dtrain, params=params, stage='fit')
     train_metric = metric_func(
         convert_xgb_predictions(
             booster.predict(dtrain),
@@ -170,7 +169,7 @@ def predict(dmatrix):  # type: ignore
         y_train)
 
     predict_time, y_pred = bench.measure_function_time(
-        predict, None if params.inplace_predict or params.count_dmatrix else dtest, params=params)
+        predict, None if params.inplace_predict or params.count_dmatrix else dtest, stage='infer', params=params)
     test_metric = metric_func(convert_xgb_predictions(y_pred, params.objective), y_test)
 
     bench.print_output(library='xgboost', algorithm=f'gradient_boosted_trees_{task}',
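
Usage sketch (editor's note, not part of the patch). Profiler selection is driven
by an "analysis" section under "common" in the benchmark config JSON, which
check_additional_orders() picks up; the runner then builds the profiler command
line via dict_to_cmd_args() and preproc_analysis(). A minimal, hypothetical
config fragment (the option values are illustrative, not taken from the patch):

    {
        "common": {
            "analysis": {
                "vtune": {"-collect": "hotspots", "-r": "vtune_results"}
            }
        },
        "cases": [...]
    }

With this fragment, dict_to_cmd_args() yields "-collect hotspots -r vtune_results",
preproc_analysis() runs "vtune -collect hotspots -r vtune_results -- <benchmark
command>" for the 'fit' and 'infer' target stages, and vtune_postproc() moves the
"-r" result directory into the per-case folder under analysis_<timestamp>. A run
might then be started as (config path hypothetical):

    python runner.py --configs configs/skl_config.json --time-limit-analysis 60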