From 33160835aedeec59635ea8b563c9f718fbef5e7d Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Sun, 22 Oct 2023 23:46:16 -0700 Subject: [PATCH 1/3] provide lgmb_mb converter script to fix result files --- report_generator/fix-lgbm-mb-results.py | 93 +++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 report_generator/fix-lgbm-mb-results.py diff --git a/report_generator/fix-lgbm-mb-results.py b/report_generator/fix-lgbm-mb-results.py new file mode 100644 index 000000000..35b1689cf --- /dev/null +++ b/report_generator/fix-lgbm-mb-results.py @@ -0,0 +1,93 @@ +# ============================================================================== +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +""" +Temporary solution to fix the .json result files created from lgbm_mb.py. +The result files are in an incompatible format for report_generator.py. +Attempts to produce xlsx reports fail and create empty files. + +After running this script on my-file.json, a new file my-file-fixed.json will be +produced, containing a JSON version of the results in a compatible format. + +Usage: + + python fix-lgbm-mb-results.py my-file.json [another-file.json ...] + + +Note: This is just a quick and dirty hack that does not fix the underlying + issue. 
# Substrings of the "stage" value found in the lgbm_mb.py result files, paired
# with the text that replaces them in the fixed output.
_STAGE_RENAMES = (
    ("lgbm_train", "training"),
    ("lgbm_predict", "prediction"),
    ("daal4py_predict", "alternative_prediction"),
)


def _merge_stage_entry(header: dict, entry: dict) -> dict:
    """Merge one stage entry into its algorithm header and fill in defaults.

    Args:
        header: The most recent entry that carried an "algorithm" key (may be
            empty if no such entry has been seen yet).
        entry: An entry carrying a "stage" key; its values win on key clashes.

    Returns:
        A new dict; neither ``header`` nor ``entry`` is modified.

    Raises:
        KeyError: If the merged row has no "time[s]" and ``entry`` provides
            neither a non-null "training_time" nor a "prediction_time".
    """
    row = header | entry
    row.setdefault("device", "none")

    if "time[s]" not in row:
        # Compare against None instead of relying on truthiness: a measured
        # time of 0 / 0.0 is a valid value and must not be skipped.
        elapsed = entry.get("training_time")
        if elapsed is None:
            elapsed = entry["prediction_time"]
        row["time[s]"] = elapsed

    # The key must be spelled exactly like the one that is checked for; the
    # previous version stored the default under "algorithm_paramters".
    row.setdefault("algorithm_parameters", {})

    if "accuracy[%]" in row:
        row["accuracy"] = row["accuracy[%]"]

    for old, new in _STAGE_RENAMES:
        row["stage"] = row["stage"].replace(old, new)

    return row


def fix_file(fname: Path):
    """Rewrite the "results" list of a JSON result file into merged rows.

    Entries of ``data["results"]`` that contain an "algorithm" key start a new
    measurement; every following entry that contains a "stage" key is merged
    with that measurement and appended to the output. Entries with neither key
    are dropped. All other top-level keys are copied unchanged.

    The output is written as ``<stem>-fixed.json`` where ``<stem>`` is the
    input file name without its suffix. Note that the path is relative, so the
    file is created in the current working directory, not necessarily next to
    the input file.

    Args:
        fname: Path of the JSON file to convert.

    Raises:
        KeyError: If the file has no top-level "results" key, or a stage entry
            lacks timing information (see ``_merge_stage_entry``).
    """
    # JSON text is UTF-8; do not depend on the locale's default encoding.
    with open(fname, encoding="utf-8") as fp:
        data = json.load(fp)

    # Shallow copy keeps all auxiliary information; "results" is rebuilt below.
    fixed = dict(data)
    fixed["results"] = []

    current_result = {}
    for result in data["results"]:
        if "algorithm" in result:
            # Found a new algo / measurement; it is only a header and is not
            # emitted on its own.
            current_result = result
            continue

        if "stage" in result:
            fixed["results"].append(_merge_stage_entry(current_result, result))

    out_fname = fname.stem + "-fixed.json"
    with open(out_fname, "w", encoding="utf-8") as fp:
        json.dump(fixed, fp, indent=4)


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("filenames", nargs="+")
    args = parser.parse_args()
    for fname in args.filenames:
        fix_file(Path(fname))
b/modelbuilders_bench/lgbm_mb.py @@ -118,8 +118,7 @@ t_train, model_lgbm = bench.measure_function_time(lgbm.train, lgbm_params, lgbm_train, params=params, num_boost_round=params.n_estimators, - valid_sets=lgbm_train, - verbose_eval=False) + valid_sets=lgbm_train) train_metric = None if not X_train.equals(X_test): y_train_pred = model_lgbm.predict(X_train) From c0406cb5c5df762fc1cbfaf3b02692c171760c9f Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Fri, 27 Oct 2023 06:49:03 -0700 Subject: [PATCH 3/3] formatting --- report_generator/fix-lgbm-mb-results.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/report_generator/fix-lgbm-mb-results.py b/report_generator/fix-lgbm-mb-results.py index 35b1689cf..c5dbc9702 100644 --- a/report_generator/fix-lgbm-mb-results.py +++ b/report_generator/fix-lgbm-mb-results.py @@ -37,6 +37,7 @@ import json from pathlib import Path + def fix_file(fname: Path): with open(fname) as fp: data = json.load(fp) @@ -62,7 +63,9 @@ def fix_file(fname: Path): comb["device"] = "none" if "time[s]" not in comb: - comb["time[s]"] = result.get("training_time") or result["prediction_time"] + comb["time[s]"] = ( + result.get("training_time") or result["prediction_time"] + ) if "algorithm_parameters" not in comb: comb["algorithm_paramters"] = {}