update model_training process (train and export) #169

Merged
merged 2 commits on Sep 29, 2023
17 changes: 10 additions & 7 deletions cmd/main.py
@@ -327,7 +327,8 @@ def extract(args):
if args.output:
save_csv(data_path, "extracted_" + args.output, feature_power_data)
query = feature_to_query(FeatureGroups[fg][0])
- query_results[query][[TIMESTAMP_COL, query]].groupby([TIMESTAMP_COL]).sum().to_csv(args.output[0:-4]+"_raw.csv")
+ raw_data = query_results[query][[TIMESTAMP_COL, query]].groupby([TIMESTAMP_COL]).sum()
+ save_csv(data_path, "extracted_" + args.output[0:-4]+"_raw.csv", raw_data)
return feature_power_data, power_cols

def isolate(args):
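Note: the raw per-timestamp aggregation is now written through the same save_csv helper as the extracted features (first argument data_path), rather than calling to_csv directly. A minimal runnable sketch of that step, with TIMESTAMP_COL, the query name, and a stand-in save_csv invented here for illustration:

import os
import pandas as pd

TIMESTAMP_COL = "timestamp"          # assumed timestamp column name
query = "node_platform_joules"       # assumed Prometheus query/metric name

def save_csv(path, name, df):
    # stand-in for the project's save_csv(data_path, filename, df) helper
    os.makedirs(path, exist_ok=True)
    df.to_csv(os.path.join(path, name))

query_results = {query: pd.DataFrame({TIMESTAMP_COL: [1, 1, 2], query: [3.0, 4.0, 5.0]})}

# sum all samples that share a timestamp, as the updated extract() does
raw_data = query_results[query][[TIMESTAMP_COL, query]].groupby([TIMESTAMP_COL]).sum()
save_csv("data", "extracted_output_raw.csv", raw_data)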
@@ -407,10 +408,12 @@ def train(args):
print_cols = ["feature_group", "model_name", "mae"]
print("AbsPower pipeline results:")
metadata_df = load_pipeline_metadata(pipeline.path, energy_source, ModelOutputType.AbsPower.name)
- print(metadata_df.sort_values(by=[ERROR_KEY])[print_cols])
+ if metadata_df is not None:
+     print(metadata_df.sort_values(by=[ERROR_KEY])[print_cols])
print("DynPower pipeline results:")
metadata_df = load_pipeline_metadata(pipeline.path, energy_source, ModelOutputType.DynPower.name)
- print(metadata_df.sort_values(by=[ERROR_KEY])[print_cols])
+ if metadata_df is not None:
+     print(metadata_df.sort_values(by=[ERROR_KEY])[print_cols])

warnings.resetwarnings()
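Note: load_pipeline_metadata can apparently return None when no metadata was produced for an output type, so the result printing is now guarded. A self-contained sketch of the guarded pattern (the stub below replaces the real loader):

import pandas as pd

ERROR_KEY = "mae"
print_cols = ["feature_group", "model_name", "mae"]

def load_pipeline_metadata_stub(has_metadata):
    # stand-in for load_pipeline_metadata, which may return None
    if not has_metadata:
        return None
    return pd.DataFrame([{"feature_group": "BPFOnly",
                          "model_name": "GradientBoostingRegressorTrainer_1",
                          "mae": 2.5}])

for output_type, has_metadata in (("AbsPower", True), ("DynPower", False)):
    print("{} pipeline results:".format(output_type))
    metadata_df = load_pipeline_metadata_stub(has_metadata)
    if metadata_df is not None:
        print(metadata_df.sort_values(by=[ERROR_KEY])[print_cols])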

@@ -616,7 +619,7 @@ def _summary_plot(energy_source, summary_df, output_folder, name):
sns.barplot(data=data, x="Feature Group", y="MAE", hue="Model", ax=ax)
ax.set_title(component)
ax.set_ylabel("MAE (Watt)")
- ax.set_ylim((0, 50))
+ ax.set_ylim((0, 100))
if i < col_num-1:
ax.set_xlabel("")
ax.legend(bbox_to_anchor=(1.05, 1.05))
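For reference, a minimal standalone version of the per-component bar chart that _summary_plot builds, using made-up MAE numbers and the raised 0-100 W y-limit:

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# made-up summary rows for illustration only
data = pd.DataFrame({
    "Feature Group": ["BPFOnly", "BPFOnly", "CounterOnly", "CounterOnly"],
    "MAE": [4.2, 7.9, 5.1, 9.3],
    "Model": ["GradientBoostingRegressorTrainer_1", "SGDRegressorTrainer_1"] * 2,
})

fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(data=data, x="Feature Group", y="MAE", hue="Model", ax=ax)
ax.set_title("package")                 # component name, e.g. package/core/dram
ax.set_ylabel("MAE (Watt)")
ax.set_ylim((0, 100))                   # raised from 50 so larger errors stay visible
ax.legend(bbox_to_anchor=(1.05, 1.05))
fig.savefig("summary_example.png", bbox_inches="tight")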
@@ -671,7 +674,8 @@ def plot(args):
from estimate import default_predicted_col_func
from sklearn.preprocessing import MaxAbsScaler

- best_result_map, power_labels_map, best_model_id_map, _ = estimate(args)
+ best_result_map, power_labels_map, best_model_id_map, summary_df = estimate(args)
+ print(summary_df)
for energy_source, best_restult in best_result_map.items():
best_restult = best_restult.reset_index()
power_labels = power_labels_map[energy_source]
@@ -737,7 +741,7 @@ def export(args):
machine_path = get_machine_path(output_path, args.version, machine_id)

collect_date, _ = extract_time(args.benchmark)
- exporter.export(pipeline_path, machine_path, version=args.version, publisher=args.publisher, collect_date=collect_date, include_raw=args.include_raw)
+ exporter.export(data_path, pipeline_path, machine_path, machine_id=machine_id, version=args.version, publisher=args.publisher, collect_date=collect_date, include_raw=args.include_raw)

args.energy_source = ",".join(PowerSourceMap.keys())
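Note: the export step now passes data_path and machine_id through to the exporter, presumably so it can also locate and package the raw extracted data when args.include_raw is set. The updated call implies an export() signature along these lines (a sketch of the expected interface, not the exporter module's actual code):

def export(data_path, pipeline_path, machine_path, machine_id=None,
           version=None, publisher=None, collect_date=None, include_raw=False):
    """Copy trained models from pipeline_path into machine_path for publishing.

    data_path and machine_id are assumed to be used for finding and naming the
    raw extracted data when include_raw is True.
    """
    ...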

@@ -839,7 +843,6 @@ def plot_scenario(args):
data_filename = get_general_filename(args.target_data, energy_source, None, ot, args.extractor, args.isolator) + "_" + args.scenario
_ts_plot(power_data, power_cols, "Power source: {} ({})".format(energy_source, args.scenario), output_folder, data_filename, ylabel="Power (W)")


if __name__ == "__main__":
# set model top path to data path
os.environ['MODEL_PATH'] = data_path
4 changes: 3 additions & 1 deletion manifests/base/patch/patch-estimator-sidecar.yaml
@@ -6,7 +6,9 @@ metadata:
data:
MODEL_CONFIG: |
NODE_COMPONENTS_ESTIMATOR=true
- NODE_COMPONENTS_INIT_URL=https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models/Linux-4.15.0-213-generic-x86_64_v0.6/rapl/AbsPower/KubeletOnly/GradientBoostingRegressorTrainer_1.zip
+ NODE_COMPONENTS_INIT_URL=https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models/v0.6/nx12/std_v0.6/rapl/AbsPower/BPFOnly/GradientBoostingRegressorTrainer_1.zip
+ NODE_TOTAL_ESTIMATOR=true
+ NODE_TOTAL_INIT_URL=https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models/v0.6/nx12/std_v0.6/acpi/AbsPower/BPFOnly/GradientBoostingRegressorTrainer_1.zip
---
apiVersion: apps/v1
kind: DaemonSet
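Note: the new init URLs follow the kepler-model-db layout models/<version>/<machine id>/<pipeline>/<energy source>/<output type>/<feature group>/<trainer>.zip (here v0.6, nx12, std_v0.6, rapl or acpi, AbsPower, BPFOnly). A small sketch of composing such a URL, with the base and field names inferred from the URLs above:

# Hedged sketch: build a kepler-model-db download URL from its path components,
# matching the pattern of the URLs configured in MODEL_CONFIG above.
MODEL_DB_BASE = "https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models"

def model_url(version, machine_id, pipeline, energy_source, output_type, feature_group, trainer):
    return "/".join([MODEL_DB_BASE, version, machine_id, pipeline,
                     energy_source, output_type, feature_group, trainer + ".zip"])

print(model_url("v0.6", "nx12", "std_v0.6", "acpi", "AbsPower", "BPFOnly",
                "GradientBoostingRegressorTrainer_1"))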