fix/update model training and export
Signed-off-by: Sunyanan Choochotkaew <[email protected]>
sunya-ch committed Sep 28, 2023
1 parent dbe1166 commit 3f3ec0a
Showing 8 changed files with 204 additions and 202 deletions.
17 changes: 10 additions & 7 deletions cmd/main.py
@@ -327,7 +327,8 @@ def extract(args):
     if args.output:
         save_csv(data_path, "extracted_" + args.output, feature_power_data)
         query = feature_to_query(FeatureGroups[fg][0])
-        query_results[query][[TIMESTAMP_COL, query]].groupby([TIMESTAMP_COL]).sum().to_csv(args.output[0:-4]+"_raw.csv")
+        raw_data = query_results[query][[TIMESTAMP_COL, query]].groupby([TIMESTAMP_COL]).sum()
+        save_csv(data_path, "extracted_" + args.output[0:-4]+"_raw.csv", raw_data)
     return feature_power_data, power_cols

 def isolate(args):
@@ -407,10 +408,12 @@ def train(args):
     print_cols = ["feature_group", "model_name", "mae"]
     print("AbsPower pipeline results:")
     metadata_df = load_pipeline_metadata(pipeline.path, energy_source, ModelOutputType.AbsPower.name)
-    print(metadata_df.sort_values(by=[ERROR_KEY])[print_cols])
+    if metadata_df is not None:
+        print(metadata_df.sort_values(by=[ERROR_KEY])[print_cols])
     print("DynPower pipeline results:")
     metadata_df = load_pipeline_metadata(pipeline.path, energy_source, ModelOutputType.DynPower.name)
-    print(metadata_df.sort_values(by=[ERROR_KEY])[print_cols])
+    if metadata_df is not None:
+        print(metadata_df.sort_values(by=[ERROR_KEY])[print_cols])

     warnings.resetwarnings()
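
Note: load_pipeline_metadata returns None when a pipeline produced no metadata for an output type (the exporter checks for exactly this case), so the prints above are now guarded. A self-contained sketch of the guard, with illustrative frame contents:

import pandas as pd

ERROR_KEY = "mae"
print_cols = ["feature_group", "model_name", "mae"]

def print_results(metadata_df):
    # guard: load_pipeline_metadata may return None when nothing was trained
    if metadata_df is not None:
        print(metadata_df.sort_values(by=[ERROR_KEY])[print_cols])

print_results(None)  # skipped silently
print_results(pd.DataFrame([{"feature_group": "BPFOnly", "model_name": "SGDRegressorTrainer_1", "mae": 4.0}]))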

@@ -616,7 +619,7 @@ def _summary_plot(energy_source, summary_df, output_folder, name):
         sns.barplot(data=data, x="Feature Group", y="MAE", hue="Model", ax=ax)
         ax.set_title(component)
         ax.set_ylabel("MAE (Watt)")
-        ax.set_ylim((0, 50))
+        ax.set_ylim((0, 100))
         if i < col_num-1:
             ax.set_xlabel("")
         ax.legend(bbox_to_anchor=(1.05, 1.05))
@@ -671,7 +674,8 @@ def plot(args):
     from estimate import default_predicted_col_func
     from sklearn.preprocessing import MaxAbsScaler

-    best_result_map, power_labels_map, best_model_id_map, _ = estimate(args)
+    best_result_map, power_labels_map, best_model_id_map, summary_df = estimate(args)
+    print(summary_df)
     for energy_source, best_restult in best_result_map.items():
         best_restult = best_restult.reset_index()
         power_labels = power_labels_map[energy_source]
@@ -737,7 +741,7 @@ def export(args):
     machine_path = get_machine_path(output_path, args.version, machine_id)

     collect_date, _ = extract_time(args.benchmark)
-    exporter.export(pipeline_path, machine_path, version=args.version, publisher=args.publisher, collect_date=collect_date, include_raw=args.include_raw)
+    exporter.export(data_path, pipeline_path, machine_path, machine_id=machine_id, version=args.version, publisher=args.publisher, collect_date=collect_date, include_raw=args.include_raw)

     args.energy_source = ",".join(PowerSourceMap.keys())

@@ -839,7 +843,6 @@ def plot_scenario(args):
         data_filename = get_general_filename(args.target_data, energy_source, None, ot, args.extractor, args.isolator) + "_" + args.scenario
         _ts_plot(power_data, power_cols, "Power source: {} ({})".format(energy_source, args.scenario), output_folder, data_filename, ylabel="Power (W)")

-
 if __name__ == "__main__":
     # set model top path to data path
     os.environ['MODEL_PATH'] = data_path
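Note on the extract() change above: the aggregated raw query results are now written through save_csv under the shared data path instead of pandas' to_csv into the working directory. A minimal, self-contained sketch of that pattern; the save_csv helper below is a hypothetical stand-in, not the repository's exact implementation:

import os
import pandas as pd

def save_csv(path, name, data):
    # hypothetical stand-in: join the data path and file name, then persist
    if not name.endswith(".csv"):
        name += ".csv"
    data.to_csv(os.path.join(path, name))

data_path = "/tmp/data"
os.makedirs(data_path, exist_ok=True)
df = pd.DataFrame({"timestamp": [1, 1, 2], "query": [0.5, 0.2, 0.7]})
raw_data = df.groupby(["timestamp"]).sum()  # same aggregation shape as the diff
save_csv(data_path, "extracted_sample_raw.csv", raw_data)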
321 changes: 146 additions & 175 deletions model_training/benchmark/stressng.yaml

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions model_training/script.sh
@@ -14,6 +14,7 @@ export VERSION=${VERSION-v0.6}
 export PIPELINE_PREFIX=${PIPELINE_PREFIX-"std_"}
 export CPE_DATAPATH=${CPE_DATAPATH-"$(pwd)/data"}
 export ENTRYPOINT_IMG=${ENTRYPOINT_IMG-"quay.io/sustainable_computing_io/kepler_model_server:v0.6"}
+export MODEL_PATH=$CPE_DATAPATH

 mkdir -p $HOME/bin
 export PATH=$HOME/bin:$PATH
@@ -113,7 +114,7 @@ function wait_for_benchmark() {
 function save_benchmark() {
     BENCHMARK=$1
     BENCHMARK_NS=$2
-    kubectl get benchmark $BENCHMARK -n ${BENCHMARK_NS} -ojson > data/${BENCHMARK}.json
+    kubectl get benchmark $BENCHMARK -n ${BENCHMARK_NS} -ojson > $CPE_DATAPATH/${BENCHMARK}.json
 }

 function collect_idle() {
@@ -180,7 +181,7 @@ function quick_collect() {
 }

 function train() {
-    train_model stressng_kepler_query,coremark_kepler_query,parsec_kepler_query ${VERSION}
+    train_model stressng_kepler_query ${VERSION}
 }

 function quick_train() {
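Note: the new MODEL_PATH export keeps the shell side consistent with cmd/main.py, which sets os.environ['MODEL_PATH'] = data_path at startup. A small sketch of the resolution order, assuming the script's defaults (paths illustrative):

import os

# mirror script.sh: CPE_DATAPATH falls back to ./data,
# and MODEL_PATH follows CPE_DATAPATH unless overridden
data_path = os.environ.get("CPE_DATAPATH", os.path.join(os.getcwd(), "data"))
model_path = os.environ.get("MODEL_PATH", data_path)
print("models resolved from:", model_path)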
20 changes: 17 additions & 3 deletions src/train/exporter/exporter.py
@@ -16,8 +16,9 @@
 from loader import load_csv, load_pipeline_metadata, get_model_group_path, load_metadata, load_train_args, get_preprocess_folder, get_general_filename
 from saver import WEIGHT_FILENAME, save_pipeline_metadata, save_train_args
 from format import time_to_str
+from writer import generate_pipeline_page, generate_validation_results, append_version_readme

-def export(pipeline_path, machine_path, version, publisher, collect_date, include_raw=False):
+def export(data_path, pipeline_path, machine_path, machine_id, version, publisher, collect_date, include_raw=False):
     if not validate_arguments(pipeline_path):
         return
@@ -47,7 +48,9 @@ def export(pipeline_path, machine_path, version, publisher, collect_date, include_raw=False):

 extractor = pipeline_metadata["extractor"]
 isolator = pipeline_metadata["isolator"]
+mae_validated_df_map = dict()
 for energy_source in PowerSourceMap.keys():
+    mae_validated_df_map[energy_source] = dict()
     for ot in ModelOutputType:
         metadata_df = load_pipeline_metadata(pipeline_path, energy_source, ot.name)
         if metadata_df is None:
@@ -80,8 +83,19 @@ def export(pipeline_path, machine_path, version, publisher, collect_date, include_raw=False):
                 save_pipeline_metadata(out_pipeline_path, pipeline_metadata, energy_source, ot.name, mae_validated_df)
                 print("Exported models for {}/{}".format(energy_source, ot.name))
                 print(mae_validated_df)
+                mae_validated_df_map[energy_source][ot.name] = mae_validated_df
             else:
                 print("No valid models exported for {}/{}".format(energy_source, ot.name))

+    train_args = load_train_args(pipeline_path)
+    train_args["machine_id"] = machine_id
+
     # save train args
-    save_train_args(out_pipeline_path, load_train_args(pipeline_path))
+    save_train_args(out_pipeline_path, train_args)
+
+    # generate document
+    generate_pipeline_page(data_path, machine_path, train_args)
+    generate_validation_results(machine_path, train_args, mae_validated_df_map)
+    append_version_readme(machine_path, train_args, pipeline_metadata, include_raw)
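
Note: export() now accumulates the validated metadata per energy source and output type in mae_validated_df_map before handing it to the writer functions. A minimal, self-contained sketch of that two-level map (keys and frame contents illustrative):

import pandas as pd

energy_sources = ["rapl", "acpi"]          # illustrative PowerSourceMap keys
output_types = ["AbsPower", "DynPower"]    # ModelOutputType names

mae_validated_df_map = dict()
for energy_source in energy_sources:
    mae_validated_df_map[energy_source] = dict()
    for ot in output_types:
        # stand-in for the validated metadata frame of one model group
        mae_validated_df_map[energy_source][ot] = pd.DataFrame(
            [{"model_name": "GradientBoostingRegressorTrainer_1", "mae": 3.2}]
        )

print(mae_validated_df_map["rapl"]["AbsPower"])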


1 change: 0 additions & 1 deletion src/train/exporter/validator.py
@@ -6,7 +6,6 @@

 from loader import load_train_args
 from config import ERROR_KEY
-from train_types import PowerSourceMap

 required_benchmark = ["stressng_kepler_query"]

7 changes: 5 additions & 2 deletions src/train/pipeline.py
@@ -163,7 +163,7 @@ def save_metadata(self):
         all_metadata = get_all_metadata(model_toppath, self.name)
         for energy_source, model_type_metadata in all_metadata.items():
             for model_type, metadata_df in model_type_metadata.items():
-                metadata_df = metadata_df.sort_values(by=[ERROR_KEY])
+                metadata_df = metadata_df.sort_values(by=["feature_group", ERROR_KEY])
                 save_pipeline_metadata(self.path, self.metadata, energy_source, model_type, metadata_df)

     def print_pipeline_process_end(self, energy_source, feature_group, abs_data, dyn_data):
@@ -214,7 +214,10 @@ def initial_trainers(trainer_names, node_level, pipeline_name, target_energy_sou
     energy_components = PowerSourceMap[energy_source]
     for feature_group in valid_feature_groups:
         for trainer_name in trainer_names:
-            trainer_class = load_class("trainer", trainer_name)
+            try:
+                trainer_class = load_class("trainer", trainer_name)
+            except:
+                continue
             trainer = trainer_class(energy_components, feature_group.name, energy_source, node_level, pipeline_name=pipeline_name)
             trainers += [trainer]
     return trainers
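
Note: wrapping load_class in try/except lets pipeline construction skip trainers whose modules cannot be imported (for example, when an optional dependency is missing) instead of failing the whole run. A self-contained sketch of the skip-on-failure pattern; the trainer names are illustrative, and the diff's bare except is narrowed to Exception here:

def load_class(module, name):
    # stand-in loader: only one trainer is "installed" in this sketch
    if name != "GradientBoostingRegressorTrainer":
        raise ImportError("trainer module not available: " + name)
    return object

trainers = []
for trainer_name in ["GradientBoostingRegressorTrainer", "XgboostFitTrainer"]:
    try:
        trainer_class = load_class("trainer", trainer_name)
    except Exception:
        continue  # skip trainers that cannot be loaded
    trainers.append(trainer_class)
print(len(trainers))  # 1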
33 changes: 22 additions & 11 deletions src/util/loader.py
@@ -17,12 +17,14 @@
 CHECKPOINT_FOLDERNAME = 'checkpoint'
 PREPROCESS_FOLDERNAME = "preprocessed_data"

-default_init_model_url = "https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models/"
-default_init_pipeline_name = "Linux-4.15.0-213-generic-x86_64_v0.6"
+default_init_model_url = "https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models/v0.6/nx12"
+default_init_pipeline_name = "std_v0.6"
 default_trainer_name = "GradientBoostingRegressorTrainer"
 default_node_type = "1"
 any_node_type = -1
-default_feature_group = FeatureGroup.KubeletOnly
+default_feature_group = FeatureGroup.BPFOnly
+
+trainers_with_weight = ["SGDRegressorTrainer"]

 def load_json(path, name):
     if ".json" not in name:
@@ -236,20 +238,29 @@ def get_download_output_path(download_path, energy_source, output_type):
     energy_source_path = assure_path(os.path.join(download_path, energy_source))
     return os.path.join(energy_source_path, output_type.name)

-def get_url(output_type, feature_group=default_feature_group, trainer_name=default_trainer_name, node_type=default_node_type, model_topurl=default_init_model_url, energy_source="rapl", pipeline_name=default_init_pipeline_name):
+def get_url(output_type, feature_group=default_feature_group, trainer_name=default_trainer_name, node_type=default_node_type, model_topurl=default_init_model_url, energy_source="rapl", pipeline_name=default_init_pipeline_name, model_name=None, weight=False):
     group_path = get_model_group_path(model_topurl, output_type=output_type, feature_group=feature_group, energy_source=energy_source, pipeline_name=pipeline_name, assure=False)
-    model_name = get_model_name(trainer_name, node_type)
-    return os.path.join(group_path, model_name + ".zip")
-
-def get_pipeline_url(model_topurl=default_init_model_url, pipeline_name=default_init_pipeline_name):
-    return os.path.join(model_topurl, pipeline_name + ".zip")
+    if model_name is None:
+        model_name = get_model_name(trainer_name, node_type)
+    file_ext = ".zip"
+    if weight:
+        file_ext = ".json"
+    return os.path.join(group_path, model_name + file_ext)
+
+def get_pipeline_url(model_topurl=default_init_model_url, pipeline_name=default_init_pipeline_name, weight=False):
+    file_ext = ".zip"
+    if weight:
+        file_ext = ".json"
+    return os.path.join(model_topurl, pipeline_name + file_ext)

 def class_to_json(class_obj):
     return json.loads(json.dumps(class_obj.__dict__))

-def get_machine_path(output_path, version, machine_id):
+def get_machine_path(output_path, version, machine_id, assure=True):
     export_path = os.path.join(output_path, version, machine_id)
-    return assure_path(export_path)
+    if assure:
+        return assure_path(export_path)
+    return export_path

 def get_preprocess_folder(pipeline_path, assure=True):
     preprocess_folder = os.path.join(pipeline_path, PREPROCESS_FOLDERNAME)
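
Note: get_url and get_pipeline_url can now point at either a model archive (.zip) or a weight-only JSON (.json) under the same path, matching the trainers_with_weight addition above. A self-contained sketch of the extension (get_pipeline_url reproduced from the diff; output comments illustrative):

import os

default_init_model_url = "https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models/v0.6/nx12"
default_init_pipeline_name = "std_v0.6"

def get_pipeline_url(model_topurl=default_init_model_url, pipeline_name=default_init_pipeline_name, weight=False):
    file_ext = ".zip"
    if weight:
        file_ext = ".json"
    return os.path.join(model_topurl, pipeline_name + file_ext)

print(get_pipeline_url())             # .../std_v0.6.zip
print(get_pipeline_url(weight=True))  # .../std_v0.6.json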
2 changes: 1 addition & 1 deletion src/util/train_types.py
@@ -84,7 +84,7 @@ def deep_sort(elements):
     FeatureGroup.AcceleratorOnly: deep_sort(ACCELERATE_FEATURES),
 }

-SingleSourceFeatures = [FeatureGroup.CounterOnly.name, FeatureGroup.CgroupOnly.name, FeatureGroup.BPFOnly.name, FeatureGroup.KubeletOnly.name]
+SingleSourceFeatures = [FeatureGroup.CounterOnly.name, FeatureGroup.CgroupOnly.name, FeatureGroup.BPFOnly.name, FeatureGroup.BPFIRQ.name, FeatureGroup.KubeletOnly.name]

 def is_single_source_feature_group(fg):
     return fg.name in SingleSourceFeatures
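
Note: BPFIRQ joins the single-source feature groups, so is_single_source_feature_group now accepts it. A minimal sketch of the membership check; the Enum is a pared-down stand-in for the repository's FeatureGroup:

from enum import Enum

class FeatureGroup(Enum):
    CounterOnly = 1
    CgroupOnly = 2
    BPFOnly = 3
    BPFIRQ = 4
    KubeletOnly = 5
    Full = 6

SingleSourceFeatures = [FeatureGroup.CounterOnly.name, FeatureGroup.CgroupOnly.name, FeatureGroup.BPFOnly.name, FeatureGroup.BPFIRQ.name, FeatureGroup.KubeletOnly.name]

def is_single_source_feature_group(fg):
    return fg.name in SingleSourceFeatures

print(is_single_source_feature_group(FeatureGroup.BPFIRQ))  # True
print(is_single_source_feature_group(FeatureGroup.Full))    # False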
