-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #215 from sunya-ch/trainer
Add curvefit trainer (fix minor issue, update test data)
- Loading branch information
Showing
23 changed files
with
355 additions
and
45 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
apiVersion: tekton.dev/v1 | ||
kind: PipelineRun | ||
metadata: | ||
name: example-abs-train-pipeline | ||
spec: | ||
timeouts: | ||
pipeline: "6h" | ||
tasks: "5h50m" | ||
workspaces: | ||
- name: mnt | ||
persistentVolumeClaim: | ||
claimName: task-pvc | ||
params: | ||
- name: PIPELINE_NAME | ||
value: AbsPowerTrainPipelineExample | ||
- name: OUTPUT_TYPE | ||
value: AbsPower | ||
pipelineRef: | ||
name: single-train-pipeline |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import os | ||
import sys | ||
cur_path = os.path.join(os.path.dirname(__file__), '.') | ||
sys.path.append(cur_path) | ||
|
||
from estimate_common import transform_and_predict, load_model_by_pickle, load_model_by_json, is_component_model | ||
from util.train_types import main_feature, get_valid_feature_groups | ||
|
||
import os | ||
import sys | ||
src_path = os.path.join(os.path.dirname(__file__), '..', '..') | ||
sys.path.append(src_path) | ||
|
||
from util import ModelOutputType | ||
|
||
import collections.abc | ||
|
||
class CurveFitModel():
    """Estimator-side wrapper around one or more pickled curve-fit models.

    When ``model_file`` describes a component model (and this instance is not
    itself being built as a component), one nested ``CurveFitModel`` is loaded
    per component listed in the JSON metadata and kept in ``self.models``.
    Otherwise a single pickled model is loaded into ``self.model`` together
    with its feature-engineering objects in ``self.fe_list``.
    """

    def __init__(self, model_path, model_name, output_type, model_file, features, fe_files, component_init=False, feature_group=None):
        """Load the model (or per-component models) found under ``model_path``.

        Args:
            model_path: location handed to the ``load_model_by_*`` helpers.
            model_name: name stored on the instance as ``self.name``.
            output_type: member name looked up in ``ModelOutputType``.
            model_file: pickle file, or JSON metadata file for component models.
            features: feature names used by the model.
            fe_files: pickle files of feature-engineering objects to load.
            component_init: True when building a nested per-component model;
                disables the component-model detection.
            feature_group: optional explicit feature group; when None the first
                valid group for ``features`` is used.
        """
        self.name = model_name
        self.features = features
        # NOTE: the attribute keeps its existing spelling ("feauture_group");
        # code outside this block may read it, so it is not renamed here.
        if feature_group is None:
            self.feauture_group = get_valid_feature_groups(features)[0]
        else:
            self.feauture_group = feature_group
        self.output_type = ModelOutputType[output_type]

        self.comp_type = not component_init and is_component_model(model_file)
        if self.comp_type:
            self.models = dict()
            model_info = load_model_by_json(model_path, model_file)
            for comp, model_metadata in model_info.items():
                model = CurveFitModel(
                    model_path,
                    self.name,
                    self.output_type.name,
                    model_metadata['model_file'],
                    model_metadata['features'],
                    model_metadata['fe_files'],
                    component_init=True,
                )
                # Each component model fits on a single feature column.
                feature_index = main_feature(self.feauture_group.name, comp)
                model.model.set_feature_index(feature_index)
                self.models[comp] = model
        else:
            self.model = load_model_by_pickle(model_path, model_file)
            self.fe_list = []
            for fe_filename in fe_files:
                self.fe_list += [load_model_by_pickle(model_path, fe_filename)]

    def get_power(self, request):
        """Predict power for ``request``.

        Returns:
            For component models, a tuple ``(results, msg)`` where ``results``
            maps each component to a list of predictions; on the first
            component error ``([], msg)`` is returned instead. For a single
            model, whatever ``transform_and_predict`` returns.
        """
        if self.comp_type:
            results = dict()
            # Initialised up front so an empty component set returns cleanly
            # instead of referencing an unbound name.
            msg = ""
            for comp, model in self.models.items():
                y, msg = transform_and_predict(model, request)
                if msg != "":
                    return [], msg
                if not isinstance(y, collections.abc.Sequence):
                    y = [y]
                results[comp] = y
            return results, msg
        else:
            return transform_and_predict(self, request)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import os | ||
import sys | ||
trainer_path = os.path.join(os.path.dirname(__file__), '..') | ||
sys.path.append(trainer_path) | ||
|
||
from trainer.curvefit import CurveFitTrainer, CurveFitModel | ||
|
||
import numpy as np | ||
|
||
def expo_func(x, a, b, c):
    """Exponential curve ``a * exp(b * x) + c`` evaluated at ``x``."""
    growth = np.exp(b * x)
    return a * growth + c
|
||
class ExponentialRegressionTrainer(CurveFitTrainer):
    """Curve-fit trainer whose models fit the exponential ``expo_func``."""

    def __init__(self, energy_components, feature_group, energy_source, node_level, pipeline_name):
        """Forward all arguments to ``CurveFitTrainer`` and reset ``fe_files``."""
        super().__init__(
            energy_components,
            feature_group,
            energy_source,
            node_level,
            pipeline_name=pipeline_name,
        )
        self.fe_files = []

    def init_model(self):
        """Return a new, unfitted ``CurveFitModel`` bound to ``expo_func``."""
        fresh_model = CurveFitModel(expo_func)
        return fresh_model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import os | ||
import sys | ||
trainer_path = os.path.join(os.path.dirname(__file__), '..') | ||
sys.path.append(trainer_path) | ||
|
||
from trainer.curvefit import CurveFitTrainer, CurveFitModel | ||
|
||
import numpy as np | ||
|
||
def p0_func(x, y):
    """Return the initial guess ``[a, b, c]`` for fitting ``log_func``.

    Args:
        x: input values (unused; accepted so the signature matches the
            ``p0_func(x, y)`` call made by the curve-fit model).
        y: target values; must provide ``max()`` and ``min()``.

    Returns:
        ``[a, b, c]`` where ``a`` is the value range of ``y``, ``b`` is 1 and
        ``c`` is the minimum of ``y``.
    """
    # The leftover debug print of y.max()/y.min() was removed: it wrote to
    # stdout on every fit without affecting the result.
    lowest = y.min()
    a = y.max() - lowest
    b = 1
    c = lowest
    return [a, b, c]
|
||
def log_func(x, a, b, c):
    """Evaluate a clamped logarithmic curve over every element of ``x``.

    Each element yields ``a * log(b * xi) + c`` when ``b * xi`` is positive and
    the logarithmic term itself is positive; otherwise it yields ``c``.
    Returns a list with one entry per element of ``x``.
    """
    fitted = []
    for xi in x:
        scaled = b * xi
        if scaled > 0:
            log_term = a * np.log(scaled)
            # Only a positive contribution is added on top of the offset.
            fitted.append(log_term + c if log_term > 0 else c)
        else:
            fitted.append(c)
    return fitted
|
||
class LogarithmicRegressionTrainer(CurveFitTrainer):
    """Curve-fit trainer whose models fit the logarithmic ``log_func``."""

    def __init__(self, energy_components, feature_group, energy_source, node_level, pipeline_name):
        """Forward all arguments to ``CurveFitTrainer`` and reset ``fe_files``."""
        super().__init__(
            energy_components,
            feature_group,
            energy_source,
            node_level,
            pipeline_name=pipeline_name,
        )
        self.fe_files = []

    def init_model(self):
        """Return a new, unfitted ``CurveFitModel`` using ``log_func`` and ``p0_func``."""
        fresh_model = CurveFitModel(log_func, p0_func=p0_func)
        return fresh_model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import os | ||
import sys | ||
trainer_path = os.path.join(os.path.dirname(__file__), '..') | ||
sys.path.append(trainer_path) | ||
|
||
from trainer.curvefit import CurveFitTrainer, CurveFitModel | ||
|
||
import numpy as np | ||
|
||
def p0_func(x, y):
    """Return the initial guess ``[A, x0, k, off]`` for fitting ``logi_func``.

    Args:
        x: input values (unused; accepted so the signature matches the
            ``p0_func(x, y)`` call made by the curve-fit model).
        y: target values as a numpy array.

    Returns:
        ``[A, x0, k, off]`` where ``A`` is the value range of ``y``, ``x0`` is
        the sigmoid mid point, ``k`` the growth rate and ``off`` the offset.
    """
    off = y.min()  # initial offset
    A = y.max() - off  # value range
    x0 = 0.5  # sigmoid mid point (as normalized value is in 0 to 1, start mid point = 0.5)
    spread = np.std(y)
    # growth rate (larger std, lower growth). A constant y has zero std and
    # zero range, so the ratio would be 0/0 = NaN; fall back to a finite
    # growth rate so the optimizer starts from a usable point.
    k = A / spread if spread > 0 else 1.0
    return [A, x0, k, off]
|
||
def logi_func(x, A, x0, k, off):
    """Logistic curve ``A / (1 + exp(-k * (x - x0))) + off`` evaluated at ``x``."""
    exponent = -k * (x - x0)
    denominator = 1 + np.exp(exponent)
    return A / denominator + off
|
||
class LogisticRegressionTrainer(CurveFitTrainer):
    """Curve-fit trainer whose models fit the logistic ``logi_func``."""

    def __init__(self, energy_components, feature_group, energy_source, node_level, pipeline_name):
        """Forward all arguments to ``CurveFitTrainer`` and reset ``fe_files``."""
        super().__init__(
            energy_components,
            feature_group,
            energy_source,
            node_level,
            pipeline_name=pipeline_name,
        )
        self.fe_files = []

    def init_model(self):
        """Return a new, unfitted ``CurveFitModel`` using ``logi_func`` and ``p0_func``."""
        fresh_model = CurveFitModel(logi_func, p0_func=p0_func)
        return fresh_model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
from sklearn.metrics import mean_absolute_error | ||
from sklearn.exceptions import NotFittedError | ||
import numpy as np | ||
from scipy.optimize import curve_fit | ||
import os | ||
import sys | ||
|
||
util_path = os.path.join(os.path.dirname(__file__), '..', '..', 'util') | ||
sys.path.append(util_path) | ||
|
||
from util import save_pkl, load_pkl | ||
from util.train_types import main_feature, FeatureGroup | ||
|
||
from . import Trainer | ||
|
||
model_class = "curvefit" | ||
|
||
def get_save_path(model_filepath):
    """Return everything before the last ``/`` of ``model_filepath``.

    A path without any ``/`` yields an empty string.
    """
    parent, _sep, _leaf = model_filepath.rpartition("/")
    return parent
|
||
class CurveFitModel():
    """Single-feature model fitted with ``scipy.optimize.curve_fit``.

    ``fit_func`` is the curve to fit; ``p0_func`` (optional) maps the flattened
    ``(x, y)`` training data to the initial parameter guess. The column of the
    input matrix that is used as ``x`` is selected by ``feature_index``.
    """

    def __init__(self, fit_func, p0_func=None):
        self.fit_func = fit_func
        self.p0_func = p0_func
        self.feature_index = None
        # Fitted parameters and their covariance; None until fit() succeeds.
        self.popt = None
        self.pcov = None

    def set_feature_index(self, feature_index):
        """Choose which column of the input matrix feeds the curve."""
        self.feature_index = feature_index

    def _x_values(self, X_values):
        """Extract the selected feature column as a flat numpy array."""
        selected = X_values[:, self.feature_index]
        return np.array(selected).flatten()

    def fit(self, X_values, y_values):
        """Fit ``fit_func`` to the selected feature column and ``y_values``."""
        xs = self._x_values(X_values)
        ys = np.array(y_values).flatten()
        options = {"maxfev": 20000}
        if self.p0_func is not None:
            options["p0"] = self.p0_func(xs, ys)
        self.popt, self.pcov = curve_fit(self.fit_func, xs, ys, **options)

    def predict(self, X_values):
        """Evaluate the fitted curve; raises ``NotFittedError`` before ``fit``."""
        if self.popt is None:
            raise NotFittedError("Model must be fit first")
        xs = self._x_values(X_values)
        return np.array(self.fit_func(xs, *self.popt))
|
||
# curvefit will focus on only single feature. default is the first feature in the feature group. | ||
class CurveFitTrainer(Trainer):
    """Trainer whose per-component models are fitted with a curve-fit model.

    Subclasses provide the concrete model via ``init_model``. Each model is
    trained on a single feature column of the input matrix.
    """

    def __init__(self, energy_components, feature_group, energy_source, node_level, pipeline_name, scaler_type="maxabs"):
        """Initialise the base ``Trainer`` with the ``curvefit`` model class."""
        super(CurveFitTrainer, self).__init__(model_class, energy_components, feature_group, energy_source, node_level, pipeline_name, scaler_type=scaler_type)
        self.fe_files = []

    def train(self, node_type, component, X_values, y_values):
        """Fit the model of ``component`` for ``node_type``.

        Feature-engineering objects in ``self.fe`` (if present) are applied
        first. For the ``package`` component, when the model's feature is not
        the dram feature, rows whose dram value is below 0.1 are dropped before
        fitting. Any exception is printed with its traceback and swallowed.
        """
        try:
            if hasattr(self, 'fe'):
                for fe in self.fe:
                    X_values = fe.fit_transform(X_values)
            model = self.node_models[node_type][component]
            if component == "package":
                dram_index = main_feature(self.feature_group_name, "dram")
                if model.feature_index != dram_index:
                    dram_values = np.array(X_values[:, dram_index]).flatten()
                    # Collect row *indices* (not the dram values themselves) so
                    # the membership tests below actually filter those rows.
                    zero_dram_indices = {i for i, value in enumerate(dram_values) if value < 0.1}
                    X_values = [list(row) for i, row in enumerate(X_values) if i not in zero_dram_indices]
                    y_values = [row for i, row in enumerate(y_values) if i not in zero_dram_indices]
                    X_values = np.array(X_values)
            model.fit(X_values, y_values)
        except Exception as err:
            print("Train error", err)
            import traceback
            traceback.print_exc()

    def save_checkpoint(self, model, filepath):
        """Pickle ``model`` to ``filepath`` and any ``self.fe`` objects next to it."""
        if hasattr(self, 'fe'):
            save_path = get_save_path(filepath)
            for index, fe in enumerate(self.fe):
                save_pkl(save_path, self.fe_files[index], fe)
        save_pkl("", filepath, model)

    def load_local_checkpoint(self, filepath):
        """Load a pickled model (and feature-engineering objects) from disk.

        Returns:
            ``(loaded_model, found)`` where ``found`` is False when the loader
            returned None.
        """
        if hasattr(self, 'fe_files'):
            save_path = get_save_path(filepath)
            for index, fe_file in enumerate(self.fe_files):
                loaded_fe = load_pkl(save_path, fe_file)
                if loaded_fe is not None:
                    # NOTE(review): relies on self.fe already existing with at
                    # least len(self.fe_files) entries - confirm with callers.
                    self.fe[index] = loaded_fe
        loaded_model = load_pkl("", filepath)
        return loaded_model, loaded_model is not None

    def should_archive(self, node_type):
        """Curve-fit models are always archived."""
        return True

    def get_basic_metadata(self, node_type):
        """Return an empty metadata dict."""
        return dict()

    def get_mae(self, node_type, component, X_test, y_test):
        """Mean absolute error of the component model on the test data."""
        predicted_values = self.predict(node_type, component, X_test, skip_preprocess=True)
        mae = mean_absolute_error(y_test, predicted_values)
        return mae

    def get_mape(self, node_type, component, X_test, y_test):
        """Mean absolute percentage error over samples with a positive target.

        Returns -1 when no test sample has a positive target value.
        """
        y_test = list(y_test)
        predicted_values = self.predict(node_type, component, X_test, skip_preprocess=True)
        # Only samples with a positive target are kept, to avoid dividing by zero.
        non_zero_predicted_values = np.array([predicted for i, predicted in enumerate(predicted_values) if y_test[i] > 0])
        if len(non_zero_predicted_values) == 0:
            return -1
        non_zero_y_test = np.array([y for y in y_test if y > 0])
        absolute_percentage_errors = np.abs((non_zero_y_test - non_zero_predicted_values) / non_zero_y_test) * 100
        mape = np.mean(absolute_percentage_errors)
        return mape

    def save_model(self, component_save_path, node_type, component):
        """Checkpoint the component model under ``component_save_path``."""
        model = self.node_models[node_type][component]
        filepath = os.path.join(component_save_path, component)
        self.save_checkpoint(model, filepath)

    def component_model_filename(self, component):
        """File name used for the pickled model of ``component``."""
        return component + ".pkl"

    def get_weight_dict(self, node_type):
        """Build the per-component weight dictionary for ``node_type``.

        Each entry holds the scaler scale of every feature and the fitted
        curve parameters (``popt``) of the component model.
        """
        weight_dict = dict()

        for component, model in self.node_models[node_type].items():
            scaler = self.node_scalers[node_type]
            weight_dict[component] = {
                "All_Weights": {
                    "Categorical_Variables": dict(),
                    "Numerical_Variables": {self.features[i]:
                                            {"scale": scaler.scale_[i]} for i in range(len(self.features))},
                    "CurveFit_Weights": list(model.popt)
                }
            }
        return weight_dict
Oops, something went wrong.