Skip to content

Commit

Permalink
Merge pull request #172 from sunya-ch/trainer
Browse files Browse the repository at this point in the history
Fix extractor/ProfileIsolator bugs and update test cases
  • Loading branch information
rootfs authored Sep 28, 2023
2 parents 0639591 + dbe1166 commit 45e1594
Show file tree
Hide file tree
Showing 9 changed files with 24 additions and 15 deletions.
8 changes: 7 additions & 1 deletion src/train/extractor/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def get_workload_feature_data(self, query_results, features):

if all(col in aggr_query_data.columns for col in container_id_cols):
aggr_query_data.rename(columns={query: feature}, inplace=True)
aggr_query_data[container_id_colname] = aggr_query_data[container_id_cols].apply(lambda x: '/'.join(x), axis=1)
aggr_query_data[container_id_colname] = aggr_query_data[container_id_cols].apply(lambda x: '/'.join([str(xi) for xi in x]), axis=1)
# separate for each container_id
container_id_list = pd.unique(aggr_query_data[container_id_colname])

Expand Down Expand Up @@ -230,17 +230,20 @@ def get_power_data(self, query_results, energy_components, source):
if usage_ratio_query not in query_results:
# sum over mode (idle, dynamic) and unit col
df = aggr_query_data.groupby([TIMESTAMP_COL]).sum().reset_index().set_index(TIMESTAMP_COL)
time_diff_values = df.reset_index()[[TIMESTAMP_COL]].diff().dropna().values.mean()
df = df.loc[:, df.columns != unit_col]
# rename
colname = component_to_col(component)
df.rename(columns={query: colname}, inplace=True)
# find current value from aggregated query
df = df.sort_index()[colname].diff().dropna()
df /= time_diff_values
df = df.mask(df.lt(0)).ffill().fillna(0).convert_dtypes()
power_data_list += [df]
else:
# sum over mode (idle, dynamic)
aggr_query_data = aggr_query_data.groupby([unit_col, TIMESTAMP_COL]).sum().reset_index().set_index(TIMESTAMP_COL)
time_diff_values = aggr_query_data.reset_index()[[TIMESTAMP_COL]].diff().dropna().values.mean()
# add per unit_col
unit_vals = pd.unique(aggr_query_data[unit_col])
for unit_val in unit_vals:
Expand All @@ -250,16 +253,19 @@ def get_power_data(self, query_results, energy_components, source):
df.rename(columns={query: colname}, inplace=True)
# find current value from aggregated query
df = df.sort_index()[colname].diff().dropna()
df /= time_diff_values
df = df.mask(df.lt(0)).ffill().fillna(0).convert_dtypes()
power_data_list += [df]
else:
# sum over mode
aggr_query_data = aggr_query_data.groupby([TIMESTAMP_COL]).sum()
time_diff_values = aggr_query_data.reset_index()[[TIMESTAMP_COL]].diff().dropna().values.mean()
# rename
colname = component_to_col(component)
aggr_query_data.rename(columns={query: colname}, inplace=True)
# find current value from aggregated query
df = aggr_query_data.sort_index()[colname].diff().dropna()
df /= time_diff_values
df = df.mask(df.lt(0)).ffill().fillna(0).convert_dtypes()
power_data_list += [df]
power_data = pd.concat(power_data_list, axis=1).dropna()
Expand Down
4 changes: 2 additions & 2 deletions src/train/isolator/isolator.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ def isolate(self, data, label_cols, energy_source):
data = data.reset_index()
data = data.set_index([TIMESTAMP_COL])
target_data, _ = isolate_container(data, self.background_containers, label_cols)
isolated_data = target_data.copy()
isolated_data = squeeze_data(target_data, label_cols)
print(isolated_data)
try:
for label_col in label_cols:
component = col_to_component(label_col)
Expand All @@ -137,7 +138,6 @@ def isolate(self, data, label_cols, energy_source):
return None
isolated_data[label_col] = isolated_data[label_col] - isolated_data['profile']
isolated_data.drop(columns='profile', inplace=True)
isolated_data = isolated_data.reset_index()
if index_list[0] is not None:
isolated_data = isolated_data.set_index(index_list)
return isolated_data
Expand Down
Binary file added tests/data/prom_output/.DS_Store
Binary file not shown.
2 changes: 1 addition & 1 deletion tests/data/prom_output/idle.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/data/prom_output/prom_response.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/data/prom_response.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/data/prom_response_idle.json

Large diffs are not rendered by default.

10 changes: 7 additions & 3 deletions tests/extractor_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,18 +62,22 @@ def save_extract_results(instance, feature_group, extracted_data, node_level, sa
save_csv(save_path, filename, extracted_data)

def get_expected_power_columns(energy_components=test_energy_components, num_of_unit=test_num_of_unit):
return [component_to_col(component, "package", unit_val) for component in energy_components for unit_val in range(0,num_of_unit)]
# TODO: if ratio applied,
# return [component_to_col(component, "package", unit_val) for component in energy_components for unit_val in range(0,num_of_unit)]
return [component_to_col(component) for component in energy_components]

def assert_extract(extracted_data, power_columns, energy_components, num_of_unit, feature_group):
extracted_data_column_names = extracted_data.columns
# basic assert
assert extracted_data is not None, "extracted data is None"
assert len(power_columns) > 0, "no power label column {}".format(extracted_data_column_names)
assert node_info_column in extracted_data_column_names, "no {} in column {}".format(node_info_column, extracted_data_column_names)
expected_power_column_length = len(energy_components) * num_of_unit
# TODO: if ratio applied, expected_power_column_length = len(energy_components) * num_of_unit
expected_power_column_length = len(energy_components)
# detail assert
assert len(power_columns) == expected_power_column_length, "unexpected power label columns {}, expected {}".format(power_columns, expected_power_column_length)
expected_col_size = expected_power_column_length + len(FeatureGroups[FeatureGroup[feature_group]]) + 1 + num_of_unit # power ratio
# TODO: if ratio applied, expected_col_size must + 1 for power_ratio
expected_col_size = expected_power_column_length + len(FeatureGroups[FeatureGroup[feature_group]]) + num_of_unit # power ratio
assert len(extracted_data_column_names) == expected_col_size, "unexpected column length: expected {}, got {}({}) ".format(expected_col_size, extracted_data_column_names, len(extracted_data_column_names))

def process(query_results, feature_group, save_path=extractor_output_path, customize_extractors=test_customize_extractors, energy_source=test_energy_source, num_of_unit=2):
Expand Down
9 changes: 4 additions & 5 deletions tests/isolator_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def save_results(instance, extractor_name, feature_group, isolated_data, save_pa
def assert_isolate(extractor_result, isolated_data):
isolated_data_column_names = isolated_data.columns
assert isolated_data is not None, "isolated data is None"
value_df = isolated_data.drop(columns=container_level_index)
value_df = isolated_data.reset_index().drop(columns=container_level_index)
negative_df = value_df[(value_df<0).all(1)]
assert len(negative_df) == 0, "all data must be non-negative \n {}".format(negative_df)
assert len(extractor_result.columns) == len(isolated_data_column_names), "unexpected column length: expected {}, got {}({}) ".format(len(extractor_result.columns), isolated_data_column_names, len(isolated_data_column_names))
Expand Down Expand Up @@ -90,10 +90,9 @@ def process(test_isolators=test_isolators, customize_isolators=[], extract_path=
save_results(test_instance, extractor_name, feature_group, isolated_data, save_path=save_path)



if __name__ == '__main__':
# Add customize isolator here
customize_isolators = [TrainIsolator(idle_data=test_idle_data, profiler=DefaultProfiler)]
customize_isolators = [TrainIsolator(target_hints=["coremark"])]
customize_isolators += [ProfileBackgroundIsolator(test_profiles, test_idle_data)]
customize_isolators = [ProfileBackgroundIsolator(test_profiles, test_idle_data)]
customize_isolators += [TrainIsolator(idle_data=test_idle_data, profiler=DefaultProfiler)]
customize_isolators += [TrainIsolator(target_hints=["coremark"])]
process(customize_isolators=customize_isolators)

0 comments on commit 45e1594

Please sign in to comment.