Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Moving GMT to microjoules #932

Merged
merged 26 commits into from
Jan 4, 2025
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
4d848e1
Splitting measurements table in metrics and values; Moving sampling_r…
ArneTR Jan 1, 2025
7a20170
Typos and textual additions
ArneTR Jan 1, 2025
0799d00
(Tests): Updated tests and demo data to new measurement table split
ArneTR Jan 1, 2025
bc8a9da
Renamed sampling resolution to sampling rate
ArneTR Jan 1, 2025
df2ef77
Added sampling rate also to comparison views
ArneTR Jan 1, 2025
0cce865
(Tests): Aligned display order of phases stats and tests
ArneTR Jan 1, 2025
5c0f09b
(rework): Moved all providers from mJ to uJ
ArneTR Oct 3, 2024
a402737
(rework): Moved calibrate to uJ
ArneTR Jan 3, 2025
245fa0b
(feature/rework): Frontend can now display values as Wh instead of J. …
ArneTR Jan 3, 2025
78bf802
(rework): Moving sampling rate to INT; Using uJ as default energy now…
ArneTR Jan 3, 2025
3f47e3e
Clarifications and comments and removed legacy
ArneTR Jan 3, 2025
a4be19a
(Tests): Updated all test data to uJ; Updated all tests for new uJ en…
ArneTR Jan 3, 2025
da6f523
(fix): tests
ArneTR Jan 3, 2025
f04ca61
Added ee
ArneTR Jan 3, 2025
4262b32
(style): Typos [skip ci]
ArneTR Jan 3, 2025
c77706f
Merge branch 'main' into microjoules
ArneTR Jan 3, 2025
c38c0b7
(Fix): Ordering was incorrect for getting compare data.
ArneTR Jan 3, 2025
6f19f96
(Fix): Merge markers
ArneTR Jan 3, 2025
7cf6416
(fix): New uJ demo data got lost
ArneTR Jan 3, 2025
b56ca7c
(fix): Some fixes while refactoring
ArneTR Jan 3, 2025
51d2762
(fix): Tests
ArneTR Jan 3, 2025
56cb5f8
(Tests): Network I/O
ArneTR Jan 3, 2025
5a90a9f
(Style): Some typos etc.
ArneTR Jan 3, 2025
b835bb5
(rework): Moved phase_stats totally to Decimal
ArneTR Jan 3, 2025
12da8ca
(Fix): Tests - NetworkIO sample data is actually sampled at 1 s interval
ArneTR Jan 4, 2025
5a716bf
(Tests): Added NetworkIOProvider Tests
ArneTR Jan 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 40 additions & 12 deletions api/api_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,6 @@ def store_artifact(artifact_type: Enum, key:str, data, ex=2592000):
# The same unit for proper comparison!
#
def rescale_metric_value(value, unit):
if unit == 'mJ':
value = value * 1_000
unit = 'uJ'

# We only expect values to be uJ for energy in the future. Changing values now temporarily.
# TODO: Refactor this once all data in the DB is uJ
if unit not in ('uJ', 'ug') and not unit.startswith('ugCO2e/'):
raise ValueError('Unexpected unit occured for metric rescaling: ', unit)
ArneTR marked this conversation as resolved.
Show resolved Hide resolved

Expand Down Expand Up @@ -249,7 +243,7 @@ def get_comparison_details(user, ids, comparison_db_key):
WHERE
(TRUE = %s OR user_id = ANY(%s::int[]))
AND id = ANY(%s::uuid[])
ORDER BY created_at -- Must be same order as in get_phase_stats
ORDER BY created_at DESC -- must be same order as get_phase_stats so that the order in the comparison bar charts aligns with the comparsion_details array
''').format(sql.Identifier(comparison_db_key))
ArneTR marked this conversation as resolved.
Show resolved Hide resolved

params = (user.is_super_user(), user.visible_users(), ids)
Expand Down Expand Up @@ -386,7 +380,8 @@ def determine_comparison_case(user, ids):
def get_phase_stats(user, ids):
query = """
SELECT
a.phase, a.metric, a.detail_name, a.value, a.type, a.max_value, a.min_value, a.unit,
a.phase, a.metric, a.detail_name, a.value, a.type, a.max_value, a.min_value,
a.sampling_rate_avg, a.sampling_rate_max, a.sampling_rate_95p, a.unit,
b.uri, c.description, b.filename, b.commit_hash, b.branch,
b.id
FROM phase_stats as a
Expand All @@ -397,7 +392,9 @@ def get_phase_stats(user, ids):
(TRUE = %s OR b.user_id = ANY(%s::int[]))
AND a.run_id = ANY(%s::uuid[])
ORDER BY
b.created_at ASC -- Must be same order as in get_comparison_details
-- at least the run_ids must be same order as get_comparsion_details so that the order in the comparison bar charts aligns with the comparsion_details array
b.created_at ASC,
a.id ASC
"""
params = (user.is_super_user(), user.visible_users(), ids)
return DB().fetch_all(query, params=params)
Expand Down Expand Up @@ -505,7 +502,8 @@ def get_phase_stats_object(phase_stats, case=None, comparison_details=None, comp

for phase_stat in phase_stats:
[
phase, metric_name, detail_name, value, metric_type, max_value, min_value, unit,
phase, metric_name, detail_name, value, metric_type, max_value, min_value,
sampling_rate_avg, sampling_rate_max, sampling_rate_95p, unit,
repo, machine_description, filename, commit_hash, branch,
run_id
] = phase_stat
Expand Down Expand Up @@ -566,17 +564,29 @@ def get_phase_stats_object(phase_stats, case=None, comparison_details=None, comp
'max_mean': None,
'min_mean': None,
'stddev': None,
'sr_avg_avg': None,
'sr_max_max': None,
'sr_95p_max': None,
'sr_avg_values': [sampling_rate_avg], # temporary, we will delete this later
'sr_max_values': [sampling_rate_max], # temporary, we will delete this later
'sr_95p_values': [sampling_rate_95p], # temporary, we will delete this later
'ci': None,
'p_value': None, # only for the last key the list compare to the rest. one-sided t-test
'is_significant': None, # only for the last key the list compare to the rest. one-sided t-test
'values': [value],
}
if comparison_details:
detail_data[key]['values'] = [None for _ in comparison_details[key]] # create None filled list in comparision casese so that we can later understand which values are missing when parsing in JS for example
if comparison_details: # create None-filled lists in comparison cases so that we can later understand which values are missing when parsing in JS, for example
detail_data[key]['values'] = [None for _ in comparison_details[key]]
detail_data[key]['sr_avg_values'] = [None for _ in comparison_details[key]]
detail_data[key]['sr_max_values'] = [None for _ in comparison_details[key]]
detail_data[key]['sr_95p_values'] = [None for _ in comparison_details[key]]

# we replace None where we can with actual values
if comparison_details:
detail_data[key]['values'][comparison_details[key][run_id]['index']] = value
detail_data[key]['sr_avg_values'][comparison_details[key][run_id]['index']] = sampling_rate_avg
detail_data[key]['sr_max_values'][comparison_details[key][run_id]['index']] = sampling_rate_max
detail_data[key]['sr_95p_values'][comparison_details[key][run_id]['index']] = sampling_rate_95p

# since we do not save the min/max values we need to do the comparison here in every loop again
# all other statistics are derived later in add_phase_stats_statistics()
Expand Down Expand Up @@ -607,16 +617,24 @@ def add_phase_stats_statistics(phase_stats_object):
# if a detail has multiple values we calculate a std.dev and the one-sided t-test for the last value

values_none_filtered = [item for item in key_obj['values'] if item is not None]
sr_avg_values_none_filtered = [item for item in key_obj['sr_avg_values'] if item is not None]
sr_max_values_none_filtered = [item for item in key_obj['sr_max_values'] if item is not None]
sr_95p_values_none_filtered = [item for item in key_obj['sr_95p_values'] if item is not None]

key_obj['mean'] = values_none_filtered[0] # default. might be overridden
key_obj['max_mean'] = values_none_filtered[0] # default. might be overridden
key_obj['min_mean'] = values_none_filtered[0] # default. might be overridden
key_obj['sr_avg_avg'] = sr_avg_values_none_filtered[0] # default. might be overridden
key_obj['sr_max_max'] = sr_max_values_none_filtered[0] # default. might be overridden
key_obj['sr_95p_max'] = sr_95p_values_none_filtered[0] # default. might be overridden

if len(values_none_filtered) > 1:

t_stat = get_t_stat(len(values_none_filtered))

# JSON does not recognize the numpy data types. Sometimes int64 is returned
key_obj['mean'] = np.mean(values_none_filtered).item()
key_obj['sr_avg_avg'] = np.mean(sr_avg_values_none_filtered).item()

key_obj['stddev'] = np.std(values_none_filtered, correction=1).item()
# We are using now the STDDEV of the sample for two reasons:
Expand All @@ -627,7 +645,11 @@ def add_phase_stats_statistics(phase_stats_object):
# it is safer to use the sample STDDEV as it is always higher

key_obj['max_mean'] = np.max(values_none_filtered).item() # overwrite with max of list
key_obj['sr_max_max'] = np.max(sr_max_values_none_filtered).item() # overwrite with max of list
key_obj['sr_95p_max'] = np.max(sr_95p_values_none_filtered).item() # overwrite with max of list

key_obj['min_mean'] = np.min(values_none_filtered).item() # overwrite with min of list

key_obj['ci'] = (key_obj['stddev']*t_stat).item()

if len(values_none_filtered) > 2:
Expand All @@ -641,6 +663,12 @@ def add_phase_stats_statistics(phase_stats_object):
else:
key_obj['is_significant'] = True

# remove temporary keys only needed for mean/max/min calculations
del key_obj['sr_avg_values']
del key_obj['sr_max_values']
del key_obj['sr_95p_values']



## builds stats between the keys
if len(phase_stats_object['comparison_identifiers']) == 2:
Expand Down
15 changes: 9 additions & 6 deletions api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,19 +477,22 @@ async def get_measurements_single(run_id: str, user: User = Depends(authenticate
raise RequestValidationError('Run ID is not a valid UUID or empty')

query = '''
SELECT m.detail_name, m.time, m.metric,
m.value, m.unit
FROM measurements as m
JOIN runs as r ON m.run_id = r.id
SELECT
mm.detail_name, mv.time, mm.metric,
mv.value, mm.unit
FROM measurement_metrics as mm
JOIN measurement_values as mv ON mv.measurement_metric_id = mm.id
JOIN runs as r ON mm.run_id = r.id
WHERE
(TRUE = %s OR r.user_id = ANY(%s::int[]))
AND m.run_id = %s
AND mm.run_id = %s
'''
ArneTR marked this conversation as resolved.
Show resolved Hide resolved

params = (user.is_super_user(), user.visible_users(), run_id)

# extremely important to order here, cause the charting library in JS cannot do that automatically!
query = f"{query} ORDER BY m.metric ASC, m.detail_name ASC, m.time ASC"
# Furthermore we do time-lag calculations and need the order of metric first and then time in stats.js:179... Please do not change
query = f"{query} ORDER BY mm.metric ASC, mm.detail_name ASC, mv.time ASC"

data = DB().fetch_all(query, params=params)
if data is None or data == []:
Expand Down
Loading
Loading