Merge pull request #52 from naved001/bug-fix/dont-charge-stopped-pods

naved001 · web-flow · commit 73a9391f67db · 2024-03-31T17:46:31.000-04:00
Bug fix: don't charge stopped pods
diff --git a/openshift_metrics/tests/test_utils.py b/openshift_metrics/tests/test_utils.py
@@ -397,7 +397,7 @@ def test_condense_metrics(self):
                         "cpu": 10,
                         "mem": 15,
                     },
-                    60: {
+                    900: {
                         "cpu": 10,
                         "mem": 15,
                     }
@@ -409,7 +409,7 @@ def test_condense_metrics(self):
                         "cpu": 2,
                         "mem": 256,
                     },
-                    100: {
+                    900: {
                         "cpu": 2,
                         "mem": 256,
                     }
@@ -422,7 +422,7 @@ def test_condense_metrics(self):
                     0: {
                         "cpu": 10,
                         "mem": 15,
-                        "duration": 120
+                        "duration": 1800
                     }
                 }
             },
@@ -431,7 +431,7 @@ def test_condense_metrics(self):
                     0: {
                         "cpu": 2,
                         "mem": 256,
-                        "duration": 200
+                        "duration": 1800
                     }
                 }
             },
@@ -473,15 +473,15 @@ def test_condense_metrics_with_change(self):
                         "cpu": 20,
                         "mem": 25,
                     },
-                    60: {
+                    900: {
                         "cpu": 20,
                         "mem": 25,
                     },
-                    120: {
+                    1800: {
                         "cpu": 25,
                         "mem": 25,
                     },
-                    180: {
+                    2700: {
                         "cpu": 20,
                         "mem": 25,
                     }
@@ -494,17 +494,17 @@ def test_condense_metrics_with_change(self):
                     0: {
                         "cpu": 20,
                         "mem": 25,
-                        "duration": 120
+                        "duration": 1800
                     },
-                    120: {
+                    1800: {
                         "cpu": 25,
                         "mem": 25,
-                        "duration": 60
+                        "duration": 900
                     },
-                    180: {
+                    2700: {
                         "cpu": 20,
                         "mem": 25,
-                        "duration": 60
+                        "duration": 900
                     }
                 }
             },
@@ -521,7 +521,7 @@ def test_condense_metrics_skip_metric(self):
                         "mem": 35,
                         "gpu": 1,
                     },
-                    60: {
+                    900: {
                         "cpu": 30,
                         "mem": 35,
                         "gpu": 2,
@@ -536,14 +536,95 @@ def test_condense_metrics_skip_metric(self):
                         "cpu": 30,
                         "mem": 35,
                         "gpu": 1,
-                        "duration": 120
+                        "duration": 1800
                     }
                 }
             },
         }
         condensed_dict = utils.condense_metrics(test_input_dict,['cpu','mem'])
         self.assertEqual(condensed_dict, expected_condensed_dict)
 
+    def test_condense_metrics_with_timeskips(self):
+        test_input_dict = {
+            "pod1": {
+                "metrics": {
+                    0: {
+                        "cpu": 1,
+                        "mem": 4,
+                    },
+                    900: {
+                        "cpu": 1,
+                        "mem": 4,
+                    },
+                    1800: {
+                        "cpu": 1,
+                        "mem": 4,
+                    },
+                    5400: { # time skipped
+                        "cpu": 1,
+                        "mem": 4,
+                    },
+                    6300: {
+                        "cpu": 1,
+                        "mem": 4,
+                    },
+                    8100: { # metric changed and time skipped
+                        "cpu": 2,
+                        "mem": 8,
+                    },
+                    9000: {
+                        "cpu": 2,
+                        "mem": 8,
+                    },
+                }
+            },
+            "pod2": {
+                "metrics": {
+                    0: {
+                        "cpu": 2,
+                        "mem": 16,
+                    },
+                    900: {
+                        "cpu": 2,
+                        "mem": 16,
+                    }
+                }
+            },
+        }
+        expected_condensed_dict = {
+            "pod1": {
+                "metrics": {
+                    0: {
+                        "cpu": 1,
+                        "mem": 4,
+                        "duration": 2700
+                    },
+                    5400: {
+                        "cpu": 1,
+                        "mem": 4,
+                        "duration": 1800
+                    },
+                    8100: {
+                        "cpu": 2,
+                        "mem": 8,
+                        "duration": 1800
+                    },
+                }
+            },
+            "pod2": {
+                "metrics": {
+                    0: {
+                        "cpu": 2,
+                        "mem": 16,
+                        "duration": 1800
+                    }
+                }
+            },
+        }
+        condensed_dict = utils.condense_metrics(test_input_dict,['cpu','mem'])
+        self.assertEqual(condensed_dict, expected_condensed_dict)
+
+
 class TestWriteMetricsByPod(TestCase):
 
     @mock.patch('openshift_metrics.utils.get_namespace_attributes')
diff --git a/openshift_metrics/utils.py b/openshift_metrics/utils.py
@@ -252,8 +252,37 @@ def merge_metrics(metric_name, metric_list, output_dict):
 def condense_metrics(input_metrics_dict, metrics_to_check):
     """
     Checks if the value of metrics is the same, and removes redundant
-    metrics while updating the duration
+    metrics while updating the duration. If there's a gap in the reported
+    metrics, that gap is not counted as part of the duration.
+
+    Here's a sample input dictionary in which empty lines mark where metrics
+    are missing (a time gap) or where the metric values change.
+
+    {'naved-test+test-pod': {'gpu_type': 'No GPU',
+                         'metrics': {1711741500: {'cpu_request': '1',
+                                                  'memory_request': '3221225472'},
+                                     1711742400: {'cpu_request': '1',
+                                                  'memory_request': '3221225472'},
+                                     1711743300: {'cpu_request': '1',
+                                                  'memory_request': '3221225472'},
+                                     1711744200: {'cpu_request': '1',
+                                                  'memory_request': '3221225472'},
+
+                                     1711746000: {'cpu_request': '1',
+                                                  'memory_request': '3221225472'},
+
+                                     1711746900: {'cpu_request': '1',
+                                                  'memory_request': '4294967296'},
+                                     1711747800: {'cpu_request': '1',
+                                                  'memory_request': '4294967296'},
+                                     1711748700: {'cpu_request': '1',
+                                                  'memory_request': '4294967296'},
+
+                                     1711765800: {'cpu_request': '1',
+                                                  'memory_request': '4294967296'}},
+                         'namespace': 'naved-test'}}
     """
+    interval = STEP_MIN * 60
     condensed_dict = {}
     for pod, pod_dict in input_metrics_dict.items():
         metrics_dict = pod_dict["metrics"]
@@ -262,26 +291,28 @@ def condense_metrics(input_metrics_dict, metrics_to_check):
 
         start_epoch_time = epoch_times_list[0]
 
-        # calculate the interval if we have more than 1 measurement, otherwise
-        # use the STEP_MIN from the query as best guess
-        if len(epoch_times_list) > 1:
-            interval = epoch_times_list[1] - epoch_times_list[0]
-        else:
-            interval = STEP_MIN * 60
-
         start_metric_dict = metrics_dict[start_epoch_time].copy()
-        for epoch_time in epoch_times_list:
+
+        for i in range(len(epoch_times_list)):
+            epoch_time = epoch_times_list[i]
             same_metrics = True
+            continuous_metrics = True
             for metric in metrics_to_check:
                 if metrics_dict[start_epoch_time].get(metric, 0) != metrics_dict[epoch_time].get(metric, 0):  # fmt: skip
                     same_metrics = False
 
-            if not same_metrics:
-                duration = epoch_time - start_epoch_time
+            if i !=0 and epoch_time - epoch_times_list[i-1]> interval:
+                # i.e. if the gap between 2 consecutive timestamps is more
+                # than the expected sampling interval then the pod was stopped
+                continuous_metrics = False
+
+            if not same_metrics or not continuous_metrics:
+                duration = epoch_times_list[i-1] - start_epoch_time + interval
                 start_metric_dict["duration"] = duration
                 new_metrics_dict[start_epoch_time] = start_metric_dict
                 start_epoch_time = epoch_time
                 start_metric_dict = metrics_dict[start_epoch_time].copy()
+
         duration = epoch_time - start_epoch_time + interval
         start_metric_dict["duration"] = duration
         new_metrics_dict[start_epoch_time] = start_metric_dict