Skip to content

Commit

Permalink
make metric dimensions more generic and customizable; #4
Browse files Browse the repository at this point in the history
  • Loading branch information
stolarczyk committed Apr 28, 2022
1 parent fbaaab2 commit b3ac3ef
Show file tree
Hide file tree
Showing 16 changed files with 331 additions and 327 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ tests/test/*

# mkdocs
site
docs/API_usage_files/

# development environment
.env
Expand Down
51 changes: 23 additions & 28 deletions cloudwatcher/argparser.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
""" Computing configuration representation """

import argparse
from random import choices

from ._version import __version__
from .const import LOG_CMD, METRIC_CMD, SUBPARSER_MESSAGES
from .const import CLI_DEFAULTS, LOG_CMD, METRIC_CMD, SUBPARSER_MESSAGES


class _VersionInHelpParser(argparse.ArgumentParser):
Expand All @@ -19,18 +18,6 @@ def format_help(self):
def build_argparser():
"""Build argument parser"""

# args defaults
metric_name = "mem_used"
id = "memory_usage"
days = 1
hours = 0
minutes = 0
unit = "Bytes"
stat = "Maximum"
period = 60
dir = "./"
region = "us-east-1"

# add argument parser
parser = _VersionInHelpParser(
description="CloudWatch logs and metrics explorer.",
Expand Down Expand Up @@ -70,7 +57,7 @@ def add_subparser(cmd, msg, subparsers):
help="Region to monitor the metrics within. (default: %(default)s)",
type=str,
required=False,
default=region,
default=CLI_DEFAULTS["region"],
metavar="R",
)
aws_creds_group.add_argument(
Expand Down Expand Up @@ -103,7 +90,7 @@ def add_subparser(cmd, msg, subparsers):
"-d",
"--dir",
help="Directory to store the results in. Used with `--save` (default: %(default)s)",
default=dir,
default=CLI_DEFAULTS["dir"],
)

sps[METRIC_CMD].add_argument(
Expand All @@ -118,25 +105,34 @@ def add_subparser(cmd, msg, subparsers):
"-i",
"--id",
help="The unique identifier to assign to the metric data. Must be of the form '^[a-z][a-zA-Z0-9_]*$'.",
default=id,
default=CLI_DEFAULTS["id"],
required=False,
metavar="ID",
)
sps[METRIC_CMD].add_argument(
"-m",
"--metric",
help="Name of the metric collected by CloudWatchAgent (default: %(default)s)",
default=metric_name,
default=CLI_DEFAULTS["metric_name"],
required=False,
metavar="N",
)
sps[METRIC_CMD].add_argument(
"-iid",
"--instance-id",
help="Instance ID, needs to follow 'i-<numbers>' format",
"-dn",
"--dimension-name",
help="The name of the dimension to query. (default: %(default)s)",
required=False,
type=str,
metavar="N",
default=CLI_DEFAULTS["dimension_name"],
)
sps[METRIC_CMD].add_argument(
"-dv",
"--dimension-value",
help="The value of the dimension to filter on.",
required=True,
type=str,
metavar="ID",
metavar="V",
)
sps[METRIC_CMD].add_argument(
"--uptime",
Expand All @@ -150,23 +146,23 @@ def add_subparser(cmd, msg, subparsers):
metric_collection_start_time.add_argument(
"--days",
help="How many days to subtract from the current date to determine the metric collection start time (default: %(default)s).",
default=days,
default=CLI_DEFAULTS["days"],
type=int,
metavar="D",
)
metric_collection_start_time.add_argument(
"-hr",
"--hours",
help="How many hours to subtract from the current time to determine the metric collection start time (default: %(default)s).",
default=hours,
default=CLI_DEFAULTS["hours"],
type=int,
metavar="H",
)
metric_collection_start_time.add_argument(
"-mi",
"--minutes",
help="How many minutes to subtract from the current time to determine the metric collection start time (default: %(default)s).",
default=minutes,
default=CLI_DEFAULTS["minutes"],
type=int,
metavar="M",
)
Expand All @@ -178,15 +174,14 @@ def add_subparser(cmd, msg, subparsers):
If you specify a unit, it acts as a filter and returns only data that was
collected with that unit specified. Use 'Bytes' for memory (default: %(default)s)
""",
default=unit,
type=str,
metavar="U",
)
sps[METRIC_CMD].add_argument(
"-s",
"--stat",
help="The statistic to apply over the time intervals, e.g. 'Maximum' (default: %(default)s)",
default=stat,
default=CLI_DEFAULTS["stat"],
type=str,
metavar="S",
)
Expand All @@ -197,7 +192,7 @@ def add_subparser(cmd, msg, subparsers):
The granularity, in seconds, of the returned data points. Choices: 1, 5, 10, 30, 60, or any multiple of 60 (default: %(default)s).
It affects the data availability. See the docs 'Usage' section for more details.
""",
default=period,
default=CLI_DEFAULTS["period"],
type=int,
metavar="P",
)
Expand Down
24 changes: 8 additions & 16 deletions cloudwatcher/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,6 @@ def main():
if args.query_json is not None:
raise NotImplementedError("Querying via JSON is not yet implemented")

if not args.instance_id.startswith("i-"):
raise ValueError(
f"Instance id needs to start with 'i-'. Got: {args.instance_id}"
)

if not os.path.exists(args.dir):
_LOGGER.info(f"Creating directory: {args.dir}")
os.makedirs(args.dir, exist_ok=True)
Expand All @@ -50,7 +45,8 @@ def main():
metric_name=args.metric,
metric_id=args.id,
metric_unit=args.unit,
ec2_instance_id=args.instance_id,
dimension_value=args.dimension_value,
dimension_name=args.dimension_name,
aws_access_key_id=args.aws_access_key_id,
aws_secret_access_key=args.aws_secret_access_key,
aws_session_token=args.aws_session_token,
Expand All @@ -67,30 +63,26 @@ def main():

metric_watcher.log_response(response=response)
metric_watcher.log_metric(response=response)
metric_watcher.log_metric_summary(response=response)

name_prefix = f"{args.dimension_name}_{args.dimension_value}_{args.metric}"
if args.save:
metric_watcher.save_metric_json(
file_path=os.path.join(
args.dir, f"{args.instance_id}_{args.metric}.json"
),
file_path=os.path.join(args.dir, f"{name_prefix}.json"),
response=response,
)
metric_watcher.save_metric_csv(
file_path=os.path.join(
args.dir, f"{args.instance_id}_{args.metric}.csv"
),
file_path=os.path.join(args.dir, f"{name_prefix}.csv"),
response=response,
)
metric_watcher.save_response_json(
file_path=os.path.join(args.dir, f"{args.instance_id}_response.json"),
file_path=os.path.join(args.dir, f"{name_prefix}_response.json"),
response=response,
)

if args.plot:
metric_watcher.save_metric_plot(
file_path=os.path.join(
args.dir, f"{args.instance_id}_{args.metric}.png"
),
file_path=os.path.join(args.dir, f"{name_prefix}.png"),
response=response,
)

Expand Down
14 changes: 14 additions & 0 deletions cloudwatcher/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,17 @@
"hour": {"days": 0, "hours": 1, "minutes": 0, "stat": "Maximum", "period": 1},
"minute": {"days": 0, "hours": 0, "minutes": 1, "stat": "Maximum", "period": 1},
}

# Default values for the command-line interface arguments.
# argparser.py reads these via CLI_DEFAULTS["<key>"] when building the parser.
CLI_DEFAULTS = dict(
    metric_name="mem_used",  # CloudWatchAgent metric queried by default
    id="memory_usage",  # identifier assigned to the metric data query
    dimension_name="InstanceId",  # dimension to filter the metric on
    days=1,  # metric collection window: days back from now
    hours=0,  # metric collection window: hours back from now
    minutes=0,  # metric collection window: minutes back from now
    unit="Bytes",  # metric unit filter; 'Bytes' suits memory metrics
    stat="Maximum",  # statistic applied over each period
    period=60,  # granularity of returned data points, in seconds
    dir="./",  # directory where results are saved
    region="us-east-1",  # AWS region to query
)
2 changes: 1 addition & 1 deletion cloudwatcher/metric_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def __call__(self, target: str) -> None:
raise NotImplementedError(
"Logging responses to a file is not yet implemented."
)
_LOGGER.info(json.dumps(self.response, indent=4, default=str))
_LOGGER.debug(json.dumps(self.response, indent=4, default=str))


class TimedMetricPlotter(TimedMetricHandler):
Expand Down
53 changes: 29 additions & 24 deletions cloudwatcher/metricwatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pytz

from .cloudwatcher import CloudWatcher
from .const import DEFAULT_QUERY_KWARGS, QUERY_KWARGS_PRESETS
from .metric_handlers import (
ResponseHandler,
ResponseLogger,
Expand All @@ -18,7 +19,6 @@
TimedMetricPlotter,
TimedMetricSummarizer,
)
from .const import DEFAULT_QUERY_KWARGS, QUERY_KWARGS_PRESETS

_LOGGER = logging.getLogger(__name__)

Expand All @@ -31,10 +31,11 @@ class MetricWatcher(CloudWatcher):
def __init__(
self,
namespace: str,
ec2_instance_id: str,
dimension_name: str,
dimension_value: str,
metric_name: str,
metric_id: str,
metric_unit: str,
metric_unit: Optional[str] = None,
aws_access_key_id: Optional[str] = None,
aws_secret_access_key: Optional[str] = None,
aws_session_token: Optional[str] = None,
Expand All @@ -55,7 +56,8 @@ def __init__(
aws_region_name=aws_region_name,
)
self.namespace = namespace
self.ec2_instance_id = ec2_instance_id
self.dimension_name = dimension_name
self.dimension_value = dimension_value
self.metric_name = metric_name
self.metric_id = metric_id
self.metric_unit = metric_unit
Expand Down Expand Up @@ -91,8 +93,9 @@ def query_ec2_metrics(
start_time = now - datetime.timedelta(days=days, hours=hours, minutes=minutes)

_LOGGER.info(
f"Querying '{self.metric_name}' for EC2 instance '{self.ec2_instance_id}'"
f" from {start_time.strftime('%H:%M:%S')} to {now.strftime('%H:%M:%S')}"
f"Querying '{self.metric_name}' for dimension "
f"('{self.dimension_name}'='{self.dimension_value}') from "
f"{start_time.strftime('%H:%M:%S')} to {now.strftime('%H:%M:%S')}"
)

response = self.client.get_metric_data(
Expand All @@ -104,11 +107,16 @@ def query_ec2_metrics(
"Namespace": self.namespace,
"MetricName": self.metric_name,
"Dimensions": [
{"Name": "InstanceId", "Value": self.ec2_instance_id}
{
"Name": self.dimension_name,
"Value": self.dimension_value,
}
],
},
"Stat": stat,
"Unit": self.metric_unit,
"Unit": str(
self.metric_unit
), # str(None) is desired, if no unit is specified
"Period": period,
},
},
Expand All @@ -125,30 +133,28 @@ def query_ec2_metrics(

def get_ec2_uptime(
self,
ec2_instance_id: str,
days: int,
hours: int,
minutes: int,
) -> int:
"""
Get the runtime of an EC2 instance
:param logging.logger logger: logger to use. Any object that has 'info', 'warning' and 'error' methods
:param int days: how many days to subtract from the current date to determine the metric collection start time
:param int hours: how many hours to subtract from the current time to determine the metric collection start time
:param int minutes: how many minutes to subtract from the current time to determine the metric collection start time
:param str namespace: namespace of the metric, e.g. 'NepheleNamespace'
:param boto3.resource ec2_resource: boto3 resource object to use, optional
:param str ec2_instance_id: the ID of the EC2 instance to query
Returns:
int: runtime of the instance in seconds
"""
if not self.is_ec2_running():
if not self.is_ec2_running(ec2_instance_id):
_LOGGER.info(
f"Instance '{self.ec2_instance_id}' is not running anymore. "
f"Instance '{self.dimension_value}' is not running anymore. "
f"Uptime will be estimated based on reported metrics in the last {days} days"
)
instances = self.ec2_resource.instances.filter(
Filters=[{"Name": "instance-id", "Values": [self.ec2_instance_id]}]
Filters=[{"Name": "instance-id", "Values": [self.dimension_value]}]
)
# get the latest reported metric
metrics_response = self.query_ec2_metrics(
Expand All @@ -167,33 +173,32 @@ def get_ec2_uptime(
earliest_metric_report_time - latest_metric_report_time
).total_seconds()
except IndexError:
_LOGGER.warning(f"No metric data found for EC2: {self.ec2_instance_id}")
_LOGGER.warning(f"No metric data found for EC2: {self.dimension_value}")
return
instances = self.ec2_resource.instances.filter(
Filters=[{"Name": "instance-id", "Values": [self.ec2_instance_id]}]
Filters=[{"Name": "instance-id", "Values": [self.dimension_value]}]
)
for instance in instances:
_LOGGER.info(
f"Instance '{self.ec2_instance_id}' is still running. "
f"Instance '{self.dimension_value}' is still running. "
f"Launch time: {instance.launch_time}"
)
return (datetime.now(pytz.utc) - instance.launch_time).total_seconds()

def is_ec2_running(self) -> bool:
def is_ec2_running(self, ec2_instance_id: str) -> bool:
"""
Check if EC2 instance is running
:param str ec2_instance_id: the ID of the EC2 instance to query
:returns bool: True if instance is running, False otherwise
"""
instances = self.ec2_resource.instances.filter(
Filters=[{"Name": "instance-id", "Values": [self.ec2_instance_id]}]
Filters=[{"Name": "instance-id", "Values": [ec2_instance_id]}]
)
if len(list(instances)) == 0:
return None
if len(list(instances)) > 1:
raise Exception(
f"Multiple EC2 instances matched by ID: {self.ec2_instance_id}"
)
raise Exception(f"Multiple EC2 instances matched by ID: {ec2_instance_id}")
for instance in instances:
# check the status codes and their meanings: https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_InstanceState.html
if instance.state["Code"] <= 16:
Expand Down Expand Up @@ -375,7 +380,7 @@ def save_metric_plot(
query_preset=query_preset,
)

def summarize_metric_json(
def log_metric_summary(
self, response: Optional[Dict] = None, query_preset: Optional[str] = None
):
"""
Expand Down
Loading

0 comments on commit b3ac3ef

Please sign in to comment.