Skip to content

Commit

Permalink
make metric dimensions more generic and customizable; #4
Browse files Browse the repository at this point in the history
  • Loading branch information
stolarczyk committed Apr 28, 2022
1 parent fbaaab2 commit b3ac3ef
Show file tree
Hide file tree
Showing 16 changed files with 331 additions and 327 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ tests/test/*

# mkdocs
site
docs/API_usage_files/

# development environment
.env
Expand Down
51 changes: 23 additions & 28 deletions cloudwatcher/argparser.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
""" Computing configuration representation """

import argparse
from random import choices

from ._version import __version__
from .const import LOG_CMD, METRIC_CMD, SUBPARSER_MESSAGES
from .const import CLI_DEFAULTS, LOG_CMD, METRIC_CMD, SUBPARSER_MESSAGES


class _VersionInHelpParser(argparse.ArgumentParser):
Expand All @@ -19,18 +18,6 @@ def format_help(self):
def build_argparser():
"""Build argument parser"""

# args defaults
metric_name = "mem_used"
id = "memory_usage"
days = 1
hours = 0
minutes = 0
unit = "Bytes"
stat = "Maximum"
period = 60
dir = "./"
region = "us-east-1"

# add argument parser
parser = _VersionInHelpParser(
description="CloudWatch logs and metrics explorer.",
Expand Down Expand Up @@ -70,7 +57,7 @@ def add_subparser(cmd, msg, subparsers):
help="Region to monitor the metrics within. (default: %(default)s)",
type=str,
required=False,
default=region,
default=CLI_DEFAULTS["region"],
metavar="R",
)
aws_creds_group.add_argument(
Expand Down Expand Up @@ -103,7 +90,7 @@ def add_subparser(cmd, msg, subparsers):
"-d",
"--dir",
help="Directory to store the results in. Used with `--save` (default: %(default)s)",
default=dir,
default=CLI_DEFAULTS["dir"],
)

sps[METRIC_CMD].add_argument(
Expand All @@ -118,25 +105,34 @@ def add_subparser(cmd, msg, subparsers):
"-i",
"--id",
help="The unique identifier to assign to the metric data. Must be of the form '^[a-z][a-zA-Z0-9_]*$'.",
default=id,
default=CLI_DEFAULTS["id"],
required=False,
metavar="ID",
)
sps[METRIC_CMD].add_argument(
"-m",
"--metric",
help="Name of the metric collected by CloudWatchAgent (default: %(default)s)",
default=metric_name,
default=CLI_DEFAULTS["metric_name"],
required=False,
metavar="N",
)
sps[METRIC_CMD].add_argument(
"-iid",
"--instance-id",
help="Instance ID, needs to follow 'i-<numbers>' format",
"-dn",
"--dimension-name",
help="The name of the dimension to query. (default: %(default)s)",
required=False,
type=str,
metavar="N",
default=CLI_DEFAULTS["dimension_name"],
)
sps[METRIC_CMD].add_argument(
"-dv",
"--dimension-value",
help="The value of the dimension to filter on.",
required=True,
type=str,
metavar="ID",
metavar="V",
)
sps[METRIC_CMD].add_argument(
"--uptime",
Expand All @@ -150,23 +146,23 @@ def add_subparser(cmd, msg, subparsers):
metric_collection_start_time.add_argument(
"--days",
help="How many days to subtract from the current date to determine the metric collection start time (default: %(default)s).",
default=days,
default=CLI_DEFAULTS["days"],
type=int,
metavar="D",
)
metric_collection_start_time.add_argument(
"-hr",
"--hours",
help="How many hours to subtract from the current time to determine the metric collection start time (default: %(default)s).",
default=hours,
default=CLI_DEFAULTS["hours"],
type=int,
metavar="H",
)
metric_collection_start_time.add_argument(
"-mi",
"--minutes",
help="How many minutes to subtract from the current time to determine the metric collection start time (default: %(default)s).",
default=minutes,
default=CLI_DEFAULTS["minutes"],
type=int,
metavar="M",
)
Expand All @@ -178,15 +174,14 @@ def add_subparser(cmd, msg, subparsers):
If you specify a unit, it acts as a filter and returns only data that was
collected with that unit specified. Use 'Bytes' for memory (default: %(default)s)
""",
default=unit,
type=str,
metavar="U",
)
sps[METRIC_CMD].add_argument(
"-s",
"--stat",
help="The statistic to apply over the time intervals, e.g. 'Maximum' (default: %(default)s)",
default=stat,
default=CLI_DEFAULTS["stat"],
type=str,
metavar="S",
)
Expand All @@ -197,7 +192,7 @@ def add_subparser(cmd, msg, subparsers):
The granularity, in seconds, of the returned data points. Choices: 1, 5, 10, 30, 60, or any multiple of 60 (default: %(default)s).
It affects the data availability. See the docs 'Usage' section for more details.
""",
default=period,
default=CLI_DEFAULTS["period"],
type=int,
metavar="P",
)
Expand Down
24 changes: 8 additions & 16 deletions cloudwatcher/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,6 @@ def main():
if args.query_json is not None:
raise NotImplementedError("Querying via JSON is not yet implemented")

if not args.instance_id.startswith("i-"):
raise ValueError(
f"Instance id needs to start with 'i-'. Got: {args.instance_id}"
)

if not os.path.exists(args.dir):
_LOGGER.info(f"Creating directory: {args.dir}")
os.makedirs(args.dir, exist_ok=True)
Expand All @@ -50,7 +45,8 @@ def main():
metric_name=args.metric,
metric_id=args.id,
metric_unit=args.unit,
ec2_instance_id=args.instance_id,
dimension_value=args.dimension_value,
dimension_name=args.dimension_name,
aws_access_key_id=args.aws_access_key_id,
aws_secret_access_key=args.aws_secret_access_key,
aws_session_token=args.aws_session_token,
Expand All @@ -67,30 +63,26 @@ def main():

metric_watcher.log_response(response=response)
metric_watcher.log_metric(response=response)
metric_watcher.log_metric_summary(response=response)

name_prefix = f"{args.dimension_name}_{args.dimension_value}_{args.metric}"
if args.save:
metric_watcher.save_metric_json(
file_path=os.path.join(
args.dir, f"{args.instance_id}_{args.metric}.json"
),
file_path=os.path.join(args.dir, f"{name_prefix}.json"),
response=response,
)
metric_watcher.save_metric_csv(
file_path=os.path.join(
args.dir, f"{args.instance_id}_{args.metric}.csv"
),
file_path=os.path.join(args.dir, f"{name_prefix}.csv"),
response=response,
)
metric_watcher.save_response_json(
file_path=os.path.join(args.dir, f"{args.instance_id}_response.json"),
file_path=os.path.join(args.dir, f"{name_prefix}_response.json"),
response=response,
)

if args.plot:
metric_watcher.save_metric_plot(
file_path=os.path.join(
args.dir, f"{args.instance_id}_{args.metric}.png"
),
file_path=os.path.join(args.dir, f"{name_prefix}.png"),
response=response,
)

Expand Down
14 changes: 14 additions & 0 deletions cloudwatcher/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,17 @@
"hour": {"days": 0, "hours": 1, "minutes": 0, "stat": "Maximum", "period": 1},
"minute": {"days": 0, "hours": 0, "minutes": 1, "stat": "Maximum", "period": 1},
}

# Default values for the command-line interface arguments.
# argparser.py reads these via CLI_DEFAULTS["<key>"] when building the parser.
CLI_DEFAULTS = dict(
    metric_name="mem_used",  # CloudWatchAgent metric queried by default
    id="memory_usage",  # identifier assigned to the metric data query
    dimension_name="InstanceId",  # dimension to filter the metric on
    days=1,  # metric collection window: days back from now
    hours=0,  # metric collection window: hours back from now
    minutes=0,  # metric collection window: minutes back from now
    unit="Bytes",  # metric unit filter; 'Bytes' suits memory metrics
    stat="Maximum",  # statistic applied over each period
    period=60,  # granularity of returned data points, in seconds
    dir="./",  # directory where results are saved
    region="us-east-1",  # AWS region to query
)
2 changes: 1 addition & 1 deletion cloudwatcher/metric_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def __call__(self, target: str) -> None:
raise NotImplementedError(
"Logging responses to a file is not yet implemented."
)
_LOGGER.info(json.dumps(self.response, indent=4, default=str))
_LOGGER.debug(json.dumps(self.response, indent=4, default=str))


class TimedMetricPlotter(TimedMetricHandler):
Expand Down
53 changes: 29 additions & 24 deletions cloudwatcher/metricwatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pytz

from .cloudwatcher import CloudWatcher
from .const import DEFAULT_QUERY_KWARGS, QUERY_KWARGS_PRESETS
from .metric_handlers import (
ResponseHandler,
ResponseLogger,
Expand All @@ -18,7 +19,6 @@
TimedMetricPlotter,
TimedMetricSummarizer,
)
from .const import DEFAULT_QUERY_KWARGS, QUERY_KWARGS_PRESETS

_LOGGER = logging.getLogger(__name__)

Expand All @@ -31,10 +31,11 @@ class MetricWatcher(CloudWatcher):
def __init__(
self,
namespace: str,
ec2_instance_id: str,
dimension_name: str,
dimension_value: str,
metric_name: str,
metric_id: str,
metric_unit: str,
metric_unit: Optional[str] = None,
aws_access_key_id: Optional[str] = None,
aws_secret_access_key: Optional[str] = None,
aws_session_token: Optional[str] = None,
Expand All @@ -55,7 +56,8 @@ def __init__(
aws_region_name=aws_region_name,
)
self.namespace = namespace
self.ec2_instance_id = ec2_instance_id
self.dimension_name = dimension_name
self.dimension_value = dimension_value
self.metric_name = metric_name
self.metric_id = metric_id
self.metric_unit = metric_unit
Expand Down Expand Up @@ -91,8 +93,9 @@ def query_ec2_metrics(
start_time = now - datetime.timedelta(days=days, hours=hours, minutes=minutes)

_LOGGER.info(
f"Querying '{self.metric_name}' for EC2 instance '{self.ec2_instance_id}'"
f" from {start_time.strftime('%H:%M:%S')} to {now.strftime('%H:%M:%S')}"
f"Querying '{self.metric_name}' for dimension "
f"('{self.dimension_name}'='{self.dimension_value}') from "
f"{start_time.strftime('%H:%M:%S')} to {now.strftime('%H:%M:%S')}"
)

response = self.client.get_metric_data(
Expand All @@ -104,11 +107,16 @@ def query_ec2_metrics(
"Namespace": self.namespace,
"MetricName": self.metric_name,
"Dimensions": [
{"Name": "InstanceId", "Value": self.ec2_instance_id}
{
"Name": self.dimension_name,
"Value": self.dimension_value,
}
],
},
"Stat": stat,
"Unit": self.metric_unit,
"Unit": str(
self.metric_unit
), # str(None) is desired, if no unit is specified
"Period": period,
},
},
Expand All @@ -125,30 +133,28 @@ def query_ec2_metrics(

def get_ec2_uptime(
self,
ec2_instance_id: str,
days: int,
hours: int,
minutes: int,
) -> int:
"""
Get the runtime of an EC2 instance
:param logging.logger logger: logger to use. Any object that has 'info', 'warning' and 'error' methods
:param int days: how many days to subtract from the current date to determine the metric collection start time
:param int hours: how many hours to subtract from the current time to determine the metric collection start time
:param int minutes: how many minutes to subtract from the current time to determine the metric collection start time
:param str namespace: namespace of the metric, e.g. 'NepheleNamespace'
:param boto3.resource ec2_resource: boto3 resource object to use, optional
:param str ec2_instance_id: the ID of the EC2 instance to query
Returns:
int: runtime of the instance in seconds
"""
if not self.is_ec2_running():
if not self.is_ec2_running(ec2_instance_id):
_LOGGER.info(
f"Instance '{self.ec2_instance_id}' is not running anymore. "
f"Instance '{self.dimension_value}' is not running anymore. "
f"Uptime will be estimated based on reported metrics in the last {days} days"
)
instances = self.ec2_resource.instances.filter(
Filters=[{"Name": "instance-id", "Values": [self.ec2_instance_id]}]
Filters=[{"Name": "instance-id", "Values": [self.dimension_value]}]
)
# get the latest reported metric
metrics_response = self.query_ec2_metrics(
Expand All @@ -167,33 +173,32 @@ def get_ec2_uptime(
earliest_metric_report_time - latest_metric_report_time
).total_seconds()
except IndexError:
_LOGGER.warning(f"No metric data found for EC2: {self.ec2_instance_id}")
_LOGGER.warning(f"No metric data found for EC2: {self.dimension_value}")
return
instances = self.ec2_resource.instances.filter(
Filters=[{"Name": "instance-id", "Values": [self.ec2_instance_id]}]
Filters=[{"Name": "instance-id", "Values": [self.dimension_value]}]
)
for instance in instances:
_LOGGER.info(
f"Instance '{self.ec2_instance_id}' is still running. "
f"Instance '{self.dimension_value}' is still running. "
f"Launch time: {instance.launch_time}"
)
return (datetime.now(pytz.utc) - instance.launch_time).total_seconds()

def is_ec2_running(self) -> bool:
def is_ec2_running(self, ec2_instance_id: str) -> bool:
"""
Check if EC2 instance is running
:param str ec2_instance_id: the ID of the EC2 instance to query
:returns bool: True if instance is running, False otherwise
"""
instances = self.ec2_resource.instances.filter(
Filters=[{"Name": "instance-id", "Values": [self.ec2_instance_id]}]
Filters=[{"Name": "instance-id", "Values": [ec2_instance_id]}]
)
if len(list(instances)) == 0:
return None
if len(list(instances)) > 1:
raise Exception(
f"Multiple EC2 instances matched by ID: {self.ec2_instance_id}"
)
raise Exception(f"Multiple EC2 instances matched by ID: {ec2_instance_id}")
for instance in instances:
# check the status codes and their meanings: https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_InstanceState.html
if instance.state["Code"] <= 16:
Expand Down Expand Up @@ -375,7 +380,7 @@ def save_metric_plot(
query_preset=query_preset,
)

def summarize_metric_json(
def log_metric_summary(
self, response: Optional[Dict] = None, query_preset: Optional[str] = None
):
"""
Expand Down
Loading

0 comments on commit b3ac3ef

Please sign in to comment.