diff --git a/.gitignore b/.gitignore index 04c1c20..2a64195 100644 --- a/.gitignore +++ b/.gitignore @@ -27,5 +27,8 @@ terraform.rc # Python virtual environment .venv +# JetBrains IDEs +.idea + # Lambda zip directory out/ diff --git a/functions/check-msk-status/index.py b/functions/check-msk-status/index.py index df2543b..a98e1b8 100644 --- a/functions/check-msk-status/index.py +++ b/functions/check-msk-status/index.py @@ -1,14 +1,16 @@ +from typing import List + import boto3 import os +CLUSTER_ARNS: list[str] = os.environ["CLUSTER_ARNS"].split(",") +ENABLE_CLOUDWATCH_METRICS: str = os.environ["ENABLE_CLOUDWATCH_METRICS"] +ENABLE_SNS_NOTIFICATIONS: str = os.environ["ENABLE_SNS_NOTIFICATIONS"] +LAMBDASNSTOPIC: str = os.environ["SNS_TOPIC_ARN"] +SUPPRESS_STATES: list[str] = os.environ["SUPPRESS_STATES"].split(",") -def lambda_handler(event, context): - CLUSTER_ARNS = os.environ["CLUSTER_ARNS"].split(",") - ENABLE_CLOUDWATCH_METRICS = os.environ["ENABLE_CLOUDWATCH_METRICS"] - ENABLE_SNS_NOTIFICATIONS = os.environ["ENABLE_SNS_NOTIFICATIONS"] - LAMBDASNSTOPIC = os.environ["SNS_TOPIC_ARN"] - SUPPRESS_STATES = os.environ["SUPPRESS_STATES"].split(",") +def lambda_handler(event, context): region = "eu-central-1" # Create boto clients @@ -16,11 +18,6 @@ def lambda_handler(event, context): cloudwatch = boto3.client("cloudwatch") sns = boto3.client("sns") - # Retrieve a list of clusters - response = kafka.list_clusters_v2() - # Extract the cluster ARNs from the response - cluster_arns = response["ClusterInfoList"] - valid_states = ["ACTIVE"] + SUPPRESS_STATES print( "Notifications suppressed for these MSK states: {}".format( @@ -45,18 +42,9 @@ def lambda_handler(event, context): ) ) - # Cover situation where cluster has been deleted. - if ENABLE_CLOUDWATCH_METRICS: - x = 1 if status not in valid_states else 0 - put_custom_metric(cloudwatch=cloudwatch, cluster_name=cluster_name, value=x) - print( - "Put custom metric for cluster: {} with value: {}".format( - cluster_name, x - ) - ) - if ENABLE_SNS_NOTIFICATIONS: - if status not in valid_states: - print("The MSK cluster: {} needs attention.".format(arn)) + if status not in valid_states: + print("The MSK cluster {} needs attention.".format(arn)) + if ENABLE_SNS_NOTIFICATIONS: sns.publish( TopicArn=LAMBDASNSTOPIC, Message="MSK cluster " @@ -65,14 +53,20 @@ def lambda_handler(event, context): + status, Subject="MSK Health Warning!", ) + if ENABLE_CLOUDWATCH_METRICS: + put_custom_metric( + cloudwatch=cloudwatch, cluster_name=cluster_name, value=1 + ) else: print( "The MSK cluster {} is in a healthy state, and is reachable and available for use.".format( arn ) ) - - # Return the status + if ENABLE_CLOUDWATCH_METRICS: + put_custom_metric( + cloudwatch=cloudwatch, cluster_name=cluster_name, value=0 + ) return {"statusCode": 200, "body": "OK"} diff --git a/main.tf b/main.tf index 5f3c1d6..970e9aa 100644 --- a/main.tf +++ b/main.tf @@ -161,7 +161,7 @@ resource "aws_cloudwatch_metric_alarm" "this" { namespace = "Custom/Kafka" period = 300 metric_name = "Status" - alarm_name = "msk_status_monitor-${each.key}-${random_id.id.hex}" + alarm_name = "msk-status-monitor-${each.key}-${random_id.id.hex}" comparison_operator = "GreaterThanThreshold" alarm_description = "This alarm triggers on MSK cluster status" evaluation_periods = 2