-
Notifications
You must be signed in to change notification settings - Fork 36
/
Copy pathStream_CloudWatch_CF_Template.yaml
182 lines (175 loc) · 7.06 KB
/
Stream_CloudWatch_CF_Template.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# CloudFormation template: CloudWatch dashboard, SNS topic and alarms for one
# Amazon SQS queue. The version string stays quoted so it is not parsed as a date.
AWSTemplateFormatVersion: '2010-09-09'
Description: >-
  This CloudFormation template creates a CloudWatch dashboard, SNS topic, alarms
  for monitoring an Amazon SQS queue. It takes the SQS queue name and email
  address as input parameters.
# Stack inputs. Both are required (no defaults) and are validated up front so
# that a typo fails at parameter validation instead of part-way through
# resource creation.
Parameters:
  # Queue name (not URL or ARN). It is used as the QueueName metric dimension
  # and embedded in the dashboard and alarm names in the Resources section.
  SQSQueueName:
    Type: String
    Description: The name of the Amazon SQS queue to monitor.
    MinLength: 1
    MaxLength: 80
    AllowedPattern: '^[A-Za-z0-9_-]+(\.fifo)?$'
    ConstraintDescription: 'Must be 1-80 characters of letters, digits, hyphens or underscores, optionally ending in .fifo.'
  # Address subscribed to the SNS topic with the email protocol.
  EmailAddress:
    Type: String
    Description: The email address to receive notifications.
    # Deliberately lenient: one "@", no whitespace, and a dot in the domain part.
    AllowedPattern: '^[^@\s]+@[^@\s]+\.[^@\s]+$'
    ConstraintDescription: 'Must be a valid email address, for example ops@example.com.'
Resources:
  # Topic that every alarm below publishes to; the EmailAddress parameter is
  # subscribed with the email protocol.
  SNSTopic:
    Type: AWS::SNS::Topic
    Properties:
      Subscription:
        - Endpoint: !Ref EmailAddress
          Protocol: email

  # Dashboard with three widgets for the monitored queue:
  #   - visible messages (Sum, 5 min)
  #   - age of the oldest message (Maximum, 5 min)
  #   - messages deleted vs. sent (Sum, 5 min)
  # The body is JSON passed through !Sub so the queue name and region are
  # substituted at deploy time.
  SQSQueueDashboard:
    Type: AWS::CloudWatch::Dashboard
    Properties:
      DashboardName: !Sub 'SQS-Queue-${SQSQueueName}-Monitoring'
      DashboardBody: !Sub |
        {
          "widgets": [
            {
              "type": "metric",
              "x": 0,
              "y": 0,
              "width": 12,
              "height": 6,
              "properties": {
                "metrics": [
                  [ "AWS/SQS", "ApproximateNumberOfMessagesVisible", "QueueName", "${SQSQueueName}" ]
                ],
                "view": "timeSeries",
                "stacked": false,
                "region": "${AWS::Region}",
                "title": "Messages in Queue",
                "stat": "Sum",
                "period": 300
              }
            },
            {
              "type": "metric",
              "x": 12,
              "y": 0,
              "width": 12,
              "height": 6,
              "properties": {
                "metrics": [
                  [ "AWS/SQS", "ApproximateAgeOfOldestMessage", "QueueName", "${SQSQueueName}" ]
                ],
                "view": "timeSeries",
                "stacked": false,
                "region": "${AWS::Region}",
                "title": "Age of Oldest Message",
                "stat": "Maximum",
                "period": 300
              }
            },
            {
              "type": "metric",
              "x": 0,
              "y": 6,
              "width": 24,
              "height": 6,
              "properties": {
                "metrics": [
                  [ "AWS/SQS", "NumberOfMessagesDeleted", "QueueName", "${SQSQueueName}" ],
                  [ ".", "NumberOfMessagesSent", ".", "." ]
                ],
                "view": "timeSeries",
                "stacked": false,
                "region": "${AWS::Region}",
                "title": "Messages Deleted and Sent",
                "stat": "Sum",
                "period": 300
              }
            }
          ]
        }

  # This alarm is triggered when the number of visible messages in the queue exceeds 10,000.
  # The threshold of 10,000 is based on the assumption that a backlog of more than 10,000 messages
  # could indicate that your application is unable to keep up, and processing is getting delayed significantly.
  # This threshold should be adjusted based on your expected maximum queue size and traffic patterns.
  MessageVisibleAlarm:
    Type: AWS::CloudWatch::Alarm
    Properties:
      AlarmName: !Sub 'SQS-Queue-${SQSQueueName}-MessageVisibleAlarm'
      AlarmDescription: 'Alarm for a high number of visible messages in the SQS queue'
      MetricName: ApproximateNumberOfMessagesVisible
      Namespace: AWS/SQS
      Statistic: Maximum
      Period: 300
      EvaluationPeriods: 1
      Threshold: 10000
      ComparisonOperator: GreaterThanThreshold
      Dimensions:
        - Name: QueueName
          Value: !Ref SQSQueueName
      AlarmActions:
        - !Ref SNSTopic

  # This alarm is triggered when the age of the oldest message in the queue exceeds 1 hour (3600 seconds).
  # The one-hour processing threshold is set because Amazon marketing data is typically delivered hourly,
  # so messages are expected to be processed within this timeframe.
  # Having a message stuck for over 1 hour indicates an issue worth investigating.
  # This threshold can be adjusted based on your expected message processing times.
  OldestMessageAgeAlarm:
    Type: AWS::CloudWatch::Alarm
    Properties:
      AlarmName: !Sub 'SQS-Queue-${SQSQueueName}-OldestMessageAgeAlarm'
      AlarmDescription: 'Alarm for a high age of the oldest message in the SQS queue'
      MetricName: ApproximateAgeOfOldestMessage
      Namespace: AWS/SQS
      Statistic: Maximum
      Period: 3600
      EvaluationPeriods: 1
      Threshold: 3600  # Threshold set to 1 hour (3600 seconds)
      ComparisonOperator: GreaterThanThreshold
      Dimensions:
        - Name: QueueName
          Value: !Ref SQSQueueName
      AlarmActions:
        - !Ref SNSTopic

  # Metric-math alarm comparing hourly message volume in vs. out of the queue.
  # The alarm is triggered if the result of the expression (e1) is greater than or equal to 1.
  # This is based on the assumption that messages are expected to be processed every hour,
  # as Amazon Marketing Stream is meant for hourly data.
  # If your processing cycle is different, please adjust the threshold and evaluation period accordingly.
  MessageDeletedDivergenceAlarm:
    Type: AWS::CloudWatch::Alarm
    Properties:
      AlarmName: !Sub 'SQS-Queue-${SQSQueueName}-MessageDeletedDivergenceAlarm'
      AlarmDescription: 'Alert when a significant divergence is observed between the number of messages sent to the queue and the number of messages deleted from the queue within an hour. This could indicate an issue with your application processing messages successfully.'
      # Periods with no datapoints for the queue are treated as healthy.
      TreatMissingData: notBreaching
      Metrics:
        # This expression evaluates to 1 (alarm state) if the difference between the number of messages sent (m1)
        # and the number of messages deleted (m2) exceeds 100 within the evaluation period (1 hour).
        # This is based on the assumption that a significant divergence between sent and deleted messages
        # could indicate an issue with your application processing messages successfully.
        - Id: e1
          Expression: 'IF((m1-m2) > 100,1,0)'
          Label: MessageDeletedDivergenceAlarm
        # m1 / m2 are inputs to e1 only (ReturnData: false). They use Sum so each
        # value is the total message count for the hour, which is what the
        # expression compares and what the dashboard widget plots.
        - Id: m1
          MetricStat:
            Metric:
              Namespace: AWS/SQS
              MetricName: NumberOfMessagesSent
              Dimensions:
                - Name: QueueName
                  Value: !Ref SQSQueueName
            Period: 3600  # Evaluation period is 1 hour (3600 seconds)
            Stat: Sum
            Unit: Count
          ReturnData: false
        - Id: m2
          MetricStat:
            Metric:
              Namespace: AWS/SQS
              MetricName: NumberOfMessagesDeleted
              Dimensions:
                - Name: QueueName
                  Value: !Ref SQSQueueName
            Period: 3600  # Evaluation period is 1 hour (3600 seconds)
            Stat: Sum
            Unit: Count
          ReturnData: false
      EvaluationPeriods: 1
      Threshold: 1
      ComparisonOperator: GreaterThanOrEqualToThreshold
      # Notify the same topic as the other alarms; without an action the alarm
      # would change state without alerting anyone.
      AlarmActions:
        - !Ref SNSTopic
# Values reported by the stack once it has been created.
Outputs:
  # Console link for the dashboard; the region and the dashboard resource
  # reference are substituted into the URL.
  DashboardURL:
    Description: The URL of the CloudWatch dashboard
    Value:
      Fn::Sub: 'https://${AWS::Region}.console.aws.amazon.com/cloudwatch/home?region=${AWS::Region}#dashboards:name=${SQSQueueDashboard}'
  # Reference to the notification topic created by this template.
  SNSTopicArn:
    Description: The ARN of the SNS topic
    Value:
      Ref: SNSTopic