Skip to content

Commit e6fc1d9

Browse files
authored
Re-enable alerting after Lambda URL migration (#194)
1 parent 09a307d commit e6fc1d9

File tree

2 files changed

+125
-20
lines changed

2 files changed

+125
-20
lines changed

cloudformation/alerting.yml

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# Alarm definitions for the application stack. The parent template passes in
# the notification targets and naming prefix through the parameters below.
AWSTemplateFormatVersion: "2010-09-09"
Description: Stack Alarms
Transform: AWS::Serverless-2016-10-31

Parameters:
  # Target referenced by alarms that are routed to the general alert channel.
  AlertSNSArn:
    Type: String
    Description: SNS Queue to send general alarm alerts to

  # Target referenced by alarms that are routed to the priority alert channel.
  PriorityAlertSNSArn:
    Type: String
    Description: SNS Queue to send priority alarm alerts to

  # Prefix used to build alarm names and the queue/function names they watch.
  # The pattern needs at least three characters, alphanumeric at both ends,
  # with hyphens permitted only in between.
  ApplicationPrefix:
    Type: String
    Description: Application prefix, no ending dash
    AllowedPattern: '^[a-zA-Z0-9]+[a-zA-Z0-9-]+[a-zA-Z0-9]+$'

  # Supplied by the parent stack; none of the alarms visible in this template
  # reference it.
  ApplicationFriendlyName:
    Type: String
    Description: Application friendly name that will be used in resource descriptions

  # Used as the DistributionId dimension of the CloudFront 5xx alarm.
  MainCloudfrontDistributionId:
    Type: String
    Description: Cloudfront Distribution ID that serves main API endpoints.
22+
23+
24+
Resources:
25+
# Notifies the priority target whenever the dead-letter queue holds any
# visible message during a one-minute period.
AppDLQMessagesAlarm:
  Type: AWS::CloudWatch::Alarm
  Properties:
    AlarmName: !Sub "${ApplicationPrefix}-sqs-dlq"
    AlarmDescription: 'Items are present in the application DLQ, meaning some messages failed to process.'
    # Metric being watched: visible-message count of the <prefix>-sqs-dlq queue.
    Namespace: AWS/SQS
    MetricName: ApproximateNumberOfMessagesVisible
    Dimensions:
      - Name: QueueName
        Value: !Sub "${ApplicationPrefix}-sqs-dlq"
    # Evaluation: the maximum over a single 60-second period must exceed zero.
    Statistic: Maximum
    Period: 60
    EvaluationPeriods: 1
    ComparisonOperator: GreaterThanThreshold
    Threshold: 0
    AlarmActions:
      - !Ref PriorityAlertSNSArn
42+
43+
# Notifies the general alert target when the tm95 statistic of the function's
# UrlRequestLatency metric stays above the threshold for two periods in a row.
AppLatencyAlarm:
  Type: "AWS::CloudWatch::Alarm"
  Properties:
    AlarmName: !Sub ${ApplicationPrefix}-latency-high
    # The description previously referred to "API gateway" and a "Trailing
    # Mean"; the metric watched here is the Lambda function URL latency and
    # tm95 is a trimmed-mean statistic.
    AlarmDescription: "Trimmed mean (tm95) Lambda function URL latency is > 1.25s for 2 consecutive 2-minute periods."
    Namespace: "AWS/Lambda"
    MetricName: "UrlRequestLatency"
    Dimensions:
      - Name: "FunctionName"
        Value: !Sub ${ApplicationPrefix}-lambda
    ExtendedStatistic: "tm95"
    # Two 120-second periods are evaluated and, with no DatapointsToAlarm set,
    # both must breach.
    Period: "120"
    EvaluationPeriods: "2"
    ComparisonOperator: "GreaterThanThreshold"
    # 1250 corresponds to the 1.25s quoted in the description
    # (assumes the metric is reported in milliseconds - TODO confirm).
    Threshold: "1250"
    AlarmActions:
      - !Ref AlertSNSArn
60+
61+
# Notifies the priority target when the function URL has served no requests
# during a five-minute period.
AppNoRequestsAlarm:
  Type: "AWS::CloudWatch::Alarm"
  Properties:
    AlarmName: !Sub ${ApplicationPrefix}-no-requests
    AlarmDescription: "No requests have been received in the past 5 minutes."
    Namespace: "AWS/Lambda"
    MetricName: "UrlRequestCount"
    Dimensions:
      - Name: "FunctionName"
        Value: !Sub ${ApplicationPrefix}-lambda
    # Sum of requests over one 300-second period must be at least 1 to stay OK.
    Statistic: "Sum"
    Period: "300"
    EvaluationPeriods: "1"
    ComparisonOperator: "LessThanThreshold"
    Threshold: "1"
    # A period without any requests may contain no datapoints at all rather
    # than a zero. CloudWatch's default handling would then leave the alarm in
    # INSUFFICIENT_DATA and never notify, so missing data is counted as a
    # breach.
    TreatMissingData: breaching
    AlarmActions:
      - !Ref PriorityAlertSNSArn
78+
79+
# Notifies the priority target when the function's Errors metric, summed over
# a five-minute period, exceeds the threshold.
AppInvocationErrorAlarm:
  Type: AWS::CloudWatch::Alarm
  Properties:
    AlarmName: !Sub "${ApplicationPrefix}-error-invocation"
    AlarmDescription: 'Lambda threw an error, meaning the Fastify application itself has encountered an error'
    # Metric being watched: Errors for the <prefix>-lambda function.
    Namespace: 'AWS/Lambda'
    MetricName: 'Errors'
    Dimensions:
      - Name: 'FunctionName'
        Value: !Sub "${ApplicationPrefix}-lambda"
    # Evaluation: one 300-second period, summed.
    Statistic: 'Sum'
    Period: '300'
    EvaluationPeriods: '1'
    # NOTE(review): "greater than 1" means two or more errors are needed to
    # alarm, while the description reads as if a single error should notify -
    # confirm which is intended.
    ComparisonOperator: 'GreaterThanThreshold'
    Threshold: '1'
    AlarmActions:
      - !Ref PriorityAlertSNSArn
96+
97+
# Notifies the priority target when the CloudFront distribution's average 5xx
# error rate over a five-minute period is above 1 (percent).
App5xxErrorAlarm:
  Type: "AWS::CloudWatch::Alarm"
  Properties:
    AlarmName: !Sub ${ApplicationPrefix}-cloudfront-5xx-error
    AlarmDescription: "Main application responses are more than 1% 5xx errors (from Cloudfront)"
    Namespace: "AWS/CloudFront"
    MetricName: "5xxErrorRate"
    # CloudFront publishes its metrics with both a DistributionId and a
    # Region=Global dimension, and CloudWatch matches a metric only on its
    # exact dimension set. Without the Region entry the alarm finds no data.
    # NOTE(review): CloudFront metrics are published in us-east-1 - confirm
    # this stack is deployed there.
    Dimensions:
      - Name: "DistributionId"
        Value: !Ref MainCloudfrontDistributionId
      - Name: "Region"
        Value: "Global"
    Statistic: "Average"
    Period: "300"
    EvaluationPeriods: "1"
    ComparisonOperator: "GreaterThanThreshold"
    Threshold: "1"
    AlarmActions:
      - !Ref PriorityAlertSNSArn

cloudformation/main.yml

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,18 @@ Resources:
107107
QueueName: !Sub ${ApplicationPrefix}-sqs
108108
MessageTimeout: !Ref SqsMessageTimeout
109109

110+
# Nested application that instantiates the alarm template in ./alerting.yml.
# It is created only when the IsProd condition is true.
AppAlarms:
  Type: AWS::Serverless::Application
  Condition: IsProd
  Properties:
    Location: ./alerting.yml
    Parameters:
      # Naming inputs forwarded unchanged from this stack's own parameters.
      ApplicationPrefix: !Ref ApplicationPrefix
      ApplicationFriendlyName: !Ref ApplicationFriendlyName
      # Notification targets for general and priority alarms.
      AlertSNSArn: !Ref AlertSNSArn
      PriorityAlertSNSArn: !Ref PriorityAlertSNSArn
      # ID of the distribution declared in this stack, for the CloudFront alarm.
      MainCloudfrontDistributionId: !GetAtt AppFrontendCloudfrontDistribution.Id
121+
110122
LinkryRecordSetv4:
111123
Condition: IsDev
112124
Type: AWS::Route53::RecordSet
@@ -602,26 +614,6 @@ Resources:
602614
AttributeName: "expireAt"
603615
Enabled: true
604616

605-
606-
AppDLQMessagesAlarm:
607-
Type: "AWS::CloudWatch::Alarm"
608-
Condition: IsProd
609-
Properties:
610-
AlarmName: !Sub ${ApplicationPrefix}-sqs-dlq
611-
AlarmDescription: "Items are present in the application DLQ, meaning some messages failed to process."
612-
Namespace: "AWS/SQS"
613-
MetricName: "ApproximateNumberOfMessagesVisible"
614-
Statistic: "Maximum"
615-
Period: 60
616-
EvaluationPeriods: 1
617-
ComparisonOperator: "GreaterThanThreshold"
618-
Threshold: 0
619-
Dimensions:
620-
- Name: QueueName
621-
Value: !Sub ${ApplicationPrefix}-sqs-dlq
622-
AlarmActions:
623-
- !Ref PriorityAlertSNSArn
624-
625617
AppFrontendS3Bucket:
626618
Type: AWS::S3::Bucket
627619
Properties:

0 commit comments

Comments
 (0)