Skip to content

Commit b575efc

Browse files
CCM-14044 Setting prod defaults
1 parent 2a91bed commit b575efc

7 files changed

Lines changed: 124 additions & 2 deletions

File tree

infrastructure/terraform/components/api/README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,12 @@ No requirements.
2121
| <a name="input_enable_api_data_trace"></a> [enable\_api\_data\_trace](#input\_enable\_api\_data\_trace) | Enable API Gateway data trace logging | `bool` | `false` | no |
2222
| <a name="input_enable_backups"></a> [enable\_backups](#input\_enable\_backups) | Enable backups | `bool` | `false` | no |
2323
| <a name="input_enable_event_cache"></a> [enable\_event\_cache](#input\_enable\_event\_cache) | Enable caching of events to an S3 bucket | `bool` | `true` | no |
24+
| <a name="input_enable_event_publishing_anomaly_detection"></a> [enable\_event\_publishing\_anomaly\_detection](#input\_enable\_event\_publishing\_anomaly\_detection) | Enable CloudWatch anomaly detection alarm for SNS message publishing. Detects abnormal drops or spikes in event publishing volume. | `bool` | `true` | no |
2425
| <a name="input_enable_sns_delivery_logging"></a> [enable\_sns\_delivery\_logging](#input\_enable\_sns\_delivery\_logging) | Enable SNS Delivery Failure Notifications | `bool` | `true` | no |
2526
| <a name="input_environment"></a> [environment](#input\_environment) | The name of the tfscaffold environment | `string` | n/a | yes |
27+
| <a name="input_event_publishing_anomaly_band_width"></a> [event\_publishing\_anomaly\_band\_width](#input\_event\_publishing\_anomaly\_band\_width) | The width of the anomaly detection band. Higher values (e.g. 4-6) reduce sensitivity and noise, lower values (e.g. 2-3) increase sensitivity. Recommended: 2-4 for high sensitivity; the default of 5 trades sensitivity for lower noise. | `number` | `5` | no |
28+
| <a name="input_event_publishing_anomaly_evaluation_periods"></a> [event\_publishing\_anomaly\_evaluation\_periods](#input\_event\_publishing\_anomaly\_evaluation\_periods) | Number of evaluation periods for the publishing anomaly alarm. Each period is defined by event\_publishing\_anomaly\_period. | `number` | `3` | no |
29+
| <a name="input_event_publishing_anomaly_period"></a> [event\_publishing\_anomaly\_period](#input\_event\_publishing\_anomaly\_period) | The period in seconds over which the specified statistic is applied for anomaly detection. Minimum 300 seconds (5 minutes). Recommended: 300-600. | `number` | `300` | no |
2630
| <a name="input_eventpub_control_plane_bus_arn"></a> [eventpub\_control\_plane\_bus\_arn](#input\_eventpub\_control\_plane\_bus\_arn) | ARN of the EventBridge control plane bus for eventpub | `string` | `""` | no |
2731
| <a name="input_eventpub_data_plane_bus_arn"></a> [eventpub\_data\_plane\_bus\_arn](#input\_eventpub\_data\_plane\_bus\_arn) | ARN of the EventBridge data plane bus for eventpub | `string` | `""` | no |
2832
| <a name="input_force_destroy"></a> [force\_destroy](#input\_force\_destroy) | Flag to force deletion of S3 buckets | `bool` | `false` | no |
@@ -52,7 +56,7 @@ No requirements.
5256
| <a name="module_ddb_alarms_mi"></a> [ddb\_alarms\_mi](#module\_ddb\_alarms\_mi) | ../../modules/alarms-ddb | n/a |
5357
| <a name="module_ddb_alarms_suppliers"></a> [ddb\_alarms\_suppliers](#module\_ddb\_alarms\_suppliers) | ../../modules/alarms-ddb | n/a |
5458
| <a name="module_domain_truststore"></a> [domain\_truststore](#module\_domain\_truststore) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.26/terraform-s3bucket.zip | n/a |
55-
| <a name="module_eventpub"></a> [eventpub](#module\_eventpub) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.31/terraform-eventpub.zip | n/a |
59+
| <a name="module_eventpub"></a> [eventpub](#module\_eventpub) | git::https://github.com/NHSDigital/nhs-notify-shared-modules.git//infrastructure/terraform/modules/eventpub | 3.0.4 |
5660
| <a name="module_eventsub"></a> [eventsub](#module\_eventsub) | ../../modules/eventsub | n/a |
5761
| <a name="module_get_letter"></a> [get\_letter](#module\_get\_letter) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
5862
| <a name="module_get_letter_data"></a> [get\_letter\_data](#module\_get\_letter\_data) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |

infrastructure/terraform/components/api/modules_eventpub.tf

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
module "eventpub" {
2-
source = "https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.31/terraform-eventpub.zip"
2+
source = "git::https://github.com/NHSDigital/nhs-notify-shared-modules.git//infrastructure/terraform/modules/eventpub?ref=3.0.4"
33

44
name = "eventpub"
55

@@ -31,7 +31,14 @@ module "eventpub" {
3131
additional_policies_for_event_cache_bucket = [
3232
data.aws_iam_policy_document.eventcache[0].json
3333
]
34+
35+
# CloudWatch Anomaly Detection for publishing
36+
enable_event_publishing_anomaly_detection = var.enable_event_publishing_anomaly_detection
37+
event_publishing_anomaly_band_width = var.event_publishing_anomaly_band_width
38+
event_publishing_anomaly_evaluation_periods = var.event_publishing_anomaly_evaluation_periods
39+
event_publishing_anomaly_period = var.event_publishing_anomaly_period
3440
}
41+
3542
data "aws_iam_policy_document" "eventcache" {
3643
count = local.event_cache_bucket_name != null ? 1 : 0
3744
statement {

infrastructure/terraform/components/api/variables.tf

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,3 +199,27 @@ variable "enable_alarms" {
199199
description = "Enable CloudWatch alarms for this deployed environment"
200200
default = true
201201
}
202+
203+
variable "enable_event_publishing_anomaly_detection" {
204+
type = bool
205+
description = "Enable CloudWatch anomaly detection alarm for SNS message publishing. Detects abnormal drops or spikes in event publishing volume."
206+
default = true
207+
}
208+
209+
variable "event_publishing_anomaly_evaluation_periods" {
210+
type = number
211+
description = "Number of evaluation periods for the publishing anomaly alarm. Each period is defined by event_publishing_anomaly_period."
212+
default = 3
213+
}
214+
215+
variable "event_publishing_anomaly_period" {
216+
type = number
217+
description = "The period in seconds over which the specified statistic is applied for anomaly detection. Minimum 300 seconds (5 minutes). Recommended: 300-600."
218+
default = 300
219+
}
220+
221+
variable "event_publishing_anomaly_band_width" {
222+
type = number
223+
description = "The width of the anomaly detection band. Higher values (e.g. 4-6) reduce sensitivity and noise, lower values (e.g. 2-3) increase sensitivity. Recommended: 2-4 for high sensitivity; the default of 5 trades sensitivity for lower noise."
224+
default = 5
225+
}

infrastructure/terraform/modules/eventsub/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,14 @@
1414
| <a name="input_aws_account_id"></a> [aws\_account\_id](#input\_aws\_account\_id) | The AWS Account ID (numeric) | `string` | n/a | yes |
1515
| <a name="input_component"></a> [component](#input\_component) | The name of the terraformscaffold component calling this module | `string` | n/a | yes |
1616
| <a name="input_default_tags"></a> [default\_tags](#input\_default\_tags) | Default tag map for application to all taggable resources in the module | `map(string)` | `{}` | no |
17+
| <a name="input_enable_event_anomaly_detection"></a> [enable\_event\_anomaly\_detection](#input\_enable\_event\_anomaly\_detection) | Enable CloudWatch anomaly detection alarm for SNS topic message publishing | `bool` | `true` | no |
1718
| <a name="input_enable_event_cache"></a> [enable\_event\_cache](#input\_enable\_event\_cache) | Enable caching of events to an S3 bucket | `bool` | `true` | no |
1819
| <a name="input_enable_firehose_raw_message_delivery"></a> [enable\_firehose\_raw\_message\_delivery](#input\_enable\_firehose\_raw\_message\_delivery) | Enables raw message delivery on firehose subscription | `bool` | `false` | no |
1920
| <a name="input_enable_sns_delivery_logging"></a> [enable\_sns\_delivery\_logging](#input\_enable\_sns\_delivery\_logging) | Enable SNS Delivery Failure Notifications | `bool` | `true` | no |
2021
| <a name="input_environment"></a> [environment](#input\_environment) | The name of the terraformscaffold environment the module is called for | `string` | n/a | yes |
22+
| <a name="input_event_anomaly_band_width"></a> [event\_anomaly\_band\_width](#input\_event\_anomaly\_band\_width) | The width of the anomaly detection band. Higher values (e.g. 4-6) reduce sensitivity and noise, lower values (e.g. 2-3) increase sensitivity. Recommended: 2-4. | `number` | `3` | no |
23+
| <a name="input_event_anomaly_evaluation_periods"></a> [event\_anomaly\_evaluation\_periods](#input\_event\_anomaly\_evaluation\_periods) | Number of evaluation periods for the anomaly alarm. Each period is defined by event\_anomaly\_period. | `number` | `2` | no |
24+
| <a name="input_event_anomaly_period"></a> [event\_anomaly\_period](#input\_event\_anomaly\_period) | The period in seconds over which the specified statistic is applied for anomaly detection. Minimum 300 seconds (5 minutes). Recommended: 300-600. | `number` | `300` | no |
2125
| <a name="input_event_cache_buffer_interval"></a> [event\_cache\_buffer\_interval](#input\_event\_cache\_buffer\_interval) | The buffer interval for data firehose | `number` | `500` | no |
2226
| <a name="input_event_cache_expiry_days"></a> [event\_cache\_expiry\_days](#input\_event\_cache\_expiry\_days) | s3 archiving expiry in days | `number` | `30` | no |
2327
| <a name="input_force_destroy"></a> [force\_destroy](#input\_force\_destroy) | When enabled will force destroy event-cache S3 bucket | `bool` | `false` | no |
@@ -42,6 +46,7 @@
4246
|------|-------------|
4347
| <a name="output_s3_bucket_event_cache"></a> [s3\_bucket\_event\_cache](#output\_s3\_bucket\_event\_cache) | S3 Bucket ARN and Name for event cache |
4448
| <a name="output_sns_topic"></a> [sns\_topic](#output\_sns\_topic) | SNS Topic ARN and Name |
49+
| <a name="output_subscriber_anomaly_alarm"></a> [subscriber\_anomaly\_alarm](#output\_subscriber\_anomaly\_alarm) | Subscriber anomaly detection alarm details |
4550
<!-- vale on -->
4651
<!-- markdownlint-enable -->
4752
<!-- END_TF_DOCS -->
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
resource "aws_cloudwatch_metric_alarm" "subscriber_anomaly" {
2+
count = var.enable_event_anomaly_detection ? 1 : 0
3+
4+
alarm_name = "${local.csi}-subscriber-anomaly"
5+
alarm_description = "ANOMALY: Detects anomalous patterns in messages published to the SNS fanout topic"
6+
comparison_operator = "LessThanLowerOrGreaterThanUpperThreshold"
7+
evaluation_periods = var.event_anomaly_evaluation_periods
8+
threshold_metric_id = "ad1"
9+
treat_missing_data = "notBreaching"
10+
11+
metric_query {
12+
id = "m1"
13+
return_data = true
14+
15+
metric {
16+
metric_name = "NumberOfMessagesPublished"
17+
namespace = "AWS/SNS"
18+
period = var.event_anomaly_period
19+
stat = "Sum"
20+
21+
dimensions = {
22+
TopicName = aws_sns_topic.main.name
23+
}
24+
}
25+
}
26+
27+
metric_query {
28+
id = "ad1"
29+
expression = "ANOMALY_DETECTION_BAND(m1, ${var.event_anomaly_band_width})"
30+
label = "NumberOfMessagesPublished (expected)"
31+
return_data = true
32+
}
33+
34+
tags = merge(
35+
var.default_tags,
36+
{
37+
Name = "${local.csi}-subscriber-anomaly"
38+
}
39+
)
40+
}

infrastructure/terraform/modules/eventsub/outputs.tf

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,12 @@ output "s3_bucket_event_cache" {
1313
bucket = module.s3bucket_event_cache[0].bucket
1414
} : {}
1515
}
16+
17+
# CloudWatch Anomaly Detection Alarm
18+
output "subscriber_anomaly_alarm" {
19+
description = "Subscriber anomaly detection alarm details"
20+
value = var.enable_event_anomaly_detection ? {
21+
arn = aws_cloudwatch_metric_alarm.subscriber_anomaly[0].arn
22+
name = aws_cloudwatch_metric_alarm.subscriber_anomaly[0].alarm_name
23+
} : null
24+
}

infrastructure/terraform/modules/eventsub/variables.tf

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,39 @@ variable "sns_success_logging_sample_percent" {
7979
default = 0
8080
}
8181

82+
##
83+
# CloudWatch Anomaly Detection Variables
84+
##
85+
86+
variable "enable_event_anomaly_detection" {
87+
type = bool
88+
description = "Enable CloudWatch anomaly detection alarm for SNS topic message publishing"
89+
default = true
90+
}
91+
92+
variable "event_anomaly_evaluation_periods" {
93+
type = number
94+
description = "Number of evaluation periods for the anomaly alarm. Each period is defined by event_anomaly_period."
95+
default = 2
96+
}
97+
98+
variable "event_anomaly_period" {
99+
type = number
100+
description = "The period in seconds over which the specified statistic is applied for anomaly detection. Minimum 300 seconds (5 minutes). Recommended: 300-600."
101+
default = 300
102+
}
103+
104+
variable "event_anomaly_band_width" {
105+
type = number
106+
description = "The width of the anomaly detection band. Higher values (e.g. 4-6) reduce sensitivity and noise, lower values (e.g. 2-3) increase sensitivity. Recommended: 2-4."
107+
default = 3
108+
109+
validation {
110+
condition = var.event_anomaly_band_width >= 2 && var.event_anomaly_band_width <= 10
111+
error_message = "Band width must be between 2 and 10."
112+
}
113+
}
114+
82115
variable "log_level" {
83116
type = string
84117
description = "The log level to be used in lambda functions within the component. Any log with a lower severity than the configured value will not be logged: https://docs.python.org/3/library/logging.html#levels"

0 commit comments

Comments
 (0)