From 3ba67c509c1e1d539da0bb43f375b9b56738eea2 Mon Sep 17 00:00:00 2001
From: Josiah Siegel
Date: Wed, 28 Aug 2024 10:15:20 -0400
Subject: [PATCH] update exception alerts to exclude dependencies

---
Reviewer notes (this section is ignored by `git am`):

 * Purpose: the three exception alerts move from
   azurerm_monitor_metric_alert on the "exceptions/count" metric to
   azurerm_monitor_scheduled_query_rules_alert. The new query inner-joins
   `exceptions` to the distinct ids of `requests` and `pageViews` on
   operation_ParentId == id.
 * Thresholds are equivalent to before: "GreaterThan" 99/9/0 becomes
   "GreaterThanOrEqual" 100/10/1.
 * exception_alert_error: severity changes from 1 to 2. This is not
   mentioned in the subject line -- TODO confirm it is intentional.
 * exception_alert_warn: `count` switches from
   local.prod_exclusive_alerting to local.alerting_enabled, and the window
   goes from PT30M to time_window = 60 -- TODO confirm the wider rollout is
   intended.
 * All three alerts: evaluation goes from PT1M (PT5M for warn) to
   frequency = 15, throttling = 120 is added, and the `tags` block is
   removed. Units are presumably minutes; verify against the azurerm
   provider documentation.
 * metabase: the image is pinned from `latest` to `v0.50.7`, the registry
   URL gains `/v1`, and `tags` is added to lifecycle.ignore_changes.
 * Formatting: indentation, `=` alignment and heredoc indentation in the
   hunks below were reconstructed, not copied. If a context line fails to
   match the tree, apply with `git apply --ignore-whitespace`.

 .../application_insights/alert_exception.tf   | 113 ++++++++++--------
 .../app/terraform/modules/metabase/main.tf    |   6 +-
 2 files changed, 68 insertions(+), 51 deletions(-)

diff --git a/operations/app/terraform/modules/application_insights/alert_exception.tf b/operations/app/terraform/modules/application_insights/alert_exception.tf
index 45e62e6ee5f..28f969e4c50 100644
--- a/operations/app/terraform/modules/application_insights/alert_exception.tf
+++ b/operations/app/terraform/modules/application_insights/alert_exception.tf
@@ -4,83 +4,98 @@
 # Severity 3 - Informational
 # Severity 4 - Verbose
 
-resource "azurerm_monitor_metric_alert" "exception_alert_critical" {
+resource "azurerm_monitor_scheduled_query_rules_alert" "exception_alert_critical" {
   count               = local.alerting_enabled
   name                = "Over 100 Exceptions Raised in the Last Hour"
   description         = "Over 100 Exceptions Raised in the Last Hour"
+  location            = var.location
   resource_group_name = var.resource_group
-  scopes              = [azurerm_application_insights.app_insights.id]
-  window_size         = "PT1H"
-  frequency           = "PT1M"
-  severity            = 0
-
-  criteria {
-    metric_namespace = "microsoft.insights/components"
-    metric_name      = "exceptions/count"
-    aggregation      = "Count"
-    operator         = "GreaterThan"
-    threshold        = 99
-  }
 
   action {
-    action_group_id = azurerm_monitor_action_group.action_group[0].id
+    action_group = [azurerm_monitor_action_group.action_group[0].id]
   }
+  data_source_id = azurerm_application_insights.app_insights.id
+  enabled        = true
+  query          = <<-EOT
+      let requests = requests
+      | distinct id;
+      let pageViews = pageViews
+      | distinct id;
+      let trace = union requests, pageViews;
+      exceptions
+      | join kind=inner trace on $left.operation_ParentId == $right.id
+  EOT
+  throttling  = 120
+  severity    = 0
+  frequency   = 15
+  time_window = 60
 
-  tags = {
-    environment = var.environment
+  trigger {
+    operator  = "GreaterThanOrEqual"
+    threshold = 100
   }
 }
 
-resource "azurerm_monitor_metric_alert" "exception_alert_error" {
+resource "azurerm_monitor_scheduled_query_rules_alert" "exception_alert_error" {
   count               = local.alerting_enabled
   name                = "Over 10 Exceptions Raised in the Last Hour"
   description         = "Over 10 Exceptions Raised in the Last Hour"
+  location            = var.location
   resource_group_name = var.resource_group
-  scopes              = [azurerm_application_insights.app_insights.id]
-  window_size         = "PT1H"
-  frequency           = "PT1M"
-  severity            = 1
-
-  criteria {
-    metric_namespace = "microsoft.insights/components"
-    metric_name      = "exceptions/count"
-    aggregation      = "Count"
-    operator         = "GreaterThan"
-    threshold        = 9
-  }
 
   action {
-    action_group_id = azurerm_monitor_action_group.action_group[0].id
+    action_group = [azurerm_monitor_action_group.action_group[0].id]
   }
+  data_source_id = azurerm_application_insights.app_insights.id
+  enabled        = true
+  query          = <<-EOT
+      let requests = requests
+      | distinct id;
+      let pageViews = pageViews
+      | distinct id;
+      let trace = union requests, pageViews;
+      exceptions
+      | join kind=inner trace on $left.operation_ParentId == $right.id
+  EOT
+  throttling  = 120
+  severity    = 2
+  frequency   = 15
+  time_window = 60
 
-  tags = {
-    environment = var.environment
+  trigger {
+    operator  = "GreaterThanOrEqual"
+    threshold = 10
   }
 }
 
-resource "azurerm_monitor_metric_alert" "exception_alert_warn" {
-  count               = local.prod_exclusive_alerting
+resource "azurerm_monitor_scheduled_query_rules_alert" "exception_alert_warn" {
+  count               = local.alerting_enabled
   name                = "One or More Exceptions Raised in the Last Hour"
   description         = "One or More Exceptions Raised in the Last Hour"
+  location            = var.location
   resource_group_name = var.resource_group
-  scopes              = [azurerm_application_insights.app_insights.id]
-  window_size         = "PT30M"
-  frequency           = "PT5M"
-  severity            = 3
-
-  criteria {
-    metric_namespace = "microsoft.insights/components"
-    metric_name      = "exceptions/count"
-    aggregation      = "Count"
-    operator         = "GreaterThan"
-    threshold        = 0
-  }
 
   action {
-    action_group_id = azurerm_monitor_action_group.action_group[0].id
+    action_group = [azurerm_monitor_action_group.action_group[0].id]
   }
+  data_source_id = azurerm_application_insights.app_insights.id
+  enabled        = true
+  query          = <<-EOT
+      let requests = requests
+      | distinct id;
+      let pageViews = pageViews
+      | distinct id;
+      let trace = union requests, pageViews;
+      exceptions
+      | join kind=inner trace on $left.operation_ParentId == $right.id
+  EOT
+  throttling  = 120
+  severity    = 3
+  frequency   = 15
+  time_window = 60
 
-  tags = {
-    environment = var.environment
+  trigger {
+    operator  = "GreaterThanOrEqual"
+    threshold = 1
   }
 }
\ No newline at end of file
diff --git a/operations/app/terraform/modules/metabase/main.tf b/operations/app/terraform/modules/metabase/main.tf
index 5d5ba7f903a..ea377d4f623 100644
--- a/operations/app/terraform/modules/metabase/main.tf
+++ b/operations/app/terraform/modules/metabase/main.tf
@@ -14,8 +14,8 @@ resource "azurerm_linux_web_app" "metabase" {
 
   site_config {
     application_stack {
-      docker_image_name   = "metabase/metabase:latest"
-      docker_registry_url = "https://index.docker.io"
+      docker_image_name   = "metabase/metabase:v0.50.7"
+      docker_registry_url = "https://index.docker.io/v1"
     }
 
     ip_restriction {
@@ -122,6 +122,8 @@ resource "azurerm_linux_web_app" "metabase" {
       # If the virtual network is set via the resource app_service_virtual_network_swift_connection
       # then ignore_changes should be used in the web app configuration.
       virtual_network_subnet_id,
+      # Ignore auto-generated hidden-links
+      tags
     ]
   }
 }