diff --git a/docs/severity.md b/docs/severity.md index 149003026..f05c3304c 100644 --- a/docs/severity.md +++ b/docs/severity.md @@ -1212,7 +1212,6 @@ |Detector|Critical|Major|Minor|Warning|Info| |---|---|---|---|---|---| |Nginx heartbeat|X|-|-|-|-| -|Nginx dropped connections|X|X|-|-|-| ## smart-agent_ntp diff --git a/modules/smart-agent_nginx/README.md b/modules/smart-agent_nginx/README.md index de09f7c9d..3263d46ad 100644 --- a/modules/smart-agent_nginx/README.md +++ b/modules/smart-agent_nginx/README.md @@ -76,7 +76,6 @@ This module creates the following SignalFx detectors which could contain one or |Detector|Critical|Major|Minor|Warning|Info| |---|---|---|---|---|---| |Nginx heartbeat|X|-|-|-|-| -|Nginx dropped connections|X|X|-|-|-| ## How to collect required metrics? @@ -117,8 +116,7 @@ parameter to the corresponding monitor configuration: datapointsToExclude: - metricNames: - '*' - - '!connections.failed' - - '!nginx_connections.reading' + - '!nginx.connections_current' ``` diff --git a/modules/smart-agent_nginx/detectors-gen.tf b/modules/smart-agent_nginx/detectors-gen.tf index aba4f75c1..ca57fa0db 100644 --- a/modules/smart-agent_nginx/detectors-gen.tf +++ b/modules/smart-agent_nginx/detectors-gen.tf @@ -7,7 +7,7 @@ resource "signalfx_detector" "heartbeat" { program_text = <<-EOF from signalfx.detectors.not_reporting import not_reporting - signal = data('nginx_connections.reading', filter=%{if var.heartbeat_exclude_not_running_vm}${local.not_running_vm_filters} and %{endif}${module.filtering.signalflow})${var.heartbeat_aggregation_function}${var.heartbeat_transformation_function}.publish('signal') + signal = data('nginx.connections_current', filter=%{if var.heartbeat_exclude_not_running_vm}${local.not_running_vm_filters} and %{endif}${module.filtering.signalflow})${var.heartbeat_aggregation_function}${var.heartbeat_transformation_function}.publish('signal') not_reporting.detector(stream=signal, resource_identifier=None, duration='${var.heartbeat_timeframe}', auto_resolve_after='${local.heartbeat_auto_resolve_after}').publish('CRIT') EOF @@ -25,44 +25,3 @@ EOF max_delay = var.heartbeat_max_delay } - -resource "signalfx_detector" "dropped_connections" { - name = format("%s %s", local.detector_name_prefix, "Nginx dropped connections") - - authorized_writer_teams = var.authorized_writer_teams - teams = try(coalescelist(var.teams, var.authorized_writer_teams), null) - tags = compact(concat(local.common_tags, local.tags, var.extra_tags)) - - program_text = <<-EOF - signal = data('connections.failed', filter=${module.filtering.signalflow})${var.dropped_connections_aggregation_function}${var.dropped_connections_transformation_function}.publish('signal') - detect(when(signal > ${var.dropped_connections_threshold_critical}%{if var.dropped_connections_lasting_duration_critical != null}, lasting='${var.dropped_connections_lasting_duration_critical}', at_least=${var.dropped_connections_at_least_percentage_critical}%{endif})).publish('CRIT') - detect(when(signal > ${var.dropped_connections_threshold_major}%{if var.dropped_connections_lasting_duration_major != null}, lasting='${var.dropped_connections_lasting_duration_major}', at_least=${var.dropped_connections_at_least_percentage_major}%{endif}) and (not when(signal > ${var.dropped_connections_threshold_critical}%{if var.dropped_connections_lasting_duration_critical != null}, lasting='${var.dropped_connections_lasting_duration_critical}', at_least=${var.dropped_connections_at_least_percentage_critical}%{endif}))).publish('MAJOR') -EOF - - rule { - description = "is too high > ${var.dropped_connections_threshold_critical}" - severity = "Critical" - detect_label = "CRIT" - disabled = coalesce(var.dropped_connections_disabled_critical, var.dropped_connections_disabled, var.detectors_disabled) - notifications = try(coalescelist(lookup(var.dropped_connections_notifications, "critical", []), var.notifications.critical), null) - runbook_url = try(coalesce(var.dropped_connections_runbook_url, var.runbook_url), "") - tip = var.dropped_connections_tip - parameterized_subject = var.message_subject == "" ? local.rule_subject : var.message_subject - parameterized_body = var.message_body == "" ? local.rule_body : var.message_body - } - - rule { - description = "is too high > ${var.dropped_connections_threshold_major}" - severity = "Major" - detect_label = "MAJOR" - disabled = coalesce(var.dropped_connections_disabled_major, var.dropped_connections_disabled, var.detectors_disabled) - notifications = try(coalescelist(lookup(var.dropped_connections_notifications, "major", []), var.notifications.major), null) - runbook_url = try(coalesce(var.dropped_connections_runbook_url, var.runbook_url), "") - tip = var.dropped_connections_tip - parameterized_subject = var.message_subject == "" ? local.rule_subject : var.message_subject - parameterized_body = var.message_body == "" ? local.rule_body : var.message_body - } - - max_delay = var.dropped_connections_max_delay -} - diff --git a/modules/smart-agent_nginx/outputs.tf b/modules/smart-agent_nginx/outputs.tf index a127998b6..606361b57 100644 --- a/modules/smart-agent_nginx/outputs.tf +++ b/modules/smart-agent_nginx/outputs.tf @@ -1,8 +1,3 @@ -output "dropped_connections" { - description = "Detector resource for dropped_connections" - value = signalfx_detector.dropped_connections -} - output "heartbeat" { description = "Detector resource for heartbeat" value = signalfx_detector.heartbeat diff --git a/modules/smart-agent_nginx/variables-gen.tf b/modules/smart-agent_nginx/variables-gen.tf index b03a269b3..d79769563 100644 --- a/modules/smart-agent_nginx/variables-gen.tf +++ b/modules/smart-agent_nginx/variables-gen.tf @@ -53,94 +53,3 @@ variable "heartbeat_timeframe" { type = string default = "25m" } - -# dropped_connections detector - -variable "dropped_connections_notifications" { - description = "Notification recipients list per severity overridden for dropped_connections detector" - type = map(list(string)) - default = {} -} - -variable "dropped_connections_aggregation_function" { - description = "Aggregation function and group by for dropped_connections detector (i.e. \".mean(by=['host'])\")" - type = string - default = "" -} - -variable "dropped_connections_transformation_function" { - description = "Transformation function for dropped_connections detector (i.e. \".mean(over='5m')\")" - type = string - default = ".min(over='5m')" -} - -variable "dropped_connections_max_delay" { - description = "Enforce max delay for dropped_connections detector (use \"0\" or \"null\" for \"Auto\")" - type = number - default = null -} - -variable "dropped_connections_tip" { - description = "Suggested first course of action or any note useful for incident handling" - type = string - default = "" -} - -variable "dropped_connections_runbook_url" { - description = "URL like SignalFx dashboard or wiki page which can help to troubleshoot the incident cause" - type = string - default = "" -} - -variable "dropped_connections_disabled" { - description = "Disable all alerting rules for dropped_connections detector" - type = bool - default = null -} - -variable "dropped_connections_disabled_critical" { - description = "Disable critical alerting rule for dropped_connections detector" - type = bool - default = null -} - -variable "dropped_connections_disabled_major" { - description = "Disable major alerting rule for dropped_connections detector" - type = bool - default = null -} - -variable "dropped_connections_threshold_critical" { - description = "Critical threshold for dropped_connections detector" - type = number - default = 1 -} - -variable "dropped_connections_lasting_duration_critical" { - description = "Minimum duration that conditions must be true before raising alert" - type = string - default = null -} - -variable "dropped_connections_at_least_percentage_critical" { - description = "Percentage of lasting that conditions must be true before raising alert (>= 0.0 and <= 1.0)" - type = number - default = 1 -} -variable "dropped_connections_threshold_major" { - description = "Major threshold for dropped_connections detector" - type = number - default = 0 -} - -variable "dropped_connections_lasting_duration_major" { - description = "Minimum duration that conditions must be true before raising alert" - type = string - default = null -} - -variable "dropped_connections_at_least_percentage_major" { - description = "Percentage of lasting that conditions must be true before raising alert (>= 0.0 and <= 1.0)" - type = number - default = 1 -}