diff --git a/docs/sources/setup/install/helm/reference.md b/docs/sources/setup/install/helm/reference.md
index 10537abc79457..94a97fc636a79 100644
--- a/docs/sources/setup/install/helm/reference.md
+++ b/docs/sources/setup/install/helm/reference.md
@@ -8042,6 +8042,11 @@ false
       "severity": "warning",
       "threshold": 5
     },
+    "LokiCompactorHasNotSuccessfullyRunCompaction": {
+      "enabled": true,
+      "for": "1h",
+      "severity": "critical"
+    },
     "LokiRequestErrors": {
       "enabled": true,
       "for": "15m",
@@ -8180,6 +8185,11 @@ null
     "severity": "warning",
     "threshold": 5
   },
+  "LokiCompactorHasNotSuccessfullyRunCompaction": {
+    "enabled": true,
+    "for": "1h",
+    "severity": "critical"
+  },
   "LokiRequestErrors": {
     "enabled": true,
     "for": "15m",
diff --git a/production/helm/loki/CHANGELOG.md b/production/helm/loki/CHANGELOG.md
index 93487e40a2221..c1f38d81edee1 100644
--- a/production/helm/loki/CHANGELOG.md
+++ b/production/helm/loki/CHANGELOG.md
@@ -13,6 +13,7 @@ Entries should include a reference to the pull request that introduced the chang
 
 ## Unreleased
 
+- [FEATURE] Add alert rules for when the compactor has not successfully run compaction. [#19901](https://github.com/grafana/loki/pull/19901)
 - [ENHANCEMENT] Use fsGroupChangePolicy=OnRootMismatch on loki to speed up pod starts [#13942](https://github.com/grafana/loki/pull/13942)
 
 ## 6.46.0
diff --git a/production/helm/loki/src/alerts.yaml.tpl b/production/helm/loki/src/alerts.yaml.tpl
index 9f6b66654462f..9bb8ca3097262 100644
--- a/production/helm/loki/src/alerts.yaml.tpl
+++ b/production/helm/loki/src/alerts.yaml.tpl
@@ -87,6 +87,52 @@ groups:
   {{- end }}
 {{- end }}
 
+{{- if and (not .disabled.LokiCompactorHasNotSuccessfullyRunCompaction) .configs.LokiCompactorHasNotSuccessfullyRunCompaction.enabled }}
+  {{- with .configs.LokiCompactorHasNotSuccessfullyRunCompaction }}
+  {{- /* Stale case: the "> 0" filter drops series that never succeeded; fires when even the newest success per cluster/namespace is over 3h old. */}}
+      - alert: "LokiCompactorHasNotSuccessfullyRunCompaction"
+        annotations:
+          message: |
+            {{`{{`}} $labels.cluster {{`}}`}} {{`{{`}} $labels.namespace {{`}}`}} has not successfully run compaction in the last 3 hours. This may indicate a problem with the compactor.
+          {{- with $additionalAnnotations }}
+          {{- toYaml . | nindent 10 }}
+          {{- end }}
+        expr: |
+          min (
+            time() - (loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds{} > 0)
+          )
+          by (cluster, namespace)
+          > 60 * 60 * 3
+        for: {{ .for }}
+        labels:
+          severity: {{ .severity }}
+          {{- with $additionalLabels }}
+          {{- toYaml . | nindent 10 }}
+          {{- end }}
+  {{- /* Never-ran case: fires when the last-success timestamp has stayed at 0 across the whole 3h lookback window for a cluster/namespace. */}}
+      - alert: "LokiCompactorHasNotSuccessfullyRunCompaction"
+        annotations:
+          message: |
+            {{`{{`}} $labels.cluster {{`}}`}} {{`{{`}} $labels.namespace {{`}}`}} has not successfully run compaction since startup. This may indicate a problem with the compactor.
+          {{- with $additionalAnnotations }}
+          {{- toYaml . | nindent 10 }}
+          {{- end }}
+        expr: |
+          max(
+            max_over_time(
+              loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds{}[3h]
+            )
+          ) by (cluster, namespace)
+          == 0
+        for: {{ .for }}
+        labels:
+          severity: {{ .severity }}
+          {{- with $additionalLabels }}
+          {{- toYaml . | nindent 10 }}
+          {{- end }}
+  {{- end }}
+{{- end }}
+
 {{- if and (not .disabled.LokiCanaryLatency) .configs.LokiCanaryLatency.enabled }}
   {{- with .configs.LokiCanaryLatency }}
   - name: "loki_canaries_alerts"
diff --git a/production/helm/loki/values.yaml b/production/helm/loki/values.yaml
index 4786759719ec6..aab0595f60c50 100644
--- a/production/helm/loki/values.yaml
+++ b/production/helm/loki/values.yaml
@@ -4115,6 +4115,10 @@ monitoring:
         enabled: true
         for: 5m
         severity: warning
+      LokiCompactorHasNotSuccessfullyRunCompaction:
+        enabled: true
+        for: 1h
+        severity: critical
       LokiCanaryLatency:
         enabled: true
         for: 15m