From 8cad84a9b419dba257494e222939d7455f39e5e4 Mon Sep 17 00:00:00 2001 From: schmikei Date: Wed, 29 Oct 2025 17:08:38 -0400 Subject: [PATCH 01/11] modernize the apache couchdb mixin --- .../{alerts => }/alerts.libsonnet | 46 +- apache-couchdb-mixin/config.libsonnet | 63 +- apache-couchdb-mixin/dashboards.libsonnet | 107 + .../dashboards/couchdb-nodes.libsonnet | 1376 ------------ .../dashboards/couchdb-overview.libsonnet | 1890 ---------------- .../dashboards/dashboards.libsonnet | 2 - .../dashboards_out/couchdb-logs.json | 334 +++ .../dashboards_out/couchdb-nodes.json | 1425 ++++--------- .../dashboards_out/couchdb-overview.json | 1899 +++++------------ apache-couchdb-mixin/g.libsonnet | 1 + apache-couchdb-mixin/jsonnetfile.json | 30 +- apache-couchdb-mixin/links.libsonnet | 27 + apache-couchdb-mixin/main.libsonnet | 49 + apache-couchdb-mixin/mixin.libsonnet | 38 +- apache-couchdb-mixin/panels.libsonnet | 349 +++ .../prometheus_alerts.yaml | 4 +- apache-couchdb-mixin/rows.libsonnet | 93 + apache-couchdb-mixin/signals/nodes.libsonnet | 318 +++ .../signals/overview.libsonnet | 313 +++ .../signals/replicator.libsonnet | 106 + 20 files changed, 2759 insertions(+), 5711 deletions(-) rename apache-couchdb-mixin/{alerts => }/alerts.libsonnet (90%) create mode 100644 apache-couchdb-mixin/dashboards.libsonnet delete mode 100644 apache-couchdb-mixin/dashboards/couchdb-nodes.libsonnet delete mode 100644 apache-couchdb-mixin/dashboards/couchdb-overview.libsonnet delete mode 100644 apache-couchdb-mixin/dashboards/dashboards.libsonnet create mode 100644 apache-couchdb-mixin/dashboards_out/couchdb-logs.json create mode 100644 apache-couchdb-mixin/g.libsonnet create mode 100644 apache-couchdb-mixin/links.libsonnet create mode 100644 apache-couchdb-mixin/main.libsonnet create mode 100644 apache-couchdb-mixin/panels.libsonnet create mode 100644 apache-couchdb-mixin/rows.libsonnet create mode 100644 apache-couchdb-mixin/signals/nodes.libsonnet create mode 100644 
apache-couchdb-mixin/signals/overview.libsonnet create mode 100644 apache-couchdb-mixin/signals/replicator.libsonnet diff --git a/apache-couchdb-mixin/alerts/alerts.libsonnet b/apache-couchdb-mixin/alerts.libsonnet similarity index 90% rename from apache-couchdb-mixin/alerts/alerts.libsonnet rename to apache-couchdb-mixin/alerts.libsonnet index 891145122..158c3c4ba 100644 --- a/apache-couchdb-mixin/alerts/alerts.libsonnet +++ b/apache-couchdb-mixin/alerts.libsonnet @@ -1,5 +1,5 @@ { - prometheusAlerts+:: { + new(this): { groups+: [ { name: 'ApacheCouchDBAlerts', @@ -8,7 +8,7 @@ alert: 'CouchDBUnhealthyCluster', expr: ||| min by(job, couchdb_cluster) (couchdb_couch_replicator_cluster_is_stable) < %(alertsCriticalClusterIsUnstable5m)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'critical', @@ -19,14 +19,14 @@ ( '{{$labels.couchdb_cluster}} has reported a value of {{ printf "%%.0f" $value }} for its stability over the last 5 minutes, ' + 'which is below the threshold of %(alertsCriticalClusterIsUnstable5m)s.' - ) % $._config, + ) % this.config, }, }, { alert: 'CouchDBHigh4xxResponseCodes', expr: ||| sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"4.*"}[5m])) > %(alertsWarning4xxResponseCodes5m)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -37,14 +37,14 @@ ( '{{ printf "%%.0f" $value }} 4xx responses have been detected over the last 5 minutes on {{$labels.instance}}, ' + 'which is above the threshold of %(alertsWarning4xxResponseCodes5m)s.' 
- ) % $._config, + ) % this.config, }, }, { alert: 'CouchDBHigh5xxResponseCodes', expr: ||| sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"5.*"}[5m])) > %(alertsCritical5xxResponseCodes5m)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'critical', @@ -55,14 +55,14 @@ ( '{{ printf "%%.0f" $value }} 5xx responses have been detected over the last 5 minutes on {{$labels.instance}}, ' + 'which is above the threshold of %(alertsCritical5xxResponseCodes5m)s.' - ) % $._config, + ) % this.config, }, }, { alert: 'CouchDBModerateRequestLatency', expr: ||| sum by(job, instance) (couchdb_request_time_seconds_sum / couchdb_request_time_seconds_count) > %(alertsWarningRequestLatency5m)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -73,14 +73,14 @@ ( 'An average of {{ printf "%%.0f" $value }}ms of request latency has occurred over the last 5 minutes on {{$labels.instance}}, ' + 'which is above the threshold of %(alertsWarningRequestLatency5m)sms. ' - ) % $._config, + ) % this.config, }, }, { alert: 'CouchDBHighRequestLatency', expr: ||| sum by(job, instance) (couchdb_request_time_seconds_sum / couchdb_request_time_seconds_count) > %(alertsCriticalRequestLatency5m)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'critical', @@ -91,14 +91,14 @@ ( 'An average of {{ printf "%%.0f" $value }}ms of request latency has occurred over the last 5 minutes on {{$labels.instance}}, ' + 'which is above the threshold of %(alertsCriticalRequestLatency5m)sms. 
' - ) % $._config, + ) % this.config, }, }, { alert: 'CouchDBManyReplicatorJobsPending', expr: ||| sum by(job, instance) (couchdb_couch_replicator_jobs_pending) > %(alertsWarningPendingReplicatorJobs5m)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -109,14 +109,14 @@ ( '{{ printf "%%.0f" $value }} replicator jobs are pending on {{$labels.instance}}, ' + 'which is above the threshold of %(alertsWarningPendingReplicatorJobs5m)s. ' - ) % $._config, + ) % this.config, }, }, { alert: 'CouchDBReplicatorJobsCrashing', expr: ||| sum by(job, instance) (increase(couchdb_couch_replicator_jobs_crashes_total[5m])) > %(alertsCriticalCrashingReplicatorJobs5m)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'critical', @@ -127,14 +127,14 @@ ( '{{ printf "%%.0f" $value }} replicator jobs have crashed over the last 5 minutes on {{$labels.instance}}, ' + 'which is above the threshold of %(alertsCriticalCrashingReplicatorJobs5m)s. ' - ) % $._config, + ) % this.config, }, }, { alert: 'CouchDBReplicatorChangesQueuesDying', expr: ||| sum by(job, instance) (increase(couchdb_couch_replicator_changes_queue_deaths_total[5m])) > %(alertsWarningDyingReplicatorChangesQueues5m)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -145,14 +145,14 @@ ( '{{ printf "%%.0f" $value }} replicator changes queue processes have died over the last 5 minutes on {{$labels.instance}}, ' + 'which is above the threshold of %(alertsWarningDyingReplicatorChangesQueues5m)s. 
' - ) % $._config, + ) % this.config, }, }, { - alert: 'CouchDBReplicatorConnectionOwnersCrashing', + alert: 'CouchDBReplicatorOwnersCrashing', expr: ||| sum by(job, instance) (increase(couchdb_couch_replicator_connection_owner_crashes_total[5m])) > %(alertsWarningCrashingReplicatorConnectionOwners5m)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -163,14 +163,14 @@ ( '{{ printf "%%.0f" $value }} replicator connection owner processes have crashed over the last 5 minutes on {{$labels.instance}}, ' + 'which is above the threshold of %(alertsWarningCrashingReplicatorConnectionOwners5m)s. ' - ) % $._config, + ) % this.config, }, }, { - alert: 'CouchDBReplicatorConnectionWorkersCrashing', + alert: 'CouchDBReplicatorWorkersCrashing', expr: ||| sum by(job, instance) (increase(couchdb_couch_replicator_connection_worker_crashes_total[5m])) > %(alertsWarningCrashingReplicatorConnectionWorkers5m)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -181,7 +181,7 @@ ( '{{ printf "%%.0f" $value }} replicator connection worker processes have crashed over the last 5 minutes on {{$labels.instance}}, ' + 'which is above the threshold of %(alertsWarningCrashingReplicatorConnectionWorkers5m)s. 
' - ) % $._config, + ) % this.config, }, }, ], diff --git a/apache-couchdb-mixin/config.libsonnet b/apache-couchdb-mixin/config.libsonnet index cf095c6a7..b64f7b595 100644 --- a/apache-couchdb-mixin/config.libsonnet +++ b/apache-couchdb-mixin/config.libsonnet @@ -1,26 +1,49 @@ { - _config+:: { - enableMultiCluster: false, - couchDBSelector: if self.enableMultiCluster then 'job=~"$job", cluster=~"$cluster"' else 'job=~"$job"', - multiClusterSelector: 'job=~"$job"', + local this = self, + filteringSelector: 'job="integrations/apache-couchdb"', + groupLabels: ['job', 'couchdb_cluster', 'cluster'], + logLabels: ['job', 'cluster', 'instance'], + instanceLabels: ['instance'], - dashboardTags: ['apache-couchdb-mixin'], - dashboardPeriod: 'now-1h', - dashboardTimezone: 'default', - dashboardRefresh: '1m', + dashboardTags: ['apache-couchdb-mixin'], + uid: 'couchdb', + dashboardNamePrefix: 'Apache CouchDB', + dashboardPeriod: 'now-1h', + dashboardTimezone: 'default', + dashboardRefresh: '1m', + metricsSource: [ + 'prometheus', + /* + * the prometheusWithTotal is used for backwards compatibility as some metrics are suffixed with _total but in later versions of the couchdb-mixin. + * i.e. couchdb_open_os_files_total => couchdb_open_os_files + * This is to ensure that the signals for the metrics that are suffixed with _total continue to work as expected. 
+ * This was identified as a noticeable change from 3.3.0 to 3.5.0 + */ + 'prometheusWithTotal', + ], - //alert thresholds - alertsCriticalClusterIsUnstable5m: 1, //1 is stable - alertsWarning4xxResponseCodes5m: 5, - alertsCritical5xxResponseCodes5m: 0, - alertsWarningRequestLatency5m: 500, //ms - alertsCriticalRequestLatency5m: 1000, //ms - alertsWarningPendingReplicatorJobs5m: 10, - alertsCriticalCrashingReplicatorJobs5m: 0, - alertsWarningDyingReplicatorChangesQueues5m: 0, - alertsWarningCrashingReplicatorConnectionOwners5m: 0, - alertsWarningCrashingReplicatorConnectionWorkers5m: 0, + // Logging configuration + enableLokiLogs: true, + extraLogLabels: ['level'], + logsVolumeGroupBy: 'level', + showLogsVolume: true, - enableLokiLogs: true, + //alert thresholds + alertsCriticalClusterIsUnstable5m: 1, //1 is stable + alertsWarning4xxResponseCodes5m: 5, + alertsCritical5xxResponseCodes5m: 0, + alertsWarningRequestLatency5m: 500, //ms + alertsCriticalRequestLatency5m: 1000, //ms + alertsWarningPendingReplicatorJobs5m: 10, + alertsCriticalCrashingReplicatorJobs5m: 0, + alertsWarningDyingReplicatorChangesQueues5m: 0, + alertsWarningCrashingReplicatorConnectionOwners5m: 0, + alertsWarningCrashingReplicatorConnectionWorkers5m: 0, + + // Signals configuration + signals+: { + overview: (import './signals/overview.libsonnet')(this), + nodes: (import './signals/nodes.libsonnet')(this), + replicator: (import './signals/replicator.libsonnet')(this), }, } diff --git a/apache-couchdb-mixin/dashboards.libsonnet b/apache-couchdb-mixin/dashboards.libsonnet new file mode 100644 index 000000000..370b9966b --- /dev/null +++ b/apache-couchdb-mixin/dashboards.libsonnet @@ -0,0 +1,107 @@ +local g = import './g.libsonnet'; +local logslib = import 'logs-lib/logs/main.libsonnet'; + +{ + local root = self, + + new(this):: + local prefix = this.config.dashboardNamePrefix; + local links = this.grafana.links; + local tags = this.config.dashboardTags; + local uid =
g.util.string.slugify(this.config.uid); + local vars = this.grafana.variables; + local annotations = this.grafana.annotations; + local refresh = this.config.dashboardRefresh; + local period = this.config.dashboardPeriod; + local timezone = this.config.dashboardTimezone; + { + 'couchdb-overview.json': + g.dashboard.new(prefix + ' overview') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + this.grafana.rows.overview, + this.grafana.rows.overviewRequests, + this.grafana.rows.overviewReplication, + ] + ) + ) + ) + root.applyCommon( + vars.multiInstance, + uid + '_overview', + tags, + links { couchdbOverview+:: {} }, + annotations, + timezone, + refresh, + period + ), + + 'couchdb-nodes.json': + g.dashboard.new(prefix + ' nodes') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + this.grafana.rows.nodes, + this.grafana.rows.nodeRequests, + this.grafana.rows.nodeLogs, + ], + ), + ), + ) + root.applyCommon( + vars.multiInstance, + uid + '_nodes', + tags, + links { couchdbNodes+:: {} }, + annotations, + timezone, + refresh, + period + ), + + } + + if this.config.enableLokiLogs then { + 'couchdb-logs.json': + logslib.new( + prefix + ' logs', + datasourceName=this.grafana.variables.datasources.loki.name, + datasourceRegex=this.grafana.variables.datasources.loki.regex, + filterSelector=this.config.filteringSelector, + labels=this.config.groupLabels + this.config.extraLogLabels, + formatParser=null, + showLogsVolume=this.config.showLogsVolume, + ) + { + dashboards+: + { + logs+: + root.applyCommon(super.logs.templating.list, uid=uid + '-logs', tags=tags, links=links { couchdbLogs+:: {} }, annotations=annotations, timezone=timezone, refresh=refresh, period=period), + }, + panels+: + { + logs+: + g.panel.logs.options.withEnableLogDetails(true) + + g.panel.logs.options.withShowTime(false) + + g.panel.logs.options.withWrapLogMessage(false), + }, + variables+: { + toArray+: [ + 
this.grafana.variables.datasources.prometheus { hide: 2 }, + ], + }, + }.dashboards.logs, + } + else {}, + + applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period): + g.dashboard.withTags(tags) + + g.dashboard.withUid(uid) + + g.dashboard.withLinks(std.objectValues(links)) + + g.dashboard.withTimezone(timezone) + + g.dashboard.withRefresh(refresh) + + g.dashboard.time.withFrom(period) + + g.dashboard.withVariables(vars) + + g.dashboard.withAnnotations(std.objectValues(annotations)), +} diff --git a/apache-couchdb-mixin/dashboards/couchdb-nodes.libsonnet b/apache-couchdb-mixin/dashboards/couchdb-nodes.libsonnet deleted file mode 100644 index 522eae13f..000000000 --- a/apache-couchdb-mixin/dashboards/couchdb-nodes.libsonnet +++ /dev/null @@ -1,1376 +0,0 @@ -local g = (import 'grafana-builder/grafana.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local prometheus = grafana.prometheus; - -local dashboardUid = 'couchdb-nodes'; - -local promDatasourceName = 'prometheus_datasource'; -local lokiDatasourceName = 'loki_datasource'; -local getMatcher(cfg) = '%(couchDBSelector)s, couchdb_cluster=~"$couchdb_cluster", instance=~"$instance"' % cfg; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local lokiDatasource = { - uid: '${%s}' % lokiDatasourceName, -}; - -local erlangMemoryUsagePanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'couchdb_erlang_memory_bytes{' + getMatcher(cfg) + ', memory_type="total"}', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'timeseries', - title: 'Erlang memory usage', - description: "The amount of memory used by a node's Erlang Virtual Machine.", - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - 
drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'decbytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local openOSFilesPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'couchdb_open_os_files_total{' + getMatcher(cfg) + '}', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'timeseries', - title: 'Open OS files', - description: 'The total number of file descriptors open on a node', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - 
-local openDatabasesPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'couchdb_open_databases_total{' + getMatcher(cfg) + '}', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'timeseries', - title: 'Open databases', - description: 'The total number of open databases on a node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local databaseWritesPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(couchdb_database_writes_total{' + getMatcher(cfg) + '}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'timeseries', - title: 'Database writes', - description: 'The number of database writes on a node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - 
lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'wps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local databaseReadsPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(couchdb_database_reads_total{' + getMatcher(cfg) + '}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'timeseries', - title: 'Database reads', - description: 'The number of database reads on a node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'rps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local viewReadsPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 
'rate(couchdb_httpd_view_reads_total{' + getMatcher(cfg) + '}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'timeseries', - title: 'View reads', - description: 'The number of view reads on a node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'rps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local viewTimeoutsPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(couchdb_httpd_view_timeouts_total{' + getMatcher(cfg) + '}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'timeseries', - title: 'View timeouts', - description: 'The number of view requests that timed out on a node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 
'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local temporaryViewReadsPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(couchdb_httpd_temporary_view_reads_total{' + getMatcher(cfg) + '}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'timeseries', - title: 'Temporary view reads', - description: 'The number of temporary view reads on a node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'rps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local requestsRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Requests', - collapsed: false, -}; - -local 
requestMethodsPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(couchdb_httpd_request_methods{' + getMatcher(cfg) + '}[$__rate_interval]) != 0', - datasource=promDatasource, - legendFormat='{{instance}} - {{method}}', - ), - ], - type: 'timeseries', - title: 'Request methods', - description: 'The request rate split by HTTP Method for a node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'table', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local requestLatencyPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'couchdb_request_time_seconds{' + getMatcher(cfg) + ', quantile="0.5"}', - datasource=promDatasource, - legendFormat='{{instance}} - p50', - ), - prometheus.target( - 'couchdb_request_time_seconds{' + getMatcher(cfg) + ', quantile="0.75"}', - datasource=promDatasource, - legendFormat='{{instance}} - p75', - ), - prometheus.target( - 'couchdb_request_time_seconds{' + getMatcher(cfg) + ', quantile="0.95"}', - datasource=promDatasource, - legendFormat='{{instance}} - p95', - ), - prometheus.target( - 'couchdb_request_time_seconds{' + getMatcher(cfg) + ', 
quantile="0.99"}', - datasource=promDatasource, - legendFormat='{{instance}} - p99', - ), - ], - type: 'timeseries', - title: 'Request latency quantiles', - description: 'The request latency quantiles for a node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'table', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local bulkRequestsPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(couchdb_httpd_bulk_requests_total{' + getMatcher(cfg) + '}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'timeseries', - title: 'Bulk requests', - description: 'The number of bulk requests for a node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - 
stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local responseStatusOverviewPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(instance, couchdb_cluster) (increase(couchdb_httpd_status_codes{' + getMatcher(cfg) + ', code=~"2.*"}[$__interval:])) != 0', - datasource=promDatasource, - legendFormat='{{instance}} - 2xx', - interval='1m', - ), - prometheus.target( - 'sum by(instance, couchdb_cluster) (increase(couchdb_httpd_status_codes{' + getMatcher(cfg) + ', code=~"3.*"}[$__interval:])) != 0', - datasource=promDatasource, - legendFormat='{{instance}} - 3xx', - interval='1m', - ), - prometheus.target( - 'sum by(instance, couchdb_cluster) (increase(couchdb_httpd_status_codes{' + getMatcher(cfg) + ', code=~"4.*"}[$__interval:])) != 0', - datasource=promDatasource, - legendFormat='{{instance}} - 4xx', - interval='1m', - ), - prometheus.target( - 'sum by(instance, couchdb_cluster) (increase(couchdb_httpd_status_codes{' + getMatcher(cfg) + ', code=~"5.*"}[$__interval:])) != 0', - datasource=promDatasource, - legendFormat='{{instance}} - 5xx', - interval='1m', - ), - ], - type: 'piechart', - title: 'Response status overview', - description: 'The responses grouped by HTTP status type (2xx, 3xx, 4xx, and 5xx) for a node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - }, - mappings: [], - }, - overrides: [], - }, - options: { - legend: { - displayMode: 'table', - placement: 'right', - showLegend: true, - }, - pieType: 'pie', 
- reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local goodResponseStatusesPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(couchdb_httpd_status_codes{' + getMatcher(cfg) + ', code=~"[23].*"}[$__rate_interval]) != 0', - datasource=promDatasource, - legendFormat='{{instance}} - {{code}}', - ), - ], - type: 'timeseries', - title: 'Good response statuses', - description: 'The response rate split by good HTTP statuses for a node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'table', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local errorResponseStatusesPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(couchdb_httpd_status_codes{' + getMatcher(cfg) + ', code=~"[45].*"}[$__rate_interval]) != 0', - datasource=promDatasource, - legendFormat='{{instance}} - {{code}}', - ), - ], - type: 'timeseries', - title: 'Error response statuses', - description: 'The response rate split by error HTTP statuses for a node.', - fieldConfig: { - defaults: { - color: { - mode: 
'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'table', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local logsRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Logs', - collapsed: false, -}; - -local logTypesPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(couchdb_couch_log_requests_total{' + getMatcher(cfg) + ', level=~"$log_level"}[$__interval:]) != 0', - datasource=promDatasource, - legendFormat='{{instance}} - {{level}}', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Log types', - description: 'The number of logged messages for a node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - 
}, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'table', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local systemLogsPanel(cfg) = { - datasource: lokiDatasource, - targets: [ - { - datasource: lokiDatasource, - editorMode: 'code', - expr: '{' + getMatcher(cfg) + '} |= `` | (filename=~"/var/log/couchdb/couchdb.log" or log_type="couchdb") |~ "$log_level"', - queryType: 'range', - refId: 'A', - }, - ], - type: 'logs', - title: 'System logs', - description: 'Recent logs from the Apache CouchDB logs file for a node.', - options: { - dedupStrategy: 'none', - enableLogDetails: true, - prettifyLogMessage: false, - showCommonLabels: false, - showLabels: false, - showTime: false, - sortOrder: 'Descending', - wrapLogMessage: false, - }, -}; - -{ - grafanaDashboards+:: { - 'couchdb-nodes.json': - dashboard.new( - 'Apache CouchDB nodes', - time_from='%s' % $._config.dashboardPeriod, - tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - refresh='%s' % $._config.dashboardRefresh, - description='', - uid=dashboardUid, - ) - .addLink(grafana.link.dashboards( - asDropdown=false, - title='Other Apache CouchDB dashboards', - includeVars=true, - keepTime=true, - tags=($._config.dashboardTags), - )) - .addTemplates( - std.flattenArrays([ - [ - template.datasource( - promDatasourceName, - 'prometheus', - null, - label='Data Source', - refresh='load' - ), - ], - if $._config.enableLokiLogs then [ - template.datasource( - lokiDatasourceName, - 'loki', - null, - label='Loki Datasource', - refresh='load' - ), - ] else [], - [ - template.new( - 'job', - promDatasource, - 'label_values(couchdb_couch_replicator_cluster_is_stable, job)', - label='Job', - refresh=1, - includeAll=true, - multi=true, 
- allValues='', - sort=0 - ), - template.new( - 'cluster', - promDatasource, - 'label_values(couchdb_couch_replicator_cluster_is_stable{%(multiClusterSelector)s}, cluster)' % $._config, - label='Cluster', - refresh=1, - includeAll=true, - multi=true, - allValues='', - hide=if $._config.enableMultiCluster then '' else 'variable' % $._config, - sort=0 - ), - template.new( - 'couchdb_cluster', - promDatasource, - 'label_values(couchdb_couch_replicator_cluster_is_stable{job=~"$job"}, couchdb_cluster)', - label='CouchDB cluster', - refresh=1, - includeAll=true, - multi=true, - allValues='', - sort=0 - ), - template.new( - 'instance', - promDatasource, - 'label_values(couchdb_couch_replicator_cluster_is_stable{couchdb_cluster=~"$couchdb_cluster"}, instance)', - label='Instance', - refresh=1, - includeAll=true, - multi=true, - allValues='', - sort=0 - ), - template.new( - 'log_level', - promDatasource, - 'label_values(couchdb_couch_log_requests_total{instance=~"$instance"}, level)', - label='Log level', - refresh=1, - includeAll=true, - multi=true, - allValues='', - sort=0 - ), - ], - ]) - ) - .addPanels( - std.flattenArrays([ - [ - erlangMemoryUsagePanel($._config) { gridPos: { h: 6, w: 8, x: 0, y: 0 } }, - openOSFilesPanel($._config) { gridPos: { h: 6, w: 8, x: 8, y: 0 } }, - openDatabasesPanel($._config) { gridPos: { h: 6, w: 8, x: 16, y: 0 } }, - databaseWritesPanel($._config) { gridPos: { h: 6, w: 12, x: 0, y: 6 } }, - databaseReadsPanel($._config) { gridPos: { h: 6, w: 12, x: 12, y: 6 } }, - viewReadsPanel($._config) { gridPos: { h: 6, w: 8, x: 0, y: 12 } }, - viewTimeoutsPanel($._config) { gridPos: { h: 6, w: 8, x: 8, y: 12 } }, - temporaryViewReadsPanel($._config) { gridPos: { h: 6, w: 8, x: 16, y: 12 } }, - requestsRow { gridPos: { h: 1, w: 24, x: 0, y: 18 } }, - bulkRequestsPanel($._config) { gridPos: { h: 6, w: 12, x: 0, y: 19 } }, - requestLatencyPanel($._config) { gridPos: { h: 6, w: 12, x: 12, y: 19 } }, - requestMethodsPanel($._config) { gridPos: { h: 6, w: 
12, x: 0, y: 25 } }, - responseStatusOverviewPanel($._config) { gridPos: { h: 6, w: 12, x: 12, y: 25 } }, - goodResponseStatusesPanel($._config) { gridPos: { h: 6, w: 12, x: 0, y: 31 } }, - errorResponseStatusesPanel($._config) { gridPos: { h: 6, w: 12, x: 12, y: 31 } }, - logsRow { gridPos: { h: 1, w: 24, x: 0, y: 37 } }, - logTypesPanel($._config) { gridPos: { h: 6, w: 24, x: 0, y: 38 } }, - ], - if $._config.enableLokiLogs then [ - systemLogsPanel($._config) { gridPos: { h: 6, w: 24, x: 0, y: 44 } }, - ] else [], - [ - ], - ]) - ), - }, -} diff --git a/apache-couchdb-mixin/dashboards/couchdb-overview.libsonnet b/apache-couchdb-mixin/dashboards/couchdb-overview.libsonnet deleted file mode 100644 index 06703831d..000000000 --- a/apache-couchdb-mixin/dashboards/couchdb-overview.libsonnet +++ /dev/null @@ -1,1890 +0,0 @@ -local g = (import 'grafana-builder/grafana.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local prometheus = grafana.prometheus; - -local dashboardUid = 'couchdb-overview'; - -local promDatasourceName = 'prometheus_datasource'; -local getMatcher(cfg) = '%(couchDBSelector)s, couchdb_cluster=~"$couchdb_cluster"' % cfg; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local numberOfClustersPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'count(count by(couchdb_cluster, job) (couchdb_request_time_seconds_count{' + getMatcher(cfg) + '}))', - datasource=promDatasource, - legendFormat='{{ couchdb_cluster }}', - format='time_series', - ), - ], - type: 'stat', - title: 'Number of clusters', - description: 'The number of clusters being reported.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'yellow', - value: null, - }, - { - color: 'red', - value: 0, - }, - { - color: 'green', - value: 1, - }, - ], - }, - 
}, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '9.2.3', -}; - -local numberOfNodesPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum(count by(couchdb_cluster, job) (couchdb_request_time_seconds_count{' + getMatcher(cfg) + '}))', - datasource=promDatasource, - legendFormat='{{ couchdb_cluster }}', - format='time_series', - ), - ], - type: 'stat', - title: 'Number of nodes', - description: 'The number of nodes being reported.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'red', - value: null, - }, - { - color: 'red', - value: 0, - }, - { - color: 'green', - value: 1, - }, - ], - }, - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '9.2.3', -}; - -local clusterHealthPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum(min by(job, couchdb_cluster) (couchdb_couch_replicator_cluster_is_stable{' + getMatcher(cfg) + '})) / count(count by(job, couchdb_cluster) (couchdb_couch_replicator_cluster_is_stable{' + getMatcher(cfg) + '})) * 100', - datasource=promDatasource, - legendFormat='{{ couchdb_cluster }}', - format='time_series', - ), - ], - type: 'stat', - title: 'Clusters healthy', - description: 'Percentage of clusters that have all nodes that are currently reporting healthy.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'yellow', - value: null, - }, - { - color: 'red', - value: 0, - }, - { - 
color: 'yellow', - value: 1, - }, - { - color: 'green', - value: 100, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - text: {}, - textMode: 'auto', - }, - pluginVersion: '9.2.3', -}; - -local openOSFilesPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(couchdb_cluster, job) (couchdb_open_os_files_total{' + getMatcher(cfg) + '})', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}}', - ), - ], - type: 'timeseries', - title: 'Open OS files', - description: 'The total number of file descriptors open aggregated across all nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, -}; - -local openDatabasesPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, couchdb_cluster) (couchdb_open_databases_total{' + getMatcher(cfg) + '})', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}}', - ), - ], 
- type: 'timeseries', - title: 'Open databases', - description: 'The total number of open databases aggregated across all nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, -}; - -local databaseWritesPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, couchdb_cluster) (rate(couchdb_database_writes_total{' + getMatcher(cfg) + '}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}}', - ), - ], - type: 'timeseries', - title: 'Database writes', - description: 'The number of database writes aggregated across all nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 
'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'wps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, -}; - -local databaseReadsPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, couchdb_cluster) (rate(couchdb_database_reads_total{' + getMatcher(cfg) + '}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}}', - ), - ], - type: 'timeseries', - title: 'Database reads', - description: 'The number of database reads aggregated across all nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'rps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, -}; - -local viewReadsPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, couchdb_cluster) (rate(couchdb_httpd_view_reads_total{' + getMatcher(cfg) + 
'}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}}', - ), - ], - type: 'timeseries', - title: 'View reads', - description: 'The number of view reads aggregated across all nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'rps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, -}; - -local viewTimeoutsPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, couchdb_cluster) (rate(couchdb_httpd_view_timeouts_total{' + getMatcher(cfg) + '}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}}', - ), - ], - type: 'timeseries', - title: 'View timeouts', - description: 'The number of view requests that timed out aggregated across all nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - 
scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, -}; - -local temporaryViewReadsPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, couchdb_cluster) (rate(couchdb_httpd_temporary_view_reads_total{' + getMatcher(cfg) + '}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}}', - ), - ], - type: 'timeseries', - title: 'Temporary view reads', - description: 'The number of temporary view reads aggregated across all nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'rps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, -}; - -local requestsRow = { - datasource: promDatasource, - 
targets: [], - type: 'row', - title: 'Requests', - collapsed: false, -}; - -local requestMethodsPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, couchdb_cluster, method) (rate(couchdb_httpd_request_methods{' + getMatcher(cfg) + '}[$__rate_interval])) != 0', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}} - {{method}}', - ), - ], - type: 'timeseries', - title: 'Request methods', - description: 'The request rate split by HTTP Method aggregated across all nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'table', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local averageRequestLatencyPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(job, couchdb_cluster, quantile) (couchdb_request_time_seconds{' + getMatcher(cfg) + ', quantile=~"0.5"})', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}} - p50', - ), - prometheus.target( - 'avg by(job, couchdb_cluster, quantile) (couchdb_request_time_seconds{' + getMatcher(cfg) + ', quantile=~"0.75"})', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}} - p75', - ), - 
prometheus.target( - 'avg by(job, couchdb_cluster, quantile) (couchdb_request_time_seconds{' + getMatcher(cfg) + ', quantile=~"0.95"})', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}} - p95', - ), - prometheus.target( - 'avg by(job, couchdb_cluster, quantile) (couchdb_request_time_seconds{' + getMatcher(cfg) + ', quantile=~"0.99"})', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}} - p99', - ), - ], - type: 'timeseries', - title: 'Request latency quantiles', - description: 'The average request latency quantiles aggregated across all nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'table', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, - pluginVersion: '9.2.3', -}; - -local bulkRequestsPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, couchdb_cluster) (rate(couchdb_httpd_bulk_requests_total{' + getMatcher(cfg) + '}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}}', - ), - ], - type: 'timeseries', - title: 'Bulk requests', - description: 'The number of bulk requests aggregated across all nodes.', - fieldConfig: { - defaults: { - 
color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, -}; - -local responseStatusOverviewPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, couchdb_cluster) (increase(couchdb_httpd_status_codes{' + getMatcher(cfg) + ', code=~"2.*"}[$__interval:])) != 0', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}} - 2xx', - interval='1m', - ), - prometheus.target( - 'sum by(job, couchdb_cluster) (increase(couchdb_httpd_status_codes{' + getMatcher(cfg) + ', code=~"3.*"}[$__interval:])) != 0', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}} - 3xx', - interval='1m', - ), - prometheus.target( - 'sum by(job, couchdb_cluster) (increase(couchdb_httpd_status_codes{' + getMatcher(cfg) + ', code=~"4.*"}[$__interval:])) != 0', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}} - 4xx', - interval='1m', - ), - prometheus.target( - 'sum by(job, couchdb_cluster) (increase(couchdb_httpd_status_codes{' + getMatcher(cfg) + ', code=~"5.*"}[$__interval:])) != 0', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}} - 5xx', - interval='1m', - ), - ], - type: 
'piechart', - title: 'Response status overview', - description: 'The responses grouped by HTTP status type (2xx, 3xx, 4xx, and 5xx) aggregated across all nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - }, - mappings: [], - }, - overrides: [], - }, - options: { - legend: { - displayMode: 'table', - placement: 'right', - showLegend: true, - }, - pieType: 'pie', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local goodResponseStatusesPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, couchdb_cluster, code) (rate(couchdb_httpd_status_codes{' + getMatcher(cfg) + ', code=~"[23].*"}[$__rate_interval])) != 0', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}} - {{code}}', - ), - ], - type: 'timeseries', - title: 'Good response statuses', - description: 'The response rate split by good HTTP statuses aggregated across all nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'table', - placement: 'right', - showLegend: true, - }, - 
tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local errorResponseStatusesPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, couchdb_cluster, code) (rate(couchdb_httpd_status_codes{' + getMatcher(cfg) + ', code=~"[45].*"}[$__rate_interval])) != 0', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}} - {{code}}', - ), - ], - type: 'timeseries', - title: 'Error response statuses', - description: 'The response rate split by error HTTP statuses aggregated across all nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'table', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, -}; - -local replicationRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Replication', - collapsed: false, -}; - -local replicatorChangesManagerDeathsPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, couchdb_cluster) (increase(couchdb_couch_replicator_changes_manager_deaths_total{' + getMatcher(cfg) + '}[$__interval:]))', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}}', - interval='1m', - ), - ], - type: 'timeseries', 
- title: 'Replicator changes manager deaths', - description: 'Number of replicator changes manager processor deaths across all nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, -}; - -local replicatorChangesQueueDeathsPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, couchdb_cluster) (increase(couchdb_couch_replicator_changes_queue_deaths_total{' + getMatcher(cfg) + '}[$__interval:]))', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}}', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Replicator changes queue deaths', - description: 'Number of replicator changes queue processor deaths across all nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - 
type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, -}; - -local replicatorChangesReaderDeathsPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, couchdb_cluster) (increase(couchdb_couch_replicator_changes_reader_deaths_total{' + getMatcher(cfg) + '}[$__interval:]))', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}}', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Replicator changes reader deaths', - description: 'Number of replicator changes reader processor deaths across all nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, -}; - -local 
replicatorConnectionOwnerCrashesPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, couchdb_cluster) (increase(couchdb_couch_replicator_connection_owner_crashes_total{' + getMatcher(cfg) + '}[$__interval:]))', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}}', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Replicator connection owner crashes', - description: 'Number of replicator connection owner crashes across all nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, -}; - -local replicatorConnectionWorkerCrashesPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, couchdb_cluster) (increase(couchdb_couch_replicator_connection_worker_crashes_total{' + getMatcher(cfg) + '}[$__interval:]))', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}}', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Replicator connection worker crashes', - description: 'Number of replicator connection worker crashes across all nodes.', - fieldConfig: { - defaults: { - color: { - 
mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, -}; - -local replicatorJobCrashesPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, couchdb_cluster) (increase(couchdb_couch_replicator_jobs_crashes_total{' + getMatcher(cfg) + '}[$__interval:]))', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}}', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Replicator job crashes', - description: 'Number of replicator job crashes across all nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - 
color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, -}; - -local replicatorJobsPendingPanel(cfg) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, couchdb_cluster) (couchdb_couch_replicator_jobs_pending{' + getMatcher(cfg) + '})', - datasource=promDatasource, - legendFormat='{{couchdb_cluster}}', - ), - ], - type: 'timeseries', - title: 'Replicator jobs pending', - description: 'Number of replicator jobs pending across all nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, -}; - -{ - grafanaDashboards+:: { - 'couchdb-overview.json': - dashboard.new( - 'Apache CouchDB overview', - time_from='%s' % $._config.dashboardPeriod, - tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - refresh='%s' % $._config.dashboardRefresh, - description='', - uid=dashboardUid, - ) - .addLink(grafana.link.dashboards( - 
asDropdown=false, - title='Other Apache CouchDB dashboards', - includeVars=true, - keepTime=true, - tags=($._config.dashboardTags), - )) - .addTemplates( - [ - template.datasource( - promDatasourceName, - 'prometheus', - null, - label='Data Source', - refresh='load' - ), - template.new( - 'job', - promDatasource, - 'label_values(couchdb_couch_replicator_cluster_is_stable, job)', - label='Job', - refresh=1, - includeAll=true, - multi=true, - allValues='', - sort=0 - ), - template.new( - 'cluster', - promDatasource, - 'label_values(couchdb_couch_replicator_cluster_is_stable{%(multiClusterSelector)s}, cluster)' % $._config, - label='Cluster', - refresh=1, - includeAll=true, - multi=true, - allValues='', - hide=if $._config.enableMultiCluster then '' else 'variable' % $._config, - sort=0 - ), - template.new( - 'couchdb_cluster', - promDatasource, - 'label_values(couchdb_couch_replicator_cluster_is_stable{job=~"$job"}, couchdb_cluster)', - label='CouchDB cluster', - refresh=1, - includeAll=true, - multi=true, - allValues='', - sort=0 - ), - ] - ) - .addPanels( - [ - numberOfClustersPanel($._config) { gridPos: { h: 6, w: 8, x: 0, y: 0 } }, - numberOfNodesPanel($._config) { gridPos: { h: 6, w: 8, x: 8, y: 0 } }, - clusterHealthPanel($._config) { gridPos: { h: 6, w: 8, x: 16, y: 0 } }, - openOSFilesPanel($._config) { gridPos: { h: 6, w: 12, x: 0, y: 6 } }, - openDatabasesPanel($._config) { gridPos: { h: 6, w: 12, x: 12, y: 6 } }, - databaseWritesPanel($._config) { gridPos: { h: 6, w: 12, x: 0, y: 12 } }, - databaseReadsPanel($._config) { gridPos: { h: 6, w: 12, x: 12, y: 12 } }, - viewReadsPanel($._config) { gridPos: { h: 6, w: 8, x: 0, y: 18 } }, - viewTimeoutsPanel($._config) { gridPos: { h: 6, w: 8, x: 8, y: 18 } }, - temporaryViewReadsPanel($._config) { gridPos: { h: 6, w: 8, x: 16, y: 18 } }, - requestsRow { gridPos: { h: 1, w: 24, x: 0, y: 24 } }, - bulkRequestsPanel($._config) { gridPos: { h: 6, w: 12, x: 0, y: 25 } }, - averageRequestLatencyPanel($._config) { 
gridPos: { h: 6, w: 12, x: 12, y: 25 } }, - requestMethodsPanel($._config) { gridPos: { h: 6, w: 12, x: 0, y: 31 } }, - responseStatusOverviewPanel($._config) { gridPos: { h: 6, w: 12, x: 12, y: 31 } }, - goodResponseStatusesPanel($._config) { gridPos: { h: 6, w: 12, x: 0, y: 37 } }, - errorResponseStatusesPanel($._config) { gridPos: { h: 6, w: 12, x: 12, y: 37 } }, - replicationRow { gridPos: { h: 1, w: 24, x: 0, y: 43 } }, - replicatorChangesManagerDeathsPanel($._config) { gridPos: { h: 6, w: 8, x: 0, y: 44 } }, - replicatorChangesQueueDeathsPanel($._config) { gridPos: { h: 6, w: 8, x: 8, y: 44 } }, - replicatorChangesReaderDeathsPanel($._config) { gridPos: { h: 6, w: 8, x: 16, y: 44 } }, - replicatorConnectionOwnerCrashesPanel($._config) { gridPos: { h: 6, w: 12, x: 0, y: 50 } }, - replicatorConnectionWorkerCrashesPanel($._config) { gridPos: { h: 6, w: 12, x: 12, y: 50 } }, - replicatorJobCrashesPanel($._config) { gridPos: { h: 6, w: 12, x: 0, y: 56 } }, - replicatorJobsPendingPanel($._config) { gridPos: { h: 6, w: 12, x: 12, y: 56 } }, - ] - ), - }, -} diff --git a/apache-couchdb-mixin/dashboards/dashboards.libsonnet b/apache-couchdb-mixin/dashboards/dashboards.libsonnet deleted file mode 100644 index c04891029..000000000 --- a/apache-couchdb-mixin/dashboards/dashboards.libsonnet +++ /dev/null @@ -1,2 +0,0 @@ -(import 'couchdb-overview.libsonnet') + -(import 'couchdb-nodes.libsonnet') diff --git a/apache-couchdb-mixin/dashboards_out/couchdb-logs.json b/apache-couchdb-mixin/dashboards_out/couchdb-logs.json new file mode 100644 index 000000000..ba5fea85b --- /dev/null +++ b/apache-couchdb-mixin/dashboards_out/couchdb-logs.json @@ -0,0 +1,334 @@ +{ + "annotations": { + "list": [ ] + }, + "links": [ + { + "keepTime": true, + "title": "Apache CouchDB nodes", + "type": "link", + "url": "couchdb_nodes" + }, + { + "keepTime": true, + "title": "Apache CouchDB overview", + "type": "link", + "url": "/d/couchdb_overview" + }, + { + "asDropdown": true, + "includeVars": 
true, + "keepTime": true, + "tags": [ + "apache-couchdb-mixin" + ], + "title": "All dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "description": "Logs volume grouped by \"level\" label.", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "bars", + "fillOpacity": 50, + "stacking": { + "mode": "normal" + } + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "(E|e)merg|(F|f)atal|(A|a)lert|(C|c)rit.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(E|e)(rr.*|RR.*)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(W|w)(arn.*|ARN.*|rn|RN)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(N|n)(otice|ote)|(I|i)(nf.*|NF.*)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "dbg.*|DBG.*|(D|d)(EBUG|ebug)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(T|t)(race|RACE)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "logs" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "text", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "maxDataPoints": 100, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, 
+ "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "expr": "sum by (level) (count_over_time({job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",level=~\"$level\"}\n|~ \"$regex_search\"\n\n[$__auto]))\n", + "legendFormat": "{{ level }}" + } + ], + "title": "Logs volume", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "Value", + "renamePattern": "logs" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 18, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 2, + "options": { + "dedupStrategy": "exact", + "enableLogDetails": true, + "prettifyLogMessage": true, + "showTime": false, + "wrapLogMessage": false + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "expr": "{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",level=~\"$level\"} \n|~ \"$regex_search\"\n\n\n" + } + ], + "title": "Logs", + "type": "logs" + } + ], + "refresh": "1m", + "schemaVersion": 39, + "tags": [ + "apache-couchdb-mixin" + ], + "templating": { + "list": [ + { + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "query": "label_values({job=\"integrations/apache-couchdb\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "includeAll": true, + "label": "Couchdb_cluster", + "multi": true, + "name": "couchdb_cluster", + "query": 
"label_values({job=\"integrations/apache-couchdb\",job=~\"$job\"}, couchdb_cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "includeAll": true, + "label": "Cluster", + "multi": true, + "name": "cluster", + "query": "label_values({job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "includeAll": true, + "label": "Level", + "multi": true, + "name": "level", + "query": "label_values({job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\"}, level)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "label": "Regex search", + "name": "regex_search", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "type": "textbox" + }, + { + "hide": 2, + "label": "Prometheus data source", + "name": "prometheus_datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "default", + "title": "Apache CouchDB logs", + "uid": "couchdb-logs" + } \ No newline at end of file diff --git a/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json b/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json index cf0272da0..a2a6d8abc 100644 --- a/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json +++ b/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json @@ -1,117 +1,124 @@ { - "__inputs": [ ], - "__requires": [ ], "annotations": { "list": [ ] }, - "description": "", - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, "links": [ { - "asDropdown": false, - "icon": "external link", + 
"keepTime": true, + "title": "Apache CouchDB logs", + "type": "link", + "url": "couchdb-logs" + }, + { + "keepTime": true, + "title": "Apache CouchDB overview", + "type": "link", + "url": "/d/couchdb_overview" + }, + { + "asDropdown": true, "includeVars": true, "keepTime": true, "tags": [ "apache-couchdb-mixin" ], - "targetBlank": false, - "title": "Other Apache CouchDB dashboards", - "type": "dashboards", - "url": "" + "title": "All dashboards", + "type": "dashboards" } ], "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [ ], + "title": "Nodes", + "type": "row" + }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "The amount of memory used by a node's Erlang Virtual Machine.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ + "min": 0, + "unit": "decbytes" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".*(T|t)otal.*" + }, + "properties": [ { - "color": "green", - "value": null + "id": "color", + "value": { + "fixedColor": "light-orange", + "mode": "fixed" + } }, { - "color": "red", - "value": 80 + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": 
"custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } } ] - }, - "unit": "decbytes" - }, - "overrides": [ ] + } + ] }, "gridPos": { "h": 6, "w": 8, "x": 0, - "y": 0 + "y": 1 }, "id": 2, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "couchdb_erlang_memory_bytes{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\", memory_type=\"total\"}", + "expr": "couchdb_erlang_memory_bytes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", memory_type=\"total\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Erlang memory usage" } ], "title": "Erlang memory usage", @@ -119,86 +126,51 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The total number of file descriptors open on a node", + "description": "The total number of file descriptors open on a node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - 
"mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, "w": 8, "x": 8, - "y": 0 + "y": 1 }, "id": 3, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "couchdb_open_os_files_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\"}", + "expr": "couchdb_open_os_files_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Open OS files" } ], "title": "Open OS files", @@ -206,89 +178,51 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "The total number of open databases on a node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": 
"smooth", + "lineWidth": 2, + "showPoints": "never" }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ ] + "unit": "none" + } }, "gridPos": { "h": 6, "w": 8, "x": 16, - "y": 0 + "y": 1 }, "id": 4, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "couchdb_open_databases_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\"}", + "expr": "couchdb_open_databases_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Open databases" } ], "title": "Open databases", @@ -296,90 +230,52 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of database writes on a node.", + "description": "The total number of database writes on a node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": 
"off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "wps" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 6 + "y": 9 }, "id": 5, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(couchdb_database_writes_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(couchdb_database_writes_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}}", + "refId": "Database writes" } ], "title": "Database writes", @@ -387,90 +283,52 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of database reads on a node.", + "description": "The total number of database reads on a node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": 
{ - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "rps" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 12, - "y": 6 + "y": 9 }, "id": 6, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(couchdb_database_reads_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(couchdb_database_reads_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}}", + "refId": "Database reads" } ], "title": "Database reads", @@ -478,90 +336,52 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of view reads on a node.", + "description": "The total number of view reads on a node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - 
"legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "rps" - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, "w": 8, "x": 0, - "y": 12 + "y": 15 }, "id": 7, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(couchdb_httpd_view_reads_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(couchdb_httpd_view_reads_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}}", + "refId": "View reads" } ], "title": "View reads", @@ -569,90 +389,52 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of view requests that timed out on a node.", + "description": "The total number of view requests that timed out on a node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - 
"axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, "w": 8, "x": 8, - "y": 12 + "y": 15 }, "id": 8, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(couchdb_httpd_view_timeouts_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(couchdb_httpd_view_timeouts_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}}", + "refId": "View timeouts" } ], "title": "View timeouts", @@ -660,90 +442,52 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of temporary view reads on a node.", + "description": "The 
total number of temporary view reads on a node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "rps" - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, "w": 8, "x": 16, - "y": 12 + "y": 15 }, "id": 9, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(couchdb_httpd_temporary_view_reads_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(couchdb_httpd_temporary_view_reads_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}}", + "refId": "Temporary view reads" } ], "title": "Temporary view 
reads", @@ -751,106 +495,65 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 18 + "y": 23 }, "id": 10, - "targets": [ ], + "panels": [ ], "title": "Requests", "type": "row" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of bulk requests for a node.", + "description": "The total number of bulk requests on a node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, - "unit": "reqps" - }, - "overrides": [ ] + "unit": "rps" + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 19 + "y": 24 }, "id": 11, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(couchdb_httpd_bulk_requests_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", 
instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(couchdb_httpd_bulk_requests_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}}", + "refId": "Bulk requests" } ], "title": "Bulk requests", @@ -858,208 +561,137 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The request latency quantiles for a node.", + "description": "The average request latency for a node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 12, - "y": 19 + "y": 24 }, "id": 12, "options": { "legend": { "calcs": [ ], - "displayMode": "table", - "placement": "right", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + 
"type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "couchdb_request_time_seconds{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\", quantile=\"0.5\"}", + "expr": "couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", quantile=\"0.5\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - p50" + "instant": false, + "legendFormat": "{{instance}} - p50", + "refId": "Request latency p50" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "couchdb_request_time_seconds{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\", quantile=\"0.75\"}", + "expr": "couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", quantile=\"0.75\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - p75" + "instant": false, + "legendFormat": "{{instance}} - p75", + "refId": "Request latency p75" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "couchdb_request_time_seconds{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\", quantile=\"0.95\"}", + "expr": "couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", quantile=\"0.95\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - p95" + "instant": false, + "legendFormat": "{{instance}} - p95", + "refId": "Request latency p95" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "couchdb_request_time_seconds{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\", quantile=\"0.99\"}", + 
"expr": "couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", quantile=\"0.99\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - p99" + "instant": false, + "legendFormat": "{{instance}} - p99", + "refId": "Request latency p99" } ], - "title": "Request latency quantiles", + "title": "Average request latency", "type": "timeseries" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "The request rate split by HTTP Method for a node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 25 + "y": 32 }, "id": 13, "options": { "legend": { "calcs": [ ], - "displayMode": "table", - "placement": "right", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, 
- "expr": "rate(couchdb_httpd_request_methods{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\"}[$__rate_interval]) != 0", + "expr": "rate(couchdb_httpd_request_methods{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{method}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - {{method}}", + "refId": "Request methods" } ], "title": "Request methods", @@ -1067,91 +699,71 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The responses grouped by HTTP status type (2xx, 3xx, 4xx, and 5xx) for a node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "mappings": [ ] - }, - "overrides": [ ] + "unit": "none" + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 12, - "y": 25 + "y": 32 }, "id": 14, - "options": { - "legend": { - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(instance, couchdb_cluster) (increase(couchdb_httpd_status_codes{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\", code=~\"2.*\"}[$__interval:])) != 0", + "expr": "sum by (job,couchdb_cluster,cluster,instance) (\n couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"2.*\"}\n)", "format": 
"time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - 2xx" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - 2xx", + "refId": "Response status 2XX" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(instance, couchdb_cluster) (increase(couchdb_httpd_status_codes{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\", code=~\"3.*\"}[$__interval:])) != 0", + "expr": "sum by (job,couchdb_cluster,cluster,instance) (\n couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"3.*\"}\n)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - 3xx" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - 3xx", + "refId": "Response status 3XX" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(instance, couchdb_cluster) (increase(couchdb_httpd_status_codes{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\", code=~\"4.*\"}[$__interval:])) != 0", + "expr": "sum by (job,couchdb_cluster,cluster,instance) (\n couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"4.*\"}\n)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - 4xx" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - 4xx", + "refId": "Response status 4XX" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(instance, couchdb_cluster) (increase(couchdb_httpd_status_codes{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\", code=~\"5.*\"}[$__interval:])) != 0", + "expr": "sum by 
(job,couchdb_cluster,cluster,instance) (\n couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"5.*\"}\n)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - 5xx" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - 5xx", + "refId": "Response status 5XX" } ], "title": "Response status overview", @@ -1159,90 +771,52 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The response rate split by good HTTP statuses for a node.", + "description": "The total number of good response statuses on a node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 31 + "y": 40 }, "id": 15, "options": { "legend": { "calcs": [ ], - "displayMode": "table", - "placement": "right", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + 
"pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(couchdb_httpd_status_codes{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\", code=~\"[23].*\"}[$__rate_interval]) != 0", + "expr": "rate(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"[23].*\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{code}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - {{code}}", + "refId": "Good response statuses" } ], "title": "Good response statuses", @@ -1250,90 +824,52 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The response rate split by error HTTP statuses for a node.", + "description": "The total number of error response statuses on a node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + 
"h": 8, "w": 12, "x": 12, - "y": 31 + "y": 40 }, "id": 16, "options": { "legend": { "calcs": [ ], - "displayMode": "table", - "placement": "right", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(couchdb_httpd_status_codes{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\", code=~\"[45].*\"}[$__rate_interval]) != 0", + "expr": "rate(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"[45].*\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{code}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - {{code}}", + "refId": "Error response statuses" } ], "title": "Error response statuses", @@ -1341,289 +877,152 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 37 + "y": 48 }, "id": 17, - "targets": [ ], + "panels": [ ], "title": "Logs", "type": "row" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "The number of logged messages for a node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - 
}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 24, "x": 0, - "y": 38 + "y": 49 }, "id": 18, "options": { "legend": { "calcs": [ ], - "displayMode": "table", - "placement": "right", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "increase(couchdb_couch_log_requests_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\", level=~\"$log_level\"}[$__interval:]) != 0", + "expr": "increase(couchdb_couch_log_requests_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{level}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - {{level}}", + "refId": "Log types" } ], "title": "Log types", "type": "timeseries" - }, - { - "datasource": { - "uid": "${loki_datasource}" - }, - "description": "Recent logs from the Apache CouchDB logs file for a node.", - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 44 - }, - "id": 19, - "options": { - "dedupStrategy": "none", - "enableLogDetails": true, - "prettifyLogMessage": false, - "showCommonLabels": false, - "showLabels": false, - "showTime": false, - "sortOrder": "Descending", - "wrapLogMessage": false - }, - "targets": [ - { - "datasource": { - "uid": "${loki_datasource}" - }, - 
"editorMode": "code", - "expr": "{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", instance=~\"$instance\"} |= `` | (filename=~\"/var/log/couchdb/couchdb.log\" or log_type=\"couchdb\") |~ \"$log_level\"", - "queryType": "range", - "refId": "A" - } - ], - "title": "System logs", - "type": "logs" } ], "refresh": "1m", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", + "schemaVersion": 39, "tags": [ "apache-couchdb-mixin" ], "templating": { "list": [ { - "current": { }, - "hide": 0, - "label": "Data Source", + "label": "Prometheus data source", "name": "prometheus_datasource", - "options": [ ], "query": "prometheus", - "refresh": 1, "regex": "", "type": "datasource" }, { - "current": { }, - "hide": 0, - "label": "Loki Datasource", - "name": "loki_datasource", - "options": [ ], - "query": "loki", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": "", - "current": { }, + "allValue": ".+", "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 0, "includeAll": true, "label": "Job", "multi": true, "name": "job", - "options": [ ], - "query": "label_values(couchdb_couch_replicator_cluster_is_stable, job)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "query": "label_values(couchdb_couch_replicator_cluster_is_stable{job=\"integrations/apache-couchdb\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" }, { - "allValue": "", - "current": { }, + "allValue": ".+", "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 2, "includeAll": true, - "label": "Cluster", + "label": "Couchdb_cluster", "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(couchdb_couch_replicator_cluster_is_stable{job=~\"$job\"}, cluster)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + 
"name": "couchdb_cluster", + "query": "label_values(couchdb_couch_replicator_cluster_is_stable{job=\"integrations/apache-couchdb\",job=~\"$job\"}, couchdb_cluster)", + "refresh": 2, + "sort": 1, + "type": "query" }, { - "allValue": "", - "current": { }, + "allValue": ".*", "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 0, "includeAll": true, - "label": "CouchDB cluster", + "label": "Cluster", "multi": true, - "name": "couchdb_cluster", - "options": [ ], - "query": "label_values(couchdb_couch_replicator_cluster_is_stable{job=~\"$job\"}, couchdb_cluster)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "name": "cluster", + "query": "label_values(couchdb_couch_replicator_cluster_is_stable{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" }, { - "allValue": "", - "current": { }, + "allValue": ".+", "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 0, "includeAll": true, "label": "Instance", "multi": true, "name": "instance", - "options": [ ], - "query": "label_values(couchdb_couch_replicator_cluster_is_stable{couchdb_cluster=~\"$couchdb_cluster\"}, instance)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "query": "label_values(couchdb_couch_replicator_cluster_is_stable{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\"}, instance)", + "refresh": 2, + "sort": 1, + "type": "query" }, { - "allValue": "", - "current": { }, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "hide": 0, - "includeAll": true, - "label": "Log level", - "multi": true, - "name": "log_level", - "options": [ ], - "query": 
"label_values(couchdb_couch_log_requests_total{instance=~\"$instance\"}, level)", - "refresh": 1, + "hide": 2, + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "datasource" } ] }, @@ -1631,33 +1030,7 @@ "from": "now-1h", "to": "now" }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, "timezone": "default", "title": "Apache CouchDB nodes", - "uid": "couchdb-nodes", - "version": 0 + "uid": "couchdb_nodes" } \ No newline at end of file diff --git a/apache-couchdb-mixin/dashboards_out/couchdb-overview.json b/apache-couchdb-mixin/dashboards_out/couchdb-overview.json index 7f22a3592..5d9504ed5 100644 --- a/apache-couchdb-mixin/dashboards_out/couchdb-overview.json +++ b/apache-couchdb-mixin/dashboards_out/couchdb-overview.json @@ -1,93 +1,87 @@ { - "__inputs": [ ], - "__requires": [ ], "annotations": { "list": [ ] }, - "description": "", - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, "links": [ { - "asDropdown": false, - "icon": "external link", + "keepTime": true, + "title": "Apache CouchDB logs", + "type": "link", + "url": "couchdb-logs" + }, + { + "keepTime": true, + "title": "Apache CouchDB nodes", + "type": "link", + "url": "couchdb_nodes" + }, + { + "asDropdown": true, "includeVars": true, "keepTime": true, "tags": [ "apache-couchdb-mixin" ], - "targetBlank": false, - "title": "Other Apache CouchDB dashboards", - "type": "dashboards", - "url": "" + "title": "All dashboards", + "type": "dashboards" } ], "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [ ], + "title": "Overview", + "type": "row" + }, { 
"datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "The number of clusters being reported.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "yellow", - "value": null - }, - { - "color": "red", - "value": 0 - }, - { - "color": "green", - "value": 1 - } - ] + "fixedColor": "text", + "mode": "fixed" } - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, "w": 8, "x": 0, - "y": 0 + "y": 1 }, "id": 2, "options": { - "colorMode": "value", + "colorMode": "fixed", "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + ] + } }, - "pluginVersion": "9.2.3", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "count(count by(couchdb_cluster, job) (couchdb_request_time_seconds_count{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\"}))", + "expr": "count(count by(job, couchdb_cluster, cluster) (couchdb_request_time_seconds_count{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ couchdb_cluster }}" + "instant": false, + "legendFormat": "{{ couchdb_cluster }}", + "refId": "Number of clusters" } ], "title": "Number of clusters", @@ -95,66 +89,46 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "The number of nodes being reported.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "red", - "value": 0 - }, - { - "color": "green", - "value": 1 - } - ] + "fixedColor": "text", + 
"mode": "fixed" } - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, "w": 8, "x": 8, - "y": 0 + "y": 1 }, "id": 3, "options": { - "colorMode": "value", + "colorMode": "fixed", "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + ] + } }, - "pluginVersion": "9.2.3", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum(count by(couchdb_cluster, job) (couchdb_request_time_seconds_count{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\"}))", + "expr": "count(count by(job, couchdb_cluster, cluster, instance) (couchdb_request_time_seconds_count{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ couchdb_cluster }}" + "instant": false, + "legendFormat": "{{ couchdb_cluster }}", + "refId": "Number of nodes" } ], "title": "Number of nodes", @@ -162,21 +136,16 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Percentage of clusters that have all nodes that are currently reporting healthy.", "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { - "color": "yellow", - "value": null + "color": "yellow" }, { "color": "red", @@ -193,41 +162,31 @@ ] }, "unit": "percent" - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, "w": 8, "x": 16, - "y": 0 + "y": 1 }, "id": 4, "options": { "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": { }, - "textMode": "auto" + "graphMode": "none" }, - "pluginVersion": "9.2.3", + 
"pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum(min by(job, couchdb_cluster) (couchdb_couch_replicator_cluster_is_stable{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\"})) / count(count by(job, couchdb_cluster) (couchdb_couch_replicator_cluster_is_stable{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\"})) * 100", + "expr": "sum(min by(job, couchdb_cluster, cluster) (couchdb_couch_replicator_cluster_is_stable{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})) / count(count by(job, couchdb_cluster, cluster) (couchdb_couch_replicator_cluster_is_stable{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})) * 100", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ couchdb_cluster }}" + "instant": false, + "legendFormat": "{{ couchdb_cluster }}", + "refId": "Clusters healthy" } ], "title": "Clusters healthy", @@ -235,90 +194,50 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "The total number of file descriptors open aggregated across all nodes.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - 
"color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + } + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 6 + "y": 9 }, "id": 5, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "multi", + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(couchdb_cluster, job) (couchdb_open_os_files_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\"})", + "expr": "sum by(job, couchdb_cluster, cluster) (couchdb_open_os_files_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}}" + "instant": false, + "legendFormat": "{{ couchdb_cluster }}", + "refId": "Open OS files" } ], "title": "Open OS files", @@ -326,90 +245,50 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "The total number of open databases aggregated across all nodes.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": 
"none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + } + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 12, - "y": 6 + "y": 9 }, "id": 6, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "multi", + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster) (couchdb_open_databases_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\"})", + "expr": "sum by(job, couchdb_cluster, cluster) (couchdb_open_databases_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}}" + "instant": false, + "legendFormat": "{{ couchdb_cluster }}", + "refId": "Open databases" } ], "title": "Open databases", @@ -417,90 +296,52 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of database writes aggregated across all nodes.", + "description": "The total number of database writes aggregated across all nodes.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - 
"viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "wps" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 12 + "y": 17 }, "id": 7, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "multi", + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster) (rate(couchdb_database_writes_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\"}[$__rate_interval]))", + "expr": "sum by(job, couchdb_cluster, cluster) (rate(couchdb_database_writes_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{ couchdb_cluster }}", + "refId": "Database writes" } ], "title": "Database writes", @@ -508,90 +349,52 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of database reads aggregated across all nodes.", + "description": "The total number of database reads aggregated across all nodes.", "fieldConfig": { "defaults": { - "color": { - "mode": 
"palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "rps" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 12, - "y": 12 + "y": 17 }, "id": 8, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "multi", + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster) (rate(couchdb_database_reads_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\"}[$__rate_interval]))", + "expr": "sum by(job, couchdb_cluster, cluster) (rate(couchdb_database_reads_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{ couchdb_cluster }}", + "refId": "Database reads" } ], "title": "Database reads", @@ -599,90 +402,52 @@ }, { "datasource": 
{ + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of view reads aggregated across all nodes.", + "description": "The total number of view reads aggregated across all nodes.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "rps" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 8, "x": 0, - "y": 18 + "y": 25 }, "id": 9, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "multi", + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster) (rate(couchdb_httpd_view_reads_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\"}[$__rate_interval]))", + "expr": "sum by(job, couchdb_cluster, cluster) (rate(couchdb_httpd_view_reads_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__rate_interval]))", 
"format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{ couchdb_cluster }}", + "refId": "View reads" } ], "title": "View reads", @@ -690,90 +455,52 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of view requests that timed out aggregated across all nodes.", + "description": "The total number of view requests that timed out aggregated across all nodes.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 8, "x": 8, - "y": 18 + "y": 25 }, "id": 10, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "multi", + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster) 
(rate(couchdb_httpd_view_timeouts_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\"}[$__rate_interval]))", + "expr": "sum by(job, couchdb_cluster, cluster) (rate(couchdb_httpd_view_timeouts_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{ couchdb_cluster }}", + "refId": "View timeouts" } ], "title": "View timeouts", @@ -781,90 +508,52 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of temporary view reads aggregated across all nodes.", + "description": "The total number of temporary view reads aggregated across all nodes.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "rps" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 8, "x": 16, - "y": 18 + "y": 25 }, "id": 11, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - 
"showLegend": true + "displayMode": "list" }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "multi", + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster) (rate(couchdb_httpd_temporary_view_reads_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\"}[$__rate_interval]))", + "expr": "sum by(job, couchdb_cluster, cluster) (rate(couchdb_httpd_temporary_view_reads_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{ couchdb_cluster }}", + "refId": "Temporary view reads" } ], "title": "Temporary view reads", @@ -872,408 +561,270 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 24 + "y": 33 }, "id": 12, - "targets": [ ], + "panels": [ ], "title": "Requests", "type": "row" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of bulk requests aggregated across all nodes.", + "description": "The request rate split by HTTP Method aggregated across all nodes.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, 
- "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 25 + "y": 34 }, "id": 13, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "multi", + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster) (rate(couchdb_httpd_bulk_requests_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\"}[$__rate_interval]))", + "expr": "sum by(job, couchdb_cluster, cluster, method) (increase(couchdb_httpd_request_methods{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__interval:] offset $__interval))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{ couchdb_cluster }} - {{method}}", + "refId": "Request methods" } ], - "title": "Bulk requests", + "title": "Request methods", "type": "timeseries" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The average request latency quantiles aggregated across all nodes.", + "description": "The average request latency aggregated across all nodes.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - 
"drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 12, - "y": 25 + "y": 34 }, "id": 14, "options": { "legend": { "calcs": [ ], - "displayMode": "table", - "placement": "right", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, - "pluginVersion": "9.2.3", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job, couchdb_cluster, quantile) (couchdb_request_time_seconds{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", quantile=~\"0.5\"})", + "expr": "avg by(job, couchdb_cluster, cluster, quantile) (couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", quantile=\"0.5\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}} - p50" + "instant": false, + "legendFormat": "{{ couchdb_cluster }} - p50", + "refId": "Average request latency p50" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job, couchdb_cluster, quantile) (couchdb_request_time_seconds{job=~\"$job\", 
couchdb_cluster=~\"$couchdb_cluster\", quantile=~\"0.75\"})", + "expr": "avg by(job, couchdb_cluster, cluster, quantile) (couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", quantile=\"0.75\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}} - p75" + "instant": false, + "legendFormat": "{{ couchdb_cluster }} - p75", + "refId": "Average request latency p75" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job, couchdb_cluster, quantile) (couchdb_request_time_seconds{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", quantile=~\"0.95\"})", + "expr": "avg by(job, couchdb_cluster, cluster, quantile) (couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", quantile=\"0.95\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}} - p95" + "instant": false, + "legendFormat": "{{ couchdb_cluster }} - p95", + "refId": "Average request latency p95" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job, couchdb_cluster, quantile) (couchdb_request_time_seconds{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", quantile=~\"0.99\"})", + "expr": "avg by(job, couchdb_cluster, cluster, quantile) (couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", quantile=\"0.99\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}} - p99" + "instant": false, + "legendFormat": "{{ couchdb_cluster }} - p99", + "refId": "Average request latency p99" } ], - "title": "Request latency quantiles", + "title": "Average request latency", "type": "timeseries" }, { 
"datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The request rate split by HTTP Method aggregated across all nodes.", + "description": "The total number of bulk requests aggregated across all nodes.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, - "unit": "reqps" - }, - "overrides": [ ] + "unit": "rps" + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 31 + "y": 42 }, "id": 15, "options": { "legend": { "calcs": [ ], - "displayMode": "table", - "placement": "right", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster, method) (rate(couchdb_httpd_request_methods{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\"}[$__rate_interval])) != 0", + "expr": "sum by(job, couchdb_cluster, cluster) 
(rate(couchdb_httpd_bulk_requests_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__rate_interval]))\nor\nsum by(job, couchdb_cluster, cluster) (rate(couchdb_httpd_bulk_requests{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}} - {{method}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{ couchdb_cluster }}", + "refId": "Bulk requests" } ], - "title": "Request methods", + "title": "Bulk requests", "type": "timeseries" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The responses grouped by HTTP status type (2xx, 3xx, 4xx, and 5xx) aggregated across all nodes.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "mappings": [ ] - }, - "overrides": [ ] - }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 12, - "y": 31 + "y": 42 }, "id": 16, - "options": { - "legend": { - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster) (increase(couchdb_httpd_status_codes{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", code=~\"2.*\"}[$__interval:])) != 0", + "expr": "sum by(job, couchdb_cluster, cluster) 
(increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"2.*\"}[$__interval:] offset $__interval))", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}} - 2xx" + "instant": false, + "interval": "2m", + "legendFormat": "{{ couchdb_cluster }} - 2xx", + "refId": "Response status 2XX" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster) (increase(couchdb_httpd_status_codes{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", code=~\"3.*\"}[$__interval:])) != 0", + "expr": "sum by(job, couchdb_cluster, cluster) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"3.*\"}[$__interval:] offset $__interval))", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}} - 3xx" + "instant": false, + "interval": "2m", + "legendFormat": "{{ couchdb_cluster }} - 3xx", + "refId": "Response status 3XX" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster) (increase(couchdb_httpd_status_codes{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", code=~\"4.*\"}[$__interval:])) != 0", + "expr": "sum by(job, couchdb_cluster, cluster) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"4.*\"}[$__interval:] offset $__interval))", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}} - 4xx" + "instant": false, + "interval": "2m", + "legendFormat": "{{ couchdb_cluster }} - 4xx", + "refId": "Response status 4XX" }, { "datasource": 
{ + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster) (increase(couchdb_httpd_status_codes{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", code=~\"5.*\"}[$__interval:])) != 0", + "expr": "sum by(job, couchdb_cluster, cluster) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"5.*\"}[$__interval:] offset $__interval))", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}} - 5xx" + "instant": false, + "interval": "2m", + "legendFormat": "{{ couchdb_cluster }} - 5xx", + "refId": "Response status 5XX" } ], "title": "Response status overview", @@ -1281,90 +832,52 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The response rate split by good HTTP statuses aggregated across all nodes.", + "description": "The total number of good response statuses aggregated across all nodes.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": 
"reqps" - }, - "overrides": [ ] + "unit": "rps" + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 37 + "y": 50 }, "id": 17, "options": { "legend": { "calcs": [ ], - "displayMode": "table", - "placement": "right", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster, code) (rate(couchdb_httpd_status_codes{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", code=~\"[23].*\"}[$__rate_interval])) != 0", + "expr": "sum by(job, couchdb_cluster, cluster) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"[23].*\"}[$__interval:] offset $__interval))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}} - {{code}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{ couchdb_cluster }}", + "refId": "Good response statuses" } ], "title": "Good response statuses", @@ -1372,90 +885,51 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The response rate split by error HTTP statuses aggregated across all nodes.", + "description": "The total number of error response statuses aggregated across all nodes.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - 
"stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, - "unit": "reqps" - }, - "overrides": [ ] + "unit": "rps" + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 12, - "y": 37 + "y": 50 }, "id": 18, "options": { "legend": { "calcs": [ ], - "displayMode": "table", - "placement": "right", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster, code) (rate(couchdb_httpd_status_codes{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\", code=~\"[45].*\"}[$__rate_interval])) != 0", + "expr": "sum by(job, couchdb_cluster, cluster) (couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"[45].*\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}} - {{code}}" + "instant": false, + "legendFormat": "{{ couchdb_cluster }}", + "refId": "Error response statuses" } ], "title": "Error response statuses", @@ -1463,107 +937,63 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 43 + "y": 58 }, "id": 19, - "targets": [ ], + "panels": [ ], "title": "Replication", "type": "row" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "Number of replicator changes manager processor deaths across all nodes.", "fieldConfig": { "defaults": { - "color": { 
- "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + } + } }, "gridPos": { "h": 6, "w": 8, "x": 0, - "y": 44 + "y": 59 }, "id": 20, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "multi", + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster) (increase(couchdb_couch_replicator_changes_manager_deaths_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\"}[$__interval:]))", + "expr": "sum by(job, couchdb_cluster, cluster) (couchdb_couch_replicator_changes_manager_deaths_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}}" + "instant": false, + "legendFormat": "{{ couchdb_cluster}}", + "refId": "Changes manager deaths" } ], "title": "Replicator changes 
manager deaths", @@ -1571,91 +1001,51 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "Number of replicator changes queue processor deaths across all nodes.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, "w": 8, "x": 8, - "y": 44 + "y": 59 }, "id": 21, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "multi", + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster) (increase(couchdb_couch_replicator_changes_queue_deaths_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\"}[$__interval:]))", + "expr": "sum by(job, couchdb_cluster, cluster) 
(couchdb_couch_replicator_changes_queue_deaths_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}}" + "instant": false, + "legendFormat": "{{ couchdb_cluster}}", + "refId": "Changes queue deaths" } ], "title": "Replicator changes queue deaths", @@ -1663,91 +1053,51 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "Number of replicator changes reader processor deaths across all nodes.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, "w": 8, "x": 16, - "y": 44 + "y": 59 }, "id": 22, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "multi", + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": 
"${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster) (increase(couchdb_couch_replicator_changes_reader_deaths_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\"}[$__interval:]))", + "expr": "sum by(job, couchdb_cluster, cluster) (couchdb_couch_replicator_changes_reader_deaths_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}}" + "instant": false, + "legendFormat": "{{ couchdb_cluster}}", + "refId": "Changes reader deaths" } ], "title": "Replicator changes reader deaths", @@ -1755,91 +1105,51 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "Number of replicator connection owner crashes across all nodes.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 50 + "y": 67 }, "id": 23, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - 
"placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "multi", + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster) (increase(couchdb_couch_replicator_connection_owner_crashes_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\"}[$__interval:]))", + "expr": "sum by(job, couchdb_cluster, cluster) (couchdb_couch_replicator_connection_owner_crashes_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}}" + "instant": false, + "legendFormat": "{{ couchdb_cluster}}", + "refId": "Connection owner crashes" } ], "title": "Replicator connection owner crashes", @@ -1847,91 +1157,51 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "Number of replicator connection worker crashes across all nodes.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + 
"lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 12, - "y": 50 + "y": 67 }, "id": 24, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "multi", + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster) (increase(couchdb_couch_replicator_connection_worker_crashes_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\"}[$__interval:]))", + "expr": "sum by(job, couchdb_cluster, cluster) (couchdb_couch_replicator_connection_worker_crashes_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}}" + "instant": false, + "legendFormat": "{{ couchdb_cluster}}", + "refId": "Connection worker crashes" } ], "title": "Replicator connection worker crashes", @@ -1939,272 +1209,191 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Number of replicator job crashes across all nodes.", + "description": "Number of replicator jobs crashes across all nodes.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 56 + "y": 75 }, "id": 25, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "multi", + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster) (increase(couchdb_couch_replicator_jobs_crashes_total{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\"}[$__interval:]))", + "expr": "sum by(job, couchdb_cluster, cluster) (couchdb_couch_replicator_jobs_crashes_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}}" + "instant": false, + "legendFormat": "{{ couchdb_cluster}}", + "refId": "Jobs crashes" } ], - "title": "Replicator job crashes", + "title": "Replicator jobs crashes", "type": "timeseries" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Number of replicator jobs pending across all nodes.", + "description": "Number of replicator jobs queued across all nodes.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": 
"auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ ] + "unit": "none" + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 12, - "y": 56 + "y": 75 }, "id": 26, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "multi", + "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster) (couchdb_couch_replicator_jobs_pending{job=~\"$job\", couchdb_cluster=~\"$couchdb_cluster\"})", + "expr": "sum by(job, couchdb_cluster, cluster) (couchdb_couch_replicator_jobs_pending{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchdb_cluster}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{ couchdb_cluster}}", + "refId": "Jobs queued" } ], - "title": "Replicator jobs pending", + "title": "Replicator jobs queued", "type": "timeseries" } ], "refresh": "1m", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", + "schemaVersion": 39, "tags": [ 
"apache-couchdb-mixin" ], "templating": { "list": [ { - "current": { }, - "hide": 0, - "label": "Data Source", + "label": "Prometheus data source", "name": "prometheus_datasource", - "options": [ ], "query": "prometheus", - "refresh": 1, "regex": "", "type": "datasource" }, { - "allValue": "", - "current": { }, + "allValue": ".+", "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 0, "includeAll": true, "label": "Job", "multi": true, "name": "job", - "options": [ ], - "query": "label_values(couchdb_couch_replicator_cluster_is_stable, job)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "query": "label_values(couchdb_couch_replicator_cluster_is_stable{job=\"integrations/apache-couchdb\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" }, { - "allValue": "", - "current": { }, + "allValue": ".+", "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Couchdb_cluster", + "multi": true, + "name": "couchdb_cluster", + "query": "label_values(couchdb_couch_replicator_cluster_is_stable{job=\"integrations/apache-couchdb\",job=~\"$job\"}, couchdb_cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 2, "includeAll": true, "label": "Cluster", "multi": true, "name": "cluster", - "options": [ ], - "query": "label_values(couchdb_couch_replicator_cluster_is_stable{job=~\"$job\"}, cluster)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "query": "label_values(couchdb_couch_replicator_cluster_is_stable{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" }, { - "allValue": "", - 
"current": { }, + "allValue": ".+", "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 0, "includeAll": true, - "label": "CouchDB cluster", + "label": "Instance", "multi": true, - "name": "couchdb_cluster", - "options": [ ], - "query": "label_values(couchdb_couch_replicator_cluster_is_stable{job=~\"$job\"}, couchdb_cluster)", - "refresh": 1, + "name": "instance", + "query": "label_values(couchdb_couch_replicator_cluster_is_stable{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\"}, instance)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "hide": 2, + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "datasource" } ] }, @@ -2212,33 +1401,7 @@ "from": "now-1h", "to": "now" }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, "timezone": "default", "title": "Apache CouchDB overview", - "uid": "couchdb-overview", - "version": 0 + "uid": "couchdb_overview" } \ No newline at end of file diff --git a/apache-couchdb-mixin/g.libsonnet b/apache-couchdb-mixin/g.libsonnet new file mode 100644 index 000000000..e6a2060ee --- /dev/null +++ b/apache-couchdb-mixin/g.libsonnet @@ -0,0 +1 @@ +import 'github.com/grafana/grafonnet/gen/grafonnet-v11.4.0/main.libsonnet' diff --git a/apache-couchdb-mixin/jsonnetfile.json b/apache-couchdb-mixin/jsonnetfile.json index 65cebf84b..ed5b224a1 100644 --- a/apache-couchdb-mixin/jsonnetfile.json +++ b/apache-couchdb-mixin/jsonnetfile.json @@ -9,7 +9,35 @@ } }, "version": "master" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "common-lib" + } + }, 
+ "version": "master" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "grafana-cloud-integration-utils" + } + }, + "version": "master" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "logs-lib" + } + }, + "version": "master" } ], "legacyImports": true -} + } + \ No newline at end of file diff --git a/apache-couchdb-mixin/links.libsonnet b/apache-couchdb-mixin/links.libsonnet new file mode 100644 index 000000000..bb750a55c --- /dev/null +++ b/apache-couchdb-mixin/links.libsonnet @@ -0,0 +1,27 @@ +local g = import './g.libsonnet'; + +{ + local link = g.dashboard.link, + new(this): /* builds the dashboard links; every URL is the absolute path '/d/<uid>' so it resolves from any page */ + { + couchdbOverview: + link.link.new(this.config.dashboardNamePrefix + ' overview', '/d/' + this.grafana.dashboards['couchdb-overview.json'].uid) + + link.link.options.withKeepTime(true), + + couchdbNodes: + link.link.new(this.config.dashboardNamePrefix + ' nodes', '/d/' + this.grafana.dashboards['couchdb-nodes.json'].uid) + + link.link.options.withKeepTime(true), + + otherDashboards: + link.dashboards.new('All dashboards', this.config.dashboardTags) + + link.dashboards.options.withIncludeVars(true) + + link.dashboards.options.withKeepTime(true) + + link.dashboards.options.withAsDropdown(true), + } + + + if this.config.enableLokiLogs then { + couchdbLogs: + link.link.new(this.config.dashboardNamePrefix + ' logs', '/d/' + this.grafana.dashboards['couchdb-logs.json'].uid) + + link.link.options.withKeepTime(true), + } else {}, +} diff --git a/apache-couchdb-mixin/main.libsonnet b/apache-couchdb-mixin/main.libsonnet new file mode 100644 index 000000000..ee790fd83 --- /dev/null +++ b/apache-couchdb-mixin/main.libsonnet @@ -0,0 +1,49 @@ +local alerts = import './alerts.libsonnet'; +local config = import './config.libsonnet'; +local dashboards = import './dashboards.libsonnet'; +local links = import './links.libsonnet'; +local panels = import './panels.libsonnet'; +local rows = import
'./rows.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; + +{ + withConfigMixin(config): { + config+: config, + }, + + new(): { + + local this = self, + config: config, + + signals: + { + [sig]: commonlib.signals.unmarshallJsonMulti( + this.config.signals[sig], + type=this.config.metricsSource + ) + for sig in std.objectFields(this.config.signals) + }, + + grafana: { + variables: commonlib.variables.new( + filteringSelector=this.config.filteringSelector, + groupLabels=this.config.groupLabels, + instanceLabels=this.config.instanceLabels, + varMetric='couchdb_couch_replicator_cluster_is_stable', + customAllValue='.+', + enableLokiLogs=this.config.enableLokiLogs, + ), + annotations: {}, + links: links.new(this), + panels: panels.new(this), + dashboards: dashboards.new(this), + rows: rows.new(this), + }, + + prometheus: { + alerts: alerts.new(this), + recordingRules: {}, + }, + }, +} diff --git a/apache-couchdb-mixin/mixin.libsonnet b/apache-couchdb-mixin/mixin.libsonnet index 4d987cf31..4b3a5bf89 100644 --- a/apache-couchdb-mixin/mixin.libsonnet +++ b/apache-couchdb-mixin/mixin.libsonnet @@ -1,3 +1,35 @@ -(import 'dashboards/dashboards.libsonnet') + -(import 'alerts/alerts.libsonnet') + -(import 'config.libsonnet') +local prestolib = import './main.libsonnet'; +local config = (import './config.libsonnet'); +local util = import 'grafana-cloud-integration-utils/util.libsonnet'; + +local presto = + prestolib.new() + + prestolib.withConfigMixin( + { + filteringSelector: config.filteringSelector, + uid: config.uid, + enableLokiLogs: config.enableLokiLogs, + } + ); + +local optional_labels = { /* per-variable overrides handed to util.patch_variables; keys are expected to equal the dashboard variable names (cluster, couchdb_cluster) */ + cluster+: { + allValue: '.*', + }, + couchdb_cluster+: { + label: 'CouchDB cluster', + allValue: '.*', + }, +}; + +{ + grafanaDashboards+:: { + [fname]: + local dashboard = presto.grafana.dashboards[fname]; + dashboard + util.patch_variables(dashboard, optional_labels) + + for fname in std.objectFields(presto.grafana.dashboards) + }, + prometheusAlerts+::
presto.prometheus.alerts, + prometheusRules+:: presto.prometheus.recordingRules, +} diff --git a/apache-couchdb-mixin/panels.libsonnet b/apache-couchdb-mixin/panels.libsonnet new file mode 100644 index 000000000..d5c41520f --- /dev/null +++ b/apache-couchdb-mixin/panels.libsonnet @@ -0,0 +1,349 @@ +local g = import './g.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; + +{ + new(this):: + { + local signals = this.signals, + /** + * Overview dashboard panels + */ + overviewNumberOfClustersPanel: + commonlib.panels.generic.stat.info.new( + 'Number of clusters', + targets=[signals.overview.clusterCount.asTarget()], + ) + g.panel.stat.panelOptions.withDescription('The number of clusters being reported.'), + + overviewNumberOfNodesPanel: + commonlib.panels.generic.stat.info.new( + 'Number of nodes', + targets=[signals.overview.nodeCount.asTarget()], + ) + g.panel.stat.panelOptions.withDescription('The number of nodes being reported.'), + + overviewClusterHealthPanel: + g.panel.stat.new(title='Clusters healthy') + + g.panel.stat.queryOptions.withTargets([signals.overview.clusterHealth.asTarget()]) + + g.panel.stat.panelOptions.withDescription('Percentage of clusters that have all nodes that are currently reporting healthy.') + + g.panel.stat.standardOptions.withUnit('percent') + + g.panel.stat.options.withColorMode('value') + + g.panel.stat.options.withGraphMode('none') + + g.panel.stat.standardOptions.thresholds.withSteps([ + g.panel.stat.thresholdStep.withColor('yellow'), + g.panel.stat.thresholdStep.withColor('red') + g.panel.stat.thresholdStep.withValue(0), + g.panel.stat.thresholdStep.withColor('yellow') + g.panel.stat.thresholdStep.withValue(1), + g.panel.stat.thresholdStep.withColor('green') + g.panel.stat.thresholdStep.withValue(100), + ]), + + overviewOpenOSFilesPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Open OS files', + targets=[signals.overview.openOSFiles.asTarget()], + ) + + 
g.panel.timeSeries.panelOptions.withDescription('The total number of file descriptors open aggregated across all nodes.'), + + overviewOpenDatabasesPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Open databases', + targets=[signals.overview.openDatabases.asTarget()], + ) + + g.panel.timeSeries.panelOptions.withDescription('The total number of open databases aggregated across all nodes.'), + + overviewDatabaseWritesPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Database writes', + targets=[signals.overview.databaseWrites.asTarget() { interval: '2m' }], + ) + + g.panel.timeSeries.panelOptions.withDescription('The total number of database writes aggregated across all nodes.') + + g.panel.timeSeries.standardOptions.withUnit('wps'), + + overviewDatabaseReadsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Database reads', + targets=[signals.overview.databaseReads.asTarget() { interval: '2m' }], + ) + + g.panel.timeSeries.panelOptions.withDescription('The total number of database reads aggregated across all nodes.') + + g.panel.timeSeries.standardOptions.withUnit('rps'), + + overviewViewReadsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'View reads', + targets=[signals.overview.viewReads.asTarget() { interval: '2m' }], + ) + + g.panel.timeSeries.panelOptions.withDescription('The total number of view reads aggregated across all nodes.') + + g.panel.timeSeries.standardOptions.withUnit('rps'), + + + overviewViewTimeoutsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'View timeouts', + targets=[signals.overview.viewTimeouts.asTarget() { interval: '2m' }], + ) + + g.panel.timeSeries.panelOptions.withDescription('The total number of view requests that timed out aggregated across all nodes.') + + g.panel.timeSeries.standardOptions.withUnit('reqps'), + + overviewTemporaryViewReadsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Temporary view reads', + targets=[signals.overview.temporaryViewReads.asTarget() { interval: 
'2m' }], + ) + + g.panel.timeSeries.panelOptions.withDescription('The total number of temporary view reads aggregated across all nodes.') + + g.panel.timeSeries.standardOptions.withUnit('rps'), + + overviewRequestMethodsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Request methods', + targets=[signals.overview.requestMethods.asTarget() { interval: '2m' }], + ) + + g.panel.timeSeries.panelOptions.withDescription('The request rate split by HTTP Method aggregated across all nodes.') + + g.panel.timeSeries.standardOptions.withUnit('reqps'), + + overviewAverageRequestLatencyPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Average request latency', + targets=[ + signals.overview.averageRequestLatencyp50.asTarget(), + signals.overview.averageRequestLatencyp75.asTarget(), + signals.overview.averageRequestLatencyp95.asTarget(), + signals.overview.averageRequestLatencyp99.asTarget(), + ], + ) + + g.panel.timeSeries.panelOptions.withDescription('The average request latency aggregated across all nodes.') + + g.panel.timeSeries.standardOptions.withUnit('s'), + + overviewBulkRequestsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Bulk requests', + targets=[signals.overview.bulkRequests.asTarget() { interval: '2m' }], + ) + + g.panel.timeSeries.panelOptions.withDescription('The total number of bulk requests aggregated across all nodes.') + + g.panel.timeSeries.standardOptions.withUnit('rps'), + + overviewResponseStatusOverviewPanel: + g.panel.pieChart.new('Response status overview') + + g.panel.pieChart.queryOptions.withTargets([ + signals.overview.responseStatus2xx.asTarget() { interval: '2m' }, + signals.overview.responseStatus3xx.asTarget() { interval: '2m' }, + signals.overview.responseStatus4xx.asTarget() { interval: '2m' }, + signals.overview.responseStatus5xx.asTarget() { interval: '2m' }, + ]) + g.panel.pieChart.panelOptions.withDescription('The responses grouped by HTTP status type (2xx, 3xx, 4xx, and 5xx) aggregated across all nodes.'), + 
+ overviewGoodResponseStatusesPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Good response statuses', + targets=[signals.overview.goodResponseStatuses.asTarget() { interval: '2m' }], + ) + + g.panel.timeSeries.panelOptions.withDescription('The total number of good response statuses aggregated across all nodes.') + + g.panel.timeSeries.standardOptions.withUnit('rps'), + + overviewErrorResponseStatusesPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Error response statuses', + targets=[signals.overview.errorResponseStatuses.asTarget()], + ) + + g.panel.timeSeries.panelOptions.withDescription('The total number of error response statuses aggregated across all nodes.') + + g.panel.timeSeries.standardOptions.withUnit('rps'), + + + overviewReplicatorChangesManagerDeathsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Replicator changes manager deaths', + targets=[signals.replicator.changesManagerDeaths.asTarget()], + ) + + g.panel.timeSeries.panelOptions.withDescription('Number of replicator changes manager processor deaths across all nodes.'), + + overviewReplicatorChangesQueueDeathsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Replicator changes queue deaths', + targets=[signals.replicator.changesQueueDeaths.asTarget()], + ) + + g.panel.timeSeries.panelOptions.withDescription('Number of replicator changes queue processor deaths across all nodes.') + + g.panel.timeSeries.standardOptions.withUnit('none'), + + overviewReplicatorChangesReaderDeathsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Replicator changes reader deaths', + targets=[signals.replicator.changesReaderDeaths.asTarget()], + ) + + g.panel.timeSeries.panelOptions.withDescription('Number of replicator changes reader processor deaths across all nodes.') + + g.panel.timeSeries.standardOptions.withUnit('none'), + + overviewReplicatorConnectionOwnerCrashesPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Replicator connection owner crashes', + 
targets=[signals.replicator.connectionOwnerCrashes.asTarget()], + ) + + g.panel.timeSeries.panelOptions.withDescription('Number of replicator connection owner crashes across all nodes.') + + g.panel.timeSeries.standardOptions.withUnit('none'), + + overviewReplicatorConnectionWorkerCrashesPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Replicator connection worker crashes', + targets=[signals.replicator.connectionWorkerCrashes.asTarget()], + ) + + g.panel.timeSeries.panelOptions.withDescription('Number of replicator connection worker crashes across all nodes.') + + g.panel.timeSeries.standardOptions.withUnit('none'), + + overviewReplicatorJobsCrashesPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Replicator jobs crashes', + targets=[signals.replicator.jobsCrashes.asTarget()], + ) + + g.panel.timeSeries.panelOptions.withDescription('Number of replicator jobs crashes across all nodes.') + + g.panel.timeSeries.standardOptions.withUnit('none'), + + overviewReplicatorJobsQueuedPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Replicator jobs queued', + targets=[signals.replicator.jobsQueued.asTarget() { interval: '2m' }], + ) + + g.panel.timeSeries.panelOptions.withDescription('Number of replicator jobs queued across all nodes.') + + g.panel.timeSeries.standardOptions.withUnit('none'), + + + /** + Node dashboard panels + */ + + nodeErlangMemoryUsagePanel: + commonlib.panels.memory.timeSeries.usageBytes.new( + 'Erlang memory usage', + targets=[signals.nodes.erlangMemoryUsage.asTarget()], + ) + + g.panel.timeSeries.standardOptions.withUnit('decbytes') + + g.panel.timeSeries.panelOptions.withDescription("The amount of memory used by a node's Erlang Virtual Machine."), + + nodeOpenOSFilesPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Open OS files', + targets=[signals.nodes.openOSFiles.asTarget()], + ) + + g.panel.timeSeries.panelOptions.withDescription('The total number of file descriptors open on a node.') + + 
g.panel.timeSeries.standardOptions.withUnit('none'), + + nodeOpenDatabasesPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Open databases', + targets=[signals.nodes.openDatabases.asTarget()], + ) + + g.panel.timeSeries.panelOptions.withDescription('The total number of open databases on a node.') + + g.panel.timeSeries.standardOptions.withUnit(value='none'), + + nodeDatabaseWritesPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Database writes', + targets=[signals.nodes.databaseWrites.asTarget() { interval: '2m' }], + ) + + g.panel.timeSeries.panelOptions.withDescription('The total number of database writes on a node.') + + g.panel.timeSeries.standardOptions.withUnit('wps'), + + nodeDatabaseReadsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Database reads', + targets=[signals.nodes.databaseReads.asTarget() { interval: '2m' }], + ) + + g.panel.timeSeries.panelOptions.withDescription('The total number of database reads on a node.') + + g.panel.timeSeries.standardOptions.withUnit('rps'), + + nodeViewReadsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'View reads', + targets=[signals.nodes.viewReads.asTarget() { interval: '2m' }], + ) + + g.panel.timeSeries.panelOptions.withDescription('The total number of view reads on a node.') + + g.panel.timeSeries.standardOptions.withUnit('rps'), + + nodeViewTimeoutsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'View timeouts', + targets=[signals.nodes.viewTimeouts.asTarget() { interval: '2m' }], + ) + + g.panel.timeSeries.panelOptions.withDescription('The total number of view requests that timed out on a node.') + + g.panel.timeSeries.standardOptions.withUnit('reqps'), + + nodeTemporaryViewReadsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Temporary view reads', + targets=[signals.nodes.temporaryViewReads.asTarget() { interval: '2m' }], + ) + + g.panel.timeSeries.panelOptions.withDescription('The total number of temporary view reads on a node.') + + 
g.panel.timeSeries.standardOptions.withUnit('rps'), + + nodeRequestMethodsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Request methods', + targets=[signals.nodes.requestMethods.asTarget() { interval: '2m' }], + ) + + g.panel.timeSeries.panelOptions.withDescription('The request rate split by HTTP Method for a node.') + + g.panel.timeSeries.standardOptions.withUnit('reqps'), + + nodeAverageRequestLatencyPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Average request latency', + targets=[ + signals.nodes.requestLatencyp50.asTarget(), + signals.nodes.requestLatencyp75.asTarget(), + signals.nodes.requestLatencyp95.asTarget(), + signals.nodes.requestLatencyp99.asTarget(), + ], + ) + + g.panel.timeSeries.panelOptions.withDescription('The average request latency for a node.') + + g.panel.timeSeries.standardOptions.withUnit('s'), + + nodeBulkRequestsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Bulk requests', + targets=[signals.nodes.bulkRequests.asTarget() { interval: '2m' }], + ) + + g.panel.timeSeries.panelOptions.withDescription('The total number of bulk requests on a node.') + + g.panel.timeSeries.standardOptions.withUnit('reqps'), + + nodeResponseStatusOverviewPanel: + g.panel.pieChart.new('Response status overview') + + g.panel.pieChart.queryOptions.withTargets([ + signals.nodes.responseStatus2xx.asTarget() { interval: '2m' }, + signals.nodes.responseStatus3xx.asTarget() { interval: '2m' }, + signals.nodes.responseStatus4xx.asTarget() { interval: '2m' }, + signals.nodes.responseStatus5xx.asTarget() { interval: '2m' }, + ]) + + g.panel.pieChart.panelOptions.withDescription('The responses grouped by HTTP status type (2xx, 3xx, 4xx, and 5xx) for a node.') + + g.panel.pieChart.standardOptions.withUnit('none'), + + nodeRequestLatencyPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Request latency', + targets=[ + signals.nodes.requestLatencyp50.asTarget(), + signals.nodes.requestLatencyp75.asTarget(), +
signals.nodes.requestLatencyp95.asTarget(), + signals.nodes.requestLatencyp99.asTarget(), + ], + ) + + g.panel.timeSeries.panelOptions.withDescription('The request latency for a node.') + + g.panel.timeSeries.standardOptions.withUnit('s'), + + nodeGoodResponseStatusesPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Good response statuses', + targets=[signals.nodes.goodResponseStatuses.asTarget() { interval: '2m' }], + ) + + g.panel.timeSeries.panelOptions.withDescription('The total number of good response statuses on a node.') + + g.panel.timeSeries.standardOptions.withUnit('reqps'), + + nodeErrorResponseStatusesPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Error response statuses', + targets=[signals.nodes.errorResponseStatuses.asTarget() { interval: '2m' }], + ) + + g.panel.timeSeries.panelOptions.withDescription('The total number of error response statuses on a node.') + + g.panel.timeSeries.standardOptions.withUnit('reqps'), + + + nodeLogTypesPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Log types', + targets=[signals.nodes.logTypes.asTarget() { interval: '2m' }], + ) + + g.panel.timeSeries.panelOptions.withDescription('The number of logged messages for a node.') + + g.panel.timeSeries.standardOptions.withUnit('none'), + }, +} diff --git a/apache-couchdb-mixin/prometheus_rules_out/prometheus_alerts.yaml b/apache-couchdb-mixin/prometheus_rules_out/prometheus_alerts.yaml index ec8923844..7af93a67a 100644 --- a/apache-couchdb-mixin/prometheus_rules_out/prometheus_alerts.yaml +++ b/apache-couchdb-mixin/prometheus_rules_out/prometheus_alerts.yaml @@ -73,7 +73,7 @@ groups: for: 5m labels: severity: warning - - alert: CouchDBReplicatorConnectionOwnersCrashing + - alert: CouchDBReplicatorOwnersCrashing annotations: description: '{{ printf "%.0f" $value }} replicator connection owner processes have crashed over the last 5 minutes on {{$labels.instance}}, which is above the threshold of 0. 
' summary: There are replicator connection owner process crashes for a node. @@ -82,7 +82,7 @@ groups: for: 5m labels: severity: warning - - alert: CouchDBReplicatorConnectionWorkersCrashing + - alert: CouchDBReplicatorWorkersCrashing annotations: description: '{{ printf "%.0f" $value }} replicator connection worker processes have crashed over the last 5 minutes on {{$labels.instance}}, which is above the threshold of 0. ' summary: There are replicator connection worker process crashes for a node. diff --git a/apache-couchdb-mixin/rows.libsonnet b/apache-couchdb-mixin/rows.libsonnet new file mode 100644 index 000000000..b6d588aa1 --- /dev/null +++ b/apache-couchdb-mixin/rows.libsonnet @@ -0,0 +1,93 @@ +local g = import './g.libsonnet'; + +{ + new(this): { + overview: + g.panel.row.new('Overview') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels( + [ + this.grafana.panels.overviewNumberOfClustersPanel { gridPos+: { h: 6, w: 8 } }, + this.grafana.panels.overviewNumberOfNodesPanel { gridPos+: { h: 6, w: 8 } }, + this.grafana.panels.overviewClusterHealthPanel { gridPos+: { h: 6, w: 8 } }, + this.grafana.panels.overviewOpenOSFilesPanel { gridPos+: { w: 12 } }, + this.grafana.panels.overviewOpenDatabasesPanel { gridPos+: { w: 12 } }, + this.grafana.panels.overviewDatabaseWritesPanel { gridPos+: { w: 12 } }, + this.grafana.panels.overviewDatabaseReadsPanel { gridPos+: { w: 12 } }, + this.grafana.panels.overviewViewReadsPanel { gridPos+: { w: 8 } }, + this.grafana.panels.overviewViewTimeoutsPanel { gridPos+: { w: 8 } }, + this.grafana.panels.overviewTemporaryViewReadsPanel { gridPos+: { w: 8 } }, + ], + ), + + overviewRequests: + g.panel.row.new('Requests') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels( + [ + this.grafana.panels.overviewRequestMethodsPanel { gridPos+: { w: 12 } }, + this.grafana.panels.overviewAverageRequestLatencyPanel { gridPos+: { w: 12 } }, + this.grafana.panels.overviewBulkRequestsPanel { gridPos+: { w: 12 } }, + 
this.grafana.panels.overviewResponseStatusOverviewPanel { gridPos+: { w: 12 } }, + this.grafana.panels.overviewGoodResponseStatusesPanel { gridPos+: { w: 12 } }, + this.grafana.panels.overviewErrorResponseStatusesPanel { gridPos+: { w: 12 } }, + ], + ), + + overviewReplication: + g.panel.row.new('Replication') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels( + [ + this.grafana.panels.overviewReplicatorChangesManagerDeathsPanel { gridPos+: { h: 6, w: 8 } }, + this.grafana.panels.overviewReplicatorChangesQueueDeathsPanel { gridPos+: { h: 6, w: 8 } }, + this.grafana.panels.overviewReplicatorChangesReaderDeathsPanel { gridPos+: { h: 6, w: 8 } }, + this.grafana.panels.overviewReplicatorConnectionOwnerCrashesPanel { gridPos+: { w: 12 } }, + this.grafana.panels.overviewReplicatorConnectionWorkerCrashesPanel { gridPos+: { w: 12 } }, + this.grafana.panels.overviewReplicatorJobsCrashesPanel { gridPos+: { w: 12 } }, + this.grafana.panels.overviewReplicatorJobsQueuedPanel { gridPos+: { w: 12 } }, + ], + ), + + nodes: + g.panel.row.new('Nodes') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels( + [ + this.grafana.panels.nodeErlangMemoryUsagePanel { gridPos+: { h: 6, w: 8 } }, + this.grafana.panels.nodeOpenOSFilesPanel { gridPos+: { h: 6, w: 8 } }, + this.grafana.panels.nodeOpenDatabasesPanel { gridPos+: { h: 6, w: 8 } }, + this.grafana.panels.nodeDatabaseWritesPanel { gridPos+: { w: 12 } }, + this.grafana.panels.nodeDatabaseReadsPanel { gridPos+: { w: 12 } }, + this.grafana.panels.nodeViewReadsPanel { gridPos+: { h: 6, w: 8 } }, + this.grafana.panels.nodeViewTimeoutsPanel { gridPos+: { h: 6, w: 8 } }, + this.grafana.panels.nodeTemporaryViewReadsPanel { gridPos+: { h: 6, w: 8 } }, + ], + ), + + nodeRequests: + g.panel.row.new('Requests') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels( + [ + this.grafana.panels.nodeBulkRequestsPanel { gridPos+: { w: 12 } }, + this.grafana.panels.nodeAverageRequestLatencyPanel { gridPos+: { w: 12 } }, 
+ this.grafana.panels.nodeRequestMethodsPanel { gridPos+: { w: 12 } }, + this.grafana.panels.nodeResponseStatusOverviewPanel { gridPos+: { w: 12 } }, + this.grafana.panels.nodeGoodResponseStatusesPanel { gridPos+: { w: 12 } }, + this.grafana.panels.nodeErrorResponseStatusesPanel { gridPos+: { w: 12 } }, + ], + ), + + nodeLogs: + g.panel.row.new('Logs') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels( + [ + this.grafana.panels.nodeLogTypesPanel { gridPos+: { w: 24 } }, + ], + ), + }, + + +} diff --git a/apache-couchdb-mixin/signals/nodes.libsonnet b/apache-couchdb-mixin/signals/nodes.libsonnet new file mode 100644 index 000000000..7c0db25b1 --- /dev/null +++ b/apache-couchdb-mixin/signals/nodes.libsonnet @@ -0,0 +1,318 @@ +function(this) { + local legendCustomTemplate = std.join(' ', std.map(function(label) '{{' + label + '}}', this.instanceLabels)), + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + legendCustomTemplate: legendCustomTemplate, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + discoveryMetric: { + prometheus: 'couchdb_couch_replicator_cluster_is_stable', + }, + + signals: { + erlangMemoryUsage: { + name: 'Erlang memory usage', + nameShort: 'Erlang memory', + type: 'gauge', + description: "The amount of memory used by a node's Erlang Virtual Machine.", + unit: 'decbytes', + sources: { + prometheus: { + expr: 'couchdb_erlang_memory_bytes{%(queriesSelector)s, memory_type="total"}', + }, + }, + }, + + openOSFiles: { + name: 'Open OS files', + nameShort: 'Open OS files', + type: 'gauge', + description: 'The total number of file descriptors open on a node.', + unit: 'none', + sources: { + prometheus: { + expr: 'couchdb_open_os_files_total{%(queriesSelector)s}', + }, + + }, + }, + + openDatabases: { + name: 'Open databases', + nameShort: 'Open databases', + type: 'gauge', + description: 'The total number of open databases on a node.', + unit: 
'none', + sources: { + prometheus: { + expr: 'couchdb_open_databases_total{%(queriesSelector)s}', + }, + }, + }, + + databaseWrites: { + name: 'Database writes', + nameShort: 'Database writes', + type: 'counter', + description: 'The total number of database writes on a node.', + unit: 'wps', + sources: { + prometheus: { + expr: 'couchdb_database_writes_total{%(queriesSelector)s}', + }, + }, + }, + + databaseReads: { + name: 'Database reads', + nameShort: 'Database reads', + type: 'counter', + description: 'The total number of database reads on a node.', + unit: 'rps', + sources: { + prometheus: { + expr: 'couchdb_database_reads_total{%(queriesSelector)s}', + }, + }, + }, + + viewReads: { + name: 'View reads', + nameShort: 'View reads', + type: 'counter', + description: 'The total number of view reads on a node.', + unit: 'rps', + sources: { + prometheus: { + expr: 'couchdb_httpd_view_reads_total{%(queriesSelector)s}', + }, + }, + }, + + viewTimeouts: { + name: 'View timeouts', + nameShort: 'View timeouts', + type: 'counter', + description: 'The total number of view requests that timed out on a node.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'couchdb_httpd_view_timeouts_total{%(queriesSelector)s}', + }, + }, + }, + + temporaryViewReads: { + name: 'Temporary view reads', + nameShort: 'Temporary view reads', + type: 'counter', + description: 'The number of temporary view reads on a node.', + unit: 'rps', + sources: { + prometheus: { + expr: 'couchdb_httpd_temporary_view_reads_total{%(queriesSelector)s}', + }, + }, + }, + + + // requests + + requestMethods: { + name: 'Request methods', + nameShort: 'Request methods', + type: 'counter', + description: 'The request rate split by HTTP Method for a node.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'couchdb_httpd_request_methods{%(queriesSelector)s}', + legendCustomTemplate: legendCustomTemplate + ' - {{method}}', + }, + }, + }, + + requestLatencyp50: { + name: 'Request latency p50', + nameShort: 
'Request latency p50', + type: 'gauge', + description: 'The 50th percentile of request latency for a node.', + unit: 's', + sources: { + prometheus: { + expr: 'couchdb_request_time_seconds{%(queriesSelector)s, quantile="0.5"}', + legendCustomTemplate: legendCustomTemplate + ' - p50', + }, + }, + }, + + requestLatencyp75: { + name: 'Request latency p75', + nameShort: 'Request latency p75', + type: 'gauge', + description: 'The 75th percentile of request latency for a node.', + unit: 's', + sources: { + prometheus: { + expr: 'couchdb_request_time_seconds{%(queriesSelector)s, quantile="0.75"}', + legendCustomTemplate: legendCustomTemplate + ' - p75', + }, + }, + }, + + requestLatencyp95: { + name: 'Request latency p95', + nameShort: 'Request latency p95', + type: 'gauge', + description: 'The 95th percentile of request latency for a node.', + unit: 's', + sources: { + prometheus: { + expr: 'couchdb_request_time_seconds{%(queriesSelector)s, quantile="0.95"}', + legendCustomTemplate: legendCustomTemplate + ' - p95', + }, + }, + }, + + requestLatencyp99: { + name: 'Request latency p99', + nameShort: 'Request latency p99', + type: 'gauge', + description: 'The 99th percentile of request latency for a node.', + unit: 's', + sources: { + prometheus: { + expr: 'couchdb_request_time_seconds{%(queriesSelector)s, quantile="0.99"}', + legendCustomTemplate: legendCustomTemplate + ' - p99', + }, + }, + }, + + + bulkRequests: { + name: 'Bulk requests', + nameShort: 'Bulk requests', + type: 'counter', + description: 'The number of bulk requests on a node.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'couchdb_httpd_bulk_requests_total{%(queriesSelector)s}', + }, + }, + }, + + responseStatus2xx: { + name: 'Response status 2XX', + nameShort: 'Response status 2XX', + type: 'gauge', + description: 'The number of response status 2XX on a node.', + unit: 'requests', + aggLevel: 'instance', + aggFunction: 'sum', + sources: { + prometheus: { + expr: 
'couchdb_httpd_status_codes{%(queriesSelector)s, code=~"2.*"}', + rangeFunction: 'increase', + legendCustomTemplate: legendCustomTemplate + ' - 2xx', + }, + }, + }, + + responseStatus3xx: { + name: 'Response status 3XX', + nameShort: 'Response status 3XX', + type: 'gauge', + description: 'The number of response status 3XX on a node.', + unit: 'requests', + aggLevel: 'instance', + aggFunction: 'sum', + sources: { + prometheus: { + expr: 'couchdb_httpd_status_codes{%(queriesSelector)s, code=~"3.*"}', + legendCustomTemplate: legendCustomTemplate + ' - 3xx', + rangeFunction: 'increase', + }, + }, + }, + + responseStatus4xx: { + name: 'Response status 4XX', + nameShort: 'Response status 4XX', + type: 'gauge', + description: 'The number of response status 4XX on a node.', + unit: 'requests', + aggLevel: 'instance', + aggFunction: 'sum', + sources: { + prometheus: { + expr: 'couchdb_httpd_status_codes{%(queriesSelector)s, code=~"4.*"}', + rangeFunction: 'increase', + legendCustomTemplate: legendCustomTemplate + ' - 4xx', + }, + }, + }, + + responseStatus5xx: { + name: 'Response status 5XX', + nameShort: 'Response status 5XX', + type: 'gauge', + description: 'The number of response status 5XX on a node.', + unit: 'requests', + aggLevel: 'instance', + aggFunction: 'sum', + sources: { + prometheus: { + expr: 'couchdb_httpd_status_codes{%(queriesSelector)s, code=~"5.*"}', + rangeFunction: 'increase', + legendCustomTemplate: legendCustomTemplate + ' - 5xx', + }, + }, + }, + + goodResponseStatuses: { + name: 'Good response statuses', + nameShort: 'Good response status', + type: 'counter', + description: 'The response rate split by good HTTP statuses for a node.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'couchdb_httpd_status_codes{%(queriesSelector)s, code=~"[23].*"}', + legendCustomTemplate: legendCustomTemplate + ' - {{code}}', + }, + }, + }, + + errorResponseStatuses: { + name: 'Error response statuses', + nameShort: 'Error response status', + type: 'counter', + 
description: 'The response rate split by error HTTP statuses for a node.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'couchdb_httpd_status_codes{%(queriesSelector)s, code=~"[45].*"}', + legendCustomTemplate: legendCustomTemplate + ' - {{code}}', + }, + }, + }, + + logTypes: { + name: 'Log types', + nameShort: 'Log types', + type: 'counter', + description: 'The number of logged messages for a node.', + unit: 'none', + sources: { + prometheus: { + expr: 'couchdb_couch_log_requests_total{%(queriesSelector)s}', + rangeFunction: 'increase', + legendCustomTemplate: legendCustomTemplate + ' - {{level}}', + }, + }, + }, + }, +} diff --git a/apache-couchdb-mixin/signals/overview.libsonnet b/apache-couchdb-mixin/signals/overview.libsonnet new file mode 100644 index 000000000..d76aed3f4 --- /dev/null +++ b/apache-couchdb-mixin/signals/overview.libsonnet @@ -0,0 +1,313 @@ +function(this) { + local legendCustomTemplate = '{{ couchdb_cluster }}', + local groupLabelAggTerm = std.join(', ', this.groupLabels), + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + legendCustomTemplate: legendCustomTemplate, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + discoveryMetric: { + prometheus: 'couchdb_couch_replicator_cluster_is_stable', + }, + signals: { + clusterCount: { + name: 'Number of clusters', + nameShort: 'Clusters', + type: 'raw', + description: 'The number of clusters being reported.', + unit: 'none', + sources: { + prometheus: { + expr: 'count(count by(' + groupLabelAggTerm + ') (couchdb_request_time_seconds_count{%(queriesSelector)s}))', + }, + }, + }, + + nodeCount: { + name: 'Number of nodes', + nameShort: 'Nodes', + type: 'raw', + description: 'The number of nodes being reported.', + unit: 'none', + sources: { + prometheus: { + expr: 'count(count by(' + groupLabelAggTerm + ', instance) (couchdb_request_time_seconds_count{%(queriesSelector)s}))', + }, + }, + }, + 
+ clusterHealth: { + name: 'Clusters healthy', + nameShort: 'Cluster healthy', + type: 'raw', + description: 'Percentage of clusters that have all nodes that are currently reporting healthy.', + unit: 'percent', + sources: { + prometheus: { + expr: 'sum(min by(' + groupLabelAggTerm + ') (couchdb_couch_replicator_cluster_is_stable{%(queriesSelector)s})) / count(count by(' + groupLabelAggTerm + ') (couchdb_couch_replicator_cluster_is_stable{%(queriesSelector)s})) * 100', + }, + }, + }, + + openOSFiles: { + name: 'Open OS files', + nameShort: 'Open OS files', + type: 'raw', + description: 'The total number of file descriptors open aggregated across all nodes.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (couchdb_open_os_files_total{%(queriesSelector)s})', + }, + }, + }, + + openDatabases: { + name: 'Open databases', + nameShort: 'Open databases', + type: 'raw', + description: 'The total number of open databases aggregated across all nodes.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (couchdb_open_databases_total{%(queriesSelector)s})', + }, + }, + }, + + databaseWrites: { + name: 'Database writes', + nameShort: 'Database writes', + type: 'raw', + description: 'The total number of database writes aggregated across all nodes.', + unit: 'wps', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (rate(couchdb_database_writes_total{%(queriesSelector)s}[$__rate_interval]))', + }, + }, + }, + + databaseReads: { + name: 'Database reads', + nameShort: 'Database reads', + type: 'raw', + description: 'The total number of database reads aggregated across all nodes.', + unit: 'rps', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (rate(couchdb_database_reads_total{%(queriesSelector)s}[$__rate_interval]))', + }, + }, + }, + + viewReads: { + name: 'View reads', + nameShort: 'View reads', + type: 'raw', + description: 'The total number of view reads 
aggregated across all nodes.', + unit: 'rps', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (rate(couchdb_httpd_view_reads_total{%(queriesSelector)s}[$__rate_interval]))', + }, + }, + }, + + viewTimeouts: { + name: 'View timeouts', + nameShort: 'View timeouts', + type: 'raw', + description: 'The total number of view requests that timed out aggregated across all nodes.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (rate(couchdb_httpd_view_timeouts_total{%(queriesSelector)s}[$__rate_interval]))', + }, + }, + }, + + temporaryViewReads: { + name: 'Temporary view reads', + nameShort: 'Temporary view reads', + type: 'raw', + description: 'The total number of temporary view reads aggregated across all nodes.', + unit: 'rps', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (rate(couchdb_httpd_temporary_view_reads_total{%(queriesSelector)s}[$__rate_interval]))', + }, + }, + }, + + requestMethods: { + name: 'Request methods', + nameShort: 'Request methods', + type: 'raw', + description: 'The request rate split by HTTP Method aggregated across all nodes.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ', method) (increase(couchdb_httpd_request_methods{%(queriesSelector)s}[$__interval:] offset $__interval))', + legendCustomTemplate: legendCustomTemplate + ' - {{method}}', + }, + }, + }, + + averageRequestLatencyp50: { + name: 'Average request latency p50', + nameShort: 'Average request latency p50', + type: 'raw', + description: 'The average request latency p50 aggregated across all nodes.', + unit: 's', + sources: { + prometheus: { + expr: 'avg by(' + groupLabelAggTerm + ', quantile) (couchdb_request_time_seconds{%(queriesSelector)s, quantile="0.5"})', + legendCustomTemplate: legendCustomTemplate + ' - p50', + }, + }, + }, + + averageRequestLatencyp75: { + name: 'Average request latency p75', + nameShort: 'Average request latency p75', + type: 
'raw', + description: 'The average request latency p75 aggregated across all nodes.', + unit: 's', + sources: { + prometheus: { + expr: 'avg by(' + groupLabelAggTerm + ', quantile) (couchdb_request_time_seconds{%(queriesSelector)s, quantile="0.75"})', + legendCustomTemplate: legendCustomTemplate + ' - p75', + }, + }, + }, + + averageRequestLatencyp95: { + name: 'Average request latency p95', + nameShort: 'Average request latency p95', + type: 'raw', + description: 'The average request latency p95 aggregated across all nodes.', + unit: 's', + sources: { + prometheus: { + expr: 'avg by(' + groupLabelAggTerm + ', quantile) (couchdb_request_time_seconds{%(queriesSelector)s, quantile="0.95"})', + legendCustomTemplate: legendCustomTemplate + ' - p95', + }, + }, + }, + + averageRequestLatencyp99: { + name: 'Average request latency p99', + nameShort: 'Average request latency p99', + type: 'raw', + description: 'The average request latency p99 aggregated across all nodes.', + unit: 's', + sources: { + prometheus: { + expr: 'avg by(' + groupLabelAggTerm + ', quantile) (couchdb_request_time_seconds{%(queriesSelector)s, quantile="0.99"})', + legendCustomTemplate: legendCustomTemplate + ' - p99', + }, + }, + }, + + bulkRequests: { + name: 'Bulk requests', + nameShort: 'Bulk requests', + type: 'raw', + description: 'The total number of bulk requests aggregated across all nodes.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (rate(couchdb_httpd_bulk_requests{%(queriesSelector)s}[$__rate_interval]))', + }, + prometheusWithTotal: { + expr: 'sum by(' + groupLabelAggTerm + ') (rate(couchdb_httpd_bulk_requests_total{%(queriesSelector)s}[$__rate_interval]))', + }, + }, + }, + + responseStatus2xx: { + name: 'Response status 2XX', + nameShort: 'Response status 2XX', + type: 'raw', + description: 'The total number of response status 2XX aggregated across all nodes.', + unit: 'requests', + sources: { + prometheus: { + expr: 'sum by(' + 
groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"2.*"}[$__interval:] offset $__interval))', + legendCustomTemplate: legendCustomTemplate + ' - 2xx', + }, + }, + }, + + responseStatus3xx: { + name: 'Response status 3XX', + nameShort: 'Response status 3XX', + type: 'raw', + description: 'The total number of response status 3XX aggregated across all nodes.', + unit: 'requests', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"3.*"}[$__interval:] offset $__interval))', + legendCustomTemplate: legendCustomTemplate + ' - 3xx', + }, + }, + }, + + responseStatus4xx: { + name: 'Response status 4XX', + nameShort: 'Response status 4XX', + type: 'raw', + description: 'The total number of response status 4XX aggregated across all nodes.', + unit: 'requests', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"4.*"}[$__interval:] offset $__interval))', + legendCustomTemplate: legendCustomTemplate + ' - 4xx', + }, + }, + }, + + responseStatus5xx: { + name: 'Response status 5XX', + nameShort: 'Response status 5XX', + type: 'raw', + description: 'The total number of response status 5XX aggregated across all nodes.', + unit: 'requests', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"5.*"}[$__interval:] offset $__interval))', + legendCustomTemplate: legendCustomTemplate + ' - 5xx', + }, + }, + }, + + goodResponseStatuses: { + name: 'Good response statuses', + nameShort: 'Good response statuses', + type: 'raw', + description: 'The total number of good response statuses aggregated across all nodes.', + unit: 'requests', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"[23].*"}[$__interval:] offset 
$__interval))', + }, + }, + }, + + errorResponseStatuses: { + name: 'Error response statuses', + nameShort: 'Error response statuses', + type: 'raw', + description: 'The total number of error response statuses aggregated across all nodes.', + unit: 'requests', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"[45].*"}[$__interval:] offset $__interval))', + }, + }, + }, + }, +} diff --git a/apache-couchdb-mixin/signals/replicator.libsonnet b/apache-couchdb-mixin/signals/replicator.libsonnet new file mode 100644 index 000000000..46ad2828a --- /dev/null +++ b/apache-couchdb-mixin/signals/replicator.libsonnet @@ -0,0 +1,106 @@ +function(this) { + local legendCustomTemplate = '{{ couchdb_cluster}}', + local groupLabelAggTerm = std.join(', ', this.groupLabels), + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + legendCustomTemplate: legendCustomTemplate, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + discoveryMetric: { + prometheus: 'couchdb_couch_replicator_cluster_is_stable', + }, + signals: { + changesManagerDeaths: { + name: 'Changes manager deaths', + nameShort: 'Changes manager deaths', + type: 'raw', + description: 'The total number of changes manager deaths aggregated across all nodes.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (couchdb_couch_replicator_changes_manager_deaths_total{%(queriesSelector)s})', + }, + }, + }, + + changesQueueDeaths: { + name: 'Changes queue deaths', + nameShort: 'Changes queue deaths', + type: 'raw', + description: 'The total number of changes queue deaths aggregated across all nodes.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (couchdb_couch_replicator_changes_queue_deaths_total{%(queriesSelector)s})', + }, + }, + }, + + changesReaderDeaths: { + name: 'Changes reader deaths', + nameShort: 'Changes
reader deaths', + type: 'raw', + description: 'The total number of changes reader deaths aggregated across all nodes.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (couchdb_couch_replicator_changes_reader_deaths_total{%(queriesSelector)s})', + }, + }, + }, + + connectionOwnerCrashes: { + name: 'Connection owner crashes', + nameShort: 'Connection owner crashes', + type: 'raw', + description: 'The total number of connection owner crashes aggregated across all nodes.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (couchdb_couch_replicator_connection_owner_crashes_total{%(queriesSelector)s})', + }, + }, + }, + + connectionWorkerCrashes: { + name: 'Connection worker crashes', + nameShort: 'Connection worker crashes', + type: 'raw', + description: 'The total number of connection worker crashes aggregated across all nodes.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (couchdb_couch_replicator_connection_worker_crashes_total{%(queriesSelector)s})', + }, + }, + }, + + jobsCrashes: { + name: 'Jobs crashes', + nameShort: 'Jobs crashes', + type: 'raw', + description: 'The total number of jobs crashes aggregated across all nodes.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (couchdb_couch_replicator_jobs_crashes_total{%(queriesSelector)s})', + }, + }, + }, + + jobsQueued: { + name: 'Jobs queued', + nameShort: 'Jobs queued', + type: 'raw', + description: 'The total number of jobs queued aggregated across all nodes.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (couchdb_couch_replicator_jobs_pending{%(queriesSelector)s})', + }, + }, + }, + }, +} From c3f6bb4776d622c30c3e71e59f9567ffb60ceea3 Mon Sep 17 00:00:00 2001 From: schmikei Date: Wed, 29 Oct 2025 17:17:43 -0400 Subject: [PATCH 02/11] fix links and try to fix lint --- apache-couchdb-mixin/alerts.libsonnet | 
4 ++-- apache-couchdb-mixin/dashboards_out/couchdb-logs.json | 2 +- apache-couchdb-mixin/dashboards_out/couchdb-overview.json | 2 +- apache-couchdb-mixin/links.libsonnet | 2 +- .../prometheus_rules_out/prometheus_alerts.yaml | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/apache-couchdb-mixin/alerts.libsonnet b/apache-couchdb-mixin/alerts.libsonnet index 158c3c4ba..d11d633fe 100644 --- a/apache-couchdb-mixin/alerts.libsonnet +++ b/apache-couchdb-mixin/alerts.libsonnet @@ -25,7 +25,7 @@ { alert: 'CouchDBHigh4xxResponseCodes', expr: ||| - sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"4.*"}[5m])) > %(alertsWarning4xxResponseCodes5m)s + sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"4.."}[5m])) > %(alertsWarning4xxResponseCodes5m)s ||| % this.config, 'for': '5m', labels: { @@ -43,7 +43,7 @@ { alert: 'CouchDBHigh5xxResponseCodes', expr: ||| - sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"5.*"}[5m])) > %(alertsCritical5xxResponseCodes5m)s + sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"5.."}[5m])) > %(alertsCritical5xxResponseCodes5m)s ||| % this.config, 'for': '5m', labels: { diff --git a/apache-couchdb-mixin/dashboards_out/couchdb-logs.json b/apache-couchdb-mixin/dashboards_out/couchdb-logs.json index ba5fea85b..6e94cfcab 100644 --- a/apache-couchdb-mixin/dashboards_out/couchdb-logs.json +++ b/apache-couchdb-mixin/dashboards_out/couchdb-logs.json @@ -7,7 +7,7 @@ "keepTime": true, "title": "Apache CouchDB nodes", "type": "link", - "url": "couchdb_nodes" + "url": "/d/couchdb_nodes" }, { "keepTime": true, diff --git a/apache-couchdb-mixin/dashboards_out/couchdb-overview.json b/apache-couchdb-mixin/dashboards_out/couchdb-overview.json index 5d9504ed5..87a6f41bd 100644 --- a/apache-couchdb-mixin/dashboards_out/couchdb-overview.json +++ b/apache-couchdb-mixin/dashboards_out/couchdb-overview.json @@ -13,7 +13,7 @@ "keepTime": true, "title": "Apache CouchDB nodes", "type": 
"link", - "url": "couchdb_nodes" + "url": "/d/couchdb_nodes" }, { "asDropdown": true, diff --git a/apache-couchdb-mixin/links.libsonnet b/apache-couchdb-mixin/links.libsonnet index bb750a55c..5458cf288 100644 --- a/apache-couchdb-mixin/links.libsonnet +++ b/apache-couchdb-mixin/links.libsonnet @@ -9,7 +9,7 @@ local g = import './g.libsonnet'; + link.link.options.withKeepTime(true), couchdbNodes: - link.link.new(this.config.dashboardNamePrefix + ' nodes', this.grafana.dashboards['couchdb-nodes.json'].uid) + link.link.new(this.config.dashboardNamePrefix + ' nodes', '/d/' + this.grafana.dashboards['couchdb-nodes.json'].uid) + link.link.options.withKeepTime(true), otherDashboards: diff --git a/apache-couchdb-mixin/prometheus_rules_out/prometheus_alerts.yaml b/apache-couchdb-mixin/prometheus_rules_out/prometheus_alerts.yaml index 7af93a67a..c8e90f827 100644 --- a/apache-couchdb-mixin/prometheus_rules_out/prometheus_alerts.yaml +++ b/apache-couchdb-mixin/prometheus_rules_out/prometheus_alerts.yaml @@ -15,7 +15,7 @@ groups: description: '{{ printf "%.0f" $value }} 4xx responses have been detected over the last 5 minutes on {{$labels.instance}}, which is above the threshold of 5.' summary: There are a high number of 4xx responses for incoming requests to a node. expr: | - sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"4.*"}[5m])) > 5 + sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"4.."}[5m])) > 5 for: 5m labels: severity: warning @@ -24,7 +24,7 @@ groups: description: '{{ printf "%.0f" $value }} 5xx responses have been detected over the last 5 minutes on {{$labels.instance}}, which is above the threshold of 0.' summary: There are a high number of 5xx responses for incoming requests to a node. 
expr: | - sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"5.*"}[5m])) > 0 + sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"5.."}[5m])) > 0 for: 5m labels: severity: critical From d9e918d69535dca621ec19564b096d94141420fd Mon Sep 17 00:00:00 2001 From: schmikei Date: Thu, 13 Nov 2025 10:54:14 -0500 Subject: [PATCH 03/11] pr feedback --- .../dashboards_out/couchdb-nodes.json | 98 +++++++------------ .../dashboards_out/couchdb-overview.json | 91 +++++------------ apache-couchdb-mixin/mixin.libsonnet | 16 +-- apache-couchdb-mixin/panels.libsonnet | 97 +++++++++--------- apache-couchdb-mixin/rows.libsonnet | 2 +- apache-couchdb-mixin/signals/nodes.libsonnet | 68 +++---------- .../signals/overview.libsonnet | 64 +++--------- 7 files changed, 149 insertions(+), 287 deletions(-) diff --git a/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json b/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json index a2a6d8abc..939fbcfc2 100644 --- a/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json +++ b/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json @@ -166,7 +166,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "couchdb_open_os_files_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}", + "expr": "couchdb_open_os_files_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}\nor\ncouchdb_open_os_files{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}", "format": "time_series", "instant": false, "legendFormat": "{{instance}}", @@ -218,7 +218,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": 
"couchdb_open_databases_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}", + "expr": "couchdb_open_databases_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}\nor\ncouchdb_open_databases{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}", "format": "time_series", "instant": false, "legendFormat": "{{instance}}", @@ -567,14 +567,12 @@ "description": "The average request latency for a node.", "fieldConfig": { "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "opacity", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" + "color": { + "mode": "thresholds" }, - "unit": "s" + "custom": { + "axisLabel": "s" + } } }, "gridPos": { @@ -586,63 +584,25 @@ "id": 12, "options": { "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" + "placement": "right" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", quantile=\"0.5\"}", - "format": "time_series", - "instant": false, - "legendFormat": "{{instance}} - p50", - "refId": "Request latency p50" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", quantile=\"0.75\"}", - "format": "time_series", - "instant": false, - "legendFormat": "{{instance}} - p75", - "refId": "Request latency p75" - }, 
- { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", quantile=\"0.95\"}", - "format": "time_series", - "instant": false, - "legendFormat": "{{instance}} - p95", - "refId": "Request latency p95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", quantile=\"0.99\"}", - "format": "time_series", - "instant": false, - "legendFormat": "{{instance}} - p99", - "refId": "Request latency p99" + "expr": "sum by(job, instance, cluster, quantile) (couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", + "format": "timeseries", + "instant": true, + "legendFormat": "{{instance}} - {{quantile}}", + "refId": "Average request latency" } ], - "title": "Average request latency", - "type": "timeseries" + "title": "Request latency quantiles", + "type": "histogram" }, { "datasource": { @@ -671,8 +631,10 @@ "id": 13, "options": { "legend": { + "asTable": true, "calcs": [ ], - "displayMode": "list" + "displayMode": "list", + "placement": "right" }, "tooltip": { "mode": "multi", @@ -715,6 +677,12 @@ "y": 32 }, "id": 14, + "options": { + "legend": { + "asTable": true, + "placement": "right" + } + }, "pluginVersion": "v11.4.0", "targets": [ { @@ -774,7 +742,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The total number of good response statuses on a node.", + "description": "The total number of good response (HTTP 2xx-3xx) statuses on a node.", "fieldConfig": { "defaults": { "custom": { @@ -796,8 +764,10 @@ "id": 15, "options": { 
"legend": { + "asTable": true, "calcs": [ ], - "displayMode": "list" + "displayMode": "list", + "placement": "right" }, "tooltip": { "mode": "multi", @@ -827,7 +797,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The total number of error response statuses on a node.", + "description": "The total number of error response (HTTP 4xx-5xx) statuses on a node.", "fieldConfig": { "defaults": { "custom": { @@ -849,8 +819,10 @@ "id": 16, "options": { "legend": { + "asTable": true, "calcs": [ ], - "displayMode": "list" + "displayMode": "list", + "placement": "right" }, "tooltip": { "mode": "multi", @@ -915,8 +887,10 @@ "id": 18, "options": { "legend": { + "asTable": true, "calcs": [ ], - "displayMode": "list" + "displayMode": "list", + "placement": "right" }, "tooltip": { "mode": "multi", diff --git a/apache-couchdb-mixin/dashboards_out/couchdb-overview.json b/apache-couchdb-mixin/dashboards_out/couchdb-overview.json index 87a6f41bd..dbf61e3ee 100644 --- a/apache-couchdb-mixin/dashboards_out/couchdb-overview.json +++ b/apache-couchdb-mixin/dashboards_out/couchdb-overview.json @@ -233,7 +233,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster, cluster) (couchdb_open_os_files_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", + "expr": "sum by(job, couchdb_cluster, cluster) (couchdb_open_os_files_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})\nor\nsum by(job, couchdb_cluster, cluster) (couchdb_open_os_files{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", "format": "time_series", "instant": false, "legendFormat": "{{ couchdb_cluster }}", @@ -284,7 +284,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - 
"expr": "sum by(job, couchdb_cluster, cluster) (couchdb_open_databases_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", + "expr": "sum by(job, couchdb_cluster, cluster) (couchdb_open_databases_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})\nor\nsum by(job, couchdb_cluster, cluster) (couchdb_open_databases{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", "format": "time_series", "instant": false, "legendFormat": "{{ couchdb_cluster }}", @@ -599,8 +599,10 @@ "id": 13, "options": { "legend": { + "asTable": true, "calcs": [ ], - "displayMode": "list" + "displayMode": "list", + "placement": "right" }, "tooltip": { "mode": "multi", @@ -627,20 +629,18 @@ }, { "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The average request latency aggregated across all nodes.", + "description": "The request latency aggregated across all nodes.", "fieldConfig": { "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "opacity", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" + "color": { + "mode": "thresholds" }, - "unit": "s" + "custom": { + "axisLabel": "s" + } } }, "gridPos": { @@ -652,63 +652,26 @@ "id": 14, "options": { "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" + "asTable": true, + "placement": "right" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job, couchdb_cluster, cluster, quantile) 
(couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", quantile=\"0.5\"})", - "format": "time_series", - "instant": false, - "legendFormat": "{{ couchdb_cluster }} - p50", - "refId": "Average request latency p50" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "avg by(job, couchdb_cluster, cluster, quantile) (couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", quantile=\"0.75\"})", - "format": "time_series", - "instant": false, - "legendFormat": "{{ couchdb_cluster }} - p75", - "refId": "Average request latency p75" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "avg by(job, couchdb_cluster, cluster, quantile) (couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", quantile=\"0.95\"})", - "format": "time_series", - "instant": false, - "legendFormat": "{{ couchdb_cluster }} - p95", - "refId": "Average request latency p95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "avg by(job, couchdb_cluster, cluster, quantile) (couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", quantile=\"0.99\"})", - "format": "time_series", - "instant": false, - "legendFormat": "{{ couchdb_cluster }} - p99", - "refId": "Average request latency p99" + "expr": "sum by(job, couchdb_cluster, cluster, quantile) (couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", + "format": 
"timeseries", + "instant": true, + "legendFormat": "{{ couchdb_cluster }} - {{quantile}}", + "refId": "Request latency" } ], - "title": "Average request latency", - "type": "timeseries" + "title": "Request latency quantiles", + "type": "histogram" }, { "datasource": { @@ -835,7 +798,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The total number of good response statuses aggregated across all nodes.", + "description": "The total number of good response (HTTP 2xx-3xx) statuses aggregated across all nodes.", "fieldConfig": { "defaults": { "custom": { @@ -845,7 +808,7 @@ "lineWidth": 2, "showPoints": "never" }, - "unit": "rps" + "unit": "reqps" } }, "gridPos": { @@ -888,7 +851,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The total number of error response statuses aggregated across all nodes.", + "description": "The total number of error response statuses (HTTP 4xx-5xx) aggregated across all nodes.", "fieldConfig": { "defaults": { "custom": { @@ -898,7 +861,7 @@ "lineWidth": 2, "showPoints": "never" }, - "unit": "rps" + "unit": "reqps" } }, "gridPos": { diff --git a/apache-couchdb-mixin/mixin.libsonnet b/apache-couchdb-mixin/mixin.libsonnet index 4b3a5bf89..82500a2ec 100644 --- a/apache-couchdb-mixin/mixin.libsonnet +++ b/apache-couchdb-mixin/mixin.libsonnet @@ -1,10 +1,10 @@ -local prestolib = import './main.libsonnet'; +local couchdblib = import './main.libsonnet'; local config = (import './config.libsonnet'); local util = import 'grafana-cloud-integration-utils/util.libsonnet'; -local presto = - prestolib.new() - + prestolib.withConfigMixin( +local couchdb = + couchdblib.new() + + couchdblib.withConfigMixin( { filteringSelector: config.filteringSelector, uid: config.uid, @@ -25,11 +25,11 @@ local optional_labels = { { grafanaDashboards+:: { [fname]: - local dashboard = presto.grafana.dashboards[fname]; + local dashboard = couchdb.grafana.dashboards[fname]; dashboard + 
util.patch_variables(dashboard, optional_labels) - for fname in std.objectFields(presto.grafana.dashboards) + for fname in std.objectFields(couchdb.grafana.dashboards) }, - prometheusAlerts+:: presto.prometheus.alerts, - prometheusRules+:: presto.prometheus.recordingRules, + prometheusAlerts+:: couchdb.prometheus.alerts, + prometheusRules+:: couchdb.prometheus.recordingRules, } diff --git a/apache-couchdb-mixin/panels.libsonnet b/apache-couchdb-mixin/panels.libsonnet index d5c41520f..8e7cef0d6 100644 --- a/apache-couchdb-mixin/panels.libsonnet +++ b/apache-couchdb-mixin/panels.libsonnet @@ -95,20 +95,22 @@ local commonlib = import 'common-lib/common/main.libsonnet'; targets=[signals.overview.requestMethods.asTarget() { interval: '2m' }], ) + g.panel.timeSeries.panelOptions.withDescription('The request rate split by HTTP Method aggregated across all nodes.') - + g.panel.timeSeries.standardOptions.withUnit('reqps'), - - overviewAverageRequestLatencyPanel: - commonlib.panels.generic.timeSeries.base.new( - 'Average request latency', - targets=[ - signals.overview.averageRequestLatencyp50.asTarget(), - signals.overview.averageRequestLatencyp75.asTarget(), - signals.overview.averageRequestLatencyp95.asTarget(), - signals.overview.averageRequestLatencyp99.asTarget(), - ], - ) - + g.panel.timeSeries.panelOptions.withDescription('The average request latency aggregated across all nodes.') - + g.panel.timeSeries.standardOptions.withUnit('s'), + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.options.legend.withAsTable(true) + + g.panel.timeSeries.options.legend.withPlacement('right'), + + overviewRequestLatencyPanel: + g.panel.histogram.new(title='Request latency quantiles') + + g.panel.histogram.queryOptions.withTargets([ + signals.overview.requestLatency.asTarget() + + g.query.prometheus.withInstant(true) + + g.query.prometheus.withFormat('timeseries'), + ]) + + g.panel.histogram.panelOptions.withDescription('The request latency aggregated 
across all nodes.') + + g.panel.histogram.standardOptions.color.withMode('thresholds') + + g.panel.histogram.options.legend.withAsTable(true) + + g.panel.histogram.options.legend.withPlacement('right') + + g.panel.histogram.fieldConfig.defaults.custom.withAxisLabel('s'), overviewBulkRequestsPanel: commonlib.panels.generic.timeSeries.base.new( @@ -132,16 +134,16 @@ local commonlib = import 'common-lib/common/main.libsonnet'; 'Good response statuses', targets=[signals.overview.goodResponseStatuses.asTarget() { interval: '2m' }], ) - + g.panel.timeSeries.panelOptions.withDescription('The total number of good response statuses aggregated across all nodes.') - + g.panel.timeSeries.standardOptions.withUnit('rps'), + + g.panel.timeSeries.panelOptions.withDescription('The total number of good response (HTTP 2xx-3xx) statuses aggregated across all nodes.') + + g.panel.timeSeries.standardOptions.withUnit('reqps'), overviewErrorResponseStatusesPanel: commonlib.panels.generic.timeSeries.base.new( 'Error response statuses', targets=[signals.overview.errorResponseStatuses.asTarget()], ) - + g.panel.timeSeries.panelOptions.withDescription('The total number of error response statuses aggregated across all nodes.') - + g.panel.timeSeries.standardOptions.withUnit('rps'), + + g.panel.timeSeries.panelOptions.withDescription('The total number of error response statuses (HTTP 4xx-5xx) aggregated across all nodes.') + + g.panel.timeSeries.standardOptions.withUnit('reqps'), overviewReplicatorChangesManagerDeathsPanel: @@ -274,20 +276,22 @@ local commonlib = import 'common-lib/common/main.libsonnet'; targets=[signals.nodes.requestMethods.asTarget() { interval: '2m' }], ) + g.panel.timeSeries.panelOptions.withDescription('The request rate split by HTTP Method for a node.') - + g.panel.timeSeries.standardOptions.withUnit('reqps'), + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.options.legend.withAsTable(true) + + 
g.panel.timeSeries.options.legend.withPlacement('right'), nodeAverageRequestLatencyPanel: - commonlib.panels.generic.timeSeries.base.new( - 'Average request latency', - targets=[ - signals.nodes.requestLatencyp50.asTarget(), - signals.nodes.requestLatencyp75.asTarget(), - signals.nodes.requestLatencyp95.asTarget(), - signals.nodes.requestLatencyp99.asTarget(), - ], - ) - + g.panel.timeSeries.panelOptions.withDescription('The average request latency for a node.') - + g.panel.timeSeries.standardOptions.withUnit('s'), + g.panel.histogram.new(title='Request latency quantiles') + + g.panel.histogram.queryOptions.withTargets([ + signals.nodes.averageRequestLatency.asTarget() + + g.query.prometheus.withInstant(true) + + g.query.prometheus.withFormat('timeseries'), + ]) + + g.panel.gauge.queryOptions.withDatasource('prometheus', '${' + this.grafana.variables.datasources.prometheus.name + '}') + + g.panel.histogram.standardOptions.color.withMode('thresholds') + + g.panel.histogram.options.legend.withPlacement('right') + + g.panel.histogram.panelOptions.withDescription('The average request latency for a node.') + + g.panel.histogram.fieldConfig.defaults.custom.withAxisLabel('s'), nodeBulkRequestsPanel: commonlib.panels.generic.timeSeries.base.new( @@ -306,36 +310,29 @@ local commonlib = import 'common-lib/common/main.libsonnet'; signals.nodes.responseStatus5xx.asTarget() { interval: '2m' }, ]) + g.panel.pieChart.panelOptions.withDescription('The responses grouped by HTTP status type (2xx, 3xx, 4xx, and 5xx) for a node.') - + g.panel.pieChart.standardOptions.withUnit('none'), - - nodeRequestLatencyPanel: - commonlib.panels.generic.timeSeries.base.new( - 'Request latency', - targets=[ - signals.nodes.requestLatencyp50.asTarget(), - signals.nodes.requestLatencyp75.asTarget(), - signals.nodes.requestLatencyp95.asTarget(), - signals.nodes.requestLatencyp99.asTarget(), - ], - ) - + g.panel.timeSeries.panelOptions.withDescription('The request latency for a node.') - + 
g.panel.timeSeries.standardOptions.withUnit('s'), + + g.panel.pieChart.standardOptions.withUnit('none') + + g.panel.pieChart.options.legend.withAsTable(true) + + g.panel.pieChart.options.legend.withPlacement('right'), nodeGoodResponseStatusesPanel: commonlib.panels.generic.timeSeries.base.new( 'Good response statuses', targets=[signals.nodes.goodResponseStatuses.asTarget() { interval: '2m' }], ) - + g.panel.timeSeries.panelOptions.withDescription('The total number of good response statuses on a node.') - + g.panel.timeSeries.standardOptions.withUnit('reqps'), + + g.panel.timeSeries.panelOptions.withDescription('The total number of good response (HTTP 2xx-3xx) statuses on a node.') + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.options.legend.withAsTable(true) + + g.panel.timeSeries.options.legend.withPlacement('right'), nodeErrorResponseStatusesPanel: commonlib.panels.generic.timeSeries.base.new( 'Error response statuses', targets=[signals.nodes.errorResponseStatuses.asTarget() { interval: '2m' }], ) - + g.panel.timeSeries.panelOptions.withDescription('The total number of error response statuses on a node.') - + g.panel.timeSeries.standardOptions.withUnit('reqps'), + + g.panel.timeSeries.panelOptions.withDescription('The total number of error response (HTTP 4xx-5xx) statuses on a node.') + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.options.legend.withAsTable(true) + + g.panel.timeSeries.options.legend.withPlacement('right'), nodeLogTypesPanel: @@ -344,6 +341,8 @@ local commonlib = import 'common-lib/common/main.libsonnet'; targets=[signals.nodes.logTypes.asTarget() { interval: '2m' }], ) + g.panel.timeSeries.panelOptions.withDescription('The number of logged messages for a node.') - + g.panel.timeSeries.standardOptions.withUnit('none'), + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.legend.withAsTable(true) + + 
g.panel.timeSeries.options.legend.withPlacement('right'), }, } diff --git a/apache-couchdb-mixin/rows.libsonnet b/apache-couchdb-mixin/rows.libsonnet index b6d588aa1..fbdb13731 100644 --- a/apache-couchdb-mixin/rows.libsonnet +++ b/apache-couchdb-mixin/rows.libsonnet @@ -26,7 +26,7 @@ local g = import './g.libsonnet'; + g.panel.row.withPanels( [ this.grafana.panels.overviewRequestMethodsPanel { gridPos+: { w: 12 } }, - this.grafana.panels.overviewAverageRequestLatencyPanel { gridPos+: { w: 12 } }, + this.grafana.panels.overviewRequestLatencyPanel { gridPos+: { w: 12 } }, this.grafana.panels.overviewBulkRequestsPanel { gridPos+: { w: 12 } }, this.grafana.panels.overviewResponseStatusOverviewPanel { gridPos+: { w: 12 } }, this.grafana.panels.overviewGoodResponseStatusesPanel { gridPos+: { w: 12 } }, diff --git a/apache-couchdb-mixin/signals/nodes.libsonnet b/apache-couchdb-mixin/signals/nodes.libsonnet index 7c0db25b1..fa1afd261 100644 --- a/apache-couchdb-mixin/signals/nodes.libsonnet +++ b/apache-couchdb-mixin/signals/nodes.libsonnet @@ -33,9 +33,11 @@ function(this) { unit: 'none', sources: { prometheus: { - expr: 'couchdb_open_os_files_total{%(queriesSelector)s}', + expr: 'couchdb_open_os_files{%(queriesSelector)s}', }, - + prometheusWithTotal: { + expr: 'couchdb_open_os_files_total{%(queriesSelector)s}' + } }, }, @@ -47,7 +49,10 @@ function(this) { unit: 'none', sources: { prometheus: { - expr: 'couchdb_open_databases_total{%(queriesSelector)s}', + expr: 'couchdb_open_databases{%(queriesSelector)s}', + }, + prometheusWithTotal: { + expr: 'couchdb_open_databases_total{%(queriesSelector)s}' }, }, }, @@ -134,63 +139,20 @@ function(this) { }, }, - requestLatencyp50: { - name: 'Request latency p50', - nameShort: 'Request latency p50', - type: 'gauge', - description: 'The 50th percentile of request latency for a node.', - unit: 's', - sources: { - prometheus: { - expr: 'couchdb_request_time_seconds{%(queriesSelector)s, quantile="0.5"}', - legendCustomTemplate: 
legendCustomTemplate + ' - p50', - }, - }, - }, - - requestLatencyp75: { - name: 'Request latency p75', - nameShort: 'Request latency p75', - type: 'gauge', - description: 'The 75th percentile of request latency for a node.', - unit: 's', - sources: { - prometheus: { - expr: 'couchdb_request_time_seconds{%(queriesSelector)s, quantile="0.75"}', - legendCustomTemplate: legendCustomTemplate + ' - p75', - }, - }, - }, - - requestLatencyp95: { - name: 'Request latency p95', - nameShort: 'Request latency p95', - type: 'gauge', - description: 'The 95th percentile of request latency for a node.', - unit: 's', - sources: { - prometheus: { - expr: 'couchdb_request_time_seconds{%(queriesSelector)s, quantile="0.95"}', - legendCustomTemplate: legendCustomTemplate + ' - p95', - }, - }, - }, - - requestLatencyp99: { - name: 'Request latency p99', - nameShort: 'Request latency p99', - type: 'gauge', - description: 'The 99th percentile of request latency for a node.', + averageRequestLatency: { + name: 'Average request latency', + nameShort: 'Average request latency', + type: 'raw', + description: 'The average request latency for a node.', unit: 's', sources: { prometheus: { - expr: 'couchdb_request_time_seconds{%(queriesSelector)s, quantile="0.99"}', - legendCustomTemplate: legendCustomTemplate + ' - p99', + expr: 'sum by(job, instance, cluster, quantile) (couchdb_request_time_seconds{%(queriesSelector)s})', + legendCustomTemplate: legendCustomTemplate + ' - {{quantile}}', }, }, }, - bulkRequests: { name: 'Bulk requests', nameShort: 'Bulk requests', diff --git a/apache-couchdb-mixin/signals/overview.libsonnet b/apache-couchdb-mixin/signals/overview.libsonnet index d76aed3f4..9dbb5de59 100644 --- a/apache-couchdb-mixin/signals/overview.libsonnet +++ b/apache-couchdb-mixin/signals/overview.libsonnet @@ -59,6 +59,9 @@ function(this) { unit: 'none', sources: { prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (couchdb_open_os_files{%(queriesSelector)s})', + }, + 
prometheusWithTotal: { expr: 'sum by(' + groupLabelAggTerm + ') (couchdb_open_os_files_total{%(queriesSelector)s})', }, }, @@ -72,6 +75,9 @@ function(this) { unit: 'none', sources: { prometheus: { + expr: 'sum by(' + groupLabelAggTerm + ') (couchdb_open_databases{%(queriesSelector)s})', + }, + prometheusWithTotal: { expr: 'sum by(' + groupLabelAggTerm + ') (couchdb_open_databases_total{%(queriesSelector)s})', }, }, @@ -156,58 +162,16 @@ function(this) { }, }, - averageRequestLatencyp50: { - name: 'Average request latency p50', - nameShort: 'Average request latency p50', - type: 'raw', - description: 'The average request latency p50 aggregated across all nodes.', - unit: 's', - sources: { - prometheus: { - expr: 'avg by(' + groupLabelAggTerm + ', quantile) (couchdb_request_time_seconds{%(queriesSelector)s, quantile="0.5"})', - legendCustomTemplate: legendCustomTemplate + ' - p50', - }, - }, - }, - - averageRequestLatencyp75: { - name: 'Average request latency p75', - nameShort: 'Average request latency p75', - type: 'raw', - description: 'The average request latency p75 aggregated across all nodes.', - unit: 's', - sources: { - prometheus: { - expr: 'avg by(' + groupLabelAggTerm + ', quantile) (couchdb_request_time_seconds{%(queriesSelector)s, quantile="0.75"})', - legendCustomTemplate: legendCustomTemplate + ' - p75', - }, - }, - }, - - averageRequestLatencyp95: { - name: 'Average request latency p95', - nameShort: 'Average request latency p95', - type: 'raw', - description: 'The average request latency p95 aggregated across all nodes.', - unit: 's', - sources: { - prometheus: { - expr: 'avg by(' + groupLabelAggTerm + ', quantile) (couchdb_request_time_seconds{%(queriesSelector)s, quantile="0.95"})', - legendCustomTemplate: legendCustomTemplate + ' - p95', - }, - }, - }, - - averageRequestLatencyp99: { - name: 'Average request latency p99', - nameShort: 'Average request latency p99', + requestLatency: { + name: 'Request latency', + nameShort: 'Request latency', 
type: 'raw', - description: 'The average request latency p99 aggregated across all nodes.', + description: 'The request latency aggregated across all nodes.', unit: 's', sources: { prometheus: { - expr: 'avg by(' + groupLabelAggTerm + ', quantile) (couchdb_request_time_seconds{%(queriesSelector)s, quantile="0.99"})', - legendCustomTemplate: legendCustomTemplate + ' - p99', + expr: 'sum by(' + groupLabelAggTerm + ', quantile) (couchdb_request_time_seconds{%(queriesSelector)s})', + legendCustomTemplate: legendCustomTemplate + ' - {{quantile}}', }, }, }, @@ -288,7 +252,7 @@ function(this) { name: 'Good response statuses', nameShort: 'Good response statuses', type: 'raw', - description: 'The total number of good response statuses aggregated across all nodes.', + description: 'The total number of good response (HTTP 2xx-3xx) statuses aggregated across all nodes.', unit: 'requests', sources: { prometheus: { @@ -301,7 +265,7 @@ function(this) { name: 'Error response statuses', nameShort: 'Error response statuses', type: 'raw', - description: 'The total number of error response statuses aggregated across all nodes.', + description: 'The total number of error response (HTTP 4xx-5xx) statuses aggregated across all nodes.', unit: 'requests', sources: { prometheus: { From 70067030c8e701bf26dbeb0a78453c06195f4bf1 Mon Sep 17 00:00:00 2001 From: schmikei Date: Thu, 13 Nov 2025 10:56:28 -0500 Subject: [PATCH 04/11] make fmt --- apache-couchdb-mixin/panels.libsonnet | 2 +- apache-couchdb-mixin/signals/nodes.libsonnet | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/apache-couchdb-mixin/panels.libsonnet b/apache-couchdb-mixin/panels.libsonnet index 8e7cef0d6..71a3fb65d 100644 --- a/apache-couchdb-mixin/panels.libsonnet +++ b/apache-couchdb-mixin/panels.libsonnet @@ -105,7 +105,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; signals.overview.requestLatency.asTarget() + g.query.prometheus.withInstant(true) + 
g.query.prometheus.withFormat('timeseries'), - ]) + ]) + g.panel.histogram.panelOptions.withDescription('The request latency aggregated across all nodes.') + g.panel.histogram.standardOptions.color.withMode('thresholds') + g.panel.histogram.options.legend.withAsTable(true) diff --git a/apache-couchdb-mixin/signals/nodes.libsonnet b/apache-couchdb-mixin/signals/nodes.libsonnet index fa1afd261..afdd9fa7a 100644 --- a/apache-couchdb-mixin/signals/nodes.libsonnet +++ b/apache-couchdb-mixin/signals/nodes.libsonnet @@ -36,8 +36,8 @@ function(this) { expr: 'couchdb_open_os_files{%(queriesSelector)s}', }, prometheusWithTotal: { - expr: 'couchdb_open_os_files_total{%(queriesSelector)s}' - } + expr: 'couchdb_open_os_files_total{%(queriesSelector)s}', + }, }, }, @@ -52,7 +52,7 @@ function(this) { expr: 'couchdb_open_databases{%(queriesSelector)s}', }, prometheusWithTotal: { - expr: 'couchdb_open_databases_total{%(queriesSelector)s}' + expr: 'couchdb_open_databases_total{%(queriesSelector)s}', }, }, }, From 092776c5016e7cc1b19a346a671f01cf62062d0a Mon Sep 17 00:00:00 2001 From: schmikei Date: Thu, 13 Nov 2025 14:39:23 -0500 Subject: [PATCH 05/11] fix units on histogram --- apache-couchdb-mixin/dashboards_out/couchdb-nodes.json | 4 +--- apache-couchdb-mixin/dashboards_out/couchdb-overview.json | 4 +--- apache-couchdb-mixin/panels.libsonnet | 4 ++-- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json b/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json index 939fbcfc2..0eeb0739c 100644 --- a/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json +++ b/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json @@ -570,9 +570,7 @@ "color": { "mode": "thresholds" }, - "custom": { - "axisLabel": "s" - } + "unit": "s" } }, "gridPos": { diff --git a/apache-couchdb-mixin/dashboards_out/couchdb-overview.json b/apache-couchdb-mixin/dashboards_out/couchdb-overview.json index dbf61e3ee..eef5a4f9c 100644 --- 
a/apache-couchdb-mixin/dashboards_out/couchdb-overview.json +++ b/apache-couchdb-mixin/dashboards_out/couchdb-overview.json @@ -638,9 +638,7 @@ "color": { "mode": "thresholds" }, - "custom": { - "axisLabel": "s" - } + "unit": "s" } }, "gridPos": { diff --git a/apache-couchdb-mixin/panels.libsonnet b/apache-couchdb-mixin/panels.libsonnet index 71a3fb65d..eed071174 100644 --- a/apache-couchdb-mixin/panels.libsonnet +++ b/apache-couchdb-mixin/panels.libsonnet @@ -110,7 +110,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.panel.histogram.standardOptions.color.withMode('thresholds') + g.panel.histogram.options.legend.withAsTable(true) + g.panel.histogram.options.legend.withPlacement('right') - + g.panel.histogram.fieldConfig.defaults.custom.withAxisLabel('s'), + + g.panel.histogram.standardOptions.withUnit('s'), overviewBulkRequestsPanel: commonlib.panels.generic.timeSeries.base.new( @@ -291,7 +291,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.panel.histogram.standardOptions.color.withMode('thresholds') + g.panel.histogram.options.legend.withPlacement('right') + g.panel.histogram.panelOptions.withDescription('The average request latency for a node.') - + g.panel.histogram.fieldConfig.defaults.custom.withAxisLabel('s'), + + g.panel.histogram.standardOptions.withUnit('s'), nodeBulkRequestsPanel: commonlib.panels.generic.timeSeries.base.new( From 9186490cba508f6f932a102b3c1dbdbed4d8966c Mon Sep 17 00:00:00 2001 From: schmikei Date: Tue, 18 Nov 2025 10:08:53 -0500 Subject: [PATCH 06/11] fix a couple of issues caught in PR review --- apache-couchdb-mixin/README.md | 6 ++--- apache-couchdb-mixin/alerts.libsonnet | 20 +++++++-------- .../dashboards_out/couchdb-logs.json | 2 +- .../dashboards_out/couchdb-nodes.json | 14 +++++------ .../dashboards_out/couchdb-overview.json | 16 ++++++------ apache-couchdb-mixin/mixin.libsonnet | 2 +- .../prometheus_alerts.yaml | 20 +++++++-------- apache-couchdb-mixin/signals/nodes.libsonnet | 25 
++++++++----------- .../signals/overview.libsonnet | 8 +++--- .../signals/replicator.libsonnet | 2 +- 10 files changed, 56 insertions(+), 59 deletions(-) diff --git a/apache-couchdb-mixin/README.md b/apache-couchdb-mixin/README.md index d6472386c..1ce5d4769 100644 --- a/apache-couchdb-mixin/README.md +++ b/apache-couchdb-mixin/README.md @@ -18,7 +18,7 @@ and the following alerts: - CouchDBReplicatorJobsCrashing - CouchDBReplicatorChangesQueuesDying - CouchDBReplicatorConnectionOwnersCrashing -- CouchDBReplicatorConnectionWorkersCrashing +- CouchDBReplicatorWorkersCrashing ## Apache CouchDB Overview @@ -68,8 +68,8 @@ scrape_configs: - CouchDBManyReplicatorJobsPending: There is a high number of replicator jobs pending for a node. - CouchDBReplicatorJobsCrashing: There are replicator jobs crashing for a node. - CouchDBReplicatorChangesQueuesDying: There are replicator changes queue process deaths for a node. -- CouchDBReplicatorConnectionOwnersCrashing: There are replicator connection owner process crashes for a node. -- CouchDBReplicatorConnectionWorkersCrashing: There are replicator connection worker process crashes for a node. +- CouchDBReplicatorOwnersCrashing: There are replicator connection owner process crashes for a node. +- CouchDBReplicatorWorkersCrashing: There are replicator connection worker process crashes for a node. 
## Install tools diff --git a/apache-couchdb-mixin/alerts.libsonnet b/apache-couchdb-mixin/alerts.libsonnet index d11d633fe..27500d58b 100644 --- a/apache-couchdb-mixin/alerts.libsonnet +++ b/apache-couchdb-mixin/alerts.libsonnet @@ -7,7 +7,7 @@ { alert: 'CouchDBUnhealthyCluster', expr: ||| - min by(job, couchdb_cluster) (couchdb_couch_replicator_cluster_is_stable) < %(alertsCriticalClusterIsUnstable5m)s + min by(job, couchdb_cluster) (couchdb_couch_replicator_cluster_is_stable{%(filteringSelector)s}) < %(alertsCriticalClusterIsUnstable5m)s ||| % this.config, 'for': '5m', labels: { @@ -25,7 +25,7 @@ { alert: 'CouchDBHigh4xxResponseCodes', expr: ||| - sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"4.."}[5m])) > %(alertsWarning4xxResponseCodes5m)s + sum by(job, instance) (increase(couchdb_httpd_status_codes{%(filteringSelector)s,code=~"4.*"}[5m])) > %(alertsWarning4xxResponseCodes5m)s ||| % this.config, 'for': '5m', labels: { @@ -43,7 +43,7 @@ { alert: 'CouchDBHigh5xxResponseCodes', expr: ||| - sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"5.."}[5m])) > %(alertsCritical5xxResponseCodes5m)s + sum by(job, instance) (increase(couchdb_httpd_status_codes{%(filteringSelector)s,code=~"5.*"}[5m])) > %(alertsCritical5xxResponseCodes5m)s ||| % this.config, 'for': '5m', labels: { @@ -61,7 +61,7 @@ { alert: 'CouchDBModerateRequestLatency', expr: ||| - sum by(job, instance) (couchdb_request_time_seconds_sum / couchdb_request_time_seconds_count) > %(alertsWarningRequestLatency5m)s + sum by(job, instance) (rate(couchdb_request_time_seconds_sum{%(filteringSelector)s}[5m]) / rate(couchdb_request_time_seconds_count{%(filteringSelector)s}[5m])) * 1000 > %(alertsWarningRequestLatency5m)s ||| % this.config, 'for': '5m', labels: { @@ -79,7 +79,7 @@ { alert: 'CouchDBHighRequestLatency', expr: ||| - sum by(job, instance) (couchdb_request_time_seconds_sum / couchdb_request_time_seconds_count) > %(alertsCriticalRequestLatency5m)s + sum by(job, instance) 
(rate(couchdb_request_time_seconds_sum{%(filteringSelector)s}[5m]) / rate(couchdb_request_time_seconds_count{%(filteringSelector)s}[5m])) * 1000 > %(alertsCriticalRequestLatency5m)s ||| % this.config, 'for': '5m', labels: { @@ -97,7 +97,7 @@ { alert: 'CouchDBManyReplicatorJobsPending', expr: ||| - sum by(job, instance) (couchdb_couch_replicator_jobs_pending) > %(alertsWarningPendingReplicatorJobs5m)s + sum by(job, instance) (couchdb_couch_replicator_jobs_pending{%(filteringSelector)s}) > %(alertsWarningPendingReplicatorJobs5m)s ||| % this.config, 'for': '5m', labels: { @@ -115,7 +115,7 @@ { alert: 'CouchDBReplicatorJobsCrashing', expr: ||| - sum by(job, instance) (increase(couchdb_couch_replicator_jobs_crashes_total[5m])) > %(alertsCriticalCrashingReplicatorJobs5m)s + sum by(job, instance) (increase(couchdb_couch_replicator_jobs_crashes_total{%(filteringSelector)s}[5m])) > %(alertsCriticalCrashingReplicatorJobs5m)s ||| % this.config, 'for': '5m', labels: { @@ -133,7 +133,7 @@ { alert: 'CouchDBReplicatorChangesQueuesDying', expr: ||| - sum by(job, instance) (increase(couchdb_couch_replicator_changes_queue_deaths_total[5m])) > %(alertsWarningDyingReplicatorChangesQueues5m)s + sum by(job, instance) (increase(couchdb_couch_replicator_changes_queue_deaths_total{%(filteringSelector)s}[5m])) > %(alertsWarningDyingReplicatorChangesQueues5m)s ||| % this.config, 'for': '5m', labels: { @@ -151,7 +151,7 @@ { alert: 'CouchDBReplicatorOwnersCrashing', expr: ||| - sum by(job, instance) (increase(couchdb_couch_replicator_connection_owner_crashes_total[5m])) > %(alertsWarningCrashingReplicatorConnectionOwners5m)s + sum by(job, instance) (increase(couchdb_couch_replicator_connection_owner_crashes_total{%(filteringSelector)s}[5m])) > %(alertsWarningCrashingReplicatorConnectionOwners5m)s ||| % this.config, 'for': '5m', labels: { @@ -169,7 +169,7 @@ { alert: 'CouchDBReplicatorWorkersCrashing', expr: ||| - sum by(job, instance) 
(increase(couchdb_couch_replicator_connection_worker_crashes_total[5m])) > %(alertsWarningCrashingReplicatorConnectionWorkers5m)s + sum by(job, instance) (increase(couchdb_couch_replicator_connection_worker_crashes_total{%(filteringSelector)s}[5m])) > %(alertsWarningCrashingReplicatorConnectionWorkers5m)s ||| % this.config, 'for': '5m', labels: { diff --git a/apache-couchdb-mixin/dashboards_out/couchdb-logs.json b/apache-couchdb-mixin/dashboards_out/couchdb-logs.json index 6e94cfcab..4dbbd0f42 100644 --- a/apache-couchdb-mixin/dashboards_out/couchdb-logs.json +++ b/apache-couchdb-mixin/dashboards_out/couchdb-logs.json @@ -258,7 +258,7 @@ "uid": "${loki_datasource}" }, "includeAll": true, - "label": "Couchdb_cluster", + "label": "CouchDB cluster", "multi": true, "name": "couchdb_cluster", "query": "label_values({job=\"integrations/apache-couchdb\",job=~\"$job\"}, couchdb_cluster)", diff --git a/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json b/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json index 0eeb0739c..f718a54c0 100644 --- a/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json +++ b/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json @@ -592,7 +592,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, instance, cluster, quantile) (couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", + "expr": "sum by(job, couchdb_cluster, cluster, quantile) (couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", "format": "timeseries", "instant": true, "legendFormat": "{{instance}} - {{quantile}}", @@ -688,7 +688,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,couchdb_cluster,cluster,instance) (\n 
couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"2.*\"}\n)", + "expr": "sum by(job, couchdb_cluster, cluster) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"2.*\"}[$__interval:] offset -$__interval))", "format": "time_series", "instant": false, "interval": "2m", @@ -700,7 +700,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,couchdb_cluster,cluster,instance) (\n couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"3.*\"}\n)", + "expr": "sum by(job, couchdb_cluster, cluster) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"3.*\"}[$__interval:] offset -$__interval))", "format": "time_series", "instant": false, "interval": "2m", @@ -712,7 +712,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,couchdb_cluster,cluster,instance) (\n couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"4.*\"}\n)", + "expr": "sum by(job, couchdb_cluster, cluster) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"4.*\"}[$__interval:] offset -$__interval))", "format": "time_series", "instant": false, "interval": "2m", @@ -724,7 +724,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,couchdb_cluster,cluster,instance) (\n 
couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"5.*\"}\n)", + "expr": "sum by(job, couchdb_cluster, cluster) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"5.*\"}[$__interval:] offset -$__interval))", "format": "time_series", "instant": false, "interval": "2m", @@ -944,13 +944,13 @@ "type": "query" }, { - "allValue": ".+", + "allValue": ".*", "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, "includeAll": true, - "label": "Couchdb_cluster", + "label": "CouchDB cluster", "multi": true, "name": "couchdb_cluster", "query": "label_values(couchdb_couch_replicator_cluster_is_stable{job=\"integrations/apache-couchdb\",job=~\"$job\"}, couchdb_cluster)", diff --git a/apache-couchdb-mixin/dashboards_out/couchdb-overview.json b/apache-couchdb-mixin/dashboards_out/couchdb-overview.json index eef5a4f9c..81c5df1e1 100644 --- a/apache-couchdb-mixin/dashboards_out/couchdb-overview.json +++ b/apache-couchdb-mixin/dashboards_out/couchdb-overview.json @@ -124,7 +124,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "count(count by(job, couchdb_cluster, cluster, instance) (couchdb_request_time_seconds_count{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}))", + "expr": "sum(count by(job, couchdb_cluster, cluster, instance) (couchdb_request_time_seconds_count{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}))", "format": "time_series", "instant": false, "legendFormat": "{{ couchdb_cluster }}", @@ -833,11 +833,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster, 
cluster) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"[23].*\"}[$__interval:] offset $__interval))", + "expr": "sum by(job, couchdb_cluster, cluster, code) (rate(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"[23].*\"}[$__rate_interval]))", "format": "time_series", "instant": false, "interval": "2m", - "legendFormat": "{{ couchdb_cluster }}", + "legendFormat": "{{ couchdb_cluster }} - {{code}}", "refId": "Good response statuses" } ], @@ -886,10 +886,10 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster, cluster) (couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"[45].*\"})", + "expr": "sum by(job, couchdb_cluster, cluster, code) (rate(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"[45].*\"}[$__rate_interval]))", "format": "time_series", "instant": false, - "legendFormat": "{{ couchdb_cluster }}", + "legendFormat": "{{ couchdb_cluster }} - {{code}}", "refId": "Error response statuses" } ], @@ -950,7 +950,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster, cluster) (couchdb_couch_replicator_changes_manager_deaths_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", + "expr": "sum by(job, couchdb_cluster, cluster) 
(increase(couchdb_couch_replicator_changes_manager_deaths_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__interval:] offset -$__interval))", "format": "time_series", "instant": false, "legendFormat": "{{ couchdb_cluster}}", @@ -1304,13 +1304,13 @@ "type": "query" }, { - "allValue": ".+", + "allValue": ".*", "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, "includeAll": true, - "label": "Couchdb_cluster", + "label": "CouchDB cluster", "multi": true, "name": "couchdb_cluster", "query": "label_values(couchdb_couch_replicator_cluster_is_stable{job=\"integrations/apache-couchdb\",job=~\"$job\"}, couchdb_cluster)", diff --git a/apache-couchdb-mixin/mixin.libsonnet b/apache-couchdb-mixin/mixin.libsonnet index 82500a2ec..632f2e942 100644 --- a/apache-couchdb-mixin/mixin.libsonnet +++ b/apache-couchdb-mixin/mixin.libsonnet @@ -16,7 +16,7 @@ local optional_labels = { cluster+: { allValue: '.*', }, - couchb_cluster+: { + couchdb_cluster+: { label: 'CouchDB cluster', allValue: '.*', }, diff --git a/apache-couchdb-mixin/prometheus_rules_out/prometheus_alerts.yaml b/apache-couchdb-mixin/prometheus_rules_out/prometheus_alerts.yaml index c8e90f827..c7072d44f 100644 --- a/apache-couchdb-mixin/prometheus_rules_out/prometheus_alerts.yaml +++ b/apache-couchdb-mixin/prometheus_rules_out/prometheus_alerts.yaml @@ -6,7 +6,7 @@ groups: description: '{{$labels.couchdb_cluster}} has reported a value of {{ printf "%.0f" $value }} for its stability over the last 5 minutes, which is below the threshold of 1.' summary: At least one of the nodes in a cluster is reporting the cluster as being unstable. 
expr: | - min by(job, couchdb_cluster) (couchdb_couch_replicator_cluster_is_stable) < 1 + min by(job, couchdb_cluster) (couchdb_couch_replicator_cluster_is_stable{job="integrations/apache-couchdb"}) < 1 for: 5m labels: severity: critical @@ -15,7 +15,7 @@ groups: description: '{{ printf "%.0f" $value }} 4xx responses have been detected over the last 5 minutes on {{$labels.instance}}, which is above the threshold of 5.' summary: There are a high number of 4xx responses for incoming requests to a node. expr: | - sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"4.."}[5m])) > 5 + sum by(job, instance) (increase(couchdb_httpd_status_codes{job="integrations/apache-couchdb",code=~"4.*"}[5m])) > 5 for: 5m labels: severity: warning @@ -24,7 +24,7 @@ groups: description: '{{ printf "%.0f" $value }} 5xx responses have been detected over the last 5 minutes on {{$labels.instance}}, which is above the threshold of 0.' summary: There are a high number of 5xx responses for incoming requests to a node. expr: | - sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"5.."}[5m])) > 0 + sum by(job, instance) (increase(couchdb_httpd_status_codes{job="integrations/apache-couchdb",code=~"5.*"}[5m])) > 0 for: 5m labels: severity: critical @@ -33,7 +33,7 @@ groups: description: 'An average of {{ printf "%.0f" $value }}ms of request latency has occurred over the last 5 minutes on {{$labels.instance}}, which is above the threshold of 500ms. ' summary: There is a moderate level of request latency for a node. 
expr: | - sum by(job, instance) (couchdb_request_time_seconds_sum / couchdb_request_time_seconds_count) > 500 + sum by(job, instance) (rate(couchdb_request_time_seconds_sum{job="integrations/apache-couchdb"}[5m]) / rate(couchdb_request_time_seconds_count{job="integrations/apache-couchdb"}[5m])) * 1000 > 500 for: 5m labels: severity: warning @@ -42,7 +42,7 @@ groups: description: 'An average of {{ printf "%.0f" $value }}ms of request latency has occurred over the last 5 minutes on {{$labels.instance}}, which is above the threshold of 1000ms. ' summary: There is a high level of request latency for a node. expr: | - sum by(job, instance) (couchdb_request_time_seconds_sum / couchdb_request_time_seconds_count) > 1000 + sum by(job, instance) (rate(couchdb_request_time_seconds_sum{job="integrations/apache-couchdb"}[5m]) / rate(couchdb_request_time_seconds_count{job="integrations/apache-couchdb"}[5m])) * 1000 > 1000 for: 5m labels: severity: critical @@ -51,7 +51,7 @@ groups: description: '{{ printf "%.0f" $value }} replicator jobs are pending on {{$labels.instance}}, which is above the threshold of 10. ' summary: There is a high number of replicator jobs pending for a node. expr: | - sum by(job, instance) (couchdb_couch_replicator_jobs_pending) > 10 + sum by(job, instance) (couchdb_couch_replicator_jobs_pending{job="integrations/apache-couchdb"}) > 10 for: 5m labels: severity: warning @@ -60,7 +60,7 @@ groups: description: '{{ printf "%.0f" $value }} replicator jobs have crashed over the last 5 minutes on {{$labels.instance}}, which is above the threshold of 0. ' summary: There are replicator jobs crashing for a node. 
expr: | - sum by(job, instance) (increase(couchdb_couch_replicator_jobs_crashes_total[5m])) > 0 + sum by(job, instance) (increase(couchdb_couch_replicator_jobs_crashes_total{job="integrations/apache-couchdb"}[5m])) > 0 for: 5m labels: severity: critical @@ -69,7 +69,7 @@ groups: description: '{{ printf "%.0f" $value }} replicator changes queue processes have died over the last 5 minutes on {{$labels.instance}}, which is above the threshold of 0. ' summary: There are replicator changes queue process deaths for a node. expr: | - sum by(job, instance) (increase(couchdb_couch_replicator_changes_queue_deaths_total[5m])) > 0 + sum by(job, instance) (increase(couchdb_couch_replicator_changes_queue_deaths_total{job="integrations/apache-couchdb"}[5m])) > 0 for: 5m labels: severity: warning @@ -78,7 +78,7 @@ groups: description: '{{ printf "%.0f" $value }} replicator connection owner processes have crashed over the last 5 minutes on {{$labels.instance}}, which is above the threshold of 0. ' summary: There are replicator connection owner process crashes for a node. expr: | - sum by(job, instance) (increase(couchdb_couch_replicator_connection_owner_crashes_total[5m])) > 0 + sum by(job, instance) (increase(couchdb_couch_replicator_connection_owner_crashes_total{job="integrations/apache-couchdb"}[5m])) > 0 for: 5m labels: severity: warning @@ -87,7 +87,7 @@ groups: description: '{{ printf "%.0f" $value }} replicator connection worker processes have crashed over the last 5 minutes on {{$labels.instance}}, which is above the threshold of 0. ' summary: There are replicator connection worker process crashes for a node. 
expr: | - sum by(job, instance) (increase(couchdb_couch_replicator_connection_worker_crashes_total[5m])) > 0 + sum by(job, instance) (increase(couchdb_couch_replicator_connection_worker_crashes_total{job="integrations/apache-couchdb"}[5m])) > 0 for: 5m labels: severity: warning diff --git a/apache-couchdb-mixin/signals/nodes.libsonnet b/apache-couchdb-mixin/signals/nodes.libsonnet index afdd9fa7a..42bc482c3 100644 --- a/apache-couchdb-mixin/signals/nodes.libsonnet +++ b/apache-couchdb-mixin/signals/nodes.libsonnet @@ -1,5 +1,6 @@ function(this) { local legendCustomTemplate = std.join(' ', std.map(function(label) '{{' + label + '}}', this.instanceLabels)), + local groupLabelAggTerm = std.join(', ', this.groupLabels), filteringSelector: this.filteringSelector, groupLabels: this.groupLabels, instanceLabels: this.instanceLabels, @@ -147,7 +148,7 @@ function(this) { unit: 's', sources: { prometheus: { - expr: 'sum by(job, instance, cluster, quantile) (couchdb_request_time_seconds{%(queriesSelector)s})', + expr: 'sum by(' + groupLabelAggTerm + ', quantile) (couchdb_request_time_seconds{%(queriesSelector)s})', legendCustomTemplate: legendCustomTemplate + ' - {{quantile}}', }, }, @@ -169,15 +170,12 @@ function(this) { responseStatus2xx: { name: 'Response status 2XX', nameShort: 'Response status 2XX', - type: 'gauge', + type: 'raw', description: 'The number of response status 2XX on a node.', unit: 'requests', - aggLevel: 'instance', - aggFunction: 'sum', sources: { prometheus: { - expr: 'couchdb_httpd_status_codes{%(queriesSelector)s, code=~"2.*"}', - rangeFunction: 'increase', + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"2.*"}[$__interval:] offset -$__interval))', legendCustomTemplate: legendCustomTemplate + ' - 2xx', }, }, @@ -186,16 +184,13 @@ function(this) { responseStatus3xx: { name: 'Response status 3XX', nameShort: 'Response status 3XX', - type: 'gauge', + type: 'raw', description: 'The number of 
response status 3XX on a node.', unit: 'requests', - aggLevel: 'instance', - aggFunction: 'sum', sources: { prometheus: { - expr: 'couchdb_httpd_status_codes{%(queriesSelector)s, code=~"3.*"}', + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"3.*"}[$__interval:] offset -$__interval))', legendCustomTemplate: legendCustomTemplate + ' - 3xx', - rangeFunction: 'increase', }, }, }, @@ -203,14 +198,14 @@ function(this) { responseStatus4xx: { name: 'Response status 4XX', nameShort: 'Response status 4XX', - type: 'gauge', + type: 'raw', description: 'The number of response status 4XX on a node.', unit: 'requests', aggLevel: 'instance', aggFunction: 'sum', sources: { prometheus: { - expr: 'couchdb_httpd_status_codes{%(queriesSelector)s, code=~"4.*"}', + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"4.*"}[$__interval:] offset -$__interval))', rangeFunction: 'increase', legendCustomTemplate: legendCustomTemplate + ' - 4xx', }, @@ -220,14 +215,14 @@ function(this) { responseStatus5xx: { name: 'Response status 5XX', nameShort: 'Response status 5XX', - type: 'gauge', + type: 'raw', description: 'The number of response status 5XX on a node.', unit: 'requests', aggLevel: 'instance', aggFunction: 'sum', sources: { prometheus: { - expr: 'couchdb_httpd_status_codes{%(queriesSelector)s, code=~"5.*"}', + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"5.*"}[$__interval:] offset -$__interval))', rangeFunction: 'increase', legendCustomTemplate: legendCustomTemplate + ' - 5xx', }, diff --git a/apache-couchdb-mixin/signals/overview.libsonnet b/apache-couchdb-mixin/signals/overview.libsonnet index 9dbb5de59..9681d8089 100644 --- a/apache-couchdb-mixin/signals/overview.libsonnet +++ b/apache-couchdb-mixin/signals/overview.libsonnet @@ -33,7 +33,7 @@ function(this) { unit: 'none', sources: { prometheus: { - expr: 
'count(count by(' + groupLabelAggTerm + ', instance) (couchdb_request_time_seconds_count{%(queriesSelector)s}))', + expr: 'sum(count by(' + groupLabelAggTerm + ', instance) (couchdb_request_time_seconds_count{%(queriesSelector)s}))', }, }, }, @@ -256,7 +256,8 @@ function(this) { unit: 'requests', sources: { prometheus: { - expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"[23].*"}[$__interval:] offset $__interval))', + expr: 'sum by(' + groupLabelAggTerm + ', code) (rate(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"[23].*"}[$__rate_interval]))', + legendCustomTemplate: legendCustomTemplate + ' - {{code}}', }, }, }, @@ -269,7 +270,8 @@ function(this) { unit: 'requests', sources: { prometheus: { - expr: 'sum by(' + groupLabelAggTerm + ') (couchdb_httpd_status_codes{%(queriesSelector)s, code=~"[45].*"})', + expr: 'sum by(' + groupLabelAggTerm + ', code) (rate(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"[45].*"}[$__rate_interval]))', + legendCustomTemplate: legendCustomTemplate + ' - {{code}}', }, }, }, diff --git a/apache-couchdb-mixin/signals/replicator.libsonnet b/apache-couchdb-mixin/signals/replicator.libsonnet index 46ad2828a..3b18c2838 100644 --- a/apache-couchdb-mixin/signals/replicator.libsonnet +++ b/apache-couchdb-mixin/signals/replicator.libsonnet @@ -20,7 +20,7 @@ function(this) { unit: 'none', sources: { prometheus: { - expr: 'sum by(' + groupLabelAggTerm + ') (couchdb_couch_replicator_changes_manager_deaths_total{%(queriesSelector)s})', + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_couch_replicator_changes_manager_deaths_total{%(queriesSelector)s}[$__interval:] offset -$__interval))', }, }, }, From f492f27ca6008744dfd76539d5c75e7c5e576bff Mon Sep 17 00:00:00 2001 From: schmikei Date: Tue, 18 Nov 2025 10:14:57 -0500 Subject: [PATCH 07/11] fix lint with selector --- .../prometheus_rules_out/prometheus_alerts.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/apache-couchdb-mixin/prometheus_rules_out/prometheus_alerts.yaml b/apache-couchdb-mixin/prometheus_rules_out/prometheus_alerts.yaml index c7072d44f..472e04792 100644 --- a/apache-couchdb-mixin/prometheus_rules_out/prometheus_alerts.yaml +++ b/apache-couchdb-mixin/prometheus_rules_out/prometheus_alerts.yaml @@ -15,7 +15,7 @@ groups: description: '{{ printf "%.0f" $value }} 4xx responses have been detected over the last 5 minutes on {{$labels.instance}}, which is above the threshold of 5.' summary: There are a high number of 4xx responses for incoming requests to a node. expr: | - sum by(job, instance) (increase(couchdb_httpd_status_codes{job="integrations/apache-couchdb",code=~"4.*"}[5m])) > 5 + sum by(job, instance) (increase(couchdb_httpd_status_codes{job="integrations/apache-couchdb",code=~"4.."}[5m])) > 5 for: 5m labels: severity: warning @@ -24,7 +24,7 @@ groups: description: '{{ printf "%.0f" $value }} 5xx responses have been detected over the last 5 minutes on {{$labels.instance}}, which is above the threshold of 0.' summary: There are a high number of 5xx responses for incoming requests to a node. 
expr: | - sum by(job, instance) (increase(couchdb_httpd_status_codes{job="integrations/apache-couchdb",code=~"5.*"}[5m])) > 0 + sum by(job, instance) (increase(couchdb_httpd_status_codes{job="integrations/apache-couchdb",code=~"5.."}[5m])) > 0 for: 5m labels: severity: critical From 467d456192254f1db0c5e7825f64cbbdb9697df4 Mon Sep 17 00:00:00 2001 From: schmikei Date: Tue, 18 Nov 2025 10:17:24 -0500 Subject: [PATCH 08/11] fix lint with selector --- apache-couchdb-mixin/alerts.libsonnet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apache-couchdb-mixin/alerts.libsonnet b/apache-couchdb-mixin/alerts.libsonnet index 27500d58b..02fab12d3 100644 --- a/apache-couchdb-mixin/alerts.libsonnet +++ b/apache-couchdb-mixin/alerts.libsonnet @@ -25,7 +25,7 @@ { alert: 'CouchDBHigh4xxResponseCodes', expr: ||| - sum by(job, instance) (increase(couchdb_httpd_status_codes{%(filteringSelector)s,code=~"4.*"}[5m])) > %(alertsWarning4xxResponseCodes5m)s + sum by(job, instance) (increase(couchdb_httpd_status_codes{%(filteringSelector)s,code=~"4.."}[5m])) > %(alertsWarning4xxResponseCodes5m)s ||| % this.config, 'for': '5m', labels: { @@ -43,7 +43,7 @@ { alert: 'CouchDBHigh5xxResponseCodes', expr: ||| - sum by(job, instance) (increase(couchdb_httpd_status_codes{%(filteringSelector)s,code=~"5.*"}[5m])) > %(alertsCritical5xxResponseCodes5m)s + sum by(job, instance) (increase(couchdb_httpd_status_codes{%(filteringSelector)s,code=~"5.."}[5m])) > %(alertsCritical5xxResponseCodes5m)s ||| % this.config, 'for': '5m', labels: { From e6a565464d21eb6e6a9906876b6401d7c1b3410b Mon Sep 17 00:00:00 2001 From: schmikei Date: Tue, 18 Nov 2025 10:31:49 -0500 Subject: [PATCH 09/11] fix some issues due to recent commits; interval/legends --- .../dashboards_out/couchdb-nodes.json | 14 +++++++------- .../dashboards_out/couchdb-overview.json | 2 ++ apache-couchdb-mixin/panels.libsonnet | 5 ++--- apache-couchdb-mixin/signals/nodes.libsonnet | 10 +++++----- 4 files changed, 16 insertions(+), 
15 deletions(-) diff --git a/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json b/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json index f718a54c0..faae71abf 100644 --- a/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json +++ b/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json @@ -561,8 +561,8 @@ }, { "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The average request latency for a node.", "fieldConfig": { @@ -592,7 +592,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster, cluster, quantile) (couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", + "expr": "sum by(job, couchdb_cluster, cluster, instance, quantile) (couchdb_request_time_seconds{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"})", "format": "timeseries", "instant": true, "legendFormat": "{{instance}} - {{quantile}}", @@ -688,7 +688,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster, cluster) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"2.*\"}[$__interval:] offset -$__interval))", + "expr": "sum by(job, couchdb_cluster, cluster, instance) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"2.*\"}[$__interval:] offset -$__interval))", "format": "time_series", "instant": false, "interval": "2m", @@ -700,7 +700,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster, cluster) 
(increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"3.*\"}[$__interval:] offset -$__interval))", + "expr": "sum by(job, couchdb_cluster, cluster, instance) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"3.*\"}[$__interval:] offset -$__interval))", "format": "time_series", "instant": false, "interval": "2m", @@ -712,7 +712,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster, cluster) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"4.*\"}[$__interval:] offset -$__interval))", + "expr": "sum by(job, couchdb_cluster, cluster, instance) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"4.*\"}[$__interval:] offset -$__interval))", "format": "time_series", "instant": false, "interval": "2m", @@ -724,7 +724,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster, cluster) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"5.*\"}[$__interval:] offset -$__interval))", + "expr": "sum by(job, couchdb_cluster, cluster, instance) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"5.*\"}[$__interval:] offset -$__interval))", "format": "time_series", "instant": false, "interval": "2m", diff --git 
a/apache-couchdb-mixin/dashboards_out/couchdb-overview.json b/apache-couchdb-mixin/dashboards_out/couchdb-overview.json index 81c5df1e1..7d08a8841 100644 --- a/apache-couchdb-mixin/dashboards_out/couchdb-overview.json +++ b/apache-couchdb-mixin/dashboards_out/couchdb-overview.json @@ -889,6 +889,7 @@ "expr": "sum by(job, couchdb_cluster, cluster, code) (rate(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"[45].*\"}[$__rate_interval]))", "format": "time_series", "instant": false, + "interval": "2m", "legendFormat": "{{ couchdb_cluster }} - {{code}}", "refId": "Error response statuses" } @@ -953,6 +954,7 @@ "expr": "sum by(job, couchdb_cluster, cluster) (increase(couchdb_couch_replicator_changes_manager_deaths_total{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\"}[$__interval:] offset -$__interval))", "format": "time_series", "instant": false, + "interval": "2m", "legendFormat": "{{ couchdb_cluster}}", "refId": "Changes manager deaths" } diff --git a/apache-couchdb-mixin/panels.libsonnet b/apache-couchdb-mixin/panels.libsonnet index eed071174..96cc3faae 100644 --- a/apache-couchdb-mixin/panels.libsonnet +++ b/apache-couchdb-mixin/panels.libsonnet @@ -140,7 +140,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; overviewErrorResponseStatusesPanel: commonlib.panels.generic.timeSeries.base.new( 'Error response statuses', - targets=[signals.overview.errorResponseStatuses.asTarget()], + targets=[signals.overview.errorResponseStatuses.asTarget() { interval: '2m' }], ) + g.panel.timeSeries.panelOptions.withDescription('The total number of error response statuses (HTTP 4xx-5xx) aggregated across all nodes.') + g.panel.timeSeries.standardOptions.withUnit('reqps'), @@ -149,7 +149,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; 
overviewReplicatorChangesManagerDeathsPanel: commonlib.panels.generic.timeSeries.base.new( 'Replicator changes manager deaths', - targets=[signals.replicator.changesManagerDeaths.asTarget()], + targets=[signals.replicator.changesManagerDeaths.asTarget() { interval: '2m' }], ) + g.panel.timeSeries.panelOptions.withDescription('Number of replicator changes manager processor deaths across all nodes.'), @@ -287,7 +287,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInstant(true) + g.query.prometheus.withFormat('timeseries'), ]) - + g.panel.gauge.queryOptions.withDatasource('prometheus', '${' + this.grafana.variables.datasources.prometheus.name + '}') + g.panel.histogram.standardOptions.color.withMode('thresholds') + g.panel.histogram.options.legend.withPlacement('right') + g.panel.histogram.panelOptions.withDescription('The average request latency for a node.') diff --git a/apache-couchdb-mixin/signals/nodes.libsonnet b/apache-couchdb-mixin/signals/nodes.libsonnet index 42bc482c3..d242bcf70 100644 --- a/apache-couchdb-mixin/signals/nodes.libsonnet +++ b/apache-couchdb-mixin/signals/nodes.libsonnet @@ -1,6 +1,6 @@ function(this) { local legendCustomTemplate = std.join(' ', std.map(function(label) '{{' + label + '}}', this.instanceLabels)), - local groupLabelAggTerm = std.join(', ', this.groupLabels), + local groupLabelAggTerm = std.join(', ', this.groupLabels + this.instanceLabels), filteringSelector: this.filteringSelector, groupLabels: this.groupLabels, instanceLabels: this.instanceLabels, @@ -175,7 +175,7 @@ function(this) { unit: 'requests', sources: { prometheus: { - expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"2.*"}[$__interval:] offset -$__interval))', + expr: 'sum by('+ groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"2.*"}[$__interval:] offset -$__interval))', legendCustomTemplate: legendCustomTemplate + ' - 2xx', }, }, 
@@ -189,7 +189,7 @@ function(this) { unit: 'requests', sources: { prometheus: { - expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"3.*"}[$__interval:] offset -$__interval))', + expr: 'sum by('+ groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"3.*"}[$__interval:] offset -$__interval))', legendCustomTemplate: legendCustomTemplate + ' - 3xx', }, }, @@ -205,7 +205,7 @@ function(this) { aggFunction: 'sum', sources: { prometheus: { - expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"4.*"}[$__interval:] offset -$__interval))', + expr: 'sum by('+ groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"4.*"}[$__interval:] offset -$__interval))', rangeFunction: 'increase', legendCustomTemplate: legendCustomTemplate + ' - 4xx', }, @@ -222,7 +222,7 @@ function(this) { aggFunction: 'sum', sources: { prometheus: { - expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"5.*"}[$__interval:] offset -$__interval))', + expr: 'sum by('+ groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"5.*"}[$__interval:] offset -$__interval))', rangeFunction: 'increase', legendCustomTemplate: legendCustomTemplate + ' - 5xx', }, From 7f424aa6805247b57c1207fb9e59119d8d6aa76c Mon Sep 17 00:00:00 2001 From: schmikei Date: Tue, 18 Nov 2025 10:40:23 -0500 Subject: [PATCH 10/11] make fmt --- apache-couchdb-mixin/signals/nodes.libsonnet | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apache-couchdb-mixin/signals/nodes.libsonnet b/apache-couchdb-mixin/signals/nodes.libsonnet index d242bcf70..f2a898ecf 100644 --- a/apache-couchdb-mixin/signals/nodes.libsonnet +++ b/apache-couchdb-mixin/signals/nodes.libsonnet @@ -175,7 +175,7 @@ function(this) { unit: 'requests', sources: { prometheus: { - expr: 'sum by('+ 
groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"2.*"}[$__interval:] offset -$__interval))', + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"2.*"}[$__interval:] offset -$__interval))', legendCustomTemplate: legendCustomTemplate + ' - 2xx', }, }, @@ -189,7 +189,7 @@ function(this) { unit: 'requests', sources: { prometheus: { - expr: 'sum by('+ groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"3.*"}[$__interval:] offset -$__interval))', + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"3.*"}[$__interval:] offset -$__interval))', legendCustomTemplate: legendCustomTemplate + ' - 3xx', }, }, @@ -205,7 +205,7 @@ function(this) { aggFunction: 'sum', sources: { prometheus: { - expr: 'sum by('+ groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"4.*"}[$__interval:] offset -$__interval))', + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"4.*"}[$__interval:] offset -$__interval))', rangeFunction: 'increase', legendCustomTemplate: legendCustomTemplate + ' - 4xx', }, @@ -222,7 +222,7 @@ function(this) { aggFunction: 'sum', sources: { prometheus: { - expr: 'sum by('+ groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"5.*"}[$__interval:] offset -$__interval))', + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"5.*"}[$__interval:] offset -$__interval))', rangeFunction: 'increase', legendCustomTemplate: legendCustomTemplate + ' - 5xx', }, From 58ca718dfbeebb470a65c9e865a70dbc529c1df8 Mon Sep 17 00:00:00 2001 From: schmikei Date: Wed, 19 Nov 2025 10:51:37 -0500 Subject: [PATCH 11/11] address PR feedback minus the description implementation --- apache-couchdb-mixin/README.md | 29 +++++++++++++++++++ 
.../dashboards_out/couchdb-nodes.json | 14 ++++++--- .../dashboards_out/couchdb-overview.json | 14 ++++++--- apache-couchdb-mixin/panels.libsonnet | 4 +++ apache-couchdb-mixin/signals/nodes.libsonnet | 8 ++--- .../signals/overview.libsonnet | 8 ++--- 6 files changed, 61 insertions(+), 16 deletions(-) diff --git a/apache-couchdb-mixin/README.md b/apache-couchdb-mixin/README.md index 1ce5d4769..a49117dba 100644 --- a/apache-couchdb-mixin/README.md +++ b/apache-couchdb-mixin/README.md @@ -58,6 +58,35 @@ scrape_configs: __path__: /var/log/couchdb/couchdb.log ``` +## CouchDB Version Compatibility + +This mixin supports **Apache CouchDB 3.3.1 and later** and handles differences in metric naming conventions between versions. + +### Metric Naming Changes + +Between CouchDB 3.3.0 and 3.5.0, there was a change in how some metrics are named. Specifically, some metrics that previously had a `_total` suffix no longer include it in newer versions: + +- **CouchDB 3.3.0 and earlier**: `couchdb_open_os_files_total` +- **CouchDB 3.5.0 and later**: `couchdb_open_os_files` + +### How the Mixin Handles This + +By default, the mixin is configured to work with both naming conventions automatically through the `metricsSource` configuration in `config.libsonnet`. This ensures dashboards and alerts work correctly regardless of which CouchDB version you're running. + +If you need to customize this behavior, you can modify the `metricsSource` in your `config.libsonnet`: + +```jsonnet +{ + _config+:: { + // For CouchDB 3.5.0+ only (no _total suffix) + metricsSource: ['prometheus'], + + // OR, for backwards compatibility with both versions, use this line instead of the one above + // metricsSource: ['prometheus', 'prometheusWithTotal'], + }, +} +``` + ## Alerts Overview - CouchDBUnhealthyCluster: At least one of the nodes in a cluster is reporting the cluster as being unstable.
diff --git a/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json b/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json index faae71abf..2be264c67 100644 --- a/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json +++ b/apache-couchdb-mixin/dashboards_out/couchdb-nodes.json @@ -570,6 +570,11 @@ "color": { "mode": "thresholds" }, + "custom": { + "stacking": { + "mode": "normal" + } + }, "unit": "s" } }, @@ -581,6 +586,7 @@ }, "id": 12, "options": { + "bucketCount": 60, "legend": { "placement": "right" } @@ -688,7 +694,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster, cluster, instance) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"2.*\"}[$__interval:] offset -$__interval))", + "expr": "sum by(job, couchdb_cluster, cluster, instance) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"2.*\"}[$__interval:] offset -$__interval)) != 0", "format": "time_series", "instant": false, "interval": "2m", @@ -700,7 +706,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster, cluster, instance) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"3.*\"}[$__interval:] offset -$__interval))", + "expr": "sum by(job, couchdb_cluster, cluster, instance) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"3.*\"}[$__interval:] offset -$__interval)) != 0", "format": "time_series", "instant": false, "interval": "2m", @@ -712,7 +718,7 @@ "type": "prometheus", "uid": 
"${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster, cluster, instance) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"4.*\"}[$__interval:] offset -$__interval))", + "expr": "sum by(job, couchdb_cluster, cluster, instance) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"4.*\"}[$__interval:] offset -$__interval)) != 0", "format": "time_series", "instant": false, "interval": "2m", @@ -724,7 +730,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster, cluster, instance) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"5.*\"}[$__interval:] offset -$__interval))", + "expr": "sum by(job, couchdb_cluster, cluster, instance) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"5.*\"}[$__interval:] offset -$__interval)) != 0", "format": "time_series", "instant": false, "interval": "2m", diff --git a/apache-couchdb-mixin/dashboards_out/couchdb-overview.json b/apache-couchdb-mixin/dashboards_out/couchdb-overview.json index 7d08a8841..745246c9e 100644 --- a/apache-couchdb-mixin/dashboards_out/couchdb-overview.json +++ b/apache-couchdb-mixin/dashboards_out/couchdb-overview.json @@ -638,6 +638,11 @@ "color": { "mode": "thresholds" }, + "custom": { + "stacking": { + "mode": "normal" + } + }, "unit": "s" } }, @@ -649,6 +654,7 @@ }, "id": 14, "options": { + "bucketCount": 60, "legend": { "asTable": true, "placement": "right" @@ -744,7 +750,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - 
"expr": "sum by(job, couchdb_cluster, cluster) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"2.*\"}[$__interval:] offset $__interval))", + "expr": "sum by(job, couchdb_cluster, cluster) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"2.*\"}[$__interval:] offset $__interval)) != 0", "format": "time_series", "instant": false, "interval": "2m", @@ -756,7 +762,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster, cluster) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"3.*\"}[$__interval:] offset $__interval))", + "expr": "sum by(job, couchdb_cluster, cluster) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"3.*\"}[$__interval:] offset $__interval)) != 0", "format": "time_series", "instant": false, "interval": "2m", @@ -768,7 +774,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster, cluster) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"4.*\"}[$__interval:] offset $__interval))", + "expr": "sum by(job, couchdb_cluster, cluster) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"4.*\"}[$__interval:] offset $__interval)) != 0", "format": "time_series", "instant": false, "interval": "2m", @@ 
-780,7 +786,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, couchdb_cluster, cluster) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"5.*\"}[$__interval:] offset $__interval))", + "expr": "sum by(job, couchdb_cluster, cluster) (increase(couchdb_httpd_status_codes{job=\"integrations/apache-couchdb\",job=~\"$job\",couchdb_cluster=~\"$couchdb_cluster\",cluster=~\"$cluster\",instance=~\"$instance\", code=~\"5.*\"}[$__interval:] offset $__interval)) != 0", "format": "time_series", "instant": false, "interval": "2m", diff --git a/apache-couchdb-mixin/panels.libsonnet b/apache-couchdb-mixin/panels.libsonnet index 96cc3faae..434c722f2 100644 --- a/apache-couchdb-mixin/panels.libsonnet +++ b/apache-couchdb-mixin/panels.libsonnet @@ -108,6 +108,8 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.histogram.panelOptions.withDescription('The request latency aggregated across all nodes.') + g.panel.histogram.standardOptions.color.withMode('thresholds') + + g.panel.histogram.fieldConfig.defaults.custom.stacking.withMode('normal') + + g.panel.histogram.options.withBucketCount(60) + g.panel.histogram.options.legend.withAsTable(true) + g.panel.histogram.options.legend.withPlacement('right') + g.panel.histogram.standardOptions.withUnit('s'), @@ -289,6 +291,8 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.histogram.standardOptions.color.withMode('thresholds') + g.panel.histogram.options.legend.withPlacement('right') + + g.panel.histogram.fieldConfig.defaults.custom.stacking.withMode('normal') + + g.panel.histogram.options.withBucketCount(60) + g.panel.histogram.panelOptions.withDescription('The average request latency for a node.') + g.panel.histogram.standardOptions.withUnit('s'), diff --git a/apache-couchdb-mixin/signals/nodes.libsonnet 
b/apache-couchdb-mixin/signals/nodes.libsonnet index f2a898ecf..e36ae132d 100644 --- a/apache-couchdb-mixin/signals/nodes.libsonnet +++ b/apache-couchdb-mixin/signals/nodes.libsonnet @@ -175,7 +175,7 @@ function(this) { unit: 'requests', sources: { prometheus: { - expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"2.*"}[$__interval:] offset -$__interval))', + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"2.*"}[$__interval:] offset -$__interval)) != 0', legendCustomTemplate: legendCustomTemplate + ' - 2xx', }, }, @@ -189,7 +189,7 @@ function(this) { unit: 'requests', sources: { prometheus: { - expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"3.*"}[$__interval:] offset -$__interval))', + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"3.*"}[$__interval:] offset -$__interval)) != 0', legendCustomTemplate: legendCustomTemplate + ' - 3xx', }, }, @@ -205,7 +205,7 @@ function(this) { aggFunction: 'sum', sources: { prometheus: { - expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"4.*"}[$__interval:] offset -$__interval))', + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"4.*"}[$__interval:] offset -$__interval)) != 0', rangeFunction: 'increase', legendCustomTemplate: legendCustomTemplate + ' - 4xx', }, @@ -222,7 +222,7 @@ function(this) { aggFunction: 'sum', sources: { prometheus: { - expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"5.*"}[$__interval:] offset -$__interval))', + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"5.*"}[$__interval:] offset -$__interval)) != 0', rangeFunction: 'increase', legendCustomTemplate: 
legendCustomTemplate + ' - 5xx', }, diff --git a/apache-couchdb-mixin/signals/overview.libsonnet b/apache-couchdb-mixin/signals/overview.libsonnet index 9681d8089..6b7a9ff38 100644 --- a/apache-couchdb-mixin/signals/overview.libsonnet +++ b/apache-couchdb-mixin/signals/overview.libsonnet @@ -200,7 +200,7 @@ function(this) { unit: 'requests', sources: { prometheus: { - expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"2.*"}[$__interval:] offset $__interval))', + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"2.*"}[$__interval:] offset $__interval)) != 0', legendCustomTemplate: legendCustomTemplate + ' - 2xx', }, }, @@ -214,7 +214,7 @@ function(this) { unit: 'requests', sources: { prometheus: { - expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"3.*"}[$__interval:] offset $__interval))', + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"3.*"}[$__interval:] offset $__interval)) != 0', legendCustomTemplate: legendCustomTemplate + ' - 3xx', }, }, @@ -228,7 +228,7 @@ function(this) { unit: 'requests', sources: { prometheus: { - expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"4.*"}[$__interval:] offset $__interval))', + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"4.*"}[$__interval:] offset $__interval)) != 0', legendCustomTemplate: legendCustomTemplate + ' - 4xx', }, }, @@ -242,7 +242,7 @@ function(this) { unit: 'requests', sources: { prometheus: { - expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"5.*"}[$__interval:] offset $__interval))', + expr: 'sum by(' + groupLabelAggTerm + ') (increase(couchdb_httpd_status_codes{%(queriesSelector)s, code=~"5.*"}[$__interval:] offset 
$__interval)) != 0', legendCustomTemplate: legendCustomTemplate + ' - 5xx', }, },