Skip to content

Commit 197b882

Browse files
committed
modernize the IBM MQ mixin
1 parent f522be4 commit 197b882

25 files changed

+3449
-5525
lines changed
Lines changed: 74 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,79 +1,80 @@
11
{
2-
prometheusAlerts+:: {
3-
groups+: [
4-
{
5-
name: 'ibm-mq-alerts',
6-
rules: [
7-
{
8-
alert: 'IBMMQExpiredMessages',
9-
expr: |||
10-
sum without (description,hostname,instance,job,platform) (ibmmq_qmgr_expired_message_count) > %(alertsExpiredMessages)s
11-
||| % $._config,
12-
'for': '5m',
13-
labels: {
14-
severity: 'critical',
2+
new(this):
3+
{
4+
groups+: [
5+
{
6+
name: 'ibm-mq-alerts',
7+
rules: [
8+
{
9+
alert: 'IBMMQExpiredMessages',
10+
expr: |||
11+
sum without (description,hostname,instance,job,platform) (ibmmq_qmgr_expired_message_count{%(filteringSelector)s}) > %(alertsExpiredMessages)s
12+
||| % this.config,
13+
'for': '5m',
14+
labels: {
15+
severity: 'critical',
16+
},
17+
annotations: {
18+
summary: 'There are expired messages, which imply that application resilience is failing.',
19+
description:
20+
(
21+
// $value is the evaluated result of the alert expression; $labels only carries label values, so `$labels.value` would render empty.
'The number of expired messages in the {{$labels.qmgr}} is {{ $value }} which is above the threshold of %(alertsExpiredMessages)s.'
22+
) % this.config,
23+
},
1524
},
16-
annotations: {
17-
summary: 'There are expired messages, which imply that application resilience is failing.',
18-
description:
19-
(
20-
'The number of expired messages in the {{$labels.qmgr}} is {{$labels.value}} which is above the threshold of %(alertsExpiredMessages)s.'
21-
) % $._config,
25+
{
26+
alert: 'IBMMQStaleMessages',
27+
expr: |||
28+
sum without (description,instance,job,platform) (ibmmq_queue_oldest_message_age{%(filteringSelector)s}) >= %(alertsStaleMessagesSeconds)s
29+
||| % this.config,
30+
'for': '5m',
31+
labels: {
32+
severity: 'warning',
33+
},
34+
annotations: {
35+
summary: 'Stale messages have been detected.',
36+
description:
37+
(
38+
// $value is the evaluated result of the alert expression; $labels only carries label values, so `$labels.value` would render empty.
'A stale message with an age of {{ $value }} has been sitting in the {{$labels.queue}} which is above the threshold of %(alertsStaleMessagesSeconds)ss.'
39+
) % this.config,
40+
},
2241
},
23-
},
24-
{
25-
alert: 'IBMMQStaleMessages',
26-
expr: |||
27-
sum without (description,instance,job,platform) (ibmmq_queue_oldest_message_age) >= %(alertsStaleMessagesSeconds)s
28-
||| % $._config,
29-
'for': '5m',
30-
labels: {
31-
severity: 'warning',
42+
{
43+
alert: 'IBMMQLowDiskSpace',
44+
expr: |||
45+
sum without (description,hostname,instance,job,platform) (ibmmq_qmgr_queue_manager_file_system_free_space_percentage{%(filteringSelector)s}) <= %(alertsLowDiskSpace)s
46+
||| % this.config,
47+
'for': '5m',
48+
labels: {
49+
severity: 'critical',
50+
},
51+
annotations: {
52+
summary: 'There is limited disk available for a queue manager.',
53+
description:
54+
(
55+
// $value is the evaluated result of the alert expression; $labels only carries label values, so `$labels.value` would render empty.
'The amount of disk space available for {{$labels.qmgr}} is at {{ $value }}%% which is below the threshold of %(alertsLowDiskSpace)s%%.'
56+
) % this.config,
57+
},
3258
},
33-
annotations: {
34-
summary: 'Stale messages have been detected.',
35-
description:
36-
(
37-
'A stale message with an age of {{$labels.value}} has been sitting in the {{$labels.queue}} which is above the threshold of %(alertsStaleMessagesSeconds)ss.'
38-
) % $._config,
59+
{
60+
alert: 'IBMMQHighQueueManagerCpuUsage',
61+
expr: |||
62+
sum without (description,hostname,instance,job,platform) (ibmmq_qmgr_user_cpu_time_estimate_for_queue_manager_percentage{%(filteringSelector)s}) >= %(alertsHighQueueManagerCpuUsage)s
63+
||| % this.config,
64+
'for': '5m',
65+
labels: {
66+
severity: 'critical',
67+
},
68+
annotations: {
69+
summary: 'There is a high CPU usage estimate for a queue manager.',
70+
description:
71+
(
72+
// $value is the evaluated result of the alert expression; $labels only carries label values, so `$labels.value` would render empty.
'The amount of CPU usage for the queue manager {{$labels.qmgr}} is at {{ $value }}%% which is above the threshold of %(alertsHighQueueManagerCpuUsage)s%%.'
73+
) % this.config,
74+
},
3975
},
40-
},
41-
{
42-
alert: 'IBMMQLowDiskSpace',
43-
expr: |||
44-
sum without (description,hostname,instance,job,platform) (ibmmq_qmgr_queue_manager_file_system_free_space_percentage) <= %(alertsLowDiskSpace)s
45-
||| % $._config,
46-
'for': '5m',
47-
labels: {
48-
severity: 'critical',
49-
},
50-
annotations: {
51-
summary: 'There is limited disk available for a queue manager.',
52-
description:
53-
(
54-
'The amount of disk space available for {{$labels.qmgr}} is at {{$labels.value}}%% which is below the threshold of %(alertsLowDiskSpace)s%%.'
55-
) % $._config,
56-
},
57-
},
58-
{
59-
alert: 'IBMMQHighQueueManagerCpuUsage',
60-
expr: |||
61-
sum without (description,hostname,instance,job,platform) (ibmmq_qmgr_user_cpu_time_estimate_for_queue_manager_percentage) >= %(alertsHighQueueManagerCpuUsage)s
62-
||| % $._config,
63-
'for': '5m',
64-
labels: {
65-
severity: 'critical',
66-
},
67-
annotations: {
68-
summary: 'There is a high CPU usage estimate for a queue manager.',
69-
description:
70-
(
71-
'The amount of CPU usage for the queue manager {{$labels.qmgr}} is at {{$labels.value}}%% which is above the threshold of %(alertsHighQueueManagerCpuUsage)s%%.'
72-
) % $._config,
73-
},
74-
},
75-
],
76-
},
77-
],
78-
},
76+
],
77+
},
78+
],
79+
},
7980
}

ibm-mq-mixin/config.libsonnet

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,37 @@
11
{
2-
_config+:: {
3-
enableMultiCluster: false,
4-
ibmmqSelector: if self.enableMultiCluster then 'job=~"$job", cluster=~"$cluster"' else 'job=~"$job"',
5-
dashboardTags: ['ibm-mq-mixin'],
6-
dashboardPeriod: 'now-1h',
7-
dashboardTimezone: 'default',
8-
dashboardRefresh: '1m',
9-
logExpression: if self.enableMultiCluster then 'job=~"$job", cluster=~"$cluster", qmgr=~"$qmgr"'
10-
else 'job=~"$job", qmgr=~"$qmgr"',
2+
local this = self,
3+
filteringSelector: 'job="integrations/ibm-mq"',
4+
groupLabels: ['job', 'cluster', 'mq_cluster'],
5+
instanceLabels: ['instance', 'qmgr'],
6+
uid: 'ibm-mq',
117

12-
//alerts thresholds
13-
alertsExpiredMessages: 2, //count
14-
alertsStaleMessagesSeconds: 300, //seconds
15-
alertsLowDiskSpace: 5, //percentage: 0-100
16-
alertsHighQueueManagerCpuUsage: 85, //percentage: 0-100
8+
dashboardNamePrefix: 'IBM MQ',
9+
dashboardTags: ['ibm-mq-mixin'],
10+
dashboardPeriod: 'now-1h',
11+
dashboardTimezone: 'default',
12+
dashboardRefresh: '1m',
1713

18-
enableLokiLogs: true,
14+
// Data source configuration
15+
metricsSource: 'prometheus',
16+
enableLokiLogs: true,
17+
logLabels: this.groupLabels,
18+
extraLogLabels: [],
19+
logsVolumeGroupBy: 'level',
20+
showLogsVolume: true,
21+
22+
// Alerts configuration
23+
alertsExpiredMessages: 2, //count
24+
alertsStaleMessagesSeconds: 300, //seconds
25+
alertsLowDiskSpace: 5, //percentage: 0-100
26+
alertsHighQueueManagerCpuUsage: 85, //percentage: 0-100
27+
28+
// Multi-cluster support (for backward compatibility)
29+
enableMultiCluster: false,
30+
31+
signals+: {
32+
cluster: (import './signals/cluster.libsonnet')(this),
33+
queueManager: (import './signals/queue-manager.libsonnet')(this),
34+
queue: (import './signals/queue.libsonnet')(this),
35+
topic: (import './signals/topics.libsonnet')(this),
1936
},
2037
}

ibm-mq-mixin/dashboards.libsonnet

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
local g = import './g.libsonnet';
2+
local logslib = import 'logs-lib/logs/main.libsonnet';
3+
4+
{
5+
local root = self,
6+
7+
new(this)::
8+
local prefix = this.config.dashboardNamePrefix;
9+
local links = this.grafana.links;
10+
local tags = this.config.dashboardTags;
11+
local uid = g.util.string.slugify(this.config.uid);
12+
local vars = this.grafana.variables;
13+
local annotations = this.grafana.annotations;
14+
local refresh = this.config.dashboardRefresh;
15+
local period = this.config.dashboardPeriod;
16+
local timezone = this.config.dashboardTimezone;
17+
{
18+
'ibm-mq-cluster-overview.json':
19+
g.dashboard.new(prefix + ' cluster overview')
20+
+ g.dashboard.withPanels(
21+
g.util.panel.resolveCollapsedFlagOnRows(
22+
g.util.grid.wrapPanels(
23+
[
24+
this.grafana.rows.clusterOverview,
25+
]
26+
),
27+
)
28+
) + root.applyCommon(
29+
vars.multiInstance,
30+
uid + '-cluster-overview',
31+
tags,
32+
links { ibmMqClusterOverview+:: {} },
33+
annotations,
34+
timezone,
35+
refresh,
36+
period
37+
),
38+
39+
'ibm-mq-queue-manager-overview.json':
40+
g.dashboard.new(prefix + ' queue manager overview')
41+
+ g.dashboard.withPanels(
42+
g.util.panel.resolveCollapsedFlagOnRows(
43+
g.util.grid.wrapPanels(
44+
[
45+
this.grafana.rows.queueManagerOverview,
46+
this.grafana.rows.queueManagerLogs,
47+
]
48+
),
49+
)
50+
) + root.applyCommon(
51+
vars.multiInstance,
52+
uid + '-queue-manager-overview',
53+
tags,
54+
links { ibmMqQueueManagerOverview+:: {} },
55+
annotations,
56+
timezone,
57+
refresh,
58+
period
59+
),
60+
61+
'ibm-mq-queue-overview.json':
62+
g.dashboard.new(prefix + ' queue overview')
63+
+ g.dashboard.withPanels(
64+
g.util.panel.resolveCollapsedFlagOnRows(
65+
g.util.grid.wrapPanels(
66+
[
67+
this.grafana.rows.queueOverview,
68+
]
69+
),
70+
)
71+
) + root.applyCommon(
72+
vars.multiInstance + [
73+
g.dashboard.variable.query.new('queue')
74+
+ g.dashboard.variable.custom.generalOptions.withLabel('Queue')
75+
+ g.dashboard.variable.custom.selectionOptions.withMulti(true)
76+
+ g.dashboard.variable.query.queryTypes.withLabelValues(label='queue', metric='ibmmq_queue_average_queue_time_seconds')
77+
+ g.dashboard.variable.query.withDatasourceFromVariable(variable=vars.datasources.prometheus)
78+
+ g.dashboard.variable.custom.selectionOptions.withIncludeAll(true, '.+')
79+
+ g.dashboard.variable.query.refresh.onTime(),
80+
],
81+
uid + '-queue-overview',
82+
tags,
83+
links { ibmMqQueueOverview+:: {} },
84+
annotations,
85+
timezone,
86+
refresh,
87+
period
88+
),
89+
90+
'ibm-mq-topics-overview.json':
91+
g.dashboard.new(prefix + ' topics overview')
92+
+ g.dashboard.withPanels(
93+
g.util.panel.resolveCollapsedFlagOnRows(
94+
g.util.grid.wrapPanels(
95+
[
96+
this.grafana.rows.topicsRow,
97+
this.grafana.rows.subscriptionsRow,
98+
]
99+
),
100+
)
101+
) + root.applyCommon(
102+
vars.multiInstance + [
103+
g.dashboard.variable.query.new('topic')
104+
+ g.dashboard.variable.custom.generalOptions.withLabel('Topic')
105+
+ g.dashboard.variable.custom.selectionOptions.withMulti(true)
106+
+ g.dashboard.variable.query.queryTypes.withLabelValues(label='topic', metric='ibmmq_topic_subscriber_count{qmgr=~"$qmgr",topic!~"SYSTEM.*|\\\\$SYS.*|"}')
107+
+ g.dashboard.variable.query.withDatasourceFromVariable(variable=vars.datasources.prometheus)
108+
+ g.dashboard.variable.custom.selectionOptions.withIncludeAll(true, '.+')
109+
+ g.dashboard.variable.query.refresh.onTime(),
110+
111+
g.dashboard.variable.query.new('subscription')
112+
+ g.dashboard.variable.custom.generalOptions.withLabel('Subscription')
113+
+ g.dashboard.variable.custom.selectionOptions.withMulti(true)
114+
+ g.dashboard.variable.query.queryTypes.withLabelValues(label='subscription', metric='ibmmq_subscription_messsages_received{qmgr=~"$qmgr",subscription!~"SYSTEM.*|"}')
115+
+ g.dashboard.variable.query.withDatasourceFromVariable(variable=vars.datasources.prometheus)
116+
+ g.dashboard.variable.custom.selectionOptions.withIncludeAll(true, '.+')
117+
+ g.dashboard.variable.query.refresh.onTime(),
118+
],
119+
uid + '-topics-overview',
120+
tags,
121+
links { ibmMqTopicsOverview+:: {} },
122+
annotations,
123+
timezone,
124+
refresh,
125+
period
126+
),
127+
}
128+
+ if this.config.enableLokiLogs then {
129+
'ibm-mq-logs.json':
130+
logslib.new(
131+
prefix + ' logs',
132+
datasourceName=this.grafana.variables.datasources.loki.name,
133+
datasourceRegex=this.grafana.variables.datasources.loki.regex,
134+
filterSelector=this.config.filteringSelector,
135+
labels=this.config.groupLabels + this.config.extraLogLabels + ['qmgr'],
136+
formatParser=null,
137+
showLogsVolume=this.config.showLogsVolume,
138+
)
139+
{
140+
dashboards+:
141+
{
142+
logs+:
143+
root.applyCommon(super.logs.templating.list, uid=uid + '-logs', tags=tags, links=links, annotations=annotations, timezone=timezone, refresh=refresh, period=period),
144+
},
145+
panels+:
146+
{
147+
logs+:
148+
g.panel.logs.options.withEnableLogDetails(true)
149+
+ g.panel.logs.options.withShowTime(false)
150+
+ g.panel.logs.options.withWrapLogMessage(false),
151+
},
152+
variables+: {
153+
toArray+: [
154+
this.grafana.variables.datasources.prometheus { hide: 2 },
155+
],
156+
},
157+
}.dashboards.logs,
158+
}
159+
else {},
160+
161+
applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period):
162+
g.dashboard.withTags(tags)
163+
+ g.dashboard.withUid(uid)
164+
+ g.dashboard.withLinks(std.objectValues(links))
165+
+ g.dashboard.withTimezone(timezone)
166+
+ g.dashboard.withRefresh(refresh)
167+
+ g.dashboard.time.withFrom(period)
168+
+ g.dashboard.withVariables(vars)
169+
+ g.dashboard.withAnnotations(std.objectValues(annotations)),
170+
}

0 commit comments

Comments
 (0)