Skip to content

Commit e2d86b3

Browse files
committed
19810 redfish: support Redfish Sensor resource model
Add support for more devices that expose the Redfish Sensor resource model (ThermalSubsystem, PowerSubsystem, Sensors, EnvironmentMetrics) instead of the legacy Power/Thermal resources.

- Added NVIDIA to vendor detection
- Auto-detect chassis resource model and fetch accordingly
- Extend process_redfish_perfdata() for nested Thresholds object
- Register four new sections in REDFISH_SECTIONS

JIRA-Ref: SUP-28174
Change-Id: I6482562905ef74b429e00fec15d063ac6fb04bee
1 parent 90e26cc commit e2d86b3

File tree

5 files changed

+280
-15
lines changed

5 files changed

+280
-15
lines changed

.werks/19810.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
[//]: # (werk v3)
2+
# Redfish: support Sensor resource model and NVIDIA devices
3+
4+
key | value
5+
---------- | ---
6+
date | 2026-04-15T16:34:38.718903+00:00
7+
version | 2.5.0b5
8+
class | feature
9+
edition | community
10+
component | checks
11+
level | 1
12+
compatible | yes
13+
14+
The Redfish special agent now supports devices that use the Redfish Sensor resource model
15+
instead of the older Power and Thermal resources. This notably includes NVIDIA MGX
16+
baseboards running OpenBMC.
17+
18+
## What changed
19+
20+
The agent auto-detects which resource model the device exposes and fetches the appropriate
21+
sections automatically. Previously, devices using the Sensor resource model produced no
22+
chassis monitoring data.
23+
24+
NVIDIA is now recognized as a Redfish vendor. Devices from other vendors that also expose
25+
the Sensor resource model will work as well.
26+
27+
Four new sections are available in the _Redfish Compatible Management Controller_ rule:
28+
_Sensors_, _Thermal Subsystem_, _Power Subsystem_, and _Environment Metrics_.
29+
Thresholds reported by these devices are picked up automatically by existing sensor checks.
30+
31+
Existing Redfish monitoring setups are not affected. Already supported devices continue
32+
to work exactly as before.

packages/cmk-plugins/cmk/plugins/redfish/lib.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,10 @@ class Section:
6666
name="SimpleStorage",
6767
title=Title("Simple Storage Collection (tbd)"),
6868
),
69+
Section(name="Sensors", title=Title("Sensors")),
70+
Section(name="ThermalSubsystem", title=Title("Thermal Subsystem")),
71+
Section(name="PowerSubsystem", title=Title("Power Subsystem")),
72+
Section(name="EnvironmentMetrics", title=Title("Environment Metrics")),
6973
)
7074

7175

@@ -227,6 +231,19 @@ def redfish_health_state(state: Mapping[str, Any]) -> tuple[int, str]:
227231
return dev_state, ", ".join(dev_msg)
228232

229233

234+
def _threshold_value(
    entry: Mapping[str, Any],
    flat_key: str,
    thresholds: Mapping[str, Any],
    nested_key: str,
) -> float | None:
    """Read a threshold value, preferring the legacy flat field over the modern nested one.

    Args:
        entry: The sensor resource; looked up with ``flat_key`` for the legacy
            flat threshold field (e.g. ``UpperThresholdCritical``).
        flat_key: Name of the legacy flat threshold field in ``entry``.
        thresholds: The value of the resource's ``Thresholds`` property.
        nested_key: Name of the nested threshold object inside ``thresholds``
            (e.g. ``UpperCritical``); its ``Reading`` member is used.

    Returns:
        The converted flat value if it is usable, otherwise the converted
        nested ``Reading``; ``None`` if neither yields a value.
    """
    flat = _try_convert_to_float(entry.get(flat_key))
    if flat is not None:
        return flat

    # The ``{}`` default of ``dict.get`` only covers a *missing* key. A key that
    # is present but holds JSON null (or any other non-object value) would make
    # the chained ``.get("Reading")`` raise AttributeError, so guard explicitly.
    # NOTE(review): assumes some BMCs emit null here - confirm against real devices.
    if not isinstance(thresholds, Mapping):
        return None
    nested = thresholds.get(nested_key)
    if not isinstance(nested, Mapping):
        return None
    return _try_convert_to_float(nested.get("Reading"))
245+
246+
230247
def process_redfish_perfdata(entry: Mapping[str, Any]) -> None | Perfdata:
231248
"""Redfish performance data to monitoring performance data"""
232249
value = None
@@ -242,10 +259,13 @@ def process_redfish_perfdata(entry: Mapping[str, Any]) -> None | Perfdata:
242259

243260
min_range = _try_convert_to_float(entry.get("MinReadingRange", None))
244261
max_range = _try_convert_to_float(entry.get("MaxReadingRange", None))
245-
min_warn = _try_convert_to_float(entry.get("LowerThresholdNonCritical", None))
246-
min_crit = _try_convert_to_float(entry.get("LowerThresholdCritical", None))
247-
upper_warn = _try_convert_to_float(entry.get("UpperThresholdNonCritical", None))
248-
upper_crit = _try_convert_to_float(entry.get("UpperThresholdCritical", None))
262+
263+
# Thresholds: try legacy flat fields first, fall back to modern nested Thresholds object.
264+
thresholds = entry.get("Thresholds", {})
265+
min_warn = _threshold_value(entry, "LowerThresholdNonCritical", thresholds, "LowerCaution")
266+
min_crit = _threshold_value(entry, "LowerThresholdCritical", thresholds, "LowerCritical")
267+
upper_warn = _threshold_value(entry, "UpperThresholdNonCritical", thresholds, "UpperCaution")
268+
upper_crit = _threshold_value(entry, "UpperThresholdCritical", thresholds, "UpperCritical")
249269

250270
if min_warn is None and min_crit is not None:
251271
min_warn = min_crit

packages/cmk-plugins/cmk/plugins/redfish/special_agents/agent_redfish.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,9 @@ def detect_vendor(root_data: Mapping[str, Any]) -> Vendor:
487487
case "Ami" | "Supermicro" | "Seagate" as name:
488488
return Vendor(name=name)
489489

490+
case "NVIDIA":
491+
return Vendor(name="NVIDIA", expand_string="?$expand=*($levels=1)")
492+
490493
case _other_vendor_string:
491494
# TODO: why not use the vendor string here?
492495
return Vendor(name="Generic")
@@ -679,17 +682,12 @@ def get_information(storage: Storage, redfishobj: RedfishData) -> Literal[0]:
679682
sys.stdout.write("<<<redfish_chassis:sep(0)>>>\n")
680683
sys.stdout.write(f"{json.dumps(chassis_data, sort_keys=True)}\n")
681684

682-
chassis_sections = [
683-
"NetworkAdapters",
684-
"Power",
685-
"Thermal",
686-
]
687-
# new_environment_resources = [
688-
# "Sensors",
689-
# "EnvironmentMetrics",
690-
# "PowerSubsystem",
691-
# "ThermalSubsystem",
692-
# ]
685+
legacy_environment = ("Power", "Thermal")
686+
modern_environment = ("Sensors", "ThermalSubsystem", "PowerSubsystem", "EnvironmentMetrics")
687+
688+
has_modern = any(isinstance(chassis.get("Sensors"), dict) for chassis in chassis_data)
689+
chassis_sections = list(modern_environment if has_modern else legacy_environment)
690+
chassis_sections.append("NetworkAdapters")
693691

694692
resulting_sections = list(set(chassis_sections).intersection(redfishobj.sections))
695693
for chassis in chassis_data:
Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
#!/usr/bin/env python3
2+
# Copyright (C) 2026 Checkmk GmbH - License: GNU General Public License v2
3+
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
4+
# conditions defined in the file COPYING, which is part of this source code package.
5+
6+
import json
7+
from typing import Any
8+
9+
from cmk.agent_based.v2 import StringTable
10+
from cmk.plugins.redfish.agent_based.redfish_sensors import (
11+
discovery_redfish_sensors,
12+
)
13+
from cmk.plugins.redfish.lib import (
14+
_threshold_value,
15+
parse_redfish_multiple,
16+
process_redfish_perfdata,
17+
)
18+
from cmk.plugins.redfish.special_agents.agent_redfish import detect_vendor
19+
20+
# ---------------------------------------------------------------------------
21+
# Helpers
22+
# ---------------------------------------------------------------------------
23+
24+
25+
def _make_modern_sensor(
26+
sensor_id: str = "BMC_TEMP",
27+
*,
28+
reading: float = 31.875,
29+
reading_type: str = "Temperature",
30+
reading_units: str = "Cel",
31+
upper_caution: float | None = 105.0,
32+
upper_critical: float | None = 108.0,
33+
lower_caution: float | None = 5.0,
34+
lower_critical: float | None = None,
35+
health: str = "OK",
36+
state: str = "Enabled",
37+
) -> dict[str, Any]:
38+
thresholds: dict[str, Any] = {}
39+
if upper_caution is not None:
40+
thresholds["UpperCaution"] = {"Reading": upper_caution}
41+
if upper_critical is not None:
42+
thresholds["UpperCritical"] = {"Reading": upper_critical}
43+
if lower_caution is not None:
44+
thresholds["LowerCaution"] = {"Reading": lower_caution}
45+
if lower_critical is not None:
46+
thresholds["LowerCritical"] = {"Reading": lower_critical}
47+
return {
48+
"@odata.id": f"/redfish/v1/Chassis/MGX_BMC_0/Sensors/{sensor_id}",
49+
"@odata.type": "#Sensor.v1_7_0.Sensor",
50+
"Id": sensor_id,
51+
"Name": sensor_id.replace("_", " "),
52+
"Reading": reading,
53+
"ReadingType": reading_type,
54+
"ReadingUnits": reading_units,
55+
"Status": {"Health": health, "State": state},
56+
**({"Thresholds": thresholds} if thresholds else {}),
57+
}
58+
59+
60+
def _make_legacy_sensor(
61+
sensor_id: str = "CPU1_Temp",
62+
*,
63+
reading_celsius: float = 42.0,
64+
upper_warn: float | None = 80.0,
65+
upper_crit: float | None = 90.0,
66+
lower_warn: float | None = 5.0,
67+
lower_crit: float | None = None,
68+
) -> dict[str, Any]:
69+
entry: dict[str, Any] = {
70+
"ReadingCelsius": reading_celsius,
71+
}
72+
if upper_warn is not None:
73+
entry["UpperThresholdNonCritical"] = upper_warn
74+
if upper_crit is not None:
75+
entry["UpperThresholdCritical"] = upper_crit
76+
if lower_warn is not None:
77+
entry["LowerThresholdNonCritical"] = lower_warn
78+
if lower_crit is not None:
79+
entry["LowerThresholdCritical"] = lower_crit
80+
return entry
81+
82+
83+
def _make_string_table(*entries: dict[str, Any]) -> StringTable:
    """Serialize each entry to JSON and wrap it as a single-column table row."""
    rows: StringTable = []
    for entry in entries:
        rows.append([json.dumps(entry)])
    return rows
85+
86+
87+
# ---------------------------------------------------------------------------
88+
# process_redfish_perfdata — threshold extraction
89+
# ---------------------------------------------------------------------------
90+
91+
92+
class TestProcessRedfishPerfdata:
    """Threshold extraction of process_redfish_perfdata for flat and nested layouts."""

    def test_modern_nested_thresholds(self) -> None:
        """Levels are taken from the nested ``Thresholds`` object."""
        perfdata = process_redfish_perfdata(
            _make_modern_sensor(
                reading=31.875, upper_caution=105.0, upper_critical=108.0, lower_caution=5.0
            )
        )
        assert perfdata is not None
        assert perfdata.value == 31.875
        assert perfdata.levels_upper == ("fixed", (105.0, 108.0))
        assert perfdata.levels_lower == ("fixed", (5.0, float("-inf")))

    def test_legacy_flat_thresholds(self) -> None:
        """Levels are taken from the legacy flat threshold fields."""
        perfdata = process_redfish_perfdata(
            _make_legacy_sensor(
                reading_celsius=42.0, upper_warn=80.0, upper_crit=90.0, lower_warn=5.0
            )
        )
        assert perfdata is not None
        assert perfdata.value == 42.0
        assert perfdata.levels_upper == ("fixed", (80.0, 90.0))
        assert perfdata.levels_lower == ("fixed", (5.0, float("-inf")))

    def test_flat_takes_priority_over_nested(self) -> None:
        """When both layouts are present, the flat fields win."""
        nested_decoys: dict[str, Any] = {
            "UpperCaution": {"Reading": 999.0},
            "UpperCritical": {"Reading": 999.0},
        }
        sensor: dict[str, Any] = {
            "Reading": 50.0,
            "UpperThresholdNonCritical": 80.0,
            "UpperThresholdCritical": 90.0,
            "Thresholds": nested_decoys,
        }
        perfdata = process_redfish_perfdata(sensor)
        assert perfdata is not None
        assert perfdata.levels_upper == ("fixed", (80.0, 90.0))

    def test_flat_zero_threshold_does_not_fall_through(self) -> None:
        """Verify _threshold_value returns the flat 0.0, not the nested -5.0."""
        flat_fields: dict[str, Any] = {"LowerThresholdCritical": 0.0}
        nested_fields: dict[str, Any] = {"LowerCritical": {"Reading": -5.0}}
        picked = _threshold_value(
            flat_fields, "LowerThresholdCritical", nested_fields, "LowerCritical"
        )
        assert picked == 0.0

    def test_no_thresholds_at_all(self) -> None:
        """A bare reading yields a value but no levels."""
        sensor: dict[str, Any] = {"Reading": 25.0}
        perfdata = process_redfish_perfdata(sensor)
        assert perfdata is not None
        assert perfdata.value == 25.0
        assert perfdata.levels_upper is None
        assert perfdata.levels_lower is None

    def test_no_reading_returns_none(self) -> None:
        """Without any reading field there is no perfdata."""
        sensor: dict[str, Any] = {"Name": "SomeSensor"}
        perfdata = process_redfish_perfdata(sensor)
        assert perfdata is None
145+
146+
147+
# ---------------------------------------------------------------------------
148+
# detect_vendor — NVIDIA
149+
# ---------------------------------------------------------------------------
150+
151+
152+
class TestDetectVendor:
    """Vendor detection from the Redfish service root."""

    def test_nvidia_from_vendor_field(self) -> None:
        """A ``Vendor`` field of "NVIDIA" selects the NVIDIA vendor and its expand string."""
        service_root: dict[str, Any] = {"Oem": {}, "Vendor": "NVIDIA"}
        detected = detect_vendor(service_root)
        assert detected.name == "NVIDIA"
        assert detected.expand_string == "?$expand=*($levels=1)"

    def test_existing_vendors_unchanged(self) -> None:
        """A "Dell" key in the Oem object is detected as vendor Dell."""
        service_root: dict[str, Any] = {"Oem": {"Dell": {}}}
        detected = detect_vendor(service_root)
        assert detected.name == "Dell"
163+
164+
165+
# ---------------------------------------------------------------------------
166+
# discovery + check — redfish_sensors
167+
# ---------------------------------------------------------------------------
168+
169+
170+
class TestRedfishSensorsDiscovery:
    """Service discovery for the redfish_sensors section."""

    def test_discovers_enabled_sensor(self) -> None:
        """A single enabled sensor produces exactly one service named after its Id."""
        section = parse_redfish_multiple(_make_string_table(_make_modern_sensor("BMC_TEMP")))
        discovered = list(discovery_redfish_sensors(section))
        assert [service.item for service in discovered] == ["BMC_TEMP"]

    def test_skips_absent_sensor(self) -> None:
        """A sensor whose state is "Absent" is not discovered."""
        absent = _make_modern_sensor("ABSENT_TEMP", state="Absent")
        section = parse_redfish_multiple(_make_string_table(absent))
        discovered = list(discovery_redfish_sensors(section))
        assert discovered == []

    def test_discovers_multiple_sensors(self) -> None:
        """Each sensor in the section gets its own service."""
        table = _make_string_table(
            _make_modern_sensor("BMC_TEMP"),
            _make_modern_sensor("CPU_TEMP", reading=55.0),
        )
        section = parse_redfish_multiple(table)
        found_items = {service.item for service in discovery_redfish_sensors(section)}
        assert found_items == {"BMC_TEMP", "CPU_TEMP"}
192+
193+
194+
class TestThresholdValue:
    """Lookup order of _threshold_value: flat field first, nested Reading second."""

    def test_flat_present(self) -> None:
        """Only the flat field exists: its value is returned."""
        flat_fields: dict[str, Any] = {"UpperThresholdCritical": 90.0}
        picked = _threshold_value(flat_fields, "UpperThresholdCritical", {}, "UpperCritical")
        assert picked == 90.0

    def test_nested_fallback(self) -> None:
        """Only the nested threshold exists: its Reading is returned."""
        nested_fields: dict[str, Any] = {"UpperCritical": {"Reading": 108.0}}
        picked = _threshold_value({}, "UpperThresholdCritical", nested_fields, "UpperCritical")
        assert picked == 108.0

    def test_flat_preferred_over_nested(self) -> None:
        """Both exist: the flat value wins over the nested Reading."""
        flat_fields: dict[str, Any] = {"UpperThresholdCritical": 90.0}
        nested_fields: dict[str, Any] = {"UpperCritical": {"Reading": 999.0}}
        picked = _threshold_value(
            flat_fields, "UpperThresholdCritical", nested_fields, "UpperCritical"
        )
        assert picked == 90.0

    def test_neither_present(self) -> None:
        """Neither exists: the result is None."""
        picked = _threshold_value({}, "UpperThresholdCritical", {}, "UpperCritical")
        assert picked is None

packages/cmk-plugins/tests/cmk/plugins/redfish/rulesets/test_datasource_program.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
"fetching": {
6262
"ArrayControllers": ("always", 0.0),
6363
"Drives": ("cached", 42.0),
64+
"EnvironmentMetrics": ("always", 0.0),
6465
"EthernetInterfaces": ("always", 0.0),
6566
"FirmwareInventory": ("never", -1.0),
6667
"HostBusAdapters": ("always", 0.0),
@@ -70,11 +71,14 @@
7071
"NetworkInterfaces": ("never", -1.0),
7172
"PhysicalDrives": ("always", 0.0),
7273
"Power": ("always", 0.0),
74+
"PowerSubsystem": ("always", 0.0),
7375
"Processors": ("always", 0.0),
76+
"Sensors": ("always", 0.0),
7477
"SimpleStorage": ("always", 0.0),
7578
"SmartStorage": ("always", 0.0),
7679
"Storage": ("always", 0.0),
7780
"Thermal": ("always", 0.0),
81+
"ThermalSubsystem": ("always", 0.0),
7882
"Volumes": ("always", 0.0),
7983
},
8084
"port": 443,

0 commit comments

Comments
 (0)