Skip to content

Commit aeac810

Browse files
Addressed review comments related to refactoring
1 parent b0bfd18 commit aeac810

File tree

2 files changed

+156
-9
lines changed

2 files changed

+156
-9
lines changed

scripts/gnoi_shutdown_daemon.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -218,8 +218,9 @@ def main():
218218
time.sleep(1)
219219
continue
220220

221-
if entry.get("state_transition_in_progress", "False") == "True" and entry.get("transition_type") == "shutdown":
222-
logger.log_info(f"Shutdown request detected for {dpu_name}. Initiating gNOI reboot.")
221+
type = entry.get("transition_type")
222+
if entry.get("state_transition_in_progress", "False") == "True" and (type == "shutdown" or type == "reboot"):
223+
logger.log_info(f"{type} request detected for {dpu_name}. Initiating gNOI reboot.")
223224
try:
224225
dpu_ip = get_dpu_ip(dpu_name)
225226
port = get_gnmi_port(dpu_name)
@@ -276,18 +277,20 @@ def main():
276277
time.sleep(STATUS_POLL_INTERVAL_SEC)
277278

278279
if reboot_successful:
280+
if type == "reboot":
281+
success = module_base.clear_module_state_transition(db, dpu_name)
282+
if success:
283+
logger.log_info(f"Cleared transition for {dpu_name}")
284+
else:
285+
logger.log_warning(f"Failed to clear transition for {dpu_name}")
279286
logger.log_info(f"Halting the services on DPU is successful for {dpu_name}.")
280287
else:
281288
logger.log_warning(f"Status polling of halting the services on DPU timed out for {dpu_name}.")
282289

283290
# NOTE:
284-
# Do NOT clear CHASSIS_MODULE_TABLE transition flags here.
285-
# Per HLD and platform flow, the transition is cleared by the
286-
# platform's module.py AFTER set_admin_state(down) has completed
287-
# (i.e., after the module is actually taken down). This avoids
288-
# prematurely unblocking other components before shutdown finishes.
289-
290-
time.sleep(1)
291+
# The CHASSIS_MODULE_TABLE transition flag is cleared for startup/shutdown in
292+
# module_base.py. The daemon does not clear it. For reboot transitions, the
293+
# daemon relies on the TimeoutEnforcer thread to clear any stuck transitions.
291294

292295
if __name__ == "__main__":
293296
main()

tests/gnoi_shutdown_daemon_test.py

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ def __init__(self, *a, **k): # allow construction if the code instantiates Modu
172172
pass
173173
# Support both instance and class access
174174
get_module_state_transition = staticmethod(_fake_transition)
175+
clear_module_state_transition = staticmethod(lambda db, name: True)
175176

176177
with patch("gnoi_shutdown_daemon.SonicV2Connector") as mock_sonic, \
177178
patch("gnoi_shutdown_daemon.ModuleBase", new=_MBStub), \
@@ -220,6 +221,7 @@ def __init__(self, *a, **k): # allow construction if the code instantiates Modu
220221
self.assertTrue(status_args[status_args.index("-rpc") + 1].endswith("RebootStatus"))
221222

222223
all_logs = " | ".join(str(c) for c in mock_logger.method_calls)
224+
self.assertIn("shutdown request detected for DPU0", all_logs)
223225
self.assertIn("Halting the services on DPU is successful for DPU0", all_logs)
224226

225227

@@ -550,6 +552,10 @@ def __init__(self, *a, **k):
550552
@staticmethod
551553
def get_module_state_transition(*_a, **_k):
552554
return {"state_transition_in_progress": "True", "transition_type": "shutdown"}
555+
556+
@staticmethod
557+
def clear_module_state_transition(db, name):
558+
return True
553559

554560

555561
def _mk_pubsub_once2():
@@ -648,3 +654,141 @@ def test_shutdown_reboot_nonzero_does_not_poll_status(self):
648654
self.assertEqual(mock_exec.call_count, 1)
649655
self.assertTrue(any("reboot command failed" in str(c.args[0]).lower()
650656
for c in (mock_logger.log_error.call_args_list or [])))
657+
658+
def test_reboot_transition_type_success(self):
659+
"""Test that reboot transition type is handled correctly and clears transition on success"""
660+
661+
class _MBStubReboot:
662+
def __init__(self, *a, **k):
663+
pass
664+
665+
@staticmethod
666+
def get_module_state_transition(*_a, **_k):
667+
return {"state_transition_in_progress": "True", "transition_type": "reboot"}
668+
669+
@staticmethod
670+
def clear_module_state_transition(db, name):
671+
return True
672+
673+
with patch("gnoi_shutdown_daemon.SonicV2Connector") as mock_sonic, \
674+
patch("gnoi_shutdown_daemon.ModuleBase", new=_MBStubReboot), \
675+
patch("gnoi_shutdown_daemon.execute_gnoi_command") as mock_exec, \
676+
patch("gnoi_shutdown_daemon.is_tcp_open", return_value=True), \
677+
patch("gnoi_shutdown_daemon._cfg_get_entry",
678+
side_effect=lambda table, key:
679+
{"ips@": "10.0.0.1"} if table == "DHCP_SERVER_IPV4_PORT" else {"gnmi_port": "8080"}), \
680+
patch("gnoi_shutdown_daemon.time.sleep", return_value=None), \
681+
patch("gnoi_shutdown_daemon.logger") as mock_logger:
682+
import gnoi_shutdown_daemon as d
683+
db = MagicMock()
684+
pubsub = MagicMock()
685+
pubsub.get_message.side_effect = [
686+
{"type": "pmessage", "channel": "__keyspace@6__:CHASSIS_MODULE_TABLE|DPU0", "data": "set"},
687+
Exception("stop"),
688+
]
689+
db.pubsub.return_value = pubsub
690+
mock_sonic.return_value = db
691+
692+
mock_exec.side_effect = [
693+
(0, "OK", ""), # Reboot command
694+
(0, "reboot complete", ""), # RebootStatus
695+
]
696+
697+
try:
698+
d.main()
699+
except Exception:
700+
pass
701+
702+
# Should make both Reboot and RebootStatus calls
703+
self.assertEqual(mock_exec.call_count, 2)
704+
705+
# Check logs for reboot-specific messages
706+
all_logs = " | ".join(str(c) for c in mock_logger.method_calls)
707+
self.assertIn("reboot request detected for DPU0", all_logs)
708+
self.assertIn("Cleared transition for DPU0", all_logs)
709+
self.assertIn("Halting the services on DPU is successful for DPU0", all_logs)
710+
711+
def test_reboot_transition_clear_failure(self):
712+
"""Test that reboot transition logs warning when clear fails"""
713+
714+
class _MBStubRebootFail:
715+
def __init__(self, *a, **k):
716+
pass
717+
718+
@staticmethod
719+
def get_module_state_transition(*_a, **_k):
720+
return {"state_transition_in_progress": "True", "transition_type": "reboot"}
721+
722+
@staticmethod
723+
def clear_module_state_transition(db, name):
724+
return False # Simulate failure
725+
726+
with patch("gnoi_shutdown_daemon.SonicV2Connector") as mock_sonic, \
727+
patch("gnoi_shutdown_daemon.ModuleBase", new=_MBStubRebootFail), \
728+
patch("gnoi_shutdown_daemon.execute_gnoi_command") as mock_exec, \
729+
patch("gnoi_shutdown_daemon.is_tcp_open", return_value=True), \
730+
patch("gnoi_shutdown_daemon._cfg_get_entry",
731+
side_effect=lambda table, key:
732+
{"ips@": "10.0.0.1"} if table == "DHCP_SERVER_IPV4_PORT" else {"gnmi_port": "8080"}), \
733+
patch("gnoi_shutdown_daemon.time.sleep", return_value=None), \
734+
patch("gnoi_shutdown_daemon.logger") as mock_logger:
735+
import gnoi_shutdown_daemon as d
736+
db = MagicMock()
737+
pubsub = MagicMock()
738+
pubsub.get_message.side_effect = [
739+
{"type": "pmessage", "channel": "__keyspace@6__:CHASSIS_MODULE_TABLE|DPU0", "data": "set"},
740+
Exception("stop"),
741+
]
742+
db.pubsub.return_value = pubsub
743+
mock_sonic.return_value = db
744+
745+
mock_exec.side_effect = [
746+
(0, "OK", ""), # Reboot command
747+
(0, "reboot complete", ""), # RebootStatus
748+
]
749+
750+
try:
751+
d.main()
752+
except Exception:
753+
pass
754+
755+
# Check for warning log when clear fails
756+
all_logs = " | ".join(str(c) for c in mock_logger.method_calls)
757+
self.assertIn("Failed to clear transition for DPU0", all_logs)
758+
759+
def test_status_polling_timeout_warning(self):
760+
"""Test that timeout during status polling logs the appropriate warning"""
761+
762+
with patch("gnoi_shutdown_daemon.SonicV2Connector") as mock_sonic, \
763+
patch("gnoi_shutdown_daemon.ModuleBase", new=_MBStub2), \
764+
patch("gnoi_shutdown_daemon.execute_gnoi_command") as mock_exec, \
765+
patch("gnoi_shutdown_daemon.is_tcp_open", return_value=True), \
766+
patch("gnoi_shutdown_daemon._cfg_get_entry",
767+
side_effect=lambda table, key:
768+
{"ips@": "10.0.0.1"} if table == "DHCP_SERVER_IPV4_PORT" else {"gnmi_port": "8080"}), \
769+
patch("gnoi_shutdown_daemon.time.sleep", return_value=None), \
770+
patch("gnoi_shutdown_daemon.time.monotonic", side_effect=[0, 100]), \
771+
patch("gnoi_shutdown_daemon.logger") as mock_logger:
772+
import gnoi_shutdown_daemon as d
773+
db = MagicMock()
774+
pubsub = MagicMock()
775+
pubsub.get_message.side_effect = [
776+
{"type": "pmessage", "channel": "__keyspace@6__:CHASSIS_MODULE_TABLE|DPU0", "data": "set"},
777+
Exception("stop"),
778+
]
779+
db.pubsub.return_value = pubsub
780+
mock_sonic.return_value = db
781+
782+
mock_exec.side_effect = [
783+
(0, "OK", ""), # Reboot command
784+
(0, "not complete", ""), # RebootStatus - never returns complete
785+
]
786+
787+
try:
788+
d.main()
789+
except Exception:
790+
pass
791+
792+
# Check for timeout warning
793+
all_logs = " | ".join(str(c) for c in mock_logger.method_calls)
794+
self.assertIn("Status polling of halting the services on DPU timed out for DPU0", all_logs)

0 commit comments

Comments
 (0)