From d0ce9a220efe40ee96d3368738cb239f3ae33456 Mon Sep 17 00:00:00 2001 From: Yann Dirson Date: Wed, 25 Jun 2025 10:39:16 +0200 Subject: [PATCH] WIP host.reboot: fix detection of host up/down When rebooting a master host while no VM is running, it can go down fast enough that the ssh connection is closed on us. In this case (which we already handle) trying to open a new ssh connection to query xapi is never going to succeed. Even when a xapi master answers that a slave is not enabled, it is a bit of a stretch to conclude that the host is already down. In particular, the test for "host up" uses ping, and the host might still respond to a few pings before it actually shuts down. Instead, for robustness of the "host up" test, it is much more logical to use ping as well to decide that the host is down. --- lib/host.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/host.py b/lib/host.py index d7031a9bc..3d37db543 100644 --- a/lib/host.py +++ b/lib/host.py @@ -520,7 +520,10 @@ def reboot(self, verify=False): if "closed by remote host" not in e.stdout: raise if verify: - wait_for_not(self.is_enabled, "Wait for host down") + # FIXME for more robustness we should make sure that 0% + # pings of a given number actually get a response + wait_for_not(lambda: not os.system(f"ping -c1 {self.hostname_or_ip} > /dev/null 2>&1"), + "Wait for host down", timeout_secs=10 * 60, retry_delay_secs=10) wait_for(lambda: not os.system(f"ping -c1 {self.hostname_or_ip} > /dev/null 2>&1"), "Wait for host up", timeout_secs=10 * 60, retry_delay_secs=10) wait_for(lambda: not os.system(f"nc -zw5 {self.hostname_or_ip} 22"),