From 2c5bc539045d570cb51d784ae4dfb70255675913 Mon Sep 17 00:00:00 2001
From: Michael Ilyin
Date: Tue, 19 Aug 2025 15:05:57 +0200
Subject: [PATCH 1/3] retry zenoh pico tests if daemon is slow to start

---
 tests/run_with_router.sh | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/tests/run_with_router.sh b/tests/run_with_router.sh
index c36747ac..69968cdd 100644
--- a/tests/run_with_router.sh
+++ b/tests/run_with_router.sh
@@ -49,9 +49,31 @@ for LOCATOR in $(echo "$LOCATORS" | xargs); do
 
     sleep 5
 
-    echo "> Running $TESTBIN ..."
-    "$TESTBIN" "$LOCATOR" > client."$TEST_NAME_WE".log 2>&1
-    RETCODE=$?
+    # Run test with retry if log shows "Failed to open session"
+    MAX_RETRIES=5
+    ATTEMPT=1
+    RETCODE=1
+    while :; do
+        echo "> Running $TESTBIN ... (attempt $ATTEMPT/$((MAX_RETRIES + 1)))"
+        "$TESTBIN" "$LOCATOR" > client."$TEST_NAME_WE".log 2>&1
+        RETCODE=$?
+
+        # Only retry if the test failed (non-zero) AND the log contains the specific transient failure message
+        if [ "$RETCODE" -lt 0 ] && grep -q "Failed to open session" client."$TEST_NAME_WE".log; then
+            if [ "$ATTEMPT" -le "$MAX_RETRIES" ]; then
+                echo "> Detected 'Failed to open session' in log. Client log (attempt $ATTEMPT):"
+                cat client."$TEST_NAME_WE".log
+                echo "> Waiting 5s and retrying..."
+                sleep 5
+                ATTEMPT=$((ATTEMPT + 1))
+                continue
+            else
+                echo "> Detected 'Failed to open session' after $MAX_RETRIES retries. Not retrying."
+            fi
+        fi
+        # Break on success or on failures not matching the transient error
+        break
+    done
 
     echo "> Logs of $TESTBIN ..."
     cat client."$TEST_NAME_WE".log

From 9d9ae3e2dc8d81147364f2891551e68a0e3ac7d3 Mon Sep 17 00:00:00 2001
From: Michael Ilyin
Date: Tue, 19 Aug 2025 16:18:33 +0200
Subject: [PATCH 2/3] changed error code check

---
 tests/run_with_router.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/run_with_router.sh b/tests/run_with_router.sh
index 69968cdd..3134a590 100644
--- a/tests/run_with_router.sh
+++ b/tests/run_with_router.sh
@@ -59,7 +59,7 @@ for LOCATOR in $(echo "$LOCATORS" | xargs); do
         RETCODE=$?
 
         # Only retry if the test failed (non-zero) AND the log contains the specific transient failure message
-        if [ "$RETCODE" -lt 0 ] && grep -q "Failed to open session" client."$TEST_NAME_WE".log; then
+        if [ "$RETCODE" -ne 0 ] && grep -q "Failed to open session" client."$TEST_NAME_WE".log; then
             if [ "$ATTEMPT" -le "$MAX_RETRIES" ]; then
                 echo "> Detected 'Failed to open session' in log. Client log (attempt $ATTEMPT):"
                 cat client."$TEST_NAME_WE".log
@@ -86,7 +86,7 @@ for LOCATOR in $(echo "$LOCATORS" | xargs); do
     echo "> Logs of zenohd ..."
     cat zenohd."$TEST_NAME_WE".log
 
-    [ "$RETCODE" -lt 0 ] && exit "$RETCODE"
+    [ "$RETCODE" -ne 0 ] && exit "$RETCODE"
 done
 
 echo "> Done ($RETCODE)."

From 9e9508bbeb4ff460a0f13967afe197457c83e8a5 Mon Sep 17 00:00:00 2001
From: Michael Ilyin
Date: Wed, 20 Aug 2025 13:56:29 +0200
Subject: [PATCH 3/3] debug prints

---
 tests/run_with_router.sh | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/run_with_router.sh b/tests/run_with_router.sh
index 3134a590..5dbc5af8 100644
--- a/tests/run_with_router.sh
+++ b/tests/run_with_router.sh
@@ -47,6 +47,9 @@ for LOCATOR in $(echo "$LOCATORS" | xargs); do
     RUST_LOG=debug ./zenohd --plugin-search-dir "$TESTDIR/zenoh-git/target/debug" -l "$LOCATOR" > zenohd."$TEST_NAME_WE".log 2>&1 &
     ZPID=$!
 
+    date
+    cat zenohd."$TEST_NAME_WE".log
+
     sleep 5
 
     # Run test with retry if log shows "Failed to open session"
@@ -64,6 +67,10 @@ for LOCATOR in $(echo "$LOCATORS" | xargs); do
                 echo "> Detected 'Failed to open session' in log. Client log (attempt $ATTEMPT):"
                 cat client."$TEST_NAME_WE".log
                 echo "> Waiting 5s and retrying..."
+
+                date
+                cat zenohd."$TEST_NAME_WE".log
+
                 sleep 5
                 ATTEMPT=$((ATTEMPT + 1))
                 continue