55This script: 
661. Runs the full test suite initially (with normal cleanup) 
772. Identifies failed tests and retries them with configurable strategy 
8- 3. Manages test namespaces intelligently (cleanup on success, keep failed for debugging) 
8+ 3. Manages test namespaces intelligently (cleanup on success, keep failed for debugging by default ) 
994. Provides detailed logging and comprehensive reporting 
1010
1111Usage: ./scripts/auto-retry-tests.py --parallel 4 --attempts-serial 3 --attempts-parallel 2 --venv ./venv 
1616import  json 
1717import  os 
1818import  re 
19+ import  shutil 
1920import  statistics 
2021import  subprocess 
2122import  sys 
@@ -32,8 +33,7 @@ class TestConstants:
3233    """Constants used throughout the test runner.""" 
3334
3435    MIN_RUNTIME_THRESHOLD  =  30.0   # Filter out quick failures (seconds) 
35-     MAX_RUNTIME_HISTORY  =  10   # Keep only recent runs 
36-     MAX_ERROR_LINES_TO_CHECK  =  50   # Lines to scan for errors in logs 
36+     MAX_RUNTIME_HISTORY  =  50   # Keep only recent runs 
3737    MAX_TEST_NAME_LENGTH  =  100   # Maximum test name length for filenames 
3838    HASH_SUFFIX_LENGTH  =  8   # Length of MD5 hash suffix 
3939    DEFAULT_PARALLEL_WORKERS  =  2   # Default number of parallel workers 
@@ -231,7 +231,7 @@ def build_configuration_dict(self, args) -> dict:
231231            "parallel" : args .parallel ,
232232            "attempts_parallel" : args .attempts_parallel ,
233233            "attempts_serial" : args .attempts_serial ,
234-             "keep_failed_namespaces " : args .keep_failed_namespaces ,
234+             "delete_failed_namespaces " : args .delete_failed_namespaces ,
235235            "venv" : args .venv ,
236236            "extra_args" : args .extra_args ,
237237            "output_dir" : str (self .output_dir ),
@@ -378,7 +378,7 @@ def create_safe_log_filename(
378378        return  f"{ safe_test_name } { attempt } { attempt_type }  
379379
380380    def  build_test_command (
381-         self , test_name : str  =  None , skip_delete : bool  =  False 
381+         self , test_name : str  =  None , skip_delete : bool  =  False ,  work_dir :  str   =   None 
382382    ) ->  List [str ]:
383383        """Build the command arguments for running tests.""" 
384384        command_args  =  ["scripts/run-tests" ]
@@ -393,6 +393,13 @@ def build_test_command(
393393        if  test_name :
394394            command_args .extend (["--test" , test_name ])
395395
396+         # Add unique work directory to prevent parallel interference 
397+         # beku deletes the work dir at the start of a test so if a test was already running and then 
398+         # another starts the new one would delete (and recreate) the work directory. 
399+         # This does lead to failures. 
400+         if  work_dir :
401+             command_args .extend (["--work-dir" , work_dir ])
402+ 
396403        # Add any extra arguments passed through 
397404        if  self .args .extra_args :
398405            command_args .extend (self .args .extra_args )
@@ -484,7 +491,20 @@ def run_single_test_suite(
484491        attempt_type : str  =  "initial" ,
485492    ) ->  TestResult :
486493        """Run a single test or the full test suite.""" 
487-         command_args  =  self .build_test_command (test_name , skip_delete )
494+         # Create unique work directory to prevent parallel test interference 
495+         timestamp  =  datetime .now ().strftime ("%Y%m%d_%H%M%S_%f" )[
496+             :- 3 
497+         ]  # microseconds to milliseconds 
498+         if  test_name :
499+             # Create a unique work directory based on test name and timestamp 
500+             safe_test_name  =  test_name .replace ("/" , "_" ).replace ("," , "_" )
501+             work_dir  =  (
502+                 f"tests/_work_{ safe_test_name } { attempt } { attempt_type } { timestamp }  
503+             )
504+         else :
505+             work_dir  =  f"tests/_work_full_suite_{ attempt } { attempt_type } { timestamp }  
506+ 
507+         command_args  =  self .build_test_command (test_name , skip_delete , work_dir )
488508
489509        # Set up log file 
490510        if  test_name :
@@ -519,6 +539,12 @@ def run_single_test_suite(
519539        start_time  =  time .time ()
520540
521541        with  open (log_file , "w" ) as  file_handle :
542+             # Log the exact command being executed 
543+             file_handle .write (f"Command: { ' ' .join (command_args )} \n " )
544+             file_handle .write (f"Working directory: { os .getcwd ()} \n " )
545+             file_handle .write ("="  *  80  +  "\n \n " )
546+             file_handle .flush ()
547+ 
522548            result  =  subprocess .run (
523549                command_args ,
524550                stdout = file_handle ,
@@ -569,10 +595,18 @@ def run_single_test_suite(
569595                        print (f"    📊 Average: { self .format_duration (avg_runtime )}  )
570596
571597        # If this was a skip_delete attempt and test passed, clean up the namespace 
598+         # We only want to keep failed namespaces 
572599        if  test_name  and  skip_delete  and  success  and  namespace :
573600            self .delete_test_namespace (namespace )
574601            print (f"  🧹 Test passed, cleaning up namespace: { namespace }  )
575602
603+         # Clean up the unique work directory after test completion 
604+         if  work_dir  and  Path (work_dir ).exists ():
605+             try :
606+                 shutil .rmtree (work_dir )
607+             except  Exception  as  exception :
608+                 print (f"  ⚠️ Failed to clean up work directory { work_dir } { exception }  )
609+ 
576610        return  test_result 
577611
578612
@@ -727,7 +761,9 @@ def generate_comprehensive_report(self, runner, start_time: datetime) -> str:
727761        report .append (f"Parallel: { runner .args .parallel }  )
728762        report .append (f"Parallel retry attempts: { runner .args .attempts_parallel }  )
729763        report .append (f"Serial retry attempts: { runner .args .attempts_serial }  )
730-         report .append (f"Keep failed namespaces: { runner .args .keep_failed_namespaces }  )
764+         report .append (
765+             f"Delete failed namespaces: { runner .args .delete_failed_namespaces }  
766+         )
731767        report .append (f"Virtualenv: { runner .args .venv  or  'None' }  )
732768        report .append ("" )
733769
@@ -882,7 +918,7 @@ def retry_tests_in_parallel(
882918            # Determine if this is the last attempt and no serial tests follow 
883919            is_last_attempt  =  attempt  ==  max_attempts 
884920            use_skip_delete  =  (
885-                 self .args .keep_failed_namespaces 
921+                 not   self .args .delete_failed_namespaces 
886922                and  not  serial_tests_follow 
887923                and  is_last_attempt 
888924            )
@@ -892,10 +928,6 @@ def retry_tests_in_parallel(
892928            print (
893929                f"Retrying { len (tests_to_retry )} { max_parallel }  
894930            )
895-             if  use_skip_delete :
896-                 print (
897-                     "  Using skip-delete for this final parallel attempt (no serial tests follow)" 
898-                 )
899931
900932            # Execute tests in parallel 
901933            with  ThreadPoolExecutor (max_workers = max_parallel ) as  executor :
@@ -952,10 +984,12 @@ def retry_test_serially(
952984        for  attempt  in  range (1 , max_attempts  +  1 ):
953985            # Only use skip-delete on the last attempt 
954986            is_last_attempt  =  attempt  ==  max_attempts 
987+             use_skip_delete  =  not  self .args .delete_failed_namespaces  and  is_last_attempt 
988+ 
955989            result  =  self .test_executor .run_single_test_suite (
956990                self .output_dir ,
957991                test_name = test_name ,
958-                 skip_delete = self . args . keep_failed_namespaces   and   is_last_attempt ,
992+                 skip_delete = use_skip_delete ,
959993                attempt = attempt ,
960994                attempt_type = "serial" ,
961995            )
@@ -992,23 +1026,15 @@ def create_test_summary(
9921026        else :
9931027            final_status  =  "failed" 
9941028
995-         # Find the last namespace for failed tests 
1029+         # Find the last namespace for failed tests (only if keeping failed namespaces)  
9961030        final_namespace  =  None 
997-         if  final_status  ==  "failed" :
1031+         if  final_status  ==  "failed"   and   not   self . args . delete_failed_namespaces :
9981032            # Keep the last failed attempt's namespace 
9991033            for  result  in  reversed (retry_results ):
10001034                if  result .namespace :
10011035                    final_namespace  =  result .namespace 
10021036                    break 
10031037
1004-         # Clean up namespaces for successful tests 
1005-         if  final_status  in  ["passed" , "flaky" ]:
1006-             # Delete all namespaces for this test 
1007-             all_results  =  [initial_result ] +  retry_results 
1008-             for  result  in  all_results :
1009-                 if  result .namespace  and  result .namespace  !=  final_namespace :
1010-                     self .test_executor .delete_test_namespace (result .namespace )
1011- 
10121038        summary  =  TestSummary (
10131039            test_name = test_name ,
10141040            initial_result = initial_result ,
@@ -1078,8 +1104,14 @@ def _run_initial_test_suite(self) -> bool:
10781104        )
10791105
10801106        if  not  failed_tests :
1081-             print ("  No failed tests found in output (this might be a parsing issue)" )
1082-             self .report_generator .generate_and_save_final_report (self , self .start_time )
1107+             print ("  No failed tests found in output but run-tests exited with code 1" )
1108+             print (
1109+                 "  This indicates an infrastructure or setup issue that prevents tests from running" 
1110+             )
1111+             print (
1112+                 "  Check the log file for connection errors, missing dependencies, or cluster issues" 
1113+             )
1114+             print (f"  Log file: { initial_result .log_file }  )
10831115            return  False 
10841116
10851117        print (f"  Found { len (failed_tests )}  )
@@ -1281,9 +1313,9 @@ def main():
12811313
12821314    # Namespace management arguments 
12831315    parser .add_argument (
1284-         "--keep -failed-namespaces" ,
1316+         "--delete -failed-namespaces" ,
12851317        action = "store_true" ,
1286-         help = "Keep  namespaces of failed tests for debugging (only the last one is kept )" ,
1318+         help = "Delete  namespaces of failed tests (default: keep them for debugging )" ,
12871319    )
12881320
12891321    # Output arguments 
0 commit comments