From c9af808d51f9c1ee0782edd8645ce835b5a0b292 Mon Sep 17 00:00:00 2001
From: Nick Robinson <npr251@gmail.com>
Date: Thu, 17 Oct 2024 16:37:41 +0100
Subject: [PATCH 01/14] Print completion message when all tests are done

---
 src/ReTestItems.jl       | 10 +++++-----
 src/log_capture.jl       |  8 ++++++--
 test/integrationtests.jl | 34 ++++++++++++++++++++++++----------
 3 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index 82d67792..35700ae7 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -449,13 +449,13 @@ function _runtests_in_current_env(
             end
         end
         Test.TESTSET_PRINT_ENABLE[] = true # reenable printing so our `finish` prints
+        # Let users know if tests are done, and if all of them ran (or if we failed fast).
+        # Print this above the final report as there might have been other logs printed
+        # since a failfast-cancellation was printed, but print it ASAP after tests finish
+        # in case any of the recording/reporting steps have an issue.
+        print_completion_summary(testitems; failedfast=(cfg.failfast && is_cancelled(testitems)))
         record_results!(testitems)
         cfg.report && write_junit_file(proj_name, dirname(projectfile), testitems.graph.junit)
-        if cfg.failfast && is_cancelled(testitems)
-            # Let users know if not all tests ran. Print this just above the final report as
-            # there might have been other logs printed since the cancellation was printed.
-            print_failfast_summary(testitems)
-        end
         Test.finish(testitems) # print summary of total passes/failures/errors
     finally
         Test.TESTSET_PRINT_ENABLE[] = true
diff --git a/src/log_capture.jl b/src/log_capture.jl
index 8c2fbb57..12dd14cc 100644
--- a/src/log_capture.jl
+++ b/src/log_capture.jl
@@ -310,9 +310,13 @@ end
 # So that the user is warned that not all tests were run.
 # We don't use loglock here, because this is only called once on the coordinator after all
 # tasks running tests have stopped and we're printing the final test report.
-function print_failfast_summary(t::TestItems)
+function print_completion_summary(t::TestItems; failedfast::Bool)
     io = DEFAULT_STDOUT[]
-    printstyled(io, "[ Fail Fast: "; bold=true, color=Base.warn_color())
+    if failedfast
+        printstyled(io, "[ Fail Fast: "; bold=true, color=Base.warn_color())
+    else
+        printstyled(io, "[ Tests Completed: "; bold=true, color=Base.info_color())
+    end
     println(io, "$(t.count)/$(length(t.testitems)) test items were run.")
     return nothing
 end
diff --git a/test/integrationtests.jl b/test/integrationtests.jl
index f4c7cc3c..b8abc076 100644
--- a/test/integrationtests.jl
+++ b/test/integrationtests.jl
@@ -45,18 +45,24 @@ end
 
 # test we can call runtests manually w/ directory
 @testset "manual `runtests(dir)`" begin
-    results = encased_testset() do
-        runtests(joinpath(TEST_PKG_DIR, "NoDeps.jl"))
+    using IOCapture
+    c = IOCapture.capture() do
+        encased_testset(() -> runtests(joinpath(TEST_PKG_DIR, "NoDeps.jl")))
     end
+    results = c.value
     @test n_passed(results) == 2  # NoDeps has two test files with a test each
+    @test contains(c.output, "[ Tests Completed: 2/2 test items were run.")
 end
 
 @testset "manual `runtests(file)`" begin
     # test we can point to a file at the base of the package (not just in `src` or `test`)
-    results = encased_testset() do
-        runtests(joinpath(TEST_PKG_DIR, "NoDeps.jl", "toplevel_tests.jl"))
+    using IOCapture
+    c = IOCapture.capture() do
+        encased_testset(() -> runtests(joinpath(TEST_PKG_DIR, "NoDeps.jl", "toplevel_tests.jl")))
     end
+    results = c.value
     @test n_passed(results) == 1
+    @test contains(c.output, "[ Tests Completed: 1/1 test items were run.")
 end
 
 @testset "`runtests(path)` auto finds testsetups" begin
@@ -273,20 +279,28 @@ end
 nworkers = 2
 @testset "runtests with nworkers = $nworkers" verbose=true begin
     @testset "Pkg.test() $pkg" for pkg in TEST_PKGS
-        results = with_test_package(pkg) do
-            withenv("RETESTITEMS_NWORKERS" => nworkers) do
-                Pkg.test()
+        c = IOCapture.capture() do
+            with_test_package(pkg) do
+                withenv("RETESTITEMS_NWORKERS" => nworkers) do
+                    Pkg.test()
+                end
             end
         end
+        results = c.value
         @test all_passed(results)
+        @test contains(c.output, "[ Tests Completed")
     end
     @testset "Pkg.test() DontPass.jl" begin
-        results = with_test_package("DontPass.jl") do
-            withenv("RETESTITEMS_NWORKERS" => 2) do
-                Pkg.test()
+        c = IOCapture.capture() do
+            with_test_package("DontPass.jl") do
+                withenv("RETESTITEMS_NWORKERS" => 2) do
+                    Pkg.test()
+                end
             end
         end
+        results = c.value
         @test length(non_passes(results)) > 0
+        @test contains(c.output, "[ Tests Completed")
     end
 end
 

From 0a70cef7902d073b1dcda64069d19999ccacd921 Mon Sep 17 00:00:00 2001
From: Nick Robinson <npr251@gmail.com>
Date: Thu, 17 Oct 2024 16:44:42 +0100
Subject: [PATCH 02/14] More debug logs

---
 src/ReTestItems.jl | 5 ++++-
 src/junit_xml.jl   | 4 ++++
 src/testcontext.jl | 3 +++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index 35700ae7..9ea82b3a 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -456,14 +456,17 @@ function _runtests_in_current_env(
         print_completion_summary(testitems; failedfast=(cfg.failfast && is_cancelled(testitems)))
         record_results!(testitems)
         cfg.report && write_junit_file(proj_name, dirname(projectfile), testitems.graph.junit)
+        @debugv 1 "Calling Test.finish(testitems)"
         Test.finish(testitems) # print summary of total passes/failures/errors
     finally
         Test.TESTSET_PRINT_ENABLE[] = true
-        # Cleanup test setup logs
+        @debugv 1 "Cleaning up test setup logs"
         foreach(Iterators.filter(endswith(".log"), readdir(RETESTITEMS_TEMP_FOLDER[], join=true))) do logfile
             rm(logfile; force=true)  # `force` to ignore error if file already cleaned up
         end
+        @debugv 1 "Done cleaning up test setup logs"
     end
+    @debugv 1 "DONE"
     return nothing
 end
 
diff --git a/src/junit_xml.jl b/src/junit_xml.jl
index fefeb062..1f0e5607 100644
--- a/src/junit_xml.jl
+++ b/src/junit_xml.jl
@@ -191,6 +191,7 @@ function write_junit_file(path::AbstractString, junit::Union{JUnitTestSuites,JUn
     open(path, "w") do io
         write_junit_file(io, junit)
     end
+    @debugv 1 "Done writing JUnit XML file to $(repr(path))"
     return nothing
 end
 
@@ -201,6 +202,7 @@ function write_junit_file(io::IO, junit::Union{JUnitTestSuites,JUnitTestSuite})
 end
 
 function write_junit_xml(io, junit::JUnitTestSuites)
+    @debugv 2 "Writing JUnit XML for testsuites $(junit.name)"
     write(io, "\n<testsuites")
     write_counts(io, junit.counts)
     write(io, ">")
@@ -212,6 +214,7 @@ function write_junit_xml(io, junit::JUnitTestSuites)
 end
 
 function write_junit_xml(io, ts::JUnitTestSuite)
+    @debugv 2 "Writing JUnit XML for testsuite $(ts.name)"
     write(io, "\n<testsuite name=", xml_markup(ts.name))
     write_counts(io, ts.counts)
     write(io, ">")
@@ -258,6 +261,7 @@ function write_dd_tags(io, tc::JUnitTestCase)
 end
 
 function write_junit_xml(io, tc::JUnitTestCase)
+    @debugv 2 "Writing JUnit XML for testcase $(tc.name)"
     write(io, "\n\t<testcase name=", xml_markup(tc.name))
     write_counts(io, tc.counts)
     write(io, ">")
diff --git a/src/testcontext.jl b/src/testcontext.jl
index fdd574e5..9fdae14e 100644
--- a/src/testcontext.jl
+++ b/src/testcontext.jl
@@ -108,9 +108,12 @@ is_cancelled(t::TestItems) = @atomic t.cancelled
 ###
 
 function record_results!(ti::TestItems)
+    @debugv 1 "Recording testitem results"
     foreach(ti.graph.children) do child
         record_results!(ti.graph, child)
     end
+    @debugv 1 "Done recording testitem results"
+    return ti
 end
 
 function record_results!(dir::DirNode, child_dir::DirNode)

From d094db8dc354c321950c8d21185fc82400559ad1 Mon Sep 17 00:00:00 2001
From: Nick Robinson <npr251@gmail.com>
Date: Thu, 17 Oct 2024 16:45:03 +0100
Subject: [PATCH 03/14] Bump version

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 1b7f821f..72e619ef 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "ReTestItems"
 uuid = "817f1d60-ba6b-4fd5-9520-3cf149f6a823"
-version = "1.29.0"
+version = "1.30.0"
 
 [deps]
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"

From 0420a847e662f868a36af103c0cb361605ea157a Mon Sep 17 00:00:00 2001
From: Nick Robinson <npr251@gmail.com>
Date: Mon, 21 Oct 2024 12:46:00 +0100
Subject: [PATCH 04/14] Log a message when each worker has completed tests

---
 src/ReTestItems.jl       | 9 ++++++---
 src/workers.jl           | 2 ++
 test/integrationtests.jl | 2 +-
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index 9ea82b3a..fe91c9b9 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -443,7 +443,7 @@ function _runtests_in_current_env(
                 ti = starting[i]
                 @spawn begin
                     with_logger(original_logger) do
-                        manage_worker($w, $proj_name, $testitems, $ti, $cfg)
+                        manage_worker($w, $proj_name, $testitems, $ti, $cfg; worker_num=$i)
                     end
                 end
             end
@@ -575,8 +575,10 @@ function record_test_error!(testitem, msg, elapsed_seconds::Real=0.0)
     return testitem
 end
 
+# The provided `worker_num` is only for logging purposes, and not persisted as part of the worker.
 function manage_worker(
-    worker::Worker, proj_name::AbstractString, testitems::TestItems, testitem::Union{TestItem,Nothing}, cfg::_Config,
+    worker::Worker, proj_name::AbstractString, testitems::TestItems, testitem::Union{TestItem,Nothing}, cfg::_Config;
+    worker_num::Int
 )
     ntestitems = length(testitems.testitems)
     run_number = 1
@@ -584,7 +586,7 @@ function manage_worker(
     while testitem !== nothing
         ch = Channel{TestItemResult}(1)
         if memory_percent() > memory_threshold_percent
-            @warn "Memory usage ($(Base.Ryu.writefixed(memory_percent(), 1))%) is higher than threshold ($(Base.Ryu.writefixed(memory_threshold_percent, 1))%). Restarting worker process to try to free memory."
+            @warn "Memory usage ($(Base.Ryu.writefixed(memory_percent(), 1))%) is higher than threshold ($(Base.Ryu.writefixed(memory_threshold_percent, 1))%). Restarting process for worker $worker_num to try to free memory."
             terminate!(worker)
             wait(worker)
             worker = robust_start_worker(proj_name, cfg.nworker_threads, cfg.worker_init_expr, ntestitems)
@@ -692,6 +694,7 @@ function manage_worker(
             continue
         end
     end
+    @info "All tests on worker $worker_num completed. Closing $worker."
     close(worker)
     return nothing
 end
diff --git a/src/workers.jl b/src/workers.jl
index ebaac6f3..43b9fc70 100644
--- a/src/workers.jl
+++ b/src/workers.jl
@@ -128,6 +128,7 @@ end
 # gracefully terminate a worker by sending a shutdown message
 # and waiting for the other tasks to perform worker shutdown
 function Base.close(w::Worker)
+    @debugv 2 "closing worker $(w.pid)"
     if !w.terminated && isopen(w.socket)
         req = Request(Symbol(), :(), rand(UInt64), true)
         @lock w.lock begin
@@ -135,6 +136,7 @@ function Base.close(w::Worker)
             flush(w.socket)
         end
     end
+    @debugv 2 "waiting for worker $(w.pid) to terminate"
     wait(w)
     return
 end
diff --git a/test/integrationtests.jl b/test/integrationtests.jl
index b8abc076..22a5adf6 100644
--- a/test/integrationtests.jl
+++ b/test/integrationtests.jl
@@ -1212,7 +1212,7 @@ end
         # monkey-patch the internal `memory_percent` function to return a fixed value, so we
         # can control if we hit the `memory_threshold`.
         @eval ReTestItems.memory_percent() = 83.1
-        expected_warning = "Warning: Memory usage (83.1%) is higher than threshold (7.0%). Restarting worker process to try to free memory."
+        expected_warning = "Warning: Memory usage (83.1%) is higher than threshold (7.0%). Restarting process for worker 1 to try to free memory."
 
         # Pass `memory_threshold` keyword, and hit the memory threshold.
         c1 = IOCapture.capture() do

From 608994adc6a051d87ae3e24e92cf29995abc98df Mon Sep 17 00:00:00 2001
From: Nick Robinson <npr251@gmail.com>
Date: Mon, 21 Oct 2024 12:51:43 +0100
Subject: [PATCH 05/14] Fix import for `at-debugv`

---
 src/ReTestItems.jl | 2 +-
 src/workers.jl     | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index fe91c9b9..181882fc 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -7,7 +7,7 @@ using .Threads: @spawn, nthreads
 using Pkg: Pkg
 using TestEnv
 using Logging
-using LoggingExtras
+using LoggingExtras: LoggingExtras, @debugv
 
 export runtests, runtestitem
 export @testsetup, @testitem
diff --git a/src/workers.jl b/src/workers.jl
index 43b9fc70..0dc874b2 100644
--- a/src/workers.jl
+++ b/src/workers.jl
@@ -1,6 +1,7 @@
 module Workers
 
 using Sockets, Serialization
+using LoggingExtras: @debugv
 
 export Worker, remote_eval, remote_fetch, terminate!, WorkerTerminatedException
 export trigger_profile

From 74c16c06f8eb7cecff96107b469b33fe7684fda8 Mon Sep 17 00:00:00 2001
From: Nick Robinson <npr251@gmail.com>
Date: Mon, 21 Oct 2024 12:55:58 +0100
Subject: [PATCH 06/14] Debug log after `close(worker)`

---
 src/ReTestItems.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index 181882fc..14d206cc 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -696,6 +696,7 @@ function manage_worker(
     end
     @info "All tests on worker $worker_num completed. Closing $worker."
     close(worker)
+    @debugv 1 "Worker $worker_num closed: $(worker)"
     return nothing
 end
 

From 262f4e477881d6675a5753cd408458edb771338d Mon Sep 17 00:00:00 2001
From: Nick Robinson <npr251@gmail.com>
Date: Mon, 21 Oct 2024 13:27:55 +0100
Subject: [PATCH 07/14] Use at-debug in workers.jl

---
 src/workers.jl | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/workers.jl b/src/workers.jl
index 0dc874b2..612d046a 100644
--- a/src/workers.jl
+++ b/src/workers.jl
@@ -1,7 +1,6 @@
 module Workers
 
 using Sockets, Serialization
-using LoggingExtras: @debugv
 
 export Worker, remote_eval, remote_fetch, terminate!, WorkerTerminatedException
 export trigger_profile
@@ -129,7 +128,7 @@ end
 # gracefully terminate a worker by sending a shutdown message
 # and waiting for the other tasks to perform worker shutdown
 function Base.close(w::Worker)
-    @debugv 2 "closing worker $(w.pid)"
+    @debug "closing $worker"
     if !w.terminated && isopen(w.socket)
         req = Request(Symbol(), :(), rand(UInt64), true)
         @lock w.lock begin
@@ -137,7 +136,7 @@ function Base.close(w::Worker)
             flush(w.socket)
         end
     end
-    @debugv 2 "waiting for worker $(w.pid) to terminate"
+    @debug "waiting for $worker to terminate"
     wait(w)
     return
 end

From 3766e931c3ba8b8652886cf398604ac2b09d9956 Mon Sep 17 00:00:00 2001
From: Nick Robinson <npr251@gmail.com>
Date: Mon, 21 Oct 2024 13:35:52 +0100
Subject: [PATCH 08/14] Add worker num to all worker restart messages

---
 src/ReTestItems.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index 14d206cc..6f37d230 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -684,7 +684,7 @@ function manage_worker(
                 run_number = 1
             else
                 run_number += 1
-                @info "Retrying $(repr(testitem.name)) on a new worker process. Run=$run_number."
+                @info "Retrying $(repr(testitem.name)) on a new worker $worker_num process. Run=$run_number."
             end
             # The worker was terminated, so replace it unless there are no more testitems to run
             if testitem !== nothing

From 52789579738f9c21f784d518868a0878a6f1afd3 Mon Sep 17 00:00:00 2001
From: Nick Robinson <npr251@gmail.com>
Date: Mon, 21 Oct 2024 13:38:28 +0100
Subject: [PATCH 09/14] Allow workers to have a user-given number identifier

---
 src/ReTestItems.jl | 13 ++++++-------
 src/workers.jl     | 28 +++++++++++++++-------------
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index 6f37d230..ea778d5c 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -471,10 +471,8 @@ function _runtests_in_current_env(
 end
 
 # Start a new `Worker` with `nworker_threads` threads and run `worker_init_expr` on it.
-# The provided `worker_num` is only for logging purposes, and not persisted as part of the worker.
-function start_worker(proj_name, nworker_threads::String, worker_init_expr::Expr, ntestitems::Int; worker_num=nothing)
-    w = Worker(; threads=nworker_threads)
-    i = worker_num == nothing ? "" : " $worker_num"
+function start_worker(proj_name, nworker_threads::String, worker_init_expr::Expr, ntestitems::Int; worker_num)
+    w = Worker(; threads=nworker_threads, num=worker_num)
     # remote_fetch here because we want to make sure the worker is all setup before starting to eval testitems
     remote_fetch(w, quote
         using ReTestItems, Test
@@ -482,7 +480,8 @@ function start_worker(proj_name, nworker_threads::String, worker_init_expr::Expr
         const GLOBAL_TEST_CONTEXT = ReTestItems.TestContext($proj_name, $ntestitems)
         GLOBAL_TEST_CONTEXT.setups_evaled = ReTestItems.TestSetupModules()
         nthreads_str = $nworker_threads
-        @info "Starting test worker$($i) on pid = $(Libc.getpid()), with $nthreads_str threads"
+        num = $worker_num
+        @info "Starting test worker $(num) on pid=$(Libc.getpid()), with $(nthreads_str) threads"
         $(worker_init_expr.args...)
         nothing
     end)
@@ -589,7 +588,7 @@ function manage_worker(
             @warn "Memory usage ($(Base.Ryu.writefixed(memory_percent(), 1))%) is higher than threshold ($(Base.Ryu.writefixed(memory_threshold_percent, 1))%). Restarting process for worker $worker_num to try to free memory."
             terminate!(worker)
             wait(worker)
-            worker = robust_start_worker(proj_name, cfg.nworker_threads, cfg.worker_init_expr, ntestitems)
+            worker = robust_start_worker(proj_name, cfg.nworker_threads, cfg.worker_init_expr, ntestitems; worker_num)
         end
         testitem.workerid[] = worker.pid
         timeout = something(testitem.timeout, cfg.testitem_timeout)
@@ -688,7 +687,7 @@ function manage_worker(
             end
             # The worker was terminated, so replace it unless there are no more testitems to run
             if testitem !== nothing
-                worker = robust_start_worker(proj_name, cfg.nworker_threads, cfg.worker_init_expr, ntestitems)
+                worker = robust_start_worker(proj_name, cfg.nworker_threads, cfg.worker_init_expr, ntestitems; worker_num)
             end
             # Now loop back around to reschedule the testitem
             continue
diff --git a/src/workers.jl b/src/workers.jl
index 612d046a..2a1df766 100644
--- a/src/workers.jl
+++ b/src/workers.jl
@@ -52,7 +52,8 @@ Base.fetch(f::Future) = fetch(f.value)
 
 mutable struct Worker
     lock::ReentrantLock # protects the .futures field; no other fields are modified after construction
-    pid::Int
+    num::Int # user given ID
+    pid::Int # process ID
     process::Base.Process
     socket::TCPSocket
     messages::Task
@@ -73,7 +74,7 @@ end
 function terminate!(w::Worker, from::Symbol=:manual)
     already_terminated = @atomicswap :monotonic w.terminated = true
     if !already_terminated
-        @debug "terminating worker $(w.pid) from $from"
+        @debug "terminating $worker from $from"
     end
     wte = WorkerTerminatedException(w)
     @lock w.lock begin
@@ -114,7 +115,7 @@ end
 # Called when timeout_profile_wait is non-zero.
 function trigger_profile(w::Worker, timeout_profile_wait, from::Symbol=:manual)
     if !Sys.iswindows()
-        @debug "sending profile request to worker $(w.pid) from $from"
+        @debug "sending profile request to $worker from $from"
         if Sys.islinux()
             kill(w.process, 10)  # SIGUSR1
         elseif Sys.isbsd()
@@ -144,21 +145,22 @@ end
 # wait until our spawned tasks have all finished
 Base.wait(w::Worker) = fetch(w.process_watch) && fetch(w.messages) && fetch(w.output)
 
-Base.show(io::IO, w::Worker) = print(io, "Worker(pid=$(w.pid)", w.terminated ? ", terminated=true, termsignal=$(w.process.termsignal)" : "", ")")
+Base.show(io::IO, w::Worker) = print(io, "Worker(num=$(w.num), pid=$(w.pid)", w.terminated ? ", terminated=true, termsignal=$(w.process.termsignal)" : "", ")")
 
 # used in testing to ensure all created workers are
 # eventually cleaned up properly
 const GLOBAL_CALLBACK_PER_WORKER = Ref{Any}()
 
 function Worker(;
+    num::Int=rand(1:typemax(Int32)),
     env::AbstractDict=ENV,
     dir::String=pwd(),
     threads::String="auto",
     exeflags=`--threads=$threads`,
     connect_timeout::Int=60,
     worker_redirect_io::IO=stdout,
-    worker_redirect_fn=(io, pid, line)->println(io, "  Worker $pid:  $line")
-    )
+    worker_redirect_fn=(io, pid, line)->println(io, "  Worker $num/$pid:  $line")
+)
     # below copied from Distributed.launch
     env = Dict{String, String}(env)
     pathsep = Sys.iswindows() ? ";" : ":"
@@ -194,7 +196,7 @@ function Worker(;
             return Sockets.connect(parse(Int, split(port_str, ':')[2]))
         end
         # create worker
-        w = Worker(ReentrantLock(), pid, proc, sock, Task(nothing), Task(nothing), Task(nothing), Dict{UInt64, Future}(), false)
+        w = Worker(ReentrantLock(), num, pid, proc, sock, Task(nothing), Task(nothing), Task(nothing), Dict{UInt64, Future}(), false)
         ## start a task to watch for worker process termination, notify the event when the task starts
         e1 = Threads.Event()
         w.process_watch = Threads.@spawn watch_and_terminate!(w, $e1)
@@ -233,7 +235,7 @@ function redirect_worker_output(io::IO, w::Worker, fn, proc, ev::Threads.Event)
             end
         end
     catch e
-        # @error "Error redirecting worker output $(w.pid)" exception=(e, catch_backtrace())
+        # @error "Error redirecting $worker output" exception=(e, catch_backtrace())
         terminate!(w, :redirect_worker_output)
         e isa EOFError || e isa Base.IOError || rethrow()
     finally
@@ -252,13 +254,13 @@ function process_responses(w::Worker, ev::Threads.Event)
         while isopen(w.socket) && !w.terminated
             # get the next Response from the worker
             r = deserialize(w.socket)
-            @assert r isa Response "Received invalid response from worker $(w.pid): $(r)"
-            # println("Received response $(r) from worker $(w.pid)")
+            @assert r isa Response "Received invalid response from $worker: $(r)"
+            # println("Received response $(r) from $worker")
             @lock lock begin
-                @assert haskey(reqs, r.id) "Received response for unknown request $(r.id) from worker $(w.pid)"
+                @assert haskey(reqs, r.id) "Received response for unknown request $(r.id) from $worker"
                 # look up the Future for this request
                 fut = pop!(reqs, r.id)
-                @assert !isready(fut.value) "Received duplicate response for request $(r.id) from worker $(w.pid)"
+                @assert !isready(fut.value) "Received duplicate response for request $(r.id) from $worker"
                 if r.error !== nothing
                     # this allows rethrowing the exception from the worker to the caller
                     close(fut.value, r.error)
@@ -268,7 +270,7 @@ function process_responses(w::Worker, ev::Threads.Event)
             end
         end
     catch e
-        # @error "Error processing responses from worker $(w.pid)" exception=(e, catch_backtrace())
+        # @error "Error processing responses from $worker" exception=(e, catch_backtrace())
         terminate!(w, :process_responses)
         e isa EOFError || e isa Base.IOError || rethrow()
     end

From 9851f9638302c6dd5b9342657438a926fd6de64c Mon Sep 17 00:00:00 2001
From: Nick Robinson <npr251@gmail.com>
Date: Mon, 21 Oct 2024 14:29:09 +0100
Subject: [PATCH 10/14] format

---
 src/workers.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/workers.jl b/src/workers.jl
index 2a1df766..b1b06b37 100644
--- a/src/workers.jl
+++ b/src/workers.jl
@@ -159,7 +159,7 @@ function Worker(;
     exeflags=`--threads=$threads`,
     connect_timeout::Int=60,
     worker_redirect_io::IO=stdout,
-    worker_redirect_fn=(io, pid, line)->println(io, "  Worker $num/$pid:  $line")
+    worker_redirect_fn=(io, pid, line)->println(io, "  Worker $num|$pid:  $line")
 )
     # below copied from Distributed.launch
     env = Dict{String, String}(env)

From 1e39245dd24280f63c4c3397ff4bdd70c28c0f75 Mon Sep 17 00:00:00 2001
From: Nick Robinson <npr251@gmail.com>
Date: Mon, 21 Oct 2024 14:30:17 +0100
Subject: [PATCH 11/14] fixup! format

---
 src/workers.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/workers.jl b/src/workers.jl
index b1b06b37..6271e14a 100644
--- a/src/workers.jl
+++ b/src/workers.jl
@@ -159,7 +159,7 @@ function Worker(;
     exeflags=`--threads=$threads`,
     connect_timeout::Int=60,
     worker_redirect_io::IO=stdout,
-    worker_redirect_fn=(io, pid, line)->println(io, "  Worker $num|$pid:  $line")
+    worker_redirect_fn=(io, pid, line)->println(io, "  Worker $num $pid:  $line")
 )
     # below copied from Distributed.launch
     env = Dict{String, String}(env)

From a8f38db95bed40fcec3097870ac835372d94c12d Mon Sep 17 00:00:00 2001
From: Nick Robinson <npr251@gmail.com>
Date: Mon, 21 Oct 2024 15:52:55 +0100
Subject: [PATCH 12/14] fixup! Allow workers to have a user-given number
 identifier

---
 test/integrationtests.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/integrationtests.jl b/test/integrationtests.jl
index 22a5adf6..1d172a13 100644
--- a/test/integrationtests.jl
+++ b/test/integrationtests.jl
@@ -1017,10 +1017,10 @@ end
 @testset "worker always crashes immediately" begin
     file = joinpath(TEST_FILES_DIR, "_happy_tests.jl")
 
-    # We have occassionally seen the Process exist with the expected signal.
+    # We have occassionally seen the Process exit without the expected signal.
     @assert typemin(Int32) == -2147483648
-    terminated_err_log_1 = r"Error: Worker\(pid=\d+, terminated=true, termsignal=(6|-2147483648)\) terminated unexpectedly. Starting new worker process \(retry 1/2\)."
-    terminated_err_log_2 = r"Error: Worker\(pid=\d+, terminated=true, termsignal=(6|-2147483648)\) terminated unexpectedly. Starting new worker process \(retry 2/2\)."
+    terminated_err_log_1 = r"Error: Worker\(pid=\d+, terminated=true, termsignal=(6|-2147483648)\) terminated unexpectedly. Starting new worker \d process \(retry 1/2\)."
+    terminated_err_log_2 = r"Error: Worker\(pid=\d+, terminated=true, termsignal=(6|-2147483648)\) terminated unexpectedly. Starting new worker \d process \(retry 2/2\)."
 
     worker_init_expr = :(@eval ccall(:abort, Cvoid, ()))
     # We don't use IOCapture for capturing logs as that seems to hang when the worker crashes.

From edca086fe2a8e42372f85384a5e63b058b030364 Mon Sep 17 00:00:00 2001
From: Nick Robinson <npr251@gmail.com>
Date: Mon, 21 Oct 2024 17:16:51 +0100
Subject: [PATCH 13/14] fixup! fixup! Allow workers to have a user-given number
 identifier

---
 test/integrationtests.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/integrationtests.jl b/test/integrationtests.jl
index 1d172a13..63d07602 100644
--- a/test/integrationtests.jl
+++ b/test/integrationtests.jl
@@ -1019,8 +1019,8 @@ end
 
     # We have occassionally seen the Process exit without the expected signal.
     @assert typemin(Int32) == -2147483648
-    terminated_err_log_1 = r"Error: Worker\(pid=\d+, terminated=true, termsignal=(6|-2147483648)\) terminated unexpectedly. Starting new worker \d process \(retry 1/2\)."
-    terminated_err_log_2 = r"Error: Worker\(pid=\d+, terminated=true, termsignal=(6|-2147483648)\) terminated unexpectedly. Starting new worker \d process \(retry 2/2\)."
+    terminated_err_log_1 = r"Error: Worker\(num=\d+, pid=\d+, terminated=true, termsignal=(6|-2147483648)\) terminated unexpectedly. Starting new worker process \(retry 1/2\)."
+    terminated_err_log_2 = r"Error: Worker\(num=\d+, pid=\d+, terminated=true, termsignal=(6|-2147483648)\) terminated unexpectedly. Starting new worker process \(retry 2/2\)."
 
     worker_init_expr = :(@eval ccall(:abort, Cvoid, ()))
     # We don't use IOCapture for capturing logs as that seems to hang when the worker crashes.

From cc7f635ab10a8e4766fb2ee53fdc5266590d8b0d Mon Sep 17 00:00:00 2001
From: Nick Robinson <npr251@gmail.com>
Date: Mon, 21 Oct 2024 17:54:51 +0100
Subject: [PATCH 14/14] fixup! fixup! fixup! Allow workers to have a user-given
 number identifier

---
 test/integrationtests.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/integrationtests.jl b/test/integrationtests.jl
index 63d07602..f49b150d 100644
--- a/test/integrationtests.jl
+++ b/test/integrationtests.jl
@@ -1050,9 +1050,9 @@ end
                 @eval ccall(:abort, Cvoid, ())
             end
         end
-        # We have occassionally seen the Process exist with the expected signal.
+        # We have occassionally seen the Process exit without the expected signal.
         @assert typemin(Int32) == -2147483648
-        terminated_err_log_1 = r"Error: Worker\(pid=\d+, terminated=true, termsignal=(6|-2147483648)\) terminated unexpectedly. Starting new worker process \(retry 1/2\)."
+        terminated_err_log_1 = r"Error: Worker\(num=\d+, pid=\d+, terminated=true, termsignal=(6|-2147483648)\) terminated unexpectedly. Starting new worker process \(retry 1/2\)."
         # We don't use IOCapture for capturing logs as that seems to hang when the worker crashes.
         mktemp() do log_io, _
             results = redirect_stdio(stdout=log_io, stderr=log_io, stdin=devnull) do