@@ -14,15 +14,22 @@ import Pkg
14
14
using Distributed: launch, manage, kill, init_worker, connect
15
15
# ==================================================================
16
16
17
+ export ElasticManager, elastic_worker
18
+
17
19
18
20
# The master process listens on a well-known port
19
21
# Launched workers connect to the master and redirect their STDOUTs to the same
20
22
# Workers can join and leave the cluster on demand.
21
23
22
- export ElasticManager, elastic_worker
23
-
24
24
const HDR_COOKIE_LEN = Distributed. HDR_COOKIE_LEN
25
25
26
# `Base.errormonitor` only exists on Julia 1.7 and newer, so background tasks
# are registered through this version-gated shim instead of calling it directly.
@static if Base.VERSION >= v"1.7-"
    # Delegate to Base; it logs the failure of `t` (if any) and returns `t`.
    my_errormonitor(t) = Base.errormonitor(t)
else
    # No monitoring is available on older releases. Hand the task back
    # unchanged so the return value matches the 1.7+ branch.
    my_errormonitor(t) = t
end
32
+
26
33
struct ElasticManager <: Distributed.ClusterManager
27
34
active:: Dict{Int, Distributed.WorkerConfig} # active workers
28
35
pending:: Channel{Sockets.TCPSocket} # to be added workers
@@ -47,20 +54,23 @@ struct ElasticManager <: Distributed.ClusterManager
47
54
error (" Failed to automatically get host's IP address. Please specify `addr=` explicitly." )
48
55
end
49
56
end
50
-
57
+
51
58
l_sock = Distributed. listen (addr, port)
52
59
53
60
lman = new (Dict {Int, Distributed.WorkerConfig} (), Channel {Sockets.TCPSocket} (typemax (Int)), Set {Int} (), topology, Sockets. getsockname (l_sock), manage_callback, printing_kwargs)
54
61
55
- @async begin
62
+ t1 = @async begin
56
63
while true
57
64
let s = Sockets. accept (l_sock)
58
- @async process_worker_conn (lman, s)
65
+ t2 = @async process_worker_conn (lman, s)
66
+ my_errormonitor (t2)
59
67
end
60
68
end
61
69
end
70
+ my_errormonitor (t1)
62
71
63
- @async process_pending_connections (lman)
72
+ t3 = @async process_pending_connections (lman)
73
+ my_errormonitor (t3)
64
74
65
75
lman
66
76
end
@@ -153,7 +163,7 @@ function Base.show(io::IO, mgr::ElasticManager)
153
163
154
164
println (iob, " Worker connect command : " )
155
165
print (iob, " " , get_connect_cmd (mgr; mgr. printing_kwargs... ))
156
-
166
+
157
167
print (io, String (take! (iob)))
158
168
end
159
169
@@ -176,5 +186,21 @@ function elastic_worker(
176
186
Distributed. start_worker (c, cookie)
177
187
end
178
188
189
"""
    get_connect_cmd(em::ElasticManager; absolute_exename=true, same_project=true,
                    exeflags::Tuple=())

Return, as one space-separated string, a shell command that starts a Julia
process which calls `ElasticClusterManager.elastic_worker` with the current
cluster cookie and the address/port stored in `em.sockname`.

# Keywords
- `absolute_exename`: when `true`, use `Sys.BINDIR` joined with
  `Base.julia_exename()` as the executable; otherwise plain `julia`.
- `same_project`: when `true`, add a `--project=` flag pointing at the project
  file of the current Pkg environment.
- `exeflags`: extra flags inserted right after the executable.
"""
function get_connect_cmd(
    em::ElasticManager;
    absolute_exename=true,
    same_project=true,
    exeflags::Tuple=(),
)
    ip = string(em.sockname[1])
    port = convert(Int, em.sockname[2])
    cookie = Distributed.cluster_cookie()
    exename = absolute_exename ? joinpath(Sys.BINDIR, Base.julia_exename()) : "julia"
    # NOTE(review): `Pkg.API.Context` does not appear to be documented public
    # API; `Base.active_project()` may be a lighter alternative — confirm
    # before switching. The paths are also not shell-quoted, so a path
    # containing spaces would need manual quoting by the user.
    project = same_project ? ("--project=$(Pkg.API.Context().env.project_file)",) : ()

    # No padding inside the escaped quotes: the cookie and IP must reach
    # `elastic_worker` exactly as generated.
    return join(
        [
            exename,
            exeflags...,
            project...,
            "-e 'import ElasticClusterManager; ElasticClusterManager.elastic_worker(\"$cookie\",\"$ip\",$port)'",
        ],
        " ",
    )
end
204
+
179
205
180
206
end # module CustomClusterManagers
0 commit comments