File tree Expand file tree Collapse file tree 1 file changed +14
-0
lines changed
Expand file tree Collapse file tree 1 file changed +14
-0
lines changed Original file line number Diff line number Diff line change @@ -103,6 +103,7 @@ services:
103103 girder_worker_pipelines :
104104 # Merge base-worker object with this config
105105 << : *base-worker
106+ restart : always
106107 deploy :
107108 resources :
108109 reservations :
@@ -116,10 +117,17 @@ services:
116117 - " WORKER_CONCURRENCY=${PIPELINE_WORKER_CONCURRENCY:-1}"
117118 - " WORKER_GPU_UUID=${PIPELINE_GPU_UUID}"
118119 - " CELERY_BROKER_URL=${CELERY_BROKER_URL:-amqp://guest:guest@rabbit/default}"
120+ healthcheck :
121+ test : ["CMD", "nvidia-smi"]
122+ interval : 15m
123+ timeout : 10s
124+ retries : 1
125+ start_period : 1m
119126
120127 girder_worker_training :
121128 # Merge base-worker object with this config
122129 << : *base-worker
130+ restart : always
123131 deploy :
124132 resources :
125133 reservations :
@@ -132,6 +140,12 @@ services:
132140 - " WORKER_CONCURRENCY=${TRAINING_WORKER_CONCURRENCY:-1}"
133141 - " WORKER_GPU_UUID=${TRAINING_GPU_UUID}"
134142 - " CELERY_BROKER_URL=${CELERY_BROKER_URL:-amqp://guest:guest@rabbit/default}"
143+ healthcheck :
144+ test : ["CMD", "nvidia-smi"]
145+ interval : 15m
146+ timeout : 10s
147+ retries : 1
148+ start_period : 1m
135149
136150volumes :
137151 addons :
You can’t perform that action at this time.
0 commit comments