Skip to content

Commit 2dc60d6

Browse files
3coinsdlqqq
andauthored
Added exception handling to both api handlers and UI (#302)
* Added exception handling to both api handlers, and UI * Ran lint * keep network error state local to detail-view component * fix lint Co-authored-by: David L. Qiu <[email protected]>
1 parent 07dba0f commit 2dc60d6

File tree

5 files changed

+221
-62
lines changed

5 files changed

+221
-62
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
### Bugs fixed
1414

15-
- Fixed propagation of error message to UI [#299](https://github.com/jupyter-server/jupyter-scheduler/pull/299) ([@3coins](https://github.com/3coins))
15+
- Fixed propagation of error message to UI [#299](https://github.com/jupyter-server/jupyter-scheduler/pull/299) ([@3coins](https://github.com/3coins))
1616
- correctly handle last page even when latest next_token is truthy [#292](https://github.com/jupyter-server/jupyter-scheduler/pull/292) ([@dlqqq](https://github.com/dlqqq))
1717

1818
### Contributors to this release

jupyter_scheduler/handlers.py

Lines changed: 92 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,15 @@ class JobDefinitionHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler):
7676
@tornado.web.authenticated
7777
async def get(self, job_definition_id=None):
7878
if job_definition_id:
79-
job_definition = await ensure_async(
80-
self.scheduler.get_job_definition(job_definition_id)
81-
)
82-
self.finish(job_definition.json())
79+
try:
80+
job_definition = await ensure_async(
81+
self.scheduler.get_job_definition(job_definition_id)
82+
)
83+
except SchedulerError as e:
84+
self.log.exception(e)
85+
raise tornado.web.HTTPError(500, str(e)) from e
86+
else:
87+
self.finish(job_definition.json())
8388
else:
8489
create_time = self.get_query_argument("create_time", None)
8590
sort_by = compute_sort_model(self.get_query_arguments("sort_by"))
@@ -92,8 +97,13 @@ async def get(self, job_definition_id=None):
9297
max_items=self.get_query_argument("max_items", DEFAULT_MAX_ITEMS),
9398
next_token=self.get_query_argument("next_token", None),
9499
)
95-
list_response = await ensure_async(self.scheduler.list_job_definitions(list_query))
96-
self.finish(list_response.json(exclude_none=True))
100+
try:
101+
list_response = await ensure_async(self.scheduler.list_job_definitions(list_query))
102+
except SchedulerError as e:
103+
self.log.exception(e)
104+
raise tornado.web.HTTPError(500, str(e)) from e
105+
else:
106+
self.finish(list_response.json(exclude_none=True))
97107

98108
@tornado.web.authenticated
99109
async def post(self):
@@ -119,25 +129,42 @@ async def post(self):
119129
@tornado.web.authenticated
120130
async def patch(self, job_definition_id):
121131
payload = self.get_json_body()
122-
await ensure_async(
123-
self.scheduler.update_job_definition(job_definition_id, UpdateJobDefinition(**payload))
124-
)
125-
self.set_status(204)
126-
self.finish()
132+
try:
133+
await ensure_async(
134+
self.scheduler.update_job_definition(
135+
job_definition_id, UpdateJobDefinition(**payload)
136+
)
137+
)
138+
except SchedulerError as e:
139+
self.log.exception(e)
140+
raise tornado.web.HTTPError(500, str(e)) from e
141+
else:
142+
self.set_status(204)
143+
self.finish()
127144

128145
@tornado.web.authenticated
129146
async def delete(self, job_definition_id):
130-
await ensure_async(self.scheduler.delete_job_definition(job_definition_id))
131-
self.set_status(204)
132-
self.finish()
147+
try:
148+
await ensure_async(self.scheduler.delete_job_definition(job_definition_id))
149+
except SchedulerError as e:
150+
self.log.exception(e)
151+
raise tornado.web.HTTPError(500, str(e)) from e
152+
else:
153+
self.set_status(204)
154+
self.finish()
133155

134156

135157
class JobHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler):
136158
@tornado.web.authenticated
137159
async def get(self, job_id=None):
138160
if job_id:
139-
job = await ensure_async(self.scheduler.get_job(job_id))
140-
self.finish(job.json())
161+
try:
162+
job = await ensure_async(self.scheduler.get_job(job_id))
163+
except SchedulerError as e:
164+
self.log.exception(e)
165+
raise tornado.web.HTTPError(500, str(e)) from e
166+
else:
167+
self.finish(job.json())
141168
else:
142169
status = self.get_query_argument("status", None)
143170
start_time = self.get_query_argument("start_time", None)
@@ -152,9 +179,13 @@ async def get(self, job_id=None):
152179
max_items=self.get_query_argument("max_items", DEFAULT_MAX_ITEMS),
153180
next_token=self.get_query_argument("next_token", None),
154181
)
155-
list_jobs_response = await ensure_async(self.scheduler.list_jobs(list_jobs_query))
156-
157-
self.finish(list_jobs_response.json(exclude_none=True))
182+
try:
183+
list_jobs_response = await ensure_async(self.scheduler.list_jobs(list_jobs_query))
184+
except SchedulerError as e:
185+
self.log.exception(e)
186+
raise tornado.web.HTTPError(500, str(e)) from e
187+
else:
188+
self.finish(list_jobs_response.json(exclude_none=True))
158189

159190
@tornado.web.authenticated
160191
async def post(self):
@@ -191,20 +222,28 @@ async def patch(self, job_id):
191222
"Invalid value for field 'status'. Jobs can only be updated to status 'STOPPED' after creation.",
192223
)
193224

194-
if status:
195-
await ensure_async(self.scheduler.stop_job(job_id))
225+
try:
226+
if status:
227+
await ensure_async(self.scheduler.stop_job(job_id))
228+
else:
229+
await ensure_async(self.scheduler.update_job(job_id, UpdateJob(**payload)))
230+
except SchedulerError as e:
231+
self.log.exception(e)
232+
raise tornado.web.HTTPError(500, str(e)) from e
196233
else:
197-
await ensure_async(self.scheduler.update_job(job_id, UpdateJob(**payload)))
198-
199-
self.set_status(204)
200-
self.finish()
234+
self.set_status(204)
235+
self.finish()
201236

202237
@tornado.web.authenticated
203238
async def delete(self, job_id):
204-
await ensure_async(self.scheduler.delete_job(job_id))
205-
206-
self.set_status(204)
207-
self.finish()
239+
try:
240+
await ensure_async(self.scheduler.delete_job(job_id))
241+
except SchedulerError as e:
242+
self.log.exception(e)
243+
raise tornado.web.HTTPError(500, str(e)) from e
244+
else:
245+
self.set_status(204)
246+
self.finish()
208247

209248

210249
class JobFromDefinitionHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler):
@@ -232,11 +271,15 @@ class BatchJobHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler):
232271
@tornado.web.authenticated
233272
async def delete(self):
234273
job_ids = self.get_query_arguments("job_id")
235-
for job_id in job_ids:
236-
await ensure_async(self.scheduler.delete_job(job_id))
237-
238-
self.set_status(204)
239-
self.finish()
274+
try:
275+
for job_id in job_ids:
276+
await ensure_async(self.scheduler.delete_job(job_id))
277+
except SchedulerError as e:
278+
self.log.exception(e)
279+
raise tornado.web.HTTPError(500, str(e)) from e
280+
else:
281+
self.set_status(204)
282+
self.finish()
240283

241284

242285
class JobsCountHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler):
@@ -246,9 +289,13 @@ async def get(self):
246289
count_jobs_query = CountJobsQuery(
247290
status=Status(status.upper()) if status else Status.IN_PROGRESS
248291
)
249-
count = await ensure_async(self.scheduler.count_jobs(count_jobs_query))
250-
251-
self.finish(json.dumps(dict(count=count)))
292+
try:
293+
count = await ensure_async(self.scheduler.count_jobs(count_jobs_query))
294+
except SchedulerError as e:
295+
self.log.exception(e)
296+
raise tornado.web.HTTPError(500, str(e)) from e
297+
else:
298+
self.finish(json.dumps(dict(count=count)))
252299

253300

254301
class RuntimeEnvironmentsHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler):
@@ -304,7 +351,11 @@ def job_files_manager(self):
304351
@tornado.web.authenticated
305352
async def get(self, job_id):
306353
redownload = self.get_query_argument("redownload", False)
307-
await self.job_files_manager.copy_from_staging(job_id=job_id, redownload=redownload)
308-
309-
self.set_status(204)
310-
self.finish()
354+
try:
355+
await self.job_files_manager.copy_from_staging(job_id=job_id, redownload=redownload)
356+
except Exception as e:
357+
self.log.exception(e)
358+
raise tornado.web.HTTPError(500, str(e)) from e
359+
else:
360+
self.set_status(204)
361+
self.finish()

jupyter_scheduler/scheduler.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import random
23
import shutil
34
from multiprocessing import Process
45
from typing import Dict, Optional, Type, Union
@@ -682,3 +683,99 @@ def get_staging_paths(self, model: Union[DescribeJob, DescribeJobDefinition]) ->
682683
staging_paths["input"] = os.path.join(self.staging_path, model.job_id, model.input_filename)
683684

684685
return staging_paths
686+
687+
688+
class SchedulerWithErrors(Scheduler):
689+
"""
690+
Use only for testing exceptions, not to be used in production
691+
692+
This scheduler uses the default `Scheduler`, but randomly
693+
raises the `SchedulerError` to help view/test errors in the
694+
UI. To use this, specify the fully classified class name at
695+
at start up or add to one of the server config files.
696+
697+
Usage
698+
-----
699+
>> jupyter lab --SchedulerApp.scheduler_class=jupyter_scheduler.scheduler.SchedulerWithErrors
700+
"""
701+
702+
def _should_raise_error(self, probability=0.5):
703+
return random.random() < probability
704+
705+
def create_job(self, model: CreateJob) -> str:
706+
if self._should_raise_error():
707+
raise SchedulerError("Failed create job because of a deliberate exception.")
708+
else:
709+
return super().create_job(model)
710+
711+
def update_job(self, job_id: str, model: UpdateJob):
712+
if self._should_raise_error():
713+
raise SchedulerError("Failed update job because of a deliberate exception.")
714+
else:
715+
super().update_job(job_id, model)
716+
717+
def list_jobs(self, query: ListJobsQuery) -> ListJobsResponse:
718+
if self._should_raise_error():
719+
raise SchedulerError("Failed list jobs because of a deliberate exception.")
720+
else:
721+
return super().list_jobs(query)
722+
723+
def count_jobs(self, query: CountJobsQuery) -> int:
724+
if self._should_raise_error():
725+
raise SchedulerError("Failed count jobs because of a deliberate exception.")
726+
else:
727+
return super().count_jobs(query)
728+
729+
def get_job(self, job_id: str, job_files: Optional[bool] = True) -> DescribeJob:
730+
if self._should_raise_error():
731+
raise SchedulerError("Failed get job because of a deliberate exception.")
732+
else:
733+
return super().get_job(job_id, job_files)
734+
735+
def delete_job(self, job_id: str):
736+
if self._should_raise_error():
737+
raise SchedulerError("Failed delete job because of a deliberate exception.")
738+
else:
739+
super().delete_job(job_id)
740+
741+
def stop_job(self, job_id: str):
742+
if self._should_raise_error():
743+
raise SchedulerError("Failed stop job because of a deliberate exception.")
744+
else:
745+
super().stop_job(job_id)
746+
747+
def create_job_definition(self, model: CreateJobDefinition) -> str:
748+
if self._should_raise_error():
749+
raise SchedulerError("Failed create job definition because of a deliberate exception.")
750+
else:
751+
return super().create_job_definition(model)
752+
753+
def update_job_definition(self, job_definition_id: str, model: UpdateJobDefinition):
754+
if self._should_raise_error():
755+
raise SchedulerError("Failed update job definition because of a deliberate exception.")
756+
else:
757+
super().update_job_definition(job_definition_id, model)
758+
759+
def delete_job_definition(self, job_definition_id: str):
760+
if self._should_raise_error():
761+
raise SchedulerError("Failed delete job definition because of a deliberate exception.")
762+
else:
763+
super().delete_job_definition(job_definition_id)
764+
765+
def get_job_definition(self, job_definition_id: str) -> DescribeJobDefinition:
766+
if self._should_raise_error():
767+
raise SchedulerError("Failed get job definition because of a deliberate exception.")
768+
else:
769+
return super().get_job_definition(job_definition_id)
770+
771+
def list_job_definitions(self, query: ListJobDefinitionsQuery) -> ListJobDefinitionsResponse:
772+
if self._should_raise_error():
773+
raise SchedulerError("Failed list job definitions because of a deliberate exception.")
774+
else:
775+
return super().list_job_definitions(query)
776+
777+
def create_job_from_definition(self, job_definition_id: str, model: CreateJobFromDefinition):
778+
if self._should_raise_error():
779+
raise SchedulerError("Failed list jobs because of a deliberate exception.")
780+
else:
781+
return super().create_job_from_definition(job_definition_id, model)

0 commit comments

Comments
 (0)