@@ -10,7 +10,7 @@
 import zipfile
 from zipfile import ZipFile
 from celery.result import AsyncResult, result_from_tuple
-from celery import group, chain
+from celery import group
 
 import ijson
 import requests
@@ -24,7 +24,7 @@
 
 from core import settings
 from core.common.serializers import IdentifierSerializer
-from core.common.tasks import bulk_import_subtask, bulk_import_subtask_empty
+from core.common.tasks import bulk_import_subtask, bulk_import_subtask_empty, bulk_import_queue
 from core.common.tasks import import_finisher
 from core.code_systems.converter import CodeSystemConverter
 from core.common.utils import get_export_service
@@ -111,10 +111,11 @@ def import_async_result(self):
         return None
 
     def revoke(self):
-        import_group = self.import_async_result
-        while import_group is not None:
-            import_group.revoke()  # Revokes all tasks in a group
-            import_group = import_group.parent
+        import_final_task = self.import_async_result
+        import_final_task.revoke()
+        for task_id in self.subtask_ids:
+            child = AsyncResult(task_id)
+            child.revoke()
 
     @import_async_result.setter
     def import_async_result(self, import_async_result):
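
Note on the revoke change: the queue-driven workflow below no longer produces one group result whose parent chain can be walked, so revocation now targets the stored subtask ids directly. Celery's control.revoke also accepts a list of ids, so the same idea can be batched into a single broadcast. A minimal sketch, assuming a Celery app handle named app (revoke_import and its arguments are illustrative, not part of this change):

    from celery import Celery

    app = Celery('core')  # illustrative app handle, not from this diff

    def revoke_import(final_task_id, subtask_ids):
        # Revoke the finisher first so it cannot fire after its children stop.
        app.control.revoke(final_task_id)
        # control.revoke accepts a list of ids, revoking them in one broadcast.
        app.control.revoke(list(subtask_ids))
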
@@ -231,7 +232,6 @@ def run(self):  # pylint: disable=too-many-locals
         time_started = timezone.now()
         resource_types = ['CodeSystem', 'ValueSet', 'ConceptMap']
         resource_types.extend(ResourceImporter.get_resource_types())
-
         if not self.path.startswith('/'):  # not local path
             key = self.path
             protocol_index = key.find('://')
@@ -429,14 +429,14 @@ def calculate_batch_size(self, resources):
             for _, count in item.items():
                 all_count += count
         if all_count > 50000:
-            task_batch_size = all_count / 1000
+            task_batch_size = round(all_count / 1000)
         else:
             task_batch_size = self.MIN_BATCH_SIZE
         return task_batch_size
 
     def schedule_tasks(self, tasks):
         subtask_ids = []
-        chained_tasks = chain()
+        group_queue = []
         for task in tasks:
             group_tasks = []
             for group_task in task:
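
Why round() in calculate_batch_size above: in Python 3, / is true division and always returns a float, and a float cannot be used as a list slice bound, which is presumably how the batch size is consumed downstream. For example:

    >>> 120500 / 1000          # true division yields a float
    120.5
    >>> round(120500 / 1000)   # round() yields an int batch size
    120

Dividing by 1000 sizes each batch so the import splits into roughly a thousand of them; round() keeps that target while producing a valid integer.
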
@@ -446,14 +446,21 @@ def schedule_tasks(self, tasks):
                 group_tasks.append(bulk_import_subtask.si(group_task['path'], group_task['username'],
                                                           group_task['owner_type'], group_task['owner'],
                                                           group_task['resource_type'], group_task['files'])
-                                   .set(queue='concurrent', task_id=subtask_id))
+                                   .set(task_id=subtask_id))
             if len(group_tasks) == 1:  # Prevent Celery from converting the group to a single task
-                group_tasks.append(bulk_import_subtask_empty.si().set(queue='concurrent'))
+                group_tasks.append(bulk_import_subtask_empty.si())
+
+            group_queue.append(group(group_tasks))
+
+        final_task_id = uuid()
+        group_queue.append(import_finisher.si(self.task_id).set(task_id=final_task_id))
 
-            chained_tasks |= group(group_tasks)
-        chained_tasks |= import_finisher.si(self.task_id).set(queue='concurrent')
+        # Celery cannot handle a chain of groups containing hundreds of tasks, so we use a task that
+        # schedules each group once the previous group is done.
+        bulk_import_queue.si(group_queue).apply_async(queue='concurrent')
 
-        final_task = chained_tasks.apply_async(queue='concurrent')
+        # We pass the final task id so the caller can detect the end of execution and track progress.
+        final_task = AsyncResult(final_task_id)
         return final_task, subtask_ids
 
     def is_importable_file(self, file_name):
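
The body of bulk_import_queue is outside this diff (only its import from core.common.tasks is shown). Given the comment above (run each group, then schedule the next only after the previous one finishes), a minimal sketch of such a task might look like this; everything except the task name and the 'concurrent' queue is an assumption:

    from celery import Celery, signature

    app = Celery('core')  # illustrative app handle, not from this diff

    @app.task(ignore_result=True)
    def bulk_import_queue(remaining):
        # Sketch only; the real implementation in core.common.tasks may differ.
        if not remaining:
            return
        # Signatures arrive serialized as dicts, so rebuild the first one.
        current = signature(remaining[0])
        rest = remaining[1:]
        # Run the current group (or the trailing import_finisher) and, once it
        # completes, schedule this task again for the rest of the queue. This
        # avoids building one giant chain of groups up front.
        (current | bulk_import_queue.si(rest)).apply_async(queue='concurrent')

Under this shape the trailing import_finisher signature carries final_task_id, so AsyncResult(final_task_id) resolves only after every group has run, which is what lets the caller poll it for completion and progress.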