1414import traceback
1515import requests
1616from redis .exceptions import ConnectionError as RedisConnectionError
17- from urllib .parse import urlparse
1817
1918from augur .tasks .start_tasks import augur_collection_monitor , create_collection_status_records
2019from augur .tasks .git .facade_tasks import clone_repos
2120from augur .tasks .github .contributors import process_contributors
2221from augur .tasks .github .util .github_api_key_handler import GithubApiKeyHandler
2322from augur .tasks .gitlab .gitlab_api_key_handler import GitlabApiKeyHandler
2423from augur .tasks .data_analysis .contributor_breadth_worker .contributor_breadth_worker import contributor_breadth_model
25- from augur .tasks .init .redis_connection import get_redis_connection
2624from augur .application .db .models import UserRepo
2725from augur .application .db .session import DatabaseSession
2826from augur .application .logs import AugurLogger
27+ from augur .application .service_manager import AugurServiceManager , cleanup_collection_status_and_rabbit , clean_collection_status
2928from augur .application .db .lib import get_value
3029from augur .application .cli import test_connection , test_db_connection , with_database , DatabaseContext
3130import sqlalchemy as s
3635
3736logger = AugurLogger ("augur" , reset_logfiles = reset_logs ).get_logger ()
3837
39-
4038@click .group ('server' , short_help = 'Commands for controlling the backend API server & data collection workers' )
4139@click .pass_context
4240def cli (ctx ):
@@ -55,17 +53,23 @@ def start(ctx, disable_collection, development, pidfile, port):
5553 """Start Augur's backend server."""
5654 with open (pidfile , "w" ) as pidfile_io :
5755 pidfile_io .write (str (os .getpid ()))
58-
56+
57+ manager = AugurServiceManager (ctx , pidfile , disable_collection )
58+
59+ # Register signal handlers for graceful shutdown
60+ signal .signal (signal .SIGTERM , manager .shutdown_signal_handler )
61+ signal .signal (signal .SIGINT , manager .shutdown_signal_handler )
62+
5963 try :
6064 if os .environ .get ('AUGUR_DOCKER_DEPLOY' ) != "1" :
6165 raise_open_file_limit (100000 )
62- except Exception as e :
66+ except Exception as e :
6367 logger .error (
6468 '' .join (traceback .format_exception (None , e , e .__traceback__ )))
65-
69+
6670 logger .error ("Failed to raise open file limit!" )
6771 raise e
68-
72+
6973 if development :
7074 os .environ ["AUGUR_DEV" ] = "1"
7175 logger .info ("Starting in development mode" )
@@ -101,6 +105,7 @@ def start(ctx, disable_collection, development, pidfile, port):
101105
102106 gunicorn_command = f"gunicorn -c { gunicorn_location } -b { host } :{ port } augur.api.server:app --log-file { gunicorn_log_file } "
103107 server = subprocess .Popen (gunicorn_command .split (" " ))
108+ manager .server = server
104109
105110 logger .info ("awaiting Gunicorn start" )
106111 while not server .poll ():
@@ -123,6 +128,7 @@ def start(ctx, disable_collection, development, pidfile, port):
123128 logger .info (f"The API is available at '{ api_response .json ()['route' ]} '" )
124129
125130 processes = start_celery_worker_processes ((core_worker_count , secondary_worker_count , facade_worker_count ), disable_collection )
131+ manager .processes = processes
126132
127133 celery_beat_schedule_db = os .getenv ("CELERYBEAT_SCHEDULE_DB" , "celerybeat-schedule.db" )
128134 if os .path .exists (celery_beat_schedule_db ):
@@ -132,8 +138,10 @@ def start(ctx, disable_collection, development, pidfile, port):
132138 log_level = get_value ("Logging" , "log_level" )
133139 celery_beat_process = None
134140 celery_command = f"celery -A augur.tasks.init.celery_app.celery_app beat -l { log_level .lower ()} -s { celery_beat_schedule_db } "
135- celery_beat_process = subprocess .Popen (celery_command .split (" " ))
141+ celery_beat_process = subprocess .Popen (celery_command .split (" " ))
142+ manager .celery_beat_process = celery_beat_process
136143 keypub = KeyPublisher ()
144+ manager .keypub = keypub
137145
138146 if not disable_collection :
139147 if os .environ .get ('AUGUR_DOCKER_DEPLOY' ) != "1" :
@@ -180,29 +188,15 @@ def start(ctx, disable_collection, development, pidfile, port):
180188 try :
181189 server .wait ()
182190 except KeyboardInterrupt :
183-
184- if server :
185- logger .info ("Shutting down server" )
186- server .terminate ()
187-
188- logger .info ("Shutting down all celery worker processes" )
189- for p in processes :
190- if p :
191- p .terminate ()
192-
193- if celery_beat_process :
194- logger .info ("Shutting down celery beat process" )
195- celery_beat_process .terminate ()
196-
197- if not disable_collection :
198-
191+ # Signal handler will take care of cleanup
192+ pass
193+ finally :
194+ # Ensure pidfile is cleaned up if we exit normally
195+ if os .path .exists (pidfile ):
199196 try :
200- keypub .shutdown ()
201- cleanup_collection_status_and_rabbit (logger , ctx .obj .engine )
202- except RedisConnectionError :
203- pass
204-
205- os .unlink (pidfile )
197+ os .unlink (pidfile )
198+ except OSError as e :
199+ logger .error (f"Could not remove pidfile { pidfile } : { e } " )
206200
207201def start_celery_worker_processes (worker_counts : tuple [int , int , int ], disable_collection = False ):
208202 """
@@ -344,84 +338,6 @@ def augur_stop(signal, logger, engine):
344338 cleanup_collection_status_and_rabbit (logger , engine )
345339
346340
347- def cleanup_collection_status_and_rabbit (logger , engine ):
348- clear_redis_caches ()
349-
350- connection_string = get_value ("RabbitMQ" , "connection_string" )
351-
352- with DatabaseSession (logger , engine = engine ) as session :
353-
354- clean_collection_status (session )
355-
356- clear_rabbitmq_messages (connection_string )
357-
358- def clear_redis_caches ():
359- """Clears the redis databases that celery and redis use."""
360-
361- logger .info ("Flushing all redis databases this instance was using" )
362- celery_purge_command = "celery -A augur.tasks.init.celery_app.celery_app purge -f"
363- subprocess .call (celery_purge_command .split (" " ))
364-
365- redis_connection = get_redis_connection ()
366- redis_connection .flushdb ()
367-
368- def clear_all_message_queues (connection_string ):
369- queues = ['celery' ,'secondary' ,'scheduling' ,'facade' ]
370-
371- virtual_host_string = connection_string .split ("/" )[- 1 ]
372-
373- #Parse username and password with urllib
374- parsed = urlparse (connection_string )
375-
376- for q in queues :
377- curl_cmd = f"curl -i -u { parsed .username } :{ parsed .password } -XDELETE http://localhost:15672/api/queues/{ virtual_host_string } /{ q } "
378- subprocess .call (curl_cmd .split (" " ),stdout = subprocess .PIPE , stderr = subprocess .PIPE )
379-
380-
381- def clear_rabbitmq_messages (connection_string ):
382- #virtual_host_string = connection_string.split("/")[-1]
383-
384- logger .info ("Clearing all messages from celery queue in rabbitmq" )
385- from augur .tasks .init .celery_app import celery_app
386- celery_app .control .purge ()
387-
388- clear_all_message_queues (connection_string )
389- #rabbitmq_purge_command = f"sudo rabbitmqctl purge_queue celery -p {virtual_host_string}"
390- #subprocess.call(rabbitmq_purge_command.split(" "))
391-
392- #Make sure that database reflects collection status when processes are killed/stopped.
393- def clean_collection_status (session ):
394- session .execute_sql (s .sql .text ("""
395- UPDATE augur_operations.collection_status
396- SET core_status='Pending',core_task_id = NULL
397- WHERE core_status='Collecting' AND core_data_last_collected IS NULL;
398-
399- UPDATE augur_operations.collection_status
400- SET core_status='Success',core_task_id = NULL
401- WHERE core_status='Collecting' AND core_data_last_collected IS NOT NULL;
402-
403- UPDATE augur_operations.collection_status
404- SET secondary_status='Pending',secondary_task_id = NULL
405- WHERE secondary_status='Collecting' AND secondary_data_last_collected IS NULL;
406-
407- UPDATE augur_operations.collection_status
408- SET secondary_status='Success',secondary_task_id = NULL
409- WHERE secondary_status='Collecting' AND secondary_data_last_collected IS NOT NULL;
410-
411- UPDATE augur_operations.collection_status
412- SET facade_status='Update', facade_task_id=NULL
413- WHERE facade_status LIKE '%Collecting%' and facade_data_last_collected IS NULL;
414-
415- UPDATE augur_operations.collection_status
416- SET facade_status='Success', facade_task_id=NULL
417- WHERE facade_status LIKE '%Collecting%' and facade_data_last_collected IS NOT NULL;
418-
419- UPDATE augur_operations.collection_status
420- SET facade_status='Pending', facade_task_id=NULL
421- WHERE facade_status='Failed Clone' OR facade_status='Initializing';
422- """ ))
423- #TODO: write timestamp for currently running repos.
424-
425341def assign_orphan_repos_to_default_user (session ):
426342 query = s .sql .text ("""
427343 SELECT repo_id FROM repo WHERE repo_id NOT IN (SELECT repo_id FROM augur_operations.user_repos)
0 commit comments