diff --git a/sky/client/cli/command.py b/sky/client/cli/command.py index 2a641398b91..055c338e340 100644 --- a/sky/client/cli/command.py +++ b/sky/client/cli/command.py @@ -3040,34 +3040,6 @@ def _hint_or_raise_for_down_jobs_controller(controller_name: str, # there is no in-prgress managed jobs. managed_jobs_ = [] pools_ = [] - except exceptions.InconsistentConsolidationModeError: - # If this error is raised, it means the user switched to the - # consolidation mode but the previous controller cluster is still - # running. We should allow the user to tear down the controller - # cluster in this case. - with skypilot_config.override_skypilot_config( - {'jobs': { - 'controller': { - 'consolidation_mode': False - } - }}): - # Check again with the consolidation mode disabled. This is to - # make sure there is no in-progress managed jobs. - request_id, queue_result_version = ( - cli_utils.get_managed_job_queue( - refresh=False, - skip_finished=True, - all_users=True, - fields=fields, - )) - result = sdk.stream_and_get(request_id) - if queue_result_version.v2(): - managed_jobs_, _, status_counts, _ = result - else: - managed_jobs_ = typing.cast( - List[responses.ManagedJobRecord], result) - request_id_pools = managed_jobs.pool_status(pool_names=None) - pools_ = sdk.stream_and_get(request_id_pools) msg = (f'{colorama.Fore.YELLOW}WARNING: Tearing down the managed ' 'jobs controller. Please be aware of the following:' @@ -3144,21 +3116,6 @@ def _hint_or_raise_for_down_sky_serve_controller(controller_name: str, # controller being STOPPED or being firstly launched, i.e., there is # no in-prgress services. services = [] - except exceptions.InconsistentConsolidationModeError: - # If this error is raised, it means the user switched to the - # consolidation mode but the previous controller cluster is still - # running. We should allow the user to tear down the controller - # cluster in this case. - with skypilot_config.override_skypilot_config( - {'serve': { - 'controller': { - 'consolidation_mode': False - } - }}): - # Check again with the consolidation mode disabled. This is to - # make sure there is no in-progress services. - request_id = serve_lib.status(service_names=None) - services = sdk.stream_and_get(request_id) if services: service_names = [service['name'] for service in services] diff --git a/sky/exceptions.py b/sky/exceptions.py index 377f90a5df8..a2714f9b5ed 100644 --- a/sky/exceptions.py +++ b/sky/exceptions.py @@ -208,12 +208,6 @@ class InconsistentHighAvailabilityError(Exception): pass -class InconsistentConsolidationModeError(Exception): - """Raised when the consolidation mode property in the user config - is inconsistent with the actual cluster.""" - pass - - class ProvisionPrechecksError(Exception): """Raised when a managed job fails prechecks before provision. diff --git a/sky/jobs/utils.py b/sky/jobs/utils.py index b6ecb17af10..d71a6956cf3 100644 --- a/sky/jobs/utils.py +++ b/sky/jobs/utils.py @@ -186,13 +186,11 @@ def _validate_consolidation_mode_config( controller_cn = ( controller_utils.Controllers.JOBS_CONTROLLER.value.cluster_name) if global_user_state.cluster_with_name_exists(controller_cn): - with ux_utils.print_exception_no_traceback(): - raise exceptions.InconsistentConsolidationModeError( - f'{colorama.Fore.RED}Consolidation mode for jobs is ' - f'enabled, but the controller cluster ' - f'{controller_cn} is still running. Please ' - 'terminate the controller cluster first.' - f'{colorama.Style.RESET_ALL}') + logger.warning( + f'{colorama.Fore.RED}Consolidation mode for jobs is enabled, ' + f'but the controller cluster {controller_cn} is still running. ' + 'Please terminate the controller cluster first.' + f'{colorama.Style.RESET_ALL}') else: total_jobs = managed_job_state.get_managed_jobs_total() if total_jobs > 0: @@ -200,13 +198,11 @@ def _validate_consolidation_mode_config( managed_job_state.get_nonterminal_job_ids_by_name( None, None, all_users=True)) if nonterminal_jobs: - with ux_utils.print_exception_no_traceback(): - raise exceptions.InconsistentConsolidationModeError( - f'{colorama.Fore.RED}Consolidation mode ' - 'is disabled, but there are still ' - f'{len(nonterminal_jobs)} managed jobs ' - 'running. Please terminate those jobs ' - f'first.{colorama.Style.RESET_ALL}') + logger.warning( + f'{colorama.Fore.YELLOW}Consolidation mode is disabled, ' + f'but there are still {len(nonterminal_jobs)} managed jobs ' + 'running. Please terminate those jobs first.' + f'{colorama.Style.RESET_ALL}') else: logger.warning( f'{colorama.Fore.YELLOW}Consolidation mode is disabled, ' @@ -233,14 +229,11 @@ def is_consolidation_mode(on_api_restart: bool = False) -> bool: signal_file = pathlib.Path( _JOBS_CONSOLIDATION_RELOADED_SIGNAL_FILE).expanduser() - restart_signal_file_exists = signal_file.exists() - consolidation_mode = (config_consolidation_mode and - restart_signal_file_exists) - if on_api_restart: if config_consolidation_mode: signal_file.touch() else: + restart_signal_file_exists = signal_file.exists() if not restart_signal_file_exists: if config_consolidation_mode: logger.warning(f'{colorama.Fore.YELLOW}Consolidation mode for ' @@ -259,8 +252,8 @@ def is_consolidation_mode(on_api_restart: bool = False) -> bool: # have related config and will always seemingly disabled for consolidation # mode. Check #6611 for more details. if os.environ.get(constants.ENV_VAR_IS_SKYPILOT_SERVER) is not None: - _validate_consolidation_mode_config(consolidation_mode) - return consolidation_mode + _validate_consolidation_mode_config(config_consolidation_mode) + return config_consolidation_mode def ha_recovery_for_consolidation_mode() -> None: diff --git a/sky/serve/serve_utils.py b/sky/serve/serve_utils.py index 8dbbd0ad1af..80cf8b7362c 100644 --- a/sky/serve/serve_utils.py +++ b/sky/serve/serve_utils.py @@ -218,25 +218,23 @@ def _validate_consolidation_mode_config(current_is_consolidation_mode: bool, if current_is_consolidation_mode: controller_cn = controller.cluster_name if global_user_state.cluster_with_name_exists(controller_cn): - with ux_utils.print_exception_no_traceback(): - raise exceptions.InconsistentConsolidationModeError( - f'{colorama.Fore.RED}Consolidation mode for ' - f'{controller.controller_type} is enabled, but the ' - f'controller cluster {controller_cn} is still running. ' - 'Please terminate the controller cluster first.' - f'{colorama.Style.RESET_ALL}') + logger.warning( + f'{colorama.Fore.RED}Consolidation mode for ' + f'{controller.controller_type} is enabled, but the controller ' + f'cluster {controller_cn} is still running. Please terminate ' + 'the controller cluster first.' + f'{colorama.Style.RESET_ALL}') else: noun = 'pool' if pool else 'service' all_services = [ svc for svc in serve_state.get_services() if svc['pool'] == pool ] if all_services: - with ux_utils.print_exception_no_traceback(): - raise exceptions.InconsistentConsolidationModeError( - f'{colorama.Fore.RED}Consolidation mode for ' - f'{controller.controller_type} is disabled, but there are ' - f'still {len(all_services)} {noun}s running. Please ' - f'terminate those {noun}s first.{colorama.Style.RESET_ALL}') + logger.warning( + f'{colorama.Fore.RED}Consolidation mode for ' + f'{controller.controller_type} is disabled, but there are ' + f'still {len(all_services)} {noun}s running. Please terminate ' + f'those {noun}s first.{colorama.Style.RESET_ALL}') @annotations.lru_cache(scope='request', maxsize=1)