diff --git a/sky/core.py b/sky/core.py
index e9b0f5b5e77..594227d3838 100644
--- a/sky/core.py
+++ b/sky/core.py
@@ -498,6 +498,32 @@ def _start(
                 controller_autostop_config.enabled):
             idle_minutes_to_autostop = controller_autostop_config.idle_minutes
             down = controller_autostop_config.down
+    else:
+        # For non-controller clusters, restore autostop configuration from
+        # database if not explicitly provided.
+        if idle_minutes_to_autostop is None:
+            cluster_record = global_user_state.get_cluster_from_name(
+                cluster_name, include_user_info=False, summary_response=True)
+            if cluster_record is not None:
+                stored_autostop = cluster_record.get('autostop', -1)
+                stored_to_down = cluster_record.get('to_down', False)
+                # Restore autostop if it was previously set (autostop > 0)
+                if stored_autostop > 0:
+                    logger.warning(f'Restoring cluster {cluster_name!r} with '
+                                   f'autostop set to {stored_autostop} minutes'
+                                   '. To turn off autostop, run: '
+                                   f'`sky autostop {cluster_name} --cancel`')
+                    idle_minutes_to_autostop = stored_autostop
+                    # Only restore 'down' if it was explicitly set and we're
+                    # restoring autostop
+                    if stored_to_down:
+                        down = stored_to_down
+                elif stored_autostop == 0:
+                    logger.warning(
+                        'Autostop was previously set to 0 minutes '
+                        f'for cluster {cluster_name!r} so it will '
+                        'not be restored. To turn on autostop, run: '
+                        f'`sky autostop {cluster_name} -i <minutes>`')
 
     usage_lib.record_cluster_name_for_current_operation(cluster_name)
 
diff --git a/tests/smoke_tests/test_basic.py b/tests/smoke_tests/test_basic.py
index 16ee7d67441..ef325a87b68 100644
--- a/tests/smoke_tests/test_basic.py
+++ b/tests/smoke_tests/test_basic.py
@@ -328,6 +328,57 @@ def test_launch_fast_with_autostop(generic_cloud: str):
     smoke_tests_utils.run_one_test(test)
 
 
+# See cloud exclusion explanations in test_autostop
+@pytest.mark.no_fluidstack
+@pytest.mark.no_lambda_cloud
+@pytest.mark.no_ibm
+@pytest.mark.no_kubernetes
+@pytest.mark.no_hyperbolic
+@pytest.mark.no_shadeform
+@pytest.mark.no_seeweb
+def test_start_preserves_autostop(generic_cloud: str):
+    """Test that sky start preserves the autostop setting from the database."""
+    name = smoke_tests_utils.get_cluster_name()
+    autostop_timeout = 600 if generic_cloud == 'azure' else 250
+    test = smoke_tests_utils.Test(
+        'test_start_preserves_autostop',
+        [
+            # Launch cluster with autostop of 1 minute
+            f's=$(SKYPILOT_DEBUG=0 sky launch -y -c {name} --infra {generic_cloud} -i 1 {smoke_tests_utils.LOW_RESOURCE_ARG} tests/test_yamls/minimal.yaml) && {smoke_tests_utils.VALIDATE_LAUNCH_OUTPUT}',
+            f'sky logs {name} 1 --status',
+            f'sky status -r {name} | grep UP',
+            # Verify autostop is set
+            f'sky status | grep {name} | grep "1m"',
+
+            # Wait for cluster to be STOPPED from autostop
+            smoke_tests_utils.get_cmd_wait_until_cluster_status_contains(
+                cluster_name=name,
+                cluster_status=[sky.ClusterStatus.STOPPED],
+                timeout=autostop_timeout),
+
+            # Start the cluster without explicitly setting autostop - it should preserve the previous setting
+            f'sky start -y {name}',
+            # Wait for cluster to be UP
+            smoke_tests_utils.get_cmd_wait_until_cluster_status_contains(
+                cluster_name=name,
+                cluster_status=[sky.ClusterStatus.UP],
+                timeout=smoke_tests_utils.get_timeout(generic_cloud)),
+            # Verify autostop is still set (preserved from database)
+            f'sky status | grep {name} | grep "1m"',
+
+            # Wait for cluster to be STOPPED again from autostop (proving it was preserved)
+            smoke_tests_utils.get_cmd_wait_until_cluster_status_contains(
+                cluster_name=name,
+                cluster_status=[sky.ClusterStatus.STOPPED],
+                timeout=autostop_timeout),
+        ],
+        f'sky down -y {name}',
+        timeout=smoke_tests_utils.get_timeout(generic_cloud) +
+        2 * autostop_timeout,
+    )
+    smoke_tests_utils.run_one_test(test)
+
+
 # We override the AWS config to force the cluster to relaunch, so only run the
 # test on AWS.
 @pytest.mark.aws
diff --git a/tests/smoke_tests/test_cluster_job.py b/tests/smoke_tests/test_cluster_job.py
index 42f47ee9eea..f5ed083b789 100644
--- a/tests/smoke_tests/test_cluster_job.py
+++ b/tests/smoke_tests/test_cluster_job.py
@@ -1341,9 +1341,11 @@ def test_autostop_wait_for_jobs(generic_cloud: str):
                 cluster_status=[sky.ClusterStatus.STOPPED],
                 timeout=autostop_timeout),
 
-            # Ensure the cluster is UP and the autostop setting is reset ('-').
-            f'sky start -y {name}',
-            f'sky status | grep {name} | grep -E "UP\s+-"',
+            # Ensure the cluster is UP.
+            # Change the autostop setting to be very high so we can test
+            # resetting it.
+            f'sky start -y {name} -i 500',
+            f'sky status | grep {name} | grep "UP"',
 
             # Ensure the job succeeded.
             f'sky exec {name} tests/test_yamls/minimal.yaml',
@@ -1361,8 +1363,10 @@ def test_autostop_wait_for_jobs(generic_cloud: str):
                 timeout=autostop_timeout),
 
             # Test restarting the idleness timer via exec:
-            f'sky start -y {name}',
-            f'sky status | grep {name} | grep -E "UP\s+-"',
+            # Change the autostop setting to be very high so we can test
+            # resetting it.
+            f'sky start -y {name} -i 500',
+            f'sky status | grep {name} | grep "UP"',
             f'sky autostop -y {name} -i 1 --wait-for jobs',  # Idleness starts counting.
             'sleep 45',  # Almost reached the threshold.
             f'sky exec {name} echo hi',  # Should restart the timer.