Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions sky/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,32 @@ def _start(
controller_autostop_config.enabled):
idle_minutes_to_autostop = controller_autostop_config.idle_minutes
down = controller_autostop_config.down
else:
# For non-controller clusters, restore autostop configuration from
# database if not explicitly provided.
if idle_minutes_to_autostop is None:
cluster_record = global_user_state.get_cluster_from_name(
cluster_name, include_user_info=False, summary_response=True)
if cluster_record is not None:
stored_autostop = cluster_record.get('autostop', -1)
stored_to_down = cluster_record.get('to_down', False)
# Restore autostop if it was previously set (autostop > 0)
if stored_autostop > 0:
logger.warning(f'Restoring cluster {cluster_name!r} with '
f'autostop set to {stored_autostop} minutes'
f'. To turn off autostop, run: '
f'`sky autostop {cluster_name} --cancel`')
idle_minutes_to_autostop = stored_autostop
# Only restore 'down' if it was explicitly set and we're
# restoring autostop
if stored_to_down:
down = stored_to_down
elif stored_autostop == 0:
logger.warning(
f'Autostop was previously set to 0 minutes '
f'for cluster {cluster_name!r} so it will '
'not be restored. To turn on autostop, run: '
f'`sky autostop {cluster_name} -i <minutes>`')

usage_lib.record_cluster_name_for_current_operation(cluster_name)

Expand Down
51 changes: 51 additions & 0 deletions tests/smoke_tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,57 @@ def test_launch_fast_with_autostop(generic_cloud: str):
smoke_tests_utils.run_one_test(test)


# The cloud exclusions below are explained in test_autostop.
@pytest.mark.no_fluidstack
@pytest.mark.no_lambda_cloud
@pytest.mark.no_ibm
@pytest.mark.no_kubernetes
@pytest.mark.no_hyperbolic
@pytest.mark.no_shadeform
@pytest.mark.no_seeweb
def test_start_preserves_autostop(generic_cloud: str):
    """Check that a plain `sky start` keeps the previously set autostop.

    Scenario: launch with `-i 1`, wait until the cluster is STOPPED, bring
    it back with `sky start` (no `-i` flag), confirm `sky status` still
    shows "1m", and finally wait for the cluster to be STOPPED a second
    time.

    Args:
        generic_cloud: Name of the cloud under test; passed to `--infra`
            and used to pick the wait budgets.
    """
    cluster = smoke_tests_utils.get_cluster_name()

    # Azure gets a larger budget for the autostop wait than other clouds.
    if generic_cloud == 'azure':
        stop_wait = 600
    else:
        stop_wait = 250

    def wait_until(status, limit):
        # Thin wrapper that keeps the command list below readable.
        return smoke_tests_utils.get_cmd_wait_until_cluster_status_contains(
            cluster_name=cluster, cluster_status=[status], timeout=limit)

    # Same `sky status` check is run before the stop and after the restart.
    autostop_is_shown = f'sky status | grep {cluster} | grep "1m"'

    commands = [
        # Bring the cluster up with a one-minute autostop.
        f's=$(SKYPILOT_DEBUG=0 sky launch -y -c {cluster} --infra {generic_cloud} -i 1 {smoke_tests_utils.LOW_RESOURCE_ARG} tests/test_yamls/minimal.yaml) && {smoke_tests_utils.VALIDATE_LAUNCH_OUTPUT}',
        f'sky logs {cluster} 1 --status',
        f'sky status -r {cluster} | grep UP',
        # The autostop value must be visible right after launch.
        autostop_is_shown,
        # First autostop: the cluster should reach STOPPED.
        wait_until(sky.ClusterStatus.STOPPED, stop_wait),
        # Restart with no -i flag; the earlier setting is expected to stay.
        f'sky start -y {cluster}',
        wait_until(sky.ClusterStatus.UP,
                   smoke_tests_utils.get_timeout(generic_cloud)),
        # The autostop value must still be visible after the restart.
        autostop_is_shown,
        # Second autostop: reaching STOPPED again shows it is still active.
        wait_until(sky.ClusterStatus.STOPPED, stop_wait),
    ]

    # Overall budget: the per-cloud timeout plus the two autostop waits.
    test = smoke_tests_utils.Test(
        'test_start_preserves_autostop',
        commands,
        f'sky down -y {cluster}',
        timeout=smoke_tests_utils.get_timeout(generic_cloud) + 2 * stop_wait,
    )
    smoke_tests_utils.run_one_test(test)


# We override the AWS config to force the cluster to relaunch, so only run the
# test on AWS.
@pytest.mark.aws
Expand Down
14 changes: 9 additions & 5 deletions tests/smoke_tests/test_cluster_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -1341,9 +1341,11 @@ def test_autostop_wait_for_jobs(generic_cloud: str):
cluster_status=[sky.ClusterStatus.STOPPED],
timeout=autostop_timeout),

# Ensure the cluster is UP and the autostop setting is reset ('-').
f'sky start -y {name}',
f'sky status | grep {name} | grep -E "UP\s+-"',
# Ensure the cluster is UP.
# Change the autostop setting to be very high so we can test
# resetting it.
f'sky start -y {name} -i 500',
f'sky status | grep {name} | grep "UP"',

# Ensure the job succeeded.
f'sky exec {name} tests/test_yamls/minimal.yaml',
Expand All @@ -1361,8 +1363,10 @@ def test_autostop_wait_for_jobs(generic_cloud: str):
timeout=autostop_timeout),

# Test restarting the idleness timer via exec:
f'sky start -y {name}',
f'sky status | grep {name} | grep -E "UP\s+-"',
# Change the autostop setting to be very high so we can test
# resetting it.
f'sky start -y {name} -i 500',
f'sky status | grep {name} | grep "UP"',
f'sky autostop -y {name} -i 1 --wait-for jobs', # Idleness starts counting.
'sleep 45', # Almost reached the threshold.
f'sky exec {name} echo hi', # Should restart the timer.
Expand Down
Loading