Skip to content

Commit 3d58563

Browse files
committed
Only check for yellow cluster condition after we time out
1 parent c5a3391 commit 3d58563

File tree

1 file changed

+51
-34
lines changed

1 file changed

+51
-34
lines changed

roles/elasticsearch/tasks/elasticsearch-rolling-stop.yml

Lines changed: 51 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -33,48 +33,65 @@
3333
retries: 5
3434
delay: 30
3535

36-
# this step is key!!! Don't restart more nodes until we can safely do so. This either requires a green cluster status, or a yellow status with 0 initializing or relocating shards
37-
#
38-
# From https://www.elastic.co/guide/en/elastic-stack/8.17/upgrading-elasticsearch.html
39-
## During a rolling upgrade, primary shards assigned to a node running the new version cannot have their replicas assigned to a node with the old version. The new version might have a different data format that is not understood by the old version.
40-
##
41-
## If it is not possible to assign the replica shards to another node (there is only one upgraded node in the cluster), the replica shards remain unassigned and status stays yellow.
42-
##
43-
## In this case, you can proceed once there are no initializing or relocating shards (check the init and relo columns).
44-
# Health gate as it stood before this change (the `NN-` gutter markers appear to
# mark these lines as removed by the commit).
# Issues GET /_cluster/health and repeats the request (retries: 50, delay: 30)
# until the reported status is 'green', or is 'yellow' with both
# relocating_shards and initializing_shards equal to 0.
- name: Wait for cluster health to return to green
45-
ansible.builtin.uri:
46-
url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/health"
47-
method: GET
48-
user: elastic
49-
password: "{{ elasticstack_password.stdout }}"
50-
validate_certs: no
51-
register: response
52-
until: "response.json.status == 'green' or
53-
( response.json.status == 'yellow' and
54-
response.json.relocating_shards == 0 and
55-
response.json.initializing_shards == 0
56-
)"
57-
retries: 50
58-
delay: 30
5936

60-
# Extra safety in case we continue with a yellow cluster
61-
# Wait a short time, then check cluster status again
62-
- name: "Attempting to contune with yellow cluster health"
63-
when: "response.json.status == 'yellow'"
64-
block:
65-
- name: "Cluster health yellow: Wait before verifying status"
66-
ansible.builtin.pause:
67-
seconds: 10
37+
#
38+
# Start cluster health check
39+
#
6840

69-
- name: "Cluster health yellow: Verify we can safely continue"
41+
# this step is key!!! Don't restart more nodes until we can safely do so. This either requires a green cluster status, or a yellow status with 0 initializing or relocating shards
42+
#
43+
# From https://www.elastic.co/guide/en/elastic-stack/8.17/upgrading-elasticsearch.html
44+
## During a rolling upgrade, primary shards assigned to a node running the new version cannot have their replicas assigned to a node with the old version. The new version might have a different data format that is not understood by the old version.
45+
##
46+
## If it is not possible to assign the replica shards to another node (there is only one upgraded node in the cluster), the replica shards remain unassigned and status stays yellow.
47+
##
48+
## In this case, you can proceed once there are no initializing or relocating shards (check the init and relo columns).
49+
50+
# Health gate between node restarts.
#
# block:  poll GET /_cluster/health until the status is 'green'
#         (retries: 50, delay: 30).
# rescue: runs only when the block fails (e.g. the wait above ran out of
#         retries). The cluster may then continue if it is 'green' after all,
#         or 'yellow' with no relocating and no initializing shards. The check
#         is made twice, 10 seconds apart, so a momentarily quiet cluster is
#         not mistaken for a settled one.
#
# The latest health response is left in the registered variable `response`.
- name: Check cluster health
  block:
    - name: Wait for cluster health to return to green
      ansible.builtin.uri:
        url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/health"
        method: GET
        user: elastic
        password: "{{ elasticstack_password.stdout }}"
        # NOTE(review): TLS certificate validation is disabled here - confirm
        # this is intentional for the target environment.
        validate_certs: false
      register: response
      # `response.json is defined` keeps the wait retrying when a reply has no
      # JSON body (for example a connection error while a node restarts)
      # instead of dereferencing an undefined attribute.
      until: "response.json is defined and response.json.status == 'green'"
      retries: 50
      delay: 30

  # Timed out while waiting for green cluster
  # Check if we can continue with a yellow cluster
  rescue:
    - name: "Rescue: Check if cluster health is yellow"
      ansible.builtin.uri:
        url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/health"
        method: GET
        user: elastic
        password: "{{ elasticstack_password.stdout }}"
        validate_certs: false
      register: response
      # A cluster that turned green between the last retry and this request is
      # healthy and must not fail the play; yellow is accepted only when no
      # shards are relocating or initializing.
      failed_when: >-
        response.json.status != 'green' and
        (response.json.status != 'yellow' or
        response.json.relocating_shards != 0 or
        response.json.initializing_shards != 0)

    # Wait a short time, then check the cluster status again.
    - name: "Rescue: Wait before verifying status"
      ansible.builtin.pause:
        seconds: 10

    - name: "Rescue: Verify we can safely continue with yellow cluster"
      ansible.builtin.uri:
        url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/health"
        method: GET
        user: elastic
        password: "{{ elasticstack_password.stdout }}"
        validate_certs: false
      register: response
      # Same acceptance rule as the first rescue check.
      failed_when: >-
        response.json.status != 'green' and
        (response.json.status != 'yellow' or
        response.json.relocating_shards != 0 or
        response.json.initializing_shards != 0)
90+
91+
#
92+
# End cluster health check
93+
#
94+
7895

7996
# Disabling shard allocation right after enabling it seems redundant. Please see above for details.
8097
- name: Disable shard allocation for the cluster

0 commit comments

Comments
 (0)