|
33 | 33 | retries: 5
|
34 | 34 | delay: 30
|
35 | 35 |
|
36 |
| - # this step is key!!! Don't restart more nodes |
37 |
| - # until all shards have completed recovery |
| 36 | + # this step is key!!! Don't restart more nodes until we can safely do so. This either requires a green cluster status, or a yellow status with 0 initializing or relocating shards |
| 37 | + # |
| 38 | + # From https://www.elastic.co/guide/en/elastic-stack/8.17/upgrading-elasticsearch.html |
| 39 | + ## During a rolling upgrade, primary shards assigned to a node running the new version cannot have their replicas assigned to a node with the old version. The new version might have a different data format that is not understood by the old version. |
| 40 | + ## |
| 41 | + ## If it is not possible to assign the replica shards to another node (there is only one upgraded node in the cluster), the replica shards remain unassigned and status stays yellow. |
| 42 | + ## |
| 43 | + ## In this case, you can proceed once there are no initializing or relocating shards (check the init and relo columns). |
38 | 44 | - name: Wait for cluster health to return to green
|
39 | 45 | ansible.builtin.uri:
|
40 | 46 | url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/health"
|
|
43 | 49 | password: "{{ elasticstack_password.stdout }}"
|
44 | 50 | validate_certs: no
|
45 | 51 | register: response
|
46 |
| - until: "response.json.status == 'green'" |
| 52 | + until: "response.json.status == 'green' or |
| 53 | + ( response.json.status == 'yellow' and |
| 54 | + response.json.relocating_shards == 0 and |
| 55 | + response.json.initializing_shards == 0 |
| 56 | + )" |
47 | 57 | retries: 50
|
48 | 58 | delay: 30
|
49 | 59 |
|
| 60 | +# Extra safety in case we continue with a yellow cluster |
| 61 | +# Wait a short time, then check cluster status again |
| 62 | +- name: "Attempting to continue with yellow cluster health" |
| 63 | + when: "response.json.status == 'yellow'" |
| 64 | + block: |
| 65 | + - name: "Cluster health yellow: Wait before verifying status" |
| 66 | + ansible.builtin.pause: |
| 67 | + seconds: 10 |
| 68 | + |
| 69 | + - name: "Cluster health yellow: Verify we can safely continue" |
| 70 | + ansible.builtin.uri: |
| 71 | + url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/health" |
| 72 | + method: GET |
| 73 | + user: elastic |
| 74 | + password: "{{ elasticstack_password.stdout }}" |
| 75 | + validate_certs: no |
| 76 | + register: response1 |
| 77 | + failed_when: "response1.json.relocating_shards != 0 or response1.json.initializing_shards != 0" |
| 78 | + |
50 | 79 | # Disabling shard allocation right after enabling it seems redundant. Please see above for details.
|
51 | 80 | - name: Disable shard allocation for the cluster
|
52 | 81 | ansible.builtin.uri:
|
|
0 commit comments