Skip to content

Commit 3df7f67

Browse files
author
Bharat Kunwar
authored
Merge pull request #10 from stackhpc/drain-resume
Drain/resume toggle (v0.4.0)
2 parents 3eadb46 + 54509da commit 3df7f67

File tree

5 files changed

+139
-21
lines changed

5 files changed

+139
-21
lines changed

README.md

Lines changed: 74 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,42 @@ Role Variables
1919

2020
`openhpc_packages`: additional OpenHPC packages to install
2121

22-
`openhpc_enable`:
22+
`openhpc_enable`:
2323
* `control`: whether to enable control host
24-
* `batch`: whether to enable compute nodes
24+
* `batch`: whether to enable compute nodes
2525
* `runtime`: whether to enable OpenHPC runtime
26+
* `drain`: whether to drain compute nodes
27+
* `resume`: whether to resume compute nodes
2628

27-
Example Playbook
29+
Example Inventory
30+
-----------------
31+
32+
And an Ansible inventory like this:
33+
34+
[openhpc_login]
35+
openhpc-login-0 ansible_host=10.60.253.40 ansible_user=centos
36+
37+
[openhpc_compute]
38+
openhpc-compute-0 ansible_host=10.60.253.31 ansible_user=centos
39+
openhpc-compute-1 ansible_host=10.60.253.32 ansible_user=centos
40+
41+
[cluster_login:children]
42+
openhpc_login
43+
44+
[cluster_control:children]
45+
openhpc_login
46+
47+
[cluster_batch:children]
48+
openhpc_compute
49+
50+
Example Playbooks
2851
----------------
29-
52+
3053
To deploy, create a playbook which looks like this:
3154

3255
---
3356
- hosts:
57+
- cluster_login
3458
- cluster_control
3559
- cluster_batch
3660
become: yes
@@ -53,19 +77,52 @@ To deploy, create a playbook which looks like this:
5377
openhpc_packages: []
5478
...
5579

56-
Example Inventory
57-
-----------------
58-
59-
And an Ansible inventory as this:
60-
61-
[openhpc_login]
62-
openhpc-login-0 ansible_host=10.60.253.40 ansible_user=centos
6380

64-
[openhpc_compute]
65-
openhpc-compute-0 ansible_host=10.60.253.33 ansible_user=centos
81+
To drain nodes, for example, before scaling down the cluster to 6 nodes:
6682

67-
[cluster_control:children]
68-
openhpc_login
83+
---
84+
- hosts: openstack
85+
gather_facts: false
86+
vars:
87+
partition: "{{ cluster_group.output_value | selectattr('group', 'equalto', item.name) | list }}"
88+
openhpc_slurm_partitions:
89+
- name: "compute"
90+
flavor: "compute-A"
91+
image: "CentOS7.5-OpenHPC"
92+
num_nodes: 6
93+
user: "centos"
94+
openhpc_cluster_name: openhpc
95+
roles:
96+
# Our stackhpc.cluster-infra role can be invoked in `query` mode which
97+
# looks up the state of the cluster by querying the Heat API.
98+
- role: stackhpc.cluster-infra
99+
cluster_name: "{{ cluster_name }}"
100+
cluster_state: query
101+
cluster_params:
102+
cluster_groups: "{{ cluster_groups }}"
103+
tasks:
104+
# Given that the original cluster that was created had 8 nodes and the
105+
# cluster we want to create has 6 nodes, the computed desired_state
106+
# variable stores the list of instances to leave untouched.
107+
- name: Count the number of compute nodes per slurm partition
108+
set_fact:
109+
desired_state: "{{ (( partition | first).nodes | map(attribute='name') | list )[:item.num_nodes] + desired_state | default([]) }}"
110+
when: partition | length > 0
111+
with_items: "{{ openhpc_slurm_partitions }}"
112+
- debug: var=desired_state
113+
114+
- hosts: cluster_batch
115+
become: yes
116+
vars:
117+
desired_state: "{{ hostvars['localhost']['desired_state'] | default([]) }}"
118+
roles:
119+
# Now, the stackhpc.openhpc role is invoked in drain/resume modes where
120+
# the instances in desired_state are resumed if in a drained state and
121+
# drained if in a resumed state.
122+
- role: stackhpc.openhpc
123+
openhpc_slurm_control_host: "{{ groups['cluster_control'] | first }}"
124+
openhpc_enable:
125+
drain: "{{ inventory_hostname not in desired_state }}"
126+
resume: "{{ inventory_hostname in desired_state }}"
127+
...
69128

70-
[cluster_batch:children]
71-
openhpc_compute

defaults/main.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
---
22
openhpc_slurm_service_enabled: true
33
openhpc_slurm_service:
4-
openhpc_slurm_control_host:
4+
openhpc_slurm_control_host: "{{ inventory_hostname }}"
55
openhpc_slurm_partitions: []
66
openhpc_cluster_name:
77
openhpc_packages: []
8+
openhpc_drain_timeout: 86400
9+
openhpc_resume_timeout: 300
10+
openhpc_retry_delay: 10
811
openhpc_enable:
912
control: false
1013
batch: false
1114
runtime: false
15+
drain: false
16+
resume: false

tasks/drain.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
---
2+
# Ansible tasks to drain a Slurm compute node. Waits for the compute node to be
3+
# drained for up to a day by default.
4+
#
5+
# Variables:
6+
# - node_to_drain: compute node to drain
7+
# - drain_timeout: seconds to wait for node to drain, default is 86400.
8+
9+
- name: Get nodes in DRAINED state
10+
command: "sinfo --noheader --Node --format='%N' --states=DRAINED"
11+
register: drained_nodes_results
12+
changed_when: false
13+
14+
- name: Drain compute node
15+
command: "scontrol update nodename={{ inventory_hostname }} state=DRAIN reason='maintenance'"
16+
when: inventory_hostname not in drained_nodes_results.stdout_lines
17+
18+
- name: Check node has drained
19+
command: "sinfo --noheader --Node --format='%N' --states=DRAINED"
20+
register: drained_nodes
21+
until: "inventory_hostname in drained_nodes.stdout_lines"
22+
delay: "{{ openhpc_retry_delay }}"
23+
retries: "{{ (openhpc_drain_timeout/openhpc_retry_delay) | int }}"
24+
changed_when: false

tasks/main.yml

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,18 @@
11
---
22
- include: control.yml
3-
when: openhpc_enable.control | bool
3+
when: openhpc_enable.control | default(false) | bool
44

55
- include: compute.yml
6-
when: openhpc_enable.batch | bool
6+
when: openhpc_enable.batch | default(false) | bool
77

88
- include: runtime.yml
9-
when: openhpc_enable.runtime | bool
9+
when: openhpc_enable.runtime | default(false) | bool
10+
11+
- include: drain.yml
12+
when: openhpc_enable.drain | default(false) | bool
13+
delegate_to: "{{ openhpc_slurm_control_host }}"
14+
15+
- include: resume.yml
16+
when: openhpc_enable.resume | default(false) | bool
17+
delegate_to: "{{ openhpc_slurm_control_host }}"
1018
...

tasks/resume.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
---
2+
# Ansible tasks to resume a Slurm compute node. Waits for the compute node to
3+
# change state for up to 5 minutes by default.
4+
#
5+
# Variables:
6+
# - nodes_to_resume: compute node to resume
7+
# - resume_timeout: seconds to wait for node to resume, default is 300.
8+
9+
- name: Get nodes in ALLOC,IDLE states
10+
command: "sinfo --noheader --Node --format='%N' --states=ALLOC,IDLE"
11+
register: resumed_nodes_results
12+
changed_when: false
13+
14+
- name: Resume compute node
15+
command: "scontrol update nodename={{ inventory_hostname }} state=RESUME"
16+
when: inventory_hostname not in resumed_nodes_results.stdout_lines
17+
18+
- name: Check node has resumed
19+
command: "sinfo --noheader --Node --format='%N' --states=ALLOC,IDLE"
20+
register: resumed_nodes
21+
until: "inventory_hostname in resumed_nodes.stdout_lines"
22+
delay: "{{ openhpc_retry_delay }}"
23+
retries: "{{ (openhpc_resume_timeout/openhpc_retry_delay) | int }}"
24+
changed_when: false

0 commit comments

Comments
 (0)