@@ -19,18 +19,42 @@ Role Variables

`openhpc_packages`: additional OpenHPC packages to install

- `openhpc_enable`:
+ `openhpc_enable`:
* `control`: whether to enable control host
- * `batch`: whether to enable compute nodes
+ * `batch`: whether to enable compute nodes
* `runtime`: whether to enable OpenHPC runtime
+ * `drain`: whether to drain compute nodes
+ * `resume`: whether to resume compute nodes
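As a rough sketch only (not part of this commit), these flags are booleans that might be set per inventory group, for instance via `group_vars`; the file layout and the extra package named below are illustrative assumptions rather than values taken from this role:

    # group_vars/cluster_control.yml (hypothetical layout; the control host also gets the runtime)
    openhpc_enable:
      control: true
      runtime: true
    openhpc_packages:
      - lmod-defaults-gnu8-openmpi3-ohpc   # example extra OpenHPC package; substitute as required
    ---
    # group_vars/cluster_batch.yml (hypothetical layout for the compute nodes)
    openhpc_enable:
      batch: true
      runtime: true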

- Example Playbook
+ Example Inventory
+ -----------------
+
+ Define an Ansible inventory like this:
+
+     [openhpc_login]
+     openhpc-login-0 ansible_host=10.60.253.40 ansible_user=centos
+
+     [openhpc_compute]
+     openhpc-compute-0 ansible_host=10.60.253.31 ansible_user=centos
+     openhpc-compute-1 ansible_host=10.60.253.32 ansible_user=centos
+
+     [cluster_login:children]
+     openhpc_login
+
+     [cluster_control:children]
+     openhpc_login
+
+     [cluster_batch:children]
+     openhpc_compute
+
+ Example Playbooks
----------------
-
+
To deploy, create a playbook which looks like this:

    ---
    - hosts:
+       - cluster_login
      - cluster_control
      - cluster_batch
      become: yes
@@ -53,19 +77,52 @@ To deploy, create a playbook which looks like this:
          openhpc_packages: []
    ...

- Example Inventory
- -----------------
-
- And an Ansible inventory as this:
-
-     [openhpc_login]
-     openhpc-login-0 ansible_host=10.60.253.40 ansible_user=centos

-     [openhpc_compute]
-     openhpc-compute-0 ansible_host=10.60.253.33 ansible_user=centos
+ For example, to drain nodes before scaling the cluster down to 6 nodes:

-     [cluster_control:children]
-     openhpc_login
+     ---
+     - hosts: openstack
+       gather_facts: false
+       vars:
+         partition: "{{ cluster_group.output_value | selectattr('group', 'equalto', item.name) | list }}"
+         openhpc_slurm_partitions:
+           - name: "compute"
+             flavor: "compute-A"
+             image: "CentOS7.5-OpenHPC"
+             num_nodes: 6
+             user: "centos"
+         openhpc_cluster_name: openhpc
+       roles:
+         # Our stackhpc.cluster-infra role can be invoked in `query` mode, which
+         # looks up the state of the cluster by querying the Heat API.
+         - role: stackhpc.cluster-infra
+           cluster_name: "{{ cluster_name }}"
+           cluster_state: query
+           cluster_params:
+             cluster_groups: "{{ cluster_groups }}"
+       tasks:
+         # Given that the original cluster had 8 nodes and the resized cluster
+         # should have 6, the computed desired_state variable stores the list of
+         # instances to leave untouched.
+         - name: Count the number of compute nodes per slurm partition
+           set_fact:
+             desired_state: "{{ (( partition | first).nodes | map(attribute='name') | list )[:item.num_nodes] + desired_state | default([]) }}"
+           when: partition | length > 0
+           with_items: "{{ openhpc_slurm_partitions }}"
+         - debug: var=desired_state
+
+     - hosts: cluster_batch
+       become: yes
+       vars:
+         desired_state: "{{ hostvars['localhost']['desired_state'] | default([]) }}"
+       roles:
+         # Here the stackhpc.openhpc role is invoked in drain/resume mode:
+         # instances listed in desired_state are resumed (if currently drained)
+         # and all other instances are drained.
+         - role: stackhpc.openhpc
+           openhpc_slurm_control_host: "{{ groups['cluster_control'] | first }}"
+           openhpc_enable:
+             drain: "{{ inventory_hostname not in desired_state }}"
+             resume: "{{ inventory_hostname in desired_state }}"
+     ...

-     [cluster_batch:children]
-     openhpc_compute
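As an illustrative aside (not part of this commit), the `desired_state` computation in the playbook above can be traced with invented data; the shape assumed here for the Heat output `cluster_group.output_value` and all hostnames are hypothetical:

    # Hypothetical Heat output consumed by the set_fact task (invented data).
    cluster_group:
      output_value:
        - group: "compute"
          nodes:
            - {name: "openhpc-compute-0"}
            - {name: "openhpc-compute-1"}
            - {name: "openhpc-compute-2"}
            - {name: "openhpc-compute-3"}
            - {name: "openhpc-compute-4"}
            - {name: "openhpc-compute-5"}
            - {name: "openhpc-compute-6"}
            - {name: "openhpc-compute-7"}

    # With num_nodes: 6 the slice [:item.num_nodes] keeps the first six names,
    # so desired_state would become:
    desired_state:
      - openhpc-compute-0
      - openhpc-compute-1
      - openhpc-compute-2
      - openhpc-compute-3
      - openhpc-compute-4
      - openhpc-compute-5
    # The remaining hosts (openhpc-compute-6 and -7) fail the "in desired_state"
    # test, so the second play drains them while the first six are resumed.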