From d5d0a716860a9f440577962212e9498612347683 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Mon, 23 Jun 2025 14:03:50 +0100 Subject: [PATCH 1/4] Add filesystems docs --- docs/filesystems.md | 71 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 docs/filesystems.md diff --git a/docs/filesystems.md b/docs/filesystems.md new file mode 100644 index 000000000..8cc691848 --- /dev/null +++ b/docs/filesystems.md @@ -0,0 +1,71 @@ +# Overview + +The Slurm appliance supports mounting shared filesystems using [CephFS](https://docs.ceph.com/en/latest/cephfs/) via [OpenStack Manila](https://docs.openstack.org/manila/latest/). These docs explain: + +- How to create the shares in OpenStack Manila. + +- How to configure the Slurm Appliance to mount these Manila shares. + +- How to switch to a Manila share for a shared home directory. + +## Creating shares in OpenStack + +The Slurm appliance requires that the Manila shares already exist on the system. Follow the instructions below to do this. + +If this is the first time Manila is being used on the system, a CephFS share type will need to be created. You will need admin credentials to do this. + + ```bash + openstack share type create cephfs-type false --extra-specs storage_protocol=CEPHFS vendor_name=Ceph + ``` + +Once this exists, create a share using credentials for the Slurm project. An access rule also needs to be created, where the `access_to` argument (`openstack share access create `) is a user that will be created in Ceph. This needs to be globally unique in Ceph, so needs to be different for each OpenStack project. + + ```bash + openstack share create CephFS 300 --description 'Scratch dir for Slurm prod' --name slurm-production-scratch --share-type cephfs-type --wait + openstack share access create slurm-production-scratch cephx slurm-production + ``` + +## Configuring the Slurm Appliance for Manila + +To mount shares onto hosts in a group, add them to the `manila` group. 
+ + ```ini + [manila:children] + login + compute + ``` + +Set the version of Ceph which is running on the system. + + ```yaml + os_manila_mount_ceph_version: "18.2.4" + ``` + +Define the list of shares to be mounted, and the paths to mount them to. See the [stackhpc.os-manila-mount role](https://github.com/stackhpc/ansible-role-os-manila-mount) for further configuration options. + + ```yaml + os_manila_mount_shares: + - share_name: slurm-production-scratch + mount_path: /scratch + ``` + +### Shared home directory + +By default, the Slurm appliance configures the control node as an NFS server and exports a directory which is mounted on the other cluster nodes as `/home`. When using Manila + CephFS for the home directory instead, this will need to be disabled. To do this, set the tf var `home_volume_provisioning` to `None`. + +The `basic_users_homedir_server_path` home directory will need to be updated to point to this new shared directory. + + ```yaml + basic_users_homedir_server: "{{ groups['login'] | first }}" # if not mounting /home on control node + basic_users_homedir_server_path: /home + ``` + +Finally, add the home directory to the list of shares (the share should be already created in OpenStack). + + ```yaml + os_manila_mount_shares: + - share_name: slurm-production-scratch + mount_path: /scratch + - share_name: slurm-production-home + mount_path: /home + ``` From 858ae01534f77a439aae52f0db75bf78901f3a11 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Wed, 27 Aug 2025 16:23:28 +0100 Subject: [PATCH 2/4] Apply suggestions from code review Co-authored-by: Steve Brasier <33413598+sjpb@users.noreply.github.com> --- docs/filesystems.md | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/docs/filesystems.md b/docs/filesystems.md index 8cc691848..3c16cb0fa 100644 --- a/docs/filesystems.md +++ b/docs/filesystems.md @@ -30,7 +30,8 @@ Once this exists, create a share using credentials for the Slurm project. 
An acc To mount shares onto hosts in a group, add them to the `manila` group. ```ini - [manila:children] + # environments/site/inventory/groups: + [manila:children] login compute ``` @@ -38,14 +39,16 @@ To mount shares onto hosts in a group, add them to the `manila` group. Set the version of Ceph which is running on the system. ```yaml + # environments/site/inventory/group_vars/manila.yml: os_manila_mount_ceph_version: "18.2.4" ``` -Define the list of shares to be mounted, and the paths to mount them to. See the [stackhpc.os-manila-mount role](https://github.com/stackhpc/ansible-role-os-manila-mount) for further configuration options. +Define the list of shares to be mounted, and the paths to mount them to. The example below parameterises the share name using the environment name. See the [stackhpc.os-manila-mount role](https://github.com/stackhpc/ansible-role-os-manila-mount) for further configuration options. ```yaml + # environments/site/inventory/group_vars/manila.yml: os_manila_mount_shares: - - share_name: slurm-production-scratch + - share_name: "slurm-{{ appliances_environment_name }}-scratch" mount_path: /scratch ``` @@ -53,9 +56,10 @@ Define the list of shares to be mounted, and the paths to mount them to. See the By default, the Slurm appliance configures the control node as an NFS server and exports a directory which is mounted on the other cluster nodes as `/home`. When using Manila + CephFS for the home directory instead, this will need to be disabled. To do this, set the tf var `home_volume_provisioning` to `None`. -The `basic_users_homedir_server_path` home directory will need to be updated to point to this new shared directory.
+Some `basic_users_homedir_*` parameters need overriding as the provided defaults are only satisfactory for the default root-squashed NFS share: ```yaml + # environments/site/inventory/group_vars/all/basic_users.yml: basic_users_homedir_server: "{{ groups['login'] | first }}" # if not mounting /home on control node basic_users_homedir_server_path: /home ``` @@ -63,9 +67,10 @@ The `basic_users_homedir_server_path` home directory will need to be updated to Finally, add the home directory to the list of shares (the share should be already created in OpenStack). ```yaml + # environments/site/inventory/group_vars/manila.yml: os_manila_mount_shares: - - share_name: slurm-production-scratch + - share_name: "slurm-{{ appliances_environment_name }}-scratch" mount_path: /scratch - - share_name: slurm-production-home + - share_name: "slurm-{{ appliances_environment_name }}-home" mount_path: /home ``` From d36f9f5dc784cca3c74a9225f6f2bb8be55bf795 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Wed, 27 Aug 2025 16:40:07 +0100 Subject: [PATCH 3/4] Update Ceph instructions for Manila integrations --- docs/filesystems.md | 16 +++++++++++++--- .../common/inventory/group_vars/all/manila.yml | 6 +++++- .../inventory/group_vars/all/os-manila-mount.yml | 3 --- 3 files changed, 18 insertions(+), 7 deletions(-) delete mode 100644 environments/common/inventory/group_vars/all/os-manila-mount.yml diff --git a/docs/filesystems.md b/docs/filesystems.md index 3c16cb0fa..24983c647 100644 --- a/docs/filesystems.md +++ b/docs/filesystems.md @@ -1,6 +1,6 @@ # Overview -The Slurm appliance supports mounting shared filesystems using [CephFS](https://docs.ceph.com/en/latest/cephfs/) via [OpenStack Manila](https://docs.openstack.org/manila/latest/). These docs explain: +The Slurm appliance supports mounting shared filesystems using [CephFS](https://docs.ceph.com/en/latest/cephfs/) via [OpenStack Manila](https://docs.openstack.org/manila/latest/).
This section explains: - How to create the shares in OpenStack Manila. @@ -18,7 +18,7 @@ If this is the first time Manila is being used on the system, a CephFS share typ openstack share type create cephfs-type false --extra-specs storage_protocol=CEPHFS vendor_name=Ceph ``` -Once this exists, create a share using credentials for the Slurm project. An access rule also needs to be created, where the `access_to` argument (`openstack share access create `) is a user that will be created in Ceph. This needs to be globally unique in Ceph, so needs to be different for each OpenStack project. +Once this exists, create a share using credentials for the Slurm project. An access rule also needs to be created, where the `access_to` argument (`openstack share access create <share> <access_type> <access_to>`) is a user that will be created in Ceph. This needs to be globally unique in Ceph, so needs to be different for each OpenStack project. Ideally, the share name should include your environment name. In this example, the environment name is "production". ```bash openstack share create CephFS 300 --description 'Scratch dir for Slurm prod' --name slurm-production-scratch --share-type cephfs-type --wait @@ -36,13 +36,23 @@ To mount shares onto hosts in a group, add them to the `manila` group. compute ``` -Set the version of Ceph which is running on the system. +If you are running a different version of Ceph from the defaults in the [os-manila-mount role](https://github.com/stackhpc/ansible-role-os-manila-mount/blob/master/defaults/main.yml), you will need to update the package version by setting the following. ```yaml # environments/site/inventory/group_vars/manila.yml: os_manila_mount_ceph_version: "18.2.4" ``` +For the Ceph packages to be installed in the host image, the `builder` group also needs to be added to the `manila` group. + + ```ini + # environments/site/inventory/groups: + [manila:children] + login + compute + builder + ``` + Define the list of shares to be mounted, and the paths to mount them to.
The example below parameterises the share name using the environment name. See the [stackhpc.os-manila-mount role](https://github.com/stackhpc/ansible-role-os-manila-mount) for further configuration options. ```yaml diff --git a/environments/common/inventory/group_vars/all/manila.yml b/environments/common/inventory/group_vars/all/manila.yml index baccd4432..cb015f940 100644 --- a/environments/common/inventory/group_vars/all/manila.yml +++ b/environments/common/inventory/group_vars/all/manila.yml @@ -10,4 +10,8 @@ os_manila_mount_shares: [] # mount_group: # mount_mode: -# os_manila_mount_ceph_version: nautilus # role default for RockyLinux 8 +# os_manila_mount_ceph_version: + +# Empty repo lists from stackhpc.ansible-role-os-manila-mount role defaults, as these repofiles are +# now generated by dnf_repos to allow injecting Ark creds: +os_manila_mount_ceph_rpm_repos: [] diff --git a/environments/common/inventory/group_vars/all/os-manila-mount.yml b/environments/common/inventory/group_vars/all/os-manila-mount.yml deleted file mode 100644 index 6b25d62cb..000000000 --- a/environments/common/inventory/group_vars/all/os-manila-mount.yml +++ /dev/null @@ -1,3 +0,0 @@ -# Empty repo lists from stackhpc.ansible-role-os-manila-mount role defaults, as these repofiles are -# now generated by dnf_repos to allow injecting Ark creds: -os_manila_mount_ceph_rpm_repos: [] From 0351f67a4eb0288f64aaaadd5f2d1cf078ff0e89 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Wed, 27 Aug 2025 16:43:07 +0100 Subject: [PATCH 4/4] Update overview --- docs/filesystems.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/filesystems.md b/docs/filesystems.md index 24983c647..cebff00d1 100644 --- a/docs/filesystems.md +++ b/docs/filesystems.md @@ -1,5 +1,15 @@ # Overview +The Slurm appliance supports multiple ways of configuring shared filesystems, including: + +- Configuring the control node as an NFS server. 
(Default) + +- CephFS via Manila + +- Lustre + +# Manila + The Slurm appliance supports mounting shared filesystems using [CephFS](https://docs.ceph.com/en/latest/cephfs/) via [OpenStack Manila](https://docs.openstack.org/manila/latest/). This section explains: - How to create the shares in OpenStack Manila.