diff --git a/ansible/.gitignore b/ansible/.gitignore index 62c9a543c..bdf71fcb8 100644 --- a/ansible/.gitignore +++ b/ansible/.gitignore @@ -98,3 +98,5 @@ roles/* !roles/eessi/** !roles/topology/ !roles/topology/** +!roles/cvmfs_server/ +!roles/cvmfs_server/** diff --git a/ansible/extras.yml b/ansible/extras.yml index 08892e4ec..e00f4aee0 100644 --- a/ansible/extras.yml +++ b/ansible/extras.yml @@ -28,6 +28,17 @@ - import_role: name: basic_users +- name: Setup CernVM-FS server + hosts: cvmfs_server + tags: cvmfs_server + become: true + gather_facts: true + tasks: + - name: Install/configure CernVM-FS server + ansible.builtin.include_role: + name: cvmfs_server + tasks_from: main.yml + - name: Setup EESSI hosts: eessi tags: eessi diff --git a/ansible/roles/compute_init/README.md b/ansible/roles/compute_init/README.md index 7a95d2b74..6e20a2746 100644 --- a/ansible/roles/compute_init/README.md +++ b/ansible/roles/compute_init/README.md @@ -73,6 +73,7 @@ it also requires an image build with the role name added to the | filesystems.yml | manila | All functionality | No [5] | | filesystems.yml | lustre | All functionality | Yes | | extras.yml | basic_users | All functionality [6] | No | +| extras.yml | cvmfs_server | Not relevant for compute nodes | n/a | | extras.yml | eessi | All functionality [7] | No | | extras.yml | cuda | None required - use image build | Yes [8] | | extras.yml | vgpu | All functionality | Yes | diff --git a/ansible/roles/cvmfs_server/README.md b/ansible/roles/cvmfs_server/README.md new file mode 100644 index 000000000..b289c849e --- /dev/null +++ b/ansible/roles/cvmfs_server/README.md @@ -0,0 +1,110 @@ +# cvmfs_server + +Install a CernVM-FS Stratum 1 server replicating the EESSI repository. + +By default, the appliance `eessi` role configures EESSI clients to use EESSI's +Stratum 1 servers. 
If EESSI is in production use, the `squid` role should normally +be configured to provide an http proxy for those clients to reduce the load +on the upstream stratum 1 servers. However both of those approaches assume that +there is outbound http access. If this is not the case, this role can be used +to provide a private, in-cluster server replicating the EESSI repository from +an EESSI synchronisation server. + +This feature is enabled by adding a node to the `cvmfs_server` group. The +defaults provided are sufficient to implement the above configuration. + +This role wraps the [EESSI ansible-cvmfs](https://github.com/EESSI/ansible-cvmfs) +role, which provides additional functionality. The defaults here: +- Use https URLs for both dnf repositories and for the EESSI repository replication. +- Use the `aws-eu-west-s1-sync` EESSI server (which is the only one providing + https replication). +- Do not configure a squid proxy in front of the Stratum 1 server. +- Do not configure a firewall (OpenStack security groups are expected to be + sufficient). +- Do not configure the Geo API service. + +Guidance on configuring a private Stratum 1 server for EESSI is provided [here](https://www.eessi.io/docs/filesystem_layer/stratum1/#requirements-for-a-stratum-1). + +**NB**: The initial replication will take a considerable amount of time. If +this fails due to e.g. a network glitch you can recover it by sshing to the +`cvmfs_server` node and running: + + sudo cvmfs_server snapshot software.eessi.io + +## Requirements + +1. See the [EESSI Stratum 1 requirements](https://www.eessi.io/docs/filesystem_layer/stratum1/#requirements-for-a-stratum-1) + for the server specification. +2. The node used must have outbound connectivity for dnf package installs + and to replicate the EESSI repository. +3. 
If this role is used to provide EESSI for an [isolated cluster](../../../docs/experimental/isolated-clusters.md) + where cluster users have no outbound internet connectivity, ensure those users + cannot access this node, i.e. it is not in groups `basic_users`, `ldap` or + `freeipa`. +4. The node is automatically added to the `dnf_repos` group to enable yum + repositories so this role can install dependencies. It therefore requires + either configuring Ark credentials or a local Pulp server - see links in + [adding additional packages](../../../docs/operations.md#adding-additional-packages). + Note the former will also require setting `dnf_repos_allow_insecure_creds: true` + to allow Ark credentials to be templated into repofiles - this also requires requirement 3 + to avoid exposing these to cluster users. + +See also the example configuration below. + +## Role variables + +Any variables from the [EESSI ansible-cvmfs role](https://github.com/EESSI/ansible-cvmfs) +may be set. Generally only `cvmfs_srv_device` is likely to be required, if CVMFS +data should be stored on a specific block device (e.g. a mounted volume). + +## Example configuration + +The below OpenTofu configuration creates a new node in the `cvmfs_server` group +with a new 1TB volume attached: + +```terraform +# environments/production/tofu/main.tf: +module "cluster" { + + ... + + additional_nodegroups = { + cvmfs_server = { + nodes = ["eessi"] + flavor = "m2.medium" + extra_volumes = { + srv = { + size = 1000 # GB + } + } + } + } + + ... +} +``` + +Configure the role to use the volume for CVMFS data: + +``` +# environments/site/inventory/group_vars/all/cvmfs_server.yml: +cvmfs_srv_device: /dev/vdb +``` + +**NB:** Hardcoding the device path is only safe if a single volume is attached, +else the ordering of devices is not guaranteed after reboots etc. + +Note Ark credentials or a local Pulp server must also be configured as referenced +above. 
+ +## Client configuration + +Configuration for EESSI clients is provided by the [eessi](../eessi/README.md) +role. To use the Stratum 1 server provided by this role requires overriding +the default configuration (NB: not adding additional configuration) using: + +``` +# environments/site/inventory/group_vars/all/eessi.yml: +cvmfs_config: + CVMFS_SERVER_URL: "http://{{ hostvars[groups['cvmfs_server'] | first].ansible_host }}/cvmfs/@fqrn@" +``` diff --git a/ansible/roles/cvmfs_server/defaults/main.yml b/ansible/roles/cvmfs_server/defaults/main.yml new file mode 100644 index 000000000..18cd5c71c --- /dev/null +++ b/ansible/roles/cvmfs_server/defaults/main.yml @@ -0,0 +1,55 @@ +# NB: Most eessi.cvmfs variables cannot be set here, because they are not +# applied when this role calls it via import_role. +# Instead they are set in environments/common/inventory/group_vars/all/cvmfs_server.yml + +cvmfs_role: stratum1 + +# Vars from eessi.cvmfs:vars/redhat.yml - required because *this* role does +# not run eessi.cvmfs:tasks/main.yml which loads those vars. These therefore +# *can* be set here, as they are not otherwise set at all. 
+ +cvmfs_apache_service_name: httpd +cvmfs_apache_conf_file: /etc/httpd/conf/httpd.conf +cvmfs_dnf_repos: [] # provided by dnf_repos instead + +cvmfs_dnf_repo_key: + content: | + -----BEGIN PGP PUBLIC KEY BLOCK----- + Version: GnuPG v2.0.14 (GNU/Linux) + + mQGiBEuGP6YRBADV89cbF4uoEX89Q8uxOklIDVJhOJAFKZ33LSdzHv3iObnjo5w4 + wbb8FiSir4oWgarAco4u0kR1yKjHJ33oVB2xmPOzW3NWoHI7aPF7tCgo7FY9hNoC + 4NEkNycvbfSoCScsv2yY5qz2q2sY1LWGZGbUXjBvKbmASe9sJFKJV7NsmwCg76W/ + aMazleHyDtooD8tk3ZWvpKcD/Rg51Oad+ZLc7h45wDMHpaDvOBeGoyp+k7JgQd87 + HfXiJtg/Q6zyTwrV3vCQvMpw3GRjRkZBcPgRWb6rUk68dL8fa2cTxhISX5/DIQzc + mmuDa0EgCGGAKUZ4bHqaexFFnp/B+VKBPvJuxLa0cBDd6eewxNwtHJ90EaMeBzGd + 6zU2BADO9YbXiEMqRkfVLnuvD5G31/WJZvffXCxspnSfg923DbILWa4vNW9MLMsK + IVHvyVr0mZF8xdyQNVPUX3/4uahKM4hwuFqdbyjuLGEIF3U73aIJ0+YDep/+I6yU + JGHnxy8Ex+a1XIhJ1hSI7+oalSdt+w/pE3+2MQyUfSDPSXVA3LQ+Q2VyblZNIEFk + bWluaXN0cmF0b3IgKGN2bWFkbWluKSA8Y2VybnZtLmFkbWluaXN0cmF0b3JAY2Vy + bi5jaD6IZAQTEQIAJAIbAwYLCQgHAwIDFQIDAxYCAQIeAQIXgAUCT18LigUJBbn/ + ZAAKCRAjDTidiuRc5/BFAKCb13G8yxG75r3s63mHo5l9PNUKGwCfZpSlZrhBsVZ4 + 2DsKfLG1VQ+X8HW5Ag0ES4Y/qBAIAL3sWKXQKpbIOpwX+mNX2IV2XxNBM3KYjYOE + ii66i9apPo3BA39a9Wm9vh1kYIHTkh9Qqb8w53hc4ANkVT+cYzxXythGBjWoLtwC + zKCPrIb7RQJRc956Ot0q4qmlcUEGi5zefSIoJZR5jyR7rZS+1PNJYI05xY2+Eah1 + u9UxrlzBH5DCsvUqTNK12WrPIibmLo8u+yIDJjwgh9O5YITC+et/g47NLfZdiAGP + LEjvJFRi7Ju+8ywO32dSVBPJQDktr5BC950DKZHA9n+sJ63iF3lP/aCTECpxxUqX + VVqioobwg5ytl60hw9I9sfwBP6z9PR90RcyT1l4giiBz9LV+KpcAAwUIAKeAxArG + aJxzWziKs7D8TTuE50Nw+S3RGhVzwSKy7183Z11iOEMqbm2/zwp65wFkntCKmLKD + nGsTgFNpstIyFwJmj34Axp7N3KGqXnTI+SIQd6VmzQ1phxfCOw8IGueOR6YI7S1G + YWt7DoseZKz4EWdvXCOkQAhbxq/HT2c3ihxsuxrErxz7QtNaYOFXiuLj3mYH9XaM + eEe8Pkl+yyRTvyUNlMIu/i79qf+QUlsi10nCUm88cSXQiKWOJ4GiUoT+jD7pN4oh + dALRVl0tl/EyPTw+asG3lQhPZ+solvJXp+i7KF7nwnyXDB63WNH15S1pQLMnqCuG + CFyegf6jnOJU0AqITwQYEQIADwIbDAUCT18MOQUJBboAEQAKCRAjDTidiuRc53P2 + AJ9e1y70yIKwx6YmpDnwqWSE07Q6lACdEnem0DbLg9t+gkX/98driCP9Ifg= + =S7Dt + -----END PGP PUBLIC KEY BLOCK----- + dest: 
/etc/pki/rpm-gpg/RPM-GPG-KEY-CernVM + +cvmfs_packages: + stratum1-disk: + - httpd + - "{{ 'mod_wsgi' if ansible_distribution_major_version is version('8', '<') else 'python3-mod_wsgi' }}" + - "{{ 'squid' if cvmfs_stratum1_squid else omit }}" + - cvmfs-server + - cvmfs-config-default diff --git a/ansible/roles/cvmfs_server/tasks/main.yml b/ansible/roles/cvmfs_server/tasks/main.yml new file mode 100644 index 000000000..15fe90a37 --- /dev/null +++ b/ansible/roles/cvmfs_server/tasks/main.yml @@ -0,0 +1,3 @@ +- ansible.builtin.import_role: + name: eessi.cvmfs + tasks_from: "{{ cvmfs_role }}.yml" diff --git a/ansible/roles/dnf_repos/defaults/main.yml b/ansible/roles/dnf_repos/defaults/main.yml index 9302eff84..8977c15c8 100644 --- a/ansible/roles/dnf_repos/defaults/main.yml +++ b/ansible/roles/dnf_repos/defaults/main.yml @@ -19,6 +19,7 @@ dnf_repos_filenames: dnf_repos_version_filenames: "{{ dnf_repos_filenames[ansible_distribution_major_version] }}" # epel installed separately +# NB: 'name' cannot have spaces dnf_repos_default_repolist: - file: "{{ dnf_repos_version_filenames.baseos }}" name: baseos @@ -38,6 +39,12 @@ dnf_repos_default_repolist: - file: "{{ dnf_repos_version_filenames.grafana }}" name: grafana base_url: "{{ dnf_repos_pulp_content_url }}/{{ appliances_pulp_repos.grafana[ansible_distribution_major_version] | appliances_repo_to_subpath }}" +- file: cernvm + name: cernvmfs_pkgs + base_url: "{{ dnf_repos_pulp_content_url }}/{{ appliances_pulp_repos.cernvmfs_pkgs[ansible_distribution_major_version] | appliances_repo_to_subpath }}" +- file: cernvm + name: cernvmfs_cfg + base_url: "{{ dnf_repos_pulp_content_url }}/{{ appliances_pulp_repos.cernvmfs_cfg[ansible_distribution_major_version] | appliances_repo_to_subpath }}" dnf_repos_openhpc_repolist: - name: OpenHPC diff --git a/ansible/roles/eessi/tasks/configure.yml b/ansible/roles/eessi/tasks/configure.yml index b3083761c..c496e997b 100644 --- a/ansible/roles/eessi/tasks/configure.yml +++ 
b/ansible/roles/eessi/tasks/configure.yml @@ -8,9 +8,16 @@ value: "{{ item.value }}" no_extra_spaces: true loop: "{{ cvmfs_config | dict2items }}" + register: _cvmfs_config_ini # NOTE: Not clear how to make this idempotent -- name: Ensure CVMFS config is setup +- name: Ensure CVMFS is setup command: cmd: "cvmfs_config setup" + when: _cvmfs_config_ini.changed | default(false) + +- name: Reload CVMFS config + command: + cmd: "cvmfs_config reload" + when: _cvmfs_config_ini.changed | default(false) diff --git a/ansible/roles/pulp_site/defaults/main.yml b/ansible/roles/pulp_site/defaults/main.yml index d30d1bdff..8365deb8a 100644 --- a/ansible/roles/pulp_site/defaults/main.yml +++ b/ansible/roles/pulp_site/defaults/main.yml @@ -28,8 +28,13 @@ pulp_site_rpm_info: subpath: "{{ appliances_pulp_repos.openhpc_updates[pulp_site_target_distribution_version_major] | appliances_repo_to_subpath }}" - name: "ceph-{{ pulp_site_target_distribution_version_major }}-{{ appliances_pulp_repos.ceph[pulp_site_target_distribution_version_major].timestamp }}" subpath: "{{ appliances_pulp_repos.ceph[pulp_site_target_distribution_version_major] | appliances_repo_to_subpath }}" -- name: "grafana-{{ pulp_site_target_distribution_version_major }}-{{ appliances_pulp_repos.grafana.timestamp[pulp_site_target_distribution_version_major].timestamp }} +- name: "grafana-{{ pulp_site_target_distribution_version_major }}-{{ appliances_pulp_repos.grafana[pulp_site_target_distribution_version_major].timestamp }}" subpath: "{{ appliances_pulp_repos.grafana[pulp_site_target_distribution_version_major] | appliances_repo_to_subpath }}" +- name: "cernvmfs_pkgs-{{ pulp_site_target_distribution_version_major }}-{{ appliances_pulp_repos.cernvmfs_pkgs[pulp_site_target_distribution_version_major].timestamp }}" + subpath: "{{ appliances_pulp_repos.cernvmfs_pkgs[pulp_site_target_distribution_version_major] | appliances_repo_to_subpath }}" +- name: "cernvmfs_cfg-{{ 
pulp_site_target_distribution_version_major 
}}-{{ appliances_pulp_repos.cernvmfs_cfg[pulp_site_target_distribution_version_major].timestamp }}" + subpath: "{{ appliances_pulp_repos.cernvmfs_cfg[pulp_site_target_distribution_version_major] | appliances_repo_to_subpath }}" + pulp_site_rpm_repo_defaults: remote_username: "{{ pulp_site_upstream_username }}" diff --git a/docs/experimental/isolated-clusters.md b/docs/experimental/isolated-clusters.md index a570465ea..b7a1a73e7 100644 --- a/docs/experimental/isolated-clusters.md +++ b/docs/experimental/isolated-clusters.md @@ -13,10 +13,13 @@ The full list of features and whether they are functional on such an "isolated" network is shown in the table below. Note that: - Using [EESSI](https://www.eessi.io/docs/) necessarily requires outbound - network access for the CernVM File System. However this can be provided - via an authenticated proxy. While the proxy configuration on the cluster node - is readable by all users, this proxy could be limited via acls to only provide - access to EESSI's CVMFS Stratum 1 servers. + network access for the CernVM File System. If security groups are not + sufficient to restrict this: + a. If outbound http is available, an authenticated proxy could be used, + limited via acls to only provide access to EESSI's CVMFS Stratum 1 servers. + The proxy configuration should be via the `eessi` role variables. + b. If only outbound https is available, the [cvmfs_server](../../ansible/roles/cvmfs_server/README.md) + role can be used to provide a Stratum 1 server on the cluster network. 
## Support by feature for isolated networks diff --git a/environments/common/inventory/group_vars/all/cvmfs_server.yml b/environments/common/inventory/group_vars/all/cvmfs_server.yml new file mode 100644 index 000000000..977fa8f30 --- /dev/null +++ b/environments/common/inventory/group_vars/all/cvmfs_server.yml @@ -0,0 +1,28 @@ +# See ansible/roles/cvmfs_server/README.md + +# cvmfs_srv_device: # block device to use for CVMFS data. 
/srv/cvmfs is used if not set. + +cvmfs_keys: + # from /cvmfs/cvmfs-config.cern.ch/etc/cvmfs/keys/eessi.io/eessi.io.pub on client + - path: /etc/cvmfs/keys/eessi.io/eessi.io.pub + key: | + -----BEGIN PUBLIC KEY----- + MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyau1UFUcoiqpE5U9StON + W0Trc3PM02AA5kYgknrqZJdusj5PcNg7rhOnrd+SX8BIiVtVMr6mqORXsJ2FNydJ + lfm512GcmurM154m6Hz4I+UttcpVLe1CvlyxjjCrN0siSiPnLDONy3Ih1PWyRlA/ + sobSaIhx3Vyn1IzC0nbjstKObAYJs2kjokS+cipX2oIXJSoxkhvXcxmiMCi2GQIs + KTQDT+bATs9eK+ZqY4n5x8VX87AlMBQuSCP5mA8VQPx4aCf8AjHWAn3qmPrhk3od + Z/Cl9vrYzoWJT1HOyumv7aEv7UlY81++3w/AwYKXLJI6ioop2AotjrUm3w2U3+k7 + WQIDAQAB + -----END PUBLIC KEY----- + +cvmfs_stratum1_http_ports: + - 80 + - 8000 + +cvmfs_repositories: + - stratum0: aws-eu-west-s1-sync.eessi.science + stratum0_url_scheme: https + repository: software.eessi.io + key_dir: /etc/cvmfs/keys/eessi.io + owner: root diff --git a/environments/common/inventory/group_vars/all/timestamps.yml b/environments/common/inventory/group_vars/all/timestamps.yml index 455c26005..4dbe4f9c6 100644 --- a/environments/common/inventory/group_vars/all/timestamps.yml +++ b/environments/common/inventory/group_vars/all/timestamps.yml @@ -1,3 +1,20 @@ +# In the below: +# - Top-level key (e.g. appstream) is arbitrary +# - 2nd level key (e.g. '8.10') is `distribution_version` or `distribution_major_version` +# as required from ansible_facts +# - path is the base_path parameter from stackhpc-release-train/ansible/inventory/group_vars/all/package-repos +# WITHOUT the trailing slash +# - timestamp is the the Ark timestamp to use + +# See also: +# - ansible/roles/dnf_repos/defaults/main.yml +# - ansible/roles/pulp_site/defaults/main.yml + +# Note that with Ark creds in the active environment all timestamps can be +# updated to the latest available using +# ansible-playbook ansible/ci/update_timestamps.yml +# but it doesn't check they are functional! 
+ appliances_pulp_repos: appstream: '8.10': @@ -86,3 +103,17 @@ appliances_pulp_repos: '9': path: OpenHPC/3/updates/EL_9 timestamp: 20250510T003301 + cernvmfs_pkgs: + '8': + path: cvmfs/EL/8/x86_64 + timestamp: 20250806T121654 + '9': + path: cvmfs/EL/9/x86_64 + timestamp: 20250806T121654 + cernvmfs_cfg: + '8': + path: cvmfs-config/EL/8/x86_64 + timestamp: 20250805T130249 + '9': + path: cvmfs-config/EL/9/x86_64 + timestamp: 20250805T130249 diff --git a/environments/common/inventory/groups b/environments/common/inventory/groups index 0f9e204d5..8f0745bc1 100644 --- a/environments/common/inventory/groups +++ b/environments/common/inventory/groups @@ -17,9 +17,14 @@ compute # Login group to use for running mpi-based testing. login +[additional] +# Additional nodes to include in "cluster" group +# Automatically populated from OpenTofu variable additional_nodegroups + [cluster:children] # All nodes in the appliance - add e.g. service nodes not running Slurm here. openhpc +additional [builder] # Do not add hosts here manually - used as part of Packer image build pipeline. See packer/README.md. @@ -182,6 +187,7 @@ k3s_agent # Warning: when using Ark directly rather than a local Pulp server, adding hosts other than `builder` will leak Ark creds to users builder extra_packages +cvmfs_server [pulp] # Add builder to this group to enable automatically syncing of pulp during image build @@ -197,3 +203,7 @@ extra_packages [nhc] # Hosts to configure for node health checks - either entire 'compute' group or empty + +[cvmfs_server] +# Hosts to configure as a CernVM-FS Stratum 1 replica server for EESSI. 
+# See ansible/roles/cvmfs_server/README.md diff --git a/environments/common/layouts/everything b/environments/common/layouts/everything index 0f4253758..859f5e762 100644 --- a/environments/common/layouts/everything +++ b/environments/common/layouts/everything @@ -146,3 +146,7 @@ compute # Note that this feature currently assumes all compute nodes are VMs, enabling # when the cluster contains baremetal compute nodes may lead to unexpected scheduling behaviour compute + +[cvmfs_server] +# Hosts to configure as a CernVM-FS Stratum 1 replica server for EESSI. +# See ansible/roles/cvmfs_server/README.md diff --git a/requirements.yml b/requirements.yml index 8850c1615..201c148af 100644 --- a/requirements.yml +++ b/requirements.yml @@ -25,6 +25,9 @@ roles: version: v25.3.1 - src: mrlesmithjr.chrony version: v0.1.4 + - src: https://github.com/stackhpc/ansible-cvmfs.git + name: eessi.cvmfs + version: 2025.08.1 collections: - name: containers.podman