From a3b35cdbbfe138c3070eb2c5573f21b199891b6a Mon Sep 17 00:00:00 2001 From: Peter Onyisi Date: Sun, 3 Aug 2025 09:06:39 -0500 Subject: [PATCH 1/2] Update xcache list internally --- helm/servicex/templates/app/configmap.yaml | 5 +++ helm/servicex/values.yaml | 6 ++++ .../servicex_app/transformer_manager.py | 31 ++++++++++++++++++- 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/helm/servicex/templates/app/configmap.yaml b/helm/servicex/templates/app/configmap.yaml index e543e260..b328e69b 100644 --- a/helm/servicex/templates/app/configmap.yaml +++ b/helm/servicex/templates/app/configmap.yaml @@ -94,6 +94,11 @@ data: {{- else }} TRANSFORMER_CACHE_PREFIX = "" {{- end }} + {{- if .Values.transformer.cacheVPSSite }} + TRANSFORMER_CACHE_VPS_SITE = "{{ .Values.transformer.cacheVPSSite }}" + TRANSFORMER_CACHE_VPS_INTERVAL = {{ .Values.transformer.cacheVPSCheckInterval }} + TRANSFORMER_CACHE_VPS_LIVENESS_URL = "{{ .Values.transformer.cacheVPSLivenessURL }}" + {{- end }} TRANSFORMER_AUTOSCALE_ENABLED = {{- ternary "True" "False" .Values.transformer.autoscaler.enabled }} TRANSFORMER_CPU_LIMIT = {{ .Values.transformer.cpuLimit }} TRANSFORMER_MEMORY_LIMIT = "{{ .Values.transformer.memoryLimit }}" diff --git a/helm/servicex/values.yaml b/helm/servicex/values.yaml index 85ad51be..95ca5f58 100644 --- a/helm/servicex/values.yaml +++ b/helm/servicex/values.yaml @@ -201,6 +201,12 @@ transformer: # comma-separated list of values. # Do not put root:// in these values. 
cachePrefix: null + # Alternatively, provide the virtual placement service site name to look up + # xCache servers from + cacheVPSSite: null + cacheVPSCheckInterval: 1800 + cacheVPSLivenessURL: https://vps.cern.ch/liveness + autoscaler: cpuScaleThreshold: 30 diff --git a/servicex_app/servicex_app/transformer_manager.py b/servicex_app/servicex_app/transformer_manager.py index fd419a41..76d98946 100644 --- a/servicex_app/servicex_app/transformer_manager.py +++ b/servicex_app/servicex_app/transformer_manager.py @@ -220,7 +220,11 @@ def create_job_object( env = env + [env_var_instance_name] # provide each pod with an environment var holding cache prefix path - if "TRANSFORMER_CACHE_PREFIX" in current_app.config: + if ( + "TRANSFORMER_CACHE_PREFIX" in current_app.config + or "TRANSFORMER_CACHE_VPS_SITE" in current_app.config + ): + TransformerManager.validate_caches() env += [ client.V1EnvVar( "CACHE_PREFIX", value=current_app.config["TRANSFORMER_CACHE_PREFIX"] @@ -410,6 +414,31 @@ def persistent_volume_claim_exists(self, claim_name, namespace): return False return False + @staticmethod + def validate_caches(): + import time + import urllib3 + + if not (thissite := current_app.config.get("TRANSFORMER_CACHE_VPS_SITE", None)): + return + + lastchecktime = current_app.config.get("TRANSFORMER_CACHE_VPS_LASTCHECK", 0) + ttl = current_app.config.get("TRANSFORMER_CACHE_VPS_INTERVAL", 30 * 60) + if (now := time.time()) - lastchecktime > ttl: + current_app.config["TRANSFORMER_CACHE_VPS_LASTCHECK"] = now + vps_server = current_app.config["TRANSFORMER_CACHE_VPS_LIVENESS_URL"] + try: + sitedata = urllib3.request("GET", vps_server).json() + if thissite not in sitedata: + current_app.logger.error(f"{thissite} is not in VPS liveness data") + servers = sorted( + [_["address"] for _ in sitedata[thissite].values() if _["live"]] + ) + current_app.logger.info(f"Live xCache servers are {servers}") + current_app.config["TRANSFORMER_CACHE_PREFIX"] = ",".join(servers) + except Exception as e: 
+ current_app.logger.error(f"Exception retrieving site data: {e}") + @staticmethod def create_hpa_object(request_id, max_workers): target = client.V1CrossVersionObjectReference( From 94e12547fe68689ccdce7065f965b9addf6c1ff0 Mon Sep 17 00:00:00 2001 From: Peter Onyisi Date: Sun, 3 Aug 2025 10:22:29 -0500 Subject: [PATCH 2/2] Add documentation on new options --- docs/deployment/reference.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/deployment/reference.md b/docs/deployment/reference.md index 9fa89a44..97518b9c 100644 --- a/docs/deployment/reference.md +++ b/docs/deployment/reference.md @@ -93,7 +93,10 @@ parameters for the [rabbitMQ](https://github.com/bitnami/charts/tree/master/bitn | `minio.auth.rootPassword` | Password key to log into minio | leftfoot1 | | `minio.apiIngress.enabled` | Should minio chart deploy an ingress to the service? | false | | `minio.apiIngress.hostname` | Hostname associate with ingress controller | nil | -| `transformer.cachePrefix` | Prefix string to stick in front of file paths. Useful for XCache | | +| `transformer.cachePrefix` | Prefix string to stick in front of file paths. Useful for XCache | nil | +| `transformer.cacheVPSSite` | Specify a Virtual Placement Service site whose XCaches we should use. Will update automatically if the list changes | nil | +| `transformer.cacheVPSCheckInterval` | How frequently the Virtual Placement Service should be consulted for the list of XCaches (in seconds) | 1800 | +| `transformer.cacheVPSLivenessURL` | URL from which Virtual Placement Service site info can be obtained | https://vps.cern.ch/liveness | | `transformer.autoscaler.enabled` | Enable/disable horizontal pod autoscaler for transformers | True | | `transformer.autoscaler.cpuScaleThreshold` | CPU percentage threshold for pod scaling | 30 | | `transformer.autoscaler.minReplicas` | Minimum number of transformer pods per request | 1 |