From 8db475e6ef5047994ad9805d919b34579930ee63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Grzegorz=20Burzy=C5=84ski?= Date: Wed, 27 Aug 2025 11:32:20 +0200 Subject: [PATCH 01/14] Extend ScyllaDBMonitoring with externally-managed Prometheus support --- pkg/api/scylla/v1alpha1/types_monitoring.go | 133 ++++++++++++++++++++ 1 file changed, 133 insertions(+) diff --git a/pkg/api/scylla/v1alpha1/types_monitoring.go b/pkg/api/scylla/v1alpha1/types_monitoring.go index 51521656a44..5ab2e55a8be 100644 --- a/pkg/api/scylla/v1alpha1/types_monitoring.go +++ b/pkg/api/scylla/v1alpha1/types_monitoring.go @@ -86,8 +86,30 @@ type Storage struct { VolumeClaimTemplate corev1.PersistentVolumeClaimTemplate `json:"volumeClaimTemplate,omitempty"` } +// PrometheusMode describes the mode of the Prometheus instance. +// +kubebuilder:validation:Enum="Managed";"External" +type PrometheusMode string + +const ( + // PrometheusModeManaged defines a mode where a `Prometheus` object is created as a child of a `ScyllaDBMonitoring` + // object. `ServiceMonitor` and `PrometheusRule` resources are also created to configure scraping and alerting. + // This mode requires a Prometheus Operator to be installed in the cluster. + PrometheusModeManaged PrometheusMode = "Managed" + + // PrometheusModeExternal defines a mode where no `Prometheus` child object is created, but `ServiceMonitor` and + // `PrometheusRule` objects are still created to configure scraping and alerting. + // This mode requires a Prometheus Operator to be installed in the cluster, along with a `Prometheus` instance + // configured to reconcile `ServiceMonitor` and `PrometheusRule` resources. + PrometheusModeExternal PrometheusMode = "External" +) + // PrometheusSpec holds the spec prometheus options. type PrometheusSpec struct { + // mode defines the mode of the Prometheus instance. 
+ // +kubebuilder:default:="Managed" + // +optional + Mode PrometheusMode `json:"mode,omitempty"` + // placement describes restrictions for the nodes Prometheus is scheduled on. // +optional Placement *PlacementSpec `json:"placement,omitempty"` @@ -133,6 +155,117 @@ type GrafanaSpec struct { // authentication hold the authentication options for accessing Grafana. // +optional Authentication GrafanaAuthentication `json:"authentication,omitempty"` + + // datasources is a list of Grafana datasources to configure. + // It's expected to be set when using Prometheus component in `External` mode. + // At most one datasource is allowed for now (only Prometheus is supported). + // +kubebuilder:validation:MaxItems=1 + // +optional + Datasources []GrafanaDatasourceSpec `json:"datasources,omitempty"` +} + +// GrafanaDatasourceType defines the type of Grafana datasource. +// +kubebuilder:validation:Enum="Prometheus" +type GrafanaDatasourceType string + +const ( + // GrafanaDatasourceTypePrometheus is the Prometheus datasource type. + GrafanaDatasourceTypePrometheus GrafanaDatasourceType = "Prometheus" +) + +type GrafanaDatasourceSpec struct { + // name is the name of the datasource as it will appear in Grafana. + // Only "prometheus" is supported as that's the datasource name expected by the ScyllaDB monitoring stack dashboards. + // +kubebuilder:validation:Enum="prometheus" + // +kubebuilder:default:="prometheus" + Name string `json:"name,omitempty"` + + // type is the type of the datasource. Only "Prometheus" is supported. + // +kubebuilder:validation:Enum="Prometheus" + // +kubebuilder:default:="Prometheus" + // +optional + Type GrafanaDatasourceType `json:"type,omitempty"` + + // url is the URL of the datasource. + // +kubebuilder:validation:MinLength=1 + URL string `json:"url"` + + // prometheusOptions defines Prometheus-specific options. 
+ // +optional + PrometheusOptions *GrafanaPrometheusDatasourceOptions `json:"prometheusOptions,omitempty"` +} + +type GrafanaPrometheusDatasourceOptions struct { + // tls holds TLS configuration for connecting to Prometheus over HTTPS. + // +optional + TLS *GrafanaDatasourceTLSSpec `json:"tls,omitempty"` + + // auth holds authentication options for connecting to Prometheus. + // +optional + Auth *GrafanaPrometheusDatasourceAuthSpec `json:"auth,omitempty"` +} + +// GrafanaPrometheusDatasourceAuthType defines the type of authentication to use when connecting to Prometheus. +type GrafanaPrometheusDatasourceAuthType string + +const ( + // GrafanaPrometheusDatasourceAuthTypeNoAuthentication means no authentication. + GrafanaPrometheusDatasourceAuthTypeNoAuthentication GrafanaPrometheusDatasourceAuthType = "NoAuthentication" + + // GrafanaPrometheusDatasourceAuthTypeBearerToken means Bearer token authentication. + GrafanaPrometheusDatasourceAuthTypeBearerToken GrafanaPrometheusDatasourceAuthType = "BearerToken" +) + +type GrafanaPrometheusDatasourceAuthSpec struct { + // type is the type of authentication to use. + // +kubebuilder:default:="NoAuthentication" + // +optional + Type GrafanaPrometheusDatasourceAuthType `json:"type,omitempty"` + + // bearerTokenOptions holds options for Bearer token authentication. + // +optional + BearerTokenOptions *GrafanaPrometheusDatasourceBearerTokenAuthOptions `json:"bearerTokenOptions,omitempty"` +} + +type GrafanaPrometheusDatasourceBearerTokenAuthOptions struct { + // secretRef is a reference to a key in a Secret holding a Bearer token to use to authenticate with Prometheus. + // +optional + SecretRef *LocalObjectKeySelector `json:"secretRef,omitempty"` +} + +type GrafanaDatasourceTLSSpec struct { + // caCertConfigMapRef is a reference to a key within the CA bundle ConfigMap. The key should hold the CA cert in PEM format. + // When not specified, system CAs are used. 
+ // +optional + CACertConfigMapRef *LocalObjectKeySelector `json:"caCertConfigMapRef,omitempty"` + + // insecureSkipVerify controls whether to skip server certificate verification. + // +kubebuilder:default:=false + // +optional + InsecureSkipVerify bool `json:"insecureSkipVerify,omitempty"` + + // clientTLSKeyPairSecretRef is a reference to a Secret holding client TLS certificate and key for mTLS authentication. + // It's expected to be a standard Kubernetes TLS Secret with `tls.crt` and `tls.key` keys. + // +optional + ClientTLSKeyPairSecretRef *LocalObjectReference `json:"clientTLSKeyPairSecretRef,omitempty"` +} + +// LocalObjectKeySelector selects a key of a ConfigMap or Secret in the same namespace. +type LocalObjectKeySelector struct { + // name of the selected object. + // +kubebuilder:validation:MinLength=1 + Name string `json:"name"` + + // key within the selected object. + // +kubebuilder:validation:MinLength=1 + Key string `json:"key"` +} + +// LocalObjectReference contains a reference to an object in the same namespace. +// It can be used to reference a Secret, ConfigMap, or any other namespaced resource. +type LocalObjectReference struct { + // Name of the referent. + Name string `json:"name"` } // Components holds the options to configure individual applications. From bdc3ef293410161b6476ebe914602fe757bdd118 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Grzegorz=20Burzy=C5=84ski?= Date: Wed, 27 Aug 2025 11:34:23 +0200 Subject: [PATCH 02/14] Handle externally-managed Prometheus mode --- .../scylladbmonitoring/sync_prometheus.go | 181 ++++++++++++------ 1 file changed, 119 insertions(+), 62 deletions(-) diff --git a/pkg/controller/scylladbmonitoring/sync_prometheus.go b/pkg/controller/scylladbmonitoring/sync_prometheus.go index b0661c87826..a7b9efed8fa 100644 --- a/pkg/controller/scylladbmonitoring/sync_prometheus.go +++ b/pkg/controller/scylladbmonitoring/sync_prometheus.go @@ -294,38 +294,9 @@ func (smc *Controller) syncPrometheus( } // Render manifests. 
- var renderErrors []error - - requiredPrometheusSA, _, err := makePrometheusSA(sm) - renderErrors = append(renderErrors, err) - - requiredPrometheusRoleBinding, _, err := makePrometheusRoleBinding(sm) - renderErrors = append(renderErrors, err) - - requiredPrometheusService, _, err := makePrometheusService(sm) - renderErrors = append(renderErrors, err) - - requiredIngress, _, err := makePrometheusIngress(sm) - renderErrors = append(renderErrors, err) - - requiredPrometheus, _, err := makePrometheus(sm, soc) - renderErrors = append(renderErrors, err) - - requiredLatencyPrometheusRule, _, err := makeLatencyPrometheusRule(sm) - renderErrors = append(renderErrors, err) - - requiredAlertsPrometheusRule, _, err := makeAlertsPrometheusRule(sm) - renderErrors = append(renderErrors, err) - - requiredTablePrometheusRule, _, err := makeTablePrometheusRule(sm) - renderErrors = append(renderErrors, err) - - requiredScyllaDBServiceMonitor, _, err := makeScyllaDBServiceMonitor(sm) - renderErrors = append(renderErrors, err) - - renderError := apimachineryutilerrors.NewAggregate(renderErrors) - if renderError != nil { - return progressingConditions, renderError + requiredResources, err := makeRequiredPrometheusResources(sm, soc) + if err != nil { + return progressingConditions, err } // Prune objects. 
@@ -333,7 +304,7 @@ func (smc *Controller) syncPrometheus( err = controllerhelpers.Prune( ctx, - oslices.ToSlice(requiredPrometheusSA), + oslices.FilterOutNil(oslices.ToSlice(requiredResources.ServiceAccount)), serviceAccounts, &controllerhelpers.PruneControlFuncs{ DeleteFunc: smc.kubeClient.CoreV1().ServiceAccounts(sm.Namespace).Delete, @@ -344,7 +315,7 @@ func (smc *Controller) syncPrometheus( err = controllerhelpers.Prune( ctx, - oslices.ToSlice(requiredPrometheusService), + oslices.FilterOutNil(oslices.ToSlice(requiredResources.Service)), services, &controllerhelpers.PruneControlFuncs{ DeleteFunc: smc.kubeClient.CoreV1().Services(sm.Namespace).Delete, @@ -355,7 +326,7 @@ func (smc *Controller) syncPrometheus( err = controllerhelpers.Prune( ctx, - oslices.ToSlice(requiredPrometheusRoleBinding), + oslices.FilterOutNil(oslices.ToSlice(requiredResources.RoleBinding)), roleBindings, &controllerhelpers.PruneControlFuncs{ DeleteFunc: smc.kubeClient.RbacV1().RoleBindings(sm.Namespace).Delete, @@ -366,7 +337,7 @@ func (smc *Controller) syncPrometheus( err = controllerhelpers.Prune( ctx, - oslices.ToSlice(requiredPrometheus), + oslices.FilterOutNil(oslices.ToSlice(requiredResources.Prometheus)), prometheuses, &controllerhelpers.PruneControlFuncs{ DeleteFunc: smc.monitoringClient.Prometheuses(sm.Namespace).Delete, @@ -377,7 +348,7 @@ func (smc *Controller) syncPrometheus( err = controllerhelpers.Prune( ctx, - oslices.FilterOutNil(oslices.ToSlice(requiredIngress)), + oslices.FilterOutNil(oslices.ToSlice(requiredResources.Ingress)), ingresses, &controllerhelpers.PruneControlFuncs{ DeleteFunc: smc.kubeClient.NetworkingV1().Ingresses(sm.Namespace).Delete, @@ -388,7 +359,7 @@ func (smc *Controller) syncPrometheus( err = controllerhelpers.Prune( ctx, - oslices.ToSlice(requiredLatencyPrometheusRule, requiredAlertsPrometheusRule, requiredTablePrometheusRule), + oslices.FilterOutNil(oslices.ToSlice(requiredResources.AlertsPrometheusRule, requiredResources.LatencyPrometheusRule, 
requiredResources.TablePrometheusRule)), prometheusRules, &controllerhelpers.PruneControlFuncs{ DeleteFunc: smc.monitoringClient.PrometheusRules(sm.Namespace).Delete, @@ -399,7 +370,7 @@ func (smc *Controller) syncPrometheus( err = controllerhelpers.Prune( ctx, - oslices.ToSlice(requiredScyllaDBServiceMonitor), + oslices.FilterOutNil(oslices.ToSlice(requiredResources.ScyllaDBServiceMonitor)), serviceMonitors, &controllerhelpers.PruneControlFuncs{ DeleteFunc: smc.monitoringClient.ServiceMonitors(sm.Namespace).Delete, @@ -410,7 +381,7 @@ func (smc *Controller) syncPrometheus( err = controllerhelpers.Prune( ctx, - certChainConfigs.GetMetaSecrets(), + oslices.FilterOutNil(certChainConfigs.GetMetaSecrets()), secrets, &controllerhelpers.PruneControlFuncs{ DeleteFunc: smc.kubeClient.CoreV1().Secrets(sm.Namespace).Delete, @@ -421,7 +392,7 @@ func (smc *Controller) syncPrometheus( err = controllerhelpers.Prune( ctx, - certChainConfigs.GetMetaConfigMaps(), + oslices.FilterOutNil(certChainConfigs.GetMetaConfigMaps()), configMaps, &controllerhelpers.PruneControlFuncs{ DeleteFunc: smc.kubeClient.CoreV1().ConfigMaps(sm.Namespace).Delete, @@ -437,25 +408,31 @@ func (smc *Controller) syncPrometheus( // Apply required objects. 
var applyErrors []error - applyConfigurations := []resourceapply.ApplyConfigUntyped{ - resourceapply.ApplyConfig[*corev1.ServiceAccount]{ - Required: requiredPrometheusSA, + var applyConfigurations []resourceapply.ApplyConfigUntyped + if requiredResources.ServiceAccount != nil { + applyConfigurations = append(applyConfigurations, resourceapply.ApplyConfig[*corev1.ServiceAccount]{ + Required: requiredResources.ServiceAccount, Control: resourceapply.ApplyControlFuncs[*corev1.ServiceAccount]{ GetCachedFunc: smc.serviceAccountLister.ServiceAccounts(sm.Namespace).Get, CreateFunc: smc.kubeClient.CoreV1().ServiceAccounts(sm.Namespace).Create, UpdateFunc: smc.kubeClient.CoreV1().ServiceAccounts(sm.Namespace).Update, DeleteFunc: smc.kubeClient.CoreV1().ServiceAccounts(sm.Namespace).Delete, }, - }.ToUntyped(), - resourceapply.ApplyConfig[*corev1.Service]{ - Required: requiredPrometheusService, + }.ToUntyped()) + } + if requiredResources.Service != nil { + applyConfigurations = append(applyConfigurations, resourceapply.ApplyConfig[*corev1.Service]{ + Required: requiredResources.Service, Control: resourceapply.ApplyControlFuncs[*corev1.Service]{ GetCachedFunc: smc.serviceLister.Services(sm.Namespace).Get, CreateFunc: smc.kubeClient.CoreV1().Services(sm.Namespace).Create, UpdateFunc: smc.kubeClient.CoreV1().Services(sm.Namespace).Update, }, - }.ToUntyped(), - resourceapply.ApplyConfig[*rbacv1.RoleBinding]{ + }.ToUntyped()) + } + if requiredResources.RoleBinding != nil { + requiredPrometheusRoleBinding := requiredResources.RoleBinding + applyConfigurations = append(applyConfigurations, resourceapply.ApplyConfig[*rbacv1.RoleBinding]{ Required: requiredPrometheusRoleBinding, Control: resourceapply.ApplyControlFuncs[*rbacv1.RoleBinding]{ GetCachedFunc: smc.roleBindingLister.RoleBindings(sm.Namespace).Get, @@ -463,8 +440,11 @@ func (smc *Controller) syncPrometheus( UpdateFunc: smc.kubeClient.RbacV1().RoleBindings(sm.Namespace).Update, DeleteFunc: 
smc.kubeClient.RbacV1().RoleBindings(sm.Namespace).Delete, }, - }.ToUntyped(), - resourceapply.ApplyConfig[*monitoringv1.Prometheus]{ + }.ToUntyped()) + } + if requiredResources.Prometheus != nil { + requiredPrometheus := requiredResources.Prometheus + applyConfigurations = append(applyConfigurations, resourceapply.ApplyConfig[*monitoringv1.Prometheus]{ Required: requiredPrometheus, Control: resourceapply.ApplyControlFuncs[*monitoringv1.Prometheus]{ GetCachedFunc: smc.prometheusLister.Prometheuses(sm.Namespace).Get, @@ -472,8 +452,11 @@ func (smc *Controller) syncPrometheus( UpdateFunc: smc.monitoringClient.Prometheuses(sm.Namespace).Update, DeleteFunc: smc.monitoringClient.Prometheuses(sm.Namespace).Delete, }, - }.ToUntyped(), - resourceapply.ApplyConfig[*monitoringv1.ServiceMonitor]{ + }.ToUntyped()) + } + if requiredResources.ScyllaDBServiceMonitor != nil { + requiredScyllaDBServiceMonitor := requiredResources.ScyllaDBServiceMonitor + applyConfigurations = append(applyConfigurations, resourceapply.ApplyConfig[*monitoringv1.ServiceMonitor]{ Required: requiredScyllaDBServiceMonitor, Control: resourceapply.ApplyControlFuncs[*monitoringv1.ServiceMonitor]{ GetCachedFunc: smc.serviceMonitorLister.ServiceMonitors(sm.Namespace).Get, @@ -481,8 +464,11 @@ func (smc *Controller) syncPrometheus( UpdateFunc: smc.monitoringClient.ServiceMonitors(sm.Namespace).Update, DeleteFunc: smc.monitoringClient.ServiceMonitors(sm.Namespace).Delete, }, - }.ToUntyped(), - resourceapply.ApplyConfig[*monitoringv1.PrometheusRule]{ + }.ToUntyped()) + } + if requiredResources.LatencyPrometheusRule != nil { + requiredLatencyPrometheusRule := requiredResources.LatencyPrometheusRule + applyConfigurations = append(applyConfigurations, resourceapply.ApplyConfig[*monitoringv1.PrometheusRule]{ Required: requiredLatencyPrometheusRule, Control: resourceapply.ApplyControlFuncs[*monitoringv1.PrometheusRule]{ GetCachedFunc: smc.prometheusRuleLister.PrometheusRules(sm.Namespace).Get, @@ -490,8 +476,11 @@ 
func (smc *Controller) syncPrometheus( UpdateFunc: smc.monitoringClient.PrometheusRules(sm.Namespace).Update, DeleteFunc: smc.monitoringClient.PrometheusRules(sm.Namespace).Delete, }, - }.ToUntyped(), - resourceapply.ApplyConfig[*monitoringv1.PrometheusRule]{ + }.ToUntyped()) + } + if requiredResources.AlertsPrometheusRule != nil { + requiredAlertsPrometheusRule := requiredResources.AlertsPrometheusRule + applyConfigurations = append(applyConfigurations, resourceapply.ApplyConfig[*monitoringv1.PrometheusRule]{ Required: requiredAlertsPrometheusRule, Control: resourceapply.ApplyControlFuncs[*monitoringv1.PrometheusRule]{ GetCachedFunc: smc.prometheusRuleLister.PrometheusRules(sm.Namespace).Get, @@ -499,8 +488,11 @@ func (smc *Controller) syncPrometheus( UpdateFunc: smc.monitoringClient.PrometheusRules(sm.Namespace).Update, DeleteFunc: smc.monitoringClient.PrometheusRules(sm.Namespace).Delete, }, - }.ToUntyped(), - resourceapply.ApplyConfig[*monitoringv1.PrometheusRule]{ + }.ToUntyped()) + } + if requiredResources.TablePrometheusRule != nil { + requiredTablePrometheusRule := requiredResources.TablePrometheusRule + applyConfigurations = append(applyConfigurations, resourceapply.ApplyConfig[*monitoringv1.PrometheusRule]{ Required: requiredTablePrometheusRule, Control: resourceapply.ApplyControlFuncs[*monitoringv1.PrometheusRule]{ GetCachedFunc: smc.prometheusRuleLister.PrometheusRules(sm.Namespace).Get, @@ -508,10 +500,10 @@ func (smc *Controller) syncPrometheus( UpdateFunc: smc.monitoringClient.PrometheusRules(sm.Namespace).Update, DeleteFunc: smc.monitoringClient.PrometheusRules(sm.Namespace).Delete, }, - }.ToUntyped(), + }.ToUntyped()) } - - if requiredIngress != nil { + if requiredResources.Ingress != nil { + requiredIngress := requiredResources.Ingress applyConfigurations = append(applyConfigurations, resourceapply.ApplyConfig[*networkingv1.Ingress]{ Required: requiredIngress, Control: resourceapply.ApplyControlFuncs[*networkingv1.Ingress]{ @@ -584,3 +576,68 @@ 
func (smc *Controller) syncPrometheus( return progressingConditions, nil } + +// requiredPrometheusResources holds all resources required for Prometheus deployment. +// Some of them may be nil, depending on the Prometheus mode. +type requiredPrometheusResources struct { + ServiceAccount *corev1.ServiceAccount + RoleBinding *rbacv1.RoleBinding + Service *corev1.Service + Ingress *networkingv1.Ingress + Prometheus *monitoringv1.Prometheus + LatencyPrometheusRule *monitoringv1.PrometheusRule + AlertsPrometheusRule *monitoringv1.PrometheusRule + TablePrometheusRule *monitoringv1.PrometheusRule + ScyllaDBServiceMonitor *monitoringv1.ServiceMonitor +} + +func makeRequiredPrometheusResources(sm *scyllav1alpha1.ScyllaDBMonitoring, soc *scyllav1alpha1.ScyllaOperatorConfig) (requiredPrometheusResources, error) { + var renderErrors []error + var resources requiredPrometheusResources + + var err error + switch prometheusMode(sm) { + case scyllav1alpha1.PrometheusModeManaged: + resources.ServiceAccount, _, err = makePrometheusSA(sm) + renderErrors = append(renderErrors, err) + + resources.RoleBinding, _, err = makePrometheusRoleBinding(sm) + renderErrors = append(renderErrors, err) + + resources.Service, _, err = makePrometheusService(sm) + renderErrors = append(renderErrors, err) + + resources.Ingress, _, err = makePrometheusIngress(sm) + renderErrors = append(renderErrors, err) + + resources.Prometheus, _, err = makePrometheus(sm, soc) + renderErrors = append(renderErrors, err) + case scyllav1alpha1.PrometheusModeExternal: + // No resources required. 
+ default: + return requiredPrometheusResources{}, fmt.Errorf("unknown Prometheus mode %q", prometheusMode(sm)) + } + + resources.LatencyPrometheusRule, _, err = makeLatencyPrometheusRule(sm) + renderErrors = append(renderErrors, err) + + resources.AlertsPrometheusRule, _, err = makeAlertsPrometheusRule(sm) + renderErrors = append(renderErrors, err) + + resources.TablePrometheusRule, _, err = makeTablePrometheusRule(sm) + renderErrors = append(renderErrors, err) + + resources.ScyllaDBServiceMonitor, _, err = makeScyllaDBServiceMonitor(sm) + renderErrors = append(renderErrors, err) + + return resources, apimachineryutilerrors.NewAggregate(renderErrors) +} + +func prometheusMode(sm *scyllav1alpha1.ScyllaDBMonitoring) scyllav1alpha1.PrometheusMode { + if sm.Spec.Components.Prometheus != nil { + return sm.Spec.Components.Prometheus.Mode + } + + // By default, Prometheus is managed by the ScyllaDB Operator. + return scyllav1alpha1.PrometheusModeManaged +} From 57291f966a624439f7560a928e26040f9766c032 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Grzegorz=20Burzy=C5=84ski?= Date: Tue, 2 Sep 2025 09:14:06 +0200 Subject: [PATCH 03/14] Handle custom Grafana Prometheus datasource --- .../grafana/v1alpha1/deployment.yaml | 21 +- .../grafana/v1alpha1/provisioning.cm.yaml | 26 +- pkg/controller/scylladbmonitoring/resource.go | 121 ++++ .../scylladbmonitoring/sync_grafana.go | 21 +- .../scylladbmonitoring/sync_grafana_test.go | 598 +++++++++++++++++- .../scylladbmonitoring/sync_prometheus.go | 16 +- pkg/naming/names.go | 8 + .../scylladbmonitoring/scylladbmonitoring.go | 9 +- 8 files changed, 803 insertions(+), 17 deletions(-) create mode 100644 pkg/controller/scylladbmonitoring/resource.go diff --git a/assets/monitoring/grafana/v1alpha1/deployment.yaml b/assets/monitoring/grafana/v1alpha1/deployment.yaml index e9ece01c7cc..af7507069a8 100644 --- a/assets/monitoring/grafana/v1alpha1/deployment.yaml +++ b/assets/monitoring/grafana/v1alpha1/deployment.yaml @@ -121,10 +121,18 @@ spec: 
mountPath: /var/run/secrets/grafana-admin-credentials - name: grafana-serving-certs mountPath: /var/run/secrets/grafana-serving-certs + {{- if and .prometheusTLSSpec .prometheusTLSSpec.ClientTLSKeyPairSecretRef }} - name: prometheus-client-certs mountPath: /var/run/secrets/prometheus-client-certs + {{- end }} + {{- if and .prometheusTLSSpec .prometheusTLSSpec.ServingCAConfigMapRef }} - name: prometheus-serving-ca mountPath: /var/run/configmaps/prometheus-serving-ca + {{- end }} + {{- if and .prometheusAuthSpec .prometheusAuthSpec.BearerTokenSecretRef }} + - name: prometheus-bearer-token + mountPath: /var/run/secrets/prometheus-bearer-token + {{- end }} - name: grafana-storage mountPath: /var/lib/grafana securityContext: @@ -159,12 +167,21 @@ spec: - name: grafana-serving-certs secret: secretName: "{{ .servingCertSecretName }}" + {{- if and .prometheusTLSSpec .prometheusTLSSpec.ClientTLSKeyPairSecretRef }} - name: prometheus-client-certs secret: - secretName: "{{ .scyllaDBMonitoringName }}-prometheus-client-grafana" + secretName: "{{ .prometheusTLSSpec.ClientTLSKeyPairSecretRef.Name }}" + {{- end }} + {{- if and .prometheusTLSSpec .prometheusTLSSpec.ServingCAConfigMapRef }} - name: prometheus-serving-ca configMap: - name: "{{ .scyllaDBMonitoringName }}-prometheus-serving-ca" + name: "{{ .prometheusTLSSpec.ServingCAConfigMapRef.Name }}" + {{- end }} + {{- if and .prometheusAuthSpec .prometheusAuthSpec.BearerTokenSecretRef }} + - name: prometheus-bearer-token + secret: + secretName: "{{ .prometheusAuthSpec.BearerTokenSecretRef.Name }}" + {{- end }} - name: grafana-storage emptyDir: sizeLimit: 100Mi diff --git a/assets/monitoring/grafana/v1alpha1/provisioning.cm.yaml b/assets/monitoring/grafana/v1alpha1/provisioning.cm.yaml index 91fc8662913..d6474eaf538 100644 --- a/assets/monitoring/grafana/v1alpha1/provisioning.cm.yaml +++ b/assets/monitoring/grafana/v1alpha1/provisioning.cm.yaml @@ -20,16 +20,36 @@ data: - name: prometheus type: prometheus access: proxy - url: 
"https://{{ .scyllaDBMonitoringName }}-prometheus:9090" + url: "{{ .prometheusDatasource.URL }}" isDefault: true version: 1 editable: false jsonData: timeInterval: "5s" + {{- if and .prometheusDatasource.TLS .prometheusDatasource.TLS.InsecureSkipVerify }} + tlsSkipVerify: true + {{- end }} + {{- if and .prometheusDatasource.TLS .prometheusDatasource.TLS.ServingCAConfigMapRef }} tlsAuthWithCACert: true + {{- end }} + {{- if and .prometheusDatasource.Auth .prometheusDatasource.Auth.BearerTokenSecretRef }} + httpHeaderName1: "Authorization" + {{- end }} + {{- $hasCACert := and .prometheusDatasource.TLS .prometheusDatasource.TLS.ServingCAConfigMapRef -}} + {{- $hasClientPair := and .prometheusDatasource.TLS .prometheusDatasource.TLS.ClientTLSKeyPairSecretRef -}} + {{- $hasBearer := and .prometheusDatasource.Auth .prometheusDatasource.Auth.BearerTokenSecretRef -}} + {{- if or $hasCACert $hasClientPair $hasBearer }} secureJsonData: - tlsCACert: "$__file{/var/run/configmaps/prometheus-serving-ca/ca-bundle.crt}" - tlsClientCert: "$__file{/var/run/secrets/prometheus-client-certs/tls.crt}" + {{- if $hasCACert }} + tlsCACert: "$__file{/var/run/configmaps/prometheus-serving-ca/{{ .prometheusDatasource.TLS.ServingCAConfigMapRef.Key }}}" + {{- end }} + {{- if $hasClientPair }} + tlsClientCert: "$__file{/var/run/secrets/prometheus-client-certs/tls.crt}" tlsClientKey: "$__file{/var/run/secrets/prometheus-client-certs/tls.key}" + {{- end }} + {{- if $hasBearer }} + httpHeaderValue1: "Bearer $__file{/var/run/secrets/prometheus-bearer-token/{{ .prometheusDatasource.Auth.BearerTokenSecretRef.Key }}}" + {{- end }} + {{- end }} notifiers.yaml: "" plugins.yaml: "" diff --git a/pkg/controller/scylladbmonitoring/resource.go b/pkg/controller/scylladbmonitoring/resource.go new file mode 100644 index 00000000000..8728ff912b5 --- /dev/null +++ b/pkg/controller/scylladbmonitoring/resource.go @@ -0,0 +1,121 @@ +package scylladbmonitoring + +import ( + "fmt" + + scyllav1alpha1 
"github.com/scylladb/scylla-operator/pkg/api/scylla/v1alpha1" + "github.com/scylladb/scylla-operator/pkg/naming" +) + +// grafanaPrometheusDatasourceSpec holds the spec of the Prometheus datasource used by Grafana. +// It describes both external (user-defined) and operator managed Prometheus. +// This struct is used as a template data structure to render the actual deployment and configmap. +type grafanaPrometheusDatasourceSpec struct { + URL string + TLS *grafanaPrometheusDatasourceTLSSpec + Auth *grafanaPrometheusDatasourceAuthSpec +} + +// grafanaPrometheusDatasourceTLSSpec holds TLS spec used by Grafana to connect to Prometheus. +// It describes both external (user-defined) and operator managed TLS. +// This struct is used as a template data structure to render the actual deployment and configmap. +type grafanaPrometheusDatasourceTLSSpec struct { + ClientTLSKeyPairSecretRef *scyllav1alpha1.LocalObjectReference + ServingCAConfigMapRef *scyllav1alpha1.LocalObjectKeySelector + InsecureSkipVerify bool +} + +// grafanaPrometheusDatasourceAuthSpec holds authentication spec used by Grafana to connect to Prometheus. +// This struct is used as a template data structure to render the actual deployment and configmap. 
+type grafanaPrometheusDatasourceAuthSpec struct { + BearerTokenSecretRef *scyllav1alpha1.LocalObjectKeySelector +} + +func makeGrafanaPrometheusDatasourceSpec(sm *scyllav1alpha1.ScyllaDBMonitoring) (*grafanaPrometheusDatasourceSpec, error) { + tls, err := makeGrafanaPrometheusDatasourceTLSSpec(sm) + if err != nil { + return nil, fmt.Errorf("can't make Grafana Prometheus datasource TLS spec: %w", err) + } + + return &grafanaPrometheusDatasourceSpec{ + URL: makeGrafanaDatasourceURL(sm), + TLS: tls, + Auth: makeGrafanaPrometheusDatasourceAuthSpec(sm), + }, nil +} + +func makeGrafanaPrometheusDatasourceAuthSpec(sm *scyllav1alpha1.ScyllaDBMonitoring) *grafanaPrometheusDatasourceAuthSpec { + ds := makePrometheusDatasourceSpec(sm) + if ds != nil { + if ds.PrometheusOptions.Auth != nil && ds.PrometheusOptions.Auth.Type == scyllav1alpha1.GrafanaPrometheusDatasourceAuthTypeBearerToken { + return &grafanaPrometheusDatasourceAuthSpec{ + BearerTokenSecretRef: ds.PrometheusOptions.Auth.BearerTokenOptions.SecretRef, + } + } + } + + // By default, both for managed and external Prometheus, no auth is used. + return nil +} + +func makeGrafanaPrometheusDatasourceTLSSpec(sm *scyllav1alpha1.ScyllaDBMonitoring) (*grafanaPrometheusDatasourceTLSSpec, error) { + ds := makePrometheusDatasourceSpec(sm) + if ds != nil { + // If the user provided a Prometheus datasource with TLS config, use it. + if ds.PrometheusOptions.TLS != nil { + tls := ds.PrometheusOptions.TLS + return &grafanaPrometheusDatasourceTLSSpec{ + ClientTLSKeyPairSecretRef: tls.ClientTLSKeyPairSecretRef, + ServingCAConfigMapRef: tls.CACertConfigMapRef, + InsecureSkipVerify: tls.InsecureSkipVerify, + }, nil + } + + // If the user provided a Prometheus datasource without TLS config, assume no TLS. 
+ return nil, nil + } + + managedPrometheusClientGrafanaSecretName, err := naming.ManagedPrometheusClientGrafanaSecretName(sm) + if err != nil { + return nil, fmt.Errorf("can't get managed Prometheus client Grafana secret name: %w", err) + } + + managedPrometheusServiceCAConfigMapName, err := naming.ManagedPrometheusServingCAConfigMapName(sm) + if err != nil { + return nil, fmt.Errorf("can't get managed Prometheus serving CA config map name: %w", err) + } + + // By default, use operator managed TLS certs. + return &grafanaPrometheusDatasourceTLSSpec{ + ClientTLSKeyPairSecretRef: &scyllav1alpha1.LocalObjectReference{ + Name: managedPrometheusClientGrafanaSecretName, + }, + ServingCAConfigMapRef: &scyllav1alpha1.LocalObjectKeySelector{ + Name: managedPrometheusServiceCAConfigMapName, + Key: "ca-bundle.crt", + }, + InsecureSkipVerify: false, + }, nil +} + +func makeGrafanaDatasourceURL(sm *scyllav1alpha1.ScyllaDBMonitoring) string { + if spec := getGrafanaSpec(sm); spec != nil && len(spec.Datasources) > 0 { + // We only support one datasource for now. + ds := spec.Datasources[0] + return ds.URL + } + + // Default to managed Prometheus service. + return "https://" + sm.Name + "-prometheus:9090" +} + +func makePrometheusDatasourceSpec(sm *scyllav1alpha1.ScyllaDBMonitoring) *scyllav1alpha1.GrafanaDatasourceSpec { + if spec := getGrafanaSpec(sm); spec != nil && len(spec.Datasources) > 0 { + // We only support one datasource for now. 
+ ds := spec.Datasources[0] + if ds.Type == scyllav1alpha1.GrafanaDatasourceTypePrometheus { + return &ds + } + } + return nil +} diff --git a/pkg/controller/scylladbmonitoring/sync_grafana.go b/pkg/controller/scylladbmonitoring/sync_grafana.go index 2b02467f8f4..a6aa23350c3 100644 --- a/pkg/controller/scylladbmonitoring/sync_grafana.go +++ b/pkg/controller/scylladbmonitoring/sync_grafana.go @@ -110,6 +110,11 @@ func makeGrafanaDeployment(sm *scyllav1alpha1.ScyllaDBMonitoring, soc *scyllav1a return nil, "", fmt.Errorf("dashboardsCMs can't be empty") } + prometheusDatasourceSpec, err := makeGrafanaPrometheusDatasourceSpec(sm) + if err != nil { + return nil, "", fmt.Errorf("can't make Prometheus datasource spec: %w", err) + } + return grafanav1alpha1assets.GrafanaDeploymentTemplate.Get().RenderObject(map[string]any{ "grafanaImage": grafanaImage, "bashToolsImage": bashToolsImage, @@ -120,6 +125,8 @@ func makeGrafanaDeployment(sm *scyllav1alpha1.ScyllaDBMonitoring, soc *scyllav1a "resources": resources, "restartTriggerHash": restartTriggerHash, "dashboardsCMs": dashboardsCMs, + "prometheusTLSSpec": prometheusDatasourceSpec.TLS, + "prometheusAuthSpec": prometheusDatasourceSpec.Auth, }) } @@ -170,7 +177,7 @@ func makeGrafanaConfigs(sm *scyllav1alpha1.ScyllaDBMonitoring) (*corev1.ConfigMa case scyllav1alpha1.ScyllaDBMonitoringTypeSAAS: defaultDashboard = "scylladb-latest/overview.json" default: - return nil, "", fmt.Errorf("unkown monitoring type: %q", t) + return nil, "", fmt.Errorf("unknown monitoring type: %q", t) } return grafanav1alpha1assets.GrafanaConfigsTemplate.Get().RenderObject(map[string]any{ @@ -213,9 +220,17 @@ func makeGrafanaDashboards(sm *scyllav1alpha1.ScyllaDBMonitoring) ([]*corev1.Con } func makeGrafanaProvisionings(sm *scyllav1alpha1.ScyllaDBMonitoring) (*corev1.ConfigMap, string, error) { - return grafanav1alpha1assets.GrafanaProvisioningConfigMapTemplate.Get().RenderObject(map[string]any{ + prometheusDatasourceSpec, err := 
makeGrafanaPrometheusDatasourceSpec(sm) + if err != nil { + return nil, "", fmt.Errorf("can't make Prometheus datasource spec: %w", err) + } + + datasourceConfig := map[string]any{ "scyllaDBMonitoringName": sm.Name, - }) + "prometheusDatasource": prometheusDatasourceSpec, + } + + return grafanav1alpha1assets.GrafanaProvisioningConfigMapTemplate.Get().RenderObject(datasourceConfig) } func makeGrafanaService(sm *scyllav1alpha1.ScyllaDBMonitoring) (*corev1.Service, string, error) { diff --git a/pkg/controller/scylladbmonitoring/sync_grafana_test.go b/pkg/controller/scylladbmonitoring/sync_grafana_test.go index 9572281fe6a..af03545d5a0 100644 --- a/pkg/controller/scylladbmonitoring/sync_grafana_test.go +++ b/pkg/controller/scylladbmonitoring/sync_grafana_test.go @@ -467,10 +467,10 @@ spec: secretName: "serving-secret" - name: prometheus-client-certs secret: - secretName: "sm-name-prometheus-client-grafana" + secretName: "sm-name-prometheus-client-grafana-2w56m" - name: prometheus-serving-ca configMap: - name: "sm-name-prometheus-serving-ca" + name: "sm-name-prometheus-serving-ca-3ah2z" - name: grafana-storage emptyDir: sizeLimit: 100Mi @@ -676,10 +676,258 @@ spec: secretName: "serving-secret" - name: prometheus-client-certs secret: - secretName: "sm-name-prometheus-client-grafana" + secretName: "sm-name-prometheus-client-grafana-2w56m" - name: prometheus-serving-ca configMap: - name: "sm-name-prometheus-serving-ca" + name: "sm-name-prometheus-serving-ca-3ah2z" + - name: grafana-storage + emptyDir: + sizeLimit: 100Mi + securityContext: + runAsNonRoot: true + runAsUser: 472 + runAsGroup: 472 + fsGroup: 472 + seccompProfile: + type: RuntimeDefault +`, "\n"), + expectedErr: nil, + }, + { + name: "external prometheus datasource", + sm: &scyllav1alpha1.ScyllaDBMonitoring{ + ObjectMeta: metav1.ObjectMeta{ + Name: "sm-name", + }, + Spec: scyllav1alpha1.ScyllaDBMonitoringSpec{ + Type: pointer.Ptr(scyllav1alpha1.ScyllaDBMonitoringTypePlatform), + Components: 
&scyllav1alpha1.Components{ + Prometheus: &scyllav1alpha1.PrometheusSpec{ + Mode: scyllav1alpha1.PrometheusModeExternal, + }, + Grafana: &scyllav1alpha1.GrafanaSpec{ + Datasources: []scyllav1alpha1.GrafanaDatasourceSpec{ + { + Type: scyllav1alpha1.GrafanaDatasourceTypePrometheus, + Name: "prometheus", + URL: "https://prometheus.example.com/", + PrometheusOptions: &scyllav1alpha1.GrafanaPrometheusDatasourceOptions{ + TLS: &scyllav1alpha1.GrafanaDatasourceTLSSpec{ + CACertConfigMapRef: &scyllav1alpha1.LocalObjectKeySelector{ + Name: "prometheus-serving-ca", + Key: "ca.crt", + }, + ClientTLSKeyPairSecretRef: &scyllav1alpha1.LocalObjectReference{ + Name: "prometheus-client-grafana", + }, + }, + Auth: &scyllav1alpha1.GrafanaPrometheusDatasourceAuthSpec{ + Type: scyllav1alpha1.GrafanaPrometheusDatasourceAuthTypeBearerToken, + BearerTokenOptions: &scyllav1alpha1.GrafanaPrometheusDatasourceBearerTokenAuthOptions{ + SecretRef: &scyllav1alpha1.LocalObjectKeySelector{ + Name: "prometheus-bearer-token", + Key: "token", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + soc: defaultSOC, + grafanaServingCertSecretName: "serving-secret", + dashboardsCMs: []*corev1.ConfigMap{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "sm-name-grafana-scylladb-dashboards-scylladb-6.0", + Annotations: map[string]string{ + "internal.scylla-operator.scylladb.com/dashboard-name": "scylladb-6.0", + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "sm-name-grafana-scylladb-dashboards-scylladb-6.1", + Annotations: map[string]string{ + "internal.scylla-operator.scylladb.com/dashboard-name": "scylladb-6.1", + }, + }, + }, + }, + restartTriggerHash: "restart-trigger-hash", + expectedString: strings.TrimLeft(` +apiVersion: apps/v1 +kind: Deployment +metadata: + name: "sm-name-grafana" +spec: + selector: + matchLabels: + scylla-operator.scylladb.com/deployment-name: "sm-name-grafana" + strategy: + type: RollingUpdate + template: + metadata: + annotations: + 
scylla-operator.scylladb.com/inputs-hash: "restart-trigger-hash" + labels: + scylla-operator.scylladb.com/deployment-name: "sm-name-grafana" + spec: + serviceAccountName: "sm-name-grafana" + affinity: + {} + tolerations: + null + initContainers: + - name: gzip + image: "bash-tools-image" + command: + - /usr/bin/bash + - -euExo + - pipefail + - -O + - inherit_errexit + - -c + args: + - | + mkdir /var/run/decompressed-configmaps/grafana-scylladb-dashboards + find /var/run/configmaps -mindepth 2 -maxdepth 2 -type d | while read -r d; do + tp="/var/run/decompressed-configmaps/${d#"/var/run/configmaps/"}" + mkdir "${tp}" + find "${d}" -mindepth 1 -maxdepth 1 -name '*.gz.base64' -exec cp -L -t "${tp}" {} + + done + find /var/run/decompressed-configmaps -name '*.gz.base64' | while read -r f; do + base64 -d "${f}" > "${f%.base64}" + rm "${f}" + done + find /var/run/decompressed-configmaps -name '*.gz' -exec gzip -d {} + + volumeMounts: + - name: decompressed-configmaps + mountPath: /var/run/decompressed-configmaps + - name: "sm-name-grafana-scylladb-dashboards-scylladb-6.0" + mountPath: "/var/run/configmaps/grafana-scylladb-dashboards/scylladb-6.0" + - name: "sm-name-grafana-scylladb-dashboards-scylladb-6.1" + mountPath: "/var/run/configmaps/grafana-scylladb-dashboards/scylladb-6.1" + containers: + - name: grafana + image: "grafana-image" + command: + - grafana-server + - --packaging=docker + - --homepath=/usr/share/grafana + - --config=/var/run/configmaps/grafana-configs/grafana.ini + env: + - name: GF_PATHS_PROVISIONING + - name: GF_PATHS_HOME + - name: GF_PATHS_DATA + - name: GF_PATHS_LOGS + - name: GF_PATHS_PLUGINS + - name: GF_PATHS_CONFIG + ports: + - containerPort: 3000 + name: grafana + protocol: TCP + readinessProbe: + initialDelaySeconds: 10 + periodSeconds: 30 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 1 + httpGet: + path: /api/health + port: 3000 + scheme: HTTPS + livenessProbe: + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 
5 + successThreshold: 1 + failureThreshold: 10 + httpGet: + path: /api/health + port: 3000 + scheme: HTTPS + resources: + {} + volumeMounts: + - name: grafana-configs + mountPath: /var/run/configmaps/grafana-configs + - name: decompressed-configmaps + mountPath: /var/run/dashboards/scylladb + subPath: grafana-scylladb-dashboards + - name: grafana-provisioning + mountPath: /var/run/configmaps/grafana-provisioning/access-control/access-control.yaml + subPath: access-control.yaml + - name: grafana-provisioning + mountPath: /var/run/configmaps/grafana-provisioning/alerting/alerting.yaml + subPath: alerting.yaml + - name: grafana-provisioning + mountPath: /var/run/configmaps/grafana-provisioning/dashboards/dashboards.yaml + subPath: dashboards.yaml + - name: grafana-provisioning + mountPath: /var/run/configmaps/grafana-provisioning/datasources/datasources.yaml + subPath: datasources.yaml + - name: grafana-provisioning + mountPath: /var/run/configmaps/grafana-provisioning/notifiers/notifiers.yaml + subPath: notifiers.yaml + - name: grafana-provisioning + mountPath: /var/run/configmaps/grafana-provisioning/plugins/plugins.yaml + subPath: plugins.yaml + - name: grafana-admin-credentials + mountPath: /var/run/secrets/grafana-admin-credentials + - name: grafana-serving-certs + mountPath: /var/run/secrets/grafana-serving-certs + - name: prometheus-client-certs + mountPath: /var/run/secrets/prometheus-client-certs + - name: prometheus-serving-ca + mountPath: /var/run/configmaps/prometheus-serving-ca + - name: prometheus-bearer-token + mountPath: /var/run/secrets/prometheus-bearer-token + - name: grafana-storage + mountPath: /var/lib/grafana + securityContext: + allowPrivilegeEscalation: false + privileged: false + runAsNonRoot: true + runAsUser: 472 + runAsGroup: 472 + capabilities: + drop: + - ALL + volumes: + - name: decompressed-configmaps + emptyDir: + sizeLimit: 50Mi + - name: grafana-configs + configMap: + name: "sm-name-grafana-configs" + - name: 
"sm-name-grafana-scylladb-dashboards-scylladb-6.0" + configMap: + name: "sm-name-grafana-scylladb-dashboards-scylladb-6.0" + - name: "sm-name-grafana-scylladb-dashboards-scylladb-6.1" + configMap: + name: "sm-name-grafana-scylladb-dashboards-scylladb-6.1" + - name: grafana-provisioning + configMap: + name: "sm-name-grafana-provisioning" + - name: grafana-admin-credentials + secret: + secretName: "sm-name-grafana-admin-credentials" + - name: grafana-serving-certs + secret: + secretName: "serving-secret" + - name: prometheus-client-certs + secret: + secretName: "prometheus-client-grafana" + - name: prometheus-serving-ca + configMap: + name: "prometheus-serving-ca" + - name: prometheus-bearer-token + secret: + secretName: "prometheus-bearer-token" - name: grafana-storage emptyDir: sizeLimit: 100Mi @@ -723,3 +971,345 @@ spec: }) } } + +func Test_makeGrafanaProvisionings(t *testing.T) { + testScyllaDBMonitoring := func() *scyllav1alpha1.ScyllaDBMonitoring { + return &scyllav1alpha1.ScyllaDBMonitoring{ + ObjectMeta: metav1.ObjectMeta{ + Name: "sm-name", + }, + Spec: scyllav1alpha1.ScyllaDBMonitoringSpec{ + EndpointsSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "scylla", + }, + }, + }, + } + } + + tt := []struct { + name string + sm *scyllav1alpha1.ScyllaDBMonitoring + expectedString string + }{ + { + name: "default", + sm: testScyllaDBMonitoring(), + expectedString: strings.TrimLeft(` +apiVersion: v1 +kind: ConfigMap +metadata: + name: "sm-name-grafana-provisioning" +data: + access-control.yaml: "" + alerting.yaml: "" + dashboards.yaml: | + apiVersion: 1 + providers: + - name: dashboards + type: file + updateIntervalSeconds: 30 + options: + path: /var/run/dashboards + foldersFromFilesStructure: true + datasources.yaml: | + apiVersion: 1 + datasources: + - name: prometheus + type: prometheus + access: proxy + url: "https://sm-name-prometheus:9090" + isDefault: true + version: 1 + editable: false + jsonData: + timeInterval: "5s" + 
tlsAuthWithCACert: true + secureJsonData: + tlsCACert: "$__file{/var/run/configmaps/prometheus-serving-ca/ca-bundle.crt}" + tlsClientCert: "$__file{/var/run/secrets/prometheus-client-certs/tls.crt}" + tlsClientKey: "$__file{/var/run/secrets/prometheus-client-certs/tls.key}" + notifiers.yaml: "" + plugins.yaml: "" +`, "\n"), + }, + { + name: "custom prometheus datasource with mTLS and bearer token", + sm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := testScyllaDBMonitoring() + sm.Spec.Components = &scyllav1alpha1.Components{ + Prometheus: &scyllav1alpha1.PrometheusSpec{ + Mode: scyllav1alpha1.PrometheusModeExternal, + }, + Grafana: &scyllav1alpha1.GrafanaSpec{ + Datasources: []scyllav1alpha1.GrafanaDatasourceSpec{ + { + Type: scyllav1alpha1.GrafanaDatasourceTypePrometheus, + Name: "prometheus", + URL: "https://custom-prometheus:9090", + PrometheusOptions: &scyllav1alpha1.GrafanaPrometheusDatasourceOptions{ + TLS: &scyllav1alpha1.GrafanaDatasourceTLSSpec{ + CACertConfigMapRef: &scyllav1alpha1.LocalObjectKeySelector{ + Name: "custom-prometheus-ca", + Key: "custom-ca-bundle-key.crt", + }, + ClientTLSKeyPairSecretRef: &scyllav1alpha1.LocalObjectReference{ + Name: "custom-prometheus-client-tls", + }, + }, + Auth: &scyllav1alpha1.GrafanaPrometheusDatasourceAuthSpec{ + Type: scyllav1alpha1.GrafanaPrometheusDatasourceAuthTypeBearerToken, + BearerTokenOptions: &scyllav1alpha1.GrafanaPrometheusDatasourceBearerTokenAuthOptions{ + SecretRef: &scyllav1alpha1.LocalObjectKeySelector{ + Name: "custom-prometheus-bearer-token", + Key: "token-key", + }, + }, + }, + }, + }, + }, + }, + } + return sm + }(), + expectedString: strings.TrimLeft(` +apiVersion: v1 +kind: ConfigMap +metadata: + name: "sm-name-grafana-provisioning" +data: + access-control.yaml: "" + alerting.yaml: "" + dashboards.yaml: | + apiVersion: 1 + providers: + - name: dashboards + type: file + updateIntervalSeconds: 30 + options: + path: /var/run/dashboards + foldersFromFilesStructure: true + datasources.yaml: | 
+ apiVersion: 1 + datasources: + - name: prometheus + type: prometheus + access: proxy + url: "https://custom-prometheus:9090" + isDefault: true + version: 1 + editable: false + jsonData: + timeInterval: "5s" + tlsAuthWithCACert: true + httpHeaderName1: "Authorization" + secureJsonData: + tlsCACert: "$__file{/var/run/configmaps/prometheus-serving-ca/custom-ca-bundle-key.crt}" + tlsClientCert: "$__file{/var/run/secrets/prometheus-client-certs/tls.crt}" + tlsClientKey: "$__file{/var/run/secrets/prometheus-client-certs/tls.key}" + httpHeaderValue1: "Bearer $__file{/var/run/secrets/prometheus-bearer-token/token-key}" + notifiers.yaml: "" + plugins.yaml: "" +`, "\n"), + }, + { + name: "custom prometheus datasource with TLS without verification, no auth", + sm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := testScyllaDBMonitoring() + sm.Spec.Components = &scyllav1alpha1.Components{ + Prometheus: &scyllav1alpha1.PrometheusSpec{ + Mode: scyllav1alpha1.PrometheusModeExternal, + }, + Grafana: &scyllav1alpha1.GrafanaSpec{ + Datasources: []scyllav1alpha1.GrafanaDatasourceSpec{ + { + Type: scyllav1alpha1.GrafanaDatasourceTypePrometheus, + Name: "prometheus", + URL: "https://custom-prometheus:9090", + PrometheusOptions: &scyllav1alpha1.GrafanaPrometheusDatasourceOptions{ + TLS: &scyllav1alpha1.GrafanaDatasourceTLSSpec{ + InsecureSkipVerify: true, + }, + }, + }, + }, + }, + } + return sm + }(), + expectedString: strings.TrimLeft(` +apiVersion: v1 +kind: ConfigMap +metadata: + name: "sm-name-grafana-provisioning" +data: + access-control.yaml: "" + alerting.yaml: "" + dashboards.yaml: | + apiVersion: 1 + providers: + - name: dashboards + type: file + updateIntervalSeconds: 30 + options: + path: /var/run/dashboards + foldersFromFilesStructure: true + datasources.yaml: | + apiVersion: 1 + datasources: + - name: prometheus + type: prometheus + access: proxy + url: "https://custom-prometheus:9090" + isDefault: true + version: 1 + editable: false + jsonData: + timeInterval: "5s" + 
tlsSkipVerify: true + notifiers.yaml: "" + plugins.yaml: "" +`, "\n"), + }, + { + name: "custom prometheus datasource without TLS, no auth", + sm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := testScyllaDBMonitoring() + sm.Spec.Components = &scyllav1alpha1.Components{ + Prometheus: &scyllav1alpha1.PrometheusSpec{ + Mode: scyllav1alpha1.PrometheusModeExternal, + }, + Grafana: &scyllav1alpha1.GrafanaSpec{ + Datasources: []scyllav1alpha1.GrafanaDatasourceSpec{ + { + Type: scyllav1alpha1.GrafanaDatasourceTypePrometheus, + Name: "prometheus", + URL: "http://custom-prometheus:9090", + PrometheusOptions: &scyllav1alpha1.GrafanaPrometheusDatasourceOptions{}, + }, + }, + }, + } + return sm + }(), + expectedString: strings.TrimLeft(` +apiVersion: v1 +kind: ConfigMap +metadata: + name: "sm-name-grafana-provisioning" +data: + access-control.yaml: "" + alerting.yaml: "" + dashboards.yaml: | + apiVersion: 1 + providers: + - name: dashboards + type: file + updateIntervalSeconds: 30 + options: + path: /var/run/dashboards + foldersFromFilesStructure: true + datasources.yaml: | + apiVersion: 1 + datasources: + - name: prometheus + type: prometheus + access: proxy + url: "http://custom-prometheus:9090" + isDefault: true + version: 1 + editable: false + jsonData: + timeInterval: "5s" + notifiers.yaml: "" + plugins.yaml: "" +`, "\n"), + }, + { + name: "custom prometheus datasource with bearer token auth, no TLS", + sm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := testScyllaDBMonitoring() + sm.Spec.Components = &scyllav1alpha1.Components{ + Prometheus: &scyllav1alpha1.PrometheusSpec{ + Mode: scyllav1alpha1.PrometheusModeExternal, + }, + Grafana: &scyllav1alpha1.GrafanaSpec{ + Datasources: []scyllav1alpha1.GrafanaDatasourceSpec{ + { + Type: scyllav1alpha1.GrafanaDatasourceTypePrometheus, + Name: "prometheus", + URL: "http://custom-prometheus:9090", + PrometheusOptions: &scyllav1alpha1.GrafanaPrometheusDatasourceOptions{ + Auth: 
&scyllav1alpha1.GrafanaPrometheusDatasourceAuthSpec{ + Type: scyllav1alpha1.GrafanaPrometheusDatasourceAuthTypeBearerToken, + BearerTokenOptions: &scyllav1alpha1.GrafanaPrometheusDatasourceBearerTokenAuthOptions{ + SecretRef: &scyllav1alpha1.LocalObjectKeySelector{ + Name: "custom-prometheus-bearer-token", + Key: "token-key", + }, + }, + }, + }, + }, + }, + }, + } + return sm + }(), + expectedString: strings.TrimLeft(` +apiVersion: v1 +kind: ConfigMap +metadata: + name: "sm-name-grafana-provisioning" +data: + access-control.yaml: "" + alerting.yaml: "" + dashboards.yaml: | + apiVersion: 1 + providers: + - name: dashboards + type: file + updateIntervalSeconds: 30 + options: + path: /var/run/dashboards + foldersFromFilesStructure: true + datasources.yaml: | + apiVersion: 1 + datasources: + - name: prometheus + type: prometheus + access: proxy + url: "http://custom-prometheus:9090" + isDefault: true + version: 1 + editable: false + jsonData: + timeInterval: "5s" + httpHeaderName1: "Authorization" + secureJsonData: + httpHeaderValue1: "Bearer $__file{/var/run/secrets/prometheus-bearer-token/token-key}" + notifiers.yaml: "" + plugins.yaml: "" +`, "\n"), + }, + } + + for _, tc := range tt { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + _, objString, err := makeGrafanaProvisionings(tc.sm) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if objString != tc.expectedString { + t.Errorf("expected and got strings differ:\n%s", gcmp.Diff( + strings.Split(tc.expectedString, "\n"), + strings.Split(objString, "\n"), + )) + } + }) + } +} diff --git a/pkg/controller/scylladbmonitoring/sync_prometheus.go b/pkg/controller/scylladbmonitoring/sync_prometheus.go index a7b9efed8fa..366eae8af7f 100644 --- a/pkg/controller/scylladbmonitoring/sync_prometheus.go +++ b/pkg/controller/scylladbmonitoring/sync_prometheus.go @@ -215,10 +215,15 @@ func (smc *Controller) syncPrometheus( ) ([]metav1.Condition, error) { var progressingConditions []metav1.Condition + 
managedPrometheusServiceCAConfigMapName, err := naming.ManagedPrometheusServingCAConfigMapName(sm) + if err != nil { + return progressingConditions, fmt.Errorf("can't get managed Prometheus serving CA config map name: %w", err) + } + prometheusServingCertChainConfig := &okubecrypto.CertChainConfig{ CAConfig: &okubecrypto.CAConfig{ MetaConfig: okubecrypto.MetaConfig{ - Name: fmt.Sprintf("%s-prometheus-serving-ca", sm.Name), + Name: managedPrometheusServiceCAConfigMapName, Labels: getPrometheusLabels(sm), }, Validity: 10 * 365 * 24 * time.Hour, @@ -226,7 +231,7 @@ func (smc *Controller) syncPrometheus( }, CABundleConfig: &okubecrypto.CABundleConfig{ MetaConfig: okubecrypto.MetaConfig{ - Name: fmt.Sprintf("%s-prometheus-serving-ca", sm.Name), + Name: managedPrometheusServiceCAConfigMapName, Labels: getPrometheusLabels(sm), }, }, @@ -255,6 +260,11 @@ func (smc *Controller) syncPrometheus( }, } + managedPrometheusClientGrafanaSecretName, err := naming.ManagedPrometheusClientGrafanaSecretName(sm) + if err != nil { + return progressingConditions, fmt.Errorf("can't get managed Prometheus client Grafana secret name: %w", err) + } + prometheusClientCertChainConfig := &okubecrypto.CertChainConfig{ CAConfig: &okubecrypto.CAConfig{ MetaConfig: okubecrypto.MetaConfig{ @@ -273,7 +283,7 @@ func (smc *Controller) syncPrometheus( CertConfigs: []*okubecrypto.CertificateConfig{ { MetaConfig: okubecrypto.MetaConfig{ - Name: fmt.Sprintf("%s-prometheus-client-grafana", sm.Name), + Name: managedPrometheusClientGrafanaSecretName, Labels: getPrometheusLabels(sm), }, Validity: 10 * 365 * 24 * time.Hour, diff --git a/pkg/naming/names.go b/pkg/naming/names.go index 708b3d6b59e..7248ff2237c 100644 --- a/pkg/naming/names.go +++ b/pkg/naming/names.go @@ -363,3 +363,11 @@ func RemoteNamespaceName(sc *scyllav1alpha1.ScyllaDBCluster, dc *scyllav1alpha1. 
return fmt.Sprintf("%s-%s", sc.Namespace, suffix), nil } + +func ManagedPrometheusClientGrafanaSecretName(sm *scyllav1alpha1.ScyllaDBMonitoring) (string, error) { + return generateTruncatedHashedName(apimachineryutilvalidation.DNS1123SubdomainMaxLength, sm.Name, "prometheus-client-grafana") +} + +func ManagedPrometheusServingCAConfigMapName(sm *scyllav1alpha1.ScyllaDBMonitoring) (string, error) { + return generateTruncatedHashedName(apimachineryutilvalidation.DNS1123SubdomainMaxLength, sm.Name, "prometheus-serving-ca") +} diff --git a/test/e2e/set/scylladbmonitoring/scylladbmonitoring.go b/test/e2e/set/scylladbmonitoring/scylladbmonitoring.go index 5c600a6b92b..89286563a77 100644 --- a/test/e2e/set/scylladbmonitoring/scylladbmonitoring.go +++ b/test/e2e/set/scylladbmonitoring/scylladbmonitoring.go @@ -26,6 +26,7 @@ import ( scyllav1 "github.com/scylladb/scylla-operator/pkg/api/scylla/v1" scyllav1alpha1 "github.com/scylladb/scylla-operator/pkg/api/scylla/v1alpha1" "github.com/scylladb/scylla-operator/pkg/controllerhelpers" + "github.com/scylladb/scylla-operator/pkg/naming" "github.com/scylladb/scylla-operator/pkg/pointer" scyllafixture "github.com/scylladb/scylla-operator/test/e2e/fixture/scylla" "github.com/scylladb/scylla-operator/test/e2e/framework" @@ -195,7 +196,9 @@ func verifyManagedPrometheus(ctx context.Context, f *framework.Framework, sm *sc // Some of these may be fixable by manually verifying it in the operator sync loop so it can also be // consumed by clients, but it's a bigger effort. 
- prometheusServingCABundleConfigMap, err := f.KubeClient().CoreV1().ConfigMaps(f.Namespace()).Get(ctx, fmt.Sprintf("%s-prometheus-serving-ca", sm.Name), metav1.GetOptions{}) + prometheusServingCABundleConfigMapName, err := naming.ManagedPrometheusServingCAConfigMapName(sm) + o.Expect(err).NotTo(o.HaveOccurred()) + prometheusServingCABundleConfigMap, err := f.KubeClient().CoreV1().ConfigMaps(f.Namespace()).Get(ctx, prometheusServingCABundleConfigMapName, metav1.GetOptions{}) o.Expect(err).NotTo(o.HaveOccurred()) prometheusServingCACerts, _ := verification.VerifyAndParseCABundle(prometheusServingCABundleConfigMap) o.Expect(prometheusServingCACerts).To(o.HaveLen(1)) @@ -203,7 +206,9 @@ func verifyManagedPrometheus(ctx context.Context, f *framework.Framework, sm *sc prometheusServingCAPool := x509.NewCertPool() prometheusServingCAPool.AddCert(prometheusServingCACerts[0]) - prometheusGrafanaClientSecret, err := f.KubeClient().CoreV1().Secrets(f.Namespace()).Get(ctx, fmt.Sprintf("%s-prometheus-client-grafana", sm.Name), metav1.GetOptions{}) + prometheusGrafanaClientSecretName, err := naming.ManagedPrometheusClientGrafanaSecretName(sm) + o.Expect(err).NotTo(o.HaveOccurred()) + prometheusGrafanaClientSecret, err := f.KubeClient().CoreV1().Secrets(f.Namespace()).Get(ctx, prometheusGrafanaClientSecretName, metav1.GetOptions{}) o.Expect(err).NotTo(o.HaveOccurred()) _, prometheusGrafanaClientCertBytes, _, prometheusGrafanaClientKeyBytes := verification.VerifyAndParseTLSCert(prometheusGrafanaClientSecret, verification.TLSCertOptions{ IsCA: pointer.Ptr(false), From 17d005d6cff593ec74f239c4c664b06297ae3f9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Grzegorz=20Burzy=C5=84ski?= Date: Thu, 4 Sep 2025 12:04:25 +0200 Subject: [PATCH 04/14] Handle ScyllaDBMonitoring validation in admission webhook server --- .../scylladbmonitoring_validation.go | 266 +++++++++++ .../scylladbmonitoring_validation_test.go | 423 ++++++++++++++++++ pkg/cmd/operator/webhooks.go | 6 + 3 files changed, 695 
insertions(+) create mode 100644 pkg/api/scylla/validation/scylladbmonitoring_validation.go create mode 100644 pkg/api/scylla/validation/scylladbmonitoring_validation_test.go diff --git a/pkg/api/scylla/validation/scylladbmonitoring_validation.go b/pkg/api/scylla/validation/scylladbmonitoring_validation.go new file mode 100644 index 00000000000..bed8c0823bd --- /dev/null +++ b/pkg/api/scylla/validation/scylladbmonitoring_validation.go @@ -0,0 +1,266 @@ +package validation + +import ( + scyllav1alpha1 "github.com/scylladb/scylla-operator/pkg/api/scylla/v1alpha1" + oslices "github.com/scylladb/scylla-operator/pkg/helpers/slices" + "k8s.io/apimachinery/pkg/util/validation/field" +) + +func ValidateScyllaDBMonitoring(sm *scyllav1alpha1.ScyllaDBMonitoring) field.ErrorList { + var allErrs field.ErrorList + + allErrs = append(allErrs, validateScyllaDBMonitoringSpec(&sm.Spec, field.NewPath("spec"))...) + + return allErrs +} + +func ValidateScyllaDBMonitoringUpdate(new, old *scyllav1alpha1.ScyllaDBMonitoring) field.ErrorList { + var allErrs field.ErrorList + + allErrs = append(allErrs, ValidateScyllaDBMonitoring(new)...) + allErrs = append(allErrs, validateScyllaDBMonitoringSpecUpdate(&new.Spec, &old.Spec, field.NewPath("spec"))...) + + return allErrs +} + +func GetWarningsOnScyllaDBMonitoringCreate(_ *scyllav1alpha1.ScyllaDBMonitoring) []string { + return nil +} + +func GetWarningsOnScyllaDBMonitoringUpdate(_, _ *scyllav1alpha1.ScyllaDBMonitoring) []string { + return nil +} + +func validateScyllaDBMonitoringSpecUpdate(new, old *scyllav1alpha1.ScyllaDBMonitoringSpec, fldPath *field.Path) field.ErrorList { + var allErrs field.ErrorList + + if new.Components != nil && old.Components != nil { + allErrs = append(allErrs, validateScyllaDBMonitoringComponentsUpdate(new.Components, old.Components, fldPath.Child("components"))...) 
+ } + + return allErrs +} + +func validateScyllaDBMonitoringComponentsUpdate(new, old *scyllav1alpha1.Components, fldPath *field.Path) field.ErrorList { + var allErrs field.ErrorList + + if new.Prometheus != nil && old.Prometheus != nil { + allErrs = append(allErrs, validateScyllaDBMonitoringSpecComponentsPrometheusUpdate(new.Prometheus, old.Prometheus, fldPath.Child("prometheus"))...) + } + + return allErrs +} + +func validateScyllaDBMonitoringSpecComponentsPrometheusUpdate(new, old *scyllav1alpha1.PrometheusSpec, fldPath *field.Path) field.ErrorList { + var allErrs field.ErrorList + + if new.Mode != old.Mode { + allErrs = append(allErrs, field.Invalid(fldPath.Child("mode"), old.Mode, "is immutable and cannot be changed")) + } + + return allErrs +} + +func validateScyllaDBMonitoringSpec(sm *scyllav1alpha1.ScyllaDBMonitoringSpec, fldPath *field.Path) field.ErrorList { + var allErrs field.ErrorList + + if sm.Components != nil { + allErrs = append(allErrs, validateScyllaDBMonitoringComponents(sm.Components, fldPath.Child("components"))...) + } + + return allErrs +} + +func validateScyllaDBMonitoringComponents(components *scyllav1alpha1.Components, fldPath *field.Path) field.ErrorList { + var allErrs field.ErrorList + + if components.Prometheus != nil { + allErrs = append(allErrs, validateScyllaDBMonitoringSpecComponentsPrometheus(components.Prometheus, fldPath.Child("prometheus"))...) + } + + if components.Grafana != nil { + allErrs = append(allErrs, validateScyllaDBMonitoringSpecComponentsGrafana(components.Grafana, fldPath.Child("grafana"))...) + } + + allErrs = append(allErrs, validateScyllaDBMonitoringComponentsInterdependencies(components, fldPath)...) 
+ + return allErrs +} + +var allowedPrometheusModes = []string{ + string(scyllav1alpha1.PrometheusModeManaged), + string(scyllav1alpha1.PrometheusModeExternal), +} + +func validateScyllaDBMonitoringSpecComponentsPrometheus(ps *scyllav1alpha1.PrometheusSpec, fldPath *field.Path) field.ErrorList { + var allErrs field.ErrorList + + if !oslices.ContainsItem(allowedPrometheusModes, string(ps.Mode)) { + allErrs = append(allErrs, field.NotSupported(fldPath.Child("mode"), string(ps.Mode), allowedPrometheusModes)) + } + + return allErrs +} + +func validateScyllaDBMonitoringComponentsInterdependencies(components *scyllav1alpha1.Components, fldPath *field.Path) field.ErrorList { + var allErrs field.ErrorList + + if components.Prometheus != nil && components.Prometheus.Mode == scyllav1alpha1.PrometheusModeExternal { + if components.Grafana == nil { + allErrs = append(allErrs, field.Required(fldPath.Child("grafana"), "must be specified when Prometheus is in External mode")) + } else { + datasources := components.Grafana.Datasources + if len(datasources) == 0 { + allErrs = append(allErrs, field.Required(fldPath.Child("grafana").Child("datasources"), "exactly one datasource must be specified when Prometheus is in External mode")) + } else if len(datasources) > 1 { + allErrs = append(allErrs, field.TooMany(fldPath.Child("grafana").Child("datasources"), len(datasources), 1)) + } + } + } + + if components.Prometheus == nil || components.Prometheus.Mode == scyllav1alpha1.PrometheusModeManaged { + if components.Grafana != nil && len(components.Grafana.Datasources) > 0 { + allErrs = append(allErrs, field.Forbidden(fldPath.Child("grafana").Child("datasources"), "must not be specified when Prometheus is in Managed mode")) + } + } + + return allErrs +} + +func validateScyllaDBMonitoringSpecComponentsGrafana(gs *scyllav1alpha1.GrafanaSpec, fldPath *field.Path) field.ErrorList { + var allErrs field.ErrorList + + for i, ds := range gs.Datasources { + allErrs = append(allErrs, 
validateScyllaDBMonitoringGrafanaDatasource(ds, fldPath.Child("datasources").Index(i))...) + } + + return allErrs +} + +var allowedDatasourceTypes = []string{ + string(scyllav1alpha1.GrafanaDatasourceTypePrometheus), +} + +var allowedDatasourceNames = []string{ + "prometheus", +} + +func validateScyllaDBMonitoringGrafanaDatasource(ds scyllav1alpha1.GrafanaDatasourceSpec, fldPath *field.Path) field.ErrorList { + var allErrs field.ErrorList + + if !oslices.ContainsItem(allowedDatasourceTypes, string(ds.Type)) { + allErrs = append(allErrs, field.NotSupported(fldPath.Child("type"), string(ds.Type), allowedDatasourceTypes)) + } + if !oslices.ContainsItem(allowedDatasourceNames, ds.Name) { + allErrs = append(allErrs, field.NotSupported(fldPath.Child("name"), ds.Name, allowedDatasourceNames)) + } + if ds.URL == "" { + allErrs = append(allErrs, field.Required(fldPath.Child("url"), "must be specified")) + } + + allErrs = append(allErrs, validateScyllaDBMonitoringGrafanaDatasourceOptions(&ds, fldPath)...) + + return allErrs +} + +func validateScyllaDBMonitoringGrafanaDatasourceOptions(ds *scyllav1alpha1.GrafanaDatasourceSpec, fldPath *field.Path) field.ErrorList { + var allErrs field.ErrorList + + if ds.Type == scyllav1alpha1.GrafanaDatasourceTypePrometheus { + allErrs = append(allErrs, validateScyllaDBMonitoringGrafanaPrometheusDatasource(ds, fldPath)...) 
+ } + if ds.Type != scyllav1alpha1.GrafanaDatasourceTypePrometheus && ds.PrometheusOptions != nil { + allErrs = append(allErrs, field.Forbidden(fldPath.Child("prometheusOptions"), "must not be specified when datasource type is not Prometheus")) + } + + return allErrs +} + +var supportedPrometheusAuthTypes = []string{ + string(scyllav1alpha1.GrafanaPrometheusDatasourceAuthTypeNoAuthentication), + string(scyllav1alpha1.GrafanaPrometheusDatasourceAuthTypeBearerToken), +} + +func validateScyllaDBMonitoringGrafanaPrometheusDatasource(ds *scyllav1alpha1.GrafanaDatasourceSpec, fldPath *field.Path) field.ErrorList { + var allErrs field.ErrorList + + opts := ds.PrometheusOptions + if opts == nil { + allErrs = append(allErrs, field.Required(fldPath.Child("prometheusOptions"), "must be specified for Prometheus datasource")) + return allErrs + } + + if opts.Auth != nil { + allErrs = append(allErrs, validateScyllaDBMonitoringPrometheusAuth(opts.Auth, fldPath.Child("prometheusOptions").Child("auth"))...) + } + + if opts.TLS != nil { + allErrs = append(allErrs, validateScyllaDBMonitoringGrafanaPrometheusDatasourceTLS(opts.TLS, fldPath.Child("prometheusOptions").Child("tls"))...) + } + + return allErrs +} + +func validateScyllaDBMonitoringPrometheusAuth(auth *scyllav1alpha1.GrafanaPrometheusDatasourceAuthSpec, fldPath *field.Path) field.ErrorList { + var allErrs field.ErrorList + + if !oslices.ContainsItem(supportedPrometheusAuthTypes, string(auth.Type)) { + allErrs = append(allErrs, field.NotSupported(fldPath.Child("type"), string(auth.Type), supportedPrometheusAuthTypes)) + } + + if auth.Type == scyllav1alpha1.GrafanaPrometheusDatasourceAuthTypeBearerToken { + allErrs = append(allErrs, validateScyllaDBMonitoringPrometheusAuthBearerToken(auth.BearerTokenOptions, fldPath.Child("bearerTokenOptions"))...) 
+	}
+
+	return allErrs
+}
+
+// validateScyllaDBMonitoringPrometheusAuthBearerToken validates the BearerToken auth options rooted at fldPath.
+// A nil opts (bearerTokenOptions omitted) is reported as a required field instead of being dereferenced.
+func validateScyllaDBMonitoringPrometheusAuthBearerToken(opts *scyllav1alpha1.GrafanaPrometheusDatasourceBearerTokenAuthOptions, fldPath *field.Path) field.ErrorList {
+	if opts == nil {
+		return field.ErrorList{field.Required(fldPath, "must be specified for BearerToken auth")}
+	}
+	if opts.SecretRef == nil {
+		return field.ErrorList{field.Required(fldPath.Child("secretRef"), "must be specified for BearerToken auth")}
+	}
+	return validateLocalObjectKeySelector(opts.SecretRef, fldPath.Child("secretRef"))
+}
+
+func validateLocalObjectKeySelector(ref *scyllav1alpha1.LocalObjectKeySelector, fldPath *field.Path) field.ErrorList {
+	var allErrs field.ErrorList
+
+	if ref.Key == "" {
+		allErrs = append(allErrs, field.Required(fldPath.Child("key"), "must be specified"))
+	}
+
+	if ref.Name == "" {
+		allErrs = append(allErrs, field.Required(fldPath.Child("name"), "must be specified"))
+	}
+
+	return allErrs
+}
+
+func validateScyllaDBMonitoringGrafanaPrometheusDatasourceTLS(tls *scyllav1alpha1.GrafanaDatasourceTLSSpec, fldPath *field.Path) field.ErrorList {
+	var allErrs field.ErrorList
+
+	if tls.ClientTLSKeyPairSecretRef != nil {
+		allErrs = append(allErrs, validateLocalObjectReference(tls.ClientTLSKeyPairSecretRef, fldPath.Child("clientTLSKeyPairSecretRef"))...)
+	}
+
+	if tls.CACertConfigMapRef != nil {
+		allErrs = append(allErrs, validateLocalObjectKeySelector(tls.CACertConfigMapRef, fldPath.Child("caCertConfigMapRef"))...)
+ } + + return allErrs +} + +func validateLocalObjectReference(ref *scyllav1alpha1.LocalObjectReference, fldPath *field.Path) field.ErrorList { + var allErrs field.ErrorList + + if ref.Name == "" { + allErrs = append(allErrs, field.Required(fldPath.Child("name"), "must be specified")) + } + + return allErrs +} diff --git a/pkg/api/scylla/validation/scylladbmonitoring_validation_test.go b/pkg/api/scylla/validation/scylladbmonitoring_validation_test.go new file mode 100644 index 00000000000..8d17653ca9b --- /dev/null +++ b/pkg/api/scylla/validation/scylladbmonitoring_validation_test.go @@ -0,0 +1,423 @@ +package validation + +import ( + "reflect" + "testing" + + "github.com/google/go-cmp/cmp" + scyllav1alpha1 "github.com/scylladb/scylla-operator/pkg/api/scylla/v1alpha1" + "k8s.io/apimachinery/pkg/util/validation/field" +) + +type validateScyllaDBMonitoringTestCase struct { + name string + sm *scyllav1alpha1.ScyllaDBMonitoring + expectedErrorList field.ErrorList +} + +func validScyllaDBMonitoringWithExternalPrometheus() *scyllav1alpha1.ScyllaDBMonitoring { + return &scyllav1alpha1.ScyllaDBMonitoring{ + Spec: scyllav1alpha1.ScyllaDBMonitoringSpec{ + Components: &scyllav1alpha1.Components{ + Prometheus: &scyllav1alpha1.PrometheusSpec{ + Mode: scyllav1alpha1.PrometheusModeExternal, + }, + Grafana: &scyllav1alpha1.GrafanaSpec{ + Datasources: []scyllav1alpha1.GrafanaDatasourceSpec{ + { + Name: "prometheus", + Type: scyllav1alpha1.GrafanaDatasourceTypePrometheus, + URL: "https://external-prom:9090", + PrometheusOptions: &scyllav1alpha1.GrafanaPrometheusDatasourceOptions{ + TLS: &scyllav1alpha1.GrafanaDatasourceTLSSpec{ + CACertConfigMapRef: &scyllav1alpha1.LocalObjectKeySelector{ + Name: "ca-cert-configmap", + Key: "ca-cert.pem", + }, + InsecureSkipVerify: false, + ClientTLSKeyPairSecretRef: &scyllav1alpha1.LocalObjectReference{ + Name: "client-tls-secret", + }, + }, + Auth: &scyllav1alpha1.GrafanaPrometheusDatasourceAuthSpec{ + Type: 
scyllav1alpha1.GrafanaPrometheusDatasourceAuthTypeBearerToken, + BearerTokenOptions: &scyllav1alpha1.GrafanaPrometheusDatasourceBearerTokenAuthOptions{ + SecretRef: &scyllav1alpha1.LocalObjectKeySelector{ + Name: "bearer-token-secret", + Key: "token-key", + }, + }, + }, + }, + }, + }, + }, + }, + }, + } +} + +func validateScyllaDBMonitoringTestCases() []validateScyllaDBMonitoringTestCase { + return []validateScyllaDBMonitoringTestCase{ + { + name: "default monitoring", + sm: &scyllav1alpha1.ScyllaDBMonitoring{ + Spec: scyllav1alpha1.ScyllaDBMonitoringSpec{ + Components: &scyllav1alpha1.Components{}, + }, + }, + expectedErrorList: nil, + }, + { + name: "valid monitoring with external prometheus", + sm: validScyllaDBMonitoringWithExternalPrometheus(), + expectedErrorList: nil, + }, + { + name: "invalid monitoring with datasources when prometheus is managed", + sm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := validScyllaDBMonitoringWithExternalPrometheus() + sm.Spec.Components.Prometheus.Mode = scyllav1alpha1.PrometheusModeManaged + return sm + }(), + expectedErrorList: field.ErrorList{ + { + Type: field.ErrorTypeForbidden, + Field: "spec.components.grafana.datasources", + BadValue: "", + Detail: "must not be specified when Prometheus is in Managed mode", + }, + }, + }, + { + name: "invalid monitoring with unsupported prometheus mode", + sm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := validScyllaDBMonitoringWithExternalPrometheus() + sm.Spec.Components.Prometheus.Mode = "unsupported-mode" + return sm + }(), + expectedErrorList: field.ErrorList{ + { + Type: field.ErrorTypeNotSupported, + Field: "spec.components.prometheus.mode", + BadValue: "unsupported-mode", + Detail: `supported values: "Managed", "External"`, + }, + }, + }, + { + name: "invalid monitoring with external prometheus and missing grafana", + sm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := validScyllaDBMonitoringWithExternalPrometheus() + sm.Spec.Components.Grafana = nil + return sm 
+ }(), + expectedErrorList: field.ErrorList{ + { + Type: field.ErrorTypeRequired, + Field: "spec.components.grafana", + BadValue: "", + Detail: "must be specified when Prometheus is in External mode", + }, + }, + }, + { + name: "invalid monitoring with external prometheus and missing datasource", + sm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := validScyllaDBMonitoringWithExternalPrometheus() + sm.Spec.Components.Grafana.Datasources = nil + return sm + }(), + expectedErrorList: field.ErrorList{ + { + Type: field.ErrorTypeRequired, + Field: "spec.components.grafana.datasources", + BadValue: "", + Detail: "exactly one datasource must be specified when Prometheus is in External mode", + }, + }, + }, + { + name: "invalid monitoring with external prometheus and too many datasources", + sm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := validScyllaDBMonitoringWithExternalPrometheus() + sm.Spec.Components.Grafana.Datasources = append(sm.Spec.Components.Grafana.Datasources, scyllav1alpha1.GrafanaDatasourceSpec{ + Name: "prometheus", + Type: scyllav1alpha1.GrafanaDatasourceTypePrometheus, + URL: "https://another-prom:9090", + PrometheusOptions: &scyllav1alpha1.GrafanaPrometheusDatasourceOptions{}, + }) + return sm + }(), + expectedErrorList: field.ErrorList{ + { + Type: field.ErrorTypeTooMany, + Field: "spec.components.grafana.datasources", + BadValue: 2, + Detail: "must have at most 1 item", + }, + }, + }, + { + name: "invalid monitoring with unsupported datasource type", + sm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := validScyllaDBMonitoringWithExternalPrometheus() + sm.Spec.Components.Grafana.Datasources[0].PrometheusOptions = nil + sm.Spec.Components.Grafana.Datasources[0].Type = "unsupported-type" + return sm + }(), + expectedErrorList: field.ErrorList{ + { + Type: field.ErrorTypeNotSupported, + Field: "spec.components.grafana.datasources[0].type", + BadValue: "unsupported-type", + Detail: `supported values: "Prometheus"`, + }, + }, + }, + { + 
name: "invalid monitoring with unsupported datasource name", + sm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := validScyllaDBMonitoringWithExternalPrometheus() + sm.Spec.Components.Grafana.Datasources[0].Name = "unsupported-name" + return sm + }(), + expectedErrorList: field.ErrorList{ + { + Type: field.ErrorTypeNotSupported, + Field: "spec.components.grafana.datasources[0].name", + BadValue: "unsupported-name", + Detail: `supported values: "prometheus"`, + }, + }, + }, + { + name: "invalid monitoring with external prometheus and missing datasource URL", + sm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := validScyllaDBMonitoringWithExternalPrometheus() + sm.Spec.Components.Grafana.Datasources[0].URL = "" + return sm + }(), + expectedErrorList: field.ErrorList{ + { + Type: field.ErrorTypeRequired, + Field: "spec.components.grafana.datasources[0].url", + BadValue: "", + Detail: "must be specified", + }, + }, + }, + { + name: "invalid monitoring with external prometheus and unsupported auth type", + sm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := validScyllaDBMonitoringWithExternalPrometheus() + sm.Spec.Components.Grafana.Datasources[0].PrometheusOptions.Auth.Type = "unsupported-auth-type" + return sm + }(), + expectedErrorList: field.ErrorList{ + { + Type: field.ErrorTypeNotSupported, + Field: "spec.components.grafana.datasources[0].prometheusOptions.auth.type", + BadValue: "unsupported-auth-type", + Detail: `supported values: "NoAuthentication", "BearerToken"`, + }, + }, + }, + { + name: "invalid monitoring with external prometheus, BearerToken auth type and missing secret ref", + sm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := validScyllaDBMonitoringWithExternalPrometheus() + sm.Spec.Components.Grafana.Datasources[0].PrometheusOptions.Auth.BearerTokenOptions.SecretRef = nil + return sm + }(), + expectedErrorList: field.ErrorList{ + { + Type: field.ErrorTypeRequired, + Field: 
"spec.components.grafana.datasources[0].prometheusOptions.auth.bearerTokenOptions.secretRef", + BadValue: "", + Detail: "must be specified for BearerToken auth", + }, + }, + }, + { + name: "invalid monitoring with external prometheus, BearerToken auth type and empty secret ref name", + sm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := validScyllaDBMonitoringWithExternalPrometheus() + sm.Spec.Components.Grafana.Datasources[0].PrometheusOptions.Auth.BearerTokenOptions.SecretRef.Name = "" + return sm + }(), + expectedErrorList: field.ErrorList{ + { + Type: field.ErrorTypeRequired, + Field: "spec.components.grafana.datasources[0].prometheusOptions.auth.bearerTokenOptions.secretRef.name", + BadValue: "", + Detail: "must be specified", + }, + }, + }, + { + name: "invalid monitoring with external prometheus, BearerToken auth type and empty secret ref key", + sm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := validScyllaDBMonitoringWithExternalPrometheus() + sm.Spec.Components.Grafana.Datasources[0].PrometheusOptions.Auth.BearerTokenOptions.SecretRef.Key = "" + return sm + }(), + expectedErrorList: field.ErrorList{ + { + Type: field.ErrorTypeRequired, + Field: "spec.components.grafana.datasources[0].prometheusOptions.auth.bearerTokenOptions.secretRef.key", + BadValue: "", + Detail: "must be specified", + }, + }, + }, + { + name: "invalid monitoring with external prometheus, TLS client cert/key secret ref and empty name", + sm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := validScyllaDBMonitoringWithExternalPrometheus() + sm.Spec.Components.Grafana.Datasources[0].PrometheusOptions.TLS.ClientTLSKeyPairSecretRef.Name = "" + return sm + }(), + expectedErrorList: field.ErrorList{ + { + Type: field.ErrorTypeRequired, + Field: "spec.components.grafana.datasources[0].prometheusOptions.tls.clientTLSKeyPairSecretRef.name", + BadValue: "", + Detail: "must be specified", + }, + }, + }, + { + name: "invalid monitoring with external prometheus, TLS CA cert config map 
ref and empty name", + sm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := validScyllaDBMonitoringWithExternalPrometheus() + sm.Spec.Components.Grafana.Datasources[0].PrometheusOptions.TLS.CACertConfigMapRef.Name = "" + return sm + }(), + expectedErrorList: field.ErrorList{ + { + Type: field.ErrorTypeRequired, + Field: "spec.components.grafana.datasources[0].prometheusOptions.tls.caCertConfigMapRef.name", + BadValue: "", + Detail: "must be specified", + }, + }, + }, + { + name: "invalid monitoring with external prometheus, TLS CA cert config map ref and empty key", + sm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := validScyllaDBMonitoringWithExternalPrometheus() + sm.Spec.Components.Grafana.Datasources[0].PrometheusOptions.TLS.CACertConfigMapRef.Key = "" + return sm + }(), + expectedErrorList: field.ErrorList{ + { + Type: field.ErrorTypeRequired, + Field: "spec.components.grafana.datasources[0].prometheusOptions.tls.caCertConfigMapRef.key", + BadValue: "", + Detail: "must be specified", + }, + }, + }, + { + name: "invalid monitoring with unsupported datasource type and prometheus options specified", + sm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := validScyllaDBMonitoringWithExternalPrometheus() + sm.Spec.Components.Grafana.Datasources[0].Type = "other-type" + return sm + }(), + expectedErrorList: field.ErrorList{ + { + Type: field.ErrorTypeNotSupported, + Field: "spec.components.grafana.datasources[0].type", + BadValue: "other-type", + Detail: `supported values: "Prometheus"`, + }, + { + Type: field.ErrorTypeForbidden, + Field: "spec.components.grafana.datasources[0].prometheusOptions", + BadValue: "", + Detail: "must not be specified when datasource type is not Prometheus", + }, + }, + }, + } +} + +func TestValidateScyllaDBMonitoring(t *testing.T) { + t.Parallel() + + for _, tc := range validateScyllaDBMonitoringTestCases() { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + actualErrorList := ValidateScyllaDBMonitoring(tc.sm) + if 
!reflect.DeepEqual(actualErrorList, tc.expectedErrorList) { + t.Errorf("expected error list differs from actual: %s", cmp.Diff(tc.expectedErrorList, actualErrorList)) + } + }) + } +} + +func TestValidateScyllaDBMonitoringUpdate(t *testing.T) { + t.Parallel() + + tt := []struct { + name string + oldSm *scyllav1alpha1.ScyllaDBMonitoring + newSm *scyllav1alpha1.ScyllaDBMonitoring + expectedErrorList field.ErrorList + }{ + { + name: "no changes", + oldSm: validScyllaDBMonitoringWithExternalPrometheus(), + newSm: validScyllaDBMonitoringWithExternalPrometheus(), + expectedErrorList: nil, + }, + { + name: "change prometheus mode", + oldSm: validScyllaDBMonitoringWithExternalPrometheus(), + newSm: func() *scyllav1alpha1.ScyllaDBMonitoring { + sm := validScyllaDBMonitoringWithExternalPrometheus() + sm.Spec.Components.Grafana = nil // Grafana must be nil in managed mode. + sm.Spec.Components.Prometheus.Mode = scyllav1alpha1.PrometheusModeManaged + return sm + }(), + expectedErrorList: field.ErrorList{ + { + Type: field.ErrorTypeInvalid, + Field: "spec.components.prometheus.mode", + BadValue: scyllav1alpha1.PrometheusModeExternal, + Detail: "is immutable and cannot be changed", + }, + }, + }, + } + + // Map regular test cases to the update test cases to ensure validation rules are wired up correctly. 
+ regularTestCases := validateScyllaDBMonitoringTestCases() + for _, rtc := range regularTestCases { + tt = append(tt, struct { + name string + oldSm *scyllav1alpha1.ScyllaDBMonitoring + newSm *scyllav1alpha1.ScyllaDBMonitoring + expectedErrorList field.ErrorList + }{ + name: "update - " + rtc.name, + oldSm: rtc.sm, + newSm: rtc.sm, + expectedErrorList: rtc.expectedErrorList, + }) + } + + for _, tc := range tt { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + actualErrorList := ValidateScyllaDBMonitoringUpdate(tc.newSm, tc.oldSm) + if !reflect.DeepEqual(actualErrorList, tc.expectedErrorList) { + t.Errorf("expected error list differs from actual: %s", cmp.Diff(tc.expectedErrorList, actualErrorList)) + } + }) + } +} diff --git a/pkg/cmd/operator/webhooks.go b/pkg/cmd/operator/webhooks.go index 59e4b48f950..c112c116f9f 100644 --- a/pkg/cmd/operator/webhooks.go +++ b/pkg/cmd/operator/webhooks.go @@ -81,6 +81,12 @@ var ( GetWarningsOnCreateFunc: validation.GetWarningsOnScyllaDBManagerTaskCreate, GetWarningsOnUpdateFunc: validation.GetWarningsOnScyllaDBManagerTaskUpdate, }, + scyllav1alpha1.GroupVersion.WithResource("scylladbmonitorings"): &GenericValidator[*scyllav1alpha1.ScyllaDBMonitoring]{ + ValidateCreateFunc: validation.ValidateScyllaDBMonitoring, + ValidateUpdateFunc: validation.ValidateScyllaDBMonitoringUpdate, + GetWarningsOnCreateFunc: validation.GetWarningsOnScyllaDBMonitoringCreate, + GetWarningsOnUpdateFunc: validation.GetWarningsOnScyllaDBMonitoringUpdate, + }, } ) From 83e7a17f520bc84bf79a0d2291e37754a2fe52e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Grzegorz=20Burzy=C5=84ski?= Date: Tue, 9 Sep 2025 09:17:18 +0200 Subject: [PATCH 05/14] Add ScyllaDBMonitoring to webhook server CR and VWC --- helm/scylla-operator/templates/validatingwebhook.yaml | 1 + helm/scylla-operator/templates/view_clusterrole.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/helm/scylla-operator/templates/validatingwebhook.yaml 
b/helm/scylla-operator/templates/validatingwebhook.yaml index 233f1ed2907..2ba3f72bbd7 100644 --- a/helm/scylla-operator/templates/validatingwebhook.yaml +++ b/helm/scylla-operator/templates/validatingwebhook.yaml @@ -39,3 +39,4 @@ webhooks: - scylladbclusters - scylladbmanagerclusterregistrations - scylladbmanagertasks + - scylladbmonitorings diff --git a/helm/scylla-operator/templates/view_clusterrole.yaml b/helm/scylla-operator/templates/view_clusterrole.yaml index ad18da58f9a..f6f9693ab75 100644 --- a/helm/scylla-operator/templates/view_clusterrole.yaml +++ b/helm/scylla-operator/templates/view_clusterrole.yaml @@ -16,6 +16,7 @@ rules: - scylladbclusters - scylladbmanagerclusterregistrations - scylladbmanagertasks + - scylladbmonitorings verbs: - get - list From 7b29e2630de81535a99a0d711aa8f23a5b35bfc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Grzegorz=20Burzy=C5=84ski?= Date: Wed, 27 Aug 2025 11:33:09 +0200 Subject: [PATCH 06/14] Update generated --- ...scylla-operator.clusterserviceversion.yaml | 1 + ...ylla.scylladb.com_scylladbmonitorings.yaml | 107 ++++++++ ...c.authorization.k8s.io_v1_clusterrole.yaml | 1 + deploy/operator.yaml | 99 ++++++++ .../00_scyllacluster_clusterrole_view.yaml | 1 + deploy/operator/10_validatingwebhook.yaml | 1 + .../scylladbmonitorings.rst | 233 ++++++++++++++++++ ...ylla.scylladb.com_scylladbmonitorings.yaml | 97 ++++++++ .../scylla/v1alpha1/zz_generated.deepcopy.go | 154 ++++++++++++ 9 files changed, 694 insertions(+) diff --git a/bundle/manifests/scylla-operator.clusterserviceversion.yaml b/bundle/manifests/scylla-operator.clusterserviceversion.yaml index aae5db71e85..3d3da39ff65 100644 --- a/bundle/manifests/scylla-operator.clusterserviceversion.yaml +++ b/bundle/manifests/scylla-operator.clusterserviceversion.yaml @@ -544,6 +544,7 @@ spec: - scylladbclusters - scylladbmanagerclusterregistrations - scylladbmanagertasks + - scylladbmonitorings sideEffects: None targetPort: 5000 type: ValidatingAdmissionWebhook diff --git 
a/bundle/manifests/scylla.scylladb.com_scylladbmonitorings.yaml b/bundle/manifests/scylla.scylladb.com_scylladbmonitorings.yaml index 60ee8f9e1a6..ae672ff531c 100644 --- a/bundle/manifests/scylla.scylladb.com_scylladbmonitorings.yaml +++ b/bundle/manifests/scylla.scylladb.com_scylladbmonitorings.yaml @@ -70,6 +70,106 @@ spec: to Grafana without authentication. type: boolean type: object + datasources: + description: |- + datasources is a list of Grafana datasources to configure. + It's expected to be set when using Prometheus component in `External` mode. + At most one datasource is allowed for now (only Prometheus is supported). + items: + properties: + name: + default: prometheus + description: |- + name is the name of the datasource as it will appear in Grafana. + Only "prometheus" is supported as that's the datasource name expected by the ScyllaDB monitoring stack dashboards. + enum: + - prometheus + type: string + prometheusOptions: + description: prometheusOptions defines Prometheus-specific + options. + properties: + auth: + description: auth holds authentication options for + connecting to Prometheus. + properties: + bearerTokenOptions: + description: bearerToken holds options for Bearer + token authentication. + properties: + secretRef: + description: secretRef is a reference to + a key in a Secret holding a Bearer token + to use to authenticate with Prometheus. + properties: + key: + description: key within the selected + object. + minLength: 1 + type: string + name: + description: name of the selected object. + minLength: 1 + type: string + type: object + type: object + type: + default: NoAuthentication + description: type is the type of authentication + to use. + type: string + type: object + tls: + description: tls holds TLS configuration for connecting + to Prometheus over HTTPS. + properties: + caCertConfigMapRef: + description: |- + caCert is a reference to a key within the CA bundle ConfigMap. The key should hold the CA cert in PEM format. 
+ When not specified, system CAs are used. + properties: + key: + description: key within the selected object. + minLength: 1 + type: string + name: + description: name of the selected object. + minLength: 1 + type: string + type: object + clientTLSKeyPairSecretRef: + description: |- + clientTLSKeyPairSecretRef is a reference to a Secret holding client TLS certificate and key for mTLS authentication. + It's expected to be a standard Kubernetes TLS Secret with `tls.crt` and `tls.key` keys. + properties: + name: + description: Name of the referent. + type: string + type: object + insecureSkipVerify: + default: false + description: insecureSkipVerify controls whether + to skip server certificate verification. + type: boolean + type: object + type: object + type: + allOf: + - enum: + - Prometheus + - enum: + - Prometheus + default: Prometheus + description: type is the type of the datasource. Only + "prometheus" is supported. + type: string + url: + description: url is the URL of the datasource. + minLength: 1 + type: string + type: object + maxItems: 1 + type: array exposeOptions: description: exposeOptions specifies options for exposing Grafana UI. @@ -1172,6 +1272,13 @@ spec: type: object type: object type: object + mode: + default: Managed + description: mode defines the mode of the Prometheus instance. + enum: + - Managed + - External + type: string placement: description: placement describes restrictions for the nodes Prometheus is scheduled on. 
diff --git a/bundle/manifests/scyllacluster-view_rbac.authorization.k8s.io_v1_clusterrole.yaml b/bundle/manifests/scyllacluster-view_rbac.authorization.k8s.io_v1_clusterrole.yaml index e3eeae3ff2c..d5898b8c5f4 100644 --- a/bundle/manifests/scyllacluster-view_rbac.authorization.k8s.io_v1_clusterrole.yaml +++ b/bundle/manifests/scyllacluster-view_rbac.authorization.k8s.io_v1_clusterrole.yaml @@ -17,6 +17,7 @@ rules: - scylladbclusters - scylladbmanagerclusterregistrations - scylladbmanagertasks + - scylladbmonitorings verbs: - get - list diff --git a/deploy/operator.yaml b/deploy/operator.yaml index beb10df0999..f579a3ae5a7 100644 --- a/deploy/operator.yaml +++ b/deploy/operator.yaml @@ -45840,6 +45840,96 @@ spec: description: insecureEnableAnonymousAccess allows access to Grafana without authentication. type: boolean type: object + datasources: + description: |- + datasources is a list of Grafana datasources to configure. + It's expected to be set when using Prometheus component in `External` mode. + At most one datasource is allowed for now (only Prometheus is supported). + items: + properties: + name: + default: prometheus + description: |- + name is the name of the datasource as it will appear in Grafana. + Only "prometheus" is supported as that's the datasource name expected by the ScyllaDB monitoring stack dashboards. + enum: + - prometheus + type: string + prometheusOptions: + description: prometheusOptions defines Prometheus-specific options. + properties: + auth: + description: auth holds authentication options for connecting to Prometheus. + properties: + bearerTokenOptions: + description: bearerToken holds options for Bearer token authentication. + properties: + secretRef: + description: secretRef is a reference to a key in a Secret holding a Bearer token to use to authenticate with Prometheus. + properties: + key: + description: key within the selected object. + minLength: 1 + type: string + name: + description: name of the selected object. 
+ minLength: 1 + type: string + type: object + type: object + type: + default: NoAuthentication + description: type is the type of authentication to use. + type: string + type: object + tls: + description: tls holds TLS configuration for connecting to Prometheus over HTTPS. + properties: + caCertConfigMapRef: + description: |- + caCert is a reference to a key within the CA bundle ConfigMap. The key should hold the CA cert in PEM format. + When not specified, system CAs are used. + properties: + key: + description: key within the selected object. + minLength: 1 + type: string + name: + description: name of the selected object. + minLength: 1 + type: string + type: object + clientTLSKeyPairSecretRef: + description: |- + clientTLSKeyPairSecretRef is a reference to a Secret holding client TLS certificate and key for mTLS authentication. + It's expected to be a standard Kubernetes TLS Secret with `tls.crt` and `tls.key` keys. + properties: + name: + description: Name of the referent. + type: string + type: object + insecureSkipVerify: + default: false + description: insecureSkipVerify controls whether to skip server certificate verification. + type: boolean + type: object + type: object + type: + allOf: + - enum: + - Prometheus + - enum: + - Prometheus + default: Prometheus + description: type is the type of the datasource. Only "prometheus" is supported. + type: string + url: + description: url is the URL of the datasource. + minLength: 1 + type: string + type: object + maxItems: 1 + type: array exposeOptions: description: exposeOptions specifies options for exposing Grafana UI. properties: @@ -46879,6 +46969,13 @@ spec: type: object type: object type: object + mode: + default: Managed + description: mode defines the mode of the Prometheus instance. + enum: + - Managed + - External + type: string placement: description: placement describes restrictions for the nodes Prometheus is scheduled on. 
properties: @@ -48416,6 +48513,7 @@ rules: - scylladbclusters - scylladbmanagerclusterregistrations - scylladbmanagertasks + - scylladbmonitorings verbs: - get - list @@ -48674,6 +48772,7 @@ webhooks: - scylladbclusters - scylladbmanagerclusterregistrations - scylladbmanagertasks + - scylladbmonitorings --- apiVersion: policy/v1 diff --git a/deploy/operator/00_scyllacluster_clusterrole_view.yaml b/deploy/operator/00_scyllacluster_clusterrole_view.yaml index ad18da58f9a..f6f9693ab75 100644 --- a/deploy/operator/00_scyllacluster_clusterrole_view.yaml +++ b/deploy/operator/00_scyllacluster_clusterrole_view.yaml @@ -16,6 +16,7 @@ rules: - scylladbclusters - scylladbmanagerclusterregistrations - scylladbmanagertasks + - scylladbmonitorings verbs: - get - list diff --git a/deploy/operator/10_validatingwebhook.yaml b/deploy/operator/10_validatingwebhook.yaml index 00cd4dbcf37..a6d30c43545 100644 --- a/deploy/operator/10_validatingwebhook.yaml +++ b/deploy/operator/10_validatingwebhook.yaml @@ -39,3 +39,4 @@ webhooks: - scylladbclusters - scylladbmanagerclusterregistrations - scylladbmanagertasks + - scylladbmonitorings diff --git a/docs/source/api-reference/groups/scylla.scylladb.com/scylladbmonitorings.rst b/docs/source/api-reference/groups/scylla.scylladb.com/scylladbmonitorings.rst index 4e54aaa7f68..242944c1f7e 100755 --- a/docs/source/api-reference/groups/scylla.scylladb.com/scylladbmonitorings.rst +++ b/docs/source/api-reference/groups/scylla.scylladb.com/scylladbmonitorings.rst @@ -137,6 +137,9 @@ object * - :ref:`authentication` - object - authentication hold the authentication options for accessing Grafana. + * - :ref:`datasources` + - array (object) + - datasources is a list of Grafana datasources to configure. It's expected to be set when using Prometheus component in `External` mode. At most one datasource is allowed for now (only Prometheus is supported). * - :ref:`exposeOptions` - object - exposeOptions specifies options for exposing Grafana UI. 
@@ -175,6 +178,233 @@ object - boolean - insecureEnableAnonymousAccess allows access to Grafana without authentication. +.. _api-scylla.scylladb.com-scylladbmonitorings-v1alpha1-.spec.components.grafana.datasources[]: + +.spec.components.grafana.datasources[] +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Description +""""""""""" + + +Type +"""" +object + + +.. list-table:: + :widths: 25 10 150 + :header-rows: 1 + + * - Property + - Type + - Description + * - name + - string + - name is the name of the datasource as it will appear in Grafana. Only "prometheus" is supported as that's the datasource name expected by the ScyllaDB monitoring stack dashboards. + * - :ref:`prometheusOptions` + - object + - prometheusOptions defines Prometheus-specific options. + * - type + - string + - type is the type of the datasource. Only "prometheus" is supported. + * - url + - string + - url is the URL of the datasource. + +.. _api-scylla.scylladb.com-scylladbmonitorings-v1alpha1-.spec.components.grafana.datasources[].prometheusOptions: + +.spec.components.grafana.datasources[].prometheusOptions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Description +""""""""""" +prometheusOptions defines Prometheus-specific options. + +Type +"""" +object + + +.. list-table:: + :widths: 25 10 150 + :header-rows: 1 + + * - Property + - Type + - Description + * - :ref:`auth` + - object + - auth holds authentication options for connecting to Prometheus. + * - :ref:`tls` + - object + - tls holds TLS configuration for connecting to Prometheus over HTTPS. + +.. _api-scylla.scylladb.com-scylladbmonitorings-v1alpha1-.spec.components.grafana.datasources[].prometheusOptions.auth: + +.spec.components.grafana.datasources[].prometheusOptions.auth +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Description +""""""""""" +auth holds authentication options for connecting to Prometheus. + +Type +"""" +object + + +.. 
list-table:: + :widths: 25 10 150 + :header-rows: 1 + + * - Property + - Type + - Description + * - :ref:`bearerTokenOptions` + - object + - bearerToken holds options for Bearer token authentication. + * - type + - string + - type is the type of authentication to use. + +.. _api-scylla.scylladb.com-scylladbmonitorings-v1alpha1-.spec.components.grafana.datasources[].prometheusOptions.auth.bearerTokenOptions: + +.spec.components.grafana.datasources[].prometheusOptions.auth.bearerTokenOptions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Description +""""""""""" +bearerToken holds options for Bearer token authentication. + +Type +"""" +object + + +.. list-table:: + :widths: 25 10 150 + :header-rows: 1 + + * - Property + - Type + - Description + * - :ref:`secretRef` + - object + - secretRef is a reference to a key in a Secret holding a Bearer token to use to authenticate with Prometheus. + +.. _api-scylla.scylladb.com-scylladbmonitorings-v1alpha1-.spec.components.grafana.datasources[].prometheusOptions.auth.bearerTokenOptions.secretRef: + +.spec.components.grafana.datasources[].prometheusOptions.auth.bearerTokenOptions.secretRef +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Description +""""""""""" +secretRef is a reference to a key in a Secret holding a Bearer token to use to authenticate with Prometheus. + +Type +"""" +object + + +.. list-table:: + :widths: 25 10 150 + :header-rows: 1 + + * - Property + - Type + - Description + * - key + - string + - key within the selected object. + * - name + - string + - name of the selected object. + +.. _api-scylla.scylladb.com-scylladbmonitorings-v1alpha1-.spec.components.grafana.datasources[].prometheusOptions.tls: + +.spec.components.grafana.datasources[].prometheusOptions.tls +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Description +""""""""""" +tls holds TLS configuration for connecting to Prometheus over HTTPS. 
+ +Type +"""" +object + + +.. list-table:: + :widths: 25 10 150 + :header-rows: 1 + + * - Property + - Type + - Description + * - :ref:`caCertConfigMapRef` + - object + - caCert is a reference to a key within the CA bundle ConfigMap. The key should hold the CA cert in PEM format. When not specified, system CAs are used. + * - :ref:`clientTLSKeyPairSecretRef` + - object + - clientTLSKeyPairSecretRef is a reference to a Secret holding client TLS certificate and key for mTLS authentication. It's expected to be a standard Kubernetes TLS Secret with `tls.crt` and `tls.key` keys. + * - insecureSkipVerify + - boolean + - insecureSkipVerify controls whether to skip server certificate verification. + +.. _api-scylla.scylladb.com-scylladbmonitorings-v1alpha1-.spec.components.grafana.datasources[].prometheusOptions.tls.caCertConfigMapRef: + +.spec.components.grafana.datasources[].prometheusOptions.tls.caCertConfigMapRef +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Description +""""""""""" +caCert is a reference to a key within the CA bundle ConfigMap. The key should hold the CA cert in PEM format. When not specified, system CAs are used. + +Type +"""" +object + + +.. list-table:: + :widths: 25 10 150 + :header-rows: 1 + + * - Property + - Type + - Description + * - key + - string + - key within the selected object. + * - name + - string + - name of the selected object. + +.. _api-scylla.scylladb.com-scylladbmonitorings-v1alpha1-.spec.components.grafana.datasources[].prometheusOptions.tls.clientTLSKeyPairSecretRef: + +.spec.components.grafana.datasources[].prometheusOptions.tls.clientTLSKeyPairSecretRef +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Description +""""""""""" +clientTLSKeyPairSecretRef is a reference to a Secret holding client TLS certificate and key for mTLS authentication. It's expected to be a standard Kubernetes TLS Secret with `tls.crt` and `tls.key` keys. 
+ +Type +"""" +object + + +.. list-table:: + :widths: 25 10 150 + :header-rows: 1 + + * - Property + - Type + - Description + * - name + - string + - Name of the referent. + .. _api-scylla.scylladb.com-scylladbmonitorings-v1alpha1-.spec.components.grafana.exposeOptions: .spec.components.grafana.exposeOptions @@ -1572,6 +1802,9 @@ object * - :ref:`exposeOptions` - object - exposeOptions specifies options for exposing Prometheus UI. + * - mode + - string + - mode defines the mode of the Prometheus instance. * - :ref:`placement` - object - placement describes restrictions for the nodes Prometheus is scheduled on. diff --git a/pkg/api/scylla/v1alpha1/scylla.scylladb.com_scylladbmonitorings.yaml b/pkg/api/scylla/v1alpha1/scylla.scylladb.com_scylladbmonitorings.yaml index 4d395652c29..595713895ff 100644 --- a/pkg/api/scylla/v1alpha1/scylla.scylladb.com_scylladbmonitorings.yaml +++ b/pkg/api/scylla/v1alpha1/scylla.scylladb.com_scylladbmonitorings.yaml @@ -65,6 +65,96 @@ spec: description: insecureEnableAnonymousAccess allows access to Grafana without authentication. type: boolean type: object + datasources: + description: |- + datasources is a list of Grafana datasources to configure. + It's expected to be set when using Prometheus component in `External` mode. + At most one datasource is allowed for now (only Prometheus is supported). + items: + properties: + name: + default: prometheus + description: |- + name is the name of the datasource as it will appear in Grafana. + Only "prometheus" is supported as that's the datasource name expected by the ScyllaDB monitoring stack dashboards. + enum: + - prometheus + type: string + prometheusOptions: + description: prometheusOptions defines Prometheus-specific options. + properties: + auth: + description: auth holds authentication options for connecting to Prometheus. + properties: + bearerTokenOptions: + description: bearerToken holds options for Bearer token authentication. 
+ properties: + secretRef: + description: secretRef is a reference to a key in a Secret holding a Bearer token to use to authenticate with Prometheus. + properties: + key: + description: key within the selected object. + minLength: 1 + type: string + name: + description: name of the selected object. + minLength: 1 + type: string + type: object + type: object + type: + default: NoAuthentication + description: type is the type of authentication to use. + type: string + type: object + tls: + description: tls holds TLS configuration for connecting to Prometheus over HTTPS. + properties: + caCertConfigMapRef: + description: |- + caCert is a reference to a key within the CA bundle ConfigMap. The key should hold the CA cert in PEM format. + When not specified, system CAs are used. + properties: + key: + description: key within the selected object. + minLength: 1 + type: string + name: + description: name of the selected object. + minLength: 1 + type: string + type: object + clientTLSKeyPairSecretRef: + description: |- + clientTLSKeyPairSecretRef is a reference to a Secret holding client TLS certificate and key for mTLS authentication. + It's expected to be a standard Kubernetes TLS Secret with `tls.crt` and `tls.key` keys. + properties: + name: + description: Name of the referent. + type: string + type: object + insecureSkipVerify: + default: false + description: insecureSkipVerify controls whether to skip server certificate verification. + type: boolean + type: object + type: object + type: + allOf: + - enum: + - Prometheus + - enum: + - Prometheus + default: Prometheus + description: type is the type of the datasource. Only "prometheus" is supported. + type: string + url: + description: url is the URL of the datasource. + minLength: 1 + type: string + type: object + maxItems: 1 + type: array exposeOptions: description: exposeOptions specifies options for exposing Grafana UI. 
properties: @@ -1104,6 +1194,13 @@ spec: type: object type: object type: object + mode: + default: Managed + description: mode defines the mode of the Prometheus instance. + enum: + - Managed + - External + type: string placement: description: placement describes restrictions for the nodes Prometheus is scheduled on. properties: diff --git a/pkg/api/scylla/v1alpha1/zz_generated.deepcopy.go b/pkg/api/scylla/v1alpha1/zz_generated.deepcopy.go index ff0368ad214..b0004542ab6 100644 --- a/pkg/api/scylla/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/api/scylla/v1alpha1/zz_generated.deepcopy.go @@ -212,6 +212,53 @@ func (in *GrafanaAuthentication) DeepCopy() *GrafanaAuthentication { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GrafanaDatasourceSpec) DeepCopyInto(out *GrafanaDatasourceSpec) { + *out = *in + if in.PrometheusOptions != nil { + in, out := &in.PrometheusOptions, &out.PrometheusOptions + *out = new(GrafanaPrometheusDatasourceOptions) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GrafanaDatasourceSpec. +func (in *GrafanaDatasourceSpec) DeepCopy() *GrafanaDatasourceSpec { + if in == nil { + return nil + } + out := new(GrafanaDatasourceSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *GrafanaDatasourceTLSSpec) DeepCopyInto(out *GrafanaDatasourceTLSSpec) { + *out = *in + if in.CACertConfigMapRef != nil { + in, out := &in.CACertConfigMapRef, &out.CACertConfigMapRef + *out = new(LocalObjectKeySelector) + **out = **in + } + if in.ClientTLSKeyPairSecretRef != nil { + in, out := &in.ClientTLSKeyPairSecretRef, &out.ClientTLSKeyPairSecretRef + *out = new(LocalObjectReference) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GrafanaDatasourceTLSSpec. +func (in *GrafanaDatasourceTLSSpec) DeepCopy() *GrafanaDatasourceTLSSpec { + if in == nil { + return nil + } + out := new(GrafanaDatasourceTLSSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *GrafanaExposeOptions) DeepCopyInto(out *GrafanaExposeOptions) { *out = *in @@ -233,6 +280,74 @@ func (in *GrafanaExposeOptions) DeepCopy() *GrafanaExposeOptions { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GrafanaPrometheusDatasourceAuthSpec) DeepCopyInto(out *GrafanaPrometheusDatasourceAuthSpec) { + *out = *in + if in.BearerTokenOptions != nil { + in, out := &in.BearerTokenOptions, &out.BearerTokenOptions + *out = new(GrafanaPrometheusDatasourceBearerTokenAuthOptions) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GrafanaPrometheusDatasourceAuthSpec. +func (in *GrafanaPrometheusDatasourceAuthSpec) DeepCopy() *GrafanaPrometheusDatasourceAuthSpec { + if in == nil { + return nil + } + out := new(GrafanaPrometheusDatasourceAuthSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *GrafanaPrometheusDatasourceBearerTokenAuthOptions) DeepCopyInto(out *GrafanaPrometheusDatasourceBearerTokenAuthOptions) { + *out = *in + if in.SecretRef != nil { + in, out := &in.SecretRef, &out.SecretRef + *out = new(LocalObjectKeySelector) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GrafanaPrometheusDatasourceBearerTokenAuthOptions. +func (in *GrafanaPrometheusDatasourceBearerTokenAuthOptions) DeepCopy() *GrafanaPrometheusDatasourceBearerTokenAuthOptions { + if in == nil { + return nil + } + out := new(GrafanaPrometheusDatasourceBearerTokenAuthOptions) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GrafanaPrometheusDatasourceOptions) DeepCopyInto(out *GrafanaPrometheusDatasourceOptions) { + *out = *in + if in.TLS != nil { + in, out := &in.TLS, &out.TLS + *out = new(GrafanaDatasourceTLSSpec) + (*in).DeepCopyInto(*out) + } + if in.Auth != nil { + in, out := &in.Auth, &out.Auth + *out = new(GrafanaPrometheusDatasourceAuthSpec) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GrafanaPrometheusDatasourceOptions. +func (in *GrafanaPrometheusDatasourceOptions) DeepCopy() *GrafanaPrometheusDatasourceOptions { + if in == nil { + return nil + } + out := new(GrafanaPrometheusDatasourceOptions) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *GrafanaSpec) DeepCopyInto(out *GrafanaSpec) { *out = *in @@ -248,6 +363,13 @@ func (in *GrafanaSpec) DeepCopyInto(out *GrafanaSpec) { (*in).DeepCopyInto(*out) } out.Authentication = in.Authentication + if in.Datasources != nil { + in, out := &in.Datasources, &out.Datasources + *out = make([]GrafanaDatasourceSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } return } @@ -357,6 +479,38 @@ func (in *LocalDiskSetup) DeepCopy() *LocalDiskSetup { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LocalObjectKeySelector) DeepCopyInto(out *LocalObjectKeySelector) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LocalObjectKeySelector. +func (in *LocalObjectKeySelector) DeepCopy() *LocalObjectKeySelector { + if in == nil { + return nil + } + out := new(LocalObjectKeySelector) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LocalObjectReference) DeepCopyInto(out *LocalObjectReference) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LocalObjectReference. +func (in *LocalObjectReference) DeepCopy() *LocalObjectReference { + if in == nil { + return nil + } + out := new(LocalObjectReference) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *LocalScyllaDBReference) DeepCopyInto(out *LocalScyllaDBReference) { *out = *in From c54eacecdd9ad7b303b202668ce4c53ca1f0f1ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Grzegorz=20Burzy=C5=84ski?= Date: Tue, 9 Sep 2025 10:59:30 +0200 Subject: [PATCH 07/14] Add E2E test covering ScyllaDBMonitoring validating webhook --- test/e2e/set/scylladbmonitoring/config.go | 5 + .../scylladbmonitoring_webhooks.go | 102 ++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 test/e2e/set/scylladbmonitoring/config.go create mode 100644 test/e2e/set/scylladbmonitoring/scylladbmonitoring_webhooks.go diff --git a/test/e2e/set/scylladbmonitoring/config.go b/test/e2e/set/scylladbmonitoring/config.go new file mode 100644 index 00000000000..ad7fc721a99 --- /dev/null +++ b/test/e2e/set/scylladbmonitoring/config.go @@ -0,0 +1,5 @@ +package scylladbmonitoring + +import "time" + +const testTimeout = 15 * time.Minute diff --git a/test/e2e/set/scylladbmonitoring/scylladbmonitoring_webhooks.go b/test/e2e/set/scylladbmonitoring/scylladbmonitoring_webhooks.go new file mode 100644 index 00000000000..39bf603cba4 --- /dev/null +++ b/test/e2e/set/scylladbmonitoring/scylladbmonitoring_webhooks.go @@ -0,0 +1,102 @@ +package scylladbmonitoring + +import ( + "fmt" + + g "github.com/onsi/ginkgo/v2" + o "github.com/onsi/gomega" + scyllav1alpha1 "github.com/scylladb/scylla-operator/pkg/api/scylla/v1alpha1" + "github.com/scylladb/scylla-operator/test/e2e/framework" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apiserver/pkg/storage/names" +) + +var _ = g.Describe("ScyllaDBMonitoring webhook", func() { + f := framework.NewFramework("scylladbmonitoring") + + type entry struct { + modifierFuncs []func(*scyllav1alpha1.ScyllaDBMonitoring) + expectedErrMatcherFunc func(sm *scyllav1alpha1.ScyllaDBMonitoring) o.OmegaMatcher + } + + validSDBM := &scyllav1alpha1.ScyllaDBMonitoring{ + ObjectMeta: 
metav1.ObjectMeta{ + Name: names.SimpleNameGenerator.GenerateName("valid-"), + }, + Spec: scyllav1alpha1.ScyllaDBMonitoringSpec{ + Components: &scyllav1alpha1.Components{ + Prometheus: &scyllav1alpha1.PrometheusSpec{ + Mode: scyllav1alpha1.PrometheusModeManaged, + }, + Grafana: &scyllav1alpha1.GrafanaSpec{ + Resources: corev1.ResourceRequirements{}, + }, + }, + }, + } + + g.DescribeTableSubtree("should respond", func(e *entry) { + g.It("is created", func(ctx g.SpecContext) { + sm := validSDBM.DeepCopy() + sm.Name = names.SimpleNameGenerator.GenerateName("sm-") + for _, f := range e.modifierFuncs { + f(sm) + } + + framework.By("Creating a ScyllaDBMonitoring") + _, err := f.ScyllaAdminClient().ScyllaV1alpha1().ScyllaDBMonitorings(f.Namespace()).Create(ctx, sm, metav1.CreateOptions{}) + o.Expect(err).To(e.expectedErrMatcherFunc(sm)) + }, g.NodeTimeout(testTimeout)) + + g.It("is updated", func(ctx g.SpecContext) { + sm := validSDBM.DeepCopy() + sm.Name = names.SimpleNameGenerator.GenerateName("sm-") + framework.By("Creating a ScyllaDBMonitoring") + createdSM, err := f.ScyllaAdminClient().ScyllaV1alpha1().ScyllaDBMonitorings(f.Namespace()).Create(ctx, sm, metav1.CreateOptions{}) + o.Expect(err).To(o.Succeed()) + + smCopy := createdSM.DeepCopy() + for _, f := range e.modifierFuncs { + f(smCopy) + } + framework.By("Updating the ScyllaDBMonitoring") + _, err = f.ScyllaAdminClient().ScyllaV1alpha1().ScyllaDBMonitorings(f.Namespace()).Update(ctx, smCopy, metav1.UpdateOptions{}) + o.Expect(err).To(e.expectedErrMatcherFunc(smCopy)) + }, g.NodeTimeout(testTimeout)) + }, + g.Entry("with acceptance when a valid ScyllaDBMonitoring", &entry{ + modifierFuncs: nil, + expectedErrMatcherFunc: func(sm *scyllav1alpha1.ScyllaDBMonitoring) o.OmegaMatcher { return o.Succeed() }, + }), + g.Entry("with rejection when an invalid ScyllaDBMonitoring", &entry{ + modifierFuncs: []func(*scyllav1alpha1.ScyllaDBMonitoring){ + func(sm *scyllav1alpha1.ScyllaDBMonitoring) { + 
sm.Spec.Components.Prometheus.Mode = "invalid-mode" + }, + }, + expectedErrMatcherFunc: func(sm *scyllav1alpha1.ScyllaDBMonitoring) o.OmegaMatcher { + return o.Equal(&apierrors.StatusError{ErrStatus: metav1.Status{ + Status: "Failure", + Message: fmt.Sprintf(`ScyllaDBMonitoring.scylla.scylladb.com %q is invalid: spec.components.prometheus.mode: Unsupported value: "invalid-mode": supported values: "Managed", "External"`, sm.Name), + Reason: "Invalid", + Details: &metav1.StatusDetails{ + Name: sm.Name, + Group: "scylla.scylladb.com", + Kind: "ScyllaDBMonitoring", + UID: "", + Causes: []metav1.StatusCause{ + { + Type: "FieldValueNotSupported", + Message: `Unsupported value: "invalid-mode": supported values: "Managed", "External"`, + Field: "spec.components.prometheus.mode", + }, + }, + }, + Code: 422, + }}) + }, + }), + ) +}) From 09150d2f275850814b063599a80771b7b294e42f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Grzegorz=20Burzy=C5=84ski?= Date: Tue, 9 Sep 2025 15:19:24 +0200 Subject: [PATCH 08/14] Enqueue ScyllaDBMonitoring on referenced Secret/CM changes --- .../scylladbmonitoring/controller.go | 101 ++++++++++-- pkg/controller/scylladbmonitoring/index.go | 63 ++++++++ .../scylladbmonitoring/index_test.go | 146 ++++++++++++++++++ pkg/controller/scylladbmonitoring/sync.go | 2 +- 4 files changed, 301 insertions(+), 11 deletions(-) create mode 100644 pkg/controller/scylladbmonitoring/index.go create mode 100644 pkg/controller/scylladbmonitoring/index_test.go diff --git a/pkg/controller/scylladbmonitoring/controller.go b/pkg/controller/scylladbmonitoring/controller.go index f99c2528f54..6f25eec28fb 100644 --- a/pkg/controller/scylladbmonitoring/controller.go +++ b/pkg/controller/scylladbmonitoring/controller.go @@ -69,7 +69,7 @@ type Controller struct { deploymentLister appsv1listers.DeploymentLister ingressLister networkingv1listers.IngressLister - scylladbMonitoringLister scyllav1alpha1listers.ScyllaDBMonitoringLister + scyllaDBMonitoringInformer 
scyllav1alpha1informers.ScyllaDBMonitoringInformer prometheusLister monitoringv1listers.PrometheusLister prometheusRuleLister monitoringv1listers.PrometheusRuleLister @@ -123,7 +123,7 @@ func NewController( deploymentLister: deploymentInformer.Lister(), ingressLister: ingressInformer.Lister(), - scylladbMonitoringLister: scyllaDBMonitoringInformer.Lister(), + scyllaDBMonitoringInformer: scyllaDBMonitoringInformer, prometheusLister: prometheusInformer.Lister(), prometheusRuleLister: prometheusRuleInformer.Lister(), @@ -159,6 +159,13 @@ func NewController( keyGetter: keyGetter, } + if err := scyllaDBMonitoringInformer.Informer().AddIndexers(cache.Indexers{ + scyllaDBMonitoringBySecretIndexName: indexScyllaDBMonitoringBySecret, + scyllaDBMonitoringByConfigMapIndexName: indexScyllaDBMonitoringByConfigMap, + }); err != nil { + return nil, fmt.Errorf("can't add indexers to ScyllaDBMonitoring informer: %w", err) + } + var err error smc.handlers, err = controllerhelpers.NewHandlers[*scyllav1alpha1.ScyllaDBMonitoring]( smc.queue, @@ -167,10 +174,10 @@ func NewController( scylladbMonitoringControllerGVK, kubeinterfaces.NamespacedGetList[*scyllav1alpha1.ScyllaDBMonitoring]{ GetFunc: func(namespace, name string) (*scyllav1alpha1.ScyllaDBMonitoring, error) { - return smc.scylladbMonitoringLister.ScyllaDBMonitorings(namespace).Get(name) + return smc.scyllaDBMonitoringInformer.Lister().ScyllaDBMonitorings(namespace).Get(name) }, ListFunc: func(namespace string, selector labels.Selector) (ret []*scyllav1alpha1.ScyllaDBMonitoring, err error) { - return smc.scylladbMonitoringLister.ScyllaDBMonitorings(namespace).List(selector) + return smc.scyllaDBMonitoringInformer.Lister().ScyllaDBMonitorings(namespace).List(selector) }, }, ) @@ -301,7 +308,10 @@ func (smc *Controller) deleteScyllaOperatorConfig(obj interface{}) { func (smc *Controller) addConfigMap(obj interface{}) { smc.handlers.HandleAdd( obj.(*corev1.ConfigMap), - smc.handlers.EnqueueOwner, + combineEnqueueFuncs( + 
smc.enqueueByConfigMapRef, + smc.handlers.EnqueueOwner, + ), ) } @@ -309,7 +319,10 @@ func (smc *Controller) updateConfigMap(old, cur interface{}) { smc.handlers.HandleUpdate( old.(*corev1.ConfigMap), cur.(*corev1.ConfigMap), - smc.handlers.EnqueueOwner, + combineEnqueueFuncs( + smc.enqueueByConfigMapRef, + smc.handlers.EnqueueOwner, + ), smc.deleteConfigMap, ) } @@ -317,14 +330,20 @@ func (smc *Controller) updateConfigMap(old, cur interface{}) { func (smc *Controller) deleteConfigMap(obj interface{}) { smc.handlers.HandleDelete( obj, - smc.handlers.EnqueueOwner, + combineEnqueueFuncs( + smc.enqueueByConfigMapRef, + smc.handlers.EnqueueOwner, + ), ) } func (smc *Controller) addSecret(obj interface{}) { smc.handlers.HandleAdd( obj.(*corev1.Secret), - smc.handlers.EnqueueOwner, + combineEnqueueFuncs( + smc.enqueueBySecretRef, + smc.handlers.EnqueueOwner, + ), ) } @@ -332,7 +351,10 @@ func (smc *Controller) updateSecret(old, cur interface{}) { smc.handlers.HandleUpdate( old.(*corev1.Secret), cur.(*corev1.Secret), - smc.handlers.EnqueueOwner, + combineEnqueueFuncs( + smc.enqueueBySecretRef, + smc.handlers.EnqueueOwner, + ), smc.deleteSecret, ) } @@ -340,7 +362,10 @@ func (smc *Controller) updateSecret(old, cur interface{}) { func (smc *Controller) deleteSecret(obj interface{}) { smc.handlers.HandleDelete( obj, - smc.handlers.EnqueueOwner, + combineEnqueueFuncs( + smc.enqueueBySecretRef, + smc.handlers.EnqueueOwner, + ), ) } @@ -528,6 +553,68 @@ func (smc *Controller) deleteServiceMonitor(obj interface{}) { ) } +// enqueueBySecretRef enqueues every ScyllaDBMonitoring from the Secret's namespace that references the Secret by name. +// The index is keyed by the bare Secret name, so hits from other namespaces are filtered out before enqueuing. +func (smc *Controller) enqueueBySecretRef(depth int, obj kubeinterfaces.ObjectInterface, op controllerhelpers.HandlerOperationType) { + _, ok := obj.(*corev1.Secret) + if !ok { + apimachineryutilruntime.HandleError(fmt.Errorf("expected %T, got %T", &corev1.Secret{}, obj)) + return + } + + name := obj.GetName() + indexedSDBMs, err := smc.scyllaDBMonitoringInformer.Informer().GetIndexer().ByIndex(scyllaDBMonitoringBySecretIndexName, name) + if err != nil { +
apimachineryutilruntime.HandleError(fmt.Errorf("can't get ScyllaDBMonitoring for Secret %q: %w", name, err)) + return + } + + for _, indexedSDBM := range indexedSDBMs { + sdbm, ok := indexedSDBM.(*scyllav1alpha1.ScyllaDBMonitoring) + if !ok { + apimachineryutilruntime.HandleError(fmt.Errorf("expected *scyllav1alpha1.ScyllaDBMonitoring, got %T", indexedSDBM)) + continue + } + // The spec uses Local* reference types, so a same-named Secret in another namespace is unrelated. + if sdbm.Namespace != obj.GetNamespace() { + continue + } + klog.V(4).InfoS("Enqueuing ScyllaDBMonitoring for Secret", "Secret", name, "ScyllaDBMonitoring", klog.KObj(sdbm)) + smc.handlers.Enqueue(depth+1, sdbm, op) + } +} + +// enqueueByConfigMapRef enqueues every ScyllaDBMonitoring from the ConfigMap's namespace that references the ConfigMap by name. +// The index is keyed by the bare ConfigMap name, so hits from other namespaces are filtered out before enqueuing. +func (smc *Controller) enqueueByConfigMapRef(depth int, obj kubeinterfaces.ObjectInterface, op controllerhelpers.HandlerOperationType) { + _, ok := obj.(*corev1.ConfigMap) + if !ok { + apimachineryutilruntime.HandleError(fmt.Errorf("expected %T, got %T", &corev1.ConfigMap{}, obj)) + return + } + + name := obj.GetName() + indexedSDBMs, err := smc.scyllaDBMonitoringInformer.Informer().GetIndexer().ByIndex(scyllaDBMonitoringByConfigMapIndexName, name) + if err != nil { + apimachineryutilruntime.HandleError(fmt.Errorf("can't get ScyllaDBMonitoring for ConfigMap %q: %w", name, err)) + return + } + + for _, indexedSDBM := range indexedSDBMs { + sdbm, ok := indexedSDBM.(*scyllav1alpha1.ScyllaDBMonitoring) + if !ok { + apimachineryutilruntime.HandleError(fmt.Errorf("expected %T, got %T", &scyllav1alpha1.ScyllaDBMonitoring{}, indexedSDBM)) + continue + } + // The spec uses Local* reference types, so a same-named ConfigMap in another namespace is unrelated. + if sdbm.Namespace != obj.GetNamespace() { + continue + } + klog.V(4).InfoS("Enqueuing ScyllaDBMonitoring for ConfigMap", "ConfigMap", name, "ScyllaDBMonitoring", klog.KObj(sdbm)) + smc.handlers.Enqueue(depth+1, sdbm, op) + } +} + func (smc *Controller) processNextItem(ctx context.Context) bool { key, quit := smc.queue.Get() if quit { @@ -590,3 +677,11 @@ func (smc *Controller) Run(ctx context.Context, workers int) { <-ctx.Done() } + +func combineEnqueueFuncs(funcs ...controllerhelpers.EnqueueFuncType) controllerhelpers.EnqueueFuncType { + return func(depth int, obj kubeinterfaces.ObjectInterface, op controllerhelpers.HandlerOperationType) { + for _, fn := range
funcs { + fn(depth+1, obj, op) + } + } +} diff --git a/pkg/controller/scylladbmonitoring/index.go b/pkg/controller/scylladbmonitoring/index.go new file mode 100644 index 00000000000..5839edc2f0b --- /dev/null +++ b/pkg/controller/scylladbmonitoring/index.go @@ -0,0 +1,63 @@ +package scylladbmonitoring + +import ( + "fmt" + + scyllav1alpha1 "github.com/scylladb/scylla-operator/pkg/api/scylla/v1alpha1" +) + +const ( + scyllaDBMonitoringBySecretIndexName = "secret" + scyllaDBMonitoringByConfigMapIndexName = "configmap" +) + +// indexScyllaDBMonitoringBySecret indexes ScyllaDBMonitoring resources by the names of Secrets it references. +func indexScyllaDBMonitoringBySecret(obj interface{}) ([]string, error) { + sdm, ok := obj.(*scyllav1alpha1.ScyllaDBMonitoring) + if !ok { + return nil, fmt.Errorf("expected *scyllav1alpha1.ScyllaDBMonitoring, got %T", obj) + } + + var secretNames []string + + if sdm.Spec.Components != nil && sdm.Spec.Components.Grafana != nil { + for _, ds := range sdm.Spec.Components.Grafana.Datasources { + if ds.PrometheusOptions != nil { + if ds.PrometheusOptions.Auth != nil && ds.PrometheusOptions.Auth.BearerTokenOptions != nil && ds.PrometheusOptions.Auth.BearerTokenOptions.SecretRef != nil { + secretNames = append(secretNames, ds.PrometheusOptions.Auth.BearerTokenOptions.SecretRef.Name) + } + if ds.PrometheusOptions.TLS != nil { + if ds.PrometheusOptions.TLS.ClientTLSKeyPairSecretRef != nil { + secretNames = append(secretNames, ds.PrometheusOptions.TLS.ClientTLSKeyPairSecretRef.Name) + } + } + } + } + } + + return secretNames, nil +} + +// indexScyllaDBMonitoringByConfigMap indexes ScyllaDBMonitoring resources by the names of ConfigMaps it references. 
+func indexScyllaDBMonitoringByConfigMap(obj interface{}) ([]string, error) { + sdm, ok := obj.(*scyllav1alpha1.ScyllaDBMonitoring) + if !ok { + return nil, fmt.Errorf("expected *scyllav1alpha1.ScyllaDBMonitoring, got %T", obj) + } + + var configMapNames []string + + if sdm.Spec.Components != nil && sdm.Spec.Components.Grafana != nil { + for _, ds := range sdm.Spec.Components.Grafana.Datasources { + if ds.PrometheusOptions != nil { + if ds.PrometheusOptions.TLS != nil { + if ds.PrometheusOptions.TLS.CACertConfigMapRef != nil { + configMapNames = append(configMapNames, ds.PrometheusOptions.TLS.CACertConfigMapRef.Name) + } + } + } + } + } + + return configMapNames, nil +} diff --git a/pkg/controller/scylladbmonitoring/index_test.go b/pkg/controller/scylladbmonitoring/index_test.go new file mode 100644 index 00000000000..61169312f52 --- /dev/null +++ b/pkg/controller/scylladbmonitoring/index_test.go @@ -0,0 +1,146 @@ +package scylladbmonitoring + +import ( + "fmt" + "reflect" + "testing" + + scyllav1alpha1 "github.com/scylladb/scylla-operator/pkg/api/scylla/v1alpha1" + corev1 "k8s.io/api/core/v1" +) + +func Test_indexScyllaDBMonitoringBySecret(t *testing.T) { + t.Parallel() + + tt := []struct { + name string + obj interface{} + want []string + wantErr error + }{ + { + name: "unexpected object type", + obj: corev1.Pod{}, + want: nil, + wantErr: fmt.Errorf("expected *scyllav1alpha1.ScyllaDBMonitoring, got v1.Pod"), + }, + { + name: "no secret references", + obj: &scyllav1alpha1.ScyllaDBMonitoring{}, + want: nil, + wantErr: nil, + }, + { + name: "all possible references", + obj: &scyllav1alpha1.ScyllaDBMonitoring{ + Spec: scyllav1alpha1.ScyllaDBMonitoringSpec{ + Components: &scyllav1alpha1.Components{ + Grafana: &scyllav1alpha1.GrafanaSpec{ + Datasources: []scyllav1alpha1.GrafanaDatasourceSpec{ + { + Type: scyllav1alpha1.GrafanaDatasourceTypePrometheus, + PrometheusOptions: &scyllav1alpha1.GrafanaPrometheusDatasourceOptions{ + TLS: 
&scyllav1alpha1.GrafanaDatasourceTLSSpec{ + ClientTLSKeyPairSecretRef: &scyllav1alpha1.LocalObjectReference{ + Name: "client-tls-secret", + }, + }, + Auth: &scyllav1alpha1.GrafanaPrometheusDatasourceAuthSpec{ + Type: scyllav1alpha1.GrafanaPrometheusDatasourceAuthTypeBearerToken, + BearerTokenOptions: &scyllav1alpha1.GrafanaPrometheusDatasourceBearerTokenAuthOptions{ + SecretRef: &scyllav1alpha1.LocalObjectKeySelector{ + Name: "bearer-token-secret", + Key: "token", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + want: []string{"bearer-token-secret", "client-tls-secret"}, + wantErr: nil, + }, + } + + for _, tc := range tt { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + got, err := indexScyllaDBMonitoringBySecret(tc.obj) + if !reflect.DeepEqual(err, tc.wantErr) { + t.Errorf("indexScyllaDBMonitoringBySecret() error = %v, wantErr %v", err, tc.wantErr) + } + if !reflect.DeepEqual(got, tc.want) { + t.Errorf("indexScyllaDBMonitoringBySecret() got = %v, want %v", got, tc.want) + } + }) + } +} + +func Test_indexScyllaDBMonitoringByConfigMap(t *testing.T) { + t.Parallel() + + tt := []struct { + name string + obj interface{} + want []string + wantErr error + }{ + { + name: "unexpected object type", + obj: corev1.Pod{}, + want: nil, + wantErr: fmt.Errorf("expected *scyllav1alpha1.ScyllaDBMonitoring, got v1.Pod"), + }, + { + name: "no configmap references", + obj: &scyllav1alpha1.ScyllaDBMonitoring{}, + want: nil, + wantErr: nil, + }, + { + name: "all possible references", + obj: &scyllav1alpha1.ScyllaDBMonitoring{ + Spec: scyllav1alpha1.ScyllaDBMonitoringSpec{ + Components: &scyllav1alpha1.Components{ + Grafana: &scyllav1alpha1.GrafanaSpec{ + Datasources: []scyllav1alpha1.GrafanaDatasourceSpec{ + { + Type: scyllav1alpha1.GrafanaDatasourceTypePrometheus, + PrometheusOptions: &scyllav1alpha1.GrafanaPrometheusDatasourceOptions{ + TLS: &scyllav1alpha1.GrafanaDatasourceTLSSpec{ + CACertConfigMapRef: &scyllav1alpha1.LocalObjectKeySelector{ + Name: "ca-cert-configmap", 
+ Key: "ca.crt", + }, + }, + }, + }, + }, + }, + }, + }, + }, + want: []string{"ca-cert-configmap"}, + wantErr: nil, + }, + } + + for _, tc := range tt { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + got, err := indexScyllaDBMonitoringByConfigMap(tc.obj) + if !reflect.DeepEqual(err, tc.wantErr) { + t.Errorf("indexScyllaDBMonitoringByConfigMap() error = %v, wantErr %v", err, tc.wantErr) + } + if !reflect.DeepEqual(got, tc.want) { + t.Errorf("indexScyllaDBMonitoringByConfigMap() got = %v, want %v", got, tc.want) + } + }) + } +} diff --git a/pkg/controller/scylladbmonitoring/sync.go b/pkg/controller/scylladbmonitoring/sync.go index e1763d594d4..655e9d207c6 100644 --- a/pkg/controller/scylladbmonitoring/sync.go +++ b/pkg/controller/scylladbmonitoring/sync.go @@ -43,7 +43,7 @@ func (smc *Controller) sync(ctx context.Context, key string) error { klog.V(4).InfoS("Finished syncing ScyllaDBMonitoring", "ScyllaDBMonitoring", klog.KRef(namespace, name), "duration", time.Since(startTime)) }() - sm, err := smc.scylladbMonitoringLister.ScyllaDBMonitorings(namespace).Get(name) + sm, err := smc.scyllaDBMonitoringInformer.Lister().ScyllaDBMonitorings(namespace).Get(name) if errors.IsNotFound(err) { klog.V(2).InfoS("ScyllaDBMonitoring has been deleted", "ScyllaDBMonitoring", klog.KObj(sm)) return nil From 60990a1b4cafc9082fe089a442f72997b61b50d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Grzegorz=20Burzy=C5=84ski?= Date: Thu, 11 Sep 2025 12:30:41 +0200 Subject: [PATCH 09/14] Rollout Grafana deployment on referenced Secret/ConfigMap changes --- pkg/controller/scylladbmonitoring/index.go | 12 ++- .../scylladbmonitoring/sync_grafana.go | 73 ++++++++++++++++++- 2 files changed, 81 insertions(+), 4 deletions(-) diff --git a/pkg/controller/scylladbmonitoring/index.go b/pkg/controller/scylladbmonitoring/index.go index 5839edc2f0b..131e943af61 100644 --- a/pkg/controller/scylladbmonitoring/index.go +++ b/pkg/controller/scylladbmonitoring/index.go @@ -18,6 +18,10 @@ func 
indexScyllaDBMonitoringBySecret(obj interface{}) ([]string, error) { return nil, fmt.Errorf("expected *scyllav1alpha1.ScyllaDBMonitoring, got %T", obj) } + return getScyllaDBMonitoringGrafanaSecretReferences(sdm), nil +} + +func getScyllaDBMonitoringGrafanaSecretReferences(sdm *scyllav1alpha1.ScyllaDBMonitoring) []string { var secretNames []string if sdm.Spec.Components != nil && sdm.Spec.Components.Grafana != nil { @@ -35,7 +39,7 @@ func indexScyllaDBMonitoringBySecret(obj interface{}) ([]string, error) { } } - return secretNames, nil + return secretNames } // indexScyllaDBMonitoringByConfigMap indexes ScyllaDBMonitoring resources by the names of ConfigMaps it references. @@ -45,6 +49,10 @@ func indexScyllaDBMonitoringByConfigMap(obj interface{}) ([]string, error) { return nil, fmt.Errorf("expected *scyllav1alpha1.ScyllaDBMonitoring, got %T", obj) } + return getScyllaDBMonitoringGrafanaConfigMapReferences(sdm), nil +} + +func getScyllaDBMonitoringGrafanaConfigMapReferences(sdm *scyllav1alpha1.ScyllaDBMonitoring) []string { var configMapNames []string if sdm.Spec.Components != nil && sdm.Spec.Components.Grafana != nil { @@ -59,5 +67,5 @@ func indexScyllaDBMonitoringByConfigMap(obj interface{}) ([]string, error) { } } - return configMapNames, nil + return configMapNames } diff --git a/pkg/controller/scylladbmonitoring/sync_grafana.go b/pkg/controller/scylladbmonitoring/sync_grafana.go index a6aa23350c3..6f3574df38b 100644 --- a/pkg/controller/scylladbmonitoring/sync_grafana.go +++ b/pkg/controller/scylladbmonitoring/sync_grafana.go @@ -27,8 +27,10 @@ import ( corev1 "k8s.io/api/core/v1" networkingv1 "k8s.io/api/networking/v1" rbacv1 "k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime" apimachineryutilerrors "k8s.io/apimachinery/pkg/util/errors" apimachineryutilrand "k8s.io/apimachinery/pkg/util/rand" ) @@ -273,7 +275,13 @@ func (smc *Controller) 
syncGrafana( deployments map[string]*appsv1.Deployment, ingresses map[string]*networkingv1.Ingress, ) ([]metav1.Condition, error) { - var progressingConditions []metav1.Condition + referencedObjects, progressingConditions, err := smc.resolveGrafanaReferencedObjects(sm) + if err != nil { + return progressingConditions, fmt.Errorf("can't resolve referenced objects required by Grafana: %w", err) + } + if len(progressingConditions) > 0 { + return progressingConditions, nil + } grafanaServingCertChainConfig := &okubecrypto.CertChainConfig{ CAConfig: &okubecrypto.CAConfig{ @@ -351,7 +359,15 @@ func (smc *Controller) syncGrafana( var requiredDeployment *appsv1.Deployment // Trigger restart for inputs that are not live reloaded. - grafanaRestartHash, hashErr := hash.HashObjects(requiredConfigsCM, requiredProvisioningsCM, requiredDahsboardsCMs) + objectsForGrafanaRestartHash := []any{ + requiredConfigsCM, + requiredProvisioningsCM, + requiredAdminCredentialsSecret, + } + for _, referencedObj := range referencedObjects { + objectsForGrafanaRestartHash = append(objectsForGrafanaRestartHash, referencedObj) + } + grafanaRestartHash, hashErr := hash.HashObjects(objectsForGrafanaRestartHash...) 
if hashErr != nil { renderErrors = append(renderErrors, hashErr) } else { @@ -619,3 +635,56 @@ func (smc *Controller) syncGrafana( return progressingConditions, nil } + +func (smc *Controller) resolveGrafanaReferencedObjects(sm *scyllav1alpha1.ScyllaDBMonitoring) ( + referencedObjects []runtime.Object, + progressingConditions []metav1.Condition, + err error, +) { + var objectErrs []error + + for _, cmName := range getScyllaDBMonitoringGrafanaConfigMapReferences(sm) { + cm, err := smc.configMapLister.ConfigMaps(sm.Namespace).Get(cmName) + if err != nil { + if errors.IsNotFound(err) { + progressingConditions = append(progressingConditions, metav1.Condition{ + Type: grafanaControllerProgressingCondition, + Status: metav1.ConditionTrue, + Reason: "WaitingForConfigMap", + Message: fmt.Sprintf("Waiting for ConfigMap %q to exist.", naming.ManualRef(sm.Namespace, cmName)), + ObservedGeneration: sm.Generation, + }) + + } else { + objectErrs = append(objectErrs, fmt.Errorf("can't get referenced configmap %q: %w", cmName, err)) + } + continue + } + referencedObjects = append(referencedObjects, cm) + } + + for _, secretName := range getScyllaDBMonitoringGrafanaSecretReferences(sm) { + secret, err := smc.secretLister.Secrets(sm.Namespace).Get(secretName) + if err != nil { + if errors.IsNotFound(err) { + progressingConditions = append(progressingConditions, metav1.Condition{ + Type: grafanaControllerProgressingCondition, + Status: metav1.ConditionTrue, + Reason: "WaitingForSecret", + Message: fmt.Sprintf("Waiting for Secret %q to exist.", naming.ManualRef(sm.Namespace, secretName)), + ObservedGeneration: sm.Generation, + }) + } else { + objectErrs = append(objectErrs, fmt.Errorf("can't get referenced secret %q: %w", secretName, err)) + } + continue + } + referencedObjects = append(referencedObjects, secret) + } + + if err := apimachineryutilerrors.NewAggregate(objectErrs); err != nil { + return nil, progressingConditions, err + } + + return referencedObjects, 
progressingConditions, nil +} From 3c3f1a9dced4519ec1e80dd744a51deec5170488 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Grzegorz=20Burzy=C5=84ski?= Date: Thu, 4 Sep 2025 11:13:49 +0200 Subject: [PATCH 10/14] Add example ScyllaDBMonitoring configured with external Prometheus --- .../openshift-uwm.scylladbmonitoring.yaml | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 examples/monitoring/v1alpha1/openshift-uwm.scylladbmonitoring.yaml diff --git a/examples/monitoring/v1alpha1/openshift-uwm.scylladbmonitoring.yaml b/examples/monitoring/v1alpha1/openshift-uwm.scylladbmonitoring.yaml new file mode 100644 index 00000000000..90ee5a6cdd4 --- /dev/null +++ b/examples/monitoring/v1alpha1/openshift-uwm.scylladbmonitoring.yaml @@ -0,0 +1,44 @@ +apiVersion: scylla.scylladb.com/v1alpha1 +kind: ScyllaDBMonitoring +metadata: + name: scylla-monitoring + namespace: scylla +spec: + type: Platform + endpointsSelector: + matchLabels: + app.kubernetes.io/name: scylla + scylla-operator.scylladb.com/scylla-service-type: member + scylla/cluster: scylla + components: + prometheus: + mode: External + grafana: + datasources: + # Prometheus datasource pointing to OpenShift's Thanos Querier service. + # To make this work, `cluster-monitoring-config` ConfigMap in `openshift-monitoring` namespace must be configured + # to contain `config.yaml` key with `enableUserWorkload: true` in its content. + # See https://docs.redhat.com/en/documentation/openshift_container_platform/4.19/html/monitoring/configuring-user-workload-monitoring#enabling-monitoring-for-user-defined-projects_preparing-to-configure-the-monitoring-stack-uwm for details. 
+ - type: Prometheus + url: "https://thanos-querier.openshift-monitoring.svc:9091" + prometheusOptions: + tls: + caCertConfigMapRef: + name: openshift-service-ca.crt + key: service-ca.crt + auth: + type: BearerToken + bearerTokenOptions: + secretRef: + # This is a `kubernetes.io/service-account-token` type of Secret created for a ServiceAccount bound to + # `cluster-monitoring-view` ClusterRole. + name: scylla-monitoring-grafana-token + key: token + exposeOptions: + webInterface: + ingress: + ingressClassName: haproxy + dnsDomains: + - example-grafana.test.svc.cluster.local + annotations: + haproxy-ingress.github.io/ssl-passthrough: "true" From 6d0ce52bce90f5a016177d4848f0954eddabcbd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Grzegorz=20Burzy=C5=84ski?= Date: Mon, 15 Sep 2025 14:56:34 +0200 Subject: [PATCH 11/14] Add implementation history entries for 2490 enhancement proposal --- .../2490-scylladbmonitoring-external-prometheus/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/enhancements/proposals/2490-scylladbmonitoring-external-prometheus/README.md b/enhancements/proposals/2490-scylladbmonitoring-external-prometheus/README.md index c7df3f08b83..e2ebc67f5d8 100644 --- a/enhancements/proposals/2490-scylladbmonitoring-external-prometheus/README.md +++ b/enhancements/proposals/2490-scylladbmonitoring-external-prometheus/README.md @@ -397,6 +397,8 @@ the [Support matrix](https://operator.docs.scylladb.com/stable/support/releases. ## Implementation History +- 2025-09-12: Initial implementation completed. +- 2025-08-29: Proposal accepted. - 2025-08-26: First review iteration. - 2025-08-21: Enhancement proposal introduced. 
From 176c1ad85d60c7e6afc64aabbfcbed36351f0510 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Grzegorz=20Burzy=C5=84ski?= Date: Wed, 24 Sep 2025 10:00:39 +0200 Subject: [PATCH 12/14] Add external Prometheus E2Es --- test/e2e/framework/client.go | 8 + .../scylladbmonitoring/scylladbmonitoring.go | 424 +++++++++++++++++- 2 files changed, 431 insertions(+), 1 deletion(-) diff --git a/test/e2e/framework/client.go b/test/e2e/framework/client.go index 33bd2857383..b65dc169151 100644 --- a/test/e2e/framework/client.go +++ b/test/e2e/framework/client.go @@ -4,6 +4,7 @@ package framework import ( o "github.com/onsi/gomega" + promclient "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned" scyllaclientset "github.com/scylladb/scylla-operator/pkg/client/scylla/clientset/versioned" "k8s.io/client-go/discovery" "k8s.io/client-go/dynamic" @@ -29,6 +30,7 @@ type AdminClientInterface interface { KubeAdminClient() *kubernetes.Clientset DynamicAdminClient() dynamic.Interface ScyllaAdminClient() *scyllaclientset.Clientset + PrometheusOperatorAdminClient() *promclient.Clientset } type FullClientInterface interface { @@ -107,6 +109,12 @@ func (ac *AdminClient) DiscoveryClient() *discovery.DiscoveryClient { return client } +func (ac *AdminClient) PrometheusOperatorAdminClient() *promclient.Clientset { + cs, err := promclient.NewForConfig(ac.AdminClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + return cs +} + type FullClient struct { Client AdminClient diff --git a/test/e2e/set/scylladbmonitoring/scylladbmonitoring.go b/test/e2e/set/scylladbmonitoring/scylladbmonitoring.go index 89286563a77..f38bf828c3f 100644 --- a/test/e2e/set/scylladbmonitoring/scylladbmonitoring.go +++ b/test/e2e/set/scylladbmonitoring/scylladbmonitoring.go @@ -19,6 +19,7 @@ import ( g "github.com/onsi/ginkgo/v2" o "github.com/onsi/gomega" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" prometheusappclient "github.com/prometheus/client_golang/api" 
promeheusappv1api "github.com/prometheus/client_golang/api/prometheus/v1" configassests "github.com/scylladb/scylla-operator/assets/config" @@ -34,7 +35,11 @@ import ( "github.com/scylladb/scylla-operator/test/e2e/utils/grafana" scyllaclusterverification "github.com/scylladb/scylla-operator/test/e2e/utils/verification/scyllacluster" "github.com/scylladb/scylla-operator/test/e2e/verification" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/util/cert" "k8s.io/klog/v2" ) @@ -116,6 +121,57 @@ var _ = g.Describe("ScyllaDBMonitoring", func() { VerifyPrometheusFn: verifyManagedPrometheus, VerifyGrafanaFn: verifyManagedGrafanaWithDashboards(getExpectedPlatformDashboards()), }), + g.Entry(describeEntry, &scyllaDBMonitoringEntry{ + Description: "Platform type with external Prometheus without TLS", + ScyllaDBMonitoringModifierFn: func(sm *scyllav1alpha1.ScyllaDBMonitoring) { + sm.Spec.Type = pointer.Ptr(scyllav1alpha1.ScyllaDBMonitoringTypePlatform) + sm.Spec.Components.Prometheus.Mode = scyllav1alpha1.PrometheusModeExternal + sm.Spec.Components.Grafana.Datasources = []scyllav1alpha1.GrafanaDatasourceSpec{ + { + Name: "prometheus", + Type: scyllav1alpha1.GrafanaDatasourceTypePrometheus, + URL: fmt.Sprintf("http://%s.%s.svc.cluster.local:9090", prometheusNameForScyllaDBMonitoring(sm), f.Namespace()), + PrometheusOptions: &scyllav1alpha1.GrafanaPrometheusDatasourceOptions{ + TLS: &scyllav1alpha1.GrafanaDatasourceTLSSpec{ + InsecureSkipVerify: true, + }, + Auth: &scyllav1alpha1.GrafanaPrometheusDatasourceAuthSpec{ + Type: scyllav1alpha1.GrafanaPrometheusDatasourceAuthTypeNoAuthentication, + }, + }, + }, + } + }, + PrepareExternalPrometheusFn: prepareExternalPrometheusWithoutTLS, + VerifyPrometheusFn: verifyExternalPrometheusWithoutTLS, + VerifyGrafanaFn: verifyManagedGrafanaWithDashboards(getExpectedPlatformDashboards()), + }), + g.FEntry(describeEntry, 
&scyllaDBMonitoringEntry{ + Description: "Platform type with external Prometheus with TLS", + ScyllaDBMonitoringModifierFn: func(sm *scyllav1alpha1.ScyllaDBMonitoring) { + sm.Spec.Type = pointer.Ptr(scyllav1alpha1.ScyllaDBMonitoringTypePlatform) + sm.Spec.Components.Prometheus.Mode = scyllav1alpha1.PrometheusModeExternal + sm.Spec.Components.Grafana.Datasources = []scyllav1alpha1.GrafanaDatasourceSpec{ + { + Name: "prometheus", + Type: scyllav1alpha1.GrafanaDatasourceTypePrometheus, + URL: fmt.Sprintf("https://%s.%s.svc.cluster.local:9090", prometheusNameForScyllaDBMonitoring(sm), f.Namespace()), + PrometheusOptions: &scyllav1alpha1.GrafanaPrometheusDatasourceOptions{ + TLS: &scyllav1alpha1.GrafanaDatasourceTLSSpec{ + InsecureSkipVerify: false, + CACertConfigMapRef: &scyllav1alpha1.LocalObjectKeySelector{ + Name: prometheusCACertConfigMapNameForScyllaDBMonitoring(sm), + Key: "ca-bundle.crt", + }, + }, + }, + }, + } + }, + PrepareExternalPrometheusFn: prepareExternalPrometheusWithTLS, + VerifyPrometheusFn: verifyExternalPrometheusWithTLS, + VerifyGrafanaFn: verifyManagedGrafanaWithDashboards(getExpectedPlatformDashboards()), + }), ) }) @@ -196,6 +252,13 @@ func verifyManagedPrometheus(ctx context.Context, f *framework.Framework, sm *sc // Some of these may be fixable by manually verifying it in the operator sync loop so it can also be // consumed by clients, but it's a bigger effort. 
+ promClient := prepareManagedPrometheusClient(ctx, f, sm) + verifyPrometheusTargetsAndRules(ctx, promClient) +} + +func prepareManagedPrometheusClient(ctx context.Context, f *framework.Framework, sm *scyllav1alpha1.ScyllaDBMonitoring) promeheusappv1api.API { + g.GinkgoHelper() + prometheusServingCABundleConfigMapName, err := naming.ManagedPrometheusServingCAConfigMapName(sm) o.Expect(err).NotTo(o.HaveOccurred()) prometheusServingCABundleConfigMap, err := f.KubeClient().CoreV1().ConfigMaps(f.Namespace()).Get(ctx, prometheusServingCABundleConfigMapName, metav1.GetOptions{}) @@ -245,8 +308,13 @@ func verifyManagedPrometheus(ctx context.Context, f *framework.Framework, sm *sc }) o.Expect(err).NotTo(o.HaveOccurred()) - promClient := promeheusappv1api.NewAPI(promHTTPClient) + return promeheusappv1api.NewAPI(promHTTPClient) +} + +func verifyPrometheusTargetsAndRules(ctx context.Context, promClient promeheusappv1api.API) { + g.GinkgoHelper() + framework.By("Verifying Prometheus targets and rules") o.Eventually(func(eo o.Gomega) { ctxTargets, ctxTargetsCancel := context.WithTimeout(ctx, 15*time.Second) defer ctxTargetsCancel() @@ -437,3 +505,357 @@ func decodeGrafanaDashboardFromGZBase64String(s string) (*grafanaDashboard, erro return res, nil } + +func prepareExternalPrometheusWithoutTLS(ctx context.Context, f *framework.Framework, sm *scyllav1alpha1.ScyllaDBMonitoring) { + g.GinkgoHelper() + + framework.By("Creating a ServiceAccount for external Prometheus") + sa := createExternalPrometheusServiceAccountWithClusterRole(ctx, f, sm) + + framework.By("Creating a Service for Prometheus to be used by external Prometheus") + svc := createServiceForPrometheus(ctx, f, sm) + + framework.By("Creating a Prometheus instance to be used as external Prometheus") + createExternalPrometheusInstanceWithoutTLS(ctx, f, sm, sa, svc) +} + +func prepareExternalPrometheusWithTLS(ctx context.Context, f *framework.Framework, sm *scyllav1alpha1.ScyllaDBMonitoring) { + g.GinkgoHelper() + + 
framework.By("Creating a ServiceAccount for external Prometheus") + sa := createExternalPrometheusServiceAccountWithClusterRole(ctx, f, sm) + + framework.By("Creating a Service for Prometheus to be used by external Prometheus") + svc := createServiceForPrometheus(ctx, f, sm) + + framework.By("Creating a TLS Secret and ConfigMap for Prometheus") + createPrometheusTLSSecretAndConfigMap(ctx, f, sm) + + framework.By("Creating a Prometheus instance to be used as external Prometheus") + createExternalPrometheusInstanceWithTLS(ctx, f, sm, sa, svc) +} + +func createExternalPrometheusServiceAccountWithClusterRole(ctx context.Context, f *framework.Framework, sm *scyllav1alpha1.ScyllaDBMonitoring) *corev1.ServiceAccount { + g.GinkgoHelper() + + framework.By("Creating a ServiceAccount for external Prometheus") + prometheusServiceAccountName := fmt.Sprintf("%s-prometheus", sm.Name) + sa, err := f.KubeAdminClient().CoreV1().ServiceAccounts(f.Namespace()).Create(ctx, &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: prometheusServiceAccountName, + Namespace: f.Namespace(), + }, + }, metav1.CreateOptions{ + FieldManager: f.FieldManager(), + }) + o.Expect(err).NotTo(o.HaveOccurred()) + + framework.By("Creating a ClusterRole for external Prometheus") + prometheusClusterRoleName := fmt.Sprintf("%s-prometheus", sm.Name) + _, err = f.KubeAdminClient().RbacV1().ClusterRoles().Create(ctx, &rbacv1.ClusterRole{ + ObjectMeta: metav1.ObjectMeta{ + Name: prometheusClusterRoleName, + }, + Rules: []rbacv1.PolicyRule{ + { + APIGroups: []string{""}, + Resources: []string{ + "nodes", + "nodes/metrics", + "services", + "endpoints", + "pods", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{""}, + Resources: []string{"configmaps"}, + Verbs: []string{"get"}, + }, + { + APIGroups: []string{"discovery.k8s.io"}, + Resources: []string{"endpointslices"}, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"networking.k8s.io"}, + Resources: 
[]string{"ingresses"}, + Verbs: []string{"get", "list", "watch"}, + }, + { + NonResourceURLs: []string{"/metrics"}, + Verbs: []string{"get"}, + }, + }, + }, metav1.CreateOptions{ + FieldManager: f.FieldManager(), + }) + o.Expect(err).NotTo(o.HaveOccurred()) + + framework.By("Creating a ClusterRoleBinding for external Prometheus") + _, err = f.KubeAdminClient().RbacV1().ClusterRoleBindings().Create(ctx, &rbacv1.ClusterRoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-prometheus", sm.Name), + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: prometheusServiceAccountName, + Namespace: f.Namespace(), + }, + }, + RoleRef: rbacv1.RoleRef{ + APIGroup: "rbac.authorization.k8s.io", + Kind: "ClusterRole", + Name: prometheusClusterRoleName, + }, + }, metav1.CreateOptions{ + FieldManager: f.FieldManager(), + }) + o.Expect(err).NotTo(o.HaveOccurred()) + + return sa +} + +// createServiceForPrometheus creates a headless Service for Prometheus to be used by external Prometheus. 
+func createServiceForPrometheus(ctx context.Context, f *framework.Framework, sm *scyllav1alpha1.ScyllaDBMonitoring) *corev1.Service { + g.GinkgoHelper() + + framework.By("Creating a Service for Prometheus") + prometheusName := prometheusNameForScyllaDBMonitoring(sm) + svc, err := f.KubeAdminClient().CoreV1().Services(f.Namespace()).Create(ctx, &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: prometheusName, + Namespace: f.Namespace(), + }, + Spec: corev1.ServiceSpec{ + ClusterIP: corev1.ClusterIPNone, + Selector: map[string]string{ + "app.kubernetes.io/managed-by": "prometheus-operator", + "app.kubernetes.io/name": "prometheus", + "app.kubernetes.io/instance": prometheusName, + "operator.prometheus.io/name": prometheusName, + "prometheus": prometheusName, + }, + Ports: []corev1.ServicePort{ + { + Name: "web", + Port: 9090, + Protocol: corev1.ProtocolTCP, + }, + }, + }, + }, metav1.CreateOptions{ + FieldManager: f.FieldManager(), + }) + o.Expect(err).NotTo(o.HaveOccurred()) + + return svc +} + +func createExternalPrometheusInstanceWithoutTLS(ctx context.Context, f *framework.Framework, sm *scyllav1alpha1.ScyllaDBMonitoring, sa *corev1.ServiceAccount, svc *corev1.Service) *monitoringv1.Prometheus { + g.GinkgoHelper() + + framework.By("Creating a Prometheus instance") + prom, err := f.PrometheusOperatorAdminClient().MonitoringV1().Prometheuses(f.Namespace()).Create(ctx, &monitoringv1.Prometheus{ + ObjectMeta: metav1.ObjectMeta{ + Name: prometheusNameForScyllaDBMonitoring(sm), + Namespace: f.Namespace(), + }, + Spec: monitoringv1.PrometheusSpec{ + CommonPrometheusFields: monitoringv1.CommonPrometheusFields{ + ServiceAccountName: sa.Name, + ServiceName: pointer.Ptr(svc.Name), + SecurityContext: &corev1.PodSecurityContext{ + RunAsNonRoot: pointer.Ptr(true), + RunAsUser: pointer.Ptr[int64](65534), + FSGroup: pointer.Ptr[int64](65534), + }, + ServiceMonitorSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + 
"scylla-operator.scylladb.com/scylladbmonitoring-name": sm.Name, + }, + }, + Web: &monitoringv1.PrometheusWebSpec{ + PageTitle: pointer.Ptr("ScyllaDB Prometheus"), + }, + }, + Alerting: &monitoringv1.AlertingSpec{ + Alertmanagers: []monitoringv1.AlertmanagerEndpoints{ + { + Name: "scylla-monitoring", + Port: intstr.FromString("web"), + }, + }, + }, + RuleSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "scylla-operator.scylladb.com/scylladbmonitoring-name": sm.Name, + }, + }, + }, + }, metav1.CreateOptions{ + FieldManager: f.FieldManager(), + }) + o.Expect(err).NotTo(o.HaveOccurred()) + + return prom +} + +func createExternalPrometheusInstanceWithTLS(ctx context.Context, f *framework.Framework, sm *scyllav1alpha1.ScyllaDBMonitoring, sa *corev1.ServiceAccount, svc *corev1.Service) *monitoringv1.Prometheus { + g.GinkgoHelper() + + framework.By("Creating a Prometheus instance with TLS") + tlsSecretName := prometheusTLSSecretNameForScyllaDBMonitoring(sm) + prom, err := f.PrometheusOperatorAdminClient().MonitoringV1().Prometheuses(f.Namespace()).Create(ctx, &monitoringv1.Prometheus{ + ObjectMeta: metav1.ObjectMeta{ + Name: prometheusNameForScyllaDBMonitoring(sm), + Namespace: f.Namespace(), + }, + Spec: monitoringv1.PrometheusSpec{ + CommonPrometheusFields: monitoringv1.CommonPrometheusFields{ + ServiceAccountName: sa.Name, + ServiceName: pointer.Ptr(svc.Name), + SecurityContext: &corev1.PodSecurityContext{ + RunAsNonRoot: pointer.Ptr(true), + RunAsUser: pointer.Ptr[int64](65534), + FSGroup: pointer.Ptr[int64](65534), + }, + ServiceMonitorSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "scylla-operator.scylladb.com/scylladbmonitoring-name": sm.Name, + }, + }, + Web: &monitoringv1.PrometheusWebSpec{ + PageTitle: pointer.Ptr("ScyllaDB Prometheus"), + WebConfigFileFields: monitoringv1.WebConfigFileFields{ + TLSConfig: &monitoringv1.WebTLSConfig{ + Cert: monitoringv1.SecretOrConfigMap{ + Secret: &corev1.SecretKeySelector{ + 
LocalObjectReference: corev1.LocalObjectReference{ + Name: tlsSecretName, + }, + Key: "tls.crt", + }, + }, + KeySecret: corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: tlsSecretName, + }, + Key: "tls.key", + }, + }, + }, + }, + }, + Alerting: &monitoringv1.AlertingSpec{ + Alertmanagers: []monitoringv1.AlertmanagerEndpoints{ + { + Name: "scylla-monitoring", + Port: intstr.FromString("web"), + }, + }, + }, + RuleSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "scylla-operator.scylladb.com/scylladbmonitoring-name": sm.Name, + }, + }, + }, + }, metav1.CreateOptions{ + FieldManager: f.FieldManager(), + }) + o.Expect(err).NotTo(o.HaveOccurred()) + + return prom +} + +func verifyExternalPrometheusWithoutTLS(ctx context.Context, f *framework.Framework, sm *scyllav1alpha1.ScyllaDBMonitoring) { + g.GinkgoHelper() + + promClient := makePrometheusClientWithConfig(prometheusappclient.Config{ + Address: fmt.Sprintf("http://%s.%s.svc.cluster.local:9090", prometheusNameForScyllaDBMonitoring(sm), f.Namespace()), + }) + verifyPrometheusTargetsAndRules(ctx, promClient) +} + +func verifyExternalPrometheusWithTLS(ctx context.Context, f *framework.Framework, sm *scyllav1alpha1.ScyllaDBMonitoring) { + g.GinkgoHelper() + + rootCAs := x509.NewCertPool() + prometheusCACertConfigMap, err := f.KubeClient().CoreV1().ConfigMaps(f.Namespace()).Get(ctx, prometheusCACertConfigMapNameForScyllaDBMonitoring(sm), metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + prometheusCACerts, _ := verification.VerifyAndParseCABundle(prometheusCACertConfigMap) + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(prometheusCACerts).To(o.HaveLen(2)) + rootCAs.AddCert(prometheusCACerts[1]) // The CA cert is the second cert in the bundle. 
+ + promClient := makePrometheusClientWithConfig(prometheusappclient.Config{ + Address: fmt.Sprintf("https://%s.%s.svc.cluster.local:9090", prometheusNameForScyllaDBMonitoring(sm), f.Namespace()), + Client: &http.Client{ + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: false, + RootCAs: rootCAs, + }, + }, + }, + }) + verifyPrometheusTargetsAndRules(ctx, promClient) +} + +func makePrometheusClientWithConfig(cfg prometheusappclient.Config) promeheusappv1api.API { + g.GinkgoHelper() + + promHTTPClient, err := prometheusappclient.NewClient(cfg) + o.Expect(err).NotTo(o.HaveOccurred()) + return promeheusappv1api.NewAPI(promHTTPClient) +} + +func createPrometheusTLSSecretAndConfigMap(ctx context.Context, f *framework.Framework, sm *scyllav1alpha1.ScyllaDBMonitoring) { + crt, key, err := cert.GenerateSelfSignedCertKey(fmt.Sprintf("%s.%s.svc.cluster.local", prometheusNameForScyllaDBMonitoring(sm), f.Namespace()), nil, nil) + o.Expect(err).NotTo(o.HaveOccurred()) + + framework.By("Creating a TLS secret for Prometheus") + _, err = f.KubeAdminClient().CoreV1().Secrets(f.Namespace()).Create(ctx, &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: prometheusTLSSecretNameForScyllaDBMonitoring(sm), + Namespace: f.Namespace(), + }, + Type: corev1.SecretTypeTLS, + StringData: map[string]string{ + "tls.crt": string(crt), + "tls.key": string(key), + }, + }, metav1.CreateOptions{ + FieldManager: f.FieldManager(), + }) + o.Expect(err).NotTo(o.HaveOccurred()) + + framework.By("Creating a CA cert config map for Prometheus") + _, err = f.KubeAdminClient().CoreV1().ConfigMaps(f.Namespace()).Create(ctx, &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: prometheusCACertConfigMapNameForScyllaDBMonitoring(sm), + Namespace: f.Namespace(), + }, + Data: map[string]string{ + "ca-bundle.crt": string(crt), + }, + }, metav1.CreateOptions{ + FieldManager: f.FieldManager(), + }) + o.Expect(err).NotTo(o.HaveOccurred()) +} + +func 
prometheusNameForScyllaDBMonitoring(sm *scyllav1alpha1.ScyllaDBMonitoring) string { + return fmt.Sprintf("%s-prometheus", sm.Name) +} + +func prometheusCACertConfigMapNameForScyllaDBMonitoring(sm *scyllav1alpha1.ScyllaDBMonitoring) string { + return fmt.Sprintf("%s-prometheus-tls-ca", sm.Name) +} + +func prometheusTLSSecretNameForScyllaDBMonitoring(sm *scyllav1alpha1.ScyllaDBMonitoring) string { + return fmt.Sprintf("%s-prometheus-tls", sm.Name) +} From ddd390d07de832ceab7924112a7821951f44ed0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Grzegorz=20Burzy=C5=84ski?= Date: Thu, 25 Sep 2025 11:40:53 +0200 Subject: [PATCH 13/14] Enable UWM in OpenShift test clusters --- .../openshift-monitoring/openshift-uwm.cm.yaml | 10 ++++++++++ hack/.ci/run-e2e-openshift-aws-release.sh | 3 +++ hack/.ci/run-e2e-openshift-aws.sh | 3 +++ hack/ci-deploy-release.sh | 7 +++++++ hack/ci-deploy.sh | 7 +++++++ 5 files changed, 30 insertions(+) create mode 100644 hack/.ci/manifests/namespaces/openshift-monitoring/openshift-uwm.cm.yaml diff --git a/hack/.ci/manifests/namespaces/openshift-monitoring/openshift-uwm.cm.yaml b/hack/.ci/manifests/namespaces/openshift-monitoring/openshift-uwm.cm.yaml new file mode 100644 index 00000000000..d4c83cee1a6 --- /dev/null +++ b/hack/.ci/manifests/namespaces/openshift-monitoring/openshift-uwm.cm.yaml @@ -0,0 +1,10 @@ +# This ConfigMap enables User Workload Monitoring in OpenShift clusters. +# See https://docs.redhat.com/en/documentation/openshift_container_platform/4.18/html/monitoring/configuring-user-workload-monitoring#enabling-monitoring-for-user-defined-projects_preparing-to-configure-the-monitoring-stack-uwm. 
+apiVersion: v1 +kind: ConfigMap +metadata: + name: cluster-monitoring-config + namespace: openshift-monitoring +data: + config.yaml: | + enableUserWorkload: true diff --git a/hack/.ci/run-e2e-openshift-aws-release.sh b/hack/.ci/run-e2e-openshift-aws-release.sh index df9f7bc1d8e..3a6a91cfe29 100755 --- a/hack/.ci/run-e2e-openshift-aws-release.sh +++ b/hack/.ci/run-e2e-openshift-aws-release.sh @@ -36,6 +36,9 @@ export SO_NODECONFIG_PATH SO_DISABLE_PROMETHEUS_OPERATOR="${SO_DISABLE_PROMETHEUS_OPERATOR:-true}" export SO_DISABLE_PROMETHEUS_OPERATOR +SO_ENABLE_OPENSHIFT_USER_WORKLOAD_MONITORING="${SO_ENABLE_OPENSHIFT_USER_WORKLOAD_MONITORING:-true}" +export SO_ENABLE_OPENSHIFT_USER_WORKLOAD_MONITORING + run-deploy-script-in-all-clusters "${parent_dir}/../ci-deploy-release.sh" apply-e2e-workarounds-in-all-clusters diff --git a/hack/.ci/run-e2e-openshift-aws.sh b/hack/.ci/run-e2e-openshift-aws.sh index e6b14c301a8..3515eac5b83 100755 --- a/hack/.ci/run-e2e-openshift-aws.sh +++ b/hack/.ci/run-e2e-openshift-aws.sh @@ -39,6 +39,9 @@ export SO_CSI_DRIVER_PATH SO_DISABLE_PROMETHEUS_OPERATOR="${SO_DISABLE_PROMETHEUS_OPERATOR:-true}" export SO_DISABLE_PROMETHEUS_OPERATOR +SO_ENABLE_OPENSHIFT_USER_WORKLOAD_MONITORING="${SO_ENABLE_OPENSHIFT_USER_WORKLOAD_MONITORING:-true}" +export SO_ENABLE_OPENSHIFT_USER_WORKLOAD_MONITORING + run-deploy-script-in-all-clusters "${parent_dir}/../ci-deploy.sh" apply-e2e-workarounds-in-all-clusters diff --git a/hack/ci-deploy-release.sh b/hack/ci-deploy-release.sh index 56df0ebfb1f..bcabcb13abe 100755 --- a/hack/ci-deploy-release.sh +++ b/hack/ci-deploy-release.sh @@ -58,6 +58,13 @@ else kubectl_create -n=prometheus-operator -f="${source_url}/${revision}/examples/third-party/prometheus-operator.yaml" fi +if [[ "${SO_ENABLE_OPENSHIFT_USER_WORKLOAD_MONITORING:-}" == "true" ]]; then + echo "Enabling OpenShift User Workload Monitoring" + kubectl_create -f="${source_url}/${revision}/hack/.ci/manifests/namespaces/openshift-monitoring/openshift-uwm.cm.yaml" 
+else + echo "Skipping enabling OpenShift User Workload Monitoring" +fi + kubectl_create -n=haproxy-ingress -f="${source_url}/${revision}/examples/third-party/haproxy-ingress.yaml" kubectl_create -f="${source_url}/${revision}/examples/third-party/cert-manager.yaml" diff --git a/hack/ci-deploy.sh b/hack/ci-deploy.sh index 9e6b93dbf3b..3924a52327b 100755 --- a/hack/ci-deploy.sh +++ b/hack/ci-deploy.sh @@ -42,6 +42,13 @@ else cp ./examples/third-party/prometheus-operator/*.yaml "${ARTIFACTS_DEPLOY_DIR}/prometheus-operator" fi +if [[ "${SO_ENABLE_OPENSHIFT_USER_WORKLOAD_MONITORING:-}" == "true" ]]; then + echo "Enabling OpenShift User Workload Monitoring" + cp ./hack/.ci/manifests/namespaces/openshift-monitoring/openshift-uwm.cm.yaml "${ARTIFACTS_DEPLOY_DIR}/" +else + echo "Skipping enabling OpenShift User Workload Monitoring" +fi + cp ./deploy/operator/*.yaml "${ARTIFACTS_DEPLOY_DIR}/operator" cp ./examples/third-party/haproxy-ingress/*.yaml "${ARTIFACTS_DEPLOY_DIR}/haproxy-ingress" cp ./examples/third-party/cert-manager.yaml "${ARTIFACTS_DEPLOY_DIR}/" From b8c743137417969847c89efbed02fd67fcb4ed3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Grzegorz=20Burzy=C5=84ski?= Date: Thu, 25 Sep 2025 12:12:34 +0200 Subject: [PATCH 14/14] Add external Prometheus on OpenShift E2E --- hack/ci-deploy.sh | 4 + .../scylladbmonitoring/scylladbmonitoring.go | 133 +++++++++++++++++- test/e2e/utils/grafana/client.go | 23 +++ 3 files changed, 153 insertions(+), 7 deletions(-) diff --git a/hack/ci-deploy.sh b/hack/ci-deploy.sh index 3924a52327b..eb054a51526 100755 --- a/hack/ci-deploy.sh +++ b/hack/ci-deploy.sh @@ -108,6 +108,10 @@ if [[ -n ${SO_INSTALL_XFSPROGS_ON_NODES:-} ]]; then kubectl_create -f "${ARTIFACTS_DEPLOY_DIR}"/install-xfsprogs.daemonset.yaml fi +if [[ "${SO_ENABLE_OPENSHIFT_USER_WORKLOAD_MONITORING:-}" == "true" ]]; then + kubectl_create -f "${ARTIFACTS_DEPLOY_DIR}/openshift-uwm.cm.yaml" +fi + kubectl_create -n haproxy-ingress -f "${ARTIFACTS_DEPLOY_DIR}/haproxy-ingress" 
kubectl_create -f "${ARTIFACTS_DEPLOY_DIR}"/cert-manager.yaml diff --git a/test/e2e/set/scylladbmonitoring/scylladbmonitoring.go b/test/e2e/set/scylladbmonitoring/scylladbmonitoring.go index f38bf828c3f..e1b4cfaf167 100644 --- a/test/e2e/set/scylladbmonitoring/scylladbmonitoring.go +++ b/test/e2e/set/scylladbmonitoring/scylladbmonitoring.go @@ -38,7 +38,7 @@ import ( corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/intstr" + apimachineryutilintstr "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/util/cert" "k8s.io/klog/v2" ) @@ -70,7 +70,7 @@ var _ = g.Describe("ScyllaDBMonitoring", func() { f := framework.NewFramework("scylladbmonitoring") // Disabled on OpenShift because of https://github.com/scylladb/scylla-operator/issues/2319#issuecomment-2643287819 - g.DescribeTable("should setup monitoring stack TESTCASE_DISABLED_ON_OPENSHIFT", func(ctx g.SpecContext, e *scyllaDBMonitoringEntry) { + g.DescribeTable("should setup monitoring stack", func(ctx g.SpecContext, e *scyllaDBMonitoringEntry) { framework.By("Creating a ScyllaCluster with a single node") sc := createTestScyllaCluster(ctx, f) @@ -88,8 +88,10 @@ var _ = g.Describe("ScyllaDBMonitoring", func() { framework.By("Waiting for the ScyllaDBMonitoring to roll out") awaitScyllaDBMonitoringRollout(ctx, f, sm) - framework.By("Verifying that Prometheus is configured correctly") - e.VerifyPrometheusFn(ctx, f, sm) + if e.VerifyPrometheusFn != nil { + framework.By("Verifying that Prometheus is configured correctly") + e.VerifyPrometheusFn(ctx, f, sm) + } framework.By("Verifying that Grafana is configured correctly") e.VerifyGrafanaFn(ctx, f, sm) @@ -146,7 +148,7 @@ var _ = g.Describe("ScyllaDBMonitoring", func() { VerifyPrometheusFn: verifyExternalPrometheusWithoutTLS, VerifyGrafanaFn: verifyManagedGrafanaWithDashboards(getExpectedPlatformDashboards()), }), - g.FEntry(describeEntry, &scyllaDBMonitoringEntry{ + 
g.Entry(describeEntry, &scyllaDBMonitoringEntry{ Description: "Platform type with external Prometheus with TLS", ScyllaDBMonitoringModifierFn: func(sm *scyllav1alpha1.ScyllaDBMonitoring) { sm.Spec.Type = pointer.Ptr(scyllav1alpha1.ScyllaDBMonitoringTypePlatform) @@ -172,6 +174,41 @@ var _ = g.Describe("ScyllaDBMonitoring", func() { VerifyPrometheusFn: verifyExternalPrometheusWithTLS, VerifyGrafanaFn: verifyManagedGrafanaWithDashboards(getExpectedPlatformDashboards()), }), + g.Entry(describeEntry, &scyllaDBMonitoringEntry{ + Description: "Platform type with Thanos Querier on OpenShift", + ScyllaDBMonitoringModifierFn: func(sm *scyllav1alpha1.ScyllaDBMonitoring) { + sm.Spec.Type = pointer.Ptr(scyllav1alpha1.ScyllaDBMonitoringTypePlatform) + sm.Spec.Components.Prometheus.Mode = scyllav1alpha1.PrometheusModeExternal + sm.Spec.Components.Grafana.Datasources = []scyllav1alpha1.GrafanaDatasourceSpec{ + { + Name: "prometheus", + Type: scyllav1alpha1.GrafanaDatasourceTypePrometheus, + URL: "https://thanos-querier.openshift-monitoring.svc:9091", + PrometheusOptions: &scyllav1alpha1.GrafanaPrometheusDatasourceOptions{ + TLS: &scyllav1alpha1.GrafanaDatasourceTLSSpec{ + InsecureSkipVerify: false, + CACertConfigMapRef: &scyllav1alpha1.LocalObjectKeySelector{ + Name: "openshift-service-ca.crt", + Key: "service-ca.crt", + }, + }, + Auth: &scyllav1alpha1.GrafanaPrometheusDatasourceAuthSpec{ + Type: scyllav1alpha1.GrafanaPrometheusDatasourceAuthTypeBearerToken, + BearerTokenOptions: &scyllav1alpha1.GrafanaPrometheusDatasourceBearerTokenAuthOptions{ + SecretRef: &scyllav1alpha1.LocalObjectKeySelector{ + Name: monitoringAccessServiceAccountNameOnOpenShift(sm), + Key: "token", + }, + }, + }, + }, + }, + } + }, + PrepareExternalPrometheusFn: prepareOpenShiftMonitoring, + VerifyPrometheusFn: nil, // Nothing to verify, we trust OpenShift. 
+ VerifyGrafanaFn: verifyManagedGrafanaWithDashboards(getExpectedPlatformDashboards()), + }), ) }) @@ -400,6 +437,7 @@ func verifyManagedGrafanaWithDashboards( o.Expect(err).NotTo(o.HaveOccurred()) verifyGrafanaDashboards(grafanaClient, expectedDashboards, expectedHomeDashboardUID) + verifyPrometheusGrafanaDataSource(grafanaClient) } } @@ -423,6 +461,18 @@ func verifyGrafanaDashboards(grafanaClient *grafana.Client, expectedDashboards [ o.Expect(homeDashboardUID).To(o.Equal(expectedHomeDashboardUID)) } +func verifyPrometheusGrafanaDataSource(grafanaClient *grafana.Client) { + g.GinkgoHelper() + + framework.By("Verifying 'prometheus' Grafana data source") + o.Eventually(func(eo o.Gomega) { + health, err := grafanaClient.DatasourceHealth("prometheus") + framework.Infof("Checking 'prometheus' grafana data source health: err: %v, health: %v, message: %s", err, health.OK, health.Message) + eo.Expect(err).NotTo(o.HaveOccurred()) + eo.Expect(health.OK).To(o.Equal(true)) + }).WithTimeout(10 * time.Minute).WithPolling(1 * time.Second).Should(o.Succeed()) +} + // getExpectedPlatformDashboards returns the expected grafana dashboards. // Platform dashboards come directly from ScyllaDB Monitoring, so we do not know the expected values // and given the size they are not feasible to be maintained as a duplicate. 
@@ -685,7 +735,7 @@ func createExternalPrometheusInstanceWithoutTLS(ctx context.Context, f *framewor Alertmanagers: []monitoringv1.AlertmanagerEndpoints{ { Name: "scylla-monitoring", - Port: intstr.FromString("web"), + Port: apimachineryutilintstr.FromString("web"), }, }, }, @@ -753,7 +803,7 @@ func createExternalPrometheusInstanceWithTLS(ctx context.Context, f *framework.F Alertmanagers: []monitoringv1.AlertmanagerEndpoints{ { Name: "scylla-monitoring", - Port: intstr.FromString("web"), + Port: apimachineryutilintstr.FromString("web"), }, }, }, @@ -771,6 +821,71 @@ func createExternalPrometheusInstanceWithTLS(ctx context.Context, f *framework.F return prom } +func prepareOpenShiftMonitoring(ctx context.Context, f *framework.Framework, sm *scyllav1alpha1.ScyllaDBMonitoring) { + g.GinkgoHelper() + + framework.By("Creating a ServiceAccount for monitoring access on OpenShift") + sa := createMonitoringAccessServiceAccountOnOpenShift(ctx, f, sm) + + framework.By("Binding cluster-monitoring-view ClusterRole to the ServiceAccount") + bindClusterMonitoringViewClusterRoleToServiceAccount(ctx, f, sa) + + framework.By("Creating a Secret with the ServiceAccount token") + createServiceAccountTokenSecret(ctx, f, sa) +} + +func createMonitoringAccessServiceAccountOnOpenShift(ctx context.Context, f *framework.Framework, sm *scyllav1alpha1.ScyllaDBMonitoring) *corev1.ServiceAccount { + sa, err := f.KubeAdminClient().CoreV1().ServiceAccounts(f.Namespace()).Create(ctx, &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: monitoringAccessServiceAccountNameOnOpenShift(sm), + Namespace: f.Namespace(), + }, + }, metav1.CreateOptions{ + FieldManager: f.FieldManager(), + }) + o.Expect(err).NotTo(o.HaveOccurred()) + return sa +} + +func bindClusterMonitoringViewClusterRoleToServiceAccount(ctx context.Context, f *framework.Framework, sa *corev1.ServiceAccount) { + _, err := f.KubeAdminClient().RbacV1().ClusterRoleBindings().Create(ctx, &rbacv1.ClusterRoleBinding{ + ObjectMeta: 
metav1.ObjectMeta{ + Name: sa.Name, + }, + Subjects: []rbacv1.Subject{ + { + Kind: rbacv1.ServiceAccountKind, + Name: sa.Name, + Namespace: sa.Namespace, + }, + }, + RoleRef: rbacv1.RoleRef{ + APIGroup: "rbac.authorization.k8s.io", + Kind: "ClusterRole", + Name: "cluster-monitoring-view", + }, + }, metav1.CreateOptions{ + FieldManager: f.FieldManager(), + }) + o.Expect(err).NotTo(o.HaveOccurred()) +} + +func createServiceAccountTokenSecret(ctx context.Context, f *framework.Framework, sa *corev1.ServiceAccount) { + _, err := f.KubeAdminClient().CoreV1().Secrets(f.Namespace()).Create(ctx, &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: sa.Name, + Namespace: f.Namespace(), + Annotations: map[string]string{ + "kubernetes.io/service-account.name": sa.Name, + }, + }, + Type: corev1.SecretTypeServiceAccountToken, + }, metav1.CreateOptions{ + FieldManager: f.FieldManager(), + }) + o.Expect(err).NotTo(o.HaveOccurred()) +} + func verifyExternalPrometheusWithoutTLS(ctx context.Context, f *framework.Framework, sm *scyllav1alpha1.ScyllaDBMonitoring) { g.GinkgoHelper() @@ -859,3 +974,7 @@ func prometheusCACertConfigMapNameForScyllaDBMonitoring(sm *scyllav1alpha1.Scyll func prometheusTLSSecretNameForScyllaDBMonitoring(sm *scyllav1alpha1.ScyllaDBMonitoring) string { return fmt.Sprintf("%s-prometheus-tls", sm.Name) } + +func monitoringAccessServiceAccountNameOnOpenShift(sm *scyllav1alpha1.ScyllaDBMonitoring) string { + return fmt.Sprintf("%s-monitoring-access", sm.Name) +} diff --git a/test/e2e/utils/grafana/client.go b/test/e2e/utils/grafana/client.go index b710145ffa7..6160fb484c2 100644 --- a/test/e2e/utils/grafana/client.go +++ b/test/e2e/utils/grafana/client.go @@ -95,3 +95,26 @@ func (c *Client) HomeDashboardUID() (string, error) { return "", fmt.Errorf("unexpected type for dashboard payload") } + +type DatasourceHealth struct { + Message string + OK bool +} + +func (c *Client) DatasourceHealth(datasourceName string) (DatasourceHealth, error) { + resp, err := 
c.c.Datasources.GetDataSourceByName(datasourceName) + if err != nil { + return DatasourceHealth{}, fmt.Errorf("failed to get datasource %q: %w", datasourceName, err) + } + + datasourceUID := resp.GetPayload().UID + healthResp, err := c.c.Datasources.CheckDatasourceHealthWithUID(datasourceUID) + if err != nil { + return DatasourceHealth{}, fmt.Errorf("failed to check health for datasource %q (UID %s): %w", datasourceName, datasourceUID, err) + } + + return DatasourceHealth{ + Message: healthResp.GetPayload().Message, + OK: healthResp.IsSuccess(), + }, nil +}