diff --git a/apis/v1alpha1/config_types.go b/apis/v1alpha1/config_types.go index 5db892e6e..1665badad 100644 --- a/apis/v1alpha1/config_types.go +++ b/apis/v1alpha1/config_types.go @@ -28,63 +28,89 @@ import ( // Config holds the configuration for bpfman-operator. // +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" +// +kubebuilder:printcolumn:name="Progressing",type="string",JSONPath=".status.conditions[?(@.type=='Progressing')].status" +// +kubebuilder:printcolumn:name="Available",type="string",JSONPath=".status.conditions[?(@.type=='Available')].status" type Config struct { - metav1.TypeMeta `json:",inline"` + metav1.TypeMeta `json:",inline"` + // metadata is the object's metadata. + // +optional metav1.ObjectMeta `json:"metadata,omitempty"` - Spec ConfigSpec `json:"spec,omitempty"` + // spec defines the desired state of the bpfman-operator. + // +required + Spec ConfigSpec `json:"spec,omitzero"` + // status reflects the observed state of the bpfman-operator. + // +optional Status ConfigStatus `json:"status,omitempty"` } -// Spec defines the desired state of the bpfman-operator. +// spec defines the desired state of the bpfman-operator. type ConfigSpec struct { - // Agent holds the configuration for the bpfman agent. + // agent specifies the configuration for the bpfman agent DaemonSet. // +required - Agent AgentSpec `json:"agent,omitempty"` - // Configuration holds the content of bpfman.toml. + Agent AgentSpec `json:"agent,omitzero"` + // configuration specifies the content of bpfman.toml configuration file used by the bpfman DaemonSet. // +required - // +kubebuilder:validation:Required // +kubebuilder:validation:MinLength=1 - Configuration string `json:"configuration"` - // Image holds the image of the bpfman DaemonSets. + // +kubebuilder:validation:MaxLength=65536 + Configuration string `json:"configuration,omitempty"` + // image specifies the container image for the bpfman DaemonSet. 
// +required - // +kubebuilder:validation:Required // +kubebuilder:validation:MinLength=1 - Image string `json:"image"` - // LogLevel holds the log level for the bpfman-operator. + // +kubebuilder:validation:MaxLength=1023 + Image string `json:"image,omitempty"` + // logLevel specifies the log level for the bpfman DaemonSet via the RUST_LOG environment variable. + // The RUST_LOG environment variable controls logging with the syntax: RUST_LOG=[target][=][level][,...]. + // For further information, see https://docs.rs/env_logger/latest/env_logger/. // +optional + // +kubebuilder:validation:MinLength=1 + // +kubebuilder:validation:MaxLength=1024 LogLevel string `json:"logLevel,omitempty"` - // Namespace holds the namespace where bpfman-operator resources shall be - // deployed. + // namespace specifies the namespace where bpfman-operator resources will be deployed. + // If not specified, resources will be deployed in the default bpfman namespace. + // +optional + // +kubebuilder:validation:MinLength=1 + // +kubebuilder:validation:MaxLength=256 Namespace string `json:"namespace,omitempty"` } // AgentSpec defines the desired state of the bpfman agent. type AgentSpec struct { - // HealthProbePort holds the health probe bind port for the bpfman agent. + // healthProbePort specifies the port on which the bpfman agent's health probe endpoint will listen. + // If unspecified, the default port will be used. // +optional - // +kubebuilder:default=8175 // +kubebuilder:validation:Minimum=1 // +kubebuilder:validation:Maximum=65535 - HealthProbePort int `json:"healthProbePort"` - // Image holds the image for the bpfman agent. + HealthProbePort int32 `json:"healthProbePort,omitempty"` + // image specifies the container image for the bpfman agent DaemonSet. // +required - // +kubebuilder:validation:Required // +kubebuilder:validation:MinLength=1 - Image string `json:"image"` - // LogLevel holds the log level for the bpfman agent. 
+ // +kubebuilder:validation:MaxLength=1023 + Image string `json:"image,omitempty"` + // logLevel specifies the verbosity of logs produced by the bpfman agent. + // Valid values are: "info", "debug", "trace". // +optional + // +kubebuilder:validation:MinLength=1 + // +kubebuilder:validation:MaxLength=256 + // +kubebuilder:validation:Enum=info;debug;trace LogLevel string `json:"logLevel,omitempty"` } // status reflects the status of the bpfman-operator configuration. type ConfigStatus struct { - // conditions store the status conditions of the bpfman-operator. + // conditions represents the current state conditions of the bpfman-operator and its components. + // +optional // +patchMergeKey=type // +patchStrategy=merge // +listType=map // +listMapKey=type + // +kubebuilder:validation:MaxItems=1023 Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type" protobuf:"bytes,1,rep,name=conditions"` + + // components represents the operational status of each individual bpfman-operator component such as the deployed + // DaemonSets. + // +optional + Components map[string]ConfigComponentStatus `json:"components,omitempty"` } // +kubebuilder:object:root=true @@ -94,3 +120,15 @@ type ConfigList struct { metav1.ListMeta `json:"metadata,omitempty"` Items []Config `json:"items"` } + +// ConfigComponentStatus holds the status of a single Config component. +type ConfigComponentStatus string + +const ( + // ConfigStatusUnknown indicates the component state cannot be determined. + ConfigStatusUnknown ConfigComponentStatus = "Unknown" + // ConfigStatusProgressing indicates the component is being updated or reconciled. + ConfigStatusProgressing ConfigComponentStatus = "Progressing" + // ConfigStatusReady indicates the component is fully operational and ready. 
+ ConfigStatusReady ConfigComponentStatus = "Ready" +) diff --git a/apis/v1alpha1/zz_generated.deepcopy.go b/apis/v1alpha1/zz_generated.deepcopy.go index 454bbc333..9b3a4ecdf 100644 --- a/apis/v1alpha1/zz_generated.deepcopy.go +++ b/apis/v1alpha1/zz_generated.deepcopy.go @@ -1614,6 +1614,13 @@ func (in *ConfigStatus) DeepCopyInto(out *ConfigStatus) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.Components != nil { + in, out := &in.Components, &out.Components + *out = make(map[string]ConfigComponentStatus, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ConfigStatus. diff --git a/bundle/manifests/bpfman-operator.clusterserviceversion.yaml b/bundle/manifests/bpfman-operator.clusterserviceversion.yaml index 0a7f0c16f..54bb186ec 100644 --- a/bundle/manifests/bpfman-operator.clusterserviceversion.yaml +++ b/bundle/manifests/bpfman-operator.clusterserviceversion.yaml @@ -1012,7 +1012,7 @@ metadata: capabilities: Basic Install categories: OpenShift Optional containerImage: quay.io/bpfman/bpfman-operator:latest - createdAt: "2025-09-26T11:09:36Z" + createdAt: "2025-10-21T13:05:28Z" description: The bpfman Operator is designed to manage eBPF programs for applications. 
features.operators.openshift.io/cnf: "false" features.operators.openshift.io/cni: "false" @@ -1502,6 +1502,14 @@ spec: - configs/finalizers verbs: - update + - apiGroups: + - bpfman.io + resources: + - configs/status + verbs: + - get + - patch + - update - apiGroups: - "" resources: @@ -1514,6 +1522,13 @@ spec: - patch - update - watch + - apiGroups: + - "" + resources: + - events + verbs: + - create + - patch - apiGroups: - "" resources: diff --git a/bundle/manifests/bpfman.io_configs.yaml b/bundle/manifests/bpfman.io_configs.yaml index ac0c836a8..c2af3bd42 100644 --- a/bundle/manifests/bpfman.io_configs.yaml +++ b/bundle/manifests/bpfman.io_configs.yaml @@ -18,6 +18,12 @@ spec: - jsonPath: .metadata.creationTimestamp name: Age type: date + - jsonPath: .status.conditions[?(@.type=='Progressing')].status + name: Progressing + type: string + - jsonPath: .status.conditions[?(@.type=='Available')].status + name: Available + type: string name: v1alpha1 schema: openAPIV3Schema: @@ -41,53 +47,82 @@ spec: metadata: type: object spec: - description: Spec defines the desired state of the bpfman-operator. + description: spec defines the desired state of the bpfman-operator. properties: agent: - description: Agent holds the configuration for the bpfman agent. + description: agent specifies the configuration for the bpfman agent + DaemonSet. properties: healthProbePort: - default: 8175 - description: HealthProbePort holds the health probe bind port - for the bpfman agent. + description: |- + healthProbePort specifies the port on which the bpfman agent's health probe endpoint will listen. + If unspecified, the default port will be used. + format: int32 maximum: 65535 minimum: 1 type: integer image: - description: Image holds the image for the bpfman agent. + description: image specifies the container image for the bpfman + agent DaemonSet. + maxLength: 1023 minLength: 1 type: string logLevel: - description: LogLevel holds the log level for the bpfman agent. 
+ description: |- + logLevel specifies the verbosity of logs produced by the bpfman agent. + Valid values are: "info", "debug", "trace". + enum: + - info + - debug + - trace + maxLength: 256 + minLength: 1 type: string - required: - - image type: object configuration: - description: Configuration holds the content of bpfman.toml. + description: configuration specifies the content of bpfman.toml configuration + file used by the bpfman DaemonSet. + maxLength: 65536 minLength: 1 type: string image: - description: Image holds the image of the bpfman DaemonSets. + description: image specifies the container image for the bpfman DaemonSet. + maxLength: 1023 minLength: 1 type: string logLevel: - description: LogLevel holds the log level for the bpfman-operator. + description: |- + logLevel specifies the log level for the bpfman DaemonSet via the RUST_LOG environment variable. + The RUST_LOG environment variable controls logging with the syntax: RUST_LOG=[target][=][level][,...]. + For further information, see https://docs.rs/env_logger/latest/env_logger/. + maxLength: 1024 + minLength: 1 type: string namespace: description: |- - Namespace holds the namespace where bpfman-operator resources shall be - deployed. + namespace specifies the namespace where bpfman-operator resources will be deployed. + If not specified, resources will be deployed in the default bpfman namespace. + maxLength: 256 + minLength: 1 type: string required: - - configuration - - image + - agent type: object status: - description: status reflects the status of the bpfman-operator configuration. + description: status reflects the observed state of the bpfman-operator. properties: + components: + additionalProperties: + description: ConfigComponentStatus holds the status of a single + Config component. + type: string + description: |- + components represents the operational status of each individual bpfman-operator component such as the deployed + DaemonSets. 
+ type: object conditions: - description: conditions store the status conditions of the bpfman-operator. + description: conditions represents the current state conditions of + the bpfman-operator and its components. items: description: "Condition contains details for one aspect of the current state of this API Resource.\n---\nThis struct is intended for @@ -155,11 +190,14 @@ spec: - status - type type: object + maxItems: 1023 type: array x-kubernetes-list-map-keys: - type x-kubernetes-list-type: map type: object + required: + - spec type: object served: true storage: true diff --git a/cmd/bpfman-operator/main.go b/cmd/bpfman-operator/main.go index 5d79a9046..59ca945e1 100644 --- a/cmd/bpfman-operator/main.go +++ b/cmd/bpfman-operator/main.go @@ -201,6 +201,7 @@ func main() { CsiDriverDS: internal.BpfmanCsiDriverPath, RestrictedSCC: internal.BpfmanRestrictedSCCPath, IsOpenshift: isOpenshift, + Recorder: mgr.GetEventRecorderFor("config-controller"), }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create bpfmanConfig controller") os.Exit(1) diff --git a/config/crd/bases/bpfman.io_configs.yaml b/config/crd/bases/bpfman.io_configs.yaml index f94557cd9..b1291ee6c 100644 --- a/config/crd/bases/bpfman.io_configs.yaml +++ b/config/crd/bases/bpfman.io_configs.yaml @@ -18,6 +18,12 @@ spec: - jsonPath: .metadata.creationTimestamp name: Age type: date + - jsonPath: .status.conditions[?(@.type=='Progressing')].status + name: Progressing + type: string + - jsonPath: .status.conditions[?(@.type=='Available')].status + name: Available + type: string name: v1alpha1 schema: openAPIV3Schema: @@ -41,53 +47,82 @@ spec: metadata: type: object spec: - description: Spec defines the desired state of the bpfman-operator. + description: spec defines the desired state of the bpfman-operator. properties: agent: - description: Agent holds the configuration for the bpfman agent. + description: agent specifies the configuration for the bpfman agent + DaemonSet. 
properties: healthProbePort: - default: 8175 - description: HealthProbePort holds the health probe bind port - for the bpfman agent. + description: |- + healthProbePort specifies the port on which the bpfman agent's health probe endpoint will listen. + If unspecified, the default port will be used. + format: int32 maximum: 65535 minimum: 1 type: integer image: - description: Image holds the image for the bpfman agent. + description: image specifies the container image for the bpfman + agent DaemonSet. + maxLength: 1023 minLength: 1 type: string logLevel: - description: LogLevel holds the log level for the bpfman agent. + description: |- + logLevel specifies the verbosity of logs produced by the bpfman agent. + Valid values are: "info", "debug", "trace". + enum: + - info + - debug + - trace + maxLength: 256 + minLength: 1 type: string - required: - - image type: object configuration: - description: Configuration holds the content of bpfman.toml. + description: configuration specifies the content of bpfman.toml configuration + file used by the bpfman DaemonSet. + maxLength: 65536 minLength: 1 type: string image: - description: Image holds the image of the bpfman DaemonSets. + description: image specifies the container image for the bpfman DaemonSet. + maxLength: 1023 minLength: 1 type: string logLevel: - description: LogLevel holds the log level for the bpfman-operator. + description: |- + logLevel specifies the log level for the bpfman DaemonSet via the RUST_LOG environment variable. + The RUST_LOG environment variable controls logging with the syntax: RUST_LOG=[target][=][level][,...]. + For further information, see https://docs.rs/env_logger/latest/env_logger/. + maxLength: 1024 + minLength: 1 type: string namespace: description: |- - Namespace holds the namespace where bpfman-operator resources shall be - deployed. + namespace specifies the namespace where bpfman-operator resources will be deployed. 
+ If not specified, resources will be deployed in the default bpfman namespace. + maxLength: 256 + minLength: 1 type: string required: - - configuration - - image + - agent type: object status: - description: status reflects the status of the bpfman-operator configuration. + description: status reflects the observed state of the bpfman-operator. properties: + components: + additionalProperties: + description: ConfigComponentStatus holds the status of a single + Config component. + type: string + description: |- + components represents the operational status of each individual bpfman-operator component such as the deployed + DaemonSets. + type: object conditions: - description: conditions store the status conditions of the bpfman-operator. + description: conditions represents the current state conditions of + the bpfman-operator and its components. items: description: "Condition contains details for one aspect of the current state of this API Resource.\n---\nThis struct is intended for @@ -155,11 +190,14 @@ spec: - status - type type: object + maxItems: 1023 type: array x-kubernetes-list-map-keys: - type x-kubernetes-list-type: map type: object + required: + - spec type: object served: true storage: true diff --git a/config/rbac/bpfman-operator/role.yaml b/config/rbac/bpfman-operator/role.yaml index 6db029f41..2790b97a7 100644 --- a/config/rbac/bpfman-operator/role.yaml +++ b/config/rbac/bpfman-operator/role.yaml @@ -101,6 +101,14 @@ rules: - configs/finalizers verbs: - update +- apiGroups: + - bpfman.io + resources: + - configs/status + verbs: + - get + - patch + - update - apiGroups: - "" resources: @@ -113,6 +121,13 @@ rules: - patch - update - watch +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch - apiGroups: - "" resources: diff --git a/controllers/bpfman-operator/config.go b/controllers/bpfman-operator/config.go index b082d6a7f..18413ec3d 100644 --- a/controllers/bpfman-operator/config.go +++ b/controllers/bpfman-operator/config.go 
@@ -29,9 +29,11 @@ import ( storagev1 "k8s.io/api/storage/v1" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" + meta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/tools/record" "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -51,7 +53,9 @@ import ( // +kubebuilder:rbac:groups=storage.k8s.io,resources=csidrivers,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=security.openshift.io,resources=securitycontextconstraints,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=bpfman.io,resources=configs,verbs=get;list;watch;update;patch;delete +// +kubebuilder:rbac:groups=bpfman.io,resources=configs/status,verbs=get;update;patch // +kubebuilder:rbac:groups=bpfman.io,resources=configs/finalizers,verbs=update +// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch type BpfmanConfigReconciler struct { ClusterApplicationReconciler @@ -60,6 +64,7 @@ type BpfmanConfigReconciler struct { CsiDriverDS string RestrictedSCC string IsOpenshift bool + Recorder record.EventRecorder } // SetupWithManager sets up the controller with the Manager. @@ -106,6 +111,14 @@ func (r *BpfmanConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request return ctrl.Result{}, err } + // If we haven't added any conditions, yet, set them to unknown. + if len(bpfmanConfig.Status.Conditions) == 0 { + r.Logger.Info("Adding initial status conditions", "name", bpfmanConfig.Name) + if err := r.setStatusConditions(ctx, bpfmanConfig); err != nil { + return ctrl.Result{}, err + } + } + // Check if Config is being deleted first to prevent race // conditions. 
if !bpfmanConfig.DeletionTimestamp.IsZero() { @@ -121,7 +134,7 @@ func (r *BpfmanConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request r.Logger.Error(err, "Failed to add finalizer to Config") return ctrl.Result{}, err } - return ctrl.Result{}, nil + return ctrl.Result{}, r.setStatusConditions(ctx, bpfmanConfig) } // Normal reconciliation - safe to create/update resources. @@ -145,7 +158,7 @@ func (r *BpfmanConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request return ctrl.Result{}, err } - return ctrl.Result{}, nil + return ctrl.Result{}, r.setStatusConditions(ctx, bpfmanConfig) } func (r *BpfmanConfigReconciler) reconcileCM(ctx context.Context, bpfmanConfig *v1alpha1.Config) error { @@ -225,6 +238,175 @@ func (r *BpfmanConfigReconciler) reconcileMetricsProxyDS(ctx context.Context, bp }) } +// setStatusConditions checks the status of all Config components (ConfigMap, DaemonSets, CSIDriver, SCC) +// and updates the Config's status.componentStatuses and status.conditions accordingly. +// It emits Kubernetes events for status changes. After updating the status subresource, it re-fetches +// the Config object to ensure the in-memory representation reflects the latest server state, including +// any changes made by the API server (e.g., resource version updates). +func (r *BpfmanConfigReconciler) setStatusConditions(ctx context.Context, config *v1alpha1.Config) error { + if config == nil { + return fmt.Errorf("object Config config is nil") + } + if r.Recorder == nil { + return fmt.Errorf("object Recorder is nil") + } + + // Check each resource and set appropriate status. 
+ statuses := make(map[string]v1alpha1.ConfigComponentStatus) + var err error + var status *v1alpha1.ConfigComponentStatus + + cm := &corev1.ConfigMap{} + key := types.NamespacedName{Name: internal.BpfmanCmName, Namespace: config.Spec.Namespace} + if status, err = r.checkResourceStatus(ctx, cm, key, nil); err != nil { + return err + } + if status != nil { + statuses["ConfigMap"] = *status + } + + ds := &appsv1.DaemonSet{} + key = types.NamespacedName{Name: internal.BpfmanDsName, Namespace: config.Spec.Namespace} + if status, err = r.checkResourceStatus(ctx, ds, key, isDaemonSetReady); err != nil { + return err + } + if status != nil { + statuses["DaemonSet"] = *status + } + + metricsDS := &appsv1.DaemonSet{} + key = types.NamespacedName{Name: internal.BpfmanMetricsProxyDsName, Namespace: config.Spec.Namespace} + if status, err = r.checkResourceStatus(ctx, metricsDS, key, isDaemonSetReady); err != nil { + return err + } + if status != nil { + statuses["MetricsProxyDaemonSet"] = *status + } + + csiDriver := &storagev1.CSIDriver{} + key = types.NamespacedName{Name: internal.BpfmanCsiDriverName} + if status, err = r.checkResourceStatus(ctx, csiDriver, key, nil); err != nil { + return err + } + if status != nil { + statuses["CsiDriver"] = *status + } + + if r.IsOpenshift { + scc := &osv1.SecurityContextConstraints{} + key = types.NamespacedName{Name: internal.BpfmanRestrictedSccName} + if status, err = r.checkResourceStatus(ctx, scc, key, nil); err != nil { + return err + } + if status != nil { + statuses["Scc"] = *status + } + } + + // If none of the components changed, do not update anything for the status. + if config.Status.Components != nil && internal.CCSEquals(statuses, config.Status.Components) { + return nil + } + + // Set component statuses, first. + config.Status.Components = statuses + + // Set conditions, next. 
+ switch { + case internal.CCSAnyComponentProgressing(statuses, r.IsOpenshift): + meta.SetStatusCondition(&config.Status.Conditions, metav1.Condition{ + Type: internal.ConfigConditionProgressing, + Status: metav1.ConditionTrue, + Reason: internal.ConfigReasonProgressing, + Message: internal.ConfigMessageProgressing, + }) + meta.SetStatusCondition(&config.Status.Conditions, metav1.Condition{ + Type: internal.ConfigConditionAvailable, + Status: metav1.ConditionFalse, + Reason: internal.ConfigReasonProgressing, + Message: internal.ConfigMessageProgressing, + }) + r.Recorder.Event(config, "Normal", internal.ConfigReasonProgressing, internal.ConfigMessageProgressing) + case internal.CCSAllComponentsReady(statuses, r.IsOpenshift): + meta.SetStatusCondition(&config.Status.Conditions, metav1.Condition{ + Type: internal.ConfigConditionProgressing, + Status: metav1.ConditionTrue, + Reason: internal.ConfigReasonAvailable, + Message: internal.ConfigMessageAvailable, + }) + meta.SetStatusCondition(&config.Status.Conditions, metav1.Condition{ + Type: internal.ConfigConditionAvailable, + Status: metav1.ConditionTrue, + Reason: internal.ConfigReasonAvailable, + Message: internal.ConfigMessageAvailable, + }) + r.Recorder.Event(config, "Normal", internal.ConfigReasonAvailable, internal.ConfigMessageAvailable) + default: + meta.SetStatusCondition(&config.Status.Conditions, metav1.Condition{ + Type: internal.ConfigConditionProgressing, + Status: metav1.ConditionUnknown, + Reason: internal.ConfigReasonUnknown, + Message: internal.ConfigMessageUnknown, + }) + meta.SetStatusCondition(&config.Status.Conditions, metav1.Condition{ + Type: internal.ConfigConditionAvailable, + Status: metav1.ConditionUnknown, + Reason: internal.ConfigReasonUnknown, + Message: internal.ConfigMessageUnknown, + }) + r.Recorder.Event(config, "Normal", internal.ConfigReasonUnknown, internal.ConfigMessageUnknown) + } + + // Update the status. 
+ if err := r.Status().Update(ctx, config); err != nil { + return fmt.Errorf("cannot update status for config %q, err: %w", config.Name, err) + } + + // Update the object again (config is a pointer). + return r.Get(ctx, types.NamespacedName{Name: config.Name}, config) +} + +// checkResourceStatus is a helper for setStatusConditions. It checks whether a given object (= resource) can be found +// and if it's ready. It'll return a status (Unknown, Progressing, Ready) for the object or an error. +func (r *BpfmanConfigReconciler) checkResourceStatus(ctx context.Context, obj client.Object, key types.NamespacedName, + readyCheck func(client.Object) bool) (*v1alpha1.ConfigComponentStatus, error) { + if err := r.Get(ctx, key, obj); err != nil { + if errors.IsNotFound(err) { + return ptr.To(v1alpha1.ConfigStatusUnknown), nil + } else { + return nil, err + } + } + if readyCheck == nil || readyCheck(obj) { + return ptr.To(v1alpha1.ConfigStatusReady), nil + } + return ptr.To(v1alpha1.ConfigStatusProgressing), nil +} + +// isDaemonSetReady returns true if we consider the DaemonSet to be ready. +// We consider a DaemonSet to be ready when: +// a) the DesiredNumberScheduled is > 0. For example, if a DaemonSet's node selector matches 0 nodes, we consider that +// it isn't ready. +// b) We check UpdatedNumberScheduled and NumberAvailable, the same as in +// https://github.com/kubernetes/kubernetes/blob/ad82c3d39f5e9f21e173ffeb8aa57953a0da4601/staging/src/k8s.io/kubectl/pkg/polymorphichelpers/rollout_status.go#L95 +// c) In addition, we check that NumberReady matches DesiredNumberScheduled. 
+func isDaemonSetReady(obj client.Object) bool { + daemon := obj.(*appsv1.DaemonSet) + if daemon.Status.DesiredNumberScheduled <= 0 { + return false + } + if daemon.Status.UpdatedNumberScheduled < daemon.Status.DesiredNumberScheduled { + return false + } + if daemon.Status.NumberAvailable < daemon.Status.DesiredNumberScheduled { + return false + } + if daemon.Status.NumberReady < daemon.Status.DesiredNumberScheduled { + return false + } + return true +} + // resourcePredicate creates a predicate that filters events based on resource name. // Only processes events for resources matching the specified resourceName. func resourcePredicate(resourceName string) predicate.Funcs { @@ -431,9 +613,12 @@ func (r *BpfmanConfigReconciler) handleDeletion(ctx context.Context, config *v1a return ctrl.Result{}, nil } -func healthProbeAddress(healthProbePort int) string { - if healthProbePort <= 0 || healthProbePort > 65535 { +func healthProbeAddress(healthProbePort int32) string { + if healthProbePort < 0 || healthProbePort > 65535 { return "" } + if healthProbePort == 0 { + healthProbePort = internal.DefaultHealthProbePort + } return fmt.Sprintf(":%d", healthProbePort) } diff --git a/controllers/bpfman-operator/config_test.go b/controllers/bpfman-operator/config_test.go index 35005e751..ba773e143 100644 --- a/controllers/bpfman-operator/config_test.go +++ b/controllers/bpfman-operator/config_test.go @@ -18,6 +18,7 @@ package bpfmanoperator import ( "context" + "encoding/json" "fmt" "os" "path/filepath" @@ -33,6 +34,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/tools/record" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -42,6 +44,7 @@ import ( "github.com/bpfman/bpfman-operator/apis/v1alpha1" "github.com/bpfman/bpfman-operator/internal" + "github.com/bpfman/bpfman-operator/test/testutil" ) const ( @@ -62,7 +65,8 @@ func 
TestReconcile(t *testing.T) { } { t.Run(fmt.Sprintf("isOpenShift: %v", tc.isOpenShift), func(t *testing.T) { t.Log("Setting up test environment") - r, bpfmanConfig, req, ctx, cl := setupTestEnvironment(tc.isOpenShift) + fakeRecorder := record.NewFakeRecorder(100) + r, bpfmanConfig, req, ctx, cl := setupTestEnvironment(tc.isOpenShift, fakeRecorder) t.Log("Checking the restricted SCC name") require.Equal(t, tc.isOpenShift, r.RestrictedSCC != "", @@ -78,6 +82,12 @@ func TestReconcile(t *testing.T) { t.Fatalf("initial reconcile failed: %v", err) } + t.Log("Checking status (should be unknown)") + err = testStatusSet(ctx, cl, internal.BpfmanConfigName, tc.isOpenShift, "unknown") + if err != nil { + t.Fatalf("unexpected status on config %q: %q", internal.BpfmanConfigName, err) + } + t.Log("Running second reconcile (creates resources)") _, err = r.Reconcile(ctx, req) if err != nil { @@ -90,6 +100,30 @@ func TestReconcile(t *testing.T) { t.Fatalf("not all objects present after initial reconcile: %q", err) } + t.Log("Checking status (should be progressing)") + err = testStatusSet(ctx, cl, internal.BpfmanConfigName, tc.isOpenShift, "progressing") + if err != nil { + t.Fatalf("unexpected status on config %q: %q", internal.BpfmanConfigName, err) + } + + t.Log("Marking DaemonSets as ready") + err = testReconcileDaemonSets(ctx, cl) + if err != nil { + t.Fatalf("couldn't reconcile DaemonSets, %q", err) + } + + t.Log("Running third reconcile (flips status to available)") + _, err = r.Reconcile(ctx, req) + if err != nil { + t.Fatalf("second reconcile failed: %v", err) + } + + t.Log("Checking status (should be available)") + err = testStatusSet(ctx, cl, internal.BpfmanConfigName, tc.isOpenShift, "available") + if err != nil { + t.Fatalf("unexpected status on config %q: %q", internal.BpfmanConfigName, err) + } + // Delete objects one by one to test restoration. 
objects := map[string]client.Object{ "configmap": &corev1.ConfigMap{ @@ -175,7 +209,7 @@ func TestReconcile(t *testing.T) { // - reconcile.Request: Mock reconcile request for testing // - context.Context: Test context // - client.Client: Fake Kubernetes client for API interactions -func setupTestEnvironment(isOpenShift bool) (*BpfmanConfigReconciler, *v1alpha1.Config, reconcile.Request, +func setupTestEnvironment(isOpenShift bool, recorder record.EventRecorder) (*BpfmanConfigReconciler, *v1alpha1.Config, reconcile.Request, context.Context, client.Client) { // A configMap for bpfman with metadata and spec. bpfmanConfig := &v1alpha1.Config{ @@ -208,7 +242,10 @@ func setupTestEnvironment(isOpenShift bool) (*BpfmanConfigReconciler, *v1alpha1. s.AddKnownTypes(osv1.GroupVersion, &osv1.SecurityContextConstraints{}) // Create a fake client to mock API calls. - cl := fake.NewClientBuilder().WithRuntimeObjects(objs...).Build() + // For WithStatusSubresource see: + // * https://github.com/kubernetes-sigs/controller-runtime/issues/2362#issuecomment-1837270195 + // * https://stackoverflow.com/questions/77489441/go-k8s-controller-runtime-upgrade-fake-client-lacks-functionality + cl := fake.NewClientBuilder().WithRuntimeObjects(objs...).WithStatusSubresource(bpfmanConfig).Build() // Set development Logger so we can see all logs in tests. logf.SetLogger(zap.New(zap.UseFlagOptions(&zap.Options{Development: true}))) @@ -230,6 +267,7 @@ func setupTestEnvironment(isOpenShift bool) (*BpfmanConfigReconciler, *v1alpha1. 
BpfmanMetricsProxyDS: resolveConfigPath(internal.BpfmanMetricsProxyPath), CsiDriverDS: resolveConfigPath(internal.BpfmanCsiDriverPath), IsOpenshift: isOpenShift, + Recorder: recorder, } if isOpenShift { r.RestrictedSCC = resolveConfigPath(internal.BpfmanRestrictedSCCPath) @@ -578,3 +616,139 @@ func verifyCM(cm *corev1.ConfigMap, requiredFields map[string]*string) error { } return nil } + +// testStatusSet validates that the Config's status.conditions and status.componentStatuses +// match the expected state (unknown, progressing, or available) based on the test scenario. +func testStatusSet(ctx context.Context, cl client.Client, configName string, isOpenShift bool, expected string) error { + bpfmanConfig := &v1alpha1.Config{} + if err := cl.Get(ctx, types.NamespacedName{Name: configName}, bpfmanConfig); err != nil { + return err + } + + var expectedComponentStatuses map[string]v1alpha1.ConfigComponentStatus + var progressingCondition metav1.Condition + var availableCondition metav1.Condition + + switch expected { + case "available": + expectedComponentStatuses = map[string]v1alpha1.ConfigComponentStatus{ + "ConfigMap": v1alpha1.ConfigStatusReady, + "DaemonSet": v1alpha1.ConfigStatusReady, + "MetricsProxyDaemonSet": v1alpha1.ConfigStatusReady, + "CsiDriver": v1alpha1.ConfigStatusReady, + } + if isOpenShift { + expectedComponentStatuses["Scc"] = v1alpha1.ConfigStatusReady + } + progressingCondition = metav1.Condition{ + Type: internal.ConfigConditionProgressing, + Status: metav1.ConditionTrue, + Reason: internal.ConfigReasonAvailable, + Message: internal.ConfigMessageAvailable, + } + availableCondition = metav1.Condition{ + Type: internal.ConfigConditionAvailable, + Status: metav1.ConditionTrue, + Reason: internal.ConfigReasonAvailable, + Message: internal.ConfigMessageAvailable, + } + case "progressing": + expectedComponentStatuses = map[string]v1alpha1.ConfigComponentStatus{ + "ConfigMap": v1alpha1.ConfigStatusReady, + "DaemonSet": v1alpha1.ConfigStatusProgressing, 
+ "MetricsProxyDaemonSet": v1alpha1.ConfigStatusProgressing, + "CsiDriver": v1alpha1.ConfigStatusReady, + } + if isOpenShift { + expectedComponentStatuses["Scc"] = v1alpha1.ConfigStatusReady + } + progressingCondition = metav1.Condition{ + Type: internal.ConfigConditionProgressing, + Status: metav1.ConditionTrue, + Reason: internal.ConfigReasonProgressing, + Message: internal.ConfigMessageProgressing, + } + availableCondition = metav1.Condition{ + Type: internal.ConfigConditionAvailable, + Status: metav1.ConditionFalse, + Reason: internal.ConfigReasonProgressing, + Message: internal.ConfigMessageProgressing, + } + default: + expectedComponentStatuses = map[string]v1alpha1.ConfigComponentStatus{ + "ConfigMap": v1alpha1.ConfigStatusUnknown, + "DaemonSet": v1alpha1.ConfigStatusUnknown, + "MetricsProxyDaemonSet": v1alpha1.ConfigStatusUnknown, + "CsiDriver": v1alpha1.ConfigStatusUnknown, + } + if isOpenShift { + expectedComponentStatuses["Scc"] = v1alpha1.ConfigStatusUnknown + } + progressingCondition = metav1.Condition{ + Type: internal.ConfigConditionProgressing, + Status: metav1.ConditionUnknown, + Reason: internal.ConfigReasonUnknown, + Message: internal.ConfigMessageUnknown, + } + availableCondition = metav1.Condition{ + Type: internal.ConfigConditionAvailable, + Status: metav1.ConditionUnknown, + Reason: internal.ConfigReasonUnknown, + Message: internal.ConfigMessageUnknown, + } + } + + if bpfmanConfig.Status.Components == nil || + !internal.CCSEquals(bpfmanConfig.Status.Components, expectedComponentStatuses) { + got := fmt.Sprintf("%v", bpfmanConfig.Status.Components) + want := fmt.Sprintf("%v", expectedComponentStatuses) + if b, err := json.Marshal(bpfmanConfig.Status.Components); err == nil { + got = string(b) + } + if b, err := json.Marshal(expectedComponentStatuses); err == nil { + want = string(b) + } + return fmt.Errorf("unexpected config.status.componentStatuses, got: %v, expected: %v", got, want) + } + if 
!testutil.ContainsCondition(bpfmanConfig.Status.Conditions, progressingCondition) { + return fmt.Errorf("conditions %v does not contain condition %v", + bpfmanConfig.Status.Conditions, progressingCondition) + } + if !testutil.ContainsCondition(bpfmanConfig.Status.Conditions, availableCondition) { + return fmt.Errorf("conditions %v does not contain condition %v", + bpfmanConfig.Status.Conditions, availableCondition) + } + return nil +} + +// testReconcileDaemonSets simulates DaemonSets becoming ready by setting their status +// fields to indicate all desired pods are ready and scheduled. +func testReconcileDaemonSets(ctx context.Context, cl client.Client) error { + bpfmanDs := &appsv1.DaemonSet{} + if err := cl.Get(ctx, types.NamespacedName{Name: internal.BpfmanDsName, Namespace: internal.BpfmanNamespace}, + bpfmanDs); err != nil { + return err + } + bpfmanDs.Status.DesiredNumberScheduled = 1 + bpfmanDs.Status.UpdatedNumberScheduled = bpfmanDs.Status.DesiredNumberScheduled + bpfmanDs.Status.NumberAvailable = bpfmanDs.Status.DesiredNumberScheduled + bpfmanDs.Status.NumberReady = bpfmanDs.Status.DesiredNumberScheduled + if err := cl.Status().Update(ctx, bpfmanDs); err != nil { + return err + } + + metricsProxyDs := &appsv1.DaemonSet{} + if err := cl.Get(ctx, types.NamespacedName{Name: internal.BpfmanMetricsProxyDsName, Namespace: internal.BpfmanNamespace}, + metricsProxyDs); err != nil { + return err + } + metricsProxyDs.Status.DesiredNumberScheduled = 1 + metricsProxyDs.Status.UpdatedNumberScheduled = metricsProxyDs.Status.DesiredNumberScheduled + metricsProxyDs.Status.NumberAvailable = metricsProxyDs.Status.DesiredNumberScheduled + metricsProxyDs.Status.NumberReady = metricsProxyDs.Status.DesiredNumberScheduled + if err := cl.Status().Update(ctx, metricsProxyDs); err != nil { + return err + } + + return nil +} diff --git a/internal/constants.go b/internal/constants.go index 0b73f83c8..beb58197c 100644 --- a/internal/constants.go +++ b/internal/constants.go @@ -48,6 
+48,7 @@ const ( BpfmanLogLevel = "bpfman.log.level" BpfmanAgentLogLevel = "bpfman.agent.log.level" BpfmanAgentHealthProbeAddress = "bpfman.agent.healthprobeaddr" + DefaultHealthProbePort = 8175 APIPrefix = "bpfman.io" ) @@ -248,3 +249,14 @@ func (r ReconcileResult) String() string { return fmt.Sprintf("INVALID RECONCILE RESULT (%d)", r) } } + +const ( + ConfigConditionProgressing = "Progressing" + ConfigConditionAvailable = "Available" + ConfigReasonUnknown = "Startup" + ConfigReasonProgressing = "ReconcileStarted" + ConfigReasonAvailable = "ReconcileComplete" + ConfigMessageUnknown = "Unknown state" + ConfigMessageProgressing = "Reconciliation in progress" + ConfigMessageAvailable = "Reconciliation complete" +) diff --git a/internal/k8s.go b/internal/k8s.go index 239236168..02d967849 100644 --- a/internal/k8s.go +++ b/internal/k8s.go @@ -17,6 +17,7 @@ limitations under the License. package internal import ( + "github.com/bpfman/bpfman-operator/apis/v1alpha1" "github.com/go-logr/logr" "k8s.io/client-go/discovery" "sigs.k8s.io/controller-runtime/pkg/event" @@ -86,3 +87,47 @@ func IsOpenShift(client discovery.DiscoveryInterface, setupLog logr.Logger) (boo } return false, nil } + +// CCSEquals compares two map[string]ConfigComponentStatus instances for equality. +func CCSEquals(ccs, c map[string]v1alpha1.ConfigComponentStatus) bool { + if len(ccs) != len(c) { + return false + } + for k, v := range ccs { + if c[k] != v { + return false + } + } + return true +} + +// CCSAnyComponentProgressing returns true if any component is in the Progressing state. 
+func CCSAnyComponentProgressing(ccs map[string]v1alpha1.ConfigComponentStatus, isOpenShift bool) bool { + requiredComponents := []string{"ConfigMap", "DaemonSet", "MetricsProxyDaemonSet", "CsiDriver"} + if isOpenShift { + requiredComponents = append(requiredComponents, "Scc") + } + + for _, component := range requiredComponents { + if status, ok := ccs[component]; ok && status == v1alpha1.ConfigStatusProgressing { + return true + } + } + return false +} + +// CCSAllComponentsReady returns true if all components are in the Ready state. +func CCSAllComponentsReady(ccs map[string]v1alpha1.ConfigComponentStatus, isOpenShift bool) bool { + requiredComponents := []string{"ConfigMap", "DaemonSet", "MetricsProxyDaemonSet", "CsiDriver"} + if isOpenShift { + requiredComponents = append(requiredComponents, "Scc") + } + + for _, component := range requiredComponents { + status, ok := ccs[component] + if !ok || status != v1alpha1.ConfigStatusReady { + return false + } + } + return true +} diff --git a/test/lifecycle/lifecycle_test.go b/test/lifecycle/lifecycle_test.go index 8435160fd..4fe92bdff 100644 --- a/test/lifecycle/lifecycle_test.go +++ b/test/lifecycle/lifecycle_test.go @@ -21,6 +21,7 @@ import ( bpfmaniov1alpha1 "github.com/bpfman/bpfman-operator/apis/v1alpha1" "github.com/bpfman/bpfman-operator/internal" bpfmanHelpers "github.com/bpfman/bpfman-operator/pkg/helpers" + "github.com/bpfman/bpfman-operator/test/testutil" osv1 "github.com/openshift/api/security/v1" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" @@ -51,7 +52,7 @@ var ( Spec: v1alpha1.ConfigSpec{ Namespace: "bpfman", Image: "quay.io/bpfman/bpfman:latest", - LogLevel: "bpfman=info", // Changed from debug to info + LogLevel: "bpfman=debug", // Changed from info to debug. 
Configuration: `[database] max_retries = 35 millisec_delay = 10000 @@ -59,9 +60,8 @@ millisec_delay = 10000 allow_unsigned = true verify_enabled = true`, Agent: v1alpha1.AgentSpec{ - Image: "quay.io/bpfman/bpfman-agent:latest", - LogLevel: "debug", // Changed from info to debug - HealthProbePort: 8175, + Image: "quay.io/bpfman/bpfman-agent:latest", + LogLevel: "debug", // Changed from info to debug. }, }, } @@ -126,10 +126,16 @@ func TestLifecycle(t *testing.T) { }() // Make sure that all resources are there at the start. - t.Logf("Running: TestEnsureResources") + t.Logf("Running: waitForResourceCreation, waitForAvailable and checkResourcesInDesiredState") if err := waitForResourceCreation(ctx); err != nil { t.Fatalf("Failed to ensure resources: %q", err) } + if err := waitForAvailable(ctx, isOpenShift); err != nil { + t.Fatalf("Config never reported status available: %q", err) + } + if err := checkResourcesInDesiredState(ctx); err != nil { + t.Fatalf("Failed to ensure resources: %q", err) + } // Test deleting resources. t.Logf("Running: TestResourceDeletion") @@ -770,3 +776,72 @@ func testConfigStuckDeletion(ctx context.Context, t *testing.T) error { t.Logf("Config stuck deletion test completed successfully") return nil } + +// podHasContainerArg checks if a DaemonSet has the given argument in any of its containers. +func podHasContainerArg(ds *appsv1.DaemonSet, arg string) bool { + for _, container := range ds.Spec.Template.Spec.Containers { + for _, containerArg := range container.Args { + if containerArg == arg { + return true + } + } + } + return false +} + +// waitForAvailable waits until the bpfman Config shows "Available" status conditions, +// indicating that all components are ready and reconciliation is complete. 
+func waitForAvailable(ctx context.Context, isOpenShift bool) error {
+	return waitUntilCondition(ctx, func() (bool, error) {
+		bpfmanConfig := &v1alpha1.Config{}
+		if err := runtimeClient.Get(ctx, types.NamespacedName{Name: internal.BpfmanConfigName}, bpfmanConfig); err != nil {
+			if errors.IsNotFound(err) {
+				return false, nil
+			}
+			return false, err
+		}
+		progressingCondition := metav1.Condition{
+			Type:    internal.ConfigConditionProgressing,
+			Status:  metav1.ConditionTrue,
+			Reason:  internal.ConfigReasonAvailable,
+			Message: internal.ConfigMessageAvailable,
+		}
+		availableCondition := metav1.Condition{
+			Type:    internal.ConfigConditionAvailable,
+			Status:  metav1.ConditionTrue,
+			Reason:  internal.ConfigReasonAvailable,
+			Message: internal.ConfigMessageAvailable,
+		}
+		if !testutil.ContainsCondition(bpfmanConfig.Status.Conditions, progressingCondition) {
+			return false, nil
+		}
+		if !testutil.ContainsCondition(bpfmanConfig.Status.Conditions, availableCondition) {
+			return false, nil
+		}
+		componentsReady := bpfmanConfig.Status.Components != nil &&
+			internal.CCSAllComponentsReady(bpfmanConfig.Status.Components, isOpenShift)
+		return componentsReady, nil
+	})
+}
+
+// checkResourcesInDesiredState makes sure that all resources are in the desired state.
+// Right now, the only implemented validation in this function is a check for the health probe port.
+// Returns an error if the DaemonSet lookup fails or the expected argument is missing.
+func checkResourcesInDesiredState(ctx context.Context) error {
+	// Check DaemonSet is in desired state.
+	ds := &appsv1.DaemonSet{}
+	dsKey := types.NamespacedName{Name: internal.BpfmanDsName, Namespace: internal.BpfmanNamespace}
+	if err := runtimeClient.Get(ctx, dsKey, ds); errors.IsNotFound(err) || (err == nil && ds.Status.NumberAvailable == 0) {
+		return nil
+	} else if err != nil {
+		return err
+	}
+
+	arg := "--health-probe-bind-address=:8175"
+	// Check that the bpfman DaemonSet has the health probe bind address argument.
+ if !podHasContainerArg(ds, arg) { + return fmt.Errorf("bpfman DaemonSet missing argument %q", arg) + } + + return nil +} diff --git a/test/testutil/conditions.go b/test/testutil/conditions.go new file mode 100644 index 000000000..ef6898bef --- /dev/null +++ b/test/testutil/conditions.go @@ -0,0 +1,18 @@ +package testutil + +import metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + +// ContainsCondition checks if the given condition exists in the conditions slice +// by comparing type, status, reason, and message fields. +func ContainsCondition(conditions []metav1.Condition, condition metav1.Condition) bool { + for _, c := range conditions { + if c.Type == condition.Type && + c.Status == condition.Status && + c.Reason == condition.Reason && + c.Message == condition.Message { + return true + } + } + + return false +}