diff --git a/config/samples/example-with-ca-bundle.yaml b/config/samples/example-with-ca-bundle.yaml index cdd6c5b9..3d164c36 100644 --- a/config/samples/example-with-ca-bundle.yaml +++ b/config/samples/example-with-ca-bundle.yaml @@ -61,8 +61,6 @@ spec: value: "meta-llama/Llama-3.2-1B-Instruct" - name: VLLM_URL value: "https://vllm-server.vllm-dist.svc.cluster.local:8000/v1" - - name: VLLM_TLS_VERIFY - value: "/etc/ssl/certs/ca-bundle.crt" userConfig: configMapName: llama-stack-config # configMapNamespace: "" # Optional - defaults to the same namespace as the CR @@ -71,6 +69,12 @@ spec: configMapName: custom-ca-bundle # configMapNamespace: "" # Optional - defaults to the same namespace as the CR # configMapKeys not specified - defaults to ["ca-bundle.crt"] - # configMapKeys: # Specify multiple keys to concatenate into ca-bundle.crt + # configMapKeys: # Specify multiple keys to concatenate into a single CA bundle # - ca-bundle1.crt # - ca-bundle2.crt + # Note: The operator will automatically: + # 1. Validate and extract valid certificates from all specified ConfigMap keys + # 2. Concatenate them into a single PEM file + # 3. Create a managed ConfigMap named {instance-name}-ca-bundle + # 4. Mount the bundle at /etc/ssl/certs/ca-bundle/ca-bundle.crt + # 5. Set SSL_CERT_FILE environment variable automatically diff --git a/controllers/llamastackdistribution_controller.go b/controllers/llamastackdistribution_controller.go index 14fc9131..5cf8c732 100644 --- a/controllers/llamastackdistribution_controller.go +++ b/controllers/llamastackdistribution_controller.go @@ -18,8 +18,10 @@ package controllers import ( "context" + "crypto/x509" "encoding/json" "encoding/pem" + "errors" "fmt" "io" "net/http" @@ -59,14 +61,16 @@ const ( manifestsBasePath = "manifests/base" // CA Bundle related constants. - DefaultCABundleKey = "ca-bundle.crt" - CABundleMountPath = "/etc/ssl/certs/ca-bundle.crt" - CABundleTempPath = "/tmp/ca-bundle/ca-bundle.crt" - CABundleVolumeName = "ca-bundle" - CABundleSourceDir = "/tmp/ca-source" - CABundleInitName = "ca-bundle-init" - CABundleSourceVolName = "ca-bundle-source" - CABundleTempDir = "/tmp/ca-bundle" + DefaultCABundleKey = "ca-bundle.crt" + CABundleVolumeName = "ca-bundle" + ManagedCABundleConfigMapSuffix = "-ca-bundle" + ManagedCABundleKey = "ca-bundle.crt" + ManagedCABundleMountPath = "/etc/ssl/certs/ca-bundle" + ManagedCABundleFilePath = "/etc/ssl/certs/ca-bundle/ca-bundle.crt" + + // Security limits for CA bundle processing. + MaxCABundleSize = 10 * 1024 * 1024 // 10MB max total size + MaxCABundleCertificates = 1000 // Maximum number of certificates // ODH/RHOAI well-known ConfigMap for trusted CA bundles. odhTrustedCABundleConfigMap = "odh-trusted-ca-bundle" @@ -381,14 +385,38 @@ func (r *LlamaStackDistributionReconciler) reconcileResources(ctx context.Contex } func (r *LlamaStackDistributionReconciler) reconcileConfigMaps(ctx context.Context, instance *llamav1alpha1.LlamaStackDistribution) error { - // Reconcile the ConfigMap if specified by the user + if err := r.validateCABundleKeys(instance); err != nil { + return err + } + + if err := r.reconcileUserAndCABundleConfigMaps(ctx, instance); err != nil { + return err + } + + return r.reconcileManagedCABundle(ctx, instance) +} + +func (r *LlamaStackDistributionReconciler) validateCABundleKeys(instance *llamav1alpha1.LlamaStackDistribution) error { + if instance.Spec.Server.TLSConfig == nil || instance.Spec.Server.TLSConfig.CABundle == nil { + return nil + } + + if len(instance.Spec.Server.TLSConfig.CABundle.ConfigMapKeys) > 0 { + if err := validateConfigMapKeys(instance.Spec.Server.TLSConfig.CABundle.ConfigMapKeys); err != nil { + return fmt.Errorf("failed to validate CA bundle ConfigMap keys: %w", err) + } + } + + return nil +} + +func (r *LlamaStackDistributionReconciler) reconcileUserAndCABundleConfigMaps(ctx context.Context, instance *llamav1alpha1.LlamaStackDistribution) error { if r.hasUserConfigMap(instance) { if err := r.reconcileUserConfigMap(ctx, instance); err != nil { return fmt.Errorf("failed to reconcile user ConfigMap: %w", err) } } - // Reconcile the CA bundle ConfigMap if specified if r.hasCABundleConfigMap(instance) { if err := r.reconcileCABundleConfigMap(ctx, instance); err != nil { return fmt.Errorf("failed to reconcile CA bundle ConfigMap: %w", err) @@ -398,6 +426,39 @@ func (r *LlamaStackDistributionReconciler) reconcileConfigMaps(ctx context.Conte return nil } +func (r *LlamaStackDistributionReconciler) reconcileManagedCABundle(ctx context.Context, instance *llamav1alpha1.LlamaStackDistribution) error { + logger := log.FromContext(ctx) + managedConfigMapName := getManagedCABundleConfigMapName(instance) + + if !r.hasCABundleConfigMap(instance) && !r.hasODHTrustedCABundle(ctx, instance) { + // No CA bundles configured, delete managed ConfigMap if it exists + existingConfigMap := &corev1.ConfigMap{} + err := r.Get(ctx, types.NamespacedName{ + Name: managedConfigMapName, + Namespace: instance.Namespace, + }, existingConfigMap) + + if err == nil { + // ConfigMap exists but is no longer needed, delete it + logger.Info("Deleting unused managed CA bundle ConfigMap", "configMap", managedConfigMapName) + if delErr := r.Delete(ctx, existingConfigMap); delErr != nil && !k8serrors.IsNotFound(delErr) { + return fmt.Errorf("failed to delete unused managed CA bundle ConfigMap: %w", delErr) + } + logger.Info("Successfully deleted unused managed CA bundle ConfigMap", "configMap", managedConfigMapName) + } else if !k8serrors.IsNotFound(err) { + // Unexpected error + return fmt.Errorf("failed to check for managed CA bundle ConfigMap: %w", err) + } + return nil + } + + if err := r.reconcileManagedCABundleConfigMap(ctx, instance); err != nil { + return fmt.Errorf("failed to reconcile managed CA bundle ConfigMap: %w", err) + } + + return nil +} + // SetupWithManager sets up the controller with the Manager. func (r *LlamaStackDistributionReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error { // Create a field indexer for ConfigMap references to improve performance @@ -1079,13 +1140,6 @@ func (r *LlamaStackDistributionReconciler) reconcileUserConfigMap(ctx context.Co return nil } -// isValidPEM validates that the given data contains valid PEM formatted content. -func isValidPEM(data []byte) bool { - // Basic PEM validation using pem.Decode. - block, _ := pem.Decode(data) - return block != nil -} - // reconcileCABundleConfigMap validates that the referenced CA bundle ConfigMap exists. func (r *LlamaStackDistributionReconciler) reconcileCABundleConfigMap(ctx context.Context, instance *llamav1alpha1.LlamaStackDistribution) error { logger := log.FromContext(ctx) @@ -1136,26 +1190,9 @@ func (r *LlamaStackDistributionReconciler) reconcileCABundleConfigMap(ctx contex return fmt.Errorf("failed to find CA bundle key '%s' in ConfigMap %s/%s", key, configMapNamespace, instance.Spec.Server.TLSConfig.CABundle.ConfigMapName) } - // Validate that the key contains valid PEM data - pemData, exists := configMap.Data[key] - if !exists { - // This should not happen since we checked above, but just to be safe - return fmt.Errorf("failed to find CA bundle key '%s' in ConfigMap %s/%s", key, configMapNamespace, instance.Spec.Server.TLSConfig.CABundle.ConfigMapName) - } - - if !isValidPEM([]byte(pemData)) { - logger.Error(nil, "CA bundle key contains invalid PEM data", - "configMapName", instance.Spec.Server.TLSConfig.CABundle.ConfigMapName, - "configMapNamespace", configMapNamespace, - "key", key) - return fmt.Errorf("failed to validate CA bundle key '%s' in ConfigMap %s/%s: contains invalid PEM data", - key, - configMapNamespace, - instance.Spec.Server.TLSConfig.CABundle.ConfigMapName, - ) - } - - logger.V(1).Info("CA bundle key contains valid PEM data", + // Note: Detailed PEM validation is performed later + // in extractValidCertificates() which validates all PEM blocks. + logger.V(1).Info("CA bundle key found", "configMapName", instance.Spec.Server.TLSConfig.CABundle.ConfigMapName, "configMapNamespace", configMapNamespace, "key", key) @@ -1190,34 +1227,315 @@ func (r *LlamaStackDistributionReconciler) getConfigMapHash(ctx context.Context, return fmt.Sprintf("%s-%s", configMap.ResourceVersion, configMap.Name), nil } -// getCABundleConfigMapHash calculates a hash of the CA bundle ConfigMap data to detect changes. +// getCABundleConfigMapHash calculates a hash of the managed CA bundle ConfigMap to detect changes. func (r *LlamaStackDistributionReconciler) getCABundleConfigMapHash(ctx context.Context, instance *llamav1alpha1.LlamaStackDistribution) (string, error) { - if !r.hasCABundleConfigMap(instance) { + // Check if any CA bundles are configured + if !r.hasCABundleConfigMap(instance) && !r.hasODHTrustedCABundle(ctx, instance) { return "", nil } - configMapNamespace := r.getCABundleConfigMapNamespace(instance) + // Get the managed ConfigMap + managedConfigMapName := getManagedCABundleConfigMapName(instance) + configMap := &corev1.ConfigMap{} + err := r.Get(ctx, types.NamespacedName{ + Name: managedConfigMapName, + Namespace: instance.Namespace, + }, configMap) + if err != nil { + if k8serrors.IsNotFound(err) { + // ConfigMap doesn't exist yet, return empty hash + return "", nil + } + return "", err + } + + // Create a content-based hash that will change when the ConfigMap data changes + return fmt.Sprintf("%s-%s", configMap.ResourceVersion, configMap.Name), nil +} + +// hasODHTrustedCABundle checks if the ODH trusted CA bundle ConfigMap exists and has valid keys. +func (r *LlamaStackDistributionReconciler) hasODHTrustedCABundle(ctx context.Context, instance *llamav1alpha1.LlamaStackDistribution) bool { + _, keys, err := r.detectODHTrustedCABundle(ctx, instance) + return err == nil && len(keys) > 0 +} + +// gatherCABundleData collects all CA certificate data from source ConfigMaps and concatenates them. +// This function implements security measures to prevent injection attacks: +// - Validates PEM structure and X.509 certificate format during processing. +// - Enforces size limits to prevent resource exhaustion. +// - Only extracts valid CERTIFICATE blocks using PEM decoder and X.509 parser. +func (r *LlamaStackDistributionReconciler) gatherCABundleData(ctx context.Context, instance *llamav1alpha1.LlamaStackDistribution) (string, error) { + logger := log.FromContext(ctx) + collector := &certificateCollector{logger: logger} + + if err := r.gatherExplicitCABundle(ctx, instance, collector); err != nil { + return "", err + } + + if err := r.gatherODHCABundle(ctx, instance, collector); err != nil { + return "", err + } + + return collector.concatenate() +} + +type certificateCollector struct { + logger logr.Logger + certificates []string + totalSize int + certificateCount int +} + +func (c *certificateCollector) add(certs []string, size, count int, configMapName, key string) error { + c.totalSize += size + c.certificateCount += count + + if c.totalSize > MaxCABundleSize { + return fmt.Errorf("failed to process CA bundle: total size exceeds maximum allowed size of %d bytes", MaxCABundleSize) + } + + if c.certificateCount > MaxCABundleCertificates { + return fmt.Errorf("failed to process CA bundle: contains more than %d certificates (maximum allowed)", MaxCABundleCertificates) + } + + c.certificates = append(c.certificates, certs...) + c.logger.V(1).Info("Processed CA bundle key", + "configMap", configMapName, + "key", key, + "certificates", count, + "size", size) + + return nil +} + +func (c *certificateCollector) concatenate() (string, error) { + if len(c.certificates) == 0 { + return "", errors.New("failed to find valid certificates in CA bundle ConfigMaps") + } + // Use strings.Builder for efficient memory usage with large bundles + var builder strings.Builder + builder.Grow(c.totalSize + len(c.certificates)) // Pre-allocate with space for newlines + for i, cert := range c.certificates { + if i > 0 { + builder.WriteString("\n") + } + builder.WriteString(cert) + } + + concatenated := builder.String() + c.logger.Info("Successfully gathered CA bundle data", + "totalCertificates", c.certificateCount, + "totalSize", len(concatenated)) + + return concatenated, nil +} + +func (r *LlamaStackDistributionReconciler) gatherExplicitCABundle(ctx context.Context, instance *llamav1alpha1.LlamaStackDistribution, collector *certificateCollector) error { + if !r.hasCABundleConfigMap(instance) { + return nil + } + + configMapNamespace := r.getCABundleConfigMapNamespace(instance) configMap := &corev1.ConfigMap{} err := r.Get(ctx, types.NamespacedName{ Name: instance.Spec.Server.TLSConfig.CABundle.ConfigMapName, Namespace: configMapNamespace, }, configMap) if err != nil { - return "", err + return fmt.Errorf("failed to get CA bundle ConfigMap %s/%s: %w", + configMapNamespace, instance.Spec.Server.TLSConfig.CABundle.ConfigMapName, err) } - // Create a content-based hash that will change when the ConfigMap data changes - // Include information about which keys are being used - var keyInfo string - if len(instance.Spec.Server.TLSConfig.CABundle.ConfigMapKeys) > 0 { - keyInfo = fmt.Sprintf("-%s", strings.Join(instance.Spec.Server.TLSConfig.CABundle.ConfigMapKeys, ",")) + keysToProcess := instance.Spec.Server.TLSConfig.CABundle.ConfigMapKeys + if len(keysToProcess) == 0 { + keysToProcess = []string{DefaultCABundleKey} + } + + return r.processConfigMapKeys(configMap, keysToProcess, configMapNamespace, instance.Spec.Server.TLSConfig.CABundle.ConfigMapName, collector) +} + +func (r *LlamaStackDistributionReconciler) gatherODHCABundle(ctx context.Context, instance *llamav1alpha1.LlamaStackDistribution, collector *certificateCollector) error { + configMap, keys, err := r.detectODHTrustedCABundle(ctx, instance) + if err != nil { + // Log but don't fail - ODH bundle is optional + collector.logger.V(1).Info("Could not detect ODH trusted CA bundle", "error", err) + return nil + } + if configMap == nil || len(keys) == 0 { + return nil + } + + return r.processODHConfigMapKeys(configMap, keys, collector) +} + +func (r *LlamaStackDistributionReconciler) processConfigMapKeys(configMap *corev1.ConfigMap, keys []string, namespace, name string, collector *certificateCollector) error { + for _, key := range keys { + data, exists := configMap.Data[key] + if !exists { + return fmt.Errorf("failed to find CA bundle key '%s' in ConfigMap %s/%s", key, namespace, name) + } + + certs, size, count, err := extractValidCertificates([]byte(data), key) + if err != nil { + return fmt.Errorf("failed to process CA bundle key '%s' from ConfigMap %s/%s: %w", key, namespace, name, err) + } + + if err := collector.add(certs, size, count, configMap.Name, key); err != nil { + return err + } + } + + return nil +} + +func (r *LlamaStackDistributionReconciler) processODHConfigMapKeys(configMap *corev1.ConfigMap, keys []string, collector *certificateCollector) error { + for _, key := range keys { + data, exists := configMap.Data[key] + if !exists { + collector.logger.V(1).Info("ODH CA bundle key not found, skipping", "key", key) + continue + } + + certs, size, count, err := extractValidCertificates([]byte(data), key) + if err != nil { + collector.logger.Error(err, "Failed to process ODH CA bundle key, skipping", + "configMap", configMap.Name, + "key", key) + continue + } + + if err := collector.add(certs, size, count, configMap.Name, key); err != nil { + return err + } + } + + return nil +} + +// extractValidCertificates extracts only valid CERTIFICATE blocks from PEM data. +// This function validates PEM structure and X.509 certificate format for all blocks. +// It filters out non-certificate PEM blocks (e.g., private keys, public keys) and +// rejects invalid X.509 certificates. +// Returns: (certificates as strings, total size, certificate count, error). +func extractValidCertificates(data []byte, keyName string) ([]string, int, int, error) { + if len(data) == 0 { + return nil, 0, 0, fmt.Errorf("failed to process CA bundle key '%s': contains no data", keyName) + } + + var certificates []string + totalSize := 0 + remaining := data + + for { + block, rest := pem.Decode(remaining) + if block == nil { + break + } + + // Only accept CERTIFICATE blocks, reject other PEM types + if block.Type != "CERTIFICATE" { + // Skip non-certificate blocks (could be private keys, etc.) + remaining = rest + continue + } + + // Validate that this is actually a valid X.509 certificate + if _, err := x509.ParseCertificate(block.Bytes); err != nil { + return nil, 0, 0, fmt.Errorf("failed to parse X.509 certificate from key '%s': %w", keyName, err) + } + + // Re-encode the certificate to ensure it's properly formatted + certPEM := pem.EncodeToMemory(block) + if certPEM == nil { + return nil, 0, 0, fmt.Errorf("failed to encode certificate from key '%s'", keyName) + } + + certificates = append(certificates, string(certPEM)) + totalSize += len(certPEM) + remaining = rest + } + + if len(certificates) == 0 { + return nil, 0, 0, fmt.Errorf("failed to find valid certificates in CA bundle key '%s'", keyName) + } + + return certificates, totalSize, len(certificates), nil +} + +// reconcileManagedCABundleConfigMap creates or updates the managed CA bundle ConfigMap. +func (r *LlamaStackDistributionReconciler) reconcileManagedCABundleConfigMap(ctx context.Context, instance *llamav1alpha1.LlamaStackDistribution) error { + logger := log.FromContext(ctx) + + // Gather all CA certificate data + caBundleData, err := r.gatherCABundleData(ctx, instance) + if err != nil { + return fmt.Errorf("failed to gather CA bundle data: %w", err) + } + + managedConfigMapName := getManagedCABundleConfigMapName(instance) + + // Check if the managed ConfigMap already exists + existingConfigMap := &corev1.ConfigMap{} + err = r.Get(ctx, types.NamespacedName{ + Name: managedConfigMapName, + Namespace: instance.Namespace, + }, existingConfigMap) + + if err != nil && !k8serrors.IsNotFound(err) { + return fmt.Errorf("failed to get managed CA bundle ConfigMap: %w", err) + } + + // Create the desired ConfigMap + desiredConfigMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: managedConfigMapName, + Namespace: instance.Namespace, + Labels: map[string]string{ + "app.kubernetes.io/managed-by": "llama-stack-operator", + "app.kubernetes.io/instance": instance.Name, + "app.kubernetes.io/component": "ca-bundle", + }, + }, + Data: map[string]string{ + ManagedCABundleKey: caBundleData, + }, + } + + // Set owner reference so the ConfigMap is deleted when the LlamaStackDistribution is deleted + if refErr := ctrl.SetControllerReference(instance, desiredConfigMap, r.Scheme); refErr != nil { + return fmt.Errorf("failed to set controller reference on managed CA bundle ConfigMap: %w", refErr) + } + + if k8serrors.IsNotFound(err) { + // ConfigMap doesn't exist, create it + logger.Info("Creating managed CA bundle ConfigMap", "configMap", managedConfigMapName) + if err := r.Create(ctx, desiredConfigMap); err != nil { + return fmt.Errorf("failed to create managed CA bundle ConfigMap: %w", err) + } + logger.Info("Successfully created managed CA bundle ConfigMap", "configMap", managedConfigMapName) } else { - // Default to DefaultCABundleKey when no keys are specified - keyInfo = fmt.Sprintf("-%s", DefaultCABundleKey) + // ConfigMap exists, update it if the data has changed + if existingConfigMap.Data[ManagedCABundleKey] != caBundleData { + logger.Info("Updating managed CA bundle ConfigMap", "configMap", managedConfigMapName) + // Use Patch instead of Update to avoid race conditions + patch := client.MergeFrom(existingConfigMap.DeepCopy()) + existingConfigMap.Data = desiredConfigMap.Data + existingConfigMap.Labels = desiredConfigMap.Labels + if err := r.Patch(ctx, existingConfigMap, patch); err != nil { + if k8serrors.IsConflict(err) { + // Conflict detected, will be retried by controller + return fmt.Errorf("failed to patch managed CA bundle ConfigMap (conflict): %w", err) + } + return fmt.Errorf("failed to patch managed CA bundle ConfigMap: %w", err) + } + logger.Info("Successfully updated managed CA bundle ConfigMap", "configMap", managedConfigMapName) + } else { + logger.V(1).Info("Managed CA bundle ConfigMap is up to date", "configMap", managedConfigMapName) + } } - return fmt.Sprintf("%s-%s%s", configMap.ResourceVersion, configMap.Name, keyInfo), nil + return nil } // detectODHTrustedCABundle checks if the well-known ODH trusted CA bundle ConfigMap @@ -1243,23 +1561,17 @@ func (r *LlamaStackDistributionReconciler) detectODHTrustedCABundle(ctx context. instance.Namespace, odhTrustedCABundleConfigMap, err) } - // Extract available data keys and validate they contain valid PEM data + // Extract available data keys + // PEM data is validated in extractValidCertificates() + // which properly validates all PEM blocks. keys := make([]string, 0, len(configMap.Data)) - for key, value := range configMap.Data { - // Only include keys that contain valid PEM data - if isValidPEM([]byte(value)) { - keys = append(keys, key) - logger.V(1).Info("Auto-detected CA bundle key contains valid PEM data", - "configMapName", odhTrustedCABundleConfigMap, - "namespace", instance.Namespace, - "key", key) - } else { - logger.V(1).Info("Auto-detected CA bundle key contains invalid PEM data, skipping", - "configMapName", odhTrustedCABundleConfigMap, - "namespace", instance.Namespace, - "key", key) - } + for key := range configMap.Data { + keys = append(keys, key) + logger.V(1).Info("Auto-detected CA bundle key", + "configMapName", odhTrustedCABundleConfigMap, + "namespace", instance.Namespace, + "key", key) } logger.V(1).Info("ODH trusted CA bundle ConfigMap detected", diff --git a/controllers/llamastackdistribution_controller_test.go b/controllers/llamastackdistribution_controller_test.go index b6dc7509..0b1b7230 100644 --- a/controllers/llamastackdistribution_controller_test.go +++ b/controllers/llamastackdistribution_controller_test.go @@ -512,3 +512,287 @@ func TestNetworkPolicyConfiguration(t *testing.T) { }) } } + +// TestCABundleConfigMapKeyValidation tests that invalid ConfigMap keys are rejected during reconciliation. +func TestCABundleConfigMapKeyValidation(t *testing.T) { + ctrl.SetLogger(zap.New(zap.UseDevMode(true))) + + tests := []struct { + name string + configMapKeys []string + shouldFail bool + errorContains string + }{ + { + name: "valid ConfigMap keys", + configMapKeys: []string{"ca-bundle.crt", "root-ca.pem", "intermediate.crt"}, + shouldFail: false, + }, + { + name: "ConfigMap key with path traversal (..) should fail", + configMapKeys: []string{"../etc/passwd"}, + shouldFail: true, + errorContains: "invalid path characters", + }, + { + name: "ConfigMap key with forward slash should fail", + configMapKeys: []string{"path/to/cert.crt"}, + shouldFail: true, + errorContains: "invalid path characters", + }, + { + name: "ConfigMap key with double dots in middle should fail", + configMapKeys: []string{"ca..bundle.crt"}, + shouldFail: true, + errorContains: "invalid path characters", + }, + { + name: "empty ConfigMap key should fail", + configMapKeys: []string{""}, + shouldFail: true, + errorContains: "cannot be empty", + }, + { + name: "ConfigMap key with invalid characters should fail", + configMapKeys: []string{"ca bundle with spaces.crt"}, + shouldFail: true, + errorContains: "invalid characters", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // --- arrange --- + namespace := createTestNamespace(t, "test-cabundle-validation") + instance := NewDistributionBuilder(). + WithName("test-cabundle"). + WithNamespace(namespace.Name). + WithCABundle("test-ca-configmap", tt.configMapKeys). + Build() + + require.NoError(t, k8sClient.Create(t.Context(), instance)) + t.Cleanup(func() { _ = k8sClient.Delete(t.Context(), instance) }) + + // --- act --- + reconciler := createTestReconciler() + _, err := reconciler.Reconcile(t.Context(), ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: instance.Name, + Namespace: instance.Namespace, + }, + }) + + // --- assert --- + if tt.shouldFail { + require.Error(t, err, "reconciliation should fail for invalid ConfigMap keys") + require.Contains(t, err.Error(), tt.errorContains, + "error message should indicate the validation failure") + } else if err != nil { + // For valid keys, reconciliation might fail for other reasons (ConfigMap doesn't exist), + // but it should NOT fail due to key validation + require.NotContains(t, err.Error(), "failed to validate CA bundle ConfigMap keys", + "reconciliation should not fail due to key validation for valid keys") + } + }) + } +} + +// TestManagedCABundleConfigMap tests that the operator creates and manages CA bundle ConfigMaps. +func TestManagedCABundleConfigMap(t *testing.T) { + ctrl.SetLogger(zap.New(zap.UseDevMode(true))) + + t.Run("creates managed ConfigMap with concatenated certificates", func(t *testing.T) { + // --- arrange --- + namespace := createTestNamespace(t, "test-managed-cabundle") + + // Load valid test certificate + testCert := loadTestCertificate(t) + + // Create source CA bundle ConfigMap with multiple keys (using same cert as both for simplicity) + sourceConfigMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "source-ca-bundle", + Namespace: namespace.Name, + }, + Data: map[string]string{ + "root-ca.crt": testCert, + "intermediate.crt": testCert, + }, + } + require.NoError(t, k8sClient.Create(t.Context(), sourceConfigMap)) + + instance := NewDistributionBuilder(). + WithName("test-managed"). + WithNamespace(namespace.Name). + WithCABundle("source-ca-bundle", []string{"root-ca.crt", "intermediate.crt"}). + Build() + + require.NoError(t, k8sClient.Create(t.Context(), instance)) + t.Cleanup(func() { _ = k8sClient.Delete(t.Context(), instance) }) + + // --- act --- + ReconcileDistribution(t, instance, false) + + // --- assert --- + managedConfigMapName := instance.Name + "-ca-bundle" + managedConfigMap := &corev1.ConfigMap{} + waitForResource(t, k8sClient, namespace.Name, managedConfigMapName, managedConfigMap) + + // Verify the managed ConfigMap has the correct structure + require.Contains(t, managedConfigMap.Data, "ca-bundle.crt", "managed ConfigMap should have ca-bundle.crt key") + caBundleData := managedConfigMap.Data["ca-bundle.crt"] + + // Verify certificates are present in the concatenated bundle + require.Contains(t, caBundleData, "BEGIN CERTIFICATE", "bundle should contain certificates") + require.Contains(t, caBundleData, "END CERTIFICATE", "bundle should contain complete certificates") + + // Verify owner reference + AssertResourceOwnedByInstance(t, managedConfigMap, instance) + + // Verify labels + require.Equal(t, "llama-stack-operator", managedConfigMap.Labels["app.kubernetes.io/managed-by"]) + require.Equal(t, instance.Name, managedConfigMap.Labels["app.kubernetes.io/instance"]) + require.Equal(t, "ca-bundle", managedConfigMap.Labels["app.kubernetes.io/component"]) + }) + + t.Run("updates managed ConfigMap when source changes", func(t *testing.T) { + // --- arrange --- + namespace := createTestNamespace(t, "test-cabundle-update") + + // Load valid test certificate + testCert := loadTestCertificate(t) + + sourceConfigMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "source-ca-bundle", + Namespace: namespace.Name, + }, + Data: map[string]string{ + "ca-bundle.crt": testCert, + }, + } + require.NoError(t, k8sClient.Create(t.Context(), sourceConfigMap)) + + instance := NewDistributionBuilder(). + WithName("test-update"). + WithNamespace(namespace.Name). + WithCABundle("source-ca-bundle", nil). + Build() + + require.NoError(t, k8sClient.Create(t.Context(), instance)) + t.Cleanup(func() { _ = k8sClient.Delete(t.Context(), instance) }) + + ReconcileDistribution(t, instance, false) + + managedConfigMapName := instance.Name + "-ca-bundle" + managedConfigMap := &corev1.ConfigMap{} + waitForResource(t, k8sClient, namespace.Name, managedConfigMapName, managedConfigMap) + + originalData := managedConfigMap.Data["ca-bundle.crt"] + + // --- act --- + // Update source ConfigMap by adding the certificate twice (making bundle larger) + sourceConfigMap.Data["ca-bundle.crt"] = testCert + "\n" + testCert + require.NoError(t, k8sClient.Update(t.Context(), sourceConfigMap)) + + ReconcileDistribution(t, instance, false) + + // --- assert --- + require.NoError(t, k8sClient.Get(t.Context(), types.NamespacedName{ + Name: managedConfigMapName, + Namespace: namespace.Name, + }, managedConfigMap)) + + updatedData := managedConfigMap.Data["ca-bundle.crt"] + require.NotEqual(t, originalData, updatedData, "managed ConfigMap should be updated") + require.Contains(t, updatedData, "BEGIN CERTIFICATE", "managed ConfigMap should still contain certificate") + // Verify the bundle has two certificates now + require.Greater(t, len(updatedData), len(originalData), "updated bundle should be larger") + }) + + t.Run("rejects non-certificate PEM blocks", func(t *testing.T) { + // --- arrange --- + namespace := createTestNamespace(t, "test-reject-non-cert") + + // Create source ConfigMap with non-certificate PEM block (should be rejected) + // Using "PUBLIC KEY" type to test that only CERTIFICATE blocks are accepted + sourceConfigMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "source-with-key", + Namespace: namespace.Name, + }, + Data: map[string]string{ + "ca-bundle.crt": `-----BEGIN PUBLIC KEY----- +MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA1234567890ABCDEF +-----END PUBLIC KEY-----`, + }, + } + require.NoError(t, k8sClient.Create(t.Context(), sourceConfigMap)) + + instance := NewDistributionBuilder(). + WithName("test-reject"). + WithNamespace(namespace.Name). + WithCABundle("source-with-key", nil). + Build() + + require.NoError(t, k8sClient.Create(t.Context(), instance)) + t.Cleanup(func() { _ = k8sClient.Delete(t.Context(), instance) }) + + // --- act --- + reconciler := createTestReconciler() + _, err := reconciler.Reconcile(t.Context(), ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: instance.Name, + Namespace: instance.Namespace, + }, + }) + + // --- assert --- + require.Error(t, err, "reconciliation should fail for non-certificate PEM") + require.Contains(t, err.Error(), "failed to find valid certificates", + "error should indicate no valid certificates") + }) + + t.Run("rejects invalid X.509 certificates", func(t *testing.T) { + // --- arrange --- + namespace := createTestNamespace(t, "test-reject-invalid-x509") + + // Create source ConfigMap with malformed certificate data + // This has correct PEM structure but invalid X.509 certificate data + sourceConfigMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "source-with-invalid-cert", + Namespace: namespace.Name, + }, + Data: map[string]string{ + "ca-bundle.crt": `-----BEGIN CERTIFICATE----- +InvalidCertificateDataThatIsNotValidX509 +-----END CERTIFICATE-----`, + }, + } + require.NoError(t, k8sClient.Create(t.Context(), sourceConfigMap)) + + instance := NewDistributionBuilder(). + WithName("test-reject-invalid"). + WithNamespace(namespace.Name). + WithCABundle("source-with-invalid-cert", nil). + Build() + + require.NoError(t, k8sClient.Create(t.Context(), instance)) + t.Cleanup(func() { _ = k8sClient.Delete(t.Context(), instance) }) + + // --- act --- + reconciler := createTestReconciler() + _, err := reconciler.Reconcile(t.Context(), ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: instance.Name, + Namespace: instance.Namespace, + }, + }) + + // --- assert --- + require.Error(t, err, "reconciliation should fail for invalid X.509 certificate") + require.Contains(t, err.Error(), "failed to parse X.509 certificate", + "error should indicate X.509 parsing failure") + }) +} diff --git a/controllers/manifests/base/deployment.yaml b/controllers/manifests/base/deployment.yaml index 2a50213a..975a1792 100644 --- a/controllers/manifests/base/deployment.yaml +++ b/controllers/manifests/base/deployment.yaml @@ -18,4 +18,3 @@ spec: serviceAccountName: sa containers: [] # Will be populated by controller volumes: [] # Will be populated by controller - initContainers: [] # Will be populated by controller diff --git a/controllers/resource_helper.go b/controllers/resource_helper.go index cf644e24..42800982 100644 --- a/controllers/resource_helper.go +++ b/controllers/resource_helper.go @@ -26,8 +26,6 @@ import ( llamav1alpha1 "github.com/llamastack/llama-stack-k8s-operator/api/v1alpha1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/intstr" - "k8s.io/utils/ptr" - "sigs.k8s.io/controller-runtime/pkg/log" ) // Constants for validation limits. @@ -35,6 +33,9 @@ const ( // maxConfigMapKeyLength defines the maximum allowed length for ConfigMap keys // based on Kubernetes DNS subdomain name limits. maxConfigMapKeyLength = 253 + // FSGroup is the filesystem group ID for the pod. + // This is the default group ID for the llama-stack server. + FSGroup = int64(1001) ) // Probes configuration. @@ -49,6 +50,11 @@ const ( // Kubernetes ConfigMap keys must be valid DNS subdomain names or data keys. var validConfigMapKeyRegex = regexp.MustCompile(`^[a-zA-Z0-9]([a-zA-Z0-9\-_.]*[a-zA-Z0-9])?$`) +// getManagedCABundleConfigMapName returns the name of the managed CA bundle ConfigMap. +func getManagedCABundleConfigMapName(instance *llamav1alpha1.LlamaStackDistribution) string { + return instance.Name + ManagedCABundleConfigMapSuffix +} + // startupScript is the script that will be used to start the server. var startupScript = ` set -e @@ -101,13 +107,14 @@ func validateConfigMapKeys(keys []string) error { if len(key) > maxConfigMapKeyLength { return fmt.Errorf("failed to validate ConfigMap key '%s': too long (max %d characters)", key, maxConfigMapKeyLength) } - if !validConfigMapKeyRegex.MatchString(key) { - return fmt.Errorf("failed to validate ConfigMap key '%s': contains invalid characters. Only alphanumeric characters, hyphens, underscores, and dots are allowed", key) - } - // Additional security check: prevent path traversal attempts + // Check for path traversal attempts first (before general regex check) + // to provide specific error messages for security-related issues if strings.Contains(key, "..") || strings.Contains(key, "/") { return fmt.Errorf("failed to validate ConfigMap key '%s': contains invalid path characters", key) } + if !validConfigMapKeyRegex.MatchString(key) { + return fmt.Errorf("failed to validate ConfigMap key '%s': contains invalid characters. Only alphanumeric characters, hyphens, underscores, and dots are allowed", key) + } } return nil } @@ -180,22 +187,14 @@ func configureContainerEnvironment(ctx context.Context, r *LlamaStackDistributio Value: mountPath, }) - // Add CA bundle environment variable if TLS config is specified - if instance.Spec.Server.TLSConfig != nil && instance.Spec.Server.TLSConfig.CABundle != nil { - // Set SSL_CERT_FILE to point to the specific CA bundle file + // Add CA bundle environment variable if any CA bundles are configured + // (explicit or auto-detected ODH bundles) + if hasAnyCABundle(ctx, r, instance) { + // Set SSL_CERT_FILE to point to the managed CA bundle file container.Env = append(container.Env, corev1.EnvVar{ Name: "SSL_CERT_FILE", - Value: CABundleMountPath, + Value: ManagedCABundleFilePath, }) - } else if r != nil { - // Check for auto-detected ODH trusted CA bundle - if _, keys, err := r.detectODHTrustedCABundle(ctx, instance); err == nil && len(keys) > 0 { - // Set SSL_CERT_FILE to point to the auto-detected consolidated CA bundle - container.Env = append(container.Env, corev1.EnvVar{ - Name: "SSL_CERT_FILE", - Value: CABundleMountPath, - }) - } } // Finally, add the user provided env vars @@ -214,6 +213,23 @@ func configureContainerMounts(ctx context.Context, r *LlamaStackDistributionReco addCABundleVolumeMount(ctx, r, instance, container) } +// hasAnyCABundle checks if any CA bundle will be mounted (explicit or auto-detected). +func hasAnyCABundle(ctx context.Context, r *LlamaStackDistributionReconciler, instance *llamav1alpha1.LlamaStackDistribution) bool { + // Check for explicit CA bundle configuration + if instance.Spec.Server.TLSConfig != nil && instance.Spec.Server.TLSConfig.CABundle != nil { + return true + } + + // Check for auto-detected ODH trusted CA bundle + if r != nil { + if _, keys, err := r.detectODHTrustedCABundle(ctx, instance); err == nil && len(keys) > 0 { + return true + } + } + + return false +} + // configureContainerCommands sets up container commands and args. func configureContainerCommands(instance *llamav1alpha1.LlamaStackDistribution, container *corev1.Container) { // Override the container entrypoint to use the custom config file if user config is specified @@ -264,139 +280,54 @@ func addUserConfigVolumeMount(instance *llamav1alpha1.LlamaStackDistribution, co } } -// addCABundleVolumeMount adds the CA bundle volume mount to the container if TLS config is specified. -// For multiple keys: the init container writes DefaultCABundleKey to the root of the emptyDir volume, -// and the main container mounts it with SubPath to CABundleMountPath. -// For single key: the main container directly mounts the ConfigMap key. -// Also handles auto-detected ODH trusted CA bundle ConfigMaps. +// addCABundleVolumeMount adds the managed CA bundle volume mount to the container. +// Mounts the operator-managed ConfigMap containing all concatenated certificates. func addCABundleVolumeMount(ctx context.Context, r *LlamaStackDistributionReconciler, instance *llamav1alpha1.LlamaStackDistribution, container *corev1.Container) { - if instance.Spec.Server.TLSConfig != nil && instance.Spec.Server.TLSConfig.CABundle != nil { + // Mount managed CA bundle if any CA bundles are configured + if hasAnyCABundle(ctx, r, instance) { container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{ Name: CABundleVolumeName, - MountPath: CABundleMountPath, - SubPath: DefaultCABundleKey, + MountPath: ManagedCABundleMountPath, ReadOnly: true, }) - } else if r != nil { - // Check for auto-detected ODH trusted CA bundle - if _, keys, err := r.detectODHTrustedCABundle(ctx, instance); err == nil && len(keys) > 0 { - // Mount the auto-detected consolidated CA bundle - container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{ - Name: CABundleVolumeName, - MountPath: CABundleMountPath, - SubPath: DefaultCABundleKey, - ReadOnly: true, - }) - } } } -// createCABundleVolume creates the appropriate volume configuration for CA bundles. -// For single key: uses direct ConfigMap volume. -// For multiple keys: uses emptyDir volume with InitContainer to concatenate keys. -func createCABundleVolume(caBundleConfig *llamav1alpha1.CABundleConfig) corev1.Volume { - // For multiple keys, we'll use an emptyDir that gets populated by an InitContainer - if len(caBundleConfig.ConfigMapKeys) > 0 { - return corev1.Volume{ - Name: CABundleVolumeName, - VolumeSource: corev1.VolumeSource{ - EmptyDir: &corev1.EmptyDirVolumeSource{}, - }, - } - } - - // For single key (legacy behavior), use direct ConfigMap volume +// createCABundleVolume creates the volume configuration for the managed CA bundle ConfigMap. +func createCABundleVolume(managedConfigMapName string) corev1.Volume { return corev1.Volume{ Name: CABundleVolumeName, VolumeSource: corev1.VolumeSource{ ConfigMap: &corev1.ConfigMapVolumeSource{ LocalObjectReference: corev1.LocalObjectReference{ - Name: caBundleConfig.ConfigMapName, + Name: managedConfigMapName, + }, + Items: []corev1.KeyToPath{ + { + Key: ManagedCABundleKey, + Path: ManagedCABundleKey, + }, }, }, }, } } -// createCABundleInitContainer creates an InitContainer that concatenates multiple CA bundle keys -// from a ConfigMap into a single file in the shared ca-bundle volume. -func createCABundleInitContainer(caBundleConfig *llamav1alpha1.CABundleConfig, image string) (corev1.Container, error) { - // Validate ConfigMap keys for security - if err := validateConfigMapKeys(caBundleConfig.ConfigMapKeys); err != nil { - return corev1.Container{}, fmt.Errorf("failed to validate ConfigMap keys: %w", err) - } - - // Build the file list as a shell array embedded in the script - // This ensures the arguments are properly passed to the script - var fileListBuilder strings.Builder - for i, key := range caBundleConfig.ConfigMapKeys { - if i > 0 { - fileListBuilder.WriteString(" ") - } - // Quote each key to handle any special characters safely - fileListBuilder.WriteString(fmt.Sprintf("%q", key)) - } - fileList := fileListBuilder.String() - - // Use a secure script approach that embeds the file list directly - // This eliminates the issue with arguments not being passed to sh -c - script := fmt.Sprintf(`#!/bin/sh -set -e -output_file="%s" -source_dir="%s" - -# Clear the output file -> "$output_file" - -# Process each validated key file (keys are pre-validated) -for key in %s; do - file_path="$source_dir/$key" - if [ -f "$file_path" ]; then - cat "$file_path" >> "$output_file" - echo >> "$output_file" # Add newline between certificates - else - echo "Warning: Certificate file $file_path not found" >&2 - fi -done`, CABundleTempPath, CABundleSourceDir, fileList) - - return corev1.Container{ - Name: CABundleInitName, - Image: image, - ImagePullPolicy: corev1.PullAlways, - Command: []string{"/bin/sh", "-c", script}, - // No Args needed since we embed the file list in the script - VolumeMounts: []corev1.VolumeMount{ - { - Name: CABundleSourceVolName, - MountPath: CABundleSourceDir, - ReadOnly: true, - }, - { - Name: CABundleVolumeName, - MountPath: CABundleTempDir, - }, - }, - SecurityContext: &corev1.SecurityContext{ - AllowPrivilegeEscalation: ptr.To(false), - RunAsNonRoot: ptr.To(true), - Capabilities: &corev1.Capabilities{ - Drop: []corev1.Capability{"ALL"}, - }, - }, - }, nil -} - // configurePodStorage configures the pod storage and returns the complete pod spec. func configurePodStorage(ctx context.Context, r *LlamaStackDistributionReconciler, instance *llamav1alpha1.LlamaStackDistribution, container corev1.Container) corev1.PodSpec { + fsGroup := FSGroup podSpec := corev1.PodSpec{ Containers: []corev1.Container{container}, + SecurityContext: &corev1.PodSecurityContext{ + FSGroup: &fsGroup, + }, } - // Configure storage volumes and init containers + // Configure storage volumes configureStorage(instance, &podSpec) // Configure TLS CA bundle (with auto-detection support) - configureTLSCABundle(ctx, r, instance, &podSpec, container.Image) + configureTLSCABundle(ctx, r, instance, &podSpec) // Configure user config configureUserConfig(instance, &podSpec) @@ -416,7 +347,7 @@ func configureStorage(instance *llamav1alpha1.LlamaStackDistribution, podSpec *c } } -// configurePersistentStorage sets up PVC-based storage with init container for permissions. +// configurePersistentStorage sets up PVC-based storage. func configurePersistentStorage(instance *llamav1alpha1.LlamaStackDistribution, podSpec *corev1.PodSpec) { // Use PVC for persistent storage podSpec.Volumes = append(podSpec.Volumes, corev1.Volume{ @@ -441,110 +372,17 @@ func configureEmptyDirStorage(podSpec *corev1.PodSpec) { } // configureTLSCABundle handles TLS CA bundle configuration. -// For multiple keys: adds a ca-bundle-init init container that concatenates all keys into a single file -// in a shared emptyDir volume, which the main container then mounts via SubPath. -// For single key: uses a direct ConfigMap volume mount. -// If no explicit CA bundle is configured, it checks for the well-known ODH trusted CA bundle ConfigMap. -func configureTLSCABundle(ctx context.Context, r *LlamaStackDistributionReconciler, instance *llamav1alpha1.LlamaStackDistribution, podSpec *corev1.PodSpec, image string) { - tlsConfig := instance.Spec.Server.TLSConfig - - // Handle explicit CA bundle configuration first - if tlsConfig != nil && tlsConfig.CABundle != nil { - addExplicitCABundle(ctx, tlsConfig.CABundle, podSpec, image) +// Mounts the operator-managed CA bundle ConfigMap that contains all certificates. +func configureTLSCABundle(ctx context.Context, r *LlamaStackDistributionReconciler, instance *llamav1alpha1.LlamaStackDistribution, podSpec *corev1.PodSpec) { + // Check if any CA bundles are configured (explicit or auto-detected ODH) + if !hasAnyCABundle(ctx, r, instance) { return } - // If no explicit CA bundle is configured, check for ODH trusted CA bundle auto-detection - if r != nil { - addAutoDetectedCABundle(ctx, r, instance, podSpec, image) - } -} - -// addExplicitCABundle handles explicitly configured CA bundles. -func addExplicitCABundle(ctx context.Context, caBundleConfig *llamav1alpha1.CABundleConfig, podSpec *corev1.PodSpec, image string) { - // Add CA bundle InitContainer if multiple keys are specified - if len(caBundleConfig.ConfigMapKeys) > 0 { - caBundleInitContainer, err := createCABundleInitContainer(caBundleConfig, image) - if err != nil { - log.FromContext(ctx).Error(err, "Failed to create CA bundle init container") - return - } - podSpec.InitContainers = append(podSpec.InitContainers, caBundleInitContainer) - } - - // Add CA bundle ConfigMap volume - volume := createCABundleVolume(caBundleConfig) + // Add the managed CA bundle ConfigMap volume + managedConfigMapName := getManagedCABundleConfigMapName(instance) + volume := createCABundleVolume(managedConfigMapName) podSpec.Volumes = append(podSpec.Volumes, volume) - - // Add source ConfigMap volume for multiple keys scenario - if len(caBundleConfig.ConfigMapKeys) > 0 { - sourceVolume := corev1.Volume{ - Name: CABundleSourceVolName, - VolumeSource: corev1.VolumeSource{ - ConfigMap: &corev1.ConfigMapVolumeSource{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: caBundleConfig.ConfigMapName, - }, - }, - }, - } - podSpec.Volumes = append(podSpec.Volumes, sourceVolume) - } -} - -// addAutoDetectedCABundle handles auto-detection of ODH trusted CA bundle ConfigMap. -func addAutoDetectedCABundle(ctx context.Context, r *LlamaStackDistributionReconciler, instance *llamav1alpha1.LlamaStackDistribution, podSpec *corev1.PodSpec, image string) { - if r == nil { - return - } - - configMap, keys, err := r.detectODHTrustedCABundle(ctx, instance) - if err != nil { - // Log error but don't fail the reconciliation - log.FromContext(ctx).Error(err, "Failed to detect ODH trusted CA bundle ConfigMap") - return - } - - if configMap == nil || len(keys) == 0 { - // No ODH trusted CA bundle found or no keys available - return - } - - // Create a virtual CA bundle config for auto-detected ConfigMap - autoCaBundleConfig := &llamav1alpha1.CABundleConfig{ - ConfigMapName: configMap.Name, - ConfigMapKeys: keys, // Use all available keys - } - - // Use the same logic as explicit configuration - caBundleInitContainer, err := createCABundleInitContainer(autoCaBundleConfig, image) - if err != nil { - // Log error and skip auto-detected CA bundle configuration - log.FromContext(ctx).Error(err, "Failed to create CA bundle init container for auto-detected ConfigMap") - return - } - podSpec.InitContainers = append(podSpec.InitContainers, caBundleInitContainer) - - // Add CA bundle emptyDir volume for auto-detected ConfigMap - volume := createCABundleVolume(autoCaBundleConfig) - podSpec.Volumes = append(podSpec.Volumes, volume) - - // Add source ConfigMap volume for auto-detected ConfigMap - sourceVolume := corev1.Volume{ - Name: CABundleSourceVolName, - VolumeSource: corev1.VolumeSource{ - ConfigMap: &corev1.ConfigMapVolumeSource{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: configMap.Name, - }, - }, - }, - } - podSpec.Volumes = append(podSpec.Volumes, sourceVolume) - - log.FromContext(ctx).Info("Auto-configured ODH trusted CA bundle", - "configMapName", configMap.Name, - "keys", keys) } // configureUserConfig handles user configuration setup. @@ -576,17 +414,6 @@ func configurePodOverrides(instance *llamav1alpha1.LlamaStackDistribution, podSp podSpec.ServiceAccountName = instance.Name + "-sa" } - // Configure pod-level security context for OpenShift SCC compatibility - if podSpec.SecurityContext == nil { - podSpec.SecurityContext = &corev1.PodSecurityContext{} - } - - // Set fsGroup to allow write access to mounted volumes - const defaultFSGroup = 1001 - if podSpec.SecurityContext.FSGroup == nil { - podSpec.SecurityContext.FSGroup = ptr.To(int64(defaultFSGroup)) - } - // Apply other pod overrides if specified if instance.Spec.Server.PodOverrides != nil { // Add volumes if specified diff --git a/controllers/resource_helper_test.go b/controllers/resource_helper_test.go index f1ea8dd9..b58e6e3c 100644 --- a/controllers/resource_helper_test.go +++ b/controllers/resource_helper_test.go @@ -596,13 +596,13 @@ func TestValidateConfigMapKeys(t *testing.T) { name: "command injection attempt", keys: []string{"valid-key; rm -rf /; echo malicious"}, expectError: true, - errorMsg: "contains invalid characters", + errorMsg: "contains invalid path characters", }, { name: "path traversal attempt", keys: []string{"../../../etc/passwd"}, expectError: true, - errorMsg: "contains invalid characters", + errorMsg: "contains invalid path characters", }, { name: "shell metacharacters", @@ -614,7 +614,7 @@ func TestValidateConfigMapKeys(t *testing.T) { name: "pipe injection", keys: []string{"key | cat /etc/passwd"}, expectError: true, - errorMsg: "contains invalid characters", + errorMsg: "contains invalid path characters", }, { name: "too long key", diff --git a/controllers/testing_support_test.go b/controllers/testing_support_test.go index c83d73c0..68bbc944 100644 --- a/controllers/testing_support_test.go +++ b/controllers/testing_support_test.go @@ -1,7 +1,13 @@ package controllers_test import ( + "crypto/rand" + "crypto/rsa" + "crypto/x509" + "crypto/x509/pkix" + "encoding/pem" "fmt" + "math/big" "slices" "testing" "time" @@ -112,6 +118,17 @@ func (b *DistributionBuilder) WithUserConfig(configMapName string) *Distribution return b } +func (b *DistributionBuilder) WithCABundle(configMapName string, configMapKeys []string) *DistributionBuilder { + if b.instance.Spec.Server.TLSConfig == nil { + b.instance.Spec.Server.TLSConfig = &llamav1alpha1.TLSConfig{} + } + b.instance.Spec.Server.TLSConfig.CABundle = &llamav1alpha1.CABundleConfig{ + ConfigMapName: configMapName, + ConfigMapKeys: configMapKeys, + } + return b +} + func (b *DistributionBuilder) Build() *llamav1alpha1.LlamaStackDistribution { return b.instance.DeepCopy() } @@ -505,3 +522,49 @@ func createTestNamespace(t *testing.T, namePrefix string) *corev1.Namespace { }) return namespace } + +// loadTestCertificate generates a valid self-signed test certificate. +// This function generates certificates programmatically to avoid dependencies on external scripts +// or the openssl command, making tests portable across different environments including CI/CD. +func loadTestCertificate(t *testing.T) string { + t.Helper() + + // Generate a private key + privateKey, err := rsa.GenerateKey(rand.Reader, 2048) + require.NoError(t, err, "Failed to generate private key") + + // Create a certificate template + serialNumber, err := rand.Int(rand.Reader, new(big.Int).Lsh(big.NewInt(1), 128)) + require.NoError(t, err, "Failed to generate serial number") + + template := x509.Certificate{ + SerialNumber: serialNumber, + Subject: pkix.Name{ + Country: []string{"US"}, + Province: []string{"California"}, + Locality: []string{"Los Angeles"}, + Organization: []string{"Test Corp"}, + OrganizationalUnit: []string{"Testing"}, + CommonName: "test-ca", + }, + NotBefore: time.Now(), + NotAfter: time.Now().Add(365 * 24 * time.Hour), + KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature | x509.KeyUsageCertSign, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, + BasicConstraintsValid: true, + IsCA: true, + } + + // Create a self-signed certificate + certDER, err := x509.CreateCertificate(rand.Reader, &template, &template, &privateKey.PublicKey, privateKey) + require.NoError(t, err, "Failed to create certificate") + + // Encode certificate to PEM format + certPEM := pem.EncodeToMemory(&pem.Block{ + Type: "CERTIFICATE", + Bytes: certDER, + }) + require.NotNil(t, certPEM, "Failed to encode certificate to PEM") + + return string(certPEM) +} diff --git a/docs/additional/ca-bundle-configuration.md b/docs/additional/ca-bundle-configuration.md index 0fc841f0..cf83cebb 100644 --- a/docs/additional/ca-bundle-configuration.md +++ b/docs/additional/ca-bundle-configuration.md @@ -13,23 +13,33 @@ The CA bundle configuration allows you to: When you configure a CA bundle: -1. **ConfigMap Storage**: CA certificates are stored in a Kubernetes ConfigMap -2. **Volume Mounting**: The certificates are mounted at `/etc/ssl/certs/` in the container -3. **Environment Variable**: The `SSL_CERT_FILE` environment variable is set to point to the CA bundle -4. **Automatic Restarts**: Pods restart automatically when the CA bundle ConfigMap changes - -### Single Key vs Multiple Keys - -**Single Key (configMapKey):** -- Direct ConfigMap volume mount -- Certificate file mounted directly from the ConfigMap key -- Minimal resource overhead - -**Multiple Keys (configMapKeys):** -- Uses an InitContainer to concatenate multiple keys -- All certificates from specified keys are combined into a single file -- Slightly higher resource overhead due to InitContainer, but maintains standard SSL behavior -- The final consolidated file is always named `ca-bundle.crt` regardless of source key names +1. **ConfigMap Storage**: CA certificates are stored in a Kubernetes ConfigMap (source ConfigMap) +2. **Controller Processing**: The operator controller reads and validates certificates from the source ConfigMap(s) +3. **Concatenation**: Valid certificates are concatenated into a single PEM file using Go's `encoding/pem` package +4. **Managed ConfigMap**: The operator creates a managed ConfigMap named `{instance-name}-ca-bundle` containing the concatenated bundle +5. **Volume Mounting**: The managed ConfigMap is mounted at `/etc/ssl/certs/ca-bundle/ca-bundle.crt` in the container +6. **Environment Variable**: The `SSL_CERT_FILE` environment variable is automatically set to point to the mounted certificate bundle +7. **Automatic Restarts**: Pods restart automatically when the source CA bundle ConfigMap changes + +### Certificate Processing + +The operator processes CA bundle certificates in the controller before deployment: + +**Processing Steps:** +1. The controller reads CA certificate data from the source ConfigMap(s) specified in `tlsConfig.caBundle` +2. Each certificate is validated using Go's `encoding/pem` package to ensure proper PEM format +3. Valid `CERTIFICATE` blocks are extracted and concatenated into a single PEM file +4. The concatenated bundle is stored in a managed ConfigMap named `{instance-name}-ca-bundle` with key `ca-bundle.crt` +5. The managed ConfigMap is mounted directly at `/etc/ssl/certs/ca-bundle/ca-bundle.crt` in the pod +6. The `SSL_CERT_FILE` environment variable is automatically set to point to the mounted bundle file + +**Security Features:** +- Maximum bundle size limit (10MB) to prevent resource exhaustion attacks +- Maximum certificate count limit (1000 certificates) +- PEM format validation before processing +- Only valid X.509 CERTIFICATE blocks are extracted (other PEM types are ignored) +- Validation errors prevent deployment with clear error messages in the CR status +- No runtime dependencies (no openssl or other tools required in the container) ## Configuration Options @@ -312,7 +322,7 @@ kubectl get configmap my-ca-bundle -o yaml kubectl describe pod # Check mounted certificates -kubectl exec -- ls -la /etc/ssl/certs/ +kubectl exec -- ls -la /etc/ssl/certs/ca-bundle/ # Check SSL_CERT_FILE environment variable kubectl exec -- env | grep SSL_CERT_FILE @@ -346,10 +356,13 @@ Before deploying a LlamaStackDistribution with CA bundle: 3. **Namespace Isolation**: Use appropriate namespaces to isolate CA bundles 4. **Audit Trail**: Monitor ConfigMap changes in production environments 5. **Principle of Least Privilege**: Only grant necessary permissions to access CA bundle ConfigMaps +6. **Resource Limits**: The operator enforces limits during certificate concatenation (10MB max bundle size, 1000 max certificates) to prevent resource exhaustion +7. **Input Validation**: Certificates are validated in the controller using Go's `encoding/pem` package before deployment ## Limitations - Only supports PEM format certificates -- ConfigMap size limits apply (1MB by default) -- Certificate validation is handled by the underlying Python SSL libraries +- ConfigMap size limits apply (1MB by default for source ConfigMaps) +- Maximum bundle size is 10MB and maximum 1000 certificates (enforced by controller) +- Certificate validation is handled by Go's `encoding/pem` and `crypto/x509` packages in the controller - Cross-namespace ConfigMap access requires appropriate RBAC permissions diff --git a/tests/e2e/creation_test.go b/tests/e2e/creation_test.go index 215e2214..1016cec2 100644 --- a/tests/e2e/creation_test.go +++ b/tests/e2e/creation_test.go @@ -93,6 +93,10 @@ func testCreateDistributionForType(t *testing.T, distType string) *v1alpha1.Llam }, llsdistributionCR.Name, ns.Name, ResourceReadyTimeout, isDeploymentReady) require.NoError(t, err) + // Wait for pods to be running and ready + err = WaitForPodsReady(t, TestEnv, ns.Name, llsdistributionCR.Name, ResourceReadyTimeout) + require.NoError(t, err, "Pods should be running and ready") + // Verify service is created err = EnsureResourceReady(t, TestEnv, schema.GroupVersionKind{ Group: "", @@ -145,6 +149,15 @@ func testCRDeploymentUpdate(t *testing.T, distribution *v1alpha1.LlamaStackDistr }, distribution) require.NoError(t, err) + // Skip scaling test if PVC with ReadWriteOnce is used + // Most cloud providers (AWS EBS, Azure Disk, GCE PD) only support ReadWriteOnce for block storage + // ReadWriteMany requires network file systems (NFS, CephFS, etc.) which may not be available + if distribution.Spec.Server.Storage != nil { + t.Log("Skipping replica scaling test - PVC with ReadWriteOnce can only be attached to one pod at a time") + t.Log("To enable replica scaling with persistent storage, configure a StorageClass that supports ReadWriteMany") + return + } + // Update replicas distribution.Spec.Replicas = 2 err = TestEnv.Client.Update(TestEnv.Ctx, distribution) diff --git a/tests/e2e/test_utils.go b/tests/e2e/test_utils.go index 41782873..d594136b 100644 --- a/tests/e2e/test_utils.go +++ b/tests/e2e/test_utils.go @@ -3,6 +3,7 @@ package e2e import ( "context" + "fmt" "log" "os" "path/filepath" @@ -145,6 +146,115 @@ func EnsureResourceDeleted(t *testing.T, testenv *TestEnvironment, gvk schema.Gr }) } +// WaitForPodsReady polls until all pods for a deployment are running and ready. +func WaitForPodsReady(t *testing.T, testenv *TestEnvironment, namespace, deploymentName string, timeout time.Duration) error { + t.Helper() + ctx, cancel := context.WithTimeout(testenv.Ctx, timeout) + defer cancel() + + return wait.PollUntilContextTimeout(ctx, pollInterval, timeout, true, func(ctx context.Context) (bool, error) { + // Get pods for the deployment + podList, err := GetPodsForDeployment(testenv, ctx, namespace, deploymentName) + if err != nil { + t.Logf("Error listing pods: %v", err) + return false, err + } + + if len(podList.Items) == 0 { + t.Logf("No pods found for deployment %s yet", deploymentName) + return false, nil + } + + // Check each pod's status + for _, pod := range podList.Items { + ready, err := checkPodStatus(t, &pod) + if err != nil { + return false, err + } + if !ready { + return false, nil + } + } + + t.Logf("All pods for deployment %s are ready", deploymentName) + return true, nil + }) +} + +// checkPodStatus checks if a single pod is running and ready. +func checkPodStatus(t *testing.T, pod *corev1.Pod) (bool, error) { + t.Helper() + t.Logf("Pod %s status: Phase=%s, Ready=%v", pod.Name, pod.Status.Phase, isPodReady(pod)) + + // Check if pod is running + if pod.Status.Phase != corev1.PodRunning && pod.Status.Phase != corev1.PodSucceeded { + t.Logf("Pod %s not running yet (phase: %s)", pod.Name, pod.Status.Phase) + return false, nil + } + + // Check if pod is ready + if !isPodReady(pod) { + t.Logf("Pod %s not ready yet", pod.Name) + return false, nil + } + + // Check container statuses for errors + return checkContainerStatuses(t, pod) +} + +// checkContainerStatuses checks all container statuses in a pod for errors. +func checkContainerStatuses(t *testing.T, pod *corev1.Pod) (bool, error) { + t.Helper() + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.State.Waiting != nil { + t.Logf("Container %s in pod %s is waiting: %s - %s", + containerStatus.Name, pod.Name, + containerStatus.State.Waiting.Reason, + containerStatus.State.Waiting.Message) + + // Fail fast on image pull errors or crash loops + if containerStatus.State.Waiting.Reason == "ImagePullBackOff" || + containerStatus.State.Waiting.Reason == "ErrImagePull" || + containerStatus.State.Waiting.Reason == "CrashLoopBackOff" { + return false, fmt.Errorf("failed to start container %s: %s - %s", + containerStatus.Name, + containerStatus.State.Waiting.Reason, + containerStatus.State.Waiting.Message) + } + } + + if containerStatus.State.Terminated != nil && containerStatus.State.Terminated.ExitCode != 0 { + return false, fmt.Errorf("failed to run container %s: terminated with exit code %d: %s", + containerStatus.Name, + containerStatus.State.Terminated.ExitCode, + containerStatus.State.Terminated.Reason) + } + } + return true, nil +} + +// isPodReady checks if a pod has the Ready condition set to True. +func isPodReady(pod *corev1.Pod) bool { + for _, condition := range pod.Status.Conditions { + if condition.Type == corev1.PodReady { + return condition.Status == corev1.ConditionTrue + } + } + return false +} + +// GetPodsForDeployment retrieves the list of pods for a given deployment. +func GetPodsForDeployment(testenv *TestEnvironment, ctx context.Context, namespace, deploymentName string) (*corev1.PodList, error) { + podList := &corev1.PodList{} + err := testenv.Client.List(ctx, podList, client.InNamespace(namespace), client.MatchingLabels{ + "app.kubernetes.io/instance": deploymentName, + }) + if err != nil { + return nil, err + } + return podList, nil +} + // CleanupTestEnv cleans up the test environment. func CleanupTestEnv(env *TestEnvironment) { // Implementation will be added later diff --git a/tests/e2e/tls_test.go b/tests/e2e/tls_test.go index 8dd19549..83745b94 100644 --- a/tests/e2e/tls_test.go +++ b/tests/e2e/tls_test.go @@ -100,6 +100,10 @@ func testLlamaStackWithCABundle(t *testing.T) { err = waitForDeploymentCreation(t, llsTestNS, "llamastack-with-config", 3*time.Minute) require.NoError(t, err, "LlamaStack deployment should be created by operator") + // Wait for pods to be running and ready + err = WaitForPodsReady(t, TestEnv, llsTestNS, "llamastack-with-config", 5*time.Minute) + require.NoError(t, err, "LlamaStack pods should be running and ready") + // Verify certificate volumes are mounted correctly err = verifyCertificateMounts(t, llsTestNS, "llamastack-with-config") require.NoError(t, err, "Certificate volumes should be mounted correctly") @@ -377,7 +381,11 @@ func verifyCertificateMounts(t *testing.T, namespace, name string) error { func hasCABundleVolume(volumes []corev1.Volume) bool { for _, volume := range volumes { - if volume.ConfigMap != nil && volume.ConfigMap.Name == "custom-ca-bundle" { + // Check for the managed CA bundle ConfigMap (named {instance-name}-ca-bundle) + // or the source CA bundle ConfigMap + if volume.ConfigMap != nil && + (strings.HasSuffix(volume.ConfigMap.Name, "-ca-bundle") || + volume.ConfigMap.Name == "custom-ca-bundle") { return true } } @@ -395,8 +403,9 @@ func hasCABundleMount(containers []corev1.Container) bool { func hasCABundleMountInContainer(mounts []corev1.VolumeMount) bool { for _, mount := range mounts { - if mount.MountPath == controllers.CABundleMountPath || - strings.Contains(mount.MountPath, "ca-bundle") { + if mount.MountPath == controllers.ManagedCABundleMountPath || + strings.Contains(mount.MountPath, "ca-bundle") || + strings.Contains(mount.MountPath, "ca-certificates") { return true } } @@ -419,7 +428,7 @@ func verifyEnvironmentVariables(t *testing.T, namespace, name string) error { // Check for TLS-related environment variables tlsEnvVarsFound := 0 expectedEnvVars := map[string]string{ - "VLLM_TLS_VERIFY": controllers.CABundleMountPath, + "SSL_CERT_FILE": controllers.ManagedCABundleFilePath, } for _, container := range deployment.Spec.Template.Spec.Containers {