Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
2c34075
feat(api): add EtcdBackup CRD types
Mar 7, 2026
969fb81
feat(api): add EtcdBackup validation webhook
Mar 7, 2026
7342b87
feat(backup): add backup-agent binary
Mar 7, 2026
9f1373c
feat(factory): add backup Job factory
Mar 7, 2026
ad42895
feat(controller): add EtcdBackup reconciler
Mar 7, 2026
1854d3a
feat(manager): register EtcdBackup controller and webhook
Mar 7, 2026
f4adfd6
chore(config): add EtcdBackup CRD manifests, RBAC, and webhook config
Mar 7, 2026
56ab880
build: add backup-agent to Dockerfile and Makefile
Mar 7, 2026
d66b93d
feat(helm): add EtcdBackup support to Helm chart
Mar 7, 2026
bf727ec
fix(backup): address PR review feedback
Mar 7, 2026
a899205
feat(backup): add EtcdBackupSchedule CRD for recurring backups
Mar 8, 2026
f28c683
fix(backup): address review findings in backup subsystem
Mar 8, 2026
329f2d5
feat(bootstrap): add cluster restoration from backup snapshot
Mar 8, 2026
7d5b58c
fix: address review findings
Mar 8, 2026
3b08271
fix: address second review findings
Mar 8, 2026
d198a3d
fix: address third review findings
Mar 8, 2026
c3aac43
fix: address fourth review findings
Mar 8, 2026
12498f8
fix: address fifth review findings
Mar 8, 2026
2c4afc3
fix: skip name length check on update, add missing tests
Mar 8, 2026
9c9f26f
fix: add backup name length validation, restrict file permissions, us…
Mar 8, 2026
7ae4fd1
fix: add resource limits, restrict dir permissions, validate S3 endpoint
Mar 8, 2026
f1132e4
fix: use typed condition constant, requeue on ClusterNotFound
Mar 8, 2026
3b20c52
fix: add pod SecurityContext and set -e in restore script
Mar 8, 2026
cf69df2
fix: address maintainer review feedback
Mar 8, 2026
0406d8c
chore: replace reflect.DeepEqual with semantic.DeepEqual
BROngineer Mar 13, 2026
516d5d6
tests: verify cronjob updated on schedule update
BROngineer Mar 13, 2026
5bf0723
chore: add finished job ttl and active job deadline fields
BROngineer Mar 19, 2026
54658c5
chore: add validation for cron schedule
BROngineer Mar 19, 2026
a838863
chore: fix linting
BROngineer Mar 19, 2026
bc283b0
chore: revert adding fields to crds
BROngineer Mar 19, 2026
f671c51
chore: add context timeout to restore op
BROngineer Mar 20, 2026
4ee52ff
chore: consistent backups naming
BROngineer Mar 20, 2026
58a936e
chore: add indexer and watch for etcd cluster to schedule controller
BROngineer Mar 20, 2026
2657153
chore: fix nilaway false-positive fail in tests
BROngineer Mar 20, 2026
3714779
chore: generate all
BROngineer Mar 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ COPY internal/ ./internal/
# was called. For example, if we call make docker-build in a local env which has the Apple Silicon M1 SO
# the docker BUILDPLATFORM arg will be linux/arm64 when for Apple x86 it will be linux/amd64. Therefore,
# by leaving it empty we can ensure that the container and binary shipped on it will have the same platform.
RUN CGO_ENABLED=0 GOOS="${TARGETOS:-linux}" GOARCH="${TARGETARCH}" go build -a -o manager cmd/manager/main.go
RUN CGO_ENABLED=0 GOOS="${TARGETOS:-linux}" GOARCH="${TARGETARCH}" go build -a -o manager cmd/manager/main.go && \
CGO_ENABLED=0 GOOS="${TARGETOS:-linux}" GOARCH="${TARGETARCH}" go build -a -o backup-agent cmd/backup-agent/main.go && \
CGO_ENABLED=0 GOOS="${TARGETOS:-linux}" GOARCH="${TARGETARCH}" go build -a -o restore-agent cmd/restore-agent/main.go

# Use distroless as minimal base image to package the manager binary
# Refer to https://github.com/GoogleContainerTools/distroless for more details
Expand All @@ -29,3 +31,5 @@ ENTRYPOINT ["/manager"]
USER 65532:65532
WORKDIR /
COPY --chown=root:root --from=builder /workspace/manager .
COPY --chown=root:root --from=builder /workspace/backup-agent .
COPY --chown=root:root --from=builder /workspace/restore-agent .
12 changes: 12 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,25 @@ helm-crd-copy: yq kustomize ## Copy CRDs from kustomize to helm-chart
@$(eval TMP := $(shell mktemp -d))
@$(KUSTOMIZE) build config/default > $(TMP)/manifest.yaml && cd $(TMP) && $(YQ) -s '.kind + "-" + .metadata.name' --no-doc manifest.yaml && cd $(OLDPWD)
@mv $(TMP)/CustomResourceDefinition-etcdclusters.etcd.aenix.io charts/etcd-operator/crds/etcd-cluster.yaml
@mv $(TMP)/CustomResourceDefinition-etcdbackups.etcd.aenix.io charts/etcd-operator/crds/etcd-backup.yaml
@mv $(TMP)/CustomResourceDefinition-etcdbackupschedules.etcd.aenix.io charts/etcd-operator/crds/etcd-backup-schedule.yaml
@rm -rf $(TMP)

##@ Build

.PHONY: build
build: manifests generate fmt vet ## Build manager, backup-agent and restore-agent binaries.
go build -o bin/manager cmd/manager/main.go
go build -o bin/backup-agent cmd/backup-agent/main.go
go build -o bin/restore-agent cmd/restore-agent/main.go

# Standalone shortcut: unlike `build`, this target declares no prerequisites.
.PHONY: build-backup-agent
build-backup-agent: ## Build backup-agent binary.
go build -o bin/backup-agent cmd/backup-agent/main.go

# Standalone shortcut: unlike `build`, this target declares no prerequisites.
.PHONY: build-restore-agent
build-restore-agent: ## Build restore-agent binary.
go build -o bin/restore-agent cmd/restore-agent/main.go

build-plugin:
go build -o bin/kubectl-etcd cmd/kubectl-etcd/main.go
Expand Down
24 changes: 24 additions & 0 deletions PROJECT
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,28 @@ resources:
defaulting: true
validation: true
webhookVersion: v1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: etcd.aenix.io
group: etcd.aenix.io
kind: EtcdBackup
path: github.com/aenix-io/etcd-operator/api/v1alpha1
version: v1alpha1
webhooks:
validation: true
webhookVersion: v1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: etcd.aenix.io
group: etcd.aenix.io
kind: EtcdBackupSchedule
path: github.com/aenix-io/etcd-operator/api/v1alpha1
version: v1alpha1
webhooks:
validation: true
webhookVersion: v1
version: "3"
111 changes: 111 additions & 0 deletions api/v1alpha1/etcdbackup_types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
Copyright 2024 The etcd-operator Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v1alpha1

import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Condition type names for EtcdBackup status conditions.
// NOTE(review): the precise moment each condition is set is decided by the
// controller, which is not part of this file — confirm there.
const (
// EtcdBackupConditionStarted is the "Started" condition type.
EtcdBackupConditionStarted = "Started"
// EtcdBackupConditionComplete is the "Complete" condition type. Its status is
// what the "Status" print column of EtcdBackup displays.
EtcdBackupConditionComplete = "Complete"
// EtcdBackupConditionFailed is the "Failed" condition type.
EtcdBackupConditionFailed = "Failed"
)

// EtcdBackupSpec defines the desired state of EtcdBackup.
// The validating webhook rejects any update that changes the spec, so it is
// effectively immutable after creation.
type EtcdBackupSpec struct {
// ClusterRef references the EtcdCluster to back up. The name must be non-empty.
ClusterRef corev1.LocalObjectReference `json:"clusterRef"`
// Destination defines where the backup will be stored.
Destination BackupDestination `json:"destination"`
}

// BackupDestination defines the target location for the backup. Exactly one must be specified:
// the validating webhook rejects a destination with neither or with both of S3 and PVC set.
type BackupDestination struct {
// S3 defines S3-compatible storage as the backup destination.
// +optional
S3 *S3BackupDestination `json:"s3,omitempty"`
// PVC defines a PersistentVolumeClaim as the backup destination.
// +optional
PVC *PVCBackupDestination `json:"pvc,omitempty"`
}

// S3BackupDestination defines S3-compatible storage parameters.
type S3BackupDestination struct {
// Endpoint is the S3-compatible endpoint URL (e.g., "https://s3.amazonaws.com").
// It is required and must start with "http://" or "https://".
Endpoint string `json:"endpoint"`
// Bucket is the name of the S3 bucket. It is required.
Bucket string `json:"bucket"`
// Key is the key prefix (directory path) within the bucket.
// The operator appends the backup filename automatically.
// +optional
Key string `json:"key,omitempty"`
// CredentialsSecretRef references a Secret containing AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY keys.
// The referenced Secret name must be non-empty.
CredentialsSecretRef corev1.LocalObjectReference `json:"credentialsSecretRef"`
// Region is the AWS region for the S3 bucket.
// +optional
Region string `json:"region,omitempty"`
// ForcePathStyle forces path-style S3 URLs (e.g., endpoint/bucket/key)
// instead of virtual-hosted-style (e.g., bucket.endpoint/key).
// Most S3-compatible providers (MinIO, Ceph) require path style.
// +optional
ForcePathStyle bool `json:"forcePathStyle,omitempty"`
}

// PVCBackupDestination defines a PersistentVolumeClaim as the backup target.
type PVCBackupDestination struct {
// ClaimName is the name of the PersistentVolumeClaim to use. It is required.
ClaimName string `json:"claimName"`
// SubPath is an optional sub-directory within the PVC volume.
// The operator appends the backup filename automatically.
// It must be a relative path that does not climb above the volume root.
// +optional
SubPath string `json:"subPath,omitempty"`
}

// EtcdBackupStatus defines the observed state of EtcdBackup
type EtcdBackupStatus struct {
// Conditions holds the status conditions reported for the backup.
// The "Complete" condition is the one shown in the "Status" print column.
Conditions []metav1.Condition `json:"conditions,omitempty"`
}

// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:printcolumn:name="Cluster",type=string,JSONPath=`.spec.clusterRef.name`
// +kubebuilder:printcolumn:name="Status",type=string,JSONPath=`.status.conditions[?(@.type=="Complete")].status`
// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`

// EtcdBackup is the Schema for the etcdbackups API.
// It describes a backup of one EtcdCluster to a single destination.
type EtcdBackup struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`

// Spec is the desired backup: which cluster to back up and where to store it.
Spec EtcdBackupSpec `json:"spec,omitempty"`
// Status is the observed state of the backup, exposed through the status subresource.
Status EtcdBackupStatus `json:"status,omitempty"`
}

// +kubebuilder:object:root=true

// EtcdBackupList contains a list of EtcdBackup
type EtcdBackupList struct {
metav1.TypeMeta `json:",inline"`
metav1.ListMeta `json:"metadata,omitempty"`
// Items is the list of EtcdBackup objects.
Items []EtcdBackup `json:"items"`
}

// init registers the EtcdBackup and EtcdBackupList types with the package's SchemeBuilder.
func init() {
SchemeBuilder.Register(&EtcdBackup{}, &EtcdBackupList{})
}
175 changes: 175 additions & 0 deletions api/v1alpha1/etcdbackup_webhook.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
/*
Copyright 2024 The etcd-operator Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v1alpha1

import (
"fmt"
"path/filepath"
"strings"

"k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/validation/field"
ctrl "sigs.k8s.io/controller-runtime"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/webhook"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
)

// etcdbackuplog is the named logger used by the EtcdBackup webhook handlers in this file.
var etcdbackuplog = logf.Log.WithName("etcdbackup-resource")

// SetupWebhookWithManager registers the EtcdBackup webhook with the given manager
// and returns any error reported while completing the registration.
func (r *EtcdBackup) SetupWebhookWithManager(mgr ctrl.Manager) error {
	builder := ctrl.NewWebhookManagedBy(mgr).For(r)
	return builder.Complete()
}

// +kubebuilder:webhook:path=/validate-etcd-aenix-io-v1alpha1-etcdbackup,mutating=false,failurePolicy=fail,sideEffects=None,groups=etcd.aenix.io,resources=etcdbackups,verbs=create;update,versions=v1alpha1,name=vetcdbackup.kb.io,admissionReviewVersions=v1

// Compile-time assertion that *EtcdBackup implements webhook.Validator.
var _ webhook.Validator = &EtcdBackup{}

// ValidateCreate implements webhook.Validator so a webhook will be registered for the type.
//
// It checks, in order, that the object name leaves room for the Job suffix, that
// spec.clusterRef.name is set, and that spec.destination is valid. All problems
// found are collected and returned together as a single Invalid API error; no
// warnings are ever returned.
func (r *EtcdBackup) ValidateCreate() (admission.Warnings, error) {
	etcdbackuplog.Info("validate create", "name", r.Name)

	// The backup Job is named "{name}-backup", and Job names must fit the
	// 63-character DNS label limit, so the object name must leave room for the suffix.
	const (
		jobSuffix     = "-backup"
		maxJobNameLen = 63
	)
	nameLimit := maxJobNameLen - len(jobSuffix)

	var errs field.ErrorList

	if len(r.Name) > nameLimit {
		detail := fmt.Sprintf(
			"name must be at most %d characters (Job name limit is %d, suffix %q is %d characters)",
			nameLimit, maxJobNameLen, jobSuffix, len(jobSuffix))
		errs = append(errs, field.Invalid(field.NewPath("metadata", "name"), r.Name, detail))
	}

	if r.Spec.ClusterRef.Name == "" {
		clusterNamePath := field.NewPath("spec", "clusterRef", "name")
		errs = append(errs, field.Required(clusterNamePath, "clusterRef.name is required"))
	}

	errs = append(errs, r.validateDestination()...)

	if len(errs) == 0 {
		return nil, nil
	}

	groupKind := schema.GroupKind{Group: GroupVersion.Group, Kind: "EtcdBackup"}
	return nil, errors.NewInvalid(groupKind, r.Name, errs)
}

// ValidateUpdate implements webhook.Validator so a webhook will be registered for the type.
//
// The spec of an EtcdBackup is immutable: any update whose spec is not
// semantically equal to the old one is rejected with a Forbidden error on "spec".
// A plain error is returned when old is not an *EtcdBackup.
func (r *EtcdBackup) ValidateUpdate(old runtime.Object) (admission.Warnings, error) {
	etcdbackuplog.Info("validate update", "name", r.Name)

	previous, isBackup := old.(*EtcdBackup)
	if !isBackup {
		return nil, fmt.Errorf("expected EtcdBackup but got %T", old)
	}

	// Unchanged spec (e.g. metadata-only updates) is always allowed.
	if equality.Semantic.DeepEqual(r.Spec, previous.Spec) {
		return nil, nil
	}

	errs := field.ErrorList{
		field.Forbidden(field.NewPath("spec"), "EtcdBackup spec is immutable"),
	}
	groupKind := schema.GroupKind{Group: GroupVersion.Group, Kind: "EtcdBackup"}
	return nil, errors.NewInvalid(groupKind, r.Name, errs)
}

// ValidateDelete implements webhook.Validator so a webhook will be registered for the type.
// It only logs the call and always allows the deletion.
func (r *EtcdBackup) ValidateDelete() (admission.Warnings, error) {
etcdbackuplog.Info("validate delete", "name", r.Name)
return nil, nil
}

// validateDestination validates spec.destination of the backup and returns
// every problem found, anchored under the "spec.destination" field path.
func (r *EtcdBackup) validateDestination() field.ErrorList {
	destPath := field.NewPath("spec", "destination")
	return validateBackupDestination(r.Spec.Destination, destPath)
}

// validateBackupDestination validates a BackupDestination at the given field path.
// This is shared between EtcdBackup and EtcdCluster (bootstrap restore source) webhooks.
//
// Exactly one of dest.S3 or dest.PVC must be set. When that does not hold, a
// single error on destPath is returned and no backend is inspected further.
// Otherwise every problem found in the selected backend is reported, each
// anchored at the offending child of destPath. An empty list means the
// destination is valid.
func validateBackupDestination(dest BackupDestination, destPath *field.Path) field.ErrorList {
	var allErrors field.ErrorList

	if dest.S3 == nil && dest.PVC == nil {
		allErrors = append(allErrors, field.Required(
			destPath,
			"exactly one of s3 or pvc must be specified",
		))
		return allErrors
	}

	if dest.S3 != nil && dest.PVC != nil {
		allErrors = append(allErrors, field.Invalid(
			destPath,
			"both s3 and pvc",
			"exactly one of s3 or pvc must be specified, not both",
		))
		return allErrors
	}

	if s3 := dest.S3; s3 != nil {
		s3Path := destPath.Child("s3")
		if s3.Endpoint == "" {
			allErrors = append(allErrors, field.Required(s3Path.Child("endpoint"), "endpoint is required"))
		} else if !strings.HasPrefix(s3.Endpoint, "http://") && !strings.HasPrefix(s3.Endpoint, "https://") {
			allErrors = append(allErrors, field.Invalid(s3Path.Child("endpoint"), s3.Endpoint,
				"endpoint must start with http:// or https://"))
		}
		if s3.Bucket == "" {
			allErrors = append(allErrors, field.Required(s3Path.Child("bucket"), "bucket is required"))
		}
		if s3.CredentialsSecretRef.Name == "" {
			allErrors = append(allErrors, field.Required(
				s3Path.Child("credentialsSecretRef", "name"),
				"credentialsSecretRef.name is required",
			))
		}
	}

	if pvc := dest.PVC; pvc != nil {
		pvcPath := destPath.Child("pvc")
		if pvc.ClaimName == "" {
			allErrors = append(allErrors, field.Required(pvcPath.Child("claimName"), "claimName is required"))
		}
		if pvc.SubPath != "" {
			// Clean first so that inner sequences such as "a/../.." are resolved
			// before deciding whether the path leaves the volume root.
			cleaned := filepath.Clean(pvc.SubPath)
			// Only a leading ".." path component escapes the root. A bare prefix
			// test on ".." would also reject legitimate relative names whose first
			// component merely starts with two dots (e.g. "..snapshots").
			escapesRoot := cleaned == ".." ||
				strings.HasPrefix(cleaned, ".."+string(filepath.Separator))
			if escapesRoot || filepath.IsAbs(cleaned) {
				allErrors = append(allErrors, field.Invalid(
					pvcPath.Child("subPath"), pvc.SubPath,
					"subPath must be a relative path and must not contain '..' components",
				))
			}
		}
	}

	return allErrors
}
Loading
Loading