Skip to content

Commit ca23d7e

Browse files
committed
Reduce the mean time to recovery (MTTR) in case of failed deployments
Signed-off-by: Matheus Pimenta <matheuscscp@gmail.com>
1 parent 40ed2cc commit ca23d7e

37 files changed

+1174
-168
lines changed

.github/workflows/e2e.yaml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -712,8 +712,6 @@ jobs:
712712
713713
kubectl -n helm-system delete -f config/testdata/$test_name
714714
- name: Run install test fail test
715-
# TODO: re-enable after upgrading to Helm 4.1.1 (https://github.com/helm/helm/pull/31730)
716-
if: false
717715
run: |
718716
test_name=install-test-fail
719717
kubectl -n helm-system apply -f config/testdata/$test_name
@@ -739,8 +737,6 @@ jobs:
739737
740738
kubectl -n helm-system delete -f config/testdata/$test_name
741739
- name: Run install test fail ignore test
742-
# TODO: re-enable after upgrading to Helm 4.1.1 (https://github.com/helm/helm/pull/31730)
743-
if: false
744740
run: |
745741
test_name=install-test-fail-ignore
746742
kubectl -n helm-system apply -f config/testdata/$test_name
@@ -863,8 +859,6 @@ jobs:
863859
864860
kubectl delete -n helm-system -f config/testdata/$test_name/install.yaml
865861
- name: Run upgrade test fail test
866-
# TODO: re-enable after upgrading to Helm 4.1.1 (https://github.com/helm/helm/pull/31730)
867-
if: false
868862
run: |
869863
test_name=upgrade-test-fail
870864
kubectl -n helm-system apply -f config/testdata/$test_name/install.yaml

api/v2/helmrelease_types.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ type HelmReleaseSpec struct {
194194
// health of custom resources using Common Expression Language (CEL).
195195
// The expressions are evaluated only when the specific Helm action
196196
// taking place has wait enabled, i.e. DisableWait is false, and the
197-
// 'watcher' WaitStrategy is used.
197+
// 'poller' WaitStrategy is used.
198198
// +optional
199199
HealthCheckExprs []kustomize.CustomHealthCheck `json:"healthCheckExprs,omitempty"`
200200
}
@@ -441,8 +441,8 @@ type HelmChartTemplateVerification struct {
441441
type WaitStrategyName string
442442

443443
const (
444-
// WaitStrategyWatcher is the strategy for watching resource statuses via kstatus.
445-
WaitStrategyWatcher WaitStrategyName = "watcher"
444+
// WaitStrategyPoller is the strategy for polling resource statuses via kstatus.
445+
WaitStrategyPoller WaitStrategyName = "poller"
446446

447447
// WaitStrategyLegacy is the legacy strategy for waiting for resources to be ready
448448
// used in Helm v3.
@@ -453,12 +453,12 @@ const (
453453
// resources to become ready.
454454
type WaitStrategy struct {
455455
// Name is Helm's wait strategy for waiting for applied resources to
456-
// become ready. One of 'watcher' or 'legacy'. The 'watcher' strategy uses
457-
// kstatus to watch resource statuses, while the 'legacy' strategy uses
456+
// become ready. One of 'poller' or 'legacy'. The 'poller' strategy uses
457+
// kstatus to poll resource statuses, while the 'legacy' strategy uses
458458
// Helm v3's waiting logic.
459-
// Defaults to 'watcher', or to 'legacy' when UseHelm3Defaults feature
459+
// Defaults to 'poller', or to 'legacy' when UseHelm3Defaults feature
460460
// gate is enabled.
461-
// +kubebuilder:validation:Enum=watcher;legacy
461+
// +kubebuilder:validation:Enum=poller;legacy
462462
// +required
463463
Name WaitStrategyName `json:"name"`
464464
}

config/crd/bases/helm.toolkit.fluxcd.io_helmreleases.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ spec:
355355
health of custom resources using Common Expression Language (CEL).
356356
The expressions are evaluated only when the specific Helm action
357357
taking place has wait enabled, i.e. DisableWait is false, and the
358-
'watcher' WaitStrategy is used.
358+
'poller' WaitStrategy is used.
359359
items:
360360
description: CustomHealthCheck defines the health check for custom
361361
resources.
@@ -1102,13 +1102,13 @@ spec:
11021102
name:
11031103
description: |-
11041104
Name is Helm's wait strategy for waiting for applied resources to
1105-
become ready. One of 'watcher' or 'legacy'. The 'watcher' strategy uses
1106-
kstatus to watch resource statuses, while the 'legacy' strategy uses
1105+
become ready. One of 'poller' or 'legacy'. The 'poller' strategy uses
1106+
kstatus to poll resource statuses, while the 'legacy' strategy uses
11071107
Helm v3's waiting logic.
1108-
Defaults to 'watcher', or to 'legacy' when UseHelm3Defaults feature
1108+
Defaults to 'poller', or to 'legacy' when UseHelm3Defaults feature
11091109
gate is enabled.
11101110
enum:
1111-
- watcher
1111+
- poller
11121112
- legacy
11131113
type: string
11141114
required:

config/testdata/server-side-apply/rollback-upgrade.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,4 @@ spec:
2929
replicaCount: 2
3030
faults:
3131
unready: true
32-
timeout: 3s
32+
timeout: 10s

config/testdata/upgrade-fail-remediate/upgrade.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,4 @@ spec:
2525
replicaCount: 2
2626
faults:
2727
unready: true
28-
timeout: 3s
28+
timeout: 10s

config/testdata/upgrade-fail-retry/upgrade.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,4 @@ spec:
2525
replicaCount: 2
2626
faults:
2727
unready: true
28-
timeout: 3s
28+
timeout: 10s

docs/api/v2/helm.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,7 @@ resources to become ready.</p>
435435
health of custom resources using Common Expression Language (CEL).
436436
The expressions are evaluated only when the specific Helm action
437437
taking place has wait enabled, i.e. DisableWait is false, and the
438-
&lsquo;watcher&rsquo; WaitStrategy is used.</p>
438+
&lsquo;poller&rsquo; WaitStrategy is used.</p>
439439
</td>
440440
</tr>
441441
</table>
@@ -1603,7 +1603,7 @@ resources to become ready.</p>
16031603
health of custom resources using Common Expression Language (CEL).
16041604
The expressions are evaluated only when the specific Helm action
16051605
taking place has wait enabled, i.e. DisableWait is false, and the
1606-
&lsquo;watcher&rsquo; WaitStrategy is used.</p>
1606+
&lsquo;poller&rsquo; WaitStrategy is used.</p>
16071607
</td>
16081608
</tr>
16091609
</tbody>
@@ -3471,10 +3471,10 @@ WaitStrategyName
34713471
</td>
34723472
<td>
34733473
<p>Name is Helm&rsquo;s wait strategy for waiting for applied resources to
3474-
become ready. One of &lsquo;watcher&rsquo; or &lsquo;legacy&rsquo;. The &lsquo;watcher&rsquo; strategy uses
3475-
kstatus to watch resource statuses, while the &lsquo;legacy&rsquo; strategy uses
3474+
become ready. One of &lsquo;poller&rsquo; or &lsquo;legacy&rsquo;. The &lsquo;poller&rsquo; strategy uses
3475+
kstatus to poll resource statuses, while the &lsquo;legacy&rsquo; strategy uses
34763476
Helm v3&rsquo;s waiting logic.
3477-
Defaults to &lsquo;watcher&rsquo;, or to &lsquo;legacy&rsquo; when UseHelm3Defaults feature
3477+
Defaults to &lsquo;poller&rsquo;, or to &lsquo;legacy&rsquo; when UseHelm3Defaults feature
34783478
gate is enabled.</p>
34793479
</td>
34803480
</tr>

docs/spec/v2/helmreleases.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -952,14 +952,14 @@ for resources to become ready after Helm actions.
952952
The field offers the following subfields:
953953

954954
- `.name` (Required): The strategy for waiting for resources to be ready.
955-
One of `watcher` or `legacy`. The `watcher` strategy uses kstatus to watch resource
955+
One of `poller` or `legacy`. The `poller` strategy uses kstatus to poll resource
956956
statuses, while the `legacy` strategy uses Helm v3's waiting logic. Defaults to
957-
`watcher`, or to `legacy` when the `UseHelm3Defaults` feature gate is enabled.
957+
`poller`, or to `legacy` when the `UseHelm3Defaults` feature gate is enabled.
958958

959959
```yaml
960960
spec:
961961
waitStrategy:
962-
name: watcher
962+
name: poller
963963
```
964964

965965
### Health check expressions
@@ -968,8 +968,8 @@ spec:
968968
checks on custom resources using [Common Expression Language (CEL)](https://cel.dev/).
969969

970970
The expressions are evaluated only when the Helm action taking place has wait
971-
enabled (i.e. `.spec.<action>.disableWait` is `false`) and the `watcher`
972-
wait strategy is used (i.e. `.spec.waitStrategy.name` is `watcher`).
971+
enabled (i.e. `.spec.<action>.disableWait` is `false`) and the `poller`
972+
wait strategy is used (i.e. `.spec.waitStrategy.name` is `poller`).
973973

974974
The `.spec.healthCheckExprs` field accepts a list of objects with the following fields:
975975

internal/action/client.go

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
/*
2+
Copyright 2026 The Flux authors
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package action
18+
19+
import (
20+
"context"
21+
22+
helmkube "helm.sh/helm/v4/pkg/kube"
23+
"k8s.io/cli-runtime/pkg/genericclioptions"
24+
25+
"github.com/fluxcd/pkg/ssa"
26+
)
27+
28+
// Client wraps a Helm kube Client to replace the kstatus implementation.
29+
type Client struct {
30+
// We need to embed the struct and not helmkube.Interface
31+
// as Helm adds more methods to the struct over time
32+
// without adding them to the interface. This ensures
33+
// we always embed all methods.
34+
*helmkube.Client
35+
36+
newResourceManager func(sr ...NewStatusReaderFunc) *ssa.ResourceManager
37+
waitContext context.Context
38+
}
39+
40+
// Ensure Client implements helmkube.Interface.
41+
var _ helmkube.Interface = (*Client)(nil)
42+
43+
// Ensure Client implements helmkube.InterfaceWaitOptions.
44+
var _ helmkube.InterfaceWaitOptions = (*Client)(nil)
45+
46+
// NewClient returns a new Client wrapping a Helm kube Client; until a resource manager factory is set on it, waits are delegated to the wrapped Helm client.
47+
func NewClient(getter genericclioptions.RESTClientGetter) *Client {
48+
return &Client{Client: helmkube.New(getter)}
49+
}
50+
51+
// GetWaiter implements helmkube.Interface by returning
52+
// a custom kstatus-based Waiter.
53+
func (c *Client) GetWaiter(strategy helmkube.WaitStrategy) (helmkube.Waiter, error) {
54+
return c.newWaiter(strategy)
55+
}
56+
57+
// GetWaiterWithOptions implements helmkube.InterfaceWaitOptions by
58+
// returning a custom kstatus-based Waiter.
59+
func (c *Client) GetWaiterWithOptions(strategy helmkube.WaitStrategy,
60+
opts ...helmkube.WaitOption) (helmkube.Waiter, error) {
61+
return c.newWaiter(strategy, opts...)
62+
}
63+
64+
// newWaiter returns a new Waiter based on the provided strategy.
65+
func (c *Client) newWaiter(strategy helmkube.WaitStrategy,
66+
opts ...helmkube.WaitOption) (helmkube.Waiter, error) {
67+
68+
if strategy == helmkube.LegacyStrategy || c.newResourceManager == nil {
69+
return c.Client.GetWaiterWithOptions(strategy, opts...)
70+
}
71+
72+
return &waiter{
73+
c: c.Client,
74+
strategy: strategy,
75+
newResourceManager: c.newResourceManager,
76+
waitContext: c.waitContext,
77+
}, nil
78+
}

internal/action/config.go

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,17 @@ limitations under the License.
1717
package action
1818

1919
import (
20+
"context"
2021
"fmt"
2122
"log/slog"
2223

23-
"github.com/fluxcd/cli-utils/pkg/kstatus/polling/engine"
2424
helmaction "helm.sh/helm/v4/pkg/action"
25-
helmkube "helm.sh/helm/v4/pkg/kube"
2625
helmstorage "helm.sh/helm/v4/pkg/storage"
2726
helmdriver "helm.sh/helm/v4/pkg/storage/driver"
2827
"k8s.io/cli-runtime/pkg/genericclioptions"
2928

29+
"github.com/fluxcd/pkg/ssa"
30+
3031
"github.com/fluxcd/helm-controller/internal/storage"
3132
)
3233

@@ -45,15 +46,18 @@ type ConfigFactory struct {
4546
// Getter is the RESTClientGetter used to get the RESTClient for the
4647
// Kubernetes API.
4748
Getter genericclioptions.RESTClientGetter
48-
// KubeClient is the (Helm) Kubernetes client, it is Helm-specific and
49+
// KubeClient is the (wrapped) Helm Kubernetes client; it is Helm-specific and
4950
// contains a factory used for lazy-loading.
50-
KubeClient *helmkube.Client
51+
KubeClient *Client
5152
// Driver to use for the Helm action.
5253
Driver helmdriver.Driver
5354
// StorageLog is the logger to use for the Helm storage driver.
5455
StorageLog slog.Handler
55-
// StatusReader is the status reader used to evaluate custom health checks.
56-
StatusReader engine.StatusReader
56+
// NewResourceManager is the factory function for the resource manager used to evaluate custom health checks.
57+
NewResourceManager func(sr ...NewStatusReaderFunc) *ssa.ResourceManager
58+
// WaitContext is the context used for waiting operations in the Helm
59+
// Kubernetes client.
60+
WaitContext context.Context
5761
}
5862

5963
// ConfigFactoryOption is a function that configures a ConfigFactory.
@@ -62,7 +66,7 @@ type ConfigFactoryOption func(*ConfigFactory) error
6266
// NewConfigFactory returns a new ConfigFactory configured with the provided
6367
// options.
6468
func NewConfigFactory(getter genericclioptions.RESTClientGetter, opts ...ConfigFactoryOption) (*ConfigFactory, error) {
65-
kubeClient := helmkube.New(getter)
69+
kubeClient := NewClient(getter)
6670
factory := &ConfigFactory{
6771
Getter: getter,
6872
KubeClient: kubeClient,
@@ -133,10 +137,19 @@ func WithStorageLog(log slog.Handler) ConfigFactoryOption {
133137
}
134138
}
135139

136-
// WithStatusReader sets the ConfigFactory.StatusReader.
137-
func WithStatusReader(reader engine.StatusReader) ConfigFactoryOption {
140+
// WithResourceManager sets the ConfigFactory.NewResourceManager.
141+
func WithResourceManager(mgr func(sr ...NewStatusReaderFunc) *ssa.ResourceManager) ConfigFactoryOption {
142+
return func(f *ConfigFactory) error {
143+
f.NewResourceManager = mgr
144+
return nil
145+
}
146+
}
147+
148+
// WithWaitContext sets the context used for waiting operations in the Helm
149+
// Kubernetes client.
150+
func WithWaitContext(ctx context.Context) ConfigFactoryOption {
138151
return func(f *ConfigFactory) error {
139-
f.StatusReader = reader
152+
f.WaitContext = ctx
140153
return nil
141154
}
142155
}
@@ -156,11 +169,12 @@ func (c *ConfigFactory) NewStorage(observers ...storage.ObserveFunc) *helmstorag
156169
// Build returns a new Helm action.Configuration configured with the receiver
157170
// values, and the provided logger and observer(s).
158171
func (c *ConfigFactory) Build(log slog.Handler, observers ...storage.ObserveFunc) *helmaction.Configuration {
159-
client := c.KubeClient
172+
client := NewClient(c.Getter)
173+
client.newResourceManager = c.NewResourceManager
174+
client.waitContext = c.WaitContext
160175

161176
var opts []helmaction.ConfigurationOption
162177
if log != nil {
163-
client = helmkube.New(c.Getter)
164178
client.SetLogger(log)
165179
opts = append(opts, helmaction.ConfigurationSetLogger(log))
166180
}

0 commit comments

Comments
 (0)