Skip to content

Commit 5b7cb21

Browse files
authored
fix: karpenter tagging, addons and flaky integration tests (#8508)
* refactor: merge security group tagging tests into karpenter tests
  - Move security group tagging integration tests from separate test suite into karpenter_test.go
  - Consolidate tests to reduce cluster creation time from 4 to 2 clusters
  - Add comprehensive test for Karpenter deployment with security group tagging when both Karpenter and karpenter.sh/discovery metadata tag are present
  - Enhance legacy Karpenter test to also verify security group is NOT tagged when metadata tag is missing
  - Remove redundant security_group_tagging test directory and files
  - Fix Karpenter version string to '1.6.2' (was 'v1.6.2' which is not present)

  This change improves test efficiency while maintaining comprehensive coverage of both Karpenter functionality and security group tagging behavior.
* Remove redundant addon test cases
  - Removed namespace validation edge case tests
  - Removed duplicate addon creation tests
  - Cleaned up repetitive test scenarios for better maintainability
* Revert "Add exact match check to use specified addon versions (#8478)"
  This reverts commit 4af29b0.
* fix: fix flaky integration tests
1 parent 233336b commit 5b7cb21

File tree

13 files changed

+380
-941
lines changed

13 files changed

+380
-941
lines changed

integration/tests/addons/addons_test.go

Lines changed: 149 additions & 290 deletions
Large diffs are not rendered by default.

integration/tests/crud/creategetdelete_test.go

Lines changed: 42 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -271,11 +271,21 @@ var _ = Describe("(Integration) Create, Get, Scale & Delete", func() {
271271
test.Close()
272272
Eventually(func() int {
273273
return len(test.ListPods(test.Namespace, metav1.ListOptions{}).Items)
274-
}, "3m", "1s").Should(BeZero())
274+
}, "5m", "1s").Should(BeZero())
275275
})
276276

277277
It("should deploy podinfo service to the cluster and access it via proxy", func() {
278278
d := test.CreateDeploymentFromFile(test.Namespace, "../../data/crud-podinfo.yaml")
279+
DeferCleanup(func() {
280+
clientset := makeClientset()
281+
gracePeriod := int64(0)
282+
err := clientset.AppsV1().Deployments(test.Namespace).Delete(context.Background(), d.Name, metav1.DeleteOptions{
283+
GracePeriodSeconds: &gracePeriod,
284+
})
285+
if err != nil {
286+
fmt.Fprintf(GinkgoWriter, "Failed to delete deployment %s: %v\n", d.Name, err)
287+
}
288+
})
279289
test.WaitForDeploymentReady(d, commonTimeout)
280290

281291
pods := test.ListPodsFromDeployment(d)
@@ -298,6 +308,16 @@ var _ = Describe("(Integration) Create, Get, Scale & Delete", func() {
298308

299309
It("should have functional DNS", func() {
300310
d := test.CreateDaemonSetFromFile(test.Namespace, "../../data/test-dns.yaml")
311+
DeferCleanup(func() {
312+
clientset := makeClientset()
313+
gracePeriod := int64(0)
314+
err := clientset.AppsV1().DaemonSets(test.Namespace).Delete(context.Background(), d.Name, metav1.DeleteOptions{
315+
GracePeriodSeconds: &gracePeriod,
316+
})
317+
if err != nil {
318+
fmt.Fprintf(GinkgoWriter, "Failed to delete daemonset %s: %v\n", d.Name, err)
319+
}
320+
})
301321
test.WaitForDaemonSetReady(d, commonTimeout)
302322
ds, err := test.GetDaemonSet(test.Namespace, d.Name)
303323
Expect(err).ShouldNot(HaveOccurred())
@@ -306,6 +326,16 @@ var _ = Describe("(Integration) Create, Get, Scale & Delete", func() {
306326

307327
It("should have access to HTTP(S) sites", func() {
308328
d := test.CreateDaemonSetFromFile(test.Namespace, "../../data/test-http.yaml")
329+
DeferCleanup(func() {
330+
clientset := makeClientset()
331+
gracePeriod := int64(0)
332+
err := clientset.AppsV1().DaemonSets(test.Namespace).Delete(context.Background(), d.Name, metav1.DeleteOptions{
333+
GracePeriodSeconds: &gracePeriod,
334+
})
335+
if err != nil {
336+
fmt.Fprintf(GinkgoWriter, "Failed to delete daemonset %s: %v\n", d.Name, err)
337+
}
338+
})
309339
test.WaitForDaemonSetReady(d, commonTimeout)
310340
ds, err := test.GetDaemonSet(test.Namespace, d.Name)
311341
Expect(err).ShouldNot(HaveOccurred())
@@ -348,7 +378,7 @@ var _ = Describe("(Integration) Create, Get, Scale & Delete", func() {
348378
test.Close()
349379
Eventually(func() int {
350380
return len(test.ListPods(test.Namespace, metav1.ListOptions{}).Items)
351-
}, "3m", "1s").Should(BeZero())
381+
}, "5m", "1s").Should(BeZero())
352382
})
353383

354384
It("should have OIDC disabled by default", func() {
@@ -425,6 +455,16 @@ var _ = Describe("(Integration) Create, Get, Scale & Delete", func() {
425455

426456
It("should successfully run pods with an iamserviceaccount", func() {
427457
d := test.CreateDeploymentFromFile(test.Namespace, "../../data/iamserviceaccount-checker.yaml")
458+
DeferCleanup(func() {
459+
clientset := makeClientset()
460+
gracePeriod := int64(0)
461+
err := clientset.AppsV1().Deployments(test.Namespace).Delete(context.Background(), d.Name, metav1.DeleteOptions{
462+
GracePeriodSeconds: &gracePeriod,
463+
})
464+
if err != nil {
465+
fmt.Fprintf(GinkgoWriter, "Failed to delete deployment %s: %v\n", d.Name, err)
466+
}
467+
})
428468
test.WaitForDeploymentReady(d, 10*time.Minute)
429469

430470
pods := test.ListPodsFromDeployment(d)

integration/tests/karpenter/karpenter_test.go

Lines changed: 169 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,10 +3,18 @@
33
package karpenter
44

55
import (
6+
"bytes"
7+
"context"
8+
"encoding/json"
69
"fmt"
710
"testing"
811
"time"
912

13+
"github.com/aws/aws-sdk-go-v2/aws"
14+
"github.com/aws/aws-sdk-go-v2/config"
15+
cfn "github.com/aws/aws-sdk-go-v2/service/cloudformation"
16+
awsec2 "github.com/aws/aws-sdk-go-v2/service/ec2"
17+
1018
. "github.com/onsi/ginkgo/v2"
1119
. "github.com/onsi/gomega"
1220
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -16,6 +24,7 @@ import (
1624
clusterutils "github.com/weaveworks/eksctl/integration/utilities/cluster"
1725
"github.com/weaveworks/eksctl/integration/utilities/kube"
1826
api "github.com/weaveworks/eksctl/pkg/apis/eksctl.io/v1alpha5"
27+
"github.com/weaveworks/eksctl/pkg/cfn/outputs"
1928
"github.com/weaveworks/eksctl/pkg/karpenter"
2029
"github.com/weaveworks/eksctl/pkg/testutils"
2130
)
@@ -27,13 +36,20 @@ func init() {
2736
if err := api.Register(); err != nil {
2837
panic(fmt.Errorf("unexpected error registering API scheme: %w", err))
2938
}
30-
params = tests.NewParams("")
39+
params = tests.NewParams("karpenter")
3140
}
3241

3342
func TestKarpenter(t *testing.T) {
3443
testutils.RegisterAndRun(t)
3544
}
3645

46+
// NewConfig creates an AWS config for the given region
47+
func NewConfig(region string) aws.Config {
48+
cfg, err := config.LoadDefaultConfig(context.Background(), config.WithRegion(region))
49+
Expect(err).NotTo(HaveOccurred())
50+
return cfg
51+
}
52+
3753
var _ = Describe("(Integration) Karpenter", func() {
3854
var (
3955
clusterName string
@@ -51,10 +67,117 @@ var _ = Describe("(Integration) Karpenter", func() {
5167
Expect(cmd).To(RunSuccessfully())
5268
})
5369

54-
Context("Creating a cluster with Karpenter", func() {
70+
Context("Creating a cluster with Karpenter and security group tagging", func() {
71+
params.LogStacksEventsOnFailure()
72+
73+
It("should deploy Karpenter successfully and tag security group with karpenter.sh/discovery", func() {
74+
By("creating a cluster with both Karpenter enabled and karpenter.sh/discovery in metadata.tags")
75+
76+
clusterConfig := &api.ClusterConfig{
77+
TypeMeta: metav1.TypeMeta{
78+
Kind: api.ClusterConfigKind,
79+
APIVersion: api.SchemeGroupVersion.String(),
80+
},
81+
Metadata: &api.ClusterMeta{
82+
Name: clusterName,
83+
Region: params.Region,
84+
Version: api.DefaultVersion,
85+
Tags: map[string]string{
86+
"karpenter.sh/discovery": clusterName,
87+
},
88+
},
89+
IAM: &api.ClusterIAM{
90+
WithOIDC: api.Enabled(),
91+
},
92+
Karpenter: &api.Karpenter{
93+
Version: "1.6.3",
94+
},
95+
ManagedNodeGroups: []*api.ManagedNodeGroup{
96+
{
97+
NodeGroupBase: &api.NodeGroupBase{
98+
Name: "managed-ng-1",
99+
ScalingConfig: &api.ScalingConfig{
100+
MinSize: aws.Int(1),
101+
MaxSize: aws.Int(2),
102+
DesiredCapacity: aws.Int(1),
103+
},
104+
},
105+
},
106+
},
107+
}
108+
109+
data, err := json.Marshal(clusterConfig)
110+
Expect(err).NotTo(HaveOccurred())
111+
112+
cmd := params.EksctlCreateCmd.
113+
WithArgs(
114+
"cluster",
115+
"--config-file=-",
116+
"--verbose=4",
117+
"--kubeconfig", params.KubeconfigPath,
118+
).
119+
WithoutArg("--region", params.Region).
120+
WithStdin(bytes.NewReader(data))
121+
Expect(cmd).To(RunSuccessfully())
122+
123+
By("verifying Karpenter pods are healthy")
124+
kubeTest, err := kube.NewTest(params.KubeconfigPath)
125+
Expect(err).NotTo(HaveOccurred())
126+
127+
// Check that Karpenter webhook pod is ready
128+
Expect(kubeTest.WaitForPodsReady(karpenter.DefaultNamespace, metav1.ListOptions{
129+
LabelSelector: "app.kubernetes.io/instance=karpenter",
130+
}, 1, 10*time.Minute)).To(Succeed())
131+
132+
By("verifying the cluster shared node security group has karpenter.sh/discovery tags")
133+
config := NewConfig(params.Region)
134+
cfnSession := cfn.NewFromConfig(config)
135+
ec2Session := awsec2.NewFromConfig(config)
136+
137+
// Get the cluster stack to find the node security group
138+
describeStackOut, err := cfnSession.DescribeStacks(context.Background(), &cfn.DescribeStacksInput{
139+
StackName: aws.String(fmt.Sprintf("eksctl-%s-cluster", clusterName)),
140+
})
141+
Expect(err).NotTo(HaveOccurred())
142+
143+
var nodeSecurityGroupID string
144+
for _, output := range describeStackOut.Stacks[0].Outputs {
145+
if *output.OutputKey == outputs.ClusterSharedNodeSecurityGroup {
146+
nodeSecurityGroupID = *output.OutputValue
147+
break
148+
}
149+
}
150+
Expect(nodeSecurityGroupID).NotTo(BeEmpty(), "ClusterSharedNodeSecurityGroup should be found in stack outputs")
151+
152+
// Verify the security group has the expected karpenter.sh/discovery tag
153+
sgOutput, err := ec2Session.DescribeSecurityGroups(context.Background(), &awsec2.DescribeSecurityGroupsInput{
154+
GroupIds: []string{nodeSecurityGroupID},
155+
})
156+
Expect(err).NotTo(HaveOccurred())
157+
Expect(sgOutput.SecurityGroups).To(HaveLen(1))
158+
159+
securityGroup := sgOutput.SecurityGroups[0]
160+
var foundKarpenterTag bool
161+
var karpenterTagValue string
162+
163+
for _, tag := range securityGroup.Tags {
164+
if *tag.Key == "karpenter.sh/discovery" {
165+
foundKarpenterTag = true
166+
karpenterTagValue = *tag.Value
167+
break
168+
}
169+
}
170+
171+
Expect(foundKarpenterTag).To(BeTrue(), "Security group should have karpenter.sh/discovery tag")
172+
Expect(karpenterTagValue).To(Equal(clusterName), "karpenter.sh/discovery tag value should match cluster name")
173+
})
174+
175+
})
176+
177+
Context("Creating a cluster with Karpenter without any tag", func() {
55178
params.LogStacksEventsOnFailure()
56179

57-
It("should support karpenter", func() {
180+
It("should support karpenter and verify security group is NOT tagged when metadata tag is missing", func() {
58181
cmd := params.EksctlCreateCmd.
59182
WithArgs(
60183
"cluster",
@@ -66,12 +189,55 @@ var _ = Describe("(Integration) Karpenter", func() {
66189
WithStdin(clusterutils.ReaderFromFile(clusterName, params.Region, "testdata/cluster-config.yaml"))
67190
Expect(cmd).To(RunSuccessfully())
68191

192+
By("verifying Karpenter pods are healthy")
69193
kubeTest, err := kube.NewTest(params.KubeconfigPath)
70194
Expect(err).NotTo(HaveOccurred())
71195
// Check webhook pod
72196
Expect(kubeTest.WaitForPodsReady(karpenter.DefaultNamespace, metav1.ListOptions{
73197
LabelSelector: "app.kubernetes.io/instance=karpenter",
74198
}, 1, 10*time.Minute)).To(Succeed())
199+
200+
By("verifying the cluster shared node security group does NOT have karpenter.sh/discovery tags")
201+
config := NewConfig(params.Region)
202+
cfnSession := cfn.NewFromConfig(config)
203+
ec2Session := awsec2.NewFromConfig(config)
204+
205+
// Get the cluster stack to find the node security group
206+
describeStackOut, err := cfnSession.DescribeStacks(context.Background(), &cfn.DescribeStacksInput{
207+
StackName: aws.String(fmt.Sprintf("eksctl-%s-cluster", clusterName)),
208+
})
209+
Expect(err).NotTo(HaveOccurred())
210+
211+
var nodeSecurityGroupID string
212+
for _, output := range describeStackOut.Stacks[0].Outputs {
213+
if *output.OutputKey == outputs.ClusterSharedNodeSecurityGroup {
214+
nodeSecurityGroupID = *output.OutputValue
215+
break
216+
}
217+
}
218+
Expect(nodeSecurityGroupID).NotTo(BeEmpty(), "ClusterSharedNodeSecurityGroup should be found in stack outputs")
219+
220+
// Verify the security group does NOT have the karpenter.sh/discovery tag
221+
sgOutput, err := ec2Session.DescribeSecurityGroups(context.Background(), &awsec2.DescribeSecurityGroupsInput{
222+
GroupIds: []string{nodeSecurityGroupID},
223+
})
224+
Expect(err).NotTo(HaveOccurred())
225+
Expect(sgOutput.SecurityGroups).To(HaveLen(1))
226+
227+
securityGroup := sgOutput.SecurityGroups[0]
228+
var foundKarpenterTag bool
229+
230+
for _, tag := range securityGroup.Tags {
231+
if *tag.Key == "karpenter.sh/discovery" {
232+
foundKarpenterTag = true
233+
break
234+
}
235+
}
236+
237+
Expect(foundKarpenterTag).To(BeFalse(), "Security group should NOT have karpenter.sh/discovery tag when only Karpenter is enabled without metadata tag")
238+
239+
GinkgoWriter.Printf("Successfully verified Karpenter deployment without security group tagging\n")
240+
GinkgoWriter.Printf("Karpenter pods are healthy but security group %s does NOT have karpenter.sh/discovery tag\n", nodeSecurityGroupID)
75241
})
76242
})
77243
})

integration/tests/karpenter/testdata/cluster-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ metadata:
55
name: <generated>
66

77
karpenter:
8-
version: 'v0.20.0'
8+
version: '1.6.3'
99

1010
iam:
1111
withOIDC: true

0 commit comments

Comments
 (0)