Skip to content

Commit 86cdb5e

Browse files
authored
Implement workflow to install addon/helm on performance cluster (#1786)
1 parent d705a03 commit 86cdb5e

File tree

1 file changed

+246
-0
lines changed

1 file changed

+246
-0
lines changed
Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# SPDX-License-Identifier: MIT
3+
name: "Install/Remove Helm Charts After Scaling"

on:
  # Automatic path: runs each time the "EKS Cluster Scaling" workflow
  # completes a run on the main branch.
  workflow_run:
    workflows:
      - "EKS Cluster Scaling"
    types: [completed]
    branches: [main]

  # Manual path: the workflow can also be started by hand with these inputs.
  workflow_dispatch:
    inputs:
      # ---- Required core settings ----
      cluster_name:
        description: "EKS Cluster Name"
        type: string
        required: true
        default: "eks-performance"
      region:
        description: "AWS Region"
        type: string
        required: true
        default: "us-west-2"

      # ---- Optional overrides ----
      # Image repository/tag overrides; when left empty the install job falls
      # back to the ECR test repositories and freshly built commit SHAs.
      cloudwatch_agent_repository:
        description: "CloudWatch Agent Repository"
        type: string
      cloudwatch_agent_tag:
        description: "CloudWatch Agent Tag"
        type: string
      cloudwatch_agent_operator_repository:
        description: "CloudWatch Agent Operator Repository"
        type: string
      cloudwatch_agent_operator_tag:
        description: "CloudWatch Agent Operator Tag"
        type: string
      # Branches of the helm-charts and operator repositories to test.
      helm-charts-branch:
        description: "Branch of the helm charts to test"
        type: string
        default: "main"
      operator-branch:
        description: "Branch of the operator to test"
        type: string
        default: "main"
      # Overrides the TERRAFORM_AWS_ASSUME_ROLE repository variable.
      terraform_assume_role:
        description: "AWS IAM Role to assume"
        type: string
52+
53+
# One active run per workflow/ref pair: starting a new run cancels the run
# that is still in progress for the same group.
concurrency:
  cancel-in-progress: true
  group: "${{ github.workflow }}-${{ github.ref }}"
56+
57+
# Workflow-wide environment, visible to every step of every job.
env:
  # --- Cluster / AWS access ---
  # Manual inputs win; the literals are used when the run was not started via
  # workflow_dispatch (inputs are empty in that case).
  CLUSTER_NAME: "${{ inputs.cluster_name || 'eks-performance' }}"
  AWS_REGION: "${{ inputs.region || 'us-west-2' }}"
  TERRAFORM_AWS_ASSUME_ROLE: "${{ inputs.terraform_assume_role || vars.TERRAFORM_AWS_ASSUME_ROLE }}"
  # Assumed-role session length in seconds (1 hour).
  TERRAFORM_AWS_ASSUME_ROLE_DURATION: 3600

  # --- ECR repositories used as default image sources ---
  AGENT_ECR_TEST_REPO: 'cwagent-integration-test'
  OPERATOR_ECR_TEST_REPO: 'cwagent-operator-pre-release'

  # --- GitHub repository holding the operator sources ---
  OPERATOR_GITHUB_REPO_NAME: 'aws/amazon-cloudwatch-agent-operator'
70+
71+
jobs:
72+
# Gate job: runs only for a manual dispatch, or for a workflow_run event whose
# upstream run succeeded and was itself started by a schedule.
check-trigger:
  runs-on: ubuntu-latest
  if: >-
    github.event_name == 'workflow_dispatch' ||
    (github.event_name == 'workflow_run' &&
    github.event.workflow_run.conclusion == 'success' &&
    github.event.workflow_run.event == 'schedule')
  outputs:
    # Same condition as the job-level `if`; downstream jobs compare this
    # output against the string 'true'.
    should_continue: ${{ github.event_name == 'workflow_dispatch' || (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'schedule') }}
  steps:
    - name: Check trigger type
      id: check-trigger
      # Purely informational: logs which of the two triggers started the run.
      run: |
        if [ "${{ github.event_name }}" != "workflow_run" ]; then
          echo "Triggered manually via workflow_dispatch"
        else
          echo "Triggered by workflow_run from a scheduled event"
        fi
87+
88+
# Resolve the head commit of the requested operator branch so later jobs can
# build and deploy exactly that revision.
GetLatestOperatorCommitSHA:
  runs-on: ubuntu-latest
  needs:
    - check-trigger
  if: needs.check-trigger.outputs.should_continue == 'true'
  outputs:
    # Commit SHA written by the get_latest_sha step below.
    operator_commit_sha: ${{ steps.get_latest_sha.outputs.operator_sha }}
    # Re-exported so the reusable build workflow can be given the repo name.
    operator_repo_name: ${{ env.OPERATOR_GITHUB_REPO_NAME }}
  steps:
    - name: Checkout the target repo
      uses: actions/checkout@v4
      with:
        repository: ${{ env.OPERATOR_GITHUB_REPO_NAME }}
        # Falls back to main when no operator-branch input was supplied.
        ref: ${{ inputs.operator-branch || 'main' }}
        path: operator-repo

    - name: Get latest commit SHA
      id: get_latest_sha
      # Publishes the checked-out HEAD as the step output `operator_sha`.
      run: |
        head_sha=$(git -C operator-repo rev-parse HEAD)
        printf 'operator_sha=%s\n' "$head_sha" >> "$GITHUB_OUTPUT"
110+
111+
# Build the agent image through this repository's reusable
# build-test-artifacts workflow.
BuildAgent:
  needs:
    - check-trigger
  if: needs.check-trigger.outputs.should_continue == 'true'
  uses: ./.github/workflows/build-test-artifacts.yml
  with:
    test-image-before-upload: false
  secrets: inherit
  permissions:
    contents: read
    id-token: write
  # Shares one concurrency group per ref name; a newer build cancels an
  # in-flight build in the same group.
  concurrency:
    cancel-in-progress: true
    group: "Build-Test-Artifacts-${{ github.ref_name }}"
125+
126+
# Build the operator image via the operator repository's reusable
# build-and-upload workflow, pinned to the SHA resolved by
# GetLatestOperatorCommitSHA.
BuildOperator:
  needs:
    - check-trigger
    - GetLatestOperatorCommitSHA
  if: needs.check-trigger.outputs.should_continue == 'true'
  uses: aws/amazon-cloudwatch-agent-operator/.github/workflows/build-and-upload.yml@main
  with:
    # The resolved commit SHA is used both as the image tag and build target.
    tag: ${{ needs.GetLatestOperatorCommitSHA.outputs.operator_commit_sha }}
    target-sha: ${{ needs.GetLatestOperatorCommitSHA.outputs.operator_commit_sha }}
    repository: ${{ needs.GetLatestOperatorCommitSHA.outputs.operator_repo_name }}
    test-image-before-upload: false
  secrets: inherit
  # One operator build per workflow and operator branch at a time.
  concurrency:
    cancel-in-progress: true
    group: "${{ github.workflow }}-operator-${{ inputs.operator-branch || 'main' }}"
140+
141+
# Installs/upgrades the amazon-cloudwatch-observability chart on the target
# cluster, or removes it when the cluster currently has zero nodes.
#
# Workflow inputs are never interpolated into shell scripts directly: they are
# handed to the scripts through step-level `env` and referenced as quoted
# shell variables, so a value containing spaces or shell metacharacters cannot
# change the command that runs.
install-helm:
  needs: [ check-trigger, BuildAgent, BuildOperator, GetLatestOperatorCommitSHA ]
  if: ${{ needs.check-trigger.outputs.should_continue == 'true' }}
  runs-on: ubuntu-latest
  permissions:
    id-token: write
    contents: read
  steps:
    - name: Configure AWS Credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        role-to-assume: ${{ env.TERRAFORM_AWS_ASSUME_ROLE }}
        aws-region: ${{ env.AWS_REGION }}
        role-duration-seconds: ${{ env.TERRAFORM_AWS_ASSUME_ROLE_DURATION }}

    - name: Login ECR
      id: login-ecr
      uses: aws-actions/amazon-ecr-login@v2

    - name: Install kubectl
      uses: azure/setup-kubectl@v3
      with:
        version: 'latest'

    - name: Install Helm
      uses: azure/setup-helm@v3
      with:
        version: 'latest'

    - name: Update kubeconfig
      run: |
        aws eks update-kubeconfig --name "$CLUSTER_NAME" --region "$AWS_REGION"

    - name: Clone Helm Charts Repository
      env:
        HELM_CHARTS_BRANCH: ${{ inputs.helm-charts-branch || 'main' }}
      run: |
        rm -rf ./helm-charts
        git clone -b "$HELM_CHARTS_BRANCH" https://github.com/aws-observability/helm-charts.git ./helm-charts

    - name: Check node count and manage Helm chart
      env:
        # CLUSTER_NAME and AWS_REGION come from the workflow-level env, which
        # already resolves to the manual input when one was given.
        AGENT_REPOSITORY: ${{ inputs.cloudwatch_agent_repository || env.AGENT_ECR_TEST_REPO }}
        AGENT_TAG: ${{ inputs.cloudwatch_agent_tag || github.sha }}
        MANAGER_REPOSITORY: ${{ inputs.cloudwatch_agent_operator_repository || env.OPERATOR_ECR_TEST_REPO }}
        MANAGER_TAG: ${{ inputs.cloudwatch_agent_operator_tag || needs.GetLatestOperatorCommitSHA.outputs.operator_commit_sha }}
        ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
      run: |
        # Without pipefail a failing `kubectl get nodes` is masked by `wc -l`,
        # which would report 0 nodes and wrongly uninstall the chart.
        set -o pipefail
        NODE_COUNT=$(kubectl get nodes --no-headers | wc -l)

        if [ "$NODE_COUNT" -eq 0 ]; then
          echo "Node count is 0, removing Helm chart"
          helm uninstall amazon-cloudwatch-observability -n amazon-cloudwatch || echo "Chart not found or already removed"
        else
          echo "Node count is $NODE_COUNT, installing/updating Helm chart"

          # Echo all variables being passed to helm
          echo "CLUSTER_NAME: $CLUSTER_NAME"
          echo "REGION: $AWS_REGION"
          echo "AGENT_REPOSITORY: $AGENT_REPOSITORY"
          echo "AGENT_TAG: $AGENT_TAG"
          echo "AGENT_REPOSITORY_DOMAIN: $ECR_REGISTRY"
          echo "MANAGER_REPOSITORY: $MANAGER_REPOSITORY"
          echo "MANAGER_TAG: $MANAGER_TAG"
          echo "MANAGER_REPOSITORY_DOMAIN: $ECR_REGISTRY"

          # Image tags use --set-string so an all-digit tag stays a string
          # instead of being coerced to an integer by --set.
          helm upgrade --install amazon-cloudwatch-observability \
            ./helm-charts/charts/amazon-cloudwatch-observability \
            --namespace amazon-cloudwatch \
            --create-namespace \
            --set clusterName="$CLUSTER_NAME" \
            --set region="$AWS_REGION" \
            --set agent.image.repository="$AGENT_REPOSITORY" \
            --set-string agent.image.tag="$AGENT_TAG" \
            --set agent.image.repositoryDomainMap.public="$ECR_REGISTRY" \
            --set manager.image.repository="$MANAGER_REPOSITORY" \
            --set-string manager.image.tag="$MANAGER_TAG" \
            --set manager.image.repositoryDomainMap.public="$ECR_REGISTRY"
        fi
212+
213+
# Best-effort rollback: removes the chart when the run failed or was
# cancelled.
# NOTE(review): failure() is also true when an ancestor job (e.g. one of the
# build jobs) fails and install-helm is skipped — confirm that uninstalling
# the chart in that situation is intended.
cleanup-on-failure:
  runs-on: ubuntu-latest
  needs: install-helm
  if: failure() || cancelled()
  permissions:
    contents: read
    id-token: write
  steps:
    - name: Configure AWS Credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-region: ${{ env.AWS_REGION }}
        role-to-assume: ${{ env.TERRAFORM_AWS_ASSUME_ROLE }}
        role-duration-seconds: ${{ env.TERRAFORM_AWS_ASSUME_ROLE_DURATION }}

    - name: Install kubectl
      uses: azure/setup-kubectl@v3
      with:
        version: "latest"

    - name: Install Helm
      uses: azure/setup-helm@v3
      with:
        version: "latest"

    - name: Update kubeconfig
      run: |
        aws eks update-kubeconfig --name $CLUSTER_NAME --region $AWS_REGION

    - name: Uninstall Helm chart
      # A failed uninstall (e.g. the release does not exist) is logged and
      # ignored so the cleanup job itself still finishes.
      run: |
        echo "Test was cancelled or failed. Cleaning up resources..."
        if ! helm uninstall amazon-cloudwatch-observability -n amazon-cloudwatch; then
          echo "Chart not found or already removed"
        fi
        echo "Cleanup completed"

0 commit comments

Comments
 (0)