Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
8a9afa3
chore: ✨ add .gitignore for GitHub Actions configuration
ibakshay May 8, 2025
f974a22
chore(helm-lint): ✨ update validation settings and clean up workflow
ibakshay May 8, 2025
277a45a
chore: ✨ add initial project structure and documentation
ibakshay May 9, 2025
d9d20ea
feat(makefile): ✨ add `generate-readme` target for README generation
ibakshay May 9, 2025
c6c00b2
chore(values.yaml): ✨ update comments and structure for clarity
ibakshay May 9, 2025
ef1b577
chore(helm-docs): ✨ add Helm Docs Check workflow
ibakshay May 9, 2025
bda3684
chore(Makefile): ✨ update `HELM_DOCS_REPO` URL and remove unused `ARC…
ibakshay May 9, 2025
6a94427
chore(values.yaml): ✨ update comment for PrometheusRules labels
ibakshay May 9, 2025
3a631e5
chore(helm-docs): ✨ update README.md path in Helm Docs Check workflow
ibakshay May 9, 2025
8f7ec29
chore(values.yaml): ✨ update comment for PrometheusRules labels
ibakshay May 9, 2025
7353514
chore(README.md, values.yaml, Chart.yaml): ✨ update comments and main…
ibakshay May 9, 2025
ea7dd34
chore(Makefile): ✨ add lint target to Makefile
ibakshay May 9, 2025
dcdd40a
chore(helm-docs): ✨ update error message for outdated README.md
ibakshay May 9, 2025
59dbc3e
chore(helm-docs): ✨ refactor README.md error handling
ibakshay May 9, 2025
b7415f1
chore(helm-docs): ✨ update error message for outdated README.md
ibakshay May 9, 2025
6b0c51d
chore(helm-docs): ✨ rename step for clarity
ibakshay May 9, 2025
49f8a81
chore(values.yaml): ✨ update comment for PrometheusRules labels
ibakshay May 9, 2025
4016262
chore(values.yaml): ✨ update comments for clarity
ibakshay May 9, 2025
2ce34c2
chore(licenserc.yaml): ✨ add 'README.md.gotmpl' to paths-ignore
ibakshay May 12, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/configs/helm-lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# See https://github.com/helm/chart-testing#configuration
remote: origin
target-branch: main
validate-maintainers: false
check-version-increment: false
validate-maintainers: true
check-version-increment: true
chart-dirs:
- charts
1 change: 1 addition & 0 deletions .github/licenserc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ header:
- '**/*.txt'
- '*Dockerfile*'
- 'Makefile'
- 'README.md.gotmpl'
- 'pkg/idproxy/web/**'
- 'pkg/apis/scheme_builder.go' # Belongs to the Kubernetes authors
- 'cmd/tcp-proxy/main.go' # MIT License
Expand Down
42 changes: 42 additions & 0 deletions .github/workflows/helm-docs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Verifies that the committed README.md matches the output of
# `make generate-readme`. The job fails when the two differ.
name: "Helm Docs Check"

on:
  pull_request:
    types: [opened, synchronize, reopened]
    # Run whenever an input of `make generate-readme` (chart values, README
    # template, Makefile) or its output (README.md) changes, so README.md
    # cannot drift when only the template or the Makefile is edited.
    paths:
      - "charts/kubernetes-operations/values.yaml"
      - "README.md"
      - "README.md.gotmpl"
      - "Makefile"

jobs:
  helm-docs-check:
    runs-on: [default]
    steps:
      - name: Checkout code
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4
        with:
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up go environment
        uses: actions/setup-go@0aaccfd150d50ccaeb58ebd88d36e91967a5f35b  # v5
        with:
          go-version: "stable"

      # Regenerates README.md in the working tree and records, as the step
      # output `outdated`, whether the result differs from the committed file.
      - name: Generate README.md using Helm Docs
        id: generate-readme
        run: |
          echo "Generating README.md for charts/kubernetes-operations..."
          make generate-readme

          # `git diff --quiet` exits non-zero when the working tree copy
          # differs from the committed one.
          if git diff --quiet -- README.md; then
            echo "README.md is already up-to-date."
            echo "outdated=false" >> "$GITHUB_OUTPUT"
          else
            echo "README.md has changed."
            echo "outdated=true" >> "$GITHUB_OUTPUT"
          fi

      # Emits an error annotation on README.md and fails the job.
      - name: Helm-Docs Check
        if: steps.generate-readme.outputs.outdated == 'true'
        run: |
          echo "::error file=README.md::README.md is outdated. Please run 'make generate-readme' and update the PR."
          exit 1
30 changes: 2 additions & 28 deletions .github/workflows/helm-lint.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
name: "Helm lint and tests"
on:
pull_request:
types: [ opened, synchronize, reopened ]
types: [opened, synchronize, reopened]

env:
REGISTRY: ghcr.io

jobs:
helm-lint-test:
runs-on: [ default ]
runs-on: [default]
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
with:
Expand Down Expand Up @@ -38,29 +38,3 @@ jobs:
- name: Run chart-linting
if: steps.list-changed.outputs.changed == 'true'
run: ct lint --config .github/configs/helm-lint.yaml --target-branch ${{ github.event.repository.default_branch }}

- name: Check version bump
id: check-bump
if: steps.list-changed.outputs.changed == 'true'
continue-on-error: true
run: |
for chart in $(ct list-changed --config .github/configs/helm-lint.yaml --target-branch ${{ github.event.repository.default_branch }}); do
chart_version=$(yq .version "$chart/Chart.yaml")
if helm pull "oci://${{ env.REGISTRY }}/${{ github.repository }}/charts/$(dirname $chart)" --version $chart_version; then
echo "chart=$(dirname $chart)" >> "$GITHUB_OUTPUT"
echo "chart_version=${chart_version}" >> "$GITHUB_OUTPUT"
echo "needsbump=true" >> "$GITHUB_OUTPUT"
fi
done

- uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7
if: steps.check-bump.outputs.needsbump == 'true'
with:
script: |
github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: ':warning: Chart `oci://${{ env.REGISTRY }}/${{ github.repository }}/charts/${{ steps.check-bump.outputs.chart }}:${{ steps.check-bump.outputs.chart_version }}` already exists in OCI registry. Please increment the chart version.'
})
core.setFailed(`Action failed with error: Chart version bump required`);
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# config file for testing GitHub Actions locally
act_pull_request.json
# local tool binaries installed by the Makefile (LOCALBIN defaults to ./bin)
bin
40 changes: 40 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Detect OS (Linux/macOS), lower-cased; used in the helm-docs download URL
OS := $(shell uname -s | tr '[:upper:]' '[:lower:]')

# Detect ARCH (AMD64 or ARM64) as reported by `uname -m`; used in the download URL
# NOTE(review): `uname -m` prints `aarch64` on some Linux/ARM hosts -- confirm
# that the helm-docs release asset names cover that value.
UNAME_M := $(shell uname -m)

## Location to install dependencies to
LOCALBIN ?= $(shell pwd)/bin
$(LOCALBIN):
	mkdir -p $(LOCALBIN)
KUSTOMIZE ?= $(LOCALBIN)/kustomize
YQ ?= $(LOCALBIN)/yq
HELM_DOCS_VERSION ?= 1.14.2
HELM_DOCS_REPO ?= https://github.com/norwoodj/helm-docs/releases/download/v$(HELM_DOCS_VERSION)/helm-docs_$(HELM_DOCS_VERSION)_$(OS)_$(UNAME_M).tar.gz


## Download `helm-docs` locally if necessary
.PHONY: helm-docs
helm-docs: $(LOCALBIN)
	@# Remove an installed binary whose reported version is not the pinned one
	@if test -x $(LOCALBIN)/helm-docs && ! $(LOCALBIN)/helm-docs -v | grep -q $(HELM_DOCS_VERSION); then \
		echo "$(LOCALBIN)/helm-docs -v is not expected $(HELM_DOCS_VERSION). Removing it before installing."; \
		rm -f $(LOCALBIN)/helm-docs; \
	fi
	@# Download and install helm-docs if not present.
	@# `set -e` aborts at the first failing step (download, extract, chmod) so a
	@# failed download is reported as such; the EXIT trap removes the tarball on
	@# both success and failure.
	@if [ ! -s "$(LOCALBIN)/helm-docs" ]; then \
		set -e; \
		trap 'rm -f "$(LOCALBIN)/helm-docs.tar.gz"' EXIT; \
		echo "Downloading helm-docs $(HELM_DOCS_VERSION) to $(LOCALBIN)"; \
		curl -L -f $(HELM_DOCS_REPO) -o $(LOCALBIN)/helm-docs.tar.gz; \
		tar -xzf $(LOCALBIN)/helm-docs.tar.gz -C $(LOCALBIN) helm-docs; \
		chmod +x $(LOCALBIN)/helm-docs; \
	fi

## Render README.md from README.md.gotmpl and the chart values using helm-docs
.PHONY: generate-readme
generate-readme: helm-docs
	@$(LOCALBIN)/helm-docs -c charts/kubernetes-operations -o ../../README.md -t ../../README.md.gotmpl

## Lint the charts with chart-testing (`ct` must be available on PATH)
.PHONY: lint
lint:
	ct lint --config .github/configs/helm-lint.yaml
20 changes: 18 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ The content is structured as follows:
kubernetes-operations
├── playbooks/ Step-by-step instructions for troubleshooting.
└── charts/
└── kubernetes-operations
Expand All @@ -32,6 +32,22 @@ kubernetes-operations

The content of the repository can be installed independently or as part of the [greenhouse-extensions](https://github.com/cloudoperators/greenhouse-extensions/tree/main/kube-monitoring).

## Values

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| dashboards.create | bool | `true` | Enables ConfigMap resources with dashboards to be created |
| dashboards.plutonoSelectors | list | `[{"name":"plutono-dashboard","value":"\"true\""}]` | Label selectors for the Plutono dashboards to be picked up by Plutono. |
| global.commonLabels | object | `{}` | Common labels to add to all resources # |
| prometheusRules.NodeInMaintenance | object | `{"label":"maintenance_state","value":"in-maintenance"}` | The label value pair that marks a Kubernetes node as 'in maintenance' |
| prometheusRules.additionalRuleAnnotations | object | `{}` | Additional annotations for PrometheusRule alerts |
| prometheusRules.additionalRuleLabels | string | `nil` | Additional labels for PrometheusRule alerts # This is useful for adding additional labels such as "support_group" or "service" for the routing of alerts to each rule |
| prometheusRules.annotations | object | `{}` | Annotations for PrometheusRules |
| prometheusRules.create | bool | `true` | Enables PrometheusRule resources to be created |
| prometheusRules.disabled | object | `{}` | Disabled PrometheusRule alerts |
| prometheusRules.labels | object | `{}` | Labels for PrometheusRules |
| prometheusRules.ruleSelectors | string | `nil` | Label selectors for the Prometheus rules to be picked up by Prometheus. |

## Support, Feedback, Contributing

This project is open to feature requests/suggestions, bug reports etc. via [GitHub issues](https://github.com/cloudoperators/k8s-monitoring/issues). Contribution and feedback are encouraged and always welcome. For more information about how to contribute, the project structure, as well as additional contribution information, see our [Contribution Guidelines](CONTRIBUTING.md).
Expand All @@ -49,6 +65,6 @@ Copyright 2024 SAP SE or an SAP affiliate company and k8s-monitoring contributor

# Contributing

If you are contributing to the `kubernetes-operations` chart, update the associated content and increment the version in the `Chart.yaml`.
If you are contributing to the `kubernetes-operations` chart, update the associated content and increment the version in the `Chart.yaml`.

If you use this chart with the [kube-monitoring](https://github.com/cloudoperators/greenhouse-extensions/tree/main/kube-monitoring) Plugin from [Greenhouse](https://github.com/cloudoperators/greenhouse), update the version in the [Chart.yaml](https://github.com/cloudoperators/greenhouse-extensions/blob/main/kube-monitoring/charts/Chart.yaml) as well as the [plugindefinition](https://github.com/cloudoperators/greenhouse-extensions/blob/main/kube-monitoring/plugindefinition.yaml) versions of `kube-monitoring` so that the operations platform can perform the rollout.
56 changes: 56 additions & 0 deletions README.md.gotmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# kubernetes-operations

[![REUSE status](https://api.reuse.software/badge/github.com/cloudoperators/k8s-monitoring)](https://api.reuse.software/info/github.com/cloudoperators/k8s-monitoring)

## About this project

A set of Plutono dashboards and Prometheus alerting rules combined with playbooks to ensure effective operations of Kubernetes.

# Content

The content is structured as follows:

```
kubernetes-operations
├── playbooks/ Step-by-step instructions for troubleshooting.
└── charts/
└── kubernetes-operations
├── aggregations Prometheus aggregation rules for kubernetes.
├── alerts Prometheus alerts for kubernetes.
├── dashboards Plutono dashboards for visualizing key metrics.
└── Chart.yaml Helm chart manifest.
```

## Requirements and Setup

The content of the repository can be installed independently or as part of the [greenhouse-extensions](https://github.com/cloudoperators/greenhouse-extensions/tree/main/kube-monitoring).

{{ template "chart.valuesSection" . }}

## Support, Feedback, Contributing

This project is open to feature requests/suggestions, bug reports etc. via [GitHub issues](https://github.com/cloudoperators/k8s-monitoring/issues). Contribution and feedback are encouraged and always welcome. For more information about how to contribute, the project structure, as well as additional contribution information, see our [Contribution Guidelines](CONTRIBUTING.md).

## Security / Disclosure
If you find any bug that may be a security problem, please follow the instructions [in our security policy](https://github.com/cloudoperators/k8s-monitoring/security/policy) on how to report it. Please do not create GitHub issues for security-related doubts or problems.

## Code of Conduct

We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone. By participating in this project, you agree to abide by its [Code of Conduct](https://github.com/cloudoperators/.github/blob/main/CODE_OF_CONDUCT.md) at all times.

## Licensing

Copyright 2024 SAP SE or an SAP affiliate company and k8s-monitoring contributors. Please see our [LICENSE](LICENSE) for copyright and license information. Detailed information including third-party components and their licensing/copyright information is available [via the REUSE tool](https://api.reuse.software/info/github.com/cloudoperators/k8s-monitoring).

# Contributing

If you are contributing to the `kubernetes-operations` chart, update the associated content and increment the version in the `Chart.yaml`.

If you use this chart with the [kube-monitoring](https://github.com/cloudoperators/greenhouse-extensions/tree/main/kube-monitoring) Plugin from [Greenhouse](https://github.com/cloudoperators/greenhouse), update the version in the [Chart.yaml](https://github.com/cloudoperators/greenhouse-extensions/blob/main/kube-monitoring/charts/Chart.yaml) as well as the [plugindefinition](https://github.com/cloudoperators/greenhouse-extensions/blob/main/kube-monitoring/plugindefinition.yaml) versions of `kube-monitoring` so that the operations platform can perform the rollout.
4 changes: 2 additions & 2 deletions charts/kubernetes-operations/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@

apiVersion: v2
name: kubernetes-operations
version: 1.2.2
version: 1.2.3
description: A set of Plutono dashboards and Prometheus alerting rules combined with playbooks to ensure effective operations of Kubernetes.
maintainers:
- name: Richard Tief (I520251)
- name: richardtief
email: [email protected]
keywords:
- Helm Chart
Expand Down
26 changes: 12 additions & 14 deletions charts/kubernetes-operations/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,57 +2,55 @@
# SPDX-License-Identifier: Apache-2.0

global:
## Common labels to add to all resources
# -- Common labels to add to all resources
##
commonLabels: {}

## C default rules for monitoring the cluster
## -- default rules for monitoring the cluster
##
prometheusRules:

## Enables PrometheusRule resources to be created
# -- Enables PrometheusRule resources to be created
create: true

## Label selectors for the Prometheus rules to be picked up by Prometheus.
# -- Label selectors for the Prometheus rules to be picked up by Prometheus.
ruleSelectors:
# - name: plugin
# value: kube-monitoring
# - name: prometheus
# value: kubernetes

## Labels for PrometheusRules
# -- Labels for PrometheusRules
labels: {}

## Annotations for PrometheusRules
# -- Annotations for PrometheusRules
annotations: {}

## Additional labels for PrometheusRule alerts
# -- Additional labels for PrometheusRule alerts
## This is useful for adding additional labels such as "support_group" or "service" for the routing of alerts to each rule
additionalRuleLabels:
# support_group: support
# service: my-service

## Additional annotations for PrometheusRule alerts
# -- Additional annotations for PrometheusRule alerts
additionalRuleAnnotations: {}

## Disabled PrometheusRule alerts
# -- Disabled PrometheusRule alerts
disabled: {}
# KubernetesApiServerDown: true
# KubeletDown: true

## The label value pair that marks a Kubernetes node as 'in maintenance'
# -- The label value pair that marks a Kubernetes node as 'in maintenance'
NodeInMaintenance:
label: maintenance_state
value: in-maintenance

## Create default dashboards for monitoring the cluster
##
dashboards:

## Enables ConfigMap resources with dashboards to be created
# -- Enables ConfigMap resources with dashboards to be created
create: true

## Label selectors for the Plutono dashboards to be picked up by Plutono.
# -- Label selectors for the Plutono dashboards to be picked up by Plutono.
plutonoSelectors:
- name: plutono-dashboard
value: '"true"'
Loading