diff --git a/README.md b/README.md index b3b6628b15..d1637469c4 100644 --- a/README.md +++ b/README.md @@ -28,15 +28,14 @@ [Helm](https://helm.sh/) must be installed to use the chart. Please refer to Helm's [documentation](https://helm.sh/docs/) to get started. -Once Helm is set up properly, add the repository as follows: +After Helm is set up properly, add the repository as follows: ```console helm repo add grafana https://grafana.github.io/helm-charts ``` -See -the [Chart Documentation](https://github.com/grafana/k8s-monitoring-helm/blob/main/charts/k8s-monitoring/README.md) -for chart install instructions. +Refer to the [Chart Documentation](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/helm-chart/) +to learn more, including about chart installation instructions. ## Office Hours diff --git a/charts/k8s-monitoring/Makefile b/charts/k8s-monitoring/Makefile index 377b739aab..40beed7967 100644 --- a/charts/k8s-monitoring/Makefile +++ b/charts/k8s-monitoring/Makefile @@ -96,32 +96,32 @@ values.schema.json: values.yaml $(COLLECTOR_SCHEMA_FILES) $(DESTINATION_SCHEMA_F README.md: README.md.gotmpl values.yaml Chart.yaml docker run --rm --platform linux/amd64 --volume $(shell pwd):/chart ghcr.io/grafana/helm-chart-toolbox-doc-generator --chart /chart > $@ -ALLOY_HELM_VERSION = $(shell helm show chart charts/alloy-operator-*.tgz | yq eval '.appVersion') -ALLOY_IMAGE = $(shell helm template test --repo https://grafana.github.io/helm-charts alloy --version $(ALLOY_HELM_VERSION) | yq eval 'select(.kind=="DaemonSet" and .metadata.name=="test-alloy") | .spec.template.spec.containers[0].image') -ALLOY_OPERATOR_IMAGE = $(shell yq eval 'select(.kind=="Deployment" and .metadata.name=="$(EXAMPLE_RELEASE_NAME)-alloy-operator") | .spec.template.spec.containers[0].image' docs/examples/features/cluster-metrics/default/output.yaml) -BEYLA_IMAGE = $(shell yq eval 'select(.kind=="DaemonSet" and 
.metadata.name=="$(EXAMPLE_RELEASE_NAME)-beyla") | .spec.template.spec.containers[0].image' docs/examples/features/auto-instrumentation/beyla-metrics/output.yaml) -CONFIG_RELOADER_IMAGE = $(shell helm template test --repo https://grafana.github.io/helm-charts alloy --version $(ALLOY_HELM_VERSION) | yq eval 'select(.kind=="DaemonSet" and .metadata.name=="test-alloy") | .spec.template.spec.containers[1].image') -KEPLER_IMAGE = $(shell yq eval 'select(.kind=="DaemonSet" and .metadata.name=="$(EXAMPLE_RELEASE_NAME)-kepler") | .spec.template.spec.containers[0].image' docs/examples/features/cluster-metrics/default/output.yaml) -KUBE_STATE_METRICS_IMAGE = $(shell yq eval 'select(.kind=="Deployment" and .metadata.name=="$(EXAMPLE_RELEASE_NAME)-kube-state-metrics") | .spec.template.spec.containers[0].image' docs/examples/features/cluster-metrics/default/output.yaml) -NODE_EXPORTER_IMAGE = $(shell yq eval 'select(.kind=="DaemonSet" and .metadata.name=="$(EXAMPLE_RELEASE_NAME)-node-exporter") | .spec.template.spec.containers[0].image' docs/examples/features/cluster-metrics/default/output.yaml) -OPENCOST_EXPORTER_IMAGE = $(shell yq eval 'select(.kind=="Deployment" and .metadata.name=="$(EXAMPLE_RELEASE_NAME)-opencost") | .spec.template.spec.containers[0].image' docs/examples/features/cluster-metrics/default/output.yaml) -WINDOWS_EXPORTER_IMAGE = $(shell yq eval 'select(.kind=="DaemonSet" and .metadata.name=="$(EXAMPLE_RELEASE_NAME)-windows-exporter") | .spec.template.spec.containers[0].image' docs/examples/features/cluster-metrics/default/output.yaml) -docs/Images.md: Chart.yaml docs/examples/features/cluster-metrics/default/output.yaml docs/examples/features/auto-instrumentation/beyla-metrics/output.yaml - @echo "# Images" > $@ - @echo "" >> $@ - @echo "The following is the list of images potentially used in the $(shell yq eval '.version' Chart.yaml) version of the $(shell yq eval '.name' Chart.yaml) Helm chart:" >> $@ - @echo "" >> $@ - @echo "| Image Name | Repository | 
Feature |" >> $@ - @echo "| ---------- | ---------- | ------- |" >> $@ - @echo "| Alloy | $(ALLOY_IMAGE) | Always used. The telemetry data collector. Enabled with \`alloy-____.enabled=true\`. Deployed via the Alloy Operator. |" >> $@ - @echo "| Alloy Operator | $(ALLOY_OPERATOR_IMAGE) | Always used. Deploys and manages Grafana Alloy collector instances. |" >> $@ - @echo "| Beyla | $(BEYLA_IMAGE) | Automatically instruments apps on the cluster, generating metrics and traces. Enabled with \`autoInstrumentation.beyla.enabled=true\`. |" >> $@ - @echo "| Config Reloader | $(CONFIG_RELOADER_IMAGE) | Alloy sidecar that reloads the Alloy configuration upon changes. Enabled with \`alloy-____.configReloader.enabled=true\`. Deployed via the Alloy Operator. |" >> $@ - @echo "| Kepler | $(KEPLER_IMAGE) | Gathers energy metrics for Kubernetes objects. Enabled with \`clusterMetrics.kepler.enabled=true\`. |" >> $@ - @echo "| kube-state-metrics | $(KUBE_STATE_METRICS_IMAGE) | Gathers Kubernetes cluster object metrics. Enabled with \`clusterMetrics.kube-state-metrics.deploy=true\`. |" >> $@ - @echo "| Node Exporter | $(NODE_EXPORTER_IMAGE) | Gathers Kubernetes cluster node metrics. Enabled with \`clusterMetrics.node-exporter.deploy=true\`. |" >> $@ - @echo "| OpenCost | $(OPENCOST_EXPORTER_IMAGE) | Gathers cost metrics for Kubernetes objects. Enabled with \`clusterMetrics.opencost.enabled=true\`. |" >> $@ - @echo "| Windows Exporter | $(WINDOWS_EXPORTER_IMAGE) | Gathers Kubernetes cluster node metrics for Windows nodes. Enabled with \`clusterMetrics.windows-exporter.deploy=true\`. 
|" >> $@ +#ALLOY_HELM_VERSION = $(shell helm show chart charts/alloy-operator-*.tgz | yq eval '.appVersion') +#ALLOY_IMAGE = $(shell helm template test --repo https://grafana.github.io/helm-charts alloy --version $(ALLOY_HELM_VERSION) | yq eval 'select(.kind=="DaemonSet" and .metadata.name=="test-alloy") | .spec.template.spec.containers[0].image') +#ALLOY_OPERATOR_IMAGE = $(shell yq eval 'select(.kind=="Deployment" and .metadata.name=="$(EXAMPLE_RELEASE_NAME)-alloy-operator") | .spec.template.spec.containers[0].image' docs/examples/features/cluster-metrics/default/output.yaml) +#BEYLA_IMAGE = $(shell yq eval 'select(.kind=="DaemonSet" and .metadata.name=="$(EXAMPLE_RELEASE_NAME)-beyla") | .spec.template.spec.containers[0].image' docs/examples/features/auto-instrumentation/beyla-metrics/output.yaml) +#CONFIG_RELOADER_IMAGE = $(shell helm template test --repo https://grafana.github.io/helm-charts alloy --version $(ALLOY_HELM_VERSION) | yq eval 'select(.kind=="DaemonSet" and .metadata.name=="test-alloy") | .spec.template.spec.containers[1].image') +#KEPLER_IMAGE = $(shell yq eval 'select(.kind=="DaemonSet" and .metadata.name=="$(EXAMPLE_RELEASE_NAME)-kepler") | .spec.template.spec.containers[0].image' docs/examples/features/cluster-metrics/default/output.yaml) +#KUBE_STATE_METRICS_IMAGE = $(shell yq eval 'select(.kind=="Deployment" and .metadata.name=="$(EXAMPLE_RELEASE_NAME)-kube-state-metrics") | .spec.template.spec.containers[0].image' docs/examples/features/cluster-metrics/default/output.yaml) +#NODE_EXPORTER_IMAGE = $(shell yq eval 'select(.kind=="DaemonSet" and .metadata.name=="$(EXAMPLE_RELEASE_NAME)-node-exporter") | .spec.template.spec.containers[0].image' docs/examples/features/cluster-metrics/default/output.yaml) +#OPENCOST_EXPORTER_IMAGE = $(shell yq eval 'select(.kind=="Deployment" and .metadata.name=="$(EXAMPLE_RELEASE_NAME)-opencost") | .spec.template.spec.containers[0].image' docs/examples/features/cluster-metrics/default/output.yaml) 
+#WINDOWS_EXPORTER_IMAGE = $(shell yq eval 'select(.kind=="DaemonSet" and .metadata.name=="$(EXAMPLE_RELEASE_NAME)-windows-exporter") | .spec.template.spec.containers[0].image' docs/examples/features/cluster-metrics/default/output.yaml) +#docs/Images.md: Chart.yaml docs/examples/features/cluster-metrics/default/output.yaml docs/examples/features/auto-instrumentation/beyla-metrics/output.yaml +# @echo "# Images" > $@ +# @echo "" >> $@ +# @echo "The following is the list of images potentially used in the $(shell yq eval '.version' Chart.yaml) version of the $(shell yq eval '.name' Chart.yaml) Helm chart:" >> $@ +# @echo "" >> $@ +# @echo "| Image Name | Repository | Feature |" >> $@ +# @echo "| ---------- | ---------- | ------- |" >> $@ +# @echo "| Alloy | $(ALLOY_IMAGE) | Always used. The telemetry data collector. Enabled with \`alloy-____.enabled=true\`. Deployed via the Alloy Operator. |" >> $@ +# @echo "| Alloy Operator | $(ALLOY_OPERATOR_IMAGE) | Always used. Deploys and manages Grafana Alloy collector instances. |" >> $@ +# @echo "| Beyla | $(BEYLA_IMAGE) | Automatically instruments apps on the cluster, generating metrics and traces. Enabled with \`autoInstrumentation.beyla.enabled=true\`. |" >> $@ +# @echo "| Config Reloader | $(CONFIG_RELOADER_IMAGE) | Alloy sidecar that reloads the Alloy configuration upon changes. Enabled with \`alloy-____.configReloader.enabled=true\`. Deployed via the Alloy Operator. |" >> $@ +# @echo "| Kepler | $(KEPLER_IMAGE) | Gathers energy metrics for Kubernetes objects. Enabled with \`clusterMetrics.kepler.enabled=true\`. |" >> $@ +# @echo "| kube-state-metrics | $(KUBE_STATE_METRICS_IMAGE) | Gathers Kubernetes cluster object metrics. Enabled with \`clusterMetrics.kube-state-metrics.deploy=true\`. |" >> $@ +# @echo "| Node Exporter | $(NODE_EXPORTER_IMAGE) | Gathers Kubernetes cluster node metrics. Enabled with \`clusterMetrics.node-exporter.deploy=true\`. 
|" >> $@ +# @echo "| OpenCost | $(OPENCOST_EXPORTER_IMAGE) | Gathers cost metrics for Kubernetes objects. Enabled with \`clusterMetrics.opencost.enabled=true\`. |" >> $@ +# @echo "| Windows Exporter | $(WINDOWS_EXPORTER_IMAGE) | Gathers Kubernetes cluster node metrics for Windows nodes. Enabled with \`clusterMetrics.windows-exporter.deploy=true\`. |" >> $@ # Example targets EXAMPLE_RELEASE_NAME=k8smon @@ -209,7 +209,7 @@ platform-test-checks: $(PLATFORM_TEST_OUTPUT_FILES) .PHONY: clean clean: clean-examples - rm -f README.md docs/Images.md values.schema.json templates/destinations/_destination_types.tpl schema-mods/destination.json + rm -f README.md values.schema.json templates/destinations/_destination_types.tpl schema-mods/destination.json rm -f $(COLLECTOR_SCHEMA_FILES) $(COLLECTOR_DOCS_FILES) rm -f $(DESTINATION_SCHEMA_FILES) $(DESTINATION_DOCS_FILES) set -e && \ @@ -229,7 +229,7 @@ build-features: # Build targets .PHONY: build -build: build-features README.md docs/Images.md values.schema.json templates/destinations/_destination_types.tpl $(COLLECTOR_DOCS_FILES) $(DESTINATION_DOCS_FILES) examples integration-test-checks platform-test-checks +build: build-features README.md values.schema.json templates/destinations/_destination_types.tpl $(COLLECTOR_DOCS_FILES) $(DESTINATION_DOCS_FILES) examples integration-test-checks platform-test-checks # Test targets .PHONY: test unittest lint-helm lint-configs diff --git a/charts/k8s-monitoring/README.md b/charts/k8s-monitoring/README.md index 7e13dc4b8e..8884bf2cc5 100644 --- a/charts/k8s-monitoring/README.md +++ b/charts/k8s-monitoring/README.md @@ -34,7 +34,7 @@ Version 2.1 was re-versioned to be 3.0. If you are on 2.1, please upgrade to 3.0 ### Version 2.0 -v2 introduces some significant changes to the chart configuration values. Refer to the migration [documentation](./docs/Migration.md) for tools and strategies to migrate from v1. +v2 introduces some significant changes to the chart configuration values. 
Refer to the migration [documentation](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/helm-chart/migrate-helm-chart/) for tools and strategies to migrate from v1. ## Usage @@ -145,7 +145,7 @@ destinationsMap: #### Collectors -([Documentation](./docs/Collectors.md)) +([Documentation](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/helm-chart/collector-reference/)) Collectors are workloads that are dedicated to gathering metrics, logs, traces, and profiles from the cluster and from workloads on the cluster. There are multiple collector instances to optimize around the collection requirements. @@ -158,7 +158,7 @@ The list of collectors are: * **alloy-singleton** is a 1-replica Deployment to collect cluster events. * **alloy-profiles** is a DaemonSet used to instrument and collect profiling data. -To enable a collector, add a new section to your values file. Ex: +To enable a collector, add a new section to your values file. Example: ```YAML alloy-{collector_name}: @@ -169,7 +169,7 @@ alloy-{collector_name}: #### Features -([Documentation](./docs/Features.md)) +([Documentation](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/helm-chart/#features)) This section is where you define which features you want to enable with this chart. Features define what kind of data to collect. diff --git a/charts/k8s-monitoring/README.md.gotmpl b/charts/k8s-monitoring/README.md.gotmpl index 05c21c1bdc..247296f31f 100644 --- a/charts/k8s-monitoring/README.md.gotmpl +++ b/charts/k8s-monitoring/README.md.gotmpl @@ -35,7 +35,7 @@ Version 2.1 was re-versioned to be 3.0. If you are on 2.1, please upgrade to 3.0 ### Version 2.0 -v2 introduces some significant changes to the chart configuration values. 
Refer to the migration [documentation](./docs/Migration.md) for tools and strategies to migrate from v1. +v2 introduces some significant changes to the chart configuration values. Refer to the migration [documentation](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/helm-chart/migrate-helm-chart/) for tools and strategies to migrate from v1. ## Usage @@ -146,7 +146,7 @@ destinationsMap: #### Collectors -([Documentation](./docs/Collectors.md)) +([Documentation](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/helm-chart/collector-reference/)) Collectors are workloads that are dedicated to gathering metrics, logs, traces, and profiles from the cluster and from workloads on the cluster. There are multiple collector instances to optimize around the collection requirements. @@ -159,7 +159,7 @@ The list of collectors are: * **alloy-singleton** is a 1-replica Deployment to collect cluster events. * **alloy-profiles** is a DaemonSet used to instrument and collect profiling data. -To enable a collector, add a new section to your values file. Ex: +To enable a collector, add a new section to your values file. Example: ```YAML alloy-{collector_name}: @@ -170,7 +170,7 @@ alloy-{collector_name}: #### Features -([Documentation](./docs/Features.md)) +([Documentation](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/helm-chart/#features)) This section is where you define which features you want to enable with this chart. Features define what kind of data to collect. 
diff --git a/charts/k8s-monitoring/docs/Collectors.md b/charts/k8s-monitoring/docs/Collectors.md index 4576b5d623..9984e07d59 100644 --- a/charts/k8s-monitoring/docs/Collectors.md +++ b/charts/k8s-monitoring/docs/Collectors.md @@ -1,350 +1,3 @@ # Collectors -Collectors are Alloy instances deployed by the Alloy Operator as Kubernetes workloads. Each collector uses a -workload type appropriate for the telemetry type it collects. - -## General configuration - -Each collector is defined in its own section in the Kubernetes Monitoring Helm chart values file. Here is an example of -the general format to enable and configure a collector: - -```yaml -alloy-: - enabled: true # Enable deploying this collector - - alloy: # Settings related to the Alloy instance - ... - controller: # Settings related to the Alloy controller - ... -``` - -This creates a Kubernetes workload as either a DaemonSet, StatefulSet, or Deployment, with its own set of Pods running -Alloy containers. - -Because collectors are deployed using the Alloy Operator, you can use any of the -standard [Alloy helm chart values](https://raw.githubusercontent.com/grafana/alloy/refs/heads/main/operations/helm/charts/alloy/values.yaml). -These values will be used when creating the Alloy instance. - -Options specific to the Kubernetes Monitoring Helm chart are described in the following reference section. - -## Alloy Receiver - -- **Pods Name**: `-alloy-receiver-*` -- **Default Controller Type**: DaemonSet -- **Service Name**: `-alloy-receiver` - -This collector creates an Alloy instance deployed as a DaemonSet to receive application metrics when -the [Application Observability](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-application-observability) -feature is enabled. 
- -For each -[receiver](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-application-observability#receivers-jaeger) -enabled in the feature, configure this collector to expose the corresponding ports on the Kubernetes service that is -fronting the Pods. For example, to enable a receiver to collect Zipkin traces, add: - -```yaml -applicationObservability: - enabled: true - receivers: - otlp: - grpc: - enabled: true - port: 4317 - http: - enabled: true - port: 4318 - -... -alloy-receiver: - enabled: true - alloy: - extraPorts: - - name: otlp-grpc - port: 4317 - targetPort: 4317 - protocol: TCP - - name: otlp-http - port: 4318 - targetPort: 4318 - protocol: TCP -``` - -### Client endpoint configuration - -You can configure endpoints inside or outside the Cluster. - -#### Inside the Cluster - -Applications inside the Kubernetes Cluster can use -the [kubedns](https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#namespaces-of-services) name to -reference a particular receiver endpoint. For example: - -```yaml -endpoint: http://grafana-k8s-monitoring-alloy[.mynamespace.cluster.local]:4318 -``` - -#### Outside the Cluster - -To expose the receiver to applications outside the Cluster (for example, for frontend observability), you can use -different approaches depending on your setup. Load balancers are created by whatever controllers are installed on your -Cluster. Make sure to check -the [Alloy chart values](https://raw.githubusercontent.com/grafana/alloy/main/operations/helm/charts/alloy/values.yaml) -for the full list of options. 
- -For example, to create a NLB on AWS EKS when using the AWS LB controller: - -```yaml -alloy-receiver: - alloy: - service: - type: LoadBalancer -``` - -To create an ALB instead: - -```yaml -alloy-receiver: - alloy: - ingress: - enabled: true - path: / - faroPort: 12347 -``` - -You can also create additional services and ingress objects as needed if the Alloy chart options don't fit your needs. -Consult your Kubernetes vendor documentation for details. - -### Istio/Service Mesh - -Depending on your mesh configuration, you might need to explicitly include the Grafana monitoring namespace as a member, -or declare the Alloy instance as a backend of your application for traffic within the Cluster. - -For traffic from outside the Cluster, you likely need to set up an ingress gateway into your mesh. - -In any case, consult your mesh vendor for details. - -## Troubleshooting - -Here are some troubleshooting tips related to configuring collectors. - -### Startup issues - -Make sure your Pods are up and running. To do so, use this command to show you a list of Pods and associated states: - -`kubectl get pods -n ` - -While you may have meta-monitoring turned on (which would expose the Alloy Pod logs in Loki), this is not helpful when -the alloy-logs instance itself is faulty. - -To troubleshoot startup problems, you can inspect the Pod -logs [like any other Kubernetes workload](https://kubernetes.io/docs/reference/kubectl/generated/kubectl_logs/). To -watch the alloy-logs instance Pods: - -`kubectl logs -f --tail 100 ds/grafana-k8s-monitoring-alloy-logs` - -### Alloy debugger - -You can apply [standard Alloy troubleshooting strategies](https://grafana.com/docs/alloy/latest/troubleshoot/) to each -collector pod, but specifically for Kubernetes. - -1. To access the Alloy UI on a collector Pod, forward the UI port to your local machine: - - ```bash - kubectl port-forward grafana-k8s-monitoring-alloy-receiver 12345:12345 - ``` - -2. 
Open your browser to `http://localhost:12345` - -## Scaling - -Follow these instructions for appropriate scaling. - -### DaemonSets and Singleton instances - -For collectors deployed as DaemonSets and Singleton instances, one Pod is deployed per Node. You cannot deploy more -replicas with this type of controller. Instead, scale the individual Pods by increasing the resource requests and -limits. Refer to -the [Alloy helm chart sizing guidelines](https://grafana.com/docs/alloy/latest/introduction/estimate-resource-usage/) to -learn how to best tune those parameters. For example: - -```yaml -alloy-metrics: - alloy: - resources: - requests: {} - limits: {} -``` - -### StatefulSets - -For StatefulSet collectors, set the number of replicas in the `alloy` config section of the collector: - -```yaml -alloy-metrics: - controller: - replicas: 3 -``` - -### Autoscaling - -**Use with caution as Autoscalers can cause Cluster outtages when not configured properly.** - -Alloy doesn't provide autoscaling out of the box, but you can use the -Kubernetes [HPA](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/) -and [VPA](https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler) autoscalers, depending on the type of deployment for the collector. Set the target for the -HorizontalPodAutoscaler or VerticalPodAutoscaler to the collector deployment name. - -## Values reference - -### Alloy Logs - -| Key | Type | Default | Description | -|-------------------------------------------|--------|---------------|------------------------------------------------------------------------------------------| -| alloy-logs.controller.type | string | `"daemonset"` | The type of controller to use for the Alloy Logs instance. | -| alloy-logs.enabled | bool | `false` | Deploy the Alloy instance for collecting log data. | -| alloy-logs.extraConfig | string | `""` | Extra Alloy configuration to be added to the configuration file. 
| -| alloy-logs.liveDebugging.enabled | bool | `false` | Enable live debugging for the Alloy instance. | -| alloy-logs.logging.format | string | `"logfmt"` | Format to use for writing Alloy log lines. | -| alloy-logs.logging.level | string | `"info"` | Level at which Alloy log lines should be written. | -| alloy-logs.remoteConfig.auth.password | string | `""` | The password to use for the remote config server. | -| alloy-logs.remoteConfig.auth.passwordFrom | string | `""` | Raw config for accessing the password. | -| alloy-logs.remoteConfig.auth.passwordKey | string | `"password"` | The key for storing the username in the secret. | -| alloy-logs.remoteConfig.auth.type | string | `"none"` | The type of authentication to use for the remote config server. | -| alloy-logs.remoteConfig.auth.username | string | `""` | The username to use for the remote config server. | -| alloy-logs.remoteConfig.auth.usernameFrom | string | `""` | Raw config for accessing the username. | -| alloy-logs.remoteConfig.auth.usernameKey | string | `"username"` | The key for storing the username in the secret. | -| alloy-logs.remoteConfig.enabled | bool | `false` | Enable fetching configuration from a remote config server. | -| alloy-logs.remoteConfig.extraAttributes | object | `{}` | Attributes to be added to this collector when requesting configuration. | -| alloy-logs.remoteConfig.pollFrequency | string | `"5m"` | The frequency at which to poll the remote config server for updates. | -| alloy-logs.remoteConfig.proxyURL | string | `""` | The proxy URL to use of the remote config server. | -| alloy-logs.remoteConfig.secret.create | bool | `true` | Whether to create a secret for the remote config server. | -| alloy-logs.remoteConfig.secret.embed | bool | `false` | If true, skip secret creation and embed the credentials directly into the configuration. | -| alloy-logs.remoteConfig.secret.name | string | `""` | The name of the secret to create. 
| -| alloy-logs.remoteConfig.secret.namespace | string | `""` | The namespace for the secret. | -| alloy-logs.remoteConfig.url | string | `""` | The URL of the remote config server. | - -### Alloy Metrics - -| Key | Type | Default | Description | -|----------------------------------------------|--------|-----------------|------------------------------------------------------------| -| alloy-metrics.controller.replicas | int | `1` | The number of replicas for the Alloy Metrics instance. | -| alloy-metrics.controller.type | string | `"statefulset"` | The type of controller to use for the Alloy Metrics | -| instance. | | | | -| alloy-metrics.enabled | bool | `false` | Deploy the Alloy instance for collecting metrics. | -| alloy-metrics.extraConfig | string | `""` | Extra Alloy configuration to be added to the configuration | -| file. | | | | -| alloy-metrics.liveDebugging.enabled | bool | `false` | Enable live debugging for the Alloy instance. | -| alloy-metrics.logging.format | string | `"logfmt"` | Format to use for writing Alloy log lines. | -| alloy-metrics.logging.level | string | `"info"` | Level at which Alloy log lines should be written. | -| alloy-metrics.remoteConfig.auth.password | string | `""` | The password to use for the remote config | -| server. | | | | -| alloy-metrics.remoteConfig.auth.passwordFrom | string | `""` | Raw config for accessing the password. | -| alloy-metrics.remoteConfig.auth.passwordKey | string | `"password"` | The key for storing the password in the | -| secret. | | | | -| alloy-metrics.remoteConfig.auth.type | string | `"none"` | The type of authentication to use for the remote | -| config server. | | | | -| alloy-metrics.remoteConfig.auth.username | string | `""` | The username to use for the remote config | -| server. | | | | -| alloy-metrics.remoteConfig.auth.usernameFrom | string | `""` | Raw config for accessing the password. 
| -| alloy-metrics.remoteConfig.auth.usernameKey | string | `"username"` | The key for storing the username in the | -| secret. | | | | -| alloy-metrics.remoteConfig.enabled | bool | `false` | Enable fetching configuration from a remote config | -| server. | | | | -| alloy-metrics.remoteConfig.extraAttributes | object | `{}` | Attributes to be added to this collector when | -| requesting configuration. | | | | -| alloy-metrics.remoteConfig.pollFrequency | string | `"5m"` | The frequency at which to poll the remote config | -| server for updates. | | | | -| alloy-metrics.remoteConfig.proxyURL | string | `""` | The proxy URL to use of the remote config server. | -| alloy-metrics.remoteConfig.secret.create | bool | `true` | Whether to create a secret for the remote config | -| server. | | | | -| alloy-metrics.remoteConfig.secret.embed | bool | `false` | If true, skip secret creation and embed the | -| credentials directly into the configuration. | | | | -| alloy-metrics.remoteConfig.secret.name | string | `""` | The name of the secret to create. | -| alloy-metrics.remoteConfig.secret.namespace | string | `""` | The namespace for the secret. | -| alloy-metrics.remoteConfig.url | string | `""` | The URL of the remote config server. | - -### Alloy Profiles - -| Key | Type | Default | Description | -|-----------------------------------------------|--------|---------------|------------------------------------------------------------------------------------------| -| alloy-profiles.controller.type | string | `"daemonset"` | The type of controller to use for the Alloy Profiles instance. | -| alloy-profiles.enabled | bool | `false` | Deploy the Alloy instance for gathering profiles. | -| alloy-profiles.extraConfig | string | `""` | Extra Alloy configuration to be added to the configuration file. | -| alloy-profiles.liveDebugging.enabled | bool | `false` | Enable live debugging for the Alloy instance. 
| -| alloy-profiles.logging.format | string | `"logfmt"` | Format to use for writing Alloy log lines. | -| alloy-profiles.logging.level | string | `"info"` | Level at which Alloy log lines should be written. | -| alloy-profiles.remoteConfig.auth.password | string | `""` | The password to use for the remote config server. | -| alloy-profiles.remoteConfig.auth.passwordFrom | string | `""` | Raw config for accessing the password. | -| alloy-profiles.remoteConfig.auth.passwordKey | string | `"password"` | The key for storing the password in the secret. | -| alloy-profiles.remoteConfig.auth.type | string | `"none"` | The type of authentication to use for the remote config server. | -| alloy-profiles.remoteConfig.auth.username | string | `""` | The username to use for the remote config server. | -| alloy-profiles.remoteConfig.auth.usernameFrom | string | `""` | Raw config for accessing the username. | -| alloy-profiles.remoteConfig.auth.usernameKey | string | `"username"` | The key for storing the username in the secret. | -| alloy-profiles.remoteConfig.enabled | bool | `false` | Enable fetching configuration from a remote config server. | -| alloy-profiles.remoteConfig.extraAttributes | object | `{}` | Attributes to be added to this collector when requesting configuration. | -| alloy-profiles.remoteConfig.pollFrequency | string | `"5m"` | The frequency at which to poll the remote config server for updates. | -| alloy-profiles.remoteConfig.proxyURL | string | `""` | The proxy URL to use of the remote config server. | -| alloy-profiles.remoteConfig.secret.create | bool | `true` | Whether to create a secret for the remote config server. | -| alloy-profiles.remoteConfig.secret.embed | bool | `false` | If true, skip secret creation and embed the credentials directly into the configuration. | -| alloy-profiles.remoteConfig.secret.name | string | `""` | The name of the secret to create. 
| -| alloy-profiles.remoteConfig.secret.namespace | string | `""` | The namespace for the secret. | -| alloy-profiles.remoteConfig.url | string | `""` | The URL of the remote config server. | - -### Alloy Receiver - -| Key | Type | Default | Description | -|-----------------------------------------------|--------|---------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| alloy-receiver.alloy.extraPorts | list | `[]` | The ports to expose for the Alloy receiver. | -| alloy-receiver.controller.type | string | `"daemonset"` | The type of controller to use for the Alloy Receiver instance. | -| alloy-receiver.enabled | bool | `false` | Deploy the Alloy instance for opening receivers to collect application data. | -| alloy-receiver.extraConfig | string | `""` | Extra Alloy configuration to be added to the configuration file. | -| alloy-receiver.extraService.enabled | bool | `false` | Create an extra service for the Alloy receiver. This service will mirror the alloy-receiver service, but its name can be customized to match existing application settings. | -| alloy-receiver.extraService.fullname | string | `""` | If set, the full name of the extra service to create. This will result in the format ``. | -| alloy-receiver.extraService.name | string | `"alloy"` | The name of the extra service to create. This will result in the format `-`. | -| alloy-receiver.liveDebugging.enabled | bool | `false` | Enable live debugging for the Alloy instance. | -| alloy-receiver.logging.format | string | `"logfmt"` | Format to use for writing Alloy log lines. | -| alloy-receiver.logging.level | string | `"info"` | Level at which Alloy log lines should be written. | -| alloy-receiver.remoteConfig.auth.password | string | `""` | The password to use for the remote config server. 
| -| alloy-receiver.remoteConfig.auth.passwordFrom | string | `""` | Raw config for accessing the password. | -| alloy-receiver.remoteConfig.auth.passwordKey | string | `"password"` | The key for storing the password in the secret. | -| alloy-receiver.remoteConfig.auth.type | string | `"none"` | The type of authentication to use for the remote config server. | -| alloy-receiver.remoteConfig.auth.username | string | `""` | The username to use for the remote config server. | -| alloy-receiver.remoteConfig.auth.usernameFrom | string | `""` | Raw config for accessing the username. | -| alloy-receiver.remoteConfig.auth.usernameKey | string | `"username"` | The key for storing the username in the secret. | -| alloy-receiver.remoteConfig.enabled | bool | `false` | Enable fetching configuration from a remote config server. | -| alloy-receiver.remoteConfig.extraAttributes | object | `{}` | Attributes to be added to this collector when requesting configuration. | -| alloy-receiver.remoteConfig.pollFrequency | string | `"5m"` | The frequency at which to poll the remote config server for updates. | -| alloy-receiver.remoteConfig.proxyURL | string | `""` | The proxy URL to use of the remote config server. | -| alloy-receiver.remoteConfig.secret.create | bool | `true` | Whether to create a secret for the remote config server. | -| alloy-receiver.remoteConfig.secret.embed | bool | `false` | If true, skip secret creation and embed the credentials directly into the configuration. | -| alloy-receiver.remoteConfig.secret.name | string | `""` | The name of the secret to create. | -| alloy-receiver.remoteConfig.secret.namespace | string | `""` | The namespace for the secret. | -| alloy-receiver.remoteConfig.url | string | `""` | The URL of the remote config server. 
| - -### Alloy Singleton - -| Key | Type | Default | Description | -|------------------------------------------------|--------|----------------|-----------------------------------------------------------------------------------------------------------------| -| alloy-singleton.controller.replicas | int | `1` | The number of replicas for the Alloy Singleton instance. Must remain a single instance to avoid duplicate data. | -| alloy-singleton.controller.type | string | `"deployment"` | The type of controller to use for the Alloy Singleton instance. | -| alloy-singleton.enabled | bool | `false` | Deploy the Alloy instance for data sources required to be deployed on a single replica. | -| alloy-singleton.extraConfig | string | `""` | Extra Alloy configuration to be added to the configuration file. | -| alloy-singleton.liveDebugging.enabled | bool | `false` | Enable live debugging for the Alloy instance. | -| alloy-singleton.logging.format | string | `"logfmt"` | Format to use for writing Alloy log lines. | -| alloy-singleton.logging.level | string | `"info"` | Level at which Alloy log lines should be written. | -| alloy-singleton.remoteConfig.auth.password | string | `""` | The password to use for the remote config server. | -| alloy-singleton.remoteConfig.auth.passwordFrom | string | `""` | Raw config for accessing the password. | -| alloy-singleton.remoteConfig.auth.passwordKey | string | `"password"` | The key for storing the password in the secret. | -| alloy-singleton.remoteConfig.auth.type | string | `"none"` | The type of authentication to use for the remote config server. | -| alloy-singleton.remoteConfig.auth.username | string | `""` | The username to use for the remote config server. | -| alloy-singleton.remoteConfig.auth.usernameFrom | string | `""` | Raw config for accessing the username. | -| alloy-singleton.remoteConfig.auth.usernameKey | string | `"username"` | The key for storing the username in the secret. 
| -| alloy-singleton.remoteConfig.enabled | bool | `false` | Enable fetching configuration from a remote config server. | -| alloy-singleton.remoteConfig.extraAttributes | object | `{}` | Attributes to be added to this collector when requesting configuration. | -| alloy-singleton.remoteConfig.pollFrequency | string | `"5m"` | The frequency at which to poll the remote config server for updates. | -| alloy-singleton.remoteConfig.proxyURL | string | `""` | The proxy URL to use of the remote config server. | -| alloy-singleton.remoteConfig.secret.create | bool | `true` | Whether to create a secret for the remote config server. | -| alloy-singleton.remoteConfig.secret.embed | bool | `false` | If true, skip secret creation and embed the credentials directly into the configuration. | -| alloy-singleton.remoteConfig.secret.name | string | `""` | The name of the secret to create. | -| alloy-singleton.remoteConfig.secret.namespace | string | `""` | The namespace for the secret. | -| alloy-singleton.remoteConfig.url | string | `""` | The URL of the remote config server. | +This content has moved to [Grafana Alloy collector reference](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/helm-chart/collector-reference/). diff --git a/charts/k8s-monitoring/docs/Features.md b/charts/k8s-monitoring/docs/Features.md index d6e2a4a86a..8591724439 100644 --- a/charts/k8s-monitoring/docs/Features.md +++ b/charts/k8s-monitoring/docs/Features.md @@ -1,90 +1,3 @@ # Features -The Kubernetes Monitoring Helm chart contains multiple features to group common monitoring tasks into sections within -the chart. Any feature contains the Alloy configuration used to discover, gather, process, and deliver the appropriate -telemetry data, as well as some additional Kubernetes workloads to supplement Alloy's functionality. 
Features can be -enabled with the `enabled` flag, and each contain multiple configuration options described in the feature's -documentation. - -These are the current features supported in this Helm chart: - -- [Cluster Metrics](#cluster-metrics) -- [Cluster Events](#cluster-events) -- [Application Observability](#application-observability) -- [Annotation Autodiscovery](#annotation-autodiscovery) -- [Prometheus Operator Objects](#prometheus-operator-objects) -- [Node Logs](#node-logs) -- [Pod Logs](#pod-logs) -- [Service Integrations](#service-integrations) -- [Profiling](#profiling) -- [Frontend Observability](#frontend-observability) - -## Cluster Metrics - -Collects metrics about the Kubernetes Cluster, including the control plane if configured to do so. -Refer to the [documentation](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-cluster-metrics) for more information. - -## Cluster Events - -Collects Kubernetes Cluster events from the Kubernetes API server. -Refer to the [documentation](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-cluster-events) for more information. - -## Application Observability - -Opens receivers to collect telemetry data from instrumented applications, including tail sampling when configured to do -so. Refer -to [documentation](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-application-observability) for more information. - -## Annotation Autodiscovery - -Collects metrics from any Pod or Service that uses a specific annotation. -Refer to [documentation](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-annotation-autodiscovery) for more information. - -## Prometheus Operator Objects - -Collects metrics from Prometheus Operator objects, such as PodMonitors and ServiceMonitors. 
-Refer to [documentation](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-prometheus-operator-objects) for more information. - -## Node Logs - -Collects logs from Kubernetes Cluster Nodes. -Refer to [documentation](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-node-logs) for more information. - -## Pod Logs - -Collects logs from Kubernetes Pods. -Refer to [documentation](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-pod-logs) for more information. - -## Service Integrations - -Collects metrics and logs from a variety of popular services and integrations. -Refer to [documentation](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-integrations) for more information. - -## Profiling - -Collect profiles using Pyroscope. -Refer to [documentation](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-profiling) for more information. - -## Contributing - -Features are stored in their own Helm chart. That chart is not a standalone chart, but is included in the main -k8s-monitoring Helm chart as a dependency. The parent chart interacts with the feature chart via template functions. - -To add a new feature, create a new Helm chart in the `charts` directory. The chart should have a `feature-` prefix in -its name. 
The following files are required for a feature chart: - -- `templates/_module.alloy.tpl` - This file should contain a template function named - `feature..module` Creates an [Alloy module](https://grafana.com/docs/alloy/latest/get-started/modules/) - that wraps the configuration for your feature, and exposes any of these arguments as appropriate: - - `metrics_destination` - Defines where scrape metrics should be delivered - - `logs_destination` - Defines where logs should be delivered - - `traces_destination` - Defines where traces should be delivered - - `profiles_destination` - Defines where profiles should be delivered - -- `templates/_notes.alloy.tpl` - This file should contain these template functions: - - `feature..notes.deployments` - Returns a list of workloads that will be - deployed to the Kubernetes Cluster by the feature - - `feature..notes.task` - Returns a one-line summary of what this feature will do - - `feature..notes.actions` - Returns any prompts for the user to take additional - action after deployment - - `feature..summary` - Returns a dictionary of settings that is used for self-reporting metrics +This content has moved to [Features](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/helm-chart/#features). diff --git a/charts/k8s-monitoring/docs/Images.md b/charts/k8s-monitoring/docs/Images.md index 719d64968a..5d6772bdc7 100644 --- a/charts/k8s-monitoring/docs/Images.md +++ b/charts/k8s-monitoring/docs/Images.md @@ -1,15 +1,3 @@ # Images -The following is the list of images potentially used in the 3.2.6 version of the k8s-monitoring Helm chart: - -| Image Name | Repository | Feature | -| ---------- | ---------- | ------- | -| Alloy | docker.io/grafana/alloy:v1.10.1 | Always used. The telemetry data collector. Enabled with `alloy-____.enabled=true`. Deployed via the Alloy Operator. | -| Alloy Operator | ghcr.io/grafana/alloy-operator:1.2.1 | Always used. 
Deploys and manages Grafana Alloy collector instances. | -| Beyla | docker.io/grafana/beyla:2.5.6 | Automatically instruments apps on the cluster, generating metrics and traces. Enabled with `autoInstrumentation.beyla.enabled=true`. | -| Config Reloader | quay.io/prometheus-operator/prometheus-config-reloader:v0.81.0 | Alloy sidecar that reloads the Alloy configuration upon changes. Enabled with `alloy-____.configReloader.enabled=true`. Deployed via the Alloy Operator. | -| Kepler | quay.io/sustainable_computing_io/kepler:release-0.8.0 | Gathers energy metrics for Kubernetes objects. Enabled with `clusterMetrics.kepler.enabled=true`. | -| kube-state-metrics | registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.16.0 | Gathers Kubernetes cluster object metrics. Enabled with `clusterMetrics.kube-state-metrics.deploy=true`. | -| Node Exporter | quay.io/prometheus/node-exporter:v1.9.1 | Gathers Kubernetes cluster node metrics. Enabled with `clusterMetrics.node-exporter.deploy=true`. | -| OpenCost | ghcr.io/opencost/opencost:1.113.0@sha256:b313d6d320058bbd3841a948fb636182f49b46df2368d91e2ae046ed03c0f83c | Gathers cost metrics for Kubernetes objects. Enabled with `clusterMetrics.opencost.enabled=true`. | -| Windows Exporter | ghcr.io/prometheus-community/windows-exporter:0.31.2 | Gathers Kubernetes cluster node metrics for Windows nodes. Enabled with `clusterMetrics.windows-exporter.deploy=true`. | +This content has moved to [Images](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/helm-chart/#images). 
diff --git a/charts/k8s-monitoring/docs/Migration.md b/charts/k8s-monitoring/docs/Migration.md index 054ec97054..43c7b512ac 100644 --- a/charts/k8s-monitoring/docs/Migration.md +++ b/charts/k8s-monitoring/docs/Migration.md @@ -1,416 +1,3 @@ -# Migration guide +# Migration - -## Migrate from version 1.0 to 3.x - -To migrate from version 1.0 to 3.x, you will need to follow the steps for migrating from version 1.x to 2.0, and then -consider any changes that might benefit you when migrating to 3.x. - -## Migrate from version 2.0 to 3.x - -The 3.0 release of the k8s-monitoring Helm chart no longer utilizes the -[Alloy Helm chart](https://github.com/grafana/alloy/tree/main/operations/helm/charts/alloy) as a subchart dependency. -Instead the chart uses the new [Alloy Operator](https://github.com/grafana/alloy-operator) to deploy Alloy instances. -This allows for a more flexible and powerful deployment of Alloy, as well as the ability of your chosen features to -appropriately configure those Alloy instances. - -There are no required changes to your values.yaml file, but if you are using any of these options, you may want to -update your values file: - -- *Private image registries* - The addition of the Alloy Operator brings another image that may need to be referenced - from a new location. -- *Tail Sampling* - If you were pairing this chart with the Grafana Sampling chart in order to do tail sampling, this - functionality has been embedded into the k8s-monitoring Helm chart. Your trace destinations should now be sent to - their ultimate destination, and you can utilize the [tailSampling option](./examples/tail-sampling) of the trace - destination. -- *Service Graph Metrics* - Similar to tail sampling, in version 3.1, the service graph metrics functionality has been - embedded into the k8s-monitoring Helm chart. - -## Migrate from version 1.x to 2.0 - -The 2.0 release of the Kubernetes Monitoring Helm chart includes major changes from the 1.x version. 
Many of the -features have been reorganized around features rather than data types (such as metrics, logs, and so on). This document -explains how the settings have changed feature by feature, and how to migrate your v1 values.yaml file. - -In v1, many features were enabled by default. Cluster metrics, pod logs, cluster events, and so on. In v2, all features -are turned off by default. This means your values file better reflects your desired feature set. - -A migration tool is available -at [https://grafana.github.io/k8s-monitoring-helm-migrator/](https://grafana.github.io/k8s-monitoring-helm-migrator/). - -### Destinations - -The definition of where data is delivered has changed from `externalServices`, an object of four types, to -`destinations`, an array of any number of types. Before the `externalServices` object had four types of destinations: - -- `prometheus` - Where all metrics are delivered. This could refer to a true Prometheus server or an OTLP destination - that handles metrics. -- `loki` - Where all logs are delivered. This could refer to a true Loki server or an OTLP destination that handles - logs. -- `tempo` - Where all traces are delivered. This could refer to a true Tempo server or an OTLP destination that - handles traces. -- `pyroscope` - Where all profiles are delivered. - -In v1, the service essentially referred to the destination for the data type. In v2, the destination refers to the -protocol used to deliver the data type. - -Refer to [Destinations](destinations/README.md) for more information. 
- -Here's how to map from v1 `externalServices` to v2 `destinations`: - -| Service | v1.x setting | v2.0 setting | -|-------------------|-------------------------------|------------------------------------------------------------| -| Prometheus | `externalServices.prometheus` | `destinations: [{type: "prometheus"}]` | -| Prometheus (OTLP) | `externalServices.prometheus` | `destinations: [{type: "otlp", metrics: {enabled: true}}]` | -| Loki | `externalServices.loki` | `destinations: [{type: "loki"}]` | -| Loki (OTLP) | `externalServices.loki` | `destinations: [{type: "loki", logs: {enabled: true}}]` | -| Tempo | `externalServices.tempo` | `destinations: [{type: "otlp"}]` | -| Pyroscope | `externalServices.pyroscope` | `destinations: [{type: "pyroscope"}]` | - -#### Steps to take - -1. Create a destination for each external service you are using. -2. Provide a `name` and a `type` for the destination. -3. Provide the URL for the destination. *NOTE* this is a full data writing/pushing URL, not only the hostname. -4. Map the other settings from the original service to the new destination: - -- `authMode` --> `auth.type` -- Auth definitions (e.g. `basicAuth`) --> `auth` -- `externalLabels` --> `extraLabels` -- `writeRelabelRules` --> `metricProcessingRules` - -### Collectors - -The Alloy instances have been further split from the original to allow for more flexibility in the configuration and -predictability in their resource requirements. Each feature allows for setting the collector, but the defaults have been -chosen carefully, so you should only need to change these if you have specific requirements. 
- -| Responsibility | v1.x Alloy | v2.0 Alloy | Notes | -|-----------------------|------------------|-------------------|---------------------------------------------------------------------------------| -| Metrics | `alloy` | `alloy-metrics` | | -| Logs | `alloy-logs` | `alloy-logs` | | -| Cluster events | `alloy-events` | `alloy-singleton` | This is also for anything else that must only be deployed to a single instance. | -| Application receivers | `alloy` | `alloy-receiver` | | -| Profiles | `alloy-profiles` | `alloy-profiles` | | - -#### Steps to take - -1. Rename `alloy` to `alloy-metrics` -2. Rename `alloy-events` to `alloy-singleton` -3. Move any open receiver ports to the `alloy-receiver` instance - -### Cluster Events - -Gathering of Cluster events has been moved into its own feature called `clusterEvents`. - -| Feature | v1.x setting | v2.0 setting | -|----------------|-----------------------|-----------------| -| Cluster Events | `logs.cluster_events` | `clusterEvents` | - -#### Steps to take - -If using Cluster events, `logs.cluster_events.enabled`: - -1. Enable `clusterEvents` and `alloy-singleton` in your values file: - - ```yaml - clusterEvents: - enabled: true - alloy-singleton: - enabled: true - ``` - -2. Move `logs.cluster_events` to `clusterEvents` -3. Rename `extraStageBlocks` to `extraProcessingStages` - -### Cluster Metrics - -Cluster metrics refers to any metric data source that scrapes metrics about the cluster itself. This includes the -following data sources: - -- Cluster metrics (Kubelet, API Server, and so on) -- Node metrics (Node Exporter & Windows Exporter) -- kube-state-metrics -- Energy metrics via Kepler -- Cost metrics via OpenCost - -These have all been combined into a single feature called `clusterMetrics`. 
- -| Feature | v1.x setting | v2.0 setting | Notes | -|-------------------------------|-------------------------------|-------------------------------------|------------------------------------------------------------------------------------| -| Kubelet metrics | `metrics.kubelet` | `clusterMetrics.kubelet` | | -| cAdvisor metrics | `metrics.cadvisor` | `clusterMetrics.cadvisor` | | -| kube-state-metrics metrics | `metrics.cadvisor` | `clusterMetrics.kube-state-metrics` | | -| kube-state-metrics deployment | `kube-state-metrics` | `clusterMetrics.kube-state-metrics` | The decision to deploy is controlled by `clusterMetrics.kube-state-metrics.deploy` | -| Node Exporter metrics | `metrics.node-exporter` | `clusterMetrics.node-exporter` | | -| Node Exporter deployment | `prometheus-node-exporter` | `clusterMetrics.node-exporter` | The decision to deploy is controlled by `clusterMetrics.node-exporter.deploy` | -| Windows Exporter metrics | `metrics.windows-exporter` | `clusterMetrics.windows-exporter` | | -| Windows Exporter deployment | `prometheus-windows-exporter` | `clusterMetrics.windows-exporter` | The decision to deploy is controlled by `clusterMetrics.windows-exporter.deploy` | -| Energy metrics (Kepler) | `metrics.kepler` | `clusterMetrics.kepler` | | -| Kepler deployment | `kepler` | `clusterMetrics.kepler` | | -| Cost metrics (OpenCost) | `metrics.opencost` | `clusterMetrics.opencost` | | -| OpenCost deployment | `opencost` | `clusterMetrics.opencost` | | - -#### Steps to take - -If using Cluster metrics, `metrics.enabled`: - -1. Enable `clusterMetrics` and `alloy-metrics` in your values file: - - ```yaml - clusterMetrics: - enabled: true - alloy-metrics: - enabled: true - ``` - -2. Move each of the sections in the above table to `clusterMetrics` -3. Rename any `extraRelabelingRules` to `extraDiscoveryRules` -4. 
Rename any `extraMetricRelabelingRules` to `extraMetricProcessingRules` - -### Annotation Auto-discovery - -Discovery of pods and services by annotation has been moved into its own feature called `annotationAutodiscovery`. - -| Feature | v1.x setting | v2.0 setting | Notes | -|---------------------------|------------------------|---------------------------|-------| -| Annotation auto-discovery | `metrics.autoDiscover` | `annotationAutodiscovery` | | - -#### Steps to take - -If using annotation auto-discovery, `metrics.autoDiscover.enabled`: - -1. Enable `annotationAutodiscovery` and `alloy-metrics` in your values file: - - ```yaml - annotationAutodiscovery: - enabled: true - alloy-metrics: - enabled: true - ``` - -2. Move the contents of `metrics.autoDiscover` to `annotationAutodiscovery` -3. Rename any `extraRelabelingRules` to `extraDiscoveryRules` -4. Rename any `extraMetricRelabelingRules` to `extraMetricProcessingRules` - -### Application Observability - -Application Observability is the new name for the feature that encompasses receiving data via various receivers (e.g. -OTLP, Zipkin, etc...), processing that data, and delivering it to the destinations. Previously, this was mostly handled -within the metrics, logs, and traces sections, but has been moved into its own feature. 
- -| Feature | v1.x setting | v2.0 setting | Notes | -|----------------------|-------------------------------|-----------------------------------------------|-------| -| Collector ports | `alloy.alloy.extraPorts` | `alloy-receiver.alloy.extraPorts` | | -| Receiver definitions | `receivers` | `applicationObservability.receivers` | | -| Processors | `receivers.processors` | `applicationObservability.processors` | | -| Metric Filters | `metrics.receiver.filters` | `applicationObservability.metrics.filters` | | -| Metric Transforms | `metrics.receiver.transforms` | `applicationObservability.metrics.transforms` | | -| Log Filters | `logs.receiver.filters` | `applicationObservability.logs.filters` | | -| Log Transforms | `logs.receiver.transforms` | `applicationObservability.logs.transforms` | | -| Trace Filters | `traces.receiver.filters` | `applicationObservability.traces.filters` | | -| Trace Transforms | `traces.receiver.transforms` | `applicationObservability.traces.transforms` | | - -#### Steps to take - -If using application observability, `traces.enabled`, `receivers.*.enabled`: - -1. Enable `applicationObservability` and `alloy-receiver` in your values file: - - ```yaml - applicationObservability: - enabled: true - alloy-receiver: - enabled: true - ``` - -2. Move any extra ports opened for applications from `alloy.alloy.extraPorts` to `alloy-receiver.alloy.extraPorts` -3. Enable the receivers you want to use in `applicationObservability.receivers`, for example: - - ```yaml - applicationObservability: - receivers: - grpc: - enabled: true - ``` - -4. Move receiver processors from `receivers.processors` to `applicationObservability.processors` -5. Move metric filters from `metrics.receiver.filters` to `applicationObservability.metrics.filters` -6. Move metric transforms from `metrics.receiver.transforms` to `applicationObservability.metrics.transforms` -7. Move log filters from `logs.receiver.filters` to `applicationObservability.logs.filters` -8. 
Move log transforms from `logs.receiver.transforms` to `applicationObservability.logs.transforms` -9. Move trace filters from `traces.receiver.filters` to `applicationObservability.traces.filters` -10. Move trace transforms from `traces.receiver.transforms` to `applicationObservability.traces.transforms` - -### Zero-code instrumentation with Grafana Beyla - -Deployment and handling of the zero-code instrumentation feature (using Grafana Beyla) has been moved into its own -feature called `autoInstrumentation`. - -| Feature | v1.x setting | v2.0 setting | Notes | -|------------------------------|-----------------|-----------------------------|-------| -| Auto-instrumentation metrics | `metrics.beyla` | `autoInstrumentation.beyla` | | -| Beyla deployment | `beyla` | `autoInstrumentation.beyla` | | - -#### Steps to take - -If using Beyla, `beyla.enabled`: - -1. Enable `autoInstrumentation` and `alloy-metrics` in your values file: - - ```yaml - autoInstrumentation: - enabled: true - alloy-metrics: - enabled: true - ``` - -2. Combine `beyla` and `metrics.beyla` and copy to `autoInstrumentation.beyla` - -### Pod logs - -Gathering of Pods logs has been moved into its own feature called `podLogs`. - -| Feature | v1.x setting | v2.0 setting | Notes | -|----------|-----------------|--------------|-------| -| Pod logs | `logs.pod_logs` | `podLogs` | | - -#### Steps to take - -If using Pod logs `logs.pod_logs.enabled`: - -1. Enable `podLogs` and `alloy-logs` in your values file: - - ```yaml - podLogs: - enabled: true - alloy-logs: - enabled: true - ``` - -2. Move `logs.pod_logs` to `podLogs` -3. Rename any `extraRelabelingRules` to `extraDiscoveryRules` -4. Rename any `extraStageBlocks` to `extraLogProcessingStages` - -### Prometheus Operator objects - -Handling for Prometheus Operator objects, such as `ServiceMonitors`, `PodMonitors`, and `Probes` has been moved to the -`prometheusOperatorObjects` feature. 
This feature also includes the option to deploy the Prometheus Operator CRDs. - -| Feature | v1.x setting | v2.0 setting | Notes | -|-------------------------|------------------------------------|---------------------------------------------|-------| -| PodMonitor settings | `metrics.podMonitors` | `prometheusOperatorObjects.podMonitors` | | -| Probe settings | `metrics.probes` | `prometheusOperatorObjects.probes` | | -| ServiceMonitor settings | `metrics.serviceMonitors` | `prometheusOperatorObjects.serviceMonitors` | | -| CRDs deployment | `prometheus-operator-crds.enabled` | `crds.deploy` | | - -#### Steps to take - -If using Prometheus Operator objects, `metrics.podMonitors.enabled`, `metrics.probes.enabled`, -`metrics.serviceMonitors.enabled`, `prometheus-operator-crds.enabled`: - -1. Enable `prometheusOperatorObjects` and `alloy-metrics` in your values file: - - ```yaml - prometheusOperatorObjects: - enabled: true - alloy-metrics: - enabled: true - ``` - -2. Move `metrics.podMonitors` to `prometheusOperatorObjects.podMonitors` -3. Move `metrics.probes` to `prometheusOperatorObjects.probes` -4. Move `metrics.serviceMonitors` to `prometheusOperatorObjects.serviceMonitors` - -### Integrations - -Integrations are a new feature in v2.0 that allow you to enable and configure additional data sources. This -includes the Alloy metrics that were previously part of `v1`. Some service integrations that previously needed to be -defined in the `extraConfig` and `logs.extraConfig` sections can now be used in the integration feature. - -Move your `extraConfig` to the new `integrations` feature if either of these are true: - -- You are using the `metrics.alloy` setting for getting Alloy metrics. -- You are using `extraConfig` to add config to get data from any of the new built-in integrations. 
- -#### Built-in integrations - -| Integration | v1.x setting | v2.0 setting | Notes | -|--------------|------------------------------------|-----------------------------|-------| -| Alloy | `metrics.alloy` | `integrations.alloy` | | -| cert-manager | `extraConfig` | `integrations.cert-manager` | | -| etcd | `extraConfig` | `integrations.etcd` | | -| MySQL | `extraConfig` & `logs.extraConfig` | `integrations.mysql` | | - -##### Steps to take - -If using the Alloy integration `metrics.alloy.enabled`, or if using `extraConfig` for cert-manager, etcd, or MySQL: - -1. Create instances of the integration that you want, and enable `alloy-metrics` in your values file: - - ```yaml - integrations: - alloy: - instances: - - name: "alloy" - alloy-metrics: - enabled: true - ``` - -2. Move `metrics.alloy` to `integrations.alloy.instances[]` - -#### Other integrations - -For service integrations that are not available in the built-in integrations feature, you can continue to use them -in the `extraConfig` sections. See the [Extra Configs](#extra-configs) section below for guidance. - -### Extra configs - -The variables for adding arbitrary configuration to the Alloy instances have been moved inside the respective Alloy -instance. If you are using `extraConfig` to add configuration for scraping metrics from an integration built-in with the -[integrations](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-integrations) -feature (such as cert-manager, etcd, or MySQL), you can move that configuration to the new `integrations` feature. 
- -For other uses of `extraConfig`, refer to the following table: - -| extraConfig | v1.x setting | v2.0 setting | Notes | -|--------------------|-----------------------------------|-------------------------------|-------| -| Alloy for Metrics | `extraConfig` | `alloy-metrics.extraConfig` | | -| Alloy for Apps | `extraConfig` | `alloy-receiver.extraConfig` | | -| Alloy for Events | `logs.cluster_events.extraConfig` | `alloy-singleton.extraConfig` | | -| Alloy for Logs | `logs.extraConfig` | `alloy-logs.extraConfig` | | -| Alloy for Profiles | `profiles.extraConfig` | `alloy-profiles.extraConfig` | | - -#### Steps to take - -1. Move `extraConfig` related to metrics to `alloy-metrics.extraConfig` -2. Move `extraConfig` related to application receivers to `alloy-receivers.extraConfig` -3. Move `logs.cluster_events.extraConfig` to `alloy-singleton.extraConfig` -4. Move `logs.extraConfig` to `alloy-logs.extraConfig` -5. Move `profiles.extraConfig` to `alloy-profiles.extraConfig` -6. Rename destinations for telemetry data to the appropriate destination component. Refer to the following section. - -#### Destination names - -Note that the `` in the component reference is the name of the destination, set to lowercase and -with any non-alphanumeric characters replaced with an underscore. For example, if your destination is named -`Grafana Cloud Metrics`, then the destination name would be `grafana_cloud_metrics`. 
- -| Data type | v1.x setting | v2.0 setting | -|-----------|-----------------------------------------------|-------------------------------------------------------| -| Metrics | `prometheus.relabel.metrics_service.receiver` | `prometheus.remote_write..receiver` | -| Logs | `loki.process.logs_service.receiver` | `loki.write..receiver` | -| Traces | `otelcol.exporter.otlp.traces_service.input` | `otelcol.exporter.otlp..input` | -| Profiles | `pyroscope.write.profiles_service.receiver` | `pyroscope.write..receiver` | - -### Dropped features - -The following features have been removed from the 2.0 release: - - -- **Pre-install hooks**: The pre-install and pre-upgrade hooks that performed config validation have been removed. The - Alloy Pods now validate the configuration at runtime and log any issues and without these Pods. This greatly - decreases startup time. -- **`helm test` functionality**: The `helm test` functionality that ran a config analysis and attempted to query the - databases for expected metrics and logs has been removed. This functionality was either not fully developed or not - useful in production environments. The query testing was mainly for CI/CD testing in development and has been - replaced by more effective and comprehensive methods. +This content has moved to [Migrate to another version of the Helm chart](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/helm-chart/migrate-helm-chart/). 
diff --git a/charts/k8s-monitoring/docs/ScrapeApplicationMetrics.md b/charts/k8s-monitoring/docs/ScrapeApplicationMetrics.md index b0891c6294..519e0b8e95 100644 --- a/charts/k8s-monitoring/docs/ScrapeApplicationMetrics.md +++ b/charts/k8s-monitoring/docs/ScrapeApplicationMetrics.md @@ -1,332 +1,3 @@ # Scraping Additional Metrics -If you have an application or a service running on your Kubernetes Cluster that is exporting Prometheus metrics, you can -use this chart to scrape those metrics and send them to your datastore. This document explains several options to -accomplish this. - -## Options - -1. Use the `k8s.grafana.com/scrape` annotation on your Pods or services. -2. Use Prometheus Operator CRDs, like ServiceMonitors, PodMonitors, or Probes. -3. Make a custom Grafana Alloy configuration. - -## Annotations - -You can use the [Annotation Autodiscovery](../charts/feature-annotation-autodiscovery) feature to look for Pods and -Services that have the `k8s.grafana.com/scrape` annotation set. When set, the Alloy instance scrapes them for metrics. - -Extra annotations can also be set to control the behavior of the discovery and scraping of the metrics: - -- `k8s.grafana.com/job: ` - Sets the job label. -- `k8s.grafana.com/instance: ` - Sets the instance label. -- `k8s.grafana.com/metrics.path: ` - Sets the metrics path. Required if the metrics path is not the default - of `/metrics`. -- `k8s.grafana.com/metrics.portName: ` - Specifies the port to scrape, by name. This named port must exist on - the pod or service. -- `k8s.grafana.com/metrics.portNumber: ` - Specifies the port to scrape, by number. -- `k8s.grafana.com/metrics.scheme: [http|https]` - Sets the scheme to use. Required if the scheme is not HTTP. - -The chart itself provides additional options: - -- `annotationAutodiscovery.extraDiscoveryRules` - Use relabeling rules to filter the Pods or services to scrape. -- `annotationAutodiscovery.metricsTuning` - Specify which metrics to keep or drop. 
-- `annotationAutodiscovery.extraMetricProcessingRules` - Use relabeling rules to process the metrics after scraping - them. - -These values apply to all discovered Pods and services. Refer to the -[feature documentation](../charts/feature-annotation-autodiscovery) to learn about all the possible options. - -## Prometheus Operator CRDs - -You can use the [Prometheus Operator Objects](../charts/feature-prometheus-operator-objects) feature to detect and -utilize ServiceMonitor, PodMonitor, and Probe objects on the Kubernetes cluster. If any of those objects are detected, -Alloy will utilize them to extend its configuration. - -For more information about creating and configuring these options, refer to -the [Prometheus Operator Documentation](https://github.com/prometheus-operator/prometheus-operator). - -This chart provides ways to customize how Alloy handles these objects. - -### Controlling discovery - -To change how Prometheus Operator objects are discovered, use these options in the Helm chart: - -- `prometheusOperatorObjects.serviceMonitors.enabled` - If set to true, Alloy looks for and consumes ServiceMonitors. -- `prometheusOperatorObjects.serviceMonitors.namespaces` - Only use ServiceMonitors that exist in these namespaces. -- `prometheusOperatorObjects.serviceMonitors.selector` - Use - a [selector](https://grafana.com/docs/alloy/latest/reference/components/prometheus.operator.servicemonitors/#selector-block) - block to provide a more refined selection of objects. - -The same options are present for `prometheusOperatorObjects.podmonitors` and `prometheusOperatorObjects.probes`. - -### Controlling scraping - -Most of the scrape configuration is embedded in the Prometheus Operator object itself. - -- `prometheusOperatorObjects.serviceMonitors.scrapeInterval` - Sets the scrape interval, if one was not specified in the - object. - -The same option is present for `prometheusOperatorObjects.podmonitors` and `prometheusOperatorObjects.probes`. 
- -### Controlling processing - -This chart can set metrics relabeling rules for processing the metrics after scraping them. - -- `prometheusOperatorObjects.serviceMonitors.extraMetricRelabelingRules` - Sets post-scraping rules for - a [prometheus.relabel](https://grafana.com/docs/alloy/latest/reference/components/prometheus.relabel/) - configuration component. - -The same option is present for `prometheusOperatorObjects.podmonitors` and `prometheusOperatorObjects.probes`. - -## Custom Alloy Config - -This option allows for the greatest amount of flexibility and utility. - -When adding new configuration, it's helpful to think of it in four phases: - -1. Discovery - How should the collector find my service? -2. Scraping - How should metrics get scraped from my service? -3. Processing - Is there any work that needs to be done to these metrics? -4. Delivery - Where should these metrics be sent? - -We will go deeper into each phase below. - -## Discovery - -The discovery phase is about finding the specific pod or service that needs to be scraped for metrics. - -To get started, you can use the -[`discovery.kubernetes`](https://grafana.com/docs/alloy/latest/reference/components/discovery.kubernetes/) component to -discover specific resources in your Kubernetes Cluster. This component uses the Kubernetes API to discover pods, -services, -endpoints, nodes, and more. - -This component can also pre-filter the discovered resources based on their namespace, labels, and other selectors. This -is recommended, because it'll greatly reduce the CPU and memory usage of the Alloy instance, as it will not need to -filter through the resources in the relabeling compnent later. - -Here is an example component that we've named "blue_database_service". 
This component takes the list of all services -from `discovery.kubernetes.services` and filters to a service named "database", in the namespace "blue", with the port -named "metrics": - -```grafana-alloy -discovery.kubernetes "blue_database_service" { - role = "service" // This component will return services... - namespaces { - names = ["blue"] // ... that exist in the "blue" namespace - } - - selector { - role = "service" - label = "app.kubernetes.io/name=db" // ... and have the label "app.kubernetes.io/name=db" - } -} -``` - -### Service discovery - -You'll likely need to do additional filtering after discovering components with the `discovery.kubernetes` -component. You can use rules to select your specific service based on its name, namespace, labels, port names or -numbers, and many other variables. To do so, use -a [`discovery.relabel`](https://grafana.com/docs/alloy/latest/reference/components/discovery.relabel/) -component and add one or more rules, using meta-labels that are set automatically by the -`discovery.kubernetes` component and always start with `__`. - -Here, we're continuing our example and will add a `discovery.relabel` component. This component takes the list of -services from our `discovery.kubernetes` component and further filters them to return only the one with the port named -"metrics": - -```grafana-alloy -discovery.relabel "blue_database_service" { - targets = discovery.kubernetes.blue_database_service.targets // Gets our service from before... - rule { // ... and only scrape its port named "metrics". - source_labels = ["__meta_kubernetes_service_port_name"] - regex = "metrics" - action = "keep" - } -} -``` - -The [documentation](https://grafana.com/docs/alloy/latest/reference/components/discovery.kubernetes/#service-role) -has the list of meta labels for services. Note that there are different labels for port name and port number. Make sure -you use the right label for a named port or the port number. 
- -This is also a good place to add any extra labels that will be added to the metrics after scraping. For example, if you -wanted to set the label `team="blue"`, you might use this additional rule in the `blue_database_service` component: - -```grafana-alloy - rule { - target_label = "team" - action = "replace" - replacement = "blue" - } -``` - -### Pod discovery - -Similar to service discovery, use -a [`discovery.kubernetes`](https://grafana.com/docs/alloy/latest/reference/components/discovery.kubernetes/) component -and a [`discovery.relabel`](https://grafana.com/docs/alloy/latest/reference/components/discovery.relabel/) component to -select the specific Pod you want to scrape. -The [meta labels for pods](https://grafana.com/docs/alloy/latest/reference/components/discovery.kubernetes/#pod-role) -will be slightly different, but the concept is the same. - -Here is an example that filters to a specific set of Pods that starts with name "analysis", with the label -"system.component=image": - -```grafana-alloy -discovery.kubernetes "image_analysis_pods" { - role = "pod" - - selector { - role = "pod" - label = "system.component=image" - } -} - -discovery.relabel "image_analysis_pods" { - targets = discovery.kubernetes.image_analysis_pods.targets - rule { - source_labels = ["__meta_kubernetes_pod_name"] - regex = "analysis.*" - action = "keep" - } -} -``` - -Note that there is a unique meta label for every Kubernetes label. The labels are prefixed with -`__meta_kubernetes_pod_label_` and the label name is normalized so all non-alphanumeric characters become underscores -(`_`). - -## Scraping - -Now that you've selected the specific pod or service you want, you can scrape it for metrics. Do this with the -[`prometheus.scrape`](https://grafana.com/docs/alloy/latest/reference/components/prometheus.scrape/) component. -Essentially, you only need to declare what targets to scrape and where to send the scraped metrics. 
Here is an example: - -```grafana-alloy -prometheus.scrape "processing_app" { - targets = discovery.relabel.image_analysis_pods.output - forward_to = [prometheus.relabel.metrics_service.receiver] -} -``` - -Note that the `forward_to` field in the [Delivery](#delivery) is explained in a subsequent section of this document. - -This component gives a lot of flexibility to modify how things are scraped, including setting the `job` label, how -frequently the metrics should be scraped, the path to scrape, and many more. Here is an example with lots of options: - -```grafana-alloy -prometheus.scrape "processing_app" { - targets = discovery.relabel.image_analysis_pods.output - job_name = "integrations/processing" - scrape_interval = "120s" - metrics_path = "/api/v1/metrics" - forward_to = [prometheus.relabel.metrics_service.receiver] -} -``` - -## Processing - -Often, you want to perform some post-scrape processing to the metrics. Some common reasons are to: - -- Limit the amount of metrics being sent up to Prometheus. -- Add, change, or drop labels. - -Processing is done with the -[`prometheus.relabel`](https://grafana.com/docs/alloy/latest/reference/components/prometheus.relabel/) -component. It uses the same type of rules as `discovery.relabel`, but instead of filtering scrape _targets_, it filters -the _metrics_ that were scraped. - -Here is an example of processing that filters down the scraped metrics to only `up` and anything that starts with -`processor` (thus, dropping all other metrics): - -```grafana-alloy -prometheus.scrape "processing_app" { - targets = discovery.relabel.image_analysis_pods.output - forward_to = [prometheus.relabel.processing_app.receiver] -} - -prometheus.relabel "processing_app" { - rule { - source_labels = ["__name__"] - regex = "up|processor.*" - action = "keep" - } - forward_to = [prometheus.relabel.metrics_service.receiver] -} -``` - -Note that the `prometheus.scrape` component needs to be adjusted to forward to this component. 
- -## Delivery - -The `prometheus.scrape` and `prometheus.relabel` components need to send their outputs to another component. This is the -purpose of their `forward_to` field. Forwarding can be to another `prometheus.relabel` component, but eventually, the -final step is to send the metrics to a Prometheus server for storage, where it can be further processed by recording -rules, or queried and displayed by Grafana. For this, use -the [`prometheus.remote_write`](https://grafana.com/docs/alloy/latest/reference/components/prometheus.remote_write/) -component. - -This chart automatically creates components for your metrics destinations, configured by the `.destinations` values. The -names for these components are derived from the destination name and type: - -| Destination name | Destination type | Component name | -|--------------------------|------------------|-----------------------------------------------------------| -| `My Metrics Destination` | `prometheus` | `prometheus.remote_write.my_metrics_destination.receiver` | -| `otlp-endpoint` | `otlp` | `otelcol.receiver.prometheus.otlp_endpoint.receiver` | - -Note that the component name uses lowercase and replaces non-alphanumeric characters with underscores (`_`). 
- -## Putting it all together - -The easiest way to include your configuration into this chart is to save it into a file and pass it directly to the -`helm install` command: - -```text -$ ls -processor-config.alloy chart-values.yaml -$ cat processor_config.alloy -discovery.kubernetes "image_analysis_pods" { - role = "pod" - - selector { - role = "pod" - label = "system.component=image" - } -} - -discovery.relabel "image_analysis_pods" { - targets = discovery.kubernetes.image_analysis_pods.targets - rule { - source_labels = ["__meta_kubernetes_pod_name"] - regex = "analysis.*" - action = "keep" - } -} - -prometheus.scrape "processing_app" { - targets = discovery.relabel.image_analysis_pods.output - forward_to = [prometheus.relabel.processing_app.receiver] -} - -prometheus.relabel "processing_app" { - rule { - source_labels = ["__name__"] - regex = "up|processor.*" - action = "keep" - } - forward_to = [prometheus.relabel.metrics_service.receiver] -} -$ head chart-values.yaml -cluster: - name: my-cluster - -destinations: - - name: metrics-service - type: prometheus - url: https://my-metrics-destination.example.com/api/v1/write -$ helm upgrade --install grafana-k8s-monitoring grafana/k8s-monitoring --values chart-values.yaml --set-file "alloy-metrics.extraConfig=processor-config.alloy" -``` - -For more information about using the `extraConfig` values, see [the documentation](UsingExtraConfig.md). +This content has moved to [Scrape and forward application metrics](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/scrape-app-pod-metrics/). 
diff --git a/charts/k8s-monitoring/docs/Structure.md b/charts/k8s-monitoring/docs/Structure.md index d156fa03cc..575a8f91b9 100644 --- a/charts/k8s-monitoring/docs/Structure.md +++ b/charts/k8s-monitoring/docs/Structure.md @@ -1,99 +1,78 @@ # Structure The Kubernetes Monitoring Helm chart contains many software packages, and builds a comprehensive set of configuration -and secrets for those packages. - -![Kubernetes Monitoring inside of a Cluster](https://grafana.com/media/docs/grafana-cloud/k8s/helm-diagram-v3.png) - -## Software deployed - -This Helm chart deploys several packages to generate and capture the telemetry data on the cluster. This list -corresponds to the list of dependencies in this chart's Chart.yaml file. For each package, there is an associated -section inside the Helm chart's values.yaml file that controls how it is configured. - -| Name | Type | Associated values | Description | -|----------------------------------------------------------------------------------------|---------------------------|-------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [Grafana Alloy](https://grafana.com/oss/alloy/) for Metrics | StatefulSet | `alloy-metrics` | The Alloy instance responsible for scraping metrics, and accepting metrics, logs, and traces via receivers. | -| Grafana Alloy Singleton | Deployment | `alloy-singleton` | The Alloy instance responsible for anything that must be done on a single instance, such as gathering Cluster events from the API server. This instance does not support clustering, so only one instance should be used. | -| Grafana Alloy for Logs | DaemonSet | `alloy-logs` | The Alloy instance that gathers Pod logs. By default, it uses HostPath volume mounts to read Pod log files directly from the Nodes. 
It can alternatively get logs via the API server, and be deployed as a Deployment. | -| Grafana Alloy for Application Data | DaemonSet | `alloy-receiver` | The Alloy instance that opens receiver ports to process data delivered directly to Alloy (for example, applications instrumented with OpenTelemetry). SDKs | -| Grafana Alloy for Profiles | DaemonSet | `alloy-events` | The Alloy instance responsible for gathering profiles. | -| [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics) | Deployment | `kube-state-metrics` | A service for generating metrics about the state of the objects inside the Cluster. | -| [Node Exporter](https://github.com/prometheus/node_exporter) | DaemonSet | `prometheus-node-exporter` | An exporter for gathering hardware and OS metrics for *NIX nodes of the Cluster. | -| [Windows Exporter](https://github.com/prometheus-community/windows_exporter) | DaemonSet | `prometheus-windows-exporter` | An exporter for gathering hardware and OS metrics for Windows nodes of the Cluster. Not deployed by default. | -| [OpenCost](https://www.opencost.io/) | Deployment | `opencost` | Used for gathering cost metrics for the Cluster. | -| [Prometheus Operator CRDs](https://github.com/prometheus-operator/prometheus-operator) | CustomResourceDefinitions | `prometheus-operator-crds` | The custom resources for the Prometheus Operator. Use if you want to deploy PodMonitors, ServiceMonitors, or Probes. | -| [Grafana Beyla](https://grafana.com/oss/beyla-ebpf/) | DaemonSet | `beyla` | Used for zero-code instrumentation of applications and gathering network metrics. | -| [Kepler](https://sustainable-computing.io/) | DaemonSet | `kepler` | Used for gathering energy consumption metrics. | - -### Grafana Alloy instances - -There are multiple instances of Grafana Alloy instead of one instance that includes all functions. 
This design is -required for: - -* Balance between functionality and scalability -* Security - -#### Functionality/scalability balance - -Without multiple instances, scalability can be hindered. For example, the default functionality of the Grafana Alloy for -Logs is to gather -logs via HostPath volume mounts. -This functionality requires the instance to be deployed as a DaemonSet. -The Grafana Alloy for Metrics is -deployed as a StatefulSet, which allows it to be scaled (optionally with a HorizontalPodAutoscaler) based on load. -Otherwise, it would lose its ability to scale. -The Grafana Alloy Singleton cannot be -scaled beyond one replica, because that would result in duplicate data being sent. - -#### Security - -Another reason for using distinct instances is to minimize the security footprint required. While the Alloy for Logs -may require a HostPath volume mount, the other instances do not. -That means they can be deployed with a more restrictive -security context. -This is similarly why we use a distinct Grafana Beyla and Node Exporter deployments to gather zero-code instrumented -data and Node metrics respectively, rather than using the -[beyla.ebpf](https://grafana.com/docs/alloy/latest/reference/components/beyla/beyla.ebpf/) or -[prometheus.exporter.unix](https://grafana.com/docs/alloy/latest/reference/components/prometheus/prometheus.exporter.unix/) -Alloy components. -Separate instances allow Beyla and Node Exporter to be deployed with the permissions they require to gather their -data, while limiting Grafana Alloy to only act as a collector of that data. - -## Configuration created - -This Helm chart also creates the configuration files, stored in ConfigMaps, for the Grafana Alloy instances. The -configuration is built based on the features enabled in the values file and the collector they are assigned to. For -example, the Cluster Metrics feature is assigned to the Grafana Alloy for Metrics by default. 
- -All configuration related to telemetry data destinations are automatically loaded onto the Grafana Alloy instances that -require them. +and secrets for those packages. Refer to +the [Helm chart documentation](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/helm-chart/) +to learn more. + +## Charts + +Features are stored in their own Helm chart in +the [charts folder](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts). Each feature +chart is not a standalone chart, but is included in the main k8s-monitoring Helm chart as a dependency. The parent chart +interacts with the feature chart via template functions. + +To learn more, refer +to [Features documentation](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/helm-chart/#features). + +## Collectors + +Collectors in +the [collectors folder](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/collectors) are +Grafana Alloy instances deployed by the Alloy Operator as Kubernetes workloads. +To view the values and descriptions, refer +to [alloy](https://github.com/grafana/k8s-monitoring-helm/blob/main/charts/k8s-monitoring/docs/collectors/alloy.md). +To learn more, refer +to [Collectors reference documentation](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/helm-chart/collector-reference/). + +## Examples + +The [examples folder](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/docs/examples) +contains full examples to guide you in configuring and customizing the Helm chart. To learn more, refer +to [Customize the Helm chart](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/helm-chart/customize-helm-chart/). 
+ +## Destinations + +The [destinations folder](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/destinations) +contains examples and a values.yaml file for each destination. To learn more about destinations, refer +to [Destinations and proxies](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/helm-chart/customize-helm-chart/#destinations-and-proxies). + +## Contributing + +Use the following guidelines to contribute to the Helm chart. ### Features -Here is the list of features, their section within the values file, and the default collector they are assigned to: - -| Name | Associated values | Default collector | Description | -|----------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------|--------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [Cluster Metrics](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-cluster-metrics) | `clusterMetrics` | `alloy-metrics` | Gathers metrics related the the Kubernetes Cluster itself. | -| [Cluster Events](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-cluster-events) | `clusterEvents` | `alloy-singleton` | Gathers Kubernetes lifecycle events as log data. | -| [Node Logs](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-node-logs) | `nodeLogs` | `alloy-logs` | Gathers logs from the Kubernetes Nodes. | -| [Pod Logs](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-pod-logs) | `podLogs` | `alloy-logs` | Gathers logs from the Kubernetes Pods. 
| -| [Application Observability](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-application-observability) | `applicationObservability` | `alloy-receiver` | Receives and processes application data from instrumented services. | -| [Auto Instrumentation](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-auto-instrumentation) | `autoInstrumentation` | `alloy-metrics` | Deploys Grafana Beyla and gathers zero-code instrumented application metrics. If Application Observability is also enabled, zero-code instrumented application traces are captured as well. | -| [Annotation Autodiscovery](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-annotation-autodiscovery) | `annotationAutodiscovery` | `alloy-metrics` | Automatically discovers and scrapes metrics from specially annotated Pods and Services. | -| [Prometheus Operator Objects](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-prometheus-operator-objects) | `prometheusOperatorObjects` | `alloy-metrics` | Discovers and scrapes metrics from Probes, PodMonitors, and ServiceMonitors. | -| [Profiling](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-profiling) | `profiling` | `alloy-profiles` | Gathers application profiles from processes running within the Kubernetes Cluster. | -| [Integrations](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-integrations) | `integrations` | `alloy-metrics` & `alloy-logs` | Gathers metrics and logs from common services. | - -### Additional configuration sources - -Each collector also has the ability to specify additional configuration sources. 
These are specified within the Alloy -instance's own section in the values file: - -| Name | Associated values | Description | -|----------------------|---------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Extra configuration | `alloy-___.extraConfig` | Additional configuration to be added to the configuration file. Use this for adding custom configuration, but do not use it to modify existing configuration. | -| Remote configuration | `alloy-___.remoteConfig` | Configuration for fetching remotely defined configuration. To configure, refer to [Grafana Fleet Management](https://grafana.com/docs/grafana-cloud/send-data/fleet-management/). | -| Logging | `alloy-___.logging` | Configuration for [logging](https://grafana.com/docs/alloy/latest/reference/config-blocks/logging/). | -| Live debugging | `alloy-___.liveDebugging` | Configuration for enabling the [Alloy Live Debugging feature](https://grafana.com/docs/alloy/latest/troubleshoot/debug/#live-debugging-page). | +To add a new feature, create a new Helm chart in the `charts` directory. The chart should have a `feature-` prefix in +its name. 
The following files are required for a feature chart: + +- `templates/_module.alloy.tpl` - This file should contain a template function named +  `feature.<feature name>.module`. Creates an [Alloy module](https://grafana.com/docs/alloy/latest/get-started/modules/) +  that wraps the configuration for your feature, and exposes any of these arguments as appropriate: +  - `metrics_destination` - Defines where scraped metrics should be delivered +  - `logs_destination` - Defines where logs should be delivered +  - `traces_destination` - Defines where traces should be delivered +  - `profiles_destination` - Defines where profiles should be delivered + +- `templates/_notes.alloy.tpl` - This file should contain these template functions: +  - `feature.<feature name>.notes.deployments` - Returns a list of workloads that will be +    deployed to the Kubernetes Cluster by the feature +  - `feature.<feature name>.notes.task` - Returns a one-line summary of what this feature will do +  - `feature.<feature name>.notes.actions` - Returns any prompts for the user to take additional +    action after deployment +  - `feature.<feature name>.summary` - Returns a dictionary of settings that is used for self-reporting metrics + +Also refer to the [Contributing guide](./CONTRIBUTING.md). + +### Documentation + +To add a README.md for a feature: + +1. Within the examples directory, create a subfolder for the feature in the [features](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/docs/examples/features) folder. +2. Add a description text for the feature. +3. Add a values.yaml file. +4. TODO: Document any remaining steps, such as generating the example output. + +To add documentation to +the [Kubernetes Monitoring Helm chart docs](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/helm-chart/), +create an issue in this repository if you are not a Grafana employee. 
diff --git a/charts/k8s-monitoring/docs/TargetingDataCollection.md b/charts/k8s-monitoring/docs/TargetingDataCollection.md index d4d34bd602..4f7fecacde 100644 --- a/charts/k8s-monitoring/docs/TargetingDataCollection.md +++ b/charts/k8s-monitoring/docs/TargetingDataCollection.md @@ -1,56 +1,3 @@ # Targeted data collection -The Kubernetes Monitoring Helm chart allows you to target specific namespaces or Pods for data collection. There are -many different methods to control this, and the following sections will explain how to use many of them. - -## Kubernetes Annotations - -Often annotations are used for controlling service discovery, but you can also use them to configure how data is -collected. Several features within this Helm chart can be controlled using Kubernetes annotations. - -### Feature: Annotation Autodiscovery - -Use the [Annotation Autodiscovery feature](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-annotation-autodiscovery) to discover and scrape Prometheus-style metrics from Pods and Services -on your Cluster. You can apply these default annotations to a Pod or Service: - -* `k8s.grafana.com/scrape`: Scrape this Pod or Service for metrics. -* `k8s.grafana.com/job`: The value to use for the `job` label. -* `k8s.grafana.com/instance`: The value to use for the `instance` label. -* `k8s.grafana.com/metrics.container`: The name of the container within the Pod to scrape for metrics. This is used to target a specific container within a Pod that has multiple containers. -* `k8s.grafana.com/metrics.path`: The path to scrape for metrics. Defaults to `/metrics`. -* `k8s.grafana.com/metrics.portNumber`: The port on the Pod or Service to scrape for metrics. This is used to target a specific port by its number, rather than all ports. -* `k8s.grafana.com/metrics.portName`: The named port on the Pod or Service to scrape for metrics. This is used to target a specific port by its name, rather than all ports. 
-* `k8s.grafana.com/metrics.scheme`: The scheme to use when scraping metrics. Defaults to `http`. -* `k8s.grafana.com/metrics.param`: Allows for setting HTTP parameters when calling the scrape endpoint. Use with `k8s.grafana.com/metrics.param_=""`. -* `k8s.grafana.com/metrics.scrapeInterval`: The scrape interval to use when scraping metrics. Defaults to `60s`. -* `k8s.grafana.com/metrics.scrapeTimeout`: The scrape timeout to use when scraping metrics. Defaults to `10s`. - -### Feature: Profiling - -The [Profiling feature](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-profiling) allows you to collect profiling data from your applications. This feature can collect -profiles using eBPF, Java, or pprof. - -#### eBPF Profiling - -`profiles.grafana.com/cpu.ebpf.enabled`: Using eBPF, collect CPU profiles from this Pod. - -#### Java Profiling - -`profiles.grafana.com/java.enabled`: Collect Java profiles from this Pod. - -#### pprof Profiling - -For each enabled type (`memory`, `block`, `goroutine`, `mutex`, `cpu`, `fgprof`, `godeltaprof_memory`, -`godeltaprof_mutex`, `godeltaprof_block`), you can use the following annotations to control profiling: - -* `profiles.grafana.com/.scrape`: This Pod should have pprof profiles collected for the specified type. -* `profiles.grafana.com/.port`: Profiles for the specified type should be collected from this port number. -* `profiles.grafana.com/.port_name`: Profiles for the specified type should be collected from this named port. -* `profiles.grafana.com/.path`: Profiles for the specified type should be collected from this path. -* `profiles.grafana.com/.scheme`: The scheme to use when scraping profiles for the specified type. Defaults to `http`. 
- -### Feature: Pod Logs - -Use the following annotation to control [Pod logs](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-pod-logs) collection: - -`k8s.grafana.com/logs.job`: The value to use for the `job` label. +This content has moved to [Kubernetes annotations](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/helm-chart/customize-helm-chart/#kubernetes-annotations). diff --git a/charts/k8s-monitoring/docs/Troubleshooting.md b/charts/k8s-monitoring/docs/Troubleshooting.md index 59600ade8c..ad86478cc6 100644 --- a/charts/k8s-monitoring/docs/Troubleshooting.md +++ b/charts/k8s-monitoring/docs/Troubleshooting.md @@ -1,130 +1,3 @@ # Troubleshooting - -This document contains some information about frequently encountered issues and how to resolve them. - -- [General Tips](#general-tips) - - [Alloy Web UI](#alloy-web-ui) -- [Instructions for specific Cluster platform providers](#instructions-for-specific-cluster-platform-providers) -- [Frequently seen problems](#frequently-seen-problems) - - [Authentication error: invalid scope requested](#authentication-error-invalid-scope-requested) - - [Kepler pods crashing on AWS Graviton nodes](#kepler-pods-crashing-on-aws-graviton-nodes) - - [ResourceExhausted Error when Sending Traces](#resourceexhausted-error-when-sending-traces) - -## General tips - -### Alloy Web UI - -Grafana Alloy has a -[web user interface](https://grafana.com/docs/alloy/latest/tasks/debug/#alloy-ui) that shows every configuration -component that the Alloy instance is using and the component status. By default, the web UI runs on each Alloy pod on -port `12345`. Since that UI is typically not exposed external to the Cluster, you can use port-forwarding to access it. 
- -`kubectl port-forward svc/grafana-k8s-monitoring-alloy 12345:12345` - -Then open a browser to `http://localhost:12345` - -## Instructions for specific Cluster platform providers - -Certain Kubernetes Cluster platforms require some specific configurations for this Helm chart. If your Cluster is -running on one of these platforms, see the example for the changes required to run this Helm chart: - -- [Azure AKS](examples/platforms/azure-aks) -- [AWS EKS on Fargate](examples/platforms/eks-fargate) -- [Google GKE Autopilot](examples/platforms/gke-autopilot) -- [OpenShift](examples/platforms/openshift) - -## Frequently seen problems - -### Authentication error: invalid scope requested - -To deliver telemetry data to Grafana Cloud, you use -an [Access Policy Token](https://grafana.com/docs/grafana-cloud/account-management/authentication-and-permissions/access-policies/) -with the appropriate scopes. Scopes define an action that can be done to a specific data type. For -example `metrics:write` permits writing metrics. - -If sending data to Grafana Cloud, this Helm chart uses the `:write` scopes for delivering data. It can optionally -use the `:read` scopes when running the [Data Test Job](./HelmTests.md#data-test). - -If your token does not have the correct scope, you will see errors in the Grafanaa Alloy logs. 
For example, when trying -to deliver profiles to Pyroscrope without the `profiles:write` scope: - -```text -msg="final error sending to profiles to endpoint" component=pyroscope.write.profiles_service endpoint=https://tempo-prod-1-prod-eu-west-2.grafana.net:443 err="unauthenticated: authentication error: invalid scope requested" -``` - -The table below shows the scopes required for various actions done by this chart: - -| Data type | Server | Scope for writing | Scope for reading | -|-----------------------|---------------------------------------------|-------------------|-------------------| -| Metrics | Grafana Cloud Metrics (Prometheus or Mimir) | `metrics:write` | `metrics:read` | -| Logs & Cluster Events | Grafana Cloud Logs (Loki) | `logs:write` | `logs:read` | -| Traces | Grafana Cloud Trace (Tempo) | `traces:write` | `traces:read` | -| Profiles | Grafana Cloud Profiles (Pyroscope) | `profiles:write` | `profiles:read` | - -### Kepler pods crashing on AWS Graviton nodes - -Kepler [cannot run](https://github.com/sustainable-computing-io/kepler/issues/1556) on AWS Graviton nodes and pods on -those nodes will CrashLoopBackOff. To prevent this, you can add a node selector to the Kepler deployment: - -```yaml -kepler: - nodeSelector: - kubernetes.io/arch: amd64 -``` - -### ResourceExhausted Error when Sending Traces - -If you have traces enabled, and you see log entries in your `alloy` instance that looks like this: - -```text -Permanent error: rpc error: code = ResourceExhausted desc = grpc: received message after decompression larger than max (5268750 vs. 4194304)" dropped_items=11226 -ts=2024-09-19T19:52:35.16668052Z level=info msg="rejoining peers" service=cluster peers_count=1 peers=6436336134343433.grafana-k8s-monitoring-alloy-cluster.default.svc.cluster.local.:12345 -``` - -It's likely due to the span size being too large. 
You can fix this by adjusting the batch size: - -```yaml -receivers: - processors: - batch: - maxSize: 2000 -``` - -Start with 2000 and adjust as needed. - -### Troubleshooting Pod Log and Trace Correlation Issues - -**Problem:** You're experiencing issues correlating Kubernetes pod logs with OpenTelemetry traces, metrics, and application logs. This often occurs when the `service.name`, `service.namespace`, and `service.instance.id` metadata do not consistently match across all your telemetry signals. - -**Solution:** To ensure proper correlation, we recommend aligning your metadata according to OpenTelemetry specifications, particularly the "[Specify resource attributes using Kubernetes annotations](https://opentelemetry.io/docs/specs/semconv/non-normative/k8s-attributes/)" guide. Follow these steps: - -1. **Define `service.name` and `service.namespace` consistently:** Prioritize these methods in order of preference: - - - **Kubernetes Pod Annotations:** Use `resource.opentelemetry.io/service.name` and `resource.opentelemetry.io/service.namespace` on your pods. - - **Kubernetes Pod Label & Namespace Name:** Utilize the `app.kubernetes.io/name` pod label and the Kubernetes namespace name. - - **Kubernetes Deployment & Namespace Names:** Infer from your Kubernetes deployment and namespace names. - - For more options, refer to the "[Specify resource attributes using Kubernetes annotations](https://opentelemetry.io/docs/specs/semconv/non-normative/k8s-attributes/)" guide. - -2. **Derive `service.instance.id`:** Infer `service.instance.id` from Kubernetes namespace, pod, and container names using the format: `concat([k8s.namespace.name, k8s.pod.name, k8s.container.name], '.')`. See "OpenTelemetry Operator" recommendation below. - -3. **Inject Resource Attributes into Workloads:** Pass these `service.name`, `service.namespace`, and `service.instance.id` resource attributes to your containerized workloads' OpenTelemetry instrumentation. 
This is typically done by injecting them as environment variables: - - - `OTEL_SERVICE_NAME` - - `OTEL_RESOURCE_ATTRIBUTES` - (Refer to OpenTelemetry SDK Environment Variables for more details.) - - **Recommended Approach:** Use the [OpenTelemetry Operator](https://github.com/open-telemetry/opentelemetry-operator) for automatic injection. Add the `instrumentation.opentelemetry.io/inject-sdk: "true"` Pod annotation. The OTel Operator integrates seamlessly with Grafana Kubernetes Monitoring by automatically configuring the exporter endpoint to `http://grafana-k8s-monitoring-alloy-receiver.default.svc.cluster.local:4318` within its `Instrumentation` CRD. - - **Alternative:** Manually specify the `OTEL_*` environment variables directly in your Kubernetes deployment manifests, including deriving `service.instance.id` from Kubernetes metadata (`concat([k8s.namespace.name, k8s.pod.name, k8s.container.name], '.')`). - -4. **Configure Pod Log Collection:** Ensure your Grafana Kubernetes Monitoring Helm Chart is configured to collect pod logs using the `filelog` gather method ([docs](https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/charts/feature-pod-logs)). - Add the following to your `values.yml`: - - ```yaml - podLogs: - enabled: true - gatherMethod: filelog - ``` - -5. **Verify Correlation:** After applying these configurations, verify the successful correlation of your pod logs with application traces, metrics, and other logs in Grafana Application Observability, Grafana Explore, or through Grafana Drilldown features. +This content has moved to [Troubleshoot the Kubernetes Monitoring Helm chart configuration](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/helm-chart/troubleshoot-helm-chart/). 
diff --git a/charts/k8s-monitoring/docs/UsingExtraConfig.md b/charts/k8s-monitoring/docs/UsingExtraConfig.md index 360dab2e81..c3024d04f2 100644 --- a/charts/k8s-monitoring/docs/UsingExtraConfig.md +++ b/charts/k8s-monitoring/docs/UsingExtraConfig.md @@ -1,60 +1,3 @@ # Additional Configuration with `extraConfig` -The Kubernetes Monitoring Helm chart has the ability to supply additional configuration to the Grafana Alloy instances -using the `extraConfig` sections. Anything put in these sections are added to the existing configuration that is created -by this chart. There are a few methods to use these sections that will be explored in this document. - -## How to use - -Helm provides multiple ways to set these additional configuration values. Either keep the values in the same file as the -rest of your Kubernetes Monitoring configuration, or store them separately as their own files and include during Helm -chart install. - -### Set as values - -You can set the contents of your extra configuration into your values file: - -```shell -$ ls -values.yaml -$ cat values.yaml -cluster: - name: my-cluster -... -alloy-metrics: - extraConfig: |- - // Any arbitrary Alloy configuration can be placed here. - logging { - level = "debug" - } -... -alloy-logs: -... - extraConfig: | - // Any arbitrary Alloy configuration can be placed here. - logging { - level = "debug" - } -... -$ helm upgrade grafana-k8s-monitoring grafana/k8s-monitoring --values values.yaml -``` - -For more examples, see: - -* [Extra Configuration example](examples/extra-configuration). -* [MongoDB Atlas example](examples/service-integrations/mongodb-atlas). 
- -### Set as files - -You can save the contents of your extra configuration as files and use Helm's `--set-file` argument: - -```shell -$ ls -values.yaml metricsConfig.alloy logsConfig.alloy -$ helm upgrade grafana-k8s-monitoring --atomic --timeout 300s grafana/k8s-monitoring \ - --values values.yaml \ - --set-file "alloy-metrics.extraConfig=metricsConfig.alloy" \ - --set-file "alloy-logs.extraConfig=logsConfig.alloy" -``` - -This can be beneficial once your extra configuration grows to a certain size. +This content has moved to [Extra config](https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/helm-chart-config/helm-chart/customize-helm-chart/#extra-config). diff --git a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics-and-traces/README.md b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics-and-traces/README.md index 322de0676c..aec88cf80f 100644 --- a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics-and-traces/README.md +++ b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics-and-traces/README.md @@ -2,9 +2,9 @@ (NOTE: Do not edit README.md directly. It is a generated file!) ( To make changes, please modify values.yaml or description.txt and run `make examples`) --> -# Auto-Instrumentation with Beyla for Metrics and Traces +# Zero-code instrumentation with Beyla for Metrics and Traces -This example demonstrates how to enable the auto-instrumentation feature, which deploys Grafana Beyla to automatically +This example demonstrates how to enable the zero-code instrumentation feature, which deploys Grafana Beyla to automatically instrument your application for metrics collection. It also coordinates with the Application Observability feature to generate traces for your application. 
diff --git a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics-and-traces/description.txt b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics-and-traces/description.txt index 8bb76317bc..cb54a874a9 100644 --- a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics-and-traces/description.txt +++ b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics-and-traces/description.txt @@ -1,5 +1,5 @@ -# Auto-Instrumentation with Beyla for Metrics and Traces +# Zero-code instrumentation with Beyla for Metrics and Traces -This example demonstrates how to enable the auto-instrumentation feature, which deploys Grafana Beyla to automatically +This example demonstrates how to enable the zero-code instrumentation feature, which deploys Grafana Beyla to automatically instrument your application for metrics collection. It also coordinates with the Application Observability feature to generate traces for your application. diff --git a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics/README.md b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics/README.md index 5ce02e990c..be33c3b63d 100644 --- a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics/README.md +++ b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics/README.md @@ -2,9 +2,9 @@ (NOTE: Do not edit README.md directly. It is a generated file!) 
( To make changes, please modify values.yaml or description.txt and run `make examples`) --> -# Auto-Instrumentation with Beyla for Metrics +# Zero-code instrumentation with Beyla for Metrics -This example demonstrates how to enable the auto-instrumentation feature, which deploys Grafana Beyla to automatically +This example demonstrates how to enable the zero-code instrumentation feature, which deploys Grafana Beyla to automatically instrument your application for metrics collection. ## Values diff --git a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics/description.txt b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics/description.txt index 865be16973..32d995a59d 100644 --- a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics/description.txt +++ b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics/description.txt @@ -1,4 +1,4 @@ -# Auto-Instrumentation with Beyla for Metrics +# Zero-code instrumentation with Beyla for Metrics -This example demonstrates how to enable the auto-instrumentation feature, which deploys Grafana Beyla to automatically +This example demonstrates how to enable the zero-code instrumentation feature, which deploys Grafana Beyla to automatically instrument your application for metrics collection. diff --git a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/discovery-rules/README.md b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/discovery-rules/README.md index f9ff5d83c5..582479c047 100644 --- a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/discovery-rules/README.md +++ b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/discovery-rules/README.md @@ -2,9 +2,9 @@ (NOTE: Do not edit README.md directly. It is a generated file!) 
( To make changes, please modify values.yaml or description.txt and run `make examples`) --> -# Auto-Instrumentation with Discovery Rules +# Zero-code instrumentation with Discovery Rules -This example demonstrates how to enable the auto-instrumentation feature, which deploys Grafana Beyla to automatically +This example demonstrates how to enable the zero-code instrumentation feature, which deploys Grafana Beyla to automatically instrument your application for metrics collection. It also shows how to set [discovery rules](https://grafana.com/docs/beyla/latest/configure/service-discovery/) to control which services are instrumented. diff --git a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/discovery-rules/description.txt b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/discovery-rules/description.txt index 3f9467b18d..79d4ef27f2 100644 --- a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/discovery-rules/description.txt +++ b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/discovery-rules/description.txt @@ -1,6 +1,6 @@ -# Auto-Instrumentation with Discovery Rules +# Zero-code instrumentation with Discovery Rules -This example demonstrates how to enable the auto-instrumentation feature, which deploys Grafana Beyla to automatically +This example demonstrates how to enable the zero-code instrumentation feature, which deploys Grafana Beyla to automatically instrument your application for metrics collection. It also shows how to set [discovery rules](https://grafana.com/docs/beyla/latest/configure/service-discovery/) to control which services are instrumented.