diff --git a/docker/alloy-config.river b/docker/alloy-config.river
new file mode 100644
index 000000000000..13c53afbcd9c
--- /dev/null
+++ b/docker/alloy-config.river
@@ -0,0 +1,184 @@
+// Grafana Alloy configuration for Linera validator observability
+// Collects metrics, logs, and traces and forwards to central stack
+
+// ==================== Prometheus Metrics Scraping ====================
+
+// Scrape metrics from proxy service
+prometheus.scrape "proxy_metrics" {
+  targets = [{
+    __address__ = "proxy:21100",
+    job = "linera-proxy",
+    instance = sys.env("HOSTNAME"),
+  }]
+
+  // Hand samples to the Prometheus-to-OTLP converter (otelcol.receiver.prometheus "default")
+  forward_to = [otelcol.receiver.prometheus.default.receiver]
+
+  scrape_interval = "15s"
+  scrape_timeout = "10s"
+}
+
+// Scrape shard metrics via the "shard" service name. NOTE(review): this is one static target, so presumably only the replica DNS resolves to is scraped per interval - confirm every replica is covered
+prometheus.scrape "shard_metrics" {
+  targets = [
+    {
+      __address__ = "shard:21100",
+      job = "linera-shard",
+      instance = sys.env("HOSTNAME"),
+    },
+  ]
+
+  // Hand samples to the Prometheus-to-OTLP converter (otelcol.receiver.prometheus "default")
+  forward_to = [otelcol.receiver.prometheus.default.receiver]
+
+  scrape_interval = "15s"
+  scrape_timeout = "10s"
+}
+
+// Expose Alloy's own metrics
+prometheus.exporter.self "alloy" {}
+
+prometheus.scrape "alloy_metrics" {
+  targets = prometheus.exporter.self.alloy.targets
+  // Hand samples to the Prometheus-to-OTLP converter (otelcol.receiver.prometheus "default")
+  forward_to = [otelcol.receiver.prometheus.default.receiver]
+}
+
+// ==================== Prometheus Metrics Export (Optional) ====================
+
+// Convert Prometheus metrics to OTLP and send to central (Prometheus 3.x uses OTLP)
+// Configured through these environment variables (NOTE(review): the exporter is declared unconditionally - confirm behavior when they are unset):
+//   PROMETHEUS_OTLP_URL: https://your-prometheus-endpoint/otlp
+//   PROMETHEUS_OTLP_USER: your-username
+//   PROMETHEUS_OTLP_PASS: your-password
+
+// Export Prometheus metrics as OTLP
+otelcol.exporter.otlphttp "prometheus" {
+  client {
+    endpoint = sys.env("PROMETHEUS_OTLP_URL")
+
+    auth = otelcol.auth.basic.prometheus_credentials.handler
+
+    tls {
+      insecure_skip_verify = false
+    }
+  }
+}
+
+// Basic auth for Prometheus OTLP
+otelcol.auth.basic "prometheus_credentials" {
+  username = sys.env("PROMETHEUS_OTLP_USER")
+  password = sys.env("PROMETHEUS_OTLP_PASS")
+}
+
+// Convert Prometheus metrics to OTLP format
+otelcol.receiver.prometheus "default" {
+  output {
+    metrics = [otelcol.exporter.otlphttp.prometheus.input]
+  }
+}
+
+// ==================== Loki Logs Collection ====================
+
+// Discover docker containers
+discovery.docker "containers" {
+  host = "unix:///var/run/docker.sock"
+}
+
+// Relabel discovered containers. NOTE(review): __meta_docker_container_name presumably carries Docker's leading "/" - consider regex = "/(.*)" on the first rule to strip it
+discovery.relabel "docker_logs" {
+  targets = discovery.docker.containers.targets
+
+  rule {
+    source_labels = ["__meta_docker_container_name"]
+    target_label = "container"
+  }
+
+  rule {
+    source_labels = ["__meta_docker_container_label_com_docker_compose_service"]
+    target_label = "service"
+  }
+
+  rule {
+    source_labels = ["__meta_docker_container_label_com_docker_compose_project"]
+    target_label = "project"
+  }
+}
+
+// Read docker logs
+loki.source.docker "containers" {
+  host = "unix:///var/run/docker.sock"
+  targets = discovery.relabel.docker_logs.output
+  forward_to = [loki.write.central.receiver]
+}
+
+// Write logs to central Loki
+// Configured through these environment variables (NOTE(review): loki.write is declared unconditionally - confirm behavior when they are unset):
+//   LOKI_PUSH_URL: https://your-loki-endpoint/loki/api/v1/push
+//   LOKI_PUSH_USER: your-username
+//   LOKI_PUSH_PASS: your-password
+loki.write "central" {
+  endpoint {
+    url = sys.env("LOKI_PUSH_URL")
+
+    basic_auth {
+      username = sys.env("LOKI_PUSH_USER")
+      password = sys.env("LOKI_PUSH_PASS")
+    }
+
+    tls_config {
+      insecure_skip_verify = false
+    }
+  }
+
+  external_labels = {
+    cluster = "validator-docker-compose",
+    validator = sys.env("HOSTNAME"),
+  }
+}
+
+// ==================== Tempo Traces Collection ====================
+
+// OTLP receiver for traces
+otelcol.receiver.otlp "default" {
+  grpc {
+    endpoint = "0.0.0.0:4317"
+  }
+
+  http {
+    endpoint = "0.0.0.0:4318"
+  }
+
+  output {
+    traces = [otelcol.exporter.otlphttp.central.input]
+  }
+}
+
+// Export traces to central Tempo
+// Configured through these environment variables (NOTE(review): the exporter is declared unconditionally - confirm behavior when they are unset):
+//   TEMPO_OTLP_URL: https://your-tempo-endpoint/tempo/otlp
+//   TEMPO_OTLP_USER: your-username
+//   TEMPO_OTLP_PASS: your-password
+otelcol.exporter.otlphttp "central" {
+  client {
+    endpoint = sys.env("TEMPO_OTLP_URL")
+
+    auth = otelcol.auth.basic.credentials.handler
+
+    tls {
+      insecure_skip_verify = false
+    }
+  }
+}
+
+// Basic auth for OTLP
+otelcol.auth.basic "credentials" {
+  username = sys.env("TEMPO_OTLP_USER")
+  password = sys.env("TEMPO_OTLP_PASS")
+}
+
+// ==================== Metrics Exposition ====================
+
+// NOTE(review): no component is declared under this heading, so nothing here re-exposes the
+// scraped metrics. Port 12345 is Alloy's own HTTP server (--server.http.listen-addr in
+// docker-compose.alloy.yml). Alloy's own metrics are collected via prometheus.exporter.self.
diff --git a/docker/docker-compose.alloy.yml b/docker/docker-compose.alloy.yml
new file mode 100644
index 000000000000..e0946b1cc732
--- /dev/null
+++ b/docker/docker-compose.alloy.yml
@@ -0,0 +1,49 @@
+# Docker Compose override file to enable Grafana Alloy for central observability
+#
+# Usage:
+#   docker-compose -f docker-compose.yml -f docker-compose.alloy.yml up -d
+#
+# Documentation: See MONITORING.md for complete setup and configuration guide
+#
+# Required environment variables for remote push:
+#   PROMETHEUS_OTLP_URL: https://your-prometheus-endpoint/otlp
+#   PROMETHEUS_OTLP_USER: your-username
+#   PROMETHEUS_OTLP_PASS: your-password
+#   LOKI_PUSH_URL: https://your-loki-endpoint/loki/api/v1/push
+#   LOKI_PUSH_USER: your-username
+#   LOKI_PUSH_PASS: your-password
+#   TEMPO_OTLP_URL: https://your-tempo-endpoint/tempo/otlp
+#   TEMPO_OTLP_USER: your-username
+#   TEMPO_OTLP_PASS: your-password
+
+services:
+  alloy:
+    image: grafana/alloy:latest
+    container_name: alloy
+    ports:
+      - "12345:12345" # Alloy HTTP server (see --server.http.listen-addr below)
+      - "4317:4317" # OTLP gRPC receiver
+      - "4318:4318" # OTLP HTTP receiver
+    volumes:
+      - ./alloy-config.river:/etc/alloy/config.river:ro
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+    command:
+      - "run"
+      - "--server.http.listen-addr=0.0.0.0:12345"
+      - "/etc/alloy/config.river"
+    environment:
+      - HOSTNAME=${HOSTNAME:-validator}
+      - PROMETHEUS_OTLP_URL=${PROMETHEUS_OTLP_URL:-}
+      - PROMETHEUS_OTLP_USER=${PROMETHEUS_OTLP_USER:-}
+      - PROMETHEUS_OTLP_PASS=${PROMETHEUS_OTLP_PASS:-}
+      - LOKI_PUSH_URL=${LOKI_PUSH_URL:-}
+      - LOKI_PUSH_USER=${LOKI_PUSH_USER:-}
+      - LOKI_PUSH_PASS=${LOKI_PUSH_PASS:-}
+      - TEMPO_OTLP_URL=${TEMPO_OTLP_URL:-}
+      - TEMPO_OTLP_USER=${TEMPO_OTLP_USER:-}
+      - TEMPO_OTLP_PASS=${TEMPO_OTLP_PASS:-}
+    labels:
+      com.centurylinklabs.watchtower.enable: "true"
+    depends_on:
+      - proxy
+      - shard
diff --git a/kubernetes/linera-validator/Chart.lock b/kubernetes/linera-validator/Chart.lock
index f0f67683ccaa..acf0e3be6533 100644
--- a/kubernetes/linera-validator/Chart.lock
+++ b/kubernetes/linera-validator/Chart.lock
@@ -8,5 +8,8 @@ dependencies:
 - name: pyroscope
   repository: https://grafana.github.io/helm-charts
   version: 1.14.2
-digest: sha256:7fe611b57ddb6d72aa31bac87568fdb8e531e988e2ce4067b931d3026332f027
-generated: "2025-09-01T16:56:59.19795-03:00"
+- name: alloy
+  repository: https://grafana.github.io/helm-charts
+  version: 1.3.1
+digest: sha256:295a8fc7b332a0b3c3223c2192ee1dbff016f8707760c5b4b22d76403d6d7af4
+generated: "2025-10-21T02:26:24.01435788+02:00"
diff --git a/kubernetes/linera-validator/Chart.yaml b/kubernetes/linera-validator/Chart.yaml
index 301d59f669c4..07e1dd5a34d7 100644
--- a/kubernetes/linera-validator/Chart.yaml
+++ b/kubernetes/linera-validator/Chart.yaml
@@ -15,24 +15,29 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.1.0
+version: 0.2.0
 
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
-appVersion: "1.16.0"
+appVersion: "1.16.1"
 
 # Dependencies of the application being deployed.
 dependencies:
   - name: kube-prometheus-stack
     version: "51.0.3"
     repository: "https://prometheus-community.github.io/helm-charts"
-
+    condition: kube-prometheus-stack.enabled
   - name: loki-stack
     version: "2.8.9"
     repository: "https://grafana.github.io/helm-charts"
-
+    condition: loki-stack.enabled
   - name: pyroscope
     version: "1.14.2"
     repository: "https://grafana.github.io/helm-charts"
+    condition: pyroscope.enabled
+  - name: alloy
+    version: "1.3.1"
+    repository: "https://grafana.github.io/helm-charts"
+    condition: alloy.enabled
diff --git a/kubernetes/linera-validator/alloy-config.river.tpl b/kubernetes/linera-validator/alloy-config.river.tpl
new file mode 100644
index 000000000000..303c6894fc51
--- /dev/null
+++ b/kubernetes/linera-validator/alloy-config.river.tpl
@@ -0,0 +1,230 @@
+// Grafana Alloy configuration for Linera validator observability
+// Collects metrics, logs, and traces and forwards to central stack
+
+// ==================== Prometheus Metrics Scraping ====================
+
+// Discover Kubernetes pods for scraping
+discovery.kubernetes "pods" {
+  role = "pod"
+
+  namespaces {
+    names = [sys.env("NAMESPACE")]
+  }
+}
+
+// Relabel discovered pods to scrape linera-proxy and linera-shard
+discovery.relabel "linera_metrics" {
+  targets = discovery.kubernetes.pods.targets
+
+  // Only scrape pods with app=linera-validator label
+  rule {
+    source_labels = ["__meta_kubernetes_pod_label_app"]
+    regex = "linera-validator"
+    action = "keep"
+  }
+
+  // Set job label based on container name
+  rule {
+    source_labels = ["__meta_kubernetes_pod_container_name"]
+    target_label = "job"
+    replacement = "linera-${1}"
+  }
+
+  // Set instance label to pod name
+  rule {
+    source_labels = ["__meta_kubernetes_pod_name"]
+    target_label = "instance"
+  }
+
+  // Set namespace label
+  rule {
+    source_labels = ["__meta_kubernetes_namespace"]
+    target_label = "namespace"
+  }
+
+  // Keep only targets whose container port is the metrics port (21100)
+  rule {
+    source_labels = ["__meta_kubernetes_pod_container_port_number"]
+    regex = "21100"
+    action = "keep"
+  }
+
+  // Set __address__ to pod IP:port
+  rule {
+    source_labels = ["__meta_kubernetes_pod_ip", "__meta_kubernetes_pod_container_port_number"]
+    separator = ":"
+    target_label = "__address__"
+  }
+}
+
+// Scrape metrics from discovered pods
+prometheus.scrape "linera_metrics" {
+  targets = discovery.relabel.linera_metrics.output
+
+  // Hand samples to the Prometheus-to-OTLP converter (otelcol.receiver.prometheus "default")
+  forward_to = [otelcol.receiver.prometheus.default.receiver]
+
+  scrape_interval = "15s"
+  scrape_timeout = "10s"
+}
+
+// Expose Alloy's own metrics
+prometheus.exporter.self "alloy" {}
+
+prometheus.scrape "alloy_metrics" {
+  targets = prometheus.exporter.self.alloy.targets
+  // Hand samples to the Prometheus-to-OTLP converter (otelcol.receiver.prometheus "default")
+  forward_to = [otelcol.receiver.prometheus.default.receiver]
+}
+
+// ==================== Prometheus Metrics Export (Optional) ====================
+
+// Convert Prometheus metrics to OTLP and send to central (Prometheus 3.x uses OTLP)
+// Configured through these environment variables (NOTE(review): the exporter is declared unconditionally - confirm behavior when they are unset):
+//   PROMETHEUS_OTLP_URL: https://your-prometheus-endpoint/otlp
+//   PROMETHEUS_OTLP_USER: your-username
+//   PROMETHEUS_OTLP_PASS: your-password
+
+// Export Prometheus metrics as OTLP
+otelcol.exporter.otlphttp "prometheus" {
+  client {
+    endpoint = sys.env("PROMETHEUS_OTLP_URL")
+
+    auth = otelcol.auth.basic.prometheus_credentials.handler
+
+    tls {
+      insecure_skip_verify = false
+    }
+  }
+}
+
+// Basic auth for Prometheus OTLP
+otelcol.auth.basic "prometheus_credentials" {
+  username = sys.env("PROMETHEUS_OTLP_USER")
+  password = sys.env("PROMETHEUS_OTLP_PASS")
+}
+
+// Convert Prometheus metrics to OTLP format
+otelcol.receiver.prometheus "default" {
+  output {
+    metrics = [otelcol.exporter.otlphttp.prometheus.input]
+  }
+}
+
+// ==================== Loki Logs Collection ====================
+
+// Discover Kubernetes pods for log collection
+discovery.kubernetes "pod_logs" {
+  role = "pod"
+
+  namespaces {
+    names = [sys.env("NAMESPACE")]
+  }
+}
+
+// Relabel discovered pods for log collection
+discovery.relabel "pod_logs" {
+  targets = discovery.kubernetes.pod_logs.targets
+
+  // Only collect logs from linera-validator pods
+  rule {
+    source_labels = ["__meta_kubernetes_pod_label_app"]
+    regex = "linera-validator"
+    action = "keep"
+  }
+
+  // Set pod label
+  rule {
+    source_labels = ["__meta_kubernetes_pod_name"]
+    target_label = "pod"
+  }
+
+  // Set container label
+  rule {
+    source_labels = ["__meta_kubernetes_pod_container_name"]
+    target_label = "container"
+  }
+
+  // Set namespace label
+  rule {
+    source_labels = ["__meta_kubernetes_namespace"]
+    target_label = "namespace"
+  }
+}
+
+// Read pod logs
+loki.source.kubernetes "pods" {
+  targets = discovery.relabel.pod_logs.output
+  forward_to = [loki.write.central.receiver]
+}
+
+// Write logs to central Loki
+// Configured through these environment variables (NOTE(review): loki.write is declared unconditionally - confirm behavior when they are unset):
+//   LOKI_PUSH_URL: https://your-loki-endpoint/loki/api/v1/push
+//   LOKI_PUSH_USER: your-username
+//   LOKI_PUSH_PASS: your-password
+loki.write "central" {
+  endpoint {
+    url = sys.env("LOKI_PUSH_URL")
+
+    basic_auth {
+      username = sys.env("LOKI_PUSH_USER")
+      password = sys.env("LOKI_PUSH_PASS")
+    }
+
+    tls_config {
+      insecure_skip_verify = false
+    }
+  }
+
+  external_labels = {
+    cluster = sys.env("CLUSTER_NAME"),
+    validator = sys.env("VALIDATOR_NAME"),
+  }
+}
+
+// ==================== Tempo Traces Collection ====================
+
+// OTLP receiver for traces
+otelcol.receiver.otlp "default" {
+  grpc {
+    endpoint = "0.0.0.0:4317"
+  }
+
+  http {
+    endpoint = "0.0.0.0:4318"
+  }
+
+  output {
+    traces = [otelcol.exporter.otlphttp.central.input]
+  }
+}
+
+// Export traces to central Tempo
+// Configured through these environment variables (NOTE(review): the exporter is declared unconditionally - confirm behavior when they are unset):
+//   TEMPO_OTLP_URL: https://your-tempo-endpoint/tempo/otlp
+//   TEMPO_OTLP_USER: your-username
+//   TEMPO_OTLP_PASS: your-password
+otelcol.exporter.otlphttp "central" {
+  client {
+    endpoint = sys.env("TEMPO_OTLP_URL")
+
+    auth = otelcol.auth.basic.credentials.handler
+
+    tls {
+      insecure_skip_verify = false
+    }
+  }
+}
+
+// Basic auth for OTLP
+otelcol.auth.basic "credentials" {
+  username = sys.env("TEMPO_OTLP_USER")
+  password = sys.env("TEMPO_OTLP_PASS")
+}
+
+// ==================== Metrics Exposition ====================
+
+// NOTE(review): no component is declared under this heading, so nothing here re-exposes the
+// scraped metrics; the listen port (12345 per the original note) is not set in this file -
+// confirm it against the Helm values. Alloy's own metrics come from prometheus.exporter.self.
diff --git a/kubernetes/linera-validator/charts/alloy-1.3.1.tgz b/kubernetes/linera-validator/charts/alloy-1.3.1.tgz
new file mode 100644
index 000000000000..aa44ef7cca76
Binary files /dev/null and b/kubernetes/linera-validator/charts/alloy-1.3.1.tgz differ