Skip to content

Commit ab50b12

Browse files
authored
feat: support custom otel collector config (BETA) (#1074)
plus the fix to reduce bloat in opamp agent logs.

Users should be able to mount the custom otel collector config file and add/override receivers, processors and exporters. For example:

```
receivers:
  hostmetrics:
    collection_interval: 5s
    scrapers:
      cpu:
      load:
      memory:
      disk:
      filesystem:
      network:
# override the default processors
processors:
  batch:
    send_batch_size: 10000
    timeout: 10s
  memory_limiter:
    limit_mib: 2000
service:
  pipelines:
    metrics/hostmetrics:
      receivers: [hostmetrics]
      # attach existing processors
      processors: [memory_limiter, batch]
      # attach existing exporters
      exporters: [clickhouse]
```

This will add a new `hostmetrics` receiver + `metrics/hostmetrics` pipeline and update the existing `batch` + `memory_limiter` processors.

WARNING: This feature is still in beta, and future updates may change how it works, potentially affecting compatibility.

Ref: HDX-1865
1 parent 41083a4 commit ab50b12

File tree

13 files changed

+564
-220
lines changed

13 files changed

+564
-220
lines changed

.changeset/metal-yaks-rhyme.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@hyperdx/api": patch
3+
---
4+
5+
feat: support custom otel collector config (BETA)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@hyperdx/api": patch
3+
---
4+
5+
fix: reduce bloat in opamp agent logs

docker-compose.dev.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,17 @@ services:
3030
HYPERDX_API_KEY: ${HYPERDX_API_KEY}
3131
HYPERDX_LOG_LEVEL: ${HYPERDX_LOG_LEVEL}
3232
OPAMP_SERVER_URL: 'http://host.docker.internal:${HYPERDX_OPAMP_PORT}'
33+
CUSTOM_OTELCOL_CONFIG_FILE: '/etc/otelcol-contrib/custom.config.yaml'
3334
# Set to 'true' to enable stdout logging for the OTel collector
34-
# OTEL_SUPERVISOR_PASSTHROUGH_LOGS: 'true'
35+
OTEL_SUPERVISOR_PASSTHROUGH_LOGS: 'false'
3536
# Uncomment to enable JSON schema in ClickHouse
3637
# Be sure to also set BETA_CH_OTEL_JSON_SCHEMA_ENABLED to 'true' in ch-server
3738
# OTEL_AGENT_FEATURE_GATE_ARG: '--feature-gates=clickhouse.json'
3839
volumes:
3940
- ./docker/otel-collector/config.yaml:/etc/otelcol-contrib/config.yaml
4041
- ./docker/otel-collector/supervisor_docker.yaml:/etc/otel/supervisor.yaml
42+
# Add a custom config file
43+
- ./docker/otel-collector/custom.config.yaml:/etc/otelcol-contrib/custom.config.yaml
4144
ports:
4245
- '13133:13133' # health_check extension
4346
- '24225:24225' # fluentd receiver

docker/otel-collector/Dockerfile

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,38 +3,44 @@ FROM otel/opentelemetry-collector-contrib:0.129.1 AS col
33
FROM otel/opentelemetry-collector-opampsupervisor:0.128.0 AS supervisor
44

55
# From: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/aa5c3aa4c7ec174361fcaf908de8eaca72263078/cmd/opampsupervisor/Dockerfile#L18
6-
FROM alpine:latest@sha256:a8560b36e8b8210634f77d9f7f9efd7ffa463e380b75e2e74aff4511df3ef88c AS prep
7-
RUN apk --update add ca-certificates
8-
RUN mkdir -p /etc/otel/supervisor-data/
9-
10-
FROM scratch AS base
6+
FROM alpine:latest@sha256:a8560b36e8b8210634f77d9f7f9efd7ffa463e380b75e2e74aff4511df3ef88c AS base
117

128
ARG USER_UID=10001
139
ARG USER_GID=10001
10+
11+
# Install certs, create user/group, and make the writable data dir
12+
RUN apk add --no-cache ca-certificates && \
13+
addgroup -S -g ${USER_GID} otel && \
14+
adduser -S -u ${USER_UID} -G otel otel && \
15+
install -d -m 0777 -o ${USER_UID} -g ${USER_GID} /etc/otel/supervisor-data
16+
1417
USER ${USER_UID}:${USER_GID}
1518

16-
COPY --from=prep /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
17-
COPY --from=prep --chmod=777 --chown=${USER_UID}:${USER_GID} /etc/otel/supervisor-data /etc/otel/supervisor-data
18-
COPY --from=supervisor --chmod=755 /usr/local/bin/opampsupervisor /
19-
COPY ./supervisor_docker.yaml /etc/otel/supervisor.yaml
19+
COPY --from=supervisor --chmod=755 /usr/local/bin/opampsupervisor /opampsupervisor
2020
COPY --from=col --chmod=755 /otelcol-contrib /otelcontribcol
2121

22+
# Copy entrypoint and log rotation scripts
23+
COPY --chmod=755 ./entrypoint.sh /entrypoint.sh
24+
COPY --chmod=755 ./log-rotator.sh /log-rotator.sh
25+
2226
## dev ##############################################################################################
2327
FROM base AS dev
2428

2529
COPY ./config.yaml /etc/otelcol-contrib/config.yaml
30+
COPY ./supervisor_docker.yaml /etc/otel/supervisor.yaml
2631

2732
EXPOSE 4317 4318 13133
2833

29-
ENTRYPOINT ["/opampsupervisor"]
34+
ENTRYPOINT ["/entrypoint.sh", "/opampsupervisor"]
3035
CMD ["--config", "/etc/otel/supervisor.yaml"]
3136

3237
## prod #############################################################################################
3338
FROM base AS prod
3439

3540
COPY ./config.yaml /etc/otelcol-contrib/config.yaml
41+
COPY ./supervisor_docker.yaml /etc/otel/supervisor.yaml
3642

3743
EXPOSE 4317 4318 13133
3844

39-
ENTRYPOINT ["/opampsupervisor"]
45+
ENTRYPOINT ["/entrypoint.sh", "/opampsupervisor"]
4046
CMD ["--config", "/etc/otel/supervisor.yaml"]
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
receivers:
2+
# Troubleshooting
3+
prometheus:
4+
config:
5+
scrape_configs:
6+
- job_name: 'otelcol'
7+
scrape_interval: 30s
8+
static_configs:
9+
- targets:
10+
- '0.0.0.0:8888'
11+
- ${env:CLICKHOUSE_PROMETHEUS_METRICS_ENDPOINT}
12+
# Data sources: logs
13+
fluentforward:
14+
endpoint: '0.0.0.0:24225'
15+
# Configured via OpAMP w/ authentication
16+
# Data sources: traces, metrics, logs
17+
# otlp/hyperdx:
18+
# protocols:
19+
# grpc:
20+
# include_metadata: true
21+
# endpoint: '0.0.0.0:4317'
22+
# http:
23+
# cors:
24+
# allowed_origins: ['*']
25+
# allowed_headers: ['*']
26+
# include_metadata: true
27+
# endpoint: '0.0.0.0:4318'
28+
processors:
29+
transform:
30+
log_statements:
31+
- context: log
32+
error_mode: ignore
33+
statements:
34+
# JSON parsing: Extends log attributes with the fields from structured log body content, either as an OTEL map or
35+
# as a string containing JSON content.
36+
- set(log.cache, ExtractPatterns(log.body, "(?P<0>(\\{.*\\}))")) where
37+
IsString(log.body)
38+
- merge_maps(log.attributes, ParseJSON(log.cache["0"]), "upsert")
39+
where IsMap(log.cache)
40+
- flatten(log.attributes) where IsMap(log.cache)
41+
- merge_maps(log.attributes, log.body, "upsert") where IsMap(log.body)
42+
- context: log
43+
error_mode: ignore
44+
conditions:
45+
- severity_number == 0 and severity_text == ""
46+
statements:
47+
# Infer: extract the first log level keyword from the first 256 characters of the body
48+
- set(log.cache["substr"], log.body.string) where Len(log.body.string)
49+
< 256
50+
- set(log.cache["substr"], Substring(log.body.string, 0, 256)) where
51+
Len(log.body.string) >= 256
52+
- set(log.cache, ExtractPatterns(log.cache["substr"],
53+
"(?i)(?P<0>(alert|crit|emerg|fatal|error|err|warn|notice|debug|dbug|trace))"))
54+
# Infer: detect FATAL
55+
- set(log.severity_number, SEVERITY_NUMBER_FATAL) where
56+
IsMatch(log.cache["0"], "(?i)(alert|crit|emerg|fatal)")
57+
- set(log.severity_text, "fatal") where log.severity_number ==
58+
SEVERITY_NUMBER_FATAL
59+
# Infer: detect ERROR
60+
- set(log.severity_number, SEVERITY_NUMBER_ERROR) where
61+
IsMatch(log.cache["0"], "(?i)(error|err)")
62+
- set(log.severity_text, "error") where log.severity_number ==
63+
SEVERITY_NUMBER_ERROR
64+
# Infer: detect WARN
65+
- set(log.severity_number, SEVERITY_NUMBER_WARN) where
66+
IsMatch(log.cache["0"], "(?i)(warn|notice)")
67+
- set(log.severity_text, "warn") where log.severity_number ==
68+
SEVERITY_NUMBER_WARN
69+
# Infer: detect DEBUG
70+
- set(log.severity_number, SEVERITY_NUMBER_DEBUG) where
71+
IsMatch(log.cache["0"], "(?i)(debug|dbug)")
72+
- set(log.severity_text, "debug") where log.severity_number ==
73+
SEVERITY_NUMBER_DEBUG
74+
# Infer: detect TRACE
75+
- set(log.severity_number, SEVERITY_NUMBER_TRACE) where
76+
IsMatch(log.cache["0"], "(?i)(trace)")
77+
- set(log.severity_text, "trace") where log.severity_number ==
78+
SEVERITY_NUMBER_TRACE
79+
# Infer: else
80+
- set(log.severity_text, "info") where log.severity_number == 0
81+
- set(log.severity_number, SEVERITY_NUMBER_INFO) where
82+
log.severity_number == 0
83+
- context: log
84+
error_mode: ignore
85+
statements:
86+
# Normalize the severity_text case
87+
- set(log.severity_text, ConvertCase(log.severity_text, "lower"))
88+
resourcedetection:
89+
detectors:
90+
- env
91+
- system
92+
- docker
93+
timeout: 5s
94+
override: false
95+
batch:
96+
memory_limiter:
97+
# 80% of maximum memory up to 2G
98+
limit_mib: 1500
99+
# 25% of limit up to 2G
100+
spike_limit_mib: 512
101+
check_interval: 5s
102+
connectors:
103+
routing/logs:
104+
default_pipelines: [logs/out-default]
105+
error_mode: ignore
106+
table:
107+
- context: log
108+
statement: route() where IsMatch(attributes["rr-web.event"], ".*")
109+
pipelines: [logs/out-rrweb]
110+
exporters:
111+
debug:
112+
verbosity: detailed
113+
sampling_initial: 5
114+
sampling_thereafter: 200
115+
clickhouse/rrweb:
116+
database: ${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE}
117+
endpoint: ${env:CLICKHOUSE_ENDPOINT}
118+
password: ${env:CLICKHOUSE_PASSWORD}
119+
username: ${env:CLICKHOUSE_USER}
120+
ttl: 720h
121+
logs_table_name: hyperdx_sessions
122+
timeout: 5s
123+
retry_on_failure:
124+
enabled: true
125+
initial_interval: 5s
126+
max_interval: 30s
127+
max_elapsed_time: 300s
128+
clickhouse:
129+
database: ${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE}
130+
endpoint: ${env:CLICKHOUSE_ENDPOINT}
131+
password: ${env:CLICKHOUSE_PASSWORD}
132+
username: ${env:CLICKHOUSE_USER}
133+
ttl: 720h
134+
timeout: 5s
135+
retry_on_failure:
136+
enabled: true
137+
initial_interval: 5s
138+
max_interval: 30s
139+
max_elapsed_time: 300s
140+
extensions:
141+
health_check:
142+
endpoint: :13133
143+
service:
144+
telemetry:
145+
metrics:
146+
readers:
147+
- pull:
148+
exporter:
149+
prometheus:
150+
host: '0.0.0.0'
151+
port: 8888
152+
logs:
153+
level: ${HYPERDX_LOG_LEVEL}
154+
extensions: [health_check]
155+
pipelines:
156+
traces:
157+
# receivers: [otlp/hyperdx]
158+
processors: [memory_limiter, batch]
159+
exporters: [clickhouse]
160+
metrics:
161+
# receivers: [otlp/hyperdx, prometheus]
162+
processors: [memory_limiter, batch]
163+
exporters: [clickhouse]
164+
logs/in:
165+
# receivers: [otlp/hyperdx, fluentforward]
166+
exporters: [routing/logs]
167+
logs/out-default:
168+
receivers: [routing/logs]
169+
processors: [memory_limiter, transform, batch]
170+
exporters: [clickhouse]
171+
logs/out-rrweb:
172+
receivers: [routing/logs]
173+
processors: [memory_limiter, batch]
174+
exporters: [clickhouse/rrweb]

0 commit comments

Comments
 (0)