Skip to content

Commit d3352e7

Browse files
committed
docs: Add metrics documentation
Generate and document all OPA metrics in a central registry. Add operational metrics sections to monitoring docs. Fixes: #6730 Signed-off-by: Anivar A Aravind <[email protected]>
1 parent d3d014d commit d3352e7

File tree

6 files changed

+670
-2
lines changed

6 files changed

+670
-2
lines changed

Makefile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ version:
9595
release-dir:
9696
@echo $(RELEASE_DIR)
9797

98+
9899
.PHONY: generate
99100
generate: wasm-lib-build
100101
ifeq ($(GOOS),windows)
@@ -577,3 +578,7 @@ depr-release-local:
577578
-e TELEMETRY_URL=$(TELEMETRY_URL) \
578579
$(RELEASE_BUILD_IMAGE) \
579580
/_src/build/build-release.sh --output-dir=/$(RELEASE_DIR) --source-url=/_src
581+
582+
.PHONY: generate-metrics-docs
583+
generate-metrics-docs:
584+
@$(GO) run cmd/metrics-docs/main.go > docs/docs/metrics-registry.md

cmd/metrics-docs/main.go

Lines changed: 330 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,330 @@
1+
// Copyright 2025 The OPA Authors. All rights reserved.
2+
// Use of this source code is governed by an Apache2
3+
// license that can be found in the LICENSE file.
4+
5+
package main
6+
7+
import (
8+
"fmt"
9+
"sort"
10+
"strings"
11+
)
12+
13+
// MetricSource is the path of the source file a metric is attributed to.
type MetricSource string

// Source files referenced by the entries of metricsRegistry.
const (
	// SourceMetrics holds the shared metric name constants.
	SourceMetrics MetricSource = "v1/metrics/metrics.go"
	// SourceInstrumentation holds the eval_op_* and partial_op_* metrics.
	SourceInstrumentation MetricSource = "v1/topdown/instrumentation.go"
	// SourceDiskStorage holds the disk_* metrics.
	SourceDiskStorage MetricSource = "v1/storage/disk/txn.go"
	// SourceHTTPBuiltin holds the rego_builtin_http_send* metrics.
	SourceHTTPBuiltin MetricSource = "v1/topdown/http.go"
	// SourceGlobBuiltin holds the glob cache metric.
	SourceGlobBuiltin MetricSource = "v1/topdown/glob.go"
	// SourceRegexBuiltin holds the regex cache metric.
	SourceRegexBuiltin MetricSource = "v1/topdown/regex.go"
	// SourceWASMPool holds the wasm_pool_* metrics.
	SourceWASMPool MetricSource = "internal/wasm/sdk/internal/wasm/pool.go"
	// SourceWASMVM holds the wasm_vm_* metrics.
	SourceWASMVM MetricSource = "internal/wasm/sdk/internal/wasm/vm.go"
	// SourceServer holds the server_read_bytes metric.
	SourceServer MetricSource = "v1/server/server.go"
	// SourceCompile holds the compile_stage_* metrics.
	SourceCompile MetricSource = "v1/ast/compile.go"
)
27+
28+
type Metric struct {
29+
Name string
30+
Type string
31+
Source MetricSource
32+
ConstantName string
33+
Description string
34+
}
35+
36+
var metricsRegistry = []Metric{
37+
// Core metrics from v1/metrics/metrics.go
38+
{Name: "bundle_request", Type: "timer", Source: SourceMetrics, ConstantName: "BundleRequest",
39+
Description: "Time to download bundle from remote server (nanoseconds)"},
40+
{Name: "server_handler", Type: "timer", Source: SourceMetrics, ConstantName: "ServerHandler",
41+
Description: "Total time to handle REST API request (nanoseconds)"},
42+
{Name: "server_query_cache_hit", Type: "counter", Source: SourceMetrics, ConstantName: "ServerQueryCacheHit",
43+
Description: "Number of queries served from server cache (count)"},
44+
{Name: "sdk_decision_eval", Type: "timer", Source: SourceMetrics, ConstantName: "SDKDecisionEval",
45+
Description: "Time to evaluate decision in SDK mode (nanoseconds)"},
46+
47+
// Query evaluation metrics
48+
{Name: "rego_query_compile", Type: "timer", Source: SourceMetrics, ConstantName: "RegoQueryCompile",
49+
Description: "Time to compile parsed query into evaluation form (nanoseconds)"},
50+
{Name: "rego_query_eval", Type: "timer", Source: SourceMetrics, ConstantName: "RegoQueryEval",
51+
Description: "Time to execute compiled query against data (nanoseconds)"},
52+
{Name: "rego_query_parse", Type: "timer", Source: SourceMetrics, ConstantName: "RegoQueryParse",
53+
Description: "Time to parse query string into AST (nanoseconds)"},
54+
55+
// Module and data metrics
56+
{Name: "rego_module_parse", Type: "timer", Source: SourceMetrics, ConstantName: "RegoModuleParse",
57+
Description: "Time to parse Rego policy modules (nanoseconds)"},
58+
{Name: "rego_module_compile", Type: "timer", Source: SourceMetrics, ConstantName: "RegoModuleCompile",
59+
Description: "Time to compile policy modules into evaluation form (nanoseconds)"},
60+
{Name: "rego_data_parse", Type: "timer", Source: SourceMetrics, ConstantName: "RegoDataParse",
61+
Description: "Time to parse JSON/YAML data documents (nanoseconds)"},
62+
{Name: "rego_input_parse", Type: "timer", Source: SourceMetrics, ConstantName: "RegoInputParse",
63+
Description: "Time to parse input document for query (nanoseconds)"},
64+
{Name: "rego_load_files", Type: "timer", Source: SourceMetrics, ConstantName: "RegoLoadFiles",
65+
Description: "Time to load policy/data files from disk (nanoseconds)"},
66+
{Name: "rego_load_bundles", Type: "timer", Source: SourceMetrics, ConstantName: "RegoLoadBundles",
67+
Description: "Time to load and activate bundles (nanoseconds)"},
68+
{Name: "rego_external_resolve", Type: "timer", Source: SourceMetrics, ConstantName: "RegoExternalResolve",
69+
Description: "Time to resolve external data references (nanoseconds)"},
70+
{Name: "rego_partial_eval", Type: "timer", Source: SourceMetrics, ConstantName: "RegoPartialEval",
71+
Description: "Time to partially evaluate policy (nanoseconds)"},
72+
73+
// Compilation metrics
74+
{Name: "compile_prep_partial", Type: "timer", Source: SourceMetrics, ConstantName: "CompilePrepPartial",
75+
Description: "Partial evaluation preparation time (nanoseconds)"},
76+
{Name: "compile_eval_constraints", Type: "timer", Source: SourceMetrics, ConstantName: "CompileEvalConstraints",
77+
Description: "Constraint evaluation time (nanoseconds)"},
78+
{Name: "compile_translate_queries", Type: "timer", Source: SourceMetrics, ConstantName: "CompileTranslateQueries",
79+
Description: "Query translation time (nanoseconds)"},
80+
{Name: "compile_extract_annotations_unknowns", Type: "timer", Source: SourceMetrics, ConstantName: "CompileExtractAnnotationsUnknowns",
81+
Description: "Unknown annotation extraction time (nanoseconds)"},
82+
{Name: "compile_extract_annotations_mask", Type: "timer", Source: SourceMetrics, ConstantName: "CompileExtractAnnotationsMask",
83+
Description: "Mask annotation extraction time (nanoseconds)"},
84+
{Name: "compile_eval_mask_rule", Type: "timer", Source: SourceMetrics, ConstantName: "CompileEvalMaskRule",
85+
Description: "Mask rule evaluation time (nanoseconds)"},
86+
{Name: "compile_stage_check_imports", Type: "timer", Source: SourceCompile,
87+
Description: "Import checking stage time (nanoseconds)"},
88+
{Name: "compile_stage_comprehension_index_build", Type: "counter", Source: SourceCompile,
89+
Description: "Number of comprehension indices built (count)"},
90+
91+
// HTTP built-in metrics
92+
{Name: "rego_builtin_http_send", Type: "timer", Source: SourceHTTPBuiltin, ConstantName: "httpSendLatencyMetricKey",
93+
Description: "Total time spent in http.send() calls (nanoseconds)"},
94+
{Name: "rego_builtin_http_send_interquery_cache_hits", Type: "counter", Source: SourceHTTPBuiltin,
95+
Description: "HTTP responses served from inter-query cache (count)"},
96+
{Name: "rego_builtin_http_send_network_requests", Type: "counter", Source: SourceHTTPBuiltin,
97+
Description: "Actual HTTP network requests made - cache misses (count)"},
98+
99+
// Pattern matching built-ins
100+
{Name: "rego_builtin_glob_interquery_value_cache_hits", Type: "counter", Source: SourceGlobBuiltin,
101+
Description: "Glob pattern cache hits (count)"},
102+
{Name: "rego_builtin_regex_interquery_value_cache_hits", Type: "counter", Source: SourceRegexBuiltin,
103+
Description: "Regex pattern cache hits (count)"},
104+
105+
// Evaluation operation metrics (timers + histograms)
106+
{Name: "eval_op_plug", Type: "timer", Source: SourceInstrumentation,
107+
Description: "Plugging operation time (nanoseconds)"},
108+
{Name: "eval_op_plug", Type: "histogram", Source: SourceInstrumentation,
109+
Description: "Plugging operation time distribution (percentiles)"},
110+
{Name: "eval_op_resolve", Type: "timer", Source: SourceInstrumentation,
111+
Description: "Reference resolution time (nanoseconds)"},
112+
{Name: "eval_op_resolve", Type: "histogram", Source: SourceInstrumentation,
113+
Description: "Reference resolution time distribution (percentiles)"},
114+
{Name: "eval_op_rule_index", Type: "timer", Source: SourceInstrumentation,
115+
Description: "Rule indexing time (nanoseconds)"},
116+
{Name: "eval_op_rule_index", Type: "histogram", Source: SourceInstrumentation,
117+
Description: "Rule indexing time distribution (percentiles)"},
118+
{Name: "eval_op_builtin_call", Type: "timer", Source: SourceInstrumentation,
119+
Description: "Built-in function call time (nanoseconds)"},
120+
{Name: "eval_op_builtin_call", Type: "histogram", Source: SourceInstrumentation,
121+
Description: "Built-in function call time distribution (percentiles)"},
122+
123+
// Cache metrics
124+
{Name: "eval_op_virtual_cache_hit", Type: "counter", Source: SourceInstrumentation,
125+
Description: "Virtual document cache hits (count)"},
126+
{Name: "eval_op_virtual_cache_miss", Type: "counter", Source: SourceInstrumentation,
127+
Description: "Virtual document cache misses (count)"},
128+
{Name: "eval_op_base_cache_hit", Type: "counter", Source: SourceInstrumentation,
129+
Description: "Base document cache hits (count)"},
130+
{Name: "eval_op_base_cache_miss", Type: "counter", Source: SourceInstrumentation,
131+
Description: "Base document cache misses (count)"},
132+
{Name: "eval_op_comprehension_cache_skip", Type: "counter", Source: SourceInstrumentation,
133+
Description: "Comprehension cache skips (count)"},
134+
{Name: "eval_op_comprehension_cache_build", Type: "counter", Source: SourceInstrumentation,
135+
Description: "Comprehension cache builds (count)"},
136+
{Name: "eval_op_comprehension_cache_hit", Type: "counter", Source: SourceInstrumentation,
137+
Description: "Comprehension cache hits (count)"},
138+
{Name: "eval_op_comprehension_cache_miss", Type: "counter", Source: SourceInstrumentation,
139+
Description: "Comprehension cache misses (count)"},
140+
141+
// Partial evaluation operations (timers + histograms)
142+
{Name: "partial_op_save_unify", Type: "timer", Source: SourceInstrumentation,
143+
Description: "Unification save time (nanoseconds)"},
144+
{Name: "partial_op_save_unify", Type: "histogram", Source: SourceInstrumentation,
145+
Description: "Unification save time distribution (percentiles)"},
146+
{Name: "partial_op_save_set_contains", Type: "timer", Source: SourceInstrumentation,
147+
Description: "Set contains save time (nanoseconds)"},
148+
{Name: "partial_op_save_set_contains", Type: "histogram", Source: SourceInstrumentation,
149+
Description: "Set contains save time distribution (percentiles)"},
150+
{Name: "partial_op_save_set_contains_rec", Type: "timer", Source: SourceInstrumentation,
151+
Description: "Recursive set contains save time (nanoseconds)"},
152+
{Name: "partial_op_save_set_contains_rec", Type: "histogram", Source: SourceInstrumentation,
153+
Description: "Recursive set contains save time distribution (percentiles)"},
154+
{Name: "partial_op_copy_propagation", Type: "timer", Source: SourceInstrumentation,
155+
Description: "Copy propagation optimization time (nanoseconds)"},
156+
{Name: "partial_op_copy_propagation", Type: "histogram", Source: SourceInstrumentation,
157+
Description: "Copy propagation optimization distribution (percentiles)"},
158+
159+
// Disk storage metrics
160+
{Name: "disk_read", Type: "timer", Source: SourceDiskStorage,
161+
Description: "Disk read operation time (nanoseconds)"},
162+
{Name: "disk_write", Type: "timer", Source: SourceDiskStorage,
163+
Description: "Disk write operation time (nanoseconds)"},
164+
{Name: "disk_commit", Type: "timer", Source: SourceDiskStorage,
165+
Description: "Disk commit operation time (nanoseconds)"},
166+
{Name: "disk_read_bytes", Type: "counter", Source: SourceDiskStorage,
167+
Description: "Total bytes read from disk (bytes)"},
168+
{Name: "disk_read_keys", Type: "counter", Source: SourceDiskStorage,
169+
Description: "Number of keys read from disk (count)"},
170+
{Name: "disk_written_keys", Type: "counter", Source: SourceDiskStorage,
171+
Description: "Number of keys written to disk (count)"},
172+
{Name: "disk_deleted_keys", Type: "counter", Source: SourceDiskStorage,
173+
Description: "Number of keys deleted from disk (count)"},
174+
175+
// WASM metrics
176+
{Name: "wasm_pool_acquire", Type: "timer", Source: SourceWASMPool,
177+
Description: "WASM instance acquisition time (nanoseconds)"},
178+
{Name: "wasm_pool_release", Type: "timer", Source: SourceWASMPool,
179+
Description: "WASM instance release time (nanoseconds)"},
180+
{Name: "wasm_vm_eval", Type: "timer", Source: SourceWASMVM,
181+
Description: "WASM evaluation time (nanoseconds)"},
182+
{Name: "wasm_vm_eval_prepare_input", Type: "timer", Source: SourceWASMVM,
183+
Description: "WASM input preparation time (nanoseconds)"},
184+
{Name: "wasm_vm_eval_call", Type: "timer", Source: SourceWASMVM,
185+
Description: "WASM function call time (nanoseconds)"},
186+
{Name: "wasm_vm_eval_execute", Type: "timer", Source: SourceWASMVM,
187+
Description: "WASM execution time (nanoseconds)"},
188+
{Name: "wasm_vm_eval_prepare_result", Type: "timer", Source: SourceWASMVM,
189+
Description: "WASM result preparation time (nanoseconds)"},
190+
191+
// Server metrics
192+
{Name: "server_read_bytes", Type: "timer", Source: SourceServer,
193+
Description: "Request body read time (nanoseconds)"},
194+
}
195+
196+
func (m Metric) formatMetricName() string {
197+
switch m.Type {
198+
case "timer":
199+
return fmt.Sprintf("timer_%s_ns", m.Name)
200+
case "counter":
201+
return "counter_" + m.Name
202+
case "histogram":
203+
return "histogram_" + m.Name
204+
default:
205+
return m.Name
206+
}
207+
}
208+
209+
func groupByType(metrics []Metric) map[string][]Metric {
210+
groups := make(map[string][]Metric)
211+
for _, m := range metrics {
212+
groups[m.Type] = append(groups[m.Type], m)
213+
}
214+
return groups
215+
}
216+
217+
func groupBySource(metrics []Metric) map[MetricSource][]Metric {
218+
groups := make(map[MetricSource][]Metric)
219+
for _, m := range metrics {
220+
groups[m.Source] = append(groups[m.Source], m)
221+
}
222+
return groups
223+
}
224+
225+
func main() {
226+
fmt.Println("# OPA Metrics Registry")
227+
fmt.Println("<!-- This file is auto-generated. DO NOT EDIT. -->")
228+
fmt.Println()
229+
fmt.Printf("Total metrics: **%d**\n\n", len(metricsRegistry))
230+
231+
byType := groupByType(metricsRegistry)
232+
233+
fmt.Println("## Summary")
234+
fmt.Printf("- **Timers**: %d (measure duration in nanoseconds)\n", len(byType["timer"]))
235+
fmt.Printf("- **Counters**: %d (track counts and accumulations)\n", len(byType["counter"]))
236+
fmt.Printf("- **Histograms**: %d (track distributions)\n\n", len(byType["histogram"]))
237+
238+
fmt.Println("## Metrics by Category")
239+
fmt.Println()
240+
241+
categories := map[string][]string{
242+
"Query Processing": {"rego_query_"},
243+
"Policy Compilation": {"rego_module_", "compile_"},
244+
"Evaluation Operations": {"eval_op_"},
245+
"Partial Evaluation": {"partial_op_", "rego_partial_eval"},
246+
"Caching": {"cache_hit", "cache_miss", "cache_build", "cache_skip", "interquery"},
247+
"Storage & I/O": {"disk_", "rego_load_"},
248+
"Built-in Functions": {"http_send", "glob_interquery", "regex_interquery", "rego_external_resolve"},
249+
"WASM Runtime": {"wasm_"},
250+
"Bundle Management": {"bundle_"},
251+
"Data Processing": {"rego_data_", "rego_input_"},
252+
"Server & SDK": {"server_", "sdk_"},
253+
}
254+
255+
categoryNames := make([]string, 0, len(categories))
256+
for name := range categories {
257+
categoryNames = append(categoryNames, name)
258+
}
259+
sort.Strings(categoryNames)
260+
261+
for _, category := range categoryNames {
262+
patterns := categories[category]
263+
var categoryMetrics []Metric
264+
for _, m := range metricsRegistry {
265+
for _, pattern := range patterns {
266+
if strings.Contains(m.Name, pattern) {
267+
categoryMetrics = append(categoryMetrics, m)
268+
break
269+
}
270+
}
271+
}
272+
273+
if len(categoryMetrics) > 0 {
274+
metricWord := "metrics"
275+
if len(categoryMetrics) == 1 {
276+
metricWord = "metric"
277+
}
278+
fmt.Printf("### %s (%d %s)\n\n", category, len(categoryMetrics), metricWord)
279+
280+
sort.Slice(categoryMetrics, func(i, j int) bool {
281+
return categoryMetrics[i].Name < categoryMetrics[j].Name
282+
})
283+
284+
for _, m := range categoryMetrics {
285+
fmt.Printf("- **`%s`** - %s", m.formatMetricName(), m.Description)
286+
if m.ConstantName != "" {
287+
fmt.Printf(" (`%s`)", m.ConstantName)
288+
}
289+
fmt.Printf("\n")
290+
}
291+
fmt.Println()
292+
}
293+
}
294+
295+
fmt.Println("## Source Files")
296+
fmt.Println()
297+
fmt.Println("Metrics are defined across several files:")
298+
fmt.Println()
299+
300+
sourceDescriptions := map[MetricSource]string{
301+
SourceMetrics: "Core metrics constants",
302+
SourceInstrumentation: "Evaluation operation metrics",
303+
SourceDiskStorage: "Disk storage metrics",
304+
SourceHTTPBuiltin: "HTTP built-in metrics",
305+
SourceWASMPool: "WASM pool management",
306+
SourceWASMVM: "WASM VM execution",
307+
SourceCompile: "Compilation stage metrics",
308+
SourceServer: "Server operation metrics",
309+
}
310+
311+
bySource := groupBySource(metricsRegistry)
312+
sourceFiles := make([]MetricSource, 0, len(sourceDescriptions))
313+
for source := range sourceDescriptions {
314+
sourceFiles = append(sourceFiles, source)
315+
}
316+
sort.Slice(sourceFiles, func(i, j int) bool {
317+
return string(sourceFiles[i]) < string(sourceFiles[j])
318+
})
319+
320+
for _, source := range sourceFiles {
321+
desc := sourceDescriptions[source]
322+
if metrics, ok := bySource[source]; ok {
323+
metricWord := "metrics"
324+
if len(metrics) == 1 {
325+
metricWord = "metric"
326+
}
327+
fmt.Printf("- **%s** (%d %s) - %s\n", source, len(metrics), metricWord, desc)
328+
}
329+
}
330+
}

0 commit comments

Comments
 (0)