Skip to content

Commit 9c965a7

Browse files
authored
Aggregate counter downsampling preserves resets (#143381)
In this PR we aim to improve the accuracy of the aggregate counter with the following changes: - The downsampled document will record the first, not the last, value of the counter. This should improve accuracy because the first value is closer to the start of the bucket than the last value. - If we detect a reset, we track extra documents: the last value before the reset and, optionally, the value after the reset. These documents will preserve the original timestamps. Our hypothesis is that with these two changes, we can have a more accurate counter estimation without a big performance regression (verified in #142280), assuming that reset events are rare and usually affect all counters at the same moment. Closes #136178
1 parent 4aaa3b6 commit 9c965a7

File tree

20 files changed

+1480
-189
lines changed

20 files changed

+1480
-189
lines changed

docs/changelog/143381.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
area: Downsampling
2+
issues:
3+
- 136178
4+
pr: 143381
5+
summary: Aggregate counter downsampling preserves resets
6+
type: enhancement

x-pack/plugin/downsample/qa/mixed-cluster/build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ dependencies {
2020

2121
restResources {
2222
restApi {
23-
include '_common', 'bulk', 'cluster', 'indices', 'search', 'ingest.put_pipeline', 'ingest.delete_pipeline'
23+
include '_common', 'bulk', 'cluster', 'indices', 'search', 'ingest.put_pipeline', 'ingest.delete_pipeline', 'capabilities'
2424
}
2525
}
2626

x-pack/plugin/downsample/qa/mixed-cluster/src/yamlRestTest/resources/rest-api-spec/test/downsample/10_basic.yml

Lines changed: 70 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,12 @@ setup:
8383
---
8484
"Downsample index":
8585
- requires:
86-
cluster_features: ["gte_v8.10.0"]
87-
reason: "Downsampling executed using persistent task framework from version 8.10"
88-
test_runner_features: allowed_warnings
86+
capabilities:
87+
- method: POST
88+
path: /{index}/_downsample/{target_index}
89+
capabilities: [ "downsampling.store_reset_counters" ]
90+
test_runner_features: [ capabilities, allowed_warnings ]
91+
reason: Storing counter resets when downsampling was added in 9.4
8992

9093
- do:
9194
allowed_warnings:
@@ -105,30 +108,72 @@ setup:
105108
body:
106109
sort: [ "_tsid", "@timestamp" ]
107110

108-
- length: { hits.hits: 4 }
109-
- match: { hits.hits.0._source._doc_count: 2 }
110-
# Verify dimensions & time
111-
- match: { hits.hits.0._source.metricset: pod }
112-
- match: { hits.hits.0._source.k8s.pod.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 }
111+
- length: { hits.hits: 7 }
112+
113+
# Downsampled doc
114+
- match: { hits.hits.0._source._doc_count: 1 }
113115
- match: { hits.hits.0._source.@timestamp: 2021-04-28T18:00:00.000Z }
116+
# Dimensions
117+
- match: { hits.hits.0._source.k8s\.pod\.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 }
118+
- match: { hits.hits.0._source.metricset: pod }
119+
# Metrics
120+
- match: { hits.hits.0._source.k8s\.pod\.multi-counter: 7 }
121+
- match: { hits.hits.0._source.k8s\.pod\.scaled-counter: 7.0 }
122+
- match: { hits.hits.0._source.k8s\.pod\.multi-gauge.min: 100 }
123+
- match: { hits.hits.0._source.k8s\.pod\.multi-gauge.max: 102 }
124+
- match: { hits.hits.0._source.k8s\.pod\.multi-gauge.sum: 607 }
125+
- match: { hits.hits.0._source.k8s\.pod\.multi-gauge.value_count: 6 }
126+
- match: { hits.hits.0._source.k8s\.pod\.scaled-gauge.min: 100.0 }
127+
- match: { hits.hits.0._source.k8s\.pod\.scaled-gauge.max: 101.0 }
128+
- match: { hits.hits.0._source.k8s\.pod\.scaled-gauge.sum: 201.0 }
129+
- match: { hits.hits.0._source.k8s\.pod\.scaled-gauge.value_count: 2 }
130+
- match: { hits.hits.0._source.k8s\.pod\.network\.tx.min: 1434521831 }
131+
- match: { hits.hits.0._source.k8s\.pod\.network\.tx.max: 1434577921 }
132+
- match: { hits.hits.0._source.k8s\.pod\.network\.tx.value_count: 2 }
133+
# Labels
134+
- match: { hits.hits.0._source.k8s\.pod\.ip: "10.10.55.56" }
135+
- match: { hits.hits.0._source.k8s\.pod\.created_at: "2021-04-28T19:43:00.000Z" }
136+
- match: { hits.hits.0._source.k8s\.pod\.number_of_containers: 1 }
137+
- match: { hits.hits.0._source.k8s\.pod\.tags: ["backend", "test", "us-west2"] }
138+
- match: { hits.hits.0._source.k8s\.pod\.values: [1, 1, 2] }
139+
- is_false: hits.hits.0._source.k8s\.pod\.running
140+
141+
# Doc with counter resets
142+
- is_false: hits.hits.1._source._doc_count
143+
- match: { hits.hits.1._source.@timestamp: 2021-04-28T18:50:23.142Z }
144+
# Dimensions
145+
- match: { hits.hits.1._source.k8s\.pod\.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 }
146+
- match: { hits.hits.1._source.metricset: pod }
147+
# Metrics
148+
- match: { hits.hits.1._source.k8s\.pod\.multi-counter: 0 }
149+
- match: { hits.hits.1._source.k8s\.pod\.scaled-counter: 0.0 }
150+
# Only dimensions and counters that have been reset are in this doc
151+
- is_false: hits.hits.1._source.k8s\.pod\.multi-gauge
152+
153+
# Next downsampled doc
154+
- match: { hits.hits.2._source._doc_count: 1 }
155+
- match: { hits.hits.2._source.@timestamp: 2021-04-28T19:00:00.000Z }
156+
# Dimensions
157+
- match: { hits.hits.2._source.k8s\.pod\.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 }
158+
- match: { hits.hits.2._source.metricset: pod }
159+
# Metrics
160+
- match: { hits.hits.2._source.k8s\.pod\.multi-counter: 1000 }
161+
- match: { hits.hits.2._source.k8s\.pod\.scaled-counter: 1000.0 }
162+
- match: { hits.hits.2._source.k8s\.pod\.multi-gauge.min: 95 }
114163

115-
# Verify metrics
116-
- match: { hits.hits.0._source.k8s.pod.multi-gauge.min: 100 }
117-
- match: { hits.hits.0._source.k8s.pod.multi-gauge.max: 102 }
118-
- match: { hits.hits.0._source.k8s.pod.multi-gauge.sum: 607 }
119-
- match: { hits.hits.0._source.k8s.pod.multi-gauge.value_count: 6 }
120-
- match: { hits.hits.0._source.k8s.pod.multi-counter: 0 }
121-
- match: { hits.hits.0._source.k8s.pod.network.tx.min: 1434521831 }
122-
- match: { hits.hits.0._source.k8s.pod.network.tx.max: 1434577921 }
123-
- match: { hits.hits.0._source.k8s.pod.network.tx.value_count: 2 }
124-
- match: { hits.hits.0._source.k8s.pod.ip: "10.10.55.56" }
125-
- match: { hits.hits.0._source.k8s.pod.created_at: "2021-04-28T19:43:00.000Z" }
126-
- match: { hits.hits.0._source.k8s.pod.number_of_containers: 1 }
127-
- match: { hits.hits.0._source.k8s.pod.tags: [ "backend", "test", "us-west2" ] }
128-
- match: { hits.hits.0._source.k8s.pod.values: [ 1, 1, 2 ] }
129-
- is_false: hits.hits.0._source.k8s.pod.running
164+
# Doc with counter resets
165+
- is_false: hits.hits.3._source._doc_count
166+
- match: { hits.hits.3._source.@timestamp: 2021-04-28T19:51:03.142Z }
167+
# Dimensions
168+
- match: { hits.hits.3._source.k8s\.pod\.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 }
169+
- match: { hits.hits.3._source.metricset: pod }
170+
# Metrics
171+
- match: { hits.hits.3._source.k8s\.pod\.multi-counter: 76 }
172+
# Only dimensions and counters that have been reset are in this doc
173+
- is_false: hits.hits.3._source.k8s\.pod\.scaled-counter
174+
- is_false: hits.hits.3._source.k8s\.pod\.multi-gauge
130175

131-
# Assert rollup index settings
176+
# Assert downsample index settings
132177
- do:
133178
indices.get_settings:
134179
index: test-downsample
@@ -138,7 +183,7 @@ setup:
138183
- match: { test-downsample.settings.index.time_series.start_time: 2021-04-28T00:00:00Z }
139184
- match: { test-downsample.settings.index.routing_path: [ "metricset", "k8s.pod.uid"] }
140185

141-
# Assert rollup index mapping
186+
# Assert downsample index mapping
142187
- do:
143188
indices.get_mapping:
144189
index: test-downsample

x-pack/plugin/downsample/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/downsample-with-security/10_basic.yml

Lines changed: 38 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -293,9 +293,12 @@ setup:
293293
---
294294
"Downsample index":
295295
- requires:
296-
cluster_features: ["gte_v8.13.0"]
297-
reason: _tsid hashing introduced in 8.13
298-
test_runner_features: allowed_warnings
296+
capabilities:
297+
- method: POST
298+
path: /{index}/_downsample/{target_index}
299+
capabilities: [ "downsampling.store_reset_counters" ]
300+
test_runner_features: [ capabilities, allowed_warnings ]
301+
reason: Storing counter resets when downsampling was added in 9.4
299302

300303
- do:
301304
allowed_warnings:
@@ -315,26 +318,40 @@ setup:
315318
body:
316319
sort: [ "_tsid", "@timestamp" ]
317320

318-
- length: { hits.hits: 4 }
319-
- match: { hits.hits.0._source._doc_count: 2 }
320-
- match: { hits.hits.0._source.k8s.pod.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 }
321-
- match: { hits.hits.0._source.metricset: pod }
322-
- match: { hits.hits.0._source.@timestamp: "2021-04-28T18:00:00.000Z" }
323-
- match: { hits.hits.0._source.k8s.pod.multi-counter: 0 }
324-
- match: { hits.hits.0._source.k8s.pod.multi-gauge.min: 100.0 }
325-
- match: { hits.hits.0._source.k8s.pod.multi-gauge.max: 102.0 }
326-
- match: { hits.hits.0._source.k8s.pod.multi-gauge.sum: 607.0 }
327-
- match: { hits.hits.0._source.k8s.pod.multi-gauge.value_count: 6 }
328-
- match: { hits.hits.0._source.k8s.pod.network.tx.min: 1434521831 }
329-
- match: { hits.hits.0._source.k8s.pod.network.tx.max: 1434577921 }
330-
- match: { hits.hits.0._source.k8s.pod.network.tx.value_count: 2 }
331-
- match: { hits.hits.0._source.k8s.pod.ip: "10.10.55.56" }
332-
- match: { hits.hits.0._source.k8s.pod.created_at: "2021-04-28T19:43:00.000Z" }
333-
- match: { hits.hits.0._source.k8s.pod.number_of_containers: 1 }
334-
- match: { hits.hits.0._source.k8s.pod.tags: [ "backend", "test", "us-west2" ] }
335-
- match: { hits.hits.0._source.k8s.pod.values: [ 1, 1, 2 ] }
321+
- length: { hits.hits: 7 }
322+
323+
# Downsampled doc
324+
- match: { hits.hits.0._source._doc_count: 1 }
325+
- match: { hits.hits.0._source.@timestamp: 2021-04-28T18:00:00.000Z }
326+
# Dimensions
327+
- match: { hits.hits.0._source.k8s.pod.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 }
328+
- match: { hits.hits.0._source.metricset: pod }
329+
# Metrics
330+
- match: { hits.hits.0._source.k8s.pod.multi-counter: 7 }
331+
- match: { hits.hits.0._source.k8s.pod.multi-gauge.min: 100 }
332+
- match: { hits.hits.0._source.k8s.pod.multi-gauge.max: 102 }
333+
- match: { hits.hits.0._source.k8s.pod.multi-gauge.sum: 607 }
334+
- match: { hits.hits.0._source.k8s.pod.multi-gauge.value_count: 6 }
335+
- match: { hits.hits.0._source.k8s.pod.network.tx.min: 1434521831 }
336+
- match: { hits.hits.0._source.k8s.pod.network.tx.max: 1434577921 }
337+
- match: { hits.hits.0._source.k8s.pod.network.tx.value_count: 2 }
338+
# Labels
339+
- match: { hits.hits.0._source.k8s.pod.ip: "10.10.55.56" }
340+
- match: { hits.hits.0._source.k8s.pod.created_at: "2021-04-28T19:43:00.000Z" }
341+
- match: { hits.hits.0._source.k8s.pod.number_of_containers: 1 }
342+
- match: { hits.hits.0._source.k8s.pod.tags: ["backend", "test", "us-west2"] }
343+
- match: { hits.hits.0._source.k8s.pod.values: [1, 1, 2] }
336344
- is_false: hits.hits.0._source.k8s.pod.running
337345

346+
# Doc with counter resets
347+
- is_false: hits.hits.1._source._doc_count
348+
- match: { hits.hits.1._source.@timestamp: 2021-04-28T18:50:23.142Z }
349+
# Dimensions
350+
- match: { hits.hits.1._source.k8s.pod.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 }
351+
- match: { hits.hits.1._source.metricset: pod }
352+
# Metrics
353+
- match: { hits.hits.1._source.k8s.pod.multi-counter: 0 }
354+
338355
# Assert downsample index settings
339356
- do:
340357
indices.get_settings:
@@ -369,12 +386,3 @@ setup:
369386
- do:
370387
indices.get:
371388
index: test
372-
373-
# Assert downsample index has been force merged
374-
- do:
375-
indices.segments:
376-
index: test-downsample
377-
378-
- match: { _shards.total: 1}
379-
- match: { indices.test-downsample.shards.0.0.num_committed_segments: 1}
380-
- match: { indices.test-downsample.shards.0.0.num_search_segments: 1}

0 commit comments

Comments
 (0)