7
7
"time"
8
8
9
9
"github.com/pkg/errors"
10
+ "github.com/prometheus/client_golang/prometheus"
11
+ "github.com/prometheus/prometheus/pkg/labels"
10
12
"github.com/prometheus/prometheus/pkg/timestamp"
11
13
"github.com/stretchr/testify/assert"
12
14
"github.com/stretchr/testify/require"
@@ -17,6 +19,7 @@ import (
17
19
"github.com/cortexproject/cortex/pkg/ring"
18
20
"github.com/cortexproject/cortex/pkg/ring/kv"
19
21
"github.com/cortexproject/cortex/pkg/ring/kv/consul"
22
+ "github.com/cortexproject/cortex/pkg/util"
20
23
"github.com/cortexproject/cortex/pkg/util/flagext"
21
24
"github.com/cortexproject/cortex/pkg/util/services"
22
25
"github.com/cortexproject/cortex/pkg/util/test"
@@ -196,13 +199,14 @@ func TestCheckReplicaMultiCluster(t *testing.T) {
196
199
replica1 := "replica1"
197
200
replica2 := "replica2"
198
201
202
+ reg := prometheus .NewPedanticRegistry ()
199
203
c , err := newClusterTracker (HATrackerConfig {
200
204
EnableHATracker : true ,
201
205
KVStore : kv.Config {Store : "inmemory" },
202
206
UpdateTimeout : 100 * time .Millisecond ,
203
207
UpdateTimeoutJitterMax : 0 ,
204
208
FailoverTimeout : time .Second ,
205
- }, trackerLimits {maxClusters : 100 }, nil )
209
+ }, trackerLimits {maxClusters : 100 }, reg )
206
210
require .NoError (t , err )
207
211
require .NoError (t , services .StartAndAwaitRunning (context .Background (), c ))
208
212
defer services .StopAndAwaitTerminated (context .Background (), c ) //nolint:errcheck
@@ -224,20 +228,34 @@ func TestCheckReplicaMultiCluster(t *testing.T) {
224
228
assert .NoError (t , err )
225
229
err = c .checkReplica (context .Background (), "user" , "c2" , replica1 )
226
230
assert .NoError (t , err )
231
+
232
+ // We expect no CAS operation failures (no 5xx), and at least one successful (2xx) CAS operation.
233
+ metrics , err := reg .Gather ()
234
+ require .NoError (t , err )
235
+
236
+ assert .Equal (t , uint64 (0 ), util .GetSumOfHistogramSampleCount (metrics , "cortex_kv_request_duration_seconds" , labels.Selector {
237
+ labels .MustNewMatcher (labels .MatchEqual , "operation" , "CAS" ),
238
+ labels .MustNewMatcher (labels .MatchRegexp , "status_code" , "5.*" ),
239
+ }))
240
+ assert .Greater (t , util .GetSumOfHistogramSampleCount (metrics , "cortex_kv_request_duration_seconds" , labels.Selector {
241
+ labels .MustNewMatcher (labels .MatchEqual , "operation" , "CAS" ),
242
+ labels .MustNewMatcher (labels .MatchRegexp , "status_code" , "2.*" ),
243
+ }), uint64 (0 ))
227
244
}
228
245
229
246
func TestCheckReplicaMultiClusterTimeout (t * testing.T ) {
230
247
start := mtime .Now ()
231
248
replica1 := "replica1"
232
249
replica2 := "replica2"
233
250
251
+ reg := prometheus .NewPedanticRegistry ()
234
252
c , err := newClusterTracker (HATrackerConfig {
235
253
EnableHATracker : true ,
236
254
KVStore : kv.Config {Store : "inmemory" },
237
255
UpdateTimeout : 100 * time .Millisecond ,
238
256
UpdateTimeoutJitterMax : 0 ,
239
257
FailoverTimeout : time .Second ,
240
- }, trackerLimits {maxClusters : 100 }, nil )
258
+ }, trackerLimits {maxClusters : 100 }, reg )
241
259
require .NoError (t , err )
242
260
require .NoError (t , services .StartAndAwaitRunning (context .Background (), c ))
243
261
defer services .StopAndAwaitTerminated (context .Background (), c ) //nolint:errcheck
@@ -259,7 +277,13 @@ func TestCheckReplicaMultiClusterTimeout(t *testing.T) {
259
277
err = c .checkReplica (context .Background (), "user" , "c2" , replica1 )
260
278
assert .NoError (t , err )
261
279
262
- // Wait more than the timeout.
280
+ // Reject samples from replica 2 in each cluster.
281
+ err = c .checkReplica (context .Background (), "user" , "c1" , replica2 )
282
+ assert .Error (t , err )
283
+ err = c .checkReplica (context .Background (), "user" , "c2" , replica2 )
284
+ assert .Error (t , err )
285
+
286
+ // Wait more than the failover timeout.
263
287
mtime .NowForce (start .Add (1100 * time .Millisecond ))
264
288
265
289
// Accept a sample from c1/replica2.
@@ -271,6 +295,19 @@ func TestCheckReplicaMultiClusterTimeout(t *testing.T) {
271
295
assert .Error (t , err )
272
296
err = c .checkReplica (context .Background (), "user" , "c2" , replica1 )
273
297
assert .NoError (t , err )
298
+
299
+ // We expect no CAS operation failures (no 5xx), and at least one successful (2xx) CAS operation.
300
+ metrics , err := reg .Gather ()
301
+ require .NoError (t , err )
302
+
303
+ assert .Equal (t , uint64 (0 ), util .GetSumOfHistogramSampleCount (metrics , "cortex_kv_request_duration_seconds" , labels.Selector {
304
+ labels .MustNewMatcher (labels .MatchEqual , "operation" , "CAS" ),
305
+ labels .MustNewMatcher (labels .MatchRegexp , "status_code" , "5.*" ),
306
+ }))
307
+ assert .Greater (t , util .GetSumOfHistogramSampleCount (metrics , "cortex_kv_request_duration_seconds" , labels.Selector {
308
+ labels .MustNewMatcher (labels .MatchEqual , "operation" , "CAS" ),
309
+ labels .MustNewMatcher (labels .MatchRegexp , "status_code" , "2.*" ),
310
+ }), uint64 (0 ))
274
311
}
275
312
276
313
// Test that writes only happen every update timeout.
0 commit comments