Skip to content

Commit dd7f16b

Browse files
authored
New metric for ingester errors (#6901)
* New metric for ingester errors
  Signed-off-by: Daniel Deluiggi <[email protected]>
* changelog
  Signed-off-by: Daniel Deluiggi <[email protected]>
* change error reason
  Signed-off-by: Daniel Deluiggi <[email protected]>
---------
Signed-off-by: Daniel Deluiggi <[email protected]>
1 parent 69ac3d1 commit dd7f16b

File tree

5 files changed

+22
-6
lines changed

5 files changed

+22
-6
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
56 56   * [ENHANCEMENT] Distributor: Add native histograms max sample size bytes limit validation. #6834
57 57   * [ENHANCEMENT] Querier: Support caching parquet labels file in parquet queryable. #6835
58 58   * [ENHANCEMENT] Querier: Support query limits in parquet queryable. #6870
   59 + * [ENHANCEMENT] Ingester: Add new metric `cortex_ingester_push_errors_total` to track reasons for ingester request failures. #6901
59 60   * [BUGFIX] Ingester: Avoid error or early throttling when READONLY ingesters are present in the ring #6517
60 61   * [BUGFIX] Ingester: Fix labelset data race condition. #6573
61 62   * [BUGFIX] Compactor: Cleaner should not put deletion marker for blocks with no-compact marker. #6576

pkg/ingester/ingester.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1167,6 +1167,11 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte
11671167
span, ctx := opentracing.StartSpanFromContext(ctx, "Ingester.Push")
11681168
defer span.Finish()
11691169

1170+
userID, err := tenant.TenantID(ctx)
1171+
if err != nil {
1172+
return nil, err
1173+
}
1174+
11701175
// We will report *this* request in the error too.
11711176
inflight := i.inflightPushRequests.Inc()
11721177
i.maxInflightPushRequests.Track(inflight)
@@ -1175,6 +1180,7 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte
11751180
gl := i.getInstanceLimits()
11761181
if gl != nil && gl.MaxInflightPushRequests > 0 {
11771182
if inflight > gl.MaxInflightPushRequests {
1183+
i.metrics.pushErrorsTotal.WithLabelValues(userID, pushErrTooManyInflightRequests).Inc()
11781184
return nil, errTooManyInflightPushRequests
11791185
}
11801186
}
@@ -1186,11 +1192,6 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte
11861192
defer req.Free()
11871193
defer cortexpb.ReuseSlice(req.Timeseries)
11881194

1189-
userID, err := tenant.TenantID(ctx)
1190-
if err != nil {
1191-
return nil, err
1192-
}
1193-
11941195
il := i.getInstanceLimits()
11951196
if il != nil && il.MaxIngestionRate > 0 {
11961197
if rate := i.ingestionRate.Rate(); rate >= il.MaxIngestionRate {

pkg/ingester/ingester_test.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6515,7 +6515,8 @@ func TestIngester_inflightPushRequests(t *testing.T) {
65156515
cfg.InstanceLimitsFn = func() *InstanceLimits { return &limits }
65166516
cfg.LifecyclerConfig.JoinAfter = 0
65176517

6518-
i, err := prepareIngesterWithBlocksStorage(t, cfg, prometheus.NewRegistry())
6518+
reg := prometheus.NewRegistry()
6519+
i, err := prepareIngesterWithBlocksStorage(t, cfg, reg)
65196520
require.NoError(t, err)
65206521
require.NoError(t, services.StartAndAwaitRunning(context.Background(), i))
65216522
defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck
@@ -6553,6 +6554,11 @@ func TestIngester_inflightPushRequests(t *testing.T) {
65536554

65546555
_, err := i.Push(ctx, req)
65556556
require.Equal(t, errTooManyInflightPushRequests, err)
6557+
require.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(`
6558+
# HELP cortex_ingester_push_errors_total The total number of push errors per user.
6559+
# TYPE cortex_ingester_push_errors_total counter
6560+
cortex_ingester_push_errors_total{reason="tooManyInflightRequests",user="test"} 1
6561+
`), "cortex_ingester_push_errors_total"))
65566562
return nil
65576563
})
65586564

pkg/ingester/instance_limits.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ var (
1313
errMaxSeriesLimitReached = errors.New("cannot add series: ingesters's max series limit reached")
1414
errTooManyInflightPushRequests = errors.New("cannot push: too many inflight push requests in ingester")
1515
errTooManyInflightQueryRequests = errors.New("cannot push: too many inflight query requests in ingester")
16+
17+
pushErrTooManyInflightRequests = "tooManyInflightRequests"
1618
)
1719

1820
// InstanceLimits describes limits used by ingester. Reaching any of these will result in error response to the call.

pkg/ingester/metrics.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ type ingesterMetrics struct {
4444
memMetadataCreatedTotal *prometheus.CounterVec
4545
memSeriesRemovedTotal *prometheus.CounterVec
4646
memMetadataRemovedTotal *prometheus.CounterVec
47+
pushErrorsTotal *prometheus.CounterVec
4748

4849
activeSeriesPerUser *prometheus.GaugeVec
4950
activeNHSeriesPerUser *prometheus.GaugeVec
@@ -165,6 +166,10 @@ func newIngesterMetrics(r prometheus.Registerer,
165166
Name: "cortex_ingester_memory_metadata_removed_total",
166167
Help: "The total number of metadata that were removed per user.",
167168
}, []string{"user"}),
169+
pushErrorsTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
170+
Name: "cortex_ingester_push_errors_total",
171+
Help: "The total number of push errors per user.",
172+
}, []string{"user", "reason"}),
168173

169174
maxUsersGauge: promauto.With(r).NewGaugeFunc(prometheus.GaugeOpts{
170175
Name: instanceLimits,
@@ -295,6 +300,7 @@ func (m *ingesterMetrics) deletePerUserMetrics(userID string) {
295300
m.activeNHSeriesPerUser.DeleteLabelValues(userID)
296301
m.usagePerLabelSet.DeletePartialMatch(prometheus.Labels{"user": userID})
297302
m.limitsPerLabelSet.DeletePartialMatch(prometheus.Labels{"user": userID})
303+
m.pushErrorsTotal.DeletePartialMatch(prometheus.Labels{"user": userID})
298304

299305
if m.memSeriesCreatedTotal != nil {
300306
m.memSeriesCreatedTotal.DeleteLabelValues(userID)

0 commit comments

Comments
 (0)