Skip to content

Commit 088575f

Browse files
committed
removed label & passing shared metrics aggregator from top level
1 parent 5906bd7 commit 088575f

File tree

7 files changed

+58
-59
lines changed

7 files changed

+58
-59
lines changed

cmd/glbc/main.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ import (
6969
_ "k8s.io/ingress-gce/pkg/klog"
7070
"k8s.io/ingress-gce/pkg/neg"
7171
"k8s.io/ingress-gce/pkg/neg/metrics"
72+
syncMetrics "k8s.io/ingress-gce/pkg/neg/metrics/metricscollector"
7273
"k8s.io/ingress-gce/pkg/neg/syncers/labels"
7374
negtypes "k8s.io/ingress-gce/pkg/neg/types"
7475
"k8s.io/ingress-gce/pkg/utils/zonegetter"
@@ -686,7 +687,12 @@ func createNEGController(ctx *ingctx.ControllerContext, systemHealth *systemheal
686687
adapter = ctx.Cloud
687688
}
688689

690+
// register NEG prometheus metrics
691+
metrics.RegisterMetrics()
692+
syncMetrics.RegisterMetrics()
693+
689694
negMetrics := metrics.NewNegMetrics("")
695+
syncerMetrics := syncMetrics.NewNegMetricsCollector(flags.F.NegMetricsExportInterval, logger, negMetrics.ProviderConfigID)
690696

691697
// TODO: Refactor NEG to use cloud mocks so ctx.Cloud can be referenced within NewController.
692698
negController, err := neg.NewController(
@@ -725,6 +731,7 @@ func createNEGController(ctx *ingctx.ControllerContext, systemHealth *systemheal
725731
stopCh,
726732
logger,
727733
negMetrics,
734+
syncerMetrics,
728735
)
729736
if err != nil {
730737
return nil, fmt.Errorf("failed to create NEG controller: %w", err)

pkg/multiproject/neg/neg.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"k8s.io/ingress-gce/pkg/multiproject/filteredinformer"
1818
"k8s.io/ingress-gce/pkg/neg"
1919
"k8s.io/ingress-gce/pkg/neg/metrics"
20+
syncMetrics "k8s.io/ingress-gce/pkg/neg/metrics/metricscollector"
2021
"k8s.io/ingress-gce/pkg/neg/syncers/labels"
2122
negtypes "k8s.io/ingress-gce/pkg/neg/types"
2223
"k8s.io/ingress-gce/pkg/network"
@@ -29,6 +30,12 @@ import (
2930
"k8s.io/klog/v2"
3031
)
3132

33+
func init() {
34+
// register prometheus metrics
35+
metrics.RegisterMetrics()
36+
syncMetrics.RegisterMetrics()
37+
}
38+
3239
// StartNEGController creates and runs a NEG controller for the specified ProviderConfig.
3340
// The returned channel is closed by StopControllersForProviderConfig to signal a shutdown
3441
// specific to this ProviderConfig's controller.
@@ -294,7 +301,7 @@ func createNEGController(
294301
noDefaultBackendServicePort := utils.ServicePort{}
295302
var noNodeTopologyInformer cache.SharedIndexInformer
296303
negMetrics := metrics.NewNegMetrics(providerConfigID)
297-
304+
syncerMetrics := syncMetrics.NewNegMetricsCollector(flags.F.NegMetricsExportInterval, logger, negMetrics.ProviderConfigID)
298305
negController, err := neg.NewController(
299306
kubeClient,
300307
svcNegClient,
@@ -331,6 +338,7 @@ func createNEGController(
331338
stopCh,
332339
logger,
333340
negMetrics,
341+
syncerMetrics,
334342
)
335343
if err != nil {
336344
return nil, fmt.Errorf("failed to create NEG controller: %w", err)

pkg/neg/controller.go

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,6 @@ import (
5555
"k8s.io/klog/v2"
5656
)
5757

58-
func init() {
59-
// register prometheus metrics
60-
metrics.RegisterMetrics()
61-
syncMetrics.RegisterMetrics()
62-
}
63-
6458
// Controller is network endpoint group controller.
6559
// It determines whether NEG for a service port is needed, then signals NegSyncerManager to sync it.
6660
type Controller struct {
@@ -163,6 +157,7 @@ func NewController(
163157
stopCh <-chan struct{},
164158
logger klog.Logger,
165159
negMetrics *metrics.NegMetrics,
160+
syncerMetrics *syncMetrics.SyncerMetrics,
166161
) (*Controller, error) {
167162
if svcNegClient == nil {
168163
return nil, fmt.Errorf("svcNegClient is nil")
@@ -188,8 +183,6 @@ func NewController(
188183
recorder := eventBroadcaster.NewRecorder(negScheme,
189184
apiv1.EventSource{Component: "neg-controller"})
190185

191-
syncerMetrics := syncMetrics.NewNegMetricsCollector(flags.F.NegMetricsExportInterval, logger, negMetrics.ProviderConfigID)
192-
193186
manager := newSyncerManager(
194187
namer,
195188
l4Namer,

pkg/neg/controller_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ func newTestControllerWithParamsAndContext(kubeClient kubernetes.Interface, test
165165
make(<-chan struct{}),
166166
klog.TODO(),
167167
testContext.NegMetrics,
168+
metricscollector.FakeSyncerMetrics(),
168169
)
169170
}
170171

pkg/neg/metrics/metrics.go

Lines changed: 29 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,10 @@ var (
6666
Buckets: prometheus.ExponentialBuckets(1, 2, 13),
6767
},
6868
[]string{
69-
"operation", // endpoint operation
70-
"neg_type", // type of neg
71-
"api_version", // GCE API version
72-
"result", // result of the sync
73-
"providerconfig_id", // provider config ID if multi-project NEG
69+
"operation", // endpoint operation
70+
"neg_type", // type of neg
71+
"api_version", // GCE API version
72+
"result", // result of the sync
7473
},
7574
)
7675

@@ -83,10 +82,9 @@ var (
8382
Buckets: prometheus.ExponentialBuckets(1, 2, 13),
8483
},
8584
[]string{
86-
"operation", // endpoint operation
87-
"neg_type", // type of neg
88-
"result", // result of the sync
89-
"providerconfig_id", // provider config ID if multi-project NEG
85+
"operation", // endpoint operation
86+
"neg_type", // type of neg
87+
"result", // result of the sync
9088
},
9189
)
9290

@@ -102,7 +100,6 @@ var (
102100
"neg_type", //type of neg
103101
"endpoint_calculator_mode", // type of endpoint calculator used
104102
"result", // result of the sync
105-
"providerconfig_id", // provider config ID if multi-project NEG
106103
},
107104
)
108105

@@ -115,23 +112,19 @@ var (
115112
Buckets: prometheus.ExponentialBuckets(1, 2, 13),
116113
},
117114
[]string{
118-
"process", // type of manager process loop
119-
"result", // result of the process
120-
"providerconfig_id", // provider config ID if multi-project NEG
115+
"process", // type of manager process loop
116+
"result", // result of the process
121117
},
122118
)
123119

124-
InitializationLatency = prometheus.NewHistogramVec(
120+
InitializationLatency = prometheus.NewHistogram(
125121
prometheus.HistogramOpts{
126122
Subsystem: negControllerSubsystem,
127123
Name: "neg_initialization_duration_seconds",
128124
Help: "Initialization latency of a NEG",
129125
// custom buckets - [1s, 2s, 4s, 8s, 16s, 32s, 64s, 128s, 256s(~4min), 512s(~8min), 1024s(~17min), 2048 (~34min), 4096(~68min), +Inf]
130126
Buckets: prometheus.ExponentialBuckets(1, 2, 13),
131127
},
132-
[]string{
133-
"providerconfig_id", // provider config ID if multi-project NEG
134-
},
135128
)
136129

137130
LastSyncTimestamp = prometheus.NewGauge(
@@ -173,9 +166,8 @@ var (
173166
Buckets: append([]float64{0}, prometheus.ExponentialBuckets(1, 2, 20)...),
174167
},
175168
[]string{
176-
"neg_type", // type of neg
177-
"endpoint_type", // type of endpoint
178-
"providerconfig_id", // provider config ID if multi-project NEG
169+
"neg_type", // type of neg
170+
"endpoint_type", // type of endpoint
179171
},
180172
)
181173

@@ -187,9 +179,7 @@ var (
187179
Name: "error_count",
188180
Help: "Counts of server errors and NEG controller errors.",
189181
},
190-
[]string{"error_type",
191-
"providerconfig_id", // provider config ID if multi-project NEG
192-
},
182+
[]string{"error_type"},
193183
)
194184

195185
LabelNumber = prometheus.NewHistogram(
@@ -228,7 +218,7 @@ var (
228218
Name: "gce_request_count",
229219
Help: "Number of requests sent by NEG Controller to Arcus.",
230220
},
231-
[]string{"request", "result", "providerconfig_id"},
221+
[]string{"request", "result"},
232222
)
233223

234224
// GCERequestLatency tracks the latency of GCE requests the neg controller sends to the NEG API
@@ -240,7 +230,7 @@ var (
240230
// custom buckets - [0.001, 0.01, 0.1, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288, +Inf]
241231
Buckets: append([]float64{0.001, 0.01, 0.1}, prometheus.ExponentialBuckets(1, 2, 20)...),
242232
},
243-
[]string{"request", "result", "providerconfig_id"},
233+
[]string{"request", "result"},
244234
)
245235

246236
// K8sRequestCount tracks the number of K8s requests the neg controller sends to the K8s API
@@ -250,7 +240,7 @@ var (
250240
Name: "k8s_request_count",
251241
Help: "Number of requests sent by NEG Controller to Kubernetes API Server.",
252242
},
253-
[]string{"request", "result", "providerconfig_id"},
243+
[]string{"request", "result"},
254244
)
255245

256246
// K8sRequestLatency tracks the latency of K8s requests the neg controller sends to the K8s API
@@ -262,7 +252,7 @@ var (
262252
// custom buckets - [0.001, 0.01, 0.1, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288, +Inf]
263253
Buckets: append([]float64{0.001, 0.01, 0.1}, prometheus.ExponentialBuckets(1, 2, 20)...),
264254
},
265-
[]string{"request", "result", "providerconfig_id"},
255+
[]string{"request", "result"},
266256
)
267257
)
268258

@@ -305,26 +295,26 @@ func NewNegMetrics(providerConfigID string) *NegMetrics {
305295
func (m *NegMetrics) PublishNegOperationMetrics(operation, negType, apiVersion string, err error, numEndpoints int, start time.Time) {
306296
result := getResult(err)
307297

308-
NegOperationLatency.WithLabelValues(operation, negType, apiVersion, result, m.ProviderConfigID).Observe(time.Since(start).Seconds())
309-
NegOperationEndpoints.WithLabelValues(operation, negType, result, m.ProviderConfigID).Observe(float64(numEndpoints))
298+
NegOperationLatency.WithLabelValues(operation, negType, apiVersion, result).Observe(time.Since(start).Seconds())
299+
NegOperationEndpoints.WithLabelValues(operation, negType, result).Observe(float64(numEndpoints))
310300
}
311301

312302
// PublishNegSyncMetrics publishes collected metrics for the sync of NEG
313303
func (m *NegMetrics) PublishNegSyncMetrics(negType, endpointCalculator string, err error, start time.Time) {
314304
result := getResult(err)
315305

316-
SyncerSyncLatency.WithLabelValues(negType, endpointCalculator, result, m.ProviderConfigID).Observe(time.Since(start).Seconds())
306+
SyncerSyncLatency.WithLabelValues(negType, endpointCalculator, result).Observe(time.Since(start).Seconds())
317307
}
318308

319309
// PublishNegManagerProcessMetrics publishes collected metrics for the neg manager loops
320310
func (m *NegMetrics) PublishNegManagerProcessMetrics(process string, err error, start time.Time) {
321311
result := getResult(err)
322-
ManagerProcessLatency.WithLabelValues(process, result, m.ProviderConfigID).Observe(time.Since(start).Seconds())
312+
ManagerProcessLatency.WithLabelValues(process, result).Observe(time.Since(start).Seconds())
323313
}
324314

325315
// PublishNegInitializationMetrics publishes collected metrics for time from request to initialization of NEG
326316
func (m *NegMetrics) PublishNegInitializationMetrics(latency time.Duration) {
327-
InitializationLatency.WithLabelValues(m.ProviderConfigID).Observe(latency.Seconds())
317+
InitializationLatency.Observe(latency.Seconds())
328318
}
329319

330320
func PublishNegSyncerStalenessMetrics(syncerStaleness time.Duration) {
@@ -338,7 +328,7 @@ func PublishNegEPSStalenessMetrics(epsStaleness time.Duration) {
338328
// PublishDegradedModeCorrectnessMetrics publishes collected metrics
339329
// of the correctness of degraded mode calculations compared with the current one
340330
func (m *NegMetrics) PublishDegradedModeCorrectnessMetrics(count int, endpointType string, negType string) {
341-
DegradeModeCorrectness.WithLabelValues(negType, endpointType, m.ProviderConfigID).Observe(float64(count))
331+
DegradeModeCorrectness.WithLabelValues(negType, endpointType).Observe(float64(count))
342332
}
343333

344334
// PublishNegControllerErrorCountMetrics publishes collected metrics
@@ -347,8 +337,8 @@ func (m *NegMetrics) PublishNegControllerErrorCountMetrics(err error, isIgnored
347337
if err == nil {
348338
return
349339
}
350-
NegControllerErrorCount.WithLabelValues(totalNegError, m.ProviderConfigID).Inc()
351-
NegControllerErrorCount.WithLabelValues(getErrorLabel(err, isIgnored), m.ProviderConfigID).Inc()
340+
NegControllerErrorCount.WithLabelValues(totalNegError).Inc()
341+
NegControllerErrorCount.WithLabelValues(getErrorLabel(err, isIgnored)).Inc()
352342
}
353343

354344
// PublishLabelPropagationError publishes an error that occurred during label propagation.
@@ -374,8 +364,8 @@ func (m *NegMetrics) PublishGCERequestCountMetrics(start time.Time, requestType
374364
result = otherError
375365
}
376366
}
377-
GCERequestLatency.WithLabelValues(requestType, result, m.ProviderConfigID).Observe(time.Since(start).Seconds())
378-
GCERequestCount.WithLabelValues(requestType, result, m.ProviderConfigID).Inc()
367+
GCERequestLatency.WithLabelValues(requestType, result).Observe(time.Since(start).Seconds())
368+
GCERequestCount.WithLabelValues(requestType, result).Inc()
379369
}
380370

381371
// PublishK8sRequestCountMetrics publishes collected metrics for K8s Request Counts
@@ -390,8 +380,8 @@ func (m *NegMetrics) PublishK8sRequestCountMetrics(start time.Time, requestType
390380
result = otherError
391381
}
392382
}
393-
K8sRequestLatency.WithLabelValues(requestType, result, m.ProviderConfigID).Observe(time.Since(start).Seconds())
394-
K8sRequestCount.WithLabelValues(requestType, result, m.ProviderConfigID).Inc()
383+
K8sRequestLatency.WithLabelValues(requestType, result).Observe(time.Since(start).Seconds())
384+
K8sRequestCount.WithLabelValues(requestType, result).Inc()
395385
}
396386

397387
func getResult(err error) string {

pkg/neg/metrics/metricscollector/metrics.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ var (
8888
Name: "number_of_endpoints",
8989
Help: "The total number of endpoints",
9090
},
91-
[]string{"feature", "provider_config_id"},
91+
[]string{"feature"},
9292
)
9393

9494
DualStackMigrationFinishedDurations = prometheus.NewHistogram(
@@ -116,7 +116,7 @@ var (
116116
Name: "syncer_count_by_endpoint_type",
117117
Help: "Number of Syncers managing NEGs containing endpoint of a particular kind",
118118
},
119-
[]string{"endpoint_type", "provider_config_id"},
119+
[]string{"endpoint_type"},
120120
)
121121

122122
DualStackMigrationServiceCount = prometheus.NewGauge(
@@ -134,7 +134,7 @@ var (
134134
Name: "sync_result",
135135
Help: "Current count for each sync result",
136136
},
137-
[]string{"result", "provider_config_id"},
137+
[]string{"result"},
138138
)
139139

140140
negsManagedCount = prometheus.NewGaugeVec(
@@ -143,14 +143,14 @@ var (
143143
Name: "managed_neg_count",
144144
Help: "Number of NEGs the Neg Controller Manages",
145145
},
146-
[]string{"location", "endpoint_type", "provider_config_id"},
146+
[]string{"location", "endpoint_type"},
147147
)
148148

149149
networkEndpointGroupCount = prometheus.NewGaugeVec(
150150
prometheus.GaugeOpts{
151151
Name: "number_of_negs",
152152
Help: "Number of NEGs",
153153
},
154-
[]string{"feature", "provider_config_id"},
154+
[]string{"feature"},
155155
)
156156
)

pkg/neg/metrics/metricscollector/metrics_collector.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,8 @@ func (sm *SyncerMetrics) Run(stopCh <-chan struct{}) {
133133
// export exports syncer metrics.
134134
func (sm *SyncerMetrics) export() {
135135
lpMetrics := sm.computeLabelMetrics()
136-
NumberOfEndpoints.WithLabelValues(totalEndpoints, sm.providerConfigID).Set(float64(lpMetrics.NumberOfEndpoints))
137-
NumberOfEndpoints.WithLabelValues(epWithAnnotation, sm.providerConfigID).Set(float64(lpMetrics.EndpointsWithAnnotation))
136+
NumberOfEndpoints.WithLabelValues(totalEndpoints).Set(float64(lpMetrics.NumberOfEndpoints))
137+
NumberOfEndpoints.WithLabelValues(epWithAnnotation).Set(float64(lpMetrics.EndpointsWithAnnotation))
138138

139139
stateCount, syncerCount := sm.computeSyncerStateMetrics()
140140
// Reset metric so non-existent keys are now 0
@@ -155,7 +155,7 @@ func (sm *SyncerMetrics) export() {
155155
// Clear existing metrics (ensures that keys that don't exist anymore are reset)
156156
negsManagedCount.Reset()
157157
for key, count := range negCounts {
158-
negsManagedCount.WithLabelValues(key.location, key.endpointType, sm.providerConfigID).Set(float64(count))
158+
negsManagedCount.WithLabelValues(key.location, key.endpointType).Set(float64(count))
159159
}
160160

161161
sm.logger.V(3).Info("Exporting syncer related metrics", "Syncer count", syncerCount,
@@ -173,7 +173,7 @@ func (sm *SyncerMetrics) export() {
173173

174174
syncerCountByEndpointType, migrationEndpointCount, migrationServicesCount := sm.computeDualStackMigrationCounts()
175175
for endpointType, count := range syncerCountByEndpointType {
176-
SyncerCountByEndpointType.WithLabelValues(endpointType, sm.providerConfigID).Set(float64(count))
176+
SyncerCountByEndpointType.WithLabelValues(endpointType).Set(float64(count))
177177
}
178178
syncerEndpointState.WithLabelValues(string(negtypes.DualStackMigration)).Set(float64(migrationEndpointCount))
179179
DualStackMigrationServiceCount.Set(float64(migrationServicesCount))
@@ -182,7 +182,7 @@ func (sm *SyncerMetrics) export() {
182182

183183
negCount := sm.computeNegMetrics()
184184
for feature, count := range negCount {
185-
networkEndpointGroupCount.WithLabelValues(feature.String(), sm.providerConfigID).Set(float64(count))
185+
networkEndpointGroupCount.WithLabelValues(feature.String()).Set(float64(count))
186186
}
187187
sm.logger.V(3).Info("Exported NEG usage metrics", "NEG count", fmt.Sprintf("%#v", negCount))
188188
}
@@ -194,7 +194,7 @@ func (sm *SyncerMetrics) UpdateSyncerStatusInMetrics(key negtypes.NegSyncerKey,
194194
syncErr := negtypes.ClassifyError(err)
195195
reason = syncErr.Reason
196196
}
197-
syncerSyncResult.WithLabelValues(string(reason), sm.providerConfigID).Inc()
197+
syncerSyncResult.WithLabelValues(string(reason)).Inc()
198198
sm.mu.Lock()
199199
defer sm.mu.Unlock()
200200
if sm.syncerStateMap == nil {

0 commit comments

Comments
 (0)