Skip to content

Commit c87b6a2

Browse files
committed
Fixed metrics collector aggregation issue for NEG metrics in MT
1 parent 74d3b58 commit c87b6a2

File tree

79 files changed

+3783
-1854
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

79 files changed

+3783
-1854
lines changed

OWNERS

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,18 @@
11
approvers:
2-
- aojea
3-
- thockin
4-
- bowei
5-
- freehan
6-
- mrhohn
7-
- prameshj
8-
- swetharepakula
9-
- panslava
10-
- cezarygerard
11-
- kl52752
12-
- spencerhance
13-
- code-elinka
14-
- mmamczur
15-
- alexkats
16-
- gauravkghildiyal
17-
- tortillazhawaii
2+
- aojea
3+
- thockin
4+
- bowei
5+
- mrhohn
6+
- swetharepakula
7+
- panslava
8+
- cezarygerard
9+
- kl52752
10+
- spencerhance
11+
- code-elinka
12+
- mmamczur
13+
- gauravkghildiyal
14+
- tortillazhawaii
15+
- felipeyepez
1816
reviewers:
19-
- aojea
20-
- sawsa307
17+
- aojea
18+
- sawsa307

cmd/glbc/app/init.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ import (
3535
// DefaultBackendServicePort returns the ServicePort which will be
3636
// used as the default backend for load balancers.
3737
func DefaultBackendServicePort(kubeClient kubernetes.Interface, logger klog.Logger) utils.ServicePort {
38+
if !flags.F.RunIngressController && !flags.F.EnableNEGsForIngress {
39+
noDefaultBackendServicePort := utils.ServicePort{}
40+
return noDefaultBackendServicePort
41+
}
42+
3843
if flags.F.DefaultSvc == "" {
3944
klog.Fatalf("Please specify --default-backend-service")
4045
}

cmd/glbc/main.go

Lines changed: 115 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -59,18 +59,19 @@ import (
5959
"k8s.io/ingress-gce/pkg/utils"
6060
"k8s.io/klog/v2"
6161

62-
ingctx "k8s.io/ingress-gce/pkg/context"
63-
"k8s.io/ingress-gce/pkg/controller"
64-
"k8s.io/ingress-gce/pkg/neg"
65-
"k8s.io/ingress-gce/pkg/neg/syncers/labels"
66-
negtypes "k8s.io/ingress-gce/pkg/neg/types"
67-
6862
"k8s.io/ingress-gce/cmd/glbc/app"
6963
"k8s.io/ingress-gce/pkg/backendconfig"
64+
ingctx "k8s.io/ingress-gce/pkg/context"
65+
"k8s.io/ingress-gce/pkg/controller"
7066
"k8s.io/ingress-gce/pkg/crd"
7167
"k8s.io/ingress-gce/pkg/firewalls"
7268
"k8s.io/ingress-gce/pkg/flags"
7369
_ "k8s.io/ingress-gce/pkg/klog"
70+
"k8s.io/ingress-gce/pkg/neg"
71+
"k8s.io/ingress-gce/pkg/neg/metrics"
72+
syncMetrics "k8s.io/ingress-gce/pkg/neg/metrics/metricscollector"
73+
"k8s.io/ingress-gce/pkg/neg/syncers/labels"
74+
negtypes "k8s.io/ingress-gce/pkg/neg/types"
7475
"k8s.io/ingress-gce/pkg/utils/zonegetter"
7576
"k8s.io/ingress-gce/pkg/version"
7677
)
@@ -265,9 +266,12 @@ func main() {
265266
if nodeTopologyClient != nil {
266267
nodeTopologyFactory = informernodetopology.NewSharedInformerFactory(nodeTopologyClient, flags.F.ResyncPeriod)
267268
}
269+
ctx := context.Background()
270+
syncerMetrics := syncMetrics.NewNegMetricsCollector(flags.F.NegMetricsExportInterval, rootLogger, "")
271+
268272
if flags.F.LeaderElection.LeaderElect {
269273
err := multiprojectstart.StartWithLeaderElection(
270-
context.Background(),
274+
ctx,
271275
leaderElectKubeClient,
272276
hostname,
273277
rootLogger,
@@ -283,10 +287,12 @@ func main() {
283287
gceCreator,
284288
namer,
285289
stopCh,
290+
syncerMetrics,
286291
)
287292
if err != nil {
288293
rootLogger.Error(err, "Failed to start multi-project syncer with leader election")
289294
}
295+
rOption.closeStopCh()
290296
} else {
291297
multiprojectstart.Start(
292298
rootLogger,
@@ -302,6 +308,7 @@ func main() {
302308
gceCreator,
303309
namer,
304310
stopCh,
311+
syncerMetrics,
305312
)
306313
}
307314
}, rOption.wg)
@@ -364,7 +371,7 @@ func main() {
364371
id: fmt.Sprintf("%v_%x", hostname, rand.Intn(1e6)),
365372
}
366373

367-
enableOtherControllers := flags.F.RunIngressController || flags.F.RunL4Controller || flags.F.RunL4NetLBController || flags.F.EnableIGController || flags.F.EnablePSC
374+
enableL4Controllers := flags.F.RunL4Controller || flags.F.RunL4NetLBController || flags.F.EnableIGController || flags.F.EnablePSC
368375
runNEG := func() {
369376
logger := rootLogger.WithName("NEGController")
370377
logger.Info("Start running the enabled controllers",
@@ -376,15 +383,22 @@ func main() {
376383
}
377384
}
378385
runIngress := func() {
379-
logger := rootLogger.WithName("Other controllers")
386+
logger := rootLogger.WithName("Ingress controller")
380387
logger.Info("Start running the enabled controllers",
381388
"Ingress controller", flags.F.RunIngressController,
389+
)
390+
runIngressControllers(ctx, systemHealth, rOption, leOption, logger)
391+
}
392+
393+
runL4 := func() {
394+
logger := rootLogger.WithName("L4 controllers")
395+
logger.Info("Start running the enabled controllers",
382396
"L4 controller", flags.F.RunL4Controller,
383397
"L4 NetLB controller", flags.F.RunL4NetLBController,
384398
"InstanceGroup controller", flags.F.EnableIGController,
385399
"PSC controller", flags.F.EnablePSC,
386400
)
387-
runControllers(ctx, systemHealth, rOption, leOption, logger)
401+
runL4Controllers(ctx, systemHealth, rOption, leOption, logger)
388402
}
389403

390404
if flags.F.LeaderElection.LeaderElect {
@@ -401,28 +415,45 @@ func main() {
401415
logger.Info("NEG Controller exited.")
402416
}
403417
runIngress = func() {
404-
logger := rootLogger.WithName("Other controllers")
418+
logger := rootLogger.WithName("Ingress controller")
405419
logger.Info("Start running Ingress leader election",
406420
"Ingress controller", flags.F.RunIngressController,
421+
)
422+
ingressRunner, err := makeIngressRunnerWithLeaderElection(ctx, systemHealth, rOption, leOption, logger)
423+
if err != nil {
424+
klog.Fatalf("makeLeaderElectionConfig()=%v, want nil", err)
425+
}
426+
leaderelection.RunOrDie(context.Background(), *ingressRunner)
427+
}
428+
if !flags.F.GateL4ByLock {
429+
klog.Fatalf("--gate-l4-by-lock must be true when --leader-elect=true")
430+
}
431+
runL4 = func() {
432+
logger := rootLogger.WithName("L4 controller")
433+
logger.Info("Start running L4 leader election",
407434
"L4 controller", flags.F.RunL4Controller,
408435
"L4 NetLB controller", flags.F.RunL4NetLBController,
409436
"InstanceGroup controller", flags.F.EnableIGController,
410437
"PSC controller", flags.F.EnablePSC,
411438
)
412-
ingressRunner, err := makeIngressRunnerWithLeaderElection(ctx, systemHealth, rOption, leOption, logger)
439+
l4Runner, err := makeL4RunnerWithLeaderElection(ctx, systemHealth, rOption, leOption, logger)
413440
if err != nil {
414441
klog.Fatalf("makeLeaderElectionConfig()=%v, want nil", err)
415442
}
416-
leaderelection.RunOrDie(context.Background(), *ingressRunner)
443+
leaderelection.RunOrDie(context.Background(), *l4Runner)
417444
}
445+
418446
}
419447

420448
if flags.F.EnableNEGController {
421449
go runNEG()
422450
}
423-
if enableOtherControllers {
451+
if flags.F.RunIngressController {
424452
go runIngress()
425453
}
454+
if enableL4Controllers {
455+
go runL4()
456+
}
426457

427458
<-rOption.stopCh
428459
waitWithTimeout(rOption.wg, rootLogger)
@@ -479,14 +510,35 @@ func makeIngressRunnerWithLeaderElection(
479510
leOption,
480511
flags.F.LeaderElection.LockObjectName,
481512
func(context.Context) {
482-
runControllers(ctx, systemHealth, runOption, leOption, logger)
513+
runIngressControllers(ctx, systemHealth, runOption, leOption, logger)
483514
},
484515
func() {
485516
logger.Info("lost master")
486517
},
487518
)
488519
}
489520

521+
func makeL4RunnerWithLeaderElection(
522+
ctx *ingctx.ControllerContext,
523+
systemHealth *systemhealth.SystemHealth,
524+
runOption runOption,
525+
leOption leaderElectionOption,
526+
logger klog.Logger,
527+
) (*leaderelection.LeaderElectionConfig, error) {
528+
lockLogger := logger.WithValues("lock", l4LockName)
529+
return makeRunnerWithLeaderElection(
530+
leOption,
531+
l4LockName,
532+
func(context.Context) {
533+
lockLogger.V(0).Info("Acquired L4 Leader election lock")
534+
runL4Controllers(ctx, systemHealth, runOption, leOption, logger)
535+
},
536+
func() {
537+
lockLogger.Info("lost master")
538+
},
539+
)
540+
}
541+
490542
// makeRunnerWithLeaderElection creates a LeaderElectionConfig with the provided options and callbacks.
491543
// It will create a new resource lock associated with the configuration.
492544
func makeRunnerWithLeaderElection(
@@ -520,7 +572,7 @@ func makeRunnerWithLeaderElection(
520572
}, nil
521573
}
522574

523-
func runControllers(ctx *ingctx.ControllerContext, systemHealth *systemhealth.SystemHealth, option runOption, leOption leaderElectionOption, logger klog.Logger) {
575+
func runIngressControllers(ctx *ingctx.ControllerContext, systemHealth *systemhealth.SystemHealth, option runOption, leOption leaderElectionOption, logger klog.Logger) {
524576
if flags.F.RunIngressController {
525577
lbc := controller.NewLoadBalancerController(ctx, option.stopCh, logger)
526578
systemHealth.AddHealthCheck("ingress", lbc.SystemHealth)
@@ -538,72 +590,55 @@ func runControllers(ctx *ingctx.ControllerContext, systemHealth *systemhealth.Sy
538590
logger.V(0).Info("firewall controller started")
539591
}
540592

541-
runL4Controllers(ctx, systemHealth, option, leOption, logger)
542-
543593
ctx.Start(option.stopCh)
544594
}
545595

546596
func runL4Controllers(ctx *ingctx.ControllerContext, systemHealth *systemhealth.SystemHealth, option runOption, leOption leaderElectionOption, logger klog.Logger) {
547597
if !flags.F.RunL4Controller && !flags.F.EnablePSC && !flags.F.EnableIGController && !flags.F.RunL4NetLBController {
548598
return
549599
}
550-
run := func() {
551-
if flags.F.RunL4Controller {
552-
l4Controller := l4lb.NewILBController(ctx, option.stopCh, logger)
553-
systemHealth.AddHealthCheck(l4lb.L4ILBControllerName, l4Controller.SystemHealth)
554-
runWithWg(l4Controller.Run, option.wg)
555-
logger.V(0).Info("L4 controller started")
556-
}
557-
558-
if flags.F.EnablePSC {
559-
pscController := psc.NewController(ctx, option.stopCh, logger)
560-
runWithWg(pscController.Run, option.wg)
561-
logger.V(0).Info("PSC Controller started")
562-
}
563600

564-
if flags.F.EnableIGController {
565-
igControllerParams := &instancegroups.ControllerConfig{
566-
NodeInformer: ctx.NodeInformer,
567-
ZoneGetter: ctx.ZoneGetter,
568-
IGManager: ctx.InstancePool,
569-
HasSynced: ctx.HasSynced,
570-
EnableMultiSubnetCluster: flags.F.EnableIGMultiSubnetCluster,
571-
ReadOnlyMode: flags.F.ReadOnlyMode,
572-
StopCh: option.stopCh,
573-
}
574-
igController := instancegroups.NewController(igControllerParams, logger)
575-
runWithWg(igController.Run, option.wg)
576-
}
601+
if flags.F.GateL4ByLock {
602+
go collectLockAvailabilityMetrics(l4LockName, flags.F.GKEClusterType, option.stopCh, logger)
603+
}
577604

578-
// The L4NetLbController will be run when RbsMode flag is Set
579-
if flags.F.RunL4NetLBController {
580-
l4netlbController := l4lb.NewL4NetLBController(ctx, option.stopCh, logger)
581-
systemHealth.AddHealthCheck(l4lb.L4NetLBControllerName, l4netlbController.SystemHealth)
605+
if flags.F.RunL4Controller {
606+
l4Controller := l4lb.NewILBController(ctx, option.stopCh, logger)
607+
systemHealth.AddHealthCheck(l4lb.L4ILBControllerName, l4Controller.SystemHealth)
608+
runWithWg(l4Controller.Run, option.wg)
609+
logger.V(0).Info("L4 controller started")
610+
}
582611

583-
runWithWg(l4netlbController.Run, option.wg)
584-
logger.V(0).Info("L4NetLB controller started")
612+
if flags.F.EnablePSC {
613+
pscController := psc.NewController(ctx, option.stopCh, logger)
614+
runWithWg(pscController.Run, option.wg)
615+
logger.V(0).Info("PSC Controller started")
616+
}
617+
618+
if flags.F.EnableIGController {
619+
igControllerParams := &instancegroups.ControllerConfig{
620+
NodeInformer: ctx.NodeInformer,
621+
ZoneGetter: ctx.ZoneGetter,
622+
IGManager: ctx.InstancePool,
623+
HasSynced: ctx.HasSynced,
624+
EnableMultiSubnetCluster: flags.F.EnableIGMultiSubnetCluster,
625+
ReadOnlyMode: flags.F.ReadOnlyMode,
626+
StopCh: option.stopCh,
585627
}
628+
igController := instancegroups.NewController(igControllerParams, logger)
629+
runWithWg(igController.Run, option.wg)
586630
}
587-
if !flags.F.LeaderElection.LeaderElect || !flags.F.GateL4ByLock {
588-
run()
589-
return
590-
}
591-
lockLogger := logger.WithValues("lock", l4LockName)
592-
runner, err := makeRunnerWithLeaderElection(leOption, l4LockName, func(ctx context.Context) {
593-
lockLogger.V(0).Info("Acquired L4 Leader election lock")
594-
go collectLockAvailabilityMetrics(l4LockName, flags.F.GKEClusterType, option.stopCh, lockLogger)
595-
run()
596-
}, func() {
597-
lockLogger.V(0).Info("Stop running L4 Leader election")
598-
})
599-
if err != nil {
600-
klog.Fatalf("L4 makeLeaderElectionConfig()=%v, want nil", err)
631+
632+
// The L4NetLbController will be run when RbsMode flag is Set
633+
if flags.F.RunL4NetLBController {
634+
l4netlbController := l4lb.NewL4NetLBController(ctx, option.stopCh, logger)
635+
systemHealth.AddHealthCheck(l4lb.L4NetLBControllerName, l4netlbController.SystemHealth)
636+
637+
runWithWg(l4netlbController.Run, option.wg)
638+
logger.V(0).Info("L4NetLB controller started")
601639
}
602-
// run in a separate goroutine to not block further operation if lock can't be acquired.
603-
go func() {
604-
lockLogger.V(0).Info("Attempt to acquire L4 Leader election lock")
605-
leaderelection.RunOrDie(context.Background(), *runner)
606-
}()
640+
641+
ctx.Start(option.stopCh)
607642
}
608643

609644
func runNEGController(ctx *ingctx.ControllerContext, systemHealth *systemhealth.SystemHealth, option runOption, logger klog.Logger) error {
@@ -656,6 +691,13 @@ func createNEGController(ctx *ingctx.ControllerContext, systemHealth *systemheal
656691
adapter = ctx.Cloud
657692
}
658693

694+
// register NEG prometheus metrics
695+
metrics.RegisterMetrics()
696+
syncMetrics.RegisterMetrics()
697+
698+
negMetrics := metrics.NewNegMetrics("")
699+
syncerMetrics := syncMetrics.NewNegMetricsCollector(flags.F.NegMetricsExportInterval, logger, negMetrics.ProviderConfigID)
700+
659701
// TODO: Refactor NEG to use cloud mocks so ctx.Cloud can be referenced within NewController.
660702
negController, err := neg.NewController(
661703
ctx.KubeClient,
@@ -674,7 +716,7 @@ func createNEGController(ctx *ingctx.ControllerContext, systemHealth *systemheal
674716
ctx.HasSynced,
675717
ctx.L4Namer,
676718
ctx.DefaultBackendSvcPort,
677-
negtypes.NewAdapterWithRateLimitSpecs(ctx.Cloud, flags.F.GCERateLimit.Values(), adapter),
719+
negtypes.NewAdapterWithRateLimitSpecs(ctx.Cloud, flags.F.GCERateLimit.Values(), adapter, negMetrics),
678720
zoneGetter,
679721
ctx.ClusterNamer,
680722
flags.F.ResyncPeriod,
@@ -689,8 +731,11 @@ func createNEGController(ctx *ingctx.ControllerContext, systemHealth *systemheal
689731
ctx.EnableIngressRegionalExternal,
690732
flags.F.EnableL4NetLBNEG,
691733
flags.F.ReadOnlyMode,
734+
flags.F.EnableNEGsForIngress,
692735
stopCh,
693736
logger,
737+
negMetrics,
738+
syncerMetrics,
694739
)
695740
if err != nil {
696741
return nil, fmt.Errorf("failed to create NEG controller: %w", err)

0 commit comments

Comments
 (0)