Skip to content

Commit ee89d0e

Browse files
committed
Fixed metrics collector aggregation issue for NEG metrics in MT
1 parent b7a97eb commit ee89d0e

31 files changed

+389
-220
lines changed

cmd/glbc/main.go

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -59,18 +59,19 @@ import (
5959
"k8s.io/ingress-gce/pkg/utils"
6060
"k8s.io/klog/v2"
6161

62-
ingctx "k8s.io/ingress-gce/pkg/context"
63-
"k8s.io/ingress-gce/pkg/controller"
64-
"k8s.io/ingress-gce/pkg/neg"
65-
"k8s.io/ingress-gce/pkg/neg/syncers/labels"
66-
negtypes "k8s.io/ingress-gce/pkg/neg/types"
67-
6862
"k8s.io/ingress-gce/cmd/glbc/app"
6963
"k8s.io/ingress-gce/pkg/backendconfig"
64+
ingctx "k8s.io/ingress-gce/pkg/context"
65+
"k8s.io/ingress-gce/pkg/controller"
7066
"k8s.io/ingress-gce/pkg/crd"
7167
"k8s.io/ingress-gce/pkg/firewalls"
7268
"k8s.io/ingress-gce/pkg/flags"
7369
_ "k8s.io/ingress-gce/pkg/klog"
70+
"k8s.io/ingress-gce/pkg/neg"
71+
"k8s.io/ingress-gce/pkg/neg/metrics"
72+
syncMetrics "k8s.io/ingress-gce/pkg/neg/metrics/metricscollector"
73+
"k8s.io/ingress-gce/pkg/neg/syncers/labels"
74+
negtypes "k8s.io/ingress-gce/pkg/neg/types"
7475
"k8s.io/ingress-gce/pkg/utils/zonegetter"
7576
"k8s.io/ingress-gce/pkg/version"
7677
)
@@ -266,6 +267,8 @@ func main() {
266267
nodeTopologyFactory = informernodetopology.NewSharedInformerFactory(nodeTopologyClient, flags.F.ResyncPeriod)
267268
}
268269
ctx := context.Background()
270+
syncerMetrics := syncMetrics.NewNegMetricsCollector(flags.F.NegMetricsExportInterval, rootLogger)
271+
269272
if flags.F.LeaderElection.LeaderElect {
270273
err := multiprojectstart.StartWithLeaderElection(
271274
ctx,
@@ -284,6 +287,7 @@ func main() {
284287
gceCreator,
285288
namer,
286289
stopCh,
290+
syncerMetrics,
287291
)
288292
if err != nil {
289293
rootLogger.Error(err, "Failed to start multi-project syncer with leader election")
@@ -304,6 +308,7 @@ func main() {
304308
gceCreator,
305309
namer,
306310
stopCh,
311+
syncerMetrics,
307312
)
308313
}
309314
}, rOption.wg)
@@ -711,6 +716,13 @@ func createNEGController(ctx *ingctx.ControllerContext, systemHealth *systemheal
711716
adapter = ctx.Cloud
712717
}
713718

719+
// register NEG prometheus metrics
720+
metrics.RegisterMetrics()
721+
syncMetrics.RegisterMetrics()
722+
723+
negMetrics := metrics.NewNegMetrics()
724+
syncerMetrics := syncMetrics.NewNegMetricsCollector(flags.F.NegMetricsExportInterval, logger)
725+
714726
// TODO: Refactor NEG to use cloud mocks so ctx.Cloud can be referenced within NewController.
715727
negController, err := neg.NewController(
716728
ctx.KubeClient,
@@ -729,7 +741,7 @@ func createNEGController(ctx *ingctx.ControllerContext, systemHealth *systemheal
729741
ctx.HasSynced,
730742
ctx.L4Namer,
731743
ctx.DefaultBackendSvcPort,
732-
negtypes.NewAdapterWithRateLimitSpecs(ctx.Cloud, flags.F.GCERateLimit.Values(), adapter),
744+
negtypes.NewAdapterWithRateLimitSpecs(ctx.Cloud, flags.F.GCERateLimit.Values(), adapter, negMetrics),
733745
zoneGetter,
734746
ctx.ClusterNamer,
735747
flags.F.ResyncPeriod,
@@ -747,6 +759,8 @@ func createNEGController(ctx *ingctx.ControllerContext, systemHealth *systemheal
747759
flags.F.EnableNEGsForIngress,
748760
stopCh,
749761
logger,
762+
negMetrics,
763+
syncerMetrics,
750764
)
751765
if err != nil {
752766
return nil, fmt.Errorf("failed to create NEG controller: %w", err)

docs/deploy/local/README.md

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,21 +32,14 @@ from the root of this repo:
3232
$ hack/setup-local.sh <cluster-name>
3333
```
3434

35-
## Setup GCE permissions
35+
## Authorize gcloud and kubectl
3636

37-
When running locally, the Ingress-GCE controller looks on the local machine
38-
for credentials to create GCE networking resources. Specifically it looks for a
39-
json file specified at the GOOGLE_APPLICATION_CREDENTIALS variable. Given this,
40-
it is most desirable to follow these steps:
41-
42-
1. Create a Service Account in GCP and give the account Compute Admin permissions
43-
44-
2. Create a key for the Service Account and download it
45-
46-
Then run the following:
37+
Once the cluster is ready, prepare authorization for it.
38+
You need to authorize both gcloud and kubectl.
4739

4840
```console
49-
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/key/file
41+
$ gcloud auth application-default login
42+
$ gcloud container clusters get-credentials CLUSTER_NAME --region CLUSTER_LOCATION
5043
```
5144

5245
## Run the controller
@@ -62,6 +55,15 @@ binary in a container and place it in `bin/amd64`.
6255
make build
6356
```
6457

58+
NOTE -
59+
If you get build errors because Docker is denied permission while pulling the base image, run the following commands:
60+
61+
```console
62+
sudo addgroup --system docker
63+
sudo usermod -aG docker $USER
64+
newgrp docker
65+
```
66+
6567
For Mac OS users or to build the binary locally and output it in the
6668
`bin/amd64` directory run:
6769

pkg/controller/controller.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ import (
4848
"k8s.io/ingress-gce/pkg/loadbalancers"
4949
"k8s.io/ingress-gce/pkg/loadbalancers/features"
5050
"k8s.io/ingress-gce/pkg/metrics"
51+
negmetrics "k8s.io/ingress-gce/pkg/neg/metrics"
5152
negtypes "k8s.io/ingress-gce/pkg/neg/types"
5253
ingsync "k8s.io/ingress-gce/pkg/sync"
5354
"k8s.io/ingress-gce/pkg/translator"
@@ -139,7 +140,7 @@ func NewLoadBalancerController(
139140
instancePool: ctx.InstancePool,
140141
l7Pool: loadbalancers.NewLoadBalancerPool(ctx.Cloud, ctx.ClusterNamer, ctx, namer.NewFrontendNamerFactory(ctx.ClusterNamer, ctx.KubeSystemUID, logger), logger),
141142
backendSyncer: backends.NewBackendSyncer(backendPool, healthChecker, ctx.Cloud, ctx.Translator),
142-
negLinker: backends.NewNEGLinker(backendPool, negtypes.NewAdapter(ctx.Cloud), ctx.Cloud, ctx.SvcNegInformer.GetIndexer(), logger),
143+
negLinker: backends.NewNEGLinker(backendPool, negtypes.NewAdapter(ctx.Cloud, negmetrics.NewNegMetrics()), ctx.Cloud, ctx.SvcNegInformer.GetIndexer(), logger),
143144
igLinker: backends.NewInstanceGroupLinker(ctx.InstancePool, backendPool, logger),
144145
metrics: ctx.ControllerMetrics,
145146
ZoneGetter: ctx.ZoneGetter,

pkg/l4lb/l4controller.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ import (
3838
"k8s.io/ingress-gce/pkg/forwardingrules"
3939
"k8s.io/ingress-gce/pkg/l4lb/metrics"
4040
"k8s.io/ingress-gce/pkg/loadbalancers"
41+
negmetrics "k8s.io/ingress-gce/pkg/neg/metrics"
4142
negtypes "k8s.io/ingress-gce/pkg/neg/types"
4243
"k8s.io/ingress-gce/pkg/network"
4344
"k8s.io/ingress-gce/pkg/utils"
@@ -106,7 +107,7 @@ func NewILBController(ctx *context.ControllerContext, stopCh <-chan struct{}, lo
106107
hasSynced: ctx.HasSynced,
107108
}
108109
l4c.backendPool = backends.NewPool(ctx.Cloud, l4c.namer)
109-
l4c.NegLinker = backends.NewNEGLinker(l4c.backendPool, negtypes.NewAdapter(ctx.Cloud), ctx.Cloud, ctx.SvcNegInformer.GetIndexer(), logger)
110+
l4c.NegLinker = backends.NewNEGLinker(l4c.backendPool, negtypes.NewAdapter(ctx.Cloud, negmetrics.NewNegMetrics()), ctx.Cloud, ctx.SvcNegInformer.GetIndexer(), logger)
110111

111112
l4c.svcQueue = utils.NewPeriodicTaskQueueWithMultipleWorkers("l4", "services", l4c.numWorkers, l4c.syncWrapper, logger)
112113

pkg/l4lb/l4netlbcontroller.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ import (
3939
"k8s.io/ingress-gce/pkg/instancegroups"
4040
"k8s.io/ingress-gce/pkg/l4lb/metrics"
4141
"k8s.io/ingress-gce/pkg/loadbalancers"
42+
negmetrics "k8s.io/ingress-gce/pkg/neg/metrics"
4243
negtypes "k8s.io/ingress-gce/pkg/neg/types"
4344
"k8s.io/ingress-gce/pkg/network"
4445
"k8s.io/ingress-gce/pkg/utils"
@@ -138,7 +139,7 @@ func NewL4NetLBController(
138139
adapter = ctx.Cloud
139140
}
140141
l4netLBc.networkResolver = network.NewNetworksResolver(networkLister, gkeNetworkParamSetLister, adapter, ctx.EnableMultinetworking, logger)
141-
l4netLBc.negLinker = backends.NewNEGLinker(l4netLBc.backendPool, negtypes.NewAdapter(ctx.Cloud), ctx.Cloud, ctx.SvcNegInformer.GetIndexer(), logger)
142+
l4netLBc.negLinker = backends.NewNEGLinker(l4netLBc.backendPool, negtypes.NewAdapter(ctx.Cloud, negmetrics.NewNegMetrics()), ctx.Cloud, ctx.SvcNegInformer.GetIndexer(), logger)
142143
l4netLBc.svcQueue = utils.NewPeriodicTaskQueueWithMultipleWorkers("l4netLB", "services", ctx.NumL4NetLBWorkers, l4netLBc.syncWrapper, logger)
143144

144145
ctx.ServiceInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{

pkg/multiproject/manager/manager.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"k8s.io/ingress-gce/pkg/multiproject/finalizer"
1616
"k8s.io/ingress-gce/pkg/multiproject/gce"
1717
"k8s.io/ingress-gce/pkg/multiproject/neg"
18+
syncMetrics "k8s.io/ingress-gce/pkg/neg/metrics/metricscollector"
1819
"k8s.io/ingress-gce/pkg/neg/syncers/labels"
1920
providerconfigclient "k8s.io/ingress-gce/pkg/providerconfig/client/clientset/versioned"
2021
svcnegclient "k8s.io/ingress-gce/pkg/svcneg/client/clientset/versioned"
@@ -44,6 +45,7 @@ type ProviderConfigControllersManager struct {
4445
lpConfig labels.PodLabelPropagationConfig
4546
gceCreator gce.GCECreator
4647
globalStopCh <-chan struct{}
48+
syncerMetrics *syncMetrics.SyncerMetrics
4749
}
4850

4951
type ControllerSet struct {
@@ -66,6 +68,7 @@ func NewProviderConfigControllerManager(
6668
gceCreator gce.GCECreator,
6769
globalStopCh <-chan struct{},
6870
logger klog.Logger,
71+
syncerMetrics *syncMetrics.SyncerMetrics,
6972
) *ProviderConfigControllersManager {
7073
return &ProviderConfigControllersManager{
7174
controllers: make(map[string]*ControllerSet),
@@ -84,6 +87,7 @@ func NewProviderConfigControllerManager(
8487
lpConfig: lpConfig,
8588
gceCreator: gceCreator,
8689
globalStopCh: globalStopCh,
90+
syncerMetrics: syncerMetrics,
8791
}
8892
}
8993

@@ -133,6 +137,7 @@ func (pccm *ProviderConfigControllersManager) StartControllersForProviderConfig(
133137
pccm.globalStopCh,
134138
logger,
135139
pc,
140+
pccm.syncerMetrics,
136141
)
137142
if err != nil {
138143
cleanupErr := finalizer.DeleteProviderConfigNEGCleanupFinalizer(pc, pccm.providerConfigClient, logger)

pkg/multiproject/neg/neg.go

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ import (
1616
"k8s.io/ingress-gce/pkg/flags"
1717
"k8s.io/ingress-gce/pkg/multiproject/filteredinformer"
1818
"k8s.io/ingress-gce/pkg/neg"
19+
"k8s.io/ingress-gce/pkg/neg/metrics"
20+
syncMetrics "k8s.io/ingress-gce/pkg/neg/metrics/metricscollector"
1921
"k8s.io/ingress-gce/pkg/neg/syncers/labels"
2022
negtypes "k8s.io/ingress-gce/pkg/neg/types"
2123
"k8s.io/ingress-gce/pkg/network"
@@ -28,6 +30,12 @@ import (
2830
"k8s.io/klog/v2"
2931
)
3032

33+
func init() {
34+
// register prometheus metrics
35+
metrics.RegisterMetrics()
36+
syncMetrics.RegisterMetrics()
37+
}
38+
3139
// StartNEGController creates and runs a NEG controller for the specified ProviderConfig.
3240
// The returned channel is closed by StopControllersForProviderConfig to signal a shutdown
3341
// specific to this ProviderConfig's controller.
@@ -60,10 +68,18 @@ func StartNEGController(
6068
globalStopCh <-chan struct{},
6169
logger klog.Logger,
6270
providerConfig *providerconfig.ProviderConfig,
71+
syncerMetrics *syncMetrics.SyncerMetrics,
6372
) (chan<- struct{}, error) {
6473
providerConfigName := providerConfig.Name
6574
logger.V(2).Info("Initializing NEG controller", "providerConfig", providerConfigName)
6675

76+
// This is the ID for tenant/cluster for which the NEG controller is created.
77+
providerConfigID := ""
78+
if providerConfig.Spec.PrincipalInfo != nil {
79+
providerConfigID = providerConfig.Spec.PrincipalInfo.ID
80+
logger.V(2).Info("Initializing NEG controller", "providerConfigID", providerConfigID)
81+
}
82+
6783
// The ProviderConfig-specific stop channel. We close this in StopControllersForProviderConfig.
6884
providerConfigStopCh := make(chan struct{})
6985

@@ -114,6 +130,8 @@ func StartNEGController(
114130
lpConfig,
115131
joinedStopCh,
116132
logger,
133+
providerConfigID,
134+
syncerMetrics,
117135
)
118136

119137
if err != nil {
@@ -272,6 +290,8 @@ func createNEGController(
272290
lpConfig labels.PodLabelPropagationConfig,
273291
stopCh <-chan struct{},
274292
logger klog.Logger,
293+
providerConfigID string,
294+
syncerMetrics *syncMetrics.SyncerMetrics,
275295
) (*neg.Controller, error) {
276296

277297
// The adapter uses Network SelfLink
@@ -283,7 +303,7 @@ func createNEGController(
283303

284304
noDefaultBackendServicePort := utils.ServicePort{}
285305
var noNodeTopologyInformer cache.SharedIndexInformer
286-
306+
negMetrics := metrics.NewNegMetrics()
287307
negController, err := neg.NewController(
288308
kubeClient,
289309
svcNegClient,
@@ -301,7 +321,7 @@ func createNEGController(
301321
hasSynced,
302322
l4Namer,
303323
noDefaultBackendServicePort,
304-
negtypes.NewAdapterWithRateLimitSpecs(cloud, flags.F.GCERateLimit.Values(), adapter),
324+
negtypes.NewAdapterWithRateLimitSpecs(cloud, flags.F.GCERateLimit.Values(), adapter, negMetrics),
305325
zoneGetter,
306326
clusterNamer,
307327
flags.F.ResyncPeriod,
@@ -319,6 +339,8 @@ func createNEGController(
319339
flags.F.EnableNEGsForIngress,
320340
stopCh,
321341
logger,
342+
negMetrics,
343+
syncerMetrics,
322344
)
323345
if err != nil {
324346
return nil, fmt.Errorf("failed to create NEG controller: %w", err)

pkg/multiproject/start/start.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,13 @@ import (
1919
pccontroller "k8s.io/ingress-gce/pkg/multiproject/controller"
2020
"k8s.io/ingress-gce/pkg/multiproject/gce"
2121
"k8s.io/ingress-gce/pkg/multiproject/manager"
22+
"k8s.io/ingress-gce/pkg/neg/metrics"
23+
syncMetrics "k8s.io/ingress-gce/pkg/neg/metrics/metricscollector"
2224
"k8s.io/ingress-gce/pkg/neg/syncers/labels"
2325
providerconfigclient "k8s.io/ingress-gce/pkg/providerconfig/client/clientset/versioned"
2426
providerconfiginformers "k8s.io/ingress-gce/pkg/providerconfig/client/informers/externalversions"
2527
"k8s.io/ingress-gce/pkg/recorders"
28+
2629
svcnegclient "k8s.io/ingress-gce/pkg/svcneg/client/clientset/versioned"
2730
informersvcneg "k8s.io/ingress-gce/pkg/svcneg/client/informers/externalversions"
2831
"k8s.io/ingress-gce/pkg/utils/namer"
@@ -31,6 +34,12 @@ import (
3134

3235
const multiProjectLeaderElectionLockName = "ingress-gce-multi-project-lock"
3336

37+
func init() {
38+
// register prometheus metrics
39+
metrics.RegisterMetrics()
40+
syncMetrics.RegisterMetrics()
41+
}
42+
3443
// StartWithLeaderElection starts the ProviderConfig controller with leader election.
3544
func StartWithLeaderElection(
3645
parentCtx context.Context,
@@ -49,12 +58,13 @@ func StartWithLeaderElection(
4958
gceCreator gce.GCECreator,
5059
rootNamer *namer.Namer,
5160
stopCh <-chan struct{},
61+
syncerMetrics *syncMetrics.SyncerMetrics,
5262
) error {
5363
logger.V(1).Info("Starting multi-project controller with leader election", "host", hostname)
5464

5565
recordersManager := recorders.NewManager(eventRecorderKubeClient, logger)
5666

57-
leConfig, err := makeLeaderElectionConfig(leaderElectKubeClient, hostname, recordersManager, logger, kubeClient, svcNegClient, kubeSystemUID, eventRecorderKubeClient, providerConfigClient, informersFactory, svcNegFactory, networkFactory, nodeTopologyFactory, gceCreator, rootNamer)
67+
leConfig, err := makeLeaderElectionConfig(leaderElectKubeClient, hostname, recordersManager, logger, kubeClient, svcNegClient, kubeSystemUID, eventRecorderKubeClient, providerConfigClient, informersFactory, svcNegFactory, networkFactory, nodeTopologyFactory, gceCreator, rootNamer, syncerMetrics)
5868
if err != nil {
5969
return err
6070
}
@@ -88,6 +98,7 @@ func makeLeaderElectionConfig(
8898
nodeTopologyFactory informernodetopology.SharedInformerFactory,
8999
gceCreator gce.GCECreator,
90100
rootNamer *namer.Namer,
101+
syncerMetrics *syncMetrics.SyncerMetrics,
91102
) (*leaderelection.LeaderElectionConfig, error) {
92103
recorder := recordersManager.Recorder(flags.F.LeaderElection.LockObjectNamespace)
93104
// add a uniquifier so that two processes on the same host don't accidentally both become active
@@ -116,7 +127,7 @@ func makeLeaderElectionConfig(
116127
Callbacks: leaderelection.LeaderCallbacks{
117128
OnStartedLeading: func(ctx context.Context) {
118129
logger.Info("Became leader, starting multi-project controller")
119-
Start(logger, kubeClient, svcNegClient, kubeSystemUID, eventRecorderKubeClient, providerConfigClient, informersFactory, svcNegFactory, networkFactory, nodeTopologyFactory, gceCreator, rootNamer, ctx.Done())
130+
Start(logger, kubeClient, svcNegClient, kubeSystemUID, eventRecorderKubeClient, providerConfigClient, informersFactory, svcNegFactory, networkFactory, nodeTopologyFactory, gceCreator, rootNamer, ctx.Done(), syncerMetrics)
120131
},
121132
OnStoppedLeading: func() {
122133
logger.Info("Stop running multi-project leader election")
@@ -143,6 +154,7 @@ func Start(
143154
gceCreator gce.GCECreator,
144155
rootNamer *namer.Namer,
145156
stopCh <-chan struct{},
157+
syncerMetrics *syncMetrics.SyncerMetrics,
146158
) {
147159
logger.V(1).Info("Starting ProviderConfig controller")
148160
lpConfig := labels.PodLabelPropagationConfig{}
@@ -173,6 +185,7 @@ func Start(
173185
gceCreator,
174186
stopCh,
175187
logger,
188+
syncerMetrics,
176189
)
177190
logger.V(1).Info("Initialized ProviderConfig controller manager")
178191

0 commit comments

Comments
 (0)