@@ -3,8 +3,10 @@ package targetgroupbinding
33import (
44 "context"
55 "fmt"
6+ "k8s.io/apimachinery/pkg/util/cache"
67 "net/netip"
78 lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc"
9+ "sync"
810 "time"
911
1012 elbv2types "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2/types"
@@ -28,7 +30,10 @@ import (
2830 "sigs.k8s.io/controller-runtime/pkg/client"
2931)
3032
31- const defaultRequeueDuration = 15 * time .Second
33+ const (
34+ defaultRequeueDuration = 15 * time .Second
35+ invalidVPCTTL = 60 * time .Minute
36+ )
3237
3338// ResourceManager manages the TargetGroupBinding resource.
3439type ResourceManager interface {
@@ -64,6 +69,9 @@ func NewDefaultResourceManager(k8sClient client.Client, elbv2Client services.ELB
6469 multiClusterManager : multiClusterManager ,
6570 metricsCollector : metricsCollector ,
6671
72+ invalidVpcCache : cache .NewExpiring (),
73+ invalidVpcCacheTTL : defaultTargetsCacheTTL ,
74+
6775 requeueDuration : defaultRequeueDuration ,
6876 }
6977}
@@ -84,6 +92,10 @@ type defaultResourceManager struct {
8492 metricsCollector lbcmetrics.MetricCollector
8593 vpcID string
8694
95+ invalidVpcCache * cache.Expiring
96+ invalidVpcCacheTTL time.Duration
97+ invalidVpcCacheMutex sync.RWMutex
98+
8799 requeueDuration time.Duration
88100}
89101
@@ -550,29 +562,10 @@ func (m *defaultResourceManager) registerPodEndpoints(ctx context.Context, tgb *
550562 "registering endpoints using the targetGroup's vpcID %s which is different from the cluster's vpcID %s" , tgb .Spec .VpcID , m .vpcID ))
551563 }
552564
553- var overrideAzFn func (addr netip.Addr ) bool
554- if tgb .Spec .IamRoleArnToAssume != "" {
555- // If we're interacting with another account, then we should always be setting "all" AZ to allow this
556- // target to get registered by the ELB API.
557- overrideAzFn = func (_ netip.Addr ) bool {
558- return true
559- }
560- } else {
561- vpcInfo , err := m .vpcInfoProvider .FetchVPCInfo (ctx , vpcID )
562- if err != nil {
563- return err
564- }
565- var vpcRawCIDRs []string
566- vpcRawCIDRs = append (vpcRawCIDRs , vpcInfo .AssociatedIPv4CIDRs ()... )
567- vpcRawCIDRs = append (vpcRawCIDRs , vpcInfo .AssociatedIPv6CIDRs ()... )
568- vpcCIDRs , err := networking .ParseCIDRs (vpcRawCIDRs )
569- if err != nil {
570- return err
571- }
572- // If the pod ip resides out of all the VPC CIDRs, then the only way to force the ELB API is to use "all" AZ.
573- overrideAzFn = func (addr netip.Addr ) bool {
574- return ! networking .IsIPWithinCIDRs (addr , vpcCIDRs )
575- }
565+ overrideAzFn , err := m .generateOverrideAzFn (ctx , vpcID , tgb .Spec .IamRoleArnToAssume )
566+
567+ if err != nil {
568+ return err
576569 }
577570
578571 sdkTargets , err := m .prepareRegistrationCall (endpoints , overrideAzFn )
@@ -626,6 +619,66 @@ func (m *defaultResourceManager) updateTGBCheckPoint(ctx context.Context, tgb *e
626619 return nil
627620}
628621
622+ func (m * defaultResourceManager ) generateOverrideAzFn (ctx context.Context , vpcID string , assumeRole string ) (func (addr netip.Addr ) bool , error ) {
623+ // Cross-Account is configured by assuming a role.
624+ usingCrossAccount := assumeRole != ""
625+
626+ // We need to cache the vpc response for the various assume roles.
627+ // There are two cases to consider when using assuming a role:
628+ // 1. Using a peered VPC connection to provide connectivity among accounts.
629+ // 2. Using RAM shared subnet(s) to provide connectivity among accounts.
630+ // We need to handle the case where the user is potentially using the same VPC in the peered context
631+ // as well as the RAM shared context.
632+ // Using peered VPC connection, we will always need to override the AZ.
633+ // Using a RAM shared subnet / VPC means that we follow the standard logic of checking the pod ip against the VPC CIDRs.
634+
635+ invalidVPCCacheKey := fmt .Sprintf ("%s-%s" , assumeRole , vpcID )
636+
637+ if usingCrossAccount {
638+ // Prevent spamming EC2 with requests.
639+ // We can use the cached result for this VPC ID given for the current assume role ARN
640+ m .invalidVpcCacheMutex .RLock ()
641+ _ , invalidVPC := m .invalidVpcCache .Get (invalidVPCCacheKey )
642+ m .invalidVpcCacheMutex .RUnlock ()
643+
644+ // In this case, we already received that this VPC was invalid, we can shortcut the EC2 call and just override the AZ.
645+ if invalidVPC {
646+ return func (addr netip.Addr ) bool {
647+ return true
648+ }, nil
649+ }
650+ }
651+
652+ vpcInfo , err := m .vpcInfoProvider .FetchVPCInfo (ctx , vpcID )
653+ if err != nil {
654+ // A VPC Not Found Error along with cross-account usage means that the VPC either, is not shared with the assume
655+ // role account OR this falls into case (1) from above where the VPC is just peered but not shared with RAM.
656+ // As we can't differentiate if RAM sharing wasn't set up correctly OR the VPC is set up via peering, we will
657+ // just default to assume that the VPC is peered but not shared.
658+ if isVPCNotFoundError (err ) && usingCrossAccount {
659+ m .invalidVpcCacheMutex .Lock ()
660+ m .invalidVpcCache .Set (invalidVPCCacheKey , true , m .invalidVpcCacheTTL )
661+ m .invalidVpcCacheMutex .Unlock ()
662+ return func (addr netip.Addr ) bool {
663+ return true
664+ }, nil
665+ }
666+ return nil , err
667+ }
668+ var vpcRawCIDRs []string
669+ vpcRawCIDRs = append (vpcRawCIDRs , vpcInfo .AssociatedIPv4CIDRs ()... )
670+ vpcRawCIDRs = append (vpcRawCIDRs , vpcInfo .AssociatedIPv6CIDRs ()... )
671+ vpcCIDRs , err := networking .ParseCIDRs (vpcRawCIDRs )
672+ if err != nil {
673+ return nil , err
674+ }
675+ // By getting here, we have a valid VPC for whatever credential was used. We return "true" in the function below
676+ // when the pod ip falls outside the VPCs configured CIDRs, other we return "false" to ensure that the "all" is NOT injected.
677+ return func (addr netip.Addr ) bool {
678+ return ! networking .IsIPWithinCIDRs (addr , vpcCIDRs )
679+ }, nil
680+ }
681+
629682type podEndpointAndTargetPair struct {
630683 endpoint backend.PodEndpoint
631684 target TargetInfo
@@ -747,3 +800,12 @@ func isELBV2TargetGroupARNInvalidError(err error) bool {
747800 }
748801 return false
749802}
803+
804+ func isVPCNotFoundError (err error ) bool {
805+ var apiErr smithy.APIError
806+ if errors .As (err , & apiErr ) {
807+ code := apiErr .ErrorCode ()
808+ return code == "InvalidVpcID.NotFound"
809+ }
810+ return false
811+ }
0 commit comments