diff --git a/cluster-autoscaler/cloudprovider/hetzner/README.md b/cluster-autoscaler/cloudprovider/hetzner/README.md index 6e7d1a14cf40..2fbab3bac29b 100644 --- a/cluster-autoscaler/cloudprovider/hetzner/README.md +++ b/cluster-autoscaler/cloudprovider/hetzner/README.md @@ -50,6 +50,8 @@ Can be useful when you have many different node pools and run into issues of the `HCLOUD_NETWORK` Default empty , The id or name of the network that is used in the cluster , @see https://docs.hetzner.cloud/#networks +`HCLOUD_SUBNET` Default empty , The CIDR of the subnet that is used in the cluster, must exactly match the IP range of one of the subnets of the network defined in `HCLOUD_NETWORK` (ignored if `HCLOUD_NETWORK` is not set), example: `10.0.0.0/16` + `HCLOUD_FIREWALL` Default empty , The id or name of the firewall that is used in the cluster , @see https://docs.hetzner.cloud/#firewalls `HCLOUD_SSH_KEY` Default empty , The id or name of SSH Key that will have access to the fresh created server, @see https://docs.hetzner.cloud/#ssh-keys diff --git a/cluster-autoscaler/cloudprovider/hetzner/hetzner_ip_reserver.go b/cluster-autoscaler/cloudprovider/hetzner/hetzner_ip_reserver.go new file mode 100644 index 000000000000..2d4697c79b32 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/hetzner/hetzner_ip_reserver.go @@ -0,0 +1,245 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package hetzner + +import ( + "context" + "fmt" + "net" + "sync" + + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/hetzner/hcloud-go/hcloud" + "k8s.io/klog/v2" +) + +// ReservedIPLabelName is the label key for reserved IPs on Hetzner servers +const ReservedIPLabelName = "cluster-autoscaler/reserved-ip" + +// ipReserver manages IP address reservations +type ipReserver struct { + client *hcloud.Client + apiCallContext context.Context + cachedServers *serversCache + reservedIPs map[string]net.IP // Uses string representation as key for quick lookups + mutex sync.RWMutex // Protects reservedIPs map for thread safety +} + +// newIPReserver creates a new IP reserver instance +func newIPReserver(ctx context.Context, client *hcloud.Client, cache *serversCache) *ipReserver { + if client == nil { + klog.Fatalf("Failed to create ipReserver: client is nil") + } + if ctx == nil { + klog.Fatalf("Failed to create ipReserver: context is nil") + } + if cache == nil { + klog.Fatalf("Failed to create ipReserver: serversCache is nil") + } + return &ipReserver{ + client: client, + apiCallContext: ctx, + cachedServers: cache, + reservedIPs: make(map[string]net.IP), + } +} + +// getReservedIPLabelName returns the label name for reserved IPs +func (r *ipReserver) getReservedIPLabelName() string { + return ReservedIPLabelName +} + +// getReservedIPs returns a map of all currently reserved IPs with their string representation as key +func (r *ipReserver) getReservedIPs() map[string]net.IP { + serverIPs, err := r.getReservedIPsFromServers() + if err != nil { + klog.Errorf("Failed to get reserved IPs from servers: %v", err) + serverIPs = []net.IP{} // Fallback to empty slice if error occurs + } + + r.mutex.RLock() + defer r.mutex.RUnlock() + + // Create result map with capacity for all IPs + result := make(map[string]net.IP, len(r.reservedIPs)+len(serverIPs)) + + // Add all IPs from local storage + for ipStr, ip := range r.reservedIPs { + result[ipStr] = ip + } + + // Add server 
IPs if not already in result + for _, ip := range serverIPs { + if ip != nil { + ipStr := ip.String() + if _, exists := result[ipStr]; !exists { + result[ipStr] = ip + } + } + } + + return result +} + +// addReservedIP adds an IP to the list of reserved IPs +func (r *ipReserver) addReservedIP(ip net.IP) { + if ip == nil { + klog.Warning("Attempted to add a nil IP to reserved IPs") + return + } + + r.mutex.Lock() + defer r.mutex.Unlock() + + // Store a copy of the IP to prevent modification + r.reservedIPs[ip.String()] = cloneIP(ip) +} + +// removeReservedIP removes an IP from the list of reserved IPs +func (r *ipReserver) removeReservedIP(ip net.IP) { + if ip == nil { + klog.Warning("Attempted to remove a nil IP from reserved IPs") + return + } + + r.mutex.Lock() + defer r.mutex.Unlock() + + if _, exists := r.reservedIPs[ip.String()]; !exists { + klog.Warningf("Attempted to remove an IP that is not reserved: %s", ip.String()) + return + } + + delete(r.reservedIPs, ip.String()) +} + +// getReservedIPsFromServers retrieves all IPs that are reserved on servers +func (r *ipReserver) getReservedIPsFromServers() ([]net.IP, error) { + servers, err := r.cachedServers.getAllServers() + if err != nil { + return nil, fmt.Errorf("failed to get servers: %w", err) + } + + ips := []net.IP{} + for _, server := range servers { + if server == nil { + klog.Warning("Encountered a nil server while retrieving reserved IPs") + continue + } + + // Check for IPs in labels + if ip, exists := r.getReservedIPFromLabel(server); exists { + ips = append(ips, ip) + } + + // Check for IPs in private networks + for _, privNet := range server.PrivateNet { + if !privNet.IP.IsUnspecified() { + ips = append(ips, cloneIP(privNet.IP)) + } + } + } + return ips, nil +} + +// getReservedIPFromLabel extracts the reserved IP from server label +func (r *ipReserver) getReservedIPFromLabel(server *hcloud.Server) (net.IP, bool) { + if server == nil || server.Labels == nil { + klog.Warning("Attempted to retrieve 
reserved IP from a nil server or server with nil labels") + return nil, false + } + + ipLabelValue, exists := server.Labels[r.getReservedIPLabelName()] + if !exists { + return nil, false + } + + parsedIP := net.ParseIP(ipLabelValue) + if parsedIP == nil { + klog.Warningf("Invalid reserved IP label value '%s' for server %s", ipLabelValue, server.Name) + return nil, false + } + + return parsedIP, true +} + +// reserveNewIP reserves a new IP from the given subnet +func (r *ipReserver) reserveNewIP(subnet *net.IPNet) (net.IP, error) { + if subnet == nil { + return nil, fmt.Errorf("subnet cannot be nil") + } + + // Get all currently used IPs - already as map for efficient lookup + reservedIPs := r.getReservedIPs() + + // Find first available IP in subnet (skipping network and broadcast addresses) + ip := cloneIP(subnet.IP) + broadcast := getBroadcastAddress(subnet) + + for subnet.Contains(ip) { + // Skip network and broadcast addresses + if ip.Equal(subnet.IP) || ip.Equal(broadcast) { + incrementIP(ip) + continue + } + + ipStr := ip.String() + if _, exists := reservedIPs[ipStr]; !exists { + reserved := cloneIP(ip) + r.addReservedIP(reserved) + return reserved, nil + } + + incrementIP(ip) + } + + return nil, fmt.Errorf("no free IP available in subnet %s", subnet.String()) +} + +// cloneIP creates a copy of an IP address +func cloneIP(ip net.IP) net.IP { + if ip == nil { + klog.Warning("Attempted to clone a nil IP") + return nil + } + clone := make(net.IP, len(ip)) + copy(clone, ip) + return clone +} + +// incrementIP increments an IP address by 1 +func incrementIP(ip net.IP) { + for j := len(ip) - 1; j >= 0; j-- { + ip[j]++ + if ip[j] > 0 { + break + } + } +} + +// getBroadcastAddress returns the broadcast address for a subnet +func getBroadcastAddress(subnet *net.IPNet) net.IP { + if subnet == nil { + klog.Warning("Attempted to get broadcast address for a nil subnet") + return nil + } + + broadcast := cloneIP(subnet.IP) + for i := range broadcast { + broadcast[i] |= 
^subnet.Mask[i] + } + return broadcast +} diff --git a/cluster-autoscaler/cloudprovider/hetzner/hetzner_manager.go b/cluster-autoscaler/cloudprovider/hetzner/hetzner_manager.go index a1ba45088c80..7996282e3002 100644 --- a/cluster-autoscaler/cloudprovider/hetzner/hetzner_manager.go +++ b/cluster-autoscaler/cloudprovider/hetzner/hetzner_manager.go @@ -22,6 +22,7 @@ import ( "encoding/json" "errors" "fmt" + "net" "net/http" "os" "strconv" @@ -32,6 +33,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/hetzner/hcloud-go/hcloud" "k8s.io/autoscaler/cluster-autoscaler/version" + "k8s.io/klog/v2" ) var ( @@ -55,6 +57,8 @@ type hetznerManager struct { publicIPv6 bool cachedServerType *serverTypeCache cachedServers *serversCache + ipReserver *ipReserver + subnet *net.IPNet } // ClusterConfig holds the configuration for all the nodepools @@ -207,6 +211,29 @@ func newManager() (*hetznerManager, error) { } } + serversCache := newServersCache(ctx, client) + + var subnet *net.IPNet + subnetStr := os.Getenv("HCLOUD_SUBNET") + if subnetStr != "" && network != nil { + _, subnet, err = net.ParseCIDR(subnetStr) + if err != nil { + return nil, fmt.Errorf("failed to parse HCLOUD_SUBNET: %s", err) + } + // Validate that the subnet is part of the network + found := false + for _, nSubnet := range network.Subnets { + _, nSubnetRange, _ := net.ParseCIDR(nSubnet.IPRange.String()) + if nSubnetRange.String() == subnet.String() { + found = true + break + } + } + if !found { + return nil, fmt.Errorf("HCLOUD_SUBNET %s is not part of the specified HCLOUD_NETWORK %s", subnetStr, network.Name) + } + } + m := &hetznerManager{ client: client, nodeGroups: make(map[string]*hetznerNodeGroup), @@ -219,7 +246,9 @@ func newManager() (*hetznerManager, error) { publicIPv6: publicIPv6, clusterConfig: clusterConfig, cachedServerType: newServerTypeCache(ctx, client), - cachedServers: newServersCache(ctx, client), + cachedServers: serversCache, + subnet: subnet, + ipReserver: newIPReserver(ctx, 
client, serversCache), } return m, nil @@ -253,7 +282,137 @@ func (m *hetznerManager) deleteByNode(node *apiv1.Node) error { return m.deleteServer(server) } +func (m *hetznerManager) assignIP(server *hcloud.Server, ip net.IP) error { + // Basic validation + if server == nil || ip == nil || m.network == nil { + return fmt.Errorf("invalid parameters: server=%v, ip=%v, network=%v", server != nil, ip != nil, m.network != nil) + } + + ctx := m.apiCallContext + klog.Infof("Assigning static IP %s to server %s", ip.String(), server.Name) + + // Check if server is already attached to the correct network + isAttached := false + for _, privNet := range server.PrivateNet { + if privNet.Network != nil && privNet.Network.ID == m.network.ID { + isAttached = true + break + } + } + + // Detach from network if already attached + if isAttached { + klog.V(1).Infof("Detaching server %s from network %s", server.Name, m.network.Name) + + detachAction, _, err := m.client.Server.DetachFromNetwork(ctx, server, hcloud.ServerDetachFromNetworkOpts{ + Network: m.network, + }) + if err != nil { + return fmt.Errorf("failed to detach from network: %w", err) + } + + if err := m.client.Action.WaitFor(ctx, detachAction); err != nil { + return fmt.Errorf("waiting for network detachment failed: %w", err) + } + } + + // Attach to network with static IP + klog.V(1).Infof("Connecting server %s to network %s with IP %s", server.Name, m.network.Name, ip.String()) + + attachAction, _, err := m.client.Server.AttachToNetwork(ctx, server, hcloud.ServerAttachToNetworkOpts{ + Network: m.network, + IP: ip, + }) + if err != nil { + return fmt.Errorf("failed to attach to network: %w", err) + } + + if err := m.client.Action.WaitFor(ctx, attachAction); err != nil { + return fmt.Errorf("waiting for network attachment failed: %w", err) + } + + klog.Infof("Server %s successfully connected with IP %s", server.Name, ip.String()) + return nil +} + +func (m *hetznerManager) createServer(ctx context.Context, opts 
hcloud.ServerCreateOpts) (*hcloud.Server, error) { + // Initialize labels if not present + if opts.Labels == nil { + opts.Labels = make(map[string]string) + } + + // Prepare IP reservation and assignment + var ipToAssign net.IP + if m.network != nil && m.subnet != nil { + // Create server initially stopped for network configuration + startAfterCreate := false + opts.StartAfterCreate = &startAfterCreate + + if opts.Networks != nil { + // remove network from opts if it was set + klog.Warningf("Network is set in ServerCreateOpts, but will be ignored for IP assignment: %s", opts.Networks[0].Name) + opts.Networks = []*hcloud.Network{} + } + + // Reserve IP address + var err error + if ipToAssign, err = m.ipReserver.reserveNewIP(m.subnet); err != nil { + return nil, fmt.Errorf("IP reservation failed: %w", err) + } + + // Store reserved IP as label + opts.Labels[m.ipReserver.getReservedIPLabelName()] = ipToAssign.String() + } + + // Create server + serverCreateResult, _, err := m.client.Server.Create(ctx, opts) + if err != nil { + if ipToAssign != nil { + m.ipReserver.removeReservedIP(ipToAssign) + } + return nil, fmt.Errorf("Server creation of type %s in region %s failed: %w", opts.ServerType.Name, opts.Location.Name, err) + } + + server := serverCreateResult.Server + actions := append(serverCreateResult.NextActions, serverCreateResult.Action) + + // Wait for creation actions to complete + if err = m.client.Action.WaitFor(ctx, actions...); err != nil { + if ipToAssign != nil { + m.ipReserver.removeReservedIP(ipToAssign) + } + return server, fmt.Errorf("Waiting for server actions for %s failed: %w", server.Name, err) + } + + // Assign IP if one was reserved + if ipToAssign != nil { + if err = m.assignIP(server, ipToAssign); err != nil { + m.ipReserver.removeReservedIP(ipToAssign) + return server, fmt.Errorf("IP assignment for server %s failed: %w", server.Name, err) + } + } + + // Start the server if it wasn't started automatically + if opts.StartAfterCreate != nil && 
!*opts.StartAfterCreate { + powerOnAction, _, err := m.client.Server.Poweron(ctx, server) + if err != nil { + return server, fmt.Errorf("Powering on server %s failed: %v", server.Name, err) + } + if err = m.client.Action.WaitFor(ctx, powerOnAction); err != nil { + return server, fmt.Errorf("Waiting for power-on for server %s failed: %v", server.Name, err) + } + } + + return server, nil +} + func (m *hetznerManager) deleteServer(server *hcloud.Server) error { + if server != nil { + reservedIP, reservedIPExists := m.ipReserver.getReservedIPFromLabel(server) + if reservedIPExists { + m.ipReserver.removeReservedIP(reservedIP) + } + } _, _, err := m.client.Server.DeleteWithResult(m.apiCallContext, server) return err } diff --git a/cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go b/cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go index 7e5ee63549bb..abce6ec15e19 100644 --- a/cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go +++ b/cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go @@ -499,20 +499,13 @@ func createServer(n *hetznerNodeGroup) error { opts.Firewalls = []*hcloud.ServerCreateFirewall{serverCreateFirewall} } - serverCreateResult, _, err := n.manager.client.Server.Create(ctx, opts) - if err != nil { - return fmt.Errorf("could not create server type %s in region %s: %v", n.instanceType, n.region, err) - } - - server := serverCreateResult.Server + server, err := n.manager.createServer(ctx, opts) - actions := append(serverCreateResult.NextActions, serverCreateResult.Action) - - // Delete the server if any action (most importantly create_server & start_server) fails - err = n.manager.client.Action.WaitFor(ctx, actions...) 
if err != nil { - _ = n.manager.deleteServer(server) - return fmt.Errorf("failed to start server %s error: %v", server.Name, err) + if server != nil { + _ = n.manager.deleteServer(server) + } + return fmt.Errorf("failed to create server %s in region %s: %v", n.instanceType, n.region, err) } return nil