diff --git a/.changelogs/1.1.11/373_add_node_resource_reservation_support.yml b/.changelogs/1.1.11/373_add_node_resource_reservation_support.yml new file mode 100644 index 0000000..801c176 --- /dev/null +++ b/.changelogs/1.1.11/373_add_node_resource_reservation_support.yml @@ -0,0 +1,2 @@ +added: + - Add resource reservation support for PVE nodes (@Chipmonk2). [#373] diff --git a/README.md b/README.md index 90e29f0..dd72c68 100644 --- a/README.md +++ b/README.md @@ -290,6 +290,7 @@ The following options can be set in the configuration file `proxlb.yaml`: | | method | | memory | `Str` | The balancing method that should be used. [values: `memory` (default), `cpu`, `disk`]| | | mode | | used | `Str` | The balancing mode that should be used. [values: `used` (default), `assigned`, `psi` (pressure)] | | | balance_larger_guests_first | | False | `Bool` | Option to prefer larger/smaller guests first | +| | node_resource_reserve | | { defaults: { memory: 4 }, node01: { memory: 6 } } | `Dict` | A dict of resource reservations in GB per node; `defaults` applies to all nodes unless overridden by a node specific entry | | | psi | | { nodes: { memory: { pressure_full: 0.20, pressure_some: 0.20, pressure_spikes: 1.00 }}} | `Dict` | A dict of PSI based thresholds for nodes and guests | | | pools | | pools: { dev: { type: affinity }, de-nbg01-db: { type: anti-affinity }} | `Dict` | A dict of pool names and their type for creating affinity/anti-affinity rules | | `service` | | | | | | @@ -338,6 +339,11 @@ balancing: method: memory mode: used balance_larger_guests_first: False + node_resource_reserve: + defaults: + memory: 4 + node01: + memory: 6 # # PSI thresholds only apply when using mode 'psi' # # PSI based balancing is currently in beta and req. 
PVE >= 9 # psi: diff --git a/config/proxlb_example.yaml b/config/proxlb_example.yaml index 35a07fa..5390edc 100644 --- a/config/proxlb_example.yaml +++ b/config/proxlb_example.yaml @@ -33,6 +33,11 @@ balancing: method: memory # 'memory' | 'cpu' | 'disk' mode: used # 'assigned' | 'used' | 'psi' balance_larger_guests_first: False # Option to prioritize balancing of larger or smaller guests first + node_resource_reserve: # Optional: Define resource reservations for nodes (in GB) + defaults: # Default reservation values applying to all nodes (unless explicitly overridden) + memory: 4 # Default: 4 GB memory reserved per node + node01: # Specific node reservation override for node 'node01' + memory: 6 # Specific: 6 GB memory reserved for node 'node01' # # PSI thresholds only apply when using mode 'psi' # psi: # nodes: diff --git a/proxlb/main.py b/proxlb/main.py index 994617e..cf81ff8 100644 --- a/proxlb/main.py +++ b/proxlb/main.py @@ -90,6 +90,7 @@ def main(): # Update the initial node resource assignments # by the previously created groups. 
Calculations.set_node_assignments(proxlb_data) + Helper.log_node_metrics(proxlb_data, init=False) Calculations.set_node_hot(proxlb_data) Calculations.set_guest_hot(proxlb_data) Calculations.get_most_free_node(proxlb_data, cli_args.best_node) diff --git a/proxlb/models/calculations.py b/proxlb/models/calculations.py index 88d4065..5295abf 100644 --- a/proxlb/models/calculations.py +++ b/proxlb/models/calculations.py @@ -753,10 +753,10 @@ def validate_node_resources(proxlb_data: Dict[str, Any], guest_name: str) -> boo guest_disk_required = proxlb_data["guests"][guest_name]["disk_used"] if guest_memory_required < node_memory_free: - logger.debug(f"Node '{node_target}' has sufficient resources for guest '{guest_name}'.") + logger.debug(f"Node '{node_target}' has sufficient resources ({node_memory_free / (1024 ** 3):.2f} GB free) for guest '{guest_name}'.") logger.debug("Finished: validate_node_resources.") return True else: - logger.debug(f"Node '{node_target}' lacks sufficient resources for guest '{guest_name}'.") + logger.debug(f"Node '{node_target}' lacks sufficient resources ({node_memory_free / (1024 ** 3):.2f} GB free) for guest '{guest_name}'.") logger.debug("Finished: validate_node_resources.") return False diff --git a/proxlb/models/nodes.py b/proxlb/models/nodes.py index 3666120..e2f03b0 100644 --- a/proxlb/models/nodes.py +++ b/proxlb/models/nodes.py @@ -24,6 +24,7 @@ import time from typing import Dict, Any from utils.logger import SystemdLogger +from utils.helper import Helper logger = SystemdLogger() @@ -77,7 +78,7 @@ def get_nodes(proxmox_api: any, proxlb_config: Dict[str, Any]) -> Dict[str, Any] nodes["nodes"][node["node"]]["cpu_pressure_some_spikes_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "cpu", "some", spikes=True) nodes["nodes"][node["node"]]["cpu_pressure_full_spikes_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "cpu", "full", spikes=True) nodes["nodes"][node["node"]]["cpu_pressure_hot"] = False - 
@staticmethod
def set_node_resource_reservation(node_name: str, resource_value: int, proxlb_config: Dict[str, Any], resource_type: str) -> int:
    """
    Reduce a node's total resource value by the configured reservation.

    The reservation is looked up in 'balancing.node_resource_reserve' of the
    ProxLB configuration. A node specific entry wins over the 'defaults' entry.
    If neither holds a positive numeric value for the resource type, or the
    reservation is larger than the resource itself, the value is returned
    unchanged. Reservations are configured in GB (1 GB = 1024 ** 3 bytes).

    Args:
        node_name (str): The name of the node.
        resource_value (int): The total resource value in bytes.
        proxlb_config (Dict[str, Any]): A dictionary containing the ProxLB configuration.
        resource_type (str): The type of resource ('memory', 'disk', etc.).

    Returns:
        int: The resource value in bytes after applying any configured reservation.
    """
    logger.debug("Starting: set_node_resource_reservation.")
    bytes_per_gb = 1024 ** 3

    def _as_dict(value: Any) -> Dict[str, Any]:
        # A config key that is present but empty parses to None; treat anything
        # that is not a mapping as "nothing configured" instead of crashing.
        return value if isinstance(value, dict) else {}

    reserve_cfg = _as_dict(_as_dict(proxlb_config.get("balancing")).get("node_resource_reserve"))

    def _reservation_gb(section: str) -> float:
        value = _as_dict(reserve_cfg.get(section)).get(resource_type, 0)
        # bool is a subclass of int, so 'memory: true' would otherwise count as 1 GB.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return 0
        return value

    node_resource_reservation = _reservation_gb(node_name)
    default_resource_reservation = _reservation_gb("defaults")

    # Pick the effective reservation: node specific first, then the default.
    # The two labels keep the original log wording of each case.
    if node_resource_reservation > 0:
        reservation_gb, critical_label, debug_label = node_resource_reservation, "resource", "node specific"
    elif default_resource_reservation > 0:
        reservation_gb, critical_label, debug_label = default_resource_reservation, "default", "default"
    else:
        logger.debug(f"No default or node specific resource reservation for node {node_name} found. Skipping...")
        logger.debug("Finished: set_node_resource_reservation.")
        return resource_value

    reservation_bytes = reservation_gb * bytes_per_gb

    # NOTE(review): a reservation exactly equal to the total is accepted and
    # yields 0; confirm that callers tolerate a zero total.
    if resource_value < reservation_bytes:
        logger.critical(f"Configured {critical_label} reservation for node {node_name} of type {resource_type} with {reservation_gb} GB is higher than available resource value {resource_value / (1024 ** 3):.2f} GB. Not applying...")
        resource_value_new = resource_value
    else:
        logger.debug(f"Applying {debug_label} reservation for {node_name} of type {resource_type} with {reservation_gb} GB.")
        # Truncate fractional reservations (e.g. 4.5 GB) to whole bytes so the
        # result stays an integer byte count.
        resource_value_new = resource_value - int(reservation_bytes)
        logger.debug(f"Switched resource value for node {node_name} of type {resource_type} from {resource_value / (1024 ** 3):.2f} GB to {resource_value_new / (1024 ** 3):.2f} GB after applying reservation.")
        logger.debug(f"Before: {resource_value} | After: {resource_value_new}")

    logger.debug("Finished: set_node_resource_reservation.")
    return resource_value_new
disk: {nodes_usage_disk}") logger.debug("Finished: log_node_metrics.")