Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
added:
- Add resource reservation support for PVE nodes (@Chipmonk2). [#373]
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ The following options can be set in the configuration file `proxlb.yaml`:
| | method | | memory | `Str` | The balancing method that should be used. [values: `memory` (default), `cpu`, `disk`]|
| | mode | | used | `Str` | The balancing mode that should be used. [values: `used` (default), `assigned`, `psi` (pressure)] |
| | balance_larger_guests_first | | False | `Bool` | Option to prefer larger/smaller guests first |
| | node_resource_reserve | | { defaults: { memory: 4 }, node01: { memory: 6 } } | `Dict` | A dict of resource reservations (in GB) per node; `defaults` applies to all nodes unless overridden by a node-specific entry |
| | psi | | { nodes: { memory: { pressure_full: 0.20, pressure_some: 0.20, pressure_spikes: 1.00 }}} | `Dict` | A dict of PSI based thresholds for nodes and guests |
| | pools | | pools: { dev: { type: affinity }, de-nbg01-db: { type: anti-affinity }} | `Dict` | A dict of pool names and their type for creating affinity/anti-affinity rules |
| `service` | | | | | |
Expand Down Expand Up @@ -338,6 +339,11 @@ balancing:
method: memory
mode: used
balance_larger_guests_first: False
node_resource_reserve:
defaults:
memory: 4
node01:
memory: 6
# # PSI thresholds only apply when using mode 'psi'
# # PSI based balancing is currently in beta and req. PVE >= 9
# psi:
Expand Down
5 changes: 5 additions & 0 deletions config/proxlb_example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ balancing:
method: memory # 'memory' | 'cpu' | 'disk'
mode: used # 'assigned' | 'used' | 'psi'
balance_larger_guests_first: False # Option to prioritize balancing of larger or smaller guests first
node_resource_reserve: # Optional: Define resource reservations for nodes (in GB)
defaults: # Default reservation values applying to all nodes (unless explicitly overridden)
memory: 4 # Default: 4 GB memory reserved per node
node01: # Specific node reservation override for node 'node01'
memory: 6 # Specific: 6 GB memory reserved for node 'node01'
# # PSI thresholds only apply when using mode 'psi'
# psi:
# nodes:
Expand Down
1 change: 1 addition & 0 deletions proxlb/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def main():
# Update the initial node resource assignments
# by the previously created groups.
Calculations.set_node_assignments(proxlb_data)
Helper.log_node_metrics(proxlb_data, init=False)
Calculations.set_node_hot(proxlb_data)
Calculations.set_guest_hot(proxlb_data)
Calculations.get_most_free_node(proxlb_data, cli_args.best_node)
Expand Down
4 changes: 2 additions & 2 deletions proxlb/models/calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,10 +753,10 @@ def validate_node_resources(proxlb_data: Dict[str, Any], guest_name: str) -> boo
guest_disk_required = proxlb_data["guests"][guest_name]["disk_used"]

if guest_memory_required < node_memory_free:
logger.debug(f"Node '{node_target}' has sufficient resources for guest '{guest_name}'.")
logger.debug(f"Node '{node_target}' has sufficient resources ({node_memory_free / (1024 ** 3):.2f} GB free) for guest '{guest_name}'.")
logger.debug("Finished: validate_node_resources.")
return True
else:
logger.debug(f"Node '{node_target}' lacks sufficient resources for guest '{guest_name}'.")
logger.debug(f"Node '{node_target}' lacks sufficient resources ({node_memory_free / (1024 ** 3):.2f} GB free) for guest '{guest_name}'.")
logger.debug("Finished: validate_node_resources.")
return False
59 changes: 58 additions & 1 deletion proxlb/models/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import time
from typing import Dict, Any
from utils.logger import SystemdLogger
from utils.helper import Helper

logger = SystemdLogger()

Expand Down Expand Up @@ -77,7 +78,7 @@ def get_nodes(proxmox_api: any, proxlb_config: Dict[str, Any]) -> Dict[str, Any]
nodes["nodes"][node["node"]]["cpu_pressure_some_spikes_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "cpu", "some", spikes=True)
nodes["nodes"][node["node"]]["cpu_pressure_full_spikes_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "cpu", "full", spikes=True)
nodes["nodes"][node["node"]]["cpu_pressure_hot"] = False
nodes["nodes"][node["node"]]["memory_total"] = node["maxmem"]
nodes["nodes"][node["node"]]["memory_total"] = Nodes.set_node_resource_reservation(node["node"], node["maxmem"], proxlb_config, "memory")
nodes["nodes"][node["node"]]["memory_assigned"] = 0
nodes["nodes"][node["node"]]["memory_used"] = node["mem"]
nodes["nodes"][node["node"]]["memory_free"] = node["maxmem"] - node["mem"]
Expand Down Expand Up @@ -253,3 +254,59 @@ def get_node_pve_version(proxmox_api, node_name: str) -> float:
logger.debug(f"Got version {version['version']} for node {node_name}.")
logger.debug("Finished: get_node_pve_version.")
return version["version"]

@staticmethod
def set_node_resource_reservation(node_name, resource_value, proxlb_config, resource_type) -> int:
    """
    Apply a configured resource reservation to a node's resource value.

    The reservation is looked up in proxlb_config["balancing"]["node_resource_reserve"].
    A node specific entry (keyed by the node name) takes precedence over the
    "defaults" entry. Reservations are configured in GB and converted to bytes
    (factor 1024 ** 3) before being subtracted from the resource value.

    A reservation that is missing, non-numeric, a boolean or not greater than zero
    is treated as not set. If the selected reservation is larger than the resource
    value itself, a critical message is logged and the value is returned unchanged
    (a too large node specific reservation does not fall back to the default).

    Args:
        node_name (str): The name of the node.
        resource_value (int): The total resource value in bytes.
        proxlb_config (Dict[str, Any]): A dictionary containing the ProxLB configuration.
        resource_type (str): The type of resource ('memory', 'disk', etc.).

    Returns:
        int: The resource value after applying any configured reservations.
    """
    logger.debug("Starting: set_node_resource_reservation.")

    def _section(mapping, key):
        # Return the nested dict at 'key', or an empty dict for anything else.
        # A YAML key without a value loads as None and a user may also supply a
        # scalar (e.g. 'node01: 6'); neither must crash the lookup chain.
        section = mapping.get(key) if isinstance(mapping, dict) else None
        return section if isinstance(section, dict) else {}

    def _reservation_gb(section):
        # Return the numeric reservation for the resource type, or 0 if unusable.
        value = section.get(resource_type, 0)
        # bool is a subclass of int, so 'memory: true' would otherwise reserve 1 GB.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return 0
        return value

    bytes_per_gb = 1024 ** 3
    reserve_cfg = _section(_section(proxlb_config, "balancing"), "node_resource_reserve")
    node_resource_reservation = _reservation_gb(_section(reserve_cfg, node_name))
    default_resource_reservation = _reservation_gb(_section(reserve_cfg, "defaults"))

    # Pick the reservation to apply; the node specific one wins over the default.
    if node_resource_reservation > 0:
        reservation_gb, config_label, apply_label = node_resource_reservation, "resource", "node specific"
    elif default_resource_reservation > 0:
        reservation_gb, config_label, apply_label = default_resource_reservation, "default", "default"
    else:
        logger.debug(f"No default or node specific resource reservation for node {node_name} found. Skipping...")
        logger.debug("Finished: set_node_resource_reservation.")
        return resource_value

    reserved_bytes = reservation_gb * bytes_per_gb

    # Never reserve more than the node actually provides.
    if resource_value < reserved_bytes:
        logger.critical(f"Configured {config_label} reservation for node {node_name} of type {resource_type} with {reservation_gb} GB is higher than available resource value {resource_value / bytes_per_gb:.2f} GB. Not applying...")
        logger.debug("Finished: set_node_resource_reservation.")
        return resource_value

    logger.debug(f"Applying {apply_label} reservation for {node_name} of type {resource_type} with {reservation_gb} GB.")
    # Truncate to whole bytes so a fractional GB reservation still yields an int result.
    resource_value_new = resource_value - int(reserved_bytes)
    logger.debug(f'Switched resource value for node {node_name} of type {resource_type} from {resource_value / bytes_per_gb:.2f} GB to {resource_value_new / bytes_per_gb:.2f} GB after applying reservation.')
    logger.debug(f"Before: {resource_value} | After: {resource_value_new}")
    logger.debug("Finished: set_node_resource_reservation.")
    return resource_value_new
2 changes: 2 additions & 0 deletions proxlb/utils/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def log_node_metrics(proxlb_data: Dict[str, Any], init: bool = True) -> None:
"""
logger.debug("Starting: log_node_metrics.")
nodes_usage_memory = " | ".join([f"{key}: {value['memory_used_percent']:.2f}%" for key, value in proxlb_data["nodes"].items()])
nodes_assigned_memory = " | ".join([f"{key}: {value['memory_assigned_percent']:.2f}%" for key, value in proxlb_data["nodes"].items()])
nodes_usage_cpu = " | ".join([f"{key}: {value['cpu_used_percent']:.2f}%" for key, value in proxlb_data["nodes"].items()])
nodes_usage_disk = " | ".join([f"{key}: {value['disk_used_percent']:.2f}%" for key, value in proxlb_data["nodes"].items()])

Expand All @@ -90,6 +91,7 @@ def log_node_metrics(proxlb_data: Dict[str, Any], init: bool = True) -> None:
proxlb_data["meta"]["statistics"]["after"] = {"memory": nodes_usage_memory, "cpu": nodes_usage_cpu, "disk": nodes_usage_disk}

logger.debug(f"Nodes usage memory: {nodes_usage_memory}")
logger.debug(f"Nodes usage memory assigned: {nodes_assigned_memory}")
logger.debug(f"Nodes usage cpu: {nodes_usage_cpu}")
logger.debug(f"Nodes usage disk: {nodes_usage_disk}")
logger.debug("Finished: log_node_metrics.")
Expand Down