diff --git a/gprofiler/main.py b/gprofiler/main.py index 9e600f689..4efb1749b 100644 --- a/gprofiler/main.py +++ b/gprofiler/main.py @@ -18,6 +18,9 @@ from typing import Iterable, Optional, Type, cast import configargparse +from granulate_utils.exceptions import AlreadyInCgroup, UnsupportedCGroupV2 +from granulate_utils.linux.cgroups.cpu_cgroup import CpuCgroup +from granulate_utils.linux.cgroups.memory_cgroup import MemoryCgroup from granulate_utils.linux.ns import is_running_in_init_pid from granulate_utils.linux.process import is_process_running from granulate_utils.metadata import Metadata @@ -67,6 +70,10 @@ DEFAULT_PROFILING_DURATION = datetime.timedelta(seconds=60).seconds DEFAULT_SAMPLING_FREQUENCY = 11 +# Limits same as in the k8s DaemonSet. +DEFAULT_CPU_LIMIT = 0.5 # 500m +DEFAULT_MEMORY_LIMIT = (1 << 30) # 1Gi + # 1 KeyboardInterrupt raised per this many seconds, no matter how many SIGINTs we get. SIGINT_RATELIMIT = 0.5 @@ -602,6 +609,30 @@ def parse_cmd_args() -> configargparse.Namespace: " beginning of a session.", ) + parser.add_argument( + "--limit-memory", + default=DEFAULT_MEMORY_LIMIT, + dest="memory_limit", + type=int, + help=f"Limit on the memory used by gProfiler. Units are bytes and the default is '{DEFAULT_MEMORY_LIMIT}'." + ) + + parser.add_argument( + "--limit-cpu", + default=DEFAULT_CPU_LIMIT, + dest="cpu_limit", + type=float, + help=f"Limit on the cpu used by gProfiler. Units are cores and the default is '{DEFAULT_CPU_LIMIT}'." + ) + + parser.add_argument( + "--no-cgroups", + action="store_false", + dest="cgroups_changes", + default=True, + help="Disable the cgroups changes.", + ) + args = parser.parse_args() args.perf_inject = args.nodejs_mode == "perf" @@ -679,12 +710,6 @@ def verify_preconditions(args: configargparse.Namespace) -> None: ) sys.exit(1) - if args.log_usage and get_run_mode() not in ("k8s", "container"): - # TODO: we *can* move into another cpuacct cgroup, to let this work also when run as a standalone - # executable. 
-        print("--log-usage is available only when run as a container!", file=sys.stderr)
-        sys.exit(1)
-
 
 def setup_signals() -> None:
     # When we run under staticx & PyInstaller, both of them forward (some of the) signals to gProfiler.
@@ -723,6 +748,40 @@ def init_pid_file(pid_file: str) -> None:
     Path(pid_file).write_text(str(os.getpid()))
 
 
+def set_limits(cpu: float, memory: int) -> None:
+    """
+    Move this process into the "gprofiler" cpu and memory cgroups and apply the given limits to them.
+
+    A limit is applied only when the move succeeds; on AlreadyInCgroup that controller's limit is left untouched.
+    Nothing is returned.
+
+    :param cpu: cpu limit in cores, passed to CpuCgroup.set_cpu_limit_cores().
+    :param memory: memory limit in bytes, passed to MemoryCgroup.set_limit_in_bytes().
+    :raises UnsupportedCGroupV2: re-raised (after a debug log) if creating the cgroup objects raises it.
+    """
+    try:
+        cpu_cgroup = CpuCgroup()
+        memory_cgroup = MemoryCgroup()
+    except UnsupportedCGroupV2:
+        logger.debug("cgroup v2 is not supported by gProfiler, cpu and memory limits wouldn't be set.")
+        raise
+
+    try:
+        cpu_cgroup.move_to_cgroup("gprofiler", os.getpid())
+    except AlreadyInCgroup:
+        logger.debug("gProfiler have already a cpu group.")
+    else:
+        cpu_cgroup.set_cpu_limit_cores(cpu)
+
+    try:
+        memory_cgroup.move_to_cgroup("gprofiler", os.getpid())
+    except AlreadyInCgroup:
+        # NOTE(review): the cpu branch logs the same condition at debug level - confirm which level is intended.
+        logger.warning("gProfiler have already a memory group.")
+    else:
+        memory_cgroup.set_limit_in_bytes(memory)
+
+
 def main() -> None:
     args = parse_cmd_args()
     verify_preconditions(args)
@@ -738,10 +797,19 @@ def main() -> None:
         remote_logs_handler,
     )
 
+    # TODO(Creatone): Check the containerized scenario.
+ if args.cgroups_changes and get_run_mode() not in ("k8s", "container"): + logger.info(f"Trying to set resource limits, cpu='{args.cpu_limit}' " + f"cores and memory='{args.memory_limit >> 20:.2f}' MB.") + try: + set_limits(args.cpu_limit, args.memory_limit) + except Exception: + logger.exception("Failed to set resource limits, continuing anyway") + setup_signals() reset_umask() - # assume we run in the root cgroup (when containerized, that's our view) - usage_logger = CgroupsUsageLogger(logger, "/") if args.log_usage else NoopUsageLogger() + + usage_logger = CgroupsUsageLogger(logger, CpuCgroup().cgroup) if args.log_usage else NoopUsageLogger() try: init_pid_file(args.pid_file) diff --git a/gprofiler/usage_loggers.py b/gprofiler/usage_loggers.py index b51c265db..e0d6412ad 100644 --- a/gprofiler/usage_loggers.py +++ b/gprofiler/usage_loggers.py @@ -11,7 +11,17 @@ import psutil -CGROUPFS_ROOT = "/sys/fs/cgroup" # TODO extract from /proc/mounts, this may change +from granulate_utils.linux.cgroups.cgroup import find_v1_hierarchies, find_v2_hierarchy + + +# TODO(Creatone): Move it to granulate-utils. Consider change. 
+def _obtain_cgroup_controller_path(cgroup: str, controller: str) -> str:
+    """Return `cgroup`'s path for `controller`, from the v1 hierarchies or (if only one exists) the v2 hierarchy."""
+    v1_mounts = find_v1_hierarchies()
+    if len(v1_mounts) == 1:
+        return f"{find_v2_hierarchy()}/{controller}{cgroup}"
+    assert controller in v1_mounts
+    return f"{v1_mounts[controller]}{cgroup}"
 
 
 class UsageLoggerInterface:
@@ -30,7 +40,8 @@ class CpuUsageLogger(UsageLoggerInterface):
 
     def __init__(self, logger: logging.LoggerAdapter, cgroup: str):
         self._logger = logger
-        self._cpuacct_usage = Path(f"{CGROUPFS_ROOT}{cgroup}cpuacct/cpuacct.usage")
+        cpu_root = _obtain_cgroup_controller_path(cgroup, 'cpuacct')
+        self._cpuacct_usage = Path(os.path.join(cpu_root, "cpuacct.usage"))
         self._last_usage: Optional[int] = None
         self._last_ts: Optional[float] = None
 
@@ -78,7 +89,7 @@ class MemoryUsageLogger(UsageLoggerInterface):
 
     def __init__(self, logger: logging.LoggerAdapter, cgroup: str):
         self._logger = logger
-        memory_root = f"{CGROUPFS_ROOT}{cgroup}memory"
+        memory_root = _obtain_cgroup_controller_path(cgroup, 'memory')
         self._memory_usage = Path(os.path.join(memory_root, "memory.usage_in_bytes"))
         self._memory_watermark = Path(os.path.join(memory_root, "memory.max_usage_in_bytes"))
         self._last_usage: Optional[int] = None
diff --git a/tests/test_cgroups.py b/tests/test_cgroups.py
new file mode 100644
index 000000000..2cbb407cb
--- /dev/null
+++ b/tests/test_cgroups.py
@@ -0,0 +1,59 @@
+#
+# Copyright (c) Granulate. All rights reserved.
+# Licensed under the AGPL3 License. See LICENSE.md in the project root for license information.
+#
+import os
+import subprocess
+from pathlib import Path
+from subprocess import Popen
+from typing import List
+
+import pytest
+from docker import DockerClient
+from docker.models.images import Image
+
+from tests.utils import run_privileged_container, _print_process_output
+
+
+def test_cgroup_limit_container(
+    docker_client: DockerClient,
+    gprofiler_docker_image: Image,
+    output_directory: Path,
+) -> None:
+    """In a container gProfiler does not set cgroup limits itself, so the limits log line must be absent."""
+    logs = run_privileged_container(docker_client, gprofiler_docker_image,
+                                    command=['-v', '--limit-cpu', '0.5', '--limit-memory', '1048576', '-o',
+                                             str(output_directory)])
+
+    # 1048576 bytes >> 20 == 1, which main() formats with ':.2f' as '1.00' (not '1024.00').
+    limit_log = "Trying to set resource limits, cpu='0.5' cores and memory='1.00' MB."
+
+    assert limit_log not in logs
+
+
+def test_cgroup_limit_privileged_executable(
+    gprofiler_exe: Path,
+    output_directory: Path,
+) -> None:
+    """Run with sudo as a plain executable, gProfiler is expected to log that it tries to set the limits."""
+    os.mkdir(output_directory)
+
+    command = (
+        ['sudo', str(gprofiler_exe), '-v', '--limit-cpu', '0.5',
+         '--limit-memory', str((1 << 30)), '-o', str(output_directory), "-d", "5",
+         "--no-java", "--no-python", "--no-php", "--no-ruby", "--no-nodejs", "--no-dotnet"]
+    )
+
+    popen = Popen(command, stdout=subprocess.PIPE)
+    # communicate() drains stdout while waiting; wait() on a piped child can deadlock once the pipe fills up.
+    stdout, _ = popen.communicate()
+    assert popen.returncode == 0
+    logs = stdout.decode("utf-8").splitlines()
+    limit_log = "Trying to set resource limits, cpu='0.5' cores and memory='1024.00' MB."
+
+    assert any(limit_log in line for line in logs)
+
+
+@pytest.mark.skip(reason="Not implemented yet.")
+def test_cgroup_try_limit_no_privileged_executable() -> None:
+    """Placeholder for the unprivileged-executable scenario (presumably: limits cannot be set) - TODO implement."""