def availableCgroupsV2Controllers() -> set[str]:
    """Get the set of available cgroup2 controllers.

    The cgroup2 mount points are discovered with ``findmnt``; for each of them
    the whitespace-separated entries of its ``cgroup.subtree_control`` file are
    collected. Progress and failures are reported with ``print``.

    :returns: the controller names found. The set is empty when ``findmnt``
              cannot be executed, exits with a non-zero status, or when no
              ``cgroup.subtree_control`` file lists any controller.
    """
    controllers: set[str] = set()

    cmd = ["findmnt", "--source", "cgroup2", "--output", "target", "--noheadings"]
    try:
        proc = subprocess.run(cmd, check=False, capture_output=True, text=True)
    except OSError as exc:
        # findmnt is not installed or not executable: treat it like a failed
        # command instead of letting the exception reach the caller.
        print(f"Failed to run {cmd}: {exc}")
        return controllers

    if proc.returncode != 0:
        print(f"Failed to run {cmd}: {proc.returncode} stderr={proc.stderr}")
        return controllers

    for line in proc.stdout.splitlines():
        target = line.strip()
        if not target:
            # An empty target would make the path below relative to the
            # current working directory.
            continue

        subtree_control_path = Path(target) / "cgroup.subtree_control"
        if not subtree_control_path.is_file():
            print(f"{subtree_control_path} is not a file")
            continue

        try:
            subtree_control_info = subtree_control_path.read_text().strip()
        except OSError as exc:
            # The file may vanish or be unreadable between the check and the read.
            print(f"Failed to read {subtree_control_path}: {exc}")
            continue

        print(f"cgroup2 available at {subtree_control_path} with: {subtree_control_info}")
        # split() without a separator returns [] for an empty file, so no
        # empty-string "controller" ends up in the result.
        controllers.update(subtree_control_info.split())

    return controllers
def _resourceLimitsArgs(self):
    """Get singularity arguments for enforcing resource limits with cgroup2.

    If the associated cgroup2 controllers are not available, the corresponding
    options are ignored.

    The following CE parameters are read:

    * ``MemoryLimitMB``: if non-zero, adds ``--memory``
    * ``MemoryReservationMB``: adds ``--memory-reservation`` when a memory limit
      is set; defaults to 4/5 of ``MemoryLimitMB``
    * ``SwapLimitMB``: if non-zero, adds ``--memory-swap``
    * ``EnforceCPULimit``: if "yes"/"true" (case-insensitive), adds ``--cpus``
      with the value of ``self.processors``

    :returns: list of command line arguments (possibly empty)
    """
    controllers = availableCgroupsV2Controllers()
    self.log.debug(f"Available cgroup2 controllers: {controllers}")

    args = []
    if "memory" in controllers:
        memoryLimit = int(self.ceParameters.get("MemoryLimitMB", 0))
        if memoryLimit:
            args.extend(["--memory", f"{memoryLimit}M"])
            memoryRes = int(self.ceParameters.get("MemoryReservationMB", memoryLimit * 4 // 5))
            args.extend(["--memory-reservation", f"{memoryRes}M"])
        swapLimit = int(self.ceParameters.get("SwapLimitMB", 0))
        if swapLimit:
            args.extend(["--memory-swap", f"{swapLimit}M"])

    # Normalise the flag so that "True", "Yes" or a real boolean are accepted,
    # not only the exact lowercase strings.
    enforceCPU = str(self.ceParameters.get("EnforceCPULimit", "no")).strip().lower()
    if "cpu" in controllers and enforceCPU in ("yes", "true"):
        args.extend(["--cpus", str(self.processors)])

    return args