Skip to content

Commit

Permalink
Add emerge options to require free space/inodes in tmpdir
Browse files Browse the repository at this point in the history
This adds
  --jobs-tmpdir-require-free-gb=GB
  --jobs-tmpdir-require-free-kilo-inodes=INODES
as emerge options.

When used together with --jobs, these options make portage/emerge
check that PORTAGE_TMPDIR has sufficient free resources before a new
job is started.

Thanks goes out to Zac Medico, as this was inspired by
gentoo#1351, with the following
differences:

- options are absolute values, not relative ones
- defaults for both options are specified
- option values are scaled, using a decaying function, considering
  the number of running jobs
- emit a warning once a threshold is reached

Note that the scaling of the resource constraints can not be perfect
in the presence of concurrently running emerge jobs and without
_can_add_job() being provided with the number of jobs that are
potentially added. It is always possible that an emerge job has not yet
used much of the filesystem when we check the remaining filesystem
resources, and later on uses much more than the scaling function
accounted for.

Ultimately, there is a tradeoff between portage limiting parallelism
needlessly (but still being able to emerge all packages) and portage
failing due to missing resources in PORTAGE_TMPDIR. The chosen
defaults are rather large and most packages use far fewer
filesystem resources than the scaling function accounts for.
Therefore, the implemented approach leans towards
favoring functionality over parallelism.

Bugs: https://bugs.gentoo.org/934382
Signed-off-by: Florian Schmaus <[email protected]>
  • Loading branch information
Flowdalic committed Jun 23, 2024
1 parent 819c863 commit 0c56537
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 2 deletions.
105 changes: 104 additions & 1 deletion lib/_emerge/Scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from portage._sets.base import InternalPackageSet
from portage.util import ensure_dirs, writemsg, writemsg_level
from portage.util.futures import asyncio
from portage.util.path import first_existing
from portage.util.SlotObject import SlotObject
from portage.util._async.SchedulerInterface import SchedulerInterface
from portage.package.ebuild.digestcheck import digestcheck
Expand Down Expand Up @@ -64,7 +65,7 @@


class Scheduler(PollScheduler):
# max time between loadavg checks (seconds)
# max time between loadavg and tmpdir statvfs checks (seconds)
_loadavg_latency = 30

# max time between display status updates (seconds)
Expand Down Expand Up @@ -230,6 +231,18 @@ def __init__(
max_jobs = 1
self._set_max_jobs(max_jobs)
self._running_root = trees[trees._running_eroot]["root_config"]
# Free-space requirement (GiB) for PORTAGE_TMPDIR before starting a job.
# Only fall back to the default when the option was not given at all, so
# that an explicit 0 keeps its documented meaning of "check disabled".
self._jobs_tmpdir_require_free_gb = myopts.get("--jobs-tmpdir-require-free-gb")
if self._jobs_tmpdir_require_free_gb in (None, ""):
    # dev-lang/rust-1.77.1: ~16 GiB
    # www-client/chromium-126.0.6478.57: ~18 GiB
    self._jobs_tmpdir_require_free_gb = 18
# Free-inode requirement (in thousands) for PORTAGE_TMPDIR. The attribute
# name must match the one read by _can_add_job().
self._jobs_tmpdir_require_free_kilo_inodes = myopts.get(
    "--jobs-tmpdir-require-free-kilo-inodes"
)
if self._jobs_tmpdir_require_free_kilo_inodes in (None, ""):
    # dev-lang/rust-1.77.1: ~ 450k inodes
    # www-client/chromium-126.0.6478.57: ~1011K
    self._jobs_tmpdir_require_free_kilo_inodes = 1100
self.edebug = 0
if settings.get("PORTAGE_DEBUG", "") == "1":
self.edebug = 1
Expand Down Expand Up @@ -1792,6 +1805,96 @@ def _is_work_scheduled(self):
def _running_job_count(self):
return self._jobs

_warned_tmpdir_free_space = False
_warned_tmpdir_free_inodes = False

def _can_add_job(self):
    """
    Decide whether one more job may be started right now.

    On top of the parent class' checks, this refuses to add a job while
    any PORTAGE_TMPDIR (one per configured root, each distinct path is
    checked once) has less free space or fewer free inodes than the
    configured requirements, scaled by the number of running jobs. A
    warning is emitted the first time each kind of shortage is seen.

    A job is always allowed when nothing is running and the merge wait
    queue is empty, so that emerge keeps making progress.

    @rtype: bool
    @return: True if a new job may be added, False otherwise
    """
    if not super()._can_add_job():
        return False

    running_job_count = self._running_job_count()
    if running_job_count == 0 and not self._merge_wait_queue:
        # Ensure there is forward progress if there are no running
        # jobs and no jobs in the _merge_wait_queue.
        return True

    require_free_gb = self._jobs_tmpdir_require_free_gb
    require_free_kilo_inodes = self._jobs_tmpdir_require_free_kilo_inodes
    # A requirement of 0 (or None) disables the respective check.
    if not (require_free_gb or require_free_kilo_inodes):
        return True
    if not hasattr(os, "statvfs"):
        return True

    # Use a decaying function to take potential future PORTAGE_TMPDIR
    # consumption of currently running jobs and the new job into account.
    def scale_to_jobs(num):
        # The newly started job is fully taken into account.
        res = num
        # All currently running jobs are taken into account with less
        # weight, since it is likely that they are already using space
        # in PORTAGE_TMPDIR.
        for i in range(2, running_job_count + 2):
            res += (1 / i) * num
        return res

    checked_tmpdirs = set()
    for root in self.trees:
        settings = self.trees[root]["root_config"].settings
        portage_tmpdir = settings["PORTAGE_TMPDIR"]
        if portage_tmpdir in checked_tmpdirs:
            continue
        checked_tmpdirs.add(portage_tmpdir)

        # ${PORTAGE_TMPDIR}/portage may not exist yet; stat the nearest
        # existing ancestor instead.
        tmpdir = first_existing(os.path.join(portage_tmpdir, "portage"))
        try:
            vfs_stat = os.statvfs(tmpdir)
        except OSError as e:
            # Best effort: report the problem, but do not block the job.
            writemsg_level(
                f"!!! statvfs('{tmpdir}'): {e}\n",
                noiselevel=-1,
                level=logging.ERROR,
            )
            continue

        if require_free_gb:
            required_free_bytes = scale_to_jobs(require_free_gb * 1024 * 1024 * 1024)

            # POSIX counts f_bavail in units of f_frsize; f_bsize is only
            # the preferred I/O size and may differ. Fall back to f_bsize
            # should a platform report f_frsize as 0.
            block_size = vfs_stat.f_frsize or vfs_stat.f_bsize
            actual_free_bytes = block_size * vfs_stat.f_bavail

            if actual_free_bytes < required_free_bytes:
                if not self._warned_tmpdir_free_space:
                    msg = f"--- {tmpdir} has not enough free space, emerge job parallelism reduced. free: {actual_free_bytes} bytes, required {required_free_bytes} bytes"
                    portage.writemsg_stdout(
                        colorize("WARN", f"\n{msg}\n"), noiselevel=-1
                    )
                    self._logger.log(msg)
                    self._warned_tmpdir_free_space = True
                return False

        # NOTE(review): filesystems without a fixed inode table (btrfs is
        # believed to be one) report f_files == 0; the inode check is
        # skipped there because f_favail carries no information.
        if require_free_kilo_inodes and vfs_stat.f_files:
            required_free_inodes = scale_to_jobs(require_free_kilo_inodes * 1000)

            if vfs_stat.f_favail < required_free_inodes:
                if not self._warned_tmpdir_free_inodes:
                    msg = f"--- {tmpdir} has not enough free inodes, emerge job parallelism reduced. free: {vfs_stat.f_favail} inodes, required: {required_free_inodes} inodes"
                    portage.writemsg_stdout(
                        colorize("WARN", f"\n{msg}\n"), noiselevel=-1
                    )
                    self._logger.log(msg)
                    self._warned_tmpdir_free_inodes = True
                return False

    return True

def _schedule_tasks(self):
while True:
state_change = 0
Expand Down
38 changes: 38 additions & 0 deletions lib/_emerge/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,8 @@ def __contains__(self, s):
"--getbinpkgonly": y_or_n,
"--ignore-world": y_or_n,
"--jobs": valid_integers,
"--jobs-tmpdir-require-free-gb": valid_integers,
"--jobs-tmpdir-require-free-kilo-inodes": valid_integers,
"--keep-going": y_or_n,
"--load-average": valid_floats,
"--onlydeps-with-ideps": y_or_n,
Expand Down Expand Up @@ -523,6 +525,14 @@ def parse_opts(tmpcmdline, silent=False):
"help": "Specifies the number of packages to build " + "simultaneously.",
"action": "store",
},
"--jobs-tmpdir-require-free-gb": {
"help": "Specifies the required remaining capacity (in GiB) of PORTAGE_TMPDIR before a new emerge job is started. Set to 0 to disable this check",
"action": "store",
},
"--jobs-tmpdir-require-free-kilo-inodes": {
"help": "Specifies the required remaining inodes (in thousands) of PORTAGE_TMPDIR before a new emerge job is started. Set to 0 to disable this check",
"action": "store",
},
"--keep-going": {
"help": "continue as much as possible after an error",
"choices": true_y_or_n,
Expand Down Expand Up @@ -1033,6 +1043,34 @@ def parse_opts(tmpcmdline, silent=False):

myoptions.jobs = jobs

# Normalize --jobs-tmpdir-require-free-gb to an int. An unparsable value
# is reported through parser.error() unless running in silent mode, in
# which case 0 is stored instead.
if myoptions.jobs_tmpdir_require_free_gb:
    try:
        jobs_tmpdir_require_free_gb = int(myoptions.jobs_tmpdir_require_free_gb)
    except ValueError:
        jobs_tmpdir_require_free_gb = 0
        if not silent:
            parser.error(
                f"Invalid --jobs-tmpdir-require-free-gb parameter: '{myoptions.jobs_tmpdir_require_free_gb}'\n"
            )

    myoptions.jobs_tmpdir_require_free_gb = jobs_tmpdir_require_free_gb

# Same normalization for --jobs-tmpdir-require-free-kilo-inodes. The
# attribute name must match the one tested in the condition; a misspelled
# name would raise AttributeError instead of the handled ValueError.
if myoptions.jobs_tmpdir_require_free_kilo_inodes:
    try:
        jobs_tmpdir_require_free_kilo_inodes = int(
            myoptions.jobs_tmpdir_require_free_kilo_inodes
        )
    except ValueError:
        jobs_tmpdir_require_free_kilo_inodes = 0
        if not silent:
            parser.error(
                f"Invalid --jobs-tmpdir-require-free-kilo-inodes parameter: '{myoptions.jobs_tmpdir_require_free_kilo_inodes}'\n"
            )

    myoptions.jobs_tmpdir_require_free_kilo_inodes = (
        jobs_tmpdir_require_free_kilo_inodes
    )

if myoptions.load_average == "True":
myoptions.load_average = None

Expand Down
12 changes: 11 additions & 1 deletion man/emerge.1
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.TH "EMERGE" "1" "May 2024" "Portage @VERSION@" "Portage"
.TH "EMERGE" "1" "June 2024" "Portage @VERSION@" "Portage"
.SH "NAME"
emerge \- Command\-line interface to the Portage system
.SH "SYNOPSIS"
Expand Down Expand Up @@ -693,6 +693,16 @@ Note that interactive packages currently force a setting
of \fI\-\-jobs=1\fR. This issue can be temporarily avoided
by specifying \fI\-\-accept\-properties=\-interactive\fR.
.TP
.BR \-\-jobs\-tmpdir\-require\-free\-gb[=GB]
Specifies the required remaining capacity (in GiB) of \fBPORTAGE_TMPDIR\fR
before a new emerge job is started. Specify \fI0\fR to disable this check.
Defaults to \fI18\fR GiB.
.TP
.BR \-\-jobs\-tmpdir\-require\-free\-kilo\-inodes[=KILO\-INODES]
Specifies the required remaining inodes (in thousands) of \fBPORTAGE_TMPDIR\fR
before a new emerge job is started. Set to \fI0\fR to disable this check.
Defaults to \fI1100\fR.
.TP
.BR "\-\-keep\-going [ y | n ]"
Continue as much as possible after an error. When an error occurs,
dependencies are recalculated for remaining packages and any with
Expand Down

0 comments on commit 0c56537

Please sign in to comment.