From 5f4158ec0c3f517d9866d4f86caf19462e857a74 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Thu, 6 Feb 2025 13:56:00 -0800 Subject: [PATCH] resource: conditionally monitor sdmon.idle Problem: nodes are not checked for untracked running work when a Flux instance starts up. Such work might be left behind, for example, if - job-exec deems job shell(s) unkillable - housekeeping/prolog/epilog gets stuck on a hung file system When systemd is enabled, the new sdmon module joins the 'sdmon.idle' broker group on startup. However, if there are any running flux units, joining is delayed until those units are no longer running. Change the resource module so that it monitors sdmon.idle instead of broker.online when systemd is enabled. This will withhold "busy" nodes from the scheduler until they become idle. Fixes #6590 --- src/modules/resource/monitor.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/modules/resource/monitor.c b/src/modules/resource/monitor.c index b2de5fe0b70f..5b197b19ed1c 100644 --- a/src/modules/resource/monitor.c +++ b/src/modules/resource/monitor.c @@ -17,6 +17,13 @@ * the initial response to the request to watch broker.online cannot * be processed until the reactor runs. * + * If systemd is enabled, watch sdmon.idle instead of broker.online. This + * behaves exactly like broker.online, except that it isn't joined until + * sdmon has verified that the node has no running flux systemd units. + * This guards against scheduling new work on a node that hasn't been + * properly cleaned up. As with broker.online, nodes are automatically + * removed from sdmon.idle when they are shut down or lost. 
+ * * Some synchronization notes: * - rc1 completes on rank 0 before any other ranks can join broker.online, * therefore the scheduler must allow flux module load to complete with @@ -379,7 +386,10 @@ struct monitor *monitor_create (struct resource_ctx *ctx, goto error; } else if (!flux_attr_get (ctx->h, "broker.recovery-mode")) { - if (!(monitor->f_online = group_monitor (ctx->h, "broker.online")) + const char *online_group = "broker.online"; + if (config->systemd_enable) + online_group = "sdmon.idle"; + if (!(monitor->f_online = group_monitor (ctx->h, online_group)) || flux_future_then (monitor->f_online, -1, broker_online_cb,