Skip to content

Commit

Permalink
job-exec: enable new sdexec options for imp-shell
Browse files Browse the repository at this point in the history
Problem: an sdexec imp-shell unit can run into the following problem:
- flux-shell is killed/terminates
- there are unkillable children of flux-shell
- the IMP won't exit until the cgroup is empty
- the job appears to be running until the IMP exits with the shell exit code

This deploys some new techniques to manage such processes, ensuring
that the job can complete and release resources while orphaned processes
retain a systemd unit as a handle for monitoring and management.

Specifically:

- Set the KillMode=process so only the IMP is signaled
- Use Type=notify in conjunction with IMP calling sd_notify(3) so
  the unit transitions to deactivating when the shell exits.
- Set TimeoutStopUSec=infinity to disable systemd's stop timeout.
- Enable sdexec's stop timeout which is armed at deactivating,
  delivers SIGUSR1 (proxy for SIGKILL) after 30s, then abandons
  the unit and terminates the exec RPC after another 30s.

Behavior is configurable via the [exec] table.

Fixes flux-framework#6656
  • Loading branch information
garlick committed Feb 26, 2025
1 parent d8982cd commit 6ed0cc7
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 6 deletions.
22 changes: 18 additions & 4 deletions src/modules/job-exec/exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,7 @@ static int parse_service_option (json_t *jobspec,
return 0;
}


static struct bulk_exec_ops exec_ops = {
.on_start = start_cb,
.on_exit = exit_cb,
Expand Down Expand Up @@ -604,14 +605,27 @@ static int exec_init (struct jobinfo *job)
goto err;
}
/* The systemd user instance running as user flux is not privileged
* to signal guest processes, therefore only signal the IMP and
* never use SIGKILL. See flux-framework/flux-core#6399
* to signal guest processes, therefore:
* - Set the KillMode=process so only the IMP is signaled
* - Use Type=notify in conjunction with IMP calling sd_notify(3) so
* the unit transitions to deactivating when the shell exits.
* - Set TimeoutStopUsec=infinity to disable systemd's stop timeout.
* - Enable sdexec's stop timeout which is armed at deactivating,
* delivers SIGUSR1 (proxy for SIGKILL) after 30s, then abandons
* the unit and terminates the exec RPC after another 30s.
*/
if (streq (service, "sdexec")) {
if (flux_cmd_setopt (cmd, "SDEXEC_PROP_KillMode", "process") < 0
|| flux_cmd_setopt (cmd, "SDEXEC_PROP_Type", "notify") < 0
|| flux_cmd_setopt (cmd,
"SDEXEC_PROP_TimeoutStopUSec",
"infinity") < 0
|| flux_cmd_setopt (cmd,
"SDEXEC_STOP_TIMER_SIGNAL",
config_get_sdexec_stop_timer_signal ()) < 0
|| flux_cmd_setopt (cmd,
"SDEXEC_PROP_SendSIGKILL",
"off") < 0) {
"SDEXEC_STOP_TIMER_SEC",
config_get_sdexec_stop_timer_sec ()) < 0) {
flux_log_error (job->h,
"Unable to set multiuser sdexec options");
return -1;
Expand Down
42 changes: 40 additions & 2 deletions src/modules/job-exec/exec_config.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ struct exec_config {
const char *exec_service;
int exec_service_override;
json_t *sdexec_properties;
int sdexec_stop_timer_sec;
int sdexec_stop_timer_signal;
double default_barrier_timeout;
};

Expand Down Expand Up @@ -107,6 +109,20 @@ json_t *config_get_sdexec_properties (void)
return exec_conf.sdexec_properties;
}

/* Format exec_conf.sdexec_stop_timer_sec as a base-10 string.
 *
 * The text is written to a function-local static buffer, so the returned
 * pointer must not be freed and its contents are replaced by the next
 * call to this function.
 */
const char *config_get_sdexec_stop_timer_sec (void)
{
    static char text[32];
    const int seconds = exec_conf.sdexec_stop_timer_sec;

    (void)snprintf (text, sizeof text, "%d", seconds);
    return text;
}

/* Format exec_conf.sdexec_stop_timer_signal (a signal number) as a
 * base-10 string.
 *
 * The text is written to a function-local static buffer, so the returned
 * pointer must not be freed and its contents are replaced by the next
 * call to this function.
 */
const char *config_get_sdexec_stop_timer_signal (void)
{
    static char text[32];
    const int signum = exec_conf.sdexec_stop_timer_signal;

    (void)snprintf (text, sizeof text, "%d", signum);
    return text;
}

double config_get_default_barrier_timeout (void)
{
return exec_conf.default_barrier_timeout;
Expand All @@ -116,15 +132,19 @@ int config_get_stats (json_t **config_stats)
{
json_t *o = NULL;

if (!(o = json_pack ("{s:s? s:s? s:s? s:s? s:i s:f}",
if (!(o = json_pack ("{s:s? s:s? s:s? s:s? s:i s:f s:i s:i}",
"default_cwd", default_cwd,
"default_job_shell", exec_conf.default_job_shell,
"flux_imp_path", exec_conf.flux_imp_path,
"exec_service", exec_conf.exec_service,
"exec_service_override",
exec_conf.exec_service_override,
"default_barrier_timeout",
exec_conf.default_barrier_timeout))) {
exec_conf.default_barrier_timeout,
"sdexec_stop_timer_sec",
exec_conf.sdexec_stop_timer_sec,
"sdexec_stop_timer_signal",
exec_conf.sdexec_stop_timer_signal))) {
errno = ENOMEM;
return -1;
}
Expand Down Expand Up @@ -153,6 +173,8 @@ static void exec_config_init (struct exec_config *ec)
ec->exec_service = "rexec";
ec->exec_service_override = 0;
ec->sdexec_properties = NULL;
ec->sdexec_stop_timer_sec = 30;
ec->sdexec_stop_timer_signal = 10; // SIGUSR1
ec->default_barrier_timeout = 1800.;
}

Expand Down Expand Up @@ -249,6 +271,22 @@ int config_setup (flux_t *h,
}
}

/* Check configuration for exec.stop-timer-* */
if (flux_conf_unpack (conf,
&err,
"{s?{s?i s?i}}",
"exec",
"sdexec-stop-timer-sec",
&tmpconf.sdexec_stop_timer_sec,
"sdexec-stop-timer-signal",
&tmpconf.sdexec_stop_timer_signal) < 0) {
errprintf (errp,
"error reading config values exec.sdexec-stop-timer-sec: %s"
" or exec.sdexec-stop-timer-signal",
err.text);
return -1;
}

/* Check configuration for exec.barrier-timeout */
if (flux_conf_unpack (conf,
&err,
Expand Down
4 changes: 4 additions & 0 deletions src/modules/job-exec/exec_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ double config_get_default_barrier_timeout (void);

int config_get_stats (json_t **config_stats);

/* Return the sdexec stop timer value (sdexec_stop_timer_sec) formatted
 * as a decimal string.  The string is held in static storage owned by
 * the callee: do not free it; it is overwritten by the next call.
 */
const char *config_get_sdexec_stop_timer_sec (void);

/* Return the sdexec stop timer signal number (sdexec_stop_timer_signal)
 * formatted as a decimal string.  The string is held in static storage
 * owned by the callee: do not free it; it is overwritten by the next call.
 */
const char *config_get_sdexec_stop_timer_signal (void);

int config_setup (flux_t *h,
const flux_conf_t *conf,
int argc,
Expand Down

0 comments on commit 6ed0cc7

Please sign in to comment.