scx: Invoke ops.exit_task() for TASK_DEAD tasks on disable path
In commit f266831 ("scx: Close a small race window in the enable path
which can lead to use-after-free"), we fixed a race window on the enable path
that could cause a crash. This fix is fine, but was a bit too aggressive in
that it could also cause us to miss ops.exit_task() invocations in the
following scenario:

1. A task exits and invokes do_task_dead() (making its state TASK_DEAD), but
   someone still holds a refcount on it somewhere.

2. The scheduler is disabled.

3. On the disable path, we don't invoke ops.exit_task().

4. We don't invoke it in sched_ext_free() either later, because by then the
   scheduler has been disabled.

Let's ensure we don't skip exiting the task by still calling
scx_ops_exit_task() for TASK_DEAD tasks on the disable path.
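
Concretely, the disable path should keep visiting TASK_DEAD tasks and invoke
scx_ops_exit_task() on them, while only requeueing tasks that are still alive.
A condensed sketch of that intent (illustration only, not the literal diff
below; requeue_on_prev_class() is a hypothetical stand-in for the
dequeue/__setscheduler_prio/enqueue sequence):

	scx_task_iter_init(&sti);
	while ((p = scx_task_iter_next_locked(&sti, true))) {	/* include TASK_DEAD */
		if (READ_ONCE(p->__state) != TASK_DEAD)
			requeue_on_prev_class(p);	/* hypothetical helper */
		scx_ops_exit_task(p);			/* always invoked */
	}
	scx_task_iter_exit(&sti);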

Signed-off-by: David Vernet <[email protected]>
Byte-Lab committed May 22, 2024
1 parent 98ece8c commit 95a68f8
Showing 1 changed file with 28 additions and 14 deletions.
42 changes: 28 additions & 14 deletions kernel/sched/ext.c
@@ -1333,14 +1333,16 @@ static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter)
}

/**
* scx_task_iter_next_filtered_locked - Next alive non-idle task with its rq locked
* scx_task_iter_next_locked - Next non-idle task with its rq locked
* @iter: iterator to walk
* @include_dead: Whether we should include dead tasks in the iteration
*
* Visit the next alive non-idle task with its rq lock held. See
* scx_task_iter_init() for details.
* Visit the next non-idle task with its rq lock held. Allows callers to specify
* whether they would like to filter out dead tasks. See scx_task_iter_init()
* for details.
*/
static struct task_struct *
scx_task_iter_next_filtered_locked(struct scx_task_iter *iter)
scx_task_iter_next_locked(struct scx_task_iter *iter, bool include_dead)
{
struct task_struct *p;
retry:
@@ -1367,7 +1369,7 @@ scx_task_iter_next_filtered_locked(struct scx_task_iter *iter)
* the final __schedule() while we're locking its rq and thus will stay
* alive until the rq is unlocked.
*/
if (READ_ONCE(p->__state) == TASK_DEAD)
if (!include_dead && READ_ONCE(p->__state) == TASK_DEAD)
goto retry;

return p;
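
The new @include_dead argument is what lets the two call sites diverge; roughly
(a usage sketch, not part of the diff; switch_task_to_scx() is a hypothetical
placeholder for the enable-path work):

	/* Disable path: also visit TASK_DEAD tasks so ops.exit_task() still runs. */
	while ((p = scx_task_iter_next_locked(&sti, true)))
		scx_ops_exit_task(p);

	/* Enable path: keep filtering TASK_DEAD tasks to avoid the use-after-free race. */
	while ((p = scx_task_iter_next_locked(&sti, false)))
		switch_task_to_scx(p);	/* hypothetical placeholder */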
@@ -4393,19 +4395,31 @@ static void scx_ops_disable_workfn(struct kthread_work *work)

spin_lock_irq(&scx_tasks_lock);
scx_task_iter_init(&sti);
while ((p = scx_task_iter_next_filtered_locked(&sti))) {
/*
* Invoke scx_ops_exit_task() on all non-idle tasks, including
* TASK_DEAD tasks. Because dead tasks may have a nonzero refcount,
* we may not have invoked sched_ext_free() on them by the time a
* scheduler is disabled. We must therefore exit the task here, or we'd
* fail to invoke ops.exit_task(), as the scheduler will have been
* unloaded by the time the task is subsequently exited on the
* sched_ext_free() path.
*/
while ((p = scx_task_iter_next_locked(&sti, true))) {
const struct sched_class *old_class = p->sched_class;
struct sched_enq_and_set_ctx ctx;

sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
if (READ_ONCE(p->__state) != TASK_DEAD) {
sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE,
&ctx);

p->scx.slice = min_t(u64, p->scx.slice, SCX_SLICE_DFL);
__setscheduler_prio(p, p->prio);
check_class_changing(task_rq(p), p, old_class);
p->scx.slice = min_t(u64, p->scx.slice, SCX_SLICE_DFL);
__setscheduler_prio(p, p->prio);
check_class_changing(task_rq(p), p, old_class);

sched_enq_and_set_task(&ctx);
sched_enq_and_set_task(&ctx);

check_class_changed(task_rq(p), p, old_class, p->prio);
check_class_changed(task_rq(p), p, old_class, p->prio);
}
scx_ops_exit_task(p);
}
scx_task_iter_exit(&sti);
@@ -5034,7 +5048,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
spin_lock_irq(&scx_tasks_lock);

scx_task_iter_init(&sti);
while ((p = scx_task_iter_next_filtered_locked(&sti))) {
while ((p = scx_task_iter_next_locked(&sti, false))) {
get_task_struct(p);
scx_task_iter_rq_unlock(&sti);
spin_unlock_irq(&scx_tasks_lock);
@@ -5088,7 +5102,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
WRITE_ONCE(scx_switching_all, !(ops->flags & SCX_OPS_SWITCH_PARTIAL));

scx_task_iter_init(&sti);
while ((p = scx_task_iter_next_filtered_locked(&sti))) {
while ((p = scx_task_iter_next_locked(&sti, false))) {
const struct sched_class *old_class = p->sched_class;
struct sched_enq_and_set_ctx ctx;

