Replace PID with Task Pointer in Rusty #713

Merged · 1 commit · Oct 4, 2024
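In brief, from the diff itself: scx_rusty's per-task bookkeeping (the task_data and lb_data hash maps, the per-domain active-task rings, and the debug output) switches from pid_t keys to the task's task_struct pointer value, carried as a u64 ("tptr"). One visible payoff is in dom_xfer_task(), which previously had to call bpf_task_from_pid() and pair it with bpf_task_release(); with a tptr, the pointer itself identifies the task. A minimal sketch of the keying idea (names match the diff; the direct cast is an illustration, the PR routes conversions through its t_to_tptr() helper):

```c
/* Key per-task state by the task_struct address instead of by PID. */
u64 tptr = (u64)p;	/* p: struct task_struct * */
struct task_ctx *taskc = bpf_map_lookup_elem(&task_data, &tptr);
if (!taskc)
	scx_bpf_error("task_ctx lookup failed for tptr %llu", tptr);
```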
4 changes: 2 additions & 2 deletions scheds/rust/scx_rusty/src/bpf/intf.h
@@ -63,7 +63,7 @@ enum consts {
* this isn't a practical problem as the LB rounds are best-effort
* anyway and will be retried until loads are balanced.
*/
-	MAX_DOM_ACTIVE_PIDS = 1024,
+	MAX_DOM_ACTIVE_TPTRS = 1024,
};

/* Statistics */
@@ -106,7 +106,7 @@ struct task_ctx {
u32 dom_id;
u32 weight;
bool runnable;
-	u64 dom_active_pids_gen;
+	u64 dom_active_tptrs_gen;
u64 deadline;

u64 sum_runtime;
130 changes: 85 additions & 45 deletions scheds/rust/scx_rusty/src/bpf/main.bpf.c
Expand Up @@ -149,21 +149,21 @@ struct {
__uint(map_flags, 0);
} dom_dcycle_locks SEC(".maps");

-struct dom_active_pids {
+struct dom_active_tptrs {
u64 gen;
u64 read_idx;
u64 write_idx;
-	s32 pids[MAX_DOM_ACTIVE_PIDS];
+	u64 tptrs[MAX_DOM_ACTIVE_TPTRS];
};

-struct dom_active_pids dom_active_pids[MAX_DOMS];
+struct dom_active_tptrs dom_active_tptrs[MAX_DOMS];

const u64 ravg_1 = 1 << RAVG_FRAC_BITS;

-/* Map pid -> task_ctx */
+/* Map tptr -> task_ctx */
struct {
__uint(type, BPF_MAP_TYPE_HASH);
-	__type(key, pid_t);
+	__type(key, u64);
__type(value, struct task_ctx);
__uint(max_entries, 1000000);
__uint(map_flags, 0);
@@ -185,20 +185,37 @@ static struct dom_ctx *lookup_dom_ctx(u32 dom_id)
return domc;
}

+static u64 t_to_tptr(struct task_struct *p)
+{
+	u64 tptr;
+	int err;
+
+	err = bpf_probe_read_kernel(&tptr, sizeof(tptr), &p);
+
+	if (err) {
+		scx_bpf_error("Failed to cast task_struct addr to tptr");
+		return 0;
+	}
+	return tptr;
+}

static struct task_ctx *try_lookup_task_ctx(struct task_struct *p)
{
-	s32 pid = p->pid;
+	u64 tptr;
+	tptr = t_to_tptr(p);

-	return bpf_map_lookup_elem(&task_data, &pid);
+	return bpf_map_lookup_elem(&task_data, &tptr);
}

static struct task_ctx *lookup_task_ctx(struct task_struct *p)
{
struct task_ctx *taskc;
+	u64 tptr;
+	tptr = t_to_tptr(p);

taskc = try_lookup_task_ctx(p);
if (!taskc)
scx_bpf_error("task_ctx lookup failed for pid %d", p->pid);
scx_bpf_error("task_ctx lookup failed for tptr %llu", tptr);

return taskc;
}
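A note on the helper above: bpf_probe_read_kernel() copies the pointer value out of the local variable p, so t_to_tptr() is effectively a checked scalar cast, no kernel memory beyond the stack slot is dereferenced. A plain cast expresses the same conversion; whether the checked read is required in this context is a verifier question, so treat the following as an assumption rather than a drop-in replacement:

```c
/* Hypothetical simplification of t_to_tptr(): the pointer value itself
 * becomes the map key. */
static __always_inline u64 t_to_tptr_cast(struct task_struct *p)
{
	return (u64)p;
}
```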
@@ -397,31 +414,45 @@ static u64 dom_min_vruntime(struct dom_ctx *domc)
return READ_ONCE(domc->min_vruntime);
}

-int dom_xfer_task(pid_t pid, u32 new_dom_id, u64 now)
+static struct task_struct *tptr_to_task(u64 tptr)
+{
+	struct task_struct *p, *task;
+	int err_task;
+
+	task = (struct task_struct *)tptr;
+
+	err_task = bpf_probe_read_kernel(&p, sizeof(struct task_struct *), &task);
+
+	if (err_task)
+		scx_bpf_error("Failed to retrieve task_struct for tptr %llu", tptr);
+	if (p)
+		return p;
+	return NULL;	/* read failed or pointer was NULL */
+}

+int dom_xfer_task(u64 tptr, u32 new_dom_id, u64 now)
{
{
struct dom_ctx *from_domc, *to_domc;
struct task_ctx *taskc;
struct task_struct *p;

-	p = bpf_task_from_pid(pid);
+	p = tptr_to_task(tptr);

if (!p) {
scx_bpf_error("Failed to lookup task %d", pid);
scx_bpf_error("Failed to lookup task %llu", tptr);
return 0;
}

taskc = lookup_task_ctx(p);
if (!taskc)
-		goto free_task;
+		return 0;

from_domc = lookup_dom_ctx(taskc->dom_id);
to_domc = lookup_dom_ctx(new_dom_id);

if (!from_domc || !to_domc || !taskc)
-		goto free_task;
+		return 0;

dom_dcycle_xfer_task(p, taskc, from_domc, to_domc, now);
-free_task:
-	bpf_task_release(p);
return 0;
}
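For contrast, the shape the deleted lines gave this function: bpf_task_from_pid() returns a referenced task_struct, so every early exit had to funnel through the free_task: label to drop that reference. tptr_to_task() takes no reference, which is why the exits above can simply return 0, with the trade-off that nothing in this path pins the task alive. Reconstructed from the removed lines:

```c
/* Old shape of dom_xfer_task(), per the lines deleted above. */
int dom_xfer_task_old(pid_t pid, u32 new_dom_id, u64 now)
{
	struct task_struct *p = bpf_task_from_pid(pid);	/* takes a ref */

	if (!p)
		return 0;
	/* ... lookups; any failure jumps to free_task ... */
free_task:
	bpf_task_release(p);	/* the ref must be dropped on every path */
	return 0;
}
```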

@@ -445,12 +476,12 @@ static inline void stat_add(enum stat_idx idx, u64 addend)
}

/*
- * This is populated from userspace to indicate which pids should be reassigned
+ * This is populated from userspace to indicate which tptrs should be reassigned
* to new doms.
*/
struct {
__uint(type, BPF_MAP_TYPE_HASH);
-	__type(key, pid_t);
+	__type(key, u64);
__type(value, u32);
__uint(max_entries, 1000);
__uint(map_flags, 0);
@@ -752,7 +783,9 @@ static bool task_set_domain(struct task_ctx *taskc, struct task_struct *p,
{
struct dom_ctx *old_domc, *new_domc;
struct bpf_cpumask *d_cpumask, *t_cpumask;
+	u64 tptr;
u32 old_dom_id = taskc->dom_id;
+	tptr = t_to_tptr(p);

t_cpumask = taskc->cpumask;
if (!t_cpumask) {
@@ -788,9 +821,10 @@ static bool task_set_domain(struct task_ctx *taskc, struct task_struct *p,
if (bpf_cpumask_intersects((const struct cpumask *)d_cpumask,
p->cpus_ptr)) {
u64 now = bpf_ktime_get_ns();

if (!init_dsq_vtime)
-		dom_xfer_task(p->pid, new_dom_id, now);
+		dom_xfer_task(tptr, new_dom_id, now);

taskc->dom_id = new_dom_id;
p->scx.dsq_vtime = dom_min_vruntime(new_domc);
taskc->deadline = p->scx.dsq_vtime +
@@ -1095,9 +1129,10 @@ void BPF_STRUCT_OPS(rusty_enqueue, struct task_struct *p, u64 enq_flags)
{
struct task_ctx *taskc;
struct bpf_cpumask *p_cpumask;
-	pid_t pid = p->pid;
+	u64 tptr;
u32 *new_dom;
s32 cpu;
+	tptr = t_to_tptr(p);

if (!(taskc = lookup_task_ctx(p)))
return;
@@ -1109,7 +1144,7 @@ void BPF_STRUCT_OPS(rusty_enqueue, struct task_struct *p, u64 enq_flags)
/*
* Migrate @p to a new domain if requested by userland through lb_data.
*/
-	new_dom = bpf_map_lookup_elem(&lb_data, &pid);
+	new_dom = bpf_map_lookup_elem(&lb_data, &tptr);
if (new_dom && *new_dom != taskc->dom_id &&
task_set_domain(taskc, p, *new_dom, false)) {
stat_add(RUSTY_STAT_LOAD_BALANCE, 1);
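lb_data is the userspace-to-BPF channel for these migrations: the load balancer writes tptr-to-new-domain entries, and the enqueue path above consumes them. The userspace half of this PR (scx_rusty's Rust load balancer) is not part of this section; as an illustration under that assumption, the equivalent libbpf call from C would be:

```c
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <stdio.h>

/* Illustrative only: how a loader would request that the task identified
 * by tptr (as reported via dom_active_tptrs) move to new_dom on its next
 * enqueue. The bpf_map handle would come from the generated skeleton. */
static void request_migration(struct bpf_map *lb_data, __u64 tptr, __u32 new_dom)
{
	int fd = bpf_map__fd(lb_data);

	if (bpf_map_update_elem(fd, &tptr, &new_dom, BPF_ANY))
		fprintf(stderr, "lb_data update for tptr %llu failed\n",
			(unsigned long long)tptr);
}
```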
@@ -1405,21 +1440,21 @@ void BPF_STRUCT_OPS(rusty_running, struct task_struct *p)
* consider recently active tasks. Access synchronization rules aren't
* strict. We just need to be right most of the time.
*/
-	dap_gen = dom_active_pids[dom_id].gen;
-	if (taskc->dom_active_pids_gen != dap_gen) {
-		u64 idx = __sync_fetch_and_add(&dom_active_pids[dom_id].write_idx, 1) %
-			MAX_DOM_ACTIVE_PIDS;
-		s32 *pidp;
-
-		pidp = MEMBER_VPTR(dom_active_pids, [dom_id].pids[idx]);
-		if (!pidp) {
-			scx_bpf_error("dom_active_pids[%u][%llu] indexing failed",
+	dap_gen = dom_active_tptrs[dom_id].gen;
+	if (taskc->dom_active_tptrs_gen != dap_gen) {
+		u64 idx = __sync_fetch_and_add(&dom_active_tptrs[dom_id].write_idx, 1) %
+			MAX_DOM_ACTIVE_TPTRS;
+		u64 *tptrp;
+
+		tptrp = MEMBER_VPTR(dom_active_tptrs, [dom_id].tptrs[idx]);
+		if (!tptrp) {
+			scx_bpf_error("dom_active_tptrs[%u][%llu] indexing failed",
dom_id, idx);
return;
}

-		*pidp = p->pid;
-		taskc->dom_active_pids_gen = dap_gen;
+		*tptrp = t_to_tptr(p);
+		taskc->dom_active_tptrs_gen = dap_gen;
}

if (fifo_sched)
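The generation check above is what keeps ring traffic bounded: a task's tptr is appended to its domain's dom_active_tptrs ring at most once per generation, userspace scans the slots between read_idx and write_idx to find recently active tasks, and, as the comment notes, the synchronization is deliberately best-effort. Restated compactly (same logic as the diff; the assumption is that the userspace reader advances gen between scans):

```c
/* Append-once-per-generation write path of rusty_running(). */
if (taskc->dom_active_tptrs_gen != dom_active_tptrs[dom_id].gen) {
	u64 idx = __sync_fetch_and_add(&dom_active_tptrs[dom_id].write_idx, 1) %
		MAX_DOM_ACTIVE_TPTRS;

	dom_active_tptrs[dom_id].tptrs[idx] = t_to_tptr(p);	/* may race; tolerated */
	taskc->dom_active_tptrs_gen = dom_active_tptrs[dom_id].gen;
}
```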
@@ -1496,12 +1531,14 @@ void BPF_STRUCT_OPS(rusty_quiescent, struct task_struct *p, u64 deq_flags)
void BPF_STRUCT_OPS(rusty_set_weight, struct task_struct *p, u32 weight)
{
struct task_ctx *taskc;
+	u64 tptr;
+	tptr = t_to_tptr(p);

if (!(taskc = lookup_task_ctx(p)))
return;

if (debug >= 2)
bpf_printk("%s[%d]: SET_WEIGHT %u -> %u", p->comm, p->pid,
bpf_printk("%s[%llu]: SET_WEIGHT %u -> %u", p->comm, tptr,
taskc->weight, weight);

taskc->weight = weight;
@@ -1549,14 +1586,17 @@ static void task_pick_and_set_domain(struct task_ctx *taskc,
const struct cpumask *cpumask,
bool init_dsq_vtime)
{
+	u64 tptr;
u32 dom_id = 0;

+	tptr = t_to_tptr(p);

if (nr_doms > 1)
dom_id = task_pick_domain(taskc, p, cpumask);

if (!task_set_domain(taskc, p, dom_id, init_dsq_vtime))
scx_bpf_error("Failed to set dom%d for %s[%d]",
dom_id, p->comm, p->pid);
scx_bpf_error("Failed to set dom%d for %s[%llu]",
dom_id, p->comm, tptr);
}

void BPF_STRUCT_OPS(rusty_set_cpumask, struct task_struct *p,
@@ -1597,50 +1637,50 @@ s32 BPF_STRUCT_OPS(rusty_init_task, struct task_struct *p,
{
u64 now = bpf_ktime_get_ns();
struct task_ctx taskc = {
-		.dom_active_pids_gen = -1,
+		.dom_active_tptrs_gen = -1,
.last_blocked_at = now,
.last_woke_at = now,
.preferred_dom_mask = 0,

};
struct task_ctx *map_value;
long ret;
-	pid_t pid;
+	u64 tptr;

-	pid = p->pid;
+	tptr = t_to_tptr(p);

/*
* XXX - We want BPF_NOEXIST but bpf_map_delete_elem() in .disable() may
* fail spuriously due to BPF recursion protection triggering
* unnecessarily.
*/
ret = bpf_map_update_elem(&task_data, &pid, &taskc, 0 /*BPF_NOEXIST*/);
ret = bpf_map_update_elem(&task_data, &tptr, &taskc, 0 /*BPF_NOEXIST*/);
if (ret) {
stat_add(RUSTY_STAT_TASK_GET_ERR, 1);
return ret;
}

if (debug >= 2)
bpf_printk("%s[%d]: INIT (weight %u))", p->comm, p->pid, p->scx.weight);
bpf_printk("%s[%llu]: INIT (weight %u))", p->comm, tptr, p->scx.weight);

/*
* Read the entry from the map immediately so we can add the cpumask
* with bpf_kptr_xchg().
*/
map_value = bpf_map_lookup_elem(&task_data, &pid);
map_value = bpf_map_lookup_elem(&task_data, &tptr);
if (!map_value)
/* Should never happen -- it was just inserted above. */
return -EINVAL;

ret = create_save_cpumask(&map_value->cpumask);
if (ret) {
bpf_map_delete_elem(&task_data, &pid);
bpf_map_delete_elem(&task_data, &tptr);
return ret;
}

ret = create_save_cpumask(&map_value->tmp_cpumask);
if (ret) {
bpf_map_delete_elem(&task_data, &pid);
bpf_map_delete_elem(&task_data, &tptr);
return ret;
}
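create_save_cpumask() is defined outside this diff; for context, the usual shape of such a helper in sched_ext schedulers is to allocate a bpf_cpumask and stash it in the map value with bpf_kptr_xchg(), which is why the code above must re-look-up the entry it just inserted. A hedged sketch of that pattern (the real helper may differ in details):

```c
static long create_save_cpumask(struct bpf_cpumask **kptrp)
{
	struct bpf_cpumask *cpumask;

	cpumask = bpf_cpumask_create();
	if (!cpumask)
		return -ENOMEM;

	/* Exchange into the map-held kptr slot; a prior value (unexpected
	 * on a fresh entry) must be released to keep the verifier happy. */
	cpumask = bpf_kptr_xchg(kptrp, cpumask);
	if (cpumask)
		bpf_cpumask_release(cpumask);

	return 0;
}
```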

@@ -1652,7 +1692,7 @@ s32 BPF_STRUCT_OPS(rusty_init_task, struct task_struct *p,
void BPF_STRUCT_OPS(rusty_exit_task, struct task_struct *p,
struct scx_exit_task_args *args)
{
-	pid_t pid = p->pid;
+	u64 tptr = t_to_tptr(p);
long ret;

/*
Expand All @@ -1661,7 +1701,7 @@ void BPF_STRUCT_OPS(rusty_exit_task, struct task_struct *p,
* deletions aren't reliable means that we sometimes leak task_ctx and
* can't use BPF_NOEXIST on allocation in .prep_enable().
*/
-	ret = bpf_map_delete_elem(&task_data, &pid);
+	ret = bpf_map_delete_elem(&task_data, &tptr);
if (ret) {
stat_add(RUSTY_STAT_TASK_GET_ERR, 1);
return;
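A caveat worth stating explicitly (an observation, not something claimed in the PR): task_struct addresses, like PIDs, can be recycled once a task is freed, so a task_data entry that outlives its task could later be found under a new task's tptr. The delete in exit_task above is what bounds that window; the comment preceding it explains why the delete can fail spuriously under BPF recursion protection, and therefore why init_task cannot use BPF_NOEXIST to catch the resulting leaks.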