Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scx_layered: Add topology related bpf_cpumasks to layer_cpumask_wrapper #752

Open
hodgesds opened this issue Oct 7, 2024 · 0 comments
Open
Labels
bpf help wanted Extra attention is needed scx_layered

Comments

@hodgesds
Copy link
Contributor

hodgesds commented Oct 7, 2024

Add struct bpf_cpumasks for topologies on struct layer_cpumask_wrapper. The change should be rather straightforward:

diff --git a/scheds/rust/scx_layered/src/bpf/main.bpf.c b/scheds/rust/scx_layered/src/bpf/main.bpf.c
index 72a7005..43de306 100644
--- a/scheds/rust/scx_layered/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_layered/src/bpf/main.bpf.c
@@ -275,22 +275,29 @@ static void adj_load(u32 layer_idx, s64 adj, u64 now)
 	bpf_spin_lock(&lockw->lock);
 	layer->load += adj;
 	ravg_accumulate(&layer->load_rd, layer->load, now, USAGE_HALF_LIFE);
 	bpf_spin_unlock(&lockw->lock);
 
 	if (debug && adj < 0 && (s64)layer->load < 0)
 		scx_bpf_error("cpu%d layer%d load underflow (load=%lld adj=%lld)",
 			      bpf_get_smp_processor_id(), layer_idx, layer->load, adj);
 }
 
+struct layered_topo_cpumasks {
+         struct bpf_cpumask __kptr *cpumask;
+};
+
+
 struct layer_cpumask_wrapper {
 	struct bpf_cpumask __kptr *cpumask;
+	struct layered_topo_cpumasks llc_cpumasks[MAX_DOMS];
+	struct layered_topo_cpumasks node_cpumasks[MAX_NUMA_NODES];
 };
 
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
 	__type(key, u32);
 	__type(value, struct layer_cpumask_wrapper);
 	__uint(max_entries, MAX_LAYERS);
 	__uint(map_flags, 0);
 } layer_cpumasks SEC(".maps");
 
@@ -302,45 +309,58 @@ static struct cpumask *lookup_layer_cpumask(int idx)
 		return (struct cpumask *)cpumaskw->cpumask;
 	} else {
 		scx_bpf_error("no layer_cpumask");
 		return NULL;
 	}
 }
 
 static void refresh_cpumasks(int idx)
 {
 	struct layer_cpumask_wrapper *cpumaskw;
+	struct cpu_ctx *cctx;
 	struct layer *layer;
-	int cpu, total = 0;
+	int cpu, node_idx, total = 0;
 
 	if (!__sync_val_compare_and_swap(&layers[idx].refresh_cpus, 1, 0))
 		return;
 
 	cpumaskw = bpf_map_lookup_elem(&layer_cpumasks, &idx);
 
 	bpf_for(cpu, 0, nr_possible_cpus) {
 		u8 *u8_ptr;
 
+		if (!(cctx = lookup_cpu_ctx(cpu))) {
+			scx_bpf_error("can't happen");
+			return;
+		}
+
+		node_idx = cctx->node_idx;
+		if (node_idx > nr_nodes) {
+			scx_bpf_error("can't happen");
+			return;
+		}
+
 		if ((u8_ptr = MEMBER_VPTR(layers, [idx].cpus[cpu / 8]))) {
 			/*
 			 * XXX - The following test should be outside the loop
 			 * but that makes the verifier think that
 			 * cpumaskw->cpumask might be NULL in the loop.
 			 */
 			barrier_var(cpumaskw);
 			if (!cpumaskw || !cpumaskw->cpumask) {
 				scx_bpf_error("can't happen");
 				return;
 			}
 
 			if (*u8_ptr & (1 << (cpu % 8))) {
 				bpf_cpumask_set_cpu(cpu, cpumaskw->cpumask);
+				bpf_cpumask_set_cpu(cpu, cpumaskw->node_cpumasks[node_idx].cpumask);
 				total++;
 			} else {
 				bpf_cpumask_clear_cpu(cpu, cpumaskw->cpumask);
 			}
 		} else {
 			scx_bpf_error("can't happen");
 		}
 	}
 
 	// XXX - shouldn't be necessary

However, this fails the verifier when trying to use it:

108: (79) r2 = *(u64 *)(r6 +0)        ; R2_w=rcu_ptr_or_null_bpf_cpumask(id=21) R6=map_value(map=layer_cpumasks,ks=4,vs=1032) refs=1,8
109: (55) if r2 != 0x0 goto pc+7 117: R0=map_value(map=cpu_ctxs,ks=4,vs=3632) R1_w=map_value(map=bpf_bpf.bss,ks=4,vs=23411976,smin=smin32=0,smax=umax=smax32=umax32=0x1653cff,var_off=(0x0; 0x1ffffff)) R2_w=rcu_ptr_bpf_cpumask() R6=map_value(map=layer_cpumasks,ks=4,vs=1032) R7=scalar(id=19,smin=smin32=0,smax=umax=smax32=umax32=79,var_off=(0x0; 0x7f)) R8=0 R9=scalar(smin=smin32=0,smax=umax=smax32=umax32=2,var_off=(0x0; 0x3)) R10=fp0 fp-8=mmmm???? fp-16=mmmmmmmm fp-24=mmmm???? fp-32=iter_num(ref_id=8,state=active,depth=2) fp-40=iter_num(ref_id=1,state=active,depth=2) refs=1,8
; if (*u8_ptr & (1 << (cpu % 8))) { @ main.bpf.c:354
117: (71) r1 = *(u8 *)(r1 +0)         ; R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) refs=1,8
118: (bc) w3 = w7                     ; R3_w=scalar(id=19,smin=smin32=0,smax=umax=smax32=umax32=79,var_off=(0x0; 0x7f)) R7=scalar(id=19,smin=smin32=0,smax=umax=smax32=umax32=79,var_off=(0x0; 0x7f)) refs=1,8
119: (54) w3 &= 7                     ; R3_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=7,var_off=(0x0; 0x7)) refs=1,8
120: (7c) w1 >>= w3                   ; R1_w=scalar() R3_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=7,var_off=(0x0; 0x7)) refs=1,8
121: (54) w1 &= 1                     ; R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=1,var_off=(0x0; 0x1)) refs=1,8
122: (16) if w1 == 0x0 goto pc+21     ; R1_w=1 refs=1,8
; bpf_cpumask_set_cpu(cpu, cpumaskw->cpumask); @ main.bpf.c:355
123: (bc) w1 = w7                     ; R1_w=scalar(id=19,smin=smin32=0,smax=umax=smax32=umax32=79,var_off=(0x0; 0x7f)) R7=scalar(id=19,smin=smin32=0,smax=umax=smax32=umax32=79,var_off=(0x0; 0x7f)) refs=1,8
124: (85) call bpf_cpumask_set_cpu#99616      ; refs=1,8
; bpf_cpumask_set_cpu(cpu, cpumaskw->node_cpumasks[node_idx].cpumask); @ main.bpf.c:356
125: (67) r9 <<= 32                   ; R9_w=scalar(smin=smin32=0,smax=umax=0x200000000,smax32=umax32=0,var_off=(0x0; 0x300000000)) refs=1,8
126: (c7) r9 s>>= 32                  ; R9_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=2,var_off=(0x0; 0x3)) refs=1,8
127: (67) r9 <<= 3                    ; R9_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=16,var_off=(0x0; 0x18)) refs=1,8
128: (bf) r1 = r6                     ; R1_w=map_value(map=layer_cpumasks,ks=4,vs=1032) R6=map_value(map=layer_cpumasks,ks=4,vs=1032) refs=1,8
129: (0f) r1 += r9                    ; R1_w=map_value(map=layer_cpumasks,ks=4,vs=1032,smin=smin32=0,smax=umax=smax32=umax32=16,var_off=(0x0; 0x18)) R9_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=16,var_off=(0x0; 0x18)) refs=1,8
130: (79) r2 = *(u64 *)(r1 +520)      ; R1_w=map_value(map=layer_cpumasks,ks=4,vs=1032,smin=smin32=0,smax=umax=smax32=umax32=16,var_off=(0x0; 0x18)) R2_w=scalar() refs=1,8
131: (bc) w1 = w7                     ; R1_w=scalar(id=19,smin=smin32=0,smax=umax=smax32=umax32=79,var_off=(0x0; 0x7f)) R7=scalar(id=19,smin=smin32=0,smax=umax=smax32=umax32=79,var_off=(0x0; 0x7f)) refs=1,8
132: (85) call bpf_cpumask_set_cpu#99616
R2 type=scalar expected=fp
processed 455 insns (limit 1000000) max_states_per_insn 2 total_states 40 peak_states 40 mark_read 14
-- END PROG LOAD LOG --

23:00:07 [WARN] libbpf: prog 'sched_tick_fentry': failed to load: -13

23:00:07 [WARN] libbpf: failed to load object 'bpf_bpf'

23:00:07 [WARN] libbpf: failed to load BPF skeleton 'bpf_bpf': -13

Error: Failed to load BPF program

Caused by:
    Permission denied (os error 13)

In the bpf selftests there is a test for bpf_cpumask kptrs stored directly in map values, and a test for nested bpf_cpumasks. However, there is no test covering this pattern: a bpf_cpumask kptr nested inside a struct (here, an array of structs) within a map value. It may not be possible for the verifier to track such nested kptrs without verifier changes.

If we can get these changes working in the idle CPU selection path, we can possibly remove some of the temporary bpf_cpumasks that create extra overhead there.

@hodgesds hodgesds added help wanted Extra attention is needed bpf scx_layered labels Oct 7, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bpf help wanted Extra attention is needed scx_layered
Projects
None yet
Development

No branches or pull requests

1 participant