Skip to content
This repository has been archived by the owner on Jun 18, 2024. It is now read-only.

Commit

Permalink
scx: Implement scx_bpf_cpuperf_set()
Browse files Browse the repository at this point in the history
This allows the BPF scheduler to request a specific performance level for
each CPU. SCX defaults to max perf if scx_bpf_cpuperf_set() is not called.
  • Loading branch information
htejun committed Apr 12, 2024
1 parent 8b97fe0 commit 5cdeae4
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 7 deletions.
12 changes: 11 additions & 1 deletion kernel/sched/cpufreq_schedutil.c
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,9 @@ unsigned long sugov_effective_cpu_perf(int cpu, unsigned long actual,

static void sugov_get_util(struct sugov_cpu *sg_cpu, unsigned long boost)
{
unsigned long min, max, util = cpu_util_cfs_boost(sg_cpu->cpu);
unsigned long min, max;
unsigned long util = cpu_util_cfs_boost(sg_cpu->cpu) +
scx_cpuperf_target(sg_cpu->cpu);

util = effective_cpu_util(sg_cpu->cpu, util, &min, &max);
util = max(util, boost);
Expand Down Expand Up @@ -330,6 +332,14 @@ static bool sugov_hold_freq(struct sugov_cpu *sg_cpu)
unsigned long idle_calls;
bool ret;

/*
* The heuristics in this function are for the fair class. For SCX, the
* performance target comes directly from the BPF scheduler. Let's just
* follow it.
*/
if (scx_switched_all())
return false;

/* if capped by uclamp_max, always update to be in compliance */
if (uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)))
return false;
Expand Down
36 changes: 35 additions & 1 deletion kernel/sched/ext.c
Original file line number Diff line number Diff line change
Expand Up @@ -4474,7 +4474,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
struct scx_task_iter sti;
struct task_struct *p;
unsigned long timeout;
int i, ret;
int i, cpu, ret;

mutex_lock(&scx_ops_enable_mutex);

Expand Down Expand Up @@ -4523,6 +4523,9 @@ static int scx_ops_enable(struct sched_ext_ops *ops)

atomic_long_set(&scx_nr_rejected, 0);

for_each_possible_cpu(cpu)
cpu_rq(cpu)->scx.cpuperf_target = SCX_CPUPERF_ONE;

/*
* Keep CPUs stable during enable so that the BPF scheduler can track
* online CPUs by watching ->on/offline_cpu() after ->init().
Expand Down Expand Up @@ -6015,6 +6018,36 @@ __bpf_kfunc u32 scx_bpf_cpuperf_cur(s32 cpu)
return SCX_CPUPERF_ONE;
}

/**
 * scx_bpf_cpuperf_set - Set the relative performance target of a CPU
 * @cpu: CPU of interest
 * @perf: target performance level [0, %SCX_CPUPERF_ONE]
 *
 * Set the target performance level of @cpu to @perf. @perf is in linear
 * relative scale between 0 and %SCX_CPUPERF_ONE. This determines how the
 * schedutil cpufreq governor chooses the target frequency. The actual
 * performance level chosen is dependent on the hardware and cpufreq driver in
 * use and can be monitored using scx_bpf_cpuperf_cur().
 *
 * An out-of-range @perf aborts the BPF scheduler via scx_ops_error(); an
 * invalid @cpu is silently ignored.
 */
__bpf_kfunc void scx_bpf_cpuperf_set(u32 cpu, u32 perf)
{
	/*
	 * NOTE(review): tools/sched_ext/include/scx/common.bpf.h declares
	 * @cpu as s32 — confirm the two signatures are meant to differ.
	 */
	if (unlikely(perf > SCX_CPUPERF_ONE)) {
		scx_ops_error("Invalid cpuperf target %u for CPU %u", perf, cpu);
		return;
	}

	if (ops_cpu_valid(cpu, NULL)) {
		struct rq *rq = cpu_rq(cpu);

		rq->scx.cpuperf_target = perf;

		/*
		 * Poke schedutil so the new target is picked up promptly
		 * rather than on the next natural util update.
		 */
		rcu_read_lock_sched_notrace();
		cpufreq_update_util(rq, 0);
		rcu_read_unlock_sched_notrace();
	}
}

/**
* scx_bpf_get_possible_cpumask - Get a referenced kptr to cpu_possible_mask
*/
Expand Down Expand Up @@ -6165,6 +6198,7 @@ BTF_ID_FLAGS(func, scx_bpf_error_bstr, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, scx_bpf_nr_cpu_ids)
BTF_ID_FLAGS(func, scx_bpf_cpuperf_cap)
BTF_ID_FLAGS(func, scx_bpf_cpuperf_cur)
BTF_ID_FLAGS(func, scx_bpf_cpuperf_set)
BTF_ID_FLAGS(func, scx_bpf_get_possible_cpumask, KF_ACQUIRE)
BTF_ID_FLAGS(func, scx_bpf_get_online_cpumask, KF_ACQUIRE)
BTF_ID_FLAGS(func, scx_bpf_put_cpumask, KF_RELEASE)
Expand Down
9 changes: 9 additions & 0 deletions kernel/sched/ext.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ void scx_next_task_picked(struct rq *rq, struct task_struct *p,
const struct sched_class *active);
void init_sched_ext_class(void);

/*
 * Return the performance target the BPF scheduler has requested for @cpu,
 * or 0 when SCX is not enabled.
 */
static inline u32 scx_cpuperf_target(s32 cpu)
{
	return scx_enabled() ? cpu_rq(cpu)->scx.cpuperf_target : 0;
}

static inline const struct sched_class *next_active_class(const struct sched_class *class)
{
class++;
Expand Down Expand Up @@ -91,6 +99,7 @@ static inline void scx_tick(void) {}
static inline void scx_next_task_picked(struct rq *rq, struct task_struct *p,
const struct sched_class *active) {}
static inline void init_sched_ext_class(void) {}
static inline u32 scx_cpuperf_target(s32 cpu) { return 0; }

#define for_each_active_class for_each_class
#define for_balance_class_range for_class_range
Expand Down
1 change: 1 addition & 0 deletions kernel/sched/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,7 @@ struct scx_rq {
u64 extra_enq_flags; /* see move_task_to_local_dsq() */
u32 nr_running;
u32 flags;
u32 cpuperf_target; /* [0, SCHED_CAPACITY_SCALE] */
bool cpu_released;
cpumask_var_t cpus_to_kick;
cpumask_var_t cpus_to_kick_if_idle;
Expand Down
1 change: 1 addition & 0 deletions tools/sched_ext/include/scx/common.bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ void scx_bpf_error_bstr(char *fmt, unsigned long long *data, u32 data_len) __ksy
u32 scx_bpf_nr_cpu_ids(void) __ksym;
u32 scx_bpf_cpuperf_cap(s32 cpu) __ksym;
u32 scx_bpf_cpuperf_cur(s32 cpu) __ksym;
void scx_bpf_cpuperf_set(s32 cpu, u32 perf) __ksym;
const struct cpumask *scx_bpf_get_possible_cpumask(void) __ksym;
const struct cpumask *scx_bpf_get_online_cpumask(void) __ksym;
void scx_bpf_put_cpumask(const struct cpumask *cpumask) __ksym;
Expand Down
65 changes: 62 additions & 3 deletions tools/sched_ext/scx_qmap.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,18 @@ struct {
},
};

/*
 * If enabled, the CPU performance target is derived from the queue index
 * via the table below: queue 0 maps to 0 and queue 4 to SCX_CPUPERF_ONE,
 * in steps of one quarter.
 */
static const u32 qidx_to_cpuperf_target[] = {
	[0] = SCX_CPUPERF_ONE * 0 / 4,
	[1] = SCX_CPUPERF_ONE * 1 / 4,
	[2] = SCX_CPUPERF_ONE * 2 / 4,
	[3] = SCX_CPUPERF_ONE * 3 / 4,
	[4] = SCX_CPUPERF_ONE * 4 / 4,
};

/*
* Per-queue sequence numbers to implement core-sched ordering.
*
Expand Down Expand Up @@ -91,6 +103,8 @@ struct {
/* Per-CPU state, looked up from cpu_ctx_stor. */
struct cpu_ctx {
	u64 dsp_idx;	/* dispatch index */
	u64 dsp_cnt;	/* remaining count */
	u32 avg_weight;		/* running average of on-CPU task weights, updated in qmap_tick() */
	u32 cpuperf_target;	/* last perf level requested via scx_bpf_cpuperf_set() */
};

struct {
Expand All @@ -104,6 +118,7 @@ struct {
u64 nr_enqueued, nr_dispatched, nr_reenqueued, nr_dequeued;
u64 nr_core_sched_execed;
u32 cpuperf_min, cpuperf_avg, cpuperf_max;
u32 cpuperf_target_min, cpuperf_target_avg, cpuperf_target_max;

s32 BPF_STRUCT_OPS(qmap_select_cpu, struct task_struct *p,
s32 prev_cpu, u64 wake_flags)
Expand Down Expand Up @@ -300,6 +315,29 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev)
}
}

/*
 * Periodic tick: track a running average of the on-CPU task's weight and
 * request a matching CPU performance level.
 */
void BPF_STRUCT_OPS(qmap_tick, struct task_struct *p)
{
	struct cpu_ctx *cpuc;
	u32 zero = 0;
	int idx;

	if (!(cpuc = bpf_map_lookup_elem(&cpu_ctx_stor, &zero))) {
		scx_bpf_error("failed to look up cpu_ctx");
		return;
	}

	/*
	 * Use the running avg of weights to select the target cpuperf level.
	 * This is a demonstration of the cpuperf feature rather than a
	 * practical strategy to regulate CPU frequency. The average decays
	 * by 3/4 each tick with 1/4 of the current task's weight mixed in.
	 */
	cpuc->avg_weight = cpuc->avg_weight * 3 / 4 + p->scx.weight / 4;
	idx = weight_to_idx(cpuc->avg_weight);

	/*
	 * Bound the index explicitly so the access is provably in range
	 * (required for the BPF verifier regardless of what weight_to_idx()
	 * promises).
	 */
	if (idx < 0 ||
	    idx >= (int)(sizeof(qidx_to_cpuperf_target) /
			 sizeof(qidx_to_cpuperf_target[0])))
		return;

	cpuc->cpuperf_target = qidx_to_cpuperf_target[idx];
	scx_bpf_cpuperf_set(scx_bpf_task_cpu(p), cpuc->cpuperf_target);
}

/*
* The distance from the head of the queue scaled by the weight of the queue.
* The lower the number, the older the task and the higher the priority.
Expand Down Expand Up @@ -454,21 +492,26 @@ struct {
*/
static int cpu_mon_timerfn(void *map, int *key, struct bpf_timer *timer)
{
u32 zero = 0;
u32 nr_cpu_ids = scx_bpf_nr_cpu_ids();
u64 cap_sum = 0, cur_sum = 0, cur_min = SCX_CPUPERF_ONE, cur_max = 0;
u64 target_sum = 0, target_min = SCX_CPUPERF_ONE, target_max = 0;
const struct cpumask *online;
int i;
int i, nr_online_cpus = 0;

online = scx_bpf_get_online_cpumask();
if (!online)
return -ENOMEM;

bpf_for(i, 0, nr_cpu_ids) {
struct cpu_ctx *cpuc;
u32 cap, cur;

if (!bpf_cpumask_test_cpu(i, online))
continue;
nr_online_cpus++;

/* collect the capacity and current cpuperf */
cap = scx_bpf_cpuperf_cap(i);
cur = scx_bpf_cpuperf_cur(i);

Expand All @@ -482,15 +525,30 @@ static int cpu_mon_timerfn(void *map, int *key, struct bpf_timer *timer)
*/
cur_sum += cur * cap / SCX_CPUPERF_ONE;
cap_sum += cap;
}

scx_bpf_put_cpumask(online);
if (!(cpuc = bpf_map_lookup_percpu_elem(&cpu_ctx_stor, &zero, i))) {
scx_bpf_error("failed to look up cpu_ctx");
goto out;
}

/* collect target */
cur = cpuc->cpuperf_target;
target_sum += cur;
target_min = cur < target_min ? cur : target_min;
target_max = cur > target_max ? cur : target_max;
}

cpuperf_min = cur_min;
cpuperf_avg = cur_sum * SCX_CPUPERF_ONE / cap_sum;
cpuperf_max = cur_max;

cpuperf_target_min = target_min;
cpuperf_target_avg = target_sum / nr_online_cpus;
cpuperf_target_max = target_max;

bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
out:
scx_bpf_put_cpumask(online);
return 0;
}

Expand Down Expand Up @@ -524,6 +582,7 @@ SCX_OPS_DEFINE(qmap_ops,
.enqueue = (void *)qmap_enqueue,
.dequeue = (void *)qmap_dequeue,
.dispatch = (void *)qmap_dispatch,
.tick = (void *)qmap_tick,
.core_sched_before = (void *)qmap_core_sched_before,
.cpu_release = (void *)qmap_cpu_release,
.init_task = (void *)qmap_init_task,
Expand Down
7 changes: 5 additions & 2 deletions tools/sched_ext/scx_qmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,13 @@ int main(int argc, char **argv)
nr_enqueued, nr_dispatched, nr_enqueued - nr_dispatched,
skel->bss->nr_reenqueued, skel->bss->nr_dequeued,
skel->bss->nr_core_sched_execed);
printf("cpuperf: cur min/avg/max=%u/%u/%u\n",
printf("cpuperf: cur min/avg/max=%u/%u/%u target min/avg/max=%u/%u/%u\n",
skel->bss->cpuperf_min,
skel->bss->cpuperf_avg,
skel->bss->cpuperf_max);
skel->bss->cpuperf_max,
skel->bss->cpuperf_target_min,
skel->bss->cpuperf_target_avg,
skel->bss->cpuperf_target_max);
fflush(stdout);
sleep(1);
}
Expand Down

0 comments on commit 5cdeae4

Please sign in to comment.