Skip to content
This repository has been archived by the owner on Jun 18, 2024. It is now read-only.

Increase compat coverage and other misc updates #188

Merged
merged 10 commits into from
Apr 29, 2024
2 changes: 2 additions & 0 deletions kernel/sched/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -4584,7 +4584,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->rt.on_rq = 0;
p->rt.on_list = 0;

#ifdef CONFIG_SCHED_CLASS_EXT
init_scx_entity(&p->scx);
#endif

#ifdef CONFIG_PREEMPT_NOTIFIERS
INIT_HLIST_HEAD(&p->preempt_notifiers);
Expand Down
16 changes: 12 additions & 4 deletions kernel/sched/ext.c
Original file line number Diff line number Diff line change
Expand Up @@ -2748,6 +2748,8 @@ static struct task_struct *pick_next_task_scx(struct rq *rq)
if (!p)
return NULL;

set_next_task_scx(rq, p, true);

if (unlikely(!p->scx.slice)) {
if (!scx_ops_bypassing() && !scx_warned_zero_slice) {
printk_deferred(KERN_WARNING "sched_ext: %s[%d] has zero slice in pick_next_task_scx()\n",
Expand All @@ -2757,8 +2759,6 @@ static struct task_struct *pick_next_task_scx(struct rq *rq)
p->scx.slice = SCX_SLICE_DFL;
}

set_next_task_scx(rq, p, true);

return p;
}

Expand Down Expand Up @@ -4544,7 +4544,11 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)

static void scx_ops_error_irq_workfn(struct irq_work *irq_work)
{
scx_dump_state(scx_exit_info, scx_ops.exit_dump_len);
struct scx_exit_info *ei = scx_exit_info;

if (ei->kind >= SCX_EXIT_ERROR)
scx_dump_state(ei, scx_ops.exit_dump_len);

schedule_scx_ops_disable_work();
}

Expand Down Expand Up @@ -5531,8 +5535,12 @@ __bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
*is_idle = false;
return prev_cpu;
}

#ifdef CONFIG_SMP
return scx_select_cpu_dfl(p, prev_cpu, wake_flags, is_idle);
#else
*is_idle = false;
return prev_cpu;
#endif
}

__bpf_kfunc_end_defs();
Expand Down
1 change: 0 additions & 1 deletion kernel/sched/ext.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ bool scx_prio_less(const struct task_struct *a, const struct task_struct *b,
static inline void scx_next_task_picked(struct rq *rq, struct task_struct *p,
const struct sched_class *active) {}
static inline void scx_tick(struct rq *rq) {}
static inline void init_scx_entity(struct sched_ext_entity *scx) {}
static inline void scx_pre_fork(struct task_struct *p) {}
static inline int scx_fork(struct task_struct *p) { return 0; }
static inline void scx_post_fork(struct task_struct *p) {}
Expand Down
22 changes: 11 additions & 11 deletions tools/sched_ext/include/scx/common.bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,23 +35,23 @@ void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vt
u32 scx_bpf_dispatch_nr_slots(void) __ksym;
void scx_bpf_dispatch_cancel(void) __ksym;
bool scx_bpf_consume(u64 dsq_id) __ksym;
bool __scx_bpf_consume_task(unsigned long it, struct task_struct *p) __ksym;
bool __scx_bpf_consume_task(unsigned long it, struct task_struct *p) __ksym __weak;
u32 scx_bpf_reenqueue_local(void) __ksym;
void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym;
s32 scx_bpf_dsq_nr_queued(u64 dsq_id) __ksym;
void scx_bpf_destroy_dsq(u64 dsq_id) __ksym;
int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id, bool rev) __ksym;
struct task_struct *bpf_iter_scx_dsq_next(struct bpf_iter_scx_dsq *it) __ksym;
void bpf_iter_scx_dsq_destroy(struct bpf_iter_scx_dsq *it) __ksym;
int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id, bool rev) __ksym __weak;
struct task_struct *bpf_iter_scx_dsq_next(struct bpf_iter_scx_dsq *it) __ksym __weak;
void bpf_iter_scx_dsq_destroy(struct bpf_iter_scx_dsq *it) __ksym __weak;
void scx_bpf_exit_bstr(s64 exit_code, char *fmt, unsigned long long *data, u32 data__sz) __ksym;
void scx_bpf_error_bstr(char *fmt, unsigned long long *data, u32 data_len) __ksym;
u32 scx_bpf_cpuperf_cap(s32 cpu) __ksym;
u32 scx_bpf_cpuperf_cur(s32 cpu) __ksym;
void scx_bpf_cpuperf_set(s32 cpu, u32 perf) __ksym;
u32 scx_bpf_nr_cpu_ids(void) __ksym;
const struct cpumask *scx_bpf_get_possible_cpumask(void) __ksym;
const struct cpumask *scx_bpf_get_online_cpumask(void) __ksym;
void scx_bpf_put_cpumask(const struct cpumask *cpumask) __ksym;
u32 scx_bpf_cpuperf_cap(s32 cpu) __ksym __weak;
u32 scx_bpf_cpuperf_cur(s32 cpu) __ksym __weak;
void scx_bpf_cpuperf_set(s32 cpu, u32 perf) __ksym __weak;
u32 scx_bpf_nr_cpu_ids(void) __ksym __weak;
const struct cpumask *scx_bpf_get_possible_cpumask(void) __ksym __weak;
const struct cpumask *scx_bpf_get_online_cpumask(void) __ksym __weak;
void scx_bpf_put_cpumask(const struct cpumask *cpumask) __ksym __weak;
const struct cpumask *scx_bpf_get_idle_cpumask(void) __ksym;
const struct cpumask *scx_bpf_get_idle_smtmask(void) __ksym;
void scx_bpf_put_idle_cpumask(const struct cpumask *cpumask) __ksym;
Expand Down
53 changes: 52 additions & 1 deletion tools/sched_ext/include/scx/compat.bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,15 @@
/*
* %SCX_KICK_IDLE is a later addition. To support both before and after, use
* %__COMPAT_SCX_KICK_IDLE which becomes 0 on kernels which don't support it.
* Users can use %SCX_KICK_IDLE directly in the future.
*/
#define __COMPAT_SCX_KICK_IDLE \
__COMPAT_ENUM_OR_ZERO(enum scx_kick_flags, SCX_KICK_IDLE)

/*
* scx_switch_all() was replaced by %SCX_OPS_SWITCH_PARTIAL. See
* %__COMPAT_SCX_OPS_SWITCH_PARTIAL in compat.h.
* %__COMPAT_SCX_OPS_SWITCH_PARTIAL in compat.h. This can be dropped in the
* future.
*/
void scx_bpf_switch_all(void) __ksym __weak;

Expand All @@ -34,6 +36,55 @@ static inline void __COMPAT_scx_bpf_switch_all(void)
scx_bpf_switch_all();
}

/*
* scx_bpf_nr_cpu_ids(), scx_bpf_get_possible/online_cpumask() are new. No good
* way to noop these kfuncs. Provide a test macro. Users will be able to assume
* existence in the future.
*/
#define __COMPAT_HAS_CPUMASKS \
bpf_ksym_exists(scx_bpf_nr_cpu_ids)

/*
* cpuperf is new. On older kernels the following helpers either return a fixed
* value of 1024 or become no-ops. Callers can be updated to call the cpuperf
* kfuncs directly in the future.
*/
/*
 * Compat wrapper around scx_bpf_cpuperf_cap().
 *
 * @cpu: CPU number handed through to the kfunc.
 *
 * Returns whatever scx_bpf_cpuperf_cap(@cpu) returns when the kfunc symbol
 * resolves, and the constant 1024 when it does not.
 */
static inline u32 __COMPAT_scx_bpf_cpuperf_cap(s32 cpu)
{
	/* kfunc missing: report the fixed fallback value */
	if (!bpf_ksym_exists(scx_bpf_cpuperf_cap))
		return 1024;

	return scx_bpf_cpuperf_cap(cpu);
}

/*
 * Compat wrapper around scx_bpf_cpuperf_cur().
 *
 * @cpu: CPU number handed through to the kfunc.
 *
 * Returns whatever scx_bpf_cpuperf_cur(@cpu) returns when the kfunc symbol
 * resolves, and the constant 1024 when it does not.
 */
static inline u32 __COMPAT_scx_bpf_cpuperf_cur(s32 cpu)
{
	/* kfunc missing: report the fixed fallback value */
	if (!bpf_ksym_exists(scx_bpf_cpuperf_cur))
		return 1024;

	return scx_bpf_cpuperf_cur(cpu);
}

/*
 * Compat wrapper around scx_bpf_cpuperf_set().
 *
 * @cpu:  CPU number handed through to the kfunc.
 * @perf: value handed through to the kfunc.
 *
 * Calls scx_bpf_cpuperf_set(@cpu, @perf) when the kfunc symbol resolves and
 * does nothing otherwise.
 */
static inline void __COMPAT_scx_bpf_cpuperf_set(s32 cpu, u32 perf)
{
	/*
	 * Plain call rather than "return f();": a return statement with an
	 * expression is not allowed in a function returning void.
	 */
	if (bpf_ksym_exists(scx_bpf_cpuperf_set))
		scx_bpf_cpuperf_set(cpu, perf);
}

/*
* Iteration and scx_bpf_consume_task() are new. The following become noop on
* older kernels. The users can switch to bpf_for_each(scx_dsq) and directly
* call scx_bpf_consume_task() in the future.
*/
/*
 * Expands to a bpf_for_each(scx_dsq, ...) loop guarded by a check that
 * bpf_iter_scx_dsq_new resolves; when it does not, the loop is skipped.
 *
 * The guard is written as "if (!exists) {} else <loop>" so that the hidden
 * "if" already owns an "else". An "else" that a caller writes after the loop
 * body therefore pairs with the caller's own "if", not with this macro's.
 */
#define __COMPAT_DSQ_FOR_EACH(p, dsq_id, flags)					\
	if (!bpf_ksym_exists(bpf_iter_scx_dsq_new)) {} else			\
		bpf_for_each(scx_dsq, (p), (dsq_id), (flags))

/*
 * Compat placeholder: accepts the DSQ iterator @it and the task @p, uses
 * neither, and always returns false.
 *
 * NOTE(review): the body has no bpf_ksym_exists() check and never calls
 * __scx_bpf_consume_task(), so it returns false on every kernel, including
 * ones where that kfunc is available. Confirm this is intentional rather
 * than a missing forward to the real kfunc.
 */
static inline bool __COMPAT_scx_bpf_consume_task(struct bpf_iter_scx_dsq *it,
struct task_struct *p)
{
return false;
}

/*
* Define sched_ext_ops. This may be expanded to define multiple variants for
* backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().
Expand Down
41 changes: 30 additions & 11 deletions tools/sched_ext/include/scx/compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ static inline bool __COMPAT_read_enum(const char *type, const char *name, u64 *v
__val; \
})

/*
 * Look @ksym up by name in __COMPAT_vmlinux_btf.
 *
 * @ksym: symbol name to search for.
 *
 * Returns true when btf__find_by_name() yields a non-negative result and
 * false otherwise.
 */
static inline bool __COMPAT_has_ksym(const char *ksym)
{
	int btf_id;

	/* must run before __COMPAT_vmlinux_btf is consulted below */
	__COMPAT_load_vmlinux_btf();

	btf_id = btf__find_by_name(__COMPAT_vmlinux_btf, ksym);
	return btf_id >= 0;
}

static inline bool __COMPAT_struct_has_field(const char *type, const char *field)
{
const struct btf_type *t;
Expand Down Expand Up @@ -104,11 +110,25 @@ static inline bool __COMPAT_struct_has_field(const char *type, const char *field
* An ops flag, %SCX_OPS_SWITCH_PARTIAL, replaced scx_bpf_switch_all() which had
* to be called from ops.init(). To support both before and after, use both
* %__COMPAT_SCX_OPS_SWITCH_PARTIAL and %__COMPAT_scx_bpf_switch_all() defined
* in compat.bpf.h.
* in compat.bpf.h. Users can switch to directly using %SCX_OPS_SWITCH_PARTIAL
* in the future.
*/
#define __COMPAT_SCX_OPS_SWITCH_PARTIAL \
__COMPAT_ENUM_OR_ZERO("scx_ops_flags", "SCX_OPS_SWITCH_PARTIAL")

/*
* scx_bpf_nr_cpu_ids(), scx_bpf_get_possible/online_cpumask() are new. Users
* will be able to assume existence in the future.
*/
#define __COMPAT_HAS_CPUMASKS \
__COMPAT_has_ksym("scx_bpf_nr_cpu_ids")

/*
* DSQ iterator is new. Users will be able to assume existence in the future.
*/
#define __COMPAT_HAS_DSQ_ITER \
__COMPAT_has_ksym("bpf_iter_scx_dsq_new")

static inline long scx_hotplug_seq(void)
{
int fd;
Expand Down Expand Up @@ -137,15 +157,9 @@ static inline long scx_hotplug_seq(void)
* and attach it, backward compatibility is automatically maintained where
* reasonable.
*
* The following values were added in newer kernels:
*
* - sched_ext_ops.exit_dump_len
* o If nonzero and running on an older kernel, the value is set to zero
* and a warning is emitted
*
* - sched_ext_ops.hotplug_seq
* o If nonzero and running on an older kernel, the scheduler will fail to
* load
* - ops.tick(): Ignored on older kernels with a warning.
* - ops.exit_dump_len: Cleared to zero on older kernels with a warning.
* - ops.hotplug_seq: Ignored on older kernels.
*/
#define SCX_OPS_OPEN(__ops_name, __scx_name) ({ \
struct __scx_name *__skel; \
Expand All @@ -160,11 +174,16 @@ static inline long scx_hotplug_seq(void)

#define SCX_OPS_LOAD(__skel, __ops_name, __scx_name, __uei_name) ({ \
UEI_SET_SIZE(__skel, __ops_name, __uei_name); \
if (__COMPAT_struct_has_field("sched_ext_ops", "exit_dump_len") && \
if (!__COMPAT_struct_has_field("sched_ext_ops", "exit_dump_len") && \
(__skel)->struct_ops.__ops_name->exit_dump_len) { \
fprintf(stderr, "WARNING: kernel doesn't support setting exit dump len\n"); \
(__skel)->struct_ops.__ops_name->exit_dump_len = 0; \
} \
if (!__COMPAT_struct_has_field("sched_ext_ops", "tick") && \
(__skel)->struct_ops.__ops_name->tick) { \
fprintf(stderr, "WARNING: kernel doesn't support ops.tick()\n"); \
(__skel)->struct_ops.__ops_name->tick = NULL; \
} \
SCX_BUG_ON(__scx_name##__load((__skel)), "Failed to load skel"); \
})

Expand Down
46 changes: 27 additions & 19 deletions tools/sched_ext/include/scx/user_exit_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,6 @@ struct user_exit_info {
__sync_val_compare_and_swap(&(__skel)->data->__uei_name.kind, -1, -1); \
})

#define UEI_KIND(__skel, __uei_name) ((__skel)->data->__uei_name.kind)

#define ECODE_USER_MASK ((1LLU << 32) - 1)
#define ECODE_SYS_ACT_MASK (((1LLU << 48) - 1) ^ ECODE_USER_MASK)
#define ECODE_SYS_RSN_MASK (~0LLU ^ (ECODE_SYS_ACT_MASK | ECODE_USER_MASK))

#define UEI_ECODE(__skel, __uei_name) (__skel)->data->__uei_name.exit_code
#define UEI_ECODE_SYS_ACT(__skel, __uei_name) (UEI_ECODE(__skel, __uei_name) & ECODE_SYS_ACT_MASK)
#define UEI_ECODE_SYS_RSN(__skel, __uei_name) (UEI_ECODE(__skel, __uei_name) & ECODE_SYS_RSN_MASK)
#define UEI_ECODE_USER(__skel, __uei_name) (UEI_ECODE(__skel, __uei_name) & ECODE_USER_MASK)

#define UEI_REPORT(__skel, __uei_name) ({ \
struct user_exit_info *__uei = &(__skel)->data->__uei_name; \
char *__uei_dump = (__skel)->data_##__uei_name##_dump->__uei_name##_dump; \
Expand All @@ -88,16 +77,35 @@ struct user_exit_info {
if (__uei->msg[0] != '\0') \
fprintf(stderr, " (%s)", __uei->msg); \
fputs("\n", stderr); \
__uei->exit_code; \
})

#define UEI_RESET(__skel, __uei_name) ({ \
struct user_exit_info *__uei = &(__skel)->data->__uei_name; \
char *__uei_dump = (__skel)->data_##__uei_name##_dump->__uei_name##_dump; \
size_t __uei_dump_len = (__skel)->rodata->__uei_name##_dump_len; \
\
memset(__uei, 0, sizeof(struct user_exit_info)); \
memset(__uei_dump, 0, __uei_dump_len); \
})
/*
* We can't include vmlinux.h while compiling user-space C code, so duplicate
* the scx_exit_code definition here.
*/
/*
 * System-defined exit code bits. Each value is a single bit placed inside the
 * field selected by the matching UEI_ECODE_SYS_*_MASK, so it can be compared
 * against the masked ecode (see UEI_ECODE_RESTART()).
 *
 * NOTE(review): both values are wider than int; this relies on the compiler
 * accepting enumerators that do not fit in int - confirm for the toolchains
 * in use.
 */
enum scx_exit_code {
/* Reasons */
/* bit 32: lowest bit of the UEI_ECODE_SYS_RSN_MASK field (bits 32-47) */
SCX_ECODE_RSN_HOTPLUG = 1LLU << 32,

/* Actions */
/* bit 48: lowest bit of the UEI_ECODE_SYS_ACT_MASK field (bits 48-63) */
SCX_ECODE_ACT_RESTART = 1LLU << 48,
};

/*
 * Field masks for the 64-bit exit code:
 *
 *   bits  0-31  USER     (UEI_ECODE_USER_MASK)
 *   bits 32-47  SYS_RSN  (UEI_ECODE_SYS_RSN_MASK)
 *   bits 48-63  SYS_ACT  (UEI_ECODE_SYS_ACT_MASK)
 */
enum uei_ecode_mask {
	UEI_ECODE_USER_MASK	= 0xffffffffLLU,
	UEI_ECODE_SYS_RSN_MASK	= 0xffffLLU << 32,
	UEI_ECODE_SYS_ACT_MASK	= 0xffffLLU << 48,
};

/*
* These macros interpret the ecode returned from UEI_REPORT().
*/
#define UEI_ECODE_USER(__ecode) ((__ecode) & UEI_ECODE_USER_MASK)
#define UEI_ECODE_SYS_RSN(__ecode) ((__ecode) & UEI_ECODE_SYS_RSN_MASK)
#define UEI_ECODE_SYS_ACT(__ecode) ((__ecode) & UEI_ECODE_SYS_ACT_MASK)

#define UEI_ECODE_RESTART(__ecode) (UEI_ECODE_SYS_ACT((__ecode)) == SCX_ECODE_ACT_RESTART)

#endif /* __bpf__ */
#endif /* __USER_EXIT_INFO_H */
26 changes: 20 additions & 6 deletions tools/sched_ext/scx_central.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,19 @@ const char help_fmt[] =
"\n"
" -s SLICE_US Override slice duration\n"
" -c CPU Override the central CPU (default: 0)\n"
" -v Print libbpf debug messages\n"
" -h Display this help and exit\n";

static bool verbose;
static volatile int exit_req;

/*
 * Print callback installed through libbpf_set_print().
 *
 * @level:  severity of the message being reported.
 * @format: printf-style format string.
 * @args:   arguments for @format.
 *
 * LIBBPF_DEBUG messages are dropped unless the file-level 'verbose' flag is
 * set; every other message is written to stderr. Returns 0 for a dropped
 * message and the vfprintf() result otherwise.
 */
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
{
	bool suppressed = !verbose && level == LIBBPF_DEBUG;

	return suppressed ? 0 : vfprintf(stderr, format, args);
}

static void sigint_handler(int dummy)
{
exit_req = 1;
Expand All @@ -37,28 +46,30 @@ int main(int argc, char **argv)
{
struct scx_central *skel;
struct bpf_link *link;
__u64 seq = 0;
__u64 seq = 0, ecode;
__s32 opt;
cpu_set_t *cpuset;

libbpf_set_print(libbpf_print_fn);
signal(SIGINT, sigint_handler);
signal(SIGTERM, sigint_handler);

libbpf_set_strict_mode(LIBBPF_STRICT_ALL);

restart:
skel = SCX_OPS_OPEN(central_ops, scx_central);

skel->rodata->central_cpu = 0;
skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();

while ((opt = getopt(argc, argv, "s:c:ph")) != -1) {
while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) {
switch (opt) {
case 's':
skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
break;
case 'c':
skel->rodata->central_cpu = strtoul(optarg, NULL, 0);
break;
case 'v':
verbose = true;
break;
default:
fprintf(stderr, help_fmt, basename(argv[0]));
return opt != 'h';
Expand Down Expand Up @@ -115,7 +126,10 @@ int main(int argc, char **argv)
}

bpf_link__destroy(link);
UEI_REPORT(skel, uei);
ecode = UEI_REPORT(skel, uei);
scx_central__destroy(skel);

if (UEI_ECODE_RESTART(ecode))
goto restart;
return 0;
}
Loading