bpf: add cpu time counter kfuncs #8653

Closed
72 changes: 72 additions & 0 deletions arch/x86/net/bpf_jit_comp.c
@@ -9,12 +9,14 @@
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
#include <linux/clocksource.h>
#include <linux/memory.h>
#include <linux/sort.h>
#include <asm/extable.h>
#include <asm/ftrace.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
#include <asm/timer.h>
#include <asm/text-patching.h>
#include <asm/unwind.h>
#include <asm/cfi.h>
@@ -2254,6 +2256,63 @@ st: if (is_imm8(insn->off))
case BPF_JMP | BPF_CALL: {
u8 *ip = image + addrs[i - 1];

if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
IS_ENABLED(CONFIG_BPF_SYSCALL) &&
imm32 == BPF_CALL_IMM(bpf_get_cpu_time_counter) &&
cpu_feature_enabled(X86_FEATURE_TSC) &&
using_native_sched_clock() && sched_clock_stable()) {
/* The default implementation of this kfunc uses
* ktime_get_raw_ns(), which is effectively implemented as
* `(u64)rdtsc_ordered() & S64_MAX`. The JIT skips the
* masking part because we assume it's not needed in the BPF
* use case (two measurements close in time).
* The original rdtsc_ordered() code uses the sequence
* 'rdtsc; nop; nop; nop' and patches it into
* 'lfence; rdtsc' or 'rdtscp' depending on CPU features.
* The JIT uses the 'lfence; rdtsc' variant because a BPF program
* doesn't care about the cookie provided by rdtscp in RCX.
* Save RDX because RDTSC uses EDX:EAX to return the u64.
*/
emit_mov_reg(&prog, true, AUX_REG, BPF_REG_3);
if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC))
EMIT_LFENCE();
EMIT2(0x0F, 0x31);

/* shl RDX, 32 */
maybe_emit_1mod(&prog, BPF_REG_3, true);
EMIT3(0xC1, add_1reg(0xE0, BPF_REG_3), 32);
/* or RAX, RDX */
maybe_emit_mod(&prog, BPF_REG_0, BPF_REG_3, true);
EMIT2(0x09, add_2reg(0xC0, BPF_REG_0, BPF_REG_3));
/* restore RDX from R11 */
emit_mov_reg(&prog, true, BPF_REG_3, AUX_REG);

break;
}

if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
imm32 == BPF_CALL_IMM(bpf_cpu_time_counter_to_ns) &&
cpu_feature_enabled(X86_FEATURE_TSC) &&
using_native_sched_clock() && sched_clock_stable()) {
struct cyc2ns_data data;
u32 mult, shift;

cyc2ns_read_begin(&data);
mult = data.cyc2ns_mul;
shift = data.cyc2ns_shift;
cyc2ns_read_end();
/* imul RAX, RDI, mult */
maybe_emit_mod(&prog, BPF_REG_1, BPF_REG_0, true);
EMIT2_off32(0x69, add_2reg(0xC0, BPF_REG_1, BPF_REG_0),
mult);

/* shr RAX, shift (which is less than 64) */
maybe_emit_1mod(&prog, BPF_REG_0, true);
EMIT3(0xC1, add_1reg(0xE8, BPF_REG_0), shift);

break;
}

func = (u8 *) __bpf_call_base + imm32;
if (src_reg == BPF_PSEUDO_CALL && tail_call_reachable) {
LOAD_TAIL_CALL_CNT_PTR(stack_depth);
@@ -3865,3 +3924,16 @@ bool bpf_jit_supports_timed_may_goto(void)
{
return true;
}

/* x86-64 JIT can inline kfunc */
bool bpf_jit_inlines_kfunc_call(s32 imm)
{
if (!IS_ENABLED(CONFIG_BPF_SYSCALL))
return false;
if ((imm == BPF_CALL_IMM(bpf_get_cpu_time_counter) ||
imm == BPF_CALL_IMM(bpf_cpu_time_counter_to_ns)) &&
cpu_feature_enabled(X86_FEATURE_TSC) &&
using_native_sched_clock() && sched_clock_stable())
return true;
return false;
}
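
For readers who want the emitted x86-64 sequence in one place, here is an illustrative user-space sketch (not part of the patch) of what the JITed code computes: an lfence-ordered TSC read combined from EDX:EAX, followed by the truncating multiply/shift scaling that the emitted 'imul RAX, RDI, mult; shr RAX, shift' performs. The mult and shift values are placeholders; the real ones come from cyc2ns_read_begin() at JIT time and are CPU-specific.

/* Illustrative user-space sketch, NOT kernel code. 'lfence; rdtsc' mirrors
 * the emitted 0F AE E8 / 0F 31 bytes, the shift/or mirrors the EDX:EAX
 * combine, and the truncating multiply + shift mirrors the emitted
 * imul/shr. mult/shift below are placeholders.
 */
#include <stdint.h>
#include <stdio.h>

static inline uint64_t rdtsc_ordered_sketch(void)
{
	uint32_t lo, hi;

	__asm__ __volatile__("lfence; rdtsc" : "=a"(lo), "=d"(hi));
	return ((uint64_t)hi << 32) | lo;	/* shl rdx, 32; or rax, rdx */
}

static inline uint64_t cycles_to_ns_sketch(uint64_t cycles, uint32_t mult,
					   uint32_t shift)
{
	/* Truncating 64-bit multiply, accurate for small cycle deltas. */
	return (cycles * (uint64_t)mult) >> shift;
}

int main(void)
{
	uint32_t mult = 1, shift = 0;	/* placeholders for a ~1 GHz TSC */
	uint64_t start = rdtsc_ordered_sketch();
	uint64_t end = rdtsc_ordered_sketch();

	printf("delta: %llu cycles ~= %llu ns\n",
	       (unsigned long long)(end - start),
	       (unsigned long long)cycles_to_ns_sketch(end - start, mult, shift));
	return 0;
}

Note that the in-kernel cyc2ns conversion goes through a wider multiply; the truncated form above matches the JIT's behaviour, which is sufficient for two measurements taken close together.
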
58 changes: 58 additions & 0 deletions arch/x86/net/bpf_jit_comp32.c
@@ -12,10 +12,12 @@
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/clocksource.h>
#include <asm/cacheflush.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
#include <asm/asm-prototypes.h>
#include <asm/timer.h>
#include <linux/bpf.h>

/*
@@ -2094,6 +2096,50 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
int err;

if (IS_ENABLED(CONFIG_BPF_SYSCALL) &&
imm32 == BPF_CALL_IMM(bpf_get_cpu_time_counter) &&
cpu_feature_enabled(X86_FEATURE_TSC) &&
using_native_sched_clock() && sched_clock_stable()) {
/* The default implementation of this kfunc uses
* ktime_get_raw_ns(), which is effectively implemented as
* `(u64)rdtsc_ordered() & S64_MAX`. The JIT skips the
* masking part because we assume it's not needed in the BPF
* use case (two measurements close in time).
* The original rdtsc_ordered() code uses the sequence
* 'rdtsc; nop; nop; nop' and patches it into
* 'lfence; rdtsc' or 'rdtscp' depending on CPU features.
* The JIT uses the 'lfence; rdtsc' variant because a BPF program
* doesn't care about the cookie provided by rdtscp in ECX.
*/
if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC))
EMIT3(0x0F, 0xAE, 0xE8);
EMIT2(0x0F, 0x31);
break;
}
if (IS_ENABLED(CONFIG_BPF_SYSCALL) &&
imm32 == BPF_CALL_IMM(bpf_cpu_time_counter_to_ns) &&
cpu_feature_enabled(X86_FEATURE_TSC) &&
using_native_sched_clock() && sched_clock_stable()) {
struct cyc2ns_data data;
u32 mult, shift;

cyc2ns_read_begin(&data);
mult = data.cyc2ns_mul;
shift = data.cyc2ns_shift;
cyc2ns_read_end();

/* move parameter to BPF_REG_0 */
emit_ia32_mov_r64(true, bpf2ia32[BPF_REG_0],
bpf2ia32[BPF_REG_1], true, true,
&prog, bpf_prog->aux);
/* multiply parameter by mult */
emit_ia32_mul_i64(bpf2ia32[BPF_REG_0],
mult, true, &prog);
/* shift parameter by shift which is less than 64 */
emit_ia32_rsh_i64(bpf2ia32[BPF_REG_0],
shift, true, &prog);
}

err = emit_kfunc_call(bpf_prog,
image + addrs[i],
insn, &prog);
@@ -2621,3 +2667,15 @@ bool bpf_jit_supports_kfunc_call(void)
{
return true;
}

bool bpf_jit_inlines_kfunc_call(s32 imm)
{
if (!IS_ENABLED(CONFIG_BPF_SYSCALL))
return false;
if ((imm == BPF_CALL_IMM(bpf_get_cpu_time_counter) ||
imm == BPF_CALL_IMM(bpf_cpu_time_counter_to_ns)) &&
cpu_feature_enabled(X86_FEATURE_TSC) &&
using_native_sched_clock() && sched_clock_stable())
return true;
return false;
}
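
A side note on the 32-bit path: emit_ia32_mul_i64() and emit_ia32_rsh_i64() implement the same truncating 'cycles * mult >> shift' scaling with 32-bit instructions. Below is a rough C sketch of that decomposition, illustrative only and not the JIT's actual register allocation.

/* Illustrative sketch, NOT kernel code: the 64-bit truncating
 * 'cycles * mult >> shift' scaling expressed with 32-bit halves,
 * roughly what the x86-32 JIT generates via emit_ia32_mul_i64()
 * and emit_ia32_rsh_i64().
 */
#include <stdint.h>

uint64_t cyc2ns_32bit_sketch(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	uint32_t lo = (uint32_t)cycles;
	uint32_t hi = (uint32_t)(cycles >> 32);
	/* 32x32->64 product of the low half ... */
	uint64_t prod = (uint64_t)lo * mult;

	/* ... plus the high half's contribution; only its low 32 bits
	 * survive in the truncated 64-bit result. */
	prod += (uint64_t)(hi * mult) << 32;
	return prod >> shift;
}
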
4 changes: 4 additions & 0 deletions include/linux/bpf.h
@@ -3387,6 +3387,10 @@ void bpf_user_rnd_init_once(void);
u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
u64 bpf_get_raw_cpu_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

/* Inlined kfuncs */
u64 bpf_get_cpu_time_counter(void);
u64 bpf_cpu_time_counter_to_ns(u64 cycles);

#if defined(CONFIG_NET)
bool bpf_sock_common_is_valid_access(int off, int size,
enum bpf_access_type type,
1 change: 1 addition & 0 deletions include/linux/filter.h
@@ -1128,6 +1128,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
void bpf_jit_compile(struct bpf_prog *prog);
bool bpf_jit_needs_zext(void);
bool bpf_jit_inlines_helper_call(s32 imm);
bool bpf_jit_inlines_kfunc_call(s32 imm);
bool bpf_jit_supports_subprog_tailcalls(void);
bool bpf_jit_supports_percpu_insn(void);
bool bpf_jit_supports_kfunc_call(void);
11 changes: 11 additions & 0 deletions kernel/bpf/core.c
@@ -3035,6 +3035,17 @@ bool __weak bpf_jit_inlines_helper_call(s32 imm)
return false;
}

/* Return true if the JIT inlines the call to the kfunc corresponding to
* the imm.
*
* The verifier will not patch the insn->imm for the call to the kfunc if
* this returns true.
*/
bool __weak bpf_jit_inlines_kfunc_call(s32 imm)
{
return false;
}

/* Return TRUE if the JIT backend supports mixing bpf2bpf and tailcalls. */
bool __weak bpf_jit_supports_subprog_tailcalls(void)
{
12 changes: 12 additions & 0 deletions kernel/bpf/helpers.c
@@ -3193,6 +3193,16 @@ __bpf_kfunc void bpf_local_irq_restore(unsigned long *flags__irq_flag)
local_irq_restore(*flags__irq_flag);
}

__bpf_kfunc u64 bpf_get_cpu_time_counter(void)
{
return ktime_get_raw_fast_ns();
}

__bpf_kfunc u64 bpf_cpu_time_counter_to_ns(u64 cycles)
{
return cycles;
}

__bpf_kfunc_end_defs();

BTF_KFUNCS_START(generic_btf_ids)
@@ -3293,6 +3303,8 @@ BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLE
BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_local_irq_save)
BTF_ID_FLAGS(func, bpf_local_irq_restore)
BTF_ID_FLAGS(func, bpf_get_cpu_time_counter, KF_FASTCALL)
BTF_ID_FLAGS(func, bpf_cpu_time_counter_to_ns, KF_FASTCALL)
BTF_KFUNCS_END(common_btf_ids)

static const struct btf_kfunc_id_set common_kfunc_set = {
41 changes: 35 additions & 6 deletions kernel/bpf/verifier.c
@@ -17035,6 +17035,24 @@ static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
}
}

/* True if fixup_kfunc_call() replaces calls to kfunc number 'imm'; the
* replacement patch is presumed to follow the bpf_fastcall contract
* (see mark_fastcall_pattern_for_call() below).
*/
static bool verifier_inlines_kfunc_call(struct bpf_verifier_env *env, s32 imm)
{
const struct bpf_kfunc_desc *desc = find_kfunc_desc(env->prog, imm, 0);

if (!env->prog->jit_requested)
return false;

if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast])
return true;

return false;
}

struct call_summary {
u8 num_params;
bool is_void;
@@ -17077,7 +17095,10 @@ static bool get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call
/* error would be reported later */
return false;
cs->num_params = btf_type_vlen(meta.func_proto);
cs->fastcall = meta.kfunc_flags & KF_FASTCALL;
cs->fastcall = meta.kfunc_flags & KF_FASTCALL &&
(verifier_inlines_kfunc_call(env, call->imm) ||
(meta.btf == btf_vmlinux &&
bpf_jit_inlines_kfunc_call(call->imm)));
cs->is_void = btf_type_is_void(btf_type_by_id(meta.btf, meta.func_proto->type));
return true;
}
@@ -21223,6 +21244,7 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
struct bpf_insn *insn_buf, int insn_idx, int *cnt)
{
const struct bpf_kfunc_desc *desc;
s32 imm = insn->imm;

if (!insn->imm) {
verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
@@ -21246,7 +21268,18 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
insn->imm = BPF_CALL_IMM(desc->addr);
if (insn->off)
return 0;
if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
if (verifier_inlines_kfunc_call(env, imm)) {
if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
*cnt = 1;
} else {
verbose(env, "verifier internal error: kfunc id %d has no inline code\n",
desc->func_id);
return -EFAULT;
}

} else if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
@@ -21307,10 +21340,6 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,

__fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
node_offset_reg, insn, insn_buf, cnt);
} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
*cnt = 1;
} else if (is_bpf_wq_set_callback_impl_kfunc(desc->func_id)) {
struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(BPF_REG_4, (long)env->prog->aux) };

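
To summarize the verifier-side change: a kfunc call keeps bpf_fastcall treatment only if the KF_FASTCALL flag is set and either the verifier itself inlines the call, or the kfunc lives in vmlinux and the JIT reports it as inlined via bpf_jit_inlines_kfunc_call(). A condensed restatement follows; it is illustrative only, and the parameter names are descriptive rather than the kernel's.

/* Illustrative restatement of the updated get_call_summary() fastcall
 * condition; not kernel code. */
static bool kfunc_call_is_fastcall(bool has_kf_fastcall_flag,
				   bool verifier_inlines_call,
				   bool is_vmlinux_kfunc,
				   bool jit_inlines_call)
{
	return has_kf_fastcall_flag &&
	       (verifier_inlines_call ||
		(is_vmlinux_kfunc && jit_inlines_call));
}
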
35 changes: 35 additions & 0 deletions tools/testing/selftests/bpf/prog_tests/test_cpu_cycles.c
@@ -0,0 +1,35 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 Meta Inc. */

#include <test_progs.h>
#include "test_cpu_cycles.skel.h"

static void cpu_cycles(void)
{
LIBBPF_OPTS(bpf_test_run_opts, opts);
struct test_cpu_cycles *skel;
int err, pfd;

skel = test_cpu_cycles__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_cpu_cycles open and load"))
return;

pfd = bpf_program__fd(skel->progs.bpf_cpu_cycles);
if (!ASSERT_GT(pfd, 0, "test_cpu_cycles fd"))
goto fail;

err = bpf_prog_test_run_opts(pfd, &opts);
if (!ASSERT_OK(err, "test_cpu_cycles test run"))
goto fail;

ASSERT_NEQ(skel->bss->cycles, 0, "test_cpu_cycles 0 cycles");
ASSERT_NEQ(skel->bss->ns, 0, "test_cpu_cycles 0 ns");
fail:
test_cpu_cycles__destroy(skel);
}

void test_cpu_cycles(void)
{
if (test__start_subtest("cpu_cycles"))
cpu_cycles();
}
2 changes: 2 additions & 0 deletions tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -102,6 +102,7 @@
#include "verifier_xdp_direct_packet_access.skel.h"
#include "verifier_bits_iter.skel.h"
#include "verifier_lsm.skel.h"
#include "verifier_cpu_cycles.skel.h"
#include "irq.skel.h"

#define MAX_ENTRIES 11
@@ -236,6 +237,7 @@ void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); }
void test_verifier_lsm(void) { RUN(verifier_lsm); }
void test_irq(void) { RUN(irq); }
void test_verifier_mtu(void) { RUN(verifier_mtu); }
void test_verifier_cpu_cycles(void) { RUN(verifier_cpu_cycles); }

static int init_test_val_map(struct bpf_object *obj, char *map_name)
{
25 changes: 25 additions & 0 deletions tools/testing/selftests/bpf/progs/test_cpu_cycles.c
@@ -0,0 +1,25 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 Meta Inc. */

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

extern u64 bpf_cpu_time_counter_to_ns(u64 cycles) __weak __ksym;
extern u64 bpf_get_cpu_time_counter(void) __weak __ksym;

__u64 cycles, ns;

SEC("syscall")
int bpf_cpu_cycles(void)
{
struct bpf_pidns_info pidns;
__u64 start;

start = bpf_get_cpu_time_counter();
bpf_get_ns_current_pid_tgid(0, 0, &pidns, sizeof(struct bpf_pidns_info));
cycles = bpf_get_cpu_time_counter() - start;
ns = bpf_cpu_time_counter_to_ns(cycles);
return 0;
}

char _license[] SEC("license") = "GPL";