diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index d3491cc0898bf..56f7557048d16 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -9,12 +9,14 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -2254,6 +2256,63 @@ st: if (is_imm8(insn->off))
 
 		case BPF_JMP | BPF_CALL: {
 			u8 *ip = image + addrs[i - 1];
+
+			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
+			    IS_ENABLED(CONFIG_BPF_SYSCALL) &&
+			    imm32 == BPF_CALL_IMM(bpf_get_cpu_time_counter) &&
+			    cpu_feature_enabled(X86_FEATURE_TSC) &&
+			    using_native_sched_clock() && sched_clock_stable()) {
+				/* The default implementation of this kfunc uses
+				 * ktime_get_raw_fast_ns(), which is effectively
+				 * `(u64)rdtsc_ordered() & S64_MAX`. The JIT skips the
+				 * masking because it is not needed in the BPF use case
+				 * (two measurements close in time).
+				 * The original rdtsc_ordered() code uses the sequence
+				 * 'rdtsc; nop; nop; nop', which is patched into
+				 * 'lfence; rdtsc' or 'rdtscp' depending on CPU features.
+				 * The JIT uses the 'lfence; rdtsc' variant because a BPF
+				 * program doesn't care about the cookie rdtscp returns in RCX.
+				 * Save RDX because RDTSC will use EDX:EAX to return the u64.
+				 */
+				emit_mov_reg(&prog, true, AUX_REG, BPF_REG_3);
+				if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC))
+					EMIT_LFENCE();
+				EMIT2(0x0F, 0x31);
+
+				/* shl RDX, 32 */
+				maybe_emit_1mod(&prog, BPF_REG_3, true);
+				EMIT3(0xC1, add_1reg(0xE0, BPF_REG_3), 32);
+				/* or RAX, RDX */
+				maybe_emit_mod(&prog, BPF_REG_0, BPF_REG_3, true);
+				EMIT2(0x09, add_2reg(0xC0, BPF_REG_0, BPF_REG_3));
+				/* restore RDX from R11 */
+				emit_mov_reg(&prog, true, BPF_REG_3, AUX_REG);
+
+				break;
+			}
+
+			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
+			    imm32 == BPF_CALL_IMM(bpf_cpu_time_counter_to_ns) &&
+			    cpu_feature_enabled(X86_FEATURE_TSC) &&
+			    using_native_sched_clock() && sched_clock_stable()) {
+				struct cyc2ns_data data;
+				u32 mult, shift;
+
+				cyc2ns_read_begin(&data);
+				mult = data.cyc2ns_mul;
+				shift = data.cyc2ns_shift;
+				cyc2ns_read_end();
+				/* imul RAX, RDI, mult */
+				maybe_emit_mod(&prog, BPF_REG_1, BPF_REG_0, true);
+				EMIT2_off32(0x69, add_2reg(0xC0, BPF_REG_1, BPF_REG_0),
+					    mult);
+
+				/* shr RAX, shift (which is less than 64) */
+				maybe_emit_1mod(&prog, BPF_REG_0, true);
+				EMIT3(0xC1, add_1reg(0xE8, BPF_REG_0), shift);
+
+				break;
+			}
+
 			func = (u8 *) __bpf_call_base + imm32;
 			if (src_reg == BPF_PSEUDO_CALL && tail_call_reachable) {
 				LOAD_TAIL_CALL_CNT_PTR(stack_depth);
@@ -3865,3 +3924,16 @@ bool bpf_jit_supports_timed_may_goto(void)
 {
 	return true;
 }
+
+/* The x86-64 JIT can inline these kfunc calls. */
+bool bpf_jit_inlines_kfunc_call(s32 imm)
+{
+	if (!IS_ENABLED(CONFIG_BPF_SYSCALL))
+		return false;
+	if ((imm == BPF_CALL_IMM(bpf_get_cpu_time_counter) ||
+	     imm == BPF_CALL_IMM(bpf_cpu_time_counter_to_ns)) &&
+	    cpu_feature_enabled(X86_FEATURE_TSC) &&
+	    using_native_sched_clock() && sched_clock_stable())
+		return true;
+	return false;
+}
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index de0f9e5f9f73a..9791a3fb9d692 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -12,10 +12,12 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
+#include
 #include
 
 /*
@@ -2094,6 +2096,51 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
 			int err;
 
+			if (IS_ENABLED(CONFIG_BPF_SYSCALL) &&
+			    imm32 == BPF_CALL_IMM(bpf_get_cpu_time_counter) &&
+			    cpu_feature_enabled(X86_FEATURE_TSC) &&
+			    using_native_sched_clock() && sched_clock_stable()) {
+				/* The default implementation of this kfunc uses
+				 * ktime_get_raw_fast_ns(), which is effectively
+				 * `(u64)rdtsc_ordered() & S64_MAX`. The JIT skips the
+				 * masking because it is not needed in the BPF use case
+				 * (two measurements close in time).
+				 * The original rdtsc_ordered() code uses the sequence
+				 * 'rdtsc; nop; nop; nop', which is patched into
+				 * 'lfence; rdtsc' or 'rdtscp' depending on CPU features.
+				 * The JIT uses the 'lfence; rdtsc' variant because a BPF
+				 * program doesn't care about the cookie rdtscp returns in ECX.
+				 */
+				if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC))
+					EMIT3(0x0F, 0xAE, 0xE8);
+				EMIT2(0x0F, 0x31);
+				break;
+			}
+			if (IS_ENABLED(CONFIG_BPF_SYSCALL) &&
+			    imm32 == BPF_CALL_IMM(bpf_cpu_time_counter_to_ns) &&
+			    cpu_feature_enabled(X86_FEATURE_TSC) &&
+			    using_native_sched_clock() && sched_clock_stable()) {
+				struct cyc2ns_data data;
+				u32 mult, shift;
+
+				cyc2ns_read_begin(&data);
+				mult = data.cyc2ns_mul;
+				shift = data.cyc2ns_shift;
+				cyc2ns_read_end();
+
+				/* move the parameter to BPF_REG_0 */
+				emit_ia32_mov_r64(true, bpf2ia32[BPF_REG_0],
+						  bpf2ia32[BPF_REG_1], true, true,
+						  &prog, bpf_prog->aux);
+				/* multiply the parameter by mult */
+				emit_ia32_mul_i64(bpf2ia32[BPF_REG_0],
+						  mult, true, &prog);
+				/* shift right by shift (which is less than 64) */
+				emit_ia32_rsh_i64(bpf2ia32[BPF_REG_0],
+						  shift, true, &prog);
+				break;
+			}
+
 			err = emit_kfunc_call(bpf_prog,
 					      image + addrs[i],
 					      insn, &prog);
@@ -2621,3 +2668,15 @@ bool bpf_jit_supports_kfunc_call(void)
 {
 	return true;
 }
+
+bool bpf_jit_inlines_kfunc_call(s32 imm)
+{
+	if (!IS_ENABLED(CONFIG_BPF_SYSCALL))
+		return false;
+	if ((imm == BPF_CALL_IMM(bpf_get_cpu_time_counter) ||
+	     imm == BPF_CALL_IMM(bpf_cpu_time_counter_to_ns)) &&
+	    cpu_feature_enabled(X86_FEATURE_TSC) &&
+	    using_native_sched_clock() && sched_clock_stable())
+		return true;
+	return false;
+}
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7d55553de3fce..5c4d35019b229 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -3387,6 +3387,10 @@ void bpf_user_rnd_init_once(void);
 u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 u64 bpf_get_raw_cpu_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
+/* Inlined kfuncs */
+u64 bpf_get_cpu_time_counter(void);
+u64 bpf_cpu_time_counter_to_ns(u64 cycles);
+
 #if defined(CONFIG_NET)
 bool bpf_sock_common_is_valid_access(int off, int size,
 				     enum bpf_access_type type,
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 590476743f7a3..2fbfa1bc3f497 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1128,6 +1128,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
 void bpf_jit_compile(struct bpf_prog *prog);
 bool bpf_jit_needs_zext(void);
 bool bpf_jit_inlines_helper_call(s32 imm);
+bool bpf_jit_inlines_kfunc_call(s32 imm);
 bool bpf_jit_supports_subprog_tailcalls(void);
 bool bpf_jit_supports_percpu_insn(void);
 bool bpf_jit_supports_kfunc_call(void);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 62cb9557ad3be..1d811fc39eacd 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -3035,6 +3035,17 @@ bool __weak bpf_jit_inlines_helper_call(s32 imm)
 	return false;
 }
 
+/* Return true if the JIT inlines the call to the kfunc corresponding to
+ * the imm.
+ *
+ * The verifier will not patch the insn->imm for the call to the kfunc if
+ * this returns true.
+ */
+bool __weak bpf_jit_inlines_kfunc_call(s32 imm)
+{
+	return false;
+}
+
 /* Return TRUE if the JIT backend supports mixing bpf2bpf and tailcalls. */
 bool __weak bpf_jit_supports_subprog_tailcalls(void)
 {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 5449756ba102e..cc986d2048db3 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -3193,6 +3193,16 @@ __bpf_kfunc void bpf_local_irq_restore(unsigned long *flags__irq_flag)
 	local_irq_restore(*flags__irq_flag);
 }
 
+__bpf_kfunc u64 bpf_get_cpu_time_counter(void)
+{
+	return ktime_get_raw_fast_ns();
+}
+
+__bpf_kfunc u64 bpf_cpu_time_counter_to_ns(u64 cycles)
+{
+	return cycles;
+}
+
 __bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(generic_btf_ids)
@@ -3293,6 +3303,8 @@ BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLE
 BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
 BTF_ID_FLAGS(func, bpf_local_irq_save)
 BTF_ID_FLAGS(func, bpf_local_irq_restore)
+BTF_ID_FLAGS(func, bpf_get_cpu_time_counter, KF_FASTCALL)
+BTF_ID_FLAGS(func, bpf_cpu_time_counter_to_ns, KF_FASTCALL)
 BTF_KFUNCS_END(common_btf_ids)
 
 static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3303a3605ee80..0c4ea977973cb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -17035,6 +17035,24 @@ static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
 	}
 }
 
+/* True if fixup_kfunc_call() replaces calls to kfunc number 'imm';
+ * the replacement patch is presumed to follow the bpf_fastcall contract
+ * (see mark_fastcall_pattern_for_call() below).
+ */
+static bool verifier_inlines_kfunc_call(struct bpf_verifier_env *env, s32 imm)
+{
+	const struct bpf_kfunc_desc *desc = find_kfunc_desc(env->prog, imm, 0);
+
+	if (!env->prog->jit_requested)
+		return false;
+
+	if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
+	    desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast])
+		return true;
+
+	return false;
+}
+
 struct call_summary {
 	u8 num_params;
 	bool is_void;
@@ -17077,7 +17095,10 @@ static bool get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call
 			/* error would be reported later */
 			return false;
 		cs->num_params = btf_type_vlen(meta.func_proto);
-		cs->fastcall = meta.kfunc_flags & KF_FASTCALL;
+		cs->fastcall = meta.kfunc_flags & KF_FASTCALL &&
+			       (verifier_inlines_kfunc_call(env, call->imm) ||
+				(meta.btf == btf_vmlinux &&
+				 bpf_jit_inlines_kfunc_call(call->imm)));
 		cs->is_void = btf_type_is_void(btf_type_by_id(meta.btf, meta.func_proto->type));
 		return true;
 	}
@@ -21223,6 +21244,7 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			    struct bpf_insn *insn_buf, int insn_idx, int *cnt)
 {
 	const struct bpf_kfunc_desc *desc;
+	s32 imm = insn->imm;
 
 	if (!insn->imm) {
 		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
@@ -21246,7 +21268,18 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	insn->imm = BPF_CALL_IMM(desc->addr);
 	if (insn->off)
 		return 0;
-	if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
+	if (verifier_inlines_kfunc_call(env, imm)) {
+		if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
+		    desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
+			insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
+			*cnt = 1;
+		} else {
+			verbose(env, "verifier internal error: kfunc id %d has no inline code\n",
+				desc->func_id);
+			return -EFAULT;
+		}
+
+	} else if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
 	    desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
 		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
@@ -21307,10 +21340,6 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 
 		__fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
 						node_offset_reg, insn, insn_buf, cnt);
-	} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
-		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
-		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
-		*cnt = 1;
 	} else if (is_bpf_wq_set_callback_impl_kfunc(desc->func_id)) {
 		struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(BPF_REG_4, (long)env->prog->aux) };
 
diff --git a/tools/testing/selftests/bpf/prog_tests/test_cpu_cycles.c b/tools/testing/selftests/bpf/prog_tests/test_cpu_cycles.c
new file mode 100644
index 0000000000000..d7f3b66594b34
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_cpu_cycles.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Inc. */
+
+#include
+#include "test_cpu_cycles.skel.h"
+
+static void cpu_cycles(void)
+{
+	LIBBPF_OPTS(bpf_test_run_opts, opts);
+	struct test_cpu_cycles *skel;
+	int err, pfd;
+
+	skel = test_cpu_cycles__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_cpu_cycles open and load"))
+		return;
+
+	pfd = bpf_program__fd(skel->progs.bpf_cpu_cycles);
+	if (!ASSERT_GT(pfd, 0, "test_cpu_cycles fd"))
+		goto fail;
+
+	err = bpf_prog_test_run_opts(pfd, &opts);
+	if (!ASSERT_OK(err, "test_cpu_cycles test run"))
+		goto fail;
+
+	ASSERT_NEQ(skel->bss->cycles, 0, "test_cpu_cycles 0 cycles");
+	ASSERT_NEQ(skel->bss->ns, 0, "test_cpu_cycles 0 ns");
+fail:
+	test_cpu_cycles__destroy(skel);
+}
+
+void test_cpu_cycles(void)
+{
+	if (test__start_subtest("cpu_cycles"))
+		cpu_cycles();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index e66a57970d28c..d5e7e302a344f 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -102,6 +102,7 @@
 #include "verifier_xdp_direct_packet_access.skel.h"
 #include "verifier_bits_iter.skel.h"
 #include "verifier_lsm.skel.h"
+#include "verifier_cpu_cycles.skel.h"
 #include "irq.skel.h"
 
 #define MAX_ENTRIES 11
@@ -236,6 +237,7 @@ void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); }
 void test_verifier_lsm(void) { RUN(verifier_lsm); }
 void test_irq(void) { RUN(irq); }
 void test_verifier_mtu(void) { RUN(verifier_mtu); }
+void test_verifier_cpu_cycles(void) { RUN(verifier_cpu_cycles); }
 
 static int init_test_val_map(struct bpf_object *obj, char *map_name)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_cpu_cycles.c b/tools/testing/selftests/bpf/progs/test_cpu_cycles.c
new file mode 100644
index 0000000000000..a7f8a4c6b8545
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cpu_cycles.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Inc. */
+
+#include "vmlinux.h"
+#include
+
+extern u64 bpf_cpu_time_counter_to_ns(u64 cycles) __weak __ksym;
+extern u64 bpf_get_cpu_time_counter(void) __weak __ksym;
+
+__u64 cycles, ns;
+
+SEC("syscall")
+int bpf_cpu_cycles(void)
+{
+	struct bpf_pidns_info pidns;
+	__u64 start;
+
+	start = bpf_get_cpu_time_counter();
+	bpf_get_ns_current_pid_tgid(0, 0, &pidns, sizeof(struct bpf_pidns_info));
+	cycles = bpf_get_cpu_time_counter() - start;
+	ns = bpf_cpu_time_counter_to_ns(cycles);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_cpu_cycles.c b/tools/testing/selftests/bpf/progs/verifier_cpu_cycles.c
new file mode 100644
index 0000000000000..5b62e36903620
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_cpu_cycles.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Inc. */
+#include "vmlinux.h"
+#include
+#include
+#include "bpf_misc.h"
+
+extern u64 bpf_cpu_time_counter_to_ns(u64 cycles) __weak __ksym;
+extern u64 bpf_get_cpu_time_counter(void) __weak __ksym;
+
+SEC("syscall")
+__arch_x86_64
+__xlated("0: call kernel-function")
+__naked int bpf_rdtsc(void)
+{
+	asm volatile(
+	"call %[bpf_get_cpu_time_counter];"
+	"exit"
+	:
+	: __imm(bpf_get_cpu_time_counter)
+	: __clobber_all
+	);
+}
+
+SEC("syscall")
+__arch_x86_64
+/* program entry for bpf_rdtsc_jit_x86_64(), regular function prologue */
+__jited(" endbr64")
+__jited(" nopl (%rax,%rax)")
+__jited(" nopl (%rax)")
+__jited(" pushq %rbp")
+__jited(" movq %rsp, %rbp")
+__jited(" endbr64")
+/* save RDX in R11 as it will be overwritten */
+__jited(" movq %rdx, %r11")
+/* lfence may not be executed depending on cpu features */
+__jited(" {{(lfence|)}}")
+__jited(" rdtsc")
+/* combine EDX:EAX into RAX */
+__jited(" shlq ${{(32|0x20)}}, %rdx")
+__jited(" orq %rdx, %rax")
+/* restore RDX from R11 */
+__jited(" movq %r11, %rdx")
+__jited(" leave")
+__naked int bpf_rdtsc_jit_x86_64(void)
+{
+	asm volatile(
+	"call %[bpf_get_cpu_time_counter];"
+	"exit"
+	:
+	: __imm(bpf_get_cpu_time_counter)
+	: __clobber_all
+	);
+}
+
+SEC("syscall")
+__arch_x86_64
+__xlated("0: r1 = 42")
+__xlated("1: call kernel-function")
+__naked int bpf_cyc2ns(void)
+{
+	asm volatile(
+	"r1=0x2a;"
+	"call %[bpf_cpu_time_counter_to_ns];"
+	"exit"
+	:
+	: __imm(bpf_cpu_time_counter_to_ns)
+	: __clobber_all
+	);
+}
+
+SEC("syscall")
+__arch_x86_64
+/* program entry for bpf_cyc2ns_jit_x86(), regular function prologue */
+__jited(" endbr64")
+__jited(" nopl (%rax,%rax)")
+__jited(" nopl (%rax)")
+__jited(" pushq %rbp")
+__jited(" movq %rsp, %rbp")
+__jited(" endbr64")
+/* load the 64-bit immediate argument into RDI */
+__jited(" movabsq $0x2a2a2a2a2a, %rdi")
+__jited(" imulq ${{.*}}, %rdi, %rax")
+__jited(" shrq ${{.*}}, %rax")
+__jited(" leave")
+__naked int bpf_cyc2ns_jit_x86(void)
+{
+	asm volatile(
+	"r1=0x2a2a2a2a2a ll;"
+	"call %[bpf_cpu_time_counter_to_ns];"
+	"exit"
+	:
+	: __imm(bpf_cpu_time_counter_to_ns)
+	: __clobber_all
+	);
+}
+
+void rdtsc(void)
+{
+	bpf_get_cpu_time_counter();
+	bpf_cpu_time_counter_to_ns(42);
+}
+
+char _license[] SEC("license") = "GPL";
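Note (not part of the patch): the sequence the x86-64 JIT emits for bpf_cpu_time_counter_to_ns() encodes the usual cyc2ns step, ns = (cycles * mult) >> shift, with mult and shift captured from the kernel's cyc2ns data at JIT time. The small userspace sketch below illustrates that arithmetic; the mult and shift values are hypothetical examples for a ~3 GHz counter, not values taken from this patch.

/* Illustrative sketch only; mult/shift below are made-up example values. */
#include <stdint.h>
#include <stdio.h>

static uint64_t cycles_to_ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	/* Same truncating 64-bit multiply + shift the emitted imul/shr pair performs;
	 * very large cycle counts would overflow, which is fine for short deltas.
	 */
	return (cycles * (uint64_t)mult) >> shift;
}

int main(void)
{
	uint32_t mult = 715827883;	/* hypothetical cyc2ns_mul: ~(1/3) << 31 */
	uint32_t shift = 31;		/* hypothetical cyc2ns_shift */

	/* 3e9 cycles at 3 GHz should print roughly 1e9 ns. */
	printf("%llu ns\n",
	       (unsigned long long)cycles_to_ns(3000000000ULL, mult, shift));
	return 0;
}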