diff --git a/Documentation/bpf/bpf_iterators.rst b/Documentation/bpf/bpf_iterators.rst index 07433915aa414..7f514cb6b0525 100644 --- a/Documentation/bpf/bpf_iterators.rst +++ b/Documentation/bpf/bpf_iterators.rst @@ -86,7 +86,7 @@ following steps: The following are a few examples of selftest BPF iterator programs: * `bpf_iter_tcp4.c `_ -* `bpf_iter_task_vma.c `_ +* `bpf_iter_task_vmas.c `_ * `bpf_iter_task_file.c `_ Let us look at ``bpf_iter_task_file.c``, which runs in kernel space: diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index e390c432f546e..39577f1d079a9 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -188,8 +188,10 @@ enum aarch64_insn_ldst_type { AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX, AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX, AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX, + AARCH64_INSN_LDST_LOAD_ACQ, AARCH64_INSN_LDST_LOAD_EX, AARCH64_INSN_LDST_LOAD_ACQ_EX, + AARCH64_INSN_LDST_STORE_REL, AARCH64_INSN_LDST_STORE_EX, AARCH64_INSN_LDST_STORE_REL_EX, AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET, @@ -351,8 +353,10 @@ __AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000) __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) __AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000) -__AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000) -__AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000) +__AARCH64_INSN_FUNCS(load_acq, 0x3FDFFC00, 0x08DFFC00) +__AARCH64_INSN_FUNCS(store_rel, 0x3FDFFC00, 0x089FFC00) +__AARCH64_INSN_FUNCS(load_ex, 0x3FC00000, 0x08400000) +__AARCH64_INSN_FUNCS(store_ex, 0x3FC00000, 0x08000000) __AARCH64_INSN_FUNCS(mops, 0x3B200C00, 0x19000400) __AARCH64_INSN_FUNCS(stp, 0x7FC00000, 0x29000000) __AARCH64_INSN_FUNCS(ldp, 0x7FC00000, 0x29400000) @@ -602,6 +606,10 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, int offset, enum aarch64_insn_variant variant, enum aarch64_insn_ldst_type type); +u32 aarch64_insn_gen_load_acq_store_rel(enum aarch64_insn_register reg, + enum aarch64_insn_register base, + enum aarch64_insn_size_type size, + enum aarch64_insn_ldst_type type); u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, enum aarch64_insn_register base, enum aarch64_insn_register state, diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index b008a9b46a7ff..9bef696e2230b 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -540,6 +540,35 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, offset >> shift); } +u32 aarch64_insn_gen_load_acq_store_rel(enum aarch64_insn_register reg, + enum aarch64_insn_register base, + enum aarch64_insn_size_type size, + enum aarch64_insn_ldst_type type) +{ + u32 insn; + + switch (type) { + case AARCH64_INSN_LDST_LOAD_ACQ: + insn = aarch64_insn_get_load_acq_value(); + break; + case AARCH64_INSN_LDST_STORE_REL: + insn = aarch64_insn_get_store_rel_value(); + break; + default: + pr_err("%s: unknown load-acquire/store-release encoding %d\n", + __func__, type); + return AARCH64_BREAK_FAULT; + } + + insn = aarch64_insn_encode_ldst_size(size, insn); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, + reg); + + return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, + base); +} + u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, enum aarch64_insn_register base, enum aarch64_insn_register state, diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index b22ab2f97a300..a3b0e693a125c 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -119,6 +119,26 @@ aarch64_insn_gen_load_store_ex(Rt, Rn, Rs, A64_SIZE(sf), \ AARCH64_INSN_LDST_STORE_REL_EX) +/* Load-acquire & store-release */ +#define A64_LDAR(Rt, Rn, size) \ + aarch64_insn_gen_load_acq_store_rel(Rt, Rn, AARCH64_INSN_SIZE_##size, \ + AARCH64_INSN_LDST_LOAD_ACQ) +#define A64_STLR(Rt, Rn, size) \ + aarch64_insn_gen_load_acq_store_rel(Rt, Rn, AARCH64_INSN_SIZE_##size, \ + AARCH64_INSN_LDST_STORE_REL) + +/* Rt = [Rn] (load acquire) */ +#define A64_LDARB(Wt, Xn) A64_LDAR(Wt, Xn, 8) +#define A64_LDARH(Wt, Xn) A64_LDAR(Wt, Xn, 16) +#define A64_LDAR32(Wt, Xn) A64_LDAR(Wt, Xn, 32) +#define A64_LDAR64(Xt, Xn) A64_LDAR(Xt, Xn, 64) + +/* [Rn] = Rt (store release) */ +#define A64_STLRB(Wt, Xn) A64_STLR(Wt, Xn, 8) +#define A64_STLRH(Wt, Xn) A64_STLR(Wt, Xn, 16) +#define A64_STLR32(Wt, Xn) A64_STLR(Wt, Xn, 32) +#define A64_STLR64(Xt, Xn) A64_STLR(Xt, Xn, 64) + /* * LSE atomics * diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 7409c8acbde35..70d7c89d3ac90 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -647,6 +647,81 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) return 0; } +static int emit_atomic_ld_st(const struct bpf_insn *insn, struct jit_ctx *ctx) +{ + const s32 imm = insn->imm; + const s16 off = insn->off; + const u8 code = insn->code; + const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC; + const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; + const u8 dst = bpf2a64[insn->dst_reg]; + const u8 src = bpf2a64[insn->src_reg]; + const u8 tmp = bpf2a64[TMP_REG_1]; + u8 reg; + + switch (imm) { + case BPF_LOAD_ACQ: + reg = src; + break; + case BPF_STORE_REL: + reg = dst; + break; + default: + pr_err_once("unknown atomic load/store op code %02x\n", imm); + return -EINVAL; + } + + if (off) { + emit_a64_add_i(1, tmp, reg, tmp, off, ctx); + reg = tmp; + } + if (arena) { + emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx); + reg = tmp; + } + + switch (imm) { + case BPF_LOAD_ACQ: + switch (BPF_SIZE(code)) { + case BPF_B: + emit(A64_LDARB(dst, reg), ctx); + break; + case BPF_H: + emit(A64_LDARH(dst, reg), ctx); + break; + case BPF_W: + emit(A64_LDAR32(dst, reg), ctx); + break; + case BPF_DW: + emit(A64_LDAR64(dst, reg), ctx); + break; + } + break; + case BPF_STORE_REL: + switch (BPF_SIZE(code)) { + case BPF_B: + emit(A64_STLRB(src, reg), ctx); + break; + case BPF_H: + emit(A64_STLRH(src, reg), ctx); + break; + case BPF_W: + emit(A64_STLR32(src, reg), ctx); + break; + case BPF_DW: + emit(A64_STLR64(src, reg), ctx); + break; + } + break; + default: + pr_err_once("unexpected atomic load/store op code %02x\n", + imm); + return -EINVAL; + } + + return 0; +} + #ifdef CONFIG_ARM64_LSE_ATOMICS static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) { @@ -1641,11 +1716,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, return ret; break; + case BPF_STX | BPF_ATOMIC | BPF_B: + case BPF_STX | BPF_ATOMIC | BPF_H: case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: + case BPF_STX | BPF_PROBE_ATOMIC | BPF_B: + case BPF_STX | BPF_PROBE_ATOMIC | BPF_H: case BPF_STX | BPF_PROBE_ATOMIC | BPF_W: case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW: - if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) + if (bpf_atomic_is_load_store(insn)) + ret = emit_atomic_ld_st(insn, ctx); + else if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) ret = emit_lse_atomic(insn, ctx); else ret = emit_ll_sc_atomic(insn, ctx); @@ -2669,7 +2750,8 @@ bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) switch (insn->code) { case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: - if (!cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) + if (!bpf_atomic_is_load_store(insn) && + !cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) return false; } return true; diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 9d440a0b729eb..0776dfde2dba9 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -2919,10 +2919,16 @@ bool bpf_jit_supports_arena(void) bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) { - /* - * Currently the verifier uses this function only to check which - * atomic stores to arena are supported, and they all are. - */ + if (!in_arena) + return true; + switch (insn->code) { + case BPF_STX | BPF_ATOMIC | BPF_B: + case BPF_STX | BPF_ATOMIC | BPF_H: + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + if (bpf_atomic_is_load_store(insn)) + return false; + } return true; } diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile index 383c87300b0d3..dddbefc0f4398 100644 --- a/arch/x86/net/Makefile +++ b/arch/x86/net/Makefile @@ -6,5 +6,5 @@ ifeq ($(CONFIG_X86_32),y) obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o else - obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o + obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_timed_may_goto.o endif diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index a43fc5af973d2..d3491cc0898bf 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1242,8 +1242,8 @@ static void emit_st_r12(u8 **pprog, u32 size, u32 dst_reg, int off, int imm) emit_st_index(pprog, size, dst_reg, X86_REG_R12, off, imm); } -static int emit_atomic(u8 **pprog, u8 atomic_op, - u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size) +static int emit_atomic_rmw(u8 **pprog, u32 atomic_op, + u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size) { u8 *prog = *pprog; @@ -1283,8 +1283,9 @@ static int emit_atomic(u8 **pprog, u8 atomic_op, return 0; } -static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size, - u32 dst_reg, u32 src_reg, u32 index_reg, int off) +static int emit_atomic_rmw_index(u8 **pprog, u32 atomic_op, u32 size, + u32 dst_reg, u32 src_reg, u32 index_reg, + int off) { u8 *prog = *pprog; @@ -1297,7 +1298,7 @@ static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size, EMIT1(add_3mod(0x48, dst_reg, src_reg, index_reg)); break; default: - pr_err("bpf_jit: 1 and 2 byte atomics are not supported\n"); + pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n"); return -EFAULT; } @@ -1331,6 +1332,49 @@ static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size, return 0; } +static int emit_atomic_ld_st(u8 **pprog, u32 atomic_op, u32 dst_reg, + u32 src_reg, s16 off, u8 bpf_size) +{ + switch (atomic_op) { + case BPF_LOAD_ACQ: + /* dst_reg = smp_load_acquire(src_reg + off16) */ + emit_ldx(pprog, bpf_size, dst_reg, src_reg, off); + break; + case BPF_STORE_REL: + /* smp_store_release(dst_reg + off16, src_reg) */ + emit_stx(pprog, bpf_size, dst_reg, src_reg, off); + break; + default: + pr_err("bpf_jit: unknown atomic load/store opcode %02x\n", + atomic_op); + return -EFAULT; + } + + return 0; +} + +static int emit_atomic_ld_st_index(u8 **pprog, u32 atomic_op, u32 size, + u32 dst_reg, u32 src_reg, u32 index_reg, + int off) +{ + switch (atomic_op) { + case BPF_LOAD_ACQ: + /* dst_reg = smp_load_acquire(src_reg + idx_reg + off16) */ + emit_ldx_index(pprog, size, dst_reg, src_reg, index_reg, off); + break; + case BPF_STORE_REL: + /* smp_store_release(dst_reg + idx_reg + off16, src_reg) */ + emit_stx_index(pprog, size, dst_reg, src_reg, index_reg, off); + break; + default: + pr_err("bpf_jit: unknown atomic load/store opcode %02x\n", + atomic_op); + return -EFAULT; + } + + return 0; +} + #define DONT_CLEAR 1 bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs) @@ -2113,6 +2157,13 @@ st: if (is_imm8(insn->off)) } break; + case BPF_STX | BPF_ATOMIC | BPF_B: + case BPF_STX | BPF_ATOMIC | BPF_H: + if (!bpf_atomic_is_load_store(insn)) { + pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n"); + return -EFAULT; + } + fallthrough; case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: if (insn->imm == (BPF_AND | BPF_FETCH) || @@ -2148,10 +2199,10 @@ st: if (is_imm8(insn->off)) EMIT2(simple_alu_opcodes[BPF_OP(insn->imm)], add_2reg(0xC0, AUX_REG, real_src_reg)); /* Attempt to swap in new value */ - err = emit_atomic(&prog, BPF_CMPXCHG, - real_dst_reg, AUX_REG, - insn->off, - BPF_SIZE(insn->code)); + err = emit_atomic_rmw(&prog, BPF_CMPXCHG, + real_dst_reg, AUX_REG, + insn->off, + BPF_SIZE(insn->code)); if (WARN_ON(err)) return err; /* @@ -2166,17 +2217,35 @@ st: if (is_imm8(insn->off)) break; } - err = emit_atomic(&prog, insn->imm, dst_reg, src_reg, - insn->off, BPF_SIZE(insn->code)); + if (bpf_atomic_is_load_store(insn)) + err = emit_atomic_ld_st(&prog, insn->imm, dst_reg, src_reg, + insn->off, BPF_SIZE(insn->code)); + else + err = emit_atomic_rmw(&prog, insn->imm, dst_reg, src_reg, + insn->off, BPF_SIZE(insn->code)); if (err) return err; break; + case BPF_STX | BPF_PROBE_ATOMIC | BPF_B: + case BPF_STX | BPF_PROBE_ATOMIC | BPF_H: + if (!bpf_atomic_is_load_store(insn)) { + pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n"); + return -EFAULT; + } + fallthrough; case BPF_STX | BPF_PROBE_ATOMIC | BPF_W: case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW: start_of_ldx = prog; - err = emit_atomic_index(&prog, insn->imm, BPF_SIZE(insn->code), - dst_reg, src_reg, X86_REG_R12, insn->off); + + if (bpf_atomic_is_load_store(insn)) + err = emit_atomic_ld_st_index(&prog, insn->imm, + BPF_SIZE(insn->code), dst_reg, + src_reg, X86_REG_R12, insn->off); + else + err = emit_atomic_rmw_index(&prog, insn->imm, BPF_SIZE(insn->code), + dst_reg, src_reg, X86_REG_R12, + insn->off); if (err) return err; goto populate_extable; @@ -3791,3 +3860,8 @@ u64 bpf_arch_uaddress_limit(void) { return 0; } + +bool bpf_jit_supports_timed_may_goto(void) +{ + return true; +} diff --git a/arch/x86/net/bpf_timed_may_goto.S b/arch/x86/net/bpf_timed_may_goto.S new file mode 100644 index 0000000000000..20de4a14dc4cf --- /dev/null +++ b/arch/x86/net/bpf_timed_may_goto.S @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include + + .code64 + .section .text, "ax" + +SYM_FUNC_START(arch_bpf_timed_may_goto) + ANNOTATE_NOENDBR + + /* Save r0-r5. */ + pushq %rax + pushq %rdi + pushq %rsi + pushq %rdx + pushq %rcx + pushq %r8 + + /* + * r10 passes us stack depth, load the pointer to count and timestamp as + * first argument to the call below. + */ + leaq (%rbp, %r10, 1), %rdi + + /* Emit call depth accounting for call below. */ + CALL_DEPTH_ACCOUNT + call bpf_check_timed_may_goto + + /* BPF_REG_AX=r10 will be stored into count, so move return value to it. */ + movq %rax, %r10 + + /* Restore r5-r0. */ + popq %r8 + popq %rcx + popq %rdx + popq %rsi + popq %rdi + popq %rax + + RET +SYM_FUNC_END(arch_bpf_timed_may_goto) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 7fc69083e7450..9de7adb682948 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -111,6 +111,7 @@ struct bpf_prog_list { struct bpf_prog *prog; struct bpf_cgroup_link *link; struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; + u32 flags; }; int cgroup_bpf_inherit(struct cgroup *cgrp); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 15164787ce7f8..7d55553de3fce 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -991,6 +991,21 @@ static inline bool bpf_pseudo_func(const struct bpf_insn *insn) return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC; } +/* Given a BPF_ATOMIC instruction @atomic_insn, return true if it is an + * atomic load or store, and false if it is a read-modify-write instruction. + */ +static inline bool +bpf_atomic_is_load_store(const struct bpf_insn *atomic_insn) +{ + switch (atomic_insn->imm) { + case BPF_LOAD_ACQ: + case BPF_STORE_REL: + return true; + default: + return false; + } +} + struct bpf_prog_ops { int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); @@ -1531,6 +1546,7 @@ struct bpf_prog_aux { bool jits_use_priv_stack; bool priv_stack_requested; bool changes_pkt_data; + bool might_sleep; u64 prog_array_member_cnt; /* counts how many times as member of prog_array */ struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */ struct bpf_arena *arena; @@ -1986,6 +2002,7 @@ struct bpf_array { */ enum { BPF_MAX_LOOPS = 8 * 1024 * 1024, + BPF_MAX_TIMED_LOOPS = 0xffff, }; #define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ @@ -2354,7 +2371,7 @@ int generic_map_delete_batch(struct bpf_map *map, struct bpf_map *bpf_map_get_curr_or_next(u32 *id); struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); -int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid, +int bpf_map_alloc_pages(const struct bpf_map *map, int nid, unsigned long nr_pages, struct page **page_array); #ifdef CONFIG_MEMCG void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index bbd013c38ff9f..d6cfc4ee6820c 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -591,6 +591,8 @@ struct bpf_insn_aux_data { * accepts callback function as a parameter. */ bool calls_callback; + /* registers alive before this instruction. */ + u16 live_regs_before; }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ @@ -667,6 +669,7 @@ struct bpf_subprog_info { /* true if bpf_fastcall stack region is used by functions that can't be inlined */ bool keep_fastcall_stack: 1; bool changes_pkt_data: 1; + bool might_sleep: 1; enum priv_stack_mode priv_stack_mode; u8 arg_cnt; @@ -747,7 +750,11 @@ struct bpf_verifier_env { struct { int *insn_state; int *insn_stack; + /* vector of instruction indexes sorted in post-order */ + int *insn_postorder; int cur_stack; + /* current position in the insn_postorder vector */ + int cur_postorder; } cfg; struct backtrack_state bt; struct bpf_insn_hist_entry *insn_hist; diff --git a/include/linux/filter.h b/include/linux/filter.h index 3ed6eb9e7c733..590476743f7a3 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -364,6 +364,8 @@ static inline bool insn_is_cast_user(const struct bpf_insn *insn) * BPF_XOR | BPF_FETCH src_reg = atomic_fetch_xor(dst_reg + off16, src_reg); * BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg) * BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg) + * BPF_LOAD_ACQ dst_reg = smp_load_acquire(src_reg + off16) + * BPF_STORE_REL smp_store_release(dst_reg + off16, src_reg) */ #define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \ @@ -669,6 +671,11 @@ struct bpf_prog_stats { struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); +struct bpf_timed_may_goto { + u64 count; + u64 timestamp; +}; + struct sk_filter { refcount_t refcnt; struct rcu_head rcu; @@ -1130,8 +1137,11 @@ bool bpf_jit_supports_ptr_xchg(void); bool bpf_jit_supports_arena(void); bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena); bool bpf_jit_supports_private_stack(void); +bool bpf_jit_supports_timed_may_goto(void); u64 bpf_arch_uaddress_limit(void); void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); +u64 arch_bpf_timed_may_goto(void); +u64 bpf_check_timed_may_goto(struct bpf_timed_may_goto *); bool bpf_helper_changes_pkt_data(enum bpf_func_id func_id); static inline bool bpf_dump_raw_ok(const struct cred *cred) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 6bb1a5a7a4ae3..ceb226c2e25c5 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -39,6 +39,25 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) return !!(gfp_flags & __GFP_DIRECT_RECLAIM); } +static inline bool gfpflags_allow_spinning(const gfp_t gfp_flags) +{ + /* + * !__GFP_DIRECT_RECLAIM -> direct claim is not allowed. + * !__GFP_KSWAPD_RECLAIM -> it's not safe to wake up kswapd. + * All GFP_* flags including GFP_NOWAIT use one or both flags. + * try_alloc_pages() is the only API that doesn't specify either flag. + * + * This is stronger than GFP_NOWAIT or GFP_ATOMIC because + * those are guaranteed to never block on a sleeping lock. + * Here we are enforcing that the allocation doesn't ever spin + * on any locks (i.e. only trylocks). There is no high level + * GFP_$FOO flag for this use in try_alloc_pages() as the + * regular page allocator doesn't fully support this + * allocation mode. + */ + return !(gfp_flags & __GFP_RECLAIM); +} + #ifdef CONFIG_HIGHMEM #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM #else @@ -335,6 +354,9 @@ static inline struct page *alloc_page_vma_noprof(gfp_t gfp, } #define alloc_page_vma(...) alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__)) +struct page *try_alloc_pages_noprof(int nid, unsigned int order); +#define try_alloc_pages(...) alloc_hooks(try_alloc_pages_noprof(__VA_ARGS__)) + extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order); #define __get_free_pages(...) alloc_hooks(get_free_pages_noprof(__VA_ARGS__)) @@ -357,6 +379,7 @@ __meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mas __get_free_pages((gfp_mask) | GFP_DMA, (order)) extern void __free_pages(struct page *page, unsigned int order); +extern void free_pages_nolock(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); #define __free_page(page) __free_pages((page), 0) diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h index 091dc0b6bdfb9..1a0bc35839e36 100644 --- a/include/linux/local_lock.h +++ b/include/linux/local_lock.h @@ -51,6 +51,76 @@ #define local_unlock_irqrestore(lock, flags) \ __local_unlock_irqrestore(lock, flags) +/** + * localtry_lock_init - Runtime initialize a lock instance + */ +#define localtry_lock_init(lock) __localtry_lock_init(lock) + +/** + * localtry_lock - Acquire a per CPU local lock + * @lock: The lock variable + */ +#define localtry_lock(lock) __localtry_lock(lock) + +/** + * localtry_lock_irq - Acquire a per CPU local lock and disable interrupts + * @lock: The lock variable + */ +#define localtry_lock_irq(lock) __localtry_lock_irq(lock) + +/** + * localtry_lock_irqsave - Acquire a per CPU local lock, save and disable + * interrupts + * @lock: The lock variable + * @flags: Storage for interrupt flags + */ +#define localtry_lock_irqsave(lock, flags) \ + __localtry_lock_irqsave(lock, flags) + +/** + * localtry_trylock - Try to acquire a per CPU local lock. + * @lock: The lock variable + * + * The function can be used in any context such as NMI or HARDIRQ. Due to + * locking constrains it will _always_ fail to acquire the lock in NMI or + * HARDIRQ context on PREEMPT_RT. + */ +#define localtry_trylock(lock) __localtry_trylock(lock) + +/** + * localtry_trylock_irqsave - Try to acquire a per CPU local lock, save and disable + * interrupts if acquired + * @lock: The lock variable + * @flags: Storage for interrupt flags + * + * The function can be used in any context such as NMI or HARDIRQ. Due to + * locking constrains it will _always_ fail to acquire the lock in NMI or + * HARDIRQ context on PREEMPT_RT. + */ +#define localtry_trylock_irqsave(lock, flags) \ + __localtry_trylock_irqsave(lock, flags) + +/** + * local_unlock - Release a per CPU local lock + * @lock: The lock variable + */ +#define localtry_unlock(lock) __localtry_unlock(lock) + +/** + * local_unlock_irq - Release a per CPU local lock and enable interrupts + * @lock: The lock variable + */ +#define localtry_unlock_irq(lock) __localtry_unlock_irq(lock) + +/** + * localtry_unlock_irqrestore - Release a per CPU local lock and restore + * interrupt flags + * @lock: The lock variable + * @flags: Interrupt flags to restore + */ +#define localtry_unlock_irqrestore(lock, flags) \ + __localtry_unlock_irqrestore(lock, flags) + DEFINE_GUARD(local_lock, local_lock_t __percpu*, local_lock(_T), local_unlock(_T)) diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 8dd71fbbb6d2b..67bd13d142fac 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -15,6 +15,11 @@ typedef struct { #endif } local_lock_t; +typedef struct { + local_lock_t llock; + unsigned int acquired; +} localtry_lock_t; + #ifdef CONFIG_DEBUG_LOCK_ALLOC # define LOCAL_LOCK_DEBUG_INIT(lockname) \ .dep_map = { \ @@ -31,6 +36,13 @@ static inline void local_lock_acquire(local_lock_t *l) l->owner = current; } +static inline void local_trylock_acquire(local_lock_t *l) +{ + lock_map_acquire_try(&l->dep_map); + DEBUG_LOCKS_WARN_ON(l->owner); + l->owner = current; +} + static inline void local_lock_release(local_lock_t *l) { DEBUG_LOCKS_WARN_ON(l->owner != current); @@ -45,11 +57,13 @@ static inline void local_lock_debug_init(local_lock_t *l) #else /* CONFIG_DEBUG_LOCK_ALLOC */ # define LOCAL_LOCK_DEBUG_INIT(lockname) static inline void local_lock_acquire(local_lock_t *l) { } +static inline void local_trylock_acquire(local_lock_t *l) { } static inline void local_lock_release(local_lock_t *l) { } static inline void local_lock_debug_init(local_lock_t *l) { } #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ #define INIT_LOCAL_LOCK(lockname) { LOCAL_LOCK_DEBUG_INIT(lockname) } +#define INIT_LOCALTRY_LOCK(lockname) { .llock = { LOCAL_LOCK_DEBUG_INIT(lockname.llock) }} #define __local_lock_init(lock) \ do { \ @@ -118,6 +132,104 @@ do { \ #define __local_unlock_nested_bh(lock) \ local_lock_release(this_cpu_ptr(lock)) +/* localtry_lock_t variants */ + +#define __localtry_lock_init(lock) \ +do { \ + __local_lock_init(&(lock)->llock); \ + WRITE_ONCE((lock)->acquired, 0); \ +} while (0) + +#define __localtry_lock(lock) \ + do { \ + localtry_lock_t *lt; \ + preempt_disable(); \ + lt = this_cpu_ptr(lock); \ + local_lock_acquire(<->llock); \ + WRITE_ONCE(lt->acquired, 1); \ + } while (0) + +#define __localtry_lock_irq(lock) \ + do { \ + localtry_lock_t *lt; \ + local_irq_disable(); \ + lt = this_cpu_ptr(lock); \ + local_lock_acquire(<->llock); \ + WRITE_ONCE(lt->acquired, 1); \ + } while (0) + +#define __localtry_lock_irqsave(lock, flags) \ + do { \ + localtry_lock_t *lt; \ + local_irq_save(flags); \ + lt = this_cpu_ptr(lock); \ + local_lock_acquire(<->llock); \ + WRITE_ONCE(lt->acquired, 1); \ + } while (0) + +#define __localtry_trylock(lock) \ + ({ \ + localtry_lock_t *lt; \ + bool _ret; \ + \ + preempt_disable(); \ + lt = this_cpu_ptr(lock); \ + if (!READ_ONCE(lt->acquired)) { \ + WRITE_ONCE(lt->acquired, 1); \ + local_trylock_acquire(<->llock); \ + _ret = true; \ + } else { \ + _ret = false; \ + preempt_enable(); \ + } \ + _ret; \ + }) + +#define __localtry_trylock_irqsave(lock, flags) \ + ({ \ + localtry_lock_t *lt; \ + bool _ret; \ + \ + local_irq_save(flags); \ + lt = this_cpu_ptr(lock); \ + if (!READ_ONCE(lt->acquired)) { \ + WRITE_ONCE(lt->acquired, 1); \ + local_trylock_acquire(<->llock); \ + _ret = true; \ + } else { \ + _ret = false; \ + local_irq_restore(flags); \ + } \ + _ret; \ + }) + +#define __localtry_unlock(lock) \ + do { \ + localtry_lock_t *lt; \ + lt = this_cpu_ptr(lock); \ + WRITE_ONCE(lt->acquired, 0); \ + local_lock_release(<->llock); \ + preempt_enable(); \ + } while (0) + +#define __localtry_unlock_irq(lock) \ + do { \ + localtry_lock_t *lt; \ + lt = this_cpu_ptr(lock); \ + WRITE_ONCE(lt->acquired, 0); \ + local_lock_release(<->llock); \ + local_irq_enable(); \ + } while (0) + +#define __localtry_unlock_irqrestore(lock, flags) \ + do { \ + localtry_lock_t *lt; \ + lt = this_cpu_ptr(lock); \ + WRITE_ONCE(lt->acquired, 0); \ + local_lock_release(<->llock); \ + local_irq_restore(flags); \ + } while (0) + #else /* !CONFIG_PREEMPT_RT */ /* @@ -125,8 +237,10 @@ do { \ * critical section while staying preemptible. */ typedef spinlock_t local_lock_t; +typedef spinlock_t localtry_lock_t; #define INIT_LOCAL_LOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) +#define INIT_LOCALTRY_LOCK(lockname) INIT_LOCAL_LOCK(lockname) #define __local_lock_init(l) \ do { \ @@ -169,4 +283,36 @@ do { \ spin_unlock(this_cpu_ptr((lock))); \ } while (0) +/* localtry_lock_t variants */ + +#define __localtry_lock_init(lock) __local_lock_init(lock) +#define __localtry_lock(lock) __local_lock(lock) +#define __localtry_lock_irq(lock) __local_lock(lock) +#define __localtry_lock_irqsave(lock, flags) __local_lock_irqsave(lock, flags) +#define __localtry_unlock(lock) __local_unlock(lock) +#define __localtry_unlock_irq(lock) __local_unlock(lock) +#define __localtry_unlock_irqrestore(lock, flags) __local_unlock_irqrestore(lock, flags) + +#define __localtry_trylock(lock) \ + ({ \ + int __locked; \ + \ + if (in_nmi() | in_hardirq()) { \ + __locked = 0; \ + } else { \ + migrate_disable(); \ + __locked = spin_trylock(this_cpu_ptr((lock))); \ + if (!__locked) \ + migrate_enable(); \ + } \ + __locked; \ + }) + +#define __localtry_trylock_irqsave(lock, flags) \ + ({ \ + typecheck(unsigned long, flags); \ + flags = 0; \ + __localtry_trylock(lock); \ + }) + #endif /* CONFIG_PREEMPT_RT */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6b27db7f94963..483aa90242cdf 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -99,6 +99,10 @@ struct page { /* Or, free page */ struct list_head buddy_list; struct list_head pcp_list; + struct { + struct llist_node pcp_llist; + unsigned int order; + }; }; /* See page-flags.h for PAGE_MAPPING_FLAGS */ struct address_space *mapping; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9540b41894da6..e169395539304 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -972,6 +972,9 @@ struct zone { /* Primarily protects free_area */ spinlock_t lock; + /* Pages to be freed when next trylock succeeds */ + struct llist_head trylock_free_pages; + /* Write-intensive fields used by compaction and vmstats. */ CACHELINE_PADDING(_pad2_); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index fff6cdb8d11a2..bb37897c03939 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -51,6 +51,9 @@ #define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ #define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ +#define BPF_LOAD_ACQ 0x100 /* load-acquire */ +#define BPF_STORE_REL 0x110 /* store-release */ + enum bpf_cond_pseudo_jmp { BPF_MAY_GOTO = 0, }; @@ -1207,6 +1210,7 @@ enum bpf_perf_event_type { #define BPF_F_BEFORE (1U << 3) #define BPF_F_AFTER (1U << 4) #define BPF_F_ID (1U << 5) +#define BPF_F_PREORDER (1U << 6) #define BPF_F_LINK BPF_F_LINK /* 1 << 13 */ /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c index 647b709d7d77f..0d56cea716022 100644 --- a/kernel/bpf/arena.c +++ b/kernel/bpf/arena.c @@ -287,7 +287,7 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf) return VM_FAULT_SIGSEGV; /* Account into memcg of the process that created bpf_arena */ - ret = bpf_map_alloc_pages(map, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE, 1, &page); + ret = bpf_map_alloc_pages(map, NUMA_NO_NODE, 1, &page); if (ret) { range_tree_set(&arena->rt, vmf->pgoff, 1); return VM_FAULT_SIGSEGV; @@ -465,8 +465,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt if (ret) goto out_free_pages; - ret = bpf_map_alloc_pages(&arena->map, GFP_KERNEL | __GFP_ZERO, - node_id, page_cnt, pages); + ret = bpf_map_alloc_pages(&arena->map, node_id, page_cnt, pages); if (ret) goto out; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 46e5db65dbc8d..84f58f3d028a3 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -369,7 +369,7 @@ static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl) /* count number of elements in the list. * it's slow but the list cannot be long */ -static u32 prog_list_length(struct hlist_head *head) +static u32 prog_list_length(struct hlist_head *head, int *preorder_cnt) { struct bpf_prog_list *pl; u32 cnt = 0; @@ -377,6 +377,8 @@ static u32 prog_list_length(struct hlist_head *head) hlist_for_each_entry(pl, head, node) { if (!prog_list_prog(pl)) continue; + if (preorder_cnt && (pl->flags & BPF_F_PREORDER)) + (*preorder_cnt)++; cnt++; } return cnt; @@ -400,7 +402,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp, if (flags & BPF_F_ALLOW_MULTI) return true; - cnt = prog_list_length(&p->bpf.progs[atype]); + cnt = prog_list_length(&p->bpf.progs[atype], NULL); WARN_ON_ONCE(cnt > 1); if (cnt == 1) return !!(flags & BPF_F_ALLOW_OVERRIDE); @@ -423,12 +425,12 @@ static int compute_effective_progs(struct cgroup *cgrp, struct bpf_prog_array *progs; struct bpf_prog_list *pl; struct cgroup *p = cgrp; - int cnt = 0; + int i, j, cnt = 0, preorder_cnt = 0, fstart, bstart, init_bstart; /* count number of effective programs by walking parents */ do { if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) - cnt += prog_list_length(&p->bpf.progs[atype]); + cnt += prog_list_length(&p->bpf.progs[atype], &preorder_cnt); p = cgroup_parent(p); } while (p); @@ -439,20 +441,34 @@ static int compute_effective_progs(struct cgroup *cgrp, /* populate the array with effective progs */ cnt = 0; p = cgrp; + fstart = preorder_cnt; + bstart = preorder_cnt - 1; do { if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) continue; + init_bstart = bstart; hlist_for_each_entry(pl, &p->bpf.progs[atype], node) { if (!prog_list_prog(pl)) continue; - item = &progs->items[cnt]; + if (pl->flags & BPF_F_PREORDER) { + item = &progs->items[bstart]; + bstart--; + } else { + item = &progs->items[fstart]; + fstart++; + } item->prog = prog_list_prog(pl); bpf_cgroup_storages_assign(item->cgroup_storage, pl->storage); cnt++; } + + /* reverse pre-ordering progs at this cgroup level */ + for (i = bstart + 1, j = init_bstart; i < j; i++, j--) + swap(progs->items[i], progs->items[j]); + } while ((p = cgroup_parent(p))); *array = progs; @@ -663,7 +679,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, */ return -EPERM; - if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) + if (prog_list_length(progs, NULL) >= BPF_CGROUP_MAX_PROGS) return -E2BIG; pl = find_attach_entry(progs, prog, link, replace_prog, @@ -698,6 +714,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, pl->prog = prog; pl->link = link; + pl->flags = flags; bpf_cgroup_storages_assign(pl->storage, storage); cgrp->bpf.flags[atype] = saved_flags; @@ -1073,7 +1090,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, lockdep_is_held(&cgroup_mutex)); total_cnt += bpf_prog_array_length(effective); } else { - total_cnt += prog_list_length(&cgrp->bpf.progs[atype]); + total_cnt += prog_list_length(&cgrp->bpf.progs[atype], NULL); } } @@ -1105,7 +1122,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, u32 id; progs = &cgrp->bpf.progs[atype]; - cnt = min_t(int, prog_list_length(progs), total_cnt); + cnt = min_t(int, prog_list_length(progs, NULL), total_cnt); i = 0; hlist_for_each_entry(pl, progs, node) { prog = prog_list_prog(pl); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index a0200fbbace99..62cb9557ad3be 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1663,14 +1663,17 @@ EXPORT_SYMBOL_GPL(__bpf_call_base); INSN_3(JMP, JSET, K), \ INSN_2(JMP, JA), \ INSN_2(JMP32, JA), \ + /* Atomic operations. */ \ + INSN_3(STX, ATOMIC, B), \ + INSN_3(STX, ATOMIC, H), \ + INSN_3(STX, ATOMIC, W), \ + INSN_3(STX, ATOMIC, DW), \ /* Store instructions. */ \ /* Register based. */ \ INSN_3(STX, MEM, B), \ INSN_3(STX, MEM, H), \ INSN_3(STX, MEM, W), \ INSN_3(STX, MEM, DW), \ - INSN_3(STX, ATOMIC, W), \ - INSN_3(STX, ATOMIC, DW), \ /* Immediate based. */ \ INSN_3(ST, MEM, B), \ INSN_3(ST, MEM, H), \ @@ -2152,24 +2155,33 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) if (BPF_SIZE(insn->code) == BPF_W) \ atomic_##KOP((u32) SRC, (atomic_t *)(unsigned long) \ (DST + insn->off)); \ - else \ + else if (BPF_SIZE(insn->code) == BPF_DW) \ atomic64_##KOP((u64) SRC, (atomic64_t *)(unsigned long) \ (DST + insn->off)); \ + else \ + goto default_label; \ break; \ case BOP | BPF_FETCH: \ if (BPF_SIZE(insn->code) == BPF_W) \ SRC = (u32) atomic_fetch_##KOP( \ (u32) SRC, \ (atomic_t *)(unsigned long) (DST + insn->off)); \ - else \ + else if (BPF_SIZE(insn->code) == BPF_DW) \ SRC = (u64) atomic64_fetch_##KOP( \ (u64) SRC, \ (atomic64_t *)(unsigned long) (DST + insn->off)); \ + else \ + goto default_label; \ break; STX_ATOMIC_DW: STX_ATOMIC_W: + STX_ATOMIC_H: + STX_ATOMIC_B: switch (IMM) { + /* Atomic read-modify-write instructions support only W and DW + * size modifiers. + */ ATOMIC_ALU_OP(BPF_ADD, add) ATOMIC_ALU_OP(BPF_AND, and) ATOMIC_ALU_OP(BPF_OR, or) @@ -2181,20 +2193,63 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) SRC = (u32) atomic_xchg( (atomic_t *)(unsigned long) (DST + insn->off), (u32) SRC); - else + else if (BPF_SIZE(insn->code) == BPF_DW) SRC = (u64) atomic64_xchg( (atomic64_t *)(unsigned long) (DST + insn->off), (u64) SRC); + else + goto default_label; break; case BPF_CMPXCHG: if (BPF_SIZE(insn->code) == BPF_W) BPF_R0 = (u32) atomic_cmpxchg( (atomic_t *)(unsigned long) (DST + insn->off), (u32) BPF_R0, (u32) SRC); - else + else if (BPF_SIZE(insn->code) == BPF_DW) BPF_R0 = (u64) atomic64_cmpxchg( (atomic64_t *)(unsigned long) (DST + insn->off), (u64) BPF_R0, (u64) SRC); + else + goto default_label; + break; + /* Atomic load and store instructions support all size + * modifiers. + */ + case BPF_LOAD_ACQ: + switch (BPF_SIZE(insn->code)) { +#define LOAD_ACQUIRE(SIZEOP, SIZE) \ + case BPF_##SIZEOP: \ + DST = (SIZE)smp_load_acquire( \ + (SIZE *)(unsigned long)(SRC + insn->off)); \ + break; + LOAD_ACQUIRE(B, u8) + LOAD_ACQUIRE(H, u16) + LOAD_ACQUIRE(W, u32) +#ifdef CONFIG_64BIT + LOAD_ACQUIRE(DW, u64) +#endif +#undef LOAD_ACQUIRE + default: + goto default_label; + } + break; + case BPF_STORE_REL: + switch (BPF_SIZE(insn->code)) { +#define STORE_RELEASE(SIZEOP, SIZE) \ + case BPF_##SIZEOP: \ + smp_store_release( \ + (SIZE *)(unsigned long)(DST + insn->off), (SIZE)SRC); \ + break; + STORE_RELEASE(B, u8) + STORE_RELEASE(H, u16) + STORE_RELEASE(W, u32) +#ifdef CONFIG_64BIT + STORE_RELEASE(DW, u64) +#endif +#undef STORE_RELEASE + default: + goto default_label; + } break; default: @@ -3069,6 +3124,32 @@ void __weak arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, { } +bool __weak bpf_jit_supports_timed_may_goto(void) +{ + return false; +} + +u64 __weak arch_bpf_timed_may_goto(void) +{ + return 0; +} + +u64 bpf_check_timed_may_goto(struct bpf_timed_may_goto *p) +{ + u64 time = ktime_get_mono_fast_ns(); + + /* Populate the timestamp for this stack frame, and refresh count. */ + if (!p->timestamp) { + p->timestamp = time; + return BPF_MAX_TIMED_LOOPS; + } + /* Check if we've exhausted our time slice, and zero count. */ + if (time - p->timestamp >= (NSEC_PER_SEC / 4)) + return 0; + /* Refresh the count for the stack frame. */ + return BPF_MAX_TIMED_LOOPS; +} + /* for configs without MMU or 32-bit */ __weak const struct bpf_map_ops arena_map_ops; __weak u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena) diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index 309c4aa1b026a..974d172d6735a 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -267,6 +267,18 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, BPF_SIZE(insn->code) == BPF_DW ? "64" : "", bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->dst_reg, insn->off, insn->src_reg); + } else if (BPF_MODE(insn->code) == BPF_ATOMIC && + insn->imm == BPF_LOAD_ACQ) { + verbose(cbs->private_data, "(%02x) r%d = load_acquire((%s *)(r%d %+d))\n", + insn->code, insn->dst_reg, + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->src_reg, insn->off); + } else if (BPF_MODE(insn->code) == BPF_ATOMIC && + insn->imm == BPF_STORE_REL) { + verbose(cbs->private_data, "(%02x) store_release((%s *)(r%d %+d), r%d)\n", + insn->code, + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->dst_reg, insn->off, insn->src_reg); } else { verbose(cbs->private_data, "BUG_%02x\n", insn->code); } diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 183298fc11ba4..5449756ba102e 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1759,8 +1759,8 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = { .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT | MEM_WRITE, }; -BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src, - u32, offset, u64, flags) +static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *src, + u32 offset, u64 flags) { enum bpf_dynptr_type type; int err; @@ -1793,6 +1793,12 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern } } +BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src, + u32, offset, u64, flags) +{ + return __bpf_dynptr_read(dst, len, src, offset, flags); +} + static const struct bpf_func_proto bpf_dynptr_read_proto = { .func = bpf_dynptr_read, .gpl_only = false, @@ -1804,8 +1810,8 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = { .arg5_type = ARG_ANYTHING, }; -BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src, - u32, len, u64, flags) +static int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src, + u32 len, u64 flags) { enum bpf_dynptr_type type; int err; @@ -1843,6 +1849,12 @@ BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, v } } +BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src, + u32, len, u64, flags) +{ + return __bpf_dynptr_write(dst, offset, src, len, flags); +} + static const struct bpf_func_proto bpf_dynptr_write_proto = { .func = bpf_dynptr_write, .gpl_only = false, @@ -2758,6 +2770,61 @@ __bpf_kfunc int bpf_dynptr_clone(const struct bpf_dynptr *p, return 0; } +/** + * bpf_dynptr_copy() - Copy data from one dynptr to another. + * @dst_ptr: Destination dynptr - where data should be copied to + * @dst_off: Offset into the destination dynptr + * @src_ptr: Source dynptr - where data should be copied from + * @src_off: Offset into the source dynptr + * @size: Length of the data to copy from source to destination + * + * Copies data from source dynptr to destination dynptr. + * Returns 0 on success; negative error, otherwise. + */ +__bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off, + struct bpf_dynptr *src_ptr, u32 src_off, u32 size) +{ + struct bpf_dynptr_kern *dst = (struct bpf_dynptr_kern *)dst_ptr; + struct bpf_dynptr_kern *src = (struct bpf_dynptr_kern *)src_ptr; + void *src_slice, *dst_slice; + char buf[256]; + u32 off; + + src_slice = bpf_dynptr_slice(src_ptr, src_off, NULL, size); + dst_slice = bpf_dynptr_slice_rdwr(dst_ptr, dst_off, NULL, size); + + if (src_slice && dst_slice) { + memmove(dst_slice, src_slice, size); + return 0; + } + + if (src_slice) + return __bpf_dynptr_write(dst, dst_off, src_slice, size, 0); + + if (dst_slice) + return __bpf_dynptr_read(dst_slice, size, src, src_off, 0); + + if (bpf_dynptr_check_off_len(dst, dst_off, size) || + bpf_dynptr_check_off_len(src, src_off, size)) + return -E2BIG; + + off = 0; + while (off < size) { + u32 chunk_sz = min_t(u32, sizeof(buf), size - off); + int err; + + err = __bpf_dynptr_read(buf, chunk_sz, src, src_off + off, 0); + if (err) + return err; + err = __bpf_dynptr_write(dst, dst_off + off, buf, chunk_sz, 0); + if (err) + return err; + + off += chunk_sz; + } + return 0; +} + __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj) { return obj; @@ -3206,6 +3273,7 @@ BTF_ID_FLAGS(func, bpf_dynptr_is_null) BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) BTF_ID_FLAGS(func, bpf_dynptr_size) BTF_ID_FLAGS(func, bpf_dynptr_clone) +BTF_ID_FLAGS(func, bpf_dynptr_copy) #ifdef CONFIG_NET BTF_ID_FLAGS(func, bpf_modify_return_test_tp) #endif diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index dbd89c13dd328..57a4387062159 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -569,7 +569,24 @@ static void bpf_map_release_memcg(struct bpf_map *map) } #endif -int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid, +static bool can_alloc_pages(void) +{ + return preempt_count() == 0 && !irqs_disabled() && + !IS_ENABLED(CONFIG_PREEMPT_RT); +} + +static struct page *__bpf_alloc_page(int nid) +{ + if (!can_alloc_pages()) + return try_alloc_pages(nid, 0); + + return alloc_pages_node(nid, + GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT + | __GFP_NOWARN, + 0); +} + +int bpf_map_alloc_pages(const struct bpf_map *map, int nid, unsigned long nr_pages, struct page **pages) { unsigned long i, j; @@ -582,14 +599,14 @@ int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid, old_memcg = set_active_memcg(memcg); #endif for (i = 0; i < nr_pages; i++) { - pg = alloc_pages_node(nid, gfp | __GFP_ACCOUNT, 0); + pg = __bpf_alloc_page(nid); if (pg) { pages[i] = pg; continue; } for (j = 0; j < i; j++) - __free_page(pages[j]); + free_pages_nolock(pages[j], 0); ret = -ENOMEM; break; } @@ -1593,11 +1610,8 @@ struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref) struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map) { - spin_lock_bh(&map_idr_lock); - map = __bpf_map_inc_not_zero(map, false); - spin_unlock_bh(&map_idr_lock); - - return map; + lockdep_assert(rcu_read_lock_held()); + return __bpf_map_inc_not_zero(map, false); } EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero); @@ -4170,7 +4184,8 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, #define BPF_F_ATTACH_MASK_BASE \ (BPF_F_ALLOW_OVERRIDE | \ BPF_F_ALLOW_MULTI | \ - BPF_F_REPLACE) + BPF_F_REPLACE | \ + BPF_F_PREORDER) #define BPF_F_ATTACH_MASK_MPROG \ (BPF_F_REPLACE | \ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dcd0da4e62fca..6c18628fa9d80 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -579,6 +579,13 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn) insn->imm == BPF_CMPXCHG; } +static bool is_atomic_load_insn(const struct bpf_insn *insn) +{ + return BPF_CLASS(insn->code) == BPF_STX && + BPF_MODE(insn->code) == BPF_ATOMIC && + insn->imm == BPF_LOAD_ACQ; +} + static int __get_spi(s32 off) { return (-off - 1) / BPF_REG_SIZE; @@ -3353,6 +3360,15 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env) return 0; } +static int jmp_offset(struct bpf_insn *insn) +{ + u8 code = insn->code; + + if (code == (BPF_JMP32 | BPF_JA)) + return insn->imm; + return insn->off; +} + static int check_subprogs(struct bpf_verifier_env *env) { int i, subprog_start, subprog_end, off, cur_subprog = 0; @@ -3379,10 +3395,7 @@ static int check_subprogs(struct bpf_verifier_env *env) goto next; if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL) goto next; - if (code == (BPF_JMP32 | BPF_JA)) - off = i + insn[i].imm + 1; - else - off = i + insn[i].off + 1; + off = i + jmp_offset(&insn[i]) + 1; if (off < subprog_start || off >= subprog_end) { verbose(env, "jump out of range from insn %d to %d\n", i, off); return -EINVAL; @@ -3567,7 +3580,7 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn, } if (class == BPF_STX) { - /* BPF_STX (including atomic variants) has multiple source + /* BPF_STX (including atomic variants) has one or more source * operands, one of which is a ptr. Check whether the caller is * asking about it. */ @@ -3912,6 +3925,17 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) return btf_name_by_offset(desc_btf, func->name_off); } +static void verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn) +{ + const struct bpf_insn_cbs cbs = { + .cb_call = disasm_kfunc_name, + .cb_print = verbose, + .private_data = env, + }; + + print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); +} + static inline void bt_init(struct backtrack_state *bt, u32 frame) { bt->frame = frame; @@ -4112,11 +4136,6 @@ static bool calls_callback(struct bpf_verifier_env *env, int insn_idx); static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, struct bpf_insn_hist_entry *hist, struct backtrack_state *bt) { - const struct bpf_insn_cbs cbs = { - .cb_call = disasm_kfunc_name, - .cb_print = verbose, - .private_data = env, - }; struct bpf_insn *insn = env->prog->insnsi + idx; u8 class = BPF_CLASS(insn->code); u8 opcode = BPF_OP(insn->code); @@ -4134,7 +4153,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt)); verbose(env, "stack=%s before ", env->tmp_str_buf); verbose(env, "%d: ", idx); - print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); + verbose_insn(env, insn); } /* If there is a history record that some registers gained range at this insn, @@ -4181,7 +4200,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, * dreg still needs precision before this insn */ } - } else if (class == BPF_LDX) { + } else if (class == BPF_LDX || is_atomic_load_insn(insn)) { if (!bt_is_reg_set(bt, dreg)) return 0; bt_clear_reg(bt, dreg); @@ -6195,6 +6214,26 @@ static bool is_arena_reg(struct bpf_verifier_env *env, int regno) return reg->type == PTR_TO_ARENA; } +/* Return false if @regno contains a pointer whose type isn't supported for + * atomic instruction @insn. + */ +static bool atomic_ptr_type_ok(struct bpf_verifier_env *env, int regno, + struct bpf_insn *insn) +{ + if (is_ctx_reg(env, regno)) + return false; + if (is_pkt_reg(env, regno)) + return false; + if (is_flow_key_reg(env, regno)) + return false; + if (is_sk_reg(env, regno)) + return false; + if (is_arena_reg(env, regno)) + return bpf_jit_supports_insn(insn, true); + + return true; +} + static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { #ifdef CONFIG_NET [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK], @@ -7596,27 +7635,72 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type, bool allow_trust_mismatch); -static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn) +static int check_load_mem(struct bpf_verifier_env *env, struct bpf_insn *insn, + bool strict_alignment_once, bool is_ldsx, + bool allow_trust_mismatch, const char *ctx) { - int load_reg; + struct bpf_reg_state *regs = cur_regs(env); + enum bpf_reg_type src_reg_type; int err; - switch (insn->imm) { - case BPF_ADD: - case BPF_ADD | BPF_FETCH: - case BPF_AND: - case BPF_AND | BPF_FETCH: - case BPF_OR: - case BPF_OR | BPF_FETCH: - case BPF_XOR: - case BPF_XOR | BPF_FETCH: - case BPF_XCHG: - case BPF_CMPXCHG: - break; - default: - verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm); - return -EINVAL; - } + /* check src operand */ + err = check_reg_arg(env, insn->src_reg, SRC_OP); + if (err) + return err; + + /* check dst operand */ + err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); + if (err) + return err; + + src_reg_type = regs[insn->src_reg].type; + + /* Check if (src_reg + off) is readable. The state of dst_reg will be + * updated by this call. + */ + err = check_mem_access(env, env->insn_idx, insn->src_reg, insn->off, + BPF_SIZE(insn->code), BPF_READ, insn->dst_reg, + strict_alignment_once, is_ldsx); + err = err ?: save_aux_ptr_type(env, src_reg_type, + allow_trust_mismatch); + err = err ?: reg_bounds_sanity_check(env, ®s[insn->dst_reg], ctx); + + return err; +} + +static int check_store_reg(struct bpf_verifier_env *env, struct bpf_insn *insn, + bool strict_alignment_once) +{ + struct bpf_reg_state *regs = cur_regs(env); + enum bpf_reg_type dst_reg_type; + int err; + + /* check src1 operand */ + err = check_reg_arg(env, insn->src_reg, SRC_OP); + if (err) + return err; + + /* check src2 operand */ + err = check_reg_arg(env, insn->dst_reg, SRC_OP); + if (err) + return err; + + dst_reg_type = regs[insn->dst_reg].type; + + /* Check if (dst_reg + off) is writeable. */ + err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off, + BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg, + strict_alignment_once, false); + err = err ?: save_aux_ptr_type(env, dst_reg_type, false); + + return err; +} + +static int check_atomic_rmw(struct bpf_verifier_env *env, + struct bpf_insn *insn) +{ + int load_reg; + int err; if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) { verbose(env, "invalid atomic operand size\n"); @@ -7652,11 +7736,7 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i return -EACCES; } - if (is_ctx_reg(env, insn->dst_reg) || - is_pkt_reg(env, insn->dst_reg) || - is_flow_key_reg(env, insn->dst_reg) || - is_sk_reg(env, insn->dst_reg) || - (is_arena_reg(env, insn->dst_reg) && !bpf_jit_supports_insn(insn, true))) { + if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) { verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", insn->dst_reg, reg_type_str(env, reg_state(env, insn->dst_reg)->type)); @@ -7683,12 +7763,12 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i /* Check whether we can read the memory, with second call for fetch * case to simulate the register fill. */ - err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, + err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, -1, true, false); if (!err && load_reg >= 0) - err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_READ, load_reg, - true, false); + err = check_mem_access(env, env->insn_idx, insn->dst_reg, + insn->off, BPF_SIZE(insn->code), + BPF_READ, load_reg, true, false); if (err) return err; @@ -7698,13 +7778,74 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i return err; } /* Check whether we can write into the same memory. */ - err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, + err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1, true, false); if (err) return err; return 0; } +static int check_atomic_load(struct bpf_verifier_env *env, + struct bpf_insn *insn) +{ + if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) { + verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n", + insn->src_reg, + reg_type_str(env, reg_state(env, insn->src_reg)->type)); + return -EACCES; + } + + return check_load_mem(env, insn, true, false, false, "atomic_load"); +} + +static int check_atomic_store(struct bpf_verifier_env *env, + struct bpf_insn *insn) +{ + if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) { + verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", + insn->dst_reg, + reg_type_str(env, reg_state(env, insn->dst_reg)->type)); + return -EACCES; + } + + return check_store_reg(env, insn, true); +} + +static int check_atomic(struct bpf_verifier_env *env, struct bpf_insn *insn) +{ + switch (insn->imm) { + case BPF_ADD: + case BPF_ADD | BPF_FETCH: + case BPF_AND: + case BPF_AND | BPF_FETCH: + case BPF_OR: + case BPF_OR | BPF_FETCH: + case BPF_XOR: + case BPF_XOR | BPF_FETCH: + case BPF_XCHG: + case BPF_CMPXCHG: + return check_atomic_rmw(env, insn); + case BPF_LOAD_ACQ: + if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) { + verbose(env, + "64-bit load-acquires are only supported on 64-bit arches\n"); + return -EOPNOTSUPP; + } + return check_atomic_load(env, insn); + case BPF_STORE_REL: + if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) { + verbose(env, + "64-bit store-releases are only supported on 64-bit arches\n"); + return -EOPNOTSUPP; + } + return check_atomic_store(env, insn); + default: + verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", + insn->imm); + return -EINVAL; + } +} + /* When register 'regno' is used to read the stack (either directly or through * a helper function) make sure that it's within stack boundary and, depending * on the access type and privileges, that all elements of the stack are @@ -10317,23 +10458,18 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (subprog_is_global(env, subprog)) { const char *sub_name = subprog_name(env, subprog); - /* Only global subprogs cannot be called with a lock held. */ if (env->cur_state->active_locks) { verbose(env, "global function calls are not allowed while holding a lock,\n" "use static function instead\n"); return -EINVAL; } - /* Only global subprogs cannot be called with preemption disabled. */ - if (env->cur_state->active_preempt_locks) { - verbose(env, "global function calls are not allowed with preemption disabled,\n" - "use static function instead\n"); - return -EINVAL; - } - - if (env->cur_state->active_irq_id) { - verbose(env, "global function calls are not allowed with IRQs disabled,\n" - "use static function instead\n"); + if (env->subprog_info[subprog].might_sleep && + (env->cur_state->active_rcu_lock || env->cur_state->active_preempt_locks || + env->cur_state->active_irq_id || !in_sleepable(env))) { + verbose(env, "global functions that may sleep are not allowed in non-sleepable context,\n" + "i.e., in a RCU/IRQ/preempt-disabled section, or in\n" + "a non-sleepable BPF program context\n"); return -EINVAL; } @@ -16703,6 +16839,14 @@ static void mark_subprog_changes_pkt_data(struct bpf_verifier_env *env, int off) subprog->changes_pkt_data = true; } +static void mark_subprog_might_sleep(struct bpf_verifier_env *env, int off) +{ + struct bpf_subprog_info *subprog; + + subprog = find_containing_subprog(env, off); + subprog->might_sleep = true; +} + /* 't' is an index of a call-site. * 'w' is a callee entry point. * Eventually this function would be called when env->cfg.insn_state[w] == EXPLORED. @@ -16716,6 +16860,7 @@ static void merge_callee_effects(struct bpf_verifier_env *env, int t, int w) caller = find_containing_subprog(env, t); callee = find_containing_subprog(env, w); caller->changes_pkt_data |= callee->changes_pkt_data; + caller->might_sleep |= callee->might_sleep; } /* non-recursive DFS pseudo code @@ -16874,27 +17019,6 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns, /* Bitmask with 1s for all caller saved registers */ #define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1) -/* Return a bitmask specifying which caller saved registers are - * clobbered by a call to a helper *as if* this helper follows - * bpf_fastcall contract: - * - includes R0 if function is non-void; - * - includes R1-R5 if corresponding parameter has is described - * in the function prototype. - */ -static u32 helper_fastcall_clobber_mask(const struct bpf_func_proto *fn) -{ - u32 mask; - int i; - - mask = 0; - if (fn->ret_type != RET_VOID) - mask |= BIT(BPF_REG_0); - for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i) - if (fn->arg_type[i] != ARG_DONTCARE) - mask |= BIT(BPF_REG_1 + i); - return mask; -} - /* True if do_misc_fixups() replaces calls to helper number 'imm', * replacement patch is presumed to follow bpf_fastcall contract * (see mark_fastcall_pattern_for_call() below). @@ -16911,24 +17035,54 @@ static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm) } } -/* Same as helper_fastcall_clobber_mask() but for kfuncs, see comment above */ -static u32 kfunc_fastcall_clobber_mask(struct bpf_kfunc_call_arg_meta *meta) +struct call_summary { + u8 num_params; + bool is_void; + bool fastcall; +}; + +/* If @call is a kfunc or helper call, fills @cs and returns true, + * otherwise returns false. + */ +static bool get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call, + struct call_summary *cs) { - u32 vlen, i, mask; + struct bpf_kfunc_call_arg_meta meta; + const struct bpf_func_proto *fn; + int i; - vlen = btf_type_vlen(meta->func_proto); - mask = 0; - if (!btf_type_is_void(btf_type_by_id(meta->btf, meta->func_proto->type))) - mask |= BIT(BPF_REG_0); - for (i = 0; i < vlen; ++i) - mask |= BIT(BPF_REG_1 + i); - return mask; -} + if (bpf_helper_call(call)) { -/* Same as verifier_inlines_helper_call() but for kfuncs, see comment above */ -static bool is_fastcall_kfunc_call(struct bpf_kfunc_call_arg_meta *meta) -{ - return meta->kfunc_flags & KF_FASTCALL; + if (get_helper_proto(env, call->imm, &fn) < 0) + /* error would be reported later */ + return false; + cs->fastcall = fn->allow_fastcall && + (verifier_inlines_helper_call(env, call->imm) || + bpf_jit_inlines_helper_call(call->imm)); + cs->is_void = fn->ret_type == RET_VOID; + cs->num_params = 0; + for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i) { + if (fn->arg_type[i] == ARG_DONTCARE) + break; + cs->num_params++; + } + return true; + } + + if (bpf_pseudo_kfunc_call(call)) { + int err; + + err = fetch_kfunc_meta(env, call, &meta, NULL); + if (err < 0) + /* error would be reported later */ + return false; + cs->num_params = btf_type_vlen(meta.func_proto); + cs->fastcall = meta.kfunc_flags & KF_FASTCALL; + cs->is_void = btf_type_is_void(btf_type_by_id(meta.btf, meta.func_proto->type)); + return true; + } + + return false; } /* LLVM define a bpf_fastcall function attribute. @@ -17011,39 +17165,23 @@ static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env, { struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx; struct bpf_insn *call = &env->prog->insnsi[insn_idx]; - const struct bpf_func_proto *fn; - u32 clobbered_regs_mask = ALL_CALLER_SAVED_REGS; + u32 clobbered_regs_mask; + struct call_summary cs; u32 expected_regs_mask; - bool can_be_inlined = false; s16 off; int i; - if (bpf_helper_call(call)) { - if (get_helper_proto(env, call->imm, &fn) < 0) - /* error would be reported later */ - return; - clobbered_regs_mask = helper_fastcall_clobber_mask(fn); - can_be_inlined = fn->allow_fastcall && - (verifier_inlines_helper_call(env, call->imm) || - bpf_jit_inlines_helper_call(call->imm)); - } - - if (bpf_pseudo_kfunc_call(call)) { - struct bpf_kfunc_call_arg_meta meta; - int err; - - err = fetch_kfunc_meta(env, call, &meta, NULL); - if (err < 0) - /* error would be reported later */ - return; - - clobbered_regs_mask = kfunc_fastcall_clobber_mask(&meta); - can_be_inlined = is_fastcall_kfunc_call(&meta); - } - - if (clobbered_regs_mask == ALL_CALLER_SAVED_REGS) + if (!get_call_summary(env, call, &cs)) return; + /* A bitmask specifying which caller saved registers are clobbered + * by a call to a helper/kfunc *as if* this helper/kfunc follows + * bpf_fastcall contract: + * - includes R0 if function is non-void; + * - includes R1-R5 if corresponding parameter has is described + * in the function prototype. + */ + clobbered_regs_mask = GENMASK(cs.num_params, cs.is_void ? 1 : 0); /* e.g. if helper call clobbers r{0,1}, expect r{2,3,4,5} in the pattern */ expected_regs_mask = ~clobbered_regs_mask & ALL_CALLER_SAVED_REGS; @@ -17101,7 +17239,7 @@ static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env, * don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills() * does not remove spill/fill pair {4,6}. */ - if (can_be_inlined) + if (cs.fastcall) env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1; else subprog->keep_fastcall_stack = 1; @@ -17183,9 +17321,20 @@ static int visit_insn(int t, struct bpf_verifier_env *env) mark_prune_point(env, t); mark_jmp_point(env, t); } - if (bpf_helper_call(insn) && bpf_helper_changes_pkt_data(insn->imm)) - mark_subprog_changes_pkt_data(env, t); - if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { + if (bpf_helper_call(insn)) { + const struct bpf_func_proto *fp; + + ret = get_helper_proto(env, insn->imm, &fp); + /* If called in a non-sleepable context program will be + * rejected anyway, so we should end up with precise + * sleepable marks on subprogs, except for dead code + * elimination. + */ + if (ret == 0 && fp->might_sleep) + mark_subprog_might_sleep(env, t); + if (bpf_helper_changes_pkt_data(insn->imm)) + mark_subprog_changes_pkt_data(env, t); + } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { struct bpf_kfunc_call_arg_meta meta; ret = fetch_kfunc_meta(env, insn, &meta, NULL); @@ -17204,6 +17353,13 @@ static int visit_insn(int t, struct bpf_verifier_env *env) */ mark_force_checkpoint(env, t); } + /* Same as helpers, if called in a non-sleepable context + * program will be rejected anyway, so we should end up + * with precise sleepable marks on subprogs, except for + * dead code elimination. + */ + if (ret == 0 && is_kfunc_sleepable(&meta)) + mark_subprog_might_sleep(env, t); } return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL); @@ -17246,9 +17402,8 @@ static int visit_insn(int t, struct bpf_verifier_env *env) static int check_cfg(struct bpf_verifier_env *env) { int insn_cnt = env->prog->len; - int *insn_stack, *insn_state; + int *insn_stack, *insn_state, *insn_postorder; int ex_insn_beg, i, ret = 0; - bool ex_done = false; insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); if (!insn_state) @@ -17260,6 +17415,17 @@ static int check_cfg(struct bpf_verifier_env *env) return -ENOMEM; } + insn_postorder = env->cfg.insn_postorder = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); + if (!insn_postorder) { + kvfree(insn_state); + kvfree(insn_stack); + return -ENOMEM; + } + + ex_insn_beg = env->exception_callback_subprog + ? env->subprog_info[env->exception_callback_subprog].start + : 0; + insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */ insn_stack[0] = 0; /* 0 is the first instruction */ env->cfg.cur_stack = 1; @@ -17273,6 +17439,7 @@ static int check_cfg(struct bpf_verifier_env *env) case DONE_EXPLORING: insn_state[t] = EXPLORED; env->cfg.cur_stack--; + insn_postorder[env->cfg.cur_postorder++] = t; break; case KEEP_EXPLORING: break; @@ -17291,13 +17458,10 @@ static int check_cfg(struct bpf_verifier_env *env) goto err_free; } - if (env->exception_callback_subprog && !ex_done) { - ex_insn_beg = env->subprog_info[env->exception_callback_subprog].start; - + if (ex_insn_beg && insn_state[ex_insn_beg] != EXPLORED) { insn_state[ex_insn_beg] = DISCOVERED; insn_stack[0] = ex_insn_beg; env->cfg.cur_stack = 1; - ex_done = true; goto walk_cfg; } @@ -17320,6 +17484,7 @@ static int check_cfg(struct bpf_verifier_env *env) } ret = 0; /* cfg looks good */ env->prog->aux->changes_pkt_data = env->subprog_info[0].changes_pkt_data; + env->prog->aux->might_sleep = env->subprog_info[0].might_sleep; err_free: kvfree(insn_state); @@ -17946,7 +18111,7 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn, if (sl->state.branches) continue; loop_entry = get_loop_entry(env, &sl->state); - if (!IS_ERR_OR_NULL(loop_entry) && loop_entry->branches) + if (!IS_ERR_OR_NULL(loop_entry)) continue; if (sl->state.insn_idx != insn || !same_callsites(&sl->state, cur)) @@ -18335,15 +18500,17 @@ static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *c * the current state will reach 'bpf_exit' instruction safely */ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old, - struct bpf_func_state *cur, enum exact_level exact) + struct bpf_func_state *cur, u32 insn_idx, enum exact_level exact) { - int i; + u16 live_regs = env->insn_aux_data[insn_idx].live_regs_before; + u16 i; if (old->callback_depth > cur->callback_depth) return false; for (i = 0; i < MAX_BPF_REG; i++) - if (!regsafe(env, &old->regs[i], &cur->regs[i], + if (((1 << i) & live_regs) && + !regsafe(env, &old->regs[i], &cur->regs[i], &env->idmap_scratch, exact)) return false; @@ -18364,6 +18531,7 @@ static bool states_equal(struct bpf_verifier_env *env, struct bpf_verifier_state *cur, enum exact_level exact) { + u32 insn_idx; int i; if (old->curframe != cur->curframe) @@ -18387,9 +18555,12 @@ static bool states_equal(struct bpf_verifier_env *env, * and all frame states need to be equivalent */ for (i = 0; i <= old->curframe; i++) { + insn_idx = i == old->curframe + ? env->insn_idx + : old->frame[i + 1]->callsite; if (old->frame[i]->callsite != cur->frame[i]->callsite) return false; - if (!func_states_equal(env, old->frame[i], cur->frame[i], exact)) + if (!func_states_equal(env, old->frame[i], cur->frame[i], insn_idx, exact)) return false; } return true; @@ -18645,7 +18816,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) struct bpf_verifier_state_list *sl; struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry; int i, j, n, err, states_cnt = 0; - bool force_new_state, add_new_state, force_exact; + bool force_new_state, add_new_state; struct list_head *pos, *tmp, *head; force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) || @@ -18825,9 +18996,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) loop_entry = get_loop_entry(env, &sl->state); if (IS_ERR(loop_entry)) return PTR_ERR(loop_entry); - force_exact = loop_entry && loop_entry->branches > 0; - if (states_equal(env, &sl->state, cur, force_exact ? RANGE_WITHIN : NOT_EXACT)) { - if (force_exact) + if (states_equal(env, &sl->state, cur, loop_entry ? RANGE_WITHIN : NOT_EXACT)) { + if (loop_entry) update_loop_entry(env, cur, loop_entry); hit: sl->hit_cnt++; @@ -19121,19 +19291,13 @@ static int do_check(struct bpf_verifier_env *env) } if (env->log.level & BPF_LOG_LEVEL) { - const struct bpf_insn_cbs cbs = { - .cb_call = disasm_kfunc_name, - .cb_print = verbose, - .private_data = env, - }; - if (verifier_state_scratched(env)) print_insn_state(env, state, state->curframe); verbose_linfo(env, env->insn_idx, "; "); env->prev_log_pos = env->log.end_pos; verbose(env, "%d: ", env->insn_idx); - print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); + verbose_insn(env, insn); env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos; env->prev_log_pos = env->log.end_pos; } @@ -19155,37 +19319,18 @@ static int do_check(struct bpf_verifier_env *env) return err; } else if (class == BPF_LDX) { - enum bpf_reg_type src_reg_type; + bool is_ldsx = BPF_MODE(insn->code) == BPF_MEMSX; - /* check for reserved fields is already done */ - - /* check src operand */ - err = check_reg_arg(env, insn->src_reg, SRC_OP); - if (err) - return err; - - err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); - if (err) - return err; - - src_reg_type = regs[insn->src_reg].type; - - /* check that memory (src_reg + off) is readable, - * the state of dst_reg will be updated by this func + /* Check for reserved fields is already done in + * resolve_pseudo_ldimm64(). */ - err = check_mem_access(env, env->insn_idx, insn->src_reg, - insn->off, BPF_SIZE(insn->code), - BPF_READ, insn->dst_reg, false, - BPF_MODE(insn->code) == BPF_MEMSX); - err = err ?: save_aux_ptr_type(env, src_reg_type, true); - err = err ?: reg_bounds_sanity_check(env, ®s[insn->dst_reg], "ldx"); + err = check_load_mem(env, insn, false, is_ldsx, true, + "ldx"); if (err) return err; } else if (class == BPF_STX) { - enum bpf_reg_type dst_reg_type; - if (BPF_MODE(insn->code) == BPF_ATOMIC) { - err = check_atomic(env, env->insn_idx, insn); + err = check_atomic(env, insn); if (err) return err; env->insn_idx++; @@ -19197,25 +19342,7 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } - /* check src1 operand */ - err = check_reg_arg(env, insn->src_reg, SRC_OP); - if (err) - return err; - /* check src2 operand */ - err = check_reg_arg(env, insn->dst_reg, SRC_OP); - if (err) - return err; - - dst_reg_type = regs[insn->dst_reg].type; - - /* check that memory (dst_reg + off) is writeable */ - err = check_mem_access(env, env->insn_idx, insn->dst_reg, - insn->off, BPF_SIZE(insn->code), - BPF_WRITE, insn->src_reg, false, false); - if (err) - return err; - - err = save_aux_ptr_type(env, dst_reg_type, false); + err = check_store_reg(env, insn, false); if (err) return err; } else if (class == BPF_ST) { @@ -20537,7 +20664,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) insn->code == (BPF_ST | BPF_MEM | BPF_W) || insn->code == (BPF_ST | BPF_MEM | BPF_DW)) { type = BPF_WRITE; - } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) || + } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) || + insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) || + insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) || insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) && env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) { insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code); @@ -20845,6 +20974,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable; func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb; func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data; + func[i]->aux->might_sleep = env->subprog_info[i].might_sleep; if (!i) func[i]->aux->exception_boundary = env->seen_exception; func[i] = bpf_int_jit_compile(func[i]); @@ -21503,7 +21633,50 @@ static int do_misc_fixups(struct bpf_verifier_env *env) goto next_insn; } - if (is_may_goto_insn(insn)) { + if (is_may_goto_insn(insn) && bpf_jit_supports_timed_may_goto()) { + int stack_off_cnt = -stack_depth - 16; + + /* + * Two 8 byte slots, depth-16 stores the count, and + * depth-8 stores the start timestamp of the loop. + * + * The starting value of count is BPF_MAX_TIMED_LOOPS + * (0xffff). Every iteration loads it and subs it by 1, + * until the value becomes 0 in AX (thus, 1 in stack), + * after which we call arch_bpf_timed_may_goto, which + * either sets AX to 0xffff to keep looping, or to 0 + * upon timeout. AX is then stored into the stack. In + * the next iteration, we either see 0 and break out, or + * continue iterating until the next time value is 0 + * after subtraction, rinse and repeat. + */ + stack_depth_extra = 16; + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off_cnt); + if (insn->off >= 0) + insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 5); + else + insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1); + insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1); + insn_buf[3] = BPF_JMP_IMM(BPF_JNE, BPF_REG_AX, 0, 2); + /* + * AX is used as an argument to pass in stack_off_cnt + * (to add to r10/fp), and also as the return value of + * the call to arch_bpf_timed_may_goto. + */ + insn_buf[4] = BPF_MOV64_IMM(BPF_REG_AX, stack_off_cnt); + insn_buf[5] = BPF_EMIT_CALL(arch_bpf_timed_may_goto); + insn_buf[6] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off_cnt); + cnt = 7; + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto next_insn; + } else if (is_may_goto_insn(insn)) { int stack_off = -stack_depth - 8; stack_depth_extra = 8; @@ -22044,23 +22217,33 @@ static int do_misc_fixups(struct bpf_verifier_env *env) env->prog->aux->stack_depth = subprogs[0].stack_depth; for (i = 0; i < env->subprog_cnt; i++) { + int delta = bpf_jit_supports_timed_may_goto() ? 2 : 1; int subprog_start = subprogs[i].start; int stack_slots = subprogs[i].stack_extra / 8; + int slots = delta, cnt = 0; if (!stack_slots) continue; - if (stack_slots > 1) { + /* We need two slots in case timed may_goto is supported. */ + if (stack_slots > slots) { verbose(env, "verifier bug: stack_slots supports may_goto only\n"); return -EFAULT; } - /* Add ST insn to subprog prologue to init extra stack */ - insn_buf[0] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, - -subprogs[i].stack_depth, BPF_MAX_LOOPS); + stack_depth = subprogs[i].stack_depth; + if (bpf_jit_supports_timed_may_goto()) { + insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth, + BPF_MAX_TIMED_LOOPS); + insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth + 8, 0); + } else { + /* Add ST insn to subprog prologue to init extra stack */ + insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth, + BPF_MAX_LOOPS); + } /* Copy first actual insn to preserve it */ - insn_buf[1] = env->prog->insnsi[subprog_start]; + insn_buf[cnt++] = env->prog->insnsi[subprog_start]; - new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, 2); + new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, cnt); if (!new_prog) return -ENOMEM; env->prog = prog = new_prog; @@ -22070,7 +22253,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) * to insn after BPF_ST that inits may_goto count. * Adjustment will succeed because bpf_patch_insn_data() didn't fail. */ - WARN_ON(adjust_jmp_off(env->prog, subprog_start, 1)); + WARN_ON(adjust_jmp_off(env->prog, subprog_start, delta)); } /* Since poke tab is now finalized, publish aux to tracker. */ @@ -22723,6 +22906,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, if (tgt_prog) { struct bpf_prog_aux *aux = tgt_prog->aux; bool tgt_changes_pkt_data; + bool tgt_might_sleep; if (bpf_prog_is_dev_bound(prog->aux) && !bpf_prog_dev_bound_match(prog, tgt_prog)) { @@ -22765,6 +22949,15 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, "Extension program changes packet data, while original does not\n"); return -EINVAL; } + + tgt_might_sleep = aux->func + ? aux->func[subprog]->aux->might_sleep + : aux->might_sleep; + if (prog->aux->might_sleep && !tgt_might_sleep) { + bpf_log(log, + "Extension program may sleep, while original does not\n"); + return -EINVAL; + } } if (!tgt_prog->jited) { bpf_log(log, "Can attach to only JITed progs\n"); @@ -23199,6 +23392,302 @@ static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, return 0; } +static bool can_fallthrough(struct bpf_insn *insn) +{ + u8 class = BPF_CLASS(insn->code); + u8 opcode = BPF_OP(insn->code); + + if (class != BPF_JMP && class != BPF_JMP32) + return true; + + if (opcode == BPF_EXIT || opcode == BPF_JA) + return false; + + return true; +} + +static bool can_jump(struct bpf_insn *insn) +{ + u8 class = BPF_CLASS(insn->code); + u8 opcode = BPF_OP(insn->code); + + if (class != BPF_JMP && class != BPF_JMP32) + return false; + + switch (opcode) { + case BPF_JA: + case BPF_JEQ: + case BPF_JNE: + case BPF_JLT: + case BPF_JLE: + case BPF_JGT: + case BPF_JGE: + case BPF_JSGT: + case BPF_JSGE: + case BPF_JSLT: + case BPF_JSLE: + case BPF_JCOND: + return true; + } + + return false; +} + +static int insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2]) +{ + struct bpf_insn *insn = &prog->insnsi[idx]; + int i = 0, insn_sz; + u32 dst; + + insn_sz = bpf_is_ldimm64(insn) ? 2 : 1; + if (can_fallthrough(insn) && idx + 1 < prog->len) + succ[i++] = idx + insn_sz; + + if (can_jump(insn)) { + dst = idx + jmp_offset(insn) + 1; + if (i == 0 || succ[0] != dst) + succ[i++] = dst; + } + + return i; +} + +/* Each field is a register bitmask */ +struct insn_live_regs { + u16 use; /* registers read by instruction */ + u16 def; /* registers written by instruction */ + u16 in; /* registers that may be alive before instruction */ + u16 out; /* registers that may be alive after instruction */ +}; + +/* Bitmask with 1s for all caller saved registers */ +#define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1) + +/* Compute info->{use,def} fields for the instruction */ +static void compute_insn_live_regs(struct bpf_verifier_env *env, + struct bpf_insn *insn, + struct insn_live_regs *info) +{ + struct call_summary cs; + u8 class = BPF_CLASS(insn->code); + u8 code = BPF_OP(insn->code); + u8 mode = BPF_MODE(insn->code); + u16 src = BIT(insn->src_reg); + u16 dst = BIT(insn->dst_reg); + u16 r0 = BIT(0); + u16 def = 0; + u16 use = 0xffff; + + switch (class) { + case BPF_LD: + switch (mode) { + case BPF_IMM: + if (BPF_SIZE(insn->code) == BPF_DW) { + def = dst; + use = 0; + } + break; + case BPF_LD | BPF_ABS: + case BPF_LD | BPF_IND: + /* stick with defaults */ + break; + } + break; + case BPF_LDX: + switch (mode) { + case BPF_MEM: + case BPF_MEMSX: + def = dst; + use = src; + break; + } + break; + case BPF_ST: + switch (mode) { + case BPF_MEM: + def = 0; + use = dst; + break; + } + break; + case BPF_STX: + switch (mode) { + case BPF_MEM: + def = 0; + use = dst | src; + break; + case BPF_ATOMIC: + switch (insn->imm) { + case BPF_CMPXCHG: + use = r0 | dst | src; + def = r0; + break; + case BPF_LOAD_ACQ: + def = dst; + use = src; + break; + case BPF_STORE_REL: + def = 0; + use = dst | src; + break; + default: + use = dst | src; + if (insn->imm & BPF_FETCH) + def = src; + else + def = 0; + } + break; + } + break; + case BPF_ALU: + case BPF_ALU64: + switch (code) { + case BPF_END: + use = dst; + def = dst; + break; + case BPF_MOV: + def = dst; + if (BPF_SRC(insn->code) == BPF_K) + use = 0; + else + use = src; + break; + default: + def = dst; + if (BPF_SRC(insn->code) == BPF_K) + use = dst; + else + use = dst | src; + } + break; + case BPF_JMP: + case BPF_JMP32: + switch (code) { + case BPF_JA: + case BPF_JCOND: + def = 0; + use = 0; + break; + case BPF_EXIT: + def = 0; + use = r0; + break; + case BPF_CALL: + def = ALL_CALLER_SAVED_REGS; + use = def & ~BIT(BPF_REG_0); + if (get_call_summary(env, insn, &cs)) + use = GENMASK(cs.num_params, 1); + break; + default: + def = 0; + if (BPF_SRC(insn->code) == BPF_K) + use = dst; + else + use = dst | src; + } + break; + } + + info->def = def; + info->use = use; +} + +/* Compute may-live registers after each instruction in the program. + * The register is live after the instruction I if it is read by some + * instruction S following I during program execution and is not + * overwritten between I and S. + * + * Store result in env->insn_aux_data[i].live_regs. + */ +static int compute_live_registers(struct bpf_verifier_env *env) +{ + struct bpf_insn_aux_data *insn_aux = env->insn_aux_data; + struct bpf_insn *insns = env->prog->insnsi; + struct insn_live_regs *state; + int insn_cnt = env->prog->len; + int err = 0, i, j; + bool changed; + + /* Use the following algorithm: + * - define the following: + * - I.use : a set of all registers read by instruction I; + * - I.def : a set of all registers written by instruction I; + * - I.in : a set of all registers that may be alive before I execution; + * - I.out : a set of all registers that may be alive after I execution; + * - insn_successors(I): a set of instructions S that might immediately + * follow I for some program execution; + * - associate separate empty sets 'I.in' and 'I.out' with each instruction; + * - visit each instruction in a postorder and update + * state[i].in, state[i].out as follows: + * + * state[i].out = U [state[s].in for S in insn_successors(i)] + * state[i].in = (state[i].out / state[i].def) U state[i].use + * + * (where U stands for set union, / stands for set difference) + * - repeat the computation while {in,out} fields changes for + * any instruction. + */ + state = kvcalloc(insn_cnt, sizeof(*state), GFP_KERNEL); + if (!state) { + err = -ENOMEM; + goto out; + } + + for (i = 0; i < insn_cnt; ++i) + compute_insn_live_regs(env, &insns[i], &state[i]); + + changed = true; + while (changed) { + changed = false; + for (i = 0; i < env->cfg.cur_postorder; ++i) { + int insn_idx = env->cfg.insn_postorder[i]; + struct insn_live_regs *live = &state[insn_idx]; + int succ_num; + u32 succ[2]; + u16 new_out = 0; + u16 new_in = 0; + + succ_num = insn_successors(env->prog, insn_idx, succ); + for (int s = 0; s < succ_num; ++s) + new_out |= state[succ[s]].in; + new_in = (new_out & ~live->def) | live->use; + if (new_out != live->out || new_in != live->in) { + live->in = new_in; + live->out = new_out; + changed = true; + } + } + } + + for (i = 0; i < insn_cnt; ++i) + insn_aux[i].live_regs_before = state[i].in; + + if (env->log.level & BPF_LOG_LEVEL2) { + verbose(env, "Live regs before insn:\n"); + for (i = 0; i < insn_cnt; ++i) { + verbose(env, "%3d: ", i); + for (j = BPF_REG_0; j < BPF_REG_10; ++j) + if (insn_aux[i].live_regs_before & BIT(j)) + verbose(env, "%d", j); + else + verbose(env, "."); + verbose(env, " "); + verbose_insn(env, &insns[i]); + if (bpf_is_ldimm64(&insns[i])) + i++; + } + } + +out: + kvfree(state); + kvfree(env->cfg.insn_postorder); + env->cfg.insn_postorder = NULL; + env->cfg.cur_postorder = 0; + return err; +} + int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) { u64 start_time = ktime_get_ns(); @@ -23320,6 +23809,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (ret) goto skip_full_check; + ret = compute_live_registers(env); + if (ret < 0) + goto skip_full_check; + ret = mark_fastcall_patterns(env); if (ret < 0) goto skip_full_check; @@ -23458,6 +23951,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 vfree(env->insn_aux_data); kvfree(env->insn_hist); err_free_env: + kvfree(env->cfg.insn_postorder); kvfree(env); return ret; } diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 245d5b4166999..73d7b50924ef6 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -591,7 +591,8 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, depot_stack_handle_t handle = 0; struct page *page = NULL; void *prealloc = NULL; - bool can_alloc = depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC; + bool allow_spin = gfpflags_allow_spinning(alloc_flags); + bool can_alloc = (depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC) && allow_spin; unsigned long flags; u32 hash; @@ -630,7 +631,7 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, prealloc = page_address(page); } - if (in_nmi()) { + if (in_nmi() || !allow_spin) { /* We can never allocate in NMI context. */ WARN_ON_ONCE(can_alloc); /* Best effort; bail if we fail to take the lock. */ @@ -671,7 +672,10 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, exit: if (prealloc) { /* Stack depot didn't use this memory, free it. */ - free_pages((unsigned long)prealloc, DEPOT_POOL_ORDER); + if (!allow_spin) + free_pages_nolock(virt_to_page(prealloc), DEPOT_POOL_ORDER); + else + free_pages((unsigned long)prealloc, DEPOT_POOL_ORDER); } if (found) handle = found->handle.handle; diff --git a/mm/internal.h b/mm/internal.h index 109ef30fee11f..10a8b4b3b86ec 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1187,6 +1187,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, #define ALLOC_NOFRAGMENT 0x0 #endif #define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */ +#define ALLOC_TRYLOCK 0x400 /* Only use spin_trylock in allocation path */ #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */ /* Flags that allow allocations below the min watermark. */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4de6acb9b8ecb..385100b730128 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1739,7 +1739,7 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg) } struct memcg_stock_pcp { - local_lock_t stock_lock; + localtry_lock_t stock_lock; struct mem_cgroup *cached; /* this never be root cgroup */ unsigned int nr_pages; @@ -1754,7 +1754,7 @@ struct memcg_stock_pcp { #define FLUSHING_CACHED_CHARGE 0 }; static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock) = { - .stock_lock = INIT_LOCAL_LOCK(stock_lock), + .stock_lock = INIT_LOCALTRY_LOCK(stock_lock), }; static DEFINE_MUTEX(percpu_charge_mutex); @@ -1766,6 +1766,7 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock, * consume_stock: Try to consume stocked charge on this cpu. * @memcg: memcg to consume from. * @nr_pages: how many pages to charge. + * @gfp_mask: allocation mask. * * The charges will only happen if @memcg matches the current cpu's memcg * stock, and at least @nr_pages are available in that stock. Failure to @@ -1773,7 +1774,8 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock, * * returns true if successful, false otherwise. */ -static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) +static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages, + gfp_t gfp_mask) { struct memcg_stock_pcp *stock; unsigned int stock_pages; @@ -1783,7 +1785,11 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) if (nr_pages > MEMCG_CHARGE_BATCH) return ret; - local_lock_irqsave(&memcg_stock.stock_lock, flags); + if (!localtry_trylock_irqsave(&memcg_stock.stock_lock, flags)) { + if (!gfpflags_allow_spinning(gfp_mask)) + return ret; + localtry_lock_irqsave(&memcg_stock.stock_lock, flags); + } stock = this_cpu_ptr(&memcg_stock); stock_pages = READ_ONCE(stock->nr_pages); @@ -1792,7 +1798,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) ret = true; } - local_unlock_irqrestore(&memcg_stock.stock_lock, flags); + localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); return ret; } @@ -1831,14 +1837,14 @@ static void drain_local_stock(struct work_struct *dummy) * drain_stock races is that we always operate on local CPU stock * here with IRQ disabled */ - local_lock_irqsave(&memcg_stock.stock_lock, flags); + localtry_lock_irqsave(&memcg_stock.stock_lock, flags); stock = this_cpu_ptr(&memcg_stock); old = drain_obj_stock(stock); drain_stock(stock); clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); - local_unlock_irqrestore(&memcg_stock.stock_lock, flags); + localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); obj_cgroup_put(old); } @@ -1868,9 +1874,20 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) { unsigned long flags; - local_lock_irqsave(&memcg_stock.stock_lock, flags); + if (!localtry_trylock_irqsave(&memcg_stock.stock_lock, flags)) { + /* + * In case of unlikely failure to lock percpu stock_lock + * uncharge memcg directly. + */ + if (mem_cgroup_is_root(memcg)) + return; + page_counter_uncharge(&memcg->memory, nr_pages); + if (do_memsw_account()) + page_counter_uncharge(&memcg->memsw, nr_pages); + return; + } __refill_stock(memcg, nr_pages); - local_unlock_irqrestore(&memcg_stock.stock_lock, flags); + localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); } /* @@ -2213,9 +2230,13 @@ int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask, unsigned long pflags; retry: - if (consume_stock(memcg, nr_pages)) + if (consume_stock(memcg, nr_pages, gfp_mask)) return 0; + if (!gfpflags_allow_spinning(gfp_mask)) + /* Avoid the refill and flush of the older stock */ + batch = nr_pages; + if (!do_memsw_account() || page_counter_try_charge(&memcg->memsw, batch, &counter)) { if (page_counter_try_charge(&memcg->memory, batch, &counter)) @@ -2699,7 +2720,7 @@ static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, unsigned long flags; int *bytes; - local_lock_irqsave(&memcg_stock.stock_lock, flags); + localtry_lock_irqsave(&memcg_stock.stock_lock, flags); stock = this_cpu_ptr(&memcg_stock); /* @@ -2752,7 +2773,7 @@ static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, if (nr) __mod_objcg_mlstate(objcg, pgdat, idx, nr); - local_unlock_irqrestore(&memcg_stock.stock_lock, flags); + localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); obj_cgroup_put(old); } @@ -2762,7 +2783,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) unsigned long flags; bool ret = false; - local_lock_irqsave(&memcg_stock.stock_lock, flags); + localtry_lock_irqsave(&memcg_stock.stock_lock, flags); stock = this_cpu_ptr(&memcg_stock); if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) { @@ -2770,7 +2791,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) ret = true; } - local_unlock_irqrestore(&memcg_stock.stock_lock, flags); + localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); return ret; } @@ -2862,7 +2883,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes, unsigned long flags; unsigned int nr_pages = 0; - local_lock_irqsave(&memcg_stock.stock_lock, flags); + localtry_lock_irqsave(&memcg_stock.stock_lock, flags); stock = this_cpu_ptr(&memcg_stock); if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */ @@ -2880,7 +2901,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes, stock->nr_bytes &= (PAGE_SIZE - 1); } - local_unlock_irqrestore(&memcg_stock.stock_lock, flags); + localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); obj_cgroup_put(old); if (nr_pages) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 579789600a3c7..a0904315d4da2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -88,6 +88,9 @@ typedef int __bitwise fpi_t; */ #define FPI_TO_TAIL ((__force fpi_t)BIT(1)) +/* Free the page without taking locks. Rely on trylock only. */ +#define FPI_TRYLOCK ((__force fpi_t)BIT(2)) + /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */ static DEFINE_MUTEX(pcp_batch_high_lock); #define MIN_PERCPU_PAGELIST_HIGH_FRACTION (8) @@ -1249,13 +1252,44 @@ static void split_large_buddy(struct zone *zone, struct page *page, } while (1); } +static void add_page_to_zone_llist(struct zone *zone, struct page *page, + unsigned int order) +{ + /* Remember the order */ + page->order = order; + /* Add the page to the free list */ + llist_add(&page->pcp_llist, &zone->trylock_free_pages); +} + static void free_one_page(struct zone *zone, struct page *page, unsigned long pfn, unsigned int order, fpi_t fpi_flags) { + struct llist_head *llhead; unsigned long flags; - spin_lock_irqsave(&zone->lock, flags); + if (!spin_trylock_irqsave(&zone->lock, flags)) { + if (unlikely(fpi_flags & FPI_TRYLOCK)) { + add_page_to_zone_llist(zone, page, order); + return; + } + spin_lock_irqsave(&zone->lock, flags); + } + + /* The lock succeeded. Process deferred pages. */ + llhead = &zone->trylock_free_pages; + if (unlikely(!llist_empty(llhead) && !(fpi_flags & FPI_TRYLOCK))) { + struct llist_node *llnode; + struct page *p, *tmp; + + llnode = llist_del_all(llhead); + llist_for_each_entry_safe(p, tmp, llnode, pcp_llist) { + unsigned int p_order = p->order; + + split_large_buddy(zone, p, page_to_pfn(p), p_order, fpi_flags); + __count_vm_events(PGFREE, 1 << p_order); + } + } split_large_buddy(zone, page, pfn, order, fpi_flags); spin_unlock_irqrestore(&zone->lock, flags); @@ -2307,7 +2341,11 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long flags; int i; - spin_lock_irqsave(&zone->lock, flags); + if (!spin_trylock_irqsave(&zone->lock, flags)) { + if (unlikely(alloc_flags & ALLOC_TRYLOCK)) + return 0; + spin_lock_irqsave(&zone->lock, flags); + } for (i = 0; i < count; ++i) { struct page *page = __rmqueue(zone, order, migratetype, alloc_flags); @@ -2595,7 +2633,7 @@ static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone, static void free_frozen_page_commit(struct zone *zone, struct per_cpu_pages *pcp, struct page *page, int migratetype, - unsigned int order) + unsigned int order, fpi_t fpi_flags) { int high, batch; int pindex; @@ -2630,6 +2668,14 @@ static void free_frozen_page_commit(struct zone *zone, } if (pcp->free_count < (batch << CONFIG_PCP_BATCH_SCALE_MAX)) pcp->free_count += (1 << order); + + if (unlikely(fpi_flags & FPI_TRYLOCK)) { + /* + * Do not attempt to take a zone lock. Let pcp->count get + * over high mark temporarily. + */ + return; + } high = nr_pcp_high(pcp, zone, batch, free_high); if (pcp->count >= high) { free_pcppages_bulk(zone, nr_pcp_free(pcp, batch, high, free_high), @@ -2644,7 +2690,8 @@ static void free_frozen_page_commit(struct zone *zone, /* * Free a pcp page */ -void free_frozen_pages(struct page *page, unsigned int order) +static void __free_frozen_pages(struct page *page, unsigned int order, + fpi_t fpi_flags) { unsigned long __maybe_unused UP_flags; struct per_cpu_pages *pcp; @@ -2653,7 +2700,7 @@ void free_frozen_pages(struct page *page, unsigned int order) int migratetype; if (!pcp_allowed_order(order)) { - __free_pages_ok(page, order, FPI_NONE); + __free_pages_ok(page, order, fpi_flags); return; } @@ -2671,23 +2718,33 @@ void free_frozen_pages(struct page *page, unsigned int order) migratetype = get_pfnblock_migratetype(page, pfn); if (unlikely(migratetype >= MIGRATE_PCPTYPES)) { if (unlikely(is_migrate_isolate(migratetype))) { - free_one_page(zone, page, pfn, order, FPI_NONE); + free_one_page(zone, page, pfn, order, fpi_flags); return; } migratetype = MIGRATE_MOVABLE; } + if (unlikely((fpi_flags & FPI_TRYLOCK) && IS_ENABLED(CONFIG_PREEMPT_RT) + && (in_nmi() || in_hardirq()))) { + add_page_to_zone_llist(zone, page, order); + return; + } pcp_trylock_prepare(UP_flags); pcp = pcp_spin_trylock(zone->per_cpu_pageset); if (pcp) { - free_frozen_page_commit(zone, pcp, page, migratetype, order); + free_frozen_page_commit(zone, pcp, page, migratetype, order, fpi_flags); pcp_spin_unlock(pcp); } else { - free_one_page(zone, page, pfn, order, FPI_NONE); + free_one_page(zone, page, pfn, order, fpi_flags); } pcp_trylock_finish(UP_flags); } +void free_frozen_pages(struct page *page, unsigned int order) +{ + __free_frozen_pages(page, order, FPI_NONE); +} + /* * Free a batch of folios */ @@ -2776,7 +2833,7 @@ void free_unref_folios(struct folio_batch *folios) trace_mm_page_free_batched(&folio->page); free_frozen_page_commit(zone, pcp, &folio->page, migratetype, - order); + order, FPI_NONE); } if (pcp) { @@ -2907,7 +2964,11 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone, do { page = NULL; - spin_lock_irqsave(&zone->lock, flags); + if (!spin_trylock_irqsave(&zone->lock, flags)) { + if (unlikely(alloc_flags & ALLOC_TRYLOCK)) + return NULL; + spin_lock_irqsave(&zone->lock, flags); + } if (alloc_flags & ALLOC_HIGHATOMIC) page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); if (!page) { @@ -4511,7 +4572,12 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order, might_alloc(gfp_mask); - if (should_fail_alloc_page(gfp_mask, order)) + /* + * Don't invoke should_fail logic, since it may call + * get_random_u32() and printk() which need to spin_lock. + */ + if (!(*alloc_flags & ALLOC_TRYLOCK) && + should_fail_alloc_page(gfp_mask, order)) return false; *alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, *alloc_flags); @@ -4809,9 +4875,10 @@ unsigned long get_zeroed_page_noprof(gfp_t gfp_mask) EXPORT_SYMBOL(get_zeroed_page_noprof); /** - * __free_pages - Free pages allocated with alloc_pages(). + * ___free_pages - Free pages allocated with alloc_pages(). * @page: The page pointer returned from alloc_pages(). * @order: The order of the allocation. + * @fpi_flags: Free Page Internal flags. * * This function can free multi-page allocations that are not compound * pages. It does not check that the @order passed in matches that of @@ -4828,22 +4895,37 @@ EXPORT_SYMBOL(get_zeroed_page_noprof); * Context: May be called in interrupt context or while holding a normal * spinlock, but not in NMI context or while holding a raw spinlock. */ -void __free_pages(struct page *page, unsigned int order) +static void ___free_pages(struct page *page, unsigned int order, + fpi_t fpi_flags) { /* get PageHead before we drop reference */ int head = PageHead(page); struct alloc_tag *tag = pgalloc_tag_get(page); if (put_page_testzero(page)) - free_frozen_pages(page, order); + __free_frozen_pages(page, order, fpi_flags); else if (!head) { pgalloc_tag_sub_pages(tag, (1 << order) - 1); while (order-- > 0) - free_frozen_pages(page + (1 << order), order); + __free_frozen_pages(page + (1 << order), order, + fpi_flags); } } +void __free_pages(struct page *page, unsigned int order) +{ + ___free_pages(page, order, FPI_NONE); +} EXPORT_SYMBOL(__free_pages); +/* + * Can be called while holding raw_spin_lock or from IRQ and NMI for any + * page type (not only those that came from try_alloc_pages) + */ +void free_pages_nolock(struct page *page, unsigned int order) +{ + ___free_pages(page, order, FPI_TRYLOCK); +} + void free_pages(unsigned long addr, unsigned int order) { if (addr != 0) { @@ -7071,3 +7153,94 @@ static bool __free_unaccepted(struct page *page) } #endif /* CONFIG_UNACCEPTED_MEMORY */ + +/** + * try_alloc_pages - opportunistic reentrant allocation from any context + * @nid: node to allocate from + * @order: allocation order size + * + * Allocates pages of a given order from the given node. This is safe to + * call from any context (from atomic, NMI, and also reentrant + * allocator -> tracepoint -> try_alloc_pages_noprof). + * Allocation is best effort and to be expected to fail easily so nobody should + * rely on the success. Failures are not reported via warn_alloc(). + * See always fail conditions below. + * + * Return: allocated page or NULL on failure. + */ +struct page *try_alloc_pages_noprof(int nid, unsigned int order) +{ + /* + * Do not specify __GFP_DIRECT_RECLAIM, since direct claim is not allowed. + * Do not specify __GFP_KSWAPD_RECLAIM either, since wake up of kswapd + * is not safe in arbitrary context. + * + * These two are the conditions for gfpflags_allow_spinning() being true. + * + * Specify __GFP_NOWARN since failing try_alloc_pages() is not a reason + * to warn. Also warn would trigger printk() which is unsafe from + * various contexts. We cannot use printk_deferred_enter() to mitigate, + * since the running context is unknown. + * + * Specify __GFP_ZERO to make sure that call to kmsan_alloc_page() below + * is safe in any context. Also zeroing the page is mandatory for + * BPF use cases. + * + * Though __GFP_NOMEMALLOC is not checked in the code path below, + * specify it here to highlight that try_alloc_pages() + * doesn't want to deplete reserves. + */ + gfp_t alloc_gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_NOMEMALLOC + | __GFP_ACCOUNT; + unsigned int alloc_flags = ALLOC_TRYLOCK; + struct alloc_context ac = { }; + struct page *page; + + /* + * In PREEMPT_RT spin_trylock() will call raw_spin_lock() which is + * unsafe in NMI. If spin_trylock() is called from hard IRQ the current + * task may be waiting for one rt_spin_lock, but rt_spin_trylock() will + * mark the task as the owner of another rt_spin_lock which will + * confuse PI logic, so return immediately if called form hard IRQ or + * NMI. + * + * Note, irqs_disabled() case is ok. This function can be called + * from raw_spin_lock_irqsave region. + */ + if (IS_ENABLED(CONFIG_PREEMPT_RT) && (in_nmi() || in_hardirq())) + return NULL; + if (!pcp_allowed_order(order)) + return NULL; + +#ifdef CONFIG_UNACCEPTED_MEMORY + /* Bailout, since try_to_accept_memory_one() needs to take a lock */ + if (has_unaccepted_memory()) + return NULL; +#endif + /* Bailout, since _deferred_grow_zone() needs to take a lock */ + if (deferred_pages_enabled()) + return NULL; + + if (nid == NUMA_NO_NODE) + nid = numa_node_id(); + + prepare_alloc_pages(alloc_gfp, order, nid, NULL, &ac, + &alloc_gfp, &alloc_flags); + + /* + * Best effort allocation from percpu free list. + * If it's empty attempt to spin_trylock zone->lock. + */ + page = get_page_from_freelist(alloc_gfp, order, alloc_flags, &ac); + + /* Unlike regular alloc_pages() there is no __alloc_pages_slowpath(). */ + + if (memcg_kmem_online() && page && + unlikely(__memcg_kmem_charge_page(page, alloc_gfp, order) != 0)) { + free_pages_nolock(page, order); + page = NULL; + } + trace_mm_page_alloc(page, order, alloc_gfp, ac.migratetype); + kmsan_alloc_page(page, order, alloc_gfp); + return page; +} diff --git a/mm/page_owner.c b/mm/page_owner.c index 2d6360eaccbb6..90e31d0e3ed78 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -294,7 +294,13 @@ void __reset_page_owner(struct page *page, unsigned short order) page_owner = get_page_owner(page_ext); alloc_handle = page_owner->handle; - handle = save_stack(GFP_NOWAIT | __GFP_NOWARN); + /* + * Do not specify GFP_NOWAIT to make gfpflags_allow_spinning() == false + * to prevent issues in stack_depot_save(). + * This is similar to try_alloc_pages() gfp flags, but only used + * to signal stack_depot to avoid spin_locks. + */ + handle = save_stack(__GFP_NOWARN); __update_page_owner_free_handle(page_ext, handle, order, current->pid, current->tgid, free_ts_nsec); page_ext_put(page_ext); diff --git a/net/core/filter.c b/net/core/filter.c index 827108c6dad9f..dcc53ac5c5458 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -9637,7 +9637,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, queue_mapping): if (type == BPF_WRITE) { - u32 off = bpf_target_off(struct sk_buff, queue_mapping, 2, target_size); + u32 offset = bpf_target_off(struct sk_buff, queue_mapping, 2, target_size); if (BPF_CLASS(si->code) == BPF_ST && si->imm >= NO_QUEUE_MAPPING) { *insn++ = BPF_JMP_A(0); /* noop */ @@ -9646,7 +9646,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, if (BPF_CLASS(si->code) == BPF_STX) *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1); - *insn++ = BPF_EMIT_STORE(BPF_H, si, off); + *insn++ = BPF_EMIT_STORE(BPF_H, si, offset); } else { *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index fff6cdb8d11a2..bb37897c03939 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -51,6 +51,9 @@ #define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ #define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ +#define BPF_LOAD_ACQ 0x100 /* load-acquire */ +#define BPF_STORE_REL 0x110 /* store-release */ + enum bpf_cond_pseudo_jmp { BPF_MAY_GOTO = 0, }; @@ -1207,6 +1210,7 @@ enum bpf_perf_event_type { #define BPF_F_BEFORE (1U << 3) #define BPF_F_AFTER (1U << 4) #define BPF_F_ID (1U << 5) +#define BPF_F_PREORDER (1U << 6) #define BPF_F_LINK BPF_F_LINK /* 1 << 13 */ /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 899e98225f3b6..8e32286854ef3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -670,11 +670,18 @@ struct elf_state { struct usdt_manager; +enum bpf_object_state { + OBJ_OPEN, + OBJ_PREPARED, + OBJ_LOADED, +}; + struct bpf_object { char name[BPF_OBJ_NAME_LEN]; char license[64]; __u32 kern_version; + enum bpf_object_state state; struct bpf_program *programs; size_t nr_programs; struct bpf_map *maps; @@ -686,7 +693,6 @@ struct bpf_object { int nr_extern; int kconfig_map_idx; - bool loaded; bool has_subcalls; bool has_rodata; @@ -1511,7 +1517,7 @@ static struct bpf_object *bpf_object__new(const char *path, obj->kconfig_map_idx = -1; obj->kern_version = get_kernel_version(); - obj->loaded = false; + obj->state = OBJ_OPEN; return obj; } @@ -4845,6 +4851,11 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) return 0; } +static bool map_is_created(const struct bpf_map *map) +{ + return map->obj->state >= OBJ_PREPARED || map->reused; +} + bool bpf_map__autocreate(const struct bpf_map *map) { return map->autocreate; @@ -4852,7 +4863,7 @@ bool bpf_map__autocreate(const struct bpf_map *map) int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) { - if (map->obj->loaded) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->autocreate = autocreate; @@ -4946,7 +4957,7 @@ struct bpf_map *bpf_map__inner_map(struct bpf_map *map) int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) { - if (map->obj->loaded) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->def.max_entries = max_entries; @@ -5191,11 +5202,6 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) static void bpf_map__destroy(struct bpf_map *map); -static bool map_is_created(const struct bpf_map *map) -{ - return map->obj->loaded || map->reused; -} - static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) { LIBBPF_OPTS(bpf_map_create_opts, create_attr); @@ -7895,13 +7901,6 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) size_t i; int err; - for (i = 0; i < obj->nr_programs; i++) { - prog = &obj->programs[i]; - err = bpf_object__sanitize_prog(obj, prog); - if (err) - return err; - } - for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; if (prog_is_subprog(obj, prog)) @@ -7927,6 +7926,21 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) return 0; } +static int bpf_object_prepare_progs(struct bpf_object *obj) +{ + struct bpf_program *prog; + size_t i; + int err; + + for (i = 0; i < obj->nr_programs; i++) { + prog = &obj->programs[i]; + err = bpf_object__sanitize_prog(obj, prog); + if (err) + return err; + } + return 0; +} + static const struct bpf_sec_def *find_sec_def(const char *sec_name); static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts) @@ -8543,14 +8557,77 @@ static int bpf_object_prepare_struct_ops(struct bpf_object *obj) return 0; } +static void bpf_object_unpin(struct bpf_object *obj) +{ + int i; + + /* unpin any maps that were auto-pinned during load */ + for (i = 0; i < obj->nr_maps; i++) + if (obj->maps[i].pinned && !obj->maps[i].reused) + bpf_map__unpin(&obj->maps[i], NULL); +} + +static void bpf_object_post_load_cleanup(struct bpf_object *obj) +{ + int i; + + /* clean up fd_array */ + zfree(&obj->fd_array); + + /* clean up module BTFs */ + for (i = 0; i < obj->btf_module_cnt; i++) { + close(obj->btf_modules[i].fd); + btf__free(obj->btf_modules[i].btf); + free(obj->btf_modules[i].name); + } + obj->btf_module_cnt = 0; + zfree(&obj->btf_modules); + + /* clean up vmlinux BTF */ + btf__free(obj->btf_vmlinux); + obj->btf_vmlinux = NULL; +} + +static int bpf_object_prepare(struct bpf_object *obj, const char *target_btf_path) +{ + int err; + + if (obj->state >= OBJ_PREPARED) { + pr_warn("object '%s': prepare loading can't be attempted twice\n", obj->name); + return -EINVAL; + } + + err = bpf_object_prepare_token(obj); + err = err ? : bpf_object__probe_loading(obj); + err = err ? : bpf_object__load_vmlinux_btf(obj, false); + err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); + err = err ? : bpf_object__sanitize_maps(obj); + err = err ? : bpf_object__init_kern_struct_ops_maps(obj); + err = err ? : bpf_object_adjust_struct_ops_autoload(obj); + err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); + err = err ? : bpf_object__sanitize_and_load_btf(obj); + err = err ? : bpf_object__create_maps(obj); + err = err ? : bpf_object_prepare_progs(obj); + + if (err) { + bpf_object_unpin(obj); + bpf_object_unload(obj); + obj->state = OBJ_LOADED; + return err; + } + + obj->state = OBJ_PREPARED; + return 0; +} + static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) { - int err, i; + int err; if (!obj) return libbpf_err(-EINVAL); - if (obj->loaded) { + if (obj->state >= OBJ_LOADED) { pr_warn("object '%s': load can't be attempted twice\n", obj->name); return libbpf_err(-EINVAL); } @@ -8565,17 +8642,12 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch return libbpf_err(-LIBBPF_ERRNO__ENDIAN); } - err = bpf_object_prepare_token(obj); - err = err ? : bpf_object__probe_loading(obj); - err = err ? : bpf_object__load_vmlinux_btf(obj, false); - err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); - err = err ? : bpf_object__sanitize_maps(obj); - err = err ? : bpf_object__init_kern_struct_ops_maps(obj); - err = err ? : bpf_object_adjust_struct_ops_autoload(obj); - err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); - err = err ? : bpf_object__sanitize_and_load_btf(obj); - err = err ? : bpf_object__create_maps(obj); - err = err ? : bpf_object__load_progs(obj, extra_log_level); + if (obj->state < OBJ_PREPARED) { + err = bpf_object_prepare(obj, target_btf_path); + if (err) + return libbpf_err(err); + } + err = bpf_object__load_progs(obj, extra_log_level); err = err ? : bpf_object_init_prog_arrays(obj); err = err ? : bpf_object_prepare_struct_ops(obj); @@ -8587,36 +8659,22 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); } - /* clean up fd_array */ - zfree(&obj->fd_array); + bpf_object_post_load_cleanup(obj); + obj->state = OBJ_LOADED; /* doesn't matter if successfully or not */ - /* clean up module BTFs */ - for (i = 0; i < obj->btf_module_cnt; i++) { - close(obj->btf_modules[i].fd); - btf__free(obj->btf_modules[i].btf); - free(obj->btf_modules[i].name); + if (err) { + bpf_object_unpin(obj); + bpf_object_unload(obj); + pr_warn("failed to load object '%s'\n", obj->path); + return libbpf_err(err); } - free(obj->btf_modules); - - /* clean up vmlinux BTF */ - btf__free(obj->btf_vmlinux); - obj->btf_vmlinux = NULL; - - obj->loaded = true; /* doesn't matter if successfully or not */ - - if (err) - goto out; return 0; -out: - /* unpin any maps that were auto-pinned during load */ - for (i = 0; i < obj->nr_maps; i++) - if (obj->maps[i].pinned && !obj->maps[i].reused) - bpf_map__unpin(&obj->maps[i], NULL); +} - bpf_object_unload(obj); - pr_warn("failed to load object '%s'\n", obj->path); - return libbpf_err(err); +int bpf_object__prepare(struct bpf_object *obj) +{ + return libbpf_err(bpf_object_prepare(obj, NULL)); } int bpf_object__load(struct bpf_object *obj) @@ -8866,7 +8924,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) if (!obj) return libbpf_err(-ENOENT); - if (!obj->loaded) { + if (obj->state < OBJ_PREPARED) { pr_warn("object not yet loaded; load it first\n"); return libbpf_err(-ENOENT); } @@ -8945,7 +9003,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path) if (!obj) return libbpf_err(-ENOENT); - if (!obj->loaded) { + if (obj->state < OBJ_LOADED) { pr_warn("object not yet loaded; load it first\n"); return libbpf_err(-ENOENT); } @@ -9064,6 +9122,13 @@ void bpf_object__close(struct bpf_object *obj) if (IS_ERR_OR_NULL(obj)) return; + /* + * if user called bpf_object__prepare() without ever getting to + * bpf_object__load(), we need to clean up stuff that is normally + * cleaned up at the end of loading step + */ + bpf_object_post_load_cleanup(obj); + usdt_manager_free(obj->usdt_man); obj->usdt_man = NULL; @@ -9132,7 +9197,7 @@ int bpf_object__btf_fd(const struct bpf_object *obj) int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version) { - if (obj->loaded) + if (obj->state >= OBJ_LOADED) return libbpf_err(-EINVAL); obj->kern_version = kern_version; @@ -9229,7 +9294,7 @@ bool bpf_program__autoload(const struct bpf_program *prog) int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EINVAL); prog->autoload = autoload; @@ -9261,7 +9326,7 @@ int bpf_program__set_insns(struct bpf_program *prog, { struct bpf_insn *insns; - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EBUSY); insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns)); @@ -9304,7 +9369,7 @@ static int last_custom_sec_def_handler_id; int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EBUSY); /* if type is not changed, do nothing */ @@ -9335,7 +9400,7 @@ enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program int bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EBUSY); prog->expected_attach_type = type; @@ -9349,7 +9414,7 @@ __u32 bpf_program__flags(const struct bpf_program *prog) int bpf_program__set_flags(struct bpf_program *prog, __u32 flags) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EBUSY); prog->prog_flags = flags; @@ -9363,7 +9428,7 @@ __u32 bpf_program__log_level(const struct bpf_program *prog) int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EBUSY); prog->log_level = log_level; @@ -9382,7 +9447,7 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log return libbpf_err(-EINVAL); if (prog->log_size > UINT_MAX) return libbpf_err(-EINVAL); - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EBUSY); prog->log_buf = log_buf; @@ -10299,7 +10364,7 @@ static int map_btf_datasec_resize(struct bpf_map *map, __u32 size) int bpf_map__set_value_size(struct bpf_map *map, __u32 size) { - if (map->obj->loaded || map->reused) + if (map_is_created(map)) return libbpf_err(-EBUSY); if (map->mmaped) { @@ -10345,7 +10410,7 @@ int bpf_map__set_initial_value(struct bpf_map *map, { size_t actual_sz; - if (map->obj->loaded || map->reused) + if (map_is_created(map)) return libbpf_err(-EBUSY); if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG) @@ -13666,7 +13731,7 @@ int bpf_program__set_attach_target(struct bpf_program *prog, if (!prog || attach_prog_fd < 0) return libbpf_err(-EINVAL); - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EINVAL); if (attach_prog_fd && !attach_func_name) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 3020ee45303a0..e0605403f9773 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -241,6 +241,19 @@ LIBBPF_API struct bpf_object * bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts); +/** + * @brief **bpf_object__prepare()** prepares BPF object for loading: + * performs ELF processing, relocations, prepares final state of BPF program + * instructions (accessible with bpf_program__insns()), creates and + * (potentially) pins maps. Leaves BPF object in the state ready for program + * loading. + * @param obj Pointer to a valid BPF object instance returned by + * **bpf_object__open*()** API + * @return 0, on success; negative error code, otherwise, error code is + * stored in errno + */ +int bpf_object__prepare(struct bpf_object *obj); + /** * @brief **bpf_object__load()** loads BPF object into kernel. * @param obj Pointer to a valid BPF object instance returned by diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index b5a838de6f47c..d8b71f22f197c 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -436,6 +436,7 @@ LIBBPF_1.6.0 { bpf_linker__add_buf; bpf_linker__add_fd; bpf_linker__new_fd; + bpf_object__prepare; btf__add_decl_attr; btf__add_type_attr; } LIBBPF_1.5.0; diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index abfb450c26bb3..e6a02d5b87d12 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -100,7 +100,6 @@ TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c) # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ - test_tunnel.sh \ test_lwt_seg6local.sh \ test_lirc_mode2.sh \ test_xdp_vlan_mode_generic.sh \ diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 4ebd0da898f5c..1d53a8561ee2f 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -610,9 +610,11 @@ static int do_test_single(struct bpf_align_test *test) .log_size = sizeof(bpf_vlog), .log_level = 2, ); + const char *main_pass_start = "0: R1=ctx() R10=fp0"; const char *line_ptr; int cur_line = -1; int prog_len, i; + char *start; int fd_prog; int ret; @@ -632,7 +634,13 @@ static int do_test_single(struct bpf_align_test *test) ret = 0; /* We make a local copy so that we can strtok() it */ strncpy(bpf_vlog_copy, bpf_vlog, sizeof(bpf_vlog_copy)); - line_ptr = strtok(bpf_vlog_copy, "\n"); + start = strstr(bpf_vlog_copy, main_pass_start); + if (!start) { + ret = 1; + printf("Can't find initial line '%s'\n", main_pass_start); + goto out; + } + line_ptr = strtok(start, "\n"); for (i = 0; i < MAX_MATCHES; i++) { struct bpf_reg_match m = test->matches[i]; const char *p; @@ -682,6 +690,7 @@ static int do_test_single(struct bpf_align_test *test) break; } } +out: if (fd_prog >= 0) close(fd_prog); } diff --git a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c index 26e7c06c6cb4d..d98577a6babc0 100644 --- a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c +++ b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c @@ -162,6 +162,66 @@ static void test_uaf(struct arena_atomics *skel) ASSERT_EQ(skel->arena->uaf_recovery_fails, 0, "uaf_recovery_fails"); } +static void test_load_acquire(struct arena_atomics *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, prog_fd; + + if (skel->data->skip_lacq_srel_tests) { + printf("%s:SKIP: ENABLE_ATOMICS_TESTS not defined, Clang doesn't support addr_space_cast, and/or JIT doesn't support load-acquire\n", + __func__); + test__skip(); + return; + } + + /* No need to attach it, just run it directly */ + prog_fd = bpf_program__fd(skel->progs.load_acquire); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + ASSERT_EQ(skel->arena->load_acquire8_result, 0x12, + "load_acquire8_result"); + ASSERT_EQ(skel->arena->load_acquire16_result, 0x1234, + "load_acquire16_result"); + ASSERT_EQ(skel->arena->load_acquire32_result, 0x12345678, + "load_acquire32_result"); + ASSERT_EQ(skel->arena->load_acquire64_result, 0x1234567890abcdef, + "load_acquire64_result"); +} + +static void test_store_release(struct arena_atomics *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, prog_fd; + + if (skel->data->skip_lacq_srel_tests) { + printf("%s:SKIP: ENABLE_ATOMICS_TESTS not defined, Clang doesn't support addr_space_cast, and/or JIT doesn't support store-release\n", + __func__); + test__skip(); + return; + } + + /* No need to attach it, just run it directly */ + prog_fd = bpf_program__fd(skel->progs.store_release); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + ASSERT_EQ(skel->arena->store_release8_result, 0x12, + "store_release8_result"); + ASSERT_EQ(skel->arena->store_release16_result, 0x1234, + "store_release16_result"); + ASSERT_EQ(skel->arena->store_release32_result, 0x12345678, + "store_release32_result"); + ASSERT_EQ(skel->arena->store_release64_result, 0x1234567890abcdef, + "store_release64_result"); +} + void test_arena_atomics(void) { struct arena_atomics *skel; @@ -171,7 +231,7 @@ void test_arena_atomics(void) if (!ASSERT_OK_PTR(skel, "arena atomics skeleton open")) return; - if (skel->data->skip_tests) { + if (skel->data->skip_all_tests) { printf("%s:SKIP:no ENABLE_ATOMICS_TESTS or no addr_space_cast support in clang", __func__); test__skip(); @@ -198,6 +258,10 @@ void test_arena_atomics(void) test_xchg(skel); if (test__start_subtest("uaf")) test_uaf(skel); + if (test__start_subtest("load_acquire")) + test_load_acquire(skel); + if (test__start_subtest("store_release")) + test_store_release(skel); cleanup: arena_atomics__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index a4a1f93878d40..dbd13f8e42a7a 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -72,11 +72,14 @@ static void test_bpf_nf_ct(int mode) if (!ASSERT_OK(system(cmd), cmd)) goto end; - srv_port = (mode == TEST_XDP) ? 5005 : 5006; - srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", srv_port, TIMEOUT_MS); + srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", 0, TIMEOUT_MS); if (!ASSERT_GE(srv_fd, 0, "start_server")) goto end; + srv_port = get_socket_local_port(srv_fd); + if (!ASSERT_GE(srv_port, 0, "get_sock_local_port")) + goto end; + client_fd = connect_to_server(srv_fd); if (!ASSERT_GE(client_fd, 0, "connect_to_server")) goto end; @@ -91,7 +94,7 @@ static void test_bpf_nf_ct(int mode) skel->bss->saddr = peer_addr.sin_addr.s_addr; skel->bss->sport = peer_addr.sin_port; skel->bss->daddr = peer_addr.sin_addr.s_addr; - skel->bss->dport = htons(srv_port); + skel->bss->dport = srv_port; if (mode == TEST_XDP) prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test); diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c new file mode 100644 index 0000000000000..d4d583872fa26 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include +#include "cgroup_helpers.h" +#include "cgroup_preorder.skel.h" + +static int run_getsockopt_test(int cg_parent, int cg_child, int sock_fd, bool all_preorder) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opts); + enum bpf_attach_type prog_c_atype, prog_c2_atype, prog_p_atype, prog_p2_atype; + int prog_c_fd, prog_c2_fd, prog_p_fd, prog_p2_fd; + struct cgroup_preorder *skel = NULL; + struct bpf_program *prog; + __u8 *result, buf; + socklen_t optlen; + int err = 0; + + skel = cgroup_preorder__open_and_load(); + if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load")) + return 0; + + buf = 0x00; + err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1); + if (!ASSERT_OK(err, "setsockopt")) + goto close_skel; + + opts.flags = BPF_F_ALLOW_MULTI; + if (all_preorder) + opts.flags |= BPF_F_PREORDER; + prog = skel->progs.child; + prog_c_fd = bpf_program__fd(prog); + prog_c_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_c_fd, cg_child, prog_c_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-child")) + goto close_skel; + + opts.flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER; + prog = skel->progs.child_2; + prog_c2_fd = bpf_program__fd(prog); + prog_c2_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_c2_fd, cg_child, prog_c2_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-child_2")) + goto detach_child; + + optlen = 1; + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (!ASSERT_OK(err, "getsockopt")) + goto detach_child_2; + + result = skel->bss->result; + if (all_preorder) + ASSERT_TRUE(result[0] == 1 && result[1] == 2, "child only"); + else + ASSERT_TRUE(result[0] == 2 && result[1] == 1, "child only"); + + skel->bss->idx = 0; + memset(result, 0, 4); + + opts.flags = BPF_F_ALLOW_MULTI; + if (all_preorder) + opts.flags |= BPF_F_PREORDER; + prog = skel->progs.parent; + prog_p_fd = bpf_program__fd(prog); + prog_p_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_p_fd, cg_parent, prog_p_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent")) + goto detach_child_2; + + opts.flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER; + prog = skel->progs.parent_2; + prog_p2_fd = bpf_program__fd(prog); + prog_p2_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_p2_fd, cg_parent, prog_p2_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent_2")) + goto detach_parent; + + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (!ASSERT_OK(err, "getsockopt")) + goto detach_parent_2; + + if (all_preorder) + ASSERT_TRUE(result[0] == 3 && result[1] == 4 && result[2] == 1 && result[3] == 2, + "parent and child"); + else + ASSERT_TRUE(result[0] == 4 && result[1] == 2 && result[2] == 1 && result[3] == 3, + "parent and child"); + +detach_parent_2: + ASSERT_OK(bpf_prog_detach2(prog_p2_fd, cg_parent, prog_p2_atype), + "bpf_prog_detach2-parent_2"); +detach_parent: + ASSERT_OK(bpf_prog_detach2(prog_p_fd, cg_parent, prog_p_atype), + "bpf_prog_detach2-parent"); +detach_child_2: + ASSERT_OK(bpf_prog_detach2(prog_c2_fd, cg_child, prog_c2_atype), + "bpf_prog_detach2-child_2"); +detach_child: + ASSERT_OK(bpf_prog_detach2(prog_c_fd, cg_child, prog_c_atype), + "bpf_prog_detach2-child"); +close_skel: + cgroup_preorder__destroy(skel); + return err; +} + +void test_cgroup_preorder(void) +{ + int cg_parent = -1, cg_child = -1, sock_fd = -1; + + cg_parent = test__join_cgroup("/parent"); + if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent")) + goto out; + + cg_child = test__join_cgroup("/parent/child"); + if (!ASSERT_GE(cg_child, 0, "join_cgroup /parent/child")) + goto out; + + sock_fd = socket(AF_INET, SOCK_STREAM, 0); + if (!ASSERT_GE(sock_fd, 0, "socket")) + goto out; + + ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, false), "getsockopt_test_1"); + ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, true), "getsockopt_test_2"); + +out: + close(sock_fd); + close(cg_child); + close(cg_parent); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c index 64abba72ac107..37c1cc52ed98e 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c @@ -10,12 +10,18 @@ static int run_test(int cgroup_fd, int server_fd, bool classid) { struct connect4_dropper *skel; - int fd, err = 0; + int fd, err = 0, port; skel = connect4_dropper__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open")) return -1; + port = get_socket_local_port(server_fd); + if (!ASSERT_GE(port, 0, "get_socket_local_port")) + return -1; + + skel->bss->port = ntohs(port); + skel->links.connect_v4_dropper = bpf_program__attach_cgroup(skel->progs.connect_v4_dropper, cgroup_fd); @@ -48,10 +54,9 @@ void test_cgroup_v1v2(void) { struct network_helper_opts opts = {}; int server_fd, client_fd, cgroup_fd; - static const int port = 60120; /* Step 1: Check base connectivity works without any BPF. */ - server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); if (!ASSERT_GE(server_fd, 0, "server_fd")) return; client_fd = connect_to_fd_opts(server_fd, &opts); @@ -66,7 +71,7 @@ void test_cgroup_v1v2(void) cgroup_fd = test__join_cgroup("/connect_dropper"); if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd")) return; - server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); if (!ASSERT_GE(server_fd, 0, "server_fd")) { close(cgroup_fd); return; diff --git a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c deleted file mode 100644 index 7526de3790814..0000000000000 --- a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c +++ /dev/null @@ -1,107 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "bpf/libbpf.h" -#include "changes_pkt_data_freplace.skel.h" -#include "changes_pkt_data.skel.h" -#include - -static void print_verifier_log(const char *log) -{ - if (env.verbosity >= VERBOSE_VERY) - fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log); -} - -static void test_aux(const char *main_prog_name, - const char *to_be_replaced, - const char *replacement, - bool expect_load) -{ - struct changes_pkt_data_freplace *freplace = NULL; - struct bpf_program *freplace_prog = NULL; - struct bpf_program *main_prog = NULL; - LIBBPF_OPTS(bpf_object_open_opts, opts); - struct changes_pkt_data *main = NULL; - char log[16*1024]; - int err; - - opts.kernel_log_buf = log; - opts.kernel_log_size = sizeof(log); - if (env.verbosity >= VERBOSE_SUPER) - opts.kernel_log_level = 1 | 2 | 4; - main = changes_pkt_data__open_opts(&opts); - if (!ASSERT_OK_PTR(main, "changes_pkt_data__open")) - goto out; - main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name); - if (!ASSERT_OK_PTR(main_prog, "main_prog")) - goto out; - bpf_program__set_autoload(main_prog, true); - err = changes_pkt_data__load(main); - print_verifier_log(log); - if (!ASSERT_OK(err, "changes_pkt_data__load")) - goto out; - freplace = changes_pkt_data_freplace__open_opts(&opts); - if (!ASSERT_OK_PTR(freplace, "changes_pkt_data_freplace__open")) - goto out; - freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement); - if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog")) - goto out; - bpf_program__set_autoload(freplace_prog, true); - bpf_program__set_autoattach(freplace_prog, true); - bpf_program__set_attach_target(freplace_prog, - bpf_program__fd(main_prog), - to_be_replaced); - err = changes_pkt_data_freplace__load(freplace); - print_verifier_log(log); - if (expect_load) { - ASSERT_OK(err, "changes_pkt_data_freplace__load"); - } else { - ASSERT_ERR(err, "changes_pkt_data_freplace__load"); - ASSERT_HAS_SUBSTR(log, "Extension program changes packet data", "error log"); - } - -out: - changes_pkt_data_freplace__destroy(freplace); - changes_pkt_data__destroy(main); -} - -/* There are two global subprograms in both changes_pkt_data.skel.h: - * - one changes packet data; - * - another does not. - * It is ok to freplace subprograms that change packet data with those - * that either do or do not. It is only ok to freplace subprograms - * that do not change packet data with those that do not as well. - * The below tests check outcomes for each combination of such freplace. - * Also test a case when main subprogram itself is replaced and is a single - * subprogram in a program. - */ -void test_changes_pkt_data_freplace(void) -{ - struct { - const char *main; - const char *to_be_replaced; - bool changes; - } mains[] = { - { "main_with_subprogs", "changes_pkt_data", true }, - { "main_with_subprogs", "does_not_change_pkt_data", false }, - { "main_changes", "main_changes", true }, - { "main_does_not_change", "main_does_not_change", false }, - }; - struct { - const char *func; - bool changes; - } replacements[] = { - { "changes_pkt_data", true }, - { "does_not_change_pkt_data", false } - }; - char buf[64]; - - for (int i = 0; i < ARRAY_SIZE(mains); ++i) { - for (int j = 0; j < ARRAY_SIZE(replacements); ++j) { - snprintf(buf, sizeof(buf), "%s_with_%s", - mains[i].to_be_replaced, replacements[j].func); - if (!test__start_subtest(buf)) - continue; - test_aux(mains[i].main, mains[i].to_be_replaced, replacements[j].func, - mains[i].changes || !replacements[j].changes); - } - } -} diff --git a/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c b/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c new file mode 100644 index 0000000000000..285f20241fe14 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "compute_live_registers.skel.h" +#include "test_progs.h" + +void test_compute_live_registers(void) +{ + RUN_TESTS(compute_live_registers); +} diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index b614a5272dfd6..e29cc16124c2d 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -10,6 +10,7 @@ enum test_setup_type { SETUP_SYSCALL_SLEEP, SETUP_SKB_PROG, SETUP_SKB_PROG_TP, + SETUP_XDP_PROG, }; static struct { @@ -18,6 +19,8 @@ static struct { } success_tests[] = { {"test_read_write", SETUP_SYSCALL_SLEEP}, {"test_dynptr_data", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_copy", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_copy_xdp", SETUP_XDP_PROG}, {"test_ringbuf", SETUP_SYSCALL_SLEEP}, {"test_skb_readonly", SETUP_SKB_PROG}, {"test_dynptr_skb_data", SETUP_SKB_PROG}, @@ -120,6 +123,24 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ break; } + case SETUP_XDP_PROG: + { + char data[5000]; + int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &data, + .data_size_in = sizeof(data), + .repeat = 1, + ); + + prog_fd = bpf_program__fd(prog); + err = bpf_prog_test_run_opts(prog_fd, &opts); + + if (!ASSERT_OK(err, "test_run")) + goto cleanup; + + break; + } } ASSERT_EQ(skel->bss->err, 0, "err"); diff --git a/tools/testing/selftests/bpf/prog_tests/prepare.c b/tools/testing/selftests/bpf/prog_tests/prepare.c new file mode 100644 index 0000000000000..fb5cdad971168 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/prepare.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta */ + +#include +#include +#include "prepare.skel.h" + +static bool check_prepared(struct bpf_object *obj) +{ + bool is_prepared = true; + const struct bpf_map *map; + + bpf_object__for_each_map(map, obj) { + if (bpf_map__fd(map) < 0) + is_prepared = false; + } + + return is_prepared; +} + +static void test_prepare_no_load(void) +{ + struct prepare *skel; + int err; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + ); + + skel = prepare__open(); + if (!ASSERT_OK_PTR(skel, "prepare__open")) + return; + + if (!ASSERT_FALSE(check_prepared(skel->obj), "not check_prepared")) + goto cleanup; + + err = bpf_object__prepare(skel->obj); + + if (!ASSERT_TRUE(check_prepared(skel->obj), "check_prepared")) + goto cleanup; + + if (!ASSERT_OK(err, "bpf_object__prepare")) + goto cleanup; + +cleanup: + prepare__destroy(skel); +} + +static void test_prepare_load(void) +{ + struct prepare *skel; + int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + ); + + skel = prepare__open(); + if (!ASSERT_OK_PTR(skel, "prepare__open")) + return; + + if (!ASSERT_FALSE(check_prepared(skel->obj), "not check_prepared")) + goto cleanup; + + err = bpf_object__prepare(skel->obj); + if (!ASSERT_OK(err, "bpf_object__prepare")) + goto cleanup; + + err = prepare__load(skel); + if (!ASSERT_OK(err, "prepare__load")) + goto cleanup; + + if (!ASSERT_TRUE(check_prepared(skel->obj), "check_prepared")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.program); + if (!ASSERT_GE(prog_fd, 0, "prog_fd")) + goto cleanup; + + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + goto cleanup; + + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + goto cleanup; + + ASSERT_EQ(skel->bss->err, 0, "err"); + +cleanup: + prepare__destroy(skel); +} + +void test_prepare(void) +{ + if (test__start_subtest("prepare_load")) + test_prepare_load(); + if (test__start_subtest("prepare_no_load")) + test_prepare_no_load(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c index ebe0c12b55363..c9f855e5da240 100644 --- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c @@ -81,6 +81,9 @@ static const char * const inproper_region_tests[] = { "nested_rcu_region", "rcu_read_lock_global_subprog_lock", "rcu_read_lock_global_subprog_unlock", + "rcu_read_lock_sleepable_helper_global_subprog", + "rcu_read_lock_sleepable_kfunc_global_subprog", + "rcu_read_lock_sleepable_global_subprog_indirect", }; static void test_inproper_region(void) diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c index 2b0068742ef9f..e3ea5dc2f697c 100644 --- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -50,6 +50,9 @@ static struct { { "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" }, { "lock_global_subprog_call1", "global function calls are not allowed while holding a lock" }, { "lock_global_subprog_call2", "global function calls are not allowed while holding a lock" }, + { "lock_global_sleepable_helper_subprog", "global function calls are not allowed while holding a lock" }, + { "lock_global_sleepable_kfunc_subprog", "global function calls are not allowed while holding a lock" }, + { "lock_global_sleepable_subprog_indirect", "global function calls are not allowed while holding a lock" }, }; static int match_regex(const char *pattern, const char *string) diff --git a/tools/testing/selftests/bpf/prog_tests/summarization.c b/tools/testing/selftests/bpf/prog_tests/summarization.c new file mode 100644 index 0000000000000..5dd6c120a8386 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/summarization.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bpf/libbpf.h" +#include "summarization_freplace.skel.h" +#include "summarization.skel.h" +#include + +static void print_verifier_log(const char *log) +{ + if (env.verbosity >= VERBOSE_VERY) + fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log); +} + +static void test_aux(const char *main_prog_name, + const char *to_be_replaced, + const char *replacement, + bool expect_load, + const char *err_msg) +{ + struct summarization_freplace *freplace = NULL; + struct bpf_program *freplace_prog = NULL; + struct bpf_program *main_prog = NULL; + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct summarization *main = NULL; + char log[16*1024]; + int err; + + opts.kernel_log_buf = log; + opts.kernel_log_size = sizeof(log); + if (env.verbosity >= VERBOSE_SUPER) + opts.kernel_log_level = 1 | 2 | 4; + main = summarization__open_opts(&opts); + if (!ASSERT_OK_PTR(main, "summarization__open")) + goto out; + main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name); + if (!ASSERT_OK_PTR(main_prog, "main_prog")) + goto out; + bpf_program__set_autoload(main_prog, true); + err = summarization__load(main); + print_verifier_log(log); + if (!ASSERT_OK(err, "summarization__load")) + goto out; + freplace = summarization_freplace__open_opts(&opts); + if (!ASSERT_OK_PTR(freplace, "summarization_freplace__open")) + goto out; + freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement); + if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog")) + goto out; + bpf_program__set_autoload(freplace_prog, true); + bpf_program__set_autoattach(freplace_prog, true); + bpf_program__set_attach_target(freplace_prog, + bpf_program__fd(main_prog), + to_be_replaced); + err = summarization_freplace__load(freplace); + print_verifier_log(log); + + /* The might_sleep extension doesn't work yet as sleepable calls are not + * allowed, but preserve the check in case it's supported later and then + * this particular combination can be enabled. + */ + if (!strcmp("might_sleep", replacement) && err) { + ASSERT_HAS_SUBSTR(log, "helper call might sleep in a non-sleepable prog", "error log"); + ASSERT_EQ(err, -EINVAL, "err"); + test__skip(); + goto out; + } + + if (expect_load) { + ASSERT_OK(err, "summarization_freplace__load"); + } else { + ASSERT_ERR(err, "summarization_freplace__load"); + ASSERT_HAS_SUBSTR(log, err_msg, "error log"); + } + +out: + summarization_freplace__destroy(freplace); + summarization__destroy(main); +} + +/* There are two global subprograms in both summarization.skel.h: + * - one changes packet data; + * - another does not. + * It is ok to freplace subprograms that change packet data with those + * that either do or do not. It is only ok to freplace subprograms + * that do not change packet data with those that do not as well. + * The below tests check outcomes for each combination of such freplace. + * Also test a case when main subprogram itself is replaced and is a single + * subprogram in a program. + * + * This holds for might_sleep programs. It is ok to replace might_sleep with + * might_sleep and with does_not_sleep, but does_not_sleep cannot be replaced + * with might_sleep. + */ +void test_summarization_freplace(void) +{ + struct { + const char *main; + const char *to_be_replaced; + bool has_side_effect; + } mains[2][4] = { + { + { "main_changes_with_subprogs", "changes_pkt_data", true }, + { "main_changes_with_subprogs", "does_not_change_pkt_data", false }, + { "main_changes", "main_changes", true }, + { "main_does_not_change", "main_does_not_change", false }, + }, + { + { "main_might_sleep_with_subprogs", "might_sleep", true }, + { "main_might_sleep_with_subprogs", "does_not_sleep", false }, + { "main_might_sleep", "main_might_sleep", true }, + { "main_does_not_sleep", "main_does_not_sleep", false }, + }, + }; + const char *pkt_err = "Extension program changes packet data"; + const char *slp_err = "Extension program may sleep"; + struct { + const char *func; + bool has_side_effect; + const char *err_msg; + } replacements[2][2] = { + { + { "changes_pkt_data", true, pkt_err }, + { "does_not_change_pkt_data", false, pkt_err }, + }, + { + { "might_sleep", true, slp_err }, + { "does_not_sleep", false, slp_err }, + }, + }; + char buf[64]; + + for (int t = 0; t < 2; t++) { + for (int i = 0; i < ARRAY_SIZE(mains); ++i) { + for (int j = 0; j < ARRAY_SIZE(replacements); ++j) { + snprintf(buf, sizeof(buf), "%s_with_%s", + mains[t][i].to_be_replaced, replacements[t][j].func); + if (!test__start_subtest(buf)) + continue; + test_aux(mains[t][i].main, mains[t][i].to_be_replaced, replacements[t][j].func, + mains[t][i].has_side_effect || !replacements[t][j].has_side_effect, + replacements[t][j].err_msg); + } + } + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index cec746e77cd3a..bae0e9de277d2 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -71,6 +71,8 @@ #define IP4_ADDR2_VETH1 "172.16.1.20" #define IP4_ADDR_TUNL_DEV0 "10.1.1.100" #define IP4_ADDR_TUNL_DEV1 "10.1.1.200" +#define IP6_ADDR_TUNL_DEV0 "fc80::100" +#define IP6_ADDR_TUNL_DEV1 "fc80::200" #define IP6_ADDR_VETH0 "::11" #define IP6_ADDR1_VETH1 "::22" @@ -98,6 +100,27 @@ #define XFRM_SPI_IN_TO_OUT 0x1 #define XFRM_SPI_OUT_TO_IN 0x2 +#define GRE_TUNL_DEV0 "gre00" +#define GRE_TUNL_DEV1 "gre11" + +#define IP6GRE_TUNL_DEV0 "ip6gre00" +#define IP6GRE_TUNL_DEV1 "ip6gre11" + +#define ERSPAN_TUNL_DEV0 "erspan00" +#define ERSPAN_TUNL_DEV1 "erspan11" + +#define IP6ERSPAN_TUNL_DEV0 "ip6erspan00" +#define IP6ERSPAN_TUNL_DEV1 "ip6erspan11" + +#define GENEVE_TUNL_DEV0 "geneve00" +#define GENEVE_TUNL_DEV1 "geneve11" + +#define IP6GENEVE_TUNL_DEV0 "ip6geneve00" +#define IP6GENEVE_TUNL_DEV1 "ip6geneve11" + +#define IP6TNL_TUNL_DEV0 "ip6tnl00" +#define IP6TNL_TUNL_DEV1 "ip6tnl11" + #define PING_ARGS "-i 0.01 -c 3 -w 10 -q" static int config_device(void) @@ -216,6 +239,18 @@ static int set_ipip_encap(const char *ipproto, const char *type) return -1; } +static int set_ipv4_addr(const char *dev0, const char *dev1) +{ + SYS(fail, "ip -n at_ns0 link set dev %s up", dev0); + SYS(fail, "ip -n at_ns0 addr add dev %s %s/24", dev0, IP4_ADDR_TUNL_DEV0); + SYS(fail, "ip link set dev %s up", dev1); + SYS(fail, "ip addr add dev %s %s/24", dev1, IP4_ADDR_TUNL_DEV1); + + return 0; +fail: + return 1; +} + static int add_ipip_tunnel(enum ipip_encap encap) { int err; @@ -356,6 +391,99 @@ static void delete_xfrm_tunnel(void) IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN); } +static int add_ipv4_tunnel(const char *dev0, const char *dev1, + const char *type, const char *opt) +{ + if (!type || !opt || !dev0 || !dev1) + return -1; + + SYS(fail, "ip -n at_ns0 link add dev %s type %s %s local %s remote %s", + dev0, type, opt, IP4_ADDR_VETH0, IP4_ADDR1_VETH1); + + SYS(fail, "ip link add dev %s type %s external", dev1, type); + + return set_ipv4_addr(dev0, dev1); +fail: + return -1; +} + +static void delete_tunnel(const char *dev0, const char *dev1) +{ + if (!dev0 || !dev1) + return; + + SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s", dev0); + SYS_NOFAIL("ip link delete dev %s", dev1); +} + +static int set_ipv6_addr(const char *dev0, const char *dev1) +{ + /* disable IPv6 DAD because it might take too long and fail tests */ + SYS(fail, "ip -n at_ns0 addr add %s/96 dev veth0 nodad", IP6_ADDR_VETH0); + SYS(fail, "ip -n at_ns0 link set dev veth0 up"); + SYS(fail, "ip addr add %s/96 dev veth1 nodad", IP6_ADDR1_VETH1); + SYS(fail, "ip link set dev veth1 up"); + + SYS(fail, "ip -n at_ns0 addr add dev %s %s/24", dev0, IP4_ADDR_TUNL_DEV0); + SYS(fail, "ip -n at_ns0 addr add dev %s %s/96 nodad", dev0, IP6_ADDR_TUNL_DEV0); + SYS(fail, "ip -n at_ns0 link set dev %s up", dev0); + + SYS(fail, "ip addr add dev %s %s/24", dev1, IP4_ADDR_TUNL_DEV1); + SYS(fail, "ip addr add dev %s %s/96 nodad", dev1, IP6_ADDR_TUNL_DEV1); + SYS(fail, "ip link set dev %s up", dev1); + return 0; +fail: + return 1; +} + +static int add_ipv6_tunnel(const char *dev0, const char *dev1, + const char *type, const char *opt) +{ + if (!type || !opt || !dev0 || !dev1) + return -1; + + SYS(fail, "ip -n at_ns0 link add dev %s type %s %s local %s remote %s", + dev0, type, opt, IP6_ADDR_VETH0, IP6_ADDR1_VETH1); + + SYS(fail, "ip link add dev %s type %s external", dev1, type); + + return set_ipv6_addr(dev0, dev1); +fail: + return -1; +} + +static int add_geneve_tunnel(const char *dev0, const char *dev1, + const char *type, const char *opt) +{ + if (!type || !opt || !dev0 || !dev1) + return -1; + + SYS(fail, "ip -n at_ns0 link add dev %s type %s id 2 %s remote %s", + dev0, type, opt, IP4_ADDR1_VETH1); + + SYS(fail, "ip link add dev %s type %s %s external", dev1, type, opt); + + return set_ipv4_addr(dev0, dev1); +fail: + return -1; +} + +static int add_ip6geneve_tunnel(const char *dev0, const char *dev1, + const char *type, const char *opt) +{ + if (!type || !opt || !dev0 || !dev1) + return -1; + + SYS(fail, "ip -n at_ns0 link add dev %s type %s id 22 %s remote %s", + dev0, type, opt, IP6_ADDR1_VETH1); + + SYS(fail, "ip link add dev %s type %s %s external", dev1, type, opt); + + return set_ipv6_addr(dev0, dev1); +fail: + return -1; +} + static int test_ping(int family, const char *addr) { SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr); @@ -364,32 +492,76 @@ static int test_ping(int family, const char *addr) return -1; } -static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd) +static void ping_dev0(void) { + /* ping from root namespace test */ + test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); +} + +static void ping_dev1(void) +{ + struct nstoken *nstoken; + + /* ping from at_ns0 namespace test */ + nstoken = open_netns("at_ns0"); + if (!ASSERT_OK_PTR(nstoken, "setns")) + return; + + test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); + close_netns(nstoken); +} + +static void ping6_veth0(void) +{ + test_ping(AF_INET6, IP6_ADDR_VETH0); +} + +static void ping6_dev0(void) +{ + test_ping(AF_INET6, IP6_ADDR_TUNL_DEV0); +} + +static void ping6_dev1(void) +{ + struct nstoken *nstoken; + + /* ping from at_ns0 namespace test */ + nstoken = open_netns("at_ns0"); + if (!ASSERT_OK_PTR(nstoken, "setns")) + return; + + test_ping(AF_INET, IP6_ADDR_TUNL_DEV1); + close_netns(nstoken); +} + +static int attach_tc_prog(int ifindex, int igr_fd, int egr_fd) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex, + .attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS); DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, .priority = 1, .prog_fd = igr_fd); DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1, .priority = 1, .prog_fd = egr_fd); int ret; - ret = bpf_tc_hook_create(hook); + ret = bpf_tc_hook_create(&hook); if (!ASSERT_OK(ret, "create tc hook")) return ret; if (igr_fd >= 0) { - hook->attach_point = BPF_TC_INGRESS; - ret = bpf_tc_attach(hook, &opts1); + hook.attach_point = BPF_TC_INGRESS; + ret = bpf_tc_attach(&hook, &opts1); if (!ASSERT_OK(ret, "bpf_tc_attach")) { - bpf_tc_hook_destroy(hook); + bpf_tc_hook_destroy(&hook); return ret; } } if (egr_fd >= 0) { - hook->attach_point = BPF_TC_EGRESS; - ret = bpf_tc_attach(hook, &opts2); + hook.attach_point = BPF_TC_EGRESS; + ret = bpf_tc_attach(&hook, &opts2); if (!ASSERT_OK(ret, "bpf_tc_attach")) { - bpf_tc_hook_destroy(hook); + bpf_tc_hook_destroy(&hook); return ret; } } @@ -397,6 +569,50 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd) return 0; } +static int generic_attach(const char *dev, int igr_fd, int egr_fd) +{ + int ifindex; + + if (!ASSERT_OK_FD(igr_fd, "check ingress fd")) + return -1; + if (!ASSERT_OK_FD(egr_fd, "check egress fd")) + return -1; + + ifindex = if_nametoindex(dev); + if (!ASSERT_NEQ(ifindex, 0, "get ifindex")) + return -1; + + return attach_tc_prog(ifindex, igr_fd, egr_fd); +} + +static int generic_attach_igr(const char *dev, int igr_fd) +{ + int ifindex; + + if (!ASSERT_OK_FD(igr_fd, "check ingress fd")) + return -1; + + ifindex = if_nametoindex(dev); + if (!ASSERT_NEQ(ifindex, 0, "get ifindex")) + return -1; + + return attach_tc_prog(ifindex, igr_fd, -1); +} + +static int generic_attach_egr(const char *dev, int egr_fd) +{ + int ifindex; + + if (!ASSERT_OK_FD(egr_fd, "check egress fd")) + return -1; + + ifindex = if_nametoindex(dev); + if (!ASSERT_NEQ(ifindex, 0, "get ifindex")) + return -1; + + return attach_tc_prog(ifindex, -1, egr_fd); +} + static void test_vxlan_tunnel(void) { struct test_tunnel_kern *skel = NULL; @@ -404,11 +620,9 @@ static void test_vxlan_tunnel(void) int local_ip_map_fd = -1; int set_src_prog_fd, get_src_prog_fd; int set_dst_prog_fd; - int key = 0, ifindex = -1; + int key = 0; uint local_ip; int err; - DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, - .attach_point = BPF_TC_INGRESS); /* add vxlan tunnel */ err = add_vxlan_tunnel(); @@ -419,42 +633,22 @@ static void test_vxlan_tunnel(void) skel = test_tunnel_kern__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) goto done; - ifindex = if_nametoindex(VXLAN_TUNL_DEV1); - if (!ASSERT_NEQ(ifindex, 0, "vxlan11 ifindex")) - goto done; - tc_hook.ifindex = ifindex; get_src_prog_fd = bpf_program__fd(skel->progs.vxlan_get_tunnel_src); set_src_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_src); - if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd")) - goto done; - if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) + if (generic_attach(VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd)) goto done; /* load and attach bpf prog to veth dev tc hook point */ - ifindex = if_nametoindex("veth1"); - if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) - goto done; - tc_hook.ifindex = ifindex; set_dst_prog_fd = bpf_program__fd(skel->progs.veth_set_outer_dst); - if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, set_dst_prog_fd, -1)) + if (generic_attach_igr("veth1", set_dst_prog_fd)) goto done; /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */ nstoken = open_netns("at_ns0"); if (!ASSERT_OK_PTR(nstoken, "setns src")) goto done; - ifindex = if_nametoindex(VXLAN_TUNL_DEV0); - if (!ASSERT_NEQ(ifindex, 0, "vxlan00 ifindex")) - goto done; - tc_hook.ifindex = ifindex; set_dst_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_dst); - if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd)) + if (generic_attach_egr(VXLAN_TUNL_DEV0, set_dst_prog_fd)) goto done; close_netns(nstoken); @@ -468,9 +662,7 @@ static void test_vxlan_tunnel(void) goto done; /* ping test */ - err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); - if (!ASSERT_OK(err, "test_ping")) - goto done; + ping_dev0(); done: /* delete vxlan tunnel */ @@ -488,11 +680,9 @@ static void test_ip6vxlan_tunnel(void) int local_ip_map_fd = -1; int set_src_prog_fd, get_src_prog_fd; int set_dst_prog_fd; - int key = 0, ifindex = -1; + int key = 0; uint local_ip; int err; - DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, - .attach_point = BPF_TC_INGRESS); /* add vxlan tunnel */ err = add_ip6vxlan_tunnel(); @@ -503,31 +693,17 @@ static void test_ip6vxlan_tunnel(void) skel = test_tunnel_kern__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) goto done; - ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV1); - if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan11 ifindex")) - goto done; - tc_hook.ifindex = ifindex; get_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_get_tunnel_src); set_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_src); - if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd")) - goto done; - if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) + if (generic_attach(IP6VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd)) goto done; /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */ nstoken = open_netns("at_ns0"); if (!ASSERT_OK_PTR(nstoken, "setns src")) goto done; - ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV0); - if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan00 ifindex")) - goto done; - tc_hook.ifindex = ifindex; set_dst_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_dst); - if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd)) + if (generic_attach_egr(IP6VXLAN_TUNL_DEV0, set_dst_prog_fd)) goto done; close_netns(nstoken); @@ -541,9 +717,7 @@ static void test_ip6vxlan_tunnel(void) goto done; /* ping test */ - err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); - if (!ASSERT_OK(err, "test_ping")) - goto done; + ping_dev0(); done: /* delete ipv6 vxlan tunnel */ @@ -557,12 +731,8 @@ static void test_ip6vxlan_tunnel(void) static void test_ipip_tunnel(enum ipip_encap encap) { struct test_tunnel_kern *skel = NULL; - struct nstoken *nstoken; int set_src_prog_fd, get_src_prog_fd; - int ifindex = -1; int err; - DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, - .attach_point = BPF_TC_INGRESS); /* add ipip tunnel */ err = add_ipip_tunnel(encap); @@ -573,10 +743,6 @@ static void test_ipip_tunnel(enum ipip_encap encap) skel = test_tunnel_kern__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) goto done; - ifindex = if_nametoindex(IPIP_TUNL_DEV1); - if (!ASSERT_NEQ(ifindex, 0, "ipip11 ifindex")) - goto done; - tc_hook.ifindex = ifindex; switch (encap) { case FOU: @@ -598,26 +764,11 @@ static void test_ipip_tunnel(enum ipip_encap encap) skel->progs.ipip_set_tunnel); } - if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd")) - goto done; - if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) + if (generic_attach(IPIP_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd)) goto done; - /* ping from root namespace test */ - err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); - if (!ASSERT_OK(err, "test_ping")) - goto done; - - /* ping from at_ns0 namespace test */ - nstoken = open_netns("at_ns0"); - if (!ASSERT_OK_PTR(nstoken, "setns")) - goto done; - err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); - if (!ASSERT_OK(err, "test_ping")) - goto done; - close_netns(nstoken); + ping_dev0(); + ping_dev1(); done: /* delete ipip tunnel */ @@ -628,11 +779,8 @@ static void test_ipip_tunnel(enum ipip_encap encap) static void test_xfrm_tunnel(void) { - DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, - .attach_point = BPF_TC_INGRESS); LIBBPF_OPTS(bpf_xdp_attach_opts, opts); struct test_tunnel_kern *skel = NULL; - struct nstoken *nstoken; int xdp_prog_fd; int tc_prog_fd; int ifindex; @@ -646,19 +794,16 @@ static void test_xfrm_tunnel(void) if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) goto done; - ifindex = if_nametoindex("veth1"); - if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) - goto done; /* attach tc prog to tunnel dev */ - tc_hook.ifindex = ifindex; tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state); - if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, tc_prog_fd, -1)) + if (generic_attach_igr("veth1", tc_prog_fd)) goto done; /* attach xdp prog to tunnel dev */ + ifindex = if_nametoindex("veth1"); + if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) + goto done; xdp_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state_xdp); if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__fd")) goto done; @@ -666,14 +811,7 @@ static void test_xfrm_tunnel(void) if (!ASSERT_OK(err, "bpf_xdp_attach")) goto done; - /* ping from at_ns0 namespace test */ - nstoken = open_netns("at_ns0"); - if (!ASSERT_OK_PTR(nstoken, "setns")) - goto done; - err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); - close_netns(nstoken); - if (!ASSERT_OK(err, "test_ping")) - goto done; + ping_dev1(); if (!ASSERT_EQ(skel->bss->xfrm_reqid, 1, "req_id")) goto done; @@ -690,6 +828,281 @@ static void test_xfrm_tunnel(void) test_tunnel_kern__destroy(skel); } +enum gre_test { + GRE, + GRE_NOKEY, + GRETAP, + GRETAP_NOKEY, +}; + +static void test_gre_tunnel(enum gre_test test) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + switch (test) { + case GRE: + err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gre", "seq"); + set_fd = bpf_program__fd(skel->progs.gre_set_tunnel_no_key); + get_fd = bpf_program__fd(skel->progs.gre_get_tunnel); + break; + case GRE_NOKEY: + err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gre", "seq key 2"); + set_fd = bpf_program__fd(skel->progs.gre_set_tunnel); + get_fd = bpf_program__fd(skel->progs.gre_get_tunnel); + break; + case GRETAP: + err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gretap", "seq"); + set_fd = bpf_program__fd(skel->progs.gre_set_tunnel_no_key); + get_fd = bpf_program__fd(skel->progs.gre_get_tunnel); + break; + case GRETAP_NOKEY: + err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gretap", "seq key 2"); + set_fd = bpf_program__fd(skel->progs.gre_set_tunnel); + get_fd = bpf_program__fd(skel->progs.gre_get_tunnel); + break; + } + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + if (generic_attach(GRE_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping_dev0(); + ping_dev1(); + +done: + delete_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + +enum ip6gre_test { + IP6GRE, + IP6GRETAP +}; + +static void test_ip6gre_tunnel(enum ip6gre_test test) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + switch (test) { + case IP6GRE: + err = add_ipv6_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1, + "ip6gre", "flowlabel 0xbcdef key 2"); + break; + case IP6GRETAP: + err = add_ipv6_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1, + "ip6gretap", "flowlabel 0xbcdef key 2"); + break; + } + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + set_fd = bpf_program__fd(skel->progs.ip6gretap_set_tunnel); + get_fd = bpf_program__fd(skel->progs.ip6gretap_get_tunnel); + if (generic_attach(IP6GRE_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping6_veth0(); + ping6_dev1(); + ping_dev0(); + ping_dev1(); +done: + delete_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + +enum erspan_test { + V1, + V2 +}; + +static void test_erspan_tunnel(enum erspan_test test) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + switch (test) { + case V1: + err = add_ipv4_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1, + "erspan", "seq key 2 erspan_ver 1 erspan 123"); + break; + case V2: + err = add_ipv4_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1, + "erspan", + "seq key 2 erspan_ver 2 erspan_dir egress erspan_hwid 3"); + break; + } + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + set_fd = bpf_program__fd(skel->progs.erspan_set_tunnel); + get_fd = bpf_program__fd(skel->progs.erspan_get_tunnel); + if (generic_attach(ERSPAN_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping_dev0(); + ping_dev1(); +done: + delete_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + +static void test_ip6erspan_tunnel(enum erspan_test test) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + switch (test) { + case V1: + err = add_ipv6_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1, + "ip6erspan", "seq key 2 erspan_ver 1 erspan 123"); + break; + case V2: + err = add_ipv6_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1, + "ip6erspan", + "seq key 2 erspan_ver 2 erspan_dir egress erspan_hwid 7"); + break; + } + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + set_fd = bpf_program__fd(skel->progs.ip4ip6erspan_set_tunnel); + get_fd = bpf_program__fd(skel->progs.ip4ip6erspan_get_tunnel); + if (generic_attach(IP6ERSPAN_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping6_veth0(); + ping_dev1(); +done: + delete_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + +static void test_geneve_tunnel(void) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + err = add_geneve_tunnel(GENEVE_TUNL_DEV0, GENEVE_TUNL_DEV1, + "geneve", "dstport 6081"); + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + set_fd = bpf_program__fd(skel->progs.geneve_set_tunnel); + get_fd = bpf_program__fd(skel->progs.geneve_get_tunnel); + if (generic_attach(GENEVE_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping_dev0(); + ping_dev1(); +done: + delete_tunnel(GENEVE_TUNL_DEV0, GENEVE_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + +static void test_ip6geneve_tunnel(void) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + err = add_ip6geneve_tunnel(IP6GENEVE_TUNL_DEV0, IP6GENEVE_TUNL_DEV1, + "geneve", ""); + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + set_fd = bpf_program__fd(skel->progs.ip6geneve_set_tunnel); + get_fd = bpf_program__fd(skel->progs.ip6geneve_get_tunnel); + if (generic_attach(IP6GENEVE_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping_dev0(); + ping_dev1(); +done: + delete_tunnel(IP6GENEVE_TUNL_DEV0, IP6GENEVE_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + +enum ip6tnl_test { + IPIP6, + IP6IP6 +}; + +static void test_ip6tnl_tunnel(enum ip6tnl_test test) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + err = add_ipv6_tunnel(IP6TNL_TUNL_DEV0, IP6TNL_TUNL_DEV1, "ip6tnl", ""); + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + switch (test) { + case IPIP6: + set_fd = bpf_program__fd(skel->progs.ipip6_set_tunnel); + get_fd = bpf_program__fd(skel->progs.ipip6_get_tunnel); + break; + case IP6IP6: + set_fd = bpf_program__fd(skel->progs.ip6ip6_set_tunnel); + get_fd = bpf_program__fd(skel->progs.ip6ip6_get_tunnel); + break; + } + if (generic_attach(IP6TNL_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping6_veth0(); + switch (test) { + case IPIP6: + ping_dev0(); + ping_dev1(); + break; + case IP6IP6: + ping6_dev0(); + ping6_dev1(); + break; + } + +done: + delete_tunnel(IP6TNL_TUNL_DEV0, IP6TNL_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + #define RUN_TEST(name, ...) \ ({ \ if (test__start_subtest(#name)) { \ @@ -707,6 +1120,20 @@ static void *test_tunnel_run_tests(void *arg) RUN_TEST(ipip_tunnel, FOU); RUN_TEST(ipip_tunnel, GUE); RUN_TEST(xfrm_tunnel); + RUN_TEST(gre_tunnel, GRE); + RUN_TEST(gre_tunnel, GRE_NOKEY); + RUN_TEST(gre_tunnel, GRETAP); + RUN_TEST(gre_tunnel, GRETAP_NOKEY); + RUN_TEST(ip6gre_tunnel, IP6GRE); + RUN_TEST(ip6gre_tunnel, IP6GRETAP); + RUN_TEST(erspan_tunnel, V1); + RUN_TEST(erspan_tunnel, V2); + RUN_TEST(ip6erspan_tunnel, V1); + RUN_TEST(ip6erspan_tunnel, V2); + RUN_TEST(geneve_tunnel); + RUN_TEST(ip6geneve_tunnel); + RUN_TEST(ip6tnl_tunnel, IPIP6); + RUN_TEST(ip6tnl_tunnel, IP6IP6); return NULL; } diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 8a0e1ff8a2dc6..cfe47b529e01a 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -45,6 +45,7 @@ #include "verifier_ldsx.skel.h" #include "verifier_leak_ptr.skel.h" #include "verifier_linked_scalars.skel.h" +#include "verifier_load_acquire.skel.h" #include "verifier_loops1.skel.h" #include "verifier_lwt.skel.h" #include "verifier_map_in_map.skel.h" @@ -80,6 +81,7 @@ #include "verifier_spill_fill.skel.h" #include "verifier_spin_lock.skel.h" #include "verifier_stack_ptr.skel.h" +#include "verifier_store_release.skel.h" #include "verifier_subprog_precision.skel.h" #include "verifier_subreg.skel.h" #include "verifier_tailcall_jit.skel.h" @@ -173,6 +175,7 @@ void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); } void test_verifier_iterating_callbacks(void) { RUN(verifier_iterating_callbacks); } void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); } void test_verifier_jit_convergence(void) { RUN(verifier_jit_convergence); } +void test_verifier_load_acquire(void) { RUN(verifier_load_acquire); } void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); } void test_verifier_ldsx(void) { RUN(verifier_ldsx); } void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); } @@ -211,6 +214,7 @@ void test_verifier_sockmap_mutate(void) { RUN(verifier_sockmap_mutate); } void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); } void test_verifier_spin_lock(void) { RUN(verifier_spin_lock); } void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); } +void test_verifier_store_release(void) { RUN(verifier_store_release); } void test_verifier_subprog_precision(void) { RUN(verifier_subprog_precision); } void test_verifier_subreg(void) { RUN(verifier_subreg); } void test_verifier_tailcall_jit(void) { RUN(verifier_tailcall_jit); } diff --git a/tools/testing/selftests/bpf/progs/arena_atomics.c b/tools/testing/selftests/bpf/progs/arena_atomics.c index 40dd57fca5cc2..a52feff981126 100644 --- a/tools/testing/selftests/bpf/progs/arena_atomics.c +++ b/tools/testing/selftests/bpf/progs/arena_atomics.c @@ -6,6 +6,8 @@ #include #include #include "bpf_arena_common.h" +#include "../../../include/linux/filter.h" +#include "bpf_misc.h" struct { __uint(type, BPF_MAP_TYPE_ARENA); @@ -19,9 +21,17 @@ struct { } arena SEC(".maps"); #if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST) -bool skip_tests __attribute((__section__(".data"))) = false; +bool skip_all_tests __attribute((__section__(".data"))) = false; #else -bool skip_tests = true; +bool skip_all_tests = true; +#endif + +#if defined(ENABLE_ATOMICS_TESTS) && \ + defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \ + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) +bool skip_lacq_srel_tests __attribute((__section__(".data"))) = false; +#else +bool skip_lacq_srel_tests = true; #endif __u32 pid = 0; @@ -274,4 +284,111 @@ int uaf(const void *ctx) return 0; } +#if __clang_major__ >= 18 +__u8 __arena_global load_acquire8_value = 0x12; +__u16 __arena_global load_acquire16_value = 0x1234; +__u32 __arena_global load_acquire32_value = 0x12345678; +__u64 __arena_global load_acquire64_value = 0x1234567890abcdef; + +__u8 __arena_global load_acquire8_result = 0; +__u16 __arena_global load_acquire16_result = 0; +__u32 __arena_global load_acquire32_result = 0; +__u64 __arena_global load_acquire64_result = 0; +#else +/* clang-17 crashes if the .addr_space.1 ELF section has holes. Work around + * this issue by defining the below variables as 64-bit. + */ +__u64 __arena_global load_acquire8_value; +__u64 __arena_global load_acquire16_value; +__u64 __arena_global load_acquire32_value; +__u64 __arena_global load_acquire64_value; + +__u64 __arena_global load_acquire8_result; +__u64 __arena_global load_acquire16_result; +__u64 __arena_global load_acquire32_result; +__u64 __arena_global load_acquire64_result; +#endif + +SEC("raw_tp/sys_enter") +int load_acquire(const void *ctx) +{ +#if defined(ENABLE_ATOMICS_TESTS) && \ + defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \ + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) + +#define LOAD_ACQUIRE_ARENA(SIZEOP, SIZE, SRC, DST) \ + { asm volatile ( \ + "r1 = %[" #SRC "] ll;" \ + "r1 = addr_space_cast(r1, 0x0, 0x1);" \ + ".8byte %[load_acquire_insn];" \ + "r3 = %[" #DST "] ll;" \ + "r3 = addr_space_cast(r3, 0x0, 0x1);" \ + "*(" #SIZE " *)(r3 + 0) = r2;" \ + : \ + : __imm_addr(SRC), \ + __imm_insn(load_acquire_insn, \ + BPF_ATOMIC_OP(BPF_##SIZEOP, BPF_LOAD_ACQ, \ + BPF_REG_2, BPF_REG_1, 0)), \ + __imm_addr(DST) \ + : __clobber_all); } \ + + LOAD_ACQUIRE_ARENA(B, u8, load_acquire8_value, load_acquire8_result) + LOAD_ACQUIRE_ARENA(H, u16, load_acquire16_value, + load_acquire16_result) + LOAD_ACQUIRE_ARENA(W, u32, load_acquire32_value, + load_acquire32_result) + LOAD_ACQUIRE_ARENA(DW, u64, load_acquire64_value, + load_acquire64_result) +#undef LOAD_ACQUIRE_ARENA + +#endif + return 0; +} + +#if __clang_major__ >= 18 +__u8 __arena_global store_release8_result = 0; +__u16 __arena_global store_release16_result = 0; +__u32 __arena_global store_release32_result = 0; +__u64 __arena_global store_release64_result = 0; +#else +/* clang-17 crashes if the .addr_space.1 ELF section has holes. Work around + * this issue by defining the below variables as 64-bit. + */ +__u64 __arena_global store_release8_result; +__u64 __arena_global store_release16_result; +__u64 __arena_global store_release32_result; +__u64 __arena_global store_release64_result; +#endif + +SEC("raw_tp/sys_enter") +int store_release(const void *ctx) +{ +#if defined(ENABLE_ATOMICS_TESTS) && \ + defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \ + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) + +#define STORE_RELEASE_ARENA(SIZEOP, DST, VAL) \ + { asm volatile ( \ + "r1 = " VAL ";" \ + "r2 = %[" #DST "] ll;" \ + "r2 = addr_space_cast(r2, 0x0, 0x1);" \ + ".8byte %[store_release_insn];" \ + : \ + : __imm_addr(DST), \ + __imm_insn(store_release_insn, \ + BPF_ATOMIC_OP(BPF_##SIZEOP, BPF_STORE_REL, \ + BPF_REG_2, BPF_REG_1, 0)) \ + : __clobber_all); } \ + + STORE_RELEASE_ARENA(B, store_release8_result, "0x12") + STORE_RELEASE_ARENA(H, store_release16_result, "0x1234") + STORE_RELEASE_ARENA(W, store_release32_result, "0x12345678") + STORE_RELEASE_ARENA(DW, store_release64_result, + "0x1234567890abcdef ll") +#undef STORE_RELEASE_ARENA + +#endif + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 34f555da546fb..13a2e22f54658 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -213,4 +213,21 @@ #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #endif +#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ + defined(__TARGET_ARCH_loongarch)) && \ + __clang_major__ >= 18 +#define CAN_USE_GOTOL +#endif + +#if _clang_major__ >= 18 +#define CAN_USE_BPF_ST +#endif + +#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \ + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) +#define CAN_USE_LOAD_ACQ_STORE_REL +#endif + #endif diff --git a/tools/testing/selftests/bpf/progs/cgroup_preorder.c b/tools/testing/selftests/bpf/progs/cgroup_preorder.c new file mode 100644 index 0000000000000..4ef6202baa0a4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_preorder.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include +#include + +char _license[] SEC("license") = "GPL"; + +unsigned int idx; +__u8 result[4]; + +SEC("cgroup/getsockopt") +int child(struct bpf_sockopt *ctx) +{ + if (idx < 4) + result[idx++] = 1; + return 1; +} + +SEC("cgroup/getsockopt") +int child_2(struct bpf_sockopt *ctx) +{ + if (idx < 4) + result[idx++] = 2; + return 1; +} + +SEC("cgroup/getsockopt") +int parent(struct bpf_sockopt *ctx) +{ + if (idx < 4) + result[idx++] = 3; + return 1; +} + +SEC("cgroup/getsockopt") +int parent_2(struct bpf_sockopt *ctx) +{ + if (idx < 4) + result[idx++] = 4; + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data.c b/tools/testing/selftests/bpf/progs/changes_pkt_data.c deleted file mode 100644 index 43cada48b28ad..0000000000000 --- a/tools/testing/selftests/bpf/progs/changes_pkt_data.c +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include -#include - -__noinline -long changes_pkt_data(struct __sk_buff *sk) -{ - return bpf_skb_pull_data(sk, 0); -} - -__noinline __weak -long does_not_change_pkt_data(struct __sk_buff *sk) -{ - return 0; -} - -SEC("?tc") -int main_with_subprogs(struct __sk_buff *sk) -{ - changes_pkt_data(sk); - does_not_change_pkt_data(sk); - return 0; -} - -SEC("?tc") -int main_changes(struct __sk_buff *sk) -{ - bpf_skb_pull_data(sk, 0); - return 0; -} - -SEC("?tc") -int main_does_not_change(struct __sk_buff *sk) -{ - return 0; -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/compute_live_registers.c b/tools/testing/selftests/bpf/progs/compute_live_registers.c new file mode 100644 index 0000000000000..f3d79aecbf935 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/compute_live_registers.c @@ -0,0 +1,424 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "../../../include/linux/filter.h" +#include "bpf_arena_common.h" +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} test_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARENA); + __uint(map_flags, BPF_F_MMAPABLE); + __uint(max_entries, 1); +} arena SEC(".maps"); + +SEC("socket") +__log_level(2) +__msg(" 0: .......... (b7) r0 = 42") +__msg(" 1: 0......... (bf) r1 = r0") +__msg(" 2: .1........ (bf) r2 = r1") +__msg(" 3: ..2....... (bf) r3 = r2") +__msg(" 4: ...3...... (bf) r4 = r3") +__msg(" 5: ....4..... (bf) r5 = r4") +__msg(" 6: .....5.... (bf) r6 = r5") +__msg(" 7: ......6... (bf) r7 = r6") +__msg(" 8: .......7.. (bf) r8 = r7") +__msg(" 9: ........8. (bf) r9 = r8") +__msg("10: .........9 (bf) r0 = r9") +__msg("11: 0......... (95) exit") +__naked void assign_chain(void) +{ + asm volatile ( + "r0 = 42;" + "r1 = r0;" + "r2 = r1;" + "r3 = r2;" + "r4 = r3;" + "r5 = r4;" + "r6 = r5;" + "r7 = r6;" + "r8 = r7;" + "r9 = r8;" + "r0 = r9;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("0: .......... (b7) r1 = 7") +__msg("1: .1........ (07) r1 += 7") +__msg("2: .......... (b7) r2 = 7") +__msg("3: ..2....... (b7) r3 = 42") +__msg("4: ..23...... (0f) r2 += r3") +__msg("5: .......... (b7) r0 = 0") +__msg("6: 0......... (95) exit") +__naked void arithmetics(void) +{ + asm volatile ( + "r1 = 7;" + "r1 += 7;" + "r2 = 7;" + "r3 = 42;" + "r2 += r3;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +#ifdef CAN_USE_BPF_ST +SEC("socket") +__log_level(2) +__msg(" 1: .1........ (07) r1 += -8") +__msg(" 2: .1........ (7a) *(u64 *)(r1 +0) = 7") +__msg(" 3: .1........ (b7) r2 = 42") +__msg(" 4: .12....... (7b) *(u64 *)(r1 +0) = r2") +__msg(" 5: .12....... (7b) *(u64 *)(r1 +0) = r2") +__msg(" 6: .......... (b7) r0 = 0") +__naked void store(void) +{ + asm volatile ( + "r1 = r10;" + "r1 += -8;" + "*(u64 *)(r1 +0) = 7;" + "r2 = 42;" + "*(u64 *)(r1 +0) = r2;" + "*(u64 *)(r1 +0) = r2;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} +#endif + +SEC("socket") +__log_level(2) +__msg("1: ....4..... (07) r4 += -8") +__msg("2: ....4..... (79) r5 = *(u64 *)(r4 +0)") +__msg("3: ....45.... (07) r4 += -8") +__naked void load(void) +{ + asm volatile ( + "r4 = r10;" + "r4 += -8;" + "r5 = *(u64 *)(r4 +0);" + "r4 += -8;" + "r0 = r5;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("0: .1........ (61) r2 = *(u32 *)(r1 +0)") +__msg("1: ..2....... (d4) r2 = le64 r2") +__msg("2: ..2....... (bf) r0 = r2") +__naked void endian(void) +{ + asm volatile ( + "r2 = *(u32 *)(r1 +0);" + "r2 = le64 r2;" + "r0 = r2;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg(" 8: 0......... (b7) r1 = 1") +__msg(" 9: 01........ (db) r1 = atomic64_fetch_add((u64 *)(r0 +0), r1)") +__msg("10: 01........ (c3) lock *(u32 *)(r0 +0) += r1") +__msg("11: 01........ (db) r1 = atomic64_xchg((u64 *)(r0 +0), r1)") +__msg("12: 01........ (bf) r2 = r0") +__msg("13: .12....... (bf) r0 = r1") +__msg("14: 012....... (db) r0 = atomic64_cmpxchg((u64 *)(r2 +0), r0, r1)") +__naked void atomic(void) +{ + asm volatile ( + "r2 = r10;" + "r2 += -8;" + "r1 = 0;" + "*(u64 *)(r2 +0) = r1;" + "r1 = %[test_map] ll;" + "call %[bpf_map_lookup_elem];" + "if r0 == 0 goto 1f;" + "r1 = 1;" + "r1 = atomic_fetch_add((u64 *)(r0 +0), r1);" + ".8byte %[add_nofetch];" /* same as "lock *(u32 *)(r0 +0) += r1;" */ + "r1 = xchg_64(r0 + 0, r1);" + "r2 = r0;" + "r0 = r1;" + "r0 = cmpxchg_64(r2 + 0, r0, r1);" + "1: exit;" + : + : __imm(bpf_map_lookup_elem), + __imm_addr(test_map), + __imm_insn(add_nofetch, BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_0, BPF_REG_1, 0)) + : __clobber_all); +} + +#ifdef CAN_USE_LOAD_ACQ_STORE_REL + +SEC("socket") +__log_level(2) +__msg("2: .12....... (db) store_release((u64 *)(r2 -8), r1)") +__msg("3: .......... (bf) r3 = r10") +__msg("4: ...3...... (db) r4 = load_acquire((u64 *)(r3 -8))") +__naked void atomic_load_acq_store_rel(void) +{ + asm volatile ( + "r1 = 42;" + "r2 = r10;" + ".8byte %[store_release_insn];" /* store_release((u64 *)(r2 - 8), r1); */ + "r3 = r10;" + ".8byte %[load_acquire_insn];" /* r4 = load_acquire((u64 *)(r3 + 0)); */ + "r0 = r4;" + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_2, BPF_REG_1, -8)), + __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_4, BPF_REG_3, -8)) + : __clobber_all); +} + +#endif /* CAN_USE_LOAD_ACQ_STORE_REL */ + +SEC("socket") +__log_level(2) +__msg("4: .12....7.. (85) call bpf_trace_printk#6") +__msg("5: 0......7.. (0f) r0 += r7") +__naked void regular_call(void) +{ + asm volatile ( + "r7 = 1;" + "r1 = r10;" + "r1 += -8;" + "r2 = 1;" + "call %[bpf_trace_printk];" + "r0 += r7;" + "exit;" + : + : __imm(bpf_trace_printk) + : __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("2: 012....... (25) if r1 > 0x7 goto pc+1") +__msg("3: ..2....... (bf) r0 = r2") +__naked void if1(void) +{ + asm volatile ( + "r0 = 1;" + "r2 = 2;" + "if r1 > 0x7 goto +1;" + "r0 = r2;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("3: 0123...... (2d) if r1 > r3 goto pc+1") +__msg("4: ..2....... (bf) r0 = r2") +__naked void if2(void) +{ + asm volatile ( + "r0 = 1;" + "r2 = 2;" + "r3 = 7;" + "if r1 > r3 goto +1;" + "r0 = r2;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("0: .......... (b7) r1 = 0") +__msg("1: .1........ (b7) r2 = 7") +__msg("2: .12....... (25) if r1 > 0x7 goto pc+4") +__msg("3: .12....... (07) r1 += 1") +__msg("4: .12....... (27) r2 *= 2") +__msg("5: .12....... (05) goto pc+0") +__msg("6: .12....... (05) goto pc-5") +__msg("7: .......... (b7) r0 = 0") +__msg("8: 0......... (95) exit") +__naked void loop(void) +{ + asm volatile ( + "r1 = 0;" + "r2 = 7;" + "if r1 > 0x7 goto +4;" + "r1 += 1;" + "r2 *= 2;" + "goto +0;" + "goto -5;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_trace_printk) + : __clobber_all); +} + +#ifdef CAN_USE_GOTOL +SEC("socket") +__log_level(2) +__msg("2: .123...... (25) if r1 > 0x7 goto pc+2") +__msg("3: ..2....... (bf) r0 = r2") +__msg("4: 0......... (06) gotol pc+1") +__msg("5: ...3...... (bf) r0 = r3") +__msg("6: 0......... (95) exit") +__naked void gotol(void) +{ + asm volatile ( + "r2 = 42;" + "r3 = 24;" + "if r1 > 0x7 goto +2;" + "r0 = r2;" + "gotol +1;" + "r0 = r3;" + "exit;" + : + : __imm(bpf_trace_printk) + : __clobber_all); +} +#endif + +SEC("socket") +__log_level(2) +__msg("0: .......... (b7) r1 = 1") +__msg("1: .1........ (e5) may_goto pc+1") +__msg("2: .......... (05) goto pc-3") +__msg("3: .1........ (bf) r0 = r1") +__msg("4: 0......... (95) exit") +__naked void may_goto(void) +{ + asm volatile ( + "1: r1 = 1;" + ".8byte %[may_goto];" + "goto 1b;" + "r0 = r1;" + "exit;" + : + : __imm(bpf_get_smp_processor_id), + __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, +1 /* offset */, 0)) + : __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("1: 0......... (18) r2 = 0x7") +__msg("3: 0.2....... (0f) r0 += r2") +__naked void ldimm64(void) +{ + asm volatile ( + "r0 = 0;" + "r2 = 0x7 ll;" + "r0 += r2;" + "exit;" + : + :: __clobber_all); +} + +/* No rules specific for LD_ABS/LD_IND, default behaviour kicks in */ +SEC("socket") +__log_level(2) +__msg("2: 0123456789 (30) r0 = *(u8 *)skb[42]") +__msg("3: 012.456789 (0f) r7 += r0") +__msg("4: 012.456789 (b7) r3 = 42") +__msg("5: 0123456789 (50) r0 = *(u8 *)skb[r3 + 0]") +__msg("6: 0......7.. (0f) r7 += r0") +__naked void ldabs(void) +{ + asm volatile ( + "r6 = r1;" + "r7 = 0;" + "r0 = *(u8 *)skb[42];" + "r7 += r0;" + "r3 = 42;" + ".8byte %[ld_ind];" /* same as "r0 = *(u8 *)skb[r3];" */ + "r7 += r0;" + "r0 = r7;" + "exit;" + : + : __imm_insn(ld_ind, BPF_LD_IND(BPF_B, BPF_REG_3, 0)) + : __clobber_all); +} + + +#ifdef __BPF_FEATURE_ADDR_SPACE_CAST +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__log_level(2) +__msg(" 6: .12345.... (85) call bpf_arena_alloc_pages") +__msg(" 7: 0......... (bf) r1 = addr_space_cast(r0, 0, 1)") +__msg(" 8: .1........ (b7) r2 = 42") +__naked void addr_space_cast(void) +{ + asm volatile ( + "r1 = %[arena] ll;" + "r2 = 0;" + "r3 = 1;" + "r4 = 0;" + "r5 = 0;" + "call %[bpf_arena_alloc_pages];" + "r1 = addr_space_cast(r0, 0, 1);" + "r2 = 42;" + "*(u64 *)(r1 +0) = r2;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_arena_alloc_pages), + __imm_addr(arena) + : __clobber_all); +} +#endif + +static __used __naked int aux1(void) +{ + asm volatile ( + "r0 = r1;" + "r0 += r2;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("0: ....45.... (b7) r1 = 1") +__msg("1: .1..45.... (b7) r2 = 2") +__msg("2: .12.45.... (b7) r3 = 3") +/* Conservative liveness for subprog parameters. */ +__msg("3: .12345.... (85) call pc+2") +__msg("4: .......... (b7) r0 = 0") +__msg("5: 0......... (95) exit") +__msg("6: .12....... (bf) r0 = r1") +__msg("7: 0.2....... (0f) r0 += r2") +/* Conservative liveness for subprog return value. */ +__msg("8: 0......... (95) exit") +__naked void subprog1(void) +{ + asm volatile ( + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "call aux1;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +/* to retain debug info for BTF generation */ +void kfunc_root(void) +{ + bpf_arena_alloc_pages(0, 0, 0, 0, 0); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/connect4_dropper.c b/tools/testing/selftests/bpf/progs/connect4_dropper.c index d3f4c5e4fb696..a3819a5d09c8f 100644 --- a/tools/testing/selftests/bpf/progs/connect4_dropper.c +++ b/tools/testing/selftests/bpf/progs/connect4_dropper.c @@ -13,12 +13,14 @@ #define VERDICT_REJECT 0 #define VERDICT_PROCEED 1 +int port; + SEC("cgroup/connect4") int connect_v4_dropper(struct bpf_sock_addr *ctx) { if (ctx->type != SOCK_STREAM) return VERDICT_PROCEED; - if (ctx->user_port == bpf_htons(60120)) + if (ctx->user_port == bpf_htons(port)) return VERDICT_REJECT; return VERDICT_PROCEED; } diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index bfcc85686cf04..e1fba28e4a868 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -1,20 +1,19 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2022 Facebook */ +#include #include #include -#include #include #include #include "bpf_misc.h" -#include "bpf_kfuncs.h" #include "errno.h" char _license[] SEC("license") = "GPL"; int pid, err, val; -struct sample { +struct ringbuf_sample { int pid; int seq; long value; @@ -121,7 +120,7 @@ int test_dynptr_data(void *ctx) static int ringbuf_callback(__u32 index, void *data) { - struct sample *sample; + struct ringbuf_sample *sample; struct bpf_dynptr *ptr = (struct bpf_dynptr *)data; @@ -138,7 +137,7 @@ SEC("?tp/syscalls/sys_enter_nanosleep") int test_ringbuf(void *ctx) { struct bpf_dynptr ptr; - struct sample *sample; + struct ringbuf_sample *sample; if (bpf_get_current_pid_tgid() >> 32 != pid) return 0; @@ -567,3 +566,117 @@ int BPF_PROG(test_dynptr_skb_tp_btf, void *skb, void *location) return 1; } + +static inline int bpf_memcmp(const char *a, const char *b, u32 size) +{ + int i; + + bpf_for(i, 0, size) { + if (a[i] != b[i]) + return a[i] < b[i] ? -1 : 1; + } + return 0; +} + +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_copy(void *ctx) +{ + char data[] = "hello there, world!!"; + char buf[32] = {'\0'}; + __u32 sz = sizeof(data); + struct bpf_dynptr src, dst; + + bpf_ringbuf_reserve_dynptr(&ringbuf, sz, 0, &src); + bpf_ringbuf_reserve_dynptr(&ringbuf, sz, 0, &dst); + + /* Test basic case of copying contiguous memory backed dynptrs */ + err = bpf_dynptr_write(&src, 0, data, sz, 0); + err = err ?: bpf_dynptr_copy(&dst, 0, &src, 0, sz); + err = err ?: bpf_dynptr_read(buf, sz, &dst, 0, 0); + err = err ?: bpf_memcmp(data, buf, sz); + + /* Test that offsets are handled correctly */ + err = err ?: bpf_dynptr_copy(&dst, 3, &src, 5, sz - 5); + err = err ?: bpf_dynptr_read(buf, sz - 5, &dst, 3, 0); + err = err ?: bpf_memcmp(data + 5, buf, sz - 5); + + bpf_ringbuf_discard_dynptr(&src, 0); + bpf_ringbuf_discard_dynptr(&dst, 0); + return 0; +} + +SEC("xdp") +int test_dynptr_copy_xdp(struct xdp_md *xdp) +{ + struct bpf_dynptr ptr_buf, ptr_xdp; + char data[] = "qwertyuiopasdfghjkl"; + char buf[32] = {'\0'}; + __u32 len = sizeof(data); + int i, chunks = 200; + + /* ptr_xdp is backed by non-contiguous memory */ + bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp); + bpf_ringbuf_reserve_dynptr(&ringbuf, len * chunks, 0, &ptr_buf); + + /* Destination dynptr is backed by non-contiguous memory */ + bpf_for(i, 0, chunks) { + err = bpf_dynptr_write(&ptr_buf, i * len, data, len, 0); + if (err) + goto out; + } + + err = bpf_dynptr_copy(&ptr_xdp, 0, &ptr_buf, 0, len * chunks); + if (err) + goto out; + + bpf_for(i, 0, chunks) { + __builtin_memset(buf, 0, sizeof(buf)); + err = bpf_dynptr_read(&buf, len, &ptr_xdp, i * len, 0); + if (err) + goto out; + if (bpf_memcmp(data, buf, len) != 0) + goto out; + } + + /* Source dynptr is backed by non-contiguous memory */ + __builtin_memset(buf, 0, sizeof(buf)); + bpf_for(i, 0, chunks) { + err = bpf_dynptr_write(&ptr_buf, i * len, buf, len, 0); + if (err) + goto out; + } + + err = bpf_dynptr_copy(&ptr_buf, 0, &ptr_xdp, 0, len * chunks); + if (err) + goto out; + + bpf_for(i, 0, chunks) { + __builtin_memset(buf, 0, sizeof(buf)); + err = bpf_dynptr_read(&buf, len, &ptr_buf, i * len, 0); + if (err) + goto out; + if (bpf_memcmp(data, buf, len) != 0) + goto out; + } + + /* Both source and destination dynptrs are backed by non-contiguous memory */ + err = bpf_dynptr_copy(&ptr_xdp, 2, &ptr_xdp, len, len * (chunks - 1)); + if (err) + goto out; + + bpf_for(i, 0, chunks - 1) { + __builtin_memset(buf, 0, sizeof(buf)); + err = bpf_dynptr_read(&buf, len, &ptr_xdp, 2 + i * len, 0); + if (err) + goto out; + if (bpf_memcmp(data, buf, len) != 0) + goto out; + } + + if (bpf_dynptr_copy(&ptr_xdp, 2000, &ptr_xdp, 0, len * chunks) != -E2BIG) + err = 1; + +out: + bpf_ringbuf_discard_dynptr(&ptr_buf, 0); + return XDP_DROP; +} diff --git a/tools/testing/selftests/bpf/progs/irq.c b/tools/testing/selftests/bpf/progs/irq.c index b0b53d9809642..298d48d7886d9 100644 --- a/tools/testing/selftests/bpf/progs/irq.c +++ b/tools/testing/selftests/bpf/progs/irq.c @@ -222,7 +222,7 @@ int __noinline global_local_irq_balance(void) } SEC("?tc") -__failure __msg("global function calls are not allowed with IRQs disabled") +__success int irq_global_subprog(struct __sk_buff *ctx) { unsigned long flags; @@ -441,4 +441,73 @@ int irq_ooo_refs_array(struct __sk_buff *ctx) return 0; } +int __noinline +global_subprog(int i) +{ + if (i) + bpf_printk("%p", &i); + return i; +} + +int __noinline +global_sleepable_helper_subprog(int i) +{ + if (i) + bpf_copy_from_user(&i, sizeof(i), NULL); + return i; +} + +int __noinline +global_sleepable_kfunc_subprog(int i) +{ + if (i) + bpf_copy_from_user_str(&i, sizeof(i), NULL, 0); + global_subprog(i); + return i; +} + +int __noinline +global_subprog_calling_sleepable_global(int i) +{ + if (!i) + global_sleepable_kfunc_subprog(i); + return i; +} + +SEC("?syscall") +__success +int irq_non_sleepable_global_subprog(void *ctx) +{ + unsigned long flags; + + bpf_local_irq_save(&flags); + global_subprog(0); + bpf_local_irq_restore(&flags); + return 0; +} + +SEC("?syscall") +__failure __msg("global functions that may sleep are not allowed in non-sleepable context") +int irq_sleepable_helper_global_subprog(void *ctx) +{ + unsigned long flags; + + bpf_local_irq_save(&flags); + global_sleepable_helper_subprog(0); + bpf_local_irq_restore(&flags); + return 0; +} + +SEC("?syscall") +__failure __msg("global functions that may sleep are not allowed in non-sleepable context") +int irq_sleepable_global_subprog_indirect(void *ctx) +{ + unsigned long flags; + + bpf_local_irq_save(&flags); + global_subprog_calling_sleepable_global(0); + bpf_local_irq_restore(&flags); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 427b72954b87e..ffb574321404f 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -1651,4 +1651,69 @@ int clean_live_states(const void *ctx) return 0; } +SEC("?raw_tp") +__flag(BPF_F_TEST_STATE_FREQ) +__failure __msg("misaligned stack access off 0+-31+0 size 8") +__naked int absent_mark_in_the_middle_state(void) +{ + /* This is equivalent to C program below. + * + * r8 = bpf_get_prandom_u32(); + * r6 = -32; + * bpf_iter_num_new(&fp[-8], 0, 10); + * if (unlikely(bpf_get_prandom_u32())) + * r6 = -31; + * while (bpf_iter_num_next(&fp[-8])) { + * if (unlikely(bpf_get_prandom_u32())) + * *(fp + r6) = 7; + * } + * bpf_iter_num_destroy(&fp[-8]) + * return 0 + */ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r8 = r0;" + "r7 = 0;" + "r6 = -32;" + "r0 = 0;" + "*(u64 *)(r10 - 16) = r0;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto change_r6_%=;" + "loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto use_r6_%=;" + "goto loop_%=;" + "loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + "use_r6_%=:" + "r0 = r10;" + "r0 += r6;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "goto loop_%=;" + "change_r6_%=:" + "r6 = -31;" + "goto loop_%=;" + : + : __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/preempt_lock.c b/tools/testing/selftests/bpf/progs/preempt_lock.c index 6c5797bf0ead5..7d04254e61f1c 100644 --- a/tools/testing/selftests/bpf/progs/preempt_lock.c +++ b/tools/testing/selftests/bpf/progs/preempt_lock.c @@ -134,7 +134,7 @@ int __noinline preempt_global_subprog(void) } SEC("?tc") -__failure __msg("global function calls are not allowed with preemption disabled") +__success int preempt_global_subprog_test(struct __sk_buff *ctx) { preempt_disable(); @@ -143,4 +143,70 @@ int preempt_global_subprog_test(struct __sk_buff *ctx) return 0; } +int __noinline +global_subprog(int i) +{ + if (i) + bpf_printk("%p", &i); + return i; +} + +int __noinline +global_sleepable_helper_subprog(int i) +{ + if (i) + bpf_copy_from_user(&i, sizeof(i), NULL); + return i; +} + +int __noinline +global_sleepable_kfunc_subprog(int i) +{ + if (i) + bpf_copy_from_user_str(&i, sizeof(i), NULL, 0); + global_subprog(i); + return i; +} + +int __noinline +global_subprog_calling_sleepable_global(int i) +{ + if (!i) + global_sleepable_kfunc_subprog(i); + return i; +} + +SEC("?syscall") +__failure __msg("global functions that may sleep are not allowed in non-sleepable context") +int preempt_global_sleepable_helper_subprog(struct __sk_buff *ctx) +{ + preempt_disable(); + if (ctx->mark) + global_sleepable_helper_subprog(ctx->mark); + preempt_enable(); + return 0; +} + +SEC("?syscall") +__failure __msg("global functions that may sleep are not allowed in non-sleepable context") +int preempt_global_sleepable_kfunc_subprog(struct __sk_buff *ctx) +{ + preempt_disable(); + if (ctx->mark) + global_sleepable_kfunc_subprog(ctx->mark); + preempt_enable(); + return 0; +} + +SEC("?syscall") +__failure __msg("global functions that may sleep are not allowed in non-sleepable context") +int preempt_global_sleepable_subprog_indirect(struct __sk_buff *ctx) +{ + preempt_disable(); + if (ctx->mark) + global_subprog_calling_sleepable_global(ctx->mark); + preempt_enable(); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/prepare.c b/tools/testing/selftests/bpf/progs/prepare.c new file mode 100644 index 0000000000000..1f1dd547e4ee2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/prepare.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta */ +#include +#include +//#include + +char _license[] SEC("license") = "GPL"; + +int err; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 4096); +} ringbuf SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} array_map SEC(".maps"); + +SEC("cgroup_skb/egress") +int program(struct __sk_buff *skb) +{ + err = 0; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c index ab3a532b7dd6d..5cf1ae637ec7f 100644 --- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -439,3 +439,61 @@ int rcu_read_lock_global_subprog_unlock(void *ctx) ret += global_subprog_unlock(ret); return 0; } + +int __noinline +global_sleepable_helper_subprog(int i) +{ + if (i) + bpf_copy_from_user(&i, sizeof(i), NULL); + return i; +} + +int __noinline +global_sleepable_kfunc_subprog(int i) +{ + if (i) + bpf_copy_from_user_str(&i, sizeof(i), NULL, 0); + global_subprog(i); + return i; +} + +int __noinline +global_subprog_calling_sleepable_global(int i) +{ + if (!i) + global_sleepable_kfunc_subprog(i); + return i; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int rcu_read_lock_sleepable_helper_global_subprog(void *ctx) +{ + volatile int ret = 0; + + bpf_rcu_read_lock(); + ret += global_sleepable_helper_subprog(ret); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int rcu_read_lock_sleepable_kfunc_global_subprog(void *ctx) +{ + volatile int ret = 0; + + bpf_rcu_read_lock(); + ret += global_sleepable_kfunc_subprog(ret); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int rcu_read_lock_sleepable_global_subprog_indirect(void *ctx) +{ + volatile int ret = 0; + + bpf_rcu_read_lock(); + ret += global_subprog_calling_sleepable_global(ret); + bpf_rcu_read_unlock(); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/summarization.c b/tools/testing/selftests/bpf/progs/summarization.c new file mode 100644 index 0000000000000..f89effe82c9e2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/summarization.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "bpf_misc.h" + +__noinline +long changes_pkt_data(struct __sk_buff *sk) +{ + return bpf_skb_pull_data(sk, 0); +} + +__noinline __weak +long does_not_change_pkt_data(struct __sk_buff *sk) +{ + return 0; +} + +SEC("?tc") +int main_changes_with_subprogs(struct __sk_buff *sk) +{ + changes_pkt_data(sk); + does_not_change_pkt_data(sk); + return 0; +} + +SEC("?tc") +int main_changes(struct __sk_buff *sk) +{ + bpf_skb_pull_data(sk, 0); + return 0; +} + +SEC("?tc") +int main_does_not_change(struct __sk_buff *sk) +{ + return 0; +} + +__noinline +long might_sleep(struct pt_regs *ctx __arg_ctx) +{ + int i; + + bpf_copy_from_user(&i, sizeof(i), NULL); + return i; +} + +__noinline __weak +long does_not_sleep(struct pt_regs *ctx __arg_ctx) +{ + return 0; +} + +SEC("?uprobe.s") +int main_might_sleep_with_subprogs(struct pt_regs *ctx) +{ + might_sleep(ctx); + does_not_sleep(ctx); + return 0; +} + +SEC("?uprobe.s") +int main_might_sleep(struct pt_regs *ctx) +{ + int i; + + bpf_copy_from_user(&i, sizeof(i), NULL); + return i; +} + +SEC("?uprobe.s") +int main_does_not_sleep(struct pt_regs *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c b/tools/testing/selftests/bpf/progs/summarization_freplace.c similarity index 57% rename from tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c rename to tools/testing/selftests/bpf/progs/summarization_freplace.c index f9a622705f1b3..935f00e0e9ea0 100644 --- a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c +++ b/tools/testing/selftests/bpf/progs/summarization_freplace.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -#include +#include #include SEC("?freplace") @@ -15,4 +15,19 @@ long does_not_change_pkt_data(struct __sk_buff *sk) return 0; } +SEC("?freplace") +long might_sleep(struct pt_regs *ctx) +{ + int i; + + bpf_copy_from_user(&i, sizeof(i), NULL); + return i; +} + +SEC("?freplace") +long does_not_sleep(struct pt_regs *ctx) +{ + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c index 5eb25c6ad75b1..a5be3267dbb01 100644 --- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c +++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2018 Facebook */ -#include #include #include #include diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c index 1c8b678e2e9a3..f678ee6bd7eaf 100644 --- a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c +++ b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c @@ -245,4 +245,73 @@ int lock_global_subprog_call2(struct __sk_buff *ctx) return ret; } +int __noinline +global_subprog_int(int i) +{ + if (i) + bpf_printk("%p", &i); + return i; +} + +int __noinline +global_sleepable_helper_subprog(int i) +{ + if (i) + bpf_copy_from_user(&i, sizeof(i), NULL); + return i; +} + +int __noinline +global_sleepable_kfunc_subprog(int i) +{ + if (i) + bpf_copy_from_user_str(&i, sizeof(i), NULL, 0); + global_subprog_int(i); + return i; +} + +int __noinline +global_subprog_calling_sleepable_global(int i) +{ + if (!i) + global_sleepable_kfunc_subprog(i); + return i; +} + +SEC("?syscall") +int lock_global_sleepable_helper_subprog(struct __sk_buff *ctx) +{ + int ret = 0; + + bpf_spin_lock(&lockA); + if (ctx->mark == 42) + ret = global_sleepable_helper_subprog(ctx->mark); + bpf_spin_unlock(&lockA); + return ret; +} + +SEC("?syscall") +int lock_global_sleepable_kfunc_subprog(struct __sk_buff *ctx) +{ + int ret = 0; + + bpf_spin_lock(&lockA); + if (ctx->mark == 42) + ret = global_sleepable_kfunc_subprog(ctx->mark); + bpf_spin_unlock(&lockA); + return ret; +} + +SEC("?syscall") +int lock_global_sleepable_subprog_indirect(struct __sk_buff *ctx) +{ + int ret = 0; + + bpf_spin_lock(&lockA); + if (ctx->mark == 42) + ret = global_subprog_calling_sleepable_global(ctx->mark); + bpf_spin_unlock(&lockA); + return ret; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c index 5094c288cfd7b..a9be6ae494545 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c +++ b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c @@ -620,23 +620,61 @@ __naked void helper_call_does_not_prevent_bpf_fastcall(void) SEC("raw_tp") __arch_x86_64 +__log_level(4) __msg("stack depth 24") +/* may_goto counter at -24 */ +__xlated("0: *(u64 *)(r10 -24) =") +/* may_goto timestamp at -16 */ +__xlated("1: *(u64 *)(r10 -16) =") +__xlated("2: r1 = 1") +__xlated("...") +__xlated("4: r0 = &(void __percpu *)(r0)") +__xlated("...") +/* may_goto expansion starts */ +__xlated("6: r11 = *(u64 *)(r10 -24)") +__xlated("7: if r11 == 0x0 goto pc+6") +__xlated("8: r11 -= 1") +__xlated("9: if r11 != 0x0 goto pc+2") +__xlated("10: r11 = -24") +__xlated("11: call unknown") +__xlated("12: *(u64 *)(r10 -24) = r11") +/* may_goto expansion ends */ +__xlated("13: *(u64 *)(r10 -8) = r1") +__xlated("14: exit") +__success +__naked void may_goto_interaction_x86_64(void) +{ + asm volatile ( + "r1 = 1;" + "*(u64 *)(r10 - 16) = r1;" + "call %[bpf_get_smp_processor_id];" + "r1 = *(u64 *)(r10 - 16);" + ".8byte %[may_goto];" + /* just touch some stack at -8 */ + "*(u64 *)(r10 - 8) = r1;" + "exit;" + : + : __imm(bpf_get_smp_processor_id), + __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, +1 /* offset */, 0)) + : __clobber_all); +} + +SEC("raw_tp") +__arch_arm64 __log_level(4) __msg("stack depth 16") /* may_goto counter at -16 */ __xlated("0: *(u64 *)(r10 -16) =") __xlated("1: r1 = 1") -__xlated("...") -__xlated("3: r0 = &(void __percpu *)(r0)") -__xlated("...") +__xlated("2: call bpf_get_smp_processor_id") /* may_goto expansion starts */ -__xlated("5: r11 = *(u64 *)(r10 -16)") -__xlated("6: if r11 == 0x0 goto pc+3") -__xlated("7: r11 -= 1") -__xlated("8: *(u64 *)(r10 -16) = r11") +__xlated("3: r11 = *(u64 *)(r10 -16)") +__xlated("4: if r11 == 0x0 goto pc+3") +__xlated("5: r11 -= 1") +__xlated("6: *(u64 *)(r10 -16) = r11") /* may_goto expansion ends */ -__xlated("9: *(u64 *)(r10 -8) = r1") -__xlated("10: exit") +__xlated("7: *(u64 *)(r10 -8) = r1") +__xlated("8: exit") __success -__naked void may_goto_interaction(void) +__naked void may_goto_interaction_arm64(void) { asm volatile ( "r1 = 1;" diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c index 05a329ee45ee9..d5d8f24df3949 100644 --- a/tools/testing/selftests/bpf/progs/verifier_gotol.c +++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c @@ -4,11 +4,7 @@ #include #include "bpf_misc.h" -#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ - defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ - defined(__TARGET_ARCH_loongarch)) && \ - __clang_major__ >= 18 +#ifdef CAN_USE_GOTOL SEC("socket") __description("gotol, small_imm") diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c index e54bb5385bc1b..75dd922e4e9f8 100644 --- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c +++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c @@ -407,11 +407,7 @@ l0_%=: call %[bpf_jiffies64]; \ : __clobber_all); } -#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ - defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ - defined(__TARGET_ARCH_loongarch)) && \ - __clang_major__ >= 18 +#ifdef CAN_USE_GOTOL SEC("socket") __success __retval(0) __naked void gotol_and_may_goto(void) diff --git a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c new file mode 100644 index 0000000000000..6080974538327 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Google LLC. */ + +#include +#include +#include "../../../include/linux/filter.h" +#include "bpf_misc.h" + +#ifdef CAN_USE_LOAD_ACQ_STORE_REL + +SEC("socket") +__description("load-acquire, 8-bit") +__success __success_unpriv __retval(0x12) +__naked void load_acquire_8(void) +{ + asm volatile ( + "w1 = 0x12;" + "*(u8 *)(r10 - 1) = w1;" + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r10 - 1)); + "exit;" + : + : __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -1)) + : __clobber_all); +} + +SEC("socket") +__description("load-acquire, 16-bit") +__success __success_unpriv __retval(0x1234) +__naked void load_acquire_16(void) +{ + asm volatile ( + "w1 = 0x1234;" + "*(u16 *)(r10 - 2) = w1;" + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u16 *)(r10 - 2)); + "exit;" + : + : __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -2)) + : __clobber_all); +} + +SEC("socket") +__description("load-acquire, 32-bit") +__success __success_unpriv __retval(0x12345678) +__naked void load_acquire_32(void) +{ + asm volatile ( + "w1 = 0x12345678;" + "*(u32 *)(r10 - 4) = w1;" + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u32 *)(r10 - 4)); + "exit;" + : + : __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -4)) + : __clobber_all); +} + +SEC("socket") +__description("load-acquire, 64-bit") +__success __success_unpriv __retval(0x1234567890abcdef) +__naked void load_acquire_64(void) +{ + asm volatile ( + "r1 = 0x1234567890abcdef ll;" + "*(u64 *)(r10 - 8) = r1;" + ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r10 - 8)); + "exit;" + : + : __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -8)) + : __clobber_all); +} + +SEC("socket") +__description("load-acquire with uninitialized src_reg") +__failure __failure_unpriv __msg("R2 !read_ok") +__naked void load_acquire_with_uninitialized_src_reg(void) +{ + asm volatile ( + ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r2 + 0)); + "exit;" + : + : __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0)) + : __clobber_all); +} + +SEC("socket") +__description("load-acquire with non-pointer src_reg") +__failure __failure_unpriv __msg("R1 invalid mem access 'scalar'") +__naked void load_acquire_with_non_pointer_src_reg(void) +{ + asm volatile ( + "r1 = 0;" + ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r1 + 0)); + "exit;" + : + : __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_1, 0)) + : __clobber_all); +} + +SEC("socket") +__description("misaligned load-acquire") +__failure __failure_unpriv __msg("misaligned stack access off") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void load_acquire_misaligned(void) +{ + asm volatile ( + "r1 = 0;" + "*(u64 *)(r10 - 8) = r1;" + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u32 *)(r10 - 5)); + "exit;" + : + : __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -5)) + : __clobber_all); +} + +SEC("socket") +__description("load-acquire from ctx pointer") +__failure __failure_unpriv __msg("BPF_ATOMIC loads from R1 ctx is not allowed") +__naked void load_acquire_from_ctx_pointer(void) +{ + asm volatile ( + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r1 + 0)); + "exit;" + : + : __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_1, 0)) + : __clobber_all); +} + +SEC("xdp") +__description("load-acquire from pkt pointer") +__failure __msg("BPF_ATOMIC loads from R2 pkt is not allowed") +__naked void load_acquire_from_pkt_pointer(void) +{ + asm volatile ( + "r2 = *(u32 *)(r1 + %[xdp_md_data]);" + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r2 + 0)); + "exit;" + : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0)) + : __clobber_all); +} + +SEC("flow_dissector") +__description("load-acquire from flow_keys pointer") +__failure __msg("BPF_ATOMIC loads from R2 flow_keys is not allowed") +__naked void load_acquire_from_flow_keys_pointer(void) +{ + asm volatile ( + "r2 = *(u64 *)(r1 + %[__sk_buff_flow_keys]);" + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r2 + 0)); + "exit;" + : + : __imm_const(__sk_buff_flow_keys, + offsetof(struct __sk_buff, flow_keys)), + __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0)) + : __clobber_all); +} + +SEC("sk_reuseport") +__description("load-acquire from sock pointer") +__failure __msg("BPF_ATOMIC loads from R2 sock is not allowed") +__naked void load_acquire_from_sock_pointer(void) +{ + asm volatile ( + "r2 = *(u64 *)(r1 + %[sk_reuseport_md_sk]);" + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r2 + 0)); + "exit;" + : + : __imm_const(sk_reuseport_md_sk, offsetof(struct sk_reuseport_md, sk)), + __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0)) + : __clobber_all); +} + +#else /* CAN_USE_LOAD_ACQ_STORE_REL */ + +SEC("socket") +__description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support load-acquire, use a dummy test") +__success +int dummy_test(void) +{ + return 0; +} + +#endif /* CAN_USE_LOAD_ACQ_STORE_REL */ + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c index e81097c96fe27..3966d827f2889 100644 --- a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c +++ b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c @@ -69,8 +69,38 @@ __naked void may_goto_batch_1(void) } SEC("raw_tp") -__description("may_goto batch with offsets 2/0") +__description("may_goto batch with offsets 2/0 - x86_64") __arch_x86_64 +__xlated("0: *(u64 *)(r10 -16) = 65535") +__xlated("1: *(u64 *)(r10 -8) = 0") +__xlated("2: r11 = *(u64 *)(r10 -16)") +__xlated("3: if r11 == 0x0 goto pc+6") +__xlated("4: r11 -= 1") +__xlated("5: if r11 != 0x0 goto pc+2") +__xlated("6: r11 = -16") +__xlated("7: call unknown") +__xlated("8: *(u64 *)(r10 -16) = r11") +__xlated("9: r0 = 1") +__xlated("10: r0 = 2") +__xlated("11: exit") +__success +__naked void may_goto_batch_2_x86_64(void) +{ + asm volatile ( + ".8byte %[may_goto1];" + ".8byte %[may_goto3];" + "r0 = 1;" + "r0 = 2;" + "exit;" + : + : __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 2 /* offset */, 0)), + __imm_insn(may_goto3, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0)) + : __clobber_all); +} + +SEC("raw_tp") +__description("may_goto batch with offsets 2/0 - arm64") +__arch_arm64 __xlated("0: *(u64 *)(r10 -8) = 8388608") __xlated("1: r11 = *(u64 *)(r10 -8)") __xlated("2: if r11 == 0x0 goto pc+3") @@ -80,7 +110,7 @@ __xlated("5: r0 = 1") __xlated("6: r0 = 2") __xlated("7: exit") __success -__naked void may_goto_batch_2(void) +__naked void may_goto_batch_2_arm64(void) { asm volatile ( ".8byte %[may_goto1];" diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c index 6b564d4c09866..6662d4b39969a 100644 --- a/tools/testing/selftests/bpf/progs/verifier_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_precision.c @@ -2,6 +2,7 @@ /* Copyright (C) 2023 SUSE LLC */ #include #include +#include "../../../include/linux/filter.h" #include "bpf_misc.h" SEC("?raw_tp") @@ -90,6 +91,54 @@ __naked int bpf_end_bswap(void) ::: __clobber_all); } +#if defined(ENABLE_ATOMICS_TESTS) && \ + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) + +SEC("?raw_tp") +__success __log_level(2) +__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r3 = r10") +__msg("mark_precise: frame0: regs=r2 stack= before 2: (db) r2 = load_acquire((u64 *)(r10 -8))") +__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1") +__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8") +__naked int bpf_load_acquire(void) +{ + asm volatile ( + "r1 = 8;" + "*(u64 *)(r10 - 8) = r1;" + ".8byte %[load_acquire_insn];" /* r2 = load_acquire((u64 *)(r10 - 8)); */ + "r3 = r10;" + "r3 += r2;" /* mark_precise */ + "r0 = 0;" + "exit;" + : + : __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -8)) + : __clobber_all); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r2 = r10") +__msg("mark_precise: frame0: regs=r1 stack= before 2: (79) r1 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-8 before 1: (db) store_release((u64 *)(r10 -8), r1)") +__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8") +__naked int bpf_store_release(void) +{ + asm volatile ( + "r1 = 8;" + ".8byte %[store_release_insn];" /* store_release((u64 *)(r10 - 8), r1); */ + "r1 = *(u64 *)(r10 - 8);" + "r2 = r10;" + "r2 += r1;" /* mark_precise */ + "r0 = 0;" + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8)) + : __clobber_all); +} + +#endif /* load-acquire, store-release */ #endif /* v4 instruction */ SEC("?raw_tp") diff --git a/tools/testing/selftests/bpf/progs/verifier_store_release.c b/tools/testing/selftests/bpf/progs/verifier_store_release.c new file mode 100644 index 0000000000000..f1c64c08f25fb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_store_release.c @@ -0,0 +1,264 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Google LLC. */ + +#include +#include +#include "../../../include/linux/filter.h" +#include "bpf_misc.h" + +#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \ + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) + +SEC("socket") +__description("store-release, 8-bit") +__success __success_unpriv __retval(0x12) +__naked void store_release_8(void) +{ + asm volatile ( + "w1 = 0x12;" + ".8byte %[store_release_insn];" // store_release((u8 *)(r10 - 1), w1); + "w0 = *(u8 *)(r10 - 1);" + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -1)) + : __clobber_all); +} + +SEC("socket") +__description("store-release, 16-bit") +__success __success_unpriv __retval(0x1234) +__naked void store_release_16(void) +{ + asm volatile ( + "w1 = 0x1234;" + ".8byte %[store_release_insn];" // store_release((u16 *)(r10 - 2), w1); + "w0 = *(u16 *)(r10 - 2);" + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_H, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -2)) + : __clobber_all); +} + +SEC("socket") +__description("store-release, 32-bit") +__success __success_unpriv __retval(0x12345678) +__naked void store_release_32(void) +{ + asm volatile ( + "w1 = 0x12345678;" + ".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 4), w1); + "w0 = *(u32 *)(r10 - 4);" + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_W, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -4)) + : __clobber_all); +} + +SEC("socket") +__description("store-release, 64-bit") +__success __success_unpriv __retval(0x1234567890abcdef) +__naked void store_release_64(void) +{ + asm volatile ( + "r1 = 0x1234567890abcdef ll;" + ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1); + "r0 = *(u64 *)(r10 - 8);" + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8)) + : __clobber_all); +} + +SEC("socket") +__description("store-release with uninitialized src_reg") +__failure __failure_unpriv __msg("R2 !read_ok") +__naked void store_release_with_uninitialized_src_reg(void) +{ + asm volatile ( + ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r2); + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_2, -8)) + : __clobber_all); +} + +SEC("socket") +__description("store-release with uninitialized dst_reg") +__failure __failure_unpriv __msg("R2 !read_ok") +__naked void store_release_with_uninitialized_dst_reg(void) +{ + asm volatile ( + "r1 = 0;" + ".8byte %[store_release_insn];" // store_release((u64 *)(r2 - 8), r1); + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_2, BPF_REG_1, -8)) + : __clobber_all); +} + +SEC("socket") +__description("store-release with non-pointer dst_reg") +__failure __failure_unpriv __msg("R1 invalid mem access 'scalar'") +__naked void store_release_with_non_pointer_dst_reg(void) +{ + asm volatile ( + "r1 = 0;" + ".8byte %[store_release_insn];" // store_release((u64 *)(r1 + 0), r1); + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_1, BPF_REG_1, 0)) + : __clobber_all); +} + +SEC("socket") +__description("misaligned store-release") +__failure __failure_unpriv __msg("misaligned stack access off") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void store_release_misaligned(void) +{ + asm volatile ( + "w0 = 0;" + ".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 5), w0); + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_W, BPF_STORE_REL, BPF_REG_10, BPF_REG_0, -5)) + : __clobber_all); +} + +SEC("socket") +__description("store-release to ctx pointer") +__failure __failure_unpriv __msg("BPF_ATOMIC stores into R1 ctx is not allowed") +__naked void store_release_to_ctx_pointer(void) +{ + asm volatile ( + "w0 = 0;" + ".8byte %[store_release_insn];" // store_release((u8 *)(r1 + 0), w0); + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_1, BPF_REG_0, 0)) + : __clobber_all); +} + +SEC("xdp") +__description("store-release to pkt pointer") +__failure __msg("BPF_ATOMIC stores into R2 pkt is not allowed") +__naked void store_release_to_pkt_pointer(void) +{ + asm volatile ( + "w0 = 0;" + "r2 = *(u32 *)(r1 + %[xdp_md_data]);" + ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0); + "exit;" + : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0)) + : __clobber_all); +} + +SEC("flow_dissector") +__description("store-release to flow_keys pointer") +__failure __msg("BPF_ATOMIC stores into R2 flow_keys is not allowed") +__naked void store_release_to_flow_keys_pointer(void) +{ + asm volatile ( + "w0 = 0;" + "r2 = *(u64 *)(r1 + %[__sk_buff_flow_keys]);" + ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0); + "exit;" + : + : __imm_const(__sk_buff_flow_keys, + offsetof(struct __sk_buff, flow_keys)), + __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0)) + : __clobber_all); +} + +SEC("sk_reuseport") +__description("store-release to sock pointer") +__failure __msg("BPF_ATOMIC stores into R2 sock is not allowed") +__naked void store_release_to_sock_pointer(void) +{ + asm volatile ( + "w0 = 0;" + "r2 = *(u64 *)(r1 + %[sk_reuseport_md_sk]);" + ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0); + "exit;" + : + : __imm_const(sk_reuseport_md_sk, offsetof(struct sk_reuseport_md, sk)), + __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0)) + : __clobber_all); +} + +SEC("socket") +__description("store-release, leak pointer to stack") +__success __success_unpriv __retval(0) +__naked void store_release_leak_pointer_to_stack(void) +{ + asm volatile ( + ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1); + "r0 = 0;" + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8)) + : __clobber_all); +} + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, long long); + __type(value, long long); +} map_hash_8b SEC(".maps"); + +SEC("socket") +__description("store-release, leak pointer to map") +__success __retval(0) +__failure_unpriv __msg_unpriv("R6 leaks addr into map") +__naked void store_release_leak_pointer_to_map(void) +{ + asm volatile ( + "r6 = r1;" + "r1 = %[map_hash_8b] ll;" + "r2 = 0;" + "*(u64 *)(r10 - 8) = r2;" + "r2 = r10;" + "r2 += -8;" + "call %[bpf_map_lookup_elem];" + "if r0 == 0 goto l0_%=;" + ".8byte %[store_release_insn];" // store_release((u64 *)(r0 + 0), r6); +"l0_%=:" + "r0 = 0;" + "exit;" + : + : __imm_addr(map_hash_8b), + __imm(bpf_map_lookup_elem), + __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_0, BPF_REG_6, 0)) + : __clobber_all); +} + +#else + +SEC("socket") +__description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support store-release, use a dummy test") +__success +int dummy_test(void) +{ + return 0; +} + +#endif + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 8b40e9496af16..986ce32b113a3 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1396,9 +1396,10 @@ static void test_map_stress(void) #define MAX_DELAY_US 50000 #define MIN_DELAY_RANGE_US 5000 -static bool retry_for_again_or_busy(int err) +static bool can_retry(int err) { - return (err == EAGAIN || err == EBUSY); + return (err == EAGAIN || err == EBUSY || + (err == ENOMEM && map_opts.map_flags == BPF_F_NO_PREALLOC)); } int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, @@ -1451,12 +1452,12 @@ static void test_update_delete(unsigned int fn, void *data) if (do_update) { err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES, - retry_for_again_or_busy); + can_retry); if (err) printf("error %d %d\n", err, errno); assert(err == 0); err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES, - retry_for_again_or_busy); + can_retry); if (err) printf("error %d %d\n", err, errno); assert(err == 0); diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh deleted file mode 100755 index d9661b9988ba9..0000000000000 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ /dev/null @@ -1,645 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -# End-to-end eBPF tunnel test suite -# The script tests BPF network tunnel implementation. -# -# Topology: -# --------- -# root namespace | at_ns0 namespace -# | -# ----------- | ----------- -# | tnl dev | | | tnl dev | (overlay network) -# ----------- | ----------- -# metadata-mode | native-mode -# with bpf | -# | -# ---------- | ---------- -# | veth1 | --------- | veth0 | (underlay network) -# ---------- peer ---------- -# -# -# Device Configuration -# -------------------- -# Root namespace with metadata-mode tunnel + BPF -# Device names and addresses: -# veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay) -# tunnel dev 11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay) -# -# Namespace at_ns0 with native tunnel -# Device names and addresses: -# veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay) -# tunnel dev 00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 (overlay) -# -# -# End-to-end ping packet flow -# --------------------------- -# Most of the tests start by namespace creation, device configuration, -# then ping the underlay and overlay network. When doing 'ping 10.1.1.100' -# from root namespace, the following operations happen: -# 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev. -# 2) Tnl device's egress BPF program is triggered and set the tunnel metadata, -# with remote_ip=172.16.1.100 and others. -# 3) Outer tunnel header is prepended and route the packet to veth1's egress -# 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0 -# 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet -# 6) Forward the packet to the overlay tnl dev - -BPF_FILE="test_tunnel_kern.bpf.o" -BPF_PIN_TUNNEL_DIR="/sys/fs/bpf/tc/tunnel" -PING_ARG="-c 3 -w 10 -q" -ret=0 -GREEN='\033[0;92m' -RED='\033[0;31m' -NC='\033[0m' # No Color - -config_device() -{ - ip netns add at_ns0 - ip link add veth0 type veth peer name veth1 - ip link set veth0 netns at_ns0 - ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 - ip netns exec at_ns0 ip link set dev veth0 up - ip link set dev veth1 up mtu 1500 - ip addr add dev veth1 172.16.1.200/24 -} - -add_gre_tunnel() -{ - tun_key= - if [ -n "$1" ]; then - tun_key="key $1" - fi - - # at_ns0 namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq $tun_key \ - local 172.16.1.100 remote 172.16.1.200 - ip netns exec at_ns0 ip link set dev $DEV_NS up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - - # root namespace - ip link add dev $DEV type $TYPE $tun_key external - ip link set dev $DEV up - ip addr add dev $DEV 10.1.1.200/24 -} - -add_ip6gretap_tunnel() -{ - - # assign ipv6 address - ip netns exec at_ns0 ip addr add ::11/96 dev veth0 - ip netns exec at_ns0 ip link set dev veth0 up - ip addr add dev veth1 ::22/96 - ip link set dev veth1 up - - # at_ns0 namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \ - local ::11 remote ::22 - - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96 - ip netns exec at_ns0 ip link set dev $DEV_NS up - - # root namespace - ip link add dev $DEV type $TYPE external - ip addr add dev $DEV 10.1.1.200/24 - ip addr add dev $DEV fc80::200/24 - ip link set dev $DEV up -} - -add_erspan_tunnel() -{ - # at_ns0 namespace - if [ "$1" == "v1" ]; then - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq key 2 \ - local 172.16.1.100 remote 172.16.1.200 \ - erspan_ver 1 erspan 123 - else - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq key 2 \ - local 172.16.1.100 remote 172.16.1.200 \ - erspan_ver 2 erspan_dir egress erspan_hwid 3 - fi - ip netns exec at_ns0 ip link set dev $DEV_NS up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - - # root namespace - ip link add dev $DEV type $TYPE external - ip link set dev $DEV up - ip addr add dev $DEV 10.1.1.200/24 -} - -add_ip6erspan_tunnel() -{ - - # assign ipv6 address - ip netns exec at_ns0 ip addr add ::11/96 dev veth0 - ip netns exec at_ns0 ip link set dev veth0 up - ip addr add dev veth1 ::22/96 - ip link set dev veth1 up - - # at_ns0 namespace - if [ "$1" == "v1" ]; then - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq key 2 \ - local ::11 remote ::22 \ - erspan_ver 1 erspan 123 - else - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq key 2 \ - local ::11 remote ::22 \ - erspan_ver 2 erspan_dir egress erspan_hwid 7 - fi - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns0 ip link set dev $DEV_NS up - - # root namespace - ip link add dev $DEV type $TYPE external - ip addr add dev $DEV 10.1.1.200/24 - ip link set dev $DEV up -} - -add_geneve_tunnel() -{ - # at_ns0 namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE \ - id 2 dstport 6081 remote 172.16.1.200 - ip netns exec at_ns0 ip link set dev $DEV_NS up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - - # root namespace - ip link add dev $DEV type $TYPE dstport 6081 external - ip link set dev $DEV up - ip addr add dev $DEV 10.1.1.200/24 -} - -add_ip6geneve_tunnel() -{ - ip netns exec at_ns0 ip addr add ::11/96 dev veth0 - ip netns exec at_ns0 ip link set dev veth0 up - ip addr add dev veth1 ::22/96 - ip link set dev veth1 up - - # at_ns0 namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE id 22 \ - remote ::22 # geneve has no local option - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns0 ip link set dev $DEV_NS up - - # root namespace - ip link add dev $DEV type $TYPE external - ip addr add dev $DEV 10.1.1.200/24 - ip link set dev $DEV up -} - -add_ipip_tunnel() -{ - # at_ns0 namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE \ - local 172.16.1.100 remote 172.16.1.200 - ip netns exec at_ns0 ip link set dev $DEV_NS up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - - # root namespace - ip link add dev $DEV type $TYPE external - ip link set dev $DEV up - ip addr add dev $DEV 10.1.1.200/24 -} - -add_ip6tnl_tunnel() -{ - ip netns exec at_ns0 ip addr add ::11/96 dev veth0 - ip netns exec at_ns0 ip link set dev veth0 up - ip addr add dev veth1 ::22/96 - ip link set dev veth1 up - - # at_ns0 namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE \ - local ::11 remote ::22 - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns0 ip addr add dev $DEV_NS 1::11/96 - ip netns exec at_ns0 ip link set dev $DEV_NS up - - # root namespace - ip link add dev $DEV type $TYPE external - ip addr add dev $DEV 10.1.1.200/24 - ip addr add dev $DEV 1::22/96 - ip link set dev $DEV up -} - -test_gre() -{ - TYPE=gretap - DEV_NS=gretap00 - DEV=gretap11 - ret=0 - - check $TYPE - config_device - add_gre_tunnel 2 - attach_bpf $DEV gre_set_tunnel gre_get_tunnel - ping $PING_ARG 10.1.1.100 - check_err $? - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: $TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: $TYPE"${NC} -} - -test_gre_no_tunnel_key() -{ - TYPE=gre - DEV_NS=gre00 - DEV=gre11 - ret=0 - - check $TYPE - config_device - add_gre_tunnel - attach_bpf $DEV gre_set_tunnel_no_key gre_get_tunnel - ping $PING_ARG 10.1.1.100 - check_err $? - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: $TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: $TYPE"${NC} -} - -test_ip6gre() -{ - TYPE=ip6gre - DEV_NS=ip6gre00 - DEV=ip6gre11 - ret=0 - - check $TYPE - config_device - # reuse the ip6gretap function - add_ip6gretap_tunnel - attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel - # underlay - ping6 $PING_ARG ::11 - # overlay: ipv4 over ipv6 - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - ping $PING_ARG 10.1.1.100 - check_err $? - # overlay: ipv6 over ipv6 - ip netns exec at_ns0 ping6 $PING_ARG fc80::200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: $TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: $TYPE"${NC} -} - -test_ip6gretap() -{ - TYPE=ip6gretap - DEV_NS=ip6gretap00 - DEV=ip6gretap11 - ret=0 - - check $TYPE - config_device - add_ip6gretap_tunnel - attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel - # underlay - ping6 $PING_ARG ::11 - # overlay: ipv4 over ipv6 - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - ping $PING_ARG 10.1.1.100 - check_err $? - # overlay: ipv6 over ipv6 - ip netns exec at_ns0 ping6 $PING_ARG fc80::200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: $TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: $TYPE"${NC} -} - -test_erspan() -{ - TYPE=erspan - DEV_NS=erspan00 - DEV=erspan11 - ret=0 - - check $TYPE - config_device - add_erspan_tunnel $1 - attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel - ping $PING_ARG 10.1.1.100 - check_err $? - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: $TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: $TYPE"${NC} -} - -test_ip6erspan() -{ - TYPE=ip6erspan - DEV_NS=ip6erspan00 - DEV=ip6erspan11 - ret=0 - - check $TYPE - config_device - add_ip6erspan_tunnel $1 - attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel - ping6 $PING_ARG ::11 - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: $TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: $TYPE"${NC} -} - -test_geneve() -{ - TYPE=geneve - DEV_NS=geneve00 - DEV=geneve11 - ret=0 - - check $TYPE - config_device - add_geneve_tunnel - attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel - ping $PING_ARG 10.1.1.100 - check_err $? - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: $TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: $TYPE"${NC} -} - -test_ip6geneve() -{ - TYPE=geneve - DEV_NS=ip6geneve00 - DEV=ip6geneve11 - ret=0 - - check $TYPE - config_device - add_ip6geneve_tunnel - attach_bpf $DEV ip6geneve_set_tunnel ip6geneve_get_tunnel - ping $PING_ARG 10.1.1.100 - check_err $? - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: ip6$TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: ip6$TYPE"${NC} -} - -test_ipip() -{ - TYPE=ipip - DEV_NS=ipip00 - DEV=ipip11 - ret=0 - - check $TYPE - config_device - add_ipip_tunnel - ip link set dev veth1 mtu 1500 - attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel - ping $PING_ARG 10.1.1.100 - check_err $? - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: $TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: $TYPE"${NC} -} - -test_ipip6() -{ - TYPE=ip6tnl - DEV_NS=ipip6tnl00 - DEV=ipip6tnl11 - ret=0 - - check $TYPE - config_device - add_ip6tnl_tunnel - ip link set dev veth1 mtu 1500 - attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel - # underlay - ping6 $PING_ARG ::11 - # ip4 over ip6 - ping $PING_ARG 10.1.1.100 - check_err $? - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: $TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: $TYPE"${NC} -} - -test_ip6ip6() -{ - TYPE=ip6tnl - DEV_NS=ip6ip6tnl00 - DEV=ip6ip6tnl11 - ret=0 - - check $TYPE - config_device - add_ip6tnl_tunnel - ip link set dev veth1 mtu 1500 - attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel - # underlay - ping6 $PING_ARG ::11 - # ip6 over ip6 - ping6 $PING_ARG 1::11 - check_err $? - ip netns exec at_ns0 ping6 $PING_ARG 1::22 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: ip6$TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: ip6$TYPE"${NC} -} - -attach_bpf() -{ - DEV=$1 - SET=$2 - GET=$3 - mkdir -p ${BPF_PIN_TUNNEL_DIR} - bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR}/ - tc qdisc add dev $DEV clsact - tc filter add dev $DEV egress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$SET - tc filter add dev $DEV ingress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$GET -} - -cleanup() -{ - rm -rf ${BPF_PIN_TUNNEL_DIR} - - ip netns delete at_ns0 2> /dev/null - ip link del veth1 2> /dev/null - ip link del ipip11 2> /dev/null - ip link del ipip6tnl11 2> /dev/null - ip link del ip6ip6tnl11 2> /dev/null - ip link del gretap11 2> /dev/null - ip link del gre11 2> /dev/null - ip link del ip6gre11 2> /dev/null - ip link del ip6gretap11 2> /dev/null - ip link del geneve11 2> /dev/null - ip link del ip6geneve11 2> /dev/null - ip link del erspan11 2> /dev/null - ip link del ip6erspan11 2> /dev/null -} - -cleanup_exit() -{ - echo "CATCH SIGKILL or SIGINT, cleanup and exit" - cleanup - exit 0 -} - -check() -{ - ip link help 2>&1 | grep -q "\s$1\s" - if [ $? -ne 0 ];then - echo "SKIP $1: iproute2 not support" - cleanup - return 1 - fi -} - -enable_debug() -{ - echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control - echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control - echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control - echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control -} - -check_err() -{ - if [ $ret -eq 0 ]; then - ret=$1 - fi -} - -bpf_tunnel_test() -{ - local errors=0 - - echo "Testing GRE tunnel..." - test_gre - errors=$(( $errors + $? )) - - echo "Testing GRE tunnel (without tunnel keys)..." - test_gre_no_tunnel_key - errors=$(( $errors + $? )) - - echo "Testing IP6GRE tunnel..." - test_ip6gre - errors=$(( $errors + $? )) - - echo "Testing IP6GRETAP tunnel..." - test_ip6gretap - errors=$(( $errors + $? )) - - echo "Testing ERSPAN tunnel..." - test_erspan v2 - errors=$(( $errors + $? )) - - echo "Testing IP6ERSPAN tunnel..." - test_ip6erspan v2 - errors=$(( $errors + $? )) - - echo "Testing GENEVE tunnel..." - test_geneve - errors=$(( $errors + $? )) - - echo "Testing IP6GENEVE tunnel..." - test_ip6geneve - errors=$(( $errors + $? )) - - echo "Testing IPIP tunnel..." - test_ipip - errors=$(( $errors + $? )) - - echo "Testing IPIP6 tunnel..." - test_ipip6 - errors=$(( $errors + $? )) - - echo "Testing IP6IP6 tunnel..." - test_ip6ip6 - errors=$(( $errors + $? )) - - return $errors -} - -trap cleanup 0 3 6 -trap cleanup_exit 2 9 - -cleanup -bpf_tunnel_test - -if [ $? -ne 0 ]; then - echo -e "$(basename $0): ${RED}FAIL${NC}" - exit 1 -fi -echo -e "$(basename $0): ${GREEN}PASS${NC}" -exit 0 diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 175a03e6c5ef4..a18972ffdeb6f 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -268,10 +268,11 @@ static int append_filter(struct filter **filters, int *cnt, const char *str); static int append_filter_file(const char *path); static int append_var_preset(struct var_preset **presets, int *cnt, const char *expr); static int append_var_preset_file(const char *filename); +static int append_file(const char *path); +static int append_file_from_file(const char *path); static error_t parse_arg(int key, char *arg, struct argp_state *state) { - void *tmp; int err; switch (key) { @@ -381,14 +382,14 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) break; } case ARGP_KEY_ARG: - tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames)); - if (!tmp) - return -ENOMEM; - env.filenames = tmp; - env.filenames[env.filename_cnt] = strdup(arg); - if (!env.filenames[env.filename_cnt]) - return -ENOMEM; - env.filename_cnt++; + if (arg[0] == '@') + err = append_file_from_file(arg + 1); + else + err = append_file(arg); + if (err) { + fprintf(stderr, "Failed to collect BPF object files: %d\n", err); + return err; + } break; default: return ARGP_ERR_UNKNOWN; @@ -659,7 +660,7 @@ static int append_filter_file(const char *path) f = fopen(path, "r"); if (!f) { err = -errno; - fprintf(stderr, "Failed to open filters in '%s': %s\n", path, strerror(err)); + fprintf(stderr, "Failed to open filters in '%s': %s\n", path, strerror(-err)); return err; } @@ -689,6 +690,49 @@ static const struct stat_specs default_output_spec = { }, }; +static int append_file(const char *path) +{ + void *tmp; + + tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames)); + if (!tmp) + return -ENOMEM; + env.filenames = tmp; + env.filenames[env.filename_cnt] = strdup(path); + if (!env.filenames[env.filename_cnt]) + return -ENOMEM; + env.filename_cnt++; + return 0; +} + +static int append_file_from_file(const char *path) +{ + char buf[1024]; + int err = 0; + FILE *f; + + f = fopen(path, "r"); + if (!f) { + err = -errno; + fprintf(stderr, "Failed to open object files list in '%s': %s\n", + path, strerror(errno)); + return err; + } + + while (fscanf(f, " %1023[^\n]\n", buf) == 1) { + /* lines starting with # are comments, skip them */ + if (buf[0] == '\0' || buf[0] == '#') + continue; + err = append_file(buf); + if (err) + goto cleanup; + } + +cleanup: + fclose(f); + return err; +} + static const struct stat_specs default_csv_output_spec = { .spec_cnt = 14, .ids = { @@ -1190,13 +1234,13 @@ static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const ch bpf_program__set_expected_attach_type(prog, attach_type); if (!env.quiet) { - printf("Using guessed program type '%s' for %s/%s...\n", + fprintf(stderr, "Using guessed program type '%s' for %s/%s...\n", libbpf_bpf_prog_type_str(prog_type), filename, prog_name); } } else { if (!env.quiet) { - printf("Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n", + fprintf(stderr, "Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n", ctx_name, filename, prog_name); } } @@ -1378,7 +1422,7 @@ static int append_var_preset_file(const char *filename) f = fopen(filename, "rt"); if (!f) { err = -errno; - fprintf(stderr, "Failed to open presets in '%s': %s\n", filename, strerror(err)); + fprintf(stderr, "Failed to open presets in '%s': %s\n", filename, strerror(-err)); return -EINVAL; }