Resilient Queued Spin Lock #8599

Open
wants to merge 25 commits into base: bpf-next_base
Changes from all 25 commits
7a56fb5  locking: Move MCS struct definition to public header
4ce8b17  locking: Move common qspinlock helpers to a private header
33100fa  locking: Allow obtaining result of arch_mcs_spin_lock_contended
fdb6217  locking: Copy out qspinlock.c to rqspinlock.c
15eb982  rqspinlock: Add rqspinlock.h header
062282f  rqspinlock: Drop PV and virtualization support
d487d49  rqspinlock: Add support for timeouts
ce370d4  rqspinlock: Hardcode cond_acquire loops for arm64
02bbddc  rqspinlock: Protect pending bit owners from stalls
deafa0a  rqspinlock: Protect waiters in queue from stalls
c1abc96  rqspinlock: Protect waiters in trylock fallback from stalls
bcc3c8c  rqspinlock: Add deadlock detection and recovery
946d4e9  rqspinlock: Add a test-and-set fallback
2b5498e  rqspinlock: Add basic support for CONFIG_PARAVIRT
0dd997b  rqspinlock: Add helper to print a splat on timeout or deadlock
a01ed0a  rqspinlock: Add macros for rqspinlock usage
b4534e9  rqspinlock: Add locktorture support
13d6244  rqspinlock: Add entry to Makefile, MAINTAINERS
c67ac42  bpf: Convert hashtab.c to rqspinlock
8906825  bpf: Convert percpu_freelist.c to rqspinlock
feb5eea  bpf: Convert lpm_trie.c to rqspinlock
b0ceaeb  bpf: Introduce rqspinlock kfuncs
595c1d7  bpf: Implement verifier support for rqspinlock
9e971c5  bpf: Maintain FIFO property for rqspinlock unlock
c3770ac  selftests/bpf: Add tests for rqspinlock

All commits by kkdwivedi, Mar 3, 2025.
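The practical upshot of the series for callers (see the "Add macros for rqspinlock usage" commit and the hashtab.c, percpu_freelist.c, and lpm_trie.c conversions above) is that lock acquisition can fail on timeout or detected deadlock and must be checked. A minimal caller-side sketch of that pattern follows; the raw_res_spin_lock_irqsave()/raw_res_spin_unlock_irqrestore() names and the exact error codes are assumptions inferred from the commit titles, not quoted from the patches.

/*
 * Illustrative sketch only: unlike raw_spin_lock_irqsave(), acquiring a
 * resilient lock can fail, so the return value must be checked and the
 * error propagated instead of spinning forever.
 */
static int update_elem(rqspinlock_t *lock, void *elem)	/* hypothetical helper */
{
	unsigned long flags;
	int ret;

	ret = raw_res_spin_lock_irqsave(lock, flags);	/* assumed macro name */
	if (ret)
		return ret;	/* e.g. timeout or deadlock: back off, leave elem untouched */

	/* ... critical section protected by the resilient lock ... */

	raw_res_spin_unlock_irqrestore(lock, flags);
	return 0;
}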
3 changes: 3 additions & 0 deletions MAINTAINERS
@@ -4297,6 +4297,9 @@ F: include/uapi/linux/filter.h
F: kernel/bpf/
F: kernel/trace/bpf_trace.c
F: lib/buildid.c
F: arch/*/include/asm/rqspinlock.h
F: include/asm-generic/rqspinlock.h
F: kernel/locking/rqspinlock.c
F: lib/test_bpf.c
F: net/bpf/
F: net/core/filter.c
93 changes: 93 additions & 0 deletions arch/arm64/include/asm/rqspinlock.h
@@ -0,0 +1,93 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RQSPINLOCK_H
#define _ASM_RQSPINLOCK_H

#include <asm/barrier.h>

/*
* Hardcode res_smp_cond_load_acquire implementations for arm64 to a custom
* version based on [0]. In rqspinlock code, our conditional expression involves
* checking the value _and_ additionally a timeout. However, on arm64, the
* WFE-based implementation may never spin again if no stores occur to the
* locked byte in the lock word. As such, we may be stuck forever if
* event-stream based unblocking is not available on the platform for WFE spin
* loops (arch_timer_evtstrm_available).
*
* Once support for smp_cond_load_acquire_timewait [0] lands, we can drop this
* copy-paste.
*
* While we rely on the implementation to amortize the cost of sampling
* cond_expr for us, this only happens when event stream support is available;
* when it is unavailable, the time_expr check is amortized instead. This is
* not the common case, and it would be difficult to fit our logic in the
* time_expr_ns >= time_limit_ns comparison, hence just let it be. In case of
* event-stream, the loop is woken up at microsecond granularity.
*
* [0]: https://lore.kernel.org/lkml/[email protected]
*/

#ifndef smp_cond_load_acquire_timewait

#define smp_cond_time_check_count 200

#define __smp_cond_load_relaxed_spinwait(ptr, cond_expr, time_expr_ns, \
					 time_limit_ns) ({ \
	typeof(ptr) __PTR = (ptr); \
	__unqual_scalar_typeof(*ptr) VAL; \
	unsigned int __count = 0; \
	for (;;) { \
		VAL = READ_ONCE(*__PTR); \
		if (cond_expr) \
			break; \
		cpu_relax(); \
		if (__count++ < smp_cond_time_check_count) \
			continue; \
		if ((time_expr_ns) >= (time_limit_ns)) \
			break; \
		__count = 0; \
	} \
	(typeof(*ptr))VAL; \
})

#define __smp_cond_load_acquire_timewait(ptr, cond_expr, \
					 time_expr_ns, time_limit_ns) \
({ \
	typeof(ptr) __PTR = (ptr); \
	__unqual_scalar_typeof(*ptr) VAL; \
	for (;;) { \
		VAL = smp_load_acquire(__PTR); \
		if (cond_expr) \
			break; \
		__cmpwait_relaxed(__PTR, VAL); \
		if ((time_expr_ns) >= (time_limit_ns)) \
			break; \
	} \
	(typeof(*ptr))VAL; \
})

#define smp_cond_load_acquire_timewait(ptr, cond_expr, \
				       time_expr_ns, time_limit_ns) \
({ \
	__unqual_scalar_typeof(*ptr) _val; \
	int __wfe = arch_timer_evtstrm_available(); \
	\
	if (likely(__wfe)) { \
		_val = __smp_cond_load_acquire_timewait(ptr, cond_expr, \
							time_expr_ns, \
							time_limit_ns); \
	} else { \
		_val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr, \
							time_expr_ns, \
							time_limit_ns); \
		smp_acquire__after_ctrl_dep(); \
	} \
	(typeof(*ptr))_val; \
})

#endif

#define res_smp_cond_load_acquire_timewait(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1)

#include <asm-generic/rqspinlock.h>

#endif /* _ASM_RQSPINLOCK_H */
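To make the timeout plumbing above concrete, here is a hedged usage sketch of smp_cond_load_acquire_timewait(). As in the kernel's existing smp_cond_load_acquire(), cond_expr refers to the freshly loaded value through VAL; the extra pair of arguments bounds the wait. The deadline source and the -ETIMEDOUT mapping below are illustrative assumptions, not code from this patch.

/* Hypothetical caller: wait for a lock byte to be released, bounded by a deadline. */
static int wait_for_unlocked(u8 *locked, u64 deadline_ns)
{
	u8 val;

	/* Returns either because !VAL became true or because the deadline passed. */
	val = smp_cond_load_acquire_timewait(locked, !VAL,
					     ktime_get_mono_fast_ns(),
					     deadline_ns);
	return val ? -ETIMEDOUT : 0;
}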
33 changes: 33 additions & 0 deletions arch/x86/include/asm/rqspinlock.h
@@ -0,0 +1,33 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_RQSPINLOCK_H
#define _ASM_X86_RQSPINLOCK_H

#include <asm/paravirt.h>

#ifdef CONFIG_PARAVIRT
DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key);

#define resilient_virt_spin_lock_enabled resilient_virt_spin_lock_enabled
static __always_inline bool resilient_virt_spin_lock_enabled(void)
{
	return static_branch_likely(&virt_spin_lock_key);
}

#ifdef CONFIG_QUEUED_SPINLOCKS
typedef struct qspinlock rqspinlock_t;
#else
typedef struct rqspinlock rqspinlock_t;
#endif
extern int resilient_tas_spin_lock(rqspinlock_t *lock);

#define resilient_virt_spin_lock resilient_virt_spin_lock
static inline int resilient_virt_spin_lock(rqspinlock_t *lock)
{
	return resilient_tas_spin_lock(lock);
}

#endif /* CONFIG_PARAVIRT */

#include <asm-generic/rqspinlock.h>

#endif /* _ASM_X86_RQSPINLOCK_H */
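For context, the define-the-name convention used above (resilient_virt_spin_lock_enabled, resilient_virt_spin_lock) is the usual way an arch header lets asm-generic/rqspinlock.h supply no-op defaults. A hedged sketch of how the generic side might consume these hooks is below; the res_spin_lock_slowpath() and resilient_queued_spin_lock_slowpath() names are assumptions, not quoted from the patch.

/* Assumed shape of the generic fallbacks and their consumer; not patch code. */
#ifndef resilient_virt_spin_lock_enabled
static __always_inline bool resilient_virt_spin_lock_enabled(void)
{
	return false;	/* the arch did not provide a paravirt override */
}
#endif

#ifndef resilient_virt_spin_lock
static __always_inline int resilient_virt_spin_lock(rqspinlock_t *lock)
{
	return 0;	/* never reached when the override is absent */
}
#endif

static inline int res_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
{
	/* Paravirt guests skip the MCS queue and use the test-and-set fallback. */
	if (resilient_virt_spin_lock_enabled())
		return resilient_virt_spin_lock(lock);

	return resilient_queued_spin_lock_slowpath(lock, val);
}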
1 change: 1 addition & 0 deletions include/asm-generic/Kbuild
@@ -45,6 +45,7 @@ mandatory-y += pci.h
mandatory-y += percpu.h
mandatory-y += pgalloc.h
mandatory-y += preempt.h
mandatory-y += rqspinlock.h
mandatory-y += runtime-const.h
mandatory-y += rwonce.h
mandatory-y += sections.h
6 changes: 6 additions & 0 deletions include/asm-generic/mcs_spinlock.h
@@ -1,6 +1,12 @@
#ifndef __ASM_MCS_SPINLOCK_H
#define __ASM_MCS_SPINLOCK_H

struct mcs_spinlock {
	struct mcs_spinlock *next;
	int locked; /* 1 if lock acquired */
	int count;  /* nesting count, see qspinlock.c */
};

/*
* Architectures can define their own:
*
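Since struct mcs_spinlock is being promoted to a public header here, a brief illustrative sketch of the classic MCS queueing protocol its next and locked fields support may help. This is textbook MCS for orientation, not code from the series; the in-tree users remain kernel/locking/qspinlock.c and, with this series, rqspinlock.c, and they encode the queue tail differently.

/* Textbook MCS lock/unlock over the fields above; illustration only.
 * Assumes the usual kernel atomics/barrier headers (xchg, cmpxchg,
 * READ_ONCE/WRITE_ONCE, smp_cond_load_acquire, smp_store_release).
 */
static void mcs_lock(struct mcs_spinlock **tail, struct mcs_spinlock *node)
{
	struct mcs_spinlock *prev;

	node->next = NULL;
	node->locked = 0;

	prev = xchg(tail, node);		/* atomically append to the queue */
	if (!prev)
		return;				/* queue was empty: lock acquired */

	WRITE_ONCE(prev->next, node);
	/* Spin on our own node until the predecessor hands the lock over. */
	smp_cond_load_acquire(&node->locked, VAL);
}

static void mcs_unlock(struct mcs_spinlock **tail, struct mcs_spinlock *node)
{
	struct mcs_spinlock *next = READ_ONCE(node->next);

	if (!next) {
		/* No visible successor: try to mark the queue empty. */
		if (cmpxchg(tail, node, NULL) == node)
			return;
		/* A successor is still linking itself in; wait for it. */
		while (!(next = READ_ONCE(node->next)))
			cpu_relax();
	}
	smp_store_release(&next->locked, 1);	/* pass the lock on */
}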