
Commit e4d68c0

Author: Alexei Starovoitov (committed)
Merge branch 'bpf-next/try_alloc_pages' into bpf-next/master
Merge try_alloc_pages feature branch into bpf-next/master.

Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents: 42c5e6d + 93ed6fc

13 files changed, +509 −42 lines

include/linux/bpf.h (+1 −1)

@@ -2354,7 +2354,7 @@ int generic_map_delete_batch(struct bpf_map *map,
 struct bpf_map *bpf_map_get_curr_or_next(u32 *id);
 struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
 
-int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid,
+int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
 			unsigned long nr_pages, struct page **page_array);
 #ifdef CONFIG_MEMCG
 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,

include/linux/gfp.h (+23)

@@ -39,6 +39,25 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
 	return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
 }
 
+static inline bool gfpflags_allow_spinning(const gfp_t gfp_flags)
+{
+	/*
+	 * !__GFP_DIRECT_RECLAIM -> direct reclaim is not allowed.
+	 * !__GFP_KSWAPD_RECLAIM -> it's not safe to wake up kswapd.
+	 * All GFP_* flags including GFP_NOWAIT use one or both flags.
+	 * try_alloc_pages() is the only API that doesn't specify either flag.
+	 *
+	 * This is stronger than GFP_NOWAIT or GFP_ATOMIC because
+	 * those are guaranteed to never block on a sleeping lock.
+	 * Here we are enforcing that the allocation doesn't ever spin
+	 * on any locks (i.e. only trylocks). There is no high level
+	 * GFP_$FOO flag for this use in try_alloc_pages() as the
+	 * regular page allocator doesn't fully support this
+	 * allocation mode.
+	 */
+	return !(gfp_flags & __GFP_RECLAIM);
+}
+
 #ifdef CONFIG_HIGHMEM
 #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
 #else
@@ -335,6 +354,9 @@ static inline struct page *alloc_page_vma_noprof(gfp_t gfp,
 }
 #define alloc_page_vma(...)	alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__))
 
+struct page *try_alloc_pages_noprof(int nid, unsigned int order);
+#define try_alloc_pages(...)	alloc_hooks(try_alloc_pages_noprof(__VA_ARGS__))
+
 extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order);
 #define __get_free_pages(...)	alloc_hooks(get_free_pages_noprof(__VA_ARGS__))
 
@@ -357,6 +379,7 @@ __meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mask)
 	__get_free_pages((gfp_mask) | GFP_DMA, (order))
 
 extern void __free_pages(struct page *page, unsigned int order);
+extern void free_pages_nolock(struct page *page, unsigned int order);
 extern void free_pages(unsigned long addr, unsigned int order);
 
 #define __free_page(page) __free_pages((page), 0)
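The pairing above is deliberate: try_alloc_pages() takes no gfp argument because the only mode it supports is the trylock-only one that gfpflags_allow_spinning() describes, and free_pages_nolock() is the matching release path that is safe in the same restricted contexts. A minimal calling-convention sketch (the two wrapper functions are illustrative, not part of this merge):

	/* Illustrative sketch, not code from this merge. */
	static struct page *grab_page_any_context(void)
	{
		/* Never spins on zone locks and never reclaims, so a NULL
		 * return is an expected outcome the caller must handle. */
		return try_alloc_pages(NUMA_NO_NODE, 0);
	}

	static void drop_page_any_context(struct page *page)
	{
		/* Release path usable where spin_lock() is not. */
		if (page)
			free_pages_nolock(page, 0);
	}

Shared allocator paths can branch on gfpflags_allow_spinning(gfp_mask) to detect this mode, since per the comment above every other GFP_* combination sets at least one of the two reclaim bits.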

include/linux/local_lock.h (+70)

@@ -51,6 +51,76 @@
 #define local_unlock_irqrestore(lock, flags)		\
 	__local_unlock_irqrestore(lock, flags)
 
+/**
+ * localtry_lock_init - Runtime initialize a lock instance
+ */
+#define localtry_lock_init(lock)	__localtry_lock_init(lock)
+
+/**
+ * localtry_lock - Acquire a per CPU local lock
+ * @lock:	The lock variable
+ */
+#define localtry_lock(lock)		__localtry_lock(lock)
+
+/**
+ * localtry_lock_irq - Acquire a per CPU local lock and disable interrupts
+ * @lock:	The lock variable
+ */
+#define localtry_lock_irq(lock)	__localtry_lock_irq(lock)
+
+/**
+ * localtry_lock_irqsave - Acquire a per CPU local lock, save and disable
+ *			   interrupts
+ * @lock:	The lock variable
+ * @flags:	Storage for interrupt flags
+ */
+#define localtry_lock_irqsave(lock, flags)		\
+	__localtry_lock_irqsave(lock, flags)
+
+/**
+ * localtry_trylock - Try to acquire a per CPU local lock.
+ * @lock:	The lock variable
+ *
+ * The function can be used in any context such as NMI or HARDIRQ. Due to
+ * locking constraints it will _always_ fail to acquire the lock in NMI or
+ * HARDIRQ context on PREEMPT_RT.
+ */
+#define localtry_trylock(lock)	__localtry_trylock(lock)
+
+/**
+ * localtry_trylock_irqsave - Try to acquire a per CPU local lock, save and
+ *			      disable interrupts if acquired
+ * @lock:	The lock variable
+ * @flags:	Storage for interrupt flags
+ *
+ * The function can be used in any context such as NMI or HARDIRQ. Due to
+ * locking constraints it will _always_ fail to acquire the lock in NMI or
+ * HARDIRQ context on PREEMPT_RT.
+ */
+#define localtry_trylock_irqsave(lock, flags)		\
+	__localtry_trylock_irqsave(lock, flags)
+
+/**
+ * localtry_unlock - Release a per CPU local lock
+ * @lock:	The lock variable
+ */
+#define localtry_unlock(lock)	__localtry_unlock(lock)
+
+/**
+ * localtry_unlock_irq - Release a per CPU local lock and enable interrupts
+ * @lock:	The lock variable
+ */
+#define localtry_unlock_irq(lock)	__localtry_unlock_irq(lock)
+
+/**
+ * localtry_unlock_irqrestore - Release a per CPU local lock and restore
+ *				interrupt flags
+ * @lock:	The lock variable
+ * @flags:	Interrupt flags to restore
+ */
+#define localtry_unlock_irqrestore(lock, flags)		\
+	__localtry_unlock_irqrestore(lock, flags)
+
 DEFINE_GUARD(local_lock, local_lock_t __percpu*,
 	     local_lock(_T),
 	     local_unlock(_T))
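Together these entry points support a split pattern: task-context code takes the lock outright, while reentrant contexts (IRQ, NMI) only ever trylock and back off on failure. A hedged sketch of that pattern; every name below is invented for illustration:

	/* Illustrative only: not part of this merge. */
	static DEFINE_PER_CPU(localtry_lock_t, demo_lock) = INIT_LOCALTRY_LOCK(demo_lock);

	static void demo_task_path(void)
	{
		localtry_lock(&demo_lock);	/* marks this CPU's lock acquired */
		/* ... update per-CPU state; an NMI may fire here ... */
		localtry_unlock(&demo_lock);
	}

	static bool demo_nmi_path(void)
	{
		unsigned long flags;

		/* Fails instead of deadlocking when this CPU already holds
		 * the lock; always fails in NMI/HARDIRQ on PREEMPT_RT. */
		if (!localtry_trylock_irqsave(&demo_lock, flags))
			return false;
		/* ... critical section, safe against same-CPU reentrancy ... */
		localtry_unlock_irqrestore(&demo_lock, flags);
		return true;
	}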

include/linux/local_lock_internal.h (+146)

@@ -15,6 +15,11 @@ typedef struct {
 #endif
 } local_lock_t;
 
+typedef struct {
+	local_lock_t	llock;
+	unsigned int	acquired;
+} localtry_lock_t;
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define LOCAL_LOCK_DEBUG_INIT(lockname)		\
 	.dep_map = {					\
@@ -31,6 +36,13 @@ static inline void local_lock_acquire(local_lock_t *l)
 	l->owner = current;
 }
 
+static inline void local_trylock_acquire(local_lock_t *l)
+{
+	lock_map_acquire_try(&l->dep_map);
+	DEBUG_LOCKS_WARN_ON(l->owner);
+	l->owner = current;
+}
+
 static inline void local_lock_release(local_lock_t *l)
 {
 	DEBUG_LOCKS_WARN_ON(l->owner != current);
@@ -45,11 +57,13 @@ static inline void local_lock_debug_init(local_lock_t *l)
 #else /* CONFIG_DEBUG_LOCK_ALLOC */
 # define LOCAL_LOCK_DEBUG_INIT(lockname)
 static inline void local_lock_acquire(local_lock_t *l) { }
+static inline void local_trylock_acquire(local_lock_t *l) { }
 static inline void local_lock_release(local_lock_t *l) { }
 static inline void local_lock_debug_init(local_lock_t *l) { }
 #endif /* !CONFIG_DEBUG_LOCK_ALLOC */
 
 #define INIT_LOCAL_LOCK(lockname)	{ LOCAL_LOCK_DEBUG_INIT(lockname) }
+#define INIT_LOCALTRY_LOCK(lockname)	{ .llock = { LOCAL_LOCK_DEBUG_INIT(lockname.llock) }}
 
 #define __local_lock_init(lock)					\
 do {								\
@@ -118,15 +132,115 @@ do { \
 #define __local_unlock_nested_bh(lock)				\
 	local_lock_release(this_cpu_ptr(lock))
 
+/* localtry_lock_t variants */
+
+#define __localtry_lock_init(lock)				\
+do {								\
+	__local_lock_init(&(lock)->llock);			\
+	WRITE_ONCE((lock)->acquired, 0);			\
+} while (0)
+
+#define __localtry_lock(lock)					\
+	do {							\
+		localtry_lock_t *lt;				\
+		preempt_disable();				\
+		lt = this_cpu_ptr(lock);			\
+		local_lock_acquire(&lt->llock);			\
+		WRITE_ONCE(lt->acquired, 1);			\
+	} while (0)
+
+#define __localtry_lock_irq(lock)				\
+	do {							\
+		localtry_lock_t *lt;				\
+		local_irq_disable();				\
+		lt = this_cpu_ptr(lock);			\
+		local_lock_acquire(&lt->llock);			\
+		WRITE_ONCE(lt->acquired, 1);			\
+	} while (0)
+
+#define __localtry_lock_irqsave(lock, flags)			\
+	do {							\
+		localtry_lock_t *lt;				\
+		local_irq_save(flags);				\
+		lt = this_cpu_ptr(lock);			\
+		local_lock_acquire(&lt->llock);			\
+		WRITE_ONCE(lt->acquired, 1);			\
+	} while (0)
+
+#define __localtry_trylock(lock)				\
+	({							\
+		localtry_lock_t *lt;				\
+		bool _ret;					\
+								\
+		preempt_disable();				\
+		lt = this_cpu_ptr(lock);			\
+		if (!READ_ONCE(lt->acquired)) {			\
+			WRITE_ONCE(lt->acquired, 1);		\
+			local_trylock_acquire(&lt->llock);	\
+			_ret = true;				\
+		} else {					\
+			_ret = false;				\
+			preempt_enable();			\
+		}						\
+		_ret;						\
+	})
+
+#define __localtry_trylock_irqsave(lock, flags)			\
+	({							\
+		localtry_lock_t *lt;				\
+		bool _ret;					\
+								\
+		local_irq_save(flags);				\
+		lt = this_cpu_ptr(lock);			\
+		if (!READ_ONCE(lt->acquired)) {			\
+			WRITE_ONCE(lt->acquired, 1);		\
+			local_trylock_acquire(&lt->llock);	\
+			_ret = true;				\
+		} else {					\
+			_ret = false;				\
+			local_irq_restore(flags);		\
+		}						\
+		_ret;						\
+	})
+
+#define __localtry_unlock(lock)					\
+	do {							\
+		localtry_lock_t *lt;				\
+		lt = this_cpu_ptr(lock);			\
+		WRITE_ONCE(lt->acquired, 0);			\
+		local_lock_release(&lt->llock);			\
+		preempt_enable();				\
+	} while (0)
+
+#define __localtry_unlock_irq(lock)				\
+	do {							\
+		localtry_lock_t *lt;				\
+		lt = this_cpu_ptr(lock);			\
+		WRITE_ONCE(lt->acquired, 0);			\
+		local_lock_release(&lt->llock);			\
+		local_irq_enable();				\
+	} while (0)
+
+#define __localtry_unlock_irqrestore(lock, flags)		\
+	do {							\
+		localtry_lock_t *lt;				\
+		lt = this_cpu_ptr(lock);			\
+		WRITE_ONCE(lt->acquired, 0);			\
+		local_lock_release(&lt->llock);			\
+		local_irq_restore(flags);			\
+	} while (0)
+
 #else /* !CONFIG_PREEMPT_RT */
 
 /*
  * On PREEMPT_RT local_lock maps to a per CPU spinlock, which protects the
  * critical section while staying preemptible.
  */
 typedef spinlock_t local_lock_t;
+typedef spinlock_t localtry_lock_t;
 
 #define INIT_LOCAL_LOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname))
+#define INIT_LOCALTRY_LOCK(lockname) INIT_LOCAL_LOCK(lockname)
 
 #define __local_lock_init(l)					\
 do {								\
@@ -169,4 +283,36 @@ do { \
 	spin_unlock(this_cpu_ptr((lock)));			\
 } while (0)
 
+/* localtry_lock_t variants */
+
+#define __localtry_lock_init(lock)			__local_lock_init(lock)
+#define __localtry_lock(lock)				__local_lock(lock)
+#define __localtry_lock_irq(lock)			__local_lock(lock)
+#define __localtry_lock_irqsave(lock, flags)		__local_lock_irqsave(lock, flags)
+#define __localtry_unlock(lock)				__local_unlock(lock)
+#define __localtry_unlock_irq(lock)			__local_unlock(lock)
+#define __localtry_unlock_irqrestore(lock, flags)	__local_unlock_irqrestore(lock, flags)
+
+#define __localtry_trylock(lock)				\
+	({							\
+		int __locked;					\
+								\
+		if (in_nmi() | in_hardirq()) {			\
+			__locked = 0;				\
+		} else {					\
+			migrate_disable();			\
+			__locked = spin_trylock(this_cpu_ptr((lock)));	\
+			if (!__locked)				\
+				migrate_enable();		\
+		}						\
+		__locked;					\
+	})
+
+#define __localtry_trylock_irqsave(lock, flags)			\
+	({							\
+		typecheck(unsigned long, flags);		\
+		flags = 0;					\
+		__localtry_trylock(lock);			\
+	})
+
 #endif /* CONFIG_PREEMPT_RT */
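The implementation split is worth spelling out. On !PREEMPT_RT, the lock-side macros set ->acquired only after preemption or interrupts are disabled, so a competing acquisition on the same CPU can only come from an interrupt or NMI that fires inside the critical section; __localtry_trylock() then observes ->acquired == 1 via READ_ONCE() and fails cleanly instead of deadlocking, which is why the flag is accessed exclusively through READ_ONCE()/WRITE_ONCE(). On PREEMPT_RT, spinlock_t is a sleeping lock, so the trylock variants refuse outright in in_nmi()/in_hardirq() context, matching the guarantee documented in local_lock.h above.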

include/linux/mm_types.h (+4)

@@ -99,6 +99,10 @@ struct page {
 			/* Or, free page */
 			struct list_head buddy_list;
 			struct list_head pcp_list;
+			struct {
+				struct llist_node pcp_llist;
+				unsigned int order;
+			};
 		};
 		/* See page-flags.h for PAGE_MAPPING_FLAGS */
 		struct address_space *mapping;

include/linux/mmzone.h (+3)

@@ -972,6 +972,9 @@ struct zone {
 	/* Primarily protects free_area */
 	spinlock_t lock;
 
+	/* Pages to be freed when next trylock succeeds */
+	struct llist_head trylock_free_pages;
+
 	/* Write-intensive fields used by compaction and vmstats. */
 	CACHELINE_PADDING(_pad2_);
 

kernel/bpf/arena.c (+2 −3)

@@ -287,7 +287,7 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
 		return VM_FAULT_SIGSEGV;
 
 	/* Account into memcg of the process that created bpf_arena */
-	ret = bpf_map_alloc_pages(map, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE, 1, &page);
+	ret = bpf_map_alloc_pages(map, NUMA_NO_NODE, 1, &page);
 	if (ret) {
 		range_tree_set(&arena->rt, vmf->pgoff, 1);
 		return VM_FAULT_SIGSEGV;
@@ -465,8 +465,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt,
 	if (ret)
 		goto out_free_pages;
 
-	ret = bpf_map_alloc_pages(&arena->map, GFP_KERNEL | __GFP_ZERO,
-				  node_id, page_cnt, pages);
+	ret = bpf_map_alloc_pages(&arena->map, node_id, page_cnt, pages);
 	if (ret)
 		goto out;
 
