Skip to content

Commit 6bf90bd

Browse files
Olivier Langlois authored and axboe committed
io_uring/napi: add static napi tracking strategy
Add the static napi tracking strategy. That allows the user to manually manage the napi ids list for busy polling, and eliminates the overhead of dynamically updating the list from the fast path.

Signed-off-by: Olivier Langlois <olivier@trillion01.com>
Link: https://lore.kernel.org/r/96943de14968c35a5c599352259ad98f3c0770ba.1728828877.git.olivier@trillion01.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 71afd92 commit 6bf90bd

File tree

5 files changed

+160
-27
lines changed

5 files changed

+160
-27
lines changed

include/linux/io_uring_types.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -408,7 +408,7 @@ struct io_ring_ctx {
408408
/* napi busy poll default timeout */
409409
ktime_t napi_busy_poll_dt;
410410
bool napi_prefer_busy_poll;
411-
bool napi_enabled;
411+
u8 napi_track_mode;
412412

413413
DECLARE_HASHTABLE(napi_ht, 4);
414414
#endif

include/uapi/linux/io_uring.h

+30-2
Original file line number | Diff line number | Diff line change
@@ -790,12 +790,40 @@ struct io_uring_buf_status {
790790
__u32 resv[8];
791791
};
792792

793+
/*
 * Operation selector carried in io_uring_napi.opcode. io_register_napi()
 * switches on it and returns -EINVAL for any value not listed here.
 */
enum io_uring_napi_op {
794+
/* register/unregister backward compatible opcode */
795+
IO_URING_NAPI_REGISTER_OP = 0,
796+
797+
/*
 * opcodes to update napi_list when static tracking is used; both are
 * rejected with -EINVAL unless the ring's current tracking mode is
 * IO_URING_NAPI_TRACKING_STATIC
 */
798+
IO_URING_NAPI_STATIC_ADD_ID = 1,
799+
IO_URING_NAPI_STATIC_DEL_ID = 2
800+
};
801+
802+
/*
 * How the ring's napi id list is maintained. The value is stored in the
 * u8 field ctx->napi_track_mode and, for IO_URING_NAPI_REGISTER_OP, is
 * supplied by userspace in io_uring_napi.op_param.
 */
enum io_uring_napi_tracking_strategy {
803+
/* value must be 0 for backward compatibility */
804+
/* the only mode in which io_napi_add() proceeds past its early return */
IO_URING_NAPI_TRACKING_DYNAMIC = 0,
805+
/* ids are managed with IO_URING_NAPI_STATIC_ADD_ID/_DEL_ID */
IO_URING_NAPI_TRACKING_STATIC = 1,
806+
/*
 * set by io_napi_init() and io_unregister_napi(); not accepted as a
 * registration parameter by io_napi_register_napi()
 */
IO_URING_NAPI_TRACKING_INACTIVE = 255
807+
};
808+
793809
/* argument for IORING_(UN)REGISTER_NAPI */
794810
struct io_uring_napi {
795811
__u32 busy_poll_to;
796812
__u8 prefer_busy_poll;
797-
__u8 pad[3];
798-
__u64 resv;
813+
814+
/* an io_uring_napi_op value */
815+
__u8 opcode;
816+
__u8 pad[2];
817+
818+
/*
819+
* for IO_URING_NAPI_REGISTER_OP, it is a
820+
* io_uring_napi_tracking_strategy value.
821+
*
822+
* for IO_URING_NAPI_STATIC_ADD_ID/IO_URING_NAPI_STATIC_DEL_ID
823+
* it is the napi id to add/del from napi_list.
824+
*/
825+
__u32 op_param;
826+
__u32 resv;
799827
};
800828

801829
/*

io_uring/fdinfo.c

+41-13
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,46 @@ static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id,
4646
return 0;
4747
}
4848

49+
#ifdef CONFIG_NET_RX_BUSY_POLL
50+
/*
 * Emit the fdinfo lines shared by every active napi tracking mode.
 *
 * @ctx: ring whose napi_busy_poll_dt and napi_prefer_busy_poll are printed
 * @m: seq_file receiving the output
 * @tracking_strategy: label printed after "napi tracking:"
 */
static __cold void common_tracking_show_fdinfo(struct io_ring_ctx *ctx,
51+
struct seq_file *m,
52+
const char *tracking_strategy)
53+
{
54+
seq_puts(m, "NAPI:\tenabled\n");
55+
seq_printf(m, "napi tracking:\t%s\n", tracking_strategy);
56+
/* NOTE(review): %llu is given a ktime_t here - confirm the type matches */
seq_printf(m, "napi_busy_poll_dt:\t%llu\n", ctx->napi_busy_poll_dt);
57+
if (ctx->napi_prefer_busy_poll)
58+
seq_puts(m, "napi_prefer_busy_poll:\ttrue\n");
59+
else
60+
seq_puts(m, "napi_prefer_busy_poll:\tfalse\n");
61+
}
62+
63+
/*
 * Print the NAPI section of the ring's fdinfo output
 * (CONFIG_NET_RX_BUSY_POLL variant).
 *
 * @ctx: ring being described
 * @m: seq_file receiving the output
 */
static __cold void napi_show_fdinfo(struct io_ring_ctx *ctx,
64+
struct seq_file *m)
65+
{
66+
/* read the mode once so the switch and the fallback print see one value */
unsigned int mode = READ_ONCE(ctx->napi_track_mode);
67+
68+
switch (mode) {
69+
case IO_URING_NAPI_TRACKING_INACTIVE:
70+
seq_puts(m, "NAPI:\tdisabled\n");
71+
break;
72+
case IO_URING_NAPI_TRACKING_DYNAMIC:
73+
common_tracking_show_fdinfo(ctx, m, "dynamic");
74+
break;
75+
case IO_URING_NAPI_TRACKING_STATIC:
76+
common_tracking_show_fdinfo(ctx, m, "static");
77+
break;
78+
default:
79+
/* any other stored value is reported numerically */
seq_printf(m, "NAPI:\tunknown mode (%u)\n", mode);
80+
}
81+
}
82+
#else
83+
/*
 * Empty stand-in used when CONFIG_NET_RX_BUSY_POLL is not set, so the
 * caller can invoke napi_show_fdinfo() without its own #ifdef.
 */
static inline void napi_show_fdinfo(struct io_ring_ctx *ctx,
84+
struct seq_file *m)
85+
{
86+
}
87+
#endif
88+
4989
/*
5090
* Caller holds a reference to the file already, we don't need to do
5191
* anything else to get an extra reference.
@@ -219,18 +259,6 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
219259

220260
}
221261
spin_unlock(&ctx->completion_lock);
222-
223-
#ifdef CONFIG_NET_RX_BUSY_POLL
224-
if (ctx->napi_enabled) {
225-
seq_puts(m, "NAPI:\tenabled\n");
226-
seq_printf(m, "napi_busy_poll_dt:\t%llu\n", ctx->napi_busy_poll_dt);
227-
if (ctx->napi_prefer_busy_poll)
228-
seq_puts(m, "napi_prefer_busy_poll:\ttrue\n");
229-
else
230-
seq_puts(m, "napi_prefer_busy_poll:\tfalse\n");
231-
} else {
232-
seq_puts(m, "NAPI:\tdisabled\n");
233-
}
234-
#endif
262+
napi_show_fdinfo(ctx, m);
235263
}
236264
#endif

io_uring/napi.c

+87-10
Original file line number | Diff line number | Diff line change
@@ -81,6 +81,27 @@ int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id)
8181
return 0;
8282
}
8383

84+
/*
 * Remove one napi id from the ring's tracking structures.
 *
 * @ctx: ring that owns napi_ht, napi_list and napi_lock
 * @napi_id: id to remove
 *
 * Returns 0 on success, -EINVAL if @napi_id is below MIN_NAPI_ID, or
 * -ENOENT if no entry with that id is found in the hash bucket.
 */
static int __io_napi_del_id(struct io_ring_ctx *ctx, unsigned int napi_id)
85+
{
86+
struct hlist_head *hash_list;
87+
struct io_napi_entry *e;
88+
89+
/* Non-NAPI IDs can be rejected. */
90+
if (napi_id < MIN_NAPI_ID)
91+
return -EINVAL;
92+
93+
hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];
94+
/* scope-based lock: lookup and unlink both run under napi_lock */
guard(spinlock)(&ctx->napi_lock);
95+
e = io_napi_hash_find(hash_list, napi_id);
96+
if (!e)
97+
return -ENOENT;
98+
99+
/* unlink from both the list and the hash, then free via kfree_rcu() */
list_del_rcu(&e->list);
100+
hash_del_rcu(&e->node);
101+
kfree_rcu(e, rcu);
102+
return 0;
103+
}
104+
84105
static void __io_napi_remove_stale(struct io_ring_ctx *ctx)
85106
{
86107
struct io_napi_entry *e;
@@ -136,9 +157,25 @@ static bool io_napi_busy_loop_should_end(void *data,
136157
return false;
137158
}
138159

139-
static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
140-
bool (*loop_end)(void *, unsigned long),
141-
void *loop_end_arg)
160+
/*
161+
* never report stale entries
162+
*/
163+
/*
 * Busy poll every napi id on ctx->napi_list for the static tracking mode.
 *
 * @ctx: ring whose napi_list is walked
 * @loop_end: callback forwarded to napi_busy_loop_rcu()
 * @loop_end_arg: opaque argument forwarded with @loop_end
 *
 * Always returns false: unlike the dynamic variant, which returns its
 * is_stale flag, this path never reports stale entries.
 *
 * NOTE(review): the list is walked with list_for_each_entry_rcu();
 * presumably the caller holds the RCU read lock - caller not visible here.
 */
static bool static_tracking_do_busy_loop(struct io_ring_ctx *ctx,
164+
bool (*loop_end)(void *, unsigned long),
165+
void *loop_end_arg)
166+
{
167+
struct io_napi_entry *e;
168+
169+
list_for_each_entry_rcu(e, &ctx->napi_list, list)
170+
napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
171+
ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);
172+
return false;
173+
}
174+
175+
static bool
176+
dynamic_tracking_do_busy_loop(struct io_ring_ctx *ctx,
177+
bool (*loop_end)(void *, unsigned long),
178+
void *loop_end_arg)
142179
{
143180
struct io_napi_entry *e;
144181
bool is_stale = false;
@@ -154,6 +191,16 @@ static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
154191
return is_stale;
155192
}
156193

194+
/*
 * Dispatch one busy-poll pass to the implementation matching the ring's
 * current tracking mode.
 *
 * @ctx: ring whose napi_track_mode selects the implementation
 * @loop_end: callback passed through unchanged
 * @loop_end_arg: opaque argument passed through unchanged
 *
 * Returns the selected implementation's result. Every mode other than
 * IO_URING_NAPI_TRACKING_STATIC takes the dynamic path.
 */
static inline bool
195+
__io_napi_do_busy_loop(struct io_ring_ctx *ctx,
196+
bool (*loop_end)(void *, unsigned long),
197+
void *loop_end_arg)
198+
{
199+
if (READ_ONCE(ctx->napi_track_mode) == IO_URING_NAPI_TRACKING_STATIC)
200+
return static_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
201+
return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
202+
}
203+
157204
static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
158205
struct io_wait_queue *iowq)
159206
{
@@ -195,6 +242,7 @@ void io_napi_init(struct io_ring_ctx *ctx)
195242
spin_lock_init(&ctx->napi_lock);
196243
ctx->napi_prefer_busy_poll = false;
197244
ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt);
245+
ctx->napi_track_mode = IO_URING_NAPI_TRACKING_INACTIVE;
198246
}
199247

200248
/*
@@ -215,6 +263,24 @@ void io_napi_free(struct io_ring_ctx *ctx)
215263
INIT_LIST_HEAD_RCU(&ctx->napi_list);
216264
}
217265

266+
/*
 * Handle IO_URING_NAPI_REGISTER_OP: validate the requested tracking
 * strategy and install the new napi settings on the ring.
 *
 * @ctx: ring being configured
 * @napi: settings already copied in from userspace by the caller
 *
 * Returns 0 on success, or -EINVAL if napi->op_param is neither
 * IO_URING_NAPI_TRACKING_DYNAMIC nor IO_URING_NAPI_TRACKING_STATIC.
 */
static int io_napi_register_napi(struct io_ring_ctx *ctx,
267+
struct io_uring_napi *napi)
268+
{
269+
switch (napi->op_param) {
270+
case IO_URING_NAPI_TRACKING_DYNAMIC:
271+
case IO_URING_NAPI_TRACKING_STATIC:
272+
break;
273+
default:
274+
return -EINVAL;
275+
}
276+
/* clean the napi list for new settings */
277+
io_napi_free(ctx);
278+
WRITE_ONCE(ctx->napi_track_mode, napi->op_param);
279+
/*
 * busy_poll_to is scaled by NSEC_PER_USEC before being stored.
 * NOTE(review): the width of this product depends on NSEC_PER_USEC's
 * type, which is not visible here - confirm it cannot overflow for
 * large busy_poll_to values.
 */
WRITE_ONCE(ctx->napi_busy_poll_dt, napi->busy_poll_to * NSEC_PER_USEC);
280+
/* normalise the userspace byte to a strict boolean */
WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi->prefer_busy_poll);
281+
return 0;
282+
}
283+
218284
/*
219285
* io_napi_register() - Register napi with io-uring
220286
* @ctx: pointer to io-uring context structure
@@ -226,24 +292,35 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
226292
{
227293
const struct io_uring_napi curr = {
228294
.busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt),
229-
.prefer_busy_poll = ctx->napi_prefer_busy_poll
295+
.prefer_busy_poll = ctx->napi_prefer_busy_poll,
296+
.op_param = ctx->napi_track_mode
230297
};
231298
struct io_uring_napi napi;
232299

233300
if (ctx->flags & IORING_SETUP_IOPOLL)
234301
return -EINVAL;
235302
if (copy_from_user(&napi, arg, sizeof(napi)))
236303
return -EFAULT;
237-
if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv)
304+
if (napi.pad[0] || napi.pad[1] || napi.resv)
238305
return -EINVAL;
239306

240307
if (copy_to_user(arg, &curr, sizeof(curr)))
241308
return -EFAULT;
242309

243-
WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC);
244-
WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
245-
WRITE_ONCE(ctx->napi_enabled, true);
246-
return 0;
310+
switch (napi.opcode) {
311+
case IO_URING_NAPI_REGISTER_OP:
312+
return io_napi_register_napi(ctx, &napi);
313+
case IO_URING_NAPI_STATIC_ADD_ID:
314+
if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
315+
return -EINVAL;
316+
return __io_napi_add_id(ctx, napi.op_param);
317+
case IO_URING_NAPI_STATIC_DEL_ID:
318+
if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
319+
return -EINVAL;
320+
return __io_napi_del_id(ctx, napi.op_param);
321+
default:
322+
return -EINVAL;
323+
}
247324
}
248325

249326
/*
@@ -266,7 +343,7 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
266343

267344
WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
268345
WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
269-
WRITE_ONCE(ctx->napi_enabled, false);
346+
WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE);
270347
return 0;
271348
}
272349

io_uring/napi.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ static inline void io_napi_add(struct io_kiocb *req)
4444
struct io_ring_ctx *ctx = req->ctx;
4545
struct socket *sock;
4646

47-
if (!READ_ONCE(ctx->napi_enabled))
47+
if (READ_ONCE(ctx->napi_track_mode) != IO_URING_NAPI_TRACKING_DYNAMIC)
4848
return;
4949

5050
sock = sock_from_file(req->file);

0 commit comments

Comments
 (0)