
Commit 6ee7835

MaxKellermann authored and axboe committed

io_uring/io-wq: cache work->flags in variable
This eliminates several redundant atomic reads and therefore reduces the duration the surrounding spinlocks are held.

In several io_uring benchmarks, this reduced the CPU time spent in queued_spin_lock_slowpath() considerably:

io_uring benchmark with a flood of `IORING_OP_NOP` and `IOSQE_ASYNC`:

    38.86%    -1.49%  [kernel.kallsyms]  [k] queued_spin_lock_slowpath
     6.75%    +0.36%  [kernel.kallsyms]  [k] io_worker_handle_work
     2.60%    +0.19%  [kernel.kallsyms]  [k] io_nop
     3.92%    +0.18%  [kernel.kallsyms]  [k] io_req_task_complete
     6.34%    -0.18%  [kernel.kallsyms]  [k] io_wq_submit_work

HTTP server, static file:

    42.79%    -2.77%  [kernel.kallsyms]  [k] queued_spin_lock_slowpath
     2.08%    +0.23%  [kernel.kallsyms]  [k] io_wq_submit_work
     1.19%    +0.20%  [kernel.kallsyms]  [k] amd_iommu_iotlb_sync_map
     1.46%    +0.15%  [kernel.kallsyms]  [k] ep_poll_callback
     1.80%    +0.15%  [kernel.kallsyms]  [k] io_worker_handle_work

HTTP server, PHP:

    35.03%    -1.80%  [kernel.kallsyms]  [k] queued_spin_lock_slowpath
     0.84%    +0.21%  [kernel.kallsyms]  [k] amd_iommu_iotlb_sync_map
     1.39%    +0.12%  [kernel.kallsyms]  [k] _copy_to_iter
     0.21%    +0.10%  [kernel.kallsyms]  [k] update_sd_lb_stats

Signed-off-by: Max Kellermann <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jens Axboe <[email protected]>
1 parent 751eedc commit 6ee7835

2 files changed: +27 −13 lines
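The pattern behind the change is simple: paths that used to test several bits of work->flags with one atomic_read() per test now load the flags word once into a local work_flags snapshot and test (and pass down) the plain integer. A minimal standalone sketch of the idea, using C11 atomics and invented flag names (WORK_HASHED, WORK_UNBOUND) rather than the kernel's own helpers:

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Invented flag bits for illustration; the real ones live in io-wq.h. */
#define WORK_UNBOUND (1u << 0)
#define WORK_HASHED  (1u << 1)

struct work {
	atomic_uint flags;
};

/* Before: each predicate issues its own atomic load. */
static bool is_hashed_slow(struct work *w)
{
	return atomic_load(&w->flags) & WORK_HASHED;
}

/* After: a __-prefixed variant tests a plain snapshot instead. */
static bool __is_hashed(unsigned int flags)
{
	return flags & WORK_HASHED;
}

static void handle(struct work *w)
{
	/* One atomic read; every later test reuses the snapshot. */
	unsigned int flags = atomic_load(&w->flags);

	if (__is_hashed(flags))
		puts("hashed: serialize against same-hash work");
	if (flags & WORK_UNBOUND)
		puts("unbound: charge the unbound accounting bucket");
}

int main(void)
{
	struct work w;

	atomic_init(&w.flags, WORK_HASHED | WORK_UNBOUND);
	printf("old-style single test: %d\n", is_hashed_slow(&w));
	handle(&w);
	return 0;
}
```

The saving in the commit is not the single load as such, but that several of these loads happened while acct->lock was held; dropping them shortens the critical section, which is what the queued_spin_lock_slowpath() numbers above reflect.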

io_uring/io-wq.c (+21 −12)

```diff
@@ -170,9 +170,9 @@ static inline struct io_wq_acct *io_get_acct(struct io_wq *wq, bool bound)
 }
 
 static inline struct io_wq_acct *io_work_get_acct(struct io_wq *wq,
-						  struct io_wq_work *work)
+						  unsigned int work_flags)
 {
-	return io_get_acct(wq, !(atomic_read(&work->flags) & IO_WQ_WORK_UNBOUND));
+	return io_get_acct(wq, !(work_flags & IO_WQ_WORK_UNBOUND));
 }
 
 static inline struct io_wq_acct *io_wq_get_acct(struct io_worker *worker)
@@ -457,9 +457,14 @@ static void __io_worker_idle(struct io_wq_acct *acct, struct io_worker *worker)
 	}
 }
 
+static inline unsigned int __io_get_work_hash(unsigned int work_flags)
+{
+	return work_flags >> IO_WQ_HASH_SHIFT;
+}
+
 static inline unsigned int io_get_work_hash(struct io_wq_work *work)
 {
-	return atomic_read(&work->flags) >> IO_WQ_HASH_SHIFT;
+	return __io_get_work_hash(atomic_read(&work->flags));
 }
 
 static bool io_wait_on_hash(struct io_wq *wq, unsigned int hash)
@@ -489,17 +494,19 @@ static struct io_wq_work *io_get_next_work(struct io_wq_acct *acct,
 	struct io_wq *wq = worker->wq;
 
 	wq_list_for_each(node, prev, &acct->work_list) {
+		unsigned int work_flags;
 		unsigned int hash;
 
 		work = container_of(node, struct io_wq_work, list);
 
 		/* not hashed, can run anytime */
-		if (!io_wq_is_hashed(work)) {
+		work_flags = atomic_read(&work->flags);
+		if (!__io_wq_is_hashed(work_flags)) {
 			wq_list_del(&acct->work_list, node, prev);
 			return work;
 		}
 
-		hash = io_get_work_hash(work);
+		hash = __io_get_work_hash(work_flags);
 		/* all items with this hash lie in [work, tail] */
 		tail = wq->hash_tail[hash];
 
@@ -596,12 +603,13 @@ static void io_worker_handle_work(struct io_wq_acct *acct,
 		/* handle a whole dependent link */
 		do {
 			struct io_wq_work *next_hashed, *linked;
-			unsigned int hash = io_get_work_hash(work);
+			unsigned int work_flags = atomic_read(&work->flags);
+			unsigned int hash = __io_get_work_hash(work_flags);
 
 			next_hashed = wq_next_work(work);
 
 			if (do_kill &&
-			    (atomic_read(&work->flags) & IO_WQ_WORK_UNBOUND))
+			    (work_flags & IO_WQ_WORK_UNBOUND))
 				atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
 			wq->do_work(work);
 			io_assign_current_work(worker, NULL);
@@ -917,18 +925,19 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wq *wq)
 	} while (work);
 }
 
-static void io_wq_insert_work(struct io_wq *wq, struct io_wq_acct *acct, struct io_wq_work *work)
+static void io_wq_insert_work(struct io_wq *wq, struct io_wq_acct *acct,
+			      struct io_wq_work *work, unsigned int work_flags)
 {
 	unsigned int hash;
 	struct io_wq_work *tail;
 
-	if (!io_wq_is_hashed(work)) {
+	if (!__io_wq_is_hashed(work_flags)) {
 append:
 		wq_list_add_tail(&work->list, &acct->work_list);
 		return;
 	}
 
-	hash = io_get_work_hash(work);
+	hash = __io_get_work_hash(work_flags);
 	tail = wq->hash_tail[hash];
 	wq->hash_tail[hash] = work;
 	if (!tail)
@@ -944,8 +953,8 @@ static bool io_wq_work_match_item(struct io_wq_work *work, void *data)
 
 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
 {
-	struct io_wq_acct *acct = io_work_get_acct(wq, work);
 	unsigned int work_flags = atomic_read(&work->flags);
+	struct io_wq_acct *acct = io_work_get_acct(wq, work_flags);
 	struct io_cb_cancel_data match = {
 		.fn	= io_wq_work_match_item,
 		.data	= work,
@@ -964,7 +973,7 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
 	}
 
 	raw_spin_lock(&acct->lock);
-	io_wq_insert_work(wq, acct, work);
+	io_wq_insert_work(wq, acct, work, work_flags);
 	clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
 	raw_spin_unlock(&acct->lock);
 
```
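One subtlety worth noting in the io_worker_handle_work() hunk above (my reading of the diff, not something the commit message spells out): the cached work_flags snapshot only feeds read-only decisions, namely the hash bits and the IO_WQ_WORK_UNBOUND test, while IO_WQ_WORK_CANCEL is still set with atomic_or() on the live work->flags so that concurrent readers observe the cancellation. A hedged sketch of that split, with invented names:

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Invented bits mirroring the roles the diff gives them. */
#define WORK_UNBOUND (1u << 0)
#define WORK_CANCEL  (1u << 2)

struct work {
	atomic_uint flags;
};

static void handle_one(struct work *w, bool do_kill)
{
	unsigned int work_flags = atomic_load(&w->flags);	/* snapshot */

	/* Read-only decision taken from the snapshot... */
	if (do_kill && (work_flags & WORK_UNBOUND))
		/* ...but the state change goes to the live atomic. */
		atomic_fetch_or(&w->flags, WORK_CANCEL);

	/* run the work; other threads now observe WORK_CANCEL */
}

int main(void)
{
	struct work w;

	atomic_init(&w.flags, WORK_UNBOUND);
	handle_one(&w, true);
	printf("flags after: %#x\n", atomic_load(&w.flags));
	return 0;
}
```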

io_uring/io-wq.h (+6 −1)

```diff
@@ -54,9 +54,14 @@ int io_wq_cpu_affinity(struct io_uring_task *tctx, cpumask_var_t mask);
 int io_wq_max_workers(struct io_wq *wq, int *new_count);
 bool io_wq_worker_stopped(void);
 
+static inline bool __io_wq_is_hashed(unsigned int work_flags)
+{
+	return work_flags & IO_WQ_WORK_HASHED;
+}
+
 static inline bool io_wq_is_hashed(struct io_wq_work *work)
 {
-	return atomic_read(&work->flags) & IO_WQ_WORK_HASHED;
+	return __io_wq_is_hashed(atomic_read(&work->flags));
 }
 
 typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
```
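The header change shows the naming convention the whole commit relies on: a double-underscore variant (__io_wq_is_hashed()) that is a pure function of a flags snapshot the caller already read, kept alongside the original one-argument form so existing call sites compile unchanged. A generic sketch of the idiom, with hypothetical names:

```c
#include <stdatomic.h>
#include <stdbool.h>

#define WORK_HASHED (1u << 1)

struct work {
	atomic_uint flags;
};

/* Inner helper: a pure function of a snapshot the caller already read. */
static inline bool __work_is_hashed(unsigned int flags)
{
	return flags & WORK_HASHED;
}

/* Outer wrapper: the convenient one-argument form, for callers that
 * have no snapshot and can afford the atomic load. */
static inline bool work_is_hashed(struct work *w)
{
	return __work_is_hashed(atomic_load(&w->flags));
}
```

Hot paths that already hold a snapshot call the __ variant; everyone else keeps the wrapper and pays for the atomic load.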
