Commit e946b4f

Authored by Ming Lei, committed by mehmetb0
blk-mq: move cpuhp callback registering out of q->sysfs_lock
BugLink: https://bugs.launchpad.net/bugs/2097332

[ Upstream commit 22465bb ]

Registering and unregistering a cpuhp callback requires the global cpu
hotplug lock, which is used everywhere. Meanwhile q->sysfs_lock is used
almost everywhere in the block layer, so it is easy to trigger a lockdep
warning [1] by connecting the two locks.

Fix the warning by moving blk-mq's cpuhp callback registering out of
q->sysfs_lock. Add one dedicated global lock covering registering and
unregistering each hctx's cpuhp callbacks; this is safe because an hctx
is guaranteed to be live if its request_queue is live.

[1] https://lore.kernel.org/lkml/Z04pz3AlvI4o0Mr8@agluck-desk3/

Cc: Reinette Chatre <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Newman <[email protected]>
Cc: Babu Moger <[email protected]>
Reported-by: Luck, Tony <[email protected]>
Signed-off-by: Ming Lei <[email protected]>
Tested-by: Tony Luck <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jens Axboe <[email protected]>
Stable-dep-of: be26ba9 ("block: Fix potential deadlock while freezing queue and acquiring sysfs_lock")
Signed-off-by: Sasha Levin <[email protected]>
Signed-off-by: Koichiro Den <[email protected]>
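The class of deadlock being avoided is, roughly, an ABBA lock-order inversion: one path took q->sysfs_lock and then the cpu hotplug lock (via cpuhp registration), while another chain of paths relates the two locks in the opposite order. Below is a user-space analogue of that inversion, not kernel code: pthread mutexes stand in for the two kernel locks, and thread_a/thread_b are invented names for this sketch.

#include <pthread.h>
#include <stdio.h>

/* Stand-ins for cpu_hotplug_lock and q->sysfs_lock. */
static pthread_mutex_t hotplug_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t sysfs_lock   = PTHREAD_MUTEX_INITIALIZER;

/* Path 1: sysfs_lock -> hotplug_lock (the old registration order). */
static void *thread_a(void *arg)
{
        pthread_mutex_lock(&sysfs_lock);
        pthread_mutex_lock(&hotplug_lock);  /* ~ cpuhp registration */
        pthread_mutex_unlock(&hotplug_lock);
        pthread_mutex_unlock(&sysfs_lock);
        return NULL;
}

/* Path 2: hotplug_lock -> sysfs_lock (the opposite order). */
static void *thread_b(void *arg)
{
        pthread_mutex_lock(&hotplug_lock);
        pthread_mutex_lock(&sysfs_lock);
        pthread_mutex_unlock(&sysfs_lock);
        pthread_mutex_unlock(&hotplug_lock);
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        /* Both orders race; with unlucky timing this deadlocks (ABBA). */
        pthread_create(&a, NULL, thread_a, NULL);
        pthread_create(&b, NULL, thread_b, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        puts("no deadlock this time");
        return 0;
}

The patch breaks the cycle by never taking the hotplug lock inside q->sysfs_lock: cpuhp registration moves out from under it and is serialized by the new, narrowly scoped blk_mq_cpuhp_lock instead.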
1 parent a68cb09 commit e946b4f

1 file changed: +92 −6 lines changed

block/blk-mq.c

@@ -43,6 +43,7 @@
 
 static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
 static DEFINE_PER_CPU(call_single_data_t, blk_cpu_csd);
+static DEFINE_MUTEX(blk_mq_cpuhp_lock);
 
 static void blk_mq_insert_request(struct request *rq, blk_insert_t flags);
 static void blk_mq_request_bypass_insert(struct request *rq,
@@ -3736,13 +3737,91 @@ static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
        return 0;
 }
 
-static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
+static void __blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
 {
-       if (!(hctx->flags & BLK_MQ_F_STACKING))
+       lockdep_assert_held(&blk_mq_cpuhp_lock);
+
+       if (!(hctx->flags & BLK_MQ_F_STACKING) &&
+           !hlist_unhashed(&hctx->cpuhp_online)) {
                cpuhp_state_remove_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
                                                    &hctx->cpuhp_online);
-       cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD,
-                                           &hctx->cpuhp_dead);
+               INIT_HLIST_NODE(&hctx->cpuhp_online);
+       }
+
+       if (!hlist_unhashed(&hctx->cpuhp_dead)) {
+               cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD,
+                                                   &hctx->cpuhp_dead);
+               INIT_HLIST_NODE(&hctx->cpuhp_dead);
+       }
+}
+
+static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
+{
+       mutex_lock(&blk_mq_cpuhp_lock);
+       __blk_mq_remove_cpuhp(hctx);
+       mutex_unlock(&blk_mq_cpuhp_lock);
+}
+
+static void __blk_mq_add_cpuhp(struct blk_mq_hw_ctx *hctx)
+{
+       lockdep_assert_held(&blk_mq_cpuhp_lock);
+
+       if (!(hctx->flags & BLK_MQ_F_STACKING) &&
+           hlist_unhashed(&hctx->cpuhp_online))
+               cpuhp_state_add_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
+                               &hctx->cpuhp_online);
+
+       if (hlist_unhashed(&hctx->cpuhp_dead))
+               cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD,
+                               &hctx->cpuhp_dead);
+}
+
+static void __blk_mq_remove_cpuhp_list(struct list_head *head)
+{
+       struct blk_mq_hw_ctx *hctx;
+
+       lockdep_assert_held(&blk_mq_cpuhp_lock);
+
+       list_for_each_entry(hctx, head, hctx_list)
+               __blk_mq_remove_cpuhp(hctx);
+}
+
+/*
+ * Unregister cpuhp callbacks from exited hw queues
+ *
+ * Safe to call if this `request_queue` is live
+ */
+static void blk_mq_remove_hw_queues_cpuhp(struct request_queue *q)
+{
+       LIST_HEAD(hctx_list);
+
+       spin_lock(&q->unused_hctx_lock);
+       list_splice_init(&q->unused_hctx_list, &hctx_list);
+       spin_unlock(&q->unused_hctx_lock);
+
+       mutex_lock(&blk_mq_cpuhp_lock);
+       __blk_mq_remove_cpuhp_list(&hctx_list);
+       mutex_unlock(&blk_mq_cpuhp_lock);
+
+       spin_lock(&q->unused_hctx_lock);
+       list_splice(&hctx_list, &q->unused_hctx_list);
+       spin_unlock(&q->unused_hctx_lock);
+}
+
+/*
+ * Register cpuhp callbacks from all hw queues
+ *
+ * Safe to call if this `request_queue` is live
+ */
+static void blk_mq_add_hw_queues_cpuhp(struct request_queue *q)
+{
+       struct blk_mq_hw_ctx *hctx;
+       unsigned long i;
+
+       mutex_lock(&blk_mq_cpuhp_lock);
+       queue_for_each_hw_ctx(q, hctx, i)
+               __blk_mq_add_cpuhp(hctx);
+       mutex_unlock(&blk_mq_cpuhp_lock);
 }
 
 /*
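Two idioms in the helpers above are worth noting. First, registration is made idempotent by keeping each hlist node "unhashed" whenever it is unregistered: INIT_HLIST_NODE() marks a node as on no list, and hlist_unhashed() lets the add/remove helpers test whether the callback is currently registered. A standalone sketch of that unhashed-marker idiom follows; it is user-space code mimicking the kernel's hlist semantics, and toy_register/toy_unregister are invented stand-ins for cpuhp_state_{add,remove}_instance_nocalls().

#include <stdbool.h>
#include <stdio.h>

/* Minimal copy of the kernel's hlist node shape. */
struct hlist_node {
        struct hlist_node *next, **pprev;
};

static void INIT_HLIST_NODE(struct hlist_node *h)
{
        h->next = NULL;
        h->pprev = NULL;          /* NULL pprev == "not on any list" */
}

static bool hlist_unhashed(const struct hlist_node *h)
{
        return !h->pprev;
}

/* Toy registration list (simplified: does not fix next->pprev). */
static struct hlist_node *registered;

static void toy_register(struct hlist_node *h)
{
        h->next = registered;
        h->pprev = &registered;   /* now hashed */
        registered = h;
}

static void toy_unregister(struct hlist_node *h)
{
        *h->pprev = h->next;
        INIT_HLIST_NODE(h);       /* back to unhashed, as the patch does */
}

int main(void)
{
        struct hlist_node cpuhp_dead;

        INIT_HLIST_NODE(&cpuhp_dead);

        /* Idempotent add: only register when currently unhashed. */
        if (hlist_unhashed(&cpuhp_dead))
                toy_register(&cpuhp_dead);
        if (hlist_unhashed(&cpuhp_dead))
                toy_register(&cpuhp_dead);  /* skipped: already registered */

        /* Idempotent remove: same guard in the other direction. */
        if (!hlist_unhashed(&cpuhp_dead))
                toy_unregister(&cpuhp_dead);

        printf("registered list empty: %s\n", registered ? "no" : "yes");
        return 0;
}

Second, blk_mq_remove_hw_queues_cpuhp() never holds q->unused_hctx_lock (a spinlock) and blk_mq_cpuhp_lock (a sleepable mutex) at the same time: it splices the list onto a private head under the spinlock, works through the copy under the mutex, then splices everything back. A sketch of that pattern appears after the diff.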
@@ -3793,8 +3872,6 @@ static void blk_mq_exit_hctx(struct request_queue *q,
        if (set->ops->exit_hctx)
                set->ops->exit_hctx(hctx, hctx_idx);
 
-       blk_mq_remove_cpuhp(hctx);
-
        xa_erase(&q->hctx_table, hctx_idx);
 
        spin_lock(&q->unused_hctx_lock);
@@ -3811,6 +3888,7 @@ static void blk_mq_exit_hw_queues(struct request_queue *q,
        queue_for_each_hw_ctx(q, hctx, i) {
                if (i == nr_queue)
                        break;
+               blk_mq_remove_cpuhp(hctx);
                blk_mq_exit_hctx(q, set, hctx, i);
        }
 }
@@ -3874,6 +3952,8 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
        INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
        spin_lock_init(&hctx->lock);
        INIT_LIST_HEAD(&hctx->dispatch);
+       INIT_HLIST_NODE(&hctx->cpuhp_dead);
+       INIT_HLIST_NODE(&hctx->cpuhp_online);
        hctx->queue = q;
        hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED;
 
@@ -4412,6 +4492,12 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
        xa_for_each_start(&q->hctx_table, j, hctx, j)
                blk_mq_exit_hctx(q, set, hctx, j);
        mutex_unlock(&q->sysfs_lock);
+
+       /* unregister cpuhp callbacks for exited hctxs */
+       blk_mq_remove_hw_queues_cpuhp(q);
+
+       /* register cpuhp for new initialized hctxs */
+       blk_mq_add_hw_queues_cpuhp(q);
 }
 
 int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
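As promised above, here is a user-space sketch of the splice-out/process/splice-back pattern used by blk_mq_remove_hw_queues_cpuhp(), with a simple doubly linked list and pthread locks; every name here is invented for the example, and list_splice_init() is simplified to assume an empty destination.

#include <pthread.h>
#include <stdio.h>

/* Toy doubly linked list node, standing in for hctx->hctx_list. */
struct node {
        struct node *prev, *next;
        int id;
};

/* A list head links to itself when empty. */
static void list_init(struct node *head)
{
        head->prev = head->next = head;
}

static void list_add(struct node *head, struct node *n)
{
        n->next = head->next;
        n->prev = head;
        head->next->prev = n;
        head->next = n;
}

/* Move all of src onto dst (assumes dst is empty), leaving src empty. */
static void list_splice_init(struct node *src, struct node *dst)
{
        if (src->next == src)
                return;                 /* nothing to move */
        dst->next = src->next;
        dst->prev = src->prev;
        src->next->prev = dst;
        src->prev->next = dst;
        list_init(src);
}

static pthread_spinlock_t list_lock;    /* ~ q->unused_hctx_lock */
static pthread_mutex_t sleep_lock = PTHREAD_MUTEX_INITIALIZER; /* ~ blk_mq_cpuhp_lock */
static struct node unused_list;

static void process_unused(void)
{
        struct node local, *n;

        list_init(&local);

        /* Step 1: steal the list under the spinlock, then drop it. */
        pthread_spin_lock(&list_lock);
        list_splice_init(&unused_list, &local);
        pthread_spin_unlock(&list_lock);

        /* Step 2: sleepable work with only the mutex held. */
        pthread_mutex_lock(&sleep_lock);
        for (n = local.next; n != &local; n = n->next)
                printf("unregistering node %d\n", n->id);
        pthread_mutex_unlock(&sleep_lock);

        /* Step 3: put everything back under the spinlock. */
        pthread_spin_lock(&list_lock);
        list_splice_init(&local, &unused_list);
        pthread_spin_unlock(&list_lock);
}

int main(void)
{
        struct node a = { .id = 1 }, b = { .id = 2 };

        pthread_spin_init(&list_lock, PTHREAD_PROCESS_PRIVATE);
        list_init(&unused_list);
        list_add(&unused_list, &a);
        list_add(&unused_list, &b);
        process_unused();
        return 0;
}

The point of the pattern is that the spinlock is held only for the cheap pointer surgery, while the lock that may sleep is taken with no spinlock held, which is exactly the constraint the kernel helper has to satisfy.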
