Skip to content

Commit 4669e13

Browse files
committed
Merge tag 'block-5.14-2021-07-30' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: - gendisk freeing fix (Christoph) - blk-iocost wake ordering fix (Tejun) - tag allocation error handling fix (John) - loop locking fix. While this isn't the prettiest fix in the world, nobody has any good alternatives for 5.14. Something to likely revisit for 5.15. (Tetsuo) * tag 'block-5.14-2021-07-30' of git://git.kernel.dk/linux-block: block: delay freeing the gendisk blk-iocost: fix operation ordering in iocg_wake_fn() blk-mq-sched: Fix blk_mq_sched_alloc_tags() error handling loop: reintroduce global lock for safe loop_validate_file() traversal
2 parents 27eb687 + 340e845 commit 4669e13

File tree

5 files changed

+110
-51
lines changed

5 files changed

+110
-51
lines changed

block/blk-iocost.c

+6-5
Original file line numberDiff line numberDiff line change
@@ -1440,16 +1440,17 @@ static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode,
14401440
return -1;
14411441

14421442
iocg_commit_bio(ctx->iocg, wait->bio, wait->abs_cost, cost);
1443+
wait->committed = true;
14431444

14441445
/*
14451446
* autoremove_wake_function() removes the wait entry only when it
1446-
* actually changed the task state. We want the wait always
1447-
* removed. Remove explicitly and use default_wake_function().
1447+
* actually changed the task state. We want the wait always removed.
1448+
* Remove explicitly and use default_wake_function(). Note that the
1449+
* order of operations is important as finish_wait() tests whether
1450+
* @wq_entry is removed without grabbing the lock.
14481451
*/
1449-
list_del_init(&wq_entry->entry);
1450-
wait->committed = true;
1451-
14521452
default_wake_function(wq_entry, mode, flags, key);
1453+
list_del_init_careful(&wq_entry->entry);
14531454
return 0;
14541455
}
14551456

block/blk-mq-sched.c

+4-13
Original file line numberDiff line numberDiff line change
@@ -515,17 +515,6 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
515515
percpu_ref_put(&q->q_usage_counter);
516516
}
517517

518-
static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
519-
struct blk_mq_hw_ctx *hctx,
520-
unsigned int hctx_idx)
521-
{
522-
if (hctx->sched_tags) {
523-
blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
524-
blk_mq_free_rq_map(hctx->sched_tags, set->flags);
525-
hctx->sched_tags = NULL;
526-
}
527-
}
528-
529518
static int blk_mq_sched_alloc_tags(struct request_queue *q,
530519
struct blk_mq_hw_ctx *hctx,
531520
unsigned int hctx_idx)
@@ -539,8 +528,10 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
539528
return -ENOMEM;
540529

541530
ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
542-
if (ret)
543-
blk_mq_sched_free_tags(set, hctx, hctx_idx);
531+
if (ret) {
532+
blk_mq_free_rq_map(hctx->sched_tags, set->flags);
533+
hctx->sched_tags = NULL;
534+
}
544535

545536
return ret;
546537
}

block/genhd.c

+1-2
Original file line numberDiff line numberDiff line change
@@ -1079,10 +1079,9 @@ static void disk_release(struct device *dev)
10791079
disk_release_events(disk);
10801080
kfree(disk->random);
10811081
xa_destroy(&disk->part_tbl);
1082-
bdput(disk->part0);
10831082
if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue)
10841083
blk_put_queue(disk->queue);
1085-
kfree(disk);
1084+
bdput(disk->part0); /* frees the disk */
10861085
}
10871086
struct class block_class = {
10881087
.name = "block",

drivers/block/loop.c

+97-31
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,47 @@
8888

8989
static DEFINE_IDR(loop_index_idr);
9090
static DEFINE_MUTEX(loop_ctl_mutex);
91+
static DEFINE_MUTEX(loop_validate_mutex);
92+
93+
/**
94+
* loop_global_lock_killable() - take locks for safe loop_validate_file() test
95+
*
96+
* @lo: struct loop_device
97+
* @global: true if @lo is about to bind another "struct loop_device", false otherwise
98+
*
99+
* Returns 0 on success, -EINTR otherwise.
100+
*
101+
* Since loop_validate_file() traverses on other "struct loop_device" if
102+
* is_loop_device() is true, we need a global lock for serializing concurrent
103+
* loop_configure()/loop_change_fd()/__loop_clr_fd() calls.
104+
*/
105+
static int loop_global_lock_killable(struct loop_device *lo, bool global)
106+
{
107+
int err;
108+
109+
if (global) {
110+
err = mutex_lock_killable(&loop_validate_mutex);
111+
if (err)
112+
return err;
113+
}
114+
err = mutex_lock_killable(&lo->lo_mutex);
115+
if (err && global)
116+
mutex_unlock(&loop_validate_mutex);
117+
return err;
118+
}
119+
120+
/**
121+
* loop_global_unlock() - release locks taken by loop_global_lock_killable()
122+
*
123+
* @lo: struct loop_device
124+
* @global: true if @lo was about to bind another "struct loop_device", false otherwise
125+
*/
126+
static void loop_global_unlock(struct loop_device *lo, bool global)
127+
{
128+
mutex_unlock(&lo->lo_mutex);
129+
if (global)
130+
mutex_unlock(&loop_validate_mutex);
131+
}
91132

92133
static int max_part;
93134
static int part_shift;
@@ -672,13 +713,15 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
672713
while (is_loop_device(f)) {
673714
struct loop_device *l;
674715

716+
lockdep_assert_held(&loop_validate_mutex);
675717
if (f->f_mapping->host->i_rdev == bdev->bd_dev)
676718
return -EBADF;
677719

678720
l = I_BDEV(f->f_mapping->host)->bd_disk->private_data;
679-
if (l->lo_state != Lo_bound) {
721+
if (l->lo_state != Lo_bound)
680722
return -EINVAL;
681-
}
723+
/* Order wrt setting lo->lo_backing_file in loop_configure(). */
724+
rmb();
682725
f = l->lo_backing_file;
683726
}
684727
if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
@@ -697,13 +740,18 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
697740
static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
698741
unsigned int arg)
699742
{
700-
struct file *file = NULL, *old_file;
701-
int error;
702-
bool partscan;
743+
struct file *file = fget(arg);
744+
struct file *old_file;
745+
int error;
746+
bool partscan;
747+
bool is_loop;
703748

704-
error = mutex_lock_killable(&lo->lo_mutex);
749+
if (!file)
750+
return -EBADF;
751+
is_loop = is_loop_device(file);
752+
error = loop_global_lock_killable(lo, is_loop);
705753
if (error)
706-
return error;
754+
goto out_putf;
707755
error = -ENXIO;
708756
if (lo->lo_state != Lo_bound)
709757
goto out_err;
@@ -713,11 +761,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
713761
if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
714762
goto out_err;
715763

716-
error = -EBADF;
717-
file = fget(arg);
718-
if (!file)
719-
goto out_err;
720-
721764
error = loop_validate_file(file, bdev);
722765
if (error)
723766
goto out_err;
@@ -740,7 +783,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
740783
loop_update_dio(lo);
741784
blk_mq_unfreeze_queue(lo->lo_queue);
742785
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
743-
mutex_unlock(&lo->lo_mutex);
786+
loop_global_unlock(lo, is_loop);
787+
788+
/*
789+
* Flush loop_validate_file() before fput(), for l->lo_backing_file
790+
* might be pointing at old_file which might be the last reference.
791+
*/
792+
if (!is_loop) {
793+
mutex_lock(&loop_validate_mutex);
794+
mutex_unlock(&loop_validate_mutex);
795+
}
744796
/*
745797
* We must drop file reference outside of lo_mutex as dropping
746798
* the file ref can take open_mutex which creates circular locking
@@ -752,9 +804,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
752804
return 0;
753805

754806
out_err:
755-
mutex_unlock(&lo->lo_mutex);
756-
if (file)
757-
fput(file);
807+
loop_global_unlock(lo, is_loop);
808+
out_putf:
809+
fput(file);
758810
return error;
759811
}
760812

@@ -1136,22 +1188,22 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
11361188
struct block_device *bdev,
11371189
const struct loop_config *config)
11381190
{
1139-
struct file *file;
1140-
struct inode *inode;
1191+
struct file *file = fget(config->fd);
1192+
struct inode *inode;
11411193
struct address_space *mapping;
1142-
int error;
1143-
loff_t size;
1144-
bool partscan;
1145-
unsigned short bsize;
1194+
int error;
1195+
loff_t size;
1196+
bool partscan;
1197+
unsigned short bsize;
1198+
bool is_loop;
1199+
1200+
if (!file)
1201+
return -EBADF;
1202+
is_loop = is_loop_device(file);
11461203

11471204
/* This is safe, since we have a reference from open(). */
11481205
__module_get(THIS_MODULE);
11491206

1150-
error = -EBADF;
1151-
file = fget(config->fd);
1152-
if (!file)
1153-
goto out;
1154-
11551207
/*
11561208
* If we don't hold exclusive handle for the device, upgrade to it
11571209
* here to avoid changing device under exclusive owner.
@@ -1162,7 +1214,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
11621214
goto out_putf;
11631215
}
11641216

1165-
error = mutex_lock_killable(&lo->lo_mutex);
1217+
error = loop_global_lock_killable(lo, is_loop);
11661218
if (error)
11671219
goto out_bdev;
11681220

@@ -1242,6 +1294,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
12421294
size = get_loop_size(lo, file);
12431295
loop_set_size(lo, size);
12441296

1297+
/* Order wrt reading lo_state in loop_validate_file(). */
1298+
wmb();
1299+
12451300
lo->lo_state = Lo_bound;
12461301
if (part_shift)
12471302
lo->lo_flags |= LO_FLAGS_PARTSCAN;
@@ -1253,21 +1308,20 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
12531308
* put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
12541309
*/
12551310
bdgrab(bdev);
1256-
mutex_unlock(&lo->lo_mutex);
1311+
loop_global_unlock(lo, is_loop);
12571312
if (partscan)
12581313
loop_reread_partitions(lo);
12591314
if (!(mode & FMODE_EXCL))
12601315
bd_abort_claiming(bdev, loop_configure);
12611316
return 0;
12621317

12631318
out_unlock:
1264-
mutex_unlock(&lo->lo_mutex);
1319+
loop_global_unlock(lo, is_loop);
12651320
out_bdev:
12661321
if (!(mode & FMODE_EXCL))
12671322
bd_abort_claiming(bdev, loop_configure);
12681323
out_putf:
12691324
fput(file);
1270-
out:
12711325
/* This is safe: open() is still holding a reference. */
12721326
module_put(THIS_MODULE);
12731327
return error;
@@ -1283,6 +1337,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
12831337
int lo_number;
12841338
struct loop_worker *pos, *worker;
12851339

1340+
/*
1341+
* Flush loop_configure() and loop_change_fd(). It is acceptable for
1342+
* loop_validate_file() to succeed, for actual clear operation has not
1343+
* started yet.
1344+
*/
1345+
mutex_lock(&loop_validate_mutex);
1346+
mutex_unlock(&loop_validate_mutex);
1347+
/*
1348+
* loop_validate_file() now fails because l->lo_state != Lo_bound
1349+
* became visible.
1350+
*/
1351+
12861352
mutex_lock(&lo->lo_mutex);
12871353
if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
12881354
err = -ENXIO;

fs/block_dev.c

+2
Original file line numberDiff line numberDiff line change
@@ -812,6 +812,8 @@ static void bdev_free_inode(struct inode *inode)
812812
free_percpu(bdev->bd_stats);
813813
kfree(bdev->bd_meta_info);
814814

815+
if (!bdev_is_partition(bdev))
816+
kfree(bdev->bd_disk);
815817
kmem_cache_free(bdev_cachep, BDEV_I(inode));
816818
}
817819

0 commit comments

Comments
 (0)