mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-04-06 17:49:11 +03:00
Make zvol minor functionality more robust
Close the race window in zvol_open() to prevent removal of zvol_state in the 'first open' code path. Move the call to check_disk_change() under zvol_state_lock to make sure the zvol_media_changed() and zvol_revalidate_disk() called by check_disk_change() are invoked with positive zv_open_count. Skip opened zvols when removing minors and set private_data to NULL for zvols that are not in use whose minors are being removed, to indicate to zvol_open() that the state is gone. Skip opened zvols when renaming minors to avoid modifying zv_name that might be in use, e.g. in zvol_ioctl(). Drop zvol_state_lock before calling add_disk() when creating minors to avoid deadlocks with zvol_open(). Wrap dmu_objset_find() with spl_fstran_mark()/unmark(). Signed-off-by: Boris Protopopov <boris.protopopov@actifio.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Richard Yao <ryao@gentoo.org> Closes #4344
This commit is contained in:
parent
19a47cb1c2
commit
5428dc51fb
@ -35,6 +35,7 @@
|
|||||||
* needs to be run before opening and using a device.
|
* needs to be run before opening and using a device.
|
||||||
*
|
*
|
||||||
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
|
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
|
||||||
|
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <sys/dbuf.h>
|
#include <sys/dbuf.h>
|
||||||
@ -607,6 +608,8 @@ zvol_write(zvol_state_t *zv, uio_t *uio, boolean_t sync)
|
|||||||
rl_t *rl;
|
rl_t *rl;
|
||||||
int error = 0;
|
int error = 0;
|
||||||
|
|
||||||
|
ASSERT(zv && zv->zv_open_count > 0);
|
||||||
|
|
||||||
rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid,
|
rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid,
|
||||||
RL_WRITER);
|
RL_WRITER);
|
||||||
|
|
||||||
@ -675,6 +678,8 @@ zvol_discard(struct bio *bio)
|
|||||||
rl_t *rl;
|
rl_t *rl;
|
||||||
dmu_tx_t *tx;
|
dmu_tx_t *tx;
|
||||||
|
|
||||||
|
ASSERT(zv && zv->zv_open_count > 0);
|
||||||
|
|
||||||
if (end > zv->zv_volsize)
|
if (end > zv->zv_volsize)
|
||||||
return (SET_ERROR(EIO));
|
return (SET_ERROR(EIO));
|
||||||
|
|
||||||
@ -722,6 +727,8 @@ zvol_read(zvol_state_t *zv, uio_t *uio)
|
|||||||
rl_t *rl;
|
rl_t *rl;
|
||||||
int error = 0;
|
int error = 0;
|
||||||
|
|
||||||
|
ASSERT(zv && zv->zv_open_count > 0);
|
||||||
|
|
||||||
rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid,
|
rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid,
|
||||||
RL_READER);
|
RL_READER);
|
||||||
while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
|
while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
|
||||||
@ -1020,7 +1027,7 @@ zvol_last_close(zvol_state_t *zv)
|
|||||||
static int
|
static int
|
||||||
zvol_open(struct block_device *bdev, fmode_t flag)
|
zvol_open(struct block_device *bdev, fmode_t flag)
|
||||||
{
|
{
|
||||||
zvol_state_t *zv = bdev->bd_disk->private_data;
|
zvol_state_t *zv;
|
||||||
int error = 0, drop_mutex = 0;
|
int error = 0, drop_mutex = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1034,7 +1041,17 @@ zvol_open(struct block_device *bdev, fmode_t flag)
|
|||||||
drop_mutex = 1;
|
drop_mutex = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
ASSERT3P(zv, !=, NULL);
|
/*
|
||||||
|
* Obtain a copy of private_data under the lock to make sure
|
||||||
|
* that either the result of zvol_freeg() setting
|
||||||
|
* bdev->bd_disk->private_data to NULL is observed, or zvol_free()
|
||||||
|
* is not called on this zv because of the positive zv_open_count.
|
||||||
|
*/
|
||||||
|
zv = bdev->bd_disk->private_data;
|
||||||
|
if (zv == NULL) {
|
||||||
|
error = -ENXIO;
|
||||||
|
goto out_mutex;
|
||||||
|
}
|
||||||
|
|
||||||
if (zv->zv_open_count == 0) {
|
if (zv->zv_open_count == 0) {
|
||||||
error = zvol_first_open(zv);
|
error = zvol_first_open(zv);
|
||||||
@ -1049,6 +1066,8 @@ zvol_open(struct block_device *bdev, fmode_t flag)
|
|||||||
|
|
||||||
zv->zv_open_count++;
|
zv->zv_open_count++;
|
||||||
|
|
||||||
|
check_disk_change(bdev);
|
||||||
|
|
||||||
out_open_count:
|
out_open_count:
|
||||||
if (zv->zv_open_count == 0)
|
if (zv->zv_open_count == 0)
|
||||||
zvol_last_close(zv);
|
zvol_last_close(zv);
|
||||||
@ -1057,8 +1076,6 @@ out_mutex:
|
|||||||
if (drop_mutex)
|
if (drop_mutex)
|
||||||
mutex_exit(&zvol_state_lock);
|
mutex_exit(&zvol_state_lock);
|
||||||
|
|
||||||
check_disk_change(bdev);
|
|
||||||
|
|
||||||
return (SET_ERROR(error));
|
return (SET_ERROR(error));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1072,16 +1089,16 @@ zvol_release(struct gendisk *disk, fmode_t mode)
|
|||||||
zvol_state_t *zv = disk->private_data;
|
zvol_state_t *zv = disk->private_data;
|
||||||
int drop_mutex = 0;
|
int drop_mutex = 0;
|
||||||
|
|
||||||
|
ASSERT(zv && zv->zv_open_count > 0);
|
||||||
|
|
||||||
if (!mutex_owned(&zvol_state_lock)) {
|
if (!mutex_owned(&zvol_state_lock)) {
|
||||||
mutex_enter(&zvol_state_lock);
|
mutex_enter(&zvol_state_lock);
|
||||||
drop_mutex = 1;
|
drop_mutex = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (zv->zv_open_count > 0) {
|
zv->zv_open_count--;
|
||||||
zv->zv_open_count--;
|
if (zv->zv_open_count == 0)
|
||||||
if (zv->zv_open_count == 0)
|
zvol_last_close(zv);
|
||||||
zvol_last_close(zv);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (drop_mutex)
|
if (drop_mutex)
|
||||||
mutex_exit(&zvol_state_lock);
|
mutex_exit(&zvol_state_lock);
|
||||||
@ -1098,8 +1115,7 @@ zvol_ioctl(struct block_device *bdev, fmode_t mode,
|
|||||||
zvol_state_t *zv = bdev->bd_disk->private_data;
|
zvol_state_t *zv = bdev->bd_disk->private_data;
|
||||||
int error = 0;
|
int error = 0;
|
||||||
|
|
||||||
if (zv == NULL)
|
ASSERT(zv && zv->zv_open_count > 0);
|
||||||
return (SET_ERROR(-ENXIO));
|
|
||||||
|
|
||||||
switch (cmd) {
|
switch (cmd) {
|
||||||
case BLKFLSBUF:
|
case BLKFLSBUF:
|
||||||
@ -1133,6 +1149,8 @@ static int zvol_media_changed(struct gendisk *disk)
|
|||||||
{
|
{
|
||||||
zvol_state_t *zv = disk->private_data;
|
zvol_state_t *zv = disk->private_data;
|
||||||
|
|
||||||
|
ASSERT(zv && zv->zv_open_count > 0);
|
||||||
|
|
||||||
return (zv->zv_changed);
|
return (zv->zv_changed);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1140,6 +1158,8 @@ static int zvol_revalidate_disk(struct gendisk *disk)
|
|||||||
{
|
{
|
||||||
zvol_state_t *zv = disk->private_data;
|
zvol_state_t *zv = disk->private_data;
|
||||||
|
|
||||||
|
ASSERT(zv && zv->zv_open_count > 0);
|
||||||
|
|
||||||
zv->zv_changed = 0;
|
zv->zv_changed = 0;
|
||||||
set_capacity(zv->zv_disk, zv->zv_volsize >> 9);
|
set_capacity(zv->zv_disk, zv->zv_volsize >> 9);
|
||||||
|
|
||||||
@ -1156,7 +1176,11 @@ static int
|
|||||||
zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo)
|
zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo)
|
||||||
{
|
{
|
||||||
zvol_state_t *zv = bdev->bd_disk->private_data;
|
zvol_state_t *zv = bdev->bd_disk->private_data;
|
||||||
sector_t sectors = get_capacity(zv->zv_disk);
|
sector_t sectors;
|
||||||
|
|
||||||
|
ASSERT(zv && zv->zv_open_count > 0);
|
||||||
|
|
||||||
|
sectors = get_capacity(zv->zv_disk);
|
||||||
|
|
||||||
if (sectors > 2048) {
|
if (sectors > 2048) {
|
||||||
geo->heads = 16;
|
geo->heads = 16;
|
||||||
@ -1312,9 +1336,14 @@ out_kmem:
|
|||||||
static void
|
static void
|
||||||
zvol_free(zvol_state_t *zv)
|
zvol_free(zvol_state_t *zv)
|
||||||
{
|
{
|
||||||
|
ASSERT(MUTEX_HELD(&zvol_state_lock));
|
||||||
|
ASSERT(zv->zv_open_count == 0);
|
||||||
|
|
||||||
avl_destroy(&zv->zv_znode.z_range_avl);
|
avl_destroy(&zv->zv_znode.z_range_avl);
|
||||||
mutex_destroy(&zv->zv_znode.z_range_lock);
|
mutex_destroy(&zv->zv_znode.z_range_lock);
|
||||||
|
|
||||||
|
zv->zv_disk->private_data = NULL;
|
||||||
|
|
||||||
del_gendisk(zv->zv_disk);
|
del_gendisk(zv->zv_disk);
|
||||||
blk_cleanup_queue(zv->zv_queue);
|
blk_cleanup_queue(zv->zv_queue);
|
||||||
put_disk(zv->zv_disk);
|
put_disk(zv->zv_disk);
|
||||||
@ -1448,7 +1477,15 @@ out:
|
|||||||
|
|
||||||
if (error == 0) {
|
if (error == 0) {
|
||||||
zvol_insert(zv);
|
zvol_insert(zv);
|
||||||
|
/*
|
||||||
|
* Drop the lock to prevent deadlock with sys_open() ->
|
||||||
|
* zvol_open(), which first takes bd_disk->bd_mutex and then
|
||||||
|
* takes zvol_state_lock, whereas this code path first takes
|
||||||
|
* zvol_state_lock, and then takes bd_disk->bd_mutex.
|
||||||
|
*/
|
||||||
|
mutex_exit(&zvol_state_lock);
|
||||||
add_disk(zv->zv_disk);
|
add_disk(zv->zv_disk);
|
||||||
|
mutex_enter(&zvol_state_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
return (SET_ERROR(error));
|
return (SET_ERROR(error));
|
||||||
@ -1545,10 +1582,15 @@ int
|
|||||||
zvol_create_minors(const char *name)
|
zvol_create_minors(const char *name)
|
||||||
{
|
{
|
||||||
int error = 0;
|
int error = 0;
|
||||||
|
fstrans_cookie_t cookie;
|
||||||
|
|
||||||
if (!zvol_inhibit_dev)
|
if (zvol_inhibit_dev)
|
||||||
error = dmu_objset_find((char *)name, zvol_create_minors_cb,
|
return (0);
|
||||||
NULL, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
|
|
||||||
|
cookie = spl_fstrans_mark();
|
||||||
|
error = dmu_objset_find((char *)name, zvol_create_minors_cb,
|
||||||
|
NULL, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
|
||||||
|
spl_fstrans_unmark(cookie);
|
||||||
|
|
||||||
return (SET_ERROR(error));
|
return (SET_ERROR(error));
|
||||||
}
|
}
|
||||||
@ -1572,7 +1614,13 @@ zvol_remove_minors(const char *name)
|
|||||||
|
|
||||||
if (name == NULL || strcmp(zv->zv_name, name) == 0 ||
|
if (name == NULL || strcmp(zv->zv_name, name) == 0 ||
|
||||||
(strncmp(zv->zv_name, name, namelen) == 0 &&
|
(strncmp(zv->zv_name, name, namelen) == 0 &&
|
||||||
zv->zv_name[namelen] == '/')) {
|
(zv->zv_name[namelen] == '/' ||
|
||||||
|
zv->zv_name[namelen] == '@'))) {
|
||||||
|
|
||||||
|
/* If in use, leave alone */
|
||||||
|
if (zv->zv_open_count > 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
zvol_remove(zv);
|
zvol_remove(zv);
|
||||||
zvol_free(zv);
|
zvol_free(zv);
|
||||||
}
|
}
|
||||||
@ -1603,6 +1651,10 @@ zvol_rename_minors(const char *oldname, const char *newname)
|
|||||||
for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
|
for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
|
||||||
zv_next = list_next(&zvol_state_list, zv);
|
zv_next = list_next(&zvol_state_list, zv);
|
||||||
|
|
||||||
|
/* If in use, leave alone */
|
||||||
|
if (zv->zv_open_count > 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
if (strcmp(zv->zv_name, oldname) == 0) {
|
if (strcmp(zv->zv_name, oldname) == 0) {
|
||||||
__zvol_rename_minor(zv, newname);
|
__zvol_rename_minor(zv, newname);
|
||||||
} else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 &&
|
} else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 &&
|
||||||
@ -1643,8 +1695,17 @@ snapdev_snapshot_changed_cb(const char *dsname, void *arg) {
|
|||||||
|
|
||||||
int
|
int
|
||||||
zvol_set_snapdev(const char *dsname, uint64_t snapdev) {
|
zvol_set_snapdev(const char *dsname, uint64_t snapdev) {
|
||||||
|
fstrans_cookie_t cookie;
|
||||||
|
|
||||||
|
if (zvol_inhibit_dev)
|
||||||
|
/* caller should continue to modify snapdev property */
|
||||||
|
return (-1);
|
||||||
|
|
||||||
|
cookie = spl_fstrans_mark();
|
||||||
(void) dmu_objset_find((char *) dsname, snapdev_snapshot_changed_cb,
|
(void) dmu_objset_find((char *) dsname, snapdev_snapshot_changed_cb,
|
||||||
&snapdev, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
|
&snapdev, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
|
||||||
|
spl_fstrans_unmark(cookie);
|
||||||
|
|
||||||
/* caller should continue to modify snapdev property */
|
/* caller should continue to modify snapdev property */
|
||||||
return (-1);
|
return (-1);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user