mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	Make zvol minor functionality more robust
Close the race window in zvol_open() to prevent removal of zvol_state in the 'first open' code path. Move the call to check_disk_change() under zvol_state_lock to make sure the zvol_media_changed() and zvol_revalidate_disk() called by check_disk_change() are invoked with positive zv_open_count. Skip opened zvols when removing minors and set private_data to NULL for zvols that are not in use whose minors are being removed, to indicate to zvol_open() that the state is gone. Skip opened zvols when renaming minors to avoid modifying zv_name that might be in use, e.g. in zvol_ioctl(). Drop zvol_state_lock before calling add_disk() when creating minors to avoid deadlocks with zvol_open(). Wrap dmu_objset_find() with spl_fstran_mark()/unmark(). Signed-off-by: Boris Protopopov <boris.protopopov@actifio.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Richard Yao <ryao@gentoo.org> Closes #4344
This commit is contained in:
		
							parent
							
								
									19a47cb1c2
								
							
						
					
					
						commit
						5428dc51fb
					
				| @ -35,6 +35,7 @@ | |||||||
|  * needs to be run before opening and using a device. |  * needs to be run before opening and using a device. | ||||||
|  * |  * | ||||||
|  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved. |  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved. | ||||||
|  |  * Copyright (c) 2016 Actifio, Inc. All rights reserved. | ||||||
|  */ |  */ | ||||||
| 
 | 
 | ||||||
| #include <sys/dbuf.h> | #include <sys/dbuf.h> | ||||||
| @ -607,6 +608,8 @@ zvol_write(zvol_state_t *zv, uio_t *uio, boolean_t sync) | |||||||
| 	rl_t *rl; | 	rl_t *rl; | ||||||
| 	int error = 0; | 	int error = 0; | ||||||
| 
 | 
 | ||||||
|  | 	ASSERT(zv && zv->zv_open_count > 0); | ||||||
|  | 
 | ||||||
| 	rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, | 	rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, | ||||||
| 	    RL_WRITER); | 	    RL_WRITER); | ||||||
| 
 | 
 | ||||||
| @ -675,6 +678,8 @@ zvol_discard(struct bio *bio) | |||||||
| 	rl_t *rl; | 	rl_t *rl; | ||||||
| 	dmu_tx_t *tx; | 	dmu_tx_t *tx; | ||||||
| 
 | 
 | ||||||
|  | 	ASSERT(zv && zv->zv_open_count > 0); | ||||||
|  | 
 | ||||||
| 	if (end > zv->zv_volsize) | 	if (end > zv->zv_volsize) | ||||||
| 		return (SET_ERROR(EIO)); | 		return (SET_ERROR(EIO)); | ||||||
| 
 | 
 | ||||||
| @ -722,6 +727,8 @@ zvol_read(zvol_state_t *zv, uio_t *uio) | |||||||
| 	rl_t *rl; | 	rl_t *rl; | ||||||
| 	int error = 0; | 	int error = 0; | ||||||
| 
 | 
 | ||||||
|  | 	ASSERT(zv && zv->zv_open_count > 0); | ||||||
|  | 
 | ||||||
| 	rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, | 	rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, | ||||||
| 	    RL_READER); | 	    RL_READER); | ||||||
| 	while (uio->uio_resid > 0 && uio->uio_loffset < volsize) { | 	while (uio->uio_resid > 0 && uio->uio_loffset < volsize) { | ||||||
| @ -1020,7 +1027,7 @@ zvol_last_close(zvol_state_t *zv) | |||||||
| static int | static int | ||||||
| zvol_open(struct block_device *bdev, fmode_t flag) | zvol_open(struct block_device *bdev, fmode_t flag) | ||||||
| { | { | ||||||
| 	zvol_state_t *zv = bdev->bd_disk->private_data; | 	zvol_state_t *zv; | ||||||
| 	int error = 0, drop_mutex = 0; | 	int error = 0, drop_mutex = 0; | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| @ -1034,7 +1041,17 @@ zvol_open(struct block_device *bdev, fmode_t flag) | |||||||
| 		drop_mutex = 1; | 		drop_mutex = 1; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	ASSERT3P(zv, !=, NULL); | 	/*
 | ||||||
|  | 	 * Obtain a copy of private_data under the lock to make sure | ||||||
|  | 	 * that either the result of zvol_freeg() setting | ||||||
|  | 	 * bdev->bd_disk->private_data to NULL is observed, or zvol_free() | ||||||
|  | 	 * is not called on this zv because of the positive zv_open_count. | ||||||
|  | 	 */ | ||||||
|  | 	zv = bdev->bd_disk->private_data; | ||||||
|  | 	if (zv == NULL) { | ||||||
|  | 		error = -ENXIO; | ||||||
|  | 		goto out_mutex; | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	if (zv->zv_open_count == 0) { | 	if (zv->zv_open_count == 0) { | ||||||
| 		error = zvol_first_open(zv); | 		error = zvol_first_open(zv); | ||||||
| @ -1049,6 +1066,8 @@ zvol_open(struct block_device *bdev, fmode_t flag) | |||||||
| 
 | 
 | ||||||
| 	zv->zv_open_count++; | 	zv->zv_open_count++; | ||||||
| 
 | 
 | ||||||
|  | 	check_disk_change(bdev); | ||||||
|  | 
 | ||||||
| out_open_count: | out_open_count: | ||||||
| 	if (zv->zv_open_count == 0) | 	if (zv->zv_open_count == 0) | ||||||
| 		zvol_last_close(zv); | 		zvol_last_close(zv); | ||||||
| @ -1057,8 +1076,6 @@ out_mutex: | |||||||
| 	if (drop_mutex) | 	if (drop_mutex) | ||||||
| 		mutex_exit(&zvol_state_lock); | 		mutex_exit(&zvol_state_lock); | ||||||
| 
 | 
 | ||||||
| 	check_disk_change(bdev); |  | ||||||
| 
 |  | ||||||
| 	return (SET_ERROR(error)); | 	return (SET_ERROR(error)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -1072,16 +1089,16 @@ zvol_release(struct gendisk *disk, fmode_t mode) | |||||||
| 	zvol_state_t *zv = disk->private_data; | 	zvol_state_t *zv = disk->private_data; | ||||||
| 	int drop_mutex = 0; | 	int drop_mutex = 0; | ||||||
| 
 | 
 | ||||||
|  | 	ASSERT(zv && zv->zv_open_count > 0); | ||||||
|  | 
 | ||||||
| 	if (!mutex_owned(&zvol_state_lock)) { | 	if (!mutex_owned(&zvol_state_lock)) { | ||||||
| 		mutex_enter(&zvol_state_lock); | 		mutex_enter(&zvol_state_lock); | ||||||
| 		drop_mutex = 1; | 		drop_mutex = 1; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (zv->zv_open_count > 0) { |  | ||||||
| 	zv->zv_open_count--; | 	zv->zv_open_count--; | ||||||
| 	if (zv->zv_open_count == 0) | 	if (zv->zv_open_count == 0) | ||||||
| 		zvol_last_close(zv); | 		zvol_last_close(zv); | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	if (drop_mutex) | 	if (drop_mutex) | ||||||
| 		mutex_exit(&zvol_state_lock); | 		mutex_exit(&zvol_state_lock); | ||||||
| @ -1098,8 +1115,7 @@ zvol_ioctl(struct block_device *bdev, fmode_t mode, | |||||||
| 	zvol_state_t *zv = bdev->bd_disk->private_data; | 	zvol_state_t *zv = bdev->bd_disk->private_data; | ||||||
| 	int error = 0; | 	int error = 0; | ||||||
| 
 | 
 | ||||||
| 	if (zv == NULL) | 	ASSERT(zv && zv->zv_open_count > 0); | ||||||
| 		return (SET_ERROR(-ENXIO)); |  | ||||||
| 
 | 
 | ||||||
| 	switch (cmd) { | 	switch (cmd) { | ||||||
| 	case BLKFLSBUF: | 	case BLKFLSBUF: | ||||||
| @ -1133,6 +1149,8 @@ static int zvol_media_changed(struct gendisk *disk) | |||||||
| { | { | ||||||
| 	zvol_state_t *zv = disk->private_data; | 	zvol_state_t *zv = disk->private_data; | ||||||
| 
 | 
 | ||||||
|  | 	ASSERT(zv && zv->zv_open_count > 0); | ||||||
|  | 
 | ||||||
| 	return (zv->zv_changed); | 	return (zv->zv_changed); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -1140,6 +1158,8 @@ static int zvol_revalidate_disk(struct gendisk *disk) | |||||||
| { | { | ||||||
| 	zvol_state_t *zv = disk->private_data; | 	zvol_state_t *zv = disk->private_data; | ||||||
| 
 | 
 | ||||||
|  | 	ASSERT(zv && zv->zv_open_count > 0); | ||||||
|  | 
 | ||||||
| 	zv->zv_changed = 0; | 	zv->zv_changed = 0; | ||||||
| 	set_capacity(zv->zv_disk, zv->zv_volsize >> 9); | 	set_capacity(zv->zv_disk, zv->zv_volsize >> 9); | ||||||
| 
 | 
 | ||||||
| @ -1156,7 +1176,11 @@ static int | |||||||
| zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo) | zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo) | ||||||
| { | { | ||||||
| 	zvol_state_t *zv = bdev->bd_disk->private_data; | 	zvol_state_t *zv = bdev->bd_disk->private_data; | ||||||
| 	sector_t sectors = get_capacity(zv->zv_disk); | 	sector_t sectors; | ||||||
|  | 
 | ||||||
|  | 	ASSERT(zv && zv->zv_open_count > 0); | ||||||
|  | 
 | ||||||
|  | 	sectors = get_capacity(zv->zv_disk); | ||||||
| 
 | 
 | ||||||
| 	if (sectors > 2048) { | 	if (sectors > 2048) { | ||||||
| 		geo->heads = 16; | 		geo->heads = 16; | ||||||
| @ -1312,9 +1336,14 @@ out_kmem: | |||||||
| static void | static void | ||||||
| zvol_free(zvol_state_t *zv) | zvol_free(zvol_state_t *zv) | ||||||
| { | { | ||||||
|  | 	ASSERT(MUTEX_HELD(&zvol_state_lock)); | ||||||
|  | 	ASSERT(zv->zv_open_count == 0); | ||||||
|  | 
 | ||||||
| 	avl_destroy(&zv->zv_znode.z_range_avl); | 	avl_destroy(&zv->zv_znode.z_range_avl); | ||||||
| 	mutex_destroy(&zv->zv_znode.z_range_lock); | 	mutex_destroy(&zv->zv_znode.z_range_lock); | ||||||
| 
 | 
 | ||||||
|  | 	zv->zv_disk->private_data = NULL; | ||||||
|  | 
 | ||||||
| 	del_gendisk(zv->zv_disk); | 	del_gendisk(zv->zv_disk); | ||||||
| 	blk_cleanup_queue(zv->zv_queue); | 	blk_cleanup_queue(zv->zv_queue); | ||||||
| 	put_disk(zv->zv_disk); | 	put_disk(zv->zv_disk); | ||||||
| @ -1448,7 +1477,15 @@ out: | |||||||
| 
 | 
 | ||||||
| 	if (error == 0) { | 	if (error == 0) { | ||||||
| 		zvol_insert(zv); | 		zvol_insert(zv); | ||||||
|  | 		/*
 | ||||||
|  | 		 * Drop the lock to prevent deadlock with sys_open() -> | ||||||
|  | 		 * zvol_open(), which first takes bd_disk->bd_mutex and then | ||||||
|  | 		 * takes zvol_state_lock, whereas this code path first takes | ||||||
|  | 		 * zvol_state_lock, and then takes bd_disk->bd_mutex. | ||||||
|  | 		 */ | ||||||
|  | 		mutex_exit(&zvol_state_lock); | ||||||
| 		add_disk(zv->zv_disk); | 		add_disk(zv->zv_disk); | ||||||
|  | 		mutex_enter(&zvol_state_lock); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	return (SET_ERROR(error)); | 	return (SET_ERROR(error)); | ||||||
| @ -1545,10 +1582,15 @@ int | |||||||
| zvol_create_minors(const char *name) | zvol_create_minors(const char *name) | ||||||
| { | { | ||||||
| 	int error = 0; | 	int error = 0; | ||||||
|  | 	fstrans_cookie_t cookie; | ||||||
| 
 | 
 | ||||||
| 	if (!zvol_inhibit_dev) | 	if (zvol_inhibit_dev) | ||||||
|  | 		return (0); | ||||||
|  | 
 | ||||||
|  | 	cookie = spl_fstrans_mark(); | ||||||
| 	error = dmu_objset_find((char *)name, zvol_create_minors_cb, | 	error = dmu_objset_find((char *)name, zvol_create_minors_cb, | ||||||
| 	    NULL, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); | 	    NULL, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); | ||||||
|  | 	spl_fstrans_unmark(cookie); | ||||||
| 
 | 
 | ||||||
| 	return (SET_ERROR(error)); | 	return (SET_ERROR(error)); | ||||||
| } | } | ||||||
| @ -1572,7 +1614,13 @@ zvol_remove_minors(const char *name) | |||||||
| 
 | 
 | ||||||
| 		if (name == NULL || strcmp(zv->zv_name, name) == 0 || | 		if (name == NULL || strcmp(zv->zv_name, name) == 0 || | ||||||
| 		    (strncmp(zv->zv_name, name, namelen) == 0 && | 		    (strncmp(zv->zv_name, name, namelen) == 0 && | ||||||
| 		    zv->zv_name[namelen] == '/')) { | 		    (zv->zv_name[namelen] == '/' || | ||||||
|  | 		    zv->zv_name[namelen] == '@'))) { | ||||||
|  | 
 | ||||||
|  | 			/* If in use, leave alone */ | ||||||
|  | 			if (zv->zv_open_count > 0) | ||||||
|  | 				continue; | ||||||
|  | 
 | ||||||
| 			zvol_remove(zv); | 			zvol_remove(zv); | ||||||
| 			zvol_free(zv); | 			zvol_free(zv); | ||||||
| 		} | 		} | ||||||
| @ -1603,6 +1651,10 @@ zvol_rename_minors(const char *oldname, const char *newname) | |||||||
| 	for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) { | 	for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) { | ||||||
| 		zv_next = list_next(&zvol_state_list, zv); | 		zv_next = list_next(&zvol_state_list, zv); | ||||||
| 
 | 
 | ||||||
|  | 		/* If in use, leave alone */ | ||||||
|  | 		if (zv->zv_open_count > 0) | ||||||
|  | 			continue; | ||||||
|  | 
 | ||||||
| 		if (strcmp(zv->zv_name, oldname) == 0) { | 		if (strcmp(zv->zv_name, oldname) == 0) { | ||||||
| 			__zvol_rename_minor(zv, newname); | 			__zvol_rename_minor(zv, newname); | ||||||
| 		} else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 && | 		} else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 && | ||||||
| @ -1643,8 +1695,17 @@ snapdev_snapshot_changed_cb(const char *dsname, void *arg) { | |||||||
| 
 | 
 | ||||||
| int | int | ||||||
| zvol_set_snapdev(const char *dsname, uint64_t snapdev) { | zvol_set_snapdev(const char *dsname, uint64_t snapdev) { | ||||||
|  | 	fstrans_cookie_t cookie; | ||||||
|  | 
 | ||||||
|  | 	if (zvol_inhibit_dev) | ||||||
|  | 		/* caller should continue to modify snapdev property */ | ||||||
|  | 		return (-1); | ||||||
|  | 
 | ||||||
|  | 	cookie = spl_fstrans_mark(); | ||||||
| 	(void) dmu_objset_find((char *) dsname, snapdev_snapshot_changed_cb, | 	(void) dmu_objset_find((char *) dsname, snapdev_snapshot_changed_cb, | ||||||
| 		&snapdev, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); | 		&snapdev, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); | ||||||
|  | 	spl_fstrans_unmark(cookie); | ||||||
|  | 
 | ||||||
| 	/* caller should continue to modify snapdev property */ | 	/* caller should continue to modify snapdev property */ | ||||||
| 	return (-1); | 	return (-1); | ||||||
| } | } | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Boris Protopopov
						Boris Protopopov