mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-01-24 17:52:12 +03:00
zvol: remove the OS-side minor before freeing the zvol
When destroying a zvol, it is not "unpublished" from the system (that is, /dev/zd* node removed) until zvol_os_free(). Under Linux, at the time del_gendisk() and put_disk() are called, the device node may still have an active hold, from a userspace program or something inside the kernel (a partition probe). As it is currently, this can lead to calls to zvol_open() or zvol_release() while the zvol_state_t is partially or fully freed. zvol_open() has some protection against this by checking that private_data is NULL, but zvol_release() does not. This implements a better ordering for all of this by adding a new OS-side method, zvol_os_remove_minor(), which is responsible for fully decoupling the "private" (OS-side) objects from the zvol_state_t. For Linux, that means calling put_disk(), nulling private_data, and freeing zv_zso. This takes the place of zvol_os_clear_private(), which was a nod in that direction but did not do enough, and did not do it early enough. Equivalent changes are made on the FreeBSD side to follow the API change. Sponsored-by: Klara, Inc. Sponsored-by: Railway Corporation Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Fedor Uporov <fuporov.vstack@gmail.com> Signed-off-by: Rob Norris <rob.norris@klarasystems.com> Closes #17625
This commit is contained in:
parent
b2c792778c
commit
96f9d271ea
@ -20,7 +20,7 @@
|
|||||||
* CDDL HEADER END
|
* CDDL HEADER END
|
||||||
*/
|
*/
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2024, Klara, Inc.
|
* Copyright (c) 2024, 2025, Klara, Inc.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef _SYS_ZVOL_IMPL_H
|
#ifndef _SYS_ZVOL_IMPL_H
|
||||||
@ -135,7 +135,7 @@ int zvol_os_rename_minor(zvol_state_t *zv, const char *newname);
|
|||||||
int zvol_os_create_minor(const char *name);
|
int zvol_os_create_minor(const char *name);
|
||||||
int zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize);
|
int zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize);
|
||||||
boolean_t zvol_os_is_zvol(const char *path);
|
boolean_t zvol_os_is_zvol(const char *path);
|
||||||
void zvol_os_clear_private(zvol_state_t *zv);
|
void zvol_os_remove_minor(zvol_state_t *zv);
|
||||||
void zvol_os_set_disk_ro(zvol_state_t *zv, int flags);
|
void zvol_os_set_disk_ro(zvol_state_t *zv, int flags);
|
||||||
void zvol_os_set_capacity(zvol_state_t *zv, uint64_t capacity);
|
void zvol_os_set_capacity(zvol_state_t *zv, uint64_t capacity);
|
||||||
|
|
||||||
|
|||||||
@ -31,7 +31,7 @@
|
|||||||
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
|
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
|
||||||
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
|
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
|
||||||
* Copyright (c) 2014 Integros [integros.com]
|
* Copyright (c) 2014 Integros [integros.com]
|
||||||
* Copyright (c) 2024, Klara, Inc.
|
* Copyright (c) 2024, 2025, Klara, Inc.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */
|
/* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */
|
||||||
@ -196,7 +196,6 @@ DECLARE_GEOM_CLASS(zfs_zvol_class, zfs_zvol);
|
|||||||
|
|
||||||
static int zvol_geom_open(struct g_provider *pp, int flag, int count);
|
static int zvol_geom_open(struct g_provider *pp, int flag, int count);
|
||||||
static int zvol_geom_close(struct g_provider *pp, int flag, int count);
|
static int zvol_geom_close(struct g_provider *pp, int flag, int count);
|
||||||
static void zvol_geom_destroy(zvol_state_t *zv);
|
|
||||||
static int zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace);
|
static int zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace);
|
||||||
static void zvol_geom_bio_start(struct bio *bp);
|
static void zvol_geom_bio_start(struct bio *bp);
|
||||||
static int zvol_geom_bio_getattr(struct bio *bp);
|
static int zvol_geom_bio_getattr(struct bio *bp);
|
||||||
@ -408,20 +407,6 @@ zvol_geom_close(struct g_provider *pp, int flag, int count)
|
|||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
zvol_geom_destroy(zvol_state_t *zv)
|
|
||||||
{
|
|
||||||
struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
|
|
||||||
struct g_provider *pp = zsg->zsg_provider;
|
|
||||||
|
|
||||||
ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM);
|
|
||||||
|
|
||||||
g_topology_assert();
|
|
||||||
|
|
||||||
zsg->zsg_provider = NULL;
|
|
||||||
g_wither_geom(pp->geom, ENXIO);
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
zvol_wait_close(zvol_state_t *zv)
|
zvol_wait_close(zvol_state_t *zv)
|
||||||
{
|
{
|
||||||
@ -1400,42 +1385,65 @@ zvol_alloc(const char *name, uint64_t volsize, uint64_t volblocksize,
|
|||||||
* Remove minor node for the specified volume.
|
* Remove minor node for the specified volume.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
zvol_os_free(zvol_state_t *zv)
|
zvol_os_remove_minor(zvol_state_t *zv)
|
||||||
{
|
{
|
||||||
ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock));
|
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
|
||||||
ASSERT(!MUTEX_HELD(&zv->zv_state_lock));
|
|
||||||
ASSERT0(zv->zv_open_count);
|
ASSERT0(zv->zv_open_count);
|
||||||
|
ASSERT0(atomic_read(&zv->zv_suspend_ref));
|
||||||
|
ASSERT(zv->zv_flags & ZVOL_REMOVING);
|
||||||
|
|
||||||
ZFS_LOG(1, "ZVOL %s destroyed.", zv->zv_name);
|
struct zvol_state_os *zso = zv->zv_zso;
|
||||||
|
zv->zv_zso = NULL;
|
||||||
rw_destroy(&zv->zv_suspend_lock);
|
|
||||||
zfs_rangelock_fini(&zv->zv_rangelock);
|
|
||||||
|
|
||||||
if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
|
if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
|
||||||
struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
|
struct zvol_state_geom *zsg = &zso->zso_geom;
|
||||||
struct g_provider *pp __maybe_unused = zsg->zsg_provider;
|
struct g_provider *pp = zsg->zsg_provider;
|
||||||
|
pp->private = NULL;
|
||||||
ASSERT0P(pp->private);
|
mutex_exit(&zv->zv_state_lock);
|
||||||
|
|
||||||
g_topology_lock();
|
g_topology_lock();
|
||||||
zvol_geom_destroy(zv);
|
g_wither_geom(pp->geom, ENXIO);
|
||||||
g_topology_unlock();
|
g_topology_unlock();
|
||||||
} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
|
} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
|
||||||
struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;
|
struct zvol_state_dev *zsd = &zso->zso_dev;
|
||||||
struct cdev *dev = zsd->zsd_cdev;
|
struct cdev *dev = zsd->zsd_cdev;
|
||||||
|
|
||||||
|
if (dev != NULL)
|
||||||
|
dev->si_drv2 = NULL;
|
||||||
|
mutex_exit(&zv->zv_state_lock);
|
||||||
|
|
||||||
if (dev != NULL) {
|
if (dev != NULL) {
|
||||||
ASSERT0P(dev->si_drv2);
|
|
||||||
destroy_dev(dev);
|
destroy_dev(dev);
|
||||||
knlist_clear(&zsd->zsd_selinfo.si_note, 0);
|
knlist_clear(&zsd->zsd_selinfo.si_note, 0);
|
||||||
knlist_destroy(&zsd->zsd_selinfo.si_note);
|
knlist_destroy(&zsd->zsd_selinfo.si_note);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
kmem_free(zso, sizeof (struct zvol_state_os));
|
||||||
|
|
||||||
|
mutex_enter(&zv->zv_state_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
zvol_os_free(zvol_state_t *zv)
|
||||||
|
{
|
||||||
|
ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock));
|
||||||
|
ASSERT(!MUTEX_HELD(&zv->zv_state_lock));
|
||||||
|
ASSERT0(zv->zv_open_count);
|
||||||
|
ASSERT0P(zv->zv_zso);
|
||||||
|
|
||||||
|
ASSERT0P(zv->zv_objset);
|
||||||
|
ASSERT0P(zv->zv_zilog);
|
||||||
|
ASSERT0P(zv->zv_dn);
|
||||||
|
|
||||||
|
ZFS_LOG(1, "ZVOL %s destroyed.", zv->zv_name);
|
||||||
|
|
||||||
|
rw_destroy(&zv->zv_suspend_lock);
|
||||||
|
zfs_rangelock_fini(&zv->zv_rangelock);
|
||||||
|
|
||||||
mutex_destroy(&zv->zv_state_lock);
|
mutex_destroy(&zv->zv_state_lock);
|
||||||
cv_destroy(&zv->zv_removing_cv);
|
cv_destroy(&zv->zv_removing_cv);
|
||||||
dataset_kstats_destroy(&zv->zv_kstat);
|
dataset_kstats_destroy(&zv->zv_kstat);
|
||||||
kmem_free(zv->zv_zso, sizeof (struct zvol_state_os));
|
|
||||||
kmem_free(zv, sizeof (zvol_state_t));
|
kmem_free(zv, sizeof (zvol_state_t));
|
||||||
zvol_minors--;
|
zvol_minors--;
|
||||||
}
|
}
|
||||||
@ -1538,28 +1546,6 @@ out_doi:
|
|||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
zvol_os_clear_private(zvol_state_t *zv)
|
|
||||||
{
|
|
||||||
ASSERT(RW_LOCK_HELD(&zvol_state_lock));
|
|
||||||
if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
|
|
||||||
struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
|
|
||||||
struct g_provider *pp = zsg->zsg_provider;
|
|
||||||
|
|
||||||
if (pp->private == NULL) /* already cleared */
|
|
||||||
return;
|
|
||||||
|
|
||||||
pp->private = NULL;
|
|
||||||
ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock));
|
|
||||||
} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
|
|
||||||
struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;
|
|
||||||
struct cdev *dev = zsd->zsd_cdev;
|
|
||||||
|
|
||||||
if (dev != NULL)
|
|
||||||
dev->si_drv2 = NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
int
|
||||||
zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize)
|
zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize)
|
||||||
{
|
{
|
||||||
|
|||||||
@ -22,7 +22,7 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2012, 2020 by Delphix. All rights reserved.
|
* Copyright (c) 2012, 2020 by Delphix. All rights reserved.
|
||||||
* Copyright (c) 2024, Rob Norris <robn@despairlabs.com>
|
* Copyright (c) 2024, Rob Norris <robn@despairlabs.com>
|
||||||
* Copyright (c) 2024, Klara, Inc.
|
* Copyright (c) 2024, 2025, Klara, Inc.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <sys/dataset_kstats.h>
|
#include <sys/dataset_kstats.h>
|
||||||
@ -971,16 +971,6 @@ zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize)
|
|||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
zvol_os_clear_private(zvol_state_t *zv)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Cleared while holding zvol_state_lock as a writer
|
|
||||||
* which will prevent zvol_open() from opening it.
|
|
||||||
*/
|
|
||||||
zv->zv_zso->zvo_disk->private_data = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Provide a simple virtual geometry for legacy compatibility. For devices
|
* Provide a simple virtual geometry for legacy compatibility. For devices
|
||||||
* smaller than 1 MiB a small head and sector count is used to allow very
|
* smaller than 1 MiB a small head and sector count is used to allow very
|
||||||
@ -1417,6 +1407,54 @@ out_kmem:
|
|||||||
return (ret);
|
return (ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
zvol_os_remove_minor(zvol_state_t *zv)
|
||||||
|
{
|
||||||
|
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
|
||||||
|
ASSERT0(zv->zv_open_count);
|
||||||
|
ASSERT0(atomic_read(&zv->zv_suspend_ref));
|
||||||
|
ASSERT(zv->zv_flags & ZVOL_REMOVING);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Cleared while holding zvol_state_lock as a writer
|
||||||
|
* which will prevent zvol_open() from opening it.
|
||||||
|
*/
|
||||||
|
struct zvol_state_os *zso = zv->zv_zso;
|
||||||
|
zv->zv_zso = NULL;
|
||||||
|
|
||||||
|
/* Clearing private_data will make new callers return immediately. */
|
||||||
|
zso->zvo_disk->private_data = NULL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Drop the state lock before calling del_gendisk(). There may be
|
||||||
|
* callers waiting to acquire it, but del_gendisk() will block until
|
||||||
|
* they exit, which would deadlock.
|
||||||
|
*/
|
||||||
|
mutex_exit(&zv->zv_state_lock);
|
||||||
|
|
||||||
|
del_gendisk(zso->zvo_disk);
|
||||||
|
#if defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) && \
|
||||||
|
(defined(HAVE_BLK_ALLOC_DISK) || defined(HAVE_BLK_ALLOC_DISK_2ARG))
|
||||||
|
#if defined(HAVE_BLK_CLEANUP_DISK)
|
||||||
|
blk_cleanup_disk(zso->zvo_disk);
|
||||||
|
#else
|
||||||
|
put_disk(zso->zvo_disk);
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
blk_cleanup_queue(zso->zvo_queue);
|
||||||
|
put_disk(zso->zvo_disk);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (zso->use_blk_mq)
|
||||||
|
blk_mq_free_tag_set(&zso->tag_set);
|
||||||
|
|
||||||
|
ida_simple_remove(&zvol_ida, MINOR(zso->zvo_dev) >> ZVOL_MINOR_BITS);
|
||||||
|
|
||||||
|
kmem_free(zso, sizeof (struct zvol_state_os));
|
||||||
|
|
||||||
|
mutex_enter(&zv->zv_state_lock);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Cleanup then free a zvol_state_t which was created by zvol_alloc().
|
* Cleanup then free a zvol_state_t which was created by zvol_alloc().
|
||||||
* At this time, the structure is not opened by anyone, is taken off
|
* At this time, the structure is not opened by anyone, is taken off
|
||||||
@ -1435,35 +1473,19 @@ zvol_os_free(zvol_state_t *zv)
|
|||||||
ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock));
|
ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock));
|
||||||
ASSERT(!MUTEX_HELD(&zv->zv_state_lock));
|
ASSERT(!MUTEX_HELD(&zv->zv_state_lock));
|
||||||
ASSERT0(zv->zv_open_count);
|
ASSERT0(zv->zv_open_count);
|
||||||
ASSERT0P(zv->zv_zso->zvo_disk->private_data);
|
ASSERT0P(zv->zv_zso);
|
||||||
|
|
||||||
|
ASSERT0P(zv->zv_objset);
|
||||||
|
ASSERT0P(zv->zv_zilog);
|
||||||
|
ASSERT0P(zv->zv_dn);
|
||||||
|
|
||||||
rw_destroy(&zv->zv_suspend_lock);
|
rw_destroy(&zv->zv_suspend_lock);
|
||||||
zfs_rangelock_fini(&zv->zv_rangelock);
|
zfs_rangelock_fini(&zv->zv_rangelock);
|
||||||
|
|
||||||
del_gendisk(zv->zv_zso->zvo_disk);
|
|
||||||
#if defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) && \
|
|
||||||
(defined(HAVE_BLK_ALLOC_DISK) || defined(HAVE_BLK_ALLOC_DISK_2ARG))
|
|
||||||
#if defined(HAVE_BLK_CLEANUP_DISK)
|
|
||||||
blk_cleanup_disk(zv->zv_zso->zvo_disk);
|
|
||||||
#else
|
|
||||||
put_disk(zv->zv_zso->zvo_disk);
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
blk_cleanup_queue(zv->zv_zso->zvo_queue);
|
|
||||||
put_disk(zv->zv_zso->zvo_disk);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (zv->zv_zso->use_blk_mq)
|
|
||||||
blk_mq_free_tag_set(&zv->zv_zso->tag_set);
|
|
||||||
|
|
||||||
ida_simple_remove(&zvol_ida,
|
|
||||||
MINOR(zv->zv_zso->zvo_dev) >> ZVOL_MINOR_BITS);
|
|
||||||
|
|
||||||
cv_destroy(&zv->zv_removing_cv);
|
cv_destroy(&zv->zv_removing_cv);
|
||||||
mutex_destroy(&zv->zv_state_lock);
|
mutex_destroy(&zv->zv_state_lock);
|
||||||
dataset_kstats_destroy(&zv->zv_kstat);
|
dataset_kstats_destroy(&zv->zv_kstat);
|
||||||
|
|
||||||
kmem_free(zv->zv_zso, sizeof (struct zvol_state_os));
|
|
||||||
kmem_free(zv, sizeof (zvol_state_t));
|
kmem_free(zv, sizeof (zvol_state_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -38,7 +38,7 @@
|
|||||||
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
|
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
|
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
|
||||||
* Copyright (c) 2012, 2019 by Delphix. All rights reserved.
|
* Copyright (c) 2012, 2019 by Delphix. All rights reserved.
|
||||||
* Copyright (c) 2024, Klara, Inc.
|
* Copyright (c) 2024, 2025, Klara, Inc.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1599,8 +1599,8 @@ zvol_remove_minor_task(void *arg)
|
|||||||
rw_enter(&zvol_state_lock, RW_WRITER);
|
rw_enter(&zvol_state_lock, RW_WRITER);
|
||||||
mutex_enter(&zv->zv_state_lock);
|
mutex_enter(&zv->zv_state_lock);
|
||||||
|
|
||||||
|
zvol_os_remove_minor(zv);
|
||||||
zvol_remove(zv);
|
zvol_remove(zv);
|
||||||
zvol_os_clear_private(zv);
|
|
||||||
|
|
||||||
mutex_exit(&zv->zv_state_lock);
|
mutex_exit(&zv->zv_state_lock);
|
||||||
rw_exit(&zvol_state_lock);
|
rw_exit(&zvol_state_lock);
|
||||||
@ -1669,9 +1669,9 @@ zvol_remove_minors_impl(zvol_task_t *task)
|
|||||||
* If in use, try to throw everyone off and try again
|
* If in use, try to throw everyone off and try again
|
||||||
* later.
|
* later.
|
||||||
*/
|
*/
|
||||||
|
zv->zv_flags |= ZVOL_REMOVING;
|
||||||
if (zv->zv_open_count > 0 ||
|
if (zv->zv_open_count > 0 ||
|
||||||
atomic_read(&zv->zv_suspend_ref)) {
|
atomic_read(&zv->zv_suspend_ref)) {
|
||||||
zv->zv_flags |= ZVOL_REMOVING;
|
|
||||||
t = taskq_dispatch(
|
t = taskq_dispatch(
|
||||||
zv->zv_objset->os_spa->spa_zvol_taskq,
|
zv->zv_objset->os_spa->spa_zvol_taskq,
|
||||||
zvol_remove_minor_task, zv, TQ_SLEEP);
|
zvol_remove_minor_task, zv, TQ_SLEEP);
|
||||||
@ -1687,14 +1687,9 @@ zvol_remove_minors_impl(zvol_task_t *task)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
zvol_os_remove_minor(zv);
|
||||||
zvol_remove(zv);
|
zvol_remove(zv);
|
||||||
|
|
||||||
/*
|
|
||||||
* Cleared while holding zvol_state_lock as a writer
|
|
||||||
* which will prevent zvol_open() from opening it.
|
|
||||||
*/
|
|
||||||
zvol_os_clear_private(zv);
|
|
||||||
|
|
||||||
/* Drop zv_state_lock before zvol_free() */
|
/* Drop zv_state_lock before zvol_free() */
|
||||||
mutex_exit(&zv->zv_state_lock);
|
mutex_exit(&zv->zv_state_lock);
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user