Add support for asynchronous zvol minor operations

zfsonlinux issue #2217 - zvol minor operations: check snapdev
property before traversing snapshots of a dataset

zfsonlinux issue #3681 - lock order inversion between zvol_open()
and dsl_pool_sync()...zvol_rename_minors()

Create a per-pool zvol taskq for asynchronous zvol tasks.
There are a few key design decisions to be aware of.

* Each taskq must be single threaded to ensure tasks are always
  processed in the order in which they were dispatched.

* There is a taskq per-pool in order to keep the pools independent.
  This way if one pool is suspended it will not impact another.

* The preferred location to dispatch a zvol minor task is a sync
  task.  In this context there is easy access to the spa_t and
  minimal error handling is required because the sync task must
  succeed.

Support for asynchronous zvol minor operations address issue #3681.

Signed-off-by: Boris Protopopov <boris.protopopov@actifio.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #2217
Closes #3678
Closes #3681
This commit is contained in:
Boris Protopopov 2014-03-22 05:07:14 -04:00 committed by Brian Behlendorf
parent eb0856779f
commit a0bd735adb
12 changed files with 485 additions and 217 deletions

View File

@ -23,6 +23,7 @@
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
#ifndef _SYS_SPA_IMPL_H
@ -253,6 +254,7 @@ struct spa {
uint64_t spa_errata; /* errata issues detected */
spa_stats_t spa_stats; /* assorted spa statistics */
hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */
taskq_t *spa_zvol_taskq; /* Taskq for minor managment */
/*
* spa_refcount & spa_config_lock must be the last elements

View File

@ -21,6 +21,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
#ifndef _SYS_ZVOL_H
@ -31,24 +32,22 @@
#define ZVOL_OBJ 1ULL
#define ZVOL_ZAP_OBJ 2ULL
#ifdef _KERNEL
extern void zvol_create_minors(spa_t *spa, const char *name, boolean_t async);
extern void zvol_remove_minors(spa_t *spa, const char *name, boolean_t async);
extern void zvol_rename_minors(spa_t *spa, const char *oldname,
const char *newname, boolean_t async);
#ifdef _KERNEL
extern int zvol_check_volsize(uint64_t volsize, uint64_t blocksize);
extern int zvol_check_volblocksize(const char *name, uint64_t volblocksize);
extern int zvol_get_stats(objset_t *os, nvlist_t *nv);
extern boolean_t zvol_is_zvol(const char *);
extern void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
extern int zvol_create_minor(const char *name);
extern int zvol_create_minors(const char *name);
extern int zvol_remove_minor(const char *name);
extern void zvol_remove_minors(const char *name);
extern void zvol_rename_minors(const char *oldname, const char *newname);
extern int zvol_set_volsize(const char *, uint64_t);
extern int zvol_set_volblocksize(const char *, uint64_t);
extern int zvol_set_snapdev(const char *, uint64_t);
extern int zvol_set_snapdev(const char *, zprop_source_t, uint64_t);
extern int zvol_init(void);
extern void zvol_fini(void);
#endif /* _KERNEL */
#endif /* _SYS_ZVOL_H */

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
#include <assert.h>
@ -1354,3 +1355,24 @@ spl_fstrans_check(void)
{
return (0);
}
void
zvol_create_minors(spa_t *spa, const char *name, boolean_t async)
{
}
void
zvol_remove_minor(spa_t *spa, const char *name, boolean_t async)
{
}
void
zvol_remove_minors(spa_t *spa, const char *name, boolean_t async)
{
}
void
zvol_rename_minors(spa_t *spa, const char *oldname, const char *newname,
boolean_t async)
{
}

View File

@ -26,6 +26,7 @@
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright (c) 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2015, STRATO AG, Inc. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@ -868,6 +869,8 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
}
spa_history_log_internal_ds(ds, "create", tx, "");
zvol_create_minors(dp->dp_spa, doca->doca_name, B_TRUE);
dsl_dataset_rele(ds, FTAG);
dsl_dir_rele(pdd, FTAG);
}
@ -961,6 +964,7 @@ dmu_objset_clone_sync(void *arg, dmu_tx_t *tx)
dsl_dataset_name(origin, namebuf);
spa_history_log_internal_ds(ds, "clone", tx,
"origin=%s (%llu)", namebuf, origin->ds_object);
zvol_create_minors(dp->dp_spa, doca->doca_clone, B_TRUE);
dsl_dataset_rele(ds, FTAG);
dsl_dataset_rele(origin, FTAG);
dsl_dir_rele(pdd, FTAG);

View File

@ -24,6 +24,7 @@
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
#include <sys/dmu.h>
@ -54,6 +55,7 @@
#include <sys/dsl_bookmark.h>
#include <sys/zfeature.h>
#include <sys/bqueue.h>
#include <sys/zvol.h>
/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
int zfs_send_corrupt_data = B_FALSE;
@ -2646,6 +2648,7 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT;
}
drc->drc_newsnapobj = dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj;
zvol_create_minors(dp->dp_spa, drc->drc_tofs, B_TRUE);
/*
* Release the hold from dmu_recv_begin. This must be done before
* we return to open context, so that when we free the dataset's dnode,

View File

@ -24,6 +24,7 @@
* Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright (c) 2014 RackTop Systems.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
#include <sys/dmu_objset.h>
@ -1424,6 +1425,7 @@ dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx)
dsl_props_set_sync_impl(ds->ds_prev,
ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx);
}
zvol_create_minors(dp->dp_spa, nvpair_name(pair), B_TRUE);
dsl_dataset_rele(ds, FTAG);
}
}
@ -1498,16 +1500,6 @@ dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
fnvlist_free(suspended);
}
#ifdef _KERNEL
if (error == 0) {
for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
pair = nvlist_next_nvpair(snaps, pair)) {
char *snapname = nvpair_name(pair);
zvol_create_minors(snapname);
}
}
#endif
return (error);
}
@ -1930,6 +1922,8 @@ dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
VERIFY0(zap_add(dp->dp_meta_objset,
dsl_dataset_phys(hds)->ds_snapnames_zapobj,
ds->ds_snapname, 8, 1, &ds->ds_object, tx));
zvol_rename_minors(dp->dp_spa, ddrsa->ddrsa_oldsnapname,
ddrsa->ddrsa_newsnapname, B_TRUE);
dsl_dataset_rele(ds, FTAG);
return (0);
@ -1958,11 +1952,6 @@ int
dsl_dataset_rename_snapshot(const char *fsname,
const char *oldsnapname, const char *newsnapname, boolean_t recursive)
{
#ifdef _KERNEL
char *oldname, *newname;
#endif
int error;
dsl_dataset_rename_snapshot_arg_t ddrsa;
ddrsa.ddrsa_fsname = fsname;
@ -1970,22 +1959,9 @@ dsl_dataset_rename_snapshot(const char *fsname,
ddrsa.ddrsa_newsnapname = newsnapname;
ddrsa.ddrsa_recursive = recursive;
error = dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check,
return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check,
dsl_dataset_rename_snapshot_sync, &ddrsa,
1, ZFS_SPACE_CHECK_RESERVED);
if (error)
return (SET_ERROR(error));
#ifdef _KERNEL
oldname = kmem_asprintf("%s@%s", fsname, oldsnapname);
newname = kmem_asprintf("%s@%s", fsname, newsnapname);
zvol_rename_minors(oldname, newname);
strfree(newname);
strfree(oldname);
#endif
return (0);
1, ZFS_SPACE_CHECK_RESERVED));
}
/*

View File

@ -23,6 +23,7 @@
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2013 by Joyent, Inc. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -40,6 +41,7 @@
#include <sys/zfs_ioctl.h>
#include <sys/dsl_deleg.h>
#include <sys/dmu_impl.h>
#include <sys/zvol.h>
typedef struct dmu_snapshots_destroy_arg {
nvlist_t *dsda_snaps;
@ -243,9 +245,6 @@ dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
void
dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
{
#ifdef ZFS_DEBUG
int err;
#endif
spa_feature_t f;
int after_branch_point = FALSE;
dsl_pool_t *dp = ds->ds_dir->dd_pool;
@ -441,6 +440,7 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
#ifdef ZFS_DEBUG
{
uint64_t val;
int err;
err = dsl_dataset_snap_lookup(ds_head,
ds->ds_snapname, &val);
@ -490,6 +490,7 @@ dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx)
VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
dsl_destroy_snapshot_sync_impl(ds, dsda->dsda_defer, tx);
zvol_remove_minors(dp->dp_spa, nvpair_name(pair), B_TRUE);
dsl_dataset_rele(ds, FTAG);
}
}
@ -889,6 +890,7 @@ dsl_destroy_head_sync(void *arg, dmu_tx_t *tx)
VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
dsl_destroy_head_sync_impl(ds, tx);
zvol_remove_minors(dp->dp_spa, ddha->ddha_name, B_TRUE);
dsl_dataset_rele(ds, FTAG);
}

View File

@ -24,6 +24,7 @@
* Copyright (c) 2013 Martin Matuska. All rights reserved.
* Copyright (c) 2014 Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
#include <sys/dmu.h>
@ -1909,9 +1910,8 @@ dsl_dir_rename_sync(void *arg, dmu_tx_t *tx)
VERIFY0(zap_add(mos, dsl_dir_phys(newparent)->dd_child_dir_zapobj,
dd->dd_myname, 8, 1, &dd->dd_object, tx));
#ifdef _KERNEL
zvol_rename_minors(ddra->ddra_oldname, ddra->ddra_newname);
#endif
zvol_rename_minors(dp->dp_spa, ddra->ddra_oldname,
ddra->ddra_newname, B_TRUE);
dsl_prop_notify_all(dd);

View File

@ -24,6 +24,7 @@
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013, 2014, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
/*
@ -1136,6 +1137,24 @@ spa_activate(spa_t *spa, int mode)
avl_create(&spa->spa_errlist_last,
spa_error_entry_compare, sizeof (spa_error_entry_t),
offsetof(spa_error_entry_t, se_avl));
/*
* This taskq is used to perform zvol-minor-related tasks
* asynchronously. This has several advantages, including easy
* resolution of various deadlocks (zfsonlinux bug #3681).
*
* The taskq must be single threaded to ensure tasks are always
* processed in the order in which they were dispatched.
*
* A taskq per pool allows one to keep the pools independent.
* This way if one pool is suspended, it will not impact another.
*
* The preferred location to dispatch a zvol minor task is a sync
* task. In this context, there is easy access to the spa_t and minimal
* error handling is required because the sync task must succeed.
*/
spa->spa_zvol_taskq = taskq_create("z_zvol", 1, defclsyspri,
1, INT_MAX, 0);
}
/*
@ -1154,6 +1173,11 @@ spa_deactivate(spa_t *spa)
spa_evicting_os_wait(spa);
if (spa->spa_zvol_taskq) {
taskq_destroy(spa->spa_zvol_taskq);
spa->spa_zvol_taskq = NULL;
}
txg_list_destroy(&spa->spa_vdev_txg_list);
list_destroy(&spa->spa_config_dirty_list);
@ -3088,10 +3112,8 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
mutex_exit(&spa_namespace_lock);
}
#ifdef _KERNEL
if (firstopen)
zvol_create_minors(spa->spa_name);
#endif
zvol_create_minors(spa, spa_name(spa), B_TRUE);
*spapp = spa;
@ -4211,10 +4233,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
mutex_exit(&spa_namespace_lock);
spa_history_log_version(spa, "import");
#ifdef _KERNEL
zvol_create_minors(pool);
#endif
zvol_create_minors(spa, pool, B_TRUE);
return (0);
}
@ -4349,6 +4368,10 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
spa_open_ref(spa, FTAG);
mutex_exit(&spa_namespace_lock);
spa_async_suspend(spa);
if (spa->spa_zvol_taskq) {
zvol_remove_minors(spa, spa_name(spa), B_TRUE);
taskq_wait(spa->spa_zvol_taskq);
}
mutex_enter(&spa_namespace_lock);
spa_close(spa, FTAG);

View File

@ -29,6 +29,7 @@
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
/*
@ -1499,8 +1500,7 @@ zfs_ioc_pool_destroy(zfs_cmd_t *zc)
int error;
zfs_log_history(zc);
error = spa_destroy(zc->zc_name);
if (error == 0)
zvol_remove_minors(zc->zc_name);
return (error);
}
@ -1552,8 +1552,7 @@ zfs_ioc_pool_export(zfs_cmd_t *zc)
zfs_log_history(zc);
error = spa_export(zc->zc_name, NULL, force, hardforce);
if (error == 0)
zvol_remove_minors(zc->zc_name);
return (error);
}
@ -2394,7 +2393,7 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
err = zvol_set_volsize(dsname, intval);
break;
case ZFS_PROP_SNAPDEV:
err = zvol_set_snapdev(dsname, intval);
err = zvol_set_snapdev(dsname, source, intval);
break;
case ZFS_PROP_VERSION:
{
@ -3188,12 +3187,6 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
if (error != 0)
(void) dsl_destroy_head(fsname);
}
#ifdef _KERNEL
if (error == 0 && type == DMU_OST_ZVOL)
zvol_create_minors(fsname);
#endif
return (error);
}
@ -3236,12 +3229,6 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
if (error != 0)
(void) dsl_destroy_head(fsname);
}
#ifdef _KERNEL
if (error == 0)
zvol_create_minors(fsname);
#endif
return (error);
}
@ -3304,11 +3291,6 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
error = dsl_dataset_snapshot(snaps, props, outnvl);
#ifdef _KERNEL
if (error == 0)
zvol_create_minors(poolname);
#endif
return (error);
}
@ -3434,7 +3416,6 @@ zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
pair = nvlist_next_nvpair(snaps, pair)) {
(void) zfs_unmount_snap(nvpair_name(pair));
(void) zvol_remove_minor(nvpair_name(pair));
}
return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
@ -3560,8 +3541,7 @@ zfs_ioc_destroy(zfs_cmd_t *zc)
err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
else
err = dsl_destroy_head(zc->zc_name);
if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
(void) zvol_remove_minor(zc->zc_name);
return (err);
}
@ -4127,11 +4107,6 @@ zfs_ioc_recv(zfs_cmd_t *zc)
}
#endif
#ifdef _KERNEL
if (error == 0)
zvol_create_minors(tofs);
#endif
/*
* On error, restore the original props.
*/
@ -6032,16 +6007,16 @@ _init(void)
return (error);
}
if ((error = -zvol_init()) != 0)
return (error);
spa_init(FREAD | FWRITE);
zfs_init();
if ((error = -zvol_init()) != 0)
goto out1;
zfs_ioctl_init();
if ((error = zfs_attach()) != 0)
goto out2;
goto out;
tsd_create(&zfs_fsyncer_key, NULL);
tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
@ -6057,11 +6032,10 @@ _init(void)
return (0);
out2:
(void) zvol_fini();
out1:
out:
zfs_fini();
spa_fini();
(void) zvol_fini();
printk(KERN_NOTICE "ZFS: Failed to Load ZFS Filesystem v%s-%s%s"
", rc = %d\n", ZFS_META_VERSION, ZFS_META_RELEASE,
ZFS_DEBUG_STR, error);
@ -6073,9 +6047,9 @@ static void __exit
_fini(void)
{
zfs_detach();
zvol_fini();
zfs_fini();
spa_fini();
zvol_fini();
tsd_destroy(&zfs_fsyncer_key);
tsd_destroy(&rrw_tsd_key);

View File

@ -42,6 +42,7 @@
#include <sys/dmu_traverse.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_dir.h>
#include <sys/zap.h>
#include <sys/zfeature.h>
#include <sys/zil_impl.h>
@ -49,6 +50,7 @@
#include <sys/zio.h>
#include <sys/zfs_rlock.h>
#include <sys/zfs_znode.h>
#include <sys/spa_impl.h>
#include <sys/zvol.h>
#include <linux/blkdev_compat.h>
@ -81,6 +83,23 @@ typedef struct zvol_state {
list_node_t zv_next; /* next zvol_state_t linkage */
} zvol_state_t;
typedef enum {
ZVOL_ASYNC_CREATE_MINORS,
ZVOL_ASYNC_REMOVE_MINORS,
ZVOL_ASYNC_RENAME_MINORS,
ZVOL_ASYNC_SET_SNAPDEV,
ZVOL_ASYNC_MAX
} zvol_async_op_t;
typedef struct {
zvol_async_op_t op;
char pool[MAXNAMELEN];
char name1[MAXNAMELEN];
char name2[MAXNAMELEN];
zprop_source_t source;
uint64_t snapdev;
} zvol_task_t;
#define ZVOL_RDONLY 0x1
/*
@ -977,6 +996,7 @@ zvol_first_open(zvol_state_t *zv)
error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
if (error) {
dmu_objset_disown(os, zvol_tag);
zv->zv_objset = NULL;
goto out_mutex;
}
@ -984,6 +1004,7 @@ zvol_first_open(zvol_state_t *zv)
error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf);
if (error) {
dmu_objset_disown(os, zvol_tag);
zv->zv_objset = NULL;
goto out_mutex;
}
@ -1036,7 +1057,7 @@ zvol_open(struct block_device *bdev, fmode_t flag)
/*
* If the caller is already holding the mutex do not take it
* again, this will happen as part of zvol_create_minor().
* again, this will happen as part of zvol_create_minor_impl().
* Once add_disk() is called the device is live and the kernel
* will attempt to open it to read the partition information.
*/
@ -1355,31 +1376,13 @@ zvol_free(zvol_state_t *zv)
kmem_free(zv, sizeof (zvol_state_t));
}
/*
* Create a block device minor node and setup the linkage between it
* and the specified volume. Once this function returns the block
* device is live and ready for use.
*/
static int
__zvol_snapdev_hidden(const char *name)
{
uint64_t snapdev;
char *parent;
char *atp;
int error = 0;
parent = kmem_alloc(MAXPATHLEN, KM_SLEEP);
(void) strlcpy(parent, name, MAXPATHLEN);
if ((atp = strrchr(parent, '@')) != NULL) {
*atp = '\0';
error = dsl_prop_get_integer(parent, "snapdev", &snapdev, NULL);
if ((error == 0) && (snapdev == ZFS_SNAPDEV_HIDDEN))
error = SET_ERROR(ENODEV);
}
kmem_free(parent, MAXPATHLEN);
return (SET_ERROR(error));
}
static int
__zvol_create_minor(const char *name, boolean_t ignore_snapdev)
zvol_create_minor_impl(const char *name)
{
zvol_state_t *zv;
objset_t *os;
@ -1389,7 +1392,7 @@ __zvol_create_minor(const char *name, boolean_t ignore_snapdev)
unsigned minor = 0;
int error = 0;
ASSERT(MUTEX_HELD(&zvol_state_lock));
mutex_enter(&zvol_state_lock);
zv = zvol_find_by_name(name);
if (zv) {
@ -1397,12 +1400,6 @@ __zvol_create_minor(const char *name, boolean_t ignore_snapdev)
goto out;
}
if (ignore_snapdev == B_FALSE) {
error = __zvol_snapdev_hidden(name);
if (error)
goto out;
}
doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);
error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os);
@ -1489,69 +1486,18 @@ out:
*/
mutex_exit(&zvol_state_lock);
add_disk(zv->zv_disk);
mutex_enter(&zvol_state_lock);
} else {
mutex_exit(&zvol_state_lock);
}
return (SET_ERROR(error));
}
/*
* Create a block device minor node and setup the linkage between it
* and the specified volume. Once this function returns the block
* device is live and ready for use.
*/
int
zvol_create_minor(const char *name)
{
int error;
mutex_enter(&zvol_state_lock);
error = __zvol_create_minor(name, B_FALSE);
mutex_exit(&zvol_state_lock);
return (SET_ERROR(error));
}
static int
__zvol_remove_minor(const char *name)
{
zvol_state_t *zv;
ASSERT(MUTEX_HELD(&zvol_state_lock));
zv = zvol_find_by_name(name);
if (zv == NULL)
return (SET_ERROR(ENXIO));
if (zv->zv_open_count > 0)
return (SET_ERROR(EBUSY));
zvol_remove(zv);
zvol_free(zv);
return (0);
}
/*
* Remove a block device minor node for the specified volume.
*/
int
zvol_remove_minor(const char *name)
{
int error;
mutex_enter(&zvol_state_lock);
error = __zvol_remove_minor(name);
mutex_exit(&zvol_state_lock);
return (SET_ERROR(error));
}
/*
* Rename a block device minor mode for the specified volume.
*/
static void
__zvol_rename_minor(zvol_state_t *zv, const char *newname)
zvol_rename_minor(zvol_state_t *zv, const char *newname)
{
int readonly = get_disk_ro(zv->zv_disk);
@ -1571,30 +1517,120 @@ __zvol_rename_minor(zvol_state_t *zv, const char *newname)
set_disk_ro(zv->zv_disk, readonly);
}
/*
* Mask errors to continue dmu_objset_find() traversal
*/
static int
zvol_create_minors_cb(const char *dsname, void *arg)
zvol_create_snap_minor_cb(const char *dsname, void *arg)
{
(void) zvol_create_minor(dsname);
const char *name = (const char *)arg;
/* skip the designated dataset */
if (name && strcmp(dsname, name) == 0)
return (0);
/* at this point, the dsname should name a snapshot */
if (strchr(dsname, '@') == 0) {
dprintf("zvol_create_snap_minor_cb(): "
"%s is not a shapshot name\n", dsname);
} else {
(void) zvol_create_minor_impl(dsname);
}
return (0);
}
/*
* Create minors for specified dataset including children and snapshots.
* Mask errors to continue dmu_objset_find() traversal
*/
int
zvol_create_minors(const char *name)
static int
zvol_create_minors_cb(const char *dsname, void *arg)
{
uint64_t snapdev;
int error;
error = dsl_prop_get_integer(dsname, "snapdev", &snapdev, NULL);
if (error)
return (0);
/*
* Given the name and the 'snapdev' property, create device minor nodes
* with the linkages to zvols/snapshots as needed.
* If the name represents a zvol, create a minor node for the zvol, then
* check if its snapshots are 'visible', and if so, iterate over the
* snapshots and create device minor nodes for those.
*/
if (strchr(dsname, '@') == 0) {
/* create minor for the 'dsname' explicitly */
error = zvol_create_minor_impl(dsname);
if ((error == 0 || error == EEXIST) &&
(snapdev == ZFS_SNAPDEV_VISIBLE)) {
fstrans_cookie_t cookie = spl_fstrans_mark();
/*
* traverse snapshots only, do not traverse children,
* and skip the 'dsname'
*/
error = dmu_objset_find((char *)dsname,
zvol_create_snap_minor_cb, (void *)dsname,
DS_FIND_SNAPSHOTS);
spl_fstrans_unmark(cookie);
}
} else {
dprintf("zvol_create_minors_cb(): %s is not a zvol name\n",
dsname);
}
return (0);
}
/*
* Create minors for the specified dataset, including children and snapshots.
* Pay attention to the 'snapdev' property and iterate over the snapshots
* only if they are 'visible'. This approach allows one to assure that the
* snapshot metadata is read from disk only if it is needed.
*
* The name can represent a dataset to be recursively scanned for zvols and
* their snapshots, or a single zvol snapshot. If the name represents a
* dataset, the scan is performed in two nested stages:
* - scan the dataset for zvols, and
* - for each zvol, create a minor node, then check if the zvol's snapshots
* are 'visible', and only then iterate over the snapshots if needed
*
* If the name represents a snapshot, a check is perfromed if the snapshot is
* 'visible' (which also verifies that the parent is a zvol), and if so,
* a minor node for that snapshot is created.
*/
static int
zvol_create_minors_impl(const char *name)
{
int error = 0;
fstrans_cookie_t cookie;
char *atp, *parent;
if (zvol_inhibit_dev)
return (0);
cookie = spl_fstrans_mark();
error = dmu_objset_find((char *)name, zvol_create_minors_cb,
NULL, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
spl_fstrans_unmark(cookie);
parent = kmem_alloc(MAXPATHLEN, KM_SLEEP);
(void) strlcpy(parent, name, MAXPATHLEN);
if ((atp = strrchr(parent, '@')) != NULL) {
uint64_t snapdev;
*atp = '\0';
error = dsl_prop_get_integer(parent, "snapdev",
&snapdev, NULL);
if (error == 0 && snapdev == ZFS_SNAPDEV_VISIBLE)
error = zvol_create_minor_impl(name);
} else {
cookie = spl_fstrans_mark();
error = dmu_objset_find(parent, zvol_create_minors_cb,
NULL, DS_FIND_CHILDREN);
spl_fstrans_unmark(cookie);
}
kmem_free(parent, MAXPATHLEN);
return (SET_ERROR(error));
}
@ -1602,8 +1638,8 @@ zvol_create_minors(const char *name)
/*
* Remove minors for specified dataset including children and snapshots.
*/
void
zvol_remove_minors(const char *name)
static void
zvol_remove_minors_impl(const char *name)
{
zvol_state_t *zv, *zv_next;
int namelen = ((name) ? strlen(name) : 0);
@ -1633,11 +1669,41 @@ zvol_remove_minors(const char *name)
mutex_exit(&zvol_state_lock);
}
/* Remove minor for this specific snapshot only */
static void
zvol_remove_minor_impl(const char *name)
{
zvol_state_t *zv, *zv_next;
if (zvol_inhibit_dev)
return;
if (strchr(name, '@') == NULL)
return;
mutex_enter(&zvol_state_lock);
for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
zv_next = list_next(&zvol_state_list, zv);
if (strcmp(zv->zv_name, name) == 0) {
/* If in use, leave alone */
if (zv->zv_open_count > 0)
continue;
zvol_remove(zv);
zvol_free(zv);
break;
}
}
mutex_exit(&zvol_state_lock);
}
/*
* Rename minors for specified dataset including children and snapshots.
*/
void
zvol_rename_minors(const char *oldname, const char *newname)
static void
zvol_rename_minors_impl(const char *oldname, const char *newname)
{
zvol_state_t *zv, *zv_next;
int oldnamelen, newnamelen;
@ -1660,14 +1726,14 @@ zvol_rename_minors(const char *oldname, const char *newname)
continue;
if (strcmp(zv->zv_name, oldname) == 0) {
__zvol_rename_minor(zv, newname);
zvol_rename_minor(zv, newname);
} else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 &&
(zv->zv_name[oldnamelen] == '/' ||
zv->zv_name[oldnamelen] == '@')) {
snprintf(name, MAXNAMELEN, "%s%c%s", newname,
zv->zv_name[oldnamelen],
zv->zv_name + oldnamelen + 1);
__zvol_rename_minor(zv, name);
zvol_rename_minor(zv, name);
}
}
@ -1676,42 +1742,227 @@ zvol_rename_minors(const char *oldname, const char *newname)
kmem_free(name, MAXNAMELEN);
}
typedef struct zvol_snapdev_cb_arg {
uint64_t snapdev;
} zvol_snapdev_cb_arg_t;
static int
snapdev_snapshot_changed_cb(const char *dsname, void *arg) {
uint64_t snapdev = *(uint64_t *) arg;
zvol_set_snapdev_cb(const char *dsname, void *param) {
zvol_snapdev_cb_arg_t *arg = param;
if (strchr(dsname, '@') == NULL)
return (0);
switch (snapdev) {
switch (arg->snapdev) {
case ZFS_SNAPDEV_VISIBLE:
mutex_enter(&zvol_state_lock);
(void) __zvol_create_minor(dsname, B_TRUE);
mutex_exit(&zvol_state_lock);
(void) zvol_create_minor_impl(dsname);
break;
case ZFS_SNAPDEV_HIDDEN:
(void) zvol_remove_minor(dsname);
(void) zvol_remove_minor_impl(dsname);
break;
}
return (0);
}
int
zvol_set_snapdev(const char *dsname, uint64_t snapdev) {
fstrans_cookie_t cookie;
if (zvol_inhibit_dev)
/* caller should continue to modify snapdev property */
return (-1);
cookie = spl_fstrans_mark();
(void) dmu_objset_find((char *) dsname, snapdev_snapshot_changed_cb,
&snapdev, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
static void
zvol_set_snapdev_impl(char *name, uint64_t snapdev)
{
zvol_snapdev_cb_arg_t arg = {snapdev};
fstrans_cookie_t cookie = spl_fstrans_mark();
/*
* The zvol_set_snapdev_sync() sets snapdev appropriately
* in the dataset hierarchy. Here, we only scan snapshots.
*/
dmu_objset_find(name, zvol_set_snapdev_cb, &arg, DS_FIND_SNAPSHOTS);
spl_fstrans_unmark(cookie);
}
/* caller should continue to modify snapdev property */
return (-1);
static zvol_task_t *
zvol_task_alloc(zvol_async_op_t op, const char *name1, const char *name2,
uint64_t snapdev)
{
zvol_task_t *task;
char *delim;
/* Never allow tasks on hidden names. */
if (name1[0] == '$')
return (NULL);
task = kmem_zalloc(sizeof (zvol_task_t), KM_SLEEP);
task->op = op;
task->snapdev = snapdev;
delim = strchr(name1, '/');
strlcpy(task->pool, name1, delim ? (delim - name1 + 1) : MAXNAMELEN);
strlcpy(task->name1, name1, MAXNAMELEN);
if (name2 != NULL)
strlcpy(task->name2, name2, MAXNAMELEN);
return (task);
}
static void
zvol_task_free(zvol_task_t *task)
{
kmem_free(task, sizeof (zvol_task_t));
}
/*
* The worker thread function performed asynchronously.
*/
static void
zvol_task_cb(void *param)
{
zvol_task_t *task = (zvol_task_t *)param;
switch (task->op) {
case ZVOL_ASYNC_CREATE_MINORS:
(void) zvol_create_minors_impl(task->name1);
break;
case ZVOL_ASYNC_REMOVE_MINORS:
zvol_remove_minors_impl(task->name1);
break;
case ZVOL_ASYNC_RENAME_MINORS:
zvol_rename_minors_impl(task->name1, task->name2);
break;
case ZVOL_ASYNC_SET_SNAPDEV:
zvol_set_snapdev_impl(task->name1, task->snapdev);
break;
default:
VERIFY(0);
break;
}
zvol_task_free(task);
}
typedef struct zvol_set_snapdev_arg {
const char *zsda_name;
uint64_t zsda_value;
zprop_source_t zsda_source;
dmu_tx_t *zsda_tx;
} zvol_set_snapdev_arg_t;
/*
* Sanity check the dataset for safe use by the sync task. No additional
* conditions are imposed.
*/
static int
zvol_set_snapdev_check(void *arg, dmu_tx_t *tx)
{
zvol_set_snapdev_arg_t *zsda = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dir_t *dd;
int error;
error = dsl_dir_hold(dp, zsda->zsda_name, FTAG, &dd, NULL);
if (error != 0)
return (error);
dsl_dir_rele(dd, FTAG);
return (error);
}
static int
zvol_set_snapdev_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
zvol_set_snapdev_arg_t *zsda = arg;
char dsname[MAXNAMELEN];
zvol_task_t *task;
dsl_dataset_name(ds, dsname);
dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_SNAPDEV),
zsda->zsda_source, sizeof (zsda->zsda_value), 1,
&zsda->zsda_value, zsda->zsda_tx);
task = zvol_task_alloc(ZVOL_ASYNC_SET_SNAPDEV, dsname,
NULL, zsda->zsda_value);
if (task == NULL)
return (0);
(void) taskq_dispatch(dp->dp_spa->spa_zvol_taskq, zvol_task_cb,
task, TQ_SLEEP);
return (0);
}
/*
* Traverse all child snapshot datasets and apply snapdev appropriately.
*/
static void
zvol_set_snapdev_sync(void *arg, dmu_tx_t *tx)
{
zvol_set_snapdev_arg_t *zsda = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dir_t *dd;
VERIFY0(dsl_dir_hold(dp, zsda->zsda_name, FTAG, &dd, NULL));
zsda->zsda_tx = tx;
dmu_objset_find_dp(dp, dd->dd_object, zvol_set_snapdev_sync_cb,
zsda, DS_FIND_CHILDREN);
dsl_dir_rele(dd, FTAG);
}
int
zvol_set_snapdev(const char *ddname, zprop_source_t source, uint64_t snapdev)
{
zvol_set_snapdev_arg_t zsda;
zsda.zsda_name = ddname;
zsda.zsda_source = source;
zsda.zsda_value = snapdev;
return (dsl_sync_task(ddname, zvol_set_snapdev_check,
zvol_set_snapdev_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE));
}
void
zvol_create_minors(spa_t *spa, const char *name, boolean_t async)
{
zvol_task_t *task;
taskqid_t id;
task = zvol_task_alloc(ZVOL_ASYNC_CREATE_MINORS, name, NULL, ~0ULL);
if (task == NULL)
return;
id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP);
if ((async == B_FALSE) && (id != 0))
taskq_wait_id(spa->spa_zvol_taskq, id);
}
void
zvol_remove_minors(spa_t *spa, const char *name, boolean_t async)
{
zvol_task_t *task;
taskqid_t id;
task = zvol_task_alloc(ZVOL_ASYNC_REMOVE_MINORS, name, NULL, ~0ULL);
if (task == NULL)
return;
id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP);
if ((async == B_FALSE) && (id != 0))
taskq_wait_id(spa->spa_zvol_taskq, id);
}
void
zvol_rename_minors(spa_t *spa, const char *name1, const char *name2,
boolean_t async)
{
zvol_task_t *task;
taskqid_t id;
task = zvol_task_alloc(ZVOL_ASYNC_RENAME_MINORS, name1, name2, ~0ULL);
if (task == NULL)
return;
id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP);
if ((async == B_FALSE) && (id != 0))
taskq_wait_id(spa->spa_zvol_taskq, id);
}
int
@ -1721,7 +1972,6 @@ zvol_init(void)
list_create(&zvol_state_list, sizeof (zvol_state_t),
offsetof(zvol_state_t, zv_next));
mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL);
error = register_blkdev(zvol_major, ZVOL_DRIVER);
@ -1745,11 +1995,13 @@ out:
void
zvol_fini(void)
{
zvol_remove_minors(NULL);
zvol_remove_minors_impl(NULL);
blk_unregister_region(MKDEV(zvol_major, 0), 1UL << MINORBITS);
unregister_blkdev(zvol_major, ZVOL_DRIVER);
mutex_destroy(&zvol_state_lock);
list_destroy(&zvol_state_list);
mutex_destroy(&zvol_state_lock);
}
module_param(zvol_inhibit_dev, uint, 0644);

View File

@ -217,15 +217,26 @@ test_3() {
zconfig_zvol_device_stat 10 ${POOL_NAME} ${FULL_ZVOL_NAME} \
${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 11
# Toggle the snapdev and observe snapshot device links toggled
${ZFS} set snapdev=hidden ${FULL_ZVOL_NAME} || fail 12
zconfig_zvol_device_stat 7 ${POOL_NAME} ${FULL_ZVOL_NAME} \
"invalid" ${FULL_CLONE_NAME} || fail 13
${ZFS} set snapdev=visible ${FULL_ZVOL_NAME} || fail 14
zconfig_zvol_device_stat 10 ${POOL_NAME} ${FULL_ZVOL_NAME} \
${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 15
# Destroy the pool and consequently the devices
${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 -d || fail 12
${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 -d || fail 16
# verify the devices were removed
zconfig_zvol_device_stat 0 ${POOL_NAME} ${FULL_ZVOL_NAME} \
${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 13
${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 17
${ZFS_SH} -u || fail 14
rm -f ${TMP_CACHE} || fail 15
${ZFS_SH} -u || fail 18
rm -f ${TMP_CACHE} || fail 19
pass
}