diff --git a/include/sys/zcp.h b/include/sys/zcp.h index 5cc520da5..3d1480050 100644 --- a/include/sys/zcp.h +++ b/include/sys/zcp.h @@ -131,6 +131,14 @@ typedef struct zcp_run_info { */ nvlist_t *zri_outnvl; + /* + * The keys of this nvlist are datasets which may be zvols and may need + * to have device minor nodes created. This information is passed from + * syncing context (where the zvol is created) to open context (where we + * create the minor nodes). + */ + nvlist_t *zri_new_zvols; + /* * The errno number returned to caller of zcp_eval(). */ diff --git a/include/sys/zvol.h b/include/sys/zvol.h index 7852838f8..8efb7f5e6 100644 --- a/include/sys/zvol.h +++ b/include/sys/zvol.h @@ -35,20 +35,20 @@ #define SPEC_MAXOFFSET_T ((1LL << ((NBBY * sizeof (daddr32_t)) + \ DEV_BSHIFT - 1)) - 1) -extern void zvol_create_minors(spa_t *spa, const char *name, boolean_t async); -extern void zvol_remove_minors(spa_t *spa, const char *name, boolean_t async); -extern void zvol_rename_minors(spa_t *spa, const char *oldname, - const char *newname, boolean_t async); +extern void zvol_create_minor(const char *); +extern void zvol_create_minors_recursive(const char *); +extern void zvol_remove_minors(spa_t *, const char *, boolean_t); +extern void zvol_rename_minors(spa_t *, const char *, const char *, boolean_t); #ifdef _KERNEL struct zvol_state; typedef struct zvol_state zvol_state_handle_t; -extern int zvol_check_volsize(uint64_t volsize, uint64_t blocksize); -extern int zvol_check_volblocksize(const char *name, uint64_t volblocksize); -extern int zvol_get_stats(objset_t *os, nvlist_t *nv); +extern int zvol_check_volsize(uint64_t, uint64_t); +extern int zvol_check_volblocksize(const char *, uint64_t); +extern int zvol_get_stats(objset_t *, nvlist_t *); extern boolean_t zvol_is_zvol(const char *); -extern void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); +extern void zvol_create_cb(objset_t *, void *, cred_t *, dmu_tx_t *); extern int zvol_set_volsize(const 
char *, uint64_t); extern int zvol_set_volblocksize(const char *, uint64_t); extern int zvol_set_snapdev(const char *, zprop_source_t, uint64_t); diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index cd6553eeb..d19ecc18f 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -1013,7 +1013,12 @@ kmem_cache_reap_active(void) void *zvol_tag = "zvol_tag"; void -zvol_create_minors(spa_t *spa, const char *name, boolean_t async) +zvol_create_minor(const char *name) +{ +} + +void +zvol_create_minors_recursive(const char *name) { } diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c index 2b1c82e67..ce719734c 100644 --- a/module/os/linux/zfs/zvol_os.c +++ b/module/os/linux/zfs/zvol_os.c @@ -842,7 +842,7 @@ zvol_free(zvol_state_t *zv) * device is live and ready for use. */ static int -zvol_create_minor(const char *name) +zvol_os_create_minor(const char *name) { zvol_state_t *zv; objset_t *os; @@ -967,7 +967,7 @@ out_doi: ida_simple_remove(&zvol_ida, idx); } - return (SET_ERROR(error)); + return (error); } static void @@ -1014,7 +1014,7 @@ zvol_set_capacity_impl(zvol_state_t *zv, uint64_t capacity) const static zvol_platform_ops_t zvol_linux_ops = { .zv_free = zvol_free, .zv_rename_minor = zvol_rename_minor, - .zv_create_minor = zvol_create_minor, + .zv_create_minor = zvol_os_create_minor, .zv_update_volsize = zvol_update_volsize, .zv_clear_private = zvol_clear_private, .zv_is_zvol = zvol_is_zvol_impl, diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 2a9464e2a..4db8e581c 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -1263,7 +1263,6 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx) } spa_history_log_internal_ds(ds, "create", tx, " "); - zvol_create_minors(spa, doca->doca_name, B_TRUE); dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); dsl_dir_rele(pdd, FTAG); @@ -1293,9 +1292,13 @@ dmu_objset_create(const char *name, 
dmu_objset_type_t type, uint64_t flags, */ doca.doca_dcp = (dcp != NULL) ? dcp : &tmp_dcp; - return (dsl_sync_task(name, + int rv = dsl_sync_task(name, dmu_objset_create_check, dmu_objset_create_sync, &doca, - 6, ZFS_SPACE_CHECK_NORMAL)); + 6, ZFS_SPACE_CHECK_NORMAL); + + if (rv == 0) + zvol_create_minor(name); + return (rv); } typedef struct dmu_objset_clone_arg { @@ -1376,7 +1379,6 @@ dmu_objset_clone_sync(void *arg, dmu_tx_t *tx) dsl_dataset_name(origin, namebuf); spa_history_log_internal_ds(ds, "clone", tx, "origin=%s (%llu)", namebuf, (u_longlong_t)origin->ds_object); - zvol_create_minors(dp->dp_spa, doca->doca_clone, B_TRUE); dsl_dataset_rele(ds, FTAG); dsl_dataset_rele(origin, FTAG); dsl_dir_rele(pdd, FTAG); @@ -1391,9 +1393,14 @@ dmu_objset_clone(const char *clone, const char *origin) doca.doca_origin = origin; doca.doca_cred = CRED(); - return (dsl_sync_task(clone, + int rv = dsl_sync_task(clone, dmu_objset_clone_check, dmu_objset_clone_sync, &doca, - 6, ZFS_SPACE_CHECK_NORMAL)); + 6, ZFS_SPACE_CHECK_NORMAL); + + if (rv == 0) + zvol_create_minor(clone); + + return (rv); } int diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c index 46a42197b..97a3c7cee 100644 --- a/module/zfs/dmu_recv.c +++ b/module/zfs/dmu_recv.c @@ -2859,6 +2859,12 @@ out: if (drc->drc_next_rrd != NULL) kmem_free(drc->drc_next_rrd, sizeof (*drc->drc_next_rrd)); + /* + * The objset will be invalidated by dmu_recv_end() when we do + * dsl_dataset_clone_swap_sync_impl(). + */ + drc->drc_os = NULL; + kmem_free(rwa, sizeof (*rwa)); nvlist_free(drc->drc_begin_nvl); if ((drc->drc_featureflags & DMU_BACKUP_FEATURE_DEDUP) && @@ -3085,8 +3091,6 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx) &drc->drc_ivset_guid, tx)); } - zvol_create_minors(dp->dp_spa, drc->drc_tofs, B_TRUE); - /* * Release the hold from dmu_recv_begin. 
This must be done before * we return to open context, so that when we free the dataset's dnode @@ -3195,9 +3199,20 @@ dmu_recv_end(dmu_recv_cookie_t *drc, void *owner) if (error != 0) { dmu_recv_cleanup_ds(drc); nvlist_free(drc->drc_keynvl); - } else if (drc->drc_guid_to_ds_map != NULL) { - (void) add_ds_to_guidmap(drc->drc_tofs, drc->drc_guid_to_ds_map, - drc->drc_newsnapobj, drc->drc_raw); + } else { + if (drc->drc_newfs) { + zvol_create_minor(drc->drc_tofs); + } + char *snapname = kmem_asprintf("%s@%s", + drc->drc_tofs, drc->drc_tosnap); + zvol_create_minor(snapname); + kmem_strfree(snapname); + + if (drc->drc_guid_to_ds_map != NULL) { + (void) add_ds_to_guidmap(drc->drc_tofs, + drc->drc_guid_to_ds_map, + drc->drc_newsnapobj, drc->drc_raw); + } } return (error); } diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c index 762933dd0..8e7f15ad2 100644 --- a/module/zfs/dsl_crypt.c +++ b/module/zfs/dsl_crypt.c @@ -854,7 +854,7 @@ spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp, dsl_pool_rele(dp, FTAG); /* create any zvols under this ds */ - zvol_create_minors(dp->dp_spa, dsname, B_TRUE); + zvol_create_minors_recursive(dsname); return (0); diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index ea2b60076..126d1b688 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -1869,7 +1869,6 @@ dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx) dsl_props_set_sync_impl(ds->ds_prev, ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx); } - zvol_create_minors(dp->dp_spa, nvpair_name(pair), B_TRUE); dsl_dataset_rele(ds, FTAG); } } @@ -1944,6 +1943,13 @@ dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors) fnvlist_free(suspended); } + if (error == 0) { + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { + zvol_create_minor(nvpair_name(pair)); + } + } + return (error); } diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 39b59d5ce..c12b20270 100644 --- 
a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -5114,7 +5114,7 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy, } if (firstopen) - zvol_create_minors(spa, spa_name(spa), B_TRUE); + zvol_create_minors_recursive(spa_name(spa)); *spapp = spa; @@ -6083,7 +6083,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) mutex_exit(&spa_namespace_lock); - zvol_create_minors(spa, pool, B_TRUE); + zvol_create_minors_recursive(pool); return (0); } diff --git a/module/zfs/zcp.c b/module/zfs/zcp.c index 870ba0366..e8cf96d49 100644 --- a/module/zfs/zcp.c +++ b/module/zfs/zcp.c @@ -100,6 +100,7 @@ #include #include #include +#include <sys/zvol.h> #ifndef KM_NORMALPRI #define KM_NORMALPRI 0 @@ -1155,6 +1156,7 @@ zcp_eval(const char *poolname, const char *program, boolean_t sync, runinfo.zri_space_used = 0; runinfo.zri_curinstrs = 0; runinfo.zri_maxinstrs = instrlimit; + runinfo.zri_new_zvols = fnvlist_alloc(); if (sync) { err = dsl_sync_task_sig(poolname, NULL, zcp_eval_sync, @@ -1166,6 +1168,16 @@ zcp_eval(const char *poolname, const char *program, boolean_t sync, } lua_close(state); + /* + * Create device minor nodes for any new zvols. + */ + for (nvpair_t *pair = nvlist_next_nvpair(runinfo.zri_new_zvols, NULL); + pair != NULL; + pair = nvlist_next_nvpair(runinfo.zri_new_zvols, pair)) { + zvol_create_minor(nvpair_name(pair)); + } + fnvlist_free(runinfo.zri_new_zvols); + return (runinfo.zri_result); } diff --git a/module/zfs/zcp_synctask.c b/module/zfs/zcp_synctask.c index 3b6015f24..22fec6f3f 100644 --- a/module/zfs/zcp_synctask.c +++ b/module/zfs/zcp_synctask.c @@ -276,6 +276,16 @@ zcp_synctask_snapshot(lua_State *state, boolean_t sync, nvlist_t *err_details) err = zcp_sync_task(state, dsl_dataset_snapshot_check, dsl_dataset_snapshot_sync, &ddsa, sync, dsname); + if (err == 0) { + /* + * We may need to create a new device minor node for this + * dataset (if it is a zvol and the "snapdev" property is set). 
+ * Save it in the nvlist so that it can be processed in open + * context. + */ + fnvlist_add_boolean(ri->zri_new_zvols, dsname); + } + zcp_deregister_cleanup(state, zch); fnvlist_free(ddsa.ddsa_snaps); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index fb8034f70..b2517d84f 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -3312,8 +3312,9 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) /* * Volumes will return EBUSY and cannot be destroyed - * until all asynchronous minor handling has completed. - * Wait for the spa_zvol_taskq to drain then retry. + * until all asynchronous minor handling (e.g. from + * setting the volmode property) has completed. Wait for + * the spa_zvol_taskq to drain then retry. */ error2 = dsl_destroy_head(fsname); while ((error2 == EBUSY) && (type == DMU_OST_ZVOL)) { diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 5006b6af8..bbda652ee 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -97,7 +97,6 @@ krwlock_t zvol_state_lock; const zvol_platform_ops_t *ops; typedef enum { - ZVOL_ASYNC_CREATE_MINORS, ZVOL_ASYNC_REMOVE_MINORS, ZVOL_ASYNC_RENAME_MINORS, ZVOL_ASYNC_SET_SNAPDEV, @@ -1098,17 +1097,14 @@ zvol_create_minors_cb(const char *dsname, void *arg) * 'visible' (which also verifies that the parent is a zvol), and if so, * a minor node for that snapshot is created. */ -static int -zvol_create_minors_impl(const char *name) +void +zvol_create_minors_recursive(const char *name) { - int error = 0; - fstrans_cookie_t cookie; - char *atp, *parent; list_t minors_list; minors_job_t *job; if (zvol_inhibit_dev) - return (0); + return; /* * This is the list for prefetch jobs. 
Whenever we found a match @@ -1122,26 +1118,22 @@ zvol_create_minors_impl(const char *name) list_create(&minors_list, sizeof (minors_job_t), offsetof(minors_job_t, link)); - parent = kmem_alloc(MAXPATHLEN, KM_SLEEP); - (void) strlcpy(parent, name, MAXPATHLEN); - if ((atp = strrchr(parent, '@')) != NULL) { + if (strchr(name, '@') != NULL) { uint64_t snapdev; - *atp = '\0'; - error = dsl_prop_get_integer(parent, "snapdev", + int error = dsl_prop_get_integer(name, "snapdev", &snapdev, NULL); if (error == 0 && snapdev == ZFS_SNAPDEV_VISIBLE) - error = ops->zv_create_minor(name); + (void) ops->zv_create_minor(name); } else { - cookie = spl_fstrans_mark(); - error = dmu_objset_find(parent, zvol_create_minors_cb, + fstrans_cookie_t cookie = spl_fstrans_mark(); + (void) dmu_objset_find(name, zvol_create_minors_cb, &minors_list, DS_FIND_CHILDREN); spl_fstrans_unmark(cookie); } - kmem_free(parent, MAXPATHLEN); taskq_wait_outstanding(system_taskq, 0); /* @@ -1151,14 +1143,40 @@ zvol_create_minors_impl(const char *name) while ((job = list_head(&minors_list)) != NULL) { list_remove(&minors_list, job); if (!job->error) - ops->zv_create_minor(job->name); + (void) ops->zv_create_minor(job->name); kmem_strfree(job->name); kmem_free(job, sizeof (minors_job_t)); } list_destroy(&minors_list); +} - return (SET_ERROR(error)); +void +zvol_create_minor(const char *name) +{ + /* + * Note: the dsl_pool_config_lock must not be held. + * Minor node creation needs to obtain the zvol_state_lock. + * zvol_open() obtains the zvol_state_lock and then the dsl pool + * config lock. Therefore, we can't have the config lock now if + * we are going to wait for the zvol_state_lock, because it + * would be a lock order inversion which could lead to deadlock. 
+ */ + + if (zvol_inhibit_dev) + return; + + if (strchr(name, '@') != NULL) { + uint64_t snapdev; + + int error = dsl_prop_get_integer(name, + "snapdev", &snapdev, NULL); + + if (error == 0 && snapdev == ZFS_SNAPDEV_VISIBLE) + (void) ops->zv_create_minor(name); + } else { + (void) ops->zv_create_minor(name); + } } /* @@ -1366,7 +1384,7 @@ zvol_set_volmode_impl(char *name, uint64_t volmode) /* * It's unfortunate we need to remove minors before we create new ones: * this is necessary because our backing gendisk (zvol_state->zv_disk) - * coule be different when we set, for instance, volmode from "geom" + * could be different when we set, for instance, volmode from "geom" * to "dev" (or vice versa). * A possible optimization is to modify our consumers so we don't get * called when "volmode" does not change. @@ -1426,14 +1444,11 @@ zvol_task_free(zvol_task_t *task) * The worker thread function performed asynchronously. */ static void -zvol_task_cb(void *param) +zvol_task_cb(void *arg) { - zvol_task_t *task = (zvol_task_t *)param; + zvol_task_t *task = arg; switch (task->op) { - case ZVOL_ASYNC_CREATE_MINORS: - (void) zvol_create_minors_impl(task->name1); - break; case ZVOL_ASYNC_REMOVE_MINORS: zvol_remove_minors_impl(task->name1); break; @@ -1634,21 +1649,6 @@ zvol_set_volmode(const char *ddname, zprop_source_t source, uint64_t volmode) zvol_set_volmode_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE)); } -void -zvol_create_minors(spa_t *spa, const char *name, boolean_t async) -{ - zvol_task_t *task; - taskqid_t id; - - task = zvol_task_alloc(ZVOL_ASYNC_CREATE_MINORS, name, NULL, ~0ULL); - if (task == NULL) - return; - - id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP); - if ((async == B_FALSE) && (id != TASKQID_INVALID)) - taskq_wait_id(spa->spa_zvol_taskq, id); -} - void zvol_remove_minors(spa_t *spa, const char *name, boolean_t async) {