mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-25 03:37:45 +03:00
Remove ZFS_IOC_*_MINOR ioctl()s
Early versions of ZFS coordinated the creation and destruction of device minors from userspace. This was inherently racy and in late 2009 these ioctl()s were removed leaving everything up to the kernel. This significantly simplified the code. However, we never picked up these changes in ZoL since we'd already significantly adjusted this code for Linux. This patch aims to rectify that by finally removing ZFS_IOC_*_MINOR ioctl()s and moving all the functionality down into the kernel. Since this cleanup will change the kernel/user ABI it's being done in the same tag as the previous libzfs_core ABI changes. This will minimize, but not eliminate, the disruption to end users. Once merged ZoL, Illumos, and FreeBSD will basically be back in sync with regard to handling ZVOLs in the common code. While each platform must have its own custom zvol.c implementation the interfaces provided are consistent. NOTES: 1) This patch introduces one subtle change in behavior which could not be easily avoided. Prior to this change callers of 'zfs create -V ...' were guaranteed that upon exit the /dev/zvol/ block device link would be created or an error returned. That's no longer the case. The utilities will no longer block waiting for the symlink to be created. Callers are now responsible for blocking, this is why a 'udev_wait' call was added to the 'label' function in scripts/common.sh. 2) The read-only behavior of a ZVOL now solely depends on if the ZVOL_RDONLY bit is set in zv->zv_flags. The redundant policy setting in the gendisk structure was removed. This both simplifies the code and allows us to safely leverage set_disk_ro() to issue a KOBJ_CHANGE uevent. See the comment in the code for further details on this. 3) Because __zvol_create_minor() and zvol_alloc() may now be called in a sync task they must use KM_PUSHPAGE. 
References: illumos/illumos-gate@681d9761e8 Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Ned Bass <bass6@llnl.gov> Signed-off-by: Tim Chase <tim@chase2k.com> Closes #1969
This commit is contained in:
@@ -1229,6 +1229,16 @@ dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
|
||||
fnvlist_free(suspended);
|
||||
}
|
||||
|
||||
#ifdef _KERNEL
|
||||
if (error == 0) {
|
||||
for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
|
||||
pair = nvlist_next_nvpair(snaps, pair)) {
|
||||
char *snapname = nvpair_name(pair);
|
||||
zvol_create_minors(snapname);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
@@ -1601,6 +1611,9 @@ static int
|
||||
dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
|
||||
dsl_dataset_t *hds, void *arg)
|
||||
{
|
||||
#ifdef _KERNEL
|
||||
char *oldname, *newname;
|
||||
#endif
|
||||
dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
|
||||
dsl_dataset_t *ds;
|
||||
uint64_t val;
|
||||
@@ -1627,6 +1640,18 @@ dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
|
||||
VERIFY0(zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj,
|
||||
ds->ds_snapname, 8, 1, &ds->ds_object, tx));
|
||||
|
||||
#ifdef _KERNEL
|
||||
oldname = kmem_alloc(MAXPATHLEN, KM_PUSHPAGE);
|
||||
newname = kmem_alloc(MAXPATHLEN, KM_PUSHPAGE);
|
||||
snprintf(oldname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname,
|
||||
ddrsa->ddrsa_oldsnapname);
|
||||
snprintf(newname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname,
|
||||
ddrsa->ddrsa_newsnapname);
|
||||
zvol_rename_minors(oldname, newname);
|
||||
kmem_free(newname, MAXPATHLEN);
|
||||
kmem_free(oldname, MAXPATHLEN);
|
||||
#endif
|
||||
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (0);
|
||||
}
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
#include <sys/zio.h>
|
||||
#include <sys/arc.h>
|
||||
#include <sys/sunddi.h>
|
||||
#include <sys/zvol.h>
|
||||
#include "zfs_namecheck.h"
|
||||
|
||||
static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
|
||||
@@ -1302,6 +1303,10 @@ dsl_dir_rename_sync(void *arg, dmu_tx_t *tx)
|
||||
VERIFY0(zap_add(mos, newparent->dd_phys->dd_child_dir_zapobj,
|
||||
dd->dd_myname, 8, 1, &dd->dd_object, tx));
|
||||
|
||||
#ifdef _KERNEL
|
||||
zvol_rename_minors(ddra->ddra_oldname, ddra->ddra_newname);
|
||||
#endif
|
||||
|
||||
dsl_prop_notify_all(dd);
|
||||
|
||||
dsl_dir_rele(newparent, FTAG);
|
||||
|
||||
+28
-38
@@ -2089,7 +2089,7 @@ zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
|
||||
return (err);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
boolean_t
|
||||
dataset_name_hidden(const char *name)
|
||||
{
|
||||
/*
|
||||
@@ -2808,30 +2808,6 @@ zfs_ioc_pool_get_props(zfs_cmd_t *zc)
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* inputs:
|
||||
* zc_name name of volume
|
||||
*
|
||||
* outputs: none
|
||||
*/
|
||||
static int
|
||||
zfs_ioc_create_minor(zfs_cmd_t *zc)
|
||||
{
|
||||
return (zvol_create_minor(zc->zc_name));
|
||||
}
|
||||
|
||||
/*
|
||||
* inputs:
|
||||
* zc_name name of volume
|
||||
*
|
||||
* outputs: none
|
||||
*/
|
||||
static int
|
||||
zfs_ioc_remove_minor(zfs_cmd_t *zc)
|
||||
{
|
||||
return (zvol_remove_minor(zc->zc_name));
|
||||
}
|
||||
|
||||
/*
|
||||
* inputs:
|
||||
* zc_name name of filesystem
|
||||
@@ -3174,6 +3150,12 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
if (error != 0)
|
||||
(void) dsl_destroy_head(fsname);
|
||||
}
|
||||
|
||||
#ifdef _KERNEL
|
||||
if (error == 0 && type == DMU_OST_ZVOL)
|
||||
zvol_create_minors(fsname);
|
||||
#endif
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
@@ -3216,6 +3198,12 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
if (error != 0)
|
||||
(void) dsl_destroy_head(fsname);
|
||||
}
|
||||
|
||||
#ifdef _KERNEL
|
||||
if (error == 0)
|
||||
zvol_create_minors(fsname);
|
||||
#endif
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
@@ -3276,6 +3264,12 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
}
|
||||
|
||||
error = dsl_dataset_snapshot(snaps, props, outnvl);
|
||||
|
||||
#ifdef _KERNEL
|
||||
if (error == 0)
|
||||
zvol_create_minors(poolname);
|
||||
#endif
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
@@ -3427,10 +3421,10 @@ zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
(name[poollen] != '/' && name[poollen] != '@'))
|
||||
return (SET_ERROR(EXDEV));
|
||||
|
||||
(void) zvol_remove_minor(name);
|
||||
error = zfs_unmount_snap(name);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
(void) zvol_remove_minor(name);
|
||||
}
|
||||
|
||||
return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
|
||||
@@ -3520,7 +3514,6 @@ zfs_ioc_rename(zfs_cmd_t *zc)
|
||||
{
|
||||
boolean_t recursive = zc->zc_cookie & 1;
|
||||
char *at;
|
||||
int err;
|
||||
|
||||
zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
|
||||
if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
|
||||
@@ -3550,12 +3543,7 @@ zfs_ioc_rename(zfs_cmd_t *zc)
|
||||
|
||||
return (error);
|
||||
} else {
|
||||
err = dsl_dir_rename(zc->zc_name, zc->zc_value);
|
||||
if (!err && zc->zc_objset_type == DMU_OST_ZVOL) {
|
||||
(void) zvol_remove_minor(zc->zc_name);
|
||||
(void) zvol_create_minor(zc->zc_value);
|
||||
}
|
||||
return (err);
|
||||
return (dsl_dir_rename(zc->zc_name, zc->zc_value));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4045,6 +4033,12 @@ zfs_ioc_recv(zfs_cmd_t *zc)
|
||||
error = 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef _KERNEL
|
||||
if (error == 0)
|
||||
zvol_create_minors(tofs);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* On error, restore the original props.
|
||||
*/
|
||||
@@ -5391,12 +5385,8 @@ zfs_ioctl_init(void)
|
||||
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
|
||||
|
||||
/*
|
||||
* ZoL functions
|
||||
* ZoL functions
|
||||
*/
|
||||
zfs_ioctl_register_legacy(ZFS_IOC_CREATE_MINOR, zfs_ioc_create_minor,
|
||||
zfs_secpolicy_config, DATASET_NAME, B_FALSE, POOL_CHECK_NONE);
|
||||
zfs_ioctl_register_legacy(ZFS_IOC_REMOVE_MINOR, zfs_ioc_remove_minor,
|
||||
zfs_secpolicy_config, DATASET_NAME, B_FALSE, POOL_CHECK_NONE);
|
||||
zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next,
|
||||
zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
|
||||
zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear,
|
||||
|
||||
+87
-47
@@ -347,7 +347,7 @@ zvol_set_volsize(const char *name, uint64_t volsize)
|
||||
goto out_doi;
|
||||
}
|
||||
|
||||
if (get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY)) {
|
||||
if (zv->zv_flags & ZVOL_RDONLY) {
|
||||
error = SET_ERROR(EROFS);
|
||||
goto out_doi;
|
||||
}
|
||||
@@ -396,7 +396,7 @@ zvol_set_volblocksize(const char *name, uint64_t volblocksize)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY)) {
|
||||
if (zv->zv_flags & ZVOL_RDONLY) {
|
||||
error = SET_ERROR(EROFS);
|
||||
goto out;
|
||||
}
|
||||
@@ -770,8 +770,7 @@ zvol_request(struct request_queue *q)
|
||||
zvol_dispatch(zvol_read, req);
|
||||
break;
|
||||
case WRITE:
|
||||
if (unlikely(get_disk_ro(zv->zv_disk)) ||
|
||||
unlikely(zv->zv_flags & ZVOL_RDONLY)) {
|
||||
if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
|
||||
__blk_end_request(req, -EROFS, size);
|
||||
break;
|
||||
}
|
||||
@@ -1019,8 +1018,7 @@ zvol_open(struct block_device *bdev, fmode_t flag)
|
||||
goto out_mutex;
|
||||
}
|
||||
|
||||
if ((flag & FMODE_WRITE) &&
|
||||
(get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY))) {
|
||||
if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
|
||||
error = -EROFS;
|
||||
goto out_open_count;
|
||||
}
|
||||
@@ -1235,7 +1233,7 @@ zvol_alloc(dev_t dev, const char *name)
|
||||
zvol_state_t *zv;
|
||||
int error = 0;
|
||||
|
||||
zv = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP);
|
||||
zv = kmem_zalloc(sizeof (zvol_state_t), KM_PUSHPAGE);
|
||||
|
||||
spin_lock_init(&zv->zv_lock);
|
||||
list_link_init(&zv->zv_next);
|
||||
@@ -1315,7 +1313,7 @@ __zvol_snapdev_hidden(const char *name)
|
||||
char *atp;
|
||||
int error = 0;
|
||||
|
||||
parent = kmem_alloc(MAXPATHLEN, KM_SLEEP);
|
||||
parent = kmem_alloc(MAXPATHLEN, KM_PUSHPAGE);
|
||||
(void) strlcpy(parent, name, MAXPATHLEN);
|
||||
|
||||
if ((atp = strrchr(parent, '@')) != NULL) {
|
||||
@@ -1352,7 +1350,7 @@ __zvol_create_minor(const char *name, boolean_t ignore_snapdev)
|
||||
goto out;
|
||||
}
|
||||
|
||||
doi = kmem_alloc(sizeof(dmu_object_info_t), KM_SLEEP);
|
||||
doi = kmem_alloc(sizeof(dmu_object_info_t), KM_PUSHPAGE);
|
||||
|
||||
error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os);
|
||||
if (error)
|
||||
@@ -1474,77 +1472,118 @@ zvol_remove_minor(const char *name)
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Rename a block device minor mode for the specified volume.
|
||||
*/
|
||||
static void
|
||||
__zvol_rename_minor(zvol_state_t *zv, const char *newname)
|
||||
{
|
||||
int readonly = get_disk_ro(zv->zv_disk);
|
||||
|
||||
ASSERT(MUTEX_HELD(&zvol_state_lock));
|
||||
|
||||
strlcpy(zv->zv_name, newname, sizeof (zv->zv_name));
|
||||
|
||||
/*
|
||||
* The block device's read-only state is briefly changed causing
|
||||
* a KOBJ_CHANGE uevent to be issued. This ensures udev detects
|
||||
* the name change and fixes the symlinks. This does not change
|
||||
* ZVOL_RDONLY in zv->zv_flags so the actual read-only state never
|
||||
* changes. This would normally be done using kobject_uevent() but
|
||||
* that is a GPL-only symbol which is why we need this workaround.
|
||||
*/
|
||||
set_disk_ro(zv->zv_disk, !readonly);
|
||||
set_disk_ro(zv->zv_disk, readonly);
|
||||
}
|
||||
|
||||
static int
|
||||
zvol_create_minors_cb(const char *dsname, void *arg)
|
||||
{
|
||||
if (strchr(dsname, '/') == NULL)
|
||||
return 0;
|
||||
(void) zvol_create_minor(dsname);
|
||||
|
||||
(void) __zvol_create_minor(dsname, B_FALSE);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Create minors for specified pool, if pool is NULL create minors
|
||||
* for all available pools.
|
||||
* Create minors for specified dataset including children and snapshots.
|
||||
*/
|
||||
int
|
||||
zvol_create_minors(char *pool)
|
||||
zvol_create_minors(const char *name)
|
||||
{
|
||||
spa_t *spa = NULL;
|
||||
int error = 0;
|
||||
|
||||
if (zvol_inhibit_dev)
|
||||
return (0);
|
||||
|
||||
mutex_enter(&zvol_state_lock);
|
||||
if (pool) {
|
||||
error = dmu_objset_find(pool, zvol_create_minors_cb,
|
||||
if (!zvol_inhibit_dev)
|
||||
error = dmu_objset_find((char *)name, zvol_create_minors_cb,
|
||||
NULL, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
|
||||
} else {
|
||||
mutex_enter(&spa_namespace_lock);
|
||||
while ((spa = spa_next(spa)) != NULL) {
|
||||
error = dmu_objset_find(spa_name(spa), zvol_create_minors_cb, NULL,
|
||||
DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
|
||||
if (error)
|
||||
break;
|
||||
}
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
}
|
||||
mutex_exit(&zvol_state_lock);
|
||||
|
||||
return error;
|
||||
return (SET_ERROR(error));
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove minors for specified pool, if pool is NULL remove all minors.
|
||||
* Remove minors for specified dataset including children and snapshots.
|
||||
*/
|
||||
void
|
||||
zvol_remove_minors(const char *pool)
|
||||
zvol_remove_minors(const char *name)
|
||||
{
|
||||
zvol_state_t *zv, *zv_next;
|
||||
char *str;
|
||||
int namelen = ((name) ? strlen(name) : 0);
|
||||
|
||||
if (zvol_inhibit_dev)
|
||||
return;
|
||||
|
||||
str = kmem_zalloc(MAXNAMELEN, KM_SLEEP);
|
||||
if (pool) {
|
||||
(void) strncpy(str, pool, strlen(pool));
|
||||
(void) strcat(str, "/");
|
||||
}
|
||||
|
||||
mutex_enter(&zvol_state_lock);
|
||||
|
||||
for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
|
||||
zv_next = list_next(&zvol_state_list, zv);
|
||||
|
||||
if (pool == NULL || !strncmp(str, zv->zv_name, strlen(str))) {
|
||||
if (name == NULL || strcmp(zv->zv_name, name) == 0 ||
|
||||
(strncmp(zv->zv_name, name, namelen) == 0 &&
|
||||
zv->zv_name[namelen] == '/')) {
|
||||
zvol_remove(zv);
|
||||
zvol_free(zv);
|
||||
}
|
||||
}
|
||||
|
||||
mutex_exit(&zvol_state_lock);
|
||||
kmem_free(str, MAXNAMELEN);
|
||||
}
|
||||
|
||||
/*
|
||||
* Rename minors for specified dataset including children and snapshots.
|
||||
*/
|
||||
void
|
||||
zvol_rename_minors(const char *oldname, const char *newname)
|
||||
{
|
||||
zvol_state_t *zv, *zv_next;
|
||||
int oldnamelen, newnamelen;
|
||||
char *name;
|
||||
|
||||
if (zvol_inhibit_dev)
|
||||
return;
|
||||
|
||||
oldnamelen = strlen(oldname);
|
||||
newnamelen = strlen(newname);
|
||||
name = kmem_alloc(MAXNAMELEN, KM_PUSHPAGE);
|
||||
|
||||
mutex_enter(&zvol_state_lock);
|
||||
|
||||
for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
|
||||
zv_next = list_next(&zvol_state_list, zv);
|
||||
|
||||
if (strcmp(zv->zv_name, oldname) == 0) {
|
||||
__zvol_rename_minor(zv, newname);
|
||||
} else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 &&
|
||||
(zv->zv_name[oldnamelen] == '/' ||
|
||||
zv->zv_name[oldnamelen] == '@')) {
|
||||
snprintf(name, MAXNAMELEN, "%s%c%s", newname,
|
||||
zv->zv_name[oldnamelen],
|
||||
zv->zv_name + oldnamelen + 1);
|
||||
__zvol_rename_minor(zv, name);
|
||||
}
|
||||
}
|
||||
|
||||
mutex_exit(&zvol_state_lock);
|
||||
|
||||
kmem_free(name, MAXNAMELEN);
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -1552,7 +1591,7 @@ snapdev_snapshot_changed_cb(const char *dsname, void *arg) {
|
||||
uint64_t snapdev = *(uint64_t *) arg;
|
||||
|
||||
if (strchr(dsname, '@') == NULL)
|
||||
return 0;
|
||||
return (0);
|
||||
|
||||
switch (snapdev) {
|
||||
case ZFS_SNAPDEV_VISIBLE:
|
||||
@@ -1564,7 +1603,8 @@ snapdev_snapshot_changed_cb(const char *dsname, void *arg) {
|
||||
(void) zvol_remove_minor(dsname);
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
|
||||
Reference in New Issue
Block a user