// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 *
 * Copyright (c) 2006-2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Portions Copyright 2010 Robert Milkowski
 *
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 * Copyright (c) 2024, Klara, Inc.
 */
/* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */
/*
* ZFS volume emulation driver.
*
* Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes.
* Volumes are accessed through the symbolic links named:
*
* /dev/zvol/<pool_name>/<dataset_name>
*
* Volumes are persistent through reboot. No user command needs to be
* run before opening and using a device.
*
* On FreeBSD ZVOLs are simply GEOM providers like any other storage device
* in the system. Except when they're simply character devices (volmode=dev).
*/
#include <sys/types.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/errno.h>
#include <sys/uio.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/cmn_err.h>
#include <sys/stat.h>
#include <sys/proc.h>
2020-04-14 11:36:28 -07:00
#include <sys/zap.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/disk.h>
#include <sys/dmu_traverse.h>
#include <sys/dnode.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_dir.h>
#include <sys/byteorder.h>
#include <sys/sunddi.h>
#include <sys/dirent.h>
#include <sys/policy.h>
#include <sys/queue.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_ioctl.h>
#include <sys/zil.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_rlock.h>
#include <sys/vdev_impl.h>
#include <sys/vdev_raidz.h>
#include <sys/zvol.h>
#include <sys/zil_impl.h>
2020-06-05 20:17:02 -04:00
#include <sys/dataset_kstats.h>
2020-04-14 11:36:28 -07:00
#include <sys/dbuf.h>
#include <sys/dmu_tx.h>
#include <sys/zfeature.h>
#include <sys/zio_checksum.h>
#include <sys/zil_impl.h>
#include <sys/filio.h>
2022-02-01 20:00:57 -09:00
#include <sys/freebsd_event.h>
2020-04-14 11:36:28 -07:00
#include <geom/geom.h>
#include <sys/zvol.h>
#include <sys/zvol_impl.h>
#include "zfs_namecheck.h"
#define ZVOL_DUMPSIZE "dumpsize"
#ifdef ZVOL_LOCK_DEBUG
#define ZVOL_RW_READER RW_WRITER
#define ZVOL_RW_READ_HELD RW_WRITE_HELD
#else
#define ZVOL_RW_READER RW_READER
#define ZVOL_RW_READ_HELD RW_READ_HELD
#endif
/*
 * State of the per-zvol GEOM worker thread, stored in
 * zvol_state_geom::zsg_state.
 */
enum zvol_geom_state {
/* First enumerator (value 0); where it is assigned is not visible here. */
ZVOL_GEOM_UNINIT,
/* zvol_geom_worker() exits once it sees this state with an empty queue. */
ZVOL_GEOM_STOPPED,
/* Written by the exiting worker; zvol_geom_destroy() verifies this value. */
ZVOL_GEOM_RUNNING,
};
struct zvol_state_os {
#define zso_dev _zso_state._zso_dev
#define zso_geom _zso_state._zso_geom
union {
/* volmode=dev */
struct zvol_state_dev {
struct cdev *zsd_cdev;
2022-02-01 20:00:57 -09:00
struct selinfo zsd_selinfo;
2020-04-14 11:36:28 -07:00
} _zso_dev;
/* volmode=geom */
struct zvol_state_geom {
struct g_provider *zsg_provider;
struct bio_queue_head zsg_queue;
struct mtx zsg_queue_mtx;
enum zvol_geom_state zsg_state;
} _zso_geom;
} _zso_state;
2020-11-17 09:50:52 -08:00
int zso_dying;
2020-04-14 11:36:28 -07:00
};
/* File-local counter; its updates are outside the visible part of this file. */
static uint32_t zvol_minors;
/* Tunables below are registered under the vfs.zfs.vol sysctl node. */
SYSCTL_DECL(_vfs_zfs);
SYSCTL_NODE(_vfs_zfs, OID_AUTO, vol, CTLFLAG_RW, 0, "ZFS VOLUME");
SYSCTL_INT(_vfs_zfs_vol, OID_AUTO, mode, CTLFLAG_RWTUN, &zvol_volmode, 0,
"Expose as GEOM providers (1), device files (2) or neither");
/*
 * When false, zvol_geom_open() refuses opens made by a thread that has
 * zfs_geom_probe_vdev_key set (i.e. ZFS probing for vdevs).
 */
static boolean_t zpool_on_zvol = B_FALSE;
SYSCTL_INT(_vfs_zfs_vol, OID_AUTO, recursive, CTLFLAG_RWTUN, &zpool_on_zvol, 0,
"Allow zpools to use zvols as vdevs (DANGEROUS)");
/*
 * Toggle unmap functionality.
 */
boolean_t zvol_unmap_enabled = B_TRUE;
SYSCTL_INT(_vfs_zfs_vol, OID_AUTO, unmap_enabled, CTLFLAG_RWTUN,
&zvol_unmap_enabled, 0, "Enable UNMAP functionality");
/*
 * zvol maximum transfer in one DMU tx.
 * Used by zvol_geom_bio_strategy() as the per-iteration chunk size.
 */
int zvol_maxphys = DMU_MAX_ACCESS / 2;
static void zvol_ensure_zilog(zvol_state_t *zv);
static d_open_t zvol_cdev_open;
static d_close_t zvol_cdev_close;
static d_ioctl_t zvol_cdev_ioctl;
static d_read_t zvol_cdev_read;
static d_write_t zvol_cdev_write;
static d_strategy_t zvol_geom_bio_strategy;
2022-02-01 20:00:57 -09:00
static d_kqfilter_t zvol_cdev_kqfilter;
2020-04-14 11:36:28 -07:00
static struct cdevsw zvol_cdevsw = {
.d_name = "zvol",
.d_version = D_VERSION,
.d_flags = D_DISK | D_TRACKCLOSE,
.d_open = zvol_cdev_open,
.d_close = zvol_cdev_close,
.d_ioctl = zvol_cdev_ioctl,
.d_read = zvol_cdev_read,
.d_write = zvol_cdev_write,
.d_strategy = zvol_geom_bio_strategy,
2022-02-01 20:00:57 -09:00
.d_kqfilter = zvol_cdev_kqfilter,
};
static void zvol_filter_detach(struct knote *kn);
static int zvol_filter_vnode(struct knote *kn, long hint);

/*
 * kqueue filter operations attached to knotes by zvol_cdev_kqfilter()
 * (EVFILT_VNODE on a volmode=dev zvol).
 */
static struct filterops zvol_filterops_vnode = {
	.f_isfd = 1,
	.f_detach = zvol_filter_detach,
	.f_event = zvol_filter_vnode,
};
/*
 * Thread-specific-data key defined elsewhere; zvol_geom_open() checks it
 * with tsd_get() to detect a thread that is probing GEOM providers for vdevs.
 */
extern uint_t zfs_geom_probe_vdev_key;
/*
 * GEOM class under which zvol providers are registered.  Only the name and
 * version are set statically here.
 */
struct g_class zfs_zvol_class = {
.name = "ZFS::ZVOL",
.version = G_VERSION,
};
DECLARE_GEOM_CLASS(zfs_zvol_class, zfs_zvol);
/* Forward declarations for the GEOM-mode (volmode=geom) implementation. */
static int zvol_geom_open(struct g_provider *pp, int flag, int count);
static int zvol_geom_close(struct g_provider *pp, int flag, int count);
static void zvol_geom_run(zvol_state_t *zv);
static void zvol_geom_destroy(zvol_state_t *zv);
static int zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace);
static void zvol_geom_worker(void *arg);
static void zvol_geom_bio_start(struct bio *bp);
static int zvol_geom_bio_getattr(struct bio *bp);
/* static d_strategy_t zvol_geom_bio_strategy; (declared elsewhere) */
/*
* GEOM mode implementation
*/
static int
zvol_geom_open(struct g_provider *pp, int flag, int count)
{
zvol_state_t *zv;
int err = 0;
boolean_t drop_suspend = B_FALSE;
2020-04-14 11:36:28 -07:00
if (!zpool_on_zvol && tsd_get(zfs_geom_probe_vdev_key) != NULL) {
/*
2021-12-23 19:04:07 +00:00
* If zfs_geom_probe_vdev_key is set, that means that zfs is
2020-04-14 11:36:28 -07:00
* attempting to probe geom providers while looking for a
* replacement for a missing VDEV. In this case, the
* spa_namespace_lock will not be held, but it is still illegal
* to use a zvol as a vdev. Deadlocks can result if another
2021-12-23 19:04:07 +00:00
* thread has spa_namespace_lock.
2020-04-14 11:36:28 -07:00
*/
return (SET_ERROR(EOPNOTSUPP));
}
retry:
2020-04-14 11:36:28 -07:00
rw_enter(&zvol_state_lock, ZVOL_RW_READER);
2021-12-23 19:03:23 +00:00
/*
* Obtain a copy of private under zvol_state_lock to make sure either
* the result of zvol free code setting private to NULL is observed,
* or the zv is protected from being freed because of the positive
* zv_open_count.
*/
2020-04-14 11:36:28 -07:00
zv = pp->private;
if (zv == NULL) {
2020-11-17 09:50:52 -08:00
rw_exit(&zvol_state_lock);
err = SET_ERROR(ENXIO);
goto out_locked;
2020-04-14 11:36:28 -07:00
}
mutex_enter(&zv->zv_state_lock);
if (zv->zv_zso->zso_dying || zv->zv_flags & ZVOL_REMOVING) {
2020-11-17 09:50:52 -08:00
rw_exit(&zvol_state_lock);
err = SET_ERROR(ENXIO);
goto out_zv_locked;
}
ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM);
2020-04-14 11:36:28 -07:00
/*
2021-12-23 19:04:07 +00:00
* Make sure zvol is not suspended during first open
2020-04-14 11:36:28 -07:00
* (hold zv_suspend_lock) and respect proper lock acquisition
2021-12-23 19:04:07 +00:00
* ordering - zv_suspend_lock before zv_state_lock.
2020-04-14 11:36:28 -07:00
*/
if (zv->zv_open_count == 0) {
drop_suspend = B_TRUE;
2020-04-14 11:36:28 -07:00
if (!rw_tryenter(&zv->zv_suspend_lock, ZVOL_RW_READER)) {
mutex_exit(&zv->zv_state_lock);
rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
mutex_enter(&zv->zv_state_lock);
2021-12-23 19:04:07 +00:00
/* Check to see if zv_suspend_lock is needed. */
2020-04-14 11:36:28 -07:00
if (zv->zv_open_count != 0) {
rw_exit(&zv->zv_suspend_lock);
drop_suspend = B_FALSE;
}
}
}
rw_exit(&zvol_state_lock);
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
if (zv->zv_open_count == 0) {
2021-12-23 19:03:23 +00:00
boolean_t drop_namespace = B_FALSE;
2020-04-14 11:36:28 -07:00
ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock));
2021-12-23 19:03:23 +00:00
/*
* Take spa_namespace_lock to prevent lock inversion when
* zvols from one pool are opened as vdevs in another.
*/
if (!mutex_owned(&spa_namespace_lock)) {
if (!mutex_tryenter(&spa_namespace_lock)) {
mutex_exit(&zv->zv_state_lock);
rw_exit(&zv->zv_suspend_lock);
2024-07-10 16:27:44 -05:00
drop_suspend = B_FALSE;
2021-12-23 19:03:23 +00:00
kern_yield(PRI_USER);
goto retry;
} else {
drop_namespace = B_TRUE;
}
}
2020-04-14 11:36:28 -07:00
err = zvol_first_open(zv, !(flag & FWRITE));
2021-12-23 19:03:23 +00:00
if (drop_namespace)
mutex_exit(&spa_namespace_lock);
2020-04-14 11:36:28 -07:00
if (err)
2020-11-17 09:50:52 -08:00
goto out_zv_locked;
2020-04-14 11:36:28 -07:00
pp->mediasize = zv->zv_volsize;
pp->stripeoffset = 0;
pp->stripesize = zv->zv_volblocksize;
}
2021-12-23 19:03:23 +00:00
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
2020-04-14 11:36:28 -07:00
/*
* Check for a bad on-disk format version now since we
* lied about owning the dataset readonly before.
*/
if ((flag & FWRITE) && ((zv->zv_flags & ZVOL_RDONLY) ||
dmu_objset_incompatible_encryption_version(zv->zv_objset))) {
err = SET_ERROR(EROFS);
goto out_opened;
2020-04-14 11:36:28 -07:00
}
if (zv->zv_flags & ZVOL_EXCL) {
err = SET_ERROR(EBUSY);
goto out_opened;
2020-04-14 11:36:28 -07:00
}
2022-05-02 16:26:28 -07:00
if (flag & O_EXCL) {
2020-04-14 11:36:28 -07:00
if (zv->zv_open_count != 0) {
err = SET_ERROR(EBUSY);
goto out_opened;
2020-04-14 11:36:28 -07:00
}
zv->zv_flags |= ZVOL_EXCL;
}
zv->zv_open_count += count;
out_opened:
2020-11-17 09:50:52 -08:00
if (zv->zv_open_count == 0) {
2020-04-14 11:36:28 -07:00
zvol_last_close(zv);
2020-11-17 09:50:52 -08:00
wakeup(zv);
}
out_zv_locked:
mutex_exit(&zv->zv_state_lock);
out_locked:
2020-04-14 11:36:28 -07:00
if (drop_suspend)
rw_exit(&zv->zv_suspend_lock);
return (err);
2020-04-14 11:36:28 -07:00
}
static int
zvol_geom_close(struct g_provider *pp, int flag, int count)
{
2022-02-16 02:38:43 +01:00
(void) flag;
2020-04-14 11:36:28 -07:00
zvol_state_t *zv;
boolean_t drop_suspend = B_TRUE;
int new_open_count;
2020-04-14 11:36:28 -07:00
rw_enter(&zvol_state_lock, ZVOL_RW_READER);
zv = pp->private;
if (zv == NULL) {
rw_exit(&zvol_state_lock);
return (SET_ERROR(ENXIO));
}
mutex_enter(&zv->zv_state_lock);
if (zv->zv_flags & ZVOL_EXCL) {
2020-10-21 17:23:08 +00:00
ASSERT3U(zv->zv_open_count, ==, 1);
2020-04-14 11:36:28 -07:00
zv->zv_flags &= ~ZVOL_EXCL;
}
2020-11-17 09:50:52 -08:00
ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM);
2020-04-14 11:36:28 -07:00
/*
* If the open count is zero, this is a spurious close.
* That indicates a bug in the kernel / DDI framework.
*/
2020-10-21 17:23:08 +00:00
ASSERT3U(zv->zv_open_count, >, 0);
2020-04-14 11:36:28 -07:00
/*
2021-12-23 19:04:07 +00:00
* Make sure zvol is not suspended during last close
2020-04-14 11:36:28 -07:00
* (hold zv_suspend_lock) and respect proper lock acquisition
2021-12-23 19:04:07 +00:00
* ordering - zv_suspend_lock before zv_state_lock.
2020-04-14 11:36:28 -07:00
*/
new_open_count = zv->zv_open_count - count;
if (new_open_count == 0) {
2020-04-14 11:36:28 -07:00
if (!rw_tryenter(&zv->zv_suspend_lock, ZVOL_RW_READER)) {
mutex_exit(&zv->zv_state_lock);
rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
mutex_enter(&zv->zv_state_lock);
2021-12-23 19:04:07 +00:00
/* Check to see if zv_suspend_lock is needed. */
new_open_count = zv->zv_open_count - count;
if (new_open_count != 0) {
2020-04-14 11:36:28 -07:00
rw_exit(&zv->zv_suspend_lock);
drop_suspend = B_FALSE;
}
}
} else {
drop_suspend = B_FALSE;
}
rw_exit(&zvol_state_lock);
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
/*
* You may get multiple opens, but only one close.
*/
zv->zv_open_count = new_open_count;
2020-04-14 11:36:28 -07:00
if (zv->zv_open_count == 0) {
ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock));
zvol_last_close(zv);
2020-11-17 09:50:52 -08:00
wakeup(zv);
2020-04-14 11:36:28 -07:00
}
mutex_exit(&zv->zv_state_lock);
if (drop_suspend)
rw_exit(&zv->zv_suspend_lock);
return (0);
}
/*
 * Bring a GEOM-mode zvol online: clear the provider's error state and start
 * the worker kthread (zvol_geom_worker) that services bios queued by
 * zvol_geom_bio_start().
 *
 * The thread is named "zvol <name>", where <name> is pp->name with its
 * first sizeof (ZVOL_DRIVER) bytes skipped.
 * NOTE(review): assuming ZVOL_DRIVER is a string literal, that skips the
 * driver prefix plus one separator character -- confirm against its
 * definition.
 */
static void
zvol_geom_run(zvol_state_t *zv)
{
	struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
	struct g_provider *pp = zsg->zsg_provider;

	ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM);

	g_error_provider(pp, 0);
	kproc_kthread_add(zvol_geom_worker, zv, &system_proc, NULL, 0, 0,
	    "zfskern", "zvol %s", pp->name + sizeof (ZVOL_DRIVER));
}
/*
 * Tear down the GEOM side of a zvol.  Must be called with the GEOM topology
 * lock held (g_topology_assert()).
 *
 * Verifies under zv_state_lock that zsg_state is ZVOL_GEOM_RUNNING -- the
 * value zvol_geom_worker() stores just before it exits -- then detaches the
 * provider from the zvol state and withers the geom with ENXIO.
 */
static void
zvol_geom_destroy(zvol_state_t *zv)
{
	struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
	struct g_provider *pp = zsg->zsg_provider;

	ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM);

	g_topology_assert();

	mutex_enter(&zv->zv_state_lock);
	VERIFY3S(zsg->zsg_state, ==, ZVOL_GEOM_RUNNING);
	mutex_exit(&zv->zv_state_lock);
	zsg->zsg_provider = NULL;
	g_wither_geom(pp->geom, ENXIO);
}
2020-11-17 09:50:52 -08:00
/*
 * Mark a GEOM-mode zvol as dying and give current openers a chance to
 * close it.  Does nothing for zvols in any other volmode.
 *
 * With zv_state_lock held, zso_dying is set (so new opens fail) and, if the
 * zvol is still open, the caller sleeps once on `zv` for up to 10 seconds;
 * the open/close paths issue wakeup(zv) when the open count drops to zero.
 */
void
zvol_wait_close(zvol_state_t *zv)
{
	if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
		mutex_enter(&zv->zv_state_lock);

		zv->zv_zso->zso_dying = B_TRUE;
		if (zv->zv_open_count != 0) {
			msleep(zv, &zv->zv_state_lock, PRIBIO,
			    "zvol:dying", 10*hz);
		}

		mutex_exit(&zv->zv_state_lock);
	}
}
2020-04-14 11:36:28 -07:00
static int
zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace)
{
int count, error, flags;
g_topology_assert();
/*
* To make it easier we expect either open or close, but not both
* at the same time.
*/
KASSERT((acr >= 0 && acw >= 0 && ace >= 0) ||
(acr <= 0 && acw <= 0 && ace <= 0),
("Unsupported access request to %s (acr=%d, acw=%d, ace=%d).",
pp->name, acr, acw, ace));
if (pp->private == NULL) {
if (acr <= 0 && acw <= 0 && ace <= 0)
return (0);
return (pp->error);
}
/*
* We don't pass FEXCL flag to zvol_geom_open()/zvol_geom_close() if
* ace != 0, because GEOM already handles that and handles it a bit
* differently. GEOM allows for multiple read/exclusive consumers and
* ZFS allows only one exclusive consumer, no matter if it is reader or
* writer. I like better the way GEOM works so I'll leave it for GEOM
* to decide what to do.
*/
count = acr + acw + ace;
if (count == 0)
return (0);
flags = 0;
if (acr != 0 || ace != 0)
flags |= FREAD;
if (acw != 0)
flags |= FWRITE;
g_topology_unlock();
if (count > 0)
error = zvol_geom_open(pp, flags, count);
else
error = zvol_geom_close(pp, flags, -count);
g_topology_lock();
return (error);
}
static void
zvol_geom_worker(void *arg)
{
zvol_state_t *zv = arg;
struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
struct bio *bp;
2020-11-17 09:50:52 -08:00
ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM);
2020-04-14 11:36:28 -07:00
thread_lock(curthread);
sched_prio(curthread, PRIBIO);
thread_unlock(curthread);
for (;;) {
mtx_lock(&zsg->zsg_queue_mtx);
bp = bioq_takefirst(&zsg->zsg_queue);
if (bp == NULL) {
if (zsg->zsg_state == ZVOL_GEOM_STOPPED) {
zsg->zsg_state = ZVOL_GEOM_RUNNING;
wakeup(&zsg->zsg_state);
mtx_unlock(&zsg->zsg_queue_mtx);
kthread_exit();
}
msleep(&zsg->zsg_queue, &zsg->zsg_queue_mtx,
PRIBIO | PDROP, "zvol:io", 0);
continue;
}
mtx_unlock(&zsg->zsg_queue_mtx);
2020-06-03 13:45:12 -04:00
zvol_geom_bio_strategy(bp);
2020-04-14 11:36:28 -07:00
}
}
static void
zvol_geom_bio_start(struct bio *bp)
{
zvol_state_t *zv = bp->bio_to->private;
2020-11-17 09:50:52 -08:00
struct zvol_state_geom *zsg;
2020-04-14 11:36:28 -07:00
boolean_t first;
2020-11-17 09:50:52 -08:00
if (zv == NULL) {
g_io_deliver(bp, ENXIO);
return;
}
2020-04-14 11:36:28 -07:00
if (bp->bio_cmd == BIO_GETATTR) {
if (zvol_geom_bio_getattr(bp))
g_io_deliver(bp, EOPNOTSUPP);
return;
}
if (!THREAD_CAN_SLEEP()) {
2020-11-17 09:50:52 -08:00
zsg = &zv->zv_zso->zso_geom;
2020-04-14 11:36:28 -07:00
mtx_lock(&zsg->zsg_queue_mtx);
first = (bioq_first(&zsg->zsg_queue) == NULL);
bioq_insert_tail(&zsg->zsg_queue, bp);
mtx_unlock(&zsg->zsg_queue_mtx);
if (first)
wakeup_one(&zsg->zsg_queue);
return;
}
2020-06-03 13:45:12 -04:00
zvol_geom_bio_strategy(bp);
2020-04-14 11:36:28 -07:00
}
static int
zvol_geom_bio_getattr(struct bio *bp)
{
zvol_state_t *zv;
zv = bp->bio_to->private;
2020-10-21 17:23:08 +00:00
ASSERT3P(zv, !=, NULL);
2020-04-14 11:36:28 -07:00
spa_t *spa = dmu_objset_spa(zv->zv_objset);
uint64_t refd, avail, usedobjs, availobjs;
if (g_handleattr_int(bp, "GEOM::candelete", 1))
return (0);
if (strcmp(bp->bio_attribute, "blocksavail") == 0) {
dmu_objset_space(zv->zv_objset, &refd, &avail,
&usedobjs, &availobjs);
if (g_handleattr_off_t(bp, "blocksavail", avail / DEV_BSIZE))
return (0);
} else if (strcmp(bp->bio_attribute, "blocksused") == 0) {
dmu_objset_space(zv->zv_objset, &refd, &avail,
&usedobjs, &availobjs);
if (g_handleattr_off_t(bp, "blocksused", refd / DEV_BSIZE))
return (0);
} else if (strcmp(bp->bio_attribute, "poolblocksavail") == 0) {
avail = metaslab_class_get_space(spa_normal_class(spa));
avail -= metaslab_class_get_alloc(spa_normal_class(spa));
if (g_handleattr_off_t(bp, "poolblocksavail",
avail / DEV_BSIZE))
return (0);
} else if (strcmp(bp->bio_attribute, "poolblocksused") == 0) {
refd = metaslab_class_get_alloc(spa_normal_class(spa));
if (g_handleattr_off_t(bp, "poolblocksused", refd / DEV_BSIZE))
return (0);
}
return (1);
}
2022-02-01 20:00:57 -09:00
/*
 * kqueue f_detach hook: remove the knote from the knlist of the zvol that
 * zvol_cdev_kqfilter() stored in kn->kn_hook.
 */
static void
zvol_filter_detach(struct knote *kn)
{
	zvol_state_t *zv = kn->kn_hook;
	struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;

	knlist_remove(&zsd->zsd_selinfo.si_note, kn, 0);
}
/*
 * kqueue f_event hook: accumulate into kn_fflags those bits of `hint` that
 * the knote subscribed to (kn_sfflags).  Returns non-zero when any event
 * bit is pending on the knote.
 */
static int
zvol_filter_vnode(struct knote *kn, long hint)
{
	const long subscribed = kn->kn_sfflags & hint;

	kn->kn_fflags |= subscribed;
	if (kn->kn_fflags != 0)
		return (1);
	return (0);
}
/*
 * d_kqfilter entry point for volmode=dev zvols.
 *
 * Only EVFILT_VNODE knotes whose requested flags are exactly NOTE_ATTRIB
 * are accepted; anything else gets EINVAL.  An accepted knote is bound to
 * zvol_filterops_vnode, remembers the zvol in kn_hook and is added to the
 * zvol's selinfo knlist.  Returns 0 on success.
 */
static int
zvol_cdev_kqfilter(struct cdev *dev, struct knote *kn)
{
	zvol_state_t *zv = dev->si_drv2;
	struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;

	/* XXX: extend support for other NOTE_* events */
	if (kn->kn_filter != EVFILT_VNODE || kn->kn_sfflags != NOTE_ATTRIB)
		return (EINVAL);

	kn->kn_fop = &zvol_filterops_vnode;
	kn->kn_hook = zv;
	knlist_add(&zsd->zsd_selinfo.si_note, kn, 0);

	return (0);
}
2020-04-14 11:36:28 -07:00
static void
zvol_geom_bio_strategy(struct bio *bp)
{
zvol_state_t *zv;
uint64_t off, volsize;
size_t resid;
char *addr;
objset_t *os;
zfs_locked_range_t *lr;
int error = 0;
2020-06-03 13:45:12 -04:00
boolean_t doread = B_FALSE;
2020-04-14 11:36:28 -07:00
boolean_t is_dumpified;
2023-10-30 17:51:56 -04:00
boolean_t commit;
2020-04-14 11:36:28 -07:00
if (bp->bio_to)
zv = bp->bio_to->private;
else
zv = bp->bio_dev->si_drv2;
if (zv == NULL) {
error = SET_ERROR(ENXIO);
goto out;
}
2020-06-03 13:45:12 -04:00
rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
2020-04-14 11:36:28 -07:00
if (zv->zv_flags & ZVOL_REMOVING) {
error = SET_ERROR(ENXIO);
goto resume;
}
2020-04-14 11:36:28 -07:00
switch (bp->bio_cmd) {
case BIO_READ:
2020-06-03 13:45:12 -04:00
doread = B_TRUE;
break;
2020-04-14 11:36:28 -07:00
case BIO_WRITE:
2020-06-03 13:45:12 -04:00
case BIO_FLUSH:
2020-04-14 11:36:28 -07:00
case BIO_DELETE:
2020-06-03 13:45:12 -04:00
if (zv->zv_flags & ZVOL_RDONLY) {
error = SET_ERROR(EROFS);
goto resume;
}
zvol_ensure_zilog(zv);
if (bp->bio_cmd == BIO_FLUSH)
2023-10-30 17:51:56 -04:00
goto commit;
2020-04-14 11:36:28 -07:00
break;
default:
error = SET_ERROR(EOPNOTSUPP);
2020-06-03 13:45:12 -04:00
goto resume;
2020-04-14 11:36:28 -07:00
}
off = bp->bio_offset;
volsize = zv->zv_volsize;
os = zv->zv_objset;
2020-10-21 17:23:08 +00:00
ASSERT3P(os, !=, NULL);
2020-04-14 11:36:28 -07:00
addr = bp->bio_data;
resid = bp->bio_length;
if (resid > 0 && off >= volsize) {
2020-04-14 11:36:28 -07:00
error = SET_ERROR(EIO);
2020-06-03 13:45:12 -04:00
goto resume;
2020-04-14 11:36:28 -07:00
}
is_dumpified = B_FALSE;
2023-10-30 17:51:56 -04:00
commit = !doread && !is_dumpified &&
2020-04-14 11:36:28 -07:00
zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
/*
* There must be no buffer changes when doing a dmu_sync() because
* we can't change the data whilst calculating the checksum.
*/
lr = zfs_rangelock_enter(&zv->zv_rangelock, off, resid,
doread ? RL_READER : RL_WRITER);
if (bp->bio_cmd == BIO_DELETE) {
dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
error = dmu_tx_assign(tx, DMU_TX_WAIT);
2020-04-14 11:36:28 -07:00
if (error != 0) {
dmu_tx_abort(tx);
} else {
2023-10-30 17:51:56 -04:00
zvol_log_truncate(zv, tx, off, resid);
2020-04-14 11:36:28 -07:00
dmu_tx_commit(tx);
error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
off, resid);
resid = 0;
}
goto unlock;
}
while (resid != 0 && off < volsize) {
size_t size = MIN(resid, zvol_maxphys);
if (doread) {
error = dmu_read(os, ZVOL_OBJ, off, size, addr,
DMU_READ_PREFETCH);
} else {
dmu_tx_t *tx = dmu_tx_create(os);
dmu_tx_hold_write_by_dnode(tx, zv->zv_dn, off, size);
error = dmu_tx_assign(tx, DMU_TX_WAIT);
2020-04-14 11:36:28 -07:00
if (error) {
dmu_tx_abort(tx);
} else {
dmu_write(os, ZVOL_OBJ, off, size, addr, tx);
2023-10-30 17:51:56 -04:00
zvol_log_write(zv, tx, off, size, commit);
2020-04-14 11:36:28 -07:00
dmu_tx_commit(tx);
}
}
if (error) {
2021-12-23 19:04:07 +00:00
/* Convert checksum errors into IO errors. */
2020-04-14 11:36:28 -07:00
if (error == ECKSUM)
error = SET_ERROR(EIO);
break;
}
off += size;
addr += size;
resid -= size;
}
unlock:
zfs_rangelock_exit(lr);
bp->bio_completed = bp->bio_length - resid;
if (bp->bio_completed < bp->bio_length && off > volsize)
error = SET_ERROR(EINVAL);
2020-04-14 11:36:28 -07:00
2020-06-05 20:17:02 -04:00
switch (bp->bio_cmd) {
case BIO_FLUSH:
break;
case BIO_READ:
dataset_kstats_update_read_kstats(&zv->zv_kstat,
bp->bio_completed);
break;
case BIO_WRITE:
dataset_kstats_update_write_kstats(&zv->zv_kstat,
bp->bio_completed);
break;
case BIO_DELETE:
break;
default:
break;
}
2023-10-30 17:51:56 -04:00
if (commit) {
commit:
2020-04-14 11:36:28 -07:00
zil_commit(zv->zv_zilog, ZVOL_OBJ);
}
2020-06-03 13:45:12 -04:00
resume:
rw_exit(&zv->zv_suspend_lock);
2020-04-14 11:36:28 -07:00
out:
if (bp->bio_to)
g_io_deliver(bp, error);
else
biofinish(bp, NULL, error);
}
/*
* Character device mode implementation
*/
static int
2021-01-20 22:27:30 -07:00
zvol_cdev_read(struct cdev *dev, struct uio *uio_s, int ioflag)
2020-04-14 11:36:28 -07:00
{
zvol_state_t *zv;
uint64_t volsize;
zfs_locked_range_t *lr;
int error = 0;
2021-01-20 22:27:30 -07:00
zfs_uio_t uio;
zfs_uio_init(&uio, uio_s);
2020-04-14 11:36:28 -07:00
zv = dev->si_drv2;
volsize = zv->zv_volsize;
/*
* uio_loffset == volsize isn't an error as
2021-04-02 18:38:53 -07:00
* it's required for EOF processing.
2020-04-14 11:36:28 -07:00
*/
2021-01-20 22:27:30 -07:00
if (zfs_uio_resid(&uio) > 0 &&
(zfs_uio_offset(&uio) < 0 || zfs_uio_offset(&uio) > volsize))
2020-04-14 11:36:28 -07:00
return (SET_ERROR(EIO));
rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
ssize_t start_resid = zfs_uio_resid(&uio);
2021-01-20 22:27:30 -07:00
lr = zfs_rangelock_enter(&zv->zv_rangelock, zfs_uio_offset(&uio),
zfs_uio_resid(&uio), RL_READER);
while (zfs_uio_resid(&uio) > 0 && zfs_uio_offset(&uio) < volsize) {
uint64_t bytes = MIN(zfs_uio_resid(&uio), DMU_MAX_ACCESS >> 1);
2020-04-14 11:36:28 -07:00
2021-12-23 19:04:07 +00:00
/* Don't read past the end. */
2021-01-20 22:27:30 -07:00
if (bytes > volsize - zfs_uio_offset(&uio))
bytes = volsize - zfs_uio_offset(&uio);
2020-04-14 11:36:28 -07:00
2021-01-20 22:27:30 -07:00
error = dmu_read_uio_dnode(zv->zv_dn, &uio, bytes);
2020-04-14 11:36:28 -07:00
if (error) {
2021-12-23 19:04:07 +00:00
/* Convert checksum errors into IO errors. */
2020-04-14 11:36:28 -07:00
if (error == ECKSUM)
error = SET_ERROR(EIO);
break;
}
}
zfs_rangelock_exit(lr);
int64_t nread = start_resid - zfs_uio_resid(&uio);
dataset_kstats_update_read_kstats(&zv->zv_kstat, nread);
rw_exit(&zv->zv_suspend_lock);
2020-04-14 11:36:28 -07:00
return (error);
}
static int
2021-01-20 22:27:30 -07:00
zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag)
2020-04-14 11:36:28 -07:00
{
zvol_state_t *zv;
uint64_t volsize;
zfs_locked_range_t *lr;
int error = 0;
2023-10-30 17:51:56 -04:00
boolean_t commit;
2021-01-20 22:27:30 -07:00
zfs_uio_t uio;
2020-04-14 11:36:28 -07:00
zv = dev->si_drv2;
volsize = zv->zv_volsize;
2021-01-20 22:27:30 -07:00
zfs_uio_init(&uio, uio_s);
if (zfs_uio_resid(&uio) > 0 &&
(zfs_uio_offset(&uio) < 0 || zfs_uio_offset(&uio) > volsize))
2020-04-14 11:36:28 -07:00
return (SET_ERROR(EIO));
ssize_t start_resid = zfs_uio_resid(&uio);
2023-10-30 17:51:56 -04:00
commit = (ioflag & IO_SYNC) ||
2020-04-14 11:36:28 -07:00
(zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);
rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
zvol_ensure_zilog(zv);
2021-01-20 22:27:30 -07:00
lr = zfs_rangelock_enter(&zv->zv_rangelock, zfs_uio_offset(&uio),
zfs_uio_resid(&uio), RL_WRITER);
while (zfs_uio_resid(&uio) > 0 && zfs_uio_offset(&uio) < volsize) {
uint64_t bytes = MIN(zfs_uio_resid(&uio), DMU_MAX_ACCESS >> 1);
uint64_t off = zfs_uio_offset(&uio);
2020-04-14 11:36:28 -07:00
dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
2021-12-23 19:04:07 +00:00
if (bytes > volsize - off) /* Don't write past the end. */
2020-04-14 11:36:28 -07:00
bytes = volsize - off;
dmu_tx_hold_write_by_dnode(tx, zv->zv_dn, off, bytes);
error = dmu_tx_assign(tx, DMU_TX_WAIT);
2020-04-14 11:36:28 -07:00
if (error) {
dmu_tx_abort(tx);
break;
}
2021-01-20 22:27:30 -07:00
error = dmu_write_uio_dnode(zv->zv_dn, &uio, bytes, tx);
2020-04-14 11:36:28 -07:00
if (error == 0)
2023-10-30 17:51:56 -04:00
zvol_log_write(zv, tx, off, bytes, commit);
2020-04-14 11:36:28 -07:00
dmu_tx_commit(tx);
if (error)
break;
}
zfs_rangelock_exit(lr);
int64_t nwritten = start_resid - zfs_uio_resid(&uio);
dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten);
2023-10-30 17:51:56 -04:00
if (commit)
2020-04-14 11:36:28 -07:00
zil_commit(zv->zv_zilog, ZVOL_OBJ);
2020-06-03 13:45:12 -04:00
rw_exit(&zv->zv_suspend_lock);
2024-09-14 16:47:59 -04:00
2020-04-14 11:36:28 -07:00
return (error);
}
static int
zvol_cdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
zvol_state_t *zv;
int err = 0;
boolean_t drop_suspend = B_FALSE;
2020-04-14 11:36:28 -07:00
retry:
2020-04-14 11:36:28 -07:00
rw_enter(&zvol_state_lock, ZVOL_RW_READER);
2021-12-23 19:03:23 +00:00
/*
* Obtain a copy of si_drv2 under zvol_state_lock to make sure either
* the result of zvol free code setting si_drv2 to NULL is observed,
* or the zv is protected from being freed because of the positive
* zv_open_count.
*/
2020-04-14 11:36:28 -07:00
zv = dev->si_drv2;
if (zv == NULL) {
2020-11-17 09:50:52 -08:00
rw_exit(&zvol_state_lock);
err = SET_ERROR(ENXIO);
goto out_locked;
2020-04-14 11:36:28 -07:00
}
mutex_enter(&zv->zv_state_lock);
2021-12-23 19:03:23 +00:00
if (zv->zv_zso->zso_dying) {
rw_exit(&zvol_state_lock);
err = SET_ERROR(ENXIO);
goto out_zv_locked;
}
2020-11-17 09:50:52 -08:00
ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_DEV);
2020-04-14 11:36:28 -07:00
/*
2021-12-23 19:04:07 +00:00
* Make sure zvol is not suspended during first open
2020-04-14 11:36:28 -07:00
* (hold zv_suspend_lock) and respect proper lock acquisition
2021-12-23 19:04:07 +00:00
* ordering - zv_suspend_lock before zv_state_lock.
2020-04-14 11:36:28 -07:00
*/
if (zv->zv_open_count == 0) {
drop_suspend = B_TRUE;
2020-04-14 11:36:28 -07:00
if (!rw_tryenter(&zv->zv_suspend_lock, ZVOL_RW_READER)) {
mutex_exit(&zv->zv_state_lock);
rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
mutex_enter(&zv->zv_state_lock);
2021-12-23 19:04:07 +00:00
/* Check to see if zv_suspend_lock is needed. */
2020-04-14 11:36:28 -07:00
if (zv->zv_open_count != 0) {
rw_exit(&zv->zv_suspend_lock);
drop_suspend = B_FALSE;
}
}
}
rw_exit(&zvol_state_lock);
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
if (zv->zv_open_count == 0) {
2021-12-23 19:03:23 +00:00
boolean_t drop_namespace = B_FALSE;
2020-04-14 11:36:28 -07:00
ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock));
2021-12-23 19:03:23 +00:00
/*
* Take spa_namespace_lock to prevent lock inversion when
* zvols from one pool are opened as vdevs in another.
*/
if (!mutex_owned(&spa_namespace_lock)) {
if (!mutex_tryenter(&spa_namespace_lock)) {
2022-01-26 14:23:39 -05:00
mutex_exit(&zv->zv_state_lock);
rw_exit(&zv->zv_suspend_lock);
2024-07-10 16:27:44 -05:00
drop_suspend = B_FALSE;
2021-12-23 19:03:23 +00:00
kern_yield(PRI_USER);
goto retry;
} else {
drop_namespace = B_TRUE;
}
}
2020-04-14 11:36:28 -07:00
err = zvol_first_open(zv, !(flags & FWRITE));
2021-12-23 19:03:23 +00:00
if (drop_namespace)
mutex_exit(&spa_namespace_lock);
2020-04-14 11:36:28 -07:00
if (err)
2020-11-17 09:50:52 -08:00
goto out_zv_locked;
2020-04-14 11:36:28 -07:00
}
2021-12-23 19:03:23 +00:00
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
2020-04-14 11:36:28 -07:00
if ((flags & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
err = SET_ERROR(EROFS);
2020-04-14 11:36:28 -07:00
goto out_opened;
}
if (zv->zv_flags & ZVOL_EXCL) {
err = SET_ERROR(EBUSY);
2020-04-14 11:36:28 -07:00
goto out_opened;
}
2022-05-02 16:26:28 -07:00
if (flags & O_EXCL) {
2020-04-14 11:36:28 -07:00
if (zv->zv_open_count != 0) {
err = SET_ERROR(EBUSY);
2020-04-14 11:36:28 -07:00
goto out_opened;
}
zv->zv_flags |= ZVOL_EXCL;
}
zv->zv_open_count++;
out_opened:
2020-11-17 09:50:52 -08:00
if (zv->zv_open_count == 0) {
2020-04-14 11:36:28 -07:00
zvol_last_close(zv);
2020-11-17 09:50:52 -08:00
wakeup(zv);
}
out_zv_locked:
mutex_exit(&zv->zv_state_lock);
2020-04-14 11:36:28 -07:00
out_locked:
if (drop_suspend)
rw_exit(&zv->zv_suspend_lock);
return (err);
2020-04-14 11:36:28 -07:00
}
static int
zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td)
{
zvol_state_t *zv;
boolean_t drop_suspend = B_TRUE;
rw_enter(&zvol_state_lock, ZVOL_RW_READER);
zv = dev->si_drv2;
if (zv == NULL) {
rw_exit(&zvol_state_lock);
return (SET_ERROR(ENXIO));
}
mutex_enter(&zv->zv_state_lock);
if (zv->zv_flags & ZVOL_EXCL) {
2020-10-21 17:23:08 +00:00
ASSERT3U(zv->zv_open_count, ==, 1);
2020-04-14 11:36:28 -07:00
zv->zv_flags &= ~ZVOL_EXCL;
}
2020-11-17 09:50:52 -08:00
ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_DEV);
2020-04-14 11:36:28 -07:00
/*
* If the open count is zero, this is a spurious close.
* That indicates a bug in the kernel / DDI framework.
*/
2020-10-21 17:23:08 +00:00
ASSERT3U(zv->zv_open_count, >, 0);
2020-04-14 11:36:28 -07:00
/*
2021-12-23 19:04:07 +00:00
* Make sure zvol is not suspended during last close
2020-04-14 11:36:28 -07:00
* (hold zv_suspend_lock) and respect proper lock acquisition
2021-12-23 19:04:07 +00:00
* ordering - zv_suspend_lock before zv_state_lock.
2020-04-14 11:36:28 -07:00
*/
if (zv->zv_open_count == 1) {
if (!rw_tryenter(&zv->zv_suspend_lock, ZVOL_RW_READER)) {
mutex_exit(&zv->zv_state_lock);
rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
mutex_enter(&zv->zv_state_lock);
2021-12-23 19:04:07 +00:00
/* Check to see if zv_suspend_lock is needed. */
2020-04-14 11:36:28 -07:00
if (zv->zv_open_count != 1) {
rw_exit(&zv->zv_suspend_lock);
drop_suspend = B_FALSE;
}
}
} else {
drop_suspend = B_FALSE;
}
rw_exit(&zvol_state_lock);
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
/*
* You may get multiple opens, but only one close.
*/
zv->zv_open_count--;
if (zv->zv_open_count == 0) {
ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock));
zvol_last_close(zv);
2020-11-17 09:50:52 -08:00
wakeup(zv);
2020-04-14 11:36:28 -07:00
}
mutex_exit(&zv->zv_state_lock);
if (drop_suspend)
rw_exit(&zv->zv_suspend_lock);
return (0);
}
/*
 * ioctl handler for a zvol character device.
 *
 * Commands handled:
 *   DIOCGSECTORSIZE    stores DEV_BSIZE into *data as a uint32_t.
 *   DIOCGMEDIASIZE     stores zv_volsize into *data as an off_t.
 *   DIOCGFLUSH         commits the ZIL if one is open.
 *   DIOCGDELETE        frees the range given as two off_t values
 *                      (offset, length) in data; does nothing when
 *                      zvol_unmap_enabled is zero.
 *   DIOCGSTRIPESIZE    stores zv_volblocksize into *data as an off_t.
 *   DIOCGSTRIPEOFFSET  stores 0 into *data as an off_t.
 *   DIOCGATTR          answers "GEOM::candelete", "blocksavail",
 *                      "blocksused", "poolblocksavail" and
 *                      "poolblocksused".
 *   FIOSEEKHOLE /
 *   FIOSEEKDATA        replaces *data (an off_t) with the offset produced
 *                      by dmu_offset_next().
 *
 * fflag and td are not used by this function.
 *
 * Returns 0 on success, EINVAL for a malformed DIOCGDELETE range, ENOIOCTL
 * for an unknown command or DIOCGATTR attribute name, or the error reported
 * by the DMU call that failed.
 */
static int
zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data,
    int fflag, struct thread *td)
{
	zvol_state_t *zv;
	zfs_locked_range_t *lr;
	off_t offset, length;
	int error;
	boolean_t sync;

	zv = dev->si_drv2;

	error = 0;
	KASSERT(zv->zv_open_count > 0,
	    ("Device with zero access count in %s", __func__));

	switch (cmd) {
	case DIOCGSECTORSIZE:
		*(uint32_t *)data = DEV_BSIZE;
		break;
	case DIOCGMEDIASIZE:
		*(off_t *)data = zv->zv_volsize;
		break;
	case DIOCGFLUSH:
		/* zv_zilog is read and used under zv_suspend_lock. */
		rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
		if (zv->zv_zilog != NULL)
			zil_commit(zv->zv_zilog, ZVOL_OBJ);
		rw_exit(&zv->zv_suspend_lock);
		break;
	case DIOCGDELETE:
		if (!zvol_unmap_enabled)
			break;

		offset = ((off_t *)data)[0];
		length = ((off_t *)data)[1];
		/*
		 * Reject ranges that are not DEV_BSIZE aligned, start
		 * outside the volume, or are empty / negative.
		 */
		if ((offset % DEV_BSIZE) != 0 || (length % DEV_BSIZE) != 0 ||
		    offset < 0 || offset >= zv->zv_volsize ||
		    length <= 0) {
			/* %jd takes an intmax_t, so convert explicitly. */
			printf("%s: offset=%jd length=%jd\n", __func__,
			    (intmax_t)offset, (intmax_t)length);
			error = SET_ERROR(EINVAL);
			break;
		}
		rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
		zvol_ensure_zilog(zv);
		lr = zfs_rangelock_enter(&zv->zv_rangelock, offset, length,
		    RL_WRITER);
		dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
		error = dmu_tx_assign(tx, DMU_TX_WAIT);
		if (error != 0) {
			sync = FALSE;
			dmu_tx_abort(tx);
		} else {
			/* Log the truncate first, then free the blocks. */
			sync = (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);
			zvol_log_truncate(zv, tx, offset, length);
			dmu_tx_commit(tx);
			error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
			    offset, length);
		}
		zfs_rangelock_exit(lr);
		if (sync)
			zil_commit(zv->zv_zilog, ZVOL_OBJ);
		rw_exit(&zv->zv_suspend_lock);
		break;
	case DIOCGSTRIPESIZE:
		*(off_t *)data = zv->zv_volblocksize;
		break;
	case DIOCGSTRIPEOFFSET:
		*(off_t *)data = 0;
		break;
	case DIOCGATTR: {
		spa_t *spa = dmu_objset_spa(zv->zv_objset);
		struct diocgattr_arg *arg = (struct diocgattr_arg *)data;
		uint64_t refd, avail, usedobjs, availobjs;

		/* Space figures are reported in DEV_BSIZE units. */
		if (strcmp(arg->name, "GEOM::candelete") == 0)
			arg->value.i = 1;
		else if (strcmp(arg->name, "blocksavail") == 0) {
			dmu_objset_space(zv->zv_objset, &refd, &avail,
			    &usedobjs, &availobjs);
			arg->value.off = avail / DEV_BSIZE;
		} else if (strcmp(arg->name, "blocksused") == 0) {
			dmu_objset_space(zv->zv_objset, &refd, &avail,
			    &usedobjs, &availobjs);
			arg->value.off = refd / DEV_BSIZE;
		} else if (strcmp(arg->name, "poolblocksavail") == 0) {
			avail = metaslab_class_get_space(spa_normal_class(spa));
			avail -= metaslab_class_get_alloc(
			    spa_normal_class(spa));
			arg->value.off = avail / DEV_BSIZE;
		} else if (strcmp(arg->name, "poolblocksused") == 0) {
			refd = metaslab_class_get_alloc(spa_normal_class(spa));
			arg->value.off = refd / DEV_BSIZE;
		} else
			error = SET_ERROR(ENOIOCTL);
		break;
	}
	case FIOSEEKHOLE:
	case FIOSEEKDATA: {
		off_t *off = (off_t *)data;
		uint64_t noff;
		boolean_t hole;

		hole = (cmd == FIOSEEKHOLE);
		noff = *off;
		/* Hold the whole range as reader while searching. */
		lr = zfs_rangelock_enter(&zv->zv_rangelock, 0, UINT64_MAX,
		    RL_READER);
		error = dmu_offset_next(zv->zv_objset, ZVOL_OBJ, hole, &noff);
		zfs_rangelock_exit(lr);
		*off = noff;
		break;
	}
	default:
		error = SET_ERROR(ENOIOCTL);
		break;
	}

	return (error);
}
/*
* Misc. helpers
*/
/*
 * Open the ZIL for a zvol if it has not been opened yet.
 *
 * The caller must hold zv_suspend_lock as reader.  The lock is held as
 * reader again on return, but it may have been released and re-acquired
 * in between when the in-place upgrade to writer did not succeed.
 */
static void
zvol_ensure_zilog(zvol_state_t *zv)
{
	ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock));

	/* Nothing to do when a ZIL is already attached. */
	if (zv->zv_zilog != NULL)
		return;

	/*
	 * This is the first write to the zvol, so a ZIL has to be opened.
	 * zv->zv_zilog is protected by zv_suspend_lock rather than
	 * zv_state_lock, which spares this path an additional lock; the
	 * price is that the hold must become a writer hold here.
	 */
	if (!rw_tryupgrade(&zv->zv_suspend_lock)) {
		rw_exit(&zv->zv_suspend_lock);
		rw_enter(&zv->zv_suspend_lock, RW_WRITER);
	}

	/* Look again now that the lock is held as writer. */
	if (zv->zv_zilog == NULL) {
		zv->zv_zilog = zil_open(zv->zv_objset,
		    zvol_get_data, &zv->zv_kstat.dk_zil_sums);
		zv->zv_flags |= ZVOL_WRITTEN_TO;
		/* replay / destroy done in zvol_os_create_minor() */
		VERIFY0(zv->zv_zilog->zl_header->zh_flags &
		    ZIL_REPLAY_NEEDED);
	}

	rw_downgrade(&zv->zv_suspend_lock);
}
2022-02-07 19:24:38 +01:00
boolean_t
zvol_os_is_zvol(const char *device)
2020-04-14 11:36:28 -07:00
{
return (device && strncmp(device, ZVOL_DIR, strlen(ZVOL_DIR)) == 0);
}
2022-02-07 19:24:38 +01:00
/*
 * Rename an existing zvol to newname.
 *
 * The caller must hold zvol_state_lock and zv->zv_state_lock (both are
 * asserted).  The zvol is re-hashed under the new name, its GEOM provider
 * or character device is replaced by one named "<ZVOL_DRIVER>/<newname>",
 * and finally zv_name and the dataset kstats are updated.
 */
void
zvol_os_rename_minor(zvol_state_t *zv, const char *newname)
2020-04-14 11:36:28 -07:00
{
ASSERT(RW_LOCK_HELD(&zvol_state_lock));
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
2021-12-23 19:04:07 +00:00
/* Move to a new hashtable entry. */
zv->zv_hash = zvol_name_hash(newname);
2020-04-14 11:36:28 -07:00
hlist_del(&zv->zv_hlink);
hlist_add_head(&zv->zv_hlink, ZVOL_HT_HEAD(zv->zv_hash));
2020-11-17 09:50:52 -08:00
if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
2020-04-14 11:36:28 -07:00
struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
struct g_provider *pp = zsg->zsg_provider;
struct g_geom *gp;
g_topology_lock();
gp = pp->geom;
2020-10-21 17:23:08 +00:00
ASSERT3P(gp, !=, NULL);
2020-04-14 11:36:28 -07:00
/*
 * Wither the old provider with ENXIO and attach a fresh provider,
 * carrying the new name, to the same geom.
 */
zsg->zsg_provider = NULL;
g_wither_provider(pp, ENXIO);
pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, newname);
pp->flags |= G_PF_DIRECT_RECEIVE | G_PF_DIRECT_SEND;
pp->sectorsize = DEV_BSIZE;
pp->mediasize = zv->zv_volsize;
pp->private = zv;
zsg->zsg_provider = pp;
g_error_provider(pp, 0);
g_topology_unlock();
2020-11-17 09:50:52 -08:00
} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
2020-04-14 11:36:28 -07:00
struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;
struct cdev *dev;
struct make_dev_args args;
dev = zsd->zsd_cdev;
if (dev != NULL) {
/*
 * Destroy the old device node.  If it was still open, the open
 * state is reset here and zvol_last_close() is called directly.
 */
destroy_dev(dev);
dev = zsd->zsd_cdev = NULL;
if (zv->zv_open_count > 0) {
zv->zv_flags &= ~ZVOL_EXCL;
zv->zv_open_count = 0;
/* XXX need suspend lock but lock order */
zvol_last_close(zv);
}
}
make_dev_args_init(&args);
args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
args.mda_devsw = &zvol_cdevsw;
args.mda_cr = NULL;
args.mda_uid = UID_ROOT;
args.mda_gid = GID_OPERATOR;
args.mda_mode = 0640;
args.mda_si_drv2 = zv;
/*
 * A make_dev_s() failure is not reported to the caller; zsd_cdev
 * is only assigned on success.
 */
if (make_dev_s(&args, &dev, "%s/%s", ZVOL_DRIVER, newname)
== 0) {
2021-07-22 12:22:14 -04:00
dev->si_iosize_max = maxphys;
2020-04-14 11:36:28 -07:00
zsd->zsd_cdev = dev;
}
}
/* Record the new name last, after the device objects were replaced. */
strlcpy(zv->zv_name, newname, sizeof (zv->zv_name));
dataset_kstats_rename(&zv->zv_kstat, newname);
2020-04-14 11:36:28 -07:00
}
/*
* Remove minor node for the specified volume.
*/
2022-02-07 19:24:38 +01:00
void
zvol_os_free(zvol_state_t *zv)
2020-04-14 11:36:28 -07:00
{
ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock));
ASSERT(!MUTEX_HELD(&zv->zv_state_lock));
2020-10-21 17:23:08 +00:00
ASSERT0(zv->zv_open_count);
2020-04-14 11:36:28 -07:00
ZFS_LOG(1, "ZVOL %s destroyed.", zv->zv_name);
rw_destroy(&zv->zv_suspend_lock);
zfs_rangelock_fini(&zv->zv_rangelock);
2020-11-17 09:50:52 -08:00
if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
2020-04-14 11:36:28 -07:00
struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
2020-10-21 18:09:17 +00:00
struct g_provider *pp __maybe_unused = zsg->zsg_provider;
ASSERT3P(pp->private, ==, NULL);
2020-04-14 11:36:28 -07:00
g_topology_lock();
zvol_geom_destroy(zv);
g_topology_unlock();
mtx_destroy(&zsg->zsg_queue_mtx);
2020-11-17 09:50:52 -08:00
} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
2020-04-14 11:36:28 -07:00
struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;
struct cdev *dev = zsd->zsd_cdev;
2021-07-22 12:22:14 -04:00
if (dev != NULL) {
ASSERT3P(dev->si_drv2, ==, NULL);
destroy_dev(dev);
2022-02-01 20:00:57 -09:00
knlist_clear(&zsd->zsd_selinfo.si_note, 0);
knlist_destroy(&zsd->zsd_selinfo.si_note);
2021-07-22 12:22:14 -04:00
}
2020-04-14 11:36:28 -07:00
}
mutex_destroy(&zv->zv_state_lock);
cv_destroy(&zv->zv_removing_cv);
2020-06-05 20:17:02 -04:00
dataset_kstats_destroy(&zv->zv_kstat);
2020-04-14 11:36:28 -07:00
kmem_free(zv->zv_zso, sizeof (struct zvol_state_os));
kmem_free(zv, sizeof (zvol_state_t));
zvol_minors--;
}
/*
 * Create a minor node (plus a whole lot more) for the specified volume.
 *
 * Steps, in order:
 *  - fail with EEXIST if a zvol with this name is already registered;
 *  - temporarily own the objset to read the object info, the "size" ZAP
 *    entry and the volmode property (falling back to zvol_volmode when the
 *    property lookup fails or yields ZFS_VOLMODE_DEFAULT);
 *  - allocate and initialise the zvol_state_t and create either a GEOM
 *    provider or a character device named "<ZVOL_DRIVER>/<name>";
 *  - open the ZIL and replay or destroy it when the pool is writeable;
 *  - disown the objset, and on success insert the zvol into the global
 *    table and bump zvol_minors.
 *
 * Returns 0 on success or the error of the step that failed.
 */
2022-02-07 19:24:38 +01:00
int
zvol_os_create_minor(const char *name)
2020-04-14 11:36:28 -07:00
{
zvol_state_t *zv;
objset_t *os;
dmu_object_info_t *doi;
uint64_t volsize;
uint64_t volmode, hash;
int error;
bool replayed_zil = B_FALSE;
2020-04-14 11:36:28 -07:00
ZFS_LOG(1, "Creating ZVOL %s...", name);
hash = zvol_name_hash(name);
/* A successful lookup returns with zv_state_lock held; drop it again. */
if ((zv = zvol_find_by_name_hash(name, hash, RW_NONE)) != NULL) {
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
mutex_exit(&zv->zv_state_lock);
return (SET_ERROR(EEXIST));
}
DROP_GIANT();
2020-04-14 11:36:28 -07:00
doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);
2021-12-23 19:04:07 +00:00
/* Lie and say we're read-only. */
error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, B_TRUE, FTAG, &os);
2020-04-14 11:36:28 -07:00
if (error)
goto out_doi;
error = dmu_object_info(os, ZVOL_OBJ, doi);
if (error)
goto out_dmu_objset_disown;
error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
if (error)
goto out_dmu_objset_disown;
/* A failed volmode lookup is not fatal: use the zvol_volmode default. */
error = dsl_prop_get_integer(name,
zfs_prop_to_name(ZFS_PROP_VOLMODE), &volmode, NULL);
if (error || volmode == ZFS_VOLMODE_DEFAULT)
2020-04-14 11:36:28 -07:00
volmode = zvol_volmode;
error = 0;
2020-04-14 11:36:28 -07:00
/*
* zvol_alloc equivalent ...
*/
zv = kmem_zalloc(sizeof (*zv), KM_SLEEP);
zv->zv_hash = hash;
mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&zv->zv_removing_cv, NULL, CV_DEFAULT, NULL);
2020-04-14 11:36:28 -07:00
zv->zv_zso = kmem_zalloc(sizeof (struct zvol_state_os), KM_SLEEP);
2020-11-17 09:50:52 -08:00
zv->zv_volmode = volmode;
if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
2020-04-14 11:36:28 -07:00
struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
struct g_provider *pp;
struct g_geom *gp;
zsg->zsg_state = ZVOL_GEOM_UNINIT;
mtx_init(&zsg->zsg_queue_mtx, "zvol", NULL, MTX_DEF);
/*
 * The topology lock taken here stays held until after
 * zvol_geom_run() near the end of the function.
 */
g_topology_lock();
gp = g_new_geomf(&zfs_zvol_class, "zfs::zvol::%s", name);
gp->start = zvol_geom_bio_start;
gp->access = zvol_geom_access;
pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, name);
pp->flags |= G_PF_DIRECT_RECEIVE | G_PF_DIRECT_SEND;
pp->sectorsize = DEV_BSIZE;
/* The media size starts at 0 here. */
pp->mediasize = 0;
pp->private = zv;
zsg->zsg_provider = pp;
bioq_init(&zsg->zsg_queue);
2020-11-17 09:50:52 -08:00
} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
2020-04-14 11:36:28 -07:00
struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;
struct cdev *dev;
struct make_dev_args args;
make_dev_args_init(&args);
args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
args.mda_devsw = &zvol_cdevsw;
args.mda_cr = NULL;
args.mda_uid = UID_ROOT;
args.mda_gid = GID_OPERATOR;
args.mda_mode = 0640;
args.mda_si_drv2 = zv;
2021-07-22 12:22:14 -04:00
/*
 * A make_dev_s() failure is not reported; zsd_cdev then keeps the
 * NULL it got from kmem_zalloc() and no knlist is initialised.
 */
if (make_dev_s(&args, &dev, "%s/%s", ZVOL_DRIVER, name)
== 0) {
dev->si_iosize_max = maxphys;
zsd->zsd_cdev = dev;
2022-02-01 20:00:57 -09:00
knlist_init_sx(&zsd->zsd_selinfo.si_note,
&zv->zv_state_lock);
2020-04-14 11:36:28 -07:00
}
}
(void) strlcpy(zv->zv_name, name, MAXPATHLEN);
rw_init(&zv->zv_suspend_lock, NULL, RW_DEFAULT, NULL);
zfs_rangelock_init(&zv->zv_rangelock, NULL, NULL);
if (dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os)))
zv->zv_flags |= ZVOL_RDONLY;
zv->zv_volblocksize = doi->doi_data_block_size;
zv->zv_volsize = volsize;
zv->zv_objset = os;
ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL);
/*
 * NOTE(review): when dataset_kstats_create() fails, the jump below skips
 * g_topology_unlock() (it is gated on error == 0) even though the GEOM
 * branch above took the topology lock, and zv, zv->zv_zso and the
 * geom/cdev created above are not released on this path.  Confirm
 * whether this failure is reachable and whether cleanup is needed.
 */
error = dataset_kstats_create(&zv->zv_kstat, zv->zv_objset);
if (error)
goto out_dmu_objset_disown;
2021-03-07 18:49:58 +01:00
ASSERT3P(zv->zv_zilog, ==, NULL);
/*
 * Open the ZIL only for the replay / destroy step below; zv_zilog is
 * reset to NULL again before this function returns.
 */
zv->zv_zilog = zil_open(os, zvol_get_data, &zv->zv_kstat.dk_zil_sums);
2020-04-14 11:36:28 -07:00
if (spa_writeable(dmu_objset_spa(os))) {
if (zil_replay_disable)
replayed_zil = zil_destroy(zv->zv_zilog, B_FALSE);
2020-04-14 11:36:28 -07:00
else
replayed_zil = zil_replay(os, zv, zvol_replay_vector);
2020-04-14 11:36:28 -07:00
}
if (replayed_zil)
zil_close(zv->zv_zilog);
2021-03-07 18:49:58 +01:00
zv->zv_zilog = NULL;
2020-04-14 11:36:28 -07:00
/* TODO: prefetch for geom tasting */
2020-04-14 11:36:28 -07:00
/* The objset is disowned just below, so drop the reference to it. */
zv->zv_objset = NULL;
out_dmu_objset_disown:
dmu_objset_disown(os, B_TRUE, FTAG);
if (error == 0 && volmode == ZFS_VOLMODE_GEOM) {
zvol_geom_run(zv);
2020-04-14 11:36:28 -07:00
g_topology_unlock();
}
out_doi:
kmem_free(doi, sizeof (dmu_object_info_t));
if (error == 0) {
/* Publish the new zvol in the global table. */
rw_enter(&zvol_state_lock, RW_WRITER);
zvol_insert(zv);
zvol_minors++;
rw_exit(&zvol_state_lock);
ZFS_LOG(1, "ZVOL %s created.", name);
2020-04-14 11:36:28 -07:00
}
PICKUP_GIANT();
return (error);
}
2022-02-07 19:24:38 +01:00
/*
 * Detach a zvol from its GEOM provider or character device by clearing
 * the back-pointer stored there.
 *
 * The caller must hold zvol_state_lock (asserted).  In GEOM mode the state
 * is set to ZVOL_GEOM_STOPPED, one sleeper on the bio queue is woken, and
 * the function sleeps until the state reads ZVOL_GEOM_RUNNING again.
 * In DEV mode only si_drv2 is reset.
 */
void
zvol_os_clear_private(zvol_state_t *zv)
{
	struct zvol_state_geom *geom_state;
	struct g_provider *provider;

	ASSERT(RW_LOCK_HELD(&zvol_state_lock));

	if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
		struct cdev *cdev = zv->zv_zso->zso_dev.zsd_cdev;

		if (cdev != NULL)
			cdev->si_drv2 = NULL;
		return;
	}

	if (zv->zv_volmode != ZFS_VOLMODE_GEOM)
		return;

	geom_state = &zv->zv_zso->zso_geom;
	provider = geom_state->zsg_provider;

	/* Already cleared: nothing left to do. */
	if (provider->private == NULL)
		return;

	mtx_lock(&geom_state->zsg_queue_mtx);
	geom_state->zsg_state = ZVOL_GEOM_STOPPED;
	provider->private = NULL;
	wakeup_one(&geom_state->zsg_queue);
	/* msleep() releases zsg_queue_mtx while this thread is asleep. */
	while (geom_state->zsg_state != ZVOL_GEOM_RUNNING) {
		msleep(&geom_state->zsg_state, &geom_state->zsg_queue_mtx,
		    0, "zvol:w", 0);
	}
	mtx_unlock(&geom_state->zsg_queue_mtx);

	ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock));
}
2022-02-07 19:24:38 +01:00
/*
 * Record a new volume size and propagate it to the device layer.
 *
 * zv_volsize is always updated.  In GEOM mode the provider's media size is
 * set directly when it was still zero and resized through
 * g_resize_provider() otherwise; in DEV mode a NOTE_ATTRIB knote is posted.
 *
 * Returns 0 on success, or ENXIO when the GEOM provider has already been
 * detached from the zvol (pp->private is NULL).
 */
int
zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize)
{
	struct g_provider *provider;

	zv->zv_volsize = volsize;

	if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
		struct zvol_state_dev *dev_state = &zv->zv_zso->zso_dev;

		KNOTE_UNLOCKED(&dev_state->zsd_selinfo.si_note, NOTE_ATTRIB);
		return (0);
	}

	if (zv->zv_volmode != ZFS_VOLMODE_GEOM)
		return (0);

	provider = zv->zv_zso->zso_geom.zsg_provider;

	g_topology_lock();

	if (provider->private == NULL) {
		g_topology_unlock();
		return (SET_ERROR(ENXIO));
	}

	/*
	 * A media size of zero means the size has not been initialised yet
	 * (the ZVOL does that on first open), so no resize event is raised
	 * in that case; it is not a real resize.
	 */
	if (provider->mediasize != 0)
		g_resize_provider(provider, zv->zv_volsize);
	else
		provider->mediasize = zv->zv_volsize;

	g_topology_unlock();
	return (0);
}
2022-02-07 19:24:38 +01:00
/*
 * Platform hook for read-only state changes; intentionally a no-op on
 * FreeBSD.  Both parameters are unused.
 */
void
zvol_os_set_disk_ro(zvol_state_t *zv, int flags)
{
	/*
	 * Switching between ro and rw is handled by zvol_set_ro(), which
	 * sets or clears the ZVOL_RDONLY flag.  Changing the readonly zfs
	 * property needs no further FreeBSD-specific work.
	 */
	(void) zv;
	(void) flags;
}
2022-02-07 19:24:38 +01:00
/*
 * Platform hook for capacity changes; intentionally a no-op on FreeBSD.
 * Both parameters are unused.
 */
void
zvol_os_set_capacity(zvol_state_t *zv, uint64_t capacity)
{
	/*
	 * The ZVOL size/capacity is changed by zvol_set_volsize().  All of
	 * the platform-specific work is done by zvol_os_update_volsize(),
	 * so nothing is left to do here.
	 */
	(void) zv;
	(void) capacity;
}
/*
* Public interfaces
*/
/*
 * Report whether any zvol minors are currently accounted for.
 *
 * Returns 1 when zvol_minors is non-zero and 0 otherwise.
 */
int
zvol_busy(void)
{
	if (zvol_minors == 0)
		return (0);

	return (1);
}
/*
 * Public init hook: runs zvol_init_impl() and always returns 0.
 */
int
zvol_init(void)
{
zvol_init_impl();
return (0);
}
/*
 * Public teardown hook: runs zvol_fini_impl().
 */
void
zvol_fini(void)
{
zvol_fini_impl();
}