zvol: Enable zvol threading functionality on FreeBSD

Make zvol I/O request processing asynchronous on the FreeBSD side in
some cases. Clone the zvol threading logic and the required module
parameters from the Linux side. Make the zvol threadpool
creation/destruction logic shared between Linux and FreeBSD.
I/O requests are processed asynchronously in the following cases:
- volmode=geom: the request arrives on a GEOM thread or on a thread
that cannot sleep.
- volmode=cdev: the request came in through the struct cdevsw
.d_strategy routine, which means it is an AIO request.
In all other cases I/O requests are processed synchronously. The
volthreading zvol property is ignored on the FreeBSD side. The
resulting dispatch policy is sketched below.
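
For readers skimming past the diff, the following is a minimal
userspace model of that dispatch policy. It is an illustration only,
not code from this commit: request_is_sync() and its boolean
parameters are hypothetical stand-ins for g_is_geom_thread(),
THREAD_CAN_SLEEP() and the zvol_request_sync tunable, which the real
code consults directly.

#include <stdbool.h>
#include <stdio.h>

/*
 * Model of the sync-vs-async decision. In the patch, the GEOM entry
 * point computes "sync" as !g_is_geom_thread(curthread) &&
 * THREAD_CAN_SLEEP(), the cdev .d_strategy entry point passes
 * B_FALSE, and zvol_geom_bio_strategy() takes the synchronous path
 * when (sync || zvol_request_sync).
 */
static bool
request_is_sync(bool is_geom_thread, bool can_sleep,
    bool is_cdev_strategy, bool request_sync_tunable)
{
	if (request_sync_tunable)
		return (true);	/* tunable forces synchronous handling */
	if (is_cdev_strategy)
		return (false);	/* AIO via .d_strategy: always async */
	return (!is_geom_thread && can_sleep);
}

int
main(void)
{
	printf("geom thread:      %s\n",
	    request_is_sync(true, true, false, false) ? "sync" : "async");
	printf("cannot sleep:     %s\n",
	    request_is_sync(false, false, false, false) ? "sync" : "async");
	printf("cdev .d_strategy: %s\n",
	    request_is_sync(false, true, true, false) ? "sync" : "async");
	printf("ordinary caller:  %s\n",
	    request_is_sync(false, true, false, false) ? "sync" : "async");
	return (0);
}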

Sponsored-by: vStack, Inc.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: @ImAwsumm
Signed-off-by: Fedor Uporov <fuporov.vstack@gmail.com>
Closes #17169
Fedor Uporov 2025-05-08 22:25:40 +03:00 committed by GitHub
parent f13d760aa8
commit 1a8f5ad3b0
8 changed files with 253 additions and 244 deletions


@@ -60,6 +60,32 @@ typedef struct zvol_state {
boolean_t zv_threading; /* volthreading property */
} zvol_state_t;
/*
* zvol taskqs
*/
typedef struct zv_taskq {
uint_t tqs_cnt;
taskq_t **tqs_taskq;
} zv_taskq_t;
typedef struct zv_request_stack {
zvol_state_t *zv;
struct bio *bio;
#ifdef __linux__
struct request *rq;
#endif
} zv_request_t;
typedef struct zv_request_task {
zv_request_t zvr;
taskq_ent_t ent;
} zv_request_task_t;
/*
* Switch taskq at multiple of 512 MB offset. This can be set to a lower value
* to utilize more threads for small files but may affect prefetch hits.
*/
#define ZVOL_TASKQ_OFFSET_SHIFT 29
extern krwlock_t zvol_state_lock;
#define ZVOL_HT_SIZE 1024
@@ -69,6 +95,10 @@ extern zil_replay_func_t *const zvol_replay_vector[TX_MAX_TYPE];
extern unsigned int zvol_volmode;
extern unsigned int zvol_inhibit_dev;
extern unsigned int zvol_threads;
extern unsigned int zvol_num_taskqs;
extern unsigned int zvol_request_sync;
extern zv_taskq_t zvol_taskqs;
/*
* platform independent functions exported to platform code
@@ -94,6 +124,8 @@ int zvol_clone_range(zvol_state_handle_t *, uint64_t,
void zvol_log_clone_range(zilog_t *zilog, dmu_tx_t *tx, int txtype,
uint64_t off, uint64_t len, uint64_t blksz, const blkptr_t *bps,
size_t nbps);
zv_request_task_t *zv_request_task_create(zv_request_t zvr);
void zv_request_task_free(zv_request_task_t *task);
/*
* platform dependent functions exported to platform independent code


@@ -99,6 +99,7 @@
#include <geom/geom.h>
#include <sys/zvol.h>
#include <sys/zvol_impl.h>
#include <cityhash.h>
#include "zfs_namecheck.h"
@@ -112,12 +113,6 @@
#define ZVOL_RW_READ_HELD RW_READ_HELD
#endif
enum zvol_geom_state {
ZVOL_GEOM_UNINIT,
ZVOL_GEOM_STOPPED,
ZVOL_GEOM_RUNNING,
};
struct zvol_state_os {
#define zso_dev _zso_state._zso_dev
#define zso_geom _zso_state._zso_geom
@@ -131,9 +126,6 @@ struct zvol_state_os {
/* volmode=geom */
struct zvol_state_geom {
struct g_provider *zsg_provider;
struct bio_queue_head zsg_queue;
struct mtx zsg_queue_mtx;
enum zvol_geom_state zsg_state;
} _zso_geom;
} _zso_state;
int zso_dying;
@@ -169,7 +161,7 @@ static d_close_t zvol_cdev_close;
static d_ioctl_t zvol_cdev_ioctl;
static d_read_t zvol_cdev_read;
static d_write_t zvol_cdev_write;
static d_strategy_t zvol_geom_bio_strategy;
static d_strategy_t zvol_cdev_bio_strategy;
static d_kqfilter_t zvol_cdev_kqfilter;
static struct cdevsw zvol_cdevsw = {
@@ -181,7 +173,7 @@ static struct cdevsw zvol_cdevsw = {
.d_ioctl = zvol_cdev_ioctl,
.d_read = zvol_cdev_read,
.d_write = zvol_cdev_write,
.d_strategy = zvol_geom_bio_strategy,
.d_strategy = zvol_cdev_bio_strategy,
.d_kqfilter = zvol_cdev_kqfilter,
};
@@ -205,13 +197,11 @@ DECLARE_GEOM_CLASS(zfs_zvol_class, zfs_zvol);
static int zvol_geom_open(struct g_provider *pp, int flag, int count);
static int zvol_geom_close(struct g_provider *pp, int flag, int count);
static void zvol_geom_run(zvol_state_t *zv);
static void zvol_geom_destroy(zvol_state_t *zv);
static int zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace);
static void zvol_geom_worker(void *arg);
static void zvol_geom_bio_start(struct bio *bp);
static int zvol_geom_bio_getattr(struct bio *bp);
/* static d_strategy_t zvol_geom_bio_strategy; (declared elsewhere) */
static void zvol_geom_bio_strategy(struct bio *bp, boolean_t sync);
/*
* GEOM mode implementation
@@ -419,20 +409,6 @@ zvol_geom_close(struct g_provider *pp, int flag, int count)
return (0);
}
static void
zvol_geom_run(zvol_state_t *zv)
{
struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
struct g_provider *pp = zsg->zsg_provider;
ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM);
g_error_provider(pp, 0);
kproc_kthread_add(zvol_geom_worker, zv, &system_proc, NULL, 0, 0,
"zfskern", "zvol %s", pp->name + sizeof (ZVOL_DRIVER));
}
static void
zvol_geom_destroy(zvol_state_t *zv)
{
@@ -443,9 +419,6 @@ zvol_geom_destroy(zvol_state_t *zv)
g_topology_assert();
mutex_enter(&zv->zv_state_lock);
VERIFY3S(zsg->zsg_state, ==, ZVOL_GEOM_RUNNING);
mutex_exit(&zv->zv_state_lock);
zsg->zsg_provider = NULL;
g_wither_geom(pp->geom, ENXIO);
}
@@ -516,44 +489,10 @@ zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace)
return (error);
}
static void
zvol_geom_worker(void *arg)
{
zvol_state_t *zv = arg;
struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
struct bio *bp;
ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM);
thread_lock(curthread);
sched_prio(curthread, PRIBIO);
thread_unlock(curthread);
for (;;) {
mtx_lock(&zsg->zsg_queue_mtx);
bp = bioq_takefirst(&zsg->zsg_queue);
if (bp == NULL) {
if (zsg->zsg_state == ZVOL_GEOM_STOPPED) {
zsg->zsg_state = ZVOL_GEOM_RUNNING;
wakeup(&zsg->zsg_state);
mtx_unlock(&zsg->zsg_queue_mtx);
kthread_exit();
}
msleep(&zsg->zsg_queue, &zsg->zsg_queue_mtx,
PRIBIO | PDROP, "zvol:io", 0);
continue;
}
mtx_unlock(&zsg->zsg_queue_mtx);
zvol_geom_bio_strategy(bp);
}
}
static void
zvol_geom_bio_start(struct bio *bp)
{
zvol_state_t *zv = bp->bio_to->private;
struct zvol_state_geom *zsg;
boolean_t first;
if (zv == NULL) {
g_io_deliver(bp, ENXIO);
@@ -565,18 +504,8 @@ zvol_geom_bio_start(struct bio *bp)
return;
}
if (!THREAD_CAN_SLEEP()) {
zsg = &zv->zv_zso->zso_geom;
mtx_lock(&zsg->zsg_queue_mtx);
first = (bioq_first(&zsg->zsg_queue) == NULL);
bioq_insert_tail(&zsg->zsg_queue, bp);
mtx_unlock(&zsg->zsg_queue_mtx);
if (first)
wakeup_one(&zsg->zsg_queue);
return;
}
zvol_geom_bio_strategy(bp);
zvol_geom_bio_strategy(bp, !g_is_geom_thread(curthread) &&
THREAD_CAN_SLEEP());
}
static int
@@ -660,9 +589,10 @@ zvol_cdev_kqfilter(struct cdev *dev, struct knote *kn)
}
static void
zvol_geom_bio_strategy(struct bio *bp)
zvol_strategy_impl(zv_request_t *zvr)
{
zvol_state_t *zv;
struct bio *bp;
uint64_t off, volsize;
size_t resid;
char *addr;
@@ -673,11 +603,8 @@ zvol_geom_bio_strategy(struct bio *bp)
boolean_t is_dumpified;
boolean_t commit;
if (bp->bio_to)
zv = bp->bio_to->private;
else
zv = bp->bio_dev->si_drv2;
bp = zvr->bio;
zv = zvr->zv;
if (zv == NULL) {
error = SET_ERROR(ENXIO);
goto out;
@@ -813,6 +740,63 @@ out:
biofinish(bp, NULL, error);
}
static void
zvol_strategy_task(void *arg)
{
zv_request_task_t *task = arg;
zvol_strategy_impl(&task->zvr);
zv_request_task_free(task);
}
static void
zvol_geom_bio_strategy(struct bio *bp, boolean_t sync)
{
zv_taskq_t *ztqs = &zvol_taskqs;
zv_request_task_t *task;
zvol_state_t *zv;
uint_t tq_idx;
uint_t taskq_hash;
int error;
if (bp->bio_to)
zv = bp->bio_to->private;
else
zv = bp->bio_dev->si_drv2;
if (zv == NULL) {
error = SET_ERROR(ENXIO);
if (bp->bio_to)
g_io_deliver(bp, error);
else
biofinish(bp, NULL, error);
return;
}
zv_request_t zvr = {
.zv = zv,
.bio = bp,
};
if (sync || zvol_request_sync) {
zvol_strategy_impl(&zvr);
return;
}
taskq_hash = cityhash3((uintptr_t)zv, curcpu, bp->bio_offset >>
ZVOL_TASKQ_OFFSET_SHIFT);
tq_idx = taskq_hash % ztqs->tqs_cnt;
task = zv_request_task_create(zvr);
taskq_dispatch_ent(ztqs->tqs_taskq[tq_idx], zvol_strategy_task, task,
0, &task->ent);
}
static void
zvol_cdev_bio_strategy(struct bio *bp)
{
zvol_geom_bio_strategy(bp, B_FALSE);
}
/*
* Character device mode implementation
*/
@@ -1352,7 +1336,6 @@ zvol_os_free(zvol_state_t *zv)
g_topology_lock();
zvol_geom_destroy(zv);
g_topology_unlock();
mtx_destroy(&zsg->zsg_queue_mtx);
} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;
struct cdev *dev = zsd->zsd_cdev;
@@ -1432,9 +1415,6 @@ zvol_os_create_minor(const char *name)
struct g_provider *pp;
struct g_geom *gp;
zsg->zsg_state = ZVOL_GEOM_UNINIT;
mtx_init(&zsg->zsg_queue_mtx, "zvol", NULL, MTX_DEF);
g_topology_lock();
gp = g_new_geomf(&zfs_zvol_class, "zfs::zvol::%s", name);
gp->start = zvol_geom_bio_start;
@@ -1446,7 +1426,6 @@ zvol_os_create_minor(const char *name)
pp->private = zv;
zsg->zsg_provider = pp;
bioq_init(&zsg->zsg_queue);
} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;
struct cdev *dev;
@@ -1502,7 +1481,7 @@ out_dmu_objset_disown:
dmu_objset_disown(os, B_TRUE, FTAG);
if (error == 0 && volmode == ZFS_VOLMODE_GEOM) {
zvol_geom_run(zv);
g_error_provider(zv->zv_zso->zso_geom.zsg_provider, 0);
g_topology_unlock();
}
out_doi:
@@ -1529,14 +1508,7 @@ zvol_os_clear_private(zvol_state_t *zv)
if (pp->private == NULL) /* already cleared */
return;
mtx_lock(&zsg->zsg_queue_mtx);
zsg->zsg_state = ZVOL_GEOM_STOPPED;
pp->private = NULL;
wakeup_one(&zsg->zsg_queue);
while (zsg->zsg_state != ZVOL_GEOM_RUNNING)
msleep(&zsg->zsg_state, &zsg->zsg_queue_mtx,
0, "zvol:w", 0);
mtx_unlock(&zsg->zsg_queue_mtx);
ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock));
} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;
@@ -1606,8 +1578,7 @@ zvol_busy(void)
int
zvol_init(void)
{
zvol_init_impl();
return (0);
return (zvol_init_impl());
}
void


@@ -51,21 +51,13 @@ static void zvol_request_impl(zvol_state_t *zv, struct bio *bio,
struct request *rq, boolean_t force_sync);
static unsigned int zvol_major = ZVOL_MAJOR;
static unsigned int zvol_request_sync = 0;
static unsigned int zvol_prefetch_bytes = (128 * 1024);
static unsigned long zvol_max_discard_blocks = 16384;
/*
* Switch taskq at multiple of 512 MB offset. This can be set to a lower value
* to utilize more threads for small files but may affect prefetch hits.
*/
#define ZVOL_TASKQ_OFFSET_SHIFT 29
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
static unsigned int zvol_open_timeout_ms = 1000;
#endif
static unsigned int zvol_threads = 0;
static unsigned int zvol_blk_mq_threads = 0;
static unsigned int zvol_blk_mq_actual_threads;
static boolean_t zvol_use_blk_mq = B_FALSE;
@@ -82,8 +74,6 @@ static boolean_t zvol_use_blk_mq = B_FALSE;
*/
static unsigned int zvol_blk_mq_blocks_per_thread = 8;
static unsigned int zvol_num_taskqs = 0;
#ifndef BLKDEV_DEFAULT_RQ
/* BLKDEV_MAX_RQ was renamed to BLKDEV_DEFAULT_RQ in the 5.16 kernel */
#define BLKDEV_DEFAULT_RQ BLKDEV_MAX_RQ
@@ -117,45 +107,8 @@ struct zvol_state_os {
boolean_t use_blk_mq;
};
typedef struct zv_taskq {
uint_t tqs_cnt;
taskq_t **tqs_taskq;
} zv_taskq_t;
static zv_taskq_t zvol_taskqs;
static struct ida zvol_ida;
typedef struct zv_request_stack {
zvol_state_t *zv;
struct bio *bio;
struct request *rq;
} zv_request_t;
typedef struct zv_work {
struct request *rq;
struct work_struct work;
} zv_work_t;
typedef struct zv_request_task {
zv_request_t zvr;
taskq_ent_t ent;
} zv_request_task_t;
static zv_request_task_t *
zv_request_task_create(zv_request_t zvr)
{
zv_request_task_t *task;
task = kmem_alloc(sizeof (zv_request_task_t), KM_SLEEP);
taskq_init_ent(&task->ent);
task->zvr = zvr;
return (task);
}
static void
zv_request_task_free(zv_request_task_t *task)
{
kmem_free(task, sizeof (*task));
}
/*
* This is called when a new block multiqueue request comes in. A request
* contains one or more BIOs.
@@ -1793,59 +1746,14 @@ zvol_init(void)
{
int error;
/*
* zvol_threads is the module param the user passes in.
*
* zvol_actual_threads is what we use internally, since the user can
* pass zvol_thread = 0 to mean "use all the CPUs" (the default).
*/
static unsigned int zvol_actual_threads;
if (zvol_threads == 0) {
/*
* See dde9380a1 for why 32 was chosen here. This should
* probably be refined to be some multiple of the number
* of CPUs.
*/
zvol_actual_threads = MAX(num_online_cpus(), 32);
} else {
zvol_actual_threads = MIN(MAX(zvol_threads, 1), 1024);
error = zvol_init_impl();
if (error) {
printk(KERN_INFO "ZFS: zvol_init_impl() failed %d\n", error);
return (error);
}
/*
* Use atleast 32 zvol_threads but for many core system,
* prefer 6 threads per taskq, but no more taskqs
* than threads in them on large systems.
*
* taskq total
* cpus taskqs threads threads
* ------- ------- ------- -------
* 1 1 32 32
* 2 1 32 32
* 4 1 32 32
* 8 2 16 32
* 16 3 11 33
* 32 5 7 35
* 64 8 8 64
* 128 11 12 132
* 256 16 16 256
*/
zv_taskq_t *ztqs = &zvol_taskqs;
uint_t num_tqs = MIN(num_online_cpus(), zvol_num_taskqs);
if (num_tqs == 0) {
num_tqs = 1 + num_online_cpus() / 6;
while (num_tqs * num_tqs > zvol_actual_threads)
num_tqs--;
}
uint_t per_tq_thread = zvol_actual_threads / num_tqs;
if (per_tq_thread * num_tqs < zvol_actual_threads)
per_tq_thread++;
ztqs->tqs_cnt = num_tqs;
ztqs->tqs_taskq = kmem_alloc(num_tqs * sizeof (taskq_t *), KM_SLEEP);
error = register_blkdev(zvol_major, ZVOL_DRIVER);
if (error) {
kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt * sizeof (taskq_t *));
ztqs->tqs_taskq = NULL;
printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error);
return (error);
}
@@ -1864,25 +1772,6 @@ zvol_init(void)
1024);
}
for (uint_t i = 0; i < num_tqs; i++) {
char name[32];
(void) snprintf(name, sizeof (name), "%s_tq-%u",
ZVOL_DRIVER, i);
ztqs->tqs_taskq[i] = taskq_create(name, per_tq_thread,
maxclsyspri, per_tq_thread, INT_MAX,
TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
if (ztqs->tqs_taskq[i] == NULL) {
for (int j = i - 1; j >= 0; j--)
taskq_destroy(ztqs->tqs_taskq[j]);
unregister_blkdev(zvol_major, ZVOL_DRIVER);
kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt *
sizeof (taskq_t *));
ztqs->tqs_taskq = NULL;
return (-ENOMEM);
}
}
zvol_init_impl();
ida_init(&zvol_ida);
return (0);
}
@@ -1890,21 +1779,9 @@ zvol_init(void)
void
zvol_fini(void)
{
zv_taskq_t *ztqs = &zvol_taskqs;
zvol_fini_impl();
unregister_blkdev(zvol_major, ZVOL_DRIVER);
if (ztqs->tqs_taskq == NULL) {
ASSERT3U(ztqs->tqs_cnt, ==, 0);
} else {
for (uint_t i = 0; i < ztqs->tqs_cnt; i++) {
ASSERT3P(ztqs->tqs_taskq[i], !=, NULL);
taskq_destroy(ztqs->tqs_taskq[i]);
}
kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt *
sizeof (taskq_t *));
ztqs->tqs_taskq = NULL;
}
zvol_fini_impl();
ida_destroy(&zvol_ida);
}
@@ -1915,19 +1792,9 @@ MODULE_PARM_DESC(zvol_inhibit_dev, "Do not create zvol device nodes");
module_param(zvol_major, uint, 0444);
MODULE_PARM_DESC(zvol_major, "Major number for zvol device");
module_param(zvol_threads, uint, 0444);
MODULE_PARM_DESC(zvol_threads, "Number of threads to handle I/O requests. Set"
"to 0 to use all active CPUs");
module_param(zvol_request_sync, uint, 0644);
MODULE_PARM_DESC(zvol_request_sync, "Synchronously handle bio requests");
module_param(zvol_max_discard_blocks, ulong, 0444);
MODULE_PARM_DESC(zvol_max_discard_blocks, "Max number of blocks to discard");
module_param(zvol_num_taskqs, uint, 0444);
MODULE_PARM_DESC(zvol_num_taskqs, "Number of zvol taskqs");
module_param(zvol_prefetch_bytes, uint, 0644);
MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end");


@@ -90,11 +90,15 @@
unsigned int zvol_inhibit_dev = 0;
unsigned int zvol_volmode = ZFS_VOLMODE_GEOM;
unsigned int zvol_threads = 0;
unsigned int zvol_num_taskqs = 0;
unsigned int zvol_request_sync = 0;
struct hlist_head *zvol_htable;
static list_t zvol_state_list;
krwlock_t zvol_state_lock;
extern int zfs_bclone_wait_dirty;
zv_taskq_t zvol_taskqs;
typedef enum {
ZVOL_ASYNC_REMOVE_MINORS,
@@ -111,6 +115,22 @@ typedef struct {
uint64_t value;
} zvol_task_t;
zv_request_task_t *
zv_request_task_create(zv_request_t zvr)
{
zv_request_task_t *task;
task = kmem_alloc(sizeof (zv_request_task_t), KM_SLEEP);
taskq_init_ent(&task->ent);
task->zvr = zvr;
return (task);
}
void
zv_request_task_free(zv_request_task_t *task)
{
kmem_free(task, sizeof (*task));
}
uint64_t
zvol_name_hash(const char *name)
{
@@ -2018,6 +2038,75 @@ zvol_init_impl(void)
{
int i;
/*
* zvol_threads is the module param the user passes in.
*
* zvol_actual_threads is what we use internally, since the user can
* pass zvol_thread = 0 to mean "use all the CPUs" (the default).
*/
static unsigned int zvol_actual_threads;
if (zvol_threads == 0) {
/*
* See dde9380a1 for why 32 was chosen here. This should
* probably be refined to be some multiple of the number
* of CPUs.
*/
zvol_actual_threads = MAX(max_ncpus, 32);
} else {
zvol_actual_threads = MIN(MAX(zvol_threads, 1), 1024);
}
/*
* Use at least 32 zvol_threads but for many core system,
* prefer 6 threads per taskq, but no more taskqs
* than threads in them on large systems.
*
* taskq total
* cpus taskqs threads threads
* ------- ------- ------- -------
* 1 1 32 32
* 2 1 32 32
* 4 1 32 32
* 8 2 16 32
* 16 3 11 33
* 32 5 7 35
* 64 8 8 64
* 128 11 12 132
* 256 16 16 256
*/
zv_taskq_t *ztqs = &zvol_taskqs;
int num_tqs = MIN(max_ncpus, zvol_num_taskqs);
if (num_tqs == 0) {
num_tqs = 1 + max_ncpus / 6;
while (num_tqs * num_tqs > zvol_actual_threads)
num_tqs--;
}
int per_tq_thread = zvol_actual_threads / num_tqs;
if (per_tq_thread * num_tqs < zvol_actual_threads)
per_tq_thread++;
ztqs->tqs_cnt = num_tqs;
ztqs->tqs_taskq = kmem_alloc(num_tqs * sizeof (taskq_t *), KM_SLEEP);
for (uint_t i = 0; i < num_tqs; i++) {
char name[32];
(void) snprintf(name, sizeof (name), "%s_tq-%u",
ZVOL_DRIVER, i);
ztqs->tqs_taskq[i] = taskq_create(name, per_tq_thread,
maxclsyspri, per_tq_thread, INT_MAX,
TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
if (ztqs->tqs_taskq[i] == NULL) {
for (int j = i - 1; j >= 0; j--)
taskq_destroy(ztqs->tqs_taskq[j]);
kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt *
sizeof (taskq_t *));
ztqs->tqs_taskq = NULL;
return (SET_ERROR(ENOMEM));
}
}
list_create(&zvol_state_list, sizeof (zvol_state_t),
offsetof(zvol_state_t, zv_next));
rw_init(&zvol_state_lock, NULL, RW_DEFAULT, NULL);
@@ -2033,6 +2122,8 @@ zvol_init_impl(void)
void
zvol_fini_impl(void)
{
zv_taskq_t *ztqs = &zvol_taskqs;
zvol_remove_minors_impl(NULL);
/*
@@ -2046,4 +2137,23 @@ zvol_fini_impl(void)
kmem_free(zvol_htable, ZVOL_HT_SIZE * sizeof (struct hlist_head));
list_destroy(&zvol_state_list);
rw_destroy(&zvol_state_lock);
if (ztqs->tqs_taskq == NULL) {
ASSERT3U(ztqs->tqs_cnt, ==, 0);
} else {
for (uint_t i = 0; i < ztqs->tqs_cnt; i++) {
ASSERT3P(ztqs->tqs_taskq[i], !=, NULL);
taskq_destroy(ztqs->tqs_taskq[i]);
}
kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt *
sizeof (taskq_t *));
ztqs->tqs_taskq = NULL;
}
}
ZFS_MODULE_PARAM(zfs, , zvol_threads, UINT, ZMOD_RW,
"Number of threads for I/O requests. Set to 0 to use all active CPUs");
ZFS_MODULE_PARAM(zfs, , zvol_num_taskqs, UINT, ZMOD_RW,
"Number of zvol taskqs");
ZFS_MODULE_PARAM(zfs, , zvol_request_sync, UINT, ZMOD_RW,
"Synchronously handle bio requests");


@@ -3386,17 +3386,21 @@ function set_tunable_impl
function save_tunable
{
if tunable_exists $1 ; then
[[ ! -d $TEST_BASE_DIR ]] && return 1
[[ -e $TEST_BASE_DIR/tunable-$1 ]] && return 2
echo "$(get_tunable """$1""")" > "$TEST_BASE_DIR"/tunable-"$1"
fi
}
function restore_tunable
{
if tunable_exists $1 ; then
[[ ! -e $TEST_BASE_DIR/tunable-$1 ]] && return 1
val="$(cat $TEST_BASE_DIR/tunable-"""$1""")"
set_tunable64 "$1" "$val"
rm $TEST_BASE_DIR/tunable-$1
fi
}
#


@@ -102,6 +102,7 @@ VDEV_VALIDATE_SKIP vdev.validate_skip vdev_validate_skip
VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev
VOL_MODE vol.mode zvol_volmode
VOL_RECURSIVE vol.recursive UNSUPPORTED
VOL_REQUEST_SYNC zvol_request_sync zvol_request_sync
VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq
BCLONE_ENABLED bclone_enabled zfs_bclone_enabled
BCLONE_WAIT_DIRTY bclone_wait_dirty zfs_bclone_wait_dirty


@@ -140,3 +140,11 @@ function set_blk_mq
log_must set_tunable32 VOL_USE_BLK_MQ $1
fi
}
# enable/disable zvol sync mode
#
# $1: 1 = enable, 0 = disable
function set_zvol_sync
{
log_must set_tunable32 VOL_REQUEST_SYNC $1
}


@@ -60,6 +60,9 @@ typeset -f each_zvol_size=$(( floor($biggest_zvol_size_possible * 0.9 / \
typeset tmpdir="$(mktemp -t -d zvol_stress_fio_state.XXXXXX)"
log_must save_tunable VOL_USE_BLK_MQ
log_must save_tunable VOL_REQUEST_SYNC
function create_zvols
{
log_note "Creating $num_zvols zvols that are ${each_zvol_size}B each"
@@ -124,7 +127,8 @@ function cleanup
log_must zinject -c all
log_must zpool clear $TESTPOOL
destroy_zvols
set_blk_mq 0
log_must restore_tunable VOL_USE_BLK_MQ
log_must restore_tunable VOL_REQUEST_SYNC
# Remove all fio's leftover state files
if [ -n "$tmpdir" ] ; then
@@ -146,6 +150,18 @@ destroy_zvols
set_blk_mq 1
create_zvols
do_zvol_stress
destroy_zvols
# Disable zvol sync mode, and re-run test
set_zvol_sync 0
create_zvols
do_zvol_stress
destroy_zvols
# Same for enabled zvol sync mode
set_zvol_sync 1
create_zvols
do_zvol_stress
# Inject some errors, and verify we see some IO errors in zpool status
sync_pool $TESTPOOL