Add vdev property to disable vdev scheduler

Add a "scheduler" vdev property (auto | on | off) that allows the
vdev I/O scheduler (the vdev queue) to be disabled for a vdev.
The intention behind this property is to improve IOPS
performance when using O_DIRECT.

Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <alexander.motin@TrueNAS.com>
Signed-off-by: MigeljanImeri <ImeriMigel@gmail.com>
Closes #17358
This commit is contained in:
MigeljanImeri
2026-02-23 12:34:33 -05:00
committed by GitHub
parent d2f5cb3a50
commit 4975430cf5
14 changed files with 206 additions and 2 deletions
+3
View File
@@ -968,6 +968,9 @@ skip_open:
else
vd->vdev_nonrot = B_FALSE;
/* Is backed by a block device. */
vd->vdev_is_blkdev = B_TRUE;
/* Set when device reports it supports TRIM. */
error = g_getattr("GEOM::candelete", cp, &has_trim);
vd->vdev_has_trim = (error == 0 && has_trim);
+3
View File
@@ -447,6 +447,9 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
/* Inform the ZIO pipeline that we are non-rotational */
v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(bdev));
/* Is backed by a block device. */
v->vdev_is_blkdev = B_TRUE;
/* Physical volume size in bytes for the partition */
*psize = bdev_capacity(bdev);
+11
View File
@@ -326,6 +326,13 @@ vdev_prop_init(void)
{ NULL }
};
static const zprop_index_t vdevschedulertype_table[] = {
{ "auto", VDEV_SCHEDULER_AUTO },
{ "on", VDEV_SCHEDULER_ON },
{ "off", VDEV_SCHEDULER_OFF },
{ NULL }
};
struct zfs_mod_supported_features *sfeatures =
zfs_mod_list_supported(ZFS_SYSFS_VDEV_PROPERTIES);
@@ -484,6 +491,10 @@ vdev_prop_init(void)
zprop_register_index(VDEV_PROP_SLOW_IO_EVENTS, "slow_io_events",
B_TRUE, PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off",
"SLOW_IO_EVENTS", boolean_table, sfeatures);
zprop_register_index(VDEV_PROP_SCHEDULER, "scheduler",
VDEV_SCHEDULER_AUTO, PROP_DEFAULT, ZFS_TYPE_VDEV,
"auto | on | off", "IO_SCHEDULER",
vdevschedulertype_table, sfeatures);
/* hidden properties */
zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING,
+16
View File
@@ -767,6 +767,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
vd->vdev_slow_io_n = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N);
vd->vdev_slow_io_t = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T);
vd->vdev_scheduler = vdev_prop_default_numeric(VDEV_PROP_SCHEDULER);
list_link_init(&vd->vdev_config_dirty_node);
list_link_init(&vd->vdev_state_dirty_node);
list_link_init(&vd->vdev_initialize_node);
@@ -3972,6 +3974,12 @@ vdev_load(vdev_t *vd)
if (error && error != ENOENT)
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
"failed [error=%d]", (u_longlong_t)zapobj, error);
error = vdev_prop_get_int(vd, VDEV_PROP_SCHEDULER,
&vd->vdev_scheduler);
if (error && error != ENOENT)
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
"failed [error=%d]", (u_longlong_t)zapobj, error);
}
/*
@@ -6259,6 +6267,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
}
vd->vdev_slow_io_t = intval;
break;
case VDEV_PROP_SCHEDULER:
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
break;
}
vd->vdev_scheduler = intval;
break;
default:
/* Most processing is done in vdev_props_set_sync */
break;
@@ -6664,6 +6679,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
case VDEV_PROP_IO_T:
case VDEV_PROP_SLOW_IO_N:
case VDEV_PROP_SLOW_IO_T:
case VDEV_PROP_SCHEDULER:
err = vdev_prop_get_int(vd, prop, &intval);
if (err && err != ENOENT)
break;
+3
View File
@@ -109,6 +109,9 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
*/
vd->vdev_nonrot = B_TRUE;
/* Is not backed by a block device. */
vd->vdev_is_blkdev = B_FALSE;
/*
* Allow TRIM on file based vdevs. This may not always be supported,
* since it depends on your kernel version and underlying filesystem
+40
View File
@@ -879,6 +879,38 @@ again:
return (zio);
}
/*
 * Decide whether a zio has to go through the vdev queue.
 *
 * Returns B_TRUE when the zio must be queued and B_FALSE when the caller
 * may skip the queue.  The answer is driven by the vdev's "scheduler"
 * setting (vd->vdev_scheduler):
 *
 *	VDEV_SCHEDULER_ON	always queue
 *	VDEV_SCHEDULER_OFF	never queue
 *	VDEV_SCHEDULER_AUTO	skip the queue only when the vdev is flagged
 *				both non-rotational and block-device backed
 *
 * Any value other than the three above is treated like
 * VDEV_SCHEDULER_ON, i.e. the zio is queued.
 */
static boolean_t
vdev_should_queue_io(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;

	/*
	 * Add zio with ZIO_FLAG_NODATA to queue as bypass code
	 * currently does not handle certain cases (gang abd, raidz
	 * write aggregation).  This check takes precedence over the
	 * scheduler setting.
	 */
	if (zio->io_flags & ZIO_FLAG_NODATA)
		return (B_TRUE);

	switch (vd->vdev_scheduler) {
	case VDEV_SCHEDULER_OFF:
		return (B_FALSE);
	case VDEV_SCHEDULER_AUTO:
		/* Bypass only for non-rotational block devices. */
		return ((vd->vdev_nonrot && vd->vdev_is_blkdev) ?
		    B_FALSE : B_TRUE);
	case VDEV_SCHEDULER_ON:
	default:
		/* Explicitly on, or an unrecognized value: queue. */
		return (B_TRUE);
	}
}
zio_t *
vdev_queue_io(zio_t *zio)
{
@@ -922,6 +954,11 @@ vdev_queue_io(zio_t *zio)
zio->io_flags |= ZIO_FLAG_DONT_QUEUE;
zio->io_timestamp = gethrtime();
if (!vdev_should_queue_io(zio)) {
zio->io_queue_state = ZIO_QS_NONE;
return (zio);
}
mutex_enter(&vq->vq_lock);
vdev_queue_io_add(vq, zio);
nio = vdev_queue_io_to_issue(vq);
@@ -954,6 +991,9 @@ vdev_queue_io_done(zio_t *zio)
vq->vq_io_complete_ts = now;
vq->vq_io_delta_ts = zio->io_delta = now - zio->io_timestamp;
if (zio->io_queue_state == ZIO_QS_NONE)
return;
mutex_enter(&vq->vq_lock);
vdev_queue_pending_remove(vq, zio);