mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 10:37:35 +03:00
Add vdev property to disable vdev scheduler
Added a vdev property to disable the vdev scheduler. The intention behind this property is to improve IOPS performance when using O_DIRECT.
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <alexander.motin@TrueNAS.com>
Signed-off-by: MigeljanImeri <ImeriMigel@gmail.com>
Closes #17358
This commit is contained in:
@@ -968,6 +968,9 @@ skip_open:
|
||||
else
|
||||
vd->vdev_nonrot = B_FALSE;
|
||||
|
||||
/* Is backed by a block device. */
|
||||
vd->vdev_is_blkdev = B_TRUE;
|
||||
|
||||
/* Set when device reports it supports TRIM. */
|
||||
error = g_getattr("GEOM::candelete", cp, &has_trim);
|
||||
vd->vdev_has_trim = (error == 0 && has_trim);
|
||||
|
||||
@@ -447,6 +447,9 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
|
||||
/* Inform the ZIO pipeline that we are non-rotational */
|
||||
v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(bdev));
|
||||
|
||||
/* Is backed by a block device. */
|
||||
v->vdev_is_blkdev = B_TRUE;
|
||||
|
||||
/* Physical volume size in bytes for the partition */
|
||||
*psize = bdev_capacity(bdev);
|
||||
|
||||
|
||||
@@ -326,6 +326,13 @@ vdev_prop_init(void)
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static const zprop_index_t vdevschedulertype_table[] = {
|
||||
{ "auto", VDEV_SCHEDULER_AUTO },
|
||||
{ "on", VDEV_SCHEDULER_ON },
|
||||
{ "off", VDEV_SCHEDULER_OFF },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
struct zfs_mod_supported_features *sfeatures =
|
||||
zfs_mod_list_supported(ZFS_SYSFS_VDEV_PROPERTIES);
|
||||
|
||||
@@ -484,6 +491,10 @@ vdev_prop_init(void)
|
||||
zprop_register_index(VDEV_PROP_SLOW_IO_EVENTS, "slow_io_events",
|
||||
B_TRUE, PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off",
|
||||
"SLOW_IO_EVENTS", boolean_table, sfeatures);
|
||||
zprop_register_index(VDEV_PROP_SCHEDULER, "scheduler",
|
||||
VDEV_SCHEDULER_AUTO, PROP_DEFAULT, ZFS_TYPE_VDEV,
|
||||
"auto | on | off", "IO_SCHEDULER",
|
||||
vdevschedulertype_table, sfeatures);
|
||||
|
||||
/* hidden properties */
|
||||
zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING,
|
||||
|
||||
@@ -767,6 +767,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
|
||||
vd->vdev_slow_io_n = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N);
|
||||
vd->vdev_slow_io_t = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T);
|
||||
|
||||
vd->vdev_scheduler = vdev_prop_default_numeric(VDEV_PROP_SCHEDULER);
|
||||
|
||||
list_link_init(&vd->vdev_config_dirty_node);
|
||||
list_link_init(&vd->vdev_state_dirty_node);
|
||||
list_link_init(&vd->vdev_initialize_node);
|
||||
@@ -3972,6 +3974,12 @@ vdev_load(vdev_t *vd)
|
||||
if (error && error != ENOENT)
|
||||
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
|
||||
"failed [error=%d]", (u_longlong_t)zapobj, error);
|
||||
|
||||
error = vdev_prop_get_int(vd, VDEV_PROP_SCHEDULER,
|
||||
&vd->vdev_scheduler);
|
||||
if (error && error != ENOENT)
|
||||
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
|
||||
"failed [error=%d]", (u_longlong_t)zapobj, error);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -6259,6 +6267,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
}
|
||||
vd->vdev_slow_io_t = intval;
|
||||
break;
|
||||
case VDEV_PROP_SCHEDULER:
|
||||
if (nvpair_value_uint64(elem, &intval) != 0) {
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
vd->vdev_scheduler = intval;
|
||||
break;
|
||||
default:
|
||||
/* Most processing is done in vdev_props_set_sync */
|
||||
break;
|
||||
@@ -6664,6 +6679,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
case VDEV_PROP_IO_T:
|
||||
case VDEV_PROP_SLOW_IO_N:
|
||||
case VDEV_PROP_SLOW_IO_T:
|
||||
case VDEV_PROP_SCHEDULER:
|
||||
err = vdev_prop_get_int(vd, prop, &intval);
|
||||
if (err && err != ENOENT)
|
||||
break;
|
||||
|
||||
@@ -109,6 +109,9 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
|
||||
*/
|
||||
vd->vdev_nonrot = B_TRUE;
|
||||
|
||||
/* Is not backed by a block device. */
|
||||
vd->vdev_is_blkdev = B_FALSE;
|
||||
|
||||
/*
|
||||
* Allow TRIM on file based vdevs. This may not always be supported,
|
||||
* since it depends on your kernel version and underlying filesystem
|
||||
|
||||
@@ -879,6 +879,38 @@ again:
|
||||
return (zio);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
vdev_should_queue_io(zio_t *zio)
|
||||
{
|
||||
vdev_t *vd = zio->io_vd;
|
||||
boolean_t should_queue = B_TRUE;
|
||||
|
||||
/*
|
||||
* Add zio with ZIO_FLAG_NODATA to queue as bypass code
|
||||
* currently does not handle certain cases (gang abd, raidz
|
||||
* write aggregation).
|
||||
*/
|
||||
if (zio->io_flags & ZIO_FLAG_NODATA)
|
||||
return (B_TRUE);
|
||||
|
||||
switch (vd->vdev_scheduler) {
|
||||
case VDEV_SCHEDULER_AUTO:
|
||||
if (vd->vdev_nonrot && vd->vdev_is_blkdev)
|
||||
should_queue = B_FALSE;
|
||||
break;
|
||||
case VDEV_SCHEDULER_ON:
|
||||
should_queue = B_TRUE;
|
||||
break;
|
||||
case VDEV_SCHEDULER_OFF:
|
||||
should_queue = B_FALSE;
|
||||
break;
|
||||
default:
|
||||
should_queue = B_TRUE;
|
||||
break;
|
||||
}
|
||||
return (should_queue);
|
||||
}
|
||||
|
||||
zio_t *
|
||||
vdev_queue_io(zio_t *zio)
|
||||
{
|
||||
@@ -922,6 +954,11 @@ vdev_queue_io(zio_t *zio)
|
||||
zio->io_flags |= ZIO_FLAG_DONT_QUEUE;
|
||||
zio->io_timestamp = gethrtime();
|
||||
|
||||
if (!vdev_should_queue_io(zio)) {
|
||||
zio->io_queue_state = ZIO_QS_NONE;
|
||||
return (zio);
|
||||
}
|
||||
|
||||
mutex_enter(&vq->vq_lock);
|
||||
vdev_queue_io_add(vq, zio);
|
||||
nio = vdev_queue_io_to_issue(vq);
|
||||
@@ -954,6 +991,9 @@ vdev_queue_io_done(zio_t *zio)
|
||||
vq->vq_io_complete_ts = now;
|
||||
vq->vq_io_delta_ts = zio->io_delta = now - zio->io_timestamp;
|
||||
|
||||
if (zio->io_queue_state == ZIO_QS_NONE)
|
||||
return;
|
||||
|
||||
mutex_enter(&vq->vq_lock);
|
||||
vdev_queue_pending_remove(vq, zio);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user