Track whether a vdev is non-rotational and skip LBA weighting for it.

Leaf vdevs set vdev_nonrot when they are opened: disk vdevs take the
value from blk_queue_nonrot(), file vdevs always set it to B_TRUE.
vdev_open_children() starts the parent at B_TRUE and ANDs in each
child's flag, so a parent is non-rotational only if every child is.
metaslab_weight() applies LBA weighting only when vdev_nonrot is clear.

The parent's flag is folded in vdev_open_children() only, never from
vdev_open_child(): that callback appears to run on the multi-threaded
"vdev_open" taskq, where an unsynchronized "&=" on the shared parent
field could lose an update.

NOTE(review): context-line whitespace was reconstructed from a
whitespace-collapsed copy of this patch; use --ignore-whitespace if it
does not apply cleanly.  The post-image blob ids on the "index" lines
for vdev.c and vdev_disk.c have not been regenerated.

diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
index 6b27e75ae..1371a3f03 100644
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@@ -151,6 +151,7 @@ struct vdev {
 	vdev_stat_t	vdev_stat;	/* virtual device statistics */
 	boolean_t	vdev_expanding;	/* expand the vdev? */
 	boolean_t	vdev_reopening;	/* reopen in progress? */
+	boolean_t	vdev_nonrot;	/* true if solid state */
 	int		vdev_open_error; /* error on last open */
 	kthread_t	*vdev_open_thread; /* thread opening children */
 	uint64_t	vdev_crtxg;	/* txg when top-level was added */
diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c
index b328cbb0a..59bcefd34 100644
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@@ -1518,7 +1518,7 @@ metaslab_weight(metaslab_t *msp)
 	 * In effect, this means that we'll select the metaslab with the most
 	 * free bandwidth rather than simply the one with the most free space.
 	 */
-	if (metaslab_lba_weighting_enabled) {
+	if (!vd->vdev_nonrot && metaslab_lba_weighting_enabled) {
 		weight = 2 * weight - (msp->ms_id * weight) / vd->vdev_ms_count;
 		ASSERT(weight >= space && weight <= 2 * space);
 	}
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index 8e50ababc..7aff5455b 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -1134,15 +1134,19 @@ vdev_open_children(vdev_t *vd)
 	int children = vd->vdev_children;
 	int c;
 
+	vd->vdev_nonrot = B_TRUE;
+
 	/*
 	 * in order to handle pools on top of zvols, do the opens
 	 * in a single thread so that the same thread holds the
 	 * spa_namespace_lock
 	 */
 	if (vdev_uses_zvols(vd)) {
-		for (c = 0; c < children; c++)
+		for (c = 0; c < children; c++) {
 			vd->vdev_child[c]->vdev_open_error =
 			    vdev_open(vd->vdev_child[c]);
+			vd->vdev_nonrot &= vd->vdev_child[c]->vdev_nonrot;
+		}
 		return;
 	}
 	tq = taskq_create("vdev_open", children, minclsyspri,
@@ -1153,6 +1157,14 @@ vdev_open_children(vdev_t *vd)
 		    TQ_SLEEP) != 0);
 
 	taskq_destroy(tq);
+
+	/*
+	 * Fold in the children's flags from this thread rather than from
+	 * vdev_open_child(), so vd->vdev_nonrot is never updated by
+	 * several open threads at once.
+	 */
+	for (c = 0; c < children; c++)
+		vd->vdev_nonrot &= vd->vdev_child[c]->vdev_nonrot;
 }
 
 /*
diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c
index eb77c269c..380ede35b 100644
--- a/module/zfs/vdev_disk.c
+++ b/module/zfs/vdev_disk.c
@@ -301,6 +301,9 @@ skip_open:
 	/* Clear the nowritecache bit, causes vdev_reopen() to try again. */
 	v->vdev_nowritecache = B_FALSE;
 
+	/* Record whether the underlying block device is non-rotational */
+	v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(vd->vd_bdev));
+
 	/* Physical volume size in bytes */
 	*psize = bdev_capacity(vd->vd_bdev);
 
diff --git a/module/zfs/vdev_file.c b/module/zfs/vdev_file.c
index e61240fdc..a29ea7bf9 100644
--- a/module/zfs/vdev_file.c
+++ b/module/zfs/vdev_file.c
@@ -57,6 +57,9 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
 	vattr_t vattr;
 	int error;
 
+	/* Rotational optimizations only make sense on block devices */
+	vd->vdev_nonrot = B_TRUE;
+
 	/*
 	 * We must have a pathname, and it must be absolute.
 	 */