mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
vdev_ashift should only be set once
== Motivation and Context The new vdev ashift optimization prevents the removal of devices when a zfs configuration is comprised of disks which have different logical and physical block sizes. This happens because we set 'spa_min_ashift' in vdev_open and then later call 'vdev_ashift_optimize'. This would result in an inconsistency between spa's ashift calculations and that of the top-level vdev. In addition, the optimization logic ignores the overridden ashift value that would be provided by '-o ashift=<val>'. == Description This change reworks the vdev ashift optimization so that the ashift is only set the first time the device is configured. It still allows the physical and logical ashift values to be set every time the device is opened but those values are only consulted on first open. Reviewed-by: Matthew Ahrens <mahrens@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Cedric Berger <cedric@precidata.com> Signed-off-by: George Wilson <gwilson@delphix.com> External-Issue: DLPX-71831 Closes #10932
This commit is contained in:
@@ -9434,8 +9434,6 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd)
|
||||
|
||||
ASSERT(!l2arc_vdev_present(vd));
|
||||
|
||||
vdev_ashift_optimize(vd);
|
||||
|
||||
/*
|
||||
* Create a new l2arc device entry.
|
||||
*/
|
||||
|
||||
@@ -5762,7 +5762,6 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
|
||||
for (int c = 0; error == 0 && c < rvd->vdev_children; c++) {
|
||||
vdev_t *vd = rvd->vdev_child[c];
|
||||
|
||||
vdev_ashift_optimize(vd);
|
||||
vdev_metaslab_set_size(vd);
|
||||
vdev_expand(vd, txg);
|
||||
}
|
||||
|
||||
@@ -577,10 +577,8 @@ spa_config_update(spa_t *spa, int what)
|
||||
(tvd->vdev_islog && tvd->vdev_removing))
|
||||
continue;
|
||||
|
||||
if (tvd->vdev_ms_array == 0) {
|
||||
vdev_ashift_optimize(tvd);
|
||||
if (tvd->vdev_ms_array == 0)
|
||||
vdev_metaslab_set_size(tvd);
|
||||
}
|
||||
vdev_expand(tvd, txg);
|
||||
}
|
||||
}
|
||||
|
||||
+59
-37
@@ -1672,6 +1672,38 @@ vdev_set_deflate_ratio(vdev_t *vd)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Maximize performance by inflating the configured ashift for top level
|
||||
* vdevs to be as close to the physical ashift as possible while maintaining
|
||||
* administrator defined limits and ensuring it doesn't go below the
|
||||
* logical ashift.
|
||||
*/
|
||||
static void
|
||||
vdev_ashift_optimize(vdev_t *vd)
|
||||
{
|
||||
ASSERT(vd == vd->vdev_top);
|
||||
|
||||
if (vd->vdev_ashift < vd->vdev_physical_ashift) {
|
||||
vd->vdev_ashift = MIN(
|
||||
MAX(zfs_vdev_max_auto_ashift, vd->vdev_ashift),
|
||||
MAX(zfs_vdev_min_auto_ashift,
|
||||
vd->vdev_physical_ashift));
|
||||
} else {
|
||||
/*
|
||||
* If the logical and physical ashifts are the same, then
|
||||
* we ensure that the top-level vdev's ashift is not smaller
|
||||
* than our minimum ashift value. For the unusual case
|
||||
* where logical ashift > physical ashift, we can't cap
|
||||
* the calculated ashift based on max ashift as that
|
||||
* would cause failures.
|
||||
* We still check if we need to increase it to match
|
||||
* the min ashift.
|
||||
*/
|
||||
vd->vdev_ashift = MAX(zfs_vdev_min_auto_ashift,
|
||||
vd->vdev_ashift);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Prepare a virtual device for access.
|
||||
*/
|
||||
@@ -1830,16 +1862,17 @@ vdev_open(vdev_t *vd)
|
||||
return (SET_ERROR(EINVAL));
|
||||
}
|
||||
|
||||
/*
|
||||
* We can always set the logical/physical ashift members since
|
||||
* their values are only used to calculate the vdev_ashift when
|
||||
* the device is first added to the config. These values should
|
||||
* not be used for anything else since they may change whenever
|
||||
* the device is reopened and we don't store them in the label.
|
||||
*/
|
||||
vd->vdev_physical_ashift =
|
||||
MAX(physical_ashift, vd->vdev_physical_ashift);
|
||||
vd->vdev_logical_ashift = MAX(logical_ashift, vd->vdev_logical_ashift);
|
||||
vd->vdev_ashift = MAX(vd->vdev_logical_ashift, vd->vdev_ashift);
|
||||
|
||||
if (vd->vdev_logical_ashift > ASHIFT_MAX) {
|
||||
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
|
||||
VDEV_AUX_ASHIFT_TOO_BIG);
|
||||
return (SET_ERROR(EDOM));
|
||||
}
|
||||
vd->vdev_logical_ashift = MAX(logical_ashift,
|
||||
vd->vdev_logical_ashift);
|
||||
|
||||
if (vd->vdev_asize == 0) {
|
||||
/*
|
||||
@@ -1848,6 +1881,24 @@ vdev_open(vdev_t *vd)
|
||||
*/
|
||||
vd->vdev_asize = asize;
|
||||
vd->vdev_max_asize = max_asize;
|
||||
|
||||
/*
|
||||
* If the vdev_ashift was not overridden at creation time,
|
||||
* then set it to the logical ashift and optimize the ashift.
|
||||
*/
|
||||
if (vd->vdev_ashift == 0) {
|
||||
vd->vdev_ashift = vd->vdev_logical_ashift;
|
||||
|
||||
if (vd->vdev_logical_ashift > ASHIFT_MAX) {
|
||||
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
|
||||
VDEV_AUX_ASHIFT_TOO_BIG);
|
||||
return (SET_ERROR(EDOM));
|
||||
}
|
||||
|
||||
if (vd->vdev_top == vd) {
|
||||
vdev_ashift_optimize(vd);
|
||||
}
|
||||
}
|
||||
if (vd->vdev_ashift != 0 && (vd->vdev_ashift < ASHIFT_MIN ||
|
||||
vd->vdev_ashift > ASHIFT_MAX)) {
|
||||
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
|
||||
@@ -2444,35 +2495,6 @@ vdev_metaslab_set_size(vdev_t *vd)
|
||||
ASSERT3U(vd->vdev_ms_shift, >=, SPA_MAXBLOCKSHIFT);
|
||||
}
|
||||
|
||||
/*
|
||||
* Maximize performance by inflating the configured ashift for top level
|
||||
* vdevs to be as close to the physical ashift as possible while maintaining
|
||||
* administrator defined limits and ensuring it doesn't go below the
|
||||
* logical ashift.
|
||||
*/
|
||||
void
|
||||
vdev_ashift_optimize(vdev_t *vd)
|
||||
{
|
||||
if (vd == vd->vdev_top) {
|
||||
if (vd->vdev_ashift < vd->vdev_physical_ashift) {
|
||||
vd->vdev_ashift = MIN(
|
||||
MAX(zfs_vdev_max_auto_ashift, vd->vdev_ashift),
|
||||
MAX(zfs_vdev_min_auto_ashift,
|
||||
vd->vdev_physical_ashift));
|
||||
} else {
|
||||
/*
|
||||
* Unusual case where logical ashift > physical ashift
|
||||
* so we can't cap the calculated ashift based on max
|
||||
* ashift as that would cause failures.
|
||||
* We still check if we need to increase it to match
|
||||
* the min ashift.
|
||||
*/
|
||||
vd->vdev_ashift = MAX(zfs_vdev_min_auto_ashift,
|
||||
vd->vdev_ashift);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg)
|
||||
{
|
||||
|
||||
@@ -391,7 +391,7 @@ vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
|
||||
*max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1;
|
||||
*logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift);
|
||||
*physical_ashift = MAX(*physical_ashift,
|
||||
vd->vdev_physical_ashift);
|
||||
cvd->vdev_physical_ashift);
|
||||
}
|
||||
|
||||
if (numerrors == vd->vdev_children) {
|
||||
|
||||
Reference in New Issue
Block a user