mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Vdev Properties Feature
Add properties, similar to pool properties, to each vdev. This makes use of the existing per-vdev ZAP that was added as part of device evacuation/removal. A large number of read-only properties are exposed, many of the members of struct vdev_t, that provide useful statistics. Adds support for read-only "removing" vdev property. Adds the "allocating" property that defaults to "on" and can be set to "off" to prevent future allocations from that top-level vdev. Supports user-defined vdev properties. Includes support for properties.vdev in SYSFS. Co-authored-by: Allan Jude <allan@klarasystems.com> Co-authored-by: Mark Maybee <mark.maybee@delphix.com> Reviewed-by: Matthew Ahrens <mahrens@delphix.com> Reviewed-by: Mark Maybee <mark.maybee@delphix.com> Signed-off-by: Allan Jude <allan@klarasystems.com> Closes #11711
This commit is contained in:
+1
-1
@@ -786,7 +786,7 @@ spa_prop_set(spa_t *spa, nvlist_t *nvp)
|
||||
continue;
|
||||
|
||||
if (prop == ZPOOL_PROP_VERSION || prop == ZPOOL_PROP_INVAL) {
|
||||
uint64_t ver;
|
||||
uint64_t ver = 0;
|
||||
|
||||
if (prop == ZPOOL_PROP_VERSION) {
|
||||
VERIFY(nvpair_value_uint64(elem, &ver) == 0);
|
||||
|
||||
+15
-23
@@ -1833,36 +1833,27 @@ spa_update_dspace(spa_t *spa)
|
||||
{
|
||||
spa->spa_dspace = metaslab_class_get_dspace(spa_normal_class(spa)) +
|
||||
ddt_get_dedup_dspace(spa);
|
||||
if (spa->spa_vdev_removal != NULL) {
|
||||
if (spa->spa_nonallocating_dspace > 0) {
|
||||
/*
|
||||
* We can't allocate from the removing device, so subtract
|
||||
* its size if it was included in dspace (i.e. if this is a
|
||||
* normal-class vdev, not special/dedup). This prevents the
|
||||
* DMU/DSL from filling up the (now smaller) pool while we
|
||||
* are in the middle of removing the device.
|
||||
* Subtract the space provided by all non-allocating vdevs that
|
||||
* contribute to dspace. If a file is overwritten, its old
|
||||
* blocks are freed and new blocks are allocated. If there are
|
||||
* no snapshots of the file, the available space should remain
|
||||
* the same. The old blocks could be freed from the
|
||||
* non-allocating vdev, but the new blocks must be allocated on
|
||||
* other (allocating) vdevs. By reserving the entire size of
|
||||
* the non-allocating vdevs (including allocated space), we
|
||||
* ensure that there will be enough space on the allocating
|
||||
* vdevs for this file overwrite to succeed.
|
||||
*
|
||||
* Note that the DMU/DSL doesn't actually know or care
|
||||
* how much space is allocated (it does its own tracking
|
||||
* of how much space has been logically used). So it
|
||||
* doesn't matter that the data we are moving may be
|
||||
* allocated twice (on the old device and the new
|
||||
* device).
|
||||
* allocated twice (on the old device and the new device).
|
||||
*/
|
||||
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
|
||||
vdev_t *vd =
|
||||
vdev_lookup_top(spa, spa->spa_vdev_removal->svr_vdev_id);
|
||||
/*
|
||||
* If the stars align, we can wind up here after
|
||||
* vdev_remove_complete() has cleared vd->vdev_mg but before
|
||||
* spa->spa_vdev_removal gets cleared, so we must check before
|
||||
* we dereference.
|
||||
*/
|
||||
if (vd->vdev_mg &&
|
||||
vd->vdev_mg->mg_class == spa_normal_class(spa)) {
|
||||
spa->spa_dspace -= spa_deflate(spa) ?
|
||||
vd->vdev_stat.vs_dspace : vd->vdev_stat.vs_space;
|
||||
}
|
||||
spa_config_exit(spa, SCL_VDEV, FTAG);
|
||||
ASSERT3U(spa->spa_dspace, >=, spa->spa_nonallocating_dspace);
|
||||
spa->spa_dspace -= spa->spa_nonallocating_dspace;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2429,6 +2420,7 @@ spa_init(spa_mode_t mode)
|
||||
zpool_prop_init();
|
||||
zpool_feature_init();
|
||||
spa_config_load();
|
||||
vdev_prop_init();
|
||||
l2arc_start();
|
||||
scan_init();
|
||||
qat_init();
|
||||
|
||||
+622
-4
@@ -28,6 +28,7 @@
|
||||
* Copyright 2017 Joyent, Inc.
|
||||
* Copyright (c) 2017, Intel Corporation.
|
||||
* Copyright (c) 2019, Datto Inc. All rights reserved.
|
||||
* Copyright (c) 2021, Klara Inc.
|
||||
* Copyright [2021] Hewlett Packard Enterprise Development LP
|
||||
*/
|
||||
|
||||
@@ -59,6 +60,7 @@
|
||||
#include <sys/vdev_trim.h>
|
||||
#include <sys/zvol.h>
|
||||
#include <sys/zfs_ratelimit.h>
|
||||
#include "zfs_prop.h"
|
||||
|
||||
/*
|
||||
* One metaslab from each (normal-class) vdev is used by the ZIL. These are
|
||||
@@ -865,6 +867,8 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
|
||||
&vd->vdev_ms_shift);
|
||||
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASIZE,
|
||||
&vd->vdev_asize);
|
||||
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NONALLOCATING,
|
||||
&vd->vdev_noalloc);
|
||||
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVING,
|
||||
&vd->vdev_removing);
|
||||
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_VDEV_TOP_ZAP,
|
||||
@@ -1183,8 +1187,10 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
|
||||
ASSERT3P(tvd->vdev_indirect_mapping, ==, NULL);
|
||||
ASSERT3P(tvd->vdev_indirect_births, ==, NULL);
|
||||
ASSERT3P(tvd->vdev_obsolete_sm, ==, NULL);
|
||||
ASSERT0(tvd->vdev_noalloc);
|
||||
ASSERT0(tvd->vdev_removing);
|
||||
ASSERT0(tvd->vdev_rebuilding);
|
||||
tvd->vdev_noalloc = svd->vdev_noalloc;
|
||||
tvd->vdev_removing = svd->vdev_removing;
|
||||
tvd->vdev_rebuilding = svd->vdev_rebuilding;
|
||||
tvd->vdev_rebuild_config = svd->vdev_rebuild_config;
|
||||
@@ -1200,6 +1206,7 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
|
||||
svd->vdev_indirect_mapping = NULL;
|
||||
svd->vdev_indirect_births = NULL;
|
||||
svd->vdev_obsolete_sm = NULL;
|
||||
svd->vdev_noalloc = 0;
|
||||
svd->vdev_removing = 0;
|
||||
svd->vdev_rebuilding = 0;
|
||||
|
||||
@@ -1498,11 +1505,15 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
|
||||
spa_config_enter(spa, SCL_ALLOC, FTAG, RW_WRITER);
|
||||
|
||||
/*
|
||||
* If the vdev is being removed we don't activate
|
||||
* the metaslabs since we want to ensure that no new
|
||||
* allocations are performed on this device.
|
||||
* If the vdev is marked as non-allocating then don't
|
||||
* activate the metaslabs since we want to ensure that
|
||||
* no allocations are performed on this device.
|
||||
*/
|
||||
if (!expanding && !vd->vdev_removing) {
|
||||
if (vd->vdev_noalloc) {
|
||||
/* track non-allocating vdev space */
|
||||
spa->spa_nonallocating_dspace += spa_deflate(spa) ?
|
||||
vd->vdev_stat.vs_dspace : vd->vdev_stat.vs_space;
|
||||
} else if (!expanding) {
|
||||
metaslab_group_activate(vd->vdev_mg);
|
||||
if (vd->vdev_log_mg != NULL)
|
||||
metaslab_group_activate(vd->vdev_log_mg);
|
||||
@@ -4469,6 +4480,8 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
|
||||
vs->vs_fragmentation = (vd->vdev_mg != NULL) ?
|
||||
vd->vdev_mg->mg_fragmentation : 0;
|
||||
}
|
||||
vs->vs_noalloc = MAX(vd->vdev_noalloc,
|
||||
tvd ? tvd->vdev_noalloc : 0);
|
||||
}
|
||||
|
||||
vdev_get_stats_ex_impl(vd, vs, vsx);
|
||||
@@ -5375,6 +5388,23 @@ vdev_xlate_walk(vdev_t *vd, const range_seg64_t *logical_rs,
|
||||
}
|
||||
}
|
||||
|
||||
static char *
|
||||
vdev_name(vdev_t *vd, char *buf, int buflen)
|
||||
{
|
||||
if (vd->vdev_path == NULL) {
|
||||
if (strcmp(vd->vdev_ops->vdev_op_type, "root") == 0) {
|
||||
strlcpy(buf, vd->vdev_spa->spa_name, buflen);
|
||||
} else if (!vd->vdev_ops->vdev_op_leaf) {
|
||||
snprintf(buf, buflen, "%s-%llu",
|
||||
vd->vdev_ops->vdev_op_type,
|
||||
(u_longlong_t)vd->vdev_id);
|
||||
}
|
||||
} else {
|
||||
strlcpy(buf, vd->vdev_path, buflen);
|
||||
}
|
||||
return (buf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Look at the vdev tree and determine whether any devices are currently being
|
||||
* replaced.
|
||||
@@ -5404,6 +5434,594 @@ vdev_replace_in_progress(vdev_t *vdev)
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a (source=src, propname=propval) list to an nvlist.
|
||||
*/
|
||||
static void
|
||||
vdev_prop_add_list(nvlist_t *nvl, const char *propname, char *strval,
|
||||
uint64_t intval, zprop_source_t src)
|
||||
{
|
||||
nvlist_t *propval;
|
||||
|
||||
propval = fnvlist_alloc();
|
||||
fnvlist_add_uint64(propval, ZPROP_SOURCE, src);
|
||||
|
||||
if (strval != NULL)
|
||||
fnvlist_add_string(propval, ZPROP_VALUE, strval);
|
||||
else
|
||||
fnvlist_add_uint64(propval, ZPROP_VALUE, intval);
|
||||
|
||||
fnvlist_add_nvlist(nvl, propname, propval);
|
||||
nvlist_free(propval);
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_props_set_sync(void *arg, dmu_tx_t *tx)
|
||||
{
|
||||
vdev_t *vd;
|
||||
nvlist_t *nvp = arg;
|
||||
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
|
||||
objset_t *mos = spa->spa_meta_objset;
|
||||
nvpair_t *elem = NULL;
|
||||
uint64_t vdev_guid;
|
||||
nvlist_t *nvprops;
|
||||
|
||||
vdev_guid = fnvlist_lookup_uint64(nvp, ZPOOL_VDEV_PROPS_SET_VDEV);
|
||||
nvprops = fnvlist_lookup_nvlist(nvp, ZPOOL_VDEV_PROPS_SET_PROPS);
|
||||
vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE);
|
||||
VERIFY(vd != NULL);
|
||||
|
||||
mutex_enter(&spa->spa_props_lock);
|
||||
|
||||
while ((elem = nvlist_next_nvpair(nvprops, elem)) != NULL) {
|
||||
uint64_t intval, objid = 0;
|
||||
char *strval;
|
||||
vdev_prop_t prop;
|
||||
const char *propname = nvpair_name(elem);
|
||||
zprop_type_t proptype;
|
||||
|
||||
/*
|
||||
* Set vdev property values in the vdev props mos object.
|
||||
*/
|
||||
if (vd->vdev_top_zap != 0) {
|
||||
objid = vd->vdev_top_zap;
|
||||
} else if (vd->vdev_leaf_zap != 0) {
|
||||
objid = vd->vdev_leaf_zap;
|
||||
} else {
|
||||
panic("vdev not top or leaf");
|
||||
}
|
||||
|
||||
switch (prop = vdev_name_to_prop(propname)) {
|
||||
case VDEV_PROP_USER:
|
||||
if (vdev_prop_user(propname)) {
|
||||
strval = fnvpair_value_string(elem);
|
||||
if (strlen(strval) == 0) {
|
||||
/* remove the property if value == "" */
|
||||
(void) zap_remove(mos, objid, propname,
|
||||
tx);
|
||||
} else {
|
||||
VERIFY0(zap_update(mos, objid, propname,
|
||||
1, strlen(strval) + 1, strval, tx));
|
||||
}
|
||||
spa_history_log_internal(spa, "vdev set", tx,
|
||||
"vdev_guid=%llu: %s=%s",
|
||||
(u_longlong_t)vdev_guid, nvpair_name(elem),
|
||||
strval);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
/* normalize the property name */
|
||||
propname = vdev_prop_to_name(prop);
|
||||
proptype = vdev_prop_get_type(prop);
|
||||
|
||||
if (nvpair_type(elem) == DATA_TYPE_STRING) {
|
||||
ASSERT(proptype == PROP_TYPE_STRING);
|
||||
strval = fnvpair_value_string(elem);
|
||||
VERIFY0(zap_update(mos, objid, propname,
|
||||
1, strlen(strval) + 1, strval, tx));
|
||||
spa_history_log_internal(spa, "vdev set", tx,
|
||||
"vdev_guid=%llu: %s=%s",
|
||||
(u_longlong_t)vdev_guid, nvpair_name(elem),
|
||||
strval);
|
||||
} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
|
||||
intval = fnvpair_value_uint64(elem);
|
||||
|
||||
if (proptype == PROP_TYPE_INDEX) {
|
||||
const char *unused;
|
||||
VERIFY0(vdev_prop_index_to_string(
|
||||
prop, intval, &unused));
|
||||
}
|
||||
VERIFY0(zap_update(mos, objid, propname,
|
||||
sizeof (uint64_t), 1, &intval, tx));
|
||||
spa_history_log_internal(spa, "vdev set", tx,
|
||||
"vdev_guid=%llu: %s=%lld",
|
||||
(u_longlong_t)vdev_guid,
|
||||
nvpair_name(elem), (longlong_t)intval);
|
||||
} else {
|
||||
panic("invalid vdev property type %u",
|
||||
nvpair_type(elem));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
mutex_exit(&spa->spa_props_lock);
|
||||
}
|
||||
|
||||
int
|
||||
vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
nvpair_t *elem = NULL;
|
||||
uint64_t vdev_guid;
|
||||
nvlist_t *nvprops;
|
||||
int error;
|
||||
|
||||
ASSERT(vd != NULL);
|
||||
|
||||
if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_SET_VDEV,
|
||||
&vdev_guid) != 0)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
if (nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_SET_PROPS,
|
||||
&nvprops) != 0)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
while ((elem = nvlist_next_nvpair(nvprops, elem)) != NULL) {
|
||||
char *propname = nvpair_name(elem);
|
||||
vdev_prop_t prop = vdev_name_to_prop(propname);
|
||||
uint64_t intval = 0;
|
||||
char *strval = NULL;
|
||||
|
||||
if (prop == VDEV_PROP_USER && !vdev_prop_user(propname)) {
|
||||
error = EINVAL;
|
||||
goto end;
|
||||
}
|
||||
|
||||
if (vdev_prop_readonly(prop)) {
|
||||
error = EROFS;
|
||||
goto end;
|
||||
}
|
||||
|
||||
/* Special Processing */
|
||||
switch (prop) {
|
||||
case VDEV_PROP_PATH:
|
||||
if (vd->vdev_path == NULL) {
|
||||
error = EROFS;
|
||||
break;
|
||||
}
|
||||
if (nvpair_value_string(elem, &strval) != 0) {
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
/* New path must start with /dev/ */
|
||||
if (strncmp(strval, "/dev/", 5)) {
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
error = spa_vdev_setpath(spa, vdev_guid, strval);
|
||||
break;
|
||||
case VDEV_PROP_ALLOCATING:
|
||||
if (nvpair_value_uint64(elem, &intval) != 0) {
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
if (intval != vd->vdev_noalloc)
|
||||
break;
|
||||
if (intval == 0)
|
||||
error = spa_vdev_noalloc(spa, vdev_guid);
|
||||
else
|
||||
error = spa_vdev_alloc(spa, vdev_guid);
|
||||
break;
|
||||
default:
|
||||
/* Most processing is done in vdev_props_set_sync */
|
||||
break;
|
||||
}
|
||||
end:
|
||||
if (error != 0) {
|
||||
intval = error;
|
||||
vdev_prop_add_list(outnvl, propname, strval, intval, 0);
|
||||
return (error);
|
||||
}
|
||||
}
|
||||
|
||||
return (dsl_sync_task(spa->spa_name, NULL, vdev_props_set_sync,
|
||||
innvl, 6, ZFS_SPACE_CHECK_EXTRA_RESERVED));
|
||||
}
|
||||
|
||||
int
|
||||
vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
objset_t *mos = spa->spa_meta_objset;
|
||||
int err = 0;
|
||||
uint64_t objid;
|
||||
uint64_t vdev_guid;
|
||||
nvpair_t *elem = NULL;
|
||||
nvlist_t *nvprops = NULL;
|
||||
uint64_t intval = 0;
|
||||
char *strval = NULL;
|
||||
const char *propname = NULL;
|
||||
vdev_prop_t prop;
|
||||
|
||||
ASSERT(vd != NULL);
|
||||
ASSERT(mos != NULL);
|
||||
|
||||
if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_GET_VDEV,
|
||||
&vdev_guid) != 0)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_GET_PROPS, &nvprops);
|
||||
|
||||
if (vd->vdev_top_zap != 0) {
|
||||
objid = vd->vdev_top_zap;
|
||||
} else if (vd->vdev_leaf_zap != 0) {
|
||||
objid = vd->vdev_leaf_zap;
|
||||
} else {
|
||||
return (SET_ERROR(EINVAL));
|
||||
}
|
||||
ASSERT(objid != 0);
|
||||
|
||||
mutex_enter(&spa->spa_props_lock);
|
||||
|
||||
if (nvprops != NULL) {
|
||||
char namebuf[64] = { 0 };
|
||||
|
||||
while ((elem = nvlist_next_nvpair(nvprops, elem)) != NULL) {
|
||||
intval = 0;
|
||||
strval = NULL;
|
||||
propname = nvpair_name(elem);
|
||||
prop = vdev_name_to_prop(propname);
|
||||
zprop_source_t src = ZPROP_SRC_DEFAULT;
|
||||
uint64_t integer_size, num_integers;
|
||||
|
||||
switch (prop) {
|
||||
/* Special Read-only Properties */
|
||||
case VDEV_PROP_NAME:
|
||||
strval = vdev_name(vd, namebuf,
|
||||
sizeof (namebuf));
|
||||
if (strval == NULL)
|
||||
continue;
|
||||
vdev_prop_add_list(outnvl, propname, strval, 0,
|
||||
ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_CAPACITY:
|
||||
/* percent used */
|
||||
intval = (vd->vdev_stat.vs_dspace == 0) ? 0 :
|
||||
(vd->vdev_stat.vs_alloc * 100 /
|
||||
vd->vdev_stat.vs_dspace);
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
intval, ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_STATE:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_state, ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_GUID:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_guid, ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_ASIZE:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_asize, ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_PSIZE:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_psize, ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_ASHIFT:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_ashift, ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_SIZE:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_dspace, ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_FREE:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_dspace -
|
||||
vd->vdev_stat.vs_alloc, ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_ALLOCATED:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_alloc, ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_EXPANDSZ:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_esize, ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_FRAGMENTATION:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_fragmentation,
|
||||
ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_PARITY:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vdev_get_nparity(vd), ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_PATH:
|
||||
if (vd->vdev_path == NULL)
|
||||
continue;
|
||||
vdev_prop_add_list(outnvl, propname,
|
||||
vd->vdev_path, 0, ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_DEVID:
|
||||
if (vd->vdev_devid == NULL)
|
||||
continue;
|
||||
vdev_prop_add_list(outnvl, propname,
|
||||
vd->vdev_devid, 0, ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_PHYS_PATH:
|
||||
if (vd->vdev_physpath == NULL)
|
||||
continue;
|
||||
vdev_prop_add_list(outnvl, propname,
|
||||
vd->vdev_physpath, 0, ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_ENC_PATH:
|
||||
if (vd->vdev_enc_sysfs_path == NULL)
|
||||
continue;
|
||||
vdev_prop_add_list(outnvl, propname,
|
||||
vd->vdev_enc_sysfs_path, 0, ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_FRU:
|
||||
if (vd->vdev_fru == NULL)
|
||||
continue;
|
||||
vdev_prop_add_list(outnvl, propname,
|
||||
vd->vdev_fru, 0, ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_PARENT:
|
||||
if (vd->vdev_parent != NULL) {
|
||||
strval = vdev_name(vd->vdev_parent,
|
||||
namebuf, sizeof (namebuf));
|
||||
vdev_prop_add_list(outnvl, propname,
|
||||
strval, 0, ZPROP_SRC_NONE);
|
||||
}
|
||||
continue;
|
||||
case VDEV_PROP_CHILDREN:
|
||||
if (vd->vdev_children > 0)
|
||||
strval = kmem_zalloc(ZAP_MAXVALUELEN,
|
||||
KM_SLEEP);
|
||||
for (uint64_t i = 0; i < vd->vdev_children;
|
||||
i++) {
|
||||
char *vname;
|
||||
|
||||
vname = vdev_name(vd->vdev_child[i],
|
||||
namebuf, sizeof (namebuf));
|
||||
if (vname == NULL)
|
||||
vname = "(unknown)";
|
||||
if (strlen(strval) > 0)
|
||||
strlcat(strval, ",",
|
||||
ZAP_MAXVALUELEN);
|
||||
strlcat(strval, vname, ZAP_MAXVALUELEN);
|
||||
}
|
||||
if (strval != NULL) {
|
||||
vdev_prop_add_list(outnvl, propname,
|
||||
strval, 0, ZPROP_SRC_NONE);
|
||||
kmem_free(strval, ZAP_MAXVALUELEN);
|
||||
}
|
||||
continue;
|
||||
case VDEV_PROP_NUMCHILDREN:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_children, ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_READ_ERRORS:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_read_errors,
|
||||
ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_WRITE_ERRORS:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_write_errors,
|
||||
ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_CHECKSUM_ERRORS:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_checksum_errors,
|
||||
ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_INITIALIZE_ERRORS:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_initialize_errors,
|
||||
ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_OPS_NULL:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_ops[ZIO_TYPE_NULL],
|
||||
ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_OPS_READ:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_ops[ZIO_TYPE_READ],
|
||||
ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_OPS_WRITE:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_ops[ZIO_TYPE_WRITE],
|
||||
ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_OPS_FREE:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_ops[ZIO_TYPE_FREE],
|
||||
ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_OPS_CLAIM:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_ops[ZIO_TYPE_CLAIM],
|
||||
ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_OPS_TRIM:
|
||||
/*
|
||||
* TRIM ops and bytes are reported to user
|
||||
* space as ZIO_TYPE_IOCTL. This is done to
|
||||
* preserve the vdev_stat_t structure layout
|
||||
* for user space.
|
||||
*/
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_ops[ZIO_TYPE_IOCTL],
|
||||
ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_BYTES_NULL:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_bytes[ZIO_TYPE_NULL],
|
||||
ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_BYTES_READ:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_bytes[ZIO_TYPE_READ],
|
||||
ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_BYTES_WRITE:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_bytes[ZIO_TYPE_WRITE],
|
||||
ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_BYTES_FREE:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_bytes[ZIO_TYPE_FREE],
|
||||
ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_BYTES_CLAIM:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_bytes[ZIO_TYPE_CLAIM],
|
||||
ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_BYTES_TRIM:
|
||||
/*
|
||||
* TRIM ops and bytes are reported to user
|
||||
* space as ZIO_TYPE_IOCTL. This is done to
|
||||
* preserve the vdev_stat_t structure layout
|
||||
* for user space.
|
||||
*/
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_stat.vs_bytes[ZIO_TYPE_IOCTL],
|
||||
ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_REMOVING:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
vd->vdev_removing, ZPROP_SRC_NONE);
|
||||
continue;
|
||||
/* Numeric Properites */
|
||||
case VDEV_PROP_ALLOCATING:
|
||||
src = ZPROP_SRC_LOCAL;
|
||||
strval = NULL;
|
||||
|
||||
err = zap_lookup(mos, objid, nvpair_name(elem),
|
||||
sizeof (uint64_t), 1, &intval);
|
||||
if (err == ENOENT) {
|
||||
intval =
|
||||
vdev_prop_default_numeric(prop);
|
||||
err = 0;
|
||||
} else if (err)
|
||||
break;
|
||||
if (intval == vdev_prop_default_numeric(prop))
|
||||
src = ZPROP_SRC_DEFAULT;
|
||||
|
||||
/* Leaf vdevs cannot have this property */
|
||||
if (vd->vdev_mg == NULL &&
|
||||
vd->vdev_top != NULL) {
|
||||
src = ZPROP_SRC_NONE;
|
||||
intval = ZPROP_BOOLEAN_NA;
|
||||
}
|
||||
|
||||
vdev_prop_add_list(outnvl, propname, strval,
|
||||
intval, src);
|
||||
break;
|
||||
/* Text Properties */
|
||||
case VDEV_PROP_COMMENT:
|
||||
/* Exists in the ZAP below */
|
||||
/* FALLTHRU */
|
||||
case VDEV_PROP_USER:
|
||||
/* User Properites */
|
||||
src = ZPROP_SRC_LOCAL;
|
||||
|
||||
err = zap_length(mos, objid, nvpair_name(elem),
|
||||
&integer_size, &num_integers);
|
||||
if (err)
|
||||
break;
|
||||
|
||||
switch (integer_size) {
|
||||
case 8:
|
||||
/* User properties cannot be integers */
|
||||
err = EINVAL;
|
||||
break;
|
||||
case 1:
|
||||
/* string property */
|
||||
strval = kmem_alloc(num_integers,
|
||||
KM_SLEEP);
|
||||
err = zap_lookup(mos, objid,
|
||||
nvpair_name(elem), 1,
|
||||
num_integers, strval);
|
||||
if (err) {
|
||||
kmem_free(strval,
|
||||
num_integers);
|
||||
break;
|
||||
}
|
||||
vdev_prop_add_list(outnvl, propname,
|
||||
strval, 0, src);
|
||||
kmem_free(strval, num_integers);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
err = ENOENT;
|
||||
break;
|
||||
}
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Get all properties from the MOS vdev property object.
|
||||
*/
|
||||
zap_cursor_t zc;
|
||||
zap_attribute_t za;
|
||||
for (zap_cursor_init(&zc, mos, objid);
|
||||
(err = zap_cursor_retrieve(&zc, &za)) == 0;
|
||||
zap_cursor_advance(&zc)) {
|
||||
intval = 0;
|
||||
strval = NULL;
|
||||
zprop_source_t src = ZPROP_SRC_DEFAULT;
|
||||
propname = za.za_name;
|
||||
prop = vdev_name_to_prop(propname);
|
||||
|
||||
switch (za.za_integer_length) {
|
||||
case 8:
|
||||
/* We do not allow integer user properties */
|
||||
/* This is likely an internal value */
|
||||
break;
|
||||
case 1:
|
||||
/* string property */
|
||||
strval = kmem_alloc(za.za_num_integers,
|
||||
KM_SLEEP);
|
||||
err = zap_lookup(mos, objid, za.za_name, 1,
|
||||
za.za_num_integers, strval);
|
||||
if (err) {
|
||||
kmem_free(strval, za.za_num_integers);
|
||||
break;
|
||||
}
|
||||
vdev_prop_add_list(outnvl, propname, strval, 0,
|
||||
src);
|
||||
kmem_free(strval, za.za_num_integers);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
zap_cursor_fini(&zc);
|
||||
}
|
||||
|
||||
mutex_exit(&spa->spa_props_lock);
|
||||
if (err && err != ENOENT) {
|
||||
return (err);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(vdev_fault);
|
||||
EXPORT_SYMBOL(vdev_degrade);
|
||||
EXPORT_SYMBOL(vdev_online);
|
||||
|
||||
@@ -496,6 +496,10 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
|
||||
fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASIZE,
|
||||
vd->vdev_asize);
|
||||
fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_LOG, vd->vdev_islog);
|
||||
if (vd->vdev_noalloc) {
|
||||
fnvlist_add_uint64(nv, ZPOOL_CONFIG_NONALLOCATING,
|
||||
vd->vdev_noalloc);
|
||||
}
|
||||
if (vd->vdev_removing) {
|
||||
fnvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVING,
|
||||
vd->vdev_removing);
|
||||
|
||||
+227
-47
@@ -167,6 +167,176 @@ spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid)
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_activate(vdev_t *vd)
|
||||
{
|
||||
metaslab_group_t *mg = vd->vdev_mg;
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
uint64_t vdev_space = spa_deflate(spa) ?
|
||||
vd->vdev_stat.vs_dspace : vd->vdev_stat.vs_space;
|
||||
|
||||
ASSERT(!vd->vdev_islog);
|
||||
ASSERT(vd->vdev_noalloc);
|
||||
|
||||
metaslab_group_activate(mg);
|
||||
metaslab_group_activate(vd->vdev_log_mg);
|
||||
|
||||
ASSERT3U(spa->spa_nonallocating_dspace, >=, vdev_space);
|
||||
|
||||
spa->spa_nonallocating_dspace -= vdev_space;
|
||||
|
||||
vd->vdev_noalloc = B_FALSE;
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_passivate(vdev_t *vd, uint64_t *txg)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
int error;
|
||||
|
||||
ASSERT(!vd->vdev_noalloc);
|
||||
|
||||
vdev_t *rvd = spa->spa_root_vdev;
|
||||
metaslab_group_t *mg = vd->vdev_mg;
|
||||
metaslab_class_t *normal = spa_normal_class(spa);
|
||||
if (mg->mg_class == normal) {
|
||||
/*
|
||||
* We must check that this is not the only allocating device in
|
||||
* the pool before passivating, otherwise we will not be able
|
||||
* to make progress because we can't allocate from any vdevs.
|
||||
*/
|
||||
boolean_t last = B_TRUE;
|
||||
for (uint64_t id = 0; id < rvd->vdev_children; id++) {
|
||||
vdev_t *cvd = rvd->vdev_child[id];
|
||||
|
||||
if (cvd == vd ||
|
||||
cvd->vdev_ops == &vdev_indirect_ops)
|
||||
continue;
|
||||
|
||||
metaslab_class_t *mc = cvd->vdev_mg->mg_class;
|
||||
if (mc != normal)
|
||||
continue;
|
||||
|
||||
if (!cvd->vdev_noalloc) {
|
||||
last = B_FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (last)
|
||||
return (SET_ERROR(EINVAL));
|
||||
}
|
||||
|
||||
metaslab_group_passivate(mg);
|
||||
ASSERT(!vd->vdev_islog);
|
||||
metaslab_group_passivate(vd->vdev_log_mg);
|
||||
|
||||
/*
|
||||
* Wait for the youngest allocations and frees to sync,
|
||||
* and then wait for the deferral of those frees to finish.
|
||||
*/
|
||||
spa_vdev_config_exit(spa, NULL,
|
||||
*txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG);
|
||||
|
||||
/*
|
||||
* We must ensure that no "stubby" log blocks are allocated
|
||||
* on the device to be removed. These blocks could be
|
||||
* written at any time, including while we are in the middle
|
||||
* of copying them.
|
||||
*/
|
||||
error = spa_reset_logs(spa);
|
||||
|
||||
*txg = spa_vdev_config_enter(spa);
|
||||
|
||||
if (error != 0) {
|
||||
metaslab_group_activate(mg);
|
||||
ASSERT(!vd->vdev_islog);
|
||||
if (vd->vdev_log_mg != NULL)
|
||||
metaslab_group_activate(vd->vdev_log_mg);
|
||||
return (error);
|
||||
}
|
||||
|
||||
spa->spa_nonallocating_dspace += spa_deflate(spa) ?
|
||||
vd->vdev_stat.vs_dspace : vd->vdev_stat.vs_space;
|
||||
vd->vdev_noalloc = B_TRUE;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Turn off allocations for a top-level device from the pool.
|
||||
*
|
||||
* Turning off allocations for a top-level device can take a significant
|
||||
* amount of time. As a result we use the spa_vdev_config_[enter/exit]
|
||||
* functions which allow us to grab and release the spa_config_lock while
|
||||
* still holding the namespace lock. During each step the configuration
|
||||
* is synced out.
|
||||
*/
|
||||
int
|
||||
spa_vdev_noalloc(spa_t *spa, uint64_t guid)
|
||||
{
|
||||
vdev_t *vd;
|
||||
uint64_t txg;
|
||||
int error = 0;
|
||||
|
||||
ASSERT(!MUTEX_HELD(&spa_namespace_lock));
|
||||
ASSERT(spa_writeable(spa));
|
||||
|
||||
txg = spa_vdev_enter(spa);
|
||||
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock));
|
||||
|
||||
vd = spa_lookup_by_guid(spa, guid, B_FALSE);
|
||||
|
||||
if (vd == NULL)
|
||||
error = SET_ERROR(ENOENT);
|
||||
else if (vd->vdev_mg == NULL)
|
||||
error = SET_ERROR(ZFS_ERR_VDEV_NOTSUP);
|
||||
else if (!vd->vdev_noalloc)
|
||||
error = vdev_passivate(vd, &txg);
|
||||
|
||||
if (error == 0) {
|
||||
vdev_dirty_leaves(vd, VDD_DTL, txg);
|
||||
vdev_config_dirty(vd);
|
||||
}
|
||||
|
||||
error = spa_vdev_exit(spa, NULL, txg, error);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
spa_vdev_alloc(spa_t *spa, uint64_t guid)
|
||||
{
|
||||
vdev_t *vd;
|
||||
uint64_t txg;
|
||||
int error = 0;
|
||||
|
||||
ASSERT(!MUTEX_HELD(&spa_namespace_lock));
|
||||
ASSERT(spa_writeable(spa));
|
||||
|
||||
txg = spa_vdev_enter(spa);
|
||||
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock));
|
||||
|
||||
vd = spa_lookup_by_guid(spa, guid, B_FALSE);
|
||||
|
||||
if (vd == NULL)
|
||||
error = SET_ERROR(ENOENT);
|
||||
else if (vd->vdev_mg == NULL)
|
||||
error = SET_ERROR(ZFS_ERR_VDEV_NOTSUP);
|
||||
else if (!vd->vdev_removing)
|
||||
vdev_activate(vd);
|
||||
|
||||
if (error == 0) {
|
||||
vdev_dirty_leaves(vd, VDD_DTL, txg);
|
||||
vdev_config_dirty(vd);
|
||||
}
|
||||
|
||||
(void) spa_vdev_exit(spa, NULL, txg, error);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static void
|
||||
spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count,
|
||||
nvlist_t *dev_to_remove)
|
||||
@@ -1193,6 +1363,8 @@ vdev_remove_complete(spa_t *spa)
|
||||
ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
|
||||
ASSERT3P(vd->vdev_trim_thread, ==, NULL);
|
||||
ASSERT3P(vd->vdev_autotrim_thread, ==, NULL);
|
||||
uint64_t vdev_space = spa_deflate(spa) ?
|
||||
vd->vdev_stat.vs_dspace : vd->vdev_stat.vs_space;
|
||||
|
||||
sysevent_t *ev = spa_event_create(spa, vd, NULL,
|
||||
ESC_ZFS_VDEV_REMOVE_DEV);
|
||||
@@ -1200,6 +1372,12 @@ vdev_remove_complete(spa_t *spa)
|
||||
zfs_dbgmsg("finishing device removal for vdev %llu in txg %llu",
|
||||
(u_longlong_t)vd->vdev_id, (u_longlong_t)txg);
|
||||
|
||||
ASSERT3U(0, !=, vdev_space);
|
||||
ASSERT3U(spa->spa_nonallocating_dspace, >=, vdev_space);
|
||||
|
||||
/* the vdev is no longer part of the dspace */
|
||||
spa->spa_nonallocating_dspace -= vdev_space;
|
||||
|
||||
/*
|
||||
* Discard allocation state.
|
||||
*/
|
||||
@@ -1619,6 +1797,28 @@ spa_vdev_remove_suspend(spa_t *spa)
|
||||
mutex_exit(&svr->svr_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return true if the "allocating" property has been set to "off"
|
||||
*/
|
||||
static boolean_t
|
||||
vdev_prop_allocating_off(vdev_t *vd)
|
||||
{
|
||||
uint64_t objid = vd->vdev_top_zap;
|
||||
uint64_t allocating = 1;
|
||||
|
||||
/* no vdev property object => no props */
|
||||
if (objid != 0) {
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
objset_t *mos = spa->spa_meta_objset;
|
||||
|
||||
mutex_enter(&spa->spa_props_lock);
|
||||
(void) zap_lookup(mos, objid, "allocating", sizeof (uint64_t),
|
||||
1, &allocating);
|
||||
mutex_exit(&spa->spa_props_lock);
|
||||
}
|
||||
return (allocating == 0);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
spa_vdev_remove_cancel_check(void *arg, dmu_tx_t *tx)
|
||||
@@ -1761,6 +1961,13 @@ spa_vdev_remove_cancel_sync(void *arg, dmu_tx_t *tx)
|
||||
spa_finish_removal(spa, DSS_CANCELED, tx);
|
||||
|
||||
vd->vdev_removing = B_FALSE;
|
||||
|
||||
if (!vdev_prop_allocating_off(vd)) {
|
||||
spa_config_enter(spa, SCL_ALLOC | SCL_VDEV, FTAG, RW_WRITER);
|
||||
vdev_activate(vd);
|
||||
spa_config_exit(spa, SCL_ALLOC | SCL_VDEV, FTAG);
|
||||
}
|
||||
|
||||
vdev_config_dirty(vd);
|
||||
|
||||
zfs_dbgmsg("canceled device removal for vdev %llu in %llu",
|
||||
@@ -1774,21 +1981,9 @@ spa_vdev_remove_cancel_sync(void *arg, dmu_tx_t *tx)
|
||||
static int
|
||||
spa_vdev_remove_cancel_impl(spa_t *spa)
|
||||
{
|
||||
uint64_t vdid = spa->spa_vdev_removal->svr_vdev_id;
|
||||
|
||||
int error = dsl_sync_task(spa->spa_name, spa_vdev_remove_cancel_check,
|
||||
spa_vdev_remove_cancel_sync, NULL, 0,
|
||||
ZFS_SPACE_CHECK_EXTRA_RESERVED);
|
||||
|
||||
if (error == 0) {
|
||||
spa_config_enter(spa, SCL_ALLOC | SCL_VDEV, FTAG, RW_WRITER);
|
||||
vdev_t *vd = vdev_lookup_top(spa, vdid);
|
||||
metaslab_group_activate(vd->vdev_mg);
|
||||
ASSERT(!vd->vdev_islog);
|
||||
metaslab_group_activate(vd->vdev_log_mg);
|
||||
spa_config_exit(spa, SCL_ALLOC | SCL_VDEV, FTAG);
|
||||
}
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
@@ -1984,6 +2179,11 @@ spa_vdev_remove_top_check(vdev_t *vd)
|
||||
if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REMOVAL))
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
|
||||
/*
|
||||
* This device is already being removed
|
||||
*/
|
||||
if (vd->vdev_removing)
|
||||
return (SET_ERROR(EALREADY));
|
||||
|
||||
metaslab_class_t *mc = vd->vdev_mg->mg_class;
|
||||
metaslab_class_t *normal = spa_normal_class(spa);
|
||||
@@ -2002,20 +2202,12 @@ spa_vdev_remove_top_check(vdev_t *vd)
|
||||
ASSERT3U(available, >=, vd->vdev_stat.vs_alloc);
|
||||
if (available < vd->vdev_stat.vs_alloc)
|
||||
return (SET_ERROR(ENOSPC));
|
||||
} else {
|
||||
} else if (!vd->vdev_noalloc) {
|
||||
/* available space in the pool's normal class */
|
||||
uint64_t available = dsl_dir_space_available(
|
||||
spa->spa_dsl_pool->dp_root_dir, NULL, 0, B_TRUE);
|
||||
if (available <
|
||||
vd->vdev_stat.vs_dspace + spa_get_slop_space(spa)) {
|
||||
/*
|
||||
* This is a normal device. There has to be enough free
|
||||
* space to remove the device and leave double the
|
||||
* "slop" space (i.e. we must leave at least 3% of the
|
||||
* pool free, in addition to the normal slop space).
|
||||
*/
|
||||
if (available < vd->vdev_stat.vs_dspace)
|
||||
return (SET_ERROR(ENOSPC));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2108,6 +2300,7 @@ static int
|
||||
spa_vdev_remove_top(vdev_t *vd, uint64_t *txg)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
boolean_t set_noalloc = B_FALSE;
|
||||
int error;
|
||||
|
||||
/*
|
||||
@@ -2116,8 +2309,6 @@ spa_vdev_remove_top(vdev_t *vd, uint64_t *txg)
|
||||
* are errors.
|
||||
*/
|
||||
error = spa_vdev_remove_top_check(vd);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
|
||||
/*
|
||||
* Stop allocating from this vdev. Note that we must check
|
||||
@@ -2127,31 +2318,22 @@ spa_vdev_remove_top(vdev_t *vd, uint64_t *txg)
|
||||
* The above check for sufficient free space serves this
|
||||
* purpose.
|
||||
*/
|
||||
metaslab_group_t *mg = vd->vdev_mg;
|
||||
metaslab_group_passivate(mg);
|
||||
ASSERT(!vd->vdev_islog);
|
||||
metaslab_group_passivate(vd->vdev_log_mg);
|
||||
if (error == 0 && !vd->vdev_noalloc) {
|
||||
set_noalloc = B_TRUE;
|
||||
error = vdev_passivate(vd, txg);
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for the youngest allocations and frees to sync,
|
||||
* and then wait for the deferral of those frees to finish.
|
||||
*/
|
||||
spa_vdev_config_exit(spa, NULL,
|
||||
*txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG);
|
||||
|
||||
/*
|
||||
* We must ensure that no "stubby" log blocks are allocated
|
||||
* on the device to be removed. These blocks could be
|
||||
* written at any time, including while we are in the middle
|
||||
* of copying them.
|
||||
*/
|
||||
error = spa_reset_logs(spa);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
|
||||
/*
|
||||
* We stop any initializing and TRIM that is currently in progress
|
||||
* but leave the state as "active". This will allow the process to
|
||||
* resume if the removal is canceled sometime later.
|
||||
*/
|
||||
|
||||
spa_vdev_config_exit(spa, NULL, *txg, 0, FTAG);
|
||||
|
||||
vdev_initialize_stop_all(vd, VDEV_INITIALIZE_ACTIVE);
|
||||
vdev_trim_stop_all(vd, VDEV_TRIM_ACTIVE);
|
||||
vdev_autotrim_stop_wait(vd);
|
||||
@@ -2162,13 +2344,11 @@ spa_vdev_remove_top(vdev_t *vd, uint64_t *txg)
|
||||
* Things might have changed while the config lock was dropped
|
||||
* (e.g. space usage). Check for errors again.
|
||||
*/
|
||||
if (error == 0)
|
||||
error = spa_vdev_remove_top_check(vd);
|
||||
error = spa_vdev_remove_top_check(vd);
|
||||
|
||||
if (error != 0) {
|
||||
metaslab_group_activate(mg);
|
||||
ASSERT(!vd->vdev_islog);
|
||||
metaslab_group_activate(vd->vdev_log_mg);
|
||||
if (set_noalloc)
|
||||
vdev_activate(vd);
|
||||
spa_async_request(spa, SPA_ASYNC_INITIALIZE_RESTART);
|
||||
spa_async_request(spa, SPA_ASYNC_TRIM_RESTART);
|
||||
spa_async_request(spa, SPA_ASYNC_AUTOTRIM_RESTART);
|
||||
|
||||
+101
-1
@@ -38,7 +38,7 @@
|
||||
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
|
||||
* Copyright (c) 2019 Datto Inc.
|
||||
* Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
|
||||
* Copyright (c) 2019, Klara Inc.
|
||||
* Copyright (c) 2019, 2021, Klara Inc.
|
||||
* Copyright (c) 2019, Allan Jude
|
||||
*/
|
||||
|
||||
@@ -2981,6 +2981,96 @@ zfs_ioc_pool_get_props(zfs_cmd_t *zc)
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* innvl: {
|
||||
* "vdevprops_set_vdev" -> guid
|
||||
* "vdevprops_set_props" -> { prop -> value }
|
||||
* }
|
||||
*
|
||||
* outnvl: propname -> error code (int32)
|
||||
*/
|
||||
static const zfs_ioc_key_t zfs_keys_vdev_set_props[] = {
|
||||
{ZPOOL_VDEV_PROPS_SET_VDEV, DATA_TYPE_UINT64, 0},
|
||||
{ZPOOL_VDEV_PROPS_SET_PROPS, DATA_TYPE_NVLIST, 0}
|
||||
};
|
||||
|
||||
static int
|
||||
zfs_ioc_vdev_set_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
{
|
||||
spa_t *spa;
|
||||
int error;
|
||||
vdev_t *vd;
|
||||
uint64_t vdev_guid;
|
||||
|
||||
/* Early validation */
|
||||
if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_SET_VDEV,
|
||||
&vdev_guid) != 0)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
if (outnvl == NULL)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
if ((error = spa_open(poolname, &spa, FTAG)) != 0)
|
||||
return (error);
|
||||
|
||||
ASSERT(spa_writeable(spa));
|
||||
|
||||
if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) {
|
||||
spa_close(spa, FTAG);
|
||||
return (SET_ERROR(ENOENT));
|
||||
}
|
||||
|
||||
error = vdev_prop_set(vd, innvl, outnvl);
|
||||
|
||||
spa_close(spa, FTAG);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* innvl: {
|
||||
* "vdevprops_get_vdev" -> guid
|
||||
* (optional) "vdevprops_get_props" -> { propname -> propid }
|
||||
* }
|
||||
*
|
||||
* outnvl: propname -> value
|
||||
*/
|
||||
static const zfs_ioc_key_t zfs_keys_vdev_get_props[] = {
|
||||
{ZPOOL_VDEV_PROPS_GET_VDEV, DATA_TYPE_UINT64, 0},
|
||||
{ZPOOL_VDEV_PROPS_GET_PROPS, DATA_TYPE_NVLIST, ZK_OPTIONAL}
|
||||
};
|
||||
|
||||
static int
|
||||
zfs_ioc_vdev_get_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
{
|
||||
spa_t *spa;
|
||||
int error;
|
||||
vdev_t *vd;
|
||||
uint64_t vdev_guid;
|
||||
|
||||
/* Early validation */
|
||||
if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_GET_VDEV,
|
||||
&vdev_guid) != 0)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
if (outnvl == NULL)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
if ((error = spa_open(poolname, &spa, FTAG)) != 0)
|
||||
return (error);
|
||||
|
||||
if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) {
|
||||
spa_close(spa, FTAG);
|
||||
return (SET_ERROR(ENOENT));
|
||||
}
|
||||
|
||||
error = vdev_prop_get(vd, innvl, outnvl);
|
||||
|
||||
spa_close(spa, FTAG);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* inputs:
|
||||
* zc_name name of filesystem
|
||||
@@ -7107,6 +7197,16 @@ zfs_ioctl_init(void)
|
||||
POOL_CHECK_SUSPENDED, B_FALSE, B_TRUE,
|
||||
zfs_keys_get_bootenv, ARRAY_SIZE(zfs_keys_get_bootenv));
|
||||
|
||||
zfs_ioctl_register("zpool_vdev_get_props", ZFS_IOC_VDEV_GET_PROPS,
|
||||
zfs_ioc_vdev_get_props, zfs_secpolicy_read, POOL_NAME,
|
||||
POOL_CHECK_NONE, B_FALSE, B_FALSE, zfs_keys_vdev_get_props,
|
||||
ARRAY_SIZE(zfs_keys_vdev_get_props));
|
||||
|
||||
zfs_ioctl_register("zpool_vdev_set_props", ZFS_IOC_VDEV_SET_PROPS,
|
||||
zfs_ioc_vdev_set_props, zfs_secpolicy_config, POOL_NAME,
|
||||
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
|
||||
zfs_keys_vdev_set_props, ARRAY_SIZE(zfs_keys_vdev_set_props));
|
||||
|
||||
/* IOCTLS that use the legacy function signature */
|
||||
|
||||
zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
|
||||
|
||||
+1
-1
@@ -3755,7 +3755,7 @@ zio_vdev_io_start(zio_t *zio)
|
||||
* Note: the code can handle other kinds of writes,
|
||||
* but we don't expect them.
|
||||
*/
|
||||
if (zio->io_vd->vdev_removing) {
|
||||
if (zio->io_vd->vdev_noalloc) {
|
||||
ASSERT(zio->io_flags &
|
||||
(ZIO_FLAG_PHYSICAL | ZIO_FLAG_SELF_HEAL |
|
||||
ZIO_FLAG_RESILVER | ZIO_FLAG_INDUCE_DAMAGE));
|
||||
|
||||
Reference in New Issue
Block a user