Add device rebuild feature

The device_rebuild feature enables sequential reconstruction when
resilvering.  Mirror vdevs can be rebuilt in LBA order which may
more quickly restore redundancy depending on the pools average block
size, overall fragmentation and the performance characteristics
of the devices.  However, block checksums cannot be verified
as part of the rebuild thus a scrub is automatically started after
the sequential resilver completes.

The new '-s' option has been added to the `zpool attach` and
`zpool replace` command to request sequential reconstruction
instead of healing reconstruction when resilvering.

    zpool attach -s <pool> <existing vdev> <new vdev>
    zpool replace -s <pool> <old vdev> <new vdev>

The `zpool status` output has been updated to report the progress
of sequential resilvering in the same way as healing resilvering.
The one notable difference is that multiple sequential resilvers
may be in progress as long as they're operating on different
top-level vdevs.

The `zpool wait -t resilver` command was extended to wait on
sequential resilvers.  From this perspective they are no different
than healing resilvers.

Sequential resilvers cannot be supported for RAIDZ, but are
compatible with the dRAID feature being developed.

As part of this change the resilver_restart_* tests were moved
in to the functional/replacement directory.  Additionally, the
replacement tests were renamed and extended to verify both
resilvering and rebuilding.

Original-patch-by: Isaac Huang <he.huang@intel.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: John Poduska <jpoduska@datto.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10349
This commit is contained in:
Brian Behlendorf
2020-07-03 11:05:50 -07:00
committed by GitHub
parent 7ddb753d17
commit 9a49d3f3d3
65 changed files with 3281 additions and 362 deletions
+20 -6
View File
@@ -2446,7 +2446,8 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
(void) nvlist_lookup_uint64_array(nvroot,
ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
if (ps && ps->pss_func == POOL_SCAN_SCRUB) {
if (ps && ps->pss_func == POOL_SCAN_SCRUB &&
ps->pss_state == DSS_SCANNING) {
if (cmd == POOL_SCRUB_PAUSE)
return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg));
else
@@ -3128,8 +3129,8 @@ is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
* If 'replacing' is specified, the new disk will replace the old one.
*/
int
zpool_vdev_attach(zpool_handle_t *zhp,
const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
const char *new_disk, nvlist_t *nvroot, int replacing, boolean_t rebuild)
{
zfs_cmd_t zc = {"\0"};
char msg[1024];
@@ -3164,6 +3165,14 @@ zpool_vdev_attach(zpool_handle_t *zhp,
verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
zc.zc_cookie = replacing;
zc.zc_simple = rebuild;
if (rebuild &&
zfeature_lookup_guid("org.openzfs:device_rebuild", NULL) != 0) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"the loaded zfs module doesn't support device rebuilds"));
return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
}
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0 || children != 1) {
@@ -3224,16 +3233,21 @@ zpool_vdev_attach(zpool_handle_t *zhp,
uint64_t version = zpool_get_prop_int(zhp,
ZPOOL_PROP_VERSION, NULL);
if (islog)
if (islog) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"cannot replace a log with a spare"));
else if (version >= SPA_VERSION_MULTI_REPLACE)
} else if (rebuild) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"only mirror vdevs support sequential "
"reconstruction"));
} else if (version >= SPA_VERSION_MULTI_REPLACE) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"already in replacing/spare config; wait "
"for completion or use 'zpool detach'"));
else
} else {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"cannot replace a replacing device"));
}
} else {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"can only attach to mirrors and top-level "
+46 -1
View File
@@ -84,6 +84,8 @@ static char *zfs_msgid_table[] = {
* ZPOOL_STATUS_RESILVERING
* ZPOOL_STATUS_OFFLINE_DEV
* ZPOOL_STATUS_REMOVED_DEV
* ZPOOL_STATUS_REBUILDING
* ZPOOL_STATUS_REBUILD_SCRUB
* ZPOOL_STATUS_OK
*/
};
@@ -195,7 +197,7 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
* - Check for any data errors
* - Check for any faulted or missing devices in a replicated config
* - Look for any devices showing errors
* - Check for any resilvering devices
* - Check for any resilvering or rebuilding devices
*
* There can obviously be multiple errors within a single pool, so this routine
* only picks the most damaging of all the current errors to report.
@@ -233,6 +235,49 @@ check_status(nvlist_t *config, boolean_t isimport, zpool_errata_t *erratap)
ps->pss_state == DSS_SCANNING)
return (ZPOOL_STATUS_RESILVERING);
/*
* Currently rebuilding a vdev, check top-level vdevs.
*/
vdev_rebuild_stat_t *vrs = NULL;
nvlist_t **child;
uint_t c, i, children;
uint64_t rebuild_end_time = 0;
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
&child, &children) == 0) {
for (c = 0; c < children; c++) {
if ((nvlist_lookup_uint64_array(child[c],
ZPOOL_CONFIG_REBUILD_STATS,
(uint64_t **)&vrs, &i) == 0) && (vrs != NULL)) {
uint64_t state = vrs->vrs_state;
if (state == VDEV_REBUILD_ACTIVE) {
return (ZPOOL_STATUS_REBUILDING);
} else if (state == VDEV_REBUILD_COMPLETE &&
vrs->vrs_end_time > rebuild_end_time) {
rebuild_end_time = vrs->vrs_end_time;
}
}
}
/*
* If we can determine when the last scrub was run, and it
* was before the last rebuild completed, then recommend
* that the pool be scrubbed to verify all checksums. When
* ps is NULL we can infer the pool has never been scrubbed.
*/
if (rebuild_end_time > 0) {
if (ps != NULL) {
if ((ps->pss_state == DSS_FINISHED &&
ps->pss_func == POOL_SCAN_SCRUB &&
rebuild_end_time > ps->pss_end_time) ||
ps->pss_state == DSS_NONE)
return (ZPOOL_STATUS_REBUILD_SCRUB);
} else {
return (ZPOOL_STATUS_REBUILD_SCRUB);
}
}
}
/*
* The multihost property is set and the pool may be active.
*/
+9
View File
@@ -286,6 +286,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
"resilver_defer feature"));
case EZFS_EXPORT_IN_PROGRESS:
return (dgettext(TEXT_DOMAIN, "pool export in progress"));
case EZFS_REBUILDING:
return (dgettext(TEXT_DOMAIN, "currently sequentially "
"resilvering"));
case EZFS_UNKNOWN:
return (dgettext(TEXT_DOMAIN, "unknown error"));
default:
@@ -693,6 +696,12 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
case ZFS_ERR_EXPORT_IN_PROGRESS:
zfs_verror(hdl, EZFS_EXPORT_IN_PROGRESS, fmt, ap);
break;
case ZFS_ERR_RESILVER_IN_PROGRESS:
zfs_verror(hdl, EZFS_RESILVERING, fmt, ap);
break;
case ZFS_ERR_REBUILD_IN_PROGRESS:
zfs_verror(hdl, EZFS_REBUILDING, fmt, ap);
break;
case ZFS_ERR_IOC_CMD_UNAVAIL:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs "
"module does not support this operation. A reboot may "