mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-27 02:14:28 +03:00
Add device rebuild feature
The device_rebuild feature enables sequential reconstruction when resilvering. Mirror vdevs can be rebuilt in LBA order which may more quickly restore redundancy depending on the pool's average block size, overall fragmentation and the performance characteristics of the devices. However, block checksums cannot be verified as part of the rebuild, thus a scrub is automatically started after the sequential resilver completes. The new '-s' option has been added to the `zpool attach` and `zpool replace` commands to request sequential reconstruction instead of healing reconstruction when resilvering. zpool attach -s <pool> <existing vdev> <new vdev> zpool replace -s <pool> <old vdev> <new vdev> The `zpool status` output has been updated to report the progress of sequential resilvering in the same way as healing resilvering. The one notable difference is that multiple sequential resilvers may be in progress as long as they're operating on different top-level vdevs. The `zpool wait -t resilver` command was extended to wait on sequential resilvers. From this perspective they are no different than healing resilvers. Sequential resilvers cannot be supported for RAIDZ, but are compatible with the dRAID feature being developed. As part of this change the resilver_restart_* tests were moved into the functional/replacement directory. Additionally, the replacement tests were renamed and extended to verify both resilvering and rebuilding. Original-patch-by: Isaac Huang <he.huang@intel.com> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: John Poduska <jpoduska@datto.com> Co-authored-by: Mark Maybee <mmaybee@cray.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #10349
This commit is contained in:
parent
7ddb753d17
commit
9a49d3f3d3
@ -437,7 +437,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
||||
return;
|
||||
}
|
||||
|
||||
ret = zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE);
|
||||
ret = zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE, B_FALSE);
|
||||
|
||||
zed_log_msg(LOG_INFO, " zpool_vdev_replace: %s with %s (%s)",
|
||||
fullpath, path, (ret == 0) ? "no errors" :
|
||||
|
@ -237,7 +237,7 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
|
||||
dev_name, basename(spare_name));
|
||||
|
||||
if (zpool_vdev_attach(zhp, dev_name, spare_name,
|
||||
replacement, B_TRUE) == 0) {
|
||||
replacement, B_TRUE, B_FALSE) == 0) {
|
||||
free(dev_name);
|
||||
nvlist_free(replacement);
|
||||
return (B_TRUE);
|
||||
@ -319,12 +319,16 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
|
||||
|
||||
fmd_hdl_debug(hdl, "zfs_retire_recv: '%s'", class);
|
||||
|
||||
nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE, &state);
|
||||
|
||||
/*
|
||||
* If this is a resource notifying us of device removal then simply
|
||||
* check for an available spare and continue unless the device is a
|
||||
* l2arc vdev, in which case we just offline it.
|
||||
*/
|
||||
if (strcmp(class, "resource.fs.zfs.removed") == 0) {
|
||||
if (strcmp(class, "resource.fs.zfs.removed") == 0 ||
|
||||
(strcmp(class, "resource.fs.zfs.statechange") == 0 &&
|
||||
state == VDEV_STATE_REMOVED)) {
|
||||
char *devtype;
|
||||
char *devname;
|
||||
|
||||
@ -365,8 +369,7 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
|
||||
* healthy ones so we need to confirm the actual state value.
|
||||
*/
|
||||
if (strcmp(class, "resource.fs.zfs.statechange") == 0 &&
|
||||
nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE,
|
||||
&state) == 0 && state == VDEV_STATE_HEALTHY) {
|
||||
state == VDEV_STATE_HEALTHY) {
|
||||
zfs_vdev_repair(hdl, nvl);
|
||||
return;
|
||||
}
|
||||
|
@ -5,10 +5,12 @@
|
||||
# Exit codes:
|
||||
# 1: Internal error
|
||||
# 2: Script wasn't enabled in zed.rc
|
||||
# 3: Scrubs are automatically started for sequential resilvers
|
||||
[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
|
||||
. "${ZED_ZEDLET_DIR}/zed-functions.sh"
|
||||
|
||||
[ "${ZED_SCRUB_AFTER_RESILVER}" = "1" ] || exit 2
|
||||
[ "${ZEVENT_RESILVER_TYPE}" != "sequential" ] || exit 3
|
||||
[ -n "${ZEVENT_POOL}" ] || exit 1
|
||||
[ -n "${ZEVENT_SUBCLASS}" ] || exit 1
|
||||
zed_check_cmd "${ZPOOL}" || exit 1
|
||||
|
@ -337,7 +337,7 @@ get_usage(zpool_help_t idx)
|
||||
return (gettext("\tadd [-fgLnP] [-o property=value] "
|
||||
"<pool> <vdev> ...\n"));
|
||||
case HELP_ATTACH:
|
||||
return (gettext("\tattach [-fw] [-o property=value] "
|
||||
return (gettext("\tattach [-fsw] [-o property=value] "
|
||||
"<pool> <device> <new-device>\n"));
|
||||
case HELP_CLEAR:
|
||||
return (gettext("\tclear [-nF] <pool> [device]\n"));
|
||||
@ -380,7 +380,7 @@ get_usage(zpool_help_t idx)
|
||||
case HELP_ONLINE:
|
||||
return (gettext("\tonline [-e] <pool> <device> ...\n"));
|
||||
case HELP_REPLACE:
|
||||
return (gettext("\treplace [-fw] [-o property=value] "
|
||||
return (gettext("\treplace [-fsw] [-o property=value] "
|
||||
"<pool> <device> [new-device]\n"));
|
||||
case HELP_REMOVE:
|
||||
return (gettext("\tremove [-npsw] <pool> <device> ...\n"));
|
||||
@ -2077,10 +2077,10 @@ health_str_to_color(const char *health)
|
||||
*/
|
||||
static void
|
||||
print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
|
||||
nvlist_t *nv, int depth, boolean_t isspare)
|
||||
nvlist_t *nv, int depth, boolean_t isspare, vdev_rebuild_stat_t *vrs)
|
||||
{
|
||||
nvlist_t **child, *root;
|
||||
uint_t c, children;
|
||||
uint_t c, i, children;
|
||||
pool_scan_stat_t *ps = NULL;
|
||||
vdev_stat_t *vs;
|
||||
char rbuf[6], wbuf[6], cbuf[6];
|
||||
@ -2266,6 +2266,14 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
|
||||
}
|
||||
}
|
||||
|
||||
/* The top-level vdevs have the rebuild stats */
|
||||
if (vrs != NULL && vrs->vrs_state == VDEV_REBUILD_ACTIVE &&
|
||||
children == 0) {
|
||||
if (vs->vs_rebuild_processed != 0) {
|
||||
(void) printf(gettext(" (resilvering)"));
|
||||
}
|
||||
}
|
||||
|
||||
if (cb->vcdl != NULL) {
|
||||
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
|
||||
printf(" ");
|
||||
@ -2295,11 +2303,17 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
|
||||
if (nvlist_exists(child[c], ZPOOL_CONFIG_ALLOCATION_BIAS))
|
||||
continue;
|
||||
|
||||
/* Provide vdev_rebuild_stats to children if available */
|
||||
if (vrs == NULL) {
|
||||
(void) nvlist_lookup_uint64_array(nv,
|
||||
ZPOOL_CONFIG_REBUILD_STATS,
|
||||
(uint64_t **)&vrs, &i);
|
||||
}
|
||||
|
||||
vname = zpool_vdev_name(g_zfs, zhp, child[c],
|
||||
cb->cb_name_flags | VDEV_NAME_TYPE_ID);
|
||||
|
||||
print_status_config(zhp, cb, vname, child[c], depth + 2,
|
||||
isspare);
|
||||
isspare, vrs);
|
||||
free(vname);
|
||||
}
|
||||
}
|
||||
@ -2468,7 +2482,7 @@ print_class_vdevs(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t *nv,
|
||||
cb->cb_name_flags | VDEV_NAME_TYPE_ID);
|
||||
if (cb->cb_print_status)
|
||||
print_status_config(zhp, cb, name, child[c], 2,
|
||||
B_FALSE);
|
||||
B_FALSE, NULL);
|
||||
else
|
||||
print_import_config(cb, name, child[c], 2);
|
||||
free(name);
|
||||
@ -2622,6 +2636,7 @@ show_import(nvlist_t *config)
|
||||
break;
|
||||
|
||||
case ZPOOL_STATUS_RESILVERING:
|
||||
case ZPOOL_STATUS_REBUILDING:
|
||||
printf_color(ANSI_BOLD, gettext("status: "));
|
||||
printf_color(ANSI_YELLOW, gettext("One or more devices were "
|
||||
"being resilvered.\n"));
|
||||
@ -6118,6 +6133,7 @@ static int
|
||||
zpool_do_attach_or_replace(int argc, char **argv, int replacing)
|
||||
{
|
||||
boolean_t force = B_FALSE;
|
||||
boolean_t rebuild = B_FALSE;
|
||||
boolean_t wait = B_FALSE;
|
||||
int c;
|
||||
nvlist_t *nvroot;
|
||||
@ -6128,7 +6144,7 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
|
||||
int ret;
|
||||
|
||||
/* check options */
|
||||
while ((c = getopt(argc, argv, "fo:w")) != -1) {
|
||||
while ((c = getopt(argc, argv, "fo:sw")) != -1) {
|
||||
switch (c) {
|
||||
case 'f':
|
||||
force = B_TRUE;
|
||||
@ -6146,6 +6162,9 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
|
||||
(add_prop_list(optarg, propval, &props, B_TRUE)))
|
||||
usage(B_FALSE);
|
||||
break;
|
||||
case 's':
|
||||
rebuild = B_TRUE;
|
||||
break;
|
||||
case 'w':
|
||||
wait = B_TRUE;
|
||||
break;
|
||||
@ -6230,7 +6249,8 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
|
||||
return (1);
|
||||
}
|
||||
|
||||
ret = zpool_vdev_attach(zhp, old_disk, new_disk, nvroot, replacing);
|
||||
ret = zpool_vdev_attach(zhp, old_disk, new_disk, nvroot, replacing,
|
||||
rebuild);
|
||||
|
||||
if (ret == 0 && wait)
|
||||
ret = zpool_wait(zhp,
|
||||
@ -6244,9 +6264,10 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
|
||||
}
|
||||
|
||||
/*
|
||||
* zpool replace [-fw] [-o property=value] <pool> <device> <new_device>
|
||||
* zpool replace [-fsw] [-o property=value] <pool> <device> <new_device>
|
||||
*
|
||||
* -f Force attach, even if <new_device> appears to be in use.
|
||||
* -s Use sequential instead of healing reconstruction for resilver.
|
||||
* -o Set property=value.
|
||||
* -w Wait for replacing to complete before returning
|
||||
*
|
||||
@ -6260,9 +6281,10 @@ zpool_do_replace(int argc, char **argv)
|
||||
}
|
||||
|
||||
/*
|
||||
* zpool attach [-fw] [-o property=value] <pool> <device> <new_device>
|
||||
* zpool attach [-fsw] [-o property=value] <pool> <device> <new_device>
|
||||
*
|
||||
* -f Force attach, even if <new_device> appears to be in use.
|
||||
* -s Use sequential instead of healing reconstruction for resilver.
|
||||
* -o Set property=value.
|
||||
* -w Wait for resilvering to complete before returning
|
||||
*
|
||||
@ -7131,20 +7153,41 @@ zpool_do_trim(int argc, char **argv)
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
 * Format a duration given in seconds as "HH:MM:SS", prefixed with
 * "<N> days " when the duration is one day or longer.
 *
 * total - duration in seconds
 * buf   - destination for the NUL-terminated result; the write is
 *         unbounded, so the caller must supply enough room (the longest
 *         possible result, for UINT64_MAX seconds, is 29 characters
 *         plus the terminator)
 */
static void
secs_to_dhms(uint64_t total, char *buf)
{
	/* Locals are unsigned long long so they match "%llu" without casts. */
	const unsigned long long all_mins = total / 60;
	const unsigned long long all_hours = all_mins / 60;
	const unsigned long long d = all_hours / 24;
	const unsigned long long h = all_hours % 24;
	const unsigned long long m = all_mins % 60;
	const unsigned long long s = total % 60;

	/* Durations shorter than a day omit the day count entirely. */
	if (d == 0) {
		(void) sprintf(buf, "%02llu:%02llu:%02llu", h, m, s);
		return;
	}

	(void) sprintf(buf, "%llu days %02llu:%02llu:%02llu", d, h, m, s);
}
|
||||
|
||||
/*
|
||||
* Print out detailed scrub status.
|
||||
*/
|
||||
static void
|
||||
print_scan_status(pool_scan_stat_t *ps)
|
||||
print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
|
||||
{
|
||||
time_t start, end, pause;
|
||||
uint64_t total_secs_left;
|
||||
uint64_t elapsed, secs_left, mins_left, hours_left, days_left;
|
||||
uint64_t pass_scanned, scanned, pass_issued, issued, total;
|
||||
uint64_t scan_rate, issue_rate;
|
||||
uint64_t elapsed, scan_rate, issue_rate;
|
||||
double fraction_done;
|
||||
char processed_buf[7], scanned_buf[7], issued_buf[7], total_buf[7];
|
||||
char srate_buf[7], irate_buf[7];
|
||||
char srate_buf[7], irate_buf[7], time_buf[32];
|
||||
|
||||
printf(" ");
|
||||
printf_color(ANSI_BOLD, gettext("scan:"));
|
||||
@ -7168,26 +7211,18 @@ print_scan_status(pool_scan_stat_t *ps)
|
||||
|
||||
/* Scan is finished or canceled. */
|
||||
if (ps->pss_state == DSS_FINISHED) {
|
||||
total_secs_left = end - start;
|
||||
days_left = total_secs_left / 60 / 60 / 24;
|
||||
hours_left = (total_secs_left / 60 / 60) % 24;
|
||||
mins_left = (total_secs_left / 60) % 60;
|
||||
secs_left = (total_secs_left % 60);
|
||||
secs_to_dhms(end - start, time_buf);
|
||||
|
||||
if (ps->pss_func == POOL_SCAN_SCRUB) {
|
||||
(void) printf(gettext("scrub repaired %s "
|
||||
"in %llu days %02llu:%02llu:%02llu "
|
||||
"with %llu errors on %s"), processed_buf,
|
||||
(u_longlong_t)days_left, (u_longlong_t)hours_left,
|
||||
(u_longlong_t)mins_left, (u_longlong_t)secs_left,
|
||||
(u_longlong_t)ps->pss_errors, ctime(&end));
|
||||
"in %s with %llu errors on %s"), processed_buf,
|
||||
time_buf, (u_longlong_t)ps->pss_errors,
|
||||
ctime(&end));
|
||||
} else if (ps->pss_func == POOL_SCAN_RESILVER) {
|
||||
(void) printf(gettext("resilvered %s "
|
||||
"in %llu days %02llu:%02llu:%02llu "
|
||||
"with %llu errors on %s"), processed_buf,
|
||||
(u_longlong_t)days_left, (u_longlong_t)hours_left,
|
||||
(u_longlong_t)mins_left, (u_longlong_t)secs_left,
|
||||
(u_longlong_t)ps->pss_errors, ctime(&end));
|
||||
"in %s with %llu errors on %s"), processed_buf,
|
||||
time_buf, (u_longlong_t)ps->pss_errors,
|
||||
ctime(&end));
|
||||
}
|
||||
return;
|
||||
} else if (ps->pss_state == DSS_CANCELED) {
|
||||
@ -7235,13 +7270,9 @@ print_scan_status(pool_scan_stat_t *ps)
|
||||
|
||||
scan_rate = pass_scanned / elapsed;
|
||||
issue_rate = pass_issued / elapsed;
|
||||
total_secs_left = (issue_rate != 0 && total >= issued) ?
|
||||
uint64_t total_secs_left = (issue_rate != 0 && total >= issued) ?
|
||||
((total - issued) / issue_rate) : UINT64_MAX;
|
||||
|
||||
days_left = total_secs_left / 60 / 60 / 24;
|
||||
hours_left = (total_secs_left / 60 / 60) % 24;
|
||||
mins_left = (total_secs_left / 60) % 60;
|
||||
secs_left = (total_secs_left % 60);
|
||||
secs_to_dhms(total_secs_left, time_buf);
|
||||
|
||||
/* format all of the numbers we will be reporting */
|
||||
zfs_nicebytes(scanned, scanned_buf, sizeof (scanned_buf));
|
||||
@ -7271,10 +7302,84 @@ print_scan_status(pool_scan_stat_t *ps)
|
||||
if (pause == 0) {
|
||||
if (total_secs_left != UINT64_MAX &&
|
||||
issue_rate >= 10 * 1024 * 1024) {
|
||||
(void) printf(gettext(", %llu days "
|
||||
"%02llu:%02llu:%02llu to go\n"),
|
||||
(u_longlong_t)days_left, (u_longlong_t)hours_left,
|
||||
(u_longlong_t)mins_left, (u_longlong_t)secs_left);
|
||||
(void) printf(gettext(", %s to go\n"), time_buf);
|
||||
} else {
|
||||
(void) printf(gettext(", no estimated "
|
||||
"completion time\n"));
|
||||
}
|
||||
} else {
|
||||
(void) printf(gettext("\n"));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_rebuild_status_impl(vdev_rebuild_stat_t *vrs, char *vdev_name)
|
||||
{
|
||||
if (vrs == NULL || vrs->vrs_state == VDEV_REBUILD_NONE)
|
||||
return;
|
||||
|
||||
printf(" ");
|
||||
printf_color(ANSI_BOLD, gettext("scan:"));
|
||||
printf(" ");
|
||||
|
||||
uint64_t bytes_scanned = vrs->vrs_bytes_scanned;
|
||||
uint64_t bytes_issued = vrs->vrs_bytes_issued;
|
||||
uint64_t bytes_rebuilt = vrs->vrs_bytes_rebuilt;
|
||||
uint64_t bytes_est = vrs->vrs_bytes_est;
|
||||
uint64_t scan_rate = (vrs->vrs_pass_bytes_scanned /
|
||||
(vrs->vrs_pass_time_ms + 1)) * 1000;
|
||||
uint64_t issue_rate = (vrs->vrs_pass_bytes_issued /
|
||||
(vrs->vrs_pass_time_ms + 1)) * 1000;
|
||||
double scan_pct = MIN((double)bytes_scanned * 100 /
|
||||
(bytes_est + 1), 100);
|
||||
|
||||
/* Format all of the numbers we will be reporting */
|
||||
char bytes_scanned_buf[7], bytes_issued_buf[7];
|
||||
char bytes_rebuilt_buf[7], bytes_est_buf[7];
|
||||
char scan_rate_buf[7], issue_rate_buf[7], time_buf[32];
|
||||
zfs_nicebytes(bytes_scanned, bytes_scanned_buf,
|
||||
sizeof (bytes_scanned_buf));
|
||||
zfs_nicebytes(bytes_issued, bytes_issued_buf,
|
||||
sizeof (bytes_issued_buf));
|
||||
zfs_nicebytes(bytes_rebuilt, bytes_rebuilt_buf,
|
||||
sizeof (bytes_rebuilt_buf));
|
||||
zfs_nicebytes(bytes_est, bytes_est_buf, sizeof (bytes_est_buf));
|
||||
zfs_nicebytes(scan_rate, scan_rate_buf, sizeof (scan_rate_buf));
|
||||
zfs_nicebytes(issue_rate, issue_rate_buf, sizeof (issue_rate_buf));
|
||||
|
||||
time_t start = vrs->vrs_start_time;
|
||||
time_t end = vrs->vrs_end_time;
|
||||
|
||||
/* Rebuild is finished or canceled. */
|
||||
if (vrs->vrs_state == VDEV_REBUILD_COMPLETE) {
|
||||
secs_to_dhms(vrs->vrs_scan_time_ms / 1000, time_buf);
|
||||
(void) printf(gettext("resilvered (%s) %s in %s "
|
||||
"with %llu errors on %s"), vdev_name, bytes_rebuilt_buf,
|
||||
time_buf, (u_longlong_t)vrs->vrs_errors, ctime(&end));
|
||||
return;
|
||||
} else if (vrs->vrs_state == VDEV_REBUILD_CANCELED) {
|
||||
(void) printf(gettext("resilver (%s) canceled on %s"),
|
||||
vdev_name, ctime(&end));
|
||||
return;
|
||||
} else if (vrs->vrs_state == VDEV_REBUILD_ACTIVE) {
|
||||
(void) printf(gettext("resilver (%s) in progress since %s"),
|
||||
vdev_name, ctime(&start));
|
||||
}
|
||||
|
||||
assert(vrs->vrs_state == VDEV_REBUILD_ACTIVE);
|
||||
|
||||
secs_to_dhms(MAX((int64_t)bytes_est - (int64_t)bytes_scanned, 0) /
|
||||
MAX(scan_rate, 1), time_buf);
|
||||
|
||||
(void) printf(gettext("\t%s scanned at %s/s, %s issued %s/s, "
|
||||
"%s total\n"), bytes_scanned_buf, scan_rate_buf,
|
||||
bytes_issued_buf, issue_rate_buf, bytes_est_buf);
|
||||
(void) printf(gettext("\t%s resilvered, %.2f%% done"),
|
||||
bytes_rebuilt_buf, scan_pct);
|
||||
|
||||
if (vrs->vrs_state == VDEV_REBUILD_ACTIVE) {
|
||||
if (scan_rate >= 10 * 1024 * 1024) {
|
||||
(void) printf(gettext(", %s to go\n"), time_buf);
|
||||
} else {
|
||||
(void) printf(gettext(", no estimated "
|
||||
"completion time\n"));
|
||||
@ -7285,9 +7390,38 @@ print_scan_status(pool_scan_stat_t *ps)
|
||||
}
|
||||
|
||||
/*
|
||||
* As we don't scrub checkpointed blocks, we want to warn the
|
||||
* user that we skipped scanning some blocks if a checkpoint exists
|
||||
* or existed at any time during the scan.
|
||||
* Print rebuild status for top-level vdevs.
|
||||
*/
|
||||
static void
|
||||
print_rebuild_status(zpool_handle_t *zhp, nvlist_t *nvroot)
|
||||
{
|
||||
nvlist_t **child;
|
||||
uint_t children;
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
|
||||
&child, &children) != 0)
|
||||
children = 0;
|
||||
|
||||
for (uint_t c = 0; c < children; c++) {
|
||||
vdev_rebuild_stat_t *vrs;
|
||||
uint_t i;
|
||||
|
||||
if (nvlist_lookup_uint64_array(child[c],
|
||||
ZPOOL_CONFIG_REBUILD_STATS, (uint64_t **)&vrs, &i) == 0) {
|
||||
char *name = zpool_vdev_name(g_zfs, zhp,
|
||||
child[c], VDEV_NAME_TYPE_ID);
|
||||
print_rebuild_status_impl(vrs, name);
|
||||
free(name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* As we don't scrub checkpointed blocks, we want to warn the user that we
|
||||
* skipped scanning some blocks if a checkpoint exists or existed at any
|
||||
* time during the scan. If a sequential instead of healing reconstruction
|
||||
* was performed then the blocks were reconstructed. However, their checksums
|
||||
* have not been verified so we still print the warning.
|
||||
*/
|
||||
static void
|
||||
print_checkpoint_scan_warning(pool_scan_stat_t *ps, pool_checkpoint_stat_t *pcs)
|
||||
@ -7318,6 +7452,95 @@ print_checkpoint_scan_warning(pool_scan_stat_t *ps, pool_checkpoint_stat_t *pcs)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns B_TRUE if there is an active rebuild in progress. Otherwise,
|
||||
* B_FALSE is returned and 'rebuild_end_time' is set to the end time for
|
||||
* the last completed (or cancelled) rebuild.
|
||||
*/
|
||||
static boolean_t
|
||||
check_rebuilding(nvlist_t *nvroot, uint64_t *rebuild_end_time)
|
||||
{
|
||||
nvlist_t **child;
|
||||
uint_t children;
|
||||
boolean_t rebuilding = B_FALSE;
|
||||
uint64_t end_time = 0;
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
|
||||
&child, &children) != 0)
|
||||
children = 0;
|
||||
|
||||
for (uint_t c = 0; c < children; c++) {
|
||||
vdev_rebuild_stat_t *vrs;
|
||||
uint_t i;
|
||||
|
||||
if (nvlist_lookup_uint64_array(child[c],
|
||||
ZPOOL_CONFIG_REBUILD_STATS, (uint64_t **)&vrs, &i) == 0) {
|
||||
|
||||
if (vrs->vrs_end_time > end_time)
|
||||
end_time = vrs->vrs_end_time;
|
||||
|
||||
if (vrs->vrs_state == VDEV_REBUILD_ACTIVE) {
|
||||
rebuilding = B_TRUE;
|
||||
end_time = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (rebuild_end_time != NULL)
|
||||
*rebuild_end_time = end_time;
|
||||
|
||||
return (rebuilding);
|
||||
}
|
||||
|
||||
/*
|
||||
* Print the scan status.
|
||||
*/
|
||||
static void
|
||||
print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)
|
||||
{
|
||||
uint64_t rebuild_end_time = 0, resilver_end_time = 0;
|
||||
boolean_t have_resilver = B_FALSE, have_scrub = B_FALSE;
|
||||
boolean_t active_resilver = B_FALSE;
|
||||
pool_checkpoint_stat_t *pcs = NULL;
|
||||
pool_scan_stat_t *ps = NULL;
|
||||
uint_t c;
|
||||
|
||||
if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS,
|
||||
(uint64_t **)&ps, &c) == 0) {
|
||||
if (ps->pss_func == POOL_SCAN_RESILVER) {
|
||||
resilver_end_time = ps->pss_end_time;
|
||||
active_resilver = (ps->pss_state == DSS_SCANNING);
|
||||
}
|
||||
|
||||
have_resilver = (ps->pss_func == POOL_SCAN_RESILVER);
|
||||
have_scrub = (ps->pss_func == POOL_SCAN_SCRUB);
|
||||
}
|
||||
|
||||
boolean_t active_rebuild = check_rebuilding(nvroot, &rebuild_end_time);
|
||||
boolean_t have_rebuild = (active_rebuild || (rebuild_end_time > 0));
|
||||
|
||||
/* Always print the scrub status when available. */
|
||||
if (have_scrub)
|
||||
print_scan_scrub_resilver_status(ps);
|
||||
|
||||
/*
|
||||
* When there is an active resilver or rebuild print its status.
|
||||
* Otherwise print the status of the last resilver or rebuild.
|
||||
*/
|
||||
if (active_resilver || (!active_rebuild && have_resilver &&
|
||||
resilver_end_time && resilver_end_time > rebuild_end_time)) {
|
||||
print_scan_scrub_resilver_status(ps);
|
||||
} else if (active_rebuild || (!active_resilver && have_rebuild &&
|
||||
rebuild_end_time && rebuild_end_time > resilver_end_time)) {
|
||||
print_rebuild_status(zhp, nvroot);
|
||||
}
|
||||
|
||||
(void) nvlist_lookup_uint64_array(nvroot,
|
||||
ZPOOL_CONFIG_CHECKPOINT_STATS, (uint64_t **)&pcs, &c);
|
||||
print_checkpoint_scan_warning(ps, pcs);
|
||||
}
|
||||
|
||||
/*
|
||||
* Print out detailed removal status.
|
||||
*/
|
||||
@ -7504,7 +7727,7 @@ print_spares(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t **spares,
|
||||
for (i = 0; i < nspares; i++) {
|
||||
name = zpool_vdev_name(g_zfs, zhp, spares[i],
|
||||
cb->cb_name_flags);
|
||||
print_status_config(zhp, cb, name, spares[i], 2, B_TRUE);
|
||||
print_status_config(zhp, cb, name, spares[i], 2, B_TRUE, NULL);
|
||||
free(name);
|
||||
}
|
||||
}
|
||||
@ -7524,7 +7747,8 @@ print_l2cache(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t **l2cache,
|
||||
for (i = 0; i < nl2cache; i++) {
|
||||
name = zpool_vdev_name(g_zfs, zhp, l2cache[i],
|
||||
cb->cb_name_flags);
|
||||
print_status_config(zhp, cb, name, l2cache[i], 2, B_FALSE);
|
||||
print_status_config(zhp, cb, name, l2cache[i], 2,
|
||||
B_FALSE, NULL);
|
||||
free(name);
|
||||
}
|
||||
}
|
||||
@ -7718,6 +7942,7 @@ status_callback(zpool_handle_t *zhp, void *data)
|
||||
break;
|
||||
|
||||
case ZPOOL_STATUS_RESILVERING:
|
||||
case ZPOOL_STATUS_REBUILDING:
|
||||
printf_color(ANSI_BOLD, gettext("status: "));
|
||||
printf_color(ANSI_YELLOW, gettext("One or more devices is "
|
||||
"currently being resilvered. The pool will\n\tcontinue "
|
||||
@ -7727,6 +7952,16 @@ status_callback(zpool_handle_t *zhp, void *data)
|
||||
"complete.\n"));
|
||||
break;
|
||||
|
||||
case ZPOOL_STATUS_REBUILD_SCRUB:
|
||||
printf_color(ANSI_BOLD, gettext("status: "));
|
||||
printf_color(ANSI_YELLOW, gettext("One or more devices have "
|
||||
"been sequentially resilvered, scrubbing\n\tthe pool "
|
||||
"is recommended.\n"));
|
||||
printf_color(ANSI_BOLD, gettext("action: "));
|
||||
printf_color(ANSI_YELLOW, gettext("Use 'zpool scrub' to "
|
||||
"verify all data checksums.\n"));
|
||||
break;
|
||||
|
||||
case ZPOOL_STATUS_CORRUPT_DATA:
|
||||
printf_color(ANSI_BOLD, gettext("status: "));
|
||||
printf_color(ANSI_YELLOW, gettext("One or more devices has "
|
||||
@ -7951,18 +8186,16 @@ status_callback(zpool_handle_t *zhp, void *data)
|
||||
nvlist_t **spares, **l2cache;
|
||||
uint_t nspares, nl2cache;
|
||||
pool_checkpoint_stat_t *pcs = NULL;
|
||||
pool_scan_stat_t *ps = NULL;
|
||||
pool_removal_stat_t *prs = NULL;
|
||||
|
||||
print_scan_status(zhp, nvroot);
|
||||
|
||||
(void) nvlist_lookup_uint64_array(nvroot,
|
||||
ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t **)&prs, &c);
|
||||
print_removal_status(zhp, prs);
|
||||
|
||||
(void) nvlist_lookup_uint64_array(nvroot,
|
||||
ZPOOL_CONFIG_CHECKPOINT_STATS, (uint64_t **)&pcs, &c);
|
||||
(void) nvlist_lookup_uint64_array(nvroot,
|
||||
ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &c);
|
||||
(void) nvlist_lookup_uint64_array(nvroot,
|
||||
ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t **)&prs, &c);
|
||||
print_scan_status(ps);
|
||||
print_checkpoint_scan_warning(ps, pcs);
|
||||
print_removal_status(zhp, prs);
|
||||
print_checkpoint_status(pcs);
|
||||
|
||||
cbp->cb_namewidth = max_width(zhp, nvroot, 0, 0,
|
||||
@ -7987,7 +8220,7 @@ status_callback(zpool_handle_t *zhp, void *data)
|
||||
printf("\n");
|
||||
|
||||
print_status_config(zhp, cbp, zpool_get_name(zhp), nvroot, 0,
|
||||
B_FALSE);
|
||||
B_FALSE, NULL);
|
||||
|
||||
print_class_vdevs(zhp, cbp, nvroot, VDEV_ALLOC_BIAS_DEDUP);
|
||||
print_class_vdevs(zhp, cbp, nvroot, VDEV_ALLOC_BIAS_SPECIAL);
|
||||
@ -9543,6 +9776,36 @@ vdev_activity_remaining(nvlist_t *nv, zpool_wait_activity_t activity)
|
||||
return (bytes_remaining);
|
||||
}
|
||||
|
||||
/* Add up the total number of bytes left to rebuild across top-level vdevs */
|
||||
static uint64_t
|
||||
vdev_activity_top_remaining(nvlist_t *nv)
|
||||
{
|
||||
uint64_t bytes_remaining = 0;
|
||||
nvlist_t **child;
|
||||
uint_t children;
|
||||
int error;
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
|
||||
&child, &children) != 0)
|
||||
children = 0;
|
||||
|
||||
for (uint_t c = 0; c < children; c++) {
|
||||
vdev_rebuild_stat_t *vrs;
|
||||
uint_t i;
|
||||
|
||||
error = nvlist_lookup_uint64_array(child[c],
|
||||
ZPOOL_CONFIG_REBUILD_STATS, (uint64_t **)&vrs, &i);
|
||||
if (error == 0) {
|
||||
if (vrs->vrs_state == VDEV_REBUILD_ACTIVE) {
|
||||
bytes_remaining += (vrs->vrs_bytes_est -
|
||||
vrs->vrs_bytes_rebuilt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (bytes_remaining);
|
||||
}
|
||||
|
||||
/* Whether any vdevs are 'spare' or 'replacing' vdevs */
|
||||
static boolean_t
|
||||
vdev_any_spare_replacing(nvlist_t *nv)
|
||||
@ -9652,6 +9915,9 @@ print_wait_status_row(wait_data_t *wd, zpool_handle_t *zhp, int row)
|
||||
bytes_rem[ZPOOL_WAIT_SCRUB] = rem;
|
||||
else
|
||||
bytes_rem[ZPOOL_WAIT_RESILVER] = rem;
|
||||
} else if (check_rebuilding(nvroot, NULL)) {
|
||||
bytes_rem[ZPOOL_WAIT_RESILVER] =
|
||||
vdev_activity_top_remaining(nvroot);
|
||||
}
|
||||
|
||||
bytes_rem[ZPOOL_WAIT_INITIALIZE] =
|
||||
|
@ -3507,7 +3507,16 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
|
||||
root = make_vdev_root(newpath, NULL, NULL, newvd == NULL ? newsize : 0,
|
||||
ashift, NULL, 0, 0, 1);
|
||||
|
||||
error = spa_vdev_attach(spa, oldguid, root, replacing);
|
||||
/*
|
||||
* When supported select either a healing or sequential resilver.
|
||||
*/
|
||||
boolean_t rebuilding = B_FALSE;
|
||||
if (pvd->vdev_ops == &vdev_mirror_ops ||
|
||||
pvd->vdev_ops == &vdev_root_ops) {
|
||||
rebuilding = !!ztest_random(2);
|
||||
}
|
||||
|
||||
error = spa_vdev_attach(spa, oldguid, root, replacing, rebuilding);
|
||||
|
||||
nvlist_free(root);
|
||||
|
||||
@ -3527,10 +3536,11 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
|
||||
expected_error = error;
|
||||
|
||||
if (error == ZFS_ERR_CHECKPOINT_EXISTS ||
|
||||
error == ZFS_ERR_DISCARDING_CHECKPOINT)
|
||||
error == ZFS_ERR_DISCARDING_CHECKPOINT ||
|
||||
error == ZFS_ERR_RESILVER_IN_PROGRESS ||
|
||||
error == ZFS_ERR_REBUILD_IN_PROGRESS)
|
||||
expected_error = error;
|
||||
|
||||
/* XXX workaround 6690467 */
|
||||
if (error != expected_error && expected_error != EBUSY) {
|
||||
fatal(0, "attach (%s %llu, %s %llu, %d) "
|
||||
"returned %d, expected %d",
|
||||
|
@ -368,7 +368,6 @@ AC_CONFIG_FILES([
|
||||
tests/zfs-tests/tests/functional/rename_dirs/Makefile
|
||||
tests/zfs-tests/tests/functional/replacement/Makefile
|
||||
tests/zfs-tests/tests/functional/reservation/Makefile
|
||||
tests/zfs-tests/tests/functional/resilver/Makefile
|
||||
tests/zfs-tests/tests/functional/rootpool/Makefile
|
||||
tests/zfs-tests/tests/functional/rsend/Makefile
|
||||
tests/zfs-tests/tests/functional/scrub_mirror/Makefile
|
||||
|
@ -95,6 +95,8 @@ zfs_errno = enum_with_offset(1024, [
|
||||
'ZFS_ERR_EXPORT_IN_PROGRESS',
|
||||
'ZFS_ERR_BOOKMARK_SOURCE_NOT_ANCESTOR',
|
||||
'ZFS_ERR_STREAM_TRUNCATED',
|
||||
'ZFS_ERR_RESILVER_IN_PROGRESS',
|
||||
'ZFS_ERR_REBUILD_IN_PROGRESS',
|
||||
],
|
||||
{}
|
||||
)
|
||||
|
@ -79,7 +79,7 @@ typedef enum zfs_error {
|
||||
EZFS_NODEVICE, /* no such device in pool */
|
||||
EZFS_BADDEV, /* invalid device to add */
|
||||
EZFS_NOREPLICAS, /* no valid replicas */
|
||||
EZFS_RESILVERING, /* currently resilvering */
|
||||
EZFS_RESILVERING, /* resilvering (healing reconstruction) */
|
||||
EZFS_BADVERSION, /* unsupported version */
|
||||
EZFS_POOLUNAVAIL, /* pool is currently unavailable */
|
||||
EZFS_DEVOVERFLOW, /* too many devices in one vdev */
|
||||
@ -148,6 +148,7 @@ typedef enum zfs_error {
|
||||
EZFS_TRIM_NOTSUP, /* device does not support trim */
|
||||
EZFS_NO_RESILVER_DEFER, /* pool doesn't support resilver_defer */
|
||||
EZFS_EXPORT_IN_PROGRESS, /* currently exporting the pool */
|
||||
EZFS_REBUILDING, /* resilvering (sequential reconstruction) */
|
||||
EZFS_UNKNOWN
|
||||
} zfs_error_t;
|
||||
|
||||
@ -297,7 +298,7 @@ extern int zpool_vdev_online(zpool_handle_t *, const char *, int,
|
||||
vdev_state_t *);
|
||||
extern int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t);
|
||||
extern int zpool_vdev_attach(zpool_handle_t *, const char *,
|
||||
const char *, nvlist_t *, int);
|
||||
const char *, nvlist_t *, int, boolean_t);
|
||||
extern int zpool_vdev_detach(zpool_handle_t *, const char *);
|
||||
extern int zpool_vdev_remove(zpool_handle_t *, const char *);
|
||||
extern int zpool_vdev_remove_cancel(zpool_handle_t *);
|
||||
@ -387,6 +388,8 @@ typedef enum {
|
||||
ZPOOL_STATUS_RESILVERING, /* device being resilvered */
|
||||
ZPOOL_STATUS_OFFLINE_DEV, /* device offline */
|
||||
ZPOOL_STATUS_REMOVED_DEV, /* removed device */
|
||||
ZPOOL_STATUS_REBUILDING, /* device being rebuilt */
|
||||
ZPOOL_STATUS_REBUILD_SCRUB, /* recommend scrubbing the pool */
|
||||
|
||||
/*
|
||||
* Finally, the following indicates a healthy pool.
|
||||
|
@ -89,6 +89,7 @@ COMMON_H = \
|
||||
vdev_initialize.h \
|
||||
vdev_raidz.h \
|
||||
vdev_raidz_impl.h \
|
||||
vdev_rebuild.h \
|
||||
vdev_removal.h \
|
||||
vdev_trim.h \
|
||||
xvattr.h \
|
||||
|
@ -42,6 +42,8 @@ struct dsl_dataset;
|
||||
struct dsl_pool;
|
||||
struct dmu_tx;
|
||||
|
||||
extern int zfs_scan_suspend_progress;
|
||||
|
||||
/*
|
||||
* All members of this structure must be uint64_t, for byteswap
|
||||
* purposes.
|
||||
|
@ -704,6 +704,7 @@ typedef struct zpool_load_policy {
|
||||
#define ZPOOL_CONFIG_SPLIT_LIST "guid_list"
|
||||
#define ZPOOL_CONFIG_REMOVING "removing"
|
||||
#define ZPOOL_CONFIG_RESILVER_TXG "resilver_txg"
|
||||
#define ZPOOL_CONFIG_REBUILD_TXG "rebuild_txg"
|
||||
#define ZPOOL_CONFIG_COMMENT "comment"
|
||||
#define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_SUSPENDED_REASON "suspended_reason" /* not stored */
|
||||
@ -730,6 +731,7 @@ typedef struct zpool_load_policy {
|
||||
#define ZPOOL_CONFIG_MMP_HOSTID "mmp_hostid" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_ALLOCATION_BIAS "alloc_bias" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_EXPANSION_TIME "expansion_time" /* not stored */
|
||||
#define ZPOOL_CONFIG_REBUILD_STATS "org.openzfs:rebuild_stats"
|
||||
|
||||
/*
|
||||
* The persistent vdev state is stored as separate values rather than a single
|
||||
@ -778,6 +780,9 @@ typedef struct zpool_load_policy {
|
||||
#define VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS \
|
||||
"com.delphix:ms_unflushed_phys_txgs"
|
||||
|
||||
#define VDEV_TOP_ZAP_VDEV_REBUILD_PHYS \
|
||||
"org.openzfs:vdev_rebuild"
|
||||
|
||||
#define VDEV_TOP_ZAP_ALLOCATION_BIAS \
|
||||
"org.zfsonlinux:allocation_bias"
|
||||
|
||||
@ -991,6 +996,21 @@ typedef enum dsl_scan_state {
|
||||
DSS_NUM_STATES
|
||||
} dsl_scan_state_t;
|
||||
|
||||
typedef struct vdev_rebuild_stat {
|
||||
uint64_t vrs_state; /* vdev_rebuild_state_t */
|
||||
uint64_t vrs_start_time; /* time_t */
|
||||
uint64_t vrs_end_time; /* time_t */
|
||||
uint64_t vrs_scan_time_ms; /* total run time (millisecs) */
|
||||
uint64_t vrs_bytes_scanned; /* allocated bytes scanned */
|
||||
uint64_t vrs_bytes_issued; /* read bytes issued */
|
||||
uint64_t vrs_bytes_rebuilt; /* rebuilt bytes */
|
||||
uint64_t vrs_bytes_est; /* total bytes to scan */
|
||||
uint64_t vrs_errors; /* scanning errors */
|
||||
uint64_t vrs_pass_time_ms; /* pass run time (millisecs) */
|
||||
uint64_t vrs_pass_bytes_scanned; /* bytes scanned since start/resume */
|
||||
uint64_t vrs_pass_bytes_issued; /* bytes rebuilt since start/resume */
|
||||
} vdev_rebuild_stat_t;
|
||||
|
||||
/*
|
||||
* Errata described by https://zfsonlinux.org/msg/ZFS-8000-ER. The ordering
|
||||
* of this enum must be maintained to ensure the errata identifiers map to
|
||||
@ -1047,6 +1067,7 @@ typedef struct vdev_stat {
|
||||
uint64_t vs_trim_bytes_est; /* total bytes to trim */
|
||||
uint64_t vs_trim_state; /* vdev_trim_state_t */
|
||||
uint64_t vs_trim_action_time; /* time_t */
|
||||
uint64_t vs_rebuild_processed; /* bytes rebuilt */
|
||||
} vdev_stat_t;
|
||||
|
||||
/*
|
||||
@ -1178,6 +1199,13 @@ typedef enum {
|
||||
VDEV_TRIM_COMPLETE,
|
||||
} vdev_trim_state_t;
|
||||
|
||||
typedef enum {
|
||||
VDEV_REBUILD_NONE,
|
||||
VDEV_REBUILD_ACTIVE,
|
||||
VDEV_REBUILD_CANCELED,
|
||||
VDEV_REBUILD_COMPLETE,
|
||||
} vdev_rebuild_state_t;
|
||||
|
||||
/*
|
||||
* nvlist name constants. Facilitate restricting snapshot iteration range for
|
||||
* the "list next snapshot" ioctl
|
||||
@ -1337,6 +1365,8 @@ typedef enum {
|
||||
ZFS_ERR_BOOKMARK_SOURCE_NOT_ANCESTOR,
|
||||
ZFS_ERR_STREAM_TRUNCATED,
|
||||
ZFS_ERR_STREAM_LARGE_BLOCK_MISMATCH,
|
||||
ZFS_ERR_RESILVER_IN_PROGRESS,
|
||||
ZFS_ERR_REBUILD_IN_PROGRESS,
|
||||
} zfs_errno_t;
|
||||
|
||||
/*
|
||||
@ -1478,7 +1508,12 @@ typedef enum {
|
||||
* given payloads:
|
||||
*
|
||||
* ESC_ZFS_RESILVER_START
|
||||
* ESC_ZFS_RESILVER_END
|
||||
* ESC_ZFS_RESILVER_FINISH
|
||||
*
|
||||
* ZFS_EV_POOL_NAME DATA_TYPE_STRING
|
||||
* ZFS_EV_POOL_GUID DATA_TYPE_UINT64
|
||||
* ZFS_EV_RESILVER_TYPE DATA_TYPE_STRING
|
||||
*
|
||||
* ESC_ZFS_POOL_DESTROY
|
||||
* ESC_ZFS_POOL_REGUID
|
||||
*
|
||||
@ -1532,6 +1567,7 @@ typedef enum {
|
||||
#define ZFS_EV_HIST_IOCTL "history_ioctl"
|
||||
#define ZFS_EV_HIST_DSNAME "history_dsname"
|
||||
#define ZFS_EV_HIST_DSID "history_dsid"
|
||||
#define ZFS_EV_RESILVER_TYPE "resilver_type"
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -790,17 +790,12 @@ extern int bpobj_enqueue_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
|
||||
#define SPA_ASYNC_AUTOTRIM_RESTART 0x400
|
||||
#define SPA_ASYNC_L2CACHE_REBUILD 0x800
|
||||
#define SPA_ASYNC_L2CACHE_TRIM 0x1000
|
||||
|
||||
/*
|
||||
* Controls the behavior of spa_vdev_remove().
|
||||
*/
|
||||
#define SPA_REMOVE_UNSPARE 0x01
|
||||
#define SPA_REMOVE_DONE 0x02
|
||||
#define SPA_ASYNC_REBUILD_DONE 0x2000
|
||||
|
||||
/* device manipulation */
|
||||
extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
|
||||
extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot,
|
||||
int replacing);
|
||||
int replacing, int rebuild);
|
||||
extern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid,
|
||||
int replace_done);
|
||||
extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
|
||||
@ -988,6 +983,7 @@ extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
|
||||
|
||||
/* Pool vdev add/remove lock */
|
||||
extern uint64_t spa_vdev_enter(spa_t *spa);
|
||||
extern uint64_t spa_vdev_detach_enter(spa_t *spa, uint64_t guid);
|
||||
extern uint64_t spa_vdev_config_enter(spa_t *spa);
|
||||
extern void spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg,
|
||||
int error, char *tag);
|
||||
|
@ -36,6 +36,7 @@
|
||||
#include <sys/spa_checkpoint.h>
|
||||
#include <sys/spa_log_spacemap.h>
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/vdev_rebuild.h>
|
||||
#include <sys/vdev_removal.h>
|
||||
#include <sys/metaslab.h>
|
||||
#include <sys/dmu.h>
|
||||
|
@ -73,7 +73,7 @@ extern boolean_t vdev_dtl_contains(vdev_t *vd, vdev_dtl_type_t d,
|
||||
extern boolean_t vdev_dtl_empty(vdev_t *vd, vdev_dtl_type_t d);
|
||||
extern boolean_t vdev_dtl_need_resilver(vdev_t *vd, uint64_t off, size_t size);
|
||||
extern void vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
|
||||
int scrub_done);
|
||||
boolean_t scrub_done, boolean_t rebuild_done);
|
||||
extern boolean_t vdev_dtl_required(vdev_t *vd);
|
||||
extern boolean_t vdev_resilver_needed(vdev_t *vd,
|
||||
uint64_t *minp, uint64_t *maxp);
|
||||
|
@ -38,6 +38,7 @@
|
||||
#include <sys/uberblock_impl.h>
|
||||
#include <sys/vdev_indirect_mapping.h>
|
||||
#include <sys/vdev_indirect_births.h>
|
||||
#include <sys/vdev_rebuild.h>
|
||||
#include <sys/vdev_removal.h>
|
||||
#include <sys/zfs_ratelimit.h>
|
||||
|
||||
@ -295,13 +296,26 @@ struct vdev {
|
||||
uint64_t vdev_trim_secure; /* requested secure TRIM */
|
||||
uint64_t vdev_trim_action_time; /* start and end time */
|
||||
|
||||
/* for limiting outstanding I/Os (initialize and TRIM) */
|
||||
/* Rebuild related */
|
||||
boolean_t vdev_rebuilding;
|
||||
boolean_t vdev_rebuild_exit_wanted;
|
||||
boolean_t vdev_rebuild_cancel_wanted;
|
||||
boolean_t vdev_rebuild_reset_wanted;
|
||||
kmutex_t vdev_rebuild_lock;
|
||||
kcondvar_t vdev_rebuild_cv;
|
||||
kthread_t *vdev_rebuild_thread;
|
||||
vdev_rebuild_t vdev_rebuild_config;
|
||||
|
||||
/* For limiting outstanding I/Os (initialize, TRIM, rebuild) */
|
||||
kmutex_t vdev_initialize_io_lock;
|
||||
kcondvar_t vdev_initialize_io_cv;
|
||||
uint64_t vdev_initialize_inflight;
|
||||
kmutex_t vdev_trim_io_lock;
|
||||
kcondvar_t vdev_trim_io_cv;
|
||||
uint64_t vdev_trim_inflight[3];
|
||||
kmutex_t vdev_rebuild_io_lock;
|
||||
kcondvar_t vdev_rebuild_io_cv;
|
||||
uint64_t vdev_rebuild_inflight;
|
||||
|
||||
/*
|
||||
* Values stored in the config for an indirect or removing vdev.
|
||||
@ -358,6 +372,7 @@ struct vdev {
|
||||
uint64_t vdev_degraded; /* persistent degraded state */
|
||||
uint64_t vdev_removed; /* persistent removed state */
|
||||
uint64_t vdev_resilver_txg; /* persistent resilvering state */
|
||||
uint64_t vdev_rebuild_txg; /* persistent rebuilding state */
|
||||
uint64_t vdev_nparity; /* number of parity devices for raidz */
|
||||
char *vdev_path; /* vdev path (if any) */
|
||||
char *vdev_devid; /* vdev devid (if any) */
|
||||
|
97
include/sys/vdev_rebuild.h
Normal file
97
include/sys/vdev_rebuild.h
Normal file
@ -0,0 +1,97 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation.
|
||||
* Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_VDEV_REBUILD_H
|
||||
#define _SYS_VDEV_REBUILD_H
|
||||
|
||||
#include <sys/spa.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Number of entries in the physical vdev_rebuild_phys structure. This
|
||||
* state is stored per top-level as VDEV_TOP_ZAP_VDEV_REBUILD_PHYS.
|
||||
*/
|
||||
#define REBUILD_PHYS_ENTRIES 12
|
||||
|
||||
/*
|
||||
* On-disk rebuild configuration and state. When adding new fields they
|
||||
* must be added to the end of the structure.
|
||||
*/
|
||||
typedef struct vdev_rebuild_phys {
|
||||
uint64_t vrp_rebuild_state; /* vdev_rebuild_state_t */
|
||||
uint64_t vrp_last_offset; /* last rebuilt offset */
|
||||
uint64_t vrp_min_txg; /* minimum missing txg */
|
||||
uint64_t vrp_max_txg; /* maximum missing txg */
|
||||
uint64_t vrp_start_time; /* start time */
|
||||
uint64_t vrp_end_time; /* end time */
|
||||
uint64_t vrp_scan_time_ms; /* total run time in ms */
|
||||
uint64_t vrp_bytes_scanned; /* alloc bytes scanned */
|
||||
uint64_t vrp_bytes_issued; /* read bytes rebuilt */
|
||||
uint64_t vrp_bytes_rebuilt; /* rebuilt bytes */
|
||||
uint64_t vrp_bytes_est; /* total bytes to scan */
|
||||
uint64_t vrp_errors; /* errors during rebuild */
|
||||
} vdev_rebuild_phys_t;
|
||||
|
||||
/*
|
||||
* The vdev_rebuild_t describes the current state and how a top-level vdev
|
||||
* should be rebuilt. The core elements are the top-vdev, the metaslab being
|
||||
* rebuilt, range tree containing the allocated extents and the on-disk state.
|
||||
*/
|
||||
typedef struct vdev_rebuild {
|
||||
vdev_t *vr_top_vdev; /* top-level vdev to rebuild */
|
||||
metaslab_t *vr_scan_msp; /* scanning disabled metaslab */
|
||||
range_tree_t *vr_scan_tree; /* scan ranges (in metaslab) */
|
||||
|
||||
/* In-core state and progress */
|
||||
uint64_t vr_scan_offset[TXG_SIZE];
|
||||
uint64_t vr_prev_scan_time_ms; /* any previous scan time */
|
||||
|
||||
/* Per-rebuild pass statistics for calculating bandwidth */
|
||||
uint64_t vr_pass_start_time;
|
||||
uint64_t vr_pass_bytes_scanned;
|
||||
uint64_t vr_pass_bytes_issued;
|
||||
|
||||
/* On-disk state updated by vdev_rebuild_zap_update_sync() */
|
||||
vdev_rebuild_phys_t vr_rebuild_phys;
|
||||
} vdev_rebuild_t;
|
||||
|
||||
boolean_t vdev_rebuild_active(vdev_t *);
|
||||
|
||||
int vdev_rebuild_load(vdev_t *);
|
||||
void vdev_rebuild(vdev_t *);
|
||||
void vdev_rebuild_stop_wait(vdev_t *);
|
||||
void vdev_rebuild_stop_all(spa_t *);
|
||||
void vdev_rebuild_restart(spa_t *);
|
||||
void vdev_rebuild_clear_sync(void *, dmu_tx_t *);
|
||||
int vdev_rebuild_get_stats(vdev_t *, vdev_rebuild_stat_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_VDEV_REBUILD_H */
|
@ -31,6 +31,7 @@ typedef enum zio_priority {
|
||||
ZIO_PRIORITY_REMOVAL, /* reads/writes for vdev removal */
|
||||
ZIO_PRIORITY_INITIALIZING, /* initializing I/O */
|
||||
ZIO_PRIORITY_TRIM, /* trim I/O (discard) */
|
||||
ZIO_PRIORITY_REBUILD, /* reads/writes for vdev rebuild */
|
||||
ZIO_PRIORITY_NUM_QUEUEABLE,
|
||||
ZIO_PRIORITY_NOW, /* non-queued i/os (e.g. free) */
|
||||
} zio_priority_t;
|
||||
|
@ -74,6 +74,7 @@ typedef enum spa_feature {
|
||||
SPA_FEATURE_BOOKMARK_WRITTEN,
|
||||
SPA_FEATURE_LOG_SPACEMAP,
|
||||
SPA_FEATURE_LIVELIST,
|
||||
SPA_FEATURE_DEVICE_REBUILD,
|
||||
SPA_FEATURES
|
||||
} spa_feature_t;
|
||||
|
||||
|
@ -2446,7 +2446,8 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
|
||||
ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
|
||||
(void) nvlist_lookup_uint64_array(nvroot,
|
||||
ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
|
||||
if (ps && ps->pss_func == POOL_SCAN_SCRUB) {
|
||||
if (ps && ps->pss_func == POOL_SCAN_SCRUB &&
|
||||
ps->pss_state == DSS_SCANNING) {
|
||||
if (cmd == POOL_SCRUB_PAUSE)
|
||||
return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg));
|
||||
else
|
||||
@ -3128,8 +3129,8 @@ is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
|
||||
* If 'replacing' is specified, the new disk will replace the old one.
|
||||
*/
|
||||
int
|
||||
zpool_vdev_attach(zpool_handle_t *zhp,
|
||||
const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
|
||||
zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
|
||||
const char *new_disk, nvlist_t *nvroot, int replacing, boolean_t rebuild)
|
||||
{
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
char msg[1024];
|
||||
@ -3164,6 +3165,14 @@ zpool_vdev_attach(zpool_handle_t *zhp,
|
||||
|
||||
verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
|
||||
zc.zc_cookie = replacing;
|
||||
zc.zc_simple = rebuild;
|
||||
|
||||
if (rebuild &&
|
||||
zfeature_lookup_guid("org.openzfs:device_rebuild", NULL) != 0) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"the loaded zfs module doesn't support device rebuilds"));
|
||||
return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
|
||||
}
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
|
||||
&child, &children) != 0 || children != 1) {
|
||||
@ -3224,16 +3233,21 @@ zpool_vdev_attach(zpool_handle_t *zhp,
|
||||
uint64_t version = zpool_get_prop_int(zhp,
|
||||
ZPOOL_PROP_VERSION, NULL);
|
||||
|
||||
if (islog)
|
||||
if (islog) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"cannot replace a log with a spare"));
|
||||
else if (version >= SPA_VERSION_MULTI_REPLACE)
|
||||
} else if (rebuild) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"only mirror vdevs support sequential "
|
||||
"reconstruction"));
|
||||
} else if (version >= SPA_VERSION_MULTI_REPLACE) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"already in replacing/spare config; wait "
|
||||
"for completion or use 'zpool detach'"));
|
||||
else
|
||||
} else {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"cannot replace a replacing device"));
|
||||
}
|
||||
} else {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"can only attach to mirrors and top-level "
|
||||
|
@ -84,6 +84,8 @@ static char *zfs_msgid_table[] = {
|
||||
* ZPOOL_STATUS_RESILVERING
|
||||
* ZPOOL_STATUS_OFFLINE_DEV
|
||||
* ZPOOL_STATUS_REMOVED_DEV
|
||||
* ZPOOL_STATUS_REBUILDING
|
||||
* ZPOOL_STATUS_REBUILD_SCRUB
|
||||
* ZPOOL_STATUS_OK
|
||||
*/
|
||||
};
|
||||
@ -195,7 +197,7 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
|
||||
* - Check for any data errors
|
||||
* - Check for any faulted or missing devices in a replicated config
|
||||
* - Look for any devices showing errors
|
||||
* - Check for any resilvering devices
|
||||
* - Check for any resilvering or rebuilding devices
|
||||
*
|
||||
* There can obviously be multiple errors within a single pool, so this routine
|
||||
* only picks the most damaging of all the current errors to report.
|
||||
@ -233,6 +235,49 @@ check_status(nvlist_t *config, boolean_t isimport, zpool_errata_t *erratap)
|
||||
ps->pss_state == DSS_SCANNING)
|
||||
return (ZPOOL_STATUS_RESILVERING);
|
||||
|
||||
/*
|
||||
* Currently rebuilding a vdev, check top-level vdevs.
|
||||
*/
|
||||
vdev_rebuild_stat_t *vrs = NULL;
|
||||
nvlist_t **child;
|
||||
uint_t c, i, children;
|
||||
uint64_t rebuild_end_time = 0;
|
||||
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
|
||||
&child, &children) == 0) {
|
||||
for (c = 0; c < children; c++) {
|
||||
if ((nvlist_lookup_uint64_array(child[c],
|
||||
ZPOOL_CONFIG_REBUILD_STATS,
|
||||
(uint64_t **)&vrs, &i) == 0) && (vrs != NULL)) {
|
||||
uint64_t state = vrs->vrs_state;
|
||||
|
||||
if (state == VDEV_REBUILD_ACTIVE) {
|
||||
return (ZPOOL_STATUS_REBUILDING);
|
||||
} else if (state == VDEV_REBUILD_COMPLETE &&
|
||||
vrs->vrs_end_time > rebuild_end_time) {
|
||||
rebuild_end_time = vrs->vrs_end_time;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we can determine when the last scrub was run, and it
|
||||
* was before the last rebuild completed, then recommend
|
||||
* that the pool be scrubbed to verify all checksums. When
|
||||
* ps is NULL we can infer the pool has never been scrubbed.
|
||||
*/
|
||||
if (rebuild_end_time > 0) {
|
||||
if (ps != NULL) {
|
||||
if ((ps->pss_state == DSS_FINISHED &&
|
||||
ps->pss_func == POOL_SCAN_SCRUB &&
|
||||
rebuild_end_time > ps->pss_end_time) ||
|
||||
ps->pss_state == DSS_NONE)
|
||||
return (ZPOOL_STATUS_REBUILD_SCRUB);
|
||||
} else {
|
||||
return (ZPOOL_STATUS_REBUILD_SCRUB);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The multihost property is set and the pool may be active.
|
||||
*/
|
||||
|
@ -286,6 +286,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
|
||||
"resilver_defer feature"));
|
||||
case EZFS_EXPORT_IN_PROGRESS:
|
||||
return (dgettext(TEXT_DOMAIN, "pool export in progress"));
|
||||
case EZFS_REBUILDING:
|
||||
return (dgettext(TEXT_DOMAIN, "currently sequentially "
|
||||
"resilvering"));
|
||||
case EZFS_UNKNOWN:
|
||||
return (dgettext(TEXT_DOMAIN, "unknown error"));
|
||||
default:
|
||||
@ -693,6 +696,12 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
|
||||
case ZFS_ERR_EXPORT_IN_PROGRESS:
|
||||
zfs_verror(hdl, EZFS_EXPORT_IN_PROGRESS, fmt, ap);
|
||||
break;
|
||||
case ZFS_ERR_RESILVER_IN_PROGRESS:
|
||||
zfs_verror(hdl, EZFS_RESILVERING, fmt, ap);
|
||||
break;
|
||||
case ZFS_ERR_REBUILD_IN_PROGRESS:
|
||||
zfs_verror(hdl, EZFS_REBUILDING, fmt, ap);
|
||||
break;
|
||||
case ZFS_ERR_IOC_CMD_UNAVAIL:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs "
|
||||
"module does not support this operation. A reboot may "
|
||||
|
@ -132,6 +132,7 @@ KERNEL_C = \
|
||||
vdev_raidz_math_sse2.c \
|
||||
vdev_raidz_math_ssse3.c \
|
||||
vdev_raidz_math_powerpc_altivec.c \
|
||||
vdev_rebuild.c \
|
||||
vdev_removal.c \
|
||||
vdev_root.c \
|
||||
vdev_trim.c \
|
||||
|
@ -1862,6 +1862,30 @@ queue's min_active. See the section "ZFS I/O SCHEDULER".
|
||||
Default value: \fB1,000\fR.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fBzfs_vdev_rebuild_max_active\fR (int)
|
||||
.ad
|
||||
.RS 12n
|
||||
Maximum sequential resilver I/Os active to each device.
|
||||
See the section "ZFS I/O SCHEDULER".
|
||||
.sp
|
||||
Default value: \fB3\fR.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fBzfs_vdev_rebuild_min_active\fR (int)
|
||||
.ad
|
||||
.RS 12n
|
||||
Minimum sequential resilver I/Os active to each device.
|
||||
See the section "ZFS I/O SCHEDULER".
|
||||
.sp
|
||||
Default value: \fB1\fR.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
@ -2707,6 +2731,18 @@ Include cache hits in read history
|
||||
Use \fB1\fR for yes and \fB0\fR for no (default).
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fBzfs_rebuild_max_segment\fR (ulong)
|
||||
.ad
|
||||
.RS 12n
|
||||
Maximum read segment size to issue when sequentially resilvering a
|
||||
top-level vdev.
|
||||
.sp
|
||||
Default value: \fB1,048,576\fR.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
|
@ -255,6 +255,35 @@ This feature becomes \fBactive\fR when a bookmark is created and will be
|
||||
returned to the \fBenabled\fR state when all bookmarks with these fields are destroyed.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fBdevice_rebuild\fR
|
||||
.ad
|
||||
.RS 4n
|
||||
.TS
|
||||
l l .
|
||||
GUID org.openzfs:device_rebuild
|
||||
READ\-ONLY COMPATIBLE yes
|
||||
DEPENDENCIES none
|
||||
.TE
|
||||
|
||||
This feature enables the ability for the \fBzpool attach\fR and \fBzpool
|
||||
replace\fR subcommands to perform sequential reconstruction (instead of
|
||||
healing reconstruction) when resilvering.
|
||||
|
||||
Sequential reconstruction resilvers a device in LBA order without immediately
|
||||
verifying the checksums. Once complete a scrub is started which then verifies
|
||||
the checksums. This approach allows full redundancy to be restored to the pool
|
||||
in the minimum amount of time. This two phase approach will take longer than a
|
||||
healing resilver when the time to verify the checksums is included. However,
|
||||
unless there is additional pool damage no checksum errors should be reported
|
||||
by the scrub. This feature is incompatible with raidz configurations.
|
||||
|
||||
This feature becomes \fBactive\fR while a sequential resilver is in progress,
|
||||
and returns to \fBenabled\fR when the resilver completes.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
|
@ -27,7 +27,7 @@
|
||||
.\" Copyright 2017 Nexenta Systems, Inc.
|
||||
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
|
||||
.\"
|
||||
.Dd August 9, 2019
|
||||
.Dd May 15, 2020
|
||||
.Dt ZPOOL-ATTACH 8
|
||||
.Os Linux
|
||||
.Sh NAME
|
||||
@ -36,7 +36,7 @@
|
||||
.Sh SYNOPSIS
|
||||
.Nm
|
||||
.Cm attach
|
||||
.Op Fl fw
|
||||
.Op Fl fsw
|
||||
.Oo Fl o Ar property Ns = Ns Ar value Oc
|
||||
.Ar pool device new_device
|
||||
.Sh DESCRIPTION
|
||||
@ -44,7 +44,7 @@
|
||||
.It Xo
|
||||
.Nm
|
||||
.Cm attach
|
||||
.Op Fl fw
|
||||
.Op Fl fsw
|
||||
.Oo Fl o Ar property Ns = Ns Ar value Oc
|
||||
.Ar pool device new_device
|
||||
.Xc
|
||||
@ -68,22 +68,29 @@ is part of a two-way mirror, attaching
|
||||
creates a three-way mirror, and so on.
|
||||
In either case,
|
||||
.Ar new_device
|
||||
begins to resilver immediately.
|
||||
begins to resilver immediately and any running scrub is cancelled.
|
||||
.Bl -tag -width Ds
|
||||
.It Fl f
|
||||
Forces use of
|
||||
.Ar new_device ,
|
||||
even if it appears to be in use.
|
||||
Not all devices can be overridden in this manner.
|
||||
.It Fl w
|
||||
Waits until
|
||||
.Ar new_device
|
||||
has finished resilvering before returning.
|
||||
.It Fl o Ar property Ns = Ns Ar value
|
||||
Sets the given pool properties. See the
|
||||
.Xr zpoolprops 8
|
||||
manual page for a list of valid properties that can be set. The only property
|
||||
supported at the moment is ashift.
|
||||
.It Fl s
|
||||
The
|
||||
.Ar new_device
|
||||
is reconstructed sequentially to restore redundancy as quickly as possible.
|
||||
Checksums are not verified during sequential reconstruction so a scrub is
|
||||
started when the resilver completes.
|
||||
Sequential reconstruction is not supported for raidz configurations.
|
||||
.It Fl w
|
||||
Waits until
|
||||
.Ar new_device
|
||||
has finished resilvering before returning.
|
||||
.El
|
||||
.El
|
||||
.Sh SEE ALSO
|
||||
|
@ -27,7 +27,7 @@
|
||||
.\" Copyright 2017 Nexenta Systems, Inc.
|
||||
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
|
||||
.\"
|
||||
.Dd August 9, 2019
|
||||
.Dd May 15, 2020
|
||||
.Dt ZPOOL-REPLACE 8
|
||||
.Os Linux
|
||||
.Sh NAME
|
||||
@ -36,7 +36,7 @@
|
||||
.Sh SYNOPSIS
|
||||
.Nm
|
||||
.Cm replace
|
||||
.Op Fl fw
|
||||
.Op Fl fsw
|
||||
.Oo Fl o Ar property Ns = Ns Ar value Oc
|
||||
.Ar pool Ar device Op Ar new_device
|
||||
.Sh DESCRIPTION
|
||||
@ -44,7 +44,7 @@
|
||||
.It Xo
|
||||
.Nm
|
||||
.Cm replace
|
||||
.Op Fl fw
|
||||
.Op Fl fsw
|
||||
.Op Fl o Ar property Ns = Ns Ar value
|
||||
.Ar pool Ar device Op Ar new_device
|
||||
.Xc
|
||||
@ -56,6 +56,7 @@ This is equivalent to attaching
|
||||
.Ar new_device ,
|
||||
waiting for it to resilver, and then detaching
|
||||
.Ar old_device .
|
||||
Any in-progress scrub will be cancelled.
|
||||
.Pp
|
||||
The size of
|
||||
.Ar new_device
|
||||
@ -86,6 +87,13 @@ Sets the given pool properties. See the
|
||||
manual page for a list of valid properties that can be set.
|
||||
The only property supported at the moment is
|
||||
.Sy ashift .
|
||||
.It Fl s
|
||||
The
|
||||
.Ar new_device
|
||||
is reconstructed sequentially to restore redundancy as quickly as possible.
|
||||
Checksums are not verified during sequential reconstruction so a scrub is
|
||||
started when the resilver completes.
|
||||
Sequential reconstruction is not supported for raidz configurations.
|
||||
.It Fl w
|
||||
Waits until the replacement has completed before returning.
|
||||
.El
|
||||
|
@ -27,7 +27,7 @@
|
||||
.\" Copyright 2017 Nexenta Systems, Inc.
|
||||
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
|
||||
.\"
|
||||
.Dd August 9, 2019
|
||||
.Dd May 15, 2020
|
||||
.Dt ZPOOL-STATUS 8
|
||||
.Os Linux
|
||||
.Sh NAME
|
||||
@ -59,7 +59,7 @@ is specified, then the status of each pool in the system is displayed.
|
||||
For more information on pool and device health, see the
|
||||
.Em Device Failure and Recovery
|
||||
section of
|
||||
.Xr zpoolconcepts 8.
|
||||
.Xr zpoolconcepts 8 .
|
||||
.Pp
|
||||
If a scrub or resilver is in progress, this command reports the percentage done
|
||||
and the estimated time to completion.
|
||||
|
@ -251,6 +251,7 @@ SRCS+= abd.c \
|
||||
vdev_raidz.c \
|
||||
vdev_raidz_math.c \
|
||||
vdev_raidz_math_scalar.c \
|
||||
vdev_rebuild.c \
|
||||
vdev_raidz_math_avx2.c \
|
||||
vdev_raidz_math_avx512bw.c \
|
||||
vdev_raidz_math_avx512f.c \
|
||||
|
@ -570,6 +570,11 @@ zpool_feature_init(void)
|
||||
"com.datto:resilver_defer", "resilver_defer",
|
||||
"Support for deferring new resilvers when one is already running.",
|
||||
ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL);
|
||||
|
||||
zfeature_register(SPA_FEATURE_DEVICE_REBUILD,
|
||||
"org.openzfs:device_rebuild", "device_rebuild",
|
||||
"Support for sequential device rebuilds",
|
||||
ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL);
|
||||
}
|
||||
|
||||
#if defined(_KERNEL)
|
||||
|
@ -94,6 +94,7 @@ $(MODULE)-objs += vdev_queue.o
|
||||
$(MODULE)-objs += vdev_raidz.o
|
||||
$(MODULE)-objs += vdev_raidz_math.o
|
||||
$(MODULE)-objs += vdev_raidz_math_scalar.o
|
||||
$(MODULE)-objs += vdev_rebuild.o
|
||||
$(MODULE)-objs += vdev_removal.o
|
||||
$(MODULE)-objs += vdev_root.o
|
||||
$(MODULE)-objs += vdev_trim.o
|
||||
|
@ -704,8 +704,9 @@ static int
|
||||
dsl_scan_setup_check(void *arg, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
|
||||
vdev_t *rvd = scn->scn_dp->dp_spa->spa_root_vdev;
|
||||
|
||||
if (dsl_scan_is_running(scn))
|
||||
if (dsl_scan_is_running(scn) || vdev_rebuild_active(rvd))
|
||||
return (SET_ERROR(EBUSY));
|
||||
|
||||
return (0);
|
||||
@ -746,8 +747,12 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
|
||||
|
||||
if (vdev_resilver_needed(spa->spa_root_vdev,
|
||||
&scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) {
|
||||
spa_event_notify(spa, NULL, NULL,
|
||||
nvlist_t *aux = fnvlist_alloc();
|
||||
fnvlist_add_string(aux, ZFS_EV_RESILVER_TYPE,
|
||||
"healing");
|
||||
spa_event_notify(spa, NULL, aux,
|
||||
ESC_ZFS_RESILVER_START);
|
||||
nvlist_free(aux);
|
||||
} else {
|
||||
spa_event_notify(spa, NULL, NULL, ESC_ZFS_SCRUB_START);
|
||||
}
|
||||
@ -761,6 +766,21 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
|
||||
if (scn->scn_phys.scn_min_txg > TXG_INITIAL)
|
||||
scn->scn_phys.scn_ddt_class_max = DDT_CLASS_DITTO;
|
||||
|
||||
/*
|
||||
* When starting a resilver clear any existing rebuild state.
|
||||
* This is required to prevent stale rebuild status from
|
||||
* being reported when a rebuild is run, then a resilver and
|
||||
* finally a scrub. In which case only the scrub status
|
||||
* should be reported by 'zpool status'.
|
||||
*/
|
||||
if (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) {
|
||||
vdev_t *rvd = spa->spa_root_vdev;
|
||||
for (uint64_t i = 0; i < rvd->vdev_children; i++) {
|
||||
vdev_t *vd = rvd->vdev_child[i];
|
||||
vdev_rebuild_clear_sync(
|
||||
(void *)(uintptr_t)vd->vdev_id, tx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* back to the generic stuff */
|
||||
@ -918,14 +938,22 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
|
||||
if (complete &&
|
||||
!spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) {
|
||||
vdev_dtl_reassess(spa->spa_root_vdev, tx->tx_txg,
|
||||
scn->scn_phys.scn_max_txg, B_TRUE);
|
||||
scn->scn_phys.scn_max_txg, B_TRUE, B_FALSE);
|
||||
|
||||
spa_event_notify(spa, NULL, NULL,
|
||||
scn->scn_phys.scn_min_txg ?
|
||||
ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH);
|
||||
if (scn->scn_phys.scn_min_txg) {
|
||||
nvlist_t *aux = fnvlist_alloc();
|
||||
fnvlist_add_string(aux, ZFS_EV_RESILVER_TYPE,
|
||||
"healing");
|
||||
spa_event_notify(spa, NULL, aux,
|
||||
ESC_ZFS_RESILVER_FINISH);
|
||||
nvlist_free(aux);
|
||||
} else {
|
||||
spa_event_notify(spa, NULL, NULL,
|
||||
ESC_ZFS_SCRUB_FINISH);
|
||||
}
|
||||
} else {
|
||||
vdev_dtl_reassess(spa->spa_root_vdev, tx->tx_txg,
|
||||
0, B_TRUE);
|
||||
0, B_TRUE, B_FALSE);
|
||||
}
|
||||
spa_errlog_rotate(spa);
|
||||
|
||||
|
109
module/zfs/spa.c
109
module/zfs/spa.c
@ -57,6 +57,7 @@
|
||||
#include <sys/vdev_indirect_mapping.h>
|
||||
#include <sys/vdev_indirect_births.h>
|
||||
#include <sys/vdev_initialize.h>
|
||||
#include <sys/vdev_rebuild.h>
|
||||
#include <sys/vdev_trim.h>
|
||||
#include <sys/vdev_disk.h>
|
||||
#include <sys/metaslab.h>
|
||||
@ -1562,6 +1563,7 @@ spa_unload(spa_t *spa)
|
||||
vdev_initialize_stop_all(root_vdev, VDEV_INITIALIZE_ACTIVE);
|
||||
vdev_trim_stop_all(root_vdev, VDEV_TRIM_ACTIVE);
|
||||
vdev_autotrim_stop_all(spa);
|
||||
vdev_rebuild_stop_all(spa);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -4240,7 +4242,7 @@ spa_ld_load_vdev_metadata(spa_t *spa)
|
||||
* Propagate the leaf DTLs we just loaded all the way up the vdev tree.
|
||||
*/
|
||||
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
|
||||
vdev_dtl_reassess(rvd, 0, 0, B_FALSE);
|
||||
vdev_dtl_reassess(rvd, 0, 0, B_FALSE, B_FALSE);
|
||||
spa_config_exit(spa, SCL_ALL, FTAG);
|
||||
|
||||
return (0);
|
||||
@ -4829,11 +4831,16 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
|
||||
update_config_cache);
|
||||
|
||||
/*
|
||||
* Check all DTLs to see if anything needs resilvering.
|
||||
* Check if a rebuild was in progress and if so resume it.
|
||||
* Then check all DTLs to see if anything needs resilvering.
|
||||
* The resilver will be deferred if a rebuild was started.
|
||||
*/
|
||||
if (!dsl_scan_resilvering(spa->spa_dsl_pool) &&
|
||||
vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL))
|
||||
if (vdev_rebuild_active(spa->spa_root_vdev)) {
|
||||
vdev_rebuild_restart(spa);
|
||||
} else if (!dsl_scan_resilvering(spa->spa_dsl_pool) &&
|
||||
vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) {
|
||||
spa_async_request(spa, SPA_ASYNC_RESILVER);
|
||||
}
|
||||
|
||||
/*
|
||||
* Log the fact that we booted up (so that we can detect if
|
||||
@ -6313,6 +6320,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
|
||||
vdev_initialize_stop_all(rvd, VDEV_INITIALIZE_ACTIVE);
|
||||
vdev_trim_stop_all(rvd, VDEV_TRIM_ACTIVE);
|
||||
vdev_autotrim_stop_all(spa);
|
||||
vdev_rebuild_stop_all(spa);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -6536,12 +6544,17 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
|
||||
* extra rules: you can't attach to it after it's been created, and upon
|
||||
* completion of resilvering, the first disk (the one being replaced)
|
||||
* is automatically detached.
|
||||
*
|
||||
* If 'rebuild' is specified, then sequential reconstruction (a.k.a. rebuild)
|
||||
* should be performed instead of traditional healing reconstruction. From
|
||||
* an administrator's perspective these are both resilver operations.
|
||||
*/
|
||||
int
|
||||
spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
|
||||
spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
|
||||
int rebuild)
|
||||
{
|
||||
uint64_t txg, dtl_max_txg;
|
||||
vdev_t *rvd __maybe_unused = spa->spa_root_vdev;
|
||||
vdev_t *rvd = spa->spa_root_vdev;
|
||||
vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
|
||||
vdev_ops_t *pvops;
|
||||
char *oldvdpath, *newvdpath;
|
||||
@ -6561,6 +6574,19 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
|
||||
return (spa_vdev_exit(spa, NULL, txg, error));
|
||||
}
|
||||
|
||||
if (rebuild) {
|
||||
if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD))
|
||||
return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
|
||||
|
||||
if (dsl_scan_resilvering(spa_get_dsl(spa)))
|
||||
return (spa_vdev_exit(spa, NULL, txg,
|
||||
ZFS_ERR_RESILVER_IN_PROGRESS));
|
||||
} else {
|
||||
if (vdev_rebuild_active(rvd))
|
||||
return (spa_vdev_exit(spa, NULL, txg,
|
||||
ZFS_ERR_REBUILD_IN_PROGRESS));
|
||||
}
|
||||
|
||||
if (spa->spa_vdev_removal != NULL)
|
||||
return (spa_vdev_exit(spa, NULL, txg, EBUSY));
|
||||
|
||||
@ -6593,6 +6619,18 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
|
||||
if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)
|
||||
return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
|
||||
|
||||
if (rebuild) {
|
||||
/*
|
||||
* For rebuilds, the parent vdev must support reconstruction
|
||||
* using only space maps. This means the only allowable
|
||||
* parents are the root vdev or a mirror vdev.
|
||||
*/
|
||||
if (pvd->vdev_ops != &vdev_mirror_ops &&
|
||||
pvd->vdev_ops != &vdev_root_ops) {
|
||||
return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
|
||||
}
|
||||
}
|
||||
|
||||
if (!replacing) {
|
||||
/*
|
||||
* For attach, the only allowable parent is a mirror or the root
|
||||
@ -6646,7 +6684,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
|
||||
* than the top-level vdev.
|
||||
*/
|
||||
if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift)
|
||||
return (spa_vdev_exit(spa, newrootvd, txg, EDOM));
|
||||
return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
|
||||
|
||||
/*
|
||||
* If this is an in-place replacement, update oldvd's path and devid
|
||||
@ -6664,9 +6702,6 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
|
||||
}
|
||||
}
|
||||
|
||||
/* mark the device being resilvered */
|
||||
newvd->vdev_resilver_txg = txg;
|
||||
|
||||
/*
|
||||
* If the parent is not a mirror, or if we're replacing, insert the new
|
||||
* mirror/replacing/spare vdev above oldvd.
|
||||
@ -6704,8 +6739,8 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
|
||||
*/
|
||||
dtl_max_txg = txg + TXG_CONCURRENT_STATES;
|
||||
|
||||
vdev_dtl_dirty(newvd, DTL_MISSING, TXG_INITIAL,
|
||||
dtl_max_txg - TXG_INITIAL);
|
||||
vdev_dtl_dirty(newvd, DTL_MISSING,
|
||||
TXG_INITIAL, dtl_max_txg - TXG_INITIAL);
|
||||
|
||||
if (newvd->vdev_isspare) {
|
||||
spa_spare_activate(newvd);
|
||||
@ -6722,16 +6757,25 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
|
||||
vdev_dirty(tvd, VDD_DTL, newvd, txg);
|
||||
|
||||
/*
|
||||
* Schedule the resilver to restart in the future. We do this to
|
||||
* ensure that dmu_sync-ed blocks have been stitched into the
|
||||
* respective datasets. We do not do this if resilvers have been
|
||||
* deferred.
|
||||
* Schedule the resilver or rebuild to restart in the future. We do
|
||||
* this to ensure that dmu_sync-ed blocks have been stitched into the
|
||||
* respective datasets.
|
||||
*/
|
||||
if (dsl_scan_resilvering(spa_get_dsl(spa)) &&
|
||||
spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
|
||||
vdev_defer_resilver(newvd);
|
||||
else
|
||||
dsl_scan_restart_resilver(spa->spa_dsl_pool, dtl_max_txg);
|
||||
if (rebuild) {
|
||||
newvd->vdev_rebuild_txg = txg;
|
||||
|
||||
vdev_rebuild(tvd);
|
||||
} else {
|
||||
newvd->vdev_resilver_txg = txg;
|
||||
|
||||
if (dsl_scan_resilvering(spa_get_dsl(spa)) &&
|
||||
spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) {
|
||||
vdev_defer_resilver(newvd);
|
||||
} else {
|
||||
dsl_scan_restart_resilver(spa->spa_dsl_pool,
|
||||
dtl_max_txg);
|
||||
}
|
||||
}
|
||||
|
||||
if (spa->spa_bootfs)
|
||||
spa_event_notify(spa, newvd, NULL, ESC_ZFS_BOOTFS_VDEV_ATTACH);
|
||||
@ -6774,7 +6818,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
|
||||
|
||||
ASSERT(spa_writeable(spa));
|
||||
|
||||
txg = spa_vdev_enter(spa);
|
||||
txg = spa_vdev_detach_enter(spa, guid);
|
||||
|
||||
vd = spa_lookup_by_guid(spa, guid, B_FALSE);
|
||||
|
||||
@ -7728,6 +7772,12 @@ spa_vdev_resilver_done(spa_t *spa)
|
||||
}
|
||||
|
||||
spa_config_exit(spa, SCL_ALL, FTAG);
|
||||
|
||||
/*
|
||||
* If a detach was not performed above, replace waiters will not have
|
||||
* been notified. In which case we must do so now.
|
||||
*/
|
||||
spa_notify_waiters(spa);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -7970,10 +8020,22 @@ spa_async_thread(void *arg)
|
||||
if (tasks & SPA_ASYNC_RESILVER_DONE)
|
||||
spa_vdev_resilver_done(spa);
|
||||
|
||||
/*
|
||||
* If any devices are done replacing, detach them. Then if no
|
||||
* top-level vdevs are rebuilding attempt to kick off a scrub.
|
||||
*/
|
||||
if (tasks & SPA_ASYNC_REBUILD_DONE) {
|
||||
spa_vdev_resilver_done(spa);
|
||||
|
||||
if (!vdev_rebuild_active(spa->spa_root_vdev))
|
||||
(void) dsl_scan(spa->spa_dsl_pool, POOL_SCAN_SCRUB);
|
||||
}
|
||||
|
||||
/*
|
||||
* Kick off a resilver.
|
||||
*/
|
||||
if (tasks & SPA_ASYNC_RESILVER &&
|
||||
!vdev_rebuild_active(spa->spa_root_vdev) &&
|
||||
(!dsl_scan_resilvering(dp) ||
|
||||
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)))
|
||||
dsl_scan_restart_resilver(dp, 0);
|
||||
@ -9470,6 +9532,9 @@ spa_activity_in_progress(spa_t *spa, zpool_wait_activity_t activity,
|
||||
DSS_SCANNING);
|
||||
break;
|
||||
case ZPOOL_WAIT_RESILVER:
|
||||
if ((*in_progress = vdev_rebuild_active(spa->spa_root_vdev)))
|
||||
break;
|
||||
/* fall through */
|
||||
case ZPOOL_WAIT_SCRUB:
|
||||
{
|
||||
boolean_t scanning, paused, is_scrub;
|
||||
|
@ -1165,6 +1165,30 @@ spa_vdev_enter(spa_t *spa)
|
||||
return (spa_vdev_config_enter(spa));
|
||||
}
|
||||
|
||||
/*
|
||||
* The same as spa_vdev_enter() above but additionally takes the guid of
|
||||
* the vdev being detached. When there is a rebuild in progress it will be
|
||||
* suspended while the vdev tree is modified then resumed by spa_vdev_exit().
|
||||
* The rebuild is canceled if only a single child remains after the detach.
|
||||
*/
|
||||
/*
 * Enter a vdev-detach critical section.  Takes the per-pool vdev top lock
 * and the global namespace lock, stops autotrim, stops any rebuild on the
 * top-level vdev that owns 'guid', and returns the value produced by
 * spa_vdev_config_enter().
 *
 * spa  - pool being modified.
 * guid - guid of the vdev being detached; 0 skips the rebuild handling.
 */
uint64_t
|
||||
spa_vdev_detach_enter(spa_t *spa, uint64_t guid)
|
||||
{
|
||||
/* Lock order: spa_vdev_top_lock first, then spa_namespace_lock. */
mutex_enter(&spa->spa_vdev_top_lock);
|
||||
mutex_enter(&spa_namespace_lock);
|
||||
|
||||
vdev_autotrim_stop_all(spa);
|
||||
|
||||
/* A guid of 0 means there is no specific vdev whose rebuild to stop. */
if (guid != 0) {
|
||||
vdev_t *vd = spa_lookup_by_guid(spa, guid, B_FALSE);
|
||||
/* An unknown guid is ignored here; no error is reported. */
if (vd) {
|
||||
/* The stop is issued against the top-level vdev, not the leaf. */
vdev_rebuild_stop_wait(vd->vdev_top);
|
||||
}
|
||||
}
|
||||
|
||||
/* NOTE(review): presumably returns the txg for the change - confirm. */
return (spa_vdev_config_enter(spa));
|
||||
}
|
||||
|
||||
/*
|
||||
* Internal implementation for spa_vdev_enter(). Used when a vdev
|
||||
* operation requires multiple syncs (i.e. removing a device) while
|
||||
@ -1198,7 +1222,7 @@ spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag)
|
||||
/*
|
||||
* Reassess the DTLs.
|
||||
*/
|
||||
vdev_dtl_reassess(spa->spa_root_vdev, 0, 0, B_FALSE);
|
||||
vdev_dtl_reassess(spa->spa_root_vdev, 0, 0, B_FALSE, B_FALSE);
|
||||
|
||||
if (error == 0 && !list_is_empty(&spa->spa_config_dirty_list)) {
|
||||
config_changed = B_TRUE;
|
||||
@ -1271,6 +1295,7 @@ int
|
||||
spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error)
|
||||
{
|
||||
vdev_autotrim_restart(spa);
|
||||
vdev_rebuild_restart(spa);
|
||||
|
||||
spa_vdev_config_exit(spa, vd, txg, error, FTAG);
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
@ -1322,7 +1347,7 @@ spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error)
|
||||
}
|
||||
|
||||
if (vd != NULL || error == 0)
|
||||
vdev_dtl_reassess(vdev_top, 0, 0, B_FALSE);
|
||||
vdev_dtl_reassess(vdev_top, 0, 0, B_FALSE, B_FALSE);
|
||||
|
||||
if (vd != NULL) {
|
||||
if (vd != spa->spa_root_vdev)
|
||||
|
@ -39,6 +39,7 @@
|
||||
#include <sys/dmu_tx.h>
|
||||
#include <sys/dsl_dir.h>
|
||||
#include <sys/vdev_impl.h>
|
||||
#include <sys/vdev_rebuild.h>
|
||||
#include <sys/uberblock_impl.h>
|
||||
#include <sys/metaslab.h>
|
||||
#include <sys/metaslab_impl.h>
|
||||
@ -551,10 +552,12 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
|
||||
mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&vd->vdev_probe_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&vd->vdev_scan_io_queue_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
|
||||
mutex_init(&vd->vdev_initialize_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&vd->vdev_initialize_io_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
cv_init(&vd->vdev_initialize_cv, NULL, CV_DEFAULT, NULL);
|
||||
cv_init(&vd->vdev_initialize_io_cv, NULL, CV_DEFAULT, NULL);
|
||||
|
||||
mutex_init(&vd->vdev_trim_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&vd->vdev_autotrim_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&vd->vdev_trim_io_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
@ -562,10 +565,16 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
|
||||
cv_init(&vd->vdev_autotrim_cv, NULL, CV_DEFAULT, NULL);
|
||||
cv_init(&vd->vdev_trim_io_cv, NULL, CV_DEFAULT, NULL);
|
||||
|
||||
mutex_init(&vd->vdev_rebuild_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&vd->vdev_rebuild_io_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
cv_init(&vd->vdev_rebuild_cv, NULL, CV_DEFAULT, NULL);
|
||||
cv_init(&vd->vdev_rebuild_io_cv, NULL, CV_DEFAULT, NULL);
|
||||
|
||||
for (int t = 0; t < DTL_TYPES; t++) {
|
||||
vd->vdev_dtl[t] = range_tree_create(NULL, RANGE_SEG64, NULL, 0,
|
||||
0);
|
||||
}
|
||||
|
||||
txg_list_create(&vd->vdev_ms_list, spa,
|
||||
offsetof(struct metaslab, ms_txg_node));
|
||||
txg_list_create(&vd->vdev_dtl_list, spa,
|
||||
@ -835,6 +844,9 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
|
||||
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVER_TXG,
|
||||
&vd->vdev_resilver_txg);
|
||||
|
||||
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REBUILD_TXG,
|
||||
&vd->vdev_rebuild_txg);
|
||||
|
||||
if (nvlist_exists(nv, ZPOOL_CONFIG_RESILVER_DEFER))
|
||||
vdev_defer_resilver(vd);
|
||||
|
||||
@ -890,6 +902,7 @@ vdev_free(vdev_t *vd)
|
||||
ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
|
||||
ASSERT3P(vd->vdev_trim_thread, ==, NULL);
|
||||
ASSERT3P(vd->vdev_autotrim_thread, ==, NULL);
|
||||
ASSERT3P(vd->vdev_rebuild_thread, ==, NULL);
|
||||
|
||||
/*
|
||||
* Scan queues are normally destroyed at the end of a scan. If the
|
||||
@ -998,10 +1011,12 @@ vdev_free(vdev_t *vd)
|
||||
mutex_destroy(&vd->vdev_stat_lock);
|
||||
mutex_destroy(&vd->vdev_probe_lock);
|
||||
mutex_destroy(&vd->vdev_scan_io_queue_lock);
|
||||
|
||||
mutex_destroy(&vd->vdev_initialize_lock);
|
||||
mutex_destroy(&vd->vdev_initialize_io_lock);
|
||||
cv_destroy(&vd->vdev_initialize_io_cv);
|
||||
cv_destroy(&vd->vdev_initialize_cv);
|
||||
|
||||
mutex_destroy(&vd->vdev_trim_lock);
|
||||
mutex_destroy(&vd->vdev_autotrim_lock);
|
||||
mutex_destroy(&vd->vdev_trim_io_lock);
|
||||
@ -1009,6 +1024,11 @@ vdev_free(vdev_t *vd)
|
||||
cv_destroy(&vd->vdev_autotrim_cv);
|
||||
cv_destroy(&vd->vdev_trim_io_cv);
|
||||
|
||||
mutex_destroy(&vd->vdev_rebuild_lock);
|
||||
mutex_destroy(&vd->vdev_rebuild_io_lock);
|
||||
cv_destroy(&vd->vdev_rebuild_cv);
|
||||
cv_destroy(&vd->vdev_rebuild_io_cv);
|
||||
|
||||
zfs_ratelimit_fini(&vd->vdev_delay_rl);
|
||||
zfs_ratelimit_fini(&vd->vdev_checksum_rl);
|
||||
|
||||
@ -1078,7 +1098,10 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
|
||||
ASSERT3P(tvd->vdev_indirect_births, ==, NULL);
|
||||
ASSERT3P(tvd->vdev_obsolete_sm, ==, NULL);
|
||||
ASSERT0(tvd->vdev_removing);
|
||||
ASSERT0(tvd->vdev_rebuilding);
|
||||
tvd->vdev_removing = svd->vdev_removing;
|
||||
tvd->vdev_rebuilding = svd->vdev_rebuilding;
|
||||
tvd->vdev_rebuild_config = svd->vdev_rebuild_config;
|
||||
tvd->vdev_indirect_config = svd->vdev_indirect_config;
|
||||
tvd->vdev_indirect_mapping = svd->vdev_indirect_mapping;
|
||||
tvd->vdev_indirect_births = svd->vdev_indirect_births;
|
||||
@ -1092,6 +1115,7 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
|
||||
svd->vdev_indirect_births = NULL;
|
||||
svd->vdev_obsolete_sm = NULL;
|
||||
svd->vdev_removing = 0;
|
||||
svd->vdev_rebuilding = 0;
|
||||
|
||||
for (t = 0; t < TXG_SIZE; t++) {
|
||||
while ((msp = txg_list_remove(&svd->vdev_ms_list, t)) != NULL)
|
||||
@ -2576,11 +2600,8 @@ vdev_dtl_max(vdev_t *vd)
|
||||
* excise the DTLs.
|
||||
*/
|
||||
static boolean_t
|
||||
vdev_dtl_should_excise(vdev_t *vd)
|
||||
vdev_dtl_should_excise(vdev_t *vd, boolean_t rebuild_done)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
|
||||
|
||||
ASSERT0(vd->vdev_children);
|
||||
|
||||
if (vd->vdev_state < VDEV_STATE_DEGRADED)
|
||||
@ -2589,23 +2610,52 @@ vdev_dtl_should_excise(vdev_t *vd)
|
||||
if (vd->vdev_resilver_deferred)
|
||||
return (B_FALSE);
|
||||
|
||||
if (vd->vdev_resilver_txg == 0 ||
|
||||
range_tree_is_empty(vd->vdev_dtl[DTL_MISSING]))
|
||||
if (range_tree_is_empty(vd->vdev_dtl[DTL_MISSING]))
|
||||
return (B_TRUE);
|
||||
|
||||
/*
|
||||
* When a resilver is initiated the scan will assign the scn_max_txg
|
||||
* value to the highest txg value that exists in all DTLs. If this
|
||||
* device's max DTL is not part of this scan (i.e. it is not in
|
||||
* the range (scn_min_txg, scn_max_txg] then it is not eligible
|
||||
* for excision.
|
||||
*/
|
||||
if (vdev_dtl_max(vd) <= scn->scn_phys.scn_max_txg) {
|
||||
ASSERT3U(scn->scn_phys.scn_min_txg, <=, vdev_dtl_min(vd));
|
||||
ASSERT3U(scn->scn_phys.scn_min_txg, <, vd->vdev_resilver_txg);
|
||||
ASSERT3U(vd->vdev_resilver_txg, <=, scn->scn_phys.scn_max_txg);
|
||||
return (B_TRUE);
|
||||
if (rebuild_done) {
|
||||
vdev_rebuild_t *vr = &vd->vdev_top->vdev_rebuild_config;
|
||||
vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
|
||||
|
||||
/* Rebuild not initiated by attach */
|
||||
if (vd->vdev_rebuild_txg == 0)
|
||||
return (B_TRUE);
|
||||
|
||||
/*
|
||||
* When a rebuild completes without error then all missing data
|
||||
* up to the rebuild max txg has been reconstructed and the DTL
|
||||
* is eligible for excision.
|
||||
*/
|
||||
if (vrp->vrp_rebuild_state == VDEV_REBUILD_COMPLETE &&
|
||||
vdev_dtl_max(vd) <= vrp->vrp_max_txg) {
|
||||
ASSERT3U(vrp->vrp_min_txg, <=, vdev_dtl_min(vd));
|
||||
ASSERT3U(vrp->vrp_min_txg, <, vd->vdev_rebuild_txg);
|
||||
ASSERT3U(vd->vdev_rebuild_txg, <=, vrp->vrp_max_txg);
|
||||
return (B_TRUE);
|
||||
}
|
||||
} else {
|
||||
dsl_scan_t *scn = vd->vdev_spa->spa_dsl_pool->dp_scan;
|
||||
dsl_scan_phys_t *scnp __maybe_unused = &scn->scn_phys;
|
||||
|
||||
/* Resilver not initiated by attach */
|
||||
if (vd->vdev_resilver_txg == 0)
|
||||
return (B_TRUE);
|
||||
|
||||
/*
|
||||
* When a resilver is initiated the scan will assign the
|
||||
* scn_max_txg value to the highest txg value that exists
|
||||
* in all DTLs. If this device's max DTL is not part of this
|
||||
* scan (i.e. it is not in the range (scn_min_txg, scn_max_txg]
|
||||
* then it is not eligible for excision.
|
||||
*/
|
||||
if (vdev_dtl_max(vd) <= scn->scn_phys.scn_max_txg) {
|
||||
ASSERT3U(scnp->scn_min_txg, <=, vdev_dtl_min(vd));
|
||||
ASSERT3U(scnp->scn_min_txg, <, vd->vdev_resilver_txg);
|
||||
ASSERT3U(vd->vdev_resilver_txg, <=, scnp->scn_max_txg);
|
||||
return (B_TRUE);
|
||||
}
|
||||
}
|
||||
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
@ -2614,7 +2664,8 @@ vdev_dtl_should_excise(vdev_t *vd)
|
||||
* write operations will be issued to the pool.
|
||||
*/
|
||||
void
|
||||
vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
|
||||
vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
|
||||
boolean_t scrub_done, boolean_t rebuild_done)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
avl_tree_t reftree;
|
||||
@ -2624,22 +2675,28 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
|
||||
|
||||
for (int c = 0; c < vd->vdev_children; c++)
|
||||
vdev_dtl_reassess(vd->vdev_child[c], txg,
|
||||
scrub_txg, scrub_done);
|
||||
scrub_txg, scrub_done, rebuild_done);
|
||||
|
||||
if (vd == spa->spa_root_vdev || !vdev_is_concrete(vd) || vd->vdev_aux)
|
||||
return;
|
||||
|
||||
if (vd->vdev_ops->vdev_op_leaf) {
|
||||
dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
|
||||
vdev_rebuild_t *vr = &vd->vdev_top->vdev_rebuild_config;
|
||||
boolean_t check_excise = B_FALSE;
|
||||
boolean_t wasempty = B_TRUE;
|
||||
|
||||
mutex_enter(&vd->vdev_dtl_lock);
|
||||
|
||||
/*
|
||||
* If requested, pretend the scan completed cleanly.
|
||||
* If requested, pretend the scan or rebuild completed cleanly.
|
||||
*/
|
||||
if (zfs_scan_ignore_errors && scn)
|
||||
scn->scn_phys.scn_errors = 0;
|
||||
if (zfs_scan_ignore_errors) {
|
||||
if (scn != NULL)
|
||||
scn->scn_phys.scn_errors = 0;
|
||||
if (vr != NULL)
|
||||
vr->vr_rebuild_phys.vrp_errors = 0;
|
||||
}
|
||||
|
||||
if (scrub_txg != 0 &&
|
||||
!range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) {
|
||||
@ -2654,21 +2711,29 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
|
||||
}
|
||||
|
||||
/*
|
||||
* If we've completed a scan cleanly then determine
|
||||
* if this vdev should remove any DTLs. We only want to
|
||||
* excise regions on vdevs that were available during
|
||||
* the entire duration of this scan.
|
||||
* If we've completed a scrub/resilver or a rebuild cleanly
|
||||
* then determine if this vdev should remove any DTLs. We
|
||||
* only want to excise regions on vdevs that were available
|
||||
* during the entire duration of this scan.
|
||||
*/
|
||||
if (scrub_txg != 0 &&
|
||||
(spa->spa_scrub_started ||
|
||||
(scn != NULL && scn->scn_phys.scn_errors == 0)) &&
|
||||
vdev_dtl_should_excise(vd)) {
|
||||
if (rebuild_done &&
|
||||
vr != NULL && vr->vr_rebuild_phys.vrp_errors == 0) {
|
||||
check_excise = B_TRUE;
|
||||
} else {
|
||||
if (spa->spa_scrub_started ||
|
||||
(scn != NULL && scn->scn_phys.scn_errors == 0)) {
|
||||
check_excise = B_TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (scrub_txg && check_excise &&
|
||||
vdev_dtl_should_excise(vd, rebuild_done)) {
|
||||
/*
|
||||
* We completed a scrub up to scrub_txg. If we
|
||||
* did it without rebooting, then the scrub dtl
|
||||
* will be valid, so excise the old region and
|
||||
* fold in the scrub dtl. Otherwise, leave the
|
||||
* dtl as-is if there was an error.
|
||||
* We completed a scrub, resilver or rebuild up to
|
||||
* scrub_txg. If we did it without rebooting, then
|
||||
* the scrub dtl will be valid, so excise the old
|
||||
* region and fold in the scrub dtl. Otherwise,
|
||||
* leave the dtl as-is if there was an error.
|
||||
*
|
||||
* There's a little trick here: to excise the beginning
|
||||
* of the DTL_MISSING map, we put it into a reference
|
||||
@ -2711,15 +2776,20 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
|
||||
range_tree_add, vd->vdev_dtl[DTL_OUTAGE]);
|
||||
|
||||
/*
|
||||
* If the vdev was resilvering and no longer has any
|
||||
* DTLs then reset its resilvering flag and dirty
|
||||
* If the vdev was resilvering or rebuilding and no longer
|
||||
* has any DTLs then reset the appropriate flag and dirty
|
||||
* the top level so that we persist the change.
|
||||
*/
|
||||
if (txg != 0 && vd->vdev_resilver_txg != 0 &&
|
||||
if (txg != 0 &&
|
||||
range_tree_is_empty(vd->vdev_dtl[DTL_MISSING]) &&
|
||||
range_tree_is_empty(vd->vdev_dtl[DTL_OUTAGE])) {
|
||||
vd->vdev_resilver_txg = 0;
|
||||
vdev_config_dirty(vd->vdev_top);
|
||||
if (vd->vdev_rebuild_txg != 0) {
|
||||
vd->vdev_rebuild_txg = 0;
|
||||
vdev_config_dirty(vd->vdev_top);
|
||||
} else if (vd->vdev_resilver_txg != 0) {
|
||||
vd->vdev_resilver_txg = 0;
|
||||
vdev_config_dirty(vd->vdev_top);
|
||||
}
|
||||
}
|
||||
|
||||
mutex_exit(&vd->vdev_dtl_lock);
|
||||
@ -2955,10 +3025,10 @@ vdev_dtl_required(vdev_t *vd)
|
||||
* If not, we can safely offline/detach/remove the device.
|
||||
*/
|
||||
vd->vdev_cant_read = B_TRUE;
|
||||
vdev_dtl_reassess(tvd, 0, 0, B_FALSE);
|
||||
vdev_dtl_reassess(tvd, 0, 0, B_FALSE, B_FALSE);
|
||||
required = !vdev_dtl_empty(tvd, DTL_OUTAGE);
|
||||
vd->vdev_cant_read = cant_read;
|
||||
vdev_dtl_reassess(tvd, 0, 0, B_FALSE);
|
||||
vdev_dtl_reassess(tvd, 0, 0, B_FALSE, B_FALSE);
|
||||
|
||||
if (!required && zio_injection_enabled) {
|
||||
required = !!zio_handle_device_injection(vd, NULL,
|
||||
@ -3065,6 +3135,20 @@ vdev_load(vdev_t *vd)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Load any rebuild state from the top-level vdev zap.
|
||||
*/
|
||||
if (vd == vd->vdev_top && vd->vdev_top_zap != 0) {
|
||||
error = vdev_rebuild_load(vd);
|
||||
if (error && error != ENOTSUP) {
|
||||
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
|
||||
VDEV_AUX_CORRUPT_DATA);
|
||||
vdev_dbgmsg(vd, "vdev_load: vdev_rebuild_load "
|
||||
"failed [error=%d]", error);
|
||||
return (error);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If this is a top-level vdev, initialize its metaslabs.
|
||||
*/
|
||||
@ -3947,6 +4031,7 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
|
||||
vs->vs_timestamp = gethrtime() - vs->vs_timestamp;
|
||||
vs->vs_state = vd->vdev_state;
|
||||
vs->vs_rsize = vdev_get_min_asize(vd);
|
||||
|
||||
if (vd->vdev_ops->vdev_op_leaf) {
|
||||
vs->vs_rsize += VDEV_LABEL_START_SIZE +
|
||||
VDEV_LABEL_END_SIZE;
|
||||
@ -3973,7 +4058,11 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
|
||||
vs->vs_trim_bytes_est = vd->vdev_trim_bytes_est;
|
||||
vs->vs_trim_state = vd->vdev_trim_state;
|
||||
vs->vs_trim_action_time = vd->vdev_trim_action_time;
|
||||
|
||||
/* Set when there is a deferred resilver. */
|
||||
vs->vs_resilver_deferred = vd->vdev_resilver_deferred;
|
||||
}
|
||||
|
||||
/*
|
||||
* Report expandable space on top-level, non-auxiliary devices
|
||||
* only. The expandable space is reported in terms of metaslab
|
||||
@ -3985,13 +4074,16 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
|
||||
vd->vdev_max_asize - vd->vdev_asize,
|
||||
1ULL << tvd->vdev_ms_shift);
|
||||
}
|
||||
|
||||
/*
|
||||
* Report fragmentation and rebuild progress for top-level,
|
||||
* non-auxiliary, concrete devices.
|
||||
*/
|
||||
if (vd->vdev_aux == NULL && vd == vd->vdev_top &&
|
||||
vdev_is_concrete(vd)) {
|
||||
vs->vs_fragmentation = (vd->vdev_mg != NULL) ?
|
||||
vd->vdev_mg->mg_fragmentation : 0;
|
||||
}
|
||||
if (vd->vdev_ops->vdev_op_leaf)
|
||||
vs->vs_resilver_deferred = vd->vdev_resilver_deferred;
|
||||
}
|
||||
|
||||
vdev_get_stats_ex_impl(vd, vs, vsx);
|
||||
@ -4072,17 +4164,35 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
|
||||
mutex_enter(&vd->vdev_stat_lock);
|
||||
|
||||
if (flags & ZIO_FLAG_IO_REPAIR) {
|
||||
/*
|
||||
* Repair is the result of a resilver issued by the
|
||||
* scan thread (spa_sync).
|
||||
*/
|
||||
if (flags & ZIO_FLAG_SCAN_THREAD) {
|
||||
dsl_scan_phys_t *scn_phys =
|
||||
&spa->spa_dsl_pool->dp_scan->scn_phys;
|
||||
dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
|
||||
dsl_scan_phys_t *scn_phys = &scn->scn_phys;
|
||||
uint64_t *processed = &scn_phys->scn_processed;
|
||||
|
||||
/* XXX cleanup? */
|
||||
if (vd->vdev_ops->vdev_op_leaf)
|
||||
atomic_add_64(processed, psize);
|
||||
vs->vs_scan_processed += psize;
|
||||
}
|
||||
|
||||
/*
|
||||
* Repair is the result of a rebuild issued by the
|
||||
* rebuild thread (vdev_rebuild_thread).
|
||||
*/
|
||||
if (zio->io_priority == ZIO_PRIORITY_REBUILD) {
|
||||
vdev_t *tvd = vd->vdev_top;
|
||||
vdev_rebuild_t *vr = &tvd->vdev_rebuild_config;
|
||||
vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
|
||||
uint64_t *rebuilt = &vrp->vrp_bytes_rebuilt;
|
||||
|
||||
if (vd->vdev_ops->vdev_op_leaf)
|
||||
atomic_add_64(rebuilt, psize);
|
||||
vs->vs_rebuild_processed += psize;
|
||||
}
|
||||
|
||||
if (flags & ZIO_FLAG_SELF_HEAL)
|
||||
vs->vs_self_healed += psize;
|
||||
}
|
||||
@ -4094,6 +4204,7 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
|
||||
if (vd->vdev_ops->vdev_op_leaf &&
|
||||
(zio->io_priority < ZIO_PRIORITY_NUM_QUEUEABLE)) {
|
||||
zio_type_t vs_type = type;
|
||||
zio_priority_t priority = zio->io_priority;
|
||||
|
||||
/*
|
||||
* TRIM ops and bytes are reported to user space as
|
||||
@ -4103,19 +4214,44 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
|
||||
if (type == ZIO_TYPE_TRIM)
|
||||
vs_type = ZIO_TYPE_IOCTL;
|
||||
|
||||
/*
|
||||
* Solely for the purposes of 'zpool iostat -lqrw'
|
||||
* reporting use the priority to categorize the IO.
|
||||
* Only the following are reported to user space:
|
||||
*
|
||||
* ZIO_PRIORITY_SYNC_READ,
|
||||
* ZIO_PRIORITY_SYNC_WRITE,
|
||||
* ZIO_PRIORITY_ASYNC_READ,
|
||||
* ZIO_PRIORITY_ASYNC_WRITE,
|
||||
* ZIO_PRIORITY_SCRUB,
|
||||
* ZIO_PRIORITY_TRIM.
|
||||
*/
|
||||
if (priority == ZIO_PRIORITY_REBUILD) {
|
||||
priority = ((type == ZIO_TYPE_WRITE) ?
|
||||
ZIO_PRIORITY_ASYNC_WRITE :
|
||||
ZIO_PRIORITY_SCRUB);
|
||||
} else if (priority == ZIO_PRIORITY_INITIALIZING) {
|
||||
ASSERT3U(type, ==, ZIO_TYPE_WRITE);
|
||||
priority = ZIO_PRIORITY_ASYNC_WRITE;
|
||||
} else if (priority == ZIO_PRIORITY_REMOVAL) {
|
||||
priority = ((type == ZIO_TYPE_WRITE) ?
|
||||
ZIO_PRIORITY_ASYNC_WRITE :
|
||||
ZIO_PRIORITY_ASYNC_READ);
|
||||
}
|
||||
|
||||
vs->vs_ops[vs_type]++;
|
||||
vs->vs_bytes[vs_type] += psize;
|
||||
|
||||
if (flags & ZIO_FLAG_DELEGATED) {
|
||||
vsx->vsx_agg_histo[zio->io_priority]
|
||||
vsx->vsx_agg_histo[priority]
|
||||
[RQ_HISTO(zio->io_size)]++;
|
||||
} else {
|
||||
vsx->vsx_ind_histo[zio->io_priority]
|
||||
vsx->vsx_ind_histo[priority]
|
||||
[RQ_HISTO(zio->io_size)]++;
|
||||
}
|
||||
|
||||
if (zio->io_delta && zio->io_delay) {
|
||||
vsx->vsx_queue_histo[zio->io_priority]
|
||||
vsx->vsx_queue_histo[priority]
|
||||
[L_HISTO(zio->io_delta - zio->io_delay)]++;
|
||||
vsx->vsx_disk_histo[type]
|
||||
[L_HISTO(zio->io_delay)]++;
|
||||
|
@ -404,6 +404,19 @@ root_vdev_actions_getprogress(vdev_t *vd, nvlist_t *nvl)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Add rebuild statistics for a top-level vdev to its config nvlist.
 *
 * vd  - vdev being described; anything other than a top-level vdev is
 *       ignored.
 * nvl - nvlist that receives the ZPOOL_CONFIG_REBUILD_STATS entry.
 *
 * Nothing is added when vdev_rebuild_get_stats() returns non-zero.
 */
static void
|
||||
top_vdev_actions_getprogress(vdev_t *vd, nvlist_t *nvl)
|
||||
{
|
||||
/* Only a vdev that is its own top-level vdev is reported. */
if (vd == vd->vdev_top) {
|
||||
vdev_rebuild_stat_t vrs;
|
||||
if (vdev_rebuild_get_stats(vd, &vrs) == 0) {
|
||||
/*
 * The stats struct is exported as a flat uint64_t array whose
 * length is the struct size divided by sizeof (uint64_t).
 */
fnvlist_add_uint64_array(nvl,
|
||||
ZPOOL_CONFIG_REBUILD_STATS, (uint64_t *)&vrs,
|
||||
sizeof (vrs) / sizeof (uint64_t));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate the nvlist representing this vdev's config.
|
||||
*/
|
||||
@ -559,6 +572,7 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
|
||||
vdev_config_generate_stats(vd, nv);
|
||||
|
||||
root_vdev_actions_getprogress(vd, nv);
|
||||
top_vdev_actions_getprogress(vd, nv);
|
||||
|
||||
/*
|
||||
* Note: this can be called from open context
|
||||
@ -663,6 +677,9 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
|
||||
if (vd->vdev_resilver_txg != 0)
|
||||
fnvlist_add_uint64(nv, ZPOOL_CONFIG_RESILVER_TXG,
|
||||
vd->vdev_resilver_txg);
|
||||
if (vd->vdev_rebuild_txg != 0)
|
||||
fnvlist_add_uint64(nv, ZPOOL_CONFIG_REBUILD_TXG,
|
||||
vd->vdev_rebuild_txg);
|
||||
if (vd->vdev_faulted)
|
||||
fnvlist_add_uint64(nv, ZPOOL_CONFIG_FAULTED, B_TRUE);
|
||||
if (vd->vdev_degraded)
|
||||
|
@ -767,8 +767,9 @@ vdev_mirror_io_done(zio_t *zio)
|
||||
|
||||
zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
|
||||
mc->mc_vd, mc->mc_offset,
|
||||
zio->io_abd, zio->io_size,
|
||||
ZIO_TYPE_WRITE, ZIO_PRIORITY_ASYNC_WRITE,
|
||||
zio->io_abd, zio->io_size, ZIO_TYPE_WRITE,
|
||||
zio->io_priority == ZIO_PRIORITY_REBUILD ?
|
||||
ZIO_PRIORITY_REBUILD : ZIO_PRIORITY_ASYNC_WRITE,
|
||||
ZIO_FLAG_IO_REPAIR | (unexpected_errors ?
|
||||
ZIO_FLAG_SELF_HEAL : 0), NULL, NULL));
|
||||
}
|
||||
|
@ -158,6 +158,8 @@ uint32_t zfs_vdev_initializing_min_active = 1;
|
||||
uint32_t zfs_vdev_initializing_max_active = 1;
|
||||
uint32_t zfs_vdev_trim_min_active = 1;
|
||||
uint32_t zfs_vdev_trim_max_active = 2;
|
||||
uint32_t zfs_vdev_rebuild_min_active = 1;
|
||||
uint32_t zfs_vdev_rebuild_max_active = 3;
|
||||
|
||||
/*
|
||||
* When the pool has less than zfs_vdev_async_write_active_min_dirty_percent
|
||||
@ -278,6 +280,8 @@ vdev_queue_class_min_active(zio_priority_t p)
|
||||
return (zfs_vdev_initializing_min_active);
|
||||
case ZIO_PRIORITY_TRIM:
|
||||
return (zfs_vdev_trim_min_active);
|
||||
case ZIO_PRIORITY_REBUILD:
|
||||
return (zfs_vdev_rebuild_min_active);
|
||||
default:
|
||||
panic("invalid priority %u", p);
|
||||
return (0);
|
||||
@ -352,6 +356,8 @@ vdev_queue_class_max_active(spa_t *spa, zio_priority_t p)
|
||||
return (zfs_vdev_initializing_max_active);
|
||||
case ZIO_PRIORITY_TRIM:
|
||||
return (zfs_vdev_trim_max_active);
|
||||
case ZIO_PRIORITY_REBUILD:
|
||||
return (zfs_vdev_rebuild_max_active);
|
||||
default:
|
||||
panic("invalid priority %u", p);
|
||||
return (0);
|
||||
@ -845,7 +851,8 @@ vdev_queue_io(zio_t *zio)
|
||||
zio->io_priority != ZIO_PRIORITY_ASYNC_READ &&
|
||||
zio->io_priority != ZIO_PRIORITY_SCRUB &&
|
||||
zio->io_priority != ZIO_PRIORITY_REMOVAL &&
|
||||
zio->io_priority != ZIO_PRIORITY_INITIALIZING) {
|
||||
zio->io_priority != ZIO_PRIORITY_INITIALIZING &&
|
||||
zio->io_priority != ZIO_PRIORITY_REBUILD) {
|
||||
zio->io_priority = ZIO_PRIORITY_ASYNC_READ;
|
||||
}
|
||||
} else if (zio->io_type == ZIO_TYPE_WRITE) {
|
||||
@ -854,7 +861,8 @@ vdev_queue_io(zio_t *zio)
|
||||
if (zio->io_priority != ZIO_PRIORITY_SYNC_WRITE &&
|
||||
zio->io_priority != ZIO_PRIORITY_ASYNC_WRITE &&
|
||||
zio->io_priority != ZIO_PRIORITY_REMOVAL &&
|
||||
zio->io_priority != ZIO_PRIORITY_INITIALIZING) {
|
||||
zio->io_priority != ZIO_PRIORITY_INITIALIZING &&
|
||||
zio->io_priority != ZIO_PRIORITY_REBUILD) {
|
||||
zio->io_priority = ZIO_PRIORITY_ASYNC_WRITE;
|
||||
}
|
||||
} else {
|
||||
@ -1051,6 +1059,12 @@ ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, trim_max_active, INT, ZMOD_RW,
|
||||
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, trim_min_active, INT, ZMOD_RW,
|
||||
"Min active trim/discard I/Os per vdev");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, rebuild_max_active, INT, ZMOD_RW,
|
||||
"Max active rebuild I/Os per vdev");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, rebuild_min_active, INT, ZMOD_RW,
|
||||
"Min active rebuild I/Os per vdev");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, queue_depth_pct, INT, ZMOD_RW,
|
||||
"Queue depth percentage for each top-level vdev");
|
||||
/* END CSTYLED */
|
||||
|
1106
module/zfs/vdev_rebuild.c
Normal file
1106
module/zfs/vdev_rebuild.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -1938,8 +1938,9 @@ static int
|
||||
zfs_ioc_vdev_attach(zfs_cmd_t *zc)
|
||||
{
|
||||
spa_t *spa;
|
||||
int replacing = zc->zc_cookie;
|
||||
nvlist_t *config;
|
||||
int replacing = zc->zc_cookie;
|
||||
int rebuild = zc->zc_simple;
|
||||
int error;
|
||||
|
||||
if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
|
||||
@ -1947,7 +1948,8 @@ zfs_ioc_vdev_attach(zfs_cmd_t *zc)
|
||||
|
||||
if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
|
||||
zc->zc_iflags, &config)) == 0) {
|
||||
error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
|
||||
error = spa_vdev_attach(spa, zc->zc_guid, config, replacing,
|
||||
rebuild);
|
||||
nvlist_free(config);
|
||||
}
|
||||
|
||||
|
@ -487,7 +487,8 @@ tests = ['zpool_wait_discard', 'zpool_wait_freeing',
|
||||
tags = ['functional', 'cli_root', 'zpool_wait']
|
||||
|
||||
[tests/functional/cli_root/zpool_wait/scan]
|
||||
tests = ['zpool_wait_replace_cancel', 'zpool_wait_resilver', 'zpool_wait_scrub_cancel',
|
||||
tests = ['zpool_wait_replace_cancel', 'zpool_wait_rebuild',
|
||||
'zpool_wait_resilver', 'zpool_wait_scrub_cancel',
|
||||
'zpool_wait_replace', 'zpool_wait_scrub_basic', 'zpool_wait_scrub_flag']
|
||||
tags = ['functional', 'cli_root', 'zpool_wait']
|
||||
|
||||
@ -748,7 +749,11 @@ tests = ['rename_dirs_001_pos']
|
||||
tags = ['functional', 'rename_dirs']
|
||||
|
||||
[tests/functional/replacement]
|
||||
tests = ['replacement_001_pos', 'replacement_002_pos', 'replacement_003_pos']
|
||||
tests = ['attach_import', 'attach_multiple', 'attach_rebuild',
|
||||
'attach_resilver', 'detach', 'rebuild_disabled_feature',
|
||||
'rebuild_multiple', 'rebuild_raidz', 'replace_import', 'replace_rebuild',
|
||||
'replace_resilver', 'resilver_restart_001', 'resilver_restart_002',
|
||||
'scrub_cancel']
|
||||
tags = ['functional', 'replacement']
|
||||
|
||||
[tests/functional/reservation]
|
||||
@ -762,10 +767,6 @@ tests = ['reservation_001_pos', 'reservation_002_pos', 'reservation_003_pos',
|
||||
'reservation_022_pos']
|
||||
tags = ['functional', 'reservation']
|
||||
|
||||
[tests/functional/resilver]
|
||||
tests = ['resilver_restart_001', 'resilver_restart_002']
|
||||
tags = ['functional', 'resilver']
|
||||
|
||||
[tests/functional/rootpool]
|
||||
tests = ['rootpool_002_neg', 'rootpool_003_neg', 'rootpool_007_pos']
|
||||
tags = ['functional', 'rootpool']
|
||||
|
@ -2222,26 +2222,27 @@ function check_pool_status # pool token keyword <verbose>
|
||||
if [[ $verbose == true ]]; then
|
||||
log_note $scan
|
||||
fi
|
||||
echo $scan | grep -i "$keyword" > /dev/null 2>&1
|
||||
echo $scan | egrep -i "$keyword" > /dev/null 2>&1
|
||||
|
||||
return $?
|
||||
}
|
||||
|
||||
#
|
||||
# The following functions are instances of check_pool_status()
|
||||
# is_pool_resilvering - to check if the pool is resilver in progress
|
||||
# is_pool_resilvered - to check if the pool is resilver completed
|
||||
# is_pool_scrubbing - to check if the pool is scrub in progress
|
||||
# is_pool_scrubbed - to check if the pool is scrub completed
|
||||
# is_pool_scrub_stopped - to check if the pool is scrub stopped
|
||||
# is_pool_scrub_paused - to check if the pool has scrub paused
|
||||
# is_pool_removing - to check if the pool is removing a vdev
|
||||
# is_pool_removed - to check if the pool is remove completed
|
||||
# is_pool_discarding - to check if the pool has checkpoint being discarded
|
||||
# is_pool_resilvering - to check if the pool resilver is in progress
|
||||
# is_pool_resilvered - to check if the pool resilver is completed
|
||||
# is_pool_scrubbing - to check if the pool scrub is in progress
|
||||
# is_pool_scrubbed - to check if the pool scrub is completed
|
||||
# is_pool_scrub_stopped - to check if the pool scrub is stopped
|
||||
# is_pool_scrub_paused - to check if the pool scrub has paused
|
||||
# is_pool_removing - to check if the pool is removing a vdev
|
||||
# is_pool_removed - to check if the pool remove is completed
|
||||
# is_pool_discarding - to check if the pool checkpoint is being discarded
|
||||
#
|
||||
function is_pool_resilvering #pool <verbose>
|
||||
{
|
||||
check_pool_status "$1" "scan" "resilver in progress since " $2
|
||||
check_pool_status "$1" "scan" \
|
||||
"resilver[ ()0-9A-Za-z_-]* in progress since" $2
|
||||
return $?
|
||||
}
|
||||
|
||||
@ -3487,7 +3488,7 @@ function wait_scrubbed
|
||||
typeset pool=${1:-$TESTPOOL}
|
||||
while true ; do
|
||||
is_pool_scrubbed $pool && break
|
||||
log_must sleep 1
|
||||
sleep 1
|
||||
done
|
||||
}
|
||||
|
||||
|
@ -65,7 +65,6 @@ SUBDIRS = \
|
||||
rename_dirs \
|
||||
replacement \
|
||||
reservation \
|
||||
resilver \
|
||||
rootpool \
|
||||
rsend \
|
||||
scrub_mirror \
|
||||
|
@ -79,6 +79,7 @@ typeset -a properties=(
|
||||
"feature@redacted_datasets"
|
||||
"feature@bookmark_written"
|
||||
"feature@log_spacemap"
|
||||
"feature@device_rebuild"
|
||||
)
|
||||
|
||||
if is_linux || is_freebsd; then
|
||||
|
@ -4,6 +4,7 @@ dist_pkgdata_SCRIPTS = \
|
||||
cleanup.ksh \
|
||||
zpool_wait_replace.ksh \
|
||||
zpool_wait_replace_cancel.ksh \
|
||||
zpool_wait_rebuild.ksh \
|
||||
zpool_wait_resilver.ksh \
|
||||
zpool_wait_scrub_basic.ksh \
|
||||
zpool_wait_scrub_cancel.ksh \
|
||||
|
@ -0,0 +1,64 @@
|
||||
#!/bin/ksh -p
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2018 by Delphix. All rights reserved.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# 'zpool wait' works when waiting for sequential resilvering to complete.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Attach a device to the pool so that sequential resilvering starts.
|
||||
# 2. Start 'zpool wait'.
|
||||
# 3. Monitor the waiting process to make sure it returns neither too soon nor
|
||||
# too late.
|
||||
# 4. Repeat 1-3, except using the '-w' flag with 'zpool attach' instead of using
|
||||
# 'zpool wait'.
|
||||
#
|
||||
|
||||
function cleanup
|
||||
{
|
||||
remove_io_delay
|
||||
kill_if_running $pid
|
||||
get_disklist $TESTPOOL | grep $DISK2 >/dev/null && \
|
||||
log_must zpool detach $TESTPOOL $DISK2
|
||||
}
|
||||
|
||||
typeset -r IN_PROGRESS_CHECK="is_pool_resilvering $TESTPOOL"
|
||||
typeset pid
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
add_io_delay $TESTPOOL
|
||||
|
||||
# Test 'zpool wait -t resilver'
|
||||
log_must zpool attach -s $TESTPOOL $DISK1 $DISK2
|
||||
log_bkgrnd zpool wait -t resilver $TESTPOOL
|
||||
pid=$!
|
||||
check_while_waiting $pid "$IN_PROGRESS_CHECK"
|
||||
|
||||
log_must zpool detach $TESTPOOL $DISK2
|
||||
|
||||
# Test 'zpool attach -w'
|
||||
log_bkgrnd zpool attach -sw $TESTPOOL $DISK1 $DISK2
|
||||
pid=$!
|
||||
while ! is_pool_resilvering $TESTPOOL && proc_exists $pid; do
|
||||
log_must sleep .5
|
||||
done
|
||||
check_while_waiting $pid "$IN_PROGRESS_CHECK"
|
||||
|
||||
log_pass "'zpool wait -t resilver' and 'zpool attach -w' work."
|
@ -2,9 +2,20 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/replacement
|
||||
dist_pkgdata_SCRIPTS = \
|
||||
setup.ksh \
|
||||
cleanup.ksh \
|
||||
replacement_001_pos.ksh \
|
||||
replacement_002_pos.ksh \
|
||||
replacement_003_pos.ksh
|
||||
attach_import.ksh \
|
||||
attach_multiple.ksh \
|
||||
attach_rebuild.ksh \
|
||||
attach_resilver.ksh \
|
||||
detach.ksh \
|
||||
rebuild_disabled_feature.ksh \
|
||||
rebuild_multiple.ksh \
|
||||
rebuild_raidz.ksh \
|
||||
replace_import.ksh \
|
||||
replace_rebuild.ksh \
|
||||
replace_resilver.ksh \
|
||||
resilver_restart_001.ksh \
|
||||
resilver_restart_002.ksh \
|
||||
scrub_cancel.ksh
|
||||
|
||||
dist_pkgdata_DATA = \
|
||||
replacement.cfg
|
||||
|
67
tests/zfs-tests/tests/functional/replacement/attach_import.ksh
Executable file
67
tests/zfs-tests/tests/functional/replacement/attach_import.ksh
Executable file
@ -0,0 +1,67 @@
|
||||
#!/bin/ksh
|
||||
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2019, Datto Inc. All rights reserved.
|
||||
# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/replacement/replacement.cfg
|
||||
|
||||
#
|
||||
# Description:
|
||||
# Verify that on import an in progress attach operation is resumed.
|
||||
#
|
||||
# Strategy:
|
||||
# 1. For both healing and sequential resilvering.
|
||||
# a. Create a pool
|
||||
# b. Add a vdev with 'zpool attach' and resilver (-s) it.
|
||||
# c. Export the pool
|
||||
# d. Import the pool
|
||||
# e. Verify the 'zpool attach' resumed resilvering
|
||||
# f. Destroy the pool
|
||||
#
|
||||
|
||||
function cleanup
|
||||
{
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
|
||||
$ORIG_SCAN_SUSPEND_PROGRESS
|
||||
destroy_pool $TESTPOOL1
|
||||
rm -f ${VDEV_FILES[@]}
|
||||
}
|
||||
|
||||
log_assert "Verify attach is resumed on import"
|
||||
|
||||
ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS)
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]}
|
||||
|
||||
# Verify healing and sequential resilver resume on import.
|
||||
for arg in "" "-s"; do
|
||||
log_must zpool create -f $TESTPOOL1 ${VDEV_FILES[0]}
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
|
||||
log_must zpool attach $arg $TESTPOOL1 ${VDEV_FILES[0]} ${VDEV_FILES[1]}
|
||||
log_must is_pool_resilvering $TESTPOOL1
|
||||
log_must zpool export $TESTPOOL1
|
||||
log_must zpool import -d $TEST_BASE_DIR $TESTPOOL1
|
||||
log_must is_pool_resilvering $TESTPOOL1
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS $ORIG_SCAN_SUSPEND_PROGRESS
|
||||
log_must zpool wait -t resilver $TESTPOOL1
|
||||
log_must is_pool_resilvered $TESTPOOL1
|
||||
destroy_pool $TESTPOOL1
|
||||
done
|
||||
|
||||
log_pass "Verify attach is resumed on import"
|
111
tests/zfs-tests/tests/functional/replacement/attach_multiple.ksh
Executable file
111
tests/zfs-tests/tests/functional/replacement/attach_multiple.ksh
Executable file
@ -0,0 +1,111 @@
|
||||
#!/bin/ksh
|
||||
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2019, Datto Inc. All rights reserved.
|
||||
# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/replacement/replacement.cfg
|
||||
|
||||
#
|
||||
# Description:
|
||||
# Verify that attach/detach work while resilvering and attaching
|
||||
# multiple vdevs.
|
||||
#
|
||||
# Strategy:
|
||||
# 1. Create a single vdev pool
|
||||
# 2. While healing or sequential resilvering:
|
||||
# a. Attach a vdev to convert the pool to a mirror.
|
||||
# b. Attach a vdev to convert the pool to a 3-way mirror.
|
||||
# c. Verify the original vdev cannot be removed (no redundant copies)
|
||||
# d. Detach a vdev. Healing and sequential resilver remain running.
|
||||
# e. Detach a vdev. Healing resilver remains running, sequential
|
||||
# resilver is canceled.
|
||||
# f. Wait for resilver to complete.
|
||||
#
|
||||
|
||||
function cleanup
|
||||
{
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
|
||||
$ORIG_SCAN_SUSPEND_PROGRESS
|
||||
destroy_pool $TESTPOOL1
|
||||
rm -f ${VDEV_FILES[@]}
|
||||
}
|
||||
|
||||
log_assert "Verify attach/detech with multiple vdevs"
|
||||
|
||||
ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS)
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]}
|
||||
|
||||
# Create the single vdev pool used for both resilver modes.
|
||||
log_must zpool create -f $TESTPOOL1 ${VDEV_FILES[0]}
|
||||
|
||||
for replace_mode in "healing" "sequential"; do
|
||||
#
|
||||
# Resilvers abort the dsl_scan and reconfigure it for resilvering.
|
||||
# Rebuilds cancel the dsl_scan and start the vdev_rebuild thread.
|
||||
#
|
||||
if [[ "$replace_mode" = "healing" ]]; then
|
||||
flags=""
|
||||
else
|
||||
flags="-s"
|
||||
fi
|
||||
|
||||
log_mustnot is_pool_resilvering $TESTPOOL1
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
|
||||
|
||||
# Attach first vdev (stripe -> mirror)
|
||||
log_must zpool attach $flags $TESTPOOL1 \
|
||||
${VDEV_FILES[0]} ${VDEV_FILES[1]}
|
||||
log_must is_pool_resilvering $TESTPOOL1
|
||||
|
||||
# Attach second vdev (2-way -> 3-way mirror)
|
||||
log_must zpool attach $flags $TESTPOOL1 \
|
||||
${VDEV_FILES[1]} ${VDEV_FILES[2]}
|
||||
log_must is_pool_resilvering $TESTPOOL1
|
||||
|
||||
# Original vdev cannot be detached until there is sufficient redundancy.
|
||||
log_mustnot zpool detach $TESTPOOL1 ${VDEV_FILES[0]}
|
||||
|
||||
# Detach first vdev (resilver keeps running)
|
||||
log_must zpool detach $TESTPOOL1 ${VDEV_FILES[1]}
|
||||
log_must is_pool_resilvering $TESTPOOL1
|
||||
|
||||
#
|
||||
# Detach second vdev. There's a difference in behavior between
|
||||
# healing and sequential resilvers. A healing resilver will not be
|
||||
# cancelled even though there's nothing on the original vdev which
|
||||
# needs to be rebuilt. A sequential resilver on the other hand is
|
||||
# canceled when returning to a non-redundant striped layout. At
|
||||
# some point the healing resilver behavior should be updated to match
|
||||
# the sequential resilver behavior.
|
||||
#
|
||||
log_must zpool detach $TESTPOOL1 ${VDEV_FILES[2]}
|
||||
|
||||
if [[ "$replace_mode" = "healing" ]]; then
|
||||
log_must is_pool_resilvering $TESTPOOL1
|
||||
else
|
||||
log_mustnot is_pool_resilvering $TESTPOOL1
|
||||
fi
|
||||
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
|
||||
$ORIG_SCAN_SUSPEND_PROGRESS
|
||||
log_must zpool wait $TESTPOOL1
|
||||
done
|
||||
|
||||
log_pass "Verify attach/detech with multiple vdevs"
|
173
tests/zfs-tests/tests/functional/replacement/attach_rebuild.ksh
Executable file
173
tests/zfs-tests/tests/functional/replacement/attach_rebuild.ksh
Executable file
@ -0,0 +1,173 @@
|
||||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
# Use is subject to license terms.
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
|
||||
# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/replacement/replacement.cfg
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Attaching disks during I/O should pass for supported pools.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create multidisk pools (stripe/mirror/raidz) and
|
||||
# start some random I/O
|
||||
# 2. Attach a disk to the pool.
|
||||
# 3. Verify the integrity of the file system and the resilvering.
|
||||
#
|
||||
# NOTE: Raidz does not support the sequential resilver (-s) option.
|
||||
#
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
function cleanup
|
||||
{
|
||||
if [[ -n "$child_pids" ]]; then
|
||||
for wait_pid in $child_pids; do
|
||||
kill $wait_pid
|
||||
done
|
||||
fi
|
||||
|
||||
if poolexists $TESTPOOL1; then
|
||||
destroy_pool $TESTPOOL1
|
||||
fi
|
||||
|
||||
[[ -e $TESTDIR ]] && log_must rm -rf $TESTDIR/*
|
||||
}
|
||||
|
||||
log_assert "Replacing a disk during I/O completes."
|
||||
|
||||
options=""
|
||||
options_display="default options"
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
[[ -n "$HOLES_FILESIZE" ]] && options=" $options -f $HOLES_FILESIZE "
|
||||
|
||||
[[ -n "$HOLES_BLKSIZE" ]] && options="$options -b $HOLES_BLKSIZE "
|
||||
|
||||
[[ -n "$HOLES_COUNT" ]] && options="$options -c $HOLES_COUNT "
|
||||
|
||||
[[ -n "$HOLES_SEED" ]] && options="$options -s $HOLES_SEED "
|
||||
|
||||
[[ -n "$HOLES_FILEOFFSET" ]] && options="$options -o $HOLES_FILEOFFSET "
|
||||
|
||||
options="$options -r "
|
||||
|
||||
[[ -n "$options" ]] && options_display=$options
|
||||
|
||||
child_pids=""
|
||||
|
||||
function attach_test
|
||||
{
|
||||
typeset -i iters=2
|
||||
typeset -i index=0
|
||||
typeset opt=$1
|
||||
typeset disk1=$2
|
||||
typeset disk2=$3
|
||||
|
||||
typeset i=0
|
||||
while [[ $i -lt $iters ]]; do
|
||||
log_note "Invoking file_trunc with: $options_display"
|
||||
file_trunc $options $TESTDIR/$TESTFILE.$i &
|
||||
typeset pid=$!
|
||||
|
||||
sleep 1
|
||||
|
||||
child_pids="$child_pids $pid"
|
||||
((i = i + 1))
|
||||
done
|
||||
|
||||
log_must zpool attach -sw $opt $TESTPOOL1 $disk1 $disk2
|
||||
|
||||
for wait_pid in $child_pids; do
|
||||
kill $wait_pid
|
||||
done
|
||||
child_pids=""
|
||||
|
||||
log_must zpool export $TESTPOOL1
|
||||
log_must zpool import -d $TESTDIR $TESTPOOL1
|
||||
log_must zfs umount $TESTPOOL1/$TESTFS1
|
||||
log_must zdb -cdui $TESTPOOL1/$TESTFS1
|
||||
log_must zfs mount $TESTPOOL1/$TESTFS1
|
||||
verify_pool $TESTPOOL1
|
||||
}
|
||||
|
||||
specials_list=""
|
||||
i=0
|
||||
while [[ $i != 3 ]]; do
|
||||
truncate -s $MINVDEVSIZE $TESTDIR/$TESTFILE1.$i
|
||||
specials_list="$specials_list $TESTDIR/$TESTFILE1.$i"
|
||||
|
||||
((i = i + 1))
|
||||
done
|
||||
|
||||
#
|
||||
# Create a replacement disk special file.
|
||||
#
|
||||
truncate -s $MINVDEVSIZE $TESTDIR/$REPLACEFILE
|
||||
|
||||
for op in "" "-f"; do
|
||||
create_pool $TESTPOOL1 mirror $specials_list
|
||||
log_must zfs create $TESTPOOL1/$TESTFS1
|
||||
log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
|
||||
|
||||
attach_test "$opt" $TESTDIR/$TESTFILE1.1 $TESTDIR/$REPLACEFILE
|
||||
|
||||
zpool iostat -v $TESTPOOL1 | grep "$REPLACEFILE"
|
||||
if [[ $? -ne 0 ]]; then
|
||||
log_fail "$REPLACEFILE is not present."
|
||||
fi
|
||||
|
||||
destroy_pool $TESTPOOL1
|
||||
done
|
||||
|
||||
log_note "Verify 'zpool attach' fails with non-mirrors."
|
||||
|
||||
for type in "" "raidz" "raidz1"; do
|
||||
for op in "" "-f"; do
|
||||
create_pool $TESTPOOL1 $type $specials_list
|
||||
log_must zfs create $TESTPOOL1/$TESTFS1
|
||||
log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
|
||||
|
||||
log_mustnot zpool attach -s "$opt" $TESTDIR/$TESTFILE1.1 \
|
||||
$TESTDIR/$REPLACEFILE
|
||||
|
||||
zpool iostat -v $TESTPOOL1 | grep "$REPLACEFILE"
|
||||
if [[ $? -eq 0 ]]; then
|
||||
log_fail "$REPLACEFILE should not be present."
|
||||
fi
|
||||
|
||||
destroy_pool $TESTPOOL1
|
||||
done
|
||||
done
|
||||
|
||||
log_pass
|
@ -104,9 +104,7 @@ function attach_test
|
||||
((i = i + 1))
|
||||
done
|
||||
|
||||
log_must zpool attach $opt $TESTPOOL1 $disk1 $disk2
|
||||
|
||||
sleep 10
|
||||
log_must zpool attach -w $opt $TESTPOOL1 $disk1 $disk2
|
||||
|
||||
for wait_pid in $child_pids
|
||||
do
|
||||
@ -119,13 +117,13 @@ function attach_test
|
||||
log_must zfs umount $TESTPOOL1/$TESTFS1
|
||||
log_must zdb -cdui $TESTPOOL1/$TESTFS1
|
||||
log_must zfs mount $TESTPOOL1/$TESTFS1
|
||||
|
||||
verify_pool $TESTPOOL1
|
||||
}
|
||||
|
||||
specials_list=""
|
||||
i=0
|
||||
while [[ $i != 2 ]]; do
|
||||
mkfile $MINVDEVSIZE $TESTDIR/$TESTFILE1.$i
|
||||
while [[ $i != 3 ]]; do
|
||||
truncate -s $MINVDEVSIZE $TESTDIR/$TESTFILE1.$i
|
||||
specials_list="$specials_list $TESTDIR/$TESTFILE1.$i"
|
||||
|
||||
((i = i + 1))
|
||||
@ -134,7 +132,7 @@ done
|
||||
#
|
||||
# Create a replacement disk special file.
|
||||
#
|
||||
mkfile $MINVDEVSIZE $TESTDIR/$REPLACEFILE
|
||||
truncate -s $MINVDEVSIZE $TESTDIR/$REPLACEFILE
|
||||
|
||||
for op in "" "-f"; do
|
||||
create_pool $TESTPOOL1 mirror $specials_list
|
||||
@ -143,7 +141,7 @@ for op in "" "-f"; do
|
||||
|
||||
attach_test "$opt" $TESTDIR/$TESTFILE1.1 $TESTDIR/$REPLACEFILE
|
||||
|
||||
zpool iostat -v $TESTPOOL1 | grep "$TESTDIR/$REPLACEFILE"
|
||||
zpool iostat -v $TESTPOOL1 | grep "$REPLACEFILE"
|
||||
if [[ $? -ne 0 ]]; then
|
||||
log_fail "$REPLACEFILE is not present."
|
||||
fi
|
||||
@ -162,7 +160,7 @@ for type in "" "raidz" "raidz1"; do
|
||||
log_mustnot zpool attach "$opt" $TESTDIR/$TESTFILE1.1 \
|
||||
$TESTDIR/$REPLACEFILE
|
||||
|
||||
zpool iostat -v $TESTPOOL1 | grep "$TESTDIR/$REPLACEFILE"
|
||||
zpool iostat -v $TESTPOOL1 | grep "$REPLACEFILE"
|
||||
if [[ $? -eq 0 ]]; then
|
||||
log_fail "$REPLACEFILE should not be present."
|
||||
fi
|
@ -121,8 +121,8 @@ function detach_test
|
||||
|
||||
specials_list=""
|
||||
i=0
|
||||
while [[ $i != 2 ]]; do
|
||||
mkfile $MINVDEVSIZE $TESTDIR/$TESTFILE1.$i
|
||||
while [[ $i != 3 ]]; do
|
||||
truncate -s $MINVDEVSIZE $TESTDIR/$TESTFILE1.$i
|
||||
specials_list="$specials_list $TESTDIR/$TESTFILE1.$i"
|
||||
|
||||
((i = i + 1))
|
||||
@ -134,7 +134,7 @@ log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
|
||||
|
||||
detach_test $TESTDIR/$TESTFILE1.1
|
||||
|
||||
zpool iostat -v $TESTPOOL1 | grep "$TESTDIR/$TESTFILE1.1"
|
||||
zpool iostat -v $TESTPOOL1 | grep "$TESTFILE1.1"
|
||||
if [[ $? -eq 0 ]]; then
|
||||
log_fail "$TESTFILE1.1 should no longer be present."
|
||||
fi
|
||||
@ -143,14 +143,14 @@ destroy_pool $TESTPOOL1
|
||||
|
||||
log_note "Verify 'zpool detach' fails with non-mirrors."
|
||||
|
||||
for type in "" "raidz" "raidz1" ; do
|
||||
for type in "" "raidz" "raidz1"; do
|
||||
create_pool $TESTPOOL1 $type $specials_list
|
||||
log_must zfs create $TESTPOOL1/$TESTFS1
|
||||
log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
|
||||
|
||||
log_mustnot zpool detach $TESTDIR/$TESTFILE1.1
|
||||
|
||||
zpool iostat -v $TESTPOOL1 | grep "$TESTDIR/$TESTFILE1.1"
|
||||
zpool iostat -v $TESTPOOL1 | grep "$TESTFILE1.1"
|
||||
if [[ $? -ne 0 ]]; then
|
||||
log_fail "$TESTFILE1.1 is not present."
|
||||
fi
|
78
tests/zfs-tests/tests/functional/replacement/rebuild_disabled_feature.ksh
Executable file
78
tests/zfs-tests/tests/functional/replacement/rebuild_disabled_feature.ksh
Executable file
@ -0,0 +1,78 @@
|
||||
#!/bin/ksh
|
||||
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2019, Datto Inc. All rights reserved.
|
||||
# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/replacement/replacement.cfg
|
||||
|
||||
#
|
||||
# Description:
|
||||
# Verify device_rebuild feature flags.
|
||||
#
|
||||
# Strategy:
|
||||
# 1. Create a pool with all features disabled.
|
||||
# 2. Verify 'zpool replace -s' fails and the feature is disabled.
|
||||
# 3. Enable the device_rebuild feature.
|
||||
# 4. Verify 'zpool replace -s' works and the feature is active.
|
||||
# 5. Wait for the feature to return to enabled.
|
||||
#
|
||||
|
||||
function cleanup
|
||||
{
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
|
||||
$ORIG_SCAN_SUSPEND_PROGRESS
|
||||
destroy_pool $TESTPOOL1
|
||||
rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
|
||||
}
|
||||
|
||||
function check_feature_flag
|
||||
{
|
||||
feature=$1
|
||||
pool=$2
|
||||
expected_value=$3
|
||||
|
||||
value="$(zpool get -H -o property,value all $pool | \
|
||||
egrep "$feature" | awk '{print $2}')"
|
||||
if [ "$value" = "$expected_value" ]; then
|
||||
log_note "$feature verified to be $value"
|
||||
else
|
||||
log_fail "$feature should be $expected_value but is $value"
|
||||
fi
|
||||
}
|
||||
|
||||
log_assert "Verify device_rebuild feature flags."
|
||||
|
||||
ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS)
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE
|
||||
log_must zpool create -d $TESTPOOL1 ${VDEV_FILES[@]}
|
||||
|
||||
log_mustnot zpool replace -s $TESTPOOL1 ${VDEV_FILES[1]} $SPARE_VDEV_FILE
|
||||
check_feature_flag "feature@device_rebuild" "$TESTPOOL1" "disabled"
|
||||
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
|
||||
log_must zpool set feature@device_rebuild=enabled $TESTPOOL1
|
||||
log_must zpool replace -s $TESTPOOL1 ${VDEV_FILES[1]} $SPARE_VDEV_FILE
|
||||
check_feature_flag "feature@device_rebuild" "$TESTPOOL1" "active"
|
||||
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS $ORIG_SCAN_SUSPEND_PROGRESS
|
||||
log_must zpool wait -t resilver $TESTPOOL1
|
||||
check_feature_flag "feature@device_rebuild" "$TESTPOOL1" "enabled"
|
||||
|
||||
log_pass "Verify device_rebuild feature flags."
|
126
tests/zfs-tests/tests/functional/replacement/rebuild_multiple.ksh
Executable file
126
tests/zfs-tests/tests/functional/replacement/rebuild_multiple.ksh
Executable file
@ -0,0 +1,126 @@
|
||||
#!/bin/ksh -p
|
||||
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2019, Datto Inc. All rights reserved.
|
||||
# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/replacement/replacement.cfg
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Sequential reconstruction (unlike healing reconstruction) operates on the
|
||||
# top-level vdev. This means that a sequential resilver operation can be
|
||||
# started/stopped on a different top-level vdev without impacting other
|
||||
# sequential resilvers.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create a mirrored pool.
|
||||
#
|
||||
|
||||
function cleanup
|
||||
{
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
|
||||
$ORIG_SCAN_SUSPEND_PROGRESS
|
||||
destroy_pool $TESTPOOL1
|
||||
rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE $SPARE_VDEV_FILE2
|
||||
}
|
||||
|
||||
function check_history
|
||||
{
|
||||
pool=$1
|
||||
msg=$2
|
||||
exp=$3
|
||||
|
||||
count=$(zpool history -i $pool | grep "rebuild" | grep -c "$msg")
|
||||
if [[ "$count" -ne "$exp" ]]; then
|
||||
log_fail "Expected $exp rebuild '$msg' messages, found $count"
|
||||
else
|
||||
log_note "Found $count/$exp rebuild '$msg' messages"
|
||||
fi
|
||||
}
|
||||
|
||||
log_assert "Rebuilds operate on the top-level vdevs"
|
||||
|
||||
ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS)
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} \
|
||||
$SPARE_VDEV_FILE $SPARE_VDEV_FILE2
|
||||
|
||||
# Verify two sequential resilvers can run concurrently.
|
||||
log_must zpool create -f $TESTPOOL1 \
|
||||
mirror ${VDEV_FILES[0]} ${VDEV_FILES[1]} \
|
||||
mirror ${VDEV_FILES[2]} ${VDEV_FILES[3]}
|
||||
log_must zfs create $TESTPOOL1/$TESTFS
|
||||
|
||||
mntpnt=$(get_prop mountpoint $TESTPOOL1/$TESTFS)
|
||||
log_must dd if=/dev/urandom of=$mntpnt/file bs=1M count=32
|
||||
log_must zpool sync $TESTPOOL1
|
||||
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
|
||||
|
||||
log_must zpool replace -s $TESTPOOL1 ${VDEV_FILES[1]} $SPARE_VDEV_FILE
|
||||
log_must zpool replace -s $TESTPOOL1 ${VDEV_FILES[3]} $SPARE_VDEV_FILE2
|
||||
|
||||
check_history $TESTPOOL1 "started" 2
|
||||
check_history $TESTPOOL1 "reset" 0
|
||||
check_history $TESTPOOL1 "complete" 0
|
||||
check_history $TESTPOOL1 "canceled" 0
|
||||
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS $ORIG_SCAN_SUSPEND_PROGRESS
|
||||
log_must zpool wait -t resilver $TESTPOOL1
|
||||
|
||||
check_history $TESTPOOL1 "complete" 2
|
||||
destroy_pool $TESTPOOL1
|
||||
|
||||
# Verify canceling one resilver (zpool detach) does not impact others.
|
||||
log_must zpool create -f $TESTPOOL1 \
|
||||
mirror ${VDEV_FILES[0]} ${VDEV_FILES[1]} \
|
||||
mirror ${VDEV_FILES[2]} ${VDEV_FILES[3]}
|
||||
log_must zfs create $TESTPOOL1/$TESTFS
|
||||
|
||||
mntpnt=$(get_prop mountpoint $TESTPOOL1/$TESTFS)
|
||||
log_must dd if=/dev/urandom of=$mntpnt/file bs=1M count=32
|
||||
log_must zpool sync $TESTPOOL1
|
||||
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
|
||||
|
||||
log_must zpool replace -s $TESTPOOL1 ${VDEV_FILES[1]} $SPARE_VDEV_FILE
|
||||
log_must zpool replace -s $TESTPOOL1 ${VDEV_FILES[3]} $SPARE_VDEV_FILE2
|
||||
|
||||
check_history $TESTPOOL1 "started" 2
|
||||
check_history $TESTPOOL1 "reset" 0
|
||||
check_history $TESTPOOL1 "complete" 0
|
||||
check_history $TESTPOOL1 "canceled" 0
|
||||
|
||||
log_must zpool detach $TESTPOOL1 $SPARE_VDEV_FILE2
|
||||
|
||||
check_history $TESTPOOL1 "complete" 0
|
||||
check_history $TESTPOOL1 "canceled" 1
|
||||
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS $ORIG_SCAN_SUSPEND_PROGRESS
|
||||
log_must zpool wait -t resilver $TESTPOOL1
|
||||
|
||||
check_history $TESTPOOL1 "complete" 1
|
||||
check_history $TESTPOOL1 "canceled" 1
|
||||
destroy_pool $TESTPOOL1
|
||||
|
||||
log_pass "Rebuilds operate on the top-level vdevs"
|
70
tests/zfs-tests/tests/functional/replacement/rebuild_raidz.ksh
Executable file
70
tests/zfs-tests/tests/functional/replacement/rebuild_raidz.ksh
Executable file
@ -0,0 +1,70 @@
|
||||
#!/bin/ksh -p
|
||||
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2019, Datto Inc. All rights reserved.
|
||||
# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/replacement/replacement.cfg
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Executing 'zpool replace -s' for raidz vdevs must fail. Sequential
|
||||
# resilvers are only allowed for stripe/mirror pools.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create a raidz pool, verify 'zpool replace -s' fails
|
||||
# 2. Create a stripe/mirror pool, verify 'zpool replace -s' passes
|
||||
#
|
||||
|
||||
# Exit handler (registered with log_onexit): put the SCAN_SUSPEND_PROGRESS
# tunable back to the value captured in ORIG_SCAN_SUSPEND_PROGRESS, destroy
# $TESTPOOL1, and delete the file vdevs and the spare file used by the test.
function cleanup
|
||||
{
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
|
||||
$ORIG_SCAN_SUSPEND_PROGRESS
|
||||
destroy_pool $TESTPOOL1
|
||||
rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
|
||||
}
|
||||
|
||||
log_assert "Sequential resilver is not allowed for raidz vdevs"
|
||||
|
||||
ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS)
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE
|
||||
|
||||
# raidz[1-3]
|
||||
for vdev_type in "raidz" "raidz2" "raidz3"; do
|
||||
log_must zpool create -f $TESTPOOL1 $vdev_type ${VDEV_FILES[@]}
|
||||
log_mustnot zpool replace -s $TESTPOOL1 ${VDEV_FILES[1]} \
|
||||
$SPARE_VDEV_FILE
|
||||
destroy_pool $TESTPOOL1
|
||||
done
|
||||
|
||||
# stripe
|
||||
log_must zpool create $TESTPOOL1 ${VDEV_FILES[@]}
|
||||
log_must zpool replace -s $TESTPOOL1 ${VDEV_FILES[1]} $SPARE_VDEV_FILE
|
||||
destroy_pool $TESTPOOL1
|
||||
|
||||
# mirror
|
||||
log_must zpool create $TESTPOOL1 mirror ${VDEV_FILES[0]} ${VDEV_FILES[1]}
|
||||
log_must zpool replace -s $TESTPOOL1 ${VDEV_FILES[1]} $SPARE_VDEV_FILE
|
||||
destroy_pool $TESTPOOL1
|
||||
|
||||
log_pass "Sequential resilver is not allowed for raidz vdevs"
|
67
tests/zfs-tests/tests/functional/replacement/replace_import.ksh
Executable file
67
tests/zfs-tests/tests/functional/replacement/replace_import.ksh
Executable file
@ -0,0 +1,67 @@
|
||||
#!/bin/ksh
|
||||
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2019, Datto Inc. All rights reserved.
|
||||
# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/replacement/replacement.cfg
|
||||
|
||||
#
|
||||
# Description:
|
||||
# Verify that on import an in progress replace operation is resumed.
|
||||
#
|
||||
# Strategy:
|
||||
# 1. For both healing and sequential resilvering replace:
|
||||
# a. Create a pool
|
||||
# b. Replace a vdev with 'zpool replace' to resilver (-s) it.
|
||||
# c. Export the pool
|
||||
# d. Import the pool
|
||||
# e. Verify the 'zpool replace' resumed resilvering.
|
||||
# f. Destroy the pool
|
||||
#
|
||||
|
||||
# Exit handler (registered with log_onexit): put the SCAN_SUSPEND_PROGRESS
# tunable back to the value captured in ORIG_SCAN_SUSPEND_PROGRESS, destroy
# $TESTPOOL1, and delete the file vdevs and the spare file used by the test.
function cleanup
|
||||
{
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
|
||||
$ORIG_SCAN_SUSPEND_PROGRESS
|
||||
destroy_pool $TESTPOOL1
|
||||
rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
|
||||
}
|
||||
|
||||
log_assert "Verify replace is resumed on import"
|
||||
|
||||
ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS)
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE
|
||||
|
||||
# Verify healing and sequential resilver resume on import.
|
||||
for arg in "" "-s"; do
|
||||
log_must zpool create -f $TESTPOOL1 ${VDEV_FILES[@]}
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
|
||||
# Use the loop variable so both modes are exercised: "" requests a healing
# resilver and "-s" a sequential one. $arg is intentionally unquoted so the
# empty value expands to no argument at all.
log_must zpool replace $arg $TESTPOOL1 ${VDEV_FILES[0]} $SPARE_VDEV_FILE
|
||||
log_must is_pool_resilvering $TESTPOOL1
|
||||
log_must zpool export $TESTPOOL1
|
||||
log_must zpool import -d $TEST_BASE_DIR $TESTPOOL1
|
||||
log_must is_pool_resilvering $TESTPOOL1
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS $ORIG_SCAN_SUSPEND_PROGRESS
|
||||
log_must zpool wait -t resilver $TESTPOOL1
|
||||
log_must is_pool_resilvered $TESTPOOL1
|
||||
destroy_pool $TESTPOOL1
|
||||
done
|
||||
|
||||
log_pass "Verify replace is resumed on import"
|
158
tests/zfs-tests/tests/functional/replacement/replace_rebuild.ksh
Executable file
158
tests/zfs-tests/tests/functional/replacement/replace_rebuild.ksh
Executable file
@ -0,0 +1,158 @@
|
||||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
# Use is subject to license terms.
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
|
||||
# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/replacement/replacement.cfg
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Replacing disks during I/O should pass for supported pools.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create multidisk pools (stripe/mirror) and
|
||||
# start some random I/O
|
||||
# 2. Replace a disk in the pool with another disk.
|
||||
# 3. Verify the integrity of the file system and the rebuilding.
|
||||
#
|
||||
# NOTE: Raidz does not support the sequential resilver (-s) option.
|
||||
#
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
# Exit handler (registered with log_onexit): kill any background writer
# processes still listed in $child_pids, destroy $TESTPOOL1 if it exists,
# and remove everything under $TESTDIR when that path exists.
function cleanup
|
||||
{
|
||||
if [[ -n "$child_pids" ]]; then
|
||||
for wait_pid in $child_pids
|
||||
do
|
||||
kill $wait_pid
|
||||
done
|
||||
fi
|
||||
|
||||
if poolexists $TESTPOOL1; then
|
||||
destroy_pool $TESTPOOL1
|
||||
fi
|
||||
|
||||
[[ -e $TESTDIR ]] && log_must rm -rf $TESTDIR/*
|
||||
}
|
||||
|
||||
# The replace under test is issued with 'zpool replace -s' (sequential
# resilver), so name that option in the assertion message.
log_assert "Replacing a disk with -s during I/O completes."
|
||||
|
||||
options=""
|
||||
options_display="default options"
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
[[ -n "$HOLES_FILESIZE" ]] && options=" $options -f $HOLES_FILESIZE "
|
||||
|
||||
[[ -n "$HOLES_BLKSIZE" ]] && options="$options -b $HOLES_BLKSIZE "
|
||||
|
||||
[[ -n "$HOLES_COUNT" ]] && options="$options -c $HOLES_COUNT "
|
||||
|
||||
[[ -n "$HOLES_SEED" ]] && options="$options -s $HOLES_SEED "
|
||||
|
||||
[[ -n "$HOLES_FILEOFFSET" ]] && options="$options -o $HOLES_FILEOFFSET "
|
||||
|
||||
options="$options -r "
|
||||
|
||||
[[ -n "$options" ]] && options_display=$options
|
||||
|
||||
child_pids=""
|
||||
|
||||
# Replace a disk in $TESTPOOL1 while background I/O is running, then check
# the pool.
#
# Arguments:
#   $1  extra option(s) inserted into the 'zpool replace' command line
#   $2  vdev to be replaced
#   $3  replacement vdev
#
# Steps:
#   1. Start $iters (2) background file_trunc writers on
#      $TESTDIR/$TESTFILE.<i>, pausing one second after each launch, and
#      append their PIDs to the global $child_pids (which cleanup also reads).
#   2. Run 'zpool replace -sw' from $disk1 to $disk2.
#   3. Kill the writers and clear $child_pids.
#   4. Export and re-import the pool, unmount the filesystem, run
#      'zdb -cdui' on it, remount it, and call verify_pool.
#
# Note: 'index' is declared below but not referenced anywhere in this function.
function replace_test
|
||||
{
|
||||
typeset -i iters=2
|
||||
typeset -i index=0
|
||||
typeset opt=$1
|
||||
typeset disk1=$2
|
||||
typeset disk2=$3
|
||||
|
||||
typeset i=0
|
||||
while [[ $i -lt $iters ]]; do
|
||||
log_note "Invoking file_trunc with: $options_display"
|
||||
file_trunc $options $TESTDIR/$TESTFILE.$i &
|
||||
typeset pid=$!
|
||||
|
||||
sleep 1
|
||||
|
||||
child_pids="$child_pids $pid"
|
||||
((i = i + 1))
|
||||
done
|
||||
|
||||
# -s requests sequential reconstruction; -w presumably makes the command
# wait until the replace has finished (confirm against zpool-replace(8)).
log_must zpool replace -sw $opt $TESTPOOL1 $disk1 $disk2
|
||||
|
||||
for wait_pid in $child_pids
|
||||
do
|
||||
kill $wait_pid
|
||||
done
|
||||
child_pids=""
|
||||
|
||||
log_must zpool export $TESTPOOL1
|
||||
log_must zpool import -d $TESTDIR $TESTPOOL1
|
||||
log_must zfs umount $TESTPOOL1/$TESTFS1
|
||||
log_must zdb -cdui $TESTPOOL1/$TESTFS1
|
||||
log_must zfs mount $TESTPOOL1/$TESTFS1
|
||||
verify_pool $TESTPOOL1
|
||||
}
|
||||
|
||||
specials_list=""
|
||||
i=0
|
||||
while [[ $i != 3 ]]; do
|
||||
log_must truncate -s $MINVDEVSIZE $TESTDIR/$TESTFILE1.$i
|
||||
specials_list="$specials_list $TESTDIR/$TESTFILE1.$i"
|
||||
|
||||
((i = i + 1))
|
||||
done
|
||||
|
||||
#
|
||||
# Create a replacement disk special file.
|
||||
#
|
||||
log_must truncate -s $MINVDEVSIZE $TESTDIR/$REPLACEFILE
|
||||
|
||||
for type in "" "mirror"; do
|
||||
for op in "" "-f"; do
|
||||
create_pool $TESTPOOL1 $type $specials_list
|
||||
log_must zfs create $TESTPOOL1/$TESTFS1
|
||||
log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
|
||||
|
||||
# Pass the enclosing loop's variable ($op). The previous "$opt" is never
# given a non-empty value at this scope, so the "-f" iteration silently ran
# without the option.
replace_test "$op" $TESTDIR/$TESTFILE1.1 $TESTDIR/$REPLACEFILE
|
||||
|
||||
zpool iostat -v $TESTPOOL1 | grep "$REPLACEFILE"
|
||||
if [[ $? -ne 0 ]]; then
|
||||
log_fail "$REPLACEFILE is not present."
|
||||
fi
|
||||
|
||||
destroy_pool $TESTPOOL1
|
||||
log_must rm -rf /$TESTPOOL1
|
||||
done
|
||||
done
|
||||
|
||||
log_pass
|
@ -104,9 +104,7 @@ function replace_test
|
||||
((i = i + 1))
|
||||
done
|
||||
|
||||
log_must zpool replace $opt $TESTPOOL1 $disk1 $disk2
|
||||
|
||||
sleep 10
|
||||
log_must zpool replace -w $opt $TESTPOOL1 $disk1 $disk2
|
||||
|
||||
for wait_pid in $child_pids
|
||||
do
|
||||
@ -119,11 +117,12 @@ function replace_test
|
||||
log_must zfs umount $TESTPOOL1/$TESTFS1
|
||||
log_must zdb -cdui $TESTPOOL1/$TESTFS1
|
||||
log_must zfs mount $TESTPOOL1/$TESTFS1
|
||||
verify_pool $TESTPOOL1
|
||||
}
|
||||
|
||||
specials_list=""
|
||||
i=0
|
||||
while [[ $i != 2 ]]; do
|
||||
while [[ $i != 3 ]]; do
|
||||
log_must truncate -s $MINVDEVSIZE $TESTDIR/$TESTFILE1.$i
|
||||
specials_list="$specials_list $TESTDIR/$TESTFILE1.$i"
|
||||
|
||||
@ -143,7 +142,7 @@ for type in "" "raidz" "mirror"; do
|
||||
|
||||
replace_test "$opt" $TESTDIR/$TESTFILE1.1 $TESTDIR/$REPLACEFILE
|
||||
|
||||
zpool iostat -v $TESTPOOL1 | grep "$TESTDIR/$REPLACEFILE"
|
||||
zpool iostat -v $TESTPOOL1 | grep "$REPLACEFILE"
|
||||
if [[ $? -ne 0 ]]; then
|
||||
log_fail "$REPLACEFILE is not present."
|
||||
fi
|
@ -36,3 +36,8 @@ export HOLES_SEED=${HOLES_SEED-""}
|
||||
export HOLES_FILEOFFSET=${HOLES_FILEOFFSET-""}
|
||||
export HOLES_COUNT=${HOLES_COUNT-"16384"} # FILESIZE/BLKSIZE/8
|
||||
export REPLACEFILE="sparedisk"
|
||||
|
||||
set -A VDEV_FILES $TEST_BASE_DIR/file-{1..4}
|
||||
export VDEV_FILE_SIZE=$(( $SPA_MINDEVSIZE * 2 ))
|
||||
export SPARE_VDEV_FILE=$TEST_BASE_DIR/spare-1
|
||||
export SPARE_VDEV_FILE2=$TEST_BASE_DIR/spare-2
|
||||
|
@ -20,7 +20,7 @@
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/resilver/resilver.cfg
|
||||
. $STF_SUITE/tests/functional/replacement/replacement.cfg
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
@ -50,7 +50,7 @@ function cleanup
|
||||
$ORIG_SCAN_SUSPEND_PROGRESS
|
||||
log_must set_tunable32 ZEVENT_LEN_MAX $ORIG_ZFS_ZEVENT_LEN_MAX
|
||||
log_must zinject -c all
|
||||
destroy_pool $TESTPOOL
|
||||
destroy_pool $TESTPOOL1
|
||||
rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
|
||||
}
|
||||
|
||||
@ -70,7 +70,7 @@ function verify_restarts # <msg> <cnt> <defer>
|
||||
[[ -z "$defer" ]] && return
|
||||
|
||||
# use zdb to find which vdevs have the resilver defer flag
|
||||
VDEV_DEFERS=$(zdb -C $TESTPOOL | awk '
|
||||
VDEV_DEFERS=$(zdb -C $TESTPOOL1 | awk '
|
||||
/children/ { gsub(/[^0-9]/, ""); child = $0 }
|
||||
/com\.datto:resilver_defer$/ { print child }
|
||||
')
|
||||
@ -106,17 +106,17 @@ log_must set_tunable32 ZEVENT_LEN_MAX 512
|
||||
|
||||
log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE
|
||||
|
||||
log_must zpool create -f -o feature@resilver_defer=disabled $TESTPOOL \
|
||||
log_must zpool create -f -o feature@resilver_defer=disabled $TESTPOOL1 \
|
||||
raidz ${VDEV_FILES[@]}
|
||||
|
||||
# create 4 filesystems
|
||||
for fs in fs{0..3}
|
||||
do
|
||||
log_must zfs create -o primarycache=none -o recordsize=1k $TESTPOOL/$fs
|
||||
log_must zfs create -o primarycache=none -o recordsize=1k $TESTPOOL1/$fs
|
||||
done
|
||||
|
||||
# simultaneously write 16M to each of them
|
||||
set -A DATAPATHS /$TESTPOOL/fs{0..3}/dat.0
|
||||
set -A DATAPATHS /$TESTPOOL1/fs{0..3}/dat.0
|
||||
log_note "Writing data files"
|
||||
for path in ${DATAPATHS[@]}
|
||||
do
|
||||
@ -131,7 +131,7 @@ do
|
||||
|
||||
if [[ $test == "with" ]]
|
||||
then
|
||||
log_must zpool set feature@resilver_defer=enabled $TESTPOOL
|
||||
log_must zpool set feature@resilver_defer=enabled $TESTPOOL1
|
||||
RESTARTS=( "${DEFER_RESTARTS[@]}" )
|
||||
VDEVS=( "${DEFER_VDEVS[@]}" )
|
||||
VDEV_REPLACE="$SPARE_VDEV_FILE ${VDEV_FILES[1]}"
|
||||
@ -144,7 +144,7 @@ do
|
||||
log_must set_tunable32 RESILVER_MIN_TIME_MS 50
|
||||
|
||||
# initiate a resilver and suspend the scan as soon as possible
|
||||
log_must zpool replace $TESTPOOL $VDEV_REPLACE
|
||||
log_must zpool replace $TESTPOOL1 $VDEV_REPLACE
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
|
||||
|
||||
# there should only be 1 resilver start
|
||||
@ -152,16 +152,16 @@ do
|
||||
|
||||
# offline then online a vdev to introduce a new DTL range after current
|
||||
# scan, which should restart (or defer) the resilver
|
||||
log_must zpool offline $TESTPOOL ${VDEV_FILES[2]}
|
||||
log_must zpool sync $TESTPOOL
|
||||
log_must zpool online $TESTPOOL ${VDEV_FILES[2]}
|
||||
log_must zpool sync $TESTPOOL
|
||||
log_must zpool offline $TESTPOOL1 ${VDEV_FILES[2]}
|
||||
log_must zpool sync $TESTPOOL1
|
||||
log_must zpool online $TESTPOOL1 ${VDEV_FILES[2]}
|
||||
log_must zpool sync $TESTPOOL1
|
||||
|
||||
# there should now be 2 resilver starts w/o defer, 1 with defer
|
||||
verify_restarts ' after offline/online' "${RESTARTS[1]}" "${VDEVS[1]}"
|
||||
|
||||
# inject read io errors on vdev and verify resilver does not restart
|
||||
log_must zinject -a -d ${VDEV_FILES[2]} -e io -T read -f 0.25 $TESTPOOL
|
||||
log_must zinject -a -d ${VDEV_FILES[2]} -e io -T read -f 0.25 $TESTPOOL1
|
||||
log_must cat ${DATAPATHS[1]} > /dev/null
|
||||
log_must zinject -c all
|
||||
|
||||
@ -173,17 +173,12 @@ do
|
||||
log_must set_tunable32 RESILVER_MIN_TIME_MS 3000
|
||||
|
||||
# wait for resilver to finish
|
||||
for iter in {0..59}
|
||||
do
|
||||
is_pool_resilvered $TESTPOOL && break
|
||||
sleep 1
|
||||
done
|
||||
is_pool_resilvered $TESTPOOL ||
|
||||
log_fail "resilver timed out"
|
||||
log_must zpool wait -t resilver $TESTPOOL1
|
||||
log_must is_pool_resilvered $TESTPOOL1
|
||||
|
||||
# wait for a few txg's to see if a resilver happens
|
||||
log_must zpool sync $TESTPOOL
|
||||
log_must zpool sync $TESTPOOL
|
||||
log_must zpool sync $TESTPOOL1
|
||||
log_must zpool sync $TESTPOOL1
|
||||
|
||||
# there should now be 2 resilver starts
|
||||
verify_restarts ' after resilver' "${RESTARTS[3]}" "${VDEVS[3]}"
|
@ -20,7 +20,7 @@
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/resilver/resilver.cfg
|
||||
. $STF_SUITE/tests/functional/replacement/replacement.cfg
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
@ -40,7 +40,7 @@
|
||||
function cleanup
|
||||
{
|
||||
log_must zinject -c all
|
||||
destroy_pool $TESTPOOL
|
||||
destroy_pool $TESTPOOL1
|
||||
rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
|
||||
log_must set_tunable32 SCAN_LEGACY $ORIG_SCAN_LEGACY
|
||||
}
|
||||
@ -56,25 +56,25 @@ log_must set_tunable32 SCAN_LEGACY 1
|
||||
|
||||
# create the pool and a 32M file (32k blocks)
|
||||
log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[0]} $SPARE_VDEV_FILE
|
||||
log_must zpool create -f -O recordsize=1k $TESTPOOL ${VDEV_FILES[0]}
|
||||
log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=1M count=32 > /dev/null 2>&1
|
||||
log_must zpool create -f -O recordsize=1k $TESTPOOL1 ${VDEV_FILES[0]}
|
||||
log_must dd if=/dev/urandom of=/$TESTPOOL1/file bs=1M count=32 > /dev/null 2>&1
|
||||
|
||||
# determine objset/object
|
||||
objset=$(zdb -d $TESTPOOL/ | sed -ne 's/.*ID \([0-9]*\).*/\1/p')
|
||||
object=$(ls -i /$TESTPOOL/file | awk '{print $1}')
|
||||
objset=$(zdb -d $TESTPOOL1/ | sed -ne 's/.*ID \([0-9]*\).*/\1/p')
|
||||
object=$(ls -i /$TESTPOOL1/file | awk '{print $1}')
|
||||
|
||||
# inject event to cause error during resilver
|
||||
log_must zinject -b `printf "%x:%x:0:3fff" $objset $object` $TESTPOOL
|
||||
log_must zinject -b `printf "%x:%x:0:3fff" $objset $object` $TESTPOOL1
|
||||
|
||||
# clear events and start resilver
|
||||
log_must zpool events -c
|
||||
log_must zpool attach $TESTPOOL ${VDEV_FILES[0]} $SPARE_VDEV_FILE
|
||||
log_must zpool attach $TESTPOOL1 ${VDEV_FILES[0]} $SPARE_VDEV_FILE
|
||||
|
||||
log_note "waiting for read errors to start showing up"
|
||||
for iter in {0..59}
|
||||
do
|
||||
zpool sync $TESTPOOL
|
||||
err=$(zpool status $TESTPOOL | grep ${VDEV_FILES[0]} | awk '{print $3}')
|
||||
zpool sync $TESTPOOL1
|
||||
err=$(zpool status $TESTPOOL1 | grep ${VDEV_FILES[0]} | awk '{print $3}')
|
||||
(( $err > 0 )) && break
|
||||
sleep 1
|
||||
done
|
||||
@ -92,8 +92,8 @@ done
|
||||
(( $finish == 0 )) && log_fail "resilver took too long to finish"
|
||||
|
||||
# wait a few syncs to ensure that zfs does not restart the resilver
|
||||
log_must zpool sync $TESTPOOL
|
||||
log_must zpool sync $TESTPOOL
|
||||
log_must zpool sync $TESTPOOL1
|
||||
log_must zpool sync $TESTPOOL1
|
||||
|
||||
# check if resilver was restarted
|
||||
start=$(zpool events | grep "sysevent.fs.zfs.resilver_start" | wc -l)
|
112
tests/zfs-tests/tests/functional/replacement/scrub_cancel.ksh
Executable file
112
tests/zfs-tests/tests/functional/replacement/scrub_cancel.ksh
Executable file
@ -0,0 +1,112 @@
|
||||
#!/bin/ksh -p
|
||||
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2019, Datto Inc. All rights reserved.
|
||||
# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/replacement/replacement.cfg
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Verify scrub behaves as intended when contending with a healing or
|
||||
# sequential resilver.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create a pool
|
||||
# 2. Add a modest amount of data to the pool.
|
||||
# 3. For healing and sequential resilver:
|
||||
# a. Start scrubbing.
|
||||
# b. Verify a resilver can be started and it cancels the scrub.
|
||||
# c. Verify a scrub cannot be started when resilvering
|
||||
#
|
||||
|
||||
# Exit handler (registered with log_onexit): put the RESILVER_MIN_TIME_MS and
# SCAN_SUSPEND_PROGRESS tunables back to the values captured at startup,
# destroy $TESTPOOL1, and delete the file vdevs and the spare file.
function cleanup
|
||||
{
|
||||
log_must set_tunable32 RESILVER_MIN_TIME_MS $ORIG_RESILVER_MIN_TIME
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
|
||||
$ORIG_SCAN_SUSPEND_PROGRESS
|
||||
destroy_pool $TESTPOOL1
|
||||
rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
|
||||
}
|
||||
|
||||
log_assert "Scrub was cancelled by resilver"
|
||||
|
||||
ORIG_RESILVER_MIN_TIME=$(get_tunable RESILVER_MIN_TIME_MS)
|
||||
ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS)
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE
|
||||
|
||||
log_must zpool create -f $TESTPOOL1 ${VDEV_FILES[@]}
|
||||
log_must zfs create $TESTPOOL1/$TESTFS
|
||||
|
||||
mntpnt=$(get_prop mountpoint $TESTPOOL1/$TESTFS)
|
||||
log_must dd if=/dev/urandom of=$mntpnt/file bs=1M count=64
|
||||
log_must zpool sync $TESTPOOL1
|
||||
|
||||
# Request a healing or sequential resilver
|
||||
for replace_mode in "healing" "sequential"; do
|
||||
|
||||
#
|
||||
# Healing resilvers abort the dsl_scan and reconfigure it for
|
||||
# resilvering. Sequential resilvers cancel the dsl_scan and start
|
||||
# the vdev_rebuild thread.
|
||||
#
|
||||
if [[ "$replace_mode" = "healing" ]]; then
|
||||
history_msg="scan aborted, restarting"
|
||||
flags=""
|
||||
else
|
||||
history_msg="scan cancelled"
|
||||
flags="-s"
|
||||
fi
|
||||
|
||||
# Limit scanning time and suspend the scan as soon as possible.
|
||||
log_must set_tunable32 RESILVER_MIN_TIME_MS 50
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
|
||||
|
||||
# Initiate a scrub.
|
||||
log_must zpool scrub $TESTPOOL1
|
||||
|
||||
# Initiate a resilver to cancel the scrub.
|
||||
log_must zpool replace $flags $TESTPOOL1 ${VDEV_FILES[1]} \
|
||||
$SPARE_VDEV_FILE
|
||||
|
||||
# Verify the scrub was canceled, it may take a few seconds to exit.
|
||||
while is_pool_scrubbing $TESTPOOL1; do
|
||||
sleep 1
|
||||
done
|
||||
log_mustnot is_pool_scrubbing $TESTPOOL1
|
||||
|
||||
# Verify a scrub cannot be started while resilvering.
|
||||
log_must is_pool_resilvering $TESTPOOL1
|
||||
log_mustnot zpool scrub $TESTPOOL1
|
||||
|
||||
# Unsuspend resilver.
|
||||
log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
|
||||
log_must set_tunable32 RESILVER_MIN_TIME_MS 3000
|
||||
|
||||
# Wait for resilver to finish then put the original back.
|
||||
log_must zpool wait $TESTPOOL1
|
||||
log_must zpool replace $flags -w $TESTPOOL1 $SPARE_VDEV_FILE \
|
||||
${VDEV_FILES[1]}
|
||||
done
|
||||
log_pass "Scrub was cancelled by resilver"
|
||||
|
@ -1,9 +0,0 @@
|
||||
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/resilver
|
||||
dist_pkgdata_SCRIPTS = \
|
||||
setup.ksh \
|
||||
cleanup.ksh \
|
||||
resilver_restart_001.ksh \
|
||||
resilver_restart_002.ksh
|
||||
|
||||
dist_pkgdata_DATA = \
|
||||
resilver.cfg
|
@ -1,31 +0,0 @@
|
||||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
|
||||
#
|
||||
# Copyright (c) 2019, Datto Inc. All rights reserved.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/resilver/resilver.cfg
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
log_pass
|
@ -1,32 +0,0 @@
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
|
||||
#
|
||||
# Copyright (c) 2019, Datto Inc. All rights reserved.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
set -A VDEV_FILES $TEST_BASE_DIR/file-{1..4}
|
||||
SPARE_VDEV_FILE=$TEST_BASE_DIR/spare-1
|
||||
|
||||
VDEV_FILE_SIZE=$(( $SPA_MINDEVSIZE * 2 ))
|
@ -1,31 +0,0 @@
|
||||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
|
||||
#
|
||||
# Copyright (c) 2019, Datto Inc. All rights reserved.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/resilver/resilver.cfg
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
log_pass
|
Loading…
Reference in New Issue
Block a user