mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 10:37:35 +03:00
Teach zpool scrub to scrub only blocks in error log
Add an '-e' flag to zpool scrub that scrubs only the blocks listed in the error log. A user can pause, resume and cancel the error scrub by passing the additional command line arguments -p and -s, just like a regular scrub. This involves adding a new flag, creating new libzfs interfaces, a new ioctl, and the actual iteration and read-issuing logic. Error scrubbing is spread across multiple txgs to make sure pool performance is not affected. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Co-authored-by: TulsiJain <tulsi.jain@delphix.com> Signed-off-by: George Amanakis <gamanakis@gmail.com> Closes #8995 Closes #12355
This commit is contained in:
committed by
Brian Behlendorf
parent
e34e15ed6d
commit
482eeef804
+101
-10
@@ -401,7 +401,7 @@ get_usage(zpool_help_t idx)
|
||||
return (gettext("\tinitialize [-c | -s | -u] [-w] <pool> "
|
||||
"[<device> ...]\n"));
|
||||
case HELP_SCRUB:
|
||||
return (gettext("\tscrub [-s | -p] [-w] <pool> ...\n"));
|
||||
return (gettext("\tscrub [-s | -p] [-w] [-e] <pool> ...\n"));
|
||||
case HELP_RESILVER:
|
||||
return (gettext("\tresilver <pool> ...\n"));
|
||||
case HELP_TRIM:
|
||||
@@ -7309,8 +7309,9 @@ wait_callback(zpool_handle_t *zhp, void *data)
|
||||
}
|
||||
|
||||
/*
|
||||
* zpool scrub [-s | -p] [-w] <pool> ...
|
||||
* zpool scrub [-s | -p] [-w] [-e] <pool> ...
|
||||
*
|
||||
* -e Only scrub blocks in the error log.
|
||||
* -s Stop. Stops any in-progress scrub.
|
||||
* -p Pause. Pause in-progress scrub.
|
||||
* -w Wait. Blocks until scrub has completed.
|
||||
@@ -7326,14 +7327,21 @@ zpool_do_scrub(int argc, char **argv)
|
||||
cb.cb_type = POOL_SCAN_SCRUB;
|
||||
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
|
||||
|
||||
boolean_t is_error_scrub = B_FALSE;
|
||||
boolean_t is_pause = B_FALSE;
|
||||
boolean_t is_stop = B_FALSE;
|
||||
|
||||
/* check options */
|
||||
while ((c = getopt(argc, argv, "spw")) != -1) {
|
||||
while ((c = getopt(argc, argv, "spwe")) != -1) {
|
||||
switch (c) {
|
||||
case 'e':
|
||||
is_error_scrub = B_TRUE;
|
||||
break;
|
||||
case 's':
|
||||
cb.cb_type = POOL_SCAN_NONE;
|
||||
is_stop = B_TRUE;
|
||||
break;
|
||||
case 'p':
|
||||
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
|
||||
is_pause = B_TRUE;
|
||||
break;
|
||||
case 'w':
|
||||
wait = B_TRUE;
|
||||
@@ -7345,11 +7353,21 @@ zpool_do_scrub(int argc, char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
if (cb.cb_type == POOL_SCAN_NONE &&
|
||||
cb.cb_scrub_cmd == POOL_SCRUB_PAUSE) {
|
||||
(void) fprintf(stderr, gettext("invalid option combination: "
|
||||
"-s and -p are mutually exclusive\n"));
|
||||
if (is_pause && is_stop) {
|
||||
(void) fprintf(stderr, gettext("invalid option "
|
||||
"combination :-s and -p are mutually exclusive\n"));
|
||||
usage(B_FALSE);
|
||||
} else {
|
||||
if (is_error_scrub)
|
||||
cb.cb_type = POOL_SCAN_ERRORSCRUB;
|
||||
|
||||
if (is_pause) {
|
||||
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
|
||||
} else if (is_stop) {
|
||||
cb.cb_type = POOL_SCAN_NONE;
|
||||
} else {
|
||||
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
|
||||
}
|
||||
}
|
||||
|
||||
if (wait && (cb.cb_type == POOL_SCAN_NONE ||
|
||||
@@ -7573,6 +7591,70 @@ secs_to_dhms(uint64_t total, char *buf)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Print out detailed error scrub status.
|
||||
*/
|
||||
static void
|
||||
print_err_scrub_status(pool_scan_stat_t *ps)
|
||||
{
|
||||
time_t start, end, pause;
|
||||
uint64_t total_secs_left;
|
||||
uint64_t secs_left, mins_left, hours_left, days_left;
|
||||
uint64_t examined, to_be_examined;
|
||||
|
||||
if (ps == NULL || ps->pss_error_scrub_func != POOL_SCAN_ERRORSCRUB) {
|
||||
return;
|
||||
}
|
||||
|
||||
(void) printf(gettext(" scrub: "));
|
||||
|
||||
start = ps->pss_error_scrub_start;
|
||||
end = ps->pss_error_scrub_end;
|
||||
pause = ps->pss_pass_error_scrub_pause;
|
||||
examined = ps->pss_error_scrub_examined;
|
||||
to_be_examined = ps->pss_error_scrub_to_be_examined;
|
||||
|
||||
assert(ps->pss_error_scrub_func == POOL_SCAN_ERRORSCRUB);
|
||||
|
||||
if (ps->pss_error_scrub_state == DSS_FINISHED) {
|
||||
total_secs_left = end - start;
|
||||
days_left = total_secs_left / 60 / 60 / 24;
|
||||
hours_left = (total_secs_left / 60 / 60) % 24;
|
||||
mins_left = (total_secs_left / 60) % 60;
|
||||
secs_left = (total_secs_left % 60);
|
||||
|
||||
(void) printf(gettext("scrubbed %llu error blocks in %llu days "
|
||||
"%02llu:%02llu:%02llu on %s"), (u_longlong_t)examined,
|
||||
(u_longlong_t)days_left, (u_longlong_t)hours_left,
|
||||
(u_longlong_t)mins_left, (u_longlong_t)secs_left,
|
||||
ctime(&end));
|
||||
|
||||
return;
|
||||
} else if (ps->pss_error_scrub_state == DSS_CANCELED) {
|
||||
(void) printf(gettext("error scrub canceled on %s"),
|
||||
ctime(&end));
|
||||
return;
|
||||
}
|
||||
assert(ps->pss_error_scrub_state == DSS_ERRORSCRUBBING);
|
||||
|
||||
/* Error scrub is in progress. */
|
||||
if (pause == 0) {
|
||||
(void) printf(gettext("error scrub in progress since %s"),
|
||||
ctime(&start));
|
||||
} else {
|
||||
(void) printf(gettext("error scrub paused since %s"),
|
||||
ctime(&pause));
|
||||
(void) printf(gettext("\terror scrub started on %s"),
|
||||
ctime(&start));
|
||||
}
|
||||
|
||||
double fraction_done = (double)examined / (to_be_examined + examined);
|
||||
(void) printf(gettext("\t%.2f%% done, issued I/O for %llu error"
|
||||
" blocks"), 100 * fraction_done, (u_longlong_t)examined);
|
||||
|
||||
(void) printf("\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* Print out detailed scrub status.
|
||||
*/
|
||||
@@ -7909,10 +7991,12 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)
|
||||
{
|
||||
uint64_t rebuild_end_time = 0, resilver_end_time = 0;
|
||||
boolean_t have_resilver = B_FALSE, have_scrub = B_FALSE;
|
||||
boolean_t have_errorscrub = B_FALSE;
|
||||
boolean_t active_resilver = B_FALSE;
|
||||
pool_checkpoint_stat_t *pcs = NULL;
|
||||
pool_scan_stat_t *ps = NULL;
|
||||
uint_t c;
|
||||
time_t scrub_start = 0, errorscrub_start = 0;
|
||||
|
||||
if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS,
|
||||
(uint64_t **)&ps, &c) == 0) {
|
||||
@@ -7921,16 +8005,23 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)
|
||||
active_resilver = (ps->pss_state == DSS_SCANNING);
|
||||
}
|
||||
|
||||
|
||||
have_resilver = (ps->pss_func == POOL_SCAN_RESILVER);
|
||||
have_scrub = (ps->pss_func == POOL_SCAN_SCRUB);
|
||||
scrub_start = ps->pss_start_time;
|
||||
have_errorscrub = (ps->pss_error_scrub_func ==
|
||||
POOL_SCAN_ERRORSCRUB);
|
||||
errorscrub_start = ps->pss_error_scrub_start;
|
||||
}
|
||||
|
||||
boolean_t active_rebuild = check_rebuilding(nvroot, &rebuild_end_time);
|
||||
boolean_t have_rebuild = (active_rebuild || (rebuild_end_time > 0));
|
||||
|
||||
/* Always print the scrub status when available. */
|
||||
if (have_scrub)
|
||||
if (have_scrub && scrub_start > errorscrub_start)
|
||||
print_scan_scrub_resilver_status(ps);
|
||||
else if (have_errorscrub && errorscrub_start >= scrub_start)
|
||||
print_err_scrub_status(ps);
|
||||
|
||||
/*
|
||||
* When there is an active resilver or rebuild print its status.
|
||||
|
||||
Reference in New Issue
Block a user