Implemented zpool scrub pause/resume

Currently, there is no way to pause a scrub. Pausing may
be useful to preserve bandwidth when the pool is busy with
other I/O.

This patch adds the ability to pause and resume scrubbing.
This is achieved by maintaining a persistent on-disk scrub state.
While the state is 'paused', we do not scrub any more blocks.
We do, however, still perform regular scan housekeeping, such as
freeing async destroyed and deadlist blocks.

Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Thomas Caputi <tcaputi@datto.com>
Reviewed-by: Serapheim Dimitropoulos <serapheimd@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Alek Pinchuk <apinchuk@datto.com>
Closes #6167
This commit is contained in:
Alek P 2017-07-06 22:16:13 -07:00 committed by Brian Behlendorf
parent 94b25662c5
commit 0ea05c64f8
17 changed files with 364 additions and 126 deletions

View File

@ -342,7 +342,7 @@ get_usage(zpool_help_t idx)
case HELP_REOPEN: case HELP_REOPEN:
return (gettext("\treopen <pool>\n")); return (gettext("\treopen <pool>\n"));
case HELP_SCRUB: case HELP_SCRUB:
return (gettext("\tscrub [-s] <pool> ...\n")); return (gettext("\tscrub [-s | -p] <pool> ...\n"));
case HELP_STATUS: case HELP_STATUS:
return (gettext("\tstatus [-c [script1,script2,...]] [-gLPvxD]" return (gettext("\tstatus [-c [script1,script2,...]] [-gLPvxD]"
"[-T d|u] [pool] ... [interval [count]]\n")); "[-T d|u] [pool] ... [interval [count]]\n"));
@ -5759,6 +5759,7 @@ typedef struct scrub_cbdata {
int cb_type; int cb_type;
int cb_argc; int cb_argc;
char **cb_argv; char **cb_argv;
pool_scrub_cmd_t cb_scrub_cmd;
} scrub_cbdata_t; } scrub_cbdata_t;
int int
@ -5776,15 +5777,16 @@ scrub_callback(zpool_handle_t *zhp, void *data)
return (1); return (1);
} }
err = zpool_scan(zhp, cb->cb_type); err = zpool_scan(zhp, cb->cb_type, cb->cb_scrub_cmd);
return (err != 0); return (err != 0);
} }
/* /*
* zpool scrub [-s] <pool> ... * zpool scrub [-s | -p] <pool> ...
* *
* -s Stop. Stops any in-progress scrub. * -s Stop. Stops any in-progress scrub.
* -p Pause. Pause in-progress scrub.
*/ */
int int
zpool_do_scrub(int argc, char **argv) zpool_do_scrub(int argc, char **argv)
@ -5793,13 +5795,17 @@ zpool_do_scrub(int argc, char **argv)
scrub_cbdata_t cb; scrub_cbdata_t cb;
cb.cb_type = POOL_SCAN_SCRUB; cb.cb_type = POOL_SCAN_SCRUB;
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
/* check options */ /* check options */
while ((c = getopt(argc, argv, "s")) != -1) { while ((c = getopt(argc, argv, "sp")) != -1) {
switch (c) { switch (c) {
case 's': case 's':
cb.cb_type = POOL_SCAN_NONE; cb.cb_type = POOL_SCAN_NONE;
break; break;
case 'p':
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
break;
case '?': case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"), (void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt); optopt);
@ -5807,6 +5813,13 @@ zpool_do_scrub(int argc, char **argv)
} }
} }
if (cb.cb_type == POOL_SCAN_NONE &&
cb.cb_scrub_cmd == POOL_SCRUB_PAUSE) {
(void) fprintf(stderr, gettext("invalid option combination: "
"-s and -p are mutually exclusive\n"));
usage(B_FALSE);
}
cb.cb_argc = argc; cb.cb_argc = argc;
cb.cb_argv = argv; cb.cb_argv = argv;
argc -= optind; argc -= optind;
@ -5826,7 +5839,7 @@ zpool_do_scrub(int argc, char **argv)
void void
print_scan_status(pool_scan_stat_t *ps) print_scan_status(pool_scan_stat_t *ps)
{ {
time_t start, end; time_t start, end, pause;
uint64_t elapsed, mins_left, hours_left; uint64_t elapsed, mins_left, hours_left;
uint64_t pass_exam, examined, total; uint64_t pass_exam, examined, total;
uint_t rate; uint_t rate;
@ -5844,6 +5857,7 @@ print_scan_status(pool_scan_stat_t *ps)
start = ps->pss_start_time; start = ps->pss_start_time;
end = ps->pss_end_time; end = ps->pss_end_time;
pause = ps->pss_pass_scrub_pause;
zfs_nicebytes(ps->pss_processed, processed_buf, sizeof (processed_buf)); zfs_nicebytes(ps->pss_processed, processed_buf, sizeof (processed_buf));
assert(ps->pss_func == POOL_SCAN_SCRUB || assert(ps->pss_func == POOL_SCAN_SCRUB ||
@ -5886,8 +5900,17 @@ print_scan_status(pool_scan_stat_t *ps)
* Scan is in progress. * Scan is in progress.
*/ */
if (ps->pss_func == POOL_SCAN_SCRUB) { if (ps->pss_func == POOL_SCAN_SCRUB) {
(void) printf(gettext("scrub in progress since %s"), if (pause == 0) {
ctime(&start)); (void) printf(gettext("scrub in progress since %s"),
ctime(&start));
} else {
char buf[32];
struct tm *p = localtime(&pause);
(void) strftime(buf, sizeof (buf), "%a %b %e %T %Y", p);
(void) printf(gettext("scrub paused since %s\n"), buf);
(void) printf(gettext("\tscrub started on %s"),
ctime(&start));
}
} else if (ps->pss_func == POOL_SCAN_RESILVER) { } else if (ps->pss_func == POOL_SCAN_RESILVER) {
(void) printf(gettext("resilver in progress since %s"), (void) printf(gettext("resilver in progress since %s"),
ctime(&start)); ctime(&start));
@ -5899,6 +5922,7 @@ print_scan_status(pool_scan_stat_t *ps)
/* elapsed time for this pass */ /* elapsed time for this pass */
elapsed = time(NULL) - ps->pss_pass_start; elapsed = time(NULL) - ps->pss_pass_start;
elapsed -= ps->pss_pass_scrub_spent_paused;
elapsed = elapsed ? elapsed : 1; elapsed = elapsed ? elapsed : 1;
pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1; pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
rate = pass_exam / elapsed; rate = pass_exam / elapsed;
@ -5908,19 +5932,25 @@ print_scan_status(pool_scan_stat_t *ps)
zfs_nicebytes(examined, examined_buf, sizeof (examined_buf)); zfs_nicebytes(examined, examined_buf, sizeof (examined_buf));
zfs_nicebytes(total, total_buf, sizeof (total_buf)); zfs_nicebytes(total, total_buf, sizeof (total_buf));
zfs_nicebytes(rate, rate_buf, sizeof (rate_buf));
/* /*
* do not print estimated time if hours_left is more than 30 days * do not print estimated time if hours_left is more than 30 days
* or we have a paused scrub
*/ */
(void) printf(gettext("\t%s scanned out of %s at %s/s"), if (pause == 0) {
examined_buf, total_buf, rate_buf); zfs_nicebytes(rate, rate_buf, sizeof (rate_buf));
if (hours_left < (30 * 24)) { (void) printf(gettext("\t%s scanned out of %s at %s/s"),
(void) printf(gettext(", %lluh%um to go\n"), examined_buf, total_buf, rate_buf);
(u_longlong_t)hours_left, (uint_t)(mins_left % 60)); if (hours_left < (30 * 24)) {
(void) printf(gettext(", %lluh%um to go\n"),
(u_longlong_t)hours_left, (uint_t)(mins_left % 60));
} else {
(void) printf(gettext(
", (scan is slow, no estimated time)\n"));
}
} else { } else {
(void) printf(gettext( (void) printf(gettext("\t%s scanned out of %s\n"),
", (scan is slow, no estimated time)\n")); examined_buf, total_buf);
} }
if (ps->pss_func == POOL_SCAN_RESILVER) { if (ps->pss_func == POOL_SCAN_RESILVER) {

View File

@ -147,6 +147,7 @@ typedef enum zfs_error {
EZFS_DIFF, /* general failure of zfs diff */ EZFS_DIFF, /* general failure of zfs diff */
EZFS_DIFFDATA, /* bad zfs diff data */ EZFS_DIFFDATA, /* bad zfs diff data */
EZFS_POOLREADONLY, /* pool is in read-only mode */ EZFS_POOLREADONLY, /* pool is in read-only mode */
EZFS_SCRUB_PAUSED, /* scrub currently paused */
EZFS_UNKNOWN EZFS_UNKNOWN
} zfs_error_t; } zfs_error_t;
@ -260,7 +261,7 @@ typedef struct splitflags {
/* /*
* Functions to manipulate pool and vdev state * Functions to manipulate pool and vdev state
*/ */
extern int zpool_scan(zpool_handle_t *, pool_scan_func_t); extern int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t);
extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *); extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *);
extern int zpool_reguid(zpool_handle_t *); extern int zpool_reguid(zpool_handle_t *);
extern int zpool_reopen(zpool_handle_t *); extern int zpool_reopen(zpool_handle_t *);

View File

@ -21,6 +21,7 @@
/* /*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2017 Datto Inc.
*/ */
#ifndef _SYS_DSL_SCAN_H #ifndef _SYS_DSL_SCAN_H
@ -70,6 +71,7 @@ typedef struct dsl_scan_phys {
typedef enum dsl_scan_flags { typedef enum dsl_scan_flags {
DSF_VISIT_DS_AGAIN = 1<<0, DSF_VISIT_DS_AGAIN = 1<<0,
DSF_SCRUB_PAUSED = 1<<1,
} dsl_scan_flags_t; } dsl_scan_flags_t;
#define DSL_SCAN_FLAGS_MASK (DSF_VISIT_DS_AGAIN) #define DSL_SCAN_FLAGS_MASK (DSF_VISIT_DS_AGAIN)
@ -84,8 +86,8 @@ typedef enum dsl_scan_flags {
* *
* The following members of this structure direct the behavior of the scan: * The following members of this structure direct the behavior of the scan:
* *
* scn_pausing - a scan that cannot be completed in a single txg or * scn_suspending - a scan that cannot be completed in a single txg or
* has exceeded its allotted time will need to pause. * has exceeded its allotted time will need to suspend.
* When this flag is set the scanner will stop traversing * When this flag is set the scanner will stop traversing
* the pool and write out the current state to disk. * the pool and write out the current state to disk.
* *
@ -107,7 +109,7 @@ typedef enum dsl_scan_flags {
typedef struct dsl_scan { typedef struct dsl_scan {
struct dsl_pool *scn_dp; struct dsl_pool *scn_dp;
boolean_t scn_pausing; boolean_t scn_suspending;
uint64_t scn_restart_txg; uint64_t scn_restart_txg;
uint64_t scn_done_txg; uint64_t scn_done_txg;
uint64_t scn_sync_start_time; uint64_t scn_sync_start_time;
@ -117,8 +119,6 @@ typedef struct dsl_scan {
boolean_t scn_is_bptree; boolean_t scn_is_bptree;
boolean_t scn_async_destroying; boolean_t scn_async_destroying;
boolean_t scn_async_stalled; boolean_t scn_async_stalled;
/* for debugging / information */
uint64_t scn_visited_this_txg; uint64_t scn_visited_this_txg;
dsl_scan_phys_t scn_phys; dsl_scan_phys_t scn_phys;
@ -129,6 +129,8 @@ void dsl_scan_fini(struct dsl_pool *dp);
void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *); void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
int dsl_scan_cancel(struct dsl_pool *); int dsl_scan_cancel(struct dsl_pool *);
int dsl_scan(struct dsl_pool *, pool_scan_func_t); int dsl_scan(struct dsl_pool *, pool_scan_func_t);
boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
void dsl_resilver_restart(struct dsl_pool *, uint64_t txg); void dsl_resilver_restart(struct dsl_pool *, uint64_t txg);
boolean_t dsl_scan_resilvering(struct dsl_pool *dp); boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds); boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
@ -139,6 +141,7 @@ void dsl_scan_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2, void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
struct dmu_tx *tx); struct dmu_tx *tx);
boolean_t dsl_scan_active(dsl_scan_t *scn); boolean_t dsl_scan_active(dsl_scan_t *scn);
boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -765,6 +765,16 @@ typedef enum pool_scan_func {
POOL_SCAN_FUNCS POOL_SCAN_FUNCS
} pool_scan_func_t; } pool_scan_func_t;
/*
 * Scrub sub-command that accompanies a scrub request.  It selects between
 * running a scrub (starting one, or resuming a paused one) and pausing the
 * scrub that is currently in progress.
 */
typedef enum pool_scrub_cmd {
	POOL_SCRUB_NORMAL = 0,		/* start a scrub / resume a paused one */
	POOL_SCRUB_PAUSE = 1,		/* pause the in-progress scrub */
	/* NOTE(review): looks like an end-of-list marker, not a command */
	POOL_SCRUB_FLAGS_END = 2
} pool_scrub_cmd_t;
/* /*
* ZIO types. Needed to interpret vdev statistics below. * ZIO types. Needed to interpret vdev statistics below.
*/ */
@ -797,6 +807,9 @@ typedef struct pool_scan_stat {
/* values not stored on disk */ /* values not stored on disk */
uint64_t pss_pass_exam; /* examined bytes per scan pass */ uint64_t pss_pass_exam; /* examined bytes per scan pass */
uint64_t pss_pass_start; /* start time of a scan pass */ uint64_t pss_pass_start; /* start time of a scan pass */
uint64_t pss_pass_scrub_pause; /* pause time of a scrub pass */
/* cumulative time scrub spent paused, needed for rate calculation */
uint64_t pss_pass_scrub_spent_paused;
} pool_scan_stat_t; } pool_scan_stat_t;
typedef enum dsl_scan_state { typedef enum dsl_scan_state {

View File

@ -24,6 +24,7 @@
* Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2017 Datto Inc.
*/ */
#ifndef _SYS_SPA_H #ifndef _SYS_SPA_H
@ -657,6 +658,7 @@ extern void spa_l2cache_drop(spa_t *spa);
/* scanning */ /* scanning */
extern int spa_scan(spa_t *spa, pool_scan_func_t func); extern int spa_scan(spa_t *spa, pool_scan_func_t func);
extern int spa_scan_stop(spa_t *spa); extern int spa_scan_stop(spa_t *spa);
extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);
/* spa syncing */ /* spa syncing */
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */ extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */

View File

@ -25,6 +25,7 @@
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved.
* Copyright (c) 2017 Datto Inc.
*/ */
#ifndef _SYS_SPA_IMPL_H #ifndef _SYS_SPA_IMPL_H
@ -193,6 +194,8 @@ struct spa {
uint8_t spa_scrub_started; /* started since last boot */ uint8_t spa_scrub_started; /* started since last boot */
uint8_t spa_scrub_reopen; /* scrub doing vdev_reopen */ uint8_t spa_scrub_reopen; /* scrub doing vdev_reopen */
uint64_t spa_scan_pass_start; /* start time per pass/reboot */ uint64_t spa_scan_pass_start; /* start time per pass/reboot */
uint64_t spa_scan_pass_scrub_pause; /* scrub pause time */
uint64_t spa_scan_pass_scrub_spent_paused; /* total paused */
uint64_t spa_scan_pass_exam; /* examined bytes per pass */ uint64_t spa_scan_pass_exam; /* examined bytes per pass */
kmutex_t spa_async_lock; /* protect async state */ kmutex_t spa_async_lock; /* protect async state */
kthread_t *spa_async_thread; /* thread doing async task */ kthread_t *spa_async_thread; /* thread doing async task */

View File

@ -1898,22 +1898,39 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
* Scan the pool. * Scan the pool.
*/ */
int int
zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func) zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
{ {
zfs_cmd_t zc = {"\0"}; zfs_cmd_t zc = {"\0"};
char msg[1024]; char msg[1024];
int err;
libzfs_handle_t *hdl = zhp->zpool_hdl; libzfs_handle_t *hdl = zhp->zpool_hdl;
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
zc.zc_cookie = func; zc.zc_cookie = func;
zc.zc_flags = cmd;
if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 || if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0)
(errno == ENOENT && func != POOL_SCAN_NONE)) return (0);
err = errno;
/* ECANCELED on a scrub means we resumed a paused scrub */
if (err == ECANCELED && func == POOL_SCAN_SCRUB &&
cmd == POOL_SCRUB_NORMAL)
return (0);
if (err == ENOENT && func != POOL_SCAN_NONE && cmd == POOL_SCRUB_NORMAL)
return (0); return (0);
if (func == POOL_SCAN_SCRUB) { if (func == POOL_SCAN_SCRUB) {
(void) snprintf(msg, sizeof (msg), if (cmd == POOL_SCRUB_PAUSE) {
dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name); (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
"cannot pause scrubbing %s"), zc.zc_name);
} else {
assert(cmd == POOL_SCRUB_NORMAL);
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
"cannot scrub %s"), zc.zc_name);
}
} else if (func == POOL_SCAN_NONE) { } else if (func == POOL_SCAN_NONE) {
(void) snprintf(msg, sizeof (msg), (void) snprintf(msg, sizeof (msg),
dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"), dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
@ -1922,7 +1939,7 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
assert(!"unexpected result"); assert(!"unexpected result");
} }
if (errno == EBUSY) { if (err == EBUSY) {
nvlist_t *nvroot; nvlist_t *nvroot;
pool_scan_stat_t *ps = NULL; pool_scan_stat_t *ps = NULL;
uint_t psc; uint_t psc;
@ -1931,14 +1948,18 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
(void) nvlist_lookup_uint64_array(nvroot, (void) nvlist_lookup_uint64_array(nvroot,
ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc); ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
if (ps && ps->pss_func == POOL_SCAN_SCRUB) if (ps && ps->pss_func == POOL_SCAN_SCRUB) {
return (zfs_error(hdl, EZFS_SCRUBBING, msg)); if (cmd == POOL_SCRUB_PAUSE)
else return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg));
else
return (zfs_error(hdl, EZFS_SCRUBBING, msg));
} else {
return (zfs_error(hdl, EZFS_RESILVERING, msg)); return (zfs_error(hdl, EZFS_RESILVERING, msg));
} else if (errno == ENOENT) { }
} else if (err == ENOENT) {
return (zfs_error(hdl, EZFS_NO_SCRUB, msg)); return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
} else { } else {
return (zpool_standard_error(hdl, errno, msg)); return (zpool_standard_error(hdl, err, msg));
} }
} }

View File

@ -24,6 +24,7 @@
* Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved. * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com> * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
* Copyright (c) 2017 Datto Inc.
*/ */
/* /*
@ -246,6 +247,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
case EZFS_POSTSPLIT_ONLINE: case EZFS_POSTSPLIT_ONLINE:
return (dgettext(TEXT_DOMAIN, "disk was split from this pool " return (dgettext(TEXT_DOMAIN, "disk was split from this pool "
"into a new one")); "into a new one"));
case EZFS_SCRUB_PAUSED:
return (dgettext(TEXT_DOMAIN, "scrub is paused; "
"use 'zpool scrub' to resume"));
case EZFS_SCRUBBING: case EZFS_SCRUBBING:
return (dgettext(TEXT_DOMAIN, "currently scrubbing; " return (dgettext(TEXT_DOMAIN, "currently scrubbing; "
"use 'zpool scrub -s' to cancel current scrub")); "use 'zpool scrub -s' to cancel current scrub"));

View File

@ -25,8 +25,9 @@
.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved. .\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
.\" Copyright (c) 2017 Datto Inc. .\" Copyright (c) 2017 Datto Inc.
.\" Copyright (c) 2017 George Melikov. All Rights Reserved. .\" Copyright (c) 2017 George Melikov. All Rights Reserved.
.\" Copyright (c) 2017 Datto Inc.
.\" .\"
.Dd June 22, 2017 .Dd June 28, 2017
.Dt ZPOOL 8 SMM .Dt ZPOOL 8 SMM
.Os Linux .Os Linux
.Sh NAME .Sh NAME
@ -151,7 +152,7 @@
.Ar pool Ar device Op Ar new_device .Ar pool Ar device Op Ar new_device
.Nm .Nm
.Cm scrub .Cm scrub
.Op Fl s .Op Fl s | Fl p
.Ar pool Ns ... .Ar pool Ns ...
.Nm .Nm
.Cm set .Cm set
@ -1769,10 +1770,10 @@ The only property supported at the moment is
.It Xo .It Xo
.Nm .Nm
.Cm scrub .Cm scrub
.Op Fl s .Op Fl s | Fl p
.Ar pool Ns ... .Ar pool Ns ...
.Xc .Xc
Begins a scrub. Begins a scrub or resumes a paused scrub.
The scrub examines all data in the specified pools to verify that it checksums The scrub examines all data in the specified pools to verify that it checksums
correctly. correctly.
For replicated For replicated
@ -1795,15 +1796,25 @@ faults or disk failure.
.Pp .Pp
Because scrubbing and resilvering are I/O-intensive operations, ZFS only allows Because scrubbing and resilvering are I/O-intensive operations, ZFS only allows
one at a time. one at a time.
If a scrub is already in progress, the If a scrub is paused, the
.Nm zpool Cm scrub .Nm zpool Cm scrub
command terminates it and starts a new scrub. resumes it.
If a resilver is in progress, ZFS does not allow a scrub to be started until the If a resilver is in progress, ZFS does not allow a scrub to be started until the
resilver completes. resilver completes.
.Bl -tag -width Ds .Bl -tag -width Ds
.It Fl s .It Fl s
Stop scrubbing. Stop scrubbing.
.El .El
.Bl -tag -width Ds
.It Fl p
Pause scrubbing.
Scrub progress is periodically synced to disk, so if the system
is restarted or the pool is exported during a paused scrub, the scrub will
resume from the place where it was last checkpointed to disk.
To resume a paused scrub issue
.Nm zpool Cm scrub
again.
.El
.It Xo .It Xo
.Nm .Nm
.Cm set .Cm set

View File

@ -22,6 +22,7 @@
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2016 by Delphix. All rights reserved. * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright 2016 Gary Mills * Copyright 2016 Gary Mills
* Copyright (c) 2017 Datto Inc.
*/ */
#include <sys/dsl_scan.h> #include <sys/dsl_scan.h>
@ -317,6 +318,8 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
scn->scn_phys.scn_queue_obj = 0; scn->scn_phys.scn_queue_obj = 0;
} }
scn->scn_phys.scn_flags &= ~DSF_SCRUB_PAUSED;
/* /*
* If we were "restarted" from a stopped state, don't bother * If we were "restarted" from a stopped state, don't bother
* with anything else. * with anything else.
@ -403,6 +406,92 @@ dsl_scan_cancel(dsl_pool_t *dp)
dsl_scan_cancel_sync, NULL, 3, ZFS_SPACE_CHECK_RESERVED)); dsl_scan_cancel_sync, NULL, 3, ZFS_SPACE_CHECK_RESERVED));
} }
/*
 * Report whether the given scan is a scrub that has been paused.
 *
 * Returns B_TRUE only when the scan's pool has a scrub in progress and the
 * DSF_SCRUB_PAUSED flag is set in the scan's scn_phys flags; B_FALSE
 * otherwise.
 */
boolean_t
dsl_scan_is_paused_scrub(const dsl_scan_t *scn)
{
	/* The paused flag is only consulted while a scrub is in progress. */
	if (!dsl_scan_scrubbing(scn->scn_dp))
		return (B_FALSE);

	if ((scn->scn_phys.scn_flags & DSF_SCRUB_PAUSED) != 0)
		return (B_TRUE);

	return (B_FALSE);
}
/*
 * Check callback for the scrub pause/resume sync task.
 *
 * arg points to the requested pool_scrub_cmd_t.
 *
 * Returns:
 *	0	for POOL_SCRUB_NORMAL, or for POOL_SCRUB_PAUSE when a scrub
 *		is in progress and not already paused
 *	ENOENT	POOL_SCRUB_PAUSE requested but no scrub is in progress
 *	EBUSY	POOL_SCRUB_PAUSE requested but the scrub is already paused
 *	ENOTSUP	any other command value
 */
static int
dsl_scrub_pause_resume_check(void *arg, dmu_tx_t *tx)
{
	pool_scrub_cmd_t *cmdp = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);

	switch (*cmdp) {
	case POOL_SCRUB_PAUSE:
		/* there has to be an in-progress scrub to pause */
		if (!dsl_scan_scrubbing(dp))
			return (SET_ERROR(ENOENT));

		/* pausing twice is rejected */
		if (dsl_scan_is_paused_scrub(dp->dp_scan))
			return (SET_ERROR(EBUSY));
		break;

	case POOL_SCRUB_NORMAL:
		/* nothing to validate here */
		break;

	default:
		return (SET_ERROR(ENOTSUP));
	}

	return (0);
}
/*
 * Sync callback for the scrub pause/resume sync task.
 *
 * arg points to the requested pool_scrub_cmd_t.
 *
 * POOL_SCRUB_PAUSE: record the pause time in the spa, set DSF_SCRUB_PAUSED
 * in the scan's scn_phys flags and sync the scan state.
 *
 * POOL_SCRUB_NORMAL: if the scrub is currently paused, add the length of
 * the pause to the per-pass paused total, clear the pause time and the
 * DSF_SCRUB_PAUSED flag, and sync the scan state.  If the scrub is not
 * paused, nothing is changed.
 */
static void
dsl_scrub_pause_resume_sync(void *arg, dmu_tx_t *tx)
{
	pool_scrub_cmd_t *cmdp = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	spa_t *spa = dp->dp_spa;
	dsl_scan_t *scn = dp->dp_scan;

	if (*cmdp == POOL_SCRUB_PAUSE) {
		/* remember when the pause began and mark the scan paused */
		spa->spa_scan_pass_scrub_pause = gethrestime_sec();
		scn->scn_phys.scn_flags |= DSF_SCRUB_PAUSED;
		dsl_scan_sync_state(scn, tx);
		return;
	}

	ASSERT3U(*cmdp, ==, POOL_SCRUB_NORMAL);

	/* only a paused scrub has anything to resume */
	if (!dsl_scan_is_paused_scrub(scn))
		return;

	/*
	 * Accumulate the time spent paused in this pass so that the
	 * scrub rate shown by 'zpool status' can be adjusted for it.
	 */
	spa->spa_scan_pass_scrub_spent_paused +=
	    gethrestime_sec() - spa->spa_scan_pass_scrub_pause;
	spa->spa_scan_pass_scrub_pause = 0;
	scn->scn_phys.scn_flags &= ~DSF_SCRUB_PAUSED;
	dsl_scan_sync_state(scn, tx);
}
/*
 * Pause or resume a scrub on the given pool.
 *
 * Dispatches a sync task on the pool that validates the request with
 * dsl_scrub_pause_resume_check() and applies it with
 * dsl_scrub_pause_resume_sync().  The command is handed to both callbacks
 * by address.
 *
 * Returns the result of dsl_sync_task().
 */
int
dsl_scrub_set_pause_resume(const dsl_pool_t *dp, pool_scrub_cmd_t cmd)
{
	int err;

	err = dsl_sync_task(spa_name(dp->dp_spa),
	    dsl_scrub_pause_resume_check, dsl_scrub_pause_resume_sync,
	    &cmd, 3, ZFS_SPACE_CHECK_RESERVED);

	return (err);
}
/*
 * Report whether the pool currently has a scrub in progress.
 *
 * Returns B_TRUE when the pool's scan is in the DSS_SCANNING state and its
 * scan function is POOL_SCAN_SCRUB; B_FALSE otherwise.
 */
boolean_t
dsl_scan_scrubbing(const dsl_pool_t *dp)
{
	dsl_scan_t *scan = dp->dp_scan;

	if (scan->scn_phys.scn_state != DSS_SCANNING)
		return (B_FALSE);

	if (scan->scn_phys.scn_func != POOL_SCAN_SCRUB)
		return (B_FALSE);

	return (B_TRUE);
}
static void dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, static void dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb,
dnode_phys_t *dnp, dsl_dataset_t *ds, dsl_scan_t *scn, dnode_phys_t *dnp, dsl_dataset_t *ds, dsl_scan_t *scn,
dmu_objset_type_t ostype, dmu_tx_t *tx); dmu_objset_type_t ostype, dmu_tx_t *tx);
@ -444,7 +533,7 @@ dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx)
extern int zfs_vdev_async_write_active_min_dirty_percent; extern int zfs_vdev_async_write_active_min_dirty_percent;
static boolean_t static boolean_t
dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb) dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb)
{ {
uint64_t elapsed_nanosecs; uint64_t elapsed_nanosecs;
int mintime; int mintime;
@ -454,8 +543,8 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb)
if (zb && (int64_t)zb->zb_object < 0) if (zb && (int64_t)zb->zb_object < 0)
return (B_FALSE); return (B_FALSE);
if (scn->scn_pausing) if (scn->scn_suspending)
return (B_TRUE); /* we're already pausing */ return (B_TRUE); /* we're already suspending */
if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark)) if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark))
return (B_FALSE); /* we're resuming */ return (B_FALSE); /* we're resuming */
@ -465,7 +554,7 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb)
return (B_FALSE); return (B_FALSE);
/* /*
* We pause if: * We suspend if:
* - we have scanned for the maximum time: an entire txg * - we have scanned for the maximum time: an entire txg
* timeout (default 5 sec) * timeout (default 5 sec)
* or * or
@ -488,19 +577,19 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb)
dirty_pct >= zfs_vdev_async_write_active_min_dirty_percent)) || dirty_pct >= zfs_vdev_async_write_active_min_dirty_percent)) ||
spa_shutting_down(scn->scn_dp->dp_spa)) { spa_shutting_down(scn->scn_dp->dp_spa)) {
if (zb) { if (zb) {
dprintf("pausing at bookmark %llx/%llx/%llx/%llx\n", dprintf("suspending at bookmark %llx/%llx/%llx/%llx\n",
(longlong_t)zb->zb_objset, (longlong_t)zb->zb_objset,
(longlong_t)zb->zb_object, (longlong_t)zb->zb_object,
(longlong_t)zb->zb_level, (longlong_t)zb->zb_level,
(longlong_t)zb->zb_blkid); (longlong_t)zb->zb_blkid);
scn->scn_phys.scn_bookmark = *zb; scn->scn_phys.scn_bookmark = *zb;
} }
dprintf("pausing at DDT bookmark %llx/%llx/%llx/%llx\n", dprintf("suspending at DDT bookmark %llx/%llx/%llx/%llx\n",
(longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_class, (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_class,
(longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type, (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type,
(longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum, (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum,
(longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor); (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor);
scn->scn_pausing = B_TRUE; scn->scn_suspending = B_TRUE;
return (B_TRUE); return (B_TRUE);
} }
return (B_FALSE); return (B_FALSE);
@ -638,7 +727,7 @@ dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp,
/* /*
* If we found the block we're trying to resume from, or * If we found the block we're trying to resume from, or
* we went past it to a different object, zero it out to * we went past it to a different object, zero it out to
* indicate that it's OK to start checking for pausing * indicate that it's OK to start checking for suspending
* again. * again.
*/ */
if (bcmp(zb, &scn->scn_phys.scn_bookmark, sizeof (*zb)) == 0 || if (bcmp(zb, &scn->scn_phys.scn_bookmark, sizeof (*zb)) == 0 ||
@ -745,7 +834,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
/* /*
* We also always visit user/group accounting * We also always visit user/group accounting
* objects, and never skip them, even if we are * objects, and never skip them, even if we are
* pausing. This is necessary so that the space * suspending. This is necessary so that the space
* deltas from this txg get integrated. * deltas from this txg get integrated.
*/ */
dsl_scan_visitdnode(scn, ds, osp->os_type, dsl_scan_visitdnode(scn, ds, osp->os_type,
@ -803,7 +892,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb,
/* ASSERT(pbuf == NULL || arc_released(pbuf)); */ /* ASSERT(pbuf == NULL || arc_released(pbuf)); */
if (dsl_scan_check_pause(scn, zb)) if (dsl_scan_check_suspend(scn, zb))
goto out; goto out;
if (dsl_scan_check_resume(scn, dnp, zb)) if (dsl_scan_check_resume(scn, dnp, zb))
@ -1149,14 +1238,14 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
dsl_dataset_name(ds, dsname); dsl_dataset_name(ds, dsname);
zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; " zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; "
"pausing=%u", "suspending=%u",
(longlong_t)dsobj, dsname, (longlong_t)dsobj, dsname,
(longlong_t)scn->scn_phys.scn_cur_min_txg, (longlong_t)scn->scn_phys.scn_cur_min_txg,
(longlong_t)scn->scn_phys.scn_cur_max_txg, (longlong_t)scn->scn_phys.scn_cur_max_txg,
(int)scn->scn_pausing); (int)scn->scn_suspending);
kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN); kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN);
if (scn->scn_pausing) if (scn->scn_suspending)
goto out; goto out;
/* /*
@ -1322,13 +1411,13 @@ dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx)
dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &dde, tx); dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &dde, tx);
n++; n++;
if (dsl_scan_check_pause(scn, NULL)) if (dsl_scan_check_suspend(scn, NULL))
break; break;
} }
zfs_dbgmsg("scanned %llu ddt entries with class_max = %u; pausing=%u", zfs_dbgmsg("scanned %llu ddt entries with class_max = %u; "
(longlong_t)n, (int)scn->scn_phys.scn_ddt_class_max, "suspending=%u", (longlong_t)n,
(int)scn->scn_pausing); (int)scn->scn_phys.scn_ddt_class_max, (int)scn->scn_suspending);
ASSERT(error == 0 || error == ENOENT); ASSERT(error == 0 || error == ENOENT);
ASSERT(error != ENOENT || ASSERT(error != ENOENT ||
@ -1372,7 +1461,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg; scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg;
scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg; scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg;
dsl_scan_ddt(scn, tx); dsl_scan_ddt(scn, tx);
if (scn->scn_pausing) if (scn->scn_suspending)
return; return;
} }
@ -1384,7 +1473,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
dsl_scan_visit_rootbp(scn, NULL, dsl_scan_visit_rootbp(scn, NULL,
&dp->dp_meta_rootbp, tx); &dp->dp_meta_rootbp, tx);
spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp); spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp);
if (scn->scn_pausing) if (scn->scn_suspending)
return; return;
if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) { if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) {
@ -1394,22 +1483,22 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
dsl_scan_visitds(scn, dsl_scan_visitds(scn,
dp->dp_origin_snap->ds_object, tx); dp->dp_origin_snap->ds_object, tx);
} }
ASSERT(!scn->scn_pausing); ASSERT(!scn->scn_suspending);
} else if (scn->scn_phys.scn_bookmark.zb_objset != } else if (scn->scn_phys.scn_bookmark.zb_objset !=
ZB_DESTROYED_OBJSET) { ZB_DESTROYED_OBJSET) {
/* /*
* If we were paused, continue from here. Note if the * If we were suspended, continue from here. Note if the
* ds we were paused on was deleted, the zb_objset may * ds we were suspended on was deleted, the zb_objset may
* be -1, so we will skip this and find a new objset * be -1, so we will skip this and find a new objset
* below. * below.
*/ */
dsl_scan_visitds(scn, scn->scn_phys.scn_bookmark.zb_objset, tx); dsl_scan_visitds(scn, scn->scn_phys.scn_bookmark.zb_objset, tx);
if (scn->scn_pausing) if (scn->scn_suspending)
return; return;
} }
/* /*
* In case we were paused right at the end of the ds, zero the * In case we were suspended right at the end of the ds, zero the
* bookmark so we don't think that we're still trying to resume. * bookmark so we don't think that we're still trying to resume.
*/ */
bzero(&scn->scn_phys.scn_bookmark, sizeof (zbookmark_phys_t)); bzero(&scn->scn_phys.scn_bookmark, sizeof (zbookmark_phys_t));
@ -1443,7 +1532,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
dsl_scan_visitds(scn, dsobj, tx); dsl_scan_visitds(scn, dsobj, tx);
zap_cursor_fini(zc); zap_cursor_fini(zc);
if (scn->scn_pausing) if (scn->scn_suspending)
goto out; goto out;
} }
zap_cursor_fini(zc); zap_cursor_fini(zc);
@ -1453,7 +1542,7 @@ out:
} }
static boolean_t static boolean_t
dsl_scan_free_should_pause(dsl_scan_t *scn) dsl_scan_free_should_suspend(dsl_scan_t *scn)
{ {
uint64_t elapsed_nanosecs; uint64_t elapsed_nanosecs;
@ -1477,7 +1566,7 @@ dsl_scan_free_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
if (!scn->scn_is_bptree || if (!scn->scn_is_bptree ||
(BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) { (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) {
if (dsl_scan_free_should_pause(scn)) if (dsl_scan_free_should_suspend(scn))
return (SET_ERROR(ERESTART)); return (SET_ERROR(ERESTART));
} }
@ -1500,7 +1589,8 @@ dsl_scan_active(dsl_scan_t *scn)
return (B_FALSE); return (B_FALSE);
if (spa_shutting_down(spa)) if (spa_shutting_down(spa))
return (B_FALSE); return (B_FALSE);
if (scn->scn_phys.scn_state == DSS_SCANNING || if ((scn->scn_phys.scn_state == DSS_SCANNING &&
!dsl_scan_is_paused_scrub(scn)) ||
(scn->scn_async_destroying && !scn->scn_async_stalled)) (scn->scn_async_destroying && !scn->scn_async_stalled))
return (B_TRUE); return (B_TRUE);
@ -1555,12 +1645,12 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
return; return;
scn->scn_visited_this_txg = 0; scn->scn_visited_this_txg = 0;
scn->scn_pausing = B_FALSE; scn->scn_suspending = B_FALSE;
scn->scn_sync_start_time = gethrtime(); scn->scn_sync_start_time = gethrtime();
spa->spa_scrub_active = B_TRUE; spa->spa_scrub_active = B_TRUE;
/* /*
* First process the async destroys. If we pause, don't do * First process the async destroys. If we suspend, don't do
* any scrubbing or resilvering. This ensures that there are no * any scrubbing or resilvering. This ensures that there are no
* async destroys while we are scanning, so the scan code doesn't * async destroys while we are scanning, so the scan code doesn't
* have to worry about traversing it. It is also faster to free the * have to worry about traversing it. It is also faster to free the
@ -1677,7 +1767,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
return; return;
if (scn->scn_done_txg == tx->tx_txg) { if (scn->scn_done_txg == tx->tx_txg) {
ASSERT(!scn->scn_pausing); ASSERT(!scn->scn_suspending);
/* finished with scan. */ /* finished with scan. */
zfs_dbgmsg("txg %llu scan complete", tx->tx_txg); zfs_dbgmsg("txg %llu scan complete", tx->tx_txg);
dsl_scan_done(scn, B_TRUE, tx); dsl_scan_done(scn, B_TRUE, tx);
@ -1686,6 +1776,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
return; return;
} }
if (dsl_scan_is_paused_scrub(scn))
return;
if (scn->scn_phys.scn_ddt_bookmark.ddb_class <= if (scn->scn_phys.scn_ddt_bookmark.ddb_class <=
scn->scn_phys.scn_ddt_class_max) { scn->scn_phys.scn_ddt_class_max) {
zfs_dbgmsg("doing scan sync txg %llu; " zfs_dbgmsg("doing scan sync txg %llu; "
@ -1720,7 +1813,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
(longlong_t)scn->scn_visited_this_txg, (longlong_t)scn->scn_visited_this_txg,
(longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time)); (longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time));
if (!scn->scn_pausing) { if (!scn->scn_suspending) {
scn->scn_done_txg = tx->tx_txg + 1; scn->scn_done_txg = tx->tx_txg + 1;
zfs_dbgmsg("txg %llu traversal complete, waiting till txg %llu", zfs_dbgmsg("txg %llu traversal complete, waiting till txg %llu",
tx->tx_txg, scn->scn_done_txg); tx->tx_txg, scn->scn_done_txg);
@ -1957,11 +2050,15 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
return (0); return (0);
} }
/* Called by the ZFS_IOC_POOL_SCAN ioctl to start a scrub or resilver */ /*
* Called by the ZFS_IOC_POOL_SCAN ioctl to start a scrub or resilver.
* Can also be called to resume a paused scrub.
*/
int int
dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
{ {
spa_t *spa = dp->dp_spa; spa_t *spa = dp->dp_spa;
dsl_scan_t *scn = dp->dp_scan;
/* /*
* Purge all vdev caches and probe all devices. We do this here * Purge all vdev caches and probe all devices. We do this here
@ -1976,6 +2073,16 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
spa->spa_scrub_reopen = B_FALSE; spa->spa_scrub_reopen = B_FALSE;
(void) spa_vdev_state_exit(spa, NULL, 0); (void) spa_vdev_state_exit(spa, NULL, 0);
if (func == POOL_SCAN_SCRUB && dsl_scan_is_paused_scrub(scn)) {
/* got scrub start cmd, resume paused scrub */
int err = dsl_scrub_set_pause_resume(scn->scn_dp,
POOL_SCRUB_NORMAL);
if (err == 0)
return (ECANCELED);
return (SET_ERROR(err));
}
return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check, return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check,
dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_NONE)); dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_NONE));
} }

View File

@ -29,6 +29,7 @@
* Copyright (c) 2014 Integros [integros.com] * Copyright (c) 2014 Integros [integros.com]
* Copyright 2016 Toomas Soome <tsoome@me.com> * Copyright 2016 Toomas Soome <tsoome@me.com>
* Copyright (c) 2016 Actifio, Inc. All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved.
* Copyright (c) 2017 Datto Inc.
*/ */
/* /*
@ -5726,6 +5727,16 @@ spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru)
* SPA Scanning * SPA Scanning
* ========================================================================== * ==========================================================================
*/ */
/*
 * Forward a scrub pause/resume command for this pool to the DSL scan layer.
 *
 * Asserts that spa_config_held(spa, SCL_ALL, RW_WRITER) returns 0
 * (presumably: the caller holds no config lock as writer -- confirm
 * against spa_config_held()).
 *
 * Returns EBUSY (via SET_ERROR) when dsl_scan_resilvering() reports true
 * for the pool; otherwise returns the result of
 * dsl_scrub_set_pause_resume() for the given cmd.
 */
int
spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t cmd)
{
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);

	/* Only hand the command down when no resilver is reported. */
	if (!dsl_scan_resilvering(spa->spa_dsl_pool))
		return (dsl_scrub_set_pause_resume(spa->spa_dsl_pool, cmd));

	return (SET_ERROR(EBUSY));
}
int int
spa_scan_stop(spa_t *spa) spa_scan_stop(spa_t *spa)

View File

@ -24,6 +24,7 @@
* Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2017 Datto Inc.
*/ */
#include <sys/zfs_context.h> #include <sys/zfs_context.h>
@ -2007,6 +2008,11 @@ spa_scan_stat_init(spa_t *spa)
{ {
/* data not stored on disk */ /* data not stored on disk */
spa->spa_scan_pass_start = gethrestime_sec(); spa->spa_scan_pass_start = gethrestime_sec();
if (dsl_scan_is_paused_scrub(spa->spa_dsl_pool->dp_scan))
spa->spa_scan_pass_scrub_pause = spa->spa_scan_pass_start;
else
spa->spa_scan_pass_scrub_pause = 0;
spa->spa_scan_pass_scrub_spent_paused = 0;
spa->spa_scan_pass_exam = 0; spa->spa_scan_pass_exam = 0;
vdev_scan_stat_init(spa->spa_root_vdev); vdev_scan_stat_init(spa->spa_root_vdev);
} }
@ -2037,6 +2043,8 @@ spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps)
/* data not stored on disk */ /* data not stored on disk */
ps->pss_pass_start = spa->spa_scan_pass_start; ps->pss_pass_start = spa->spa_scan_pass_start;
ps->pss_pass_exam = spa->spa_scan_pass_exam; ps->pss_pass_exam = spa->spa_scan_pass_exam;
ps->pss_pass_scrub_pause = spa->spa_scan_pass_scrub_pause;
ps->pss_pass_scrub_spent_paused = spa->spa_scan_pass_scrub_spent_paused;
return (0); return (0);
} }

View File

@ -1674,6 +1674,7 @@ zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
* inputs: * inputs:
* zc_name name of the pool * zc_name name of the pool
* zc_cookie scan func (pool_scan_func_t) * zc_cookie scan func (pool_scan_func_t)
* zc_flags scrub pause/resume flag (pool_scrub_cmd_t)
*/ */
static int static int
zfs_ioc_pool_scan(zfs_cmd_t *zc) zfs_ioc_pool_scan(zfs_cmd_t *zc)
@ -1684,7 +1685,12 @@ zfs_ioc_pool_scan(zfs_cmd_t *zc)
if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
return (error); return (error);
if (zc->zc_cookie == POOL_SCAN_NONE) if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
return (SET_ERROR(EINVAL));
if (zc->zc_flags == POOL_SCRUB_PAUSE)
error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
else if (zc->zc_cookie == POOL_SCAN_NONE)
error = spa_scan_stop(spa); error = spa_scan_stop(spa);
else else
error = spa_scan(spa, zc->zc_cookie); error = spa_scan(spa, zc->zc_cookie);

View File

@ -1999,54 +1999,65 @@ function check_vdev_state # pool disk state{online,offline,unavail}
# #
# Return 0 is contain, 1 otherwise # Return 0 is contain, 1 otherwise
# #
function check_pool_status # pool token keyword function check_pool_status # pool token keyword <verbose>
{ {
typeset pool=$1 typeset pool=$1
typeset token=$2 typeset token=$2
typeset keyword=$3 typeset keyword=$3
typeset verbose=${4:-false}
zpool status -v "$pool" 2>/dev/null | nawk -v token="$token:" ' scan=$(zpool status -v "$pool" 2>/dev/null | nawk -v token="$token:" '
($1==token) {print $0}' \ ($1==token) {print $0}')
| grep -i "$keyword" > /dev/null 2>&1 if [[ $verbose == true ]]; then
log_note $scan
fi
echo $scan | grep -i "$keyword" > /dev/null 2>&1
return $? return $?
} }
# #
# These 5 following functions are instance of check_pool_status() # These 6 following functions are instance of check_pool_status()
# is_pool_resilvering - to check if the pool is resilver in progress # is_pool_resilvering - to check if the pool is resilver in progress
# is_pool_resilvered - to check if the pool is resilver completed # is_pool_resilvered - to check if the pool is resilver completed
# is_pool_scrubbing - to check if the pool is scrub in progress # is_pool_scrubbing - to check if the pool is scrub in progress
# is_pool_scrubbed - to check if the pool is scrub completed # is_pool_scrubbed - to check if the pool is scrub completed
# is_pool_scrub_stopped - to check if the pool is scrub stopped # is_pool_scrub_stopped - to check if the pool is scrub stopped
# is_pool_scrub_paused - to check if the pool has scrub paused
# #
function is_pool_resilvering #pool function is_pool_resilvering #pool <verbose>
{ {
check_pool_status "$1" "scan" "resilver in progress since " check_pool_status "$1" "scan" "resilver in progress since " $2
return $? return $?
} }
function is_pool_resilvered #pool function is_pool_resilvered #pool <verbose>
{ {
check_pool_status "$1" "scan" "resilvered " check_pool_status "$1" "scan" "resilvered " $2
return $? return $?
} }
function is_pool_scrubbing #pool function is_pool_scrubbing #pool <verbose>
{ {
check_pool_status "$1" "scan" "scrub in progress since " check_pool_status "$1" "scan" "scrub in progress since " $2
return $? return $?
} }
function is_pool_scrubbed #pool function is_pool_scrubbed #pool <verbose>
{ {
check_pool_status "$1" "scan" "scrub repaired" check_pool_status "$1" "scan" "scrub repaired" $2
return $? return $?
} }
function is_pool_scrub_stopped #pool function is_pool_scrub_stopped #pool <verbose>
{ {
check_pool_status "$1" "scan" "scrub canceled" check_pool_status "$1" "scan" "scrub canceled" $2
return $?
}
#
# Check whether the "scan" line of the pool's status contains
# "scrub paused since ".
#
# $1 - pool name
# $2 - optional verbose flag, handed through to check_pool_status
#
# The exit status is that of check_pool_status.
#
function is_pool_scrub_paused #pool <verbose>
{
	typeset pool="$1"

	# $2 stays unquoted so an omitted flag passes no fourth argument.
	check_pool_status "$pool" "scan" "scrub paused since " $2
}

View File

@ -27,6 +27,7 @@
# #
# Copyright (c) 2016 by Delphix. All rights reserved. # Copyright (c) 2016 by Delphix. All rights reserved.
# Copyright (c) 2017 Datto Inc.
# #
. $STF_SUITE/include/libtest.shlib . $STF_SUITE/include/libtest.shlib
@ -34,12 +35,15 @@
# #
# DESCRIPTION: # DESCRIPTION:
# Verify scrub -s works correctly. # Verify scrub, scrub -p, and scrub -s show the right status.
# #
# STRATEGY: # STRATEGY:
# 1. Create pool and fill with hundreds data. # 1. Create pool and create a 100MB file in it.
# 2. zpool scrub the pool # 2. zpool scrub the pool and verify it's doing a scrub.
# 3. Verify zpool scrub -s succeed when the system is scrubbing. # 3. Pause scrub and verify it's paused.
# 4. Try to pause a paused scrub and make sure that fails.
# 5. Resume the paused scrub and verify scrub is again being performed.
# 6. Verify zpool scrub -s succeed when the system is scrubbing.
# #
# NOTES: # NOTES:
# A 10ms delay is added to the ZIOs in order to ensure that the # A 10ms delay is added to the ZIOs in order to ensure that the
@ -49,11 +53,25 @@
verify_runnable "global" verify_runnable "global"
log_assert "Verify scrub -s works correctly." function cleanup
log_must zinject -d $DISK1 -D10:1 $TESTPOOL {
log_must zpool scrub $TESTPOOL log_must zinject -c all
log_must zpool scrub -s $TESTPOOL }
log_must is_pool_scrub_stopped $TESTPOOL
log_must zinject -c all log_onexit cleanup
log_pass "Verify scrub -s works correctly."
log_assert "Verify scrub, scrub -p, and scrub -s show the right status."
log_must zinject -d $DISK1 -D20:1 $TESTPOOL
log_must zpool scrub $TESTPOOL
log_must is_pool_scrubbing $TESTPOOL true
log_must zpool scrub -p $TESTPOOL
log_must is_pool_scrub_paused $TESTPOOL true
log_mustnot zpool scrub -p $TESTPOOL
log_must is_pool_scrub_paused $TESTPOOL true
log_must zpool scrub $TESTPOOL
log_must is_pool_scrubbing $TESTPOOL true
log_must zpool scrub -s $TESTPOOL
log_must is_pool_scrub_stopped $TESTPOOL true
log_pass "Verified scrub, -s, and -p show expected status."

View File

@ -27,6 +27,7 @@
# #
# Copyright (c) 2016 by Delphix. All rights reserved. # Copyright (c) 2016 by Delphix. All rights reserved.
# Copyright (c) 2017 by Datto Inc.
# #
. $STF_SUITE/include/libtest.shlib . $STF_SUITE/include/libtest.shlib
@ -34,14 +35,12 @@
# #
# DESCRIPTION: # DESCRIPTION:
# scrub command terminates the existing scrub process and starts # scrub command fails when there is an existing scrub in progress
# a new scrub.
# #
# STRATEGY: # STRATEGY:
# 1. Setup a pool and fill with data # 1. Setup a pool and fill it with data
# 2. Kick off a scrub # 2. Kick off a scrub
# 3. Check the completed percent and invoke another scrub # 3. Kick off a second scrub and verify it fails
# 4. Check the percent again, verify a new scrub started.
# #
# NOTES: # NOTES:
# A 10ms delay is added to the ZIOs in order to ensure that the # A 10ms delay is added to the ZIOs in order to ensure that the
@ -51,33 +50,21 @@
verify_runnable "global" verify_runnable "global"
function get_scrub_percent function cleanup
{ {
typeset -i percent log_must zinject -c all
percent=$(zpool status $TESTPOOL | grep "^ scrub" | \
awk '{print $7}' | awk -F. '{print $1}')
if is_pool_scrubbed $TESTPOOL ; then
percent=100
fi
echo $percent
} }
log_assert "scrub command terminates the existing scrub process and starts" \ log_onexit cleanup
"a new scrub."
log_assert "Scrub command fails when there is already a scrub in progress"
log_must zinject -d $DISK1 -D10:1 $TESTPOOL log_must zinject -d $DISK1 -D10:1 $TESTPOOL
log_must zpool scrub $TESTPOOL log_must zpool scrub $TESTPOOL
typeset -i PERCENT=30 percent=0 log_must is_pool_scrubbing $TESTPOOL true
while ((percent < PERCENT)) ; do log_mustnot zpool scrub $TESTPOOL
percent=$(get_scrub_percent) log_must is_pool_scrubbing $TESTPOOL true
done log_must zpool scrub -s $TESTPOOL
log_must is_pool_scrub_stopped $TESTPOOL true
log_must zpool scrub $TESTPOOL log_pass "Issuing a scrub command failed when scrub was already in progress"
percent=$(get_scrub_percent)
if ((percent > PERCENT)); then
log_fail "zpool scrub don't stop existing scrubbing process."
fi
log_must zinject -c all
log_pass "scrub command terminates the existing scrub process and starts" \
"a new scrub."

View File

@ -27,6 +27,7 @@
# #
# Copyright (c) 2013, 2016 by Delphix. All rights reserved. # Copyright (c) 2013, 2016 by Delphix. All rights reserved.
# Copyright (c) 2017 Datto Inc.
# #
. $STF_SUITE/include/libtest.shlib . $STF_SUITE/include/libtest.shlib
@ -48,6 +49,7 @@ verify_runnable "global"
log_assert "zpool scrub returns an error when run as a user" log_assert "zpool scrub returns an error when run as a user"
log_mustnot zpool scrub $TESTPOOL log_mustnot zpool scrub $TESTPOOL
log_mustnot zpool scrub -p $TESTPOOL
log_mustnot zpool scrub -s $TESTPOOL log_mustnot zpool scrub -s $TESTPOOL
log_pass "zpool scrub returns an error when run as a user" log_pass "zpool scrub returns an error when run as a user"