Defer new resilvers until the current one ends

Currently, if a resilver is triggered for any reason while an
existing one is running, zfs will immediately restart the existing
resilver from the beginning to include the new drive. This causes
problems for system administrators when a drive fails while another
is already resilvering. In this case, the optimal thing to do to
reduce risk of data loss is to wait for the current resilver to end
before immediately replacing the second failed drive, which allows
the system to operate with two incomplete drives for the minimum
amount of time.

This patch introduces the resilver_defer feature that essentially
does this for the admin without forcing them to wait and monitor
the resilver manually. The change requires an on-disk feature
since we must mark drives that are part of a deferred resilver in
the vdev config to ensure that we do not assume they are done
resilvering when an existing resilver completes.

Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: @mmaybee 
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #7732
This commit is contained in:
Tom Caputi 2018-10-19 00:06:18 -04:00 committed by Brian Behlendorf
parent 9f438c5f94
commit 80a91e7469
28 changed files with 543 additions and 21 deletions

View File

@ -97,6 +97,7 @@ static int zpool_do_replace(int, char **);
static int zpool_do_split(int, char **); static int zpool_do_split(int, char **);
static int zpool_do_scrub(int, char **); static int zpool_do_scrub(int, char **);
static int zpool_do_resilver(int, char **);
static int zpool_do_import(int, char **); static int zpool_do_import(int, char **);
static int zpool_do_export(int, char **); static int zpool_do_export(int, char **);
@ -149,6 +150,7 @@ typedef enum {
HELP_REPLACE, HELP_REPLACE,
HELP_REMOVE, HELP_REMOVE,
HELP_SCRUB, HELP_SCRUB,
HELP_RESILVER,
HELP_STATUS, HELP_STATUS,
HELP_UPGRADE, HELP_UPGRADE,
HELP_EVENTS, HELP_EVENTS,
@ -276,6 +278,7 @@ static zpool_command_t command_table[] = {
{ "split", zpool_do_split, HELP_SPLIT }, { "split", zpool_do_split, HELP_SPLIT },
{ NULL }, { NULL },
{ "scrub", zpool_do_scrub, HELP_SCRUB }, { "scrub", zpool_do_scrub, HELP_SCRUB },
{ "resilver", zpool_do_resilver, HELP_RESILVER },
{ NULL }, { NULL },
{ "import", zpool_do_import, HELP_IMPORT }, { "import", zpool_do_import, HELP_IMPORT },
{ "export", zpool_do_export, HELP_EXPORT }, { "export", zpool_do_export, HELP_EXPORT },
@ -358,6 +361,8 @@ get_usage(zpool_help_t idx)
return (gettext("\treopen [-n] <pool>\n")); return (gettext("\treopen [-n] <pool>\n"));
case HELP_SCRUB: case HELP_SCRUB:
return (gettext("\tscrub [-s | -p] <pool> ...\n")); return (gettext("\tscrub [-s | -p] <pool> ...\n"));
case HELP_RESILVER:
return (gettext("\tresilver <pool> ...\n"));
case HELP_STATUS: case HELP_STATUS:
return (gettext("\tstatus [-c [script1,script2,...]] [-gLPvxD]" return (gettext("\tstatus [-c [script1,script2,...]] [-gLPvxD]"
"[-T d|u] [pool] ... \n" "[-T d|u] [pool] ... \n"
@ -1874,11 +1879,14 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
(void) nvlist_lookup_uint64_array(root, ZPOOL_CONFIG_SCAN_STATS, (void) nvlist_lookup_uint64_array(root, ZPOOL_CONFIG_SCAN_STATS,
(uint64_t **)&ps, &c); (uint64_t **)&ps, &c);
if (ps != NULL && ps->pss_state == DSS_SCANNING && if (ps != NULL && ps->pss_state == DSS_SCANNING && children == 0) {
vs->vs_scan_processed != 0 && children == 0) { if (vs->vs_scan_processed != 0) {
(void) printf(gettext(" (%s)"), (void) printf(gettext(" (%s)"),
(ps->pss_func == POOL_SCAN_RESILVER) ? (ps->pss_func == POOL_SCAN_RESILVER) ?
"resilvering" : "repairing"); "resilvering" : "repairing");
} else if (vs->vs_resilver_deferred) {
(void) printf(gettext(" (awaiting resilver)"));
}
} }
if (cb->vcdl != NULL) { if (cb->vcdl != NULL) {
@ -6251,7 +6259,7 @@ scrub_callback(zpool_handle_t *zhp, void *data)
* Ignore faulted pools. * Ignore faulted pools.
*/ */
if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
(void) fprintf(stderr, gettext("cannot scrub '%s': pool is " (void) fprintf(stderr, gettext("cannot scan '%s': pool is "
"currently unavailable\n"), zpool_get_name(zhp)); "currently unavailable\n"), zpool_get_name(zhp));
return (1); return (1);
} }
@ -6319,6 +6327,44 @@ zpool_do_scrub(int argc, char **argv)
return (for_each_pool(argc, argv, B_TRUE, NULL, scrub_callback, &cb)); return (for_each_pool(argc, argv, B_TRUE, NULL, scrub_callback, &cb));
} }
/*
 * zpool resilver <pool> ...
 *
 * Starts a resilver on each named pool.  If a resilver is already running
 * it is restarted from the beginning.  The command accepts no options and
 * requires at least one pool name.
 */
int
zpool_do_resilver(int argc, char **argv)
{
	scrub_cbdata_t cb;
	int opt;

	/* The callback data records the argument vector exactly as passed in. */
	cb.cb_argv = argv;
	cb.cb_argc = argc;
	cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
	cb.cb_type = POOL_SCAN_RESILVER;

	/* The option string is empty, so getopt() rejects every option. */
	while ((opt = getopt(argc, argv, "")) != -1) {
		if (opt == '?') {
			(void) fprintf(stderr,
			    gettext("invalid option '%c'\n"), optopt);
			usage(B_FALSE);
		}
	}

	argv += optind;
	argc -= optind;

	if (argc < 1) {
		(void) fprintf(stderr, gettext("missing pool name argument\n"));
		usage(B_FALSE);
	}

	/* scrub_callback() is shared with 'zpool scrub'. */
	return (for_each_pool(argc, argv, B_TRUE, NULL, scrub_callback, &cb));
}
/* /*
* Print out detailed scrub status. * Print out detailed scrub status.
*/ */

View File

@ -252,6 +252,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/cli_root/zpool_online/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_online/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool_remove/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_remove/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool_reopen/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_reopen/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool_replace/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_replace/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool_scrub/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_scrub/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool_set/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_set/Makefile

View File

@ -710,6 +710,7 @@ typedef struct zpool_load_policy {
#define ZPOOL_CONFIG_VDEV_TOP_ZAP "com.delphix:vdev_zap_top" #define ZPOOL_CONFIG_VDEV_TOP_ZAP "com.delphix:vdev_zap_top"
#define ZPOOL_CONFIG_VDEV_LEAF_ZAP "com.delphix:vdev_zap_leaf" #define ZPOOL_CONFIG_VDEV_LEAF_ZAP "com.delphix:vdev_zap_leaf"
#define ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS "com.delphix:has_per_vdev_zaps" #define ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS "com.delphix:has_per_vdev_zaps"
#define ZPOOL_CONFIG_RESILVER_DEFER "com.datto:resilver_defer"
#define ZPOOL_CONFIG_CACHEFILE "cachefile" /* not stored on disk */ #define ZPOOL_CONFIG_CACHEFILE "cachefile" /* not stored on disk */
#define ZPOOL_CONFIG_MMP_STATE "mmp_state" /* not stored on disk */ #define ZPOOL_CONFIG_MMP_STATE "mmp_state" /* not stored on disk */
#define ZPOOL_CONFIG_MMP_TXG "mmp_txg" /* not stored on disk */ #define ZPOOL_CONFIG_MMP_TXG "mmp_txg" /* not stored on disk */
@ -988,6 +989,7 @@ typedef struct vdev_stat {
uint64_t vs_scan_processed; /* scan processed bytes */ uint64_t vs_scan_processed; /* scan processed bytes */
uint64_t vs_fragmentation; /* device fragmentation */ uint64_t vs_fragmentation; /* device fragmentation */
uint64_t vs_checkpoint_space; /* checkpoint-consumed space */ uint64_t vs_checkpoint_space; /* checkpoint-consumed space */
uint64_t vs_resilver_deferred; /* resilver deferred */
} vdev_stat_t; } vdev_stat_t;
/* /*

View File

@ -281,6 +281,13 @@ struct spa {
uint64_t spa_scan_pass_scrub_spent_paused; /* total paused */ uint64_t spa_scan_pass_scrub_spent_paused; /* total paused */
uint64_t spa_scan_pass_exam; /* examined bytes per pass */ uint64_t spa_scan_pass_exam; /* examined bytes per pass */
uint64_t spa_scan_pass_issued; /* issued bytes per pass */ uint64_t spa_scan_pass_issued; /* issued bytes per pass */
/*
* We are in the middle of a resilver, and another resilver
* is needed once this one completes. This is set iff any
* vdev_resilver_deferred is set.
*/
boolean_t spa_resilver_deferred;
kmutex_t spa_async_lock; /* protect async state */ kmutex_t spa_async_lock; /* protect async state */
kthread_t *spa_async_thread; /* thread doing async task */ kthread_t *spa_async_thread; /* thread doing async task */
int spa_async_suspended; /* async tasks suspended */ int spa_async_suspended; /* async tasks suspended */

View File

@ -149,6 +149,8 @@ extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg);
extern void vdev_state_dirty(vdev_t *vd); extern void vdev_state_dirty(vdev_t *vd);
extern void vdev_state_clean(vdev_t *vd); extern void vdev_state_clean(vdev_t *vd);
extern void vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd);
typedef enum vdev_config_flag { typedef enum vdev_config_flag {
VDEV_CONFIG_SPARE = 1 << 0, VDEV_CONFIG_SPARE = 1 << 0,
VDEV_CONFIG_L2CACHE = 1 << 1, VDEV_CONFIG_L2CACHE = 1 << 1,

View File

@ -335,6 +335,7 @@ struct vdev {
boolean_t vdev_isspare; /* was a hot spare */ boolean_t vdev_isspare; /* was a hot spare */
boolean_t vdev_isl2cache; /* was a l2cache device */ boolean_t vdev_isl2cache; /* was a l2cache device */
boolean_t vdev_copy_uberblocks; /* post expand copy uberblocks */ boolean_t vdev_copy_uberblocks; /* post expand copy uberblocks */
boolean_t vdev_resilver_deferred; /* resilver deferred */
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */ vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
vdev_cache_t vdev_cache; /* physical block cache */ vdev_cache_t vdev_cache; /* physical block cache */
spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */ spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */

View File

@ -65,6 +65,7 @@ typedef enum spa_feature {
SPA_FEATURE_POOL_CHECKPOINT, SPA_FEATURE_POOL_CHECKPOINT,
SPA_FEATURE_SPACEMAP_V2, SPA_FEATURE_SPACEMAP_V2,
SPA_FEATURE_ALLOCATION_CLASSES, SPA_FEATURE_ALLOCATION_CLASSES,
SPA_FEATURE_RESILVER_DEFER,
SPA_FEATURES SPA_FEATURES
} spa_feature_t; } spa_feature_t;

1
include/zfs_gitrev.h Normal file
View File

@ -0,0 +1 @@
#define ZFS_META_GITREV "unknown"

View File

@ -756,6 +756,27 @@ can also be triggered on filesystems via `zfs set version=current <pool/fs>`.
The upgrade process runs in the background and may take a while to complete The upgrade process runs in the background and may take a while to complete
for the filesystems containing a large number of files. for the filesystems containing a large number of files.
.RE
.sp
.ne 2
.na
\fB\fBresilver_defer\fR\fR
.ad
.RS 4n
.TS
l l .
GUID com.datto:resilver_defer
READ\-ONLY COMPATIBLE yes
DEPENDENCIES none
.TE
This feature allows zfs to postpone new resilvers if an existing one is already
in progress. Without this feature, any new resilvers will cause the currently
running one to be immediately restarted from the beginning.
This feature becomes \fBactive\fR once a resilver has been deferred, and returns
to being \fBenabled\fR when the deferred resilver begins.
.RE .RE
.sp .sp

View File

@ -162,6 +162,9 @@
.Oo Fl o Ar property Ns = Ns Ar value Oc .Oo Fl o Ar property Ns = Ns Ar value Oc
.Ar pool Ar device Op Ar new_device .Ar pool Ar device Op Ar new_device
.Nm .Nm
.Cm resilver
.Ar pool Ns ...
.Nm
.Cm scrub .Cm scrub
.Op Fl s | Fl p .Op Fl s | Fl p
.Ar pool Ns ... .Ar pool Ns ...
@ -2069,6 +2072,14 @@ again.
.El .El
.It Xo .It Xo
.Nm .Nm
.Cm resilver
.Ar pool Ns ...
.Xc
Starts a resilver. If an existing resilver is already running it will be
restarted from the beginning. Any drives that were scheduled for a deferred
resilver will be added to the new one.
.It Xo
.Nm
.Cm set .Cm set
.Ar property Ns = Ns Ar value .Ar property Ns = Ns Ar value
.Ar pool .Ar pool

View File

@ -445,6 +445,11 @@ zpool_feature_init(void)
"Support for separate allocation classes.", "Support for separate allocation classes.",
ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL); ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL);
} }
zfeature_register(SPA_FEATURE_RESILVER_DEFER,
"com.datto:resilver_defer", "resilver_defer",
"Support for defering new resilvers when one is already running.",
ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL);
} }
#if defined(_KERNEL) #if defined(_KERNEL)

View File

@ -175,6 +175,8 @@ enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;
/* max number of blocks to free in a single TXG */ /* max number of blocks to free in a single TXG */
unsigned long zfs_async_block_max_blocks = 100000; unsigned long zfs_async_block_max_blocks = 100000;
int zfs_resilver_disable_defer = 0; /* set to disable resilver deferring */
/* /*
* We wait a few txgs after importing a pool to begin scanning so that * We wait a few txgs after importing a pool to begin scanning so that
* the import / mounting code isn't held up by scrub / resilver IO. * the import / mounting code isn't held up by scrub / resilver IO.
@ -720,6 +722,11 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
spa->spa_scrub_reopen = B_FALSE; spa->spa_scrub_reopen = B_FALSE;
(void) spa_vdev_state_exit(spa, NULL, 0); (void) spa_vdev_state_exit(spa, NULL, 0);
if (func == POOL_SCAN_RESILVER) {
dsl_resilver_restart(spa->spa_dsl_pool, 0);
return (0);
}
if (func == POOL_SCAN_SCRUB && dsl_scan_is_paused_scrub(scn)) { if (func == POOL_SCAN_SCRUB && dsl_scan_is_paused_scrub(scn)) {
/* got scrub start cmd, resume paused scrub */ /* got scrub start cmd, resume paused scrub */
int err = dsl_scrub_set_pause_resume(scn->scn_dp, int err = dsl_scrub_set_pause_resume(scn->scn_dp,
@ -736,6 +743,41 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED)); dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED));
} }
/*
 * Walks the vdev tree rooted at vd and resets vdev_resilver_deferred on
 * every concrete, non-aux leaf.  When vd is the root vdev and the
 * resilver_defer feature is active, the feature refcount is dropped, the
 * root vdev config is dirtied and spa_resilver_deferred is cleared too.
 *
 * Returns B_TRUE if there are devices under vd that still need to be
 * resilvered and are available to accept resilver I/Os (neither dead nor
 * offline).
 */
static boolean_t
dsl_scan_clear_deferred(vdev_t *vd, dmu_tx_t *tx)
{
	spa_t *spa = vd->vdev_spa;
	boolean_t child_needs_resilver = B_FALSE;

	/* Every child must be visited: each call clears flags as it goes. */
	for (int i = 0; i < vd->vdev_children; i++) {
		child_needs_resilver |=
		    dsl_scan_clear_deferred(vd->vdev_child[i], tx);
	}

	/* Pool-wide bookkeeping happens once, at the root of the tree. */
	if (vd == spa->spa_root_vdev &&
	    spa_feature_is_active(spa, SPA_FEATURE_RESILVER_DEFER)) {
		spa_feature_decr(spa, SPA_FEATURE_RESILVER_DEFER, tx);
		vdev_config_dirty(vd);
		spa->spa_resilver_deferred = B_FALSE;
		return (child_needs_resilver);
	}

	/* Anything that is not a concrete, non-aux leaf just reports upward. */
	if (!(vdev_is_concrete(vd) && !vd->vdev_aux &&
	    vd->vdev_ops->vdev_op_leaf))
		return (child_needs_resilver);

	if (vd->vdev_resilver_deferred)
		vd->vdev_resilver_deferred = B_FALSE;

	/* A dead or offline leaf cannot accept resilver I/O right now. */
	if (vdev_is_dead(vd) || vd->vdev_offline)
		return (B_FALSE);

	return (vdev_resilver_needed(vd, NULL, NULL) ? B_TRUE : B_FALSE);
}
/* ARGSUSED */ /* ARGSUSED */
static void static void
dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
@ -835,6 +877,25 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
* Let the async thread assess this and handle the detach. * Let the async thread assess this and handle the detach.
*/ */
spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
/*
* Clear any deferred_resilver flags in the config.
* If there are drives that need resilvering, kick
* off an asynchronous request to start resilver.
* dsl_scan_clear_deferred() may update the config
* before the resilver can restart. In the event of
* a crash during this period, the spa loading code
* will find the drives that need to be resilvered
* when the machine reboots and start the resilver then.
*/
boolean_t resilver_needed =
dsl_scan_clear_deferred(spa->spa_root_vdev, tx);
if (resilver_needed) {
spa_history_log_internal(spa,
"starting deferred resilver", tx,
"errors=%llu", spa_get_errlog_size(spa));
spa_async_request(spa, SPA_ASYNC_RESILVER);
}
} }
scn->scn_phys.scn_end_time = gethrestime_sec(); scn->scn_phys.scn_end_time = gethrestime_sec();
@ -2966,6 +3027,26 @@ dsl_scan_active(dsl_scan_t *scn)
return (used != 0); return (used != 0);
} }
/*
 * Returns B_TRUE if the subtree rooted at vd contains at least one
 * concrete, non-aux leaf vdev whose resilver has not been deferred.
 */
static boolean_t
dsl_scan_check_deferred(vdev_t *vd)
{
	boolean_t found = B_FALSE;

	for (int i = 0; i < vd->vdev_children; i++) {
		if (dsl_scan_check_deferred(vd->vdev_child[i]))
			found = B_TRUE;
	}

	/* Only a concrete, non-aux leaf contributes its own deferred state. */
	if (vdev_is_concrete(vd) && !vd->vdev_aux &&
	    vd->vdev_ops->vdev_op_leaf && !vd->vdev_resilver_deferred)
		found = B_TRUE;

	return (found);
}
static boolean_t static boolean_t
dsl_scan_need_resilver(spa_t *spa, const dva_t *dva, size_t psize, dsl_scan_need_resilver(spa_t *spa, const dva_t *dva, size_t psize,
uint64_t phys_birth) uint64_t phys_birth)
@ -3013,6 +3094,13 @@ dsl_scan_need_resilver(spa_t *spa, const dva_t *dva, size_t psize,
if (!vdev_dtl_need_resilver(vd, DVA_GET_OFFSET(dva), psize)) if (!vdev_dtl_need_resilver(vd, DVA_GET_OFFSET(dva), psize))
return (B_FALSE); return (B_FALSE);
/*
* Check that this top-level vdev has a device under it which
* is resilvering and is not deferred.
*/
if (!dsl_scan_check_deferred(vd))
return (B_FALSE);
return (B_TRUE); return (B_TRUE);
} }
@ -3173,12 +3261,19 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
spa_t *spa = dp->dp_spa; spa_t *spa = dp->dp_spa;
state_sync_type_t sync_type = SYNC_OPTIONAL; state_sync_type_t sync_type = SYNC_OPTIONAL;
if (spa->spa_resilver_deferred &&
!spa_feature_is_active(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER))
spa_feature_incr(spa, SPA_FEATURE_RESILVER_DEFER, tx);
/* /*
* Check for scn_restart_txg before checking spa_load_state, so * Check for scn_restart_txg before checking spa_load_state, so
* that we can restart an old-style scan while the pool is being * that we can restart an old-style scan while the pool is being
* imported (see dsl_scan_init). * imported (see dsl_scan_init). We also restart scans if there
* is a deferred resilver and the user has manually disabled
* deferred resilvers via the tunable.
*/ */
if (dsl_scan_restarting(scn, tx)) { if (dsl_scan_restarting(scn, tx) ||
(spa->spa_resilver_deferred && zfs_resilver_disable_defer)) {
pool_scan_func_t func = POOL_SCAN_SCRUB; pool_scan_func_t func = POOL_SCAN_SCRUB;
dsl_scan_done(scn, B_FALSE, tx); dsl_scan_done(scn, B_FALSE, tx);
if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL))
@ -4000,4 +4095,8 @@ MODULE_PARM_DESC(zfs_scan_strict_mem_lim,
module_param(zfs_scan_fill_weight, int, 0644); module_param(zfs_scan_fill_weight, int, 0644);
MODULE_PARM_DESC(zfs_scan_fill_weight, MODULE_PARM_DESC(zfs_scan_fill_weight,
"Tunable to adjust bias towards more filled segments during scans"); "Tunable to adjust bias towards more filled segments during scans");
module_param(zfs_resilver_disable_defer, int, 0644);
MODULE_PARM_DESC(zfs_resilver_disable_defer,
"Process all resilvers immediately");
#endif #endif

View File

@ -6059,8 +6059,13 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
/* /*
* Schedule the resilver to restart in the future. We do this to * Schedule the resilver to restart in the future. We do this to
* ensure that dmu_sync-ed blocks have been stitched into the * ensure that dmu_sync-ed blocks have been stitched into the
* respective datasets. * respective datasets. We do not do this if resilvers have been
* deferred.
*/ */
if (dsl_scan_resilvering(spa_get_dsl(spa)) &&
spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
vdev_set_deferred_resilver(spa, newvd);
else
dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg); dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg);
if (spa->spa_bootfs) if (spa->spa_bootfs)
@ -6933,6 +6938,7 @@ static void
spa_async_thread(void *arg) spa_async_thread(void *arg)
{ {
spa_t *spa = (spa_t *)arg; spa_t *spa = (spa_t *)arg;
dsl_pool_t *dp = spa->spa_dsl_pool;
int tasks; int tasks;
ASSERT(spa->spa_sync_on); ASSERT(spa->spa_sync_on);
@ -7008,8 +7014,10 @@ spa_async_thread(void *arg)
/* /*
* Kick off a resilver. * Kick off a resilver.
*/ */
if (tasks & SPA_ASYNC_RESILVER) if (tasks & SPA_ASYNC_RESILVER &&
dsl_resilver_restart(spa->spa_dsl_pool, 0); (!dsl_scan_resilvering(dp) ||
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)))
dsl_resilver_restart(dp, 0);
/* /*
* Let the world know that we're done. * Let the world know that we're done.

View File

@ -790,6 +790,9 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVER_TXG, (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVER_TXG,
&vd->vdev_resilver_txg); &vd->vdev_resilver_txg);
if (nvlist_exists(nv, ZPOOL_CONFIG_RESILVER_DEFER))
vdev_set_deferred_resilver(spa, vd);
/* /*
* In general, when importing a pool we want to ignore the * In general, when importing a pool we want to ignore the
* persistent fault state, as the diagnosis made on another * persistent fault state, as the diagnosis made on another
@ -1798,8 +1801,13 @@ vdev_open(vdev_t *vd)
* since this would just restart the scrub we are already doing. * since this would just restart the scrub we are already doing.
*/ */
if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen && if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen &&
vdev_resilver_needed(vd, NULL, NULL)) vdev_resilver_needed(vd, NULL, NULL)) {
if (dsl_scan_resilvering(spa->spa_dsl_pool) &&
spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
vdev_set_deferred_resilver(spa, vd);
else
spa_async_request(spa, SPA_ASYNC_RESILVER); spa_async_request(spa, SPA_ASYNC_RESILVER);
}
return (0); return (0);
} }
@ -2488,6 +2496,9 @@ vdev_dtl_should_excise(vdev_t *vd)
if (vd->vdev_state < VDEV_STATE_DEGRADED) if (vd->vdev_state < VDEV_STATE_DEGRADED)
return (B_FALSE); return (B_FALSE);
if (vd->vdev_resilver_deferred)
return (B_FALSE);
if (vd->vdev_resilver_txg == 0 || if (vd->vdev_resilver_txg == 0 ||
range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) range_tree_is_empty(vd->vdev_dtl[DTL_MISSING]))
return (B_TRUE); return (B_TRUE);
@ -3618,8 +3629,14 @@ vdev_clear(spa_t *spa, vdev_t *vd)
if (vd != rvd && vdev_writeable(vd->vdev_top)) if (vd != rvd && vdev_writeable(vd->vdev_top))
vdev_state_dirty(vd->vdev_top); vdev_state_dirty(vd->vdev_top);
if (vd->vdev_aux == NULL && !vdev_is_dead(vd)) if (vd->vdev_aux == NULL && !vdev_is_dead(vd)) {
if (dsl_scan_resilvering(spa->spa_dsl_pool) &&
spa_feature_is_enabled(spa,
SPA_FEATURE_RESILVER_DEFER))
vdev_set_deferred_resilver(spa, vd);
else
spa_async_request(spa, SPA_ASYNC_RESILVER); spa_async_request(spa, SPA_ASYNC_RESILVER);
}
spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_CLEAR); spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_CLEAR);
} }
@ -3840,6 +3857,8 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
vs->vs_fragmentation = (vd->vdev_mg != NULL) ? vs->vs_fragmentation = (vd->vdev_mg != NULL) ?
vd->vdev_mg->mg_fragmentation : 0; vd->vdev_mg->mg_fragmentation : 0;
} }
if (vd->vdev_ops->vdev_op_leaf)
vs->vs_resilver_deferred = vd->vdev_resilver_deferred;
} }
ASSERT(spa_config_held(vd->vdev_spa, SCL_ALL, RW_READER) != 0); ASSERT(spa_config_held(vd->vdev_spa, SCL_ALL, RW_READER) != 0);
@ -4578,6 +4597,14 @@ vdev_deadman(vdev_t *vd, char *tag)
} }
} }
/*
 * Marks the leaf vdev vd as having its resilver deferred and records on the
 * pool that at least one such vdev exists.
 *
 * NOTE(review): vdev_clear() filters only on vdev_aux and vdev_is_dead()
 * before calling this, so it looks like a non-leaf vd could reach the
 * ASSERT below -- confirm that every caller guarantees a leaf.
 */
void
vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd)
{
	ASSERT(vd->vdev_ops->vdev_op_leaf);

	spa->spa_resilver_deferred = B_TRUE;
	vd->vdev_resilver_deferred = B_TRUE;
}
#if defined(_KERNEL) #if defined(_KERNEL)
EXPORT_SYMBOL(vdev_fault); EXPORT_SYMBOL(vdev_fault);
EXPORT_SYMBOL(vdev_degrade); EXPORT_SYMBOL(vdev_degrade);

View File

@ -524,6 +524,12 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
fnvlist_add_uint64(nv, ZPOOL_CONFIG_VDEV_TOP_ZAP, fnvlist_add_uint64(nv, ZPOOL_CONFIG_VDEV_TOP_ZAP,
vd->vdev_top_zap); vd->vdev_top_zap);
} }
if (vd->vdev_resilver_deferred) {
ASSERT(vd->vdev_ops->vdev_op_leaf);
ASSERT(spa->spa_resilver_deferred);
fnvlist_add_boolean(nv, ZPOOL_CONFIG_RESILVER_DEFER);
}
} }
if (getstats) { if (getstats) {

View File

@ -421,6 +421,10 @@ tags = ['functional', 'cli_root', 'zpool_reopen']
tests = ['zpool_replace_001_neg', 'replace-o_ashift', 'replace_prop_ashift'] tests = ['zpool_replace_001_neg', 'replace-o_ashift', 'replace_prop_ashift']
tags = ['functional', 'cli_root', 'zpool_replace'] tags = ['functional', 'cli_root', 'zpool_replace']
[tests/functional/cli_root/zpool_resilver]
tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart']
tags = ['functional', 'cli_root', 'zpool_resilver']
[tests/functional/cli_root/zpool_scrub] [tests/functional/cli_root/zpool_scrub]
tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos', tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos',
'zpool_scrub_004_pos', 'zpool_scrub_005_pos', 'zpool_scrub_004_pos', 'zpool_scrub_005_pos',

View File

@ -52,6 +52,7 @@ SUBDIRS = \
zpool_remove \ zpool_remove \
zpool_reopen \ zpool_reopen \
zpool_replace \ zpool_replace \
zpool_resilver \
zpool_scrub \ zpool_scrub \
zpool_set \ zpool_set \
zpool_split \ zpool_split \

View File

@ -87,5 +87,6 @@ if is_linux; then
"feature@encryption" "feature@encryption"
"feature@project_quota" "feature@project_quota"
"feature@allocation_classes" "feature@allocation_classes"
"feature@resilver_defer"
) )
fi fi

View File

@ -115,3 +115,10 @@ function is_scan_restarted #pool
zpool history -i $pool | grep -q "scan aborted, restarting" zpool history -i $pool | grep -q "scan aborted, restarting"
return $? return $?
} }
#
# Succeeds if the internal history of the given pool contains the
# "starting deferred resilver" entry.
#
function is_deferred_scan_started #pool
{
	typeset poolname=$1
	typeset marker="starting deferred resilver"

	# The exit status of grep is the function's return value.
	zpool history -i $poolname | grep -q "$marker"
}

View File

@ -29,7 +29,7 @@
# 4. Execute scrub. # 4. Execute scrub.
# 5. "Plug back" disk. # 5. "Plug back" disk.
# 6. Reopen a pool with an -n flag. # 6. Reopen a pool with an -n flag.
# 7. Check if scrub scan is NOT replaced by resilver. # 7. Check if resilver was deferred.
# 8. Check if trying to put device to offline fails because of no valid # 8. Check if trying to put device to offline fails because of no valid
# replicas. # replicas.
# #
@ -75,11 +75,12 @@ log_must check_state $TESTPOOL "$REMOVED_DISK_ID" "online"
log_must zinject -c all log_must zinject -c all
# 7. Check if scrub scan is NOT replaced by resilver. # 7. Check if scrub scan is NOT replaced by resilver.
log_must wait_for_scrub_end $TESTPOOL $MAXTIMEOUT log_must wait_for_scrub_end $TESTPOOL $MAXTIMEOUT
log_mustnot is_scan_restarted $TESTPOOL log_must is_deferred_scan_started $TESTPOOL
# 8. Check if trying to put device to offline fails because of no valid # 8. Check if trying to put device to offline fails because of no valid
# replicas. # replicas.
log_mustnot zpool offline $TESTPOOL $DISK2 log_must wait_for_resilver_end $TESTPOOL $MAXTIMEOUT
log_must zpool offline $TESTPOOL $DISK2
# clean up # clean up
log_must zpool destroy $TESTPOOL log_must zpool destroy $TESTPOOL

View File

@ -72,13 +72,13 @@ log_must zinject -d $REMOVED_DISK_ID -D25:1 $TESTPOOL
log_must wait_for_resilver_start $TESTPOOL $MAXTIMEOUT log_must wait_for_resilver_start $TESTPOOL $MAXTIMEOUT
# 6. Reopen a pool again with -n flag. # 6. Reopen a pool again with -n flag.
zpool reopen -n $TESTPOOL log_must zpool reopen -n $TESTPOOL
# 7. Wait until resilvering is finished and check if it was restarted. # 7. Wait until resilvering is finished and check if it was restarted.
log_must wait_for_resilver_end $TESTPOOL $MAXTIMEOUT log_must wait_for_resilver_end $TESTPOOL $MAXTIMEOUT
# remove delay from disk # remove delay from disk
log_must zinject -c all log_must zinject -c all
log_must is_scan_restarted $TESTPOOL log_mustnot is_scan_restarted $TESTPOOL
# clean up # clean up
log_must zpool destroy $TESTPOOL log_must zpool destroy $TESTPOOL

View File

@ -0,0 +1,9 @@
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zpool_resilver
dist_pkgdata_SCRIPTS = \
setup.ksh \
cleanup.ksh \
zpool_resilver_bad_args.ksh \
zpool_resilver_restart.ksh
dist_pkgdata_DATA = \
zpool_resilver.cfg

View File

@ -0,0 +1,33 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
. $STF_SUITE/include/libtest.shlib
# Use this directory's own configuration, the same file setup.ksh sources,
# rather than depending on the zpool_scrub test directory.
. $STF_SUITE/tests/functional/cli_root/zpool_resilver/zpool_resilver.cfg

verify_runnable "global"

# Presumably the counterpart of default_mirror_setup_noexit in setup.ksh.
destroy_mirrors

View File

@ -0,0 +1,39 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2018 by Datto. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_resilver/zpool_resilver.cfg

verify_runnable "global"
verify_disk_count "$DISKS" 3

# Set up the test pool on the three disks exported by zpool_resilver.cfg.
default_mirror_setup_noexit $DISK1 $DISK2 $DISK3

# Create 256M of data: 256 blocks of 1 MiB each.
testdir=$(get_prop mountpoint $TESTPOOL/$TESTFS)
log_must file_write -b $((1024 * 1024)) -c 256 -o create -d 0 \
    -f $testdir/bigfile

log_pass

View File

@ -0,0 +1,33 @@
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2018 by Datto. All rights reserved.
#
# The first three whitespace-separated entries of $DISKS.
export DISK1=$(echo $DISKS | nawk '{print $1}')
export DISK2=$(echo $DISKS | nawk '{print $2}')
export DISK3=$(echo $DISKS | nawk '{print $3}')
# 128 KiB and 4 MiB.  NOTE(review): presumably a throttled and a normal value
# for a scan vdev limit tunable -- confirm against the tests that use them.
export ZFS_SCAN_VDEV_LIMIT_SLOW=$((128*1024))
export ZFS_SCAN_VDEV_LIMIT_DEFAULT=$((4*1024*1024))
# NOTE(review): unit is not stated here; presumably a timeout in seconds.
export MAXTIMEOUT=80

View File

@ -0,0 +1,58 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2018 by Datto. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
#
# DESCRIPTION:
# A badly formed parameter passed to 'zpool resilver' should
# return an error.
#
# STRATEGY:
# 1. Create an array containing bad 'zpool resilver' parameters.
# 2. For each element, execute the sub-command.
# 3. Verify it returns an error.
#
verify_runnable "global"
# Argument sets that 'zpool resilver' is expected to reject: no pool operand,
# a bogus operand, malformed switches, and every single-letter option from
# -a to -z and -A to -Z.
set -A args "" "-?" "blah blah" "-%" "--?" "-*" "-=" \
"-a" "-b" "-c" "-d" "-e" "-f" "-g" "-h" "-i" "-j" "-k" "-l" \
"-m" "-n" "-o" "-p" "-q" "-r" "-s" "-t" "-u" "-v" "-w" "-x" "-y" "-z" \
"-A" "-B" "-C" "-D" "-E" "-F" "-G" "-H" "-I" "-J" "-K" "-L" \
"-M" "-N" "-O" "-P" "-Q" "-R" "-S" "-T" "-U" "-V" "-W" "-X" "-Y" "-Z"
log_assert "Execute 'zpool resilver' using invalid parameters."
# Note: ${args[i]} is expanded unquoted, so the empty entry becomes a call
# with no arguments at all and "blah blah" is split into two operands.
typeset -i i=0
while [[ $i -lt ${#args[*]} ]]; do
log_mustnot zpool resilver ${args[i]}
((i = i + 1))
done
log_pass "Badly formed 'zpool resilver' parameters fail as expected."

View File

@ -0,0 +1,95 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2018 Datto Inc.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_resilver/zpool_resilver.cfg
#
# DESCRIPTION:
# "Verify 'zpool resilver' restarts in-progress resilvers"
#
# STRATEGY:
# 1. Write some data and detach the first drive so it has resilver
# work to do
# 2. Repeat the process with a second disk
# 3. Reattach the drives, causing the second drive's resilver to be
# deferred
# 4. Manually restart the resilver with all drives
#
# NOTES:
# Artificially limit the resilver speed by setting the zfs_scan_vdev_limit
# low and adding a 50ms zio delay in order to ensure that the resilver
# does not complete early.
#
verify_runnable "global"
# Undo everything this test changes: clear all zinject handlers, put
# zfs_scan_vdev_limit back to $ZFS_SCAN_VDEV_LIMIT_DEFAULT, and delete the two
# data files written under $mntpnt.
function cleanup
{
log_must zinject -c all
log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
log_must rm -f $mntpnt/biggerfile1
log_must rm -f $mntpnt/biggerfile2
}
# Register cleanup so the zinject handlers and the tunable are reset when the
# test exits.
log_onexit cleanup
log_assert "Verify 'zpool resilver' restarts in-progress resilvers"
mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
# 1. Write some data and detach the first drive so it has resilver work to do
#    (1024 blocks of 524288 bytes = 512M per file)
log_must file_write -b 524288 -c 1024 -o create -d 0 -f $mntpnt/biggerfile1
log_must sync
log_must zpool detach $TESTPOOL $DISK2
# 2. Repeat the process with a second disk
log_must file_write -b 524288 -c 1024 -o create -d 0 -f $mntpnt/biggerfile2
log_must sync
log_must zpool detach $TESTPOOL $DISK3
# 3. Reattach the drives, causing the second drive's resilver to be deferred
# Throttle scans before attaching so the first resilver is still running when
# the second disk is attached.
log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_SLOW
log_must zpool attach $TESTPOOL $DISK1 $DISK2
# Add the zio delay to the freshly attached disk.
log_must zinject -d $DISK2 -D50:1 $TESTPOOL
log_must is_pool_resilvering $TESTPOOL true
log_must zpool attach $TESTPOOL $DISK1 $DISK3
log_must zinject -d $DISK3 -D50:1 $TESTPOOL
log_must is_pool_resilvering $TESTPOOL true
# 4. Manually restart the resilver with all drives
log_must zpool resilver $TESTPOOL
# Remove the throttling so the restarted resilver can finish within
# $MAXTIMEOUT.
log_must zinject -c all
log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
log_must wait_for_resilver_end $TESTPOOL $MAXTIMEOUT
# NOTE(review): is_deferred_scan_started is not defined in this file;
# presumably it comes from one of the sourced libraries - confirm what it
# checks.
log_must is_deferred_scan_started $TESTPOOL
# Both reattached disks must end up online once the resilver is done.
log_must check_state $TESTPOOL "$DISK2" "online"
log_must check_state $TESTPOOL "$DISK3" "online"
log_pass "Verified 'zpool resilver' restarts in-progress resilvers"

View File

@ -25,6 +25,7 @@
# #
. $STF_SUITE/include/libtest.shlib . $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg . $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg
# #
@ -95,6 +96,7 @@ DISK1="$TEST_BASE_DIR/zpool_disk1.dat"
DISK2="$TEST_BASE_DIR/zpool_disk2.dat" DISK2="$TEST_BASE_DIR/zpool_disk2.dat"
DISK3="$TEST_BASE_DIR/zpool_disk3.dat" DISK3="$TEST_BASE_DIR/zpool_disk3.dat"
DISK4="$TEST_BASE_DIR/zpool_disk4.dat" DISK4="$TEST_BASE_DIR/zpool_disk4.dat"
RESILVER_TIMEOUT=40
# 1. Create the pool # 1. Create the pool
log_must truncate -s $DEVSIZE $DISK1 log_must truncate -s $DEVSIZE $DISK1
@ -117,6 +119,7 @@ zpool_scrub_sync $TESTPOOL
# 5. Online the first device and offline the second device # 5. Online the first device and offline the second device
zpool_do_sync 'online' $TESTPOOL $DISK1 zpool_do_sync 'online' $TESTPOOL $DISK1
zpool_do_sync 'offline' $TESTPOOL $DISK2 zpool_do_sync 'offline' $TESTPOOL $DISK2
log_must wait_for_resilver_end $TESTPOOL $RESILVER_TIMEOUT
# 6. Scrub the pool again # 6. Scrub the pool again
zpool_scrub_sync $TESTPOOL zpool_scrub_sync $TESTPOOL