mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-26 04:07:45 +03:00
Skip spurious resilver IO on raidz vdev
On a raidz vdev, a block that does not span all child vdevs, excluding its skip sectors if any, may not be affected by a child vdev outage or failure. In such cases, the block does not need to be resilvered. However, current resilver algorithm simply resilvers all blocks on a degraded raidz vdev. Such spurious IO is not only wasteful, but also adds the risk of overwriting good data. This patch eliminates such spurious IOs. Reviewed-by: Gvozden Neskovic <neskovic@gmail.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed by: Matthew Ahrens <mahrens@delphix.com> Signed-off-by: Isaac Huang <he.huang@intel.com> Closes #5316
This commit is contained in:
committed by
Brian Behlendorf
parent
8c54ddd33a
commit
3d6da72d18
+49
-23
@@ -1836,12 +1836,51 @@ dsl_scan_scrub_done(zio_t *zio)
|
||||
mutex_exit(&spa->spa_scrub_lock);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
dsl_scan_need_resilver(spa_t *spa, const dva_t *dva, size_t psize,
|
||||
uint64_t phys_birth)
|
||||
{
|
||||
vdev_t *vd;
|
||||
|
||||
if (DVA_GET_GANG(dva)) {
|
||||
/*
|
||||
* Gang members may be spread across multiple
|
||||
* vdevs, so the best estimate we have is the
|
||||
* scrub range, which has already been checked.
|
||||
* XXX -- it would be better to change our
|
||||
* allocation policy to ensure that all
|
||||
* gang members reside on the same vdev.
|
||||
*/
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
vd = vdev_lookup_top(spa, DVA_GET_VDEV(dva));
|
||||
|
||||
/*
|
||||
* Check if the txg falls within the range which must be
|
||||
* resilvered. DVAs outside this range can always be skipped.
|
||||
*/
|
||||
if (!vdev_dtl_contains(vd, DTL_PARTIAL, phys_birth, 1))
|
||||
return (B_FALSE);
|
||||
|
||||
/*
|
||||
* Check if the top-level vdev must resilver this offset.
|
||||
* When the offset does not intersect with a dirty leaf DTL
|
||||
* then it may be possible to skip the resilver IO. The psize
|
||||
* is provided instead of asize to simplify the check for RAIDZ.
|
||||
*/
|
||||
if (!vdev_dtl_need_resilver(vd, DVA_GET_OFFSET(dva), psize))
|
||||
return (B_FALSE);
|
||||
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
static int
|
||||
dsl_scan_scrub_cb(dsl_pool_t *dp,
|
||||
const blkptr_t *bp, const zbookmark_phys_t *zb)
|
||||
{
|
||||
dsl_scan_t *scn = dp->dp_scan;
|
||||
size_t size = BP_GET_PSIZE(bp);
|
||||
size_t psize = BP_GET_PSIZE(bp);
|
||||
spa_t *spa = dp->dp_spa;
|
||||
uint64_t phys_birth = BP_PHYSICAL_BIRTH(bp);
|
||||
boolean_t needs_io = B_FALSE;
|
||||
@@ -1875,33 +1914,19 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
|
||||
zio_flags |= ZIO_FLAG_SPECULATIVE;
|
||||
|
||||
for (d = 0; d < BP_GET_NDVAS(bp); d++) {
|
||||
vdev_t *vd = vdev_lookup_top(spa,
|
||||
DVA_GET_VDEV(&bp->blk_dva[d]));
|
||||
const dva_t *dva = &bp->blk_dva[d];
|
||||
|
||||
/*
|
||||
* Keep track of how much data we've examined so that
|
||||
* zpool(1M) status can make useful progress reports.
|
||||
*/
|
||||
scn->scn_phys.scn_examined += DVA_GET_ASIZE(&bp->blk_dva[d]);
|
||||
spa->spa_scan_pass_exam += DVA_GET_ASIZE(&bp->blk_dva[d]);
|
||||
scn->scn_phys.scn_examined += DVA_GET_ASIZE(dva);
|
||||
spa->spa_scan_pass_exam += DVA_GET_ASIZE(dva);
|
||||
|
||||
/* if it's a resilver, this may not be in the target range */
|
||||
if (!needs_io) {
|
||||
if (DVA_GET_GANG(&bp->blk_dva[d])) {
|
||||
/*
|
||||
* Gang members may be spread across multiple
|
||||
* vdevs, so the best estimate we have is the
|
||||
* scrub range, which has already been checked.
|
||||
* XXX -- it would be better to change our
|
||||
* allocation policy to ensure that all
|
||||
* gang members reside on the same vdev.
|
||||
*/
|
||||
needs_io = B_TRUE;
|
||||
} else {
|
||||
needs_io = vdev_dtl_contains(vd, DTL_PARTIAL,
|
||||
phys_birth, 1);
|
||||
}
|
||||
}
|
||||
if (!needs_io)
|
||||
needs_io = dsl_scan_need_resilver(spa, dva, psize,
|
||||
phys_birth);
|
||||
}
|
||||
|
||||
if (needs_io && !zfs_no_scrub_io) {
|
||||
@@ -1922,8 +1947,9 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
|
||||
delay(scan_delay);
|
||||
|
||||
zio_nowait(zio_read(NULL, spa, bp,
|
||||
abd_alloc_for_io(size, B_FALSE), size, dsl_scan_scrub_done,
|
||||
NULL, ZIO_PRIORITY_SCRUB, zio_flags, zb));
|
||||
abd_alloc_for_io(psize, B_FALSE),
|
||||
psize, dsl_scan_scrub_done, NULL,
|
||||
ZIO_PRIORITY_SCRUB, zio_flags, zb));
|
||||
}
|
||||
|
||||
/* do not relocate this block */
|
||||
|
||||
Reference in New Issue
Block a user