mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Fix reporting of CKSUM errors in indirect vdevs
When a vdev is removed and subsequently reattached, spurious CKSUM errors may be reported because vdev_indirect_read_all() reads from all children of a mirror during a resilver, including children whose DTL indicates that they are missing the data. Fix this by checking in vdev_indirect_read_all() whether a child is missing the data and, if so, setting a flag (ic_error); vdev_indirect_repair() then checks this flag and suppresses incrementing the checksum counter for that child. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: George Amanakis <gamanakis@gmail.com> Closes #11277
This commit is contained in:
committed by
Brian Behlendorf
parent
058b6fd069
commit
900480bd96
@@ -239,6 +239,7 @@ typedef struct indirect_child {
|
||||
*/
|
||||
struct indirect_child *ic_duplicate;
|
||||
list_node_t ic_node; /* node on is_unique_child */
|
||||
int ic_error; /* set when a child does not contain the data */
|
||||
} indirect_child_t;
|
||||
|
||||
/*
|
||||
@@ -1272,15 +1273,14 @@ vdev_indirect_read_all(zio_t *zio)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Note, we may read from a child whose DTL
|
||||
* indicates that the data may not be present here.
|
||||
* While this might result in a few i/os that will
|
||||
* likely return incorrect data, it simplifies the
|
||||
* code since we can treat scrub and resilver
|
||||
* identically. (The incorrect data will be
|
||||
* detected and ignored when we verify the
|
||||
* checksum.)
|
||||
* If a child is missing the data, set ic_error. Used
|
||||
* in vdev_indirect_repair(). We perform the read
|
||||
* nevertheless which provides the opportunity to
|
||||
* reconstruct the split block if at all possible.
|
||||
*/
|
||||
if (vdev_dtl_contains(ic->ic_vdev, DTL_MISSING,
|
||||
zio->io_txg, 1))
|
||||
ic->ic_error = SET_ERROR(ESTALE);
|
||||
|
||||
ic->ic_data = abd_alloc_sametype(zio->io_abd,
|
||||
is->is_size);
|
||||
@@ -1410,7 +1410,11 @@ vdev_indirect_checksum_error(zio_t *zio,
|
||||
* Issue repair i/os for any incorrect copies. We do this by comparing
|
||||
* each split segment's correct data (is_good_child's ic_data) with each
|
||||
* other copy of the data. If they differ, then we overwrite the bad data
|
||||
* with the good copy. Note that we do this without regard for the DTL's,
|
||||
* with the good copy. The DTL is checked in vdev_indirect_read_all() and
|
||||
* if a vdev is missing a copy of the data we set ic_error and the read is
|
||||
* performed. This provides the opportunity to reconstruct the split block
|
||||
* if at all possible. ic_error is checked here and if set it suppresses
|
||||
* incrementing the checksum counter. Aside from this DTLs are not checked,
|
||||
* which simplifies this code and also issues the optimal number of writes
|
||||
* (based on which copies actually read bad data, as opposed to which we
|
||||
* think might be wrong). For the same reason, we always use
|
||||
@@ -1447,6 +1451,14 @@ vdev_indirect_repair(zio_t *zio)
|
||||
ZIO_FLAG_IO_REPAIR | ZIO_FLAG_SELF_HEAL,
|
||||
NULL, NULL));
|
||||
|
||||
/*
|
||||
* If ic_error is set the current child does not have
|
||||
* a copy of the data, so suppress incrementing the
|
||||
* checksum counter.
|
||||
*/
|
||||
if (ic->ic_error == ESTALE)
|
||||
continue;
|
||||
|
||||
vdev_indirect_checksum_error(zio, is, ic);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user