mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Always validate checksums for Direct I/O reads
This fixes an oversight in the Direct I/O PR. There is nothing that stops a process from manipulating the contents of a buffer for a Direct I/O read while the I/O is in flight. This can lead to checksum verify failures. However, the disk contents are still correct, and this would lead to false reporting of checksum validation failures. To remedy this, all Direct I/O reads that have a checksum verification failure are treated as suspicious. In the event a checksum validation failure occurs for a Direct I/O read, then the I/O request will be reissued through the ARC. This allows for actual validation to happen and removes any possibility of the buffer being manipulated after the I/O has been issued. Just as with Direct I/O write checksum validation failures, Direct I/O read checksum validation failures are reported through zpool status -d in the DIO column. Also the zevent has been updated to have both: 1. dio_verify_wr -> Checksum verification failure for writes 2. dio_verify_rd -> Checksum verification failure for reads. This allows for determining what I/O operation was the culprit for the checksum verification failure. All DIO errors are reported only on the top-level VDEV. Even though FreeBSD can write protect pages (stable pages) it still has the same issue as Linux with Direct I/O reads. This commit updates the following: 1. Propagates checksum failures for reads all the way up to the top-level VDEV. 2. Reports errors through zpool status -d as DIO. 3. Has two zevents for checksum verify errors with Direct I/O. One for read and one for write. 4. Updates FreeBSD ABD code to also check for ABD_FLAG_FROM_PAGES and handle ABD buffer contents validation the same as Linux. 5. Updated manipulate_user_buffer.c to also manipulate a buffer while a Direct I/O read is taking place. 6. Adds a new ZTS test case dio_read_verify that stress tests the new code. 7. Updated man pages. 8. 
Added an IMPLY statement to zio_checksum_verify() to make sure that Direct I/O reads are not issued as speculative. 9. Removed self healing through mirror, raidz, and dRAID VDEVs for Direct I/O reads. This issue was first observed when installing a Windows 11 VM on a ZFS dataset with the dataset property direct set to always. The zpool devices would report checksum failures, but running a subsequent zpool scrub would not repair any data and report no errors. Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Alexander Motin <mav@FreeBSD.org> Signed-off-by: Brian Atkinson <batkinson@lanl.gov> Closes #16598
This commit is contained in:
@@ -620,9 +620,16 @@ abd_borrow_buf_copy(abd_t *abd, size_t n)
|
||||
|
||||
/*
|
||||
* Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will
|
||||
* no change the contents of the ABD and will ASSERT that you didn't modify
|
||||
* the buffer since it was borrowed. If you want any changes you made to buf to
|
||||
* be copied back to abd, use abd_return_buf_copy() instead.
|
||||
* not change the contents of the ABD. If you want any changes you made to
|
||||
* buf to be copied back to abd, use abd_return_buf_copy() instead. If the
|
||||
* ABD is not constructed from user pages from Direct I/O then an ASSERT
|
||||
* checks to make sure the contents of the buffer have not changed since it was
|
||||
* borrowed. We can not ASSERT the contents of the buffer have not changed if
|
||||
* it is composed of user pages. While Direct I/O write pages are placed under
|
||||
* write protection and can not be changed, this is not the case for Direct I/O
|
||||
* reads. The pages of a Direct I/O read could be manipulated at any time.
|
||||
* Checksum verifications in the ZIO pipeline check for this issue and handle
|
||||
* it by returning an error on checksum verification failure.
|
||||
*/
|
||||
void
|
||||
abd_return_buf(abd_t *abd, void *buf, size_t n)
|
||||
@@ -632,8 +639,34 @@ abd_return_buf(abd_t *abd, void *buf, size_t n)
|
||||
#ifdef ZFS_DEBUG
|
||||
(void) zfs_refcount_remove_many(&abd->abd_children, n, buf);
|
||||
#endif
|
||||
if (abd_is_linear(abd)) {
|
||||
if (abd_is_from_pages(abd)) {
|
||||
if (!abd_is_linear_page(abd))
|
||||
zio_buf_free(buf, n);
|
||||
} else if (abd_is_linear(abd)) {
|
||||
ASSERT3P(buf, ==, abd_to_buf(abd));
|
||||
} else if (abd_is_gang(abd)) {
|
||||
#ifdef ZFS_DEBUG
|
||||
/*
|
||||
* We have to be careful with gang ABD's that we do not ASSERT
|
||||
* for any ABD's that contain user pages from Direct I/O. See
|
||||
* the comment above about Direct I/O read buffers possibly
|
||||
* being manipulated. In order to handle this, we just iterate
|
||||
* through the gang ABD and only verify ABD's that are not from
|
||||
* user pages.
|
||||
*/
|
||||
void *cmp_buf = buf;
|
||||
|
||||
for (abd_t *cabd = list_head(&ABD_GANG(abd).abd_gang_chain);
|
||||
cabd != NULL;
|
||||
cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) {
|
||||
if (!abd_is_from_pages(cabd)) {
|
||||
ASSERT0(abd_cmp_buf(cabd, cmp_buf,
|
||||
cabd->abd_size));
|
||||
}
|
||||
cmp_buf = (char *)cmp_buf + cabd->abd_size;
|
||||
}
|
||||
#endif
|
||||
zio_buf_free(buf, n);
|
||||
} else {
|
||||
ASSERT0(abd_cmp_buf(abd, buf, n));
|
||||
zio_buf_free(buf, n);
|
||||
|
||||
@@ -1008,7 +1008,9 @@ abd_borrow_buf_copy(abd_t *abd, size_t n)
|
||||
* borrowed. We can not ASSERT that the contents of the buffer have not changed
|
||||
* if it is composed of user pages because the pages can not be placed under
|
||||
* write protection and the user could have possibly changed the contents in
|
||||
* the pages at any time.
|
||||
* the pages at any time. This is also an issue for Direct I/O reads. Checksum
|
||||
* verifications in the ZIO pipeline check for this issue and handle it by
|
||||
* returning an error on checksum verification failure.
|
||||
*/
|
||||
void
|
||||
abd_return_buf(abd_t *abd, void *buf, size_t n)
|
||||
|
||||
@@ -206,6 +206,7 @@ _VALSTR_BITFIELD_IMPL(zio_flag,
|
||||
{ '.', "PR", "PROBE" },
|
||||
{ '.', "TH", "TRYHARD" },
|
||||
{ '.', "OP", "OPTIONAL" },
|
||||
{ '.', "RD", "DIO_READ" },
|
||||
{ '.', "DQ", "DONT_QUEUE" },
|
||||
{ '.', "DP", "DONT_PROPAGATE" },
|
||||
{ '.', "BY", "IO_BYPASS" },
|
||||
|
||||
@@ -330,7 +330,7 @@ dmu_read_abd(dnode_t *dn, uint64_t offset, uint64_t size,
|
||||
*/
|
||||
zio_t *cio = zio_read(rio, spa, bp, mbuf, db->db.db_size,
|
||||
dmu_read_abd_done, NULL, ZIO_PRIORITY_SYNC_READ,
|
||||
ZIO_FLAG_CANFAIL, &zb);
|
||||
ZIO_FLAG_CANFAIL | ZIO_FLAG_DIO_READ, &zb);
|
||||
mutex_exit(&db->db_mtx);
|
||||
|
||||
zfs_racct_read(spa, db->db.db_size, 1, flags);
|
||||
|
||||
@@ -1026,7 +1026,7 @@ vdev_draid_map_alloc_row(zio_t *zio, raidz_row_t **rrp, uint64_t io_offset,
|
||||
|
||||
ASSERT3U(vdc->vdc_nparity, >, 0);
|
||||
|
||||
raidz_row_t *rr = vdev_raidz_row_alloc(groupwidth);
|
||||
raidz_row_t *rr = vdev_raidz_row_alloc(groupwidth, zio);
|
||||
rr->rr_bigcols = bc;
|
||||
rr->rr_firstdatacol = vdc->vdc_nparity;
|
||||
#ifdef ZFS_DEBUG
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
#include <sys/zap.h>
|
||||
#include <sys/abd.h>
|
||||
#include <sys/zthr.h>
|
||||
#include <sys/fm/fs/zfs.h>
|
||||
|
||||
/*
|
||||
* An indirect vdev corresponds to a vdev that has been removed. Since
|
||||
@@ -1832,6 +1833,19 @@ vdev_indirect_io_done(zio_t *zio)
|
||||
|
||||
zio_bad_cksum_t zbc;
|
||||
int ret = zio_checksum_error(zio, &zbc);
|
||||
/*
|
||||
* Any Direct I/O read that has a checksum error must be treated as
|
||||
* suspicious as the contents of the buffer could be getting
|
||||
* manipulated while the I/O is taking place. The checksum verify error
|
||||
* will be reported to the top-level VDEV.
|
||||
*/
|
||||
if (zio->io_flags & ZIO_FLAG_DIO_READ && ret == ECKSUM) {
|
||||
zio->io_error = ret;
|
||||
zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
|
||||
zio_dio_chksum_verify_error_report(zio);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
if (ret == 0) {
|
||||
zio_checksum_verified(zio);
|
||||
return;
|
||||
|
||||
@@ -764,6 +764,27 @@ vdev_mirror_io_done(zio_t *zio)
|
||||
|
||||
ASSERT(zio->io_type == ZIO_TYPE_READ);
|
||||
|
||||
/*
|
||||
* Any Direct I/O read that has a checksum error must be treated as
|
||||
* suspicious as the contents of the buffer could be getting
|
||||
* manipulated while the I/O is taking place. The checksum verify error
|
||||
* will be reported to the top-level Mirror VDEV.
|
||||
*
|
||||
* There will be no attempt at reading any additional data copies. If
|
||||
* the buffer is still being manipulated while attempting to read from
|
||||
* another child, there exists a possibility that the checksum could be
|
||||
* verified as valid. However, the buffer contents could again get
|
||||
* manipulated after verifying the checksum. This would lead to bad data
|
||||
* being written out during self healing.
|
||||
*/
|
||||
if ((zio->io_flags & ZIO_FLAG_DIO_READ) &&
|
||||
(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)) {
|
||||
zio_dio_chksum_verify_error_report(zio);
|
||||
zio->io_error = vdev_mirror_worst_error(mm);
|
||||
ASSERT3U(zio->io_error, ==, ECKSUM);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we don't have a good copy yet, keep trying other children.
|
||||
*/
|
||||
|
||||
+39
-5
@@ -433,7 +433,7 @@ const zio_vsd_ops_t vdev_raidz_vsd_ops = {
|
||||
};
|
||||
|
||||
raidz_row_t *
|
||||
vdev_raidz_row_alloc(int cols)
|
||||
vdev_raidz_row_alloc(int cols, zio_t *zio)
|
||||
{
|
||||
raidz_row_t *rr =
|
||||
kmem_zalloc(offsetof(raidz_row_t, rr_col[cols]), KM_SLEEP);
|
||||
@@ -445,7 +445,17 @@ vdev_raidz_row_alloc(int cols)
|
||||
raidz_col_t *rc = &rr->rr_col[c];
|
||||
rc->rc_shadow_devidx = INT_MAX;
|
||||
rc->rc_shadow_offset = UINT64_MAX;
|
||||
rc->rc_allow_repair = 1;
|
||||
/*
|
||||
* We can not allow self healing to take place for Direct I/O
|
||||
* reads. There is nothing that stops the buffer contents from
|
||||
* being manipulated while the I/O is in flight. It is possible
|
||||
* that the checksum could be verified on the buffer and then
|
||||
* the contents of that buffer are manipulated afterwards. This
|
||||
* could lead to bad data being written out during self
|
||||
* healing.
|
||||
*/
|
||||
if (!(zio->io_flags & ZIO_FLAG_DIO_READ))
|
||||
rc->rc_allow_repair = 1;
|
||||
}
|
||||
return (rr);
|
||||
}
|
||||
@@ -619,7 +629,7 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols,
|
||||
}
|
||||
|
||||
ASSERT3U(acols, <=, scols);
|
||||
rr = vdev_raidz_row_alloc(scols);
|
||||
rr = vdev_raidz_row_alloc(scols, zio);
|
||||
rm->rm_row[0] = rr;
|
||||
rr->rr_cols = acols;
|
||||
rr->rr_bigcols = bc;
|
||||
@@ -765,7 +775,7 @@ vdev_raidz_map_alloc_expanded(zio_t *zio,
|
||||
|
||||
for (uint64_t row = 0; row < rows; row++) {
|
||||
boolean_t row_use_scratch = B_FALSE;
|
||||
raidz_row_t *rr = vdev_raidz_row_alloc(cols);
|
||||
raidz_row_t *rr = vdev_raidz_row_alloc(cols, zio);
|
||||
rm->rm_row[row] = rr;
|
||||
|
||||
/* The starting RAIDZ (parent) vdev sector of the row. */
|
||||
@@ -2633,6 +2643,20 @@ raidz_checksum_verify(zio_t *zio)
|
||||
raidz_map_t *rm = zio->io_vsd;
|
||||
|
||||
int ret = zio_checksum_error(zio, &zbc);
|
||||
/*
|
||||
* Any Direct I/O read that has a checksum error must be treated as
|
||||
* suspicious as the contents of the buffer could be getting
|
||||
* manipulated while the I/O is taking place. The checksum verify error
|
||||
* will be reported to the top-level RAIDZ VDEV.
|
||||
*/
|
||||
if (zio->io_flags & ZIO_FLAG_DIO_READ && ret == ECKSUM) {
|
||||
zio->io_error = ret;
|
||||
zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
|
||||
zio_dio_chksum_verify_error_report(zio);
|
||||
zio_checksum_verified(zio);
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (ret != 0 && zbc.zbc_injected != 0)
|
||||
rm->rm_ecksuminjected = 1;
|
||||
|
||||
@@ -2776,6 +2800,11 @@ vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)
|
||||
(rc->rc_error == 0 || rc->rc_size == 0)) {
|
||||
continue;
|
||||
}
|
||||
/*
|
||||
* We do not allow self healing for Direct I/O reads.
|
||||
* See comment in vdev_raidz_row_alloc().
|
||||
*/
|
||||
ASSERT0(zio->io_flags & ZIO_FLAG_DIO_READ);
|
||||
|
||||
zfs_dbgmsg("zio=%px repairing c=%u devidx=%u "
|
||||
"offset=%llx",
|
||||
@@ -2979,6 +3008,8 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity)
|
||||
|
||||
/* Check for success */
|
||||
if (raidz_checksum_verify(zio) == 0) {
|
||||
if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)
|
||||
return (0);
|
||||
|
||||
/* Reconstruction succeeded - report errors */
|
||||
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||
@@ -3379,7 +3410,6 @@ vdev_raidz_io_done_unrecoverable(zio_t *zio)
|
||||
zio_bad_cksum_t zbc;
|
||||
zbc.zbc_has_cksum = 0;
|
||||
zbc.zbc_injected = rm->rm_ecksuminjected;
|
||||
|
||||
mutex_enter(&cvd->vdev_stat_lock);
|
||||
cvd->vdev_stat.vs_checksum_errors++;
|
||||
mutex_exit(&cvd->vdev_stat_lock);
|
||||
@@ -3444,6 +3474,9 @@ vdev_raidz_io_done(zio_t *zio)
|
||||
}
|
||||
|
||||
if (raidz_checksum_verify(zio) == 0) {
|
||||
if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)
|
||||
goto done;
|
||||
|
||||
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||
raidz_row_t *rr = rm->rm_row[i];
|
||||
vdev_raidz_io_done_verified(zio, rr);
|
||||
@@ -3538,6 +3571,7 @@ vdev_raidz_io_done(zio_t *zio)
|
||||
}
|
||||
}
|
||||
}
|
||||
done:
|
||||
if (rm->rm_lr != NULL) {
|
||||
zfs_rangelock_exit(rm->rm_lr);
|
||||
rm->rm_lr = NULL;
|
||||
|
||||
+24
-2
@@ -303,6 +303,7 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
|
||||
(void) cr;
|
||||
int error = 0;
|
||||
boolean_t frsync = B_FALSE;
|
||||
boolean_t dio_checksum_failure = B_FALSE;
|
||||
|
||||
zfsvfs_t *zfsvfs = ZTOZSB(zp);
|
||||
if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
|
||||
@@ -424,8 +425,26 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
|
||||
|
||||
if (error) {
|
||||
/* convert checksum errors into IO errors */
|
||||
if (error == ECKSUM)
|
||||
error = SET_ERROR(EIO);
|
||||
if (error == ECKSUM) {
|
||||
/*
|
||||
* If a Direct I/O read returned a checksum
|
||||
* verify error, then it must be treated as
|
||||
* suspicious. The contents of the buffer could
|
||||
* have been manipulated while the I/O was in
|
||||
* flight. In this case, the remainder of I/O
|
||||
* request will just be reissued through the
|
||||
* ARC.
|
||||
*/
|
||||
if (uio->uio_extflg & UIO_DIRECT) {
|
||||
dio_checksum_failure = B_TRUE;
|
||||
uio->uio_extflg &= ~UIO_DIRECT;
|
||||
n += dio_remaining_resid;
|
||||
dio_remaining_resid = 0;
|
||||
continue;
|
||||
} else {
|
||||
error = SET_ERROR(EIO);
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__linux__)
|
||||
/*
|
||||
@@ -472,6 +491,9 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
|
||||
out:
|
||||
zfs_rangelock_exit(lr);
|
||||
|
||||
if (dio_checksum_failure == B_TRUE)
|
||||
uio->uio_extflg |= UIO_DIRECT;
|
||||
|
||||
/*
|
||||
* Cleanup for Direct I/O if requested.
|
||||
*/
|
||||
|
||||
+85
-35
@@ -804,11 +804,11 @@ zio_notify_parent(zio_t *pio, zio_t *zio, enum zio_wait_type wait,
|
||||
pio->io_reexecute |= zio->io_reexecute;
|
||||
ASSERT3U(*countp, >, 0);
|
||||
|
||||
if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) {
|
||||
ASSERT3U(*errorp, ==, EIO);
|
||||
ASSERT3U(pio->io_child_type, ==, ZIO_CHILD_LOGICAL);
|
||||
/*
|
||||
* Propagate the Direct I/O checksum verify failure to the parent.
|
||||
*/
|
||||
if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)
|
||||
pio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
|
||||
}
|
||||
|
||||
(*countp)--;
|
||||
|
||||
@@ -1573,6 +1573,14 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
|
||||
*/
|
||||
pipeline |= ZIO_STAGE_CHECKSUM_VERIFY;
|
||||
pio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY;
|
||||
/*
|
||||
* We never allow the mirror VDEV to attempt reading from any
|
||||
* additional data copies after the first Direct I/O checksum
|
||||
* verify failure. This is to avoid bad data being written out
|
||||
* through the mirror during self healing. See comment in
|
||||
* vdev_mirror_io_done() for more details.
|
||||
*/
|
||||
ASSERT0(pio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR);
|
||||
} else if (type == ZIO_TYPE_WRITE &&
|
||||
pio->io_prop.zp_direct_write == B_TRUE) {
|
||||
/*
|
||||
@@ -4555,18 +4563,18 @@ zio_vdev_io_assess(zio_t *zio)
|
||||
}
|
||||
|
||||
/*
|
||||
* If a Direct I/O write checksum verify error has occurred then this
|
||||
* I/O should not attempt to be issued again. Instead the EIO will
|
||||
* be returned.
|
||||
* If a Direct I/O operation has a checksum verify error then this I/O
|
||||
* should not attempt to be issued again.
|
||||
*/
|
||||
if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) {
|
||||
ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_LOGICAL);
|
||||
ASSERT3U(zio->io_error, ==, EIO);
|
||||
if (zio->io_type == ZIO_TYPE_WRITE) {
|
||||
ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_LOGICAL);
|
||||
ASSERT3U(zio->io_error, ==, EIO);
|
||||
}
|
||||
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
|
||||
return (zio);
|
||||
}
|
||||
|
||||
|
||||
if (zio_injection_enabled && zio->io_error == 0)
|
||||
zio->io_error = zio_handle_fault_injection(zio, EIO);
|
||||
|
||||
@@ -4864,16 +4872,40 @@ zio_checksum_verify(zio_t *zio)
|
||||
ASSERT3U(zio->io_prop.zp_checksum, ==, ZIO_CHECKSUM_LABEL);
|
||||
}
|
||||
|
||||
ASSERT0(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR);
|
||||
IMPLY(zio->io_flags & ZIO_FLAG_DIO_READ,
|
||||
!(zio->io_flags & ZIO_FLAG_SPECULATIVE));
|
||||
|
||||
if ((error = zio_checksum_error(zio, &info)) != 0) {
|
||||
zio->io_error = error;
|
||||
if (error == ECKSUM &&
|
||||
!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
|
||||
mutex_enter(&zio->io_vd->vdev_stat_lock);
|
||||
zio->io_vd->vdev_stat.vs_checksum_errors++;
|
||||
mutex_exit(&zio->io_vd->vdev_stat_lock);
|
||||
(void) zfs_ereport_start_checksum(zio->io_spa,
|
||||
zio->io_vd, &zio->io_bookmark, zio,
|
||||
zio->io_offset, zio->io_size, &info);
|
||||
if (zio->io_flags & ZIO_FLAG_DIO_READ) {
|
||||
zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
|
||||
zio_t *pio = zio_unique_parent(zio);
|
||||
/*
|
||||
* Any Direct I/O read that has a checksum
|
||||
* error must be treated as suspicious as the
|
||||
* contents of the buffer could be getting
|
||||
* manipulated while the I/O is taking place.
|
||||
*
|
||||
* The checksum verify error will only be
|
||||
* reported here for disk and file VDEV's and
|
||||
* will be reported on those that the failure
|
||||
* occurred on. Other types of VDEV's report the
|
||||
* verify failure in their own code paths.
|
||||
*/
|
||||
if (pio->io_child_type == ZIO_CHILD_LOGICAL) {
|
||||
zio_dio_chksum_verify_error_report(zio);
|
||||
}
|
||||
} else {
|
||||
mutex_enter(&zio->io_vd->vdev_stat_lock);
|
||||
zio->io_vd->vdev_stat.vs_checksum_errors++;
|
||||
mutex_exit(&zio->io_vd->vdev_stat_lock);
|
||||
(void) zfs_ereport_start_checksum(zio->io_spa,
|
||||
zio->io_vd, &zio->io_bookmark, zio,
|
||||
zio->io_offset, zio->io_size, &info);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4899,22 +4931,8 @@ zio_dio_checksum_verify(zio_t *zio)
|
||||
if ((error = zio_checksum_error(zio, NULL)) != 0) {
|
||||
zio->io_error = error;
|
||||
if (error == ECKSUM) {
|
||||
mutex_enter(&zio->io_vd->vdev_stat_lock);
|
||||
zio->io_vd->vdev_stat.vs_dio_verify_errors++;
|
||||
mutex_exit(&zio->io_vd->vdev_stat_lock);
|
||||
zio->io_error = SET_ERROR(EIO);
|
||||
zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
|
||||
|
||||
/*
|
||||
* The EIO error must be propagated up to the logical
|
||||
* parent ZIO in zio_notify_parent() so it can be
|
||||
* returned to dmu_write_abd().
|
||||
*/
|
||||
zio->io_flags &= ~ZIO_FLAG_DONT_PROPAGATE;
|
||||
|
||||
(void) zfs_ereport_post(FM_EREPORT_ZFS_DIO_VERIFY,
|
||||
zio->io_spa, zio->io_vd, &zio->io_bookmark,
|
||||
zio, 0);
|
||||
zio_dio_chksum_verify_error_report(zio);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4932,6 +4950,39 @@ zio_checksum_verified(zio_t *zio)
|
||||
zio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY;
|
||||
}
|
||||
|
||||
/*
|
||||
* Report Direct I/O checksum verify error and create ZED event.
*
* The caller must already have set ZIO_FLAG_DIO_CHKSUM_ERR on the zio; this
* is asserted on entry. Logical ZIOs return immediately without counting or
* posting anything. For every other ZIO:
*
*   - vs_dio_verify_errors of zio->io_vd is incremented while holding that
*     vdev's vdev_stat_lock.
*   - For ZIO_TYPE_WRITE, zio->io_error is overwritten with EIO and an
*     FM_EREPORT_ZFS_DIO_VERIFY_WR event is posted.
*   - For any other I/O type, zio->io_error is left untouched and an
*     FM_EREPORT_ZFS_DIO_VERIFY_RD event is posted.
*
* The return value of zfs_ereport_post() is deliberately discarded.
|
||||
*/
|
||||
void
|
||||
zio_dio_chksum_verify_error_report(zio_t *zio)
|
||||
{
|
||||
ASSERT(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR);
|
||||
|
||||
/*
* Nothing is counted or posted for a logical ZIO.
* NOTE(review): presumably because the error is charged to the vdev of
* the child ZIO that detected it -- confirm against callers.
*/
if (zio->io_child_type == ZIO_CHILD_LOGICAL)
|
||||
return;
|
||||
|
||||
/* The per-vdev DIO verify counter is updated under vdev_stat_lock. */
mutex_enter(&zio->io_vd->vdev_stat_lock);
|
||||
zio->io_vd->vdev_stat.vs_dio_verify_errors++;
|
||||
mutex_exit(&zio->io_vd->vdev_stat_lock);
|
||||
if (zio->io_type == ZIO_TYPE_WRITE) {
|
||||
/*
|
||||
* Convert checksum error for writes into EIO.
|
||||
*/
|
||||
zio->io_error = SET_ERROR(EIO);
|
||||
/*
|
||||
* Report dio_verify_wr ZED event.
|
||||
*/
|
||||
(void) zfs_ereport_post(FM_EREPORT_ZFS_DIO_VERIFY_WR,
|
||||
zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, 0);
|
||||
} else {
|
||||
/*
|
||||
* Report dio_verify_rd ZED event. The error already stored in
* zio->io_error is not modified on this path.
|
||||
*/
|
||||
(void) zfs_ereport_post(FM_EREPORT_ZFS_DIO_VERIFY_RD,
|
||||
zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* ==========================================================================
|
||||
* Error rank. Errors are ranked in the order 0, ENXIO, ECKSUM, EIO, other.
|
||||
@@ -5343,10 +5394,9 @@ zio_done(zio_t *zio)
|
||||
|
||||
if (zio->io_reexecute) {
|
||||
/*
|
||||
* A Direct I/O write that has a checksum verify error should
|
||||
* not attempt to reexecute. Instead, EAGAIN should just be
|
||||
* propagated back up so the write can be attempt to be issued
|
||||
* through the ARC.
|
||||
* A Direct I/O operation that has a checksum verify error
|
||||
* should not attempt to reexecute. Instead, the error should
|
||||
* just be propagated back.
|
||||
*/
|
||||
ASSERT(!(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR));
|
||||
|
||||
|
||||
Reference in New Issue
Block a user