Make zpool status counters match error events count

The number of IO and checksum events should match the number of errors
seen in zpool status.  Previously there was a mismatch between the
two counts because zpool status would only count unrecovered errors,
while zpool events would get an event for *all* errors (recovered or
not).  This lead to situations where disks could be faulted for
"too many errors", while at the same time showing zero errors in zpool
status.

This fixes the zpool status error counters to increment at the same
times we post the error events.

Reviewed-by: Tom Caputi <tcaputi@datto.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Olaf Faaland <faaland1@llnl.gov>
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Closes #4851 
Closes #7817
This commit is contained in:
Tony Hutter
2019-03-14 18:21:53 -07:00
committed by Brian Behlendorf
parent 04a3b0796c
commit 2bbec1c910
6 changed files with 176 additions and 16 deletions
-11
View File
@@ -4051,17 +4051,6 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
if (zio->io_vd == NULL && (zio->io_flags & ZIO_FLAG_DONT_PROPAGATE))
return;
mutex_enter(&vd->vdev_stat_lock);
if (type == ZIO_TYPE_READ && !vdev_is_dead(vd)) {
if (zio->io_error == ECKSUM)
vs->vs_checksum_errors++;
else
vs->vs_read_errors++;
}
if (type == ZIO_TYPE_WRITE && !vdev_is_dead(vd))
vs->vs_write_errors++;
mutex_exit(&vd->vdev_stat_lock);
if (spa->spa_load_state == SPA_LOAD_NONE &&
type == ZIO_TYPE_WRITE && txg != 0 &&
(!(flags & ZIO_FLAG_IO_REPAIR) ||
+7 -2
View File
@@ -2274,16 +2274,21 @@ vdev_raidz_io_done(zio_t *zio)
if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
for (c = 0; c < rm->rm_cols; c++) {
vdev_t *cvd;
rc = &rm->rm_col[c];
cvd = vd->vdev_child[rc->rc_devidx];
if (rc->rc_error == 0) {
zio_bad_cksum_t zbc;
zbc.zbc_has_cksum = 0;
zbc.zbc_injected =
rm->rm_ecksuminjected;
mutex_enter(&cvd->vdev_stat_lock);
cvd->vdev_stat.vs_checksum_errors++;
mutex_exit(&cvd->vdev_stat_lock);
zfs_ereport_start_checksum(
zio->io_spa,
vd->vdev_child[rc->rc_devidx],
zio->io_spa, cvd,
&zio->io_bookmark, zio,
rc->rc_offset, rc->rc_size,
(void *)(uintptr_t)c, &zbc);
+14 -1
View File
@@ -4132,6 +4132,10 @@ zio_checksum_verify(zio_t *zio)
zio->io_error = error;
if (error == ECKSUM &&
!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
mutex_enter(&zio->io_vd->vdev_stat_lock);
zio->io_vd->vdev_stat.vs_checksum_errors++;
mutex_exit(&zio->io_vd->vdev_stat_lock);
zfs_ereport_start_checksum(zio->io_spa,
zio->io_vd, &zio->io_bookmark, zio,
zio->io_offset, zio->io_size, NULL, &info);
@@ -4467,9 +4471,18 @@ zio_done(zio_t *zio)
* device is currently unavailable.
*/
if (zio->io_error != ECKSUM && zio->io_vd != NULL &&
!vdev_is_dead(zio->io_vd))
!vdev_is_dead(zio->io_vd)) {
mutex_enter(&zio->io_vd->vdev_stat_lock);
if (zio->io_type == ZIO_TYPE_READ) {
zio->io_vd->vdev_stat.vs_read_errors++;
} else if (zio->io_type == ZIO_TYPE_WRITE) {
zio->io_vd->vdev_stat.vs_write_errors++;
}
mutex_exit(&zio->io_vd->vdev_stat_lock);
zfs_ereport_post(FM_EREPORT_ZFS_IO, zio->io_spa,
zio->io_vd, &zio->io_bookmark, zio, 0, 0);
}
if ((zio->io_error == EIO || !(zio->io_flags &
(ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) &&