diff --git a/include/os/linux/zfs/sys/trace_common.h b/include/os/linux/zfs/sys/trace_common.h index 85cf8cc20..e1b6d6109 100644 --- a/include/os/linux/zfs/sys/trace_common.h +++ b/include/os/linux/zfs/sys/trace_common.h @@ -45,7 +45,7 @@ __field(zio_flag_t, zio_orig_flags) \ __field(enum zio_stage, zio_orig_stage) \ __field(enum zio_stage, zio_orig_pipeline) \ - __field(uint8_t, zio_reexecute) \ + __field(uint8_t, zio_post) \ __field(uint64_t, zio_txg) \ __field(int, zio_error) \ __field(uint64_t, zio_ena) \ @@ -74,7 +74,7 @@ __entry->zio_orig_flags = zio->io_orig_flags; \ __entry->zio_orig_stage = zio->io_orig_stage; \ __entry->zio_orig_pipeline = zio->io_orig_pipeline; \ - __entry->zio_reexecute = zio->io_reexecute; \ + __entry->zio_post = zio->io_post; \ __entry->zio_txg = zio->io_txg; \ __entry->zio_error = zio->io_error; \ __entry->zio_ena = zio->io_ena; \ @@ -92,7 +92,7 @@ "zio { type %u prio %u size %llu orig_size %llu " \ "offset %llu timestamp %llu delta %llu delay %llu " \ "flags 0x%llx stage 0x%x pipeline 0x%x orig_flags 0x%llx " \ - "orig_stage 0x%x orig_pipeline 0x%x reexecute %u " \ + "orig_stage 0x%x orig_pipeline 0x%x post %u " \ "txg %llu error %d ena %llu prop { checksum %u compress %u " \ "type %u level %u copies %u dedup %u dedup_verify %u nopwrite %u } }" @@ -102,7 +102,7 @@ __entry->zio_timestamp, __entry->zio_delta, __entry->zio_delay, \ __entry->zio_flags, __entry->zio_stage, __entry->zio_pipeline, \ __entry->zio_orig_flags, __entry->zio_orig_stage, \ - __entry->zio_orig_pipeline, __entry->zio_reexecute, \ + __entry->zio_orig_pipeline, __entry->zio_post, \ __entry->zio_txg, __entry->zio_error, __entry->zio_ena, \ __entry->zp_checksum, __entry->zp_compress, __entry->zp_type, \ __entry->zp_level, __entry->zp_copies, __entry->zp_dedup, \ diff --git a/include/sys/zio.h b/include/sys/zio.h index d91a4eb1e..01f3babeb 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -226,8 +226,7 @@ typedef uint64_t zio_flag_t; #define ZIO_FLAG_NOPWRITE 
(1ULL << 29) #define ZIO_FLAG_REEXECUTED (1ULL << 30) #define ZIO_FLAG_DELEGATED (1ULL << 31) -#define ZIO_FLAG_DIO_CHKSUM_ERR (1ULL << 32) -#define ZIO_FLAG_PREALLOCATED (1ULL << 33) +#define ZIO_FLAG_PREALLOCATED (1ULL << 32) #define ZIO_ALLOCATOR_NONE (-1) #define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE) @@ -418,14 +417,16 @@ typedef struct zio_transform { typedef zio_t *zio_pipe_stage_t(zio_t *zio); /* - * The io_reexecute flags are distinct from io_flags because the child must - * be able to propagate them to the parent. The normal io_flags are local - * to the zio, not protected by any lock, and not modifiable by children; - * the reexecute flags are protected by io_lock, modifiable by children, - * and always propagated -- even when ZIO_FLAG_DONT_PROPAGATE is set. + * The io_post flags describe additional actions that a parent IO should + * consider or perform on behalf of a child. They are distinct from io_flags + * because the child must be able to propagate them to the parent. The normal + * io_flags are local to the zio, not protected by any lock, and not modifiable + * by children; the io_post flags are protected by io_lock, modifiable by + * children, and always propagated -- even when ZIO_FLAG_DONT_PROPAGATE is set. */ -#define ZIO_REEXECUTE_NOW 0x01 -#define ZIO_REEXECUTE_SUSPEND 0x02 +#define ZIO_POST_REEXECUTE (1 << 0) +#define ZIO_POST_SUSPEND (1 << 1) +#define ZIO_POST_DIO_CHKSUM_ERR (1 << 2) /* * The io_trim flags are used to specify the type of TRIM to perform. 
They @@ -461,7 +462,7 @@ struct zio { enum zio_child io_child_type; enum trim_flag io_trim_flags; zio_priority_t io_priority; - uint8_t io_reexecute; + uint8_t io_post; uint8_t io_state[ZIO_WAIT_TYPES]; uint64_t io_txg; spa_t *io_spa; diff --git a/module/zcommon/zfs_valstr.c b/module/zcommon/zfs_valstr.c index c39ac62f6..08813b81c 100644 --- a/module/zcommon/zfs_valstr.c +++ b/module/zcommon/zfs_valstr.c @@ -221,7 +221,6 @@ _VALSTR_BITFIELD_IMPL(zio_flag, { '.', "NP", "NOPWRITE" }, { '.', "EX", "REEXECUTED" }, { '.', "DG", "DELEGATED" }, - { '.', "DC", "DIO_CHKSUM_ERR" }, { '.', "PA", "PREALLOCATED" }, ) diff --git a/module/zfs/dmu_direct.c b/module/zfs/dmu_direct.c index 12b0ffa2c..930ff101e 100644 --- a/module/zfs/dmu_direct.c +++ b/module/zfs/dmu_direct.c @@ -104,7 +104,7 @@ dmu_write_direct_done(zio_t *zio) dmu_sync_done(zio, NULL, zio->io_private); if (zio->io_error != 0) { - if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) + if (zio->io_post & ZIO_POST_DIO_CHKSUM_ERR) ASSERT3U(zio->io_error, ==, EIO); /* diff --git a/module/zfs/vdev_indirect.c b/module/zfs/vdev_indirect.c index fac2c3a5f..9fc71fa0e 100644 --- a/module/zfs/vdev_indirect.c +++ b/module/zfs/vdev_indirect.c @@ -1842,7 +1842,7 @@ vdev_indirect_io_done(zio_t *zio) */ if (zio->io_flags & ZIO_FLAG_DIO_READ && ret == ECKSUM) { zio->io_error = ret; - zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR; + zio->io_post |= ZIO_POST_DIO_CHKSUM_ERR; zio_dio_chksum_verify_error_report(zio); ret = 0; } diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c index a6aee9437..2b78340cf 100644 --- a/module/zfs/vdev_mirror.c +++ b/module/zfs/vdev_mirror.c @@ -779,7 +779,7 @@ vdev_mirror_io_done(zio_t *zio) * being written out during self healing. 
*/ if ((zio->io_flags & ZIO_FLAG_DIO_READ) && - (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)) { + (zio->io_post & ZIO_POST_DIO_CHKSUM_ERR)) { zio_dio_chksum_verify_error_report(zio); zio->io_error = vdev_mirror_worst_error(mm); ASSERT3U(zio->io_error, ==, ECKSUM); diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index 71c4bfbda..7a6a01603 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -2691,7 +2691,7 @@ raidz_checksum_verify(zio_t *zio) */ if (zio->io_flags & ZIO_FLAG_DIO_READ && ret == ECKSUM) { zio->io_error = ret; - zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR; + zio->io_post |= ZIO_POST_DIO_CHKSUM_ERR; zio_dio_chksum_verify_error_report(zio); zio_checksum_verified(zio); return (0); @@ -3048,7 +3048,7 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity) /* Check for success */ if (raidz_checksum_verify(zio) == 0) { - if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) + if (zio->io_post & ZIO_POST_DIO_CHKSUM_ERR) return (0); /* Reconstruction succeeded - report errors */ @@ -3514,7 +3514,7 @@ vdev_raidz_io_done(zio_t *zio) } if (raidz_checksum_verify(zio) == 0) { - if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) + if (zio->io_post & ZIO_POST_DIO_CHKSUM_ERR) goto done; for (int i = 0; i < rm->rm_nrows; i++) { diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 6d7bce8b0..c5f385aeb 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -850,15 +850,9 @@ zio_notify_parent(zio_t *pio, zio_t *zio, enum zio_wait_type wait, mutex_enter(&pio->io_lock); if (zio->io_error && !(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE)) *errorp = zio_worst_error(*errorp, zio->io_error); - pio->io_reexecute |= zio->io_reexecute; + pio->io_post |= zio->io_post; ASSERT3U(*countp, >, 0); - /* - * Propogate the Direct I/O checksum verify failure to the parent. 
- */ - if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) - pio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR; - (*countp)--; if (*countp == 0 && pio->io_stall == countp) { @@ -1649,7 +1643,7 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset, * through the mirror during self healing. See comment in * vdev_mirror_io_done() for more details. */ - ASSERT0(pio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR); + ASSERT0(pio->io_post & ZIO_POST_DIO_CHKSUM_ERR); } else if (type == ZIO_TYPE_WRITE && pio->io_prop.zp_direct_write == B_TRUE) { /* @@ -2602,7 +2596,7 @@ zio_reexecute(void *arg) pio->io_flags = pio->io_orig_flags; pio->io_stage = pio->io_orig_stage; pio->io_pipeline = pio->io_orig_pipeline; - pio->io_reexecute = 0; + pio->io_post = 0; pio->io_flags |= ZIO_FLAG_REEXECUTED; pio->io_pipeline_trace = 0; pio->io_error = 0; @@ -4722,7 +4716,7 @@ zio_vdev_io_assess(zio_t *zio) * If a Direct I/O operation has a checksum verify error then this I/O * should not attempt to be issued again. */ - if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) { + if (zio->io_post & ZIO_POST_DIO_CHKSUM_ERR) { if (zio->io_type == ZIO_TYPE_WRITE) { ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_LOGICAL); ASSERT3U(zio->io_error, ==, EIO); @@ -5031,7 +5025,7 @@ zio_checksum_verify(zio_t *zio) ASSERT3U(zio->io_prop.zp_checksum, ==, ZIO_CHECKSUM_LABEL); } - ASSERT0(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR); + ASSERT0(zio->io_post & ZIO_POST_DIO_CHKSUM_ERR); IMPLY(zio->io_flags & ZIO_FLAG_DIO_READ, !(zio->io_flags & ZIO_FLAG_SPECULATIVE)); @@ -5040,7 +5034,7 @@ zio_checksum_verify(zio_t *zio) if (error == ECKSUM && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { if (zio->io_flags & ZIO_FLAG_DIO_READ) { - zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR; + zio->io_post |= ZIO_POST_DIO_CHKSUM_ERR; zio_t *pio = zio_unique_parent(zio); /* * Any Direct I/O read that has a checksum @@ -5090,7 +5084,7 @@ zio_dio_checksum_verify(zio_t *zio) if ((error = zio_checksum_error(zio, NULL)) != 0) { zio->io_error = error; if (error == 
ECKSUM) { - zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR; + zio->io_post |= ZIO_POST_DIO_CHKSUM_ERR; zio_dio_chksum_verify_error_report(zio); } } @@ -5115,7 +5109,7 @@ zio_checksum_verified(zio_t *zio) void zio_dio_chksum_verify_error_report(zio_t *zio) { - ASSERT(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR); + ASSERT(zio->io_post & ZIO_POST_DIO_CHKSUM_ERR); if (zio->io_child_type == ZIO_CHILD_LOGICAL) return; @@ -5431,7 +5425,7 @@ zio_done(zio_t *zio) */ if (zio->io_error != ECKSUM && zio->io_vd != NULL && !vdev_is_dead(zio->io_vd) && - !(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)) { + !(zio->io_post & ZIO_POST_DIO_CHKSUM_ERR)) { int ret = zfs_ereport_post(FM_EREPORT_ZFS_IO, zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, 0); if (ret != EALREADY) { @@ -5446,7 +5440,7 @@ zio_done(zio_t *zio) if ((zio->io_error == EIO || !(zio->io_flags & (ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) && - !(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) && + !(zio->io_post & ZIO_POST_DIO_CHKSUM_ERR) && zio == zio->io_logical) { /* * For logical I/O requests, tell the SPA to log the @@ -5467,7 +5461,7 @@ zio_done(zio_t *zio) */ if (zio->io_error == EAGAIN && IO_IS_ALLOCATING(zio) && zio->io_prop.zp_dedup) { - zio->io_reexecute |= ZIO_REEXECUTE_NOW; + zio->io_post |= ZIO_POST_REEXECUTE; zio->io_prop.zp_dedup = B_FALSE; } /* @@ -5479,11 +5473,11 @@ zio_done(zio_t *zio) if (IO_IS_ALLOCATING(zio) && !(zio->io_flags & ZIO_FLAG_CANFAIL) && - !(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)) { + !(zio->io_post & ZIO_POST_DIO_CHKSUM_ERR)) { if (zio->io_error != ENOSPC) - zio->io_reexecute |= ZIO_REEXECUTE_NOW; + zio->io_post |= ZIO_POST_REEXECUTE; else - zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND; + zio->io_post |= ZIO_POST_SUSPEND; } if ((zio->io_type == ZIO_TYPE_READ || @@ -5492,10 +5486,11 @@ zio_done(zio_t *zio) zio->io_error == ENXIO && spa_load_state(zio->io_spa) == SPA_LOAD_NONE && spa_get_failmode(zio->io_spa) != ZIO_FAILURE_MODE_CONTINUE) - zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND; + zio->io_post 
|= ZIO_POST_SUSPEND; - if (!(zio->io_flags & ZIO_FLAG_CANFAIL) && !zio->io_reexecute) - zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND; + if (!(zio->io_flags & ZIO_FLAG_CANFAIL) && + !(zio->io_post & (ZIO_POST_REEXECUTE|ZIO_POST_SUSPEND))) + zio->io_post |= ZIO_POST_SUSPEND; /* * Here is a possibly good place to attempt to do @@ -5514,7 +5509,8 @@ zio_done(zio_t *zio) */ zio_inherit_child_errors(zio, ZIO_CHILD_LOGICAL); - if ((zio->io_error || zio->io_reexecute) && + if ((zio->io_error || + (zio->io_post & (ZIO_POST_REEXECUTE|ZIO_POST_SUSPEND))) && IO_IS_ALLOCATING(zio) && zio->io_gang_leader == zio && !(zio->io_flags & (ZIO_FLAG_IO_REWRITE | ZIO_FLAG_NOPWRITE))) zio_dva_unallocate(zio, zio->io_gang_tree, zio->io_bp); @@ -5525,16 +5521,16 @@ zio_done(zio_t *zio) * Godfather I/Os should never suspend. */ if ((zio->io_flags & ZIO_FLAG_GODFATHER) && - (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND)) - zio->io_reexecute &= ~ZIO_REEXECUTE_SUSPEND; + (zio->io_post & ZIO_POST_SUSPEND)) + zio->io_post &= ~ZIO_POST_SUSPEND; - if (zio->io_reexecute) { + if (zio->io_post & (ZIO_POST_REEXECUTE|ZIO_POST_SUSPEND)) { /* * A Direct I/O operation that has a checksum verify error * should not attempt to reexecute. Instead, the error should * just be propagated back. */ - ASSERT(!(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)); + ASSERT0(zio->io_post & ZIO_POST_DIO_CHKSUM_ERR); /* * This is a logical I/O that wants to reexecute. @@ -5571,7 +5567,7 @@ zio_done(zio_t *zio) pio_next = zio_walk_parents(zio, &zl); if ((pio->io_flags & ZIO_FLAG_GODFATHER) && - (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND)) { + (zio->io_post & ZIO_POST_SUSPEND)) { zio_remove_child(pio, zio, remove_zl); /* * This is a rare code path, so we don't @@ -5595,13 +5591,14 @@ zio_done(zio_t *zio) * "next_to_execute". 
*/ zio_notify_parent(pio, zio, ZIO_WAIT_DONE, NULL); - } else if (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND) { + } else if (zio->io_post & ZIO_POST_SUSPEND) { /* * We'd fail again if we reexecuted now, so suspend * until conditions improve (e.g. device comes online). */ zio_suspend(zio->io_spa, zio, ZIO_SUSPEND_IOERR); } else { + ASSERT(zio->io_post & ZIO_POST_REEXECUTE); /* * Reexecution is potentially a huge amount of work. * Hand it off to the otherwise-unused claim taskq. @@ -5614,7 +5611,8 @@ zio_done(zio_t *zio) } ASSERT(list_is_empty(&zio->io_child_list)); - ASSERT(zio->io_reexecute == 0); + ASSERT0(zio->io_post & ZIO_POST_REEXECUTE); + ASSERT0(zio->io_post & ZIO_POST_SUSPEND); ASSERT(zio->io_error == 0 || (zio->io_flags & ZIO_FLAG_CANFAIL)); /*