mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 18:40:43 +03:00
Always validate checksums for Direct I/O reads
This fixes an oversight in the Direct I/O PR. There is nothing that stops a process from manipulating the contents of a buffer for a Direct I/O read while the I/O is in flight. This can lead to checksum verify failures. However, the disk contents are still correct, and this would lead to false reporting of checksum validation failures. To remedy this, all Direct I/O reads that have a checksum verification failure are treated as suspicious. In the event a checksum validation failure occurs for a Direct I/O read, then the I/O request will be reissued through the ARC. This allows for actual validation to happen and removes any possibility of the buffer being manipulated after the I/O has been issued. Just as with Direct I/O write checksum validation failures, Direct I/O read checksum validation failures are reported through zpool status -d in the DIO column. Also the zevent has been updated to have both: 1. dio_verify_wr -> Checksum verification failure for writes 2. dio_verify_rd -> Checksum verification failure for reads. This allows for determining what I/O operation was the culprit for the checksum verification failure. All DIO errors are reported only on the top-level VDEV. Even though FreeBSD can write protect pages (stable pages) it still has the same issue as Linux with Direct I/O reads. This commit updates the following: 1. Propagates checksum failures for reads all the way up to the top-level VDEV. 2. Reports errors through zpool status -d as DIO. 3. Has two zevents for checksum verify errors with Direct I/O. One for read and one for write. 4. Updates FreeBSD ABD code to also check for ABD_FLAG_FROM_PAGES and handle ABD buffer contents validation the same as Linux. 5. Updated manipulate_user_buffer.c to also manipulate a buffer while a Direct I/O read is taking place. 6. Adds a new ZTS test case dio_read_verify that stress tests the new code. 7. Updated man pages. 8. 
Added an IMPLY statement to zio_checksum_verify() to make sure that Direct I/O reads are not issued as speculative. 9. Removed self healing through mirror, raidz, and dRAID VDEVs for Direct I/O reads. This issue was first observed when installing a Windows 11 VM on a ZFS dataset with the dataset property direct set to always. The zpool devices would report checksum failures, but running a subsequent zpool scrub would not repair any data and report no errors. Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Alexander Motin <mav@FreeBSD.org> Signed-off-by: Brian Atkinson <batkinson@lanl.gov> Closes #16598
This commit is contained in:
@@ -42,7 +42,8 @@ extern "C" {
|
||||
#define FM_EREPORT_ZFS_DATA "data"
|
||||
#define FM_EREPORT_ZFS_DELAY "delay"
|
||||
#define FM_EREPORT_ZFS_DEADMAN "deadman"
|
||||
#define FM_EREPORT_ZFS_DIO_VERIFY "dio_verify"
|
||||
#define FM_EREPORT_ZFS_DIO_VERIFY_WR "dio_verify_wr"
|
||||
#define FM_EREPORT_ZFS_DIO_VERIFY_RD "dio_verify_rd"
|
||||
#define FM_EREPORT_ZFS_POOL "zpool"
|
||||
#define FM_EREPORT_ZFS_DEVICE_UNKNOWN "vdev.unknown"
|
||||
#define FM_EREPORT_ZFS_DEVICE_OPEN_FAILED "vdev.open_failed"
|
||||
|
||||
@@ -57,7 +57,7 @@ void vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
|
||||
void vdev_raidz_child_done(zio_t *);
|
||||
void vdev_raidz_io_done(zio_t *);
|
||||
void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *);
|
||||
struct raidz_row *vdev_raidz_row_alloc(int);
|
||||
struct raidz_row *vdev_raidz_row_alloc(int, zio_t *);
|
||||
void vdev_raidz_reflow_copy_scratch(spa_t *);
|
||||
void raidz_dtl_reassessed(vdev_t *);
|
||||
|
||||
|
||||
+15
-14
@@ -208,25 +208,25 @@ typedef uint64_t zio_flag_t;
|
||||
#define ZIO_FLAG_PROBE (1ULL << 16)
|
||||
#define ZIO_FLAG_TRYHARD (1ULL << 17)
|
||||
#define ZIO_FLAG_OPTIONAL (1ULL << 18)
|
||||
|
||||
#define ZIO_FLAG_DIO_READ (1ULL << 19)
|
||||
#define ZIO_FLAG_VDEV_INHERIT (ZIO_FLAG_DONT_QUEUE - 1)
|
||||
|
||||
/*
|
||||
* Flags not inherited by any children.
|
||||
*/
|
||||
#define ZIO_FLAG_DONT_QUEUE (1ULL << 19) /* must be first for INHERIT */
|
||||
#define ZIO_FLAG_DONT_PROPAGATE (1ULL << 20)
|
||||
#define ZIO_FLAG_IO_BYPASS (1ULL << 21)
|
||||
#define ZIO_FLAG_IO_REWRITE (1ULL << 22)
|
||||
#define ZIO_FLAG_RAW_COMPRESS (1ULL << 23)
|
||||
#define ZIO_FLAG_RAW_ENCRYPT (1ULL << 24)
|
||||
#define ZIO_FLAG_GANG_CHILD (1ULL << 25)
|
||||
#define ZIO_FLAG_DDT_CHILD (1ULL << 26)
|
||||
#define ZIO_FLAG_GODFATHER (1ULL << 27)
|
||||
#define ZIO_FLAG_NOPWRITE (1ULL << 28)
|
||||
#define ZIO_FLAG_REEXECUTED (1ULL << 29)
|
||||
#define ZIO_FLAG_DELEGATED (1ULL << 30)
|
||||
#define ZIO_FLAG_DIO_CHKSUM_ERR (1ULL << 31)
|
||||
#define ZIO_FLAG_DONT_QUEUE (1ULL << 20) /* must be first for INHERIT */
|
||||
#define ZIO_FLAG_DONT_PROPAGATE (1ULL << 21)
|
||||
#define ZIO_FLAG_IO_BYPASS (1ULL << 22)
|
||||
#define ZIO_FLAG_IO_REWRITE (1ULL << 23)
|
||||
#define ZIO_FLAG_RAW_COMPRESS (1ULL << 24)
|
||||
#define ZIO_FLAG_RAW_ENCRYPT (1ULL << 25)
|
||||
#define ZIO_FLAG_GANG_CHILD (1ULL << 26)
|
||||
#define ZIO_FLAG_DDT_CHILD (1ULL << 27)
|
||||
#define ZIO_FLAG_GODFATHER (1ULL << 28)
|
||||
#define ZIO_FLAG_NOPWRITE (1ULL << 29)
|
||||
#define ZIO_FLAG_REEXECUTED (1ULL << 30)
|
||||
#define ZIO_FLAG_DELEGATED (1ULL << 31)
|
||||
#define ZIO_FLAG_DIO_CHKSUM_ERR (1ULL << 32)
|
||||
|
||||
#define ZIO_ALLOCATOR_NONE (-1)
|
||||
#define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE)
|
||||
@@ -647,6 +647,7 @@ extern void zio_vdev_io_redone(zio_t *zio);
|
||||
extern void zio_change_priority(zio_t *pio, zio_priority_t priority);
|
||||
|
||||
extern void zio_checksum_verified(zio_t *zio);
|
||||
extern void zio_dio_chksum_verify_error_report(zio_t *zio);
|
||||
extern int zio_worst_error(int e1, int e2);
|
||||
|
||||
extern enum zio_checksum zio_checksum_select(enum zio_checksum child,
|
||||
|
||||
Reference in New Issue
Block a user