mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Always validate checksums for Direct I/O reads
This fixes an oversight in the Direct I/O PR. There is nothing that stops a process from manipulating the contents of a buffer for a Direct I/O read while the I/O is in flight. This can lead checksum verify failures. However, the disk contents are still correct, and this would lead to false reporting of checksum validation failures. To remedy this, all Direct I/O reads that have a checksum verification failure are treated as suspicious. In the event a checksum validation failure occurs for a Direct I/O read, then the I/O request will be reissued though the ARC. This allows for actual validation to happen and removes any possibility of the buffer being manipulated after the I/O has been issued. Just as with Direct I/O write checksum validation failures, Direct I/O read checksum validation failures are reported though zpool status -d in the DIO column. Also the zevent has been updated to have both: 1. dio_verify_wr -> Checksum verification failure for writes 2. dio_verify_rd -> Checksum verification failure for reads. This allows for determining what I/O operation was the culprit for the checksum verification failure. All DIO errors are reported only on the top-level VDEV. Even though FreeBSD can write protect pages (stable pages) it still has the same issue as Linux with Direct I/O reads. This commit updates the following: 1. Propogates checksum failures for reads all the way up to the top-level VDEV. 2. Reports errors through zpool status -d as DIO. 3. Has two zevents for checksum verify errors with Direct I/O. One for read and one for write. 4. Updates FreeBSD ABD code to also check for ABD_FLAG_FROM_PAGES and handle ABD buffer contents validation the same as Linux. 5. Updated manipulate_user_buffer.c to also manipulate a buffer while a Direct I/O read is taking place. 6. Adds a new ZTS test case dio_read_verify that stress tests the new code. 7. Updated man pages. 8. Added an IMPLY statement to zio_checksum_verify() to make sure that Direct I/O reads are not issued as speculative. 9. Removed self healing through mirror, raidz, and dRAID VDEVs for Direct I/O reads. This issue was first observed when installing a Windows 11 VM on a ZFS dataset with the dataset property direct set to always. The zpool devices would report checksum failures, but running a subsequent zpool scrub would not repair any data and report no errors. Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Alexander Motin <mav@FreeBSD.org> Signed-off-by: Brian Atkinson <batkinson@lanl.gov> Closes #16598
This commit is contained in:
@@ -20,7 +20,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2022 by Triad National Security, LLC.
|
||||
* Copyright (c) 2024 by Triad National Security, LLC.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
@@ -39,51 +39,59 @@
|
||||
#define MIN(a, b) ((a) < (b)) ? (a) : (b)
|
||||
#endif
|
||||
|
||||
static char *outputfile = NULL;
|
||||
static char *filename = NULL;
|
||||
static int blocksize = 131072; /* 128K */
|
||||
static int wr_err_expected = 0;
|
||||
static int err_expected = 0;
|
||||
static int read_op = 0;
|
||||
static int write_op = 0;
|
||||
static int numblocks = 100;
|
||||
static char *execname = NULL;
|
||||
static int print_usage = 0;
|
||||
static int randompattern = 0;
|
||||
static int ofd;
|
||||
static int fd;
|
||||
char *buf = NULL;
|
||||
|
||||
typedef struct {
|
||||
int entire_file_written;
|
||||
int entire_file_completed;
|
||||
} pthread_args_t;
|
||||
|
||||
static void
|
||||
usage(void)
|
||||
{
|
||||
(void) fprintf(stderr,
|
||||
"usage %s -o outputfile [-b blocksize] [-e wr_error_expected]\n"
|
||||
" [-n numblocks] [-p randpattern] [-h help]\n"
|
||||
"usage %s -f filename [-b blocksize] [-e wr_error_expected]\n"
|
||||
" [-n numblocks] [-p randompattern] -r read_op \n"
|
||||
" -w write_op [-h help]\n"
|
||||
"\n"
|
||||
"Testing whether checksum verify works correctly for O_DIRECT.\n"
|
||||
"when manipulating the contents of a userspace buffer.\n"
|
||||
"\n"
|
||||
" outputfile: File to write to.\n"
|
||||
" blocksize: Size of each block to write (must be at \n"
|
||||
" least >= 512).\n"
|
||||
" wr_err_expected: Whether pwrite() is expected to return EIO\n"
|
||||
" while manipulating the contents of the\n"
|
||||
" buffer.\n"
|
||||
" numblocks: Total number of blocksized blocks to\n"
|
||||
" write.\n"
|
||||
" randpattern: Fill data buffer with random data. Default\n"
|
||||
" behavior is to fill the buffer with the \n"
|
||||
" known data pattern (0xdeadbeef).\n"
|
||||
" filename: File to read or write to.\n"
|
||||
" blocksize: Size of each block to write (must be at \n"
|
||||
" least >= 512).\n"
|
||||
" err_expected: Whether write() is expected to return EIO\n"
|
||||
" while manipulating the contents of the\n"
|
||||
" buffer.\n"
|
||||
" numblocks: Total number of blocksized blocks to\n"
|
||||
" write.\n"
|
||||
" read_op: Perform reads to the filename file while\n"
|
||||
" while manipulating the buffer contents\n"
|
||||
" write_op: Perform writes to the filename file while\n"
|
||||
" manipulating the buffer contents\n"
|
||||
" randompattern: Fill data buffer with random data for \n"
|
||||
" writes. Default behavior is to fill the \n"
|
||||
" buffer with known data pattern (0xdeadbeef)\n"
|
||||
" help: Print usage information and exit.\n"
|
||||
"\n"
|
||||
" Required parameters:\n"
|
||||
" outputfile\n"
|
||||
" filename\n"
|
||||
" read_op or write_op\n"
|
||||
"\n"
|
||||
" Default Values:\n"
|
||||
" blocksize -> 131072\n"
|
||||
" wr_err_expexted -> false\n"
|
||||
" numblocks -> 100\n"
|
||||
" randpattern -> false\n",
|
||||
" randompattern -> false\n",
|
||||
execname);
|
||||
(void) exit(1);
|
||||
}
|
||||
@@ -97,16 +105,21 @@ parse_options(int argc, char *argv[])
|
||||
extern int optind, optopt;
|
||||
execname = argv[0];
|
||||
|
||||
while ((c = getopt(argc, argv, "b:ehn:o:p")) != -1) {
|
||||
while ((c = getopt(argc, argv, "b:ef:hn:rw")) != -1) {
|
||||
switch (c) {
|
||||
case 'b':
|
||||
blocksize = atoi(optarg);
|
||||
break;
|
||||
|
||||
case 'e':
|
||||
wr_err_expected = 1;
|
||||
err_expected = 1;
|
||||
break;
|
||||
|
||||
case 'f':
|
||||
filename = optarg;
|
||||
break;
|
||||
|
||||
|
||||
case 'h':
|
||||
print_usage = 1;
|
||||
break;
|
||||
@@ -115,12 +128,12 @@ parse_options(int argc, char *argv[])
|
||||
numblocks = atoi(optarg);
|
||||
break;
|
||||
|
||||
case 'o':
|
||||
outputfile = optarg;
|
||||
case 'r':
|
||||
read_op = 1;
|
||||
break;
|
||||
|
||||
case 'p':
|
||||
randompattern = 1;
|
||||
case 'w':
|
||||
write_op = 1;
|
||||
break;
|
||||
|
||||
case ':':
|
||||
@@ -141,7 +154,8 @@ parse_options(int argc, char *argv[])
|
||||
if (errflag || print_usage == 1)
|
||||
(void) usage();
|
||||
|
||||
if (blocksize < 512 || outputfile == NULL || numblocks <= 0) {
|
||||
if (blocksize < 512 || filename == NULL || numblocks <= 0 ||
|
||||
(read_op == 0 && write_op == 0)) {
|
||||
(void) fprintf(stderr,
|
||||
"Required paramater(s) missing or invalid.\n");
|
||||
(void) usage();
|
||||
@@ -160,10 +174,10 @@ write_thread(void *arg)
|
||||
ssize_t wrote = 0;
|
||||
pthread_args_t *args = (pthread_args_t *)arg;
|
||||
|
||||
while (!args->entire_file_written) {
|
||||
wrote = pwrite(ofd, buf, blocksize, offset);
|
||||
while (!args->entire_file_completed) {
|
||||
wrote = pwrite(fd, buf, blocksize, offset);
|
||||
if (wrote != blocksize) {
|
||||
if (wr_err_expected)
|
||||
if (err_expected)
|
||||
assert(errno == EIO);
|
||||
else
|
||||
exit(2);
|
||||
@@ -173,7 +187,35 @@ write_thread(void *arg)
|
||||
left -= blocksize;
|
||||
|
||||
if (left == 0)
|
||||
args->entire_file_written = 1;
|
||||
args->entire_file_completed = 1;
|
||||
}
|
||||
|
||||
pthread_exit(NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read blocksize * numblocks to the file using O_DIRECT.
|
||||
*/
|
||||
static void *
|
||||
read_thread(void *arg)
|
||||
{
|
||||
size_t offset = 0;
|
||||
int total_data = blocksize * numblocks;
|
||||
int left = total_data;
|
||||
ssize_t read = 0;
|
||||
pthread_args_t *args = (pthread_args_t *)arg;
|
||||
|
||||
while (!args->entire_file_completed) {
|
||||
read = pread(fd, buf, blocksize, offset);
|
||||
if (read != blocksize) {
|
||||
exit(2);
|
||||
}
|
||||
|
||||
offset = ((offset + blocksize) % total_data);
|
||||
left -= blocksize;
|
||||
|
||||
if (left == 0)
|
||||
args->entire_file_completed = 1;
|
||||
}
|
||||
|
||||
pthread_exit(NULL);
|
||||
@@ -189,7 +231,7 @@ manipulate_buf_thread(void *arg)
|
||||
char rand_char;
|
||||
pthread_args_t *args = (pthread_args_t *)arg;
|
||||
|
||||
while (!args->entire_file_written) {
|
||||
while (!args->entire_file_completed) {
|
||||
rand_offset = (rand() % blocksize);
|
||||
rand_char = (rand() % (126 - 33) + 33);
|
||||
buf[rand_offset] = rand_char;
|
||||
@@ -202,9 +244,9 @@ int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
const char *datapattern = "0xdeadbeef";
|
||||
int ofd_flags = O_WRONLY | O_CREAT | O_DIRECT;
|
||||
int fd_flags = O_DIRECT;
|
||||
mode_t mode = S_IRUSR | S_IWUSR;
|
||||
pthread_t write_thr;
|
||||
pthread_t io_thr;
|
||||
pthread_t manipul_thr;
|
||||
int left = blocksize;
|
||||
int offset = 0;
|
||||
@@ -213,9 +255,15 @@ main(int argc, char *argv[])
|
||||
|
||||
parse_options(argc, argv);
|
||||
|
||||
ofd = open(outputfile, ofd_flags, mode);
|
||||
if (ofd == -1) {
|
||||
(void) fprintf(stderr, "%s, %s\n", execname, outputfile);
|
||||
if (write_op) {
|
||||
fd_flags |= (O_WRONLY | O_CREAT);
|
||||
} else {
|
||||
fd_flags |= O_RDONLY;
|
||||
}
|
||||
|
||||
fd = open(filename, fd_flags, mode);
|
||||
if (fd == -1) {
|
||||
(void) fprintf(stderr, "%s, %s\n", execname, filename);
|
||||
perror("open");
|
||||
exit(2);
|
||||
}
|
||||
@@ -228,24 +276,22 @@ main(int argc, char *argv[])
|
||||
exit(2);
|
||||
}
|
||||
|
||||
if (!randompattern) {
|
||||
/* Putting known data pattern in buffer */
|
||||
while (left) {
|
||||
size_t amt = MIN(strlen(datapattern), left);
|
||||
memcpy(&buf[offset], datapattern, amt);
|
||||
offset += amt;
|
||||
left -= amt;
|
||||
if (write_op) {
|
||||
if (!randompattern) {
|
||||
/* Putting known data pattern in buffer */
|
||||
while (left) {
|
||||
size_t amt = MIN(strlen(datapattern), left);
|
||||
memcpy(&buf[offset], datapattern, amt);
|
||||
offset += amt;
|
||||
left -= amt;
|
||||
}
|
||||
} else {
|
||||
/* Putting random data in buffer */
|
||||
for (int i = 0; i < blocksize; i++)
|
||||
buf[i] = rand();
|
||||
}
|
||||
} else {
|
||||
/* Putting random data in buffer */
|
||||
for (int i = 0; i < blocksize; i++)
|
||||
buf[i] = rand();
|
||||
}
|
||||
|
||||
/*
|
||||
* Writing using O_DIRECT while manipulating the buffer contents until
|
||||
* the entire file is written.
|
||||
*/
|
||||
if ((rc = pthread_create(&manipul_thr, NULL, manipulate_buf_thread,
|
||||
&args))) {
|
||||
fprintf(stderr, "error: pthreads_create, manipul_thr, "
|
||||
@@ -253,18 +299,34 @@ main(int argc, char *argv[])
|
||||
exit(2);
|
||||
}
|
||||
|
||||
if ((rc = pthread_create(&write_thr, NULL, write_thread, &args))) {
|
||||
fprintf(stderr, "error: pthreads_create, write_thr, "
|
||||
"rc: %d\n", rc);
|
||||
exit(2);
|
||||
if (write_op) {
|
||||
/*
|
||||
* Writing using O_DIRECT while manipulating the buffer contents
|
||||
* until the entire file is written.
|
||||
*/
|
||||
if ((rc = pthread_create(&io_thr, NULL, write_thread, &args))) {
|
||||
fprintf(stderr, "error: pthreads_create, io_thr, "
|
||||
"rc: %d\n", rc);
|
||||
exit(2);
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Reading using O_DIRECT while manipulating the buffer contents
|
||||
* until the entire file is read.
|
||||
*/
|
||||
if ((rc = pthread_create(&io_thr, NULL, read_thread, &args))) {
|
||||
fprintf(stderr, "error: pthreads_create, io_thr, "
|
||||
"rc: %d\n", rc);
|
||||
exit(2);
|
||||
}
|
||||
}
|
||||
|
||||
pthread_join(write_thr, NULL);
|
||||
pthread_join(io_thr, NULL);
|
||||
pthread_join(manipul_thr, NULL);
|
||||
|
||||
assert(args.entire_file_written == 1);
|
||||
assert(args.entire_file_completed == 1);
|
||||
|
||||
(void) close(ofd);
|
||||
(void) close(fd);
|
||||
|
||||
free(buf);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user