mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-23 19:04:45 +03:00
e7524594a9
When copy_file_range overwrites a recent truncation, subsequent reads can incorrectly determine that it is read hole instead of reading the cloned blocks. This can happen when the following conditions are met: - Truncate adds blkid to dn_free_ranges - A new TXG is created - copy_file_range calls dmu_brt_clone which override the block pointer and set DB_NOFILL - Subsequent read, given DB_NOFILL, hits dbuf_read_impl and dbuf_read_hole - dbuf_read_hole calls dnode_block_freed, which returns TRUE because the truncated blkids are still in dn_free_ranges This will not happen if the clone and truncate are in the same TXG, because the block clone would update the current TXG's dn_free_ranges, which is why this bug only triggers under high IO load (such as compilation). Fix this by skipping the dnode_block_freed call if the block is overridden. The fix shouldn't cause an issue when the cloned block is subsequently freed in later TXGs, as dbuf_undirty would remove the override. This requires a dedicated test program as it is much harder to trigger with scripts (this needs to generate a lot of I/O in short period of time for the bug to trigger reliably). Assisted-by: Gemini:gemini-3.1-pro Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Signed-off-by: Gary Guo <gary@kernel.org> Closes #18412 Closes #18421
118 lines
2.6 KiB
C
118 lines
2.6 KiB
C
// SPDX-License-Identifier: CDDL-1.0
|
|
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <limits.h>
|
|
#include <pthread.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <unistd.h>
|
|
#include <sys/stat.h>
|
|
|
|
#if defined(_GNU_SOURCE) && defined(__linux__)
|
|
_Static_assert(sizeof (loff_t) == sizeof (off_t),
|
|
"loff_t and off_t must be the same size");
|
|
#endif
|
|
|
|
ssize_t
|
|
copy_file_range(int, off_t *, int, off_t *, size_t, unsigned int)
|
|
__attribute__((weak));
|
|
|
|
#define FILE_SIZE (1024 * 1024)
|
|
#define RECORD_SIZE (128 * 1024)
|
|
#define NUM_THREADS 64
|
|
|
|
const char *dir;
|
|
volatile int failed;
|
|
|
|
static void *
|
|
run_test(void *arg)
|
|
{
|
|
int thread_id = (int)(long)arg;
|
|
|
|
char src_path[PATH_MAX], dst_path[PATH_MAX];
|
|
snprintf(src_path, PATH_MAX, "%s/src-%d", dir, thread_id);
|
|
snprintf(dst_path, PATH_MAX, "%s/dst-%d", dir, thread_id);
|
|
|
|
unsigned char *write_buf = malloc(FILE_SIZE);
|
|
unsigned char *read_buf = malloc(FILE_SIZE);
|
|
|
|
// Write out expected data.
|
|
memset(write_buf, 0xAA, FILE_SIZE);
|
|
int src = open(src_path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
|
|
if (write(src, write_buf, FILE_SIZE) != FILE_SIZE)
|
|
perror("write");
|
|
close(src);
|
|
|
|
// Create destination file so we exercise O_TRUNC.
|
|
int dst = open(dst_path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
|
|
if (write(dst, write_buf, FILE_SIZE) != FILE_SIZE)
|
|
perror("write");
|
|
fsync(dst);
|
|
close(dst);
|
|
|
|
// Open file with O_TRUNC and perform copy.
|
|
src = open(src_path, O_RDONLY);
|
|
dst = open(dst_path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
|
|
|
|
off_t off_in = 0, off_out = 0;
|
|
ssize_t ret =
|
|
copy_file_range(src, &off_in, dst, &off_out, FILE_SIZE, 0);
|
|
if (ret != FILE_SIZE)
|
|
perror("copy_file_range");
|
|
close(src);
|
|
close(dst);
|
|
|
|
// Read back
|
|
dst = open(dst_path, O_RDONLY);
|
|
if (read(dst, read_buf, FILE_SIZE) != FILE_SIZE)
|
|
perror("read");
|
|
close(dst);
|
|
|
|
// Bug check
|
|
if (memcmp(write_buf, read_buf, FILE_SIZE) != 0) {
|
|
failed = 1;
|
|
fprintf(stderr, "[%d]: FAIL\n", thread_id);
|
|
|
|
int all_zeros = 1;
|
|
for (int i = 0; i < RECORD_SIZE; i++) {
|
|
if (read_buf[i] != 0) {
|
|
all_zeros = 0;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (all_zeros) {
|
|
fprintf(stderr, "[%d]: ALL ZERO\n", thread_id);
|
|
}
|
|
}
|
|
|
|
unlink(src_path);
|
|
unlink(dst_path);
|
|
free(write_buf);
|
|
free(read_buf);
|
|
return (NULL);
|
|
}
|
|
|
|
int
|
|
main(int argc, const char **argv)
|
|
{
|
|
if (argc < 2) {
|
|
fprintf(stderr, "usage: %s <dir>\n", argv[0]);
|
|
return (1);
|
|
}
|
|
dir = argv[1];
|
|
|
|
pthread_t threads[NUM_THREADS];
|
|
|
|
for (int i = 0; i < NUM_THREADS; i++) {
|
|
pthread_create(&threads[i], NULL, run_test, (void *)(long)i);
|
|
}
|
|
for (int i = 0; i < NUM_THREADS; i++) {
|
|
pthread_join(threads[i], NULL);
|
|
}
|
|
|
|
return (failed);
|
|
}
|