68be554e71
Use the current ZFS 2.2.4 staging tree [0] with commit deb7a8423 ("Fix corruption caused by mmap flushing problems") on top. Additionally, include an open, but ack'd, pull request [1] that avoids a potential general protection fault due to touching a vbio after it was handed off to the kernel. [0]: https://github.com/openzfs/zfs/commits/zfs-2.2.4-staging/ [1]: https://github.com/openzfs/zfs/pull/16049 Both should mostly touch the module code. Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
105 lines
3.5 KiB
Diff
105 lines
3.5 KiB
Diff
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
From: Robert Evans <rrevans@gmail.com>
|
|
Date: Mon, 25 Mar 2024 17:56:49 -0400
|
|
Subject: [PATCH] Fix corruption caused by mmap flushing problems
|
|
|
|
1) Make mmap flushes synchronous. Linux may skip flushing dirty pages
|
|
already in writeback unless data-integrity sync is requested.
|
|
|
|
2) Change zfs_putpage to use TXG_WAIT. Otherwise dirty pages may be
|
|
skipped due to DMU pushing back on TX assign.
|
|
|
|
3) Add missing mmap flush when doing block cloning.
|
|
|
|
4) While here, pass errors from putpage to writepage/writepages.
|
|
|
|
This change fixes corruption edge cases, but unfortunately adds
|
|
synchronous ZIL flushes for dirty mmap pages to llseek and bclone
|
|
operations. It may be possible to avoid these sync writes later
|
|
but would need more tricky refactoring of the writeback code.
|
|
|
|
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
|
|
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
|
Signed-off-by: Robert Evans <evansr@google.com>
|
|
Closes #15933
|
|
Closes #16019
|
|
---
|
|
module/os/linux/zfs/zfs_vnops_os.c | 5 +----
|
|
module/os/linux/zfs/zpl_file.c | 8 ++++----
|
|
module/zfs/zfs_vnops.c | 6 +++++-
|
|
3 files changed, 10 insertions(+), 9 deletions(-)
|
|
|
|
diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c
|
|
index c06a75662..7c473bc7e 100644
|
|
--- a/module/os/linux/zfs/zfs_vnops_os.c
|
|
+++ b/module/os/linux/zfs/zfs_vnops_os.c
|
|
@@ -3792,11 +3792,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
|
|
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
|
|
zfs_sa_upgrade_txholds(tx, zp);
|
|
|
|
- err = dmu_tx_assign(tx, TXG_NOWAIT);
|
|
+ err = dmu_tx_assign(tx, TXG_WAIT);
|
|
if (err != 0) {
|
|
- if (err == ERESTART)
|
|
- dmu_tx_wait(tx);
|
|
-
|
|
dmu_tx_abort(tx);
|
|
#ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
|
|
filemap_dirty_folio(page_mapping(pp), page_folio(pp));
|
|
diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c
|
|
index 3caa0fc6c..9dec52215 100644
|
|
--- a/module/os/linux/zfs/zpl_file.c
|
|
+++ b/module/os/linux/zfs/zpl_file.c
|
|
@@ -720,23 +720,23 @@ zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
|
|
{
|
|
boolean_t *for_sync = data;
|
|
fstrans_cookie_t cookie;
|
|
+ int ret;
|
|
|
|
ASSERT(PageLocked(pp));
|
|
ASSERT(!PageWriteback(pp));
|
|
|
|
cookie = spl_fstrans_mark();
|
|
- (void) zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
|
|
+ ret = zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
|
|
spl_fstrans_unmark(cookie);
|
|
|
|
- return (0);
|
|
+ return (ret);
|
|
}
|
|
|
|
#ifdef HAVE_WRITEPAGE_T_FOLIO
|
|
static int
|
|
zpl_putfolio(struct folio *pp, struct writeback_control *wbc, void *data)
|
|
{
|
|
- (void) zpl_putpage(&pp->page, wbc, data);
|
|
- return (0);
|
|
+ return (zpl_putpage(&pp->page, wbc, data));
|
|
}
|
|
#endif
|
|
|
|
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
|
|
index 2b37834d5..7020f88ec 100644
|
|
--- a/module/zfs/zfs_vnops.c
|
|
+++ b/module/zfs/zfs_vnops.c
|
|
@@ -130,7 +130,7 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
|
|
|
|
/* Flush any mmap()'d data to disk */
|
|
if (zn_has_cached_data(zp, 0, file_sz - 1))
|
|
- zn_flush_cached_data(zp, B_FALSE);
|
|
+ zn_flush_cached_data(zp, B_TRUE);
|
|
|
|
lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_READER);
|
|
error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff);
|
|
@@ -1193,6 +1193,10 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
|
|
}
|
|
}
|
|
|
|
+ /* Flush any mmap()'d data to disk */
|
|
+ if (zn_has_cached_data(inzp, inoff, inoff + len - 1))
|
|
+ zn_flush_cached_data(inzp, B_TRUE);
|
|
+
|
|
/*
|
|
* Maintain predictable lock order.
|
|
*/
|