From d1c88cbd4cc2ebe5b97fa5b39e7fd83294583ed8 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Wed, 4 Jun 2025 13:26:58 +1000 Subject: [PATCH] zpl_sync_fs: work around kernels that ignore sync_fs errors If the kernel will honour our error returns, use them. If not, fool it by setting a writeback error on the superblock, if available. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Reviewed-by: Paul Dagnelie Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Rob Norris Closes #17420 --- config/kernel-sb-wb-err.m4 | 27 ++++++++++++++++ config/kernel.m4 | 2 ++ module/os/linux/zfs/zpl_super.c | 57 ++++++++++++++++++++++++++++++++- 3 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 config/kernel-sb-wb-err.m4 diff --git a/config/kernel-sb-wb-err.m4 b/config/kernel-sb-wb-err.m4 new file mode 100644 index 000000000..814d2ca53 --- /dev/null +++ b/config/kernel-sb-wb-err.m4 @@ -0,0 +1,27 @@ +# dnl +# dnl 5.8 (735e4ae5ba28) introduced a superblock scoped errseq_t to use to +# dnl record writeback errors for syncfs() to return. Up until 5.17, when +# dnl sync_fs errors were returned directly, this is the only way for us to +# dnl report an error from syncfs(). +# dnl +AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_WB_ERR], [ + ZFS_LINUX_TEST_SRC([super_block_s_wb_err], [ + #include + + static const struct super_block + sb __attribute__ ((unused)) = { + .s_wb_err = 0, + }; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SUPER_BLOCK_S_WB_ERR], [ + AC_MSG_CHECKING([whether super_block has s_wb_err]) + ZFS_LINUX_TEST_RESULT([super_block_s_wb_err], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SUPER_BLOCK_S_WB_ERR, 1, + [have super_block s_wb_err]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index b933475e9..c99aed357 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -131,6 +131,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_FILE ZFS_AC_KERNEL_SRC_PIN_USER_PAGES ZFS_AC_KERNEL_SRC_TIMER + ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_WB_ERR case "$host_cpu" in powerpc*) ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE @@ -246,6 +247,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_FILE ZFS_AC_KERNEL_PIN_USER_PAGES ZFS_AC_KERNEL_TIMER + ZFS_AC_KERNEL_SUPER_BLOCK_S_WB_ERR case "$host_cpu" in powerpc*) ZFS_AC_KERNEL_CPU_HAS_FEATURE diff --git a/module/os/linux/zfs/zpl_super.c b/module/os/linux/zfs/zpl_super.c index 40c25e464..a682bfd33 100644 --- a/module/os/linux/zfs/zpl_super.c +++ b/module/os/linux/zfs/zpl_super.c @@ -31,6 +31,7 @@ #include #include #include +#include static struct inode * @@ -105,6 +106,42 @@ zpl_put_super(struct super_block *sb) ASSERT3S(error, <=, 0); } +/* + * zfs_sync() is the underlying implementation for the sync(2) and syncfs(2) + * syscalls, via sb->s_op->sync_fs(). + * + * Before kernel 5.17 (torvalds/linux@5679897eb104), syncfs() -> + * sync_filesystem() would ignore the return from sync_fs(), instead only + * considing the error from syncing the underlying block device (sb->s_dev). + * Since OpenZFS doesn't _have_ an underlying block device, there's no way for + * us to report a sync directly. + * + * However, in 5.8 (torvalds/linux@735e4ae5ba28) the superblock gained an extra + * error store `s_wb_err`, to carry errors seen on page writeback since the + * last call to syncfs(). If sync_filesystem() does not return an error, any + * existing writeback error on the superblock will be used instead (and cleared + * either way). We don't use this (page writeback is a different thing for us), + * so for 5.8-5.17 we can use that instead to get syncfs() to return the error. + * + * Before 5.8, we have no other good options - no matter what happens, the + * userspace program will be told the call has succeeded, and so we must make + * it so, Therefore, when we are asked to wait for sync to complete (wait == + * 1), if zfs_sync() has returned an error we have no choice but to block, + * regardless of the reason. + * + * The 5.17 change was backported to the 5.10, 5.15 and 5.16 series, and likely + * to some vendor kernels. Meanwhile, s_wb_err is still in use in 6.15 (the + * mainline Linux series at time of writing), and has likely been backported to + * vendor kernels before 5.8. We don't really want to use a workaround when we + * don't have to, but we can't really detect whether or not sync_filesystem() + * will return our errors (without a difficult runtime test anyway). So, we use + * a static version check: any kernel reporting its version as 5.17+ will use a + * direct error return, otherwise, we'll either use s_wb_err if it was detected + * at configure (5.8-5.16 + vendor backports). If it's unavailable, we will + * block to ensure the correct semantics. + * + * See https://github.com/openzfs/zfs/issues/17416 for further discussion. + */ static int zpl_sync_fs(struct super_block *sb, int wait) { @@ -115,10 +152,28 @@ zpl_sync_fs(struct super_block *sb, int wait) crhold(cr); cookie = spl_fstrans_mark(); error = -zfs_sync(sb, wait, cr); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 17, 0) +#ifdef HAVE_SUPER_BLOCK_S_WB_ERR + if (error && wait) + errseq_set(&sb->s_wb_err, error); +#else + if (error && wait) { + zfsvfs_t *zfsvfs = sb->s_fs_info; + ASSERT3P(zfsvfs, !=, NULL); + if (zfs_enter(zfsvfs, FTAG) == 0) { + txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); + zfs_exit(zfsvfs, FTAG); + error = 0; + } + } +#endif +#endif /* < 5.17.0 */ + spl_fstrans_unmark(cookie); crfree(cr); - ASSERT3S(error, <=, 0); + ASSERT3S(error, <=, 0); return (error); }