zpl_sync_fs: work around kernels that ignore sync_fs errors

If the kernel will honour our error returns, use them. If not, fool it
by setting a writeback error on the superblock, if available.

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Reviewed-by: Paul Dagnelie <paul.dagnelie@klarasystems.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Closes #17420
This commit is contained in:
Rob Norris 2025-06-04 13:26:58 +10:00 committed by Brian Behlendorf
parent e3f5e317e0
commit d1c88cbd4c
3 changed files with 85 additions and 1 deletions

View File

@ -0,0 +1,27 @@
# dnl
# dnl 5.8 (735e4ae5ba28) introduced a superblock scoped errseq_t to use to
# dnl record writeback errors for syncfs() to return. Up until 5.17, when
# dnl sync_fs errors were returned directly, this is the only way for us to
# dnl report an error from syncfs().
# dnl
AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_WB_ERR], [
ZFS_LINUX_TEST_SRC([super_block_s_wb_err], [
#include <linux/fs.h>
static const struct super_block
sb __attribute__ ((unused)) = {
.s_wb_err = 0,
};
],[])
])
AC_DEFUN([ZFS_AC_KERNEL_SUPER_BLOCK_S_WB_ERR], [
AC_MSG_CHECKING([whether super_block has s_wb_err])
ZFS_LINUX_TEST_RESULT([super_block_s_wb_err], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_SUPER_BLOCK_S_WB_ERR, 1,
[have super_block s_wb_err])
],[
AC_MSG_RESULT(no)
])
])

View File

@ -131,6 +131,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_FILE ZFS_AC_KERNEL_SRC_FILE
ZFS_AC_KERNEL_SRC_PIN_USER_PAGES ZFS_AC_KERNEL_SRC_PIN_USER_PAGES
ZFS_AC_KERNEL_SRC_TIMER ZFS_AC_KERNEL_SRC_TIMER
ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_WB_ERR
case "$host_cpu" in case "$host_cpu" in
powerpc*) powerpc*)
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
@ -246,6 +247,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_FILE ZFS_AC_KERNEL_FILE
ZFS_AC_KERNEL_PIN_USER_PAGES ZFS_AC_KERNEL_PIN_USER_PAGES
ZFS_AC_KERNEL_TIMER ZFS_AC_KERNEL_TIMER
ZFS_AC_KERNEL_SUPER_BLOCK_S_WB_ERR
case "$host_cpu" in case "$host_cpu" in
powerpc*) powerpc*)
ZFS_AC_KERNEL_CPU_HAS_FEATURE ZFS_AC_KERNEL_CPU_HAS_FEATURE

View File

@ -31,6 +31,7 @@
#include <sys/zfs_ctldir.h> #include <sys/zfs_ctldir.h>
#include <sys/zpl.h> #include <sys/zpl.h>
#include <linux/iversion.h> #include <linux/iversion.h>
#include <linux/version.h>
static struct inode * static struct inode *
@ -105,6 +106,42 @@ zpl_put_super(struct super_block *sb)
ASSERT3S(error, <=, 0); ASSERT3S(error, <=, 0);
} }
/*
* zfs_sync() is the underlying implementation for the sync(2) and syncfs(2)
* syscalls, via sb->s_op->sync_fs().
*
* Before kernel 5.17 (torvalds/linux@5679897eb104), syncfs() ->
* sync_filesystem() would ignore the return from sync_fs(), instead only
* considing the error from syncing the underlying block device (sb->s_dev).
* Since OpenZFS doesn't _have_ an underlying block device, there's no way for
* us to report a sync directly.
*
* However, in 5.8 (torvalds/linux@735e4ae5ba28) the superblock gained an extra
* error store `s_wb_err`, to carry errors seen on page writeback since the
* last call to syncfs(). If sync_filesystem() does not return an error, any
* existing writeback error on the superblock will be used instead (and cleared
* either way). We don't use this (page writeback is a different thing for us),
* so for 5.8-5.17 we can use that instead to get syncfs() to return the error.
*
* Before 5.8, we have no other good options - no matter what happens, the
* userspace program will be told the call has succeeded, and so we must make
* it so, Therefore, when we are asked to wait for sync to complete (wait ==
* 1), if zfs_sync() has returned an error we have no choice but to block,
* regardless of the reason.
*
* The 5.17 change was backported to the 5.10, 5.15 and 5.16 series, and likely
* to some vendor kernels. Meanwhile, s_wb_err is still in use in 6.15 (the
* mainline Linux series at time of writing), and has likely been backported to
* vendor kernels before 5.8. We don't really want to use a workaround when we
* don't have to, but we can't really detect whether or not sync_filesystem()
* will return our errors (without a difficult runtime test anyway). So, we use
* a static version check: any kernel reporting its version as 5.17+ will use a
* direct error return, otherwise, we'll either use s_wb_err if it was detected
* at configure (5.8-5.16 + vendor backports). If it's unavailable, we will
* block to ensure the correct semantics.
*
* See https://github.com/openzfs/zfs/issues/17416 for further discussion.
*/
static int static int
zpl_sync_fs(struct super_block *sb, int wait) zpl_sync_fs(struct super_block *sb, int wait)
{ {
@ -115,10 +152,28 @@ zpl_sync_fs(struct super_block *sb, int wait)
crhold(cr); crhold(cr);
cookie = spl_fstrans_mark(); cookie = spl_fstrans_mark();
error = -zfs_sync(sb, wait, cr); error = -zfs_sync(sb, wait, cr);
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 17, 0)
#ifdef HAVE_SUPER_BLOCK_S_WB_ERR
if (error && wait)
errseq_set(&sb->s_wb_err, error);
#else
if (error && wait) {
zfsvfs_t *zfsvfs = sb->s_fs_info;
ASSERT3P(zfsvfs, !=, NULL);
if (zfs_enter(zfsvfs, FTAG) == 0) {
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
zfs_exit(zfsvfs, FTAG);
error = 0;
}
}
#endif
#endif /* < 5.17.0 */
spl_fstrans_unmark(cookie); spl_fstrans_unmark(cookie);
crfree(cr); crfree(cr);
ASSERT3S(error, <=, 0);
ASSERT3S(error, <=, 0);
return (error); return (error);
} }