mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-06-25 02:28:01 +03:00
zpl_sync_fs: work around kernels that ignore sync_fs errors
If the kernel will honour our error returns, use them. If not, fool it by setting a writeback error on the superblock, if available. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Reviewed-by: Paul Dagnelie <paul.dagnelie@klarasystems.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Alexander Motin <mav@FreeBSD.org> Signed-off-by: Rob Norris <rob.norris@klarasystems.com> Closes #17420
This commit is contained in:
parent
e3f5e317e0
commit
d1c88cbd4c
27
config/kernel-sb-wb-err.m4
Normal file
27
config/kernel-sb-wb-err.m4
Normal file
@ -0,0 +1,27 @@
|
||||
# dnl
|
||||
# dnl 5.8 (735e4ae5ba28) introduced a superblock scoped errseq_t to use to
|
||||
# dnl record writeback errors for syncfs() to return. Up until 5.17, when
|
||||
# dnl sync_fs errors were returned directly, this is the only way for us to
|
||||
# dnl report an error from syncfs().
|
||||
# dnl
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_WB_ERR], [
|
||||
ZFS_LINUX_TEST_SRC([super_block_s_wb_err], [
|
||||
#include <linux/fs.h>
|
||||
|
||||
static const struct super_block
|
||||
sb __attribute__ ((unused)) = {
|
||||
.s_wb_err = 0,
|
||||
};
|
||||
],[])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SUPER_BLOCK_S_WB_ERR], [
|
||||
AC_MSG_CHECKING([whether super_block has s_wb_err])
|
||||
ZFS_LINUX_TEST_RESULT([super_block_s_wb_err], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_SUPER_BLOCK_S_WB_ERR, 1,
|
||||
[have super_block s_wb_err])
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
])
|
@ -131,6 +131,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
|
||||
ZFS_AC_KERNEL_SRC_FILE
|
||||
ZFS_AC_KERNEL_SRC_PIN_USER_PAGES
|
||||
ZFS_AC_KERNEL_SRC_TIMER
|
||||
ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_WB_ERR
|
||||
case "$host_cpu" in
|
||||
powerpc*)
|
||||
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
|
||||
@ -246,6 +247,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
|
||||
ZFS_AC_KERNEL_FILE
|
||||
ZFS_AC_KERNEL_PIN_USER_PAGES
|
||||
ZFS_AC_KERNEL_TIMER
|
||||
ZFS_AC_KERNEL_SUPER_BLOCK_S_WB_ERR
|
||||
case "$host_cpu" in
|
||||
powerpc*)
|
||||
ZFS_AC_KERNEL_CPU_HAS_FEATURE
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include <sys/zfs_ctldir.h>
|
||||
#include <sys/zpl.h>
|
||||
#include <linux/iversion.h>
|
||||
#include <linux/version.h>
|
||||
|
||||
|
||||
static struct inode *
|
||||
@ -105,6 +106,42 @@ zpl_put_super(struct super_block *sb)
|
||||
ASSERT3S(error, <=, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* zfs_sync() is the underlying implementation for the sync(2) and syncfs(2)
|
||||
* syscalls, via sb->s_op->sync_fs().
|
||||
*
|
||||
* Before kernel 5.17 (torvalds/linux@5679897eb104), syncfs() ->
|
||||
* sync_filesystem() would ignore the return from sync_fs(), instead only
|
||||
* considing the error from syncing the underlying block device (sb->s_dev).
|
||||
* Since OpenZFS doesn't _have_ an underlying block device, there's no way for
|
||||
* us to report a sync directly.
|
||||
*
|
||||
* However, in 5.8 (torvalds/linux@735e4ae5ba28) the superblock gained an extra
|
||||
* error store `s_wb_err`, to carry errors seen on page writeback since the
|
||||
* last call to syncfs(). If sync_filesystem() does not return an error, any
|
||||
* existing writeback error on the superblock will be used instead (and cleared
|
||||
* either way). We don't use this (page writeback is a different thing for us),
|
||||
* so for 5.8-5.17 we can use that instead to get syncfs() to return the error.
|
||||
*
|
||||
* Before 5.8, we have no other good options - no matter what happens, the
|
||||
* userspace program will be told the call has succeeded, and so we must make
|
||||
* it so, Therefore, when we are asked to wait for sync to complete (wait ==
|
||||
* 1), if zfs_sync() has returned an error we have no choice but to block,
|
||||
* regardless of the reason.
|
||||
*
|
||||
* The 5.17 change was backported to the 5.10, 5.15 and 5.16 series, and likely
|
||||
* to some vendor kernels. Meanwhile, s_wb_err is still in use in 6.15 (the
|
||||
* mainline Linux series at time of writing), and has likely been backported to
|
||||
* vendor kernels before 5.8. We don't really want to use a workaround when we
|
||||
* don't have to, but we can't really detect whether or not sync_filesystem()
|
||||
* will return our errors (without a difficult runtime test anyway). So, we use
|
||||
* a static version check: any kernel reporting its version as 5.17+ will use a
|
||||
* direct error return, otherwise, we'll either use s_wb_err if it was detected
|
||||
* at configure (5.8-5.16 + vendor backports). If it's unavailable, we will
|
||||
* block to ensure the correct semantics.
|
||||
*
|
||||
* See https://github.com/openzfs/zfs/issues/17416 for further discussion.
|
||||
*/
|
||||
static int
|
||||
zpl_sync_fs(struct super_block *sb, int wait)
|
||||
{
|
||||
@ -115,10 +152,28 @@ zpl_sync_fs(struct super_block *sb, int wait)
|
||||
crhold(cr);
|
||||
cookie = spl_fstrans_mark();
|
||||
error = -zfs_sync(sb, wait, cr);
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 17, 0)
|
||||
#ifdef HAVE_SUPER_BLOCK_S_WB_ERR
|
||||
if (error && wait)
|
||||
errseq_set(&sb->s_wb_err, error);
|
||||
#else
|
||||
if (error && wait) {
|
||||
zfsvfs_t *zfsvfs = sb->s_fs_info;
|
||||
ASSERT3P(zfsvfs, !=, NULL);
|
||||
if (zfs_enter(zfsvfs, FTAG) == 0) {
|
||||
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
|
||||
zfs_exit(zfsvfs, FTAG);
|
||||
error = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif /* < 5.17.0 */
|
||||
|
||||
spl_fstrans_unmark(cookie);
|
||||
crfree(cr);
|
||||
ASSERT3S(error, <=, 0);
|
||||
|
||||
ASSERT3S(error, <=, 0);
|
||||
return (error);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user