From f72226a75cc05e9e013642add3ceab50f789953e Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Mon, 28 Jul 2025 10:33:40 +1000 Subject: [PATCH] Linux: sync: remove async/sync accounting All this machinery is there to try to understand when there an async writeback waiting to complete because the intent log callbacks are still outstanding, and force them with a timely zil_commit(). The next commit fixes this properly, so there's no need for all this extra housekeeping. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Rob Norris Closes #17584 --- include/os/linux/zfs/sys/trace_acl.h | 7 +---- include/sys/zfs_znode.h | 2 -- module/os/freebsd/zfs/zfs_znode_os.c | 7 ----- module/os/linux/zfs/zfs_ctldir.c | 2 -- module/os/linux/zfs/zfs_vnops_os.c | 25 ----------------- module/os/linux/zfs/zfs_znode_os.c | 7 ----- module/os/linux/zfs/zpl_file.c | 41 ---------------------------- module/zfs/zfs_vnops.c | 2 -- 8 files changed, 1 insertion(+), 92 deletions(-) diff --git a/include/os/linux/zfs/sys/trace_acl.h b/include/os/linux/zfs/sys/trace_acl.h index 8923657da..d88b4937e 100644 --- a/include/os/linux/zfs/sys/trace_acl.h +++ b/include/os/linux/zfs/sys/trace_acl.h @@ -59,8 +59,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class, __field(uint64_t, z_size) __field(uint64_t, z_pflags) __field(uint32_t, z_sync_cnt) - __field(uint32_t, z_sync_writes_cnt) - __field(uint32_t, z_async_writes_cnt) __field(mode_t, z_mode) __field(boolean_t, z_is_sa) __field(boolean_t, z_is_ctldir) @@ -92,8 +90,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class, __entry->z_size = zn->z_size; __entry->z_pflags = zn->z_pflags; __entry->z_sync_cnt = zn->z_sync_cnt; - __entry->z_sync_writes_cnt = zn->z_sync_writes_cnt; - __entry->z_async_writes_cnt = zn->z_async_writes_cnt; __entry->z_mode = zn->z_mode; __entry->z_is_sa = zn->z_is_sa; __entry->z_is_ctldir = zn->z_is_ctldir; @@ -117,7 +113,7 @@ DECLARE_EVENT_CLASS(zfs_ace_class, TP_printk("zn { id %llu unlinked %u atime_dirty %u " "zn_prefetch %u blksz %u seq %u " "mapcnt %llu size %llu pflags %llu " - "sync_cnt %u sync_writes_cnt %u async_writes_cnt %u " + "sync_cnt %u " "mode 0x%x is_sa %d is_ctldir %d " "inode { uid %u gid %u ino %lu nlink %u size %lli " "blkbits %u bytes %u mode 0x%x generation %x } } " @@ -126,7 +122,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class, __entry->z_zn_prefetch, __entry->z_blksz, __entry->z_seq, __entry->z_mapcnt, __entry->z_size, __entry->z_pflags, __entry->z_sync_cnt, - __entry->z_sync_writes_cnt, __entry->z_async_writes_cnt, __entry->z_mode, __entry->z_is_sa, __entry->z_is_ctldir, __entry->i_uid, __entry->i_gid, __entry->i_ino, __entry->i_nlink, __entry->i_size, __entry->i_blkbits, diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index b3a267e16..fa3c7b5b3 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -201,8 +201,6 @@ typedef struct znode { uint64_t z_size; /* file size (cached) */ uint64_t z_pflags; /* pflags (cached) */ uint32_t z_sync_cnt; /* synchronous open count */ - uint32_t z_sync_writes_cnt; /* synchronous write count */ - uint32_t z_async_writes_cnt; /* asynchronous write count */ mode_t z_mode; /* mode (cached) */ kmutex_t z_acl_lock; /* acl data lock */ zfs_acl_t *z_acl_cached; /* cached acl */ diff --git a/module/os/freebsd/zfs/zfs_znode_os.c b/module/os/freebsd/zfs/zfs_znode_os.c index e97a0dd84..0d3e36c1e 100644 --- a/module/os/freebsd/zfs/zfs_znode_os.c +++ b/module/os/freebsd/zfs/zfs_znode_os.c @@ -150,8 +150,6 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) zp->z_xattr_cached = NULL; zp->z_xattr_parent = 0; zp->z_vnode = NULL; - zp->z_sync_writes_cnt = 0; - zp->z_async_writes_cnt = 0; return (0); } @@ -172,9 +170,6 @@ zfs_znode_cache_destructor(void *buf, void *arg) ASSERT3P(zp->z_acl_cached, ==, NULL); ASSERT3P(zp->z_xattr_cached, ==, NULL); - - ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt)); - ASSERT0(atomic_load_32(&zp->z_async_writes_cnt)); } @@ -455,8 +450,6 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, zp->z_blksz = blksz; zp->z_seq = 0x7A4653; zp->z_sync_cnt = 0; - zp->z_sync_writes_cnt = 0; - zp->z_async_writes_cnt = 0; atomic_store_ptr(&zp->z_cached_symlink, NULL); zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl); diff --git a/module/os/linux/zfs/zfs_ctldir.c b/module/os/linux/zfs/zfs_ctldir.c index 84b25cb2c..6552a933c 100644 --- a/module/os/linux/zfs/zfs_ctldir.c +++ b/module/os/linux/zfs/zfs_ctldir.c @@ -511,8 +511,6 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, zp->z_pflags = 0; zp->z_mode = 0; zp->z_sync_cnt = 0; - zp->z_sync_writes_cnt = 0; - zp->z_async_writes_cnt = 0; ip->i_generation = 0; ip->i_ino = id; ip->i_mode = (S_IFDIR | S_IRWXUGO); diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index 9ceb6cb8d..1d8f76d86 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -3694,11 +3694,9 @@ static void zfs_putpage_async_commit_cb(void *arg) { struct page *pp = arg; - znode_t *zp = ITOZ(pp->mapping->host); ClearPageError(pp); end_page_writeback(pp); - atomic_dec_32(&zp->z_async_writes_cnt); } /* @@ -3818,15 +3816,6 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, zfs_rangelock_exit(lr); if (wbc->sync_mode != WB_SYNC_NONE) { - /* - * Speed up any non-sync page writebacks since - * they may take several seconds to complete. - * Refer to the comment in zpl_fsync() for details. - */ - if (atomic_load_32(&zp->z_async_writes_cnt) > 0) { - zil_commit(zfsvfs->z_log, zp->z_id); - } - if (PageWriteback(pp)) #ifdef HAVE_PAGEMAP_FOLIO_WAIT_BIT folio_wait_bit(page_folio(pp), PG_writeback); @@ -3852,8 +3841,6 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, * was in fact not skipped and should not be counted as if it were. */ wbc->pages_skipped--; - if (!for_sync) - atomic_inc_32(&zp->z_async_writes_cnt); set_page_writeback(pp); unlock_page(pp); @@ -3872,8 +3859,6 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, #endif ClearPageError(pp); end_page_writeback(pp); - if (!for_sync) - atomic_dec_32(&zp->z_async_writes_cnt); zfs_rangelock_exit(lr); zfs_exit(zfsvfs, FTAG); return (err); @@ -3907,16 +3892,6 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, * performance reasons. */ commit = B_TRUE; - } else if (!for_sync && atomic_load_32(&zp->z_sync_writes_cnt) > 0) { - /* - * If the caller does not intend to wait synchronously - * for this page writeback to complete and there are active - * synchronous calls on this file, do a commit so that - * the latter don't accidentally end up waiting for - * our writeback to complete. Refer to the comment in - * zpl_fsync() (when HAVE_FSYNC_RANGE is defined) for details. - */ - commit = B_TRUE; } zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, commit, diff --git a/module/os/linux/zfs/zfs_znode_os.c b/module/os/linux/zfs/zfs_znode_os.c index 7b28f2640..cbeb18580 100644 --- a/module/os/linux/zfs/zfs_znode_os.c +++ b/module/os/linux/zfs/zfs_znode_os.c @@ -126,8 +126,6 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) zp->z_acl_cached = NULL; zp->z_xattr_cached = NULL; zp->z_xattr_parent = 0; - zp->z_sync_writes_cnt = 0; - zp->z_async_writes_cnt = 0; return (0); } @@ -149,9 +147,6 @@ zfs_znode_cache_destructor(void *buf, void *arg) ASSERT3P(zp->z_dirlocks, ==, NULL); ASSERT3P(zp->z_acl_cached, ==, NULL); ASSERT3P(zp->z_xattr_cached, ==, NULL); - - ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt)); - ASSERT0(atomic_load_32(&zp->z_async_writes_cnt)); } static int @@ -548,8 +543,6 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, zp->z_blksz = blksz; zp->z_seq = 0x7A4653; zp->z_sync_cnt = 0; - zp->z_sync_writes_cnt = 0; - zp->z_async_writes_cnt = 0; zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl); diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c index 1a82c13e1..ef7bd7352 100644 --- a/module/os/linux/zfs/zpl_file.c +++ b/module/os/linux/zfs/zpl_file.c @@ -111,52 +111,11 @@ zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync) { struct inode *inode = filp->f_mapping->host; znode_t *zp = ITOZ(inode); - zfsvfs_t *zfsvfs = ITOZSB(inode); cred_t *cr = CRED(); int error; fstrans_cookie_t cookie; - /* - * The variables z_sync_writes_cnt and z_async_writes_cnt work in - * tandem so that sync writes can detect if there are any non-sync - * writes going on and vice-versa. The "vice-versa" part to this logic - * is located in zfs_putpage() where non-sync writes check if there are - * any ongoing sync writes. If any sync and non-sync writes overlap, - * we do a commit to complete the non-sync writes since the latter can - * potentially take several seconds to complete and thus block sync - * writes in the upcoming call to filemap_write_and_wait_range(). - */ - atomic_inc_32(&zp->z_sync_writes_cnt); - /* - * If the following check does not detect an overlapping non-sync write - * (say because it's just about to start), then it is guaranteed that - * the non-sync write will detect this sync write. This is because we - * always increment z_sync_writes_cnt / z_async_writes_cnt before doing - * the check on z_async_writes_cnt / z_sync_writes_cnt here and in - * zfs_putpage() respectively. - */ - if (atomic_load_32(&zp->z_async_writes_cnt) > 0) { - if ((error = zpl_enter(zfsvfs, FTAG)) != 0) { - atomic_dec_32(&zp->z_sync_writes_cnt); - return (error); - } - zil_commit(zfsvfs->z_log, zp->z_id); - zpl_exit(zfsvfs, FTAG); - } - error = filemap_write_and_wait_range(inode->i_mapping, start, end); - - /* - * The sync write is not complete yet but we decrement - * z_sync_writes_cnt since zfs_fsync() increments and decrements - * it internally. If a non-sync write starts just after the decrement - * operation but before we call zfs_fsync(), it may not detect this - * overlapping sync write but it does not matter since we have already - * gone past filemap_write_and_wait_range() and we won't block due to - * the non-sync write. - */ - atomic_dec_32(&zp->z_sync_writes_cnt); - if (error) return (error); diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 948989070..b6d76a548 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -109,9 +109,7 @@ zfs_fsync(znode_t *zp, int syncflag, cred_t *cr) if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) return (error); - atomic_inc_32(&zp->z_sync_writes_cnt); zil_commit(zfsvfs->z_log, zp->z_id); - atomic_dec_32(&zp->z_sync_writes_cnt); zfs_exit(zfsvfs, FTAG); } return (error);