Mirror of https://git.proxmox.com/git/mirror_zfs.git (synced 2025-09-15 13:50:11 +03:00)
Linux: sync: remove async/sync accounting
All this machinery is there to try to understand when there is an async writeback waiting to complete because the intent log callbacks are still outstanding, and to force them with a timely zil_commit(). The next commit fixes this properly, so there's no need for all this extra housekeeping.

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <alexander.motin@TrueNAS.com>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Closes #17584
commit f72226a75c
parent 97fe86837c
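For context, here is a minimal, hypothetical C model of the accounting pattern this commit deletes. It is an illustrative sketch, not ZFS code: `znode_model`, `model_zil_commit()`, and the C11 atomics are invented stand-ins for the `znode_t` counters (`z_sync_writes_cnt` / `z_async_writes_cnt`), `zil_commit()`, and the `atomic_*_32()` primitives seen in the hunks below.

/*
 * model.c -- hypothetical sketch of the removed sync/async accounting.
 * Two atomic counters are bumped around each kind of write so that each
 * side can detect the other and force a log commit when they overlap.
 */
#include <stdatomic.h>
#include <stdio.h>

struct znode_model {
	atomic_uint z_sync_writes_cnt;	/* fsync()-style writes in flight */
	atomic_uint z_async_writes_cnt;	/* background writebacks in flight */
};

static void
model_zil_commit(const char *who)
{
	/* Stands in for zil_commit(): flush outstanding ZIL callbacks. */
	printf("zil_commit() forced by %s\n", who);
}

/* Async side (cf. the removed code in zfs_putpage() when !for_sync). */
static void
async_writeback(struct znode_model *zp)
{
	atomic_fetch_add(&zp->z_async_writes_cnt, 1);
	if (atomic_load(&zp->z_sync_writes_cnt) > 0)
		model_zil_commit("async writeback");	/* don't stall a waiting fsync */
	/* ... page writeback would happen here ... */
	atomic_fetch_sub(&zp->z_async_writes_cnt, 1);
}

/* Sync side (cf. the removed code in zpl_fsync()). */
static void
sync_write(struct znode_model *zp)
{
	atomic_fetch_add(&zp->z_sync_writes_cnt, 1);
	if (atomic_load(&zp->z_async_writes_cnt) > 0)
		model_zil_commit("fsync");	/* finish the slow async writeback first */
	/* ... filemap_write_and_wait_range() would block here ... */
	atomic_fetch_sub(&zp->z_sync_writes_cnt, 1);
}

int
main(void)
{
	struct znode_model zp = {0};

	atomic_fetch_add(&zp.z_async_writes_cnt, 1);	/* a writeback starts */
	sync_write(&zp);	/* overlap detected: a commit is forced */
	atomic_fetch_sub(&zp.z_async_writes_cnt, 1);
	async_writeback(&zp);	/* no overlap: no forced commit */
	return (0);
}

The ordering matters: each side increments its own counter before checking the other's, so whichever write starts second is guaranteed to observe the overlap, as the removed zpl_fsync() comment below explains.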
@@ -59,8 +59,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
         __field(uint64_t, z_size)
         __field(uint64_t, z_pflags)
         __field(uint32_t, z_sync_cnt)
-        __field(uint32_t, z_sync_writes_cnt)
-        __field(uint32_t, z_async_writes_cnt)
         __field(mode_t, z_mode)
         __field(boolean_t, z_is_sa)
         __field(boolean_t, z_is_ctldir)
@@ -92,8 +90,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
         __entry->z_size = zn->z_size;
         __entry->z_pflags = zn->z_pflags;
         __entry->z_sync_cnt = zn->z_sync_cnt;
-        __entry->z_sync_writes_cnt = zn->z_sync_writes_cnt;
-        __entry->z_async_writes_cnt = zn->z_async_writes_cnt;
         __entry->z_mode = zn->z_mode;
         __entry->z_is_sa = zn->z_is_sa;
         __entry->z_is_ctldir = zn->z_is_ctldir;
@@ -117,7 +113,7 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
         TP_printk("zn { id %llu unlinked %u atime_dirty %u "
             "zn_prefetch %u blksz %u seq %u "
             "mapcnt %llu size %llu pflags %llu "
-            "sync_cnt %u sync_writes_cnt %u async_writes_cnt %u "
+            "sync_cnt %u "
             "mode 0x%x is_sa %d is_ctldir %d "
             "inode { uid %u gid %u ino %lu nlink %u size %lli "
             "blkbits %u bytes %u mode 0x%x generation %x } } "
@@ -126,7 +122,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
             __entry->z_zn_prefetch, __entry->z_blksz,
             __entry->z_seq, __entry->z_mapcnt, __entry->z_size,
             __entry->z_pflags, __entry->z_sync_cnt,
-            __entry->z_sync_writes_cnt, __entry->z_async_writes_cnt,
             __entry->z_mode, __entry->z_is_sa, __entry->z_is_ctldir,
             __entry->i_uid, __entry->i_gid, __entry->i_ino, __entry->i_nlink,
             __entry->i_size, __entry->i_blkbits,
@@ -201,8 +201,6 @@ typedef struct znode {
         uint64_t        z_size;                 /* file size (cached) */
         uint64_t        z_pflags;               /* pflags (cached) */
         uint32_t        z_sync_cnt;             /* synchronous open count */
-        uint32_t        z_sync_writes_cnt;      /* synchronous write count */
-        uint32_t        z_async_writes_cnt;     /* asynchronous write count */
         mode_t          z_mode;                 /* mode (cached) */
         kmutex_t        z_acl_lock;             /* acl data lock */
         zfs_acl_t       *z_acl_cached;          /* cached acl */
@@ -150,8 +150,6 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
         zp->z_xattr_cached = NULL;
         zp->z_xattr_parent = 0;
         zp->z_vnode = NULL;
-        zp->z_sync_writes_cnt = 0;
-        zp->z_async_writes_cnt = 0;
 
         return (0);
 }
@@ -172,9 +170,6 @@ zfs_znode_cache_destructor(void *buf, void *arg)
 
         ASSERT3P(zp->z_acl_cached, ==, NULL);
         ASSERT3P(zp->z_xattr_cached, ==, NULL);
-
-        ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt));
-        ASSERT0(atomic_load_32(&zp->z_async_writes_cnt));
 }
 
 
@@ -455,8 +450,6 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
         zp->z_blksz = blksz;
         zp->z_seq = 0x7A4653;
         zp->z_sync_cnt = 0;
-        zp->z_sync_writes_cnt = 0;
-        zp->z_async_writes_cnt = 0;
         atomic_store_ptr(&zp->z_cached_symlink, NULL);
 
         zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);
@@ -511,8 +511,6 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
         zp->z_pflags = 0;
         zp->z_mode = 0;
         zp->z_sync_cnt = 0;
-        zp->z_sync_writes_cnt = 0;
-        zp->z_async_writes_cnt = 0;
         ip->i_generation = 0;
         ip->i_ino = id;
         ip->i_mode = (S_IFDIR | S_IRWXUGO);
@@ -3694,11 +3694,9 @@ static void
 zfs_putpage_async_commit_cb(void *arg)
 {
         struct page *pp = arg;
-        znode_t *zp = ITOZ(pp->mapping->host);
 
         ClearPageError(pp);
         end_page_writeback(pp);
-        atomic_dec_32(&zp->z_async_writes_cnt);
 }
 
 /*
@@ -3818,15 +3816,6 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
         zfs_rangelock_exit(lr);
 
         if (wbc->sync_mode != WB_SYNC_NONE) {
-                /*
-                 * Speed up any non-sync page writebacks since
-                 * they may take several seconds to complete.
-                 * Refer to the comment in zpl_fsync() for details.
-                 */
-                if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
-                        zil_commit(zfsvfs->z_log, zp->z_id);
-                }
-
                 if (PageWriteback(pp))
 #ifdef HAVE_PAGEMAP_FOLIO_WAIT_BIT
                         folio_wait_bit(page_folio(pp), PG_writeback);
@@ -3852,8 +3841,6 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
          * was in fact not skipped and should not be counted as if it were.
          */
         wbc->pages_skipped--;
-        if (!for_sync)
-                atomic_inc_32(&zp->z_async_writes_cnt);
         set_page_writeback(pp);
         unlock_page(pp);
 
@@ -3872,8 +3859,6 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 #endif
         ClearPageError(pp);
         end_page_writeback(pp);
-        if (!for_sync)
-                atomic_dec_32(&zp->z_async_writes_cnt);
         zfs_rangelock_exit(lr);
         zfs_exit(zfsvfs, FTAG);
         return (err);
@@ -3907,16 +3892,6 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
                  * performance reasons.
                  */
                 commit = B_TRUE;
-        } else if (!for_sync && atomic_load_32(&zp->z_sync_writes_cnt) > 0) {
-                /*
-                 * If the caller does not intend to wait synchronously
-                 * for this page writeback to complete and there are active
-                 * synchronous calls on this file, do a commit so that
-                 * the latter don't accidentally end up waiting for
-                 * our writeback to complete. Refer to the comment in
-                 * zpl_fsync() (when HAVE_FSYNC_RANGE is defined) for details.
-                 */
-                commit = B_TRUE;
         }
 
         zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, commit,
@@ -126,8 +126,6 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
         zp->z_acl_cached = NULL;
         zp->z_xattr_cached = NULL;
         zp->z_xattr_parent = 0;
-        zp->z_sync_writes_cnt = 0;
-        zp->z_async_writes_cnt = 0;
 
         return (0);
 }
@@ -149,9 +147,6 @@ zfs_znode_cache_destructor(void *buf, void *arg)
         ASSERT3P(zp->z_dirlocks, ==, NULL);
         ASSERT3P(zp->z_acl_cached, ==, NULL);
         ASSERT3P(zp->z_xattr_cached, ==, NULL);
-
-        ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt));
-        ASSERT0(atomic_load_32(&zp->z_async_writes_cnt));
 }
 
 static int
@@ -548,8 +543,6 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
         zp->z_blksz = blksz;
         zp->z_seq = 0x7A4653;
         zp->z_sync_cnt = 0;
-        zp->z_sync_writes_cnt = 0;
-        zp->z_async_writes_cnt = 0;
 
         zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);
 
@@ -111,52 +111,11 @@ zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 {
         struct inode *inode = filp->f_mapping->host;
         znode_t *zp = ITOZ(inode);
-        zfsvfs_t *zfsvfs = ITOZSB(inode);
         cred_t *cr = CRED();
         int error;
         fstrans_cookie_t cookie;
 
-        /*
-         * The variables z_sync_writes_cnt and z_async_writes_cnt work in
-         * tandem so that sync writes can detect if there are any non-sync
-         * writes going on and vice-versa. The "vice-versa" part to this logic
-         * is located in zfs_putpage() where non-sync writes check if there are
-         * any ongoing sync writes. If any sync and non-sync writes overlap,
-         * we do a commit to complete the non-sync writes since the latter can
-         * potentially take several seconds to complete and thus block sync
-         * writes in the upcoming call to filemap_write_and_wait_range().
-         */
-        atomic_inc_32(&zp->z_sync_writes_cnt);
-
-        /*
-         * If the following check does not detect an overlapping non-sync write
-         * (say because it's just about to start), then it is guaranteed that
-         * the non-sync write will detect this sync write. This is because we
-         * always increment z_sync_writes_cnt / z_async_writes_cnt before doing
-         * the check on z_async_writes_cnt / z_sync_writes_cnt here and in
-         * zfs_putpage() respectively.
-         */
-        if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
-                if ((error = zpl_enter(zfsvfs, FTAG)) != 0) {
-                        atomic_dec_32(&zp->z_sync_writes_cnt);
-                        return (error);
-                }
-                zil_commit(zfsvfs->z_log, zp->z_id);
-                zpl_exit(zfsvfs, FTAG);
-        }
-
         error = filemap_write_and_wait_range(inode->i_mapping, start, end);
 
-        /*
-         * The sync write is not complete yet but we decrement
-         * z_sync_writes_cnt since zfs_fsync() increments and decrements
-         * it internally. If a non-sync write starts just after the decrement
-         * operation but before we call zfs_fsync(), it may not detect this
-         * overlapping sync write but it does not matter since we have already
-         * gone past filemap_write_and_wait_range() and we won't block due to
-         * the non-sync write.
-         */
-        atomic_dec_32(&zp->z_sync_writes_cnt);
-
         if (error)
                 return (error);
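With the accounting gone, the region of zpl_fsync() covered by this hunk reduces to its surviving context lines. Reassembled from the hunk above for readability (not a compilable unit on its own; the rest of the function lies outside the hunk and is not shown):

zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
        struct inode *inode = filp->f_mapping->host;
        znode_t *zp = ITOZ(inode);
        cred_t *cr = CRED();
        int error;
        fstrans_cookie_t cookie;

        error = filemap_write_and_wait_range(inode->i_mapping, start, end);

        if (error)
                return (error);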
@@ -109,9 +109,7 @@ zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
         if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
                 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
                         return (error);
-                atomic_inc_32(&zp->z_sync_writes_cnt);
                 zil_commit(zfsvfs->z_log, zp->z_id);
-                atomic_dec_32(&zp->z_sync_writes_cnt);
                 zfs_exit(zfsvfs, FTAG);
         }
         return (error);
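Likewise, reassembling the context lines of the last hunk gives the resulting ZIL-commit path in zfs_fsync():

        if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
                if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
                        return (error);
                zil_commit(zfsvfs->z_log, zp->z_id);
                zfs_exit(zfsvfs, FTAG);
        }
        return (error);

The zil_commit() itself is untouched throughout: only the counter bookkeeping around it disappears, since the next commit in the series makes the overlap handling unnecessary.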