mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-27 04:32:16 +03:00
Speed up WB_SYNC_NONE when a WB_SYNC_ALL occurs simultaneously
Page writebacks with WB_SYNC_NONE can take several seconds to complete since they wait for the transaction group to close before being committed. This is usually not a problem since the caller does not need to wait. However, if we're simultaneously doing a writeback with WB_SYNC_ALL (e.g. via msync), the latter can block for several seconds (up to zfs_txg_timeout) due to the active WB_SYNC_NONE writeback, since it needs to wait for the transaction to complete and the PG_writeback bit to be cleared.

This commit deals with two cases:

- No page writeback is active. A WB_SYNC_ALL page writeback starts and even completes. But when it's about to check if the PG_writeback bit has been cleared, another writeback with WB_SYNC_NONE starts. The sync page writeback ends up waiting for the non-sync page writeback to complete.

- A page writeback with WB_SYNC_NONE is already active when a WB_SYNC_ALL writeback starts. The WB_SYNC_ALL writeback ends up waiting for the WB_SYNC_NONE writeback.

The fix works by carefully keeping track of active sync/non-sync writebacks and committing when beneficial.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Shaan Nobee <sniper111@gmail.com>
Closes #12662
Closes #12790
This commit is contained in:
@@ -3396,7 +3396,7 @@ top:
|
||||
}
|
||||
|
||||
static void
|
||||
zfs_putpage_commit_cb(void *arg)
|
||||
zfs_putpage_sync_commit_cb(void *arg)
|
||||
{
|
||||
struct page *pp = arg;
|
||||
|
||||
@@ -3404,13 +3404,26 @@ zfs_putpage_commit_cb(void *arg)
|
||||
end_page_writeback(pp);
|
||||
}
|
||||
|
||||
/*
 * Commit callback for page writebacks whose caller does not wait
 * synchronously (for_sync == B_FALSE in zfs_putpage()).
 *
 * zfs_putpage() hands this function to zfs_log_write() together with the
 * page being written; `arg` is that struct page pointer.  The callback
 * clears the page's error flag, ends writeback on the page, and then drops
 * the znode's z_async_writes_cnt, which zfs_putpage() incremented just
 * before calling set_page_writeback() for a non-sync writeback.
 *
 * z_async_writes_cnt is what a writeback with sync_mode != WB_SYNC_NONE
 * inspects to decide whether to issue a zil_commit() before waiting on
 * PG_writeback, so the decrement here must pair one-to-one with that
 * increment.
 *
 * NOTE(review): zp is resolved from pp->mapping->host before
 * end_page_writeback() is called; presumably the mapping is only
 * guaranteed stable while the page is under writeback -- confirm before
 * reordering these statements.
 */
static void
|
||||
zfs_putpage_async_commit_cb(void *arg)
|
||||
{
|
||||
struct page *pp = arg;
|
||||
znode_t *zp = ITOZ(pp->mapping->host);
|
||||
|
||||
ClearPageError(pp);
|
||||
end_page_writeback(pp);
|
||||
atomic_dec_32(&zp->z_async_writes_cnt);
|
||||
}
|
||||
|
||||
/*
|
||||
* Push a page out to disk, once the page is on stable storage the
|
||||
* registered commit callback will be run as notification of completion.
|
||||
*
|
||||
* IN: ip - page mapped for inode.
|
||||
* pp - page to push (page is locked)
|
||||
* wbc - writeback control data
|
||||
* IN: ip - page mapped for inode.
|
||||
* pp - page to push (page is locked)
|
||||
* wbc - writeback control data
|
||||
* for_sync - does the caller intend to wait synchronously for the
|
||||
* page writeback to complete?
|
||||
*
|
||||
* RETURN: 0 if success
|
||||
* error code if failure
|
||||
@@ -3419,7 +3432,8 @@ zfs_putpage_commit_cb(void *arg)
|
||||
* ip - ctime|mtime updated
|
||||
*/
|
||||
int
|
||||
zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
|
||||
zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
|
||||
boolean_t for_sync)
|
||||
{
|
||||
znode_t *zp = ITOZ(ip);
|
||||
zfsvfs_t *zfsvfs = ITOZSB(ip);
|
||||
@@ -3517,6 +3531,16 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
|
||||
zfs_rangelock_exit(lr);
|
||||
|
||||
if (wbc->sync_mode != WB_SYNC_NONE) {
|
||||
/*
|
||||
* Speed up any non-sync page writebacks since
|
||||
* they may take several seconds to complete.
|
||||
* Refer to the comment in zpl_fsync() (when
|
||||
* HAVE_FSYNC_RANGE is defined) for details.
|
||||
*/
|
||||
if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
|
||||
zil_commit(zfsvfs->z_log, zp->z_id);
|
||||
}
|
||||
|
||||
if (PageWriteback(pp))
|
||||
#ifdef HAVE_PAGEMAP_FOLIO_WAIT_BIT
|
||||
folio_wait_bit(page_folio(pp), PG_writeback);
|
||||
@@ -3542,6 +3566,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
|
||||
* was in fact not skipped and should not be counted as if it were.
|
||||
*/
|
||||
wbc->pages_skipped--;
|
||||
if (!for_sync)
|
||||
atomic_inc_32(&zp->z_async_writes_cnt);
|
||||
set_page_writeback(pp);
|
||||
unlock_page(pp);
|
||||
|
||||
@@ -3563,6 +3589,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
|
||||
#endif
|
||||
ClearPageError(pp);
|
||||
end_page_writeback(pp);
|
||||
if (!for_sync)
|
||||
atomic_dec_32(&zp->z_async_writes_cnt);
|
||||
zfs_rangelock_exit(lr);
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (err);
|
||||
@@ -3587,7 +3615,9 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
|
||||
err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);
|
||||
|
||||
zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0,
|
||||
zfs_putpage_commit_cb, pp);
|
||||
for_sync ? zfs_putpage_sync_commit_cb :
|
||||
zfs_putpage_async_commit_cb, pp);
|
||||
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
zfs_rangelock_exit(lr);
|
||||
@@ -3599,6 +3629,16 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
|
||||
* performance reasons.
|
||||
*/
|
||||
zil_commit(zfsvfs->z_log, zp->z_id);
|
||||
} else if (!for_sync && atomic_load_32(&zp->z_sync_writes_cnt) > 0) {
|
||||
/*
|
||||
* If the caller does not intend to wait synchronously
|
||||
* for this page writeback to complete and there are active
|
||||
* synchronous calls on this file, do a commit so that
|
||||
* the latter don't accidentally end up waiting for
|
||||
* our writeback to complete. Refer to the comment in
|
||||
* zpl_fsync() (when HAVE_FSYNC_RANGE is defined) for details.
|
||||
*/
|
||||
zil_commit(zfsvfs->z_log, zp->z_id);
|
||||
}
|
||||
|
||||
dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, pglen);
|
||||
|
||||
Reference in New Issue
Block a user