mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Speed up WB_SYNC_NONE when a WB_SYNC_ALL occurs simultaneously
Page writebacks with WB_SYNC_NONE can take several seconds to complete, since they wait for the transaction group to close before being committed. This is usually not a problem, because the caller does not need to wait. However, if a writeback with WB_SYNC_ALL is happening at the same time (e.g., via msync), that writeback can block for several seconds (up to zfs_txg_timeout) behind the active WB_SYNC_NONE writeback, since it needs to wait for the transaction to complete and the PG_writeback bit to be cleared.
This commit deals with 2 cases:
- No page writeback is active. A WB_SYNC_ALL page writeback starts and even completes. But when it is about to check whether the PG_writeback bit has been cleared, another writeback with WB_SYNC_NONE starts. The sync page writeback ends up waiting for the non-sync page writeback to complete.
- A page writeback with WB_SYNC_NONE is already active when a WB_SYNC_ALL writeback starts. The WB_SYNC_ALL writeback ends up waiting for the WB_SYNC_NONE writeback.
The fix works by carefully keeping track of active sync/non-sync writebacks and committing when beneficial.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Shaan Nobee <sniper111@gmail.com>
Closes #12662
Closes #12790
This commit is contained in:
@@ -672,7 +672,7 @@ tags = ['functional', 'migration']
|
||||
|
||||
[tests/functional/mmap]
|
||||
tests = ['mmap_mixed', 'mmap_read_001_pos', 'mmap_seek_001_pos',
|
||||
'mmap_write_001_pos']
|
||||
'mmap_write_001_pos', 'mmap_sync_001_pos']
|
||||
tags = ['functional', 'mmap']
|
||||
|
||||
[tests/functional/mount]
|
||||
|
||||
@@ -189,6 +189,7 @@ if sys.platform.startswith('freebsd'):
|
||||
'cli_root/zpool_wait/zpool_wait_trim_cancel': ['SKIP', trim_reason],
|
||||
'cli_root/zpool_wait/zpool_wait_trim_flag': ['SKIP', trim_reason],
|
||||
'link_count/link_count_001': ['SKIP', na_reason],
|
||||
'mmap/mmap_sync_001_pos': ['SKIP', na_reason],
|
||||
})
|
||||
elif sys.platform.startswith('linux'):
|
||||
known.update({
|
||||
|
||||
@@ -20,6 +20,7 @@ SUBDIRS = \
|
||||
mmap_exec \
|
||||
mmap_libaio \
|
||||
mmap_seek \
|
||||
mmap_sync \
|
||||
mmapwrite \
|
||||
nvlist_to_lua \
|
||||
randwritecomp \
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
/mmap_sync
|
||||
@@ -0,0 +1,6 @@
|
||||
include $(top_srcdir)/config/Rules.am
|
||||
|
||||
pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin
|
||||
|
||||
pkgexec_PROGRAMS = mmap_sync
|
||||
mmap_sync_SOURCES = mmap_sync.c
|
||||
@@ -207,6 +207,7 @@ export ZFSTEST_FILES='badsend
|
||||
mmap_exec
|
||||
mmap_libaio
|
||||
mmap_seek
|
||||
mmap_sync
|
||||
mmapwrite
|
||||
nvlist_to_lua
|
||||
randfree_file
|
||||
|
||||
@@ -6,7 +6,8 @@ dist_pkgdata_SCRIPTS = \
|
||||
mmap_read_001_pos.ksh \
|
||||
mmap_write_001_pos.ksh \
|
||||
mmap_libaio_001_pos.ksh \
|
||||
mmap_seek_001_pos.ksh
|
||||
mmap_seek_001_pos.ksh \
|
||||
mmap_sync_001_pos.ksh
|
||||
|
||||
dist_pkgdata_DATA = \
|
||||
mmap.cfg
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
#!/bin/ksh -p
|
||||
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# msync()s of mmap()'ed file should complete quickly during
|
||||
# background dirty page writebacks by the kernel.
|
||||
#
|
||||
|
||||
# cleanup: write the saved /proc/sys/vm writeback tunables back, then fall
# back to fixed defaults for any tunable that still reads as a test value.
# Reads the globals saved_vm_dirty_expire_centisecs,
# saved_vm_dirty_background_ratio and saved_vm_dirty_writeback_centisecs,
# which the test body fills in before it changes the tunables.
function cleanup
|
||||
{
|
||||
log_must eval "echo $saved_vm_dirty_expire_centisecs > /proc/sys/vm/dirty_expire_centisecs"
|
||||
log_must eval "echo $saved_vm_dirty_background_ratio > /proc/sys/vm/dirty_background_ratio"
|
||||
log_must eval "echo $saved_vm_dirty_writeback_centisecs > /proc/sys/vm/dirty_writeback_centisecs"
|
||||
|
||||
# revert to some sensible defaults if the values we saved
|
||||
# were incorrect due to a previous run being interrupted
|
||||
# (a tunable still reading 1, or 0 for the ratio, after the restore above
|
||||
# is treated as a leftover test value rather than a real setting)
# expire interval still 1 -> reset to 3000 centiseconds
if [ $(</proc/sys/vm/dirty_expire_centisecs) -eq 1 ]; then
|
||||
log_must eval "echo 3000 > /proc/sys/vm/dirty_expire_centisecs"
|
||||
fi
|
||||
|
||||
# background ratio still 0 -> reset to 10 percent
if [ $(</proc/sys/vm/dirty_background_ratio) -eq 0 ]; then
|
||||
log_must eval "echo 10 > /proc/sys/vm/dirty_background_ratio"
|
||||
fi
|
||||
|
||||
# writeback interval still 1 -> reset to 500 centiseconds
if [ $(</proc/sys/vm/dirty_writeback_centisecs) -eq 1 ]; then
|
||||
log_must eval "echo 500 > /proc/sys/vm/dirty_writeback_centisecs"
|
||||
fi
|
||||
}
|
||||
|
||||
# The test drives the Linux-only /proc/sys/vm tunables, so it is reported
# as unsupported on any other platform.
if ! is_linux; then
|
||||
log_unsupported "Only supported on Linux, requires /proc/sys/vm/ tunables"
|
||||
fi
|
||||
|
||||
# Register cleanup before touching any tunable (presumably log_onexit runs
# it when the test exits, pass or fail -- confirm against libtest.shlib).
log_onexit cleanup
|
||||
log_assert "Run the tests for mmap_sync"
|
||||
|
||||
# Save the current values so cleanup can write them back.
read -r saved_vm_dirty_expire_centisecs < /proc/sys/vm/dirty_expire_centisecs
|
||||
read -r saved_vm_dirty_background_ratio < /proc/sys/vm/dirty_background_ratio
|
||||
read -r saved_vm_dirty_writeback_centisecs < /proc/sys/vm/dirty_writeback_centisecs
|
||||
|
||||
# Drop the expiry age, background threshold and flusher interval to their
# smallest useful values so that background dirty-page writeback is running
# while mmap_sync issues its msync() calls.
log_must eval "echo 1 > /proc/sys/vm/dirty_expire_centisecs"
|
||||
# NOTE(review): this writes dirty_background_bytes, while the value saved
# above and restored in cleanup is dirty_background_ratio. On Linux the two
# are counterparts (writing one makes the other read 0), so a host that was
# configured through dirty_background_bytes would not get its original
# setting back -- confirm whether that case matters.
log_must eval "echo 1 > /proc/sys/vm/dirty_background_bytes"
|
||||
log_must eval "echo 1 > /proc/sys/vm/dirty_writeback_centisecs"
|
||||
|
||||
# Run the mmap_sync test binary; presumably log_must fails the test if it
# exits non-zero -- confirm against libtest.shlib.
log_must mmap_sync
|
||||
log_pass "mmap_sync tests passed."
|
||||
Reference in New Issue
Block a user