Speed up WB_SYNC_NONE when a WB_SYNC_ALL occurs simultaneously

Page writebacks with WB_SYNC_NONE can take several seconds to complete
since they wait for the transaction group to close before being
committed. This is usually not a problem since the caller does not
need to wait. However, if we're simultaneously doing a writeback
with WB_SYNC_ALL (e.g., via msync), the latter can block for several
seconds (up to zfs_txg_timeout) due to the active WB_SYNC_NONE
writeback since it needs to wait for the transaction to complete
and the PG_writeback bit to be cleared.

This commit deals with 2 cases:

- No page writeback is active. A WB_SYNC_ALL page writeback starts
  and even completes. But when it's about to check if the PG_writeback
  bit has been cleared, another writeback with WB_SYNC_NONE starts.
  The sync page writeback ends up waiting for the non-sync page
  writeback to complete.

- A page writeback with WB_SYNC_NONE is already active when a
  WB_SYNC_ALL writeback starts. The WB_SYNC_ALL writeback ends up
  waiting for the WB_SYNC_NONE writeback.

The fix works by keeping track of the number of active sync and non-sync
writebacks on each znode and committing the ZIL when the two overlap, as
sketched below.
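
The heart of the fix is an increment-before-check handshake: each path bumps
its own per-znode counter before sampling the other's, so two overlapping
writebacks can never both miss each other. A minimal, self-contained C sketch
of the pattern follows -- this is illustrative, not ZFS code: the counters
mirror the z_sync_writes_cnt / z_async_writes_cnt fields added below, and
commit_zil() is a hypothetical stand-in for zil_commit().

#include <stdatomic.h>
#include <stdio.h>
#include <pthread.h>

static atomic_uint sync_writes_cnt;
static atomic_uint async_writes_cnt;

static void commit_zil(const char *who)
{
        /* Stand-in for zil_commit(): pushes pending writes to stable storage. */
        printf("%s: overlap detected, committing ZIL\n", who);
}

/* Models zpl_fsync(): a sync writeback (WB_SYNC_ALL, e.g. msync). */
static void *sync_writeback(void *arg)
{
        (void) arg;
        atomic_fetch_add(&sync_writes_cnt, 1);   /* publish first ... */
        if (atomic_load(&async_writes_cnt) > 0)  /* ... then check */
                commit_zil("sync");
        /* filemap_write_and_wait_range() would run here */
        atomic_fetch_sub(&sync_writes_cnt, 1);
        return (NULL);
}

/* Models zfs_putpage() with for_sync == B_FALSE (WB_SYNC_NONE). */
static void *async_writeback(void *arg)
{
        (void) arg;
        atomic_fetch_add(&async_writes_cnt, 1);  /* publish first ... */
        if (atomic_load(&sync_writes_cnt) > 0)   /* ... then check */
                commit_zil("async");
        /* the page write would run here; its commit callback decrements */
        atomic_fetch_sub(&async_writes_cnt, 1);
        return (NULL);
}

int main(void)
{
        pthread_t s, a;
        pthread_create(&s, NULL, sync_writeback, NULL);
        pthread_create(&a, NULL, async_writeback, NULL);
        pthread_join(s, NULL);
        pthread_join(a, NULL);
        return (0);
}

Whichever thread increments second is guaranteed to observe the other's
already-published counter, which is exactly the guarantee the new comment
block in zpl_fsync() argues for.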

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Shaan Nobee <sniper111@gmail.com>
Closes #12662
Closes #12790
Shaan Nobee <sniper111@gmail.com> 2022-05-04 00:23:26 +04:00, committed by Tony Hutter
parent 8a315a30ab
commit 9e5a297de6
19 changed files with 208 additions and 23 deletions

@@ -222,6 +222,7 @@ AC_CONFIG_FILES([
 	tests/zfs-tests/cmd/mmap_exec/Makefile
 	tests/zfs-tests/cmd/mmap_libaio/Makefile
 	tests/zfs-tests/cmd/mmap_seek/Makefile
+	tests/zfs-tests/cmd/mmap_sync/Makefile
 	tests/zfs-tests/cmd/mmapwrite/Makefile
 	tests/zfs-tests/cmd/nvlist_to_lua/Makefile
 	tests/zfs-tests/cmd/randfree_file/Makefile

@@ -58,6 +58,8 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
 	    __field(uint64_t,	z_size)
 	    __field(uint64_t,	z_pflags)
 	    __field(uint32_t,	z_sync_cnt)
+	    __field(uint32_t,	z_sync_writes_cnt)
+	    __field(uint32_t,	z_async_writes_cnt)
 	    __field(mode_t,	z_mode)
 	    __field(boolean_t,	z_is_sa)
 	    __field(boolean_t,	z_is_mapped)
@@ -90,6 +92,8 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
 	    __entry->z_size = zn->z_size;
 	    __entry->z_pflags = zn->z_pflags;
 	    __entry->z_sync_cnt = zn->z_sync_cnt;
+	    __entry->z_sync_writes_cnt = zn->z_sync_writes_cnt;
+	    __entry->z_async_writes_cnt = zn->z_async_writes_cnt;
 	    __entry->z_mode = zn->z_mode;
 	    __entry->z_is_sa = zn->z_is_sa;
 	    __entry->z_is_mapped = zn->z_is_mapped;
@@ -114,17 +118,18 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
 	TP_printk("zn { id %llu unlinked %u atime_dirty %u "
 	    "zn_prefetch %u blksz %u seq %u "
 	    "mapcnt %llu size %llu pflags %llu "
-	    "sync_cnt %u mode 0x%x is_sa %d "
-	    "is_mapped %d is_ctldir %d inode { "
+	    "sync_cnt %u sync_writes_cnt %u async_writes_cnt %u "
+	    "mode 0x%x is_sa %d is_mapped %d is_ctldir %d inode { "
 	    "uid %u gid %u ino %lu nlink %u size %lli "
 	    "blkbits %u bytes %u mode 0x%x generation %x } } "
 	    "ace { type %u flags %u access_mask %u } mask_matched %u",
 	    __entry->z_id, __entry->z_unlinked, __entry->z_atime_dirty,
 	    __entry->z_zn_prefetch, __entry->z_blksz,
 	    __entry->z_seq, __entry->z_mapcnt, __entry->z_size,
-	    __entry->z_pflags, __entry->z_sync_cnt, __entry->z_mode,
-	    __entry->z_is_sa, __entry->z_is_mapped,
+	    __entry->z_pflags, __entry->z_sync_cnt,
+	    __entry->z_sync_writes_cnt, __entry->z_async_writes_cnt,
+	    __entry->z_mode, __entry->z_is_sa, __entry->z_is_mapped,
 	    __entry->z_is_ctldir, __entry->i_uid,
 	    __entry->i_gid, __entry->i_ino, __entry->i_nlink,
 	    __entry->i_size, __entry->i_blkbits,
 	    __entry->i_bytes, __entry->i_mode, __entry->i_generation,

@@ -70,7 +70,7 @@ extern int zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
 extern int zfs_fid(struct inode *ip, fid_t *fidp);
 extern int zfs_getpage(struct inode *ip, struct page *pp);
 extern int zfs_putpage(struct inode *ip, struct page *pp,
-    struct writeback_control *wbc);
+    struct writeback_control *wbc, boolean_t for_sync);
 extern int zfs_dirty_inode(struct inode *ip, int flags);
 extern int zfs_map(struct inode *ip, offset_t off, caddr_t *addrp,
     size_t len, unsigned long vm_flags);

@@ -198,6 +198,8 @@ typedef struct znode {
 	uint64_t	z_size;			/* file size (cached) */
 	uint64_t	z_pflags;		/* pflags (cached) */
 	uint32_t	z_sync_cnt;		/* synchronous open count */
+	uint32_t	z_sync_writes_cnt;	/* synchronous write count */
+	uint32_t	z_async_writes_cnt;	/* asynchronous write count */
 	mode_t		z_mode;			/* mode (cached) */
 	kmutex_t	z_acl_lock;		/* acl data lock */
 	zfs_acl_t	*z_acl_cached;		/* cached acl */

@@ -153,6 +153,9 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
 	zp->z_xattr_cached = NULL;
 	zp->z_xattr_parent = 0;
 	zp->z_vnode = NULL;
+	zp->z_sync_writes_cnt = 0;
+	zp->z_async_writes_cnt = 0;
+
 	return (0);
 }
@@ -172,6 +175,9 @@ zfs_znode_cache_destructor(void *buf, void *arg)
 	ASSERT3P(zp->z_acl_cached, ==, NULL);
 	ASSERT3P(zp->z_xattr_cached, ==, NULL);
+
+	ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt));
+	ASSERT0(atomic_load_32(&zp->z_async_writes_cnt));
 }
@@ -457,6 +463,8 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
 	zp->z_blksz = blksz;
 	zp->z_seq = 0x7A4653;
 	zp->z_sync_cnt = 0;
+	zp->z_sync_writes_cnt = 0;
+	zp->z_async_writes_cnt = 0;
 #if __FreeBSD_version >= 1300139
 	atomic_store_ptr(&zp->z_cached_symlink, NULL);
 #endif

@@ -478,6 +478,8 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
 	zp->z_pflags = 0;
 	zp->z_mode = 0;
 	zp->z_sync_cnt = 0;
+	zp->z_sync_writes_cnt = 0;
+	zp->z_async_writes_cnt = 0;
 	ip->i_generation = 0;
 	ip->i_ino = id;
 	ip->i_mode = (S_IFDIR | S_IRWXUGO);

@@ -3445,7 +3445,7 @@ top:
 }
 
 static void
-zfs_putpage_commit_cb(void *arg)
+zfs_putpage_sync_commit_cb(void *arg)
 {
 	struct page *pp = arg;
@@ -3453,13 +3453,26 @@ zfs_putpage_commit_cb(void *arg)
 	end_page_writeback(pp);
 }
 
+static void
+zfs_putpage_async_commit_cb(void *arg)
+{
+	struct page *pp = arg;
+	znode_t *zp = ITOZ(pp->mapping->host);
+
+	ClearPageError(pp);
+	end_page_writeback(pp);
+	atomic_dec_32(&zp->z_async_writes_cnt);
+}
+
 /*
  * Push a page out to disk, once the page is on stable storage the
  * registered commit callback will be run as notification of completion.
  *
  * IN:	ip	 - page mapped for inode.
  *	pp	 - page to push (page is locked)
  *	wbc	 - writeback control data
+ *	for_sync - does the caller intend to wait synchronously for the
+ *		   page writeback to complete?
  *
  * RETURN:	0 if success
  *		error code if failure
@@ -3469,7 +3482,8 @@ zfs_putpage_commit_cb(void *arg)
  */
 /* ARGSUSED */
 int
-zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
+zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
+    boolean_t for_sync)
 {
 	znode_t		*zp = ITOZ(ip);
 	zfsvfs_t	*zfsvfs = ITOZSB(ip);
@@ -3567,6 +3581,16 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
 		zfs_rangelock_exit(lr);
 
 		if (wbc->sync_mode != WB_SYNC_NONE) {
+			/*
+			 * Speed up any non-sync page writebacks since
+			 * they may take several seconds to complete.
+			 * Refer to the comment in zpl_fsync() (when
+			 * HAVE_FSYNC_RANGE is defined) for details.
+			 */
+			if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
+				zil_commit(zfsvfs->z_log, zp->z_id);
+			}
+
 			if (PageWriteback(pp))
 #ifdef HAVE_PAGEMAP_FOLIO_WAIT_BIT
 				folio_wait_bit(page_folio(pp), PG_writeback);
@@ -3592,6 +3616,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
 	 * was in fact not skipped and should not be counted as if it were.
 	 */
 	wbc->pages_skipped--;
+	if (!for_sync)
+		atomic_inc_32(&zp->z_async_writes_cnt);
 	set_page_writeback(pp);
 	unlock_page(pp);
@@ -3613,6 +3639,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
 #endif
 		ClearPageError(pp);
 		end_page_writeback(pp);
+		if (!for_sync)
+			atomic_dec_32(&zp->z_async_writes_cnt);
 		zfs_rangelock_exit(lr);
 		ZFS_EXIT(zfsvfs);
 		return (err);
@@ -3637,7 +3665,9 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
 	err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);
 
 	zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0,
-	    zfs_putpage_commit_cb, pp);
+	    for_sync ? zfs_putpage_sync_commit_cb :
+	    zfs_putpage_async_commit_cb, pp);
+
 	dmu_tx_commit(tx);
 
 	zfs_rangelock_exit(lr);
@@ -3649,6 +3679,16 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
 		 * performance reasons.
 		 */
 		zil_commit(zfsvfs->z_log, zp->z_id);
+	} else if (!for_sync && atomic_load_32(&zp->z_sync_writes_cnt) > 0) {
+		/*
+		 * If the caller does not intend to wait synchronously
+		 * for this page writeback to complete and there are active
+		 * synchronous calls on this file, do a commit so that
+		 * the latter don't accidentally end up waiting for
+		 * our writeback to complete. Refer to the comment in
+		 * zpl_fsync() (when HAVE_FSYNC_RANGE is defined) for details.
+		 */
+		zil_commit(zfsvfs->z_log, zp->z_id);
 	}
 
 	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, pglen);

@@ -134,6 +134,9 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
 	zp->z_acl_cached = NULL;
 	zp->z_xattr_cached = NULL;
 	zp->z_xattr_parent = 0;
+	zp->z_sync_writes_cnt = 0;
+	zp->z_async_writes_cnt = 0;
+
 	return (0);
 }
@@ -151,9 +154,12 @@ zfs_znode_cache_destructor(void *buf, void *arg)
 	rw_destroy(&zp->z_xattr_lock);
 	zfs_rangelock_fini(&zp->z_rangelock);
 
-	ASSERT(zp->z_dirlocks == NULL);
-	ASSERT(zp->z_acl_cached == NULL);
-	ASSERT(zp->z_xattr_cached == NULL);
+	ASSERT3P(zp->z_dirlocks, ==, NULL);
+	ASSERT3P(zp->z_acl_cached, ==, NULL);
+	ASSERT3P(zp->z_xattr_cached, ==, NULL);
+
+	ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt));
+	ASSERT0(atomic_load_32(&zp->z_async_writes_cnt));
 }
 
 static int
@@ -549,6 +555,8 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
 	zp->z_blksz = blksz;
 	zp->z_seq = 0x7A4653;
 	zp->z_sync_cnt = 0;
+	zp->z_sync_writes_cnt = 0;
+	zp->z_async_writes_cnt = 0;
 
 	zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);

@@ -165,17 +165,56 @@ static int
 zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 {
 	struct inode *inode = filp->f_mapping->host;
+	znode_t *zp = ITOZ(inode);
+	zfsvfs_t *zfsvfs = ITOZSB(inode);
 	cred_t *cr = CRED();
 	int error;
 	fstrans_cookie_t cookie;
 
+	/*
+	 * The variables z_sync_writes_cnt and z_async_writes_cnt work in
+	 * tandem so that sync writes can detect if there are any non-sync
+	 * writes going on and vice-versa. The "vice-versa" part to this logic
+	 * is located in zfs_putpage() where non-sync writes check if there are
+	 * any ongoing sync writes. If any sync and non-sync writes overlap,
+	 * we do a commit to complete the non-sync writes since the latter can
+	 * potentially take several seconds to complete and thus block sync
+	 * writes in the upcoming call to filemap_write_and_wait_range().
+	 */
+	atomic_inc_32(&zp->z_sync_writes_cnt);
+
+	/*
+	 * If the following check does not detect an overlapping non-sync write
+	 * (say because it's just about to start), then it is guaranteed that
+	 * the non-sync write will detect this sync write. This is because we
+	 * always increment z_sync_writes_cnt / z_async_writes_cnt before doing
+	 * the check on z_async_writes_cnt / z_sync_writes_cnt here and in
+	 * zfs_putpage() respectively.
+	 */
+	if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
+		ZPL_ENTER(zfsvfs);
+		zil_commit(zfsvfs->z_log, zp->z_id);
+		ZPL_EXIT(zfsvfs);
+	}
+
 	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
+
+	/*
+	 * The sync write is not complete yet but we decrement
+	 * z_sync_writes_cnt since zfs_fsync() increments and decrements
+	 * it internally. If a non-sync write starts just after the decrement
+	 * operation but before we call zfs_fsync(), it may not detect this
+	 * overlapping sync write but it does not matter since we have already
+	 * gone past filemap_write_and_wait_range() and we won't block due to
+	 * the non-sync write.
+	 */
+	atomic_dec_32(&zp->z_sync_writes_cnt);
+
 	if (error)
 		return (error);
 
 	crhold(cr);
 	cookie = spl_fstrans_mark();
-	error = -zfs_fsync(ITOZ(inode), datasync, cr);
+	error = -zfs_fsync(zp, datasync, cr);
 	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
@@ -675,14 +714,14 @@ zpl_readahead(struct readahead_control *ractl)
 static int
 zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
 {
-	struct address_space *mapping = data;
+	boolean_t *for_sync = data;
 	fstrans_cookie_t cookie;
 
 	ASSERT(PageLocked(pp));
 	ASSERT(!PageWriteback(pp));
 
 	cookie = spl_fstrans_mark();
-	(void) zfs_putpage(mapping->host, pp, wbc);
+	(void) zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
 	spl_fstrans_unmark(cookie);
 
 	return (0);
@@ -709,8 +748,9 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
 	 * we run it once in non-SYNC mode so that the ZIL gets all the data,
 	 * and then we commit it all in one go.
 	 */
+	boolean_t for_sync = (sync_mode == WB_SYNC_ALL);
 	wbc->sync_mode = WB_SYNC_NONE;
-	result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
+	result = write_cache_pages(mapping, wbc, zpl_putpage, &for_sync);
 	if (sync_mode != wbc->sync_mode) {
 		ZPL_ENTER(zfsvfs);
 		ZPL_VERIFY_ZP(zp);
@@ -726,7 +766,8 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
 		 * details). That being said, this is a no-op in most cases.
 		 */
 		wbc->sync_mode = sync_mode;
-		result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
+		result = write_cache_pages(mapping, wbc, zpl_putpage,
+		    &for_sync);
 	}
 	return (result);
 }
@@ -743,7 +784,9 @@ zpl_writepage(struct page *pp, struct writeback_control *wbc)
 	if (ITOZSB(pp->mapping->host)->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		wbc->sync_mode = WB_SYNC_ALL;
 
-	return (zpl_putpage(pp, wbc, pp->mapping));
+	boolean_t for_sync = (wbc->sync_mode == WB_SYNC_ALL);
+	return (zpl_putpage(pp, wbc, &for_sync));
 }
 
 /*

@@ -68,7 +68,9 @@ zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
 	if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
 		ZFS_ENTER(zfsvfs);
 		ZFS_VERIFY_ZP(zp);
+		atomic_inc_32(&zp->z_sync_writes_cnt);
 		zil_commit(zfsvfs->z_log, zp->z_id);
+		atomic_dec_32(&zp->z_sync_writes_cnt);
 		ZFS_EXIT(zfsvfs);
 	}
 	tsd_set(zfs_fsyncer_key, NULL);

@@ -672,7 +672,7 @@ tags = ['functional', 'migration']
 [tests/functional/mmap]
 tests = ['mmap_mixed', 'mmap_read_001_pos', 'mmap_seek_001_pos',
-    'mmap_write_001_pos']
+    'mmap_write_001_pos', 'mmap_sync_001_pos']
 tags = ['functional', 'mmap']
 
 [tests/functional/mount]

@@ -189,6 +189,7 @@ if sys.platform.startswith('freebsd'):
     'cli_root/zpool_wait/zpool_wait_trim_cancel': ['SKIP', trim_reason],
     'cli_root/zpool_wait/zpool_wait_trim_flag': ['SKIP', trim_reason],
     'link_count/link_count_001': ['SKIP', na_reason],
+    'mmap/mmap_sync_001_pos': ['SKIP', na_reason],
 })
 elif sys.platform.startswith('linux'):
     known.update({

@@ -20,6 +20,7 @@ SUBDIRS = \
 	mmap_exec \
 	mmap_libaio \
 	mmap_seek \
+	mmap_sync \
 	mmapwrite \
 	nvlist_to_lua \
 	randwritecomp \

@@ -0,0 +1 @@
+/mmap_sync

@@ -0,0 +1,6 @@
+include $(top_srcdir)/config/Rules.am
+
+pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin
+
+pkgexec_PROGRAMS = mmap_sync
+mmap_sync_SOURCES = mmap_sync.c

@@ -207,6 +207,7 @@ export ZFSTEST_FILES='badsend
     mmap_exec
     mmap_libaio
     mmap_seek
+    mmap_sync
    mmapwrite
     nvlist_to_lua
     randfree_file

@@ -6,7 +6,8 @@ dist_pkgdata_SCRIPTS = \
 	mmap_read_001_pos.ksh \
 	mmap_write_001_pos.ksh \
 	mmap_libaio_001_pos.ksh \
-	mmap_seek_001_pos.ksh
+	mmap_seek_001_pos.ksh \
+	mmap_sync_001_pos.ksh
 
 dist_pkgdata_DATA = \
 	mmap.cfg

@@ -0,0 +1,63 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# msync()s of mmap()'ed file should complete quickly during
+# background dirty page writebacks by the kernel.
+#
+
+function cleanup
+{
+	log_must eval "echo $saved_vm_dirty_expire_centisecs > /proc/sys/vm/dirty_expire_centisecs"
+	log_must eval "echo $saved_vm_dirty_background_ratio > /proc/sys/vm/dirty_background_ratio"
+	log_must eval "echo $saved_vm_dirty_writeback_centisecs > /proc/sys/vm/dirty_writeback_centisecs"
+
+	# revert to some sensible defaults if the values we saved
+	# were incorrect due to a previous run being interrupted
+	if [ $(</proc/sys/vm/dirty_expire_centisecs) -eq 1 ]; then
+		log_must eval "echo 3000 > /proc/sys/vm/dirty_expire_centisecs"
+	fi
+
+	if [ $(</proc/sys/vm/dirty_background_ratio) -eq 0 ]; then
+		log_must eval "echo 10 > /proc/sys/vm/dirty_background_ratio"
+	fi
+
+	if [ $(</proc/sys/vm/dirty_writeback_centisecs) -eq 1 ]; then
+		log_must eval "echo 500 > /proc/sys/vm/dirty_writeback_centisecs"
+	fi
+}
+
+if ! is_linux; then
+	log_unsupported "Only supported on Linux, requires /proc/sys/vm/ tunables"
+fi
+
+log_onexit cleanup
+log_assert "Run the tests for mmap_sync"
+
+read -r saved_vm_dirty_expire_centisecs < /proc/sys/vm/dirty_expire_centisecs
+read -r saved_vm_dirty_background_ratio < /proc/sys/vm/dirty_background_ratio
+read -r saved_vm_dirty_writeback_centisecs < /proc/sys/vm/dirty_writeback_centisecs
+
+log_must eval "echo 1 > /proc/sys/vm/dirty_expire_centisecs"
+log_must eval "echo 1 > /proc/sys/vm/dirty_background_bytes"
+log_must eval "echo 1 > /proc/sys/vm/dirty_writeback_centisecs"
+
+log_must mmap_sync
+log_pass "mmap_sync tests passed."
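
The mmap_sync binary exercised by this script is built from mmap_sync.c, which
is among the 19 changed files but not shown in this excerpt. Purely as a
hypothetical illustration of what such a checker has to do -- the file name,
mapping size, iteration count, and one-second threshold below are assumptions,
not the actual test source -- it would dirty an mmap()'ed file and time
msync(MS_SYNC) while the tunables set above keep background writeback
constantly active:

/* Hypothetical sketch only -- not the mmap_sync.c shipped with this commit. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>

int
main(void)
{
        const size_t len = 1 << 20;     /* 1 MiB mapping (assumed) */
        int fd = open("mmap_sync_testfile", O_RDWR | O_CREAT, 0644);

        if (fd < 0 || ftruncate(fd, len) != 0)
                return (1);

        char *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED)
                return (1);

        for (int i = 0; i < 100; i++) {
                struct timespec t0, t1;

                memset(p, i, len);      /* dirty every page */
                clock_gettime(CLOCK_MONOTONIC, &t0);
                if (msync(p, len, MS_SYNC) != 0)
                        return (1);
                clock_gettime(CLOCK_MONOTONIC, &t1);

                double ms = (t1.tv_sec - t0.tv_sec) * 1e3 +
                    (t1.tv_nsec - t0.tv_nsec) / 1e6;
                if (ms > 1000.0) {      /* assumed failure threshold */
                        fprintf(stderr, "msync() took %.0f ms\n", ms);
                        return (1);
                }
        }

        (void) munmap(p, len);
        (void) close(fd);
        (void) unlink("mmap_sync_testfile");
        return (0);
}

Before this fix, a WB_SYNC_ALL msync() in such a loop could collide with the
kernel's aggressive WB_SYNC_NONE background writeback and stall for up to
zfs_txg_timeout; with the counter handshake in place, the overlapping side
triggers a zil_commit() and the msync() returns promptly.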