FreeBSD: convert teardown inactive lock to a read-mostly sleepable lock

The lock is taken all the time and, as a regular read-write lock,
needlessly serves as a mount point-wide contention point.

This forward ports FreeBSD revision r357322.

To quote the aforementioned commit:

Sample result doing an incremental -j 40 build:
before: 173.30s user 458.97s system 2595% cpu 24.358 total
after:  168.58s user 254.92s system 2211% cpu 19.147 total

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Ryan Moeller <freqlabs@FreeBSD.org>
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Closes #10896
This commit is contained in:
Mateusz Guzik 2020-09-09 19:15:52 +02:00 committed by Brian Behlendorf
parent 17e2fd3bfd
commit 29bc31f62f
4 changed files with 82 additions and 19 deletions

View File

@ -27,18 +27,31 @@
#ifndef _SYS_FS_ZFS_VFSOPS_H #ifndef _SYS_FS_ZFS_VFSOPS_H
#define _SYS_FS_ZFS_VFSOPS_H #define _SYS_FS_ZFS_VFSOPS_H
#if __FreeBSD_version >= 1300109
#define TEARDOWN_INACTIVE_RMS
#endif
#include <sys/dataset_kstats.h> #include <sys/dataset_kstats.h>
#include <sys/list.h> #include <sys/list.h>
#include <sys/vfs.h> #include <sys/vfs.h>
#include <sys/zil.h> #include <sys/zil.h>
#include <sys/sa.h> #include <sys/sa.h>
#include <sys/rrwlock.h> #include <sys/rrwlock.h>
#ifdef TEARDOWN_INACTIVE_RMS
#include <sys/rmlock.h>
#endif
#include <sys/zfs_ioctl.h> #include <sys/zfs_ioctl.h>
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
/*
 * Type used for the z_teardown_inactive_lock member of struct zfsvfs:
 * struct rmslock when TEARDOWN_INACTIVE_RMS is defined, krwlock_t otherwise.
 */
#ifdef TEARDOWN_INACTIVE_RMS
typedef struct rmslock zfs_teardown_lock_t;
#else
#define zfs_teardown_lock_t krwlock_t
#endif
typedef struct zfsvfs zfsvfs_t; typedef struct zfsvfs zfsvfs_t;
struct znode; struct znode;
@ -67,7 +80,7 @@ struct zfsvfs {
boolean_t z_atime; /* enable atimes mount option */ boolean_t z_atime; /* enable atimes mount option */
boolean_t z_unmounted; /* unmounted */ boolean_t z_unmounted; /* unmounted */
rrmlock_t z_teardown_lock; rrmlock_t z_teardown_lock;
krwlock_t z_teardown_inactive_lock; zfs_teardown_lock_t z_teardown_inactive_lock;
list_t z_all_znodes; /* all vnodes in the fs */ list_t z_all_znodes; /* all vnodes in the fs */
uint64_t z_nr_znodes; /* number of znodes in the fs */ uint64_t z_nr_znodes; /* number of znodes in the fs */
kmutex_t z_znodes_lock; /* lock for z_all_znodes */ kmutex_t z_znodes_lock; /* lock for z_all_znodes */
@ -98,6 +111,56 @@ struct zfsvfs {
struct task z_unlinked_drain_task; struct task z_unlinked_drain_task;
}; };
/*
 * Wrappers for operating on (zfsvfs)->z_teardown_inactive_lock.
 *
 * Call sites use these macros instead of the lock primitives directly, so
 * the same code builds against either implementation:
 *   - with TEARDOWN_INACTIVE_RMS defined they expand to the rms_*() calls;
 *   - otherwise they expand to the rw_*() calls and RW_WRITE_HELD().
 *
 * INIT/DESTROY set up and tear down the lock, TRYRLOCK/RLOCK/RUNLOCK cover
 * the read side, WLOCK/WUNLOCK cover the write side, and WLOCKED is the
 * predicate used in ASSERT()s to check that the write lock is held.
 */
#ifdef TEARDOWN_INACTIVE_RMS
/* rms lock (struct rmslock) implementation. */
#define ZFS_INIT_TEARDOWN_INACTIVE(zfsvfs) \
rms_init(&(zfsvfs)->z_teardown_inactive_lock, "zfs teardown inactive")
#define ZFS_DESTROY_TEARDOWN_INACTIVE(zfsvfs) \
rms_destroy(&(zfsvfs)->z_teardown_inactive_lock)
#define ZFS_TRYRLOCK_TEARDOWN_INACTIVE(zfsvfs) \
rms_try_rlock(&(zfsvfs)->z_teardown_inactive_lock)
#define ZFS_RLOCK_TEARDOWN_INACTIVE(zfsvfs) \
rms_rlock(&(zfsvfs)->z_teardown_inactive_lock)
#define ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs) \
rms_runlock(&(zfsvfs)->z_teardown_inactive_lock)
#define ZFS_WLOCK_TEARDOWN_INACTIVE(zfsvfs) \
rms_wlock(&(zfsvfs)->z_teardown_inactive_lock)
#define ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs) \
rms_wunlock(&(zfsvfs)->z_teardown_inactive_lock)
#define ZFS_TEARDOWN_INACTIVE_WLOCKED(zfsvfs) \
rms_wowned(&(zfsvfs)->z_teardown_inactive_lock)
#else
/* krwlock_t implementation; read and write unlock both map to rw_exit(). */
#define ZFS_INIT_TEARDOWN_INACTIVE(zfsvfs) \
rw_init(&(zfsvfs)->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL)
#define ZFS_DESTROY_TEARDOWN_INACTIVE(zfsvfs) \
rw_destroy(&(zfsvfs)->z_teardown_inactive_lock)
#define ZFS_TRYRLOCK_TEARDOWN_INACTIVE(zfsvfs) \
rw_tryenter(&(zfsvfs)->z_teardown_inactive_lock, RW_READER)
#define ZFS_RLOCK_TEARDOWN_INACTIVE(zfsvfs) \
rw_enter(&(zfsvfs)->z_teardown_inactive_lock, RW_READER)
#define ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs) \
rw_exit(&(zfsvfs)->z_teardown_inactive_lock)
#define ZFS_WLOCK_TEARDOWN_INACTIVE(zfsvfs) \
rw_enter(&(zfsvfs)->z_teardown_inactive_lock, RW_WRITER)
#define ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs) \
rw_exit(&(zfsvfs)->z_teardown_inactive_lock)
#define ZFS_TEARDOWN_INACTIVE_WLOCKED(zfsvfs) \
RW_WRITE_HELD(&(zfsvfs)->z_teardown_inactive_lock)
#endif
#define ZSB_XATTR 0x0001 /* Enable user xattrs */ #define ZSB_XATTR 0x0001 /* Enable user xattrs */
/* /*
* Normal filesystems (those not under .zfs/snapshot) have a total * Normal filesystems (those not under .zfs/snapshot) have a total

View File

@ -975,7 +975,7 @@ zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
#else #else
rrm_init(&zfsvfs->z_teardown_lock, B_FALSE); rrm_init(&zfsvfs->z_teardown_lock, B_FALSE);
#endif #endif
rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); ZFS_INIT_TEARDOWN_INACTIVE(zfsvfs);
rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++) for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++)
mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
@ -1126,7 +1126,7 @@ zfsvfs_free(zfsvfs_t *zfsvfs)
ASSERT(zfsvfs->z_nr_znodes == 0); ASSERT(zfsvfs->z_nr_znodes == 0);
list_destroy(&zfsvfs->z_all_znodes); list_destroy(&zfsvfs->z_all_znodes);
rrm_destroy(&zfsvfs->z_teardown_lock); rrm_destroy(&zfsvfs->z_teardown_lock);
rw_destroy(&zfsvfs->z_teardown_inactive_lock); ZFS_DESTROY_TEARDOWN_INACTIVE(zfsvfs);
rw_destroy(&zfsvfs->z_fuid_lock); rw_destroy(&zfsvfs->z_fuid_lock);
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
mutex_destroy(&zfsvfs->z_hold_mtx[i]); mutex_destroy(&zfsvfs->z_hold_mtx[i]);
@ -1545,7 +1545,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
zfsvfs->z_log = NULL; zfsvfs->z_log = NULL;
} }
rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); ZFS_WLOCK_TEARDOWN_INACTIVE(zfsvfs);
/* /*
* If we are not unmounting (ie: online recv) and someone already * If we are not unmounting (ie: online recv) and someone already
@ -1553,7 +1553,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
* or a reopen of z_os failed then just bail out now. * or a reopen of z_os failed then just bail out now.
*/ */
if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
rw_exit(&zfsvfs->z_teardown_inactive_lock); ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
rrm_exit(&zfsvfs->z_teardown_lock, FTAG); rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
return (SET_ERROR(EIO)); return (SET_ERROR(EIO));
} }
@ -1581,7 +1581,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
*/ */
if (unmounting) { if (unmounting) {
zfsvfs->z_unmounted = B_TRUE; zfsvfs->z_unmounted = B_TRUE;
rw_exit(&zfsvfs->z_teardown_inactive_lock); ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
rrm_exit(&zfsvfs->z_teardown_lock, FTAG); rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
} }
@ -1901,7 +1901,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
znode_t *zp; znode_t *zp;
ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock)); ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); ASSERT(ZFS_TEARDOWN_INACTIVE_WLOCKED(zfsvfs));
/* /*
* We already own this, so just update the objset_t, as the one we * We already own this, so just update the objset_t, as the one we
@ -1939,7 +1939,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
bail: bail:
/* release the VOPs */ /* release the VOPs */
rw_exit(&zfsvfs->z_teardown_inactive_lock); ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
rrm_exit(&zfsvfs->z_teardown_lock, FTAG); rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
if (err) { if (err) {
@ -2056,7 +2056,7 @@ int
zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
{ {
ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock)); ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); ASSERT(ZFS_TEARDOWN_INACTIVE_WLOCKED(zfsvfs));
/* /*
* We already own this, so just hold and rele it to update the * We already own this, so just hold and rele it to update the
@ -2072,7 +2072,7 @@ zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
zfsvfs->z_os = os; zfsvfs->z_os = os;
/* release the VOPs */ /* release the VOPs */
rw_exit(&zfsvfs->z_teardown_inactive_lock); ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
rrm_exit(&zfsvfs->z_teardown_lock, FTAG); rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
/* /*

View File

@ -4638,13 +4638,13 @@ zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
zfsvfs_t *zfsvfs = zp->z_zfsvfs; zfsvfs_t *zfsvfs = zp->z_zfsvfs;
int error; int error;
rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); ZFS_RLOCK_TEARDOWN_INACTIVE(zfsvfs);
if (zp->z_sa_hdl == NULL) { if (zp->z_sa_hdl == NULL) {
/* /*
* The fs has been unmounted, or we did a * The fs has been unmounted, or we did a
* suspend/resume and this file no longer exists. * suspend/resume and this file no longer exists.
*/ */
rw_exit(&zfsvfs->z_teardown_inactive_lock); ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
vrecycle(vp); vrecycle(vp);
return; return;
} }
@ -4653,7 +4653,7 @@ zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
/* /*
* Fast path to recycle a vnode of a removed file. * Fast path to recycle a vnode of a removed file.
*/ */
rw_exit(&zfsvfs->z_teardown_inactive_lock); ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
vrecycle(vp); vrecycle(vp);
return; return;
} }
@ -4673,7 +4673,7 @@ zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
dmu_tx_commit(tx); dmu_tx_commit(tx);
} }
} }
rw_exit(&zfsvfs->z_teardown_inactive_lock); ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
} }
@ -5823,10 +5823,10 @@ zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap)
if (vn_need_pageq_flush(vp)) if (vn_need_pageq_flush(vp))
return (1); return (1);
if (!rw_tryenter(&zfsvfs->z_teardown_inactive_lock, RW_READER)) if (!ZFS_TRYRLOCK_TEARDOWN_INACTIVE(zfsvfs))
return (1); return (1);
need = (zp->z_sa_hdl == NULL || zp->z_unlinked || zp->z_atime_dirty); need = (zp->z_sa_hdl == NULL || zp->z_unlinked || zp->z_atime_dirty);
rw_exit(&zfsvfs->z_teardown_inactive_lock); ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
return (need); return (need);
} }
@ -5857,12 +5857,12 @@ zfs_freebsd_reclaim(struct vop_reclaim_args *ap)
* zfs_znode_dmu_fini in zfsvfs_teardown during * zfs_znode_dmu_fini in zfsvfs_teardown during
* force unmount. * force unmount.
*/ */
rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); ZFS_RLOCK_TEARDOWN_INACTIVE(zfsvfs);
if (zp->z_sa_hdl == NULL) if (zp->z_sa_hdl == NULL)
zfs_znode_free(zp); zfs_znode_free(zp);
else else
zfs_zinactive(zp); zfs_zinactive(zp);
rw_exit(&zfsvfs->z_teardown_inactive_lock); ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
vp->v_data = NULL; vp->v_data = NULL;
return (0); return (0);

View File

@ -384,7 +384,7 @@ zfs_znode_dmu_fini(znode_t *zp)
{ {
ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) || ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) ||
zp->z_unlinked || zp->z_unlinked ||
RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock)); ZFS_TEARDOWN_INACTIVE_WLOCKED(zp->z_zfsvfs));
sa_handle_destroy(zp->z_sa_hdl); sa_handle_destroy(zp->z_sa_hdl);
zp->z_sa_hdl = NULL; zp->z_sa_hdl = NULL;