Mark all ZPL and ioctl functions as PF_FSTRANS

Prevent deadlocks by disabling direct reclaim during all ZPL and ioctl
calls, as well as in the l2arc feed and ARC adapt threads.

This obviates the need for MUTEX_FSTRANS, so its previous uses and its
definition have been eliminated.

Signed-off-by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3225
Author: Tim Chase
Date: 2015-03-30 22:43:29 -05:00
Committed by: Brian Behlendorf
Commit: 40d06e3c78 (parent 74aa2ba259)
7 changed files with 96 additions and 19 deletions
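
The same idiom is applied in every ZPL entry point and in both ioctl
dispatch paths below: obtain a cookie from spl_fstrans_mark() before
calling into ZFS common code, and restore the task state with
spl_fstrans_unmark() afterwards. The following is a minimal sketch of
that idiom, assuming only the SPL primitives already visible in the
diff; zpl_example_op() and zfs_example_op() are hypothetical stand-ins,
not functions touched by this commit, and the usual ZFS/SPL headers
(for CRED(), crhold(), crfree(), ASSERT3S(), fstrans_cookie_t) are
omitted.

/*
 * Sketch of the per-call PF_FSTRANS marking pattern used throughout
 * this commit.  zpl_example_op()/zfs_example_op() are illustrative
 * names only.
 */
static int
zpl_example_op(struct inode *ip, int flags)
{
	cred_t *cr = CRED();
	fstrans_cookie_t cookie;
	int error;

	crhold(cr);

	/*
	 * Mark the task PF_FSTRANS so that any allocation made while
	 * inside ZFS cannot enter direct reclaim and re-enter the
	 * filesystem (e.g. through inode eviction).
	 */
	cookie = spl_fstrans_mark();
	error = -zfs_example_op(ip, flags, cr);
	spl_fstrans_unmark(cookie);

	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}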

include/sys/zfs_context.h

@ -273,7 +273,6 @@ typedef struct kmutex {
} kmutex_t;
#define MUTEX_DEFAULT 0
#define MUTEX_FSTRANS MUTEX_DEFAULT
#define MUTEX_HELD(m) ((m)->m_owner == curthread)
#define MUTEX_NOT_HELD(m) (!MUTEX_HELD(m))

module/zfs/arc.c

@ -933,7 +933,7 @@ retry:
for (i = 0; i < BUF_LOCKS; i++) {
mutex_init(&buf_hash_table.ht_locks[i].ht_lock,
NULL, MUTEX_FSTRANS, NULL);
NULL, MUTEX_DEFAULT, NULL);
}
}
@ -2412,9 +2412,11 @@ static void
arc_adapt_thread(void)
{
callb_cpr_t cpr;
fstrans_cookie_t cookie;
CALLB_CPR_INIT(&cpr, &arc_reclaim_thr_lock, callb_generic_cpr, FTAG);
cookie = spl_fstrans_mark();
mutex_enter(&arc_reclaim_thr_lock);
while (arc_thread_exit == 0) {
#ifndef _KERNEL
@ -2485,6 +2487,7 @@ arc_adapt_thread(void)
arc_thread_exit = 0;
cv_broadcast(&arc_reclaim_thr_cv);
CALLB_CPR_EXIT(&cpr); /* drops arc_reclaim_thr_lock */
spl_fstrans_unmark(cookie);
thread_exit();
}
@ -5376,11 +5379,13 @@ l2arc_feed_thread(void)
uint64_t size, wrote;
clock_t begin, next = ddi_get_lbolt();
boolean_t headroom_boost = B_FALSE;
fstrans_cookie_t cookie;
CALLB_CPR_INIT(&cpr, &l2arc_feed_thr_lock, callb_generic_cpr, FTAG);
mutex_enter(&l2arc_feed_thr_lock);
cookie = spl_fstrans_mark();
while (l2arc_thread_exit == 0) {
CALLB_CPR_SAFE_BEGIN(&cpr);
(void) cv_timedwait_interruptible(&l2arc_feed_thr_cv,
@ -5454,6 +5459,7 @@ l2arc_feed_thread(void)
next = l2arc_write_interval(begin, size, wrote);
spa_config_exit(spa, SCL_L2ARC, dev);
}
spl_fstrans_unmark(cookie);
l2arc_thread_exit = 0;
cv_broadcast(&l2arc_feed_thr_cv);
@ -5570,7 +5576,7 @@ l2arc_init(void)
mutex_init(&l2arc_feed_thr_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&l2arc_feed_thr_cv, NULL, CV_DEFAULT, NULL);
mutex_init(&l2arc_dev_mtx, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&l2arc_buflist_mtx, NULL, MUTEX_FSTRANS, NULL);
mutex_init(&l2arc_buflist_mtx, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&l2arc_free_on_write_mtx, NULL, MUTEX_DEFAULT, NULL);
l2arc_dev_list = &L2ARC_dev_list;

module/zfs/dbuf.c

@ -331,7 +331,7 @@ retry:
0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0);
for (i = 0; i < DBUF_MUTEXES; i++)
mutex_init(&h->hash_mutexes[i], NULL, MUTEX_FSTRANS, NULL);
mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL);
dbuf_stats_init(h);
}

module/zfs/zfs_ioctl.c

@ -5733,6 +5733,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
const zfs_ioc_vec_t *vec;
char *saved_poolname = NULL;
nvlist_t *innvl = NULL;
fstrans_cookie_t cookie;
vecnum = cmd - ZFS_IOC_FIRST;
if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
@ -5827,7 +5828,9 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
}
outnvl = fnvlist_alloc();
cookie = spl_fstrans_mark();
error = vec->zvec_func(zc->zc_name, innvl, outnvl);
spl_fstrans_unmark(cookie);
if (error == 0 && vec->zvec_allow_log &&
spa_open(zc->zc_name, &spa, FTAG) == 0) {
@ -5855,7 +5858,9 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
nvlist_free(outnvl);
} else {
cookie = spl_fstrans_mark();
error = vec->zvec_legacy_func(zc);
spl_fstrans_unmark(cookie);
}
out:

module/zfs/zfs_znode.c

@ -1097,23 +1097,13 @@ zfs_zinactive(znode_t *zp)
{
zfs_sb_t *zsb = ZTOZSB(zp);
uint64_t z_id = zp->z_id;
boolean_t drop_mutex = 0;
ASSERT(zp->z_sa_hdl);
/*
* Don't allow a zfs_zget() while were trying to release this znode.
*
* Linux allows direct memory reclaim which means that any KM_SLEEP
* allocation may trigger inode eviction. This can lead to a deadlock
* through the ->shrink_icache_memory()->evict()->zfs_inactive()->
* zfs_zinactive() call path. To avoid this deadlock the process
* must not reacquire the mutex when it is already holding it.
*/
if (!ZFS_OBJ_HOLD_OWNED(zsb, z_id)) {
ZFS_OBJ_HOLD_ENTER(zsb, z_id);
drop_mutex = 1;
}
ZFS_OBJ_HOLD_ENTER(zsb, z_id);
mutex_enter(&zp->z_lock);
@ -1124,8 +1114,7 @@ zfs_zinactive(znode_t *zp)
if (zp->z_unlinked) {
mutex_exit(&zp->z_lock);
if (drop_mutex)
ZFS_OBJ_HOLD_EXIT(zsb, z_id);
ZFS_OBJ_HOLD_EXIT(zsb, z_id);
zfs_rmnode(zp);
return;
@ -1134,8 +1123,7 @@ zfs_zinactive(znode_t *zp)
mutex_exit(&zp->z_lock);
zfs_znode_dmu_fini(zp);
if (drop_mutex)
ZFS_OBJ_HOLD_EXIT(zsb, z_id);
ZFS_OBJ_HOLD_EXIT(zsb, z_id);
}
static inline int
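
The zfs_zinactive() hunk above removes the drop_mutex re-entrancy
guard. That guard existed because direct reclaim could evict an inode
and call back into zfs_zinactive() while the same task already held
ZFS_OBJ_HOLD_ENTER(); now that every entry path marks itself
PF_FSTRANS, allocations made under that hold can no longer trigger
filesystem-backed reclaim, so the unconditional hold/exit is safe. The
fragment below is a conceptual sketch of that behavior, assuming the
usual convention of masking __GFP_FS while PF_FSTRANS is set;
example_sanitize_flags() is a hypothetical helper, not SPL code.

#include <linux/sched.h>	/* current, PF_FSTRANS */
#include <linux/gfp.h>		/* gfp_t, __GFP_FS */

/*
 * Hypothetical helper: when the caller has been marked PF_FSTRANS
 * (via spl_fstrans_mark()), strip __GFP_FS so the allocator never
 * recurses into filesystem reclaim, i.e. never walks the
 * shrink_icache_memory()->evict()->zfs_inactive()->zfs_zinactive()
 * path while ZFS_OBJ_HOLD_ENTER() is already held.
 */
static inline gfp_t
example_sanitize_flags(gfp_t flags)
{
	if (current->flags & PF_FSTRANS)
		flags &= ~__GFP_FS;

	return (flags);
}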

module/zfs/zpl_file.c

@ -35,13 +35,16 @@ zpl_open(struct inode *ip, struct file *filp)
{
cred_t *cr = CRED();
int error;
fstrans_cookie_t cookie;
error = generic_file_open(ip, filp);
if (error)
return (error);
crhold(cr);
cookie = spl_fstrans_mark();
error = -zfs_open(ip, filp->f_mode, filp->f_flags, cr);
spl_fstrans_unmark(cookie);
crfree(cr);
ASSERT3S(error, <=, 0);
@ -53,12 +56,15 @@ zpl_release(struct inode *ip, struct file *filp)
{
cred_t *cr = CRED();
int error;
fstrans_cookie_t cookie;
cookie = spl_fstrans_mark();
if (ITOZ(ip)->z_atime_dirty)
zfs_mark_inode_dirty(ip);
crhold(cr);
error = -zfs_close(ip, filp->f_flags, cr);
spl_fstrans_unmark(cookie);
crfree(cr);
ASSERT3S(error, <=, 0);
@ -71,9 +77,12 @@ zpl_iterate(struct file *filp, struct dir_context *ctx)
struct dentry *dentry = filp->f_path.dentry;
cred_t *cr = CRED();
int error;
fstrans_cookie_t cookie;
crhold(cr);
cookie = spl_fstrans_mark();
error = -zfs_readdir(dentry->d_inode, ctx, cr);
spl_fstrans_unmark(cookie);
crfree(cr);
ASSERT3S(error, <=, 0);
@ -106,9 +115,12 @@ zpl_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
cred_t *cr = CRED();
int error;
fstrans_cookie_t cookie;
crhold(cr);
cookie = spl_fstrans_mark();
error = -zfs_fsync(dentry->d_inode, datasync, cr);
spl_fstrans_unmark(cookie);
crfree(cr);
ASSERT3S(error, <=, 0);
@ -134,9 +146,12 @@ zpl_fsync(struct file *filp, int datasync)
struct inode *inode = filp->f_mapping->host;
cred_t *cr = CRED();
int error;
fstrans_cookie_t cookie;
crhold(cr);
cookie = spl_fstrans_mark();
error = -zfs_fsync(inode, datasync, cr);
spl_fstrans_unmark(cookie);
crfree(cr);
ASSERT3S(error, <=, 0);
@ -162,13 +177,16 @@ zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
struct inode *inode = filp->f_mapping->host;
cred_t *cr = CRED();
int error;
fstrans_cookie_t cookie;
error = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (error)
return (error);
crhold(cr);
cookie = spl_fstrans_mark();
error = -zfs_fsync(inode, datasync, cr);
spl_fstrans_unmark(cookie);
crfree(cr);
ASSERT3S(error, <=, 0);
@ -193,6 +211,7 @@ zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
ssize_t read;
uio_t uio;
int error;
fstrans_cookie_t cookie;
uio.uio_iov = (struct iovec *)iovp;
uio.uio_resid = count;
@ -201,7 +220,9 @@ zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
uio.uio_limit = MAXOFFSET_T;
uio.uio_segflg = segment;
cookie = spl_fstrans_mark();
error = -zfs_read(ip, &uio, flags, cr);
spl_fstrans_unmark(cookie);
if (error < 0)
return (error);
@ -271,6 +292,7 @@ zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
ssize_t wrote;
uio_t uio;
int error;
fstrans_cookie_t cookie;
if (flags & O_APPEND)
*ppos = i_size_read(ip);
@ -282,7 +304,9 @@ zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
uio.uio_limit = MAXOFFSET_T;
uio.uio_segflg = segment;
cookie = spl_fstrans_mark();
error = -zfs_write(ip, &uio, flags, cr);
spl_fstrans_unmark(cookie);
if (error < 0)
return (error);
@ -347,13 +371,17 @@ static loff_t
zpl_llseek(struct file *filp, loff_t offset, int whence)
{
#if defined(SEEK_HOLE) && defined(SEEK_DATA)
fstrans_cookie_t cookie;
if (whence == SEEK_DATA || whence == SEEK_HOLE) {
struct inode *ip = filp->f_mapping->host;
loff_t maxbytes = ip->i_sb->s_maxbytes;
loff_t error;
spl_inode_lock(ip);
cookie = spl_fstrans_mark();
error = -zfs_holey(ip, whence, &offset);
spl_fstrans_unmark(cookie);
if (error == 0)
error = lseek_execute(filp, ip, offset, maxbytes);
spl_inode_unlock(ip);
@ -414,9 +442,12 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
struct inode *ip = filp->f_mapping->host;
znode_t *zp = ITOZ(ip);
int error;
fstrans_cookie_t cookie;
cookie = spl_fstrans_mark();
error = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start,
(size_t)(vma->vm_end - vma->vm_start), vma->vm_flags);
spl_fstrans_unmark(cookie);
if (error)
return (error);
@ -446,12 +477,15 @@ zpl_readpage(struct file *filp, struct page *pp)
struct inode *ip;
struct page *pl[1];
int error = 0;
fstrans_cookie_t cookie;
ASSERT(PageLocked(pp));
ip = pp->mapping->host;
pl[0] = pp;
cookie = spl_fstrans_mark();
error = -zfs_getpage(ip, pl, 1);
spl_fstrans_unmark(cookie);
if (error) {
SetPageError(pp);
@ -569,6 +603,7 @@ zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
cred_t *cr = CRED();
flock64_t bf;
loff_t olen;
fstrans_cookie_t cookie;
if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return (error);
@ -593,7 +628,9 @@ zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
bf.l_len = len;
bf.l_pid = 0;
cookie = spl_fstrans_mark();
error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr);
spl_fstrans_unmark(cookie);
spl_inode_unlock(ip);
crfree(cr);
@ -663,6 +700,7 @@ zpl_ioctl_setflags(struct file *filp, void __user *arg)
xvattr_t xva;
xoptattr_t *xoap;
int error;
fstrans_cookie_t cookie;
if (copy_from_user(&ioctl_flags, arg, sizeof (ioctl_flags)))
return (-EFAULT);
@ -697,7 +735,9 @@ zpl_ioctl_setflags(struct file *filp, void __user *arg)
xoap->xoa_nodump = B_TRUE;
crhold(cr);
cookie = spl_fstrans_mark();
error = -zfs_setattr(ip, (vattr_t *)&xva, 0, cr);
spl_fstrans_unmark(cookie);
crfree(cr);
return (error);

module/zfs/zpl_inode.c

@ -40,12 +40,15 @@ zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
cred_t *cr = CRED();
struct inode *ip;
int error;
fstrans_cookie_t cookie;
if (dlen(dentry) > ZFS_MAXNAMELEN)
return (ERR_PTR(-ENAMETOOLONG));
crhold(cr);
cookie = spl_fstrans_mark();
error = -zfs_lookup(dir, dname(dentry), &ip, 0, cr, NULL, NULL);
spl_fstrans_unmark(cookie);
ASSERT3S(error, <=, 0);
crfree(cr);
@ -95,12 +98,15 @@ zpl_create(struct inode *dir, struct dentry *dentry, zpl_umode_t mode,
struct inode *ip;
vattr_t *vap;
int error;
fstrans_cookie_t cookie;
crhold(cr);
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
zpl_vap_init(vap, dir, mode, cr);
cookie = spl_fstrans_mark();
error = -zfs_create(dir, dname(dentry), vap, 0, mode, &ip, cr, 0, NULL);
spl_fstrans_unmark(cookie);
if (error == 0) {
VERIFY0(zpl_xattr_security_init(ip, dir, &dentry->d_name));
VERIFY0(zpl_init_acl(ip, dir));
@ -122,6 +128,7 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, zpl_umode_t mode,
struct inode *ip;
vattr_t *vap;
int error;
fstrans_cookie_t cookie;
/*
* We currently expect Linux to supply rdev=0 for all sockets
@ -135,7 +142,9 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, zpl_umode_t mode,
zpl_vap_init(vap, dir, mode, cr);
vap->va_rdev = rdev;
cookie = spl_fstrans_mark();
error = -zfs_create(dir, dname(dentry), vap, 0, mode, &ip, cr, 0, NULL);
spl_fstrans_unmark(cookie);
if (error == 0) {
VERIFY0(zpl_xattr_security_init(ip, dir, &dentry->d_name));
VERIFY0(zpl_init_acl(ip, dir));
@ -154,9 +163,12 @@ zpl_unlink(struct inode *dir, struct dentry *dentry)
{
cred_t *cr = CRED();
int error;
fstrans_cookie_t cookie;
crhold(cr);
cookie = spl_fstrans_mark();
error = -zfs_remove(dir, dname(dentry), cr);
spl_fstrans_unmark(cookie);
crfree(cr);
ASSERT3S(error, <=, 0);
@ -170,12 +182,15 @@ zpl_mkdir(struct inode *dir, struct dentry *dentry, zpl_umode_t mode)
vattr_t *vap;
struct inode *ip;
int error;
fstrans_cookie_t cookie;
crhold(cr);
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
zpl_vap_init(vap, dir, mode | S_IFDIR, cr);
cookie = spl_fstrans_mark();
error = -zfs_mkdir(dir, dname(dentry), vap, &ip, cr, 0, NULL);
spl_fstrans_unmark(cookie);
if (error == 0) {
VERIFY0(zpl_xattr_security_init(ip, dir, &dentry->d_name));
VERIFY0(zpl_init_acl(ip, dir));
@ -194,9 +209,12 @@ zpl_rmdir(struct inode * dir, struct dentry *dentry)
{
cred_t *cr = CRED();
int error;
fstrans_cookie_t cookie;
crhold(cr);
cookie = spl_fstrans_mark();
error = -zfs_rmdir(dir, dname(dentry), NULL, cr, 0);
spl_fstrans_unmark(cookie);
crfree(cr);
ASSERT3S(error, <=, 0);
@ -208,6 +226,7 @@ zpl_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
boolean_t issnap = ITOZSB(dentry->d_inode)->z_issnap;
int error;
fstrans_cookie_t cookie;
/*
* Ensure MNT_SHRINKABLE is set on snapshots to ensure they are
@ -220,7 +239,9 @@ zpl_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
if (unlikely(issnap && !(mnt->mnt_flags & MNT_SHRINKABLE)))
mnt->mnt_flags |= MNT_SHRINKABLE;
cookie = spl_fstrans_mark();
error = -zfs_getattr_fast(dentry->d_inode, stat);
spl_fstrans_unmark(cookie);
ASSERT3S(error, <=, 0);
return (error);
@ -233,6 +254,7 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
cred_t *cr = CRED();
vattr_t *vap;
int error;
fstrans_cookie_t cookie;
error = inode_change_ok(ip, ia);
if (error)
@ -249,7 +271,9 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
vap->va_mtime = ia->ia_mtime;
vap->va_ctime = ia->ia_ctime;
cookie = spl_fstrans_mark();
error = -zfs_setattr(ip, vap, 0, cr);
spl_fstrans_unmark(cookie);
if (!error && (ia->ia_valid & ATTR_MODE))
error = zpl_chmod_acl(ip);
@ -266,9 +290,12 @@ zpl_rename(struct inode *sdip, struct dentry *sdentry,
{
cred_t *cr = CRED();
int error;
fstrans_cookie_t cookie;
crhold(cr);
cookie = spl_fstrans_mark();
error = -zfs_rename(sdip, dname(sdentry), tdip, dname(tdentry), cr, 0);
spl_fstrans_unmark(cookie);
crfree(cr);
ASSERT3S(error, <=, 0);
@ -282,12 +309,15 @@ zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
vattr_t *vap;
struct inode *ip;
int error;
fstrans_cookie_t cookie;
crhold(cr);
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
zpl_vap_init(vap, dir, S_IFLNK | S_IRWXUGO, cr);
cookie = spl_fstrans_mark();
error = -zfs_symlink(dir, dname(dentry), vap, (char *)name, &ip, cr, 0);
spl_fstrans_unmark(cookie);
if (error == 0) {
VERIFY0(zpl_xattr_security_init(ip, dir, &dentry->d_name));
d_instantiate(dentry, ip);
@ -309,6 +339,7 @@ zpl_follow_link(struct dentry *dentry, struct nameidata *nd)
uio_t uio;
char *link;
int error;
fstrans_cookie_t cookie;
crhold(cr);
@ -320,7 +351,9 @@ zpl_follow_link(struct dentry *dentry, struct nameidata *nd)
uio.uio_resid = (MAXPATHLEN - 1);
uio.uio_segflg = UIO_SYSSPACE;
cookie = spl_fstrans_mark();
error = -zfs_readlink(ip, &uio, cr);
spl_fstrans_unmark(cookie);
if (error) {
kmem_free(link, MAXPATHLEN);
nd_set_link(nd, ERR_PTR(error));
@ -347,6 +380,7 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
cred_t *cr = CRED();
struct inode *ip = old_dentry->d_inode;
int error;
fstrans_cookie_t cookie;
if (ip->i_nlink >= ZFS_LINK_MAX)
return (-EMLINK);
@ -355,7 +389,9 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
ip->i_ctime = CURRENT_TIME_SEC;
igrab(ip); /* Use ihold() if available */
cookie = spl_fstrans_mark();
error = -zfs_link(dir, ip, dname(dentry), cr);
spl_fstrans_unmark(cookie);
if (error) {
iput(ip);
goto out;
@ -375,6 +411,7 @@ zpl_truncate_range(struct inode *ip, loff_t start, loff_t end)
{
cred_t *cr = CRED();
flock64_t bf;
fstrans_cookie_t cookie;
ASSERT3S(start, <=, end);
@ -392,7 +429,9 @@ zpl_truncate_range(struct inode *ip, loff_t start, loff_t end)
bf.l_start = start;
bf.l_len = end - start;
bf.l_pid = 0;
cookie = spl_fstrans_mark();
zfs_space(ip, F_FREESP, &bf, FWRITE, start, cr);
spl_fstrans_unmark(cookie);
crfree(cr);
}