mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-13 19:50:25 +03:00
Fix evict() deadlock
Now that KM_SLEEP is not defined as GFP_NOFS there is the possibility of synchronous reclaim deadlocks. These deadlocks never existed in the original OpenSolaris code because all memory reclaim on Solaris is done asynchronously. Linux does both synchronous (direct) and asynchronous (indirect) reclaim. This commit addresses a deadlock caused by inode eviction. A KM_SLEEP allocation may trigger direct memory reclaim and shrink the inode cache. This can occur while a mutex in the array of ZFS_OBJ_HOLD mutexes is held. Through the ->shrink_icache_memory()->evict()->zfs_inactive()-> zfs_zinactive() call path the same mutex may be reacquired resulting in a deadlock. To avoid this deadlock the process must not reacquire the mutex when it is already holding it. This is a reasonable fix for now but longer term the ZFS_OBJ_HOLD mutex locking should be reevaluated. This infrastructure already prevents us from ever using the Linux lock dependency analysis tools, and it may limit scalability.
This commit is contained in:
parent
691f6ac4c2
commit
d6bd8eaae4
@ -280,6 +280,8 @@ typedef struct znode {
|
|||||||
mutex_tryenter(ZFS_OBJ_MUTEX((zsb), (obj_num)))
|
mutex_tryenter(ZFS_OBJ_MUTEX((zsb), (obj_num)))
|
||||||
#define ZFS_OBJ_HOLD_EXIT(zsb, obj_num) \
|
#define ZFS_OBJ_HOLD_EXIT(zsb, obj_num) \
|
||||||
mutex_exit(ZFS_OBJ_MUTEX((zsb), (obj_num)))
|
mutex_exit(ZFS_OBJ_MUTEX((zsb), (obj_num)))
|
||||||
|
#define ZFS_OBJ_HOLD_OWNED(zsb, obj_num) \
|
||||||
|
mutex_owned(ZFS_OBJ_MUTEX((zsb), (obj_num)))
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Macros to encode/decode ZFS stored time values from/to struct timespec
|
* Macros to encode/decode ZFS stored time values from/to struct timespec
|
||||||
|
@ -980,13 +980,24 @@ zfs_zinactive(znode_t *zp)
|
|||||||
{
|
{
|
||||||
zfs_sb_t *zsb = ZTOZSB(zp);
|
zfs_sb_t *zsb = ZTOZSB(zp);
|
||||||
uint64_t z_id = zp->z_id;
|
uint64_t z_id = zp->z_id;
|
||||||
|
boolean_t drop_mutex = 0;
|
||||||
|
|
||||||
ASSERT(zp->z_sa_hdl);
|
ASSERT(zp->z_sa_hdl);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Don't allow a zfs_zget() while were trying to release this znode
|
* Don't allow a zfs_zget() while were trying to release this znode.
|
||||||
|
*
|
||||||
|
* Linux allows direct memory reclaim which means that any KM_SLEEP
|
||||||
|
* allocation may trigger inode eviction. This can lead to a deadlock
|
||||||
|
* through the ->shrink_icache_memory()->evict()->zfs_inactive()->
|
||||||
|
* zfs_zinactive() call path. To avoid this deadlock the process
|
||||||
|
* must not reacquire the mutex when it is already holding it.
|
||||||
*/
|
*/
|
||||||
ZFS_OBJ_HOLD_ENTER(zsb, z_id);
|
if (!ZFS_OBJ_HOLD_OWNED(zsb, z_id)) {
|
||||||
|
ZFS_OBJ_HOLD_ENTER(zsb, z_id);
|
||||||
|
drop_mutex = 1;
|
||||||
|
}
|
||||||
|
|
||||||
mutex_enter(&zp->z_lock);
|
mutex_enter(&zp->z_lock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -995,14 +1006,19 @@ zfs_zinactive(znode_t *zp)
|
|||||||
*/
|
*/
|
||||||
if (zp->z_unlinked) {
|
if (zp->z_unlinked) {
|
||||||
mutex_exit(&zp->z_lock);
|
mutex_exit(&zp->z_lock);
|
||||||
ZFS_OBJ_HOLD_EXIT(zsb, z_id);
|
|
||||||
|
if (drop_mutex)
|
||||||
|
ZFS_OBJ_HOLD_EXIT(zsb, z_id);
|
||||||
|
|
||||||
zfs_rmnode(zp);
|
zfs_rmnode(zp);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
mutex_exit(&zp->z_lock);
|
mutex_exit(&zp->z_lock);
|
||||||
zfs_znode_dmu_fini(zp);
|
zfs_znode_dmu_fini(zp);
|
||||||
ZFS_OBJ_HOLD_EXIT(zsb, z_id);
|
|
||||||
|
if (drop_mutex)
|
||||||
|
ZFS_OBJ_HOLD_EXIT(zsb, z_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
Loading…
Reference in New Issue
Block a user