mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-12 19:20:28 +03:00
Fix unlink/xattr deadlock
The problem here is that prune_icache() tries to evict/delete both the xattr directory inode as well as at least one xattr inode contained in that directory. Here's what happens: 1. File is created. 2. xattr is created for that file (behind the scenes a xattr directory and a file in that xattr directory are created) 3. File is deleted. 4. Both the xattr directory inode and at least one xattr inode from that directory are evicted by prune_icache(); prune_icache() acquires a lock on both inodes before it calls ->evict() on the inodes When the xattr directory inode is evicted zfs_zinactive attempts to delete the xattr files contained in that directory. While enumerating these files zfs_zget() is called to obtain a reference to the xattr file znode - which tries to lock the xattr inode. However that very same xattr inode was already locked by prune_icache() further up the call stack, thus leading to a deadlock. This can be reliably reproduced like this: $ touch test $ attr -s a -V b test $ rm test $ echo 3 > /proc/sys/vm/drop_caches This patch fixes the deadlock by moving the zfs_purgedir() call to zfs_unlinked_drain(). Instead zfs_rmnode() now checks whether the xattr dir is empty and leaves the xattr dir in the unlinked set if it finds any xattrs. To ensure zfs_unlinked_drain() never accesses a stale super block zfsvfs_teardown() has been update to block until the iput taskq has been drained. This avoids a potential race where a file with an xattr directory is removed and the file system is immediately unmounted. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #266
This commit is contained in:
parent
6f0cf71e0d
commit
b00131d43c
@ -473,57 +473,6 @@ zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
|
|||||||
zap_add_int(zsb->z_os, zsb->z_unlinkedobj, zp->z_id, tx));
|
zap_add_int(zsb->z_os, zsb->z_unlinkedobj, zp->z_id, tx));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Clean up any znodes that had no links when we either crashed or
|
|
||||||
* (force) umounted the file system.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
zfs_unlinked_drain(zfs_sb_t *zsb)
|
|
||||||
{
|
|
||||||
zap_cursor_t zc;
|
|
||||||
zap_attribute_t zap;
|
|
||||||
dmu_object_info_t doi;
|
|
||||||
znode_t *zp;
|
|
||||||
int error;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Interate over the contents of the unlinked set.
|
|
||||||
*/
|
|
||||||
for (zap_cursor_init(&zc, zsb->z_os, zsb->z_unlinkedobj);
|
|
||||||
zap_cursor_retrieve(&zc, &zap) == 0;
|
|
||||||
zap_cursor_advance(&zc)) {
|
|
||||||
|
|
||||||
/*
|
|
||||||
* See what kind of object we have in list
|
|
||||||
*/
|
|
||||||
|
|
||||||
error = dmu_object_info(zsb->z_os, zap.za_first_integer, &doi);
|
|
||||||
if (error != 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
|
|
||||||
(doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
|
|
||||||
/*
|
|
||||||
* We need to re-mark these list entries for deletion,
|
|
||||||
* so we pull them back into core and set zp->z_unlinked.
|
|
||||||
*/
|
|
||||||
error = zfs_zget(zsb, zap.za_first_integer, &zp);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We may pick up znodes that are already marked for deletion.
|
|
||||||
* This could happen during the purge of an extended attribute
|
|
||||||
* directory. All we need to do is skip over them, since they
|
|
||||||
* are already in the system marked z_unlinked.
|
|
||||||
*/
|
|
||||||
if (error != 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
zp->z_unlinked = B_TRUE;
|
|
||||||
iput(ZTOI(zp));
|
|
||||||
}
|
|
||||||
zap_cursor_fini(&zc);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Delete the entire contents of a directory. Return a count
|
* Delete the entire contents of a directory. Return a count
|
||||||
* of the number of entries that could not be deleted. If we encounter
|
* of the number of entries that could not be deleted. If we encounter
|
||||||
@ -590,6 +539,71 @@ zfs_purgedir(znode_t *dzp)
|
|||||||
return (skipped);
|
return (skipped);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Clean up any znodes that had no links when we either crashed or
|
||||||
|
* (force) umounted the file system.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
zfs_unlinked_drain(zfs_sb_t *zsb)
|
||||||
|
{
|
||||||
|
zap_cursor_t zc;
|
||||||
|
zap_attribute_t zap;
|
||||||
|
dmu_object_info_t doi;
|
||||||
|
znode_t *zp;
|
||||||
|
int error;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Interate over the contents of the unlinked set.
|
||||||
|
*/
|
||||||
|
for (zap_cursor_init(&zc, zsb->z_os, zsb->z_unlinkedobj);
|
||||||
|
zap_cursor_retrieve(&zc, &zap) == 0;
|
||||||
|
zap_cursor_advance(&zc)) {
|
||||||
|
|
||||||
|
/*
|
||||||
|
* See what kind of object we have in list
|
||||||
|
*/
|
||||||
|
|
||||||
|
error = dmu_object_info(zsb->z_os, zap.za_first_integer, &doi);
|
||||||
|
if (error != 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
|
||||||
|
(doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
|
||||||
|
/*
|
||||||
|
* We need to re-mark these list entries for deletion,
|
||||||
|
* so we pull them back into core and set zp->z_unlinked.
|
||||||
|
*/
|
||||||
|
error = zfs_zget(zsb, zap.za_first_integer, &zp);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We may pick up znodes that are already marked for deletion.
|
||||||
|
* This could happen during the purge of an extended attribute
|
||||||
|
* directory. All we need to do is skip over them, since they
|
||||||
|
* are already in the system marked z_unlinked.
|
||||||
|
*/
|
||||||
|
if (error != 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
zp->z_unlinked = B_TRUE;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If this is an attribute directory, purge its contents.
|
||||||
|
*/
|
||||||
|
if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
|
||||||
|
/*
|
||||||
|
* We don't need to check the return value of
|
||||||
|
* zfs_purgedir here, because zfs_rmnode will just
|
||||||
|
* return this xattr directory to the unlinked set
|
||||||
|
* until all of its xattrs are gone.
|
||||||
|
*/
|
||||||
|
(void) zfs_purgedir(zp);
|
||||||
|
}
|
||||||
|
|
||||||
|
iput(ZTOI(zp));
|
||||||
|
}
|
||||||
|
zap_cursor_fini(&zc);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
zfs_rmnode(znode_t *zp)
|
zfs_rmnode(znode_t *zp)
|
||||||
{
|
{
|
||||||
@ -599,6 +613,7 @@ zfs_rmnode(znode_t *zp)
|
|||||||
dmu_tx_t *tx;
|
dmu_tx_t *tx;
|
||||||
uint64_t acl_obj;
|
uint64_t acl_obj;
|
||||||
uint64_t xattr_obj;
|
uint64_t xattr_obj;
|
||||||
|
uint64_t count;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
ASSERT(zp->z_links == 0);
|
ASSERT(zp->z_links == 0);
|
||||||
@ -608,13 +623,27 @@ zfs_rmnode(znode_t *zp)
|
|||||||
* If this is an attribute directory, purge its contents.
|
* If this is an attribute directory, purge its contents.
|
||||||
*/
|
*/
|
||||||
if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
|
if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
|
||||||
if (zfs_purgedir(zp) != 0) {
|
error = zap_count(os, zp->z_id, &count);
|
||||||
/*
|
if (error) {
|
||||||
* Not enough space to delete some xattrs.
|
|
||||||
* Leave it in the unlinked set.
|
|
||||||
*/
|
|
||||||
zfs_znode_dmu_fini(zp);
|
zfs_znode_dmu_fini(zp);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (count > 0) {
|
||||||
|
taskq_t *taskq;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* There are still directory entries in this xattr
|
||||||
|
* directory. Let zfs_unlinked_drain() deal with
|
||||||
|
* them to avoid deadlocking this process in the
|
||||||
|
* zfs_purgedir()->zfs_zget()->ilookup() callpath
|
||||||
|
* on the xattr inode's I_FREEING bit.
|
||||||
|
*/
|
||||||
|
taskq = dsl_pool_iput_taskq(dmu_objset_pool(os));
|
||||||
|
taskq_dispatch(taskq, (task_func_t *)
|
||||||
|
zfs_unlinked_drain, zsb, TQ_SLEEP);
|
||||||
|
|
||||||
|
zfs_znode_dmu_fini(zp);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1091,6 +1091,12 @@ zfsvfs_teardown(zfs_sb_t *zsb, boolean_t unmounting)
|
|||||||
(void) spl_invalidate_inodes(zsb->z_parent->z_sb, 0);
|
(void) spl_invalidate_inodes(zsb->z_parent->z_sb, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Drain the iput_taskq to ensure all active references to the
|
||||||
|
* zfs_sb_t have been handled only then can it be safely destroyed.
|
||||||
|
*/
|
||||||
|
taskq_wait(dsl_pool_iput_taskq(dmu_objset_pool(zsb->z_os)));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Close the zil. NB: Can't close the zil while zfs_inactive
|
* Close the zil. NB: Can't close the zil while zfs_inactive
|
||||||
* threads are blocked as zil_close can call zfs_inactive.
|
* threads are blocked as zil_close can call zfs_inactive.
|
||||||
|
Loading…
Reference in New Issue
Block a user