mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-24 11:18:52 +03:00
Illumos 5056 - ZFS deadlock on db_mtx and dn_holds
5056 ZFS deadlock on db_mtx and dn_holds

Author: Justin Gibbs <justing@spectralogic.com>
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>

References:
  https://www.illumos.org/issues/5056
  https://github.com/illumos/illumos-gate/commit/bc9014e

Porting Notes:

sa_handle_get_from_db():
  - the original patch includes an otherwise unmentioned fix for a
    possible usage of an uninitialised variable

dmu_objset_open_impl():
  - Under Illumos list_link_init() is the same as filling a list_node_t
    with NULLs, so they don't notice if they miss doing list_link_init()
    on a zero'd containing structure (e.g. allocated with kmem_zalloc as
    here). Under Linux, not so much: an uninitialised list_node_t goes
    "Boom!" some time later when it's used or destroyed.

dmu_objset_evict_dbufs():
  - reduce stack usage using kmem_alloc()

Ported-by: Chris Dunlop <chris@onthe.net.au>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
committed by
Brian Behlendorf
parent
d683ddbb72
commit
0c66c32d1d
+28
-40
@@ -22,6 +22,7 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@@ -402,53 +403,41 @@ dnode_sync_free_range(void *arg, uint64_t blkid, uint64_t nblks)
|
||||
void
|
||||
dnode_evict_dbufs(dnode_t *dn)
|
||||
{
|
||||
int progress;
|
||||
int pass = 0;
|
||||
dmu_buf_impl_t *db_marker;
|
||||
dmu_buf_impl_t *db, *db_next;
|
||||
|
||||
do {
|
||||
dmu_buf_impl_t *db, *db_next;
|
||||
int evicting = FALSE;
|
||||
db_marker = kmem_alloc(sizeof (dmu_buf_impl_t), KM_SLEEP);
|
||||
|
||||
mutex_enter(&dn->dn_dbufs_mtx);
|
||||
for (db = avl_first(&dn->dn_dbufs); db != NULL; db = db_next) {
|
||||
|
||||
progress = FALSE;
|
||||
mutex_enter(&dn->dn_dbufs_mtx);
|
||||
for (db = avl_first(&dn->dn_dbufs); db != NULL; db = db_next) {
|
||||
db_next = AVL_NEXT(&dn->dn_dbufs, db);
|
||||
#ifdef DEBUG
|
||||
DB_DNODE_ENTER(db);
|
||||
ASSERT3P(DB_DNODE(db), ==, dn);
|
||||
DB_DNODE_EXIT(db);
|
||||
DB_DNODE_ENTER(db);
|
||||
ASSERT3P(DB_DNODE(db), ==, dn);
|
||||
DB_DNODE_EXIT(db);
|
||||
#endif /* DEBUG */
|
||||
|
||||
mutex_enter(&db->db_mtx);
|
||||
if (db->db_state == DB_EVICTING) {
|
||||
progress = TRUE;
|
||||
evicting = TRUE;
|
||||
mutex_exit(&db->db_mtx);
|
||||
} else if (refcount_is_zero(&db->db_holds)) {
|
||||
progress = TRUE;
|
||||
dbuf_clear(db); /* exits db_mtx for us */
|
||||
} else {
|
||||
mutex_exit(&db->db_mtx);
|
||||
}
|
||||
mutex_enter(&db->db_mtx);
|
||||
if (db->db_state != DB_EVICTING &&
|
||||
refcount_is_zero(&db->db_holds)) {
|
||||
db_marker->db_level = db->db_level;
|
||||
db_marker->db_blkid = db->db_blkid;
|
||||
db_marker->db_state = DB_SEARCH;
|
||||
avl_insert_here(&dn->dn_dbufs, db_marker, db,
|
||||
AVL_BEFORE);
|
||||
|
||||
dbuf_clear(db);
|
||||
|
||||
db_next = AVL_NEXT(&dn->dn_dbufs, db_marker);
|
||||
avl_remove(&dn->dn_dbufs, db_marker);
|
||||
} else {
|
||||
mutex_exit(&db->db_mtx);
|
||||
db_next = AVL_NEXT(&dn->dn_dbufs, db);
|
||||
}
|
||||
/*
|
||||
* NB: we need to drop dn_dbufs_mtx between passes so
|
||||
* that any DB_EVICTING dbufs can make progress.
|
||||
* Ideally, we would have some cv we could wait on, but
|
||||
* since we don't, just wait a bit to give the other
|
||||
* thread a chance to run.
|
||||
*/
|
||||
mutex_exit(&dn->dn_dbufs_mtx);
|
||||
if (evicting)
|
||||
delay(1);
|
||||
pass++;
|
||||
if ((pass % 100) == 0)
|
||||
dprintf("Exceeded %d passes evicting dbufs\n", pass);
|
||||
} while (progress);
|
||||
}
|
||||
mutex_exit(&dn->dn_dbufs_mtx);
|
||||
|
||||
if (pass >= 100)
|
||||
dprintf("Required %d passes to evict dbufs\n", pass);
|
||||
kmem_free(db_marker, sizeof (dmu_buf_impl_t));
|
||||
|
||||
dnode_evict_bonus(dn);
|
||||
}
|
||||
@@ -513,7 +502,6 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
|
||||
dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
|
||||
dnode_evict_dbufs(dn);
|
||||
ASSERT(avl_is_empty(&dn->dn_dbufs));
|
||||
ASSERT3P(dn->dn_bonus, ==, NULL);
|
||||
|
||||
/*
|
||||
* XXX - It would be nice to assert this, but we may still
|
||||
|
||||
Reference in New Issue
Block a user