mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-25 19:57:43 +03:00
Illumos 5056 - ZFS deadlock on db_mtx and dn_holds
5056 ZFS deadlock on db_mtx and dn_holds

Author: Justin Gibbs <justing@spectralogic.com>
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>

References:
  https://www.illumos.org/issues/5056
  https://github.com/illumos/illumos-gate/commit/bc9014e

Porting Notes:

sa_handle_get_from_db():
  - the original patch includes an otherwise unmentioned fix for a possible usage of an uninitialised variable

dmu_objset_open_impl():
  - Under Illumos list_link_init() is the same as filling a list_node_t with NULLs, so they don't notice if they miss doing list_link_init() on a zero'd containing structure (e.g. allocated with kmem_zalloc as here). Under Linux, not so much: an uninitialised list_node_t goes "Boom!" some time later when it's used or destroyed.

dmu_objset_evict_dbufs():
  - reduce stack usage using kmem_alloc()

Ported-by: Chris Dunlop <chris@onthe.net.au>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
committed by
Brian Behlendorf
parent
d683ddbb72
commit
0c66c32d1d
+133
-52
@@ -23,6 +23,7 @@
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@@ -78,10 +79,16 @@ static void dbuf_destroy(dmu_buf_impl_t *db);
|
||||
static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
|
||||
|
||||
#ifndef __lint
|
||||
extern inline void dmu_buf_init_user(dmu_buf_user_t *dbu,
|
||||
dmu_buf_evict_func_t *evict_func, dmu_buf_t **clear_on_evict_dbufp);
|
||||
#endif /* ! __lint */
|
||||
|
||||
/*
|
||||
* Global data structures and functions for the dbuf cache.
|
||||
*/
|
||||
static kmem_cache_t *dbuf_cache;
|
||||
static taskq_t *dbu_evict_taskq;
|
||||
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
@@ -247,17 +254,72 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
|
||||
|
||||
static arc_evict_func_t dbuf_do_evict;
|
||||
|
||||
/*
 * Selects which set of hold-count invariants dbuf_verify_user() checks.
 */
typedef enum {
	/* The user data is in the process of being evicted. */
	DBVU_EVICTING,
	/* Ordinary access to the user data; no eviction in progress. */
	DBVU_NOT_EVICTING
} dbvu_verify_type_t;
|
||||
|
||||
static void
|
||||
dbuf_verify_user(dmu_buf_impl_t *db, dbvu_verify_type_t verify_type)
|
||||
{
|
||||
#ifdef ZFS_DEBUG
|
||||
int64_t holds;
|
||||
|
||||
if (db->db_user == NULL)
|
||||
return;
|
||||
|
||||
/* Only data blocks support the attachment of user data. */
|
||||
ASSERT(db->db_level == 0);
|
||||
|
||||
/* Clients must resolve a dbuf before attaching user data. */
|
||||
ASSERT(db->db.db_data != NULL);
|
||||
ASSERT3U(db->db_state, ==, DB_CACHED);
|
||||
|
||||
holds = refcount_count(&db->db_holds);
|
||||
if (verify_type == DBVU_EVICTING) {
|
||||
/*
|
||||
* Immediate eviction occurs when holds == dirtycnt.
|
||||
* For normal eviction buffers, holds is zero on
|
||||
* eviction, except when dbuf_fix_old_data() calls
|
||||
* dbuf_clear_data(). However, the hold count can grow
|
||||
* during eviction even though db_mtx is held (see
|
||||
* dmu_bonus_hold() for an example), so we can only
|
||||
* test the generic invariant that holds >= dirtycnt.
|
||||
*/
|
||||
ASSERT3U(holds, >=, db->db_dirtycnt);
|
||||
} else {
|
||||
if (db->db_immediate_evict == TRUE)
|
||||
ASSERT3U(holds, >=, db->db_dirtycnt);
|
||||
else
|
||||
ASSERT3U(holds, >, 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
dbuf_evict_user(dmu_buf_impl_t *db)
|
||||
{
|
||||
dmu_buf_user_t *dbu = db->db_user;
|
||||
|
||||
ASSERT(MUTEX_HELD(&db->db_mtx));
|
||||
|
||||
if (db->db_level != 0 || db->db_evict_func == NULL)
|
||||
if (dbu == NULL)
|
||||
return;
|
||||
|
||||
db->db_evict_func(&db->db, db->db_user_ptr);
|
||||
db->db_user_ptr = NULL;
|
||||
db->db_evict_func = NULL;
|
||||
dbuf_verify_user(db, DBVU_EVICTING);
|
||||
db->db_user = NULL;
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
if (dbu->dbu_clear_on_evict_dbufp != NULL)
|
||||
*dbu->dbu_clear_on_evict_dbufp = NULL;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Invoke the callback from a taskq to avoid lock order reversals
|
||||
* and limit stack depth.
|
||||
*/
|
||||
taskq_dispatch_ent(dbu_evict_taskq, dbu->dbu_evict_func, dbu, 0,
|
||||
&dbu->dbu_tqent);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
@@ -331,6 +393,12 @@ retry:
|
||||
mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL);
|
||||
|
||||
dbuf_stats_init(h);
|
||||
|
||||
/*
|
||||
* All entries are queued via taskq_dispatch_ent(), so min/maxalloc
|
||||
* configuration is not required.
|
||||
*/
|
||||
dbu_evict_taskq = taskq_create("dbu_evict", 1, minclsyspri, 0, 0, 0);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -353,6 +421,7 @@ dbuf_fini(void)
|
||||
kmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *));
|
||||
#endif
|
||||
kmem_cache_destroy(dbuf_cache);
|
||||
taskq_destroy(dbu_evict_taskq);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -470,22 +539,28 @@ dbuf_verify(dmu_buf_impl_t *db)
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
dbuf_clear_data(dmu_buf_impl_t *db)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&db->db_mtx));
|
||||
dbuf_evict_user(db);
|
||||
db->db_buf = NULL;
|
||||
db->db.db_data = NULL;
|
||||
if (db->db_state != DB_NOFILL)
|
||||
db->db_state = DB_UNCACHED;
|
||||
}
|
||||
|
||||
static void
|
||||
dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&db->db_mtx));
|
||||
ASSERT(buf != NULL);
|
||||
|
||||
db->db_buf = buf;
|
||||
if (buf != NULL) {
|
||||
ASSERT(buf->b_data != NULL);
|
||||
db->db.db_data = buf->b_data;
|
||||
if (!arc_released(buf))
|
||||
arc_set_callback(buf, dbuf_do_evict, db);
|
||||
} else {
|
||||
dbuf_evict_user(db);
|
||||
db->db.db_data = NULL;
|
||||
if (db->db_state != DB_NOFILL)
|
||||
db->db_state = DB_UNCACHED;
|
||||
}
|
||||
ASSERT(buf->b_data != NULL);
|
||||
db->db.db_data = buf->b_data;
|
||||
if (!arc_released(buf))
|
||||
arc_set_callback(buf, dbuf_do_evict, db);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -507,7 +582,7 @@ dbuf_loan_arcbuf(dmu_buf_impl_t *db)
|
||||
} else {
|
||||
abuf = db->db_buf;
|
||||
arc_loan_inuse_buf(abuf, db);
|
||||
dbuf_set_data(db, NULL);
|
||||
dbuf_clear_data(db);
|
||||
mutex_exit(&db->db_mtx);
|
||||
}
|
||||
return (abuf);
|
||||
@@ -747,7 +822,7 @@ dbuf_noread(dmu_buf_impl_t *db)
|
||||
dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type));
|
||||
db->db_state = DB_FILL;
|
||||
} else if (db->db_state == DB_NOFILL) {
|
||||
dbuf_set_data(db, NULL);
|
||||
dbuf_clear_data(db);
|
||||
} else {
|
||||
ASSERT3U(db->db_state, ==, DB_CACHED);
|
||||
}
|
||||
@@ -803,7 +878,7 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
|
||||
dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type);
|
||||
bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
|
||||
} else {
|
||||
dbuf_set_data(db, NULL);
|
||||
dbuf_clear_data(db);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -854,7 +929,8 @@ void
|
||||
dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
|
||||
dmu_tx_t *tx)
|
||||
{
|
||||
dmu_buf_impl_t *db, *db_next, *db_search;
|
||||
dmu_buf_impl_t *db_search;
|
||||
dmu_buf_impl_t *db, *db_next;
|
||||
uint64_t txg = tx->tx_txg;
|
||||
avl_index_t where;
|
||||
boolean_t freespill =
|
||||
@@ -864,7 +940,7 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
|
||||
end_blkid = dn->dn_maxblkid;
|
||||
dprintf_dnode(dn, "start=%llu end=%llu\n", start_blkid, end_blkid);
|
||||
|
||||
db_seach = kmem_alloc(sizeof (dmu_buf_impl_t), KM_SLEEP);
|
||||
db_search = kmem_alloc(sizeof (dmu_buf_impl_t), KM_SLEEP);
|
||||
db_search->db_level = 0;
|
||||
db_search->db_blkid = start_blkid;
|
||||
db_search->db_state = DB_SEARCH;
|
||||
@@ -1436,7 +1512,7 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
||||
arc_buf_t *buf = db->db_buf;
|
||||
|
||||
ASSERT(db->db_state == DB_NOFILL || arc_released(buf));
|
||||
dbuf_set_data(db, NULL);
|
||||
dbuf_clear_data(db);
|
||||
VERIFY(arc_buf_remove_ref(buf, db));
|
||||
dbuf_evict(db);
|
||||
return (B_TRUE);
|
||||
@@ -1785,8 +1861,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
|
||||
db->db_parent = parent;
|
||||
db->db_blkptr = blkptr;
|
||||
|
||||
db->db_user_ptr = NULL;
|
||||
db->db_evict_func = NULL;
|
||||
db->db_user = NULL;
|
||||
db->db_immediate_evict = 0;
|
||||
db->db_freed_in_flight = 0;
|
||||
|
||||
@@ -2273,7 +2348,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
|
||||
/*
|
||||
* This dbuf has anonymous data associated with it.
|
||||
*/
|
||||
dbuf_set_data(db, NULL);
|
||||
dbuf_clear_data(db);
|
||||
VERIFY(arc_buf_remove_ref(buf, db));
|
||||
dbuf_evict(db);
|
||||
} else {
|
||||
@@ -2306,7 +2381,8 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
|
||||
} else {
|
||||
dbuf_clear(db);
|
||||
}
|
||||
} else if (arc_buf_eviction_needed(db->db_buf)) {
|
||||
} else if (db->db_objset->os_evicting ||
|
||||
arc_buf_eviction_needed(db->db_buf)) {
|
||||
dbuf_clear(db);
|
||||
} else {
|
||||
mutex_exit(&db->db_mtx);
|
||||
@@ -2325,51 +2401,57 @@ dbuf_refcount(dmu_buf_impl_t *db)
|
||||
}
|
||||
|
||||
void *
|
||||
dmu_buf_set_user(dmu_buf_t *db_fake, void *user_ptr,
|
||||
dmu_buf_evict_func_t *evict_func)
|
||||
dmu_buf_replace_user(dmu_buf_t *db_fake, dmu_buf_user_t *old_user,
|
||||
dmu_buf_user_t *new_user)
|
||||
{
|
||||
return (dmu_buf_update_user(db_fake, NULL, user_ptr, evict_func));
|
||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
|
||||
|
||||
mutex_enter(&db->db_mtx);
|
||||
dbuf_verify_user(db, DBVU_NOT_EVICTING);
|
||||
if (db->db_user == old_user)
|
||||
db->db_user = new_user;
|
||||
else
|
||||
old_user = db->db_user;
|
||||
dbuf_verify_user(db, DBVU_NOT_EVICTING);
|
||||
mutex_exit(&db->db_mtx);
|
||||
|
||||
return (old_user);
|
||||
}
|
||||
|
||||
void *
|
||||
dmu_buf_set_user_ie(dmu_buf_t *db_fake, void *user_ptr,
|
||||
dmu_buf_evict_func_t *evict_func)
|
||||
dmu_buf_set_user(dmu_buf_t *db_fake, dmu_buf_user_t *user)
|
||||
{
|
||||
return (dmu_buf_replace_user(db_fake, NULL, user));
|
||||
}
|
||||
|
||||
void *
|
||||
dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user)
|
||||
{
|
||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
|
||||
|
||||
db->db_immediate_evict = TRUE;
|
||||
return (dmu_buf_update_user(db_fake, NULL, user_ptr, evict_func));
|
||||
return (dmu_buf_set_user(db_fake, user));
|
||||
}
|
||||
|
||||
void *
|
||||
dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr, void *user_ptr,
|
||||
dmu_buf_evict_func_t *evict_func)
|
||||
dmu_buf_remove_user(dmu_buf_t *db_fake, dmu_buf_user_t *user)
|
||||
{
|
||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
|
||||
ASSERT(db->db_level == 0);
|
||||
|
||||
ASSERT((user_ptr == NULL) == (evict_func == NULL));
|
||||
|
||||
mutex_enter(&db->db_mtx);
|
||||
|
||||
if (db->db_user_ptr == old_user_ptr) {
|
||||
db->db_user_ptr = user_ptr;
|
||||
db->db_evict_func = evict_func;
|
||||
} else {
|
||||
old_user_ptr = db->db_user_ptr;
|
||||
}
|
||||
|
||||
mutex_exit(&db->db_mtx);
|
||||
return (old_user_ptr);
|
||||
return (dmu_buf_replace_user(db_fake, user, NULL));
|
||||
}
|
||||
|
||||
void *
|
||||
dmu_buf_get_user(dmu_buf_t *db_fake)
|
||||
{
|
||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
|
||||
ASSERT(!refcount_is_zero(&db->db_holds));
|
||||
|
||||
return (db->db_user_ptr);
|
||||
dbuf_verify_user(db, DBVU_NOT_EVICTING);
|
||||
return (db->db_user);
|
||||
}
|
||||
|
||||
void
|
||||
dmu_buf_user_evict_wait()
|
||||
{
|
||||
taskq_wait(dbu_evict_taskq);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
@@ -3037,7 +3119,6 @@ EXPORT_SYMBOL(dbuf_refcount);
|
||||
EXPORT_SYMBOL(dbuf_sync_list);
|
||||
EXPORT_SYMBOL(dmu_buf_set_user);
|
||||
EXPORT_SYMBOL(dmu_buf_set_user_ie);
|
||||
EXPORT_SYMBOL(dmu_buf_update_user);
|
||||
EXPORT_SYMBOL(dmu_buf_get_user);
|
||||
EXPORT_SYMBOL(dmu_buf_freeable);
|
||||
EXPORT_SYMBOL(dmu_buf_get_blkptr);
|
||||
|
||||
Reference in New Issue
Block a user