mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 10:37:35 +03:00
Illumos 5056 - ZFS deadlock on db_mtx and dn_holds
5056 ZFS deadlock on db_mtx and dn_holds

Author: Justin Gibbs <justing@spectralogic.com>
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>

References:
  https://www.illumos.org/issues/5056
  https://github.com/illumos/illumos-gate/commit/bc9014e

Porting Notes:

sa_handle_get_from_db():
  - the original patch includes an otherwise unmentioned fix for a
    possible usage of an uninitialised variable

dmu_objset_open_impl():
  - Under Illumos list_link_init() is the same as filling a list_node_t
    with NULLs, so they don't notice if they miss doing list_link_init()
    on a zero'd containing structure (e.g. allocated with kmem_zalloc as
    here). Under Linux, not so much: an uninitialised list_node_t goes
    "Boom!" some time later when it's used or destroyed.

dmu_objset_evict_dbufs():
  - reduce stack usage using kmem_alloc()

Ported-by: Chris Dunlop <chris@onthe.net.au>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
committed by
Brian Behlendorf
parent
d683ddbb72
commit
0c66c32d1d
+54
-31
@@ -21,6 +21,7 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@@ -405,8 +406,9 @@ static dnode_t *
|
||||
dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
|
||||
uint64_t object, dnode_handle_t *dnh)
|
||||
{
|
||||
dnode_t *dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
|
||||
dnode_t *dn;
|
||||
|
||||
dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
|
||||
ASSERT(!POINTER_IS_VALID(dn->dn_objset));
|
||||
dn->dn_moved = 0;
|
||||
|
||||
@@ -443,13 +445,31 @@ dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
|
||||
ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
|
||||
|
||||
mutex_enter(&os->os_lock);
|
||||
list_insert_head(&os->os_dnodes, dn);
|
||||
membar_producer();
|
||||
if (dnh->dnh_dnode != NULL) {
|
||||
/* Lost the allocation race. */
|
||||
mutex_exit(&os->os_lock);
|
||||
kmem_cache_free(dnode_cache, dn);
|
||||
return (dnh->dnh_dnode);
|
||||
}
|
||||
|
||||
/*
|
||||
* Everything else must be valid before assigning dn_objset makes the
|
||||
* dnode eligible for dnode_move().
|
||||
* Exclude special dnodes from os_dnodes so an empty os_dnodes
|
||||
* signifies that the special dnodes have no references from
|
||||
* their children (the entries in os_dnodes). This allows
|
||||
* dnode_destroy() to easily determine if the last child has
|
||||
* been removed and then complete eviction of the objset.
|
||||
*/
|
||||
if (!DMU_OBJECT_IS_SPECIAL(object))
|
||||
list_insert_head(&os->os_dnodes, dn);
|
||||
membar_producer();
|
||||
|
||||
/*
|
||||
* Everything else must be valid before assigning dn_objset
|
||||
* makes the dnode eligible for dnode_move().
|
||||
*/
|
||||
dn->dn_objset = os;
|
||||
|
||||
dnh->dnh_dnode = dn;
|
||||
mutex_exit(&os->os_lock);
|
||||
|
||||
arc_space_consume(sizeof (dnode_t), ARC_SPACE_OTHER);
|
||||
@@ -463,12 +483,18 @@ static void
|
||||
dnode_destroy(dnode_t *dn)
|
||||
{
|
||||
objset_t *os = dn->dn_objset;
|
||||
boolean_t complete_os_eviction = B_FALSE;
|
||||
|
||||
ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0);
|
||||
|
||||
mutex_enter(&os->os_lock);
|
||||
POINTER_INVALIDATE(&dn->dn_objset);
|
||||
list_remove(&os->os_dnodes, dn);
|
||||
if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
|
||||
list_remove(&os->os_dnodes, dn);
|
||||
complete_os_eviction =
|
||||
list_is_empty(&os->os_dnodes) &&
|
||||
list_link_active(&os->os_evicting_node);
|
||||
}
|
||||
mutex_exit(&os->os_lock);
|
||||
|
||||
/* the dnode can no longer move, so we can release the handle */
|
||||
@@ -503,6 +529,9 @@ dnode_destroy(dnode_t *dn)
|
||||
dmu_zfetch_rele(&dn->dn_zfetch);
|
||||
kmem_cache_free(dnode_cache, dn);
|
||||
arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER);
|
||||
|
||||
if (complete_os_eviction)
|
||||
dmu_objset_evict_done(os);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -968,33 +997,32 @@ dnode_special_close(dnode_handle_t *dnh)
|
||||
*/
|
||||
while (refcount_count(&dn->dn_holds) > 0)
|
||||
delay(1);
|
||||
ASSERT(dn->dn_dbuf == NULL ||
|
||||
dmu_buf_get_user(&dn->dn_dbuf->db) == NULL);
|
||||
zrl_add(&dnh->dnh_zrlock);
|
||||
dnode_destroy(dn); /* implicit zrl_remove() */
|
||||
zrl_destroy(&dnh->dnh_zrlock);
|
||||
dnh->dnh_dnode = NULL;
|
||||
}
|
||||
|
||||
dnode_t *
|
||||
void
|
||||
dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object,
|
||||
dnode_handle_t *dnh)
|
||||
{
|
||||
dnode_t *dn = dnode_create(os, dnp, NULL, object, dnh);
|
||||
dnh->dnh_dnode = dn;
|
||||
dnode_t *dn;
|
||||
|
||||
dn = dnode_create(os, dnp, NULL, object, dnh);
|
||||
zrl_init(&dnh->dnh_zrlock);
|
||||
DNODE_VERIFY(dn);
|
||||
return (dn);
|
||||
}
|
||||
|
||||
static void
|
||||
dnode_buf_pageout(dmu_buf_t *db, void *arg)
|
||||
dnode_buf_pageout(void *dbu)
|
||||
{
|
||||
dnode_children_t *children_dnodes = arg;
|
||||
dnode_children_t *children_dnodes = dbu;
|
||||
int i;
|
||||
int epb = db->db_size >> DNODE_SHIFT;
|
||||
|
||||
ASSERT(epb == children_dnodes->dnc_count);
|
||||
|
||||
for (i = 0; i < epb; i++) {
|
||||
for (i = 0; i < children_dnodes->dnc_count; i++) {
|
||||
dnode_handle_t *dnh = &children_dnodes->dnc_children[i];
|
||||
dnode_t *dn;
|
||||
|
||||
@@ -1024,7 +1052,7 @@ dnode_buf_pageout(dmu_buf_t *db, void *arg)
|
||||
dnh->dnh_dnode = NULL;
|
||||
}
|
||||
kmem_free(children_dnodes, sizeof (dnode_children_t) +
|
||||
epb * sizeof (dnode_handle_t));
|
||||
children_dnodes->dnc_count * sizeof (dnode_handle_t));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1108,16 +1136,17 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
|
||||
if (children_dnodes == NULL) {
|
||||
int i;
|
||||
dnode_children_t *winner;
|
||||
children_dnodes = kmem_alloc(sizeof (dnode_children_t) +
|
||||
children_dnodes = kmem_zalloc(sizeof (dnode_children_t) +
|
||||
epb * sizeof (dnode_handle_t), KM_SLEEP);
|
||||
children_dnodes->dnc_count = epb;
|
||||
dnh = &children_dnodes->dnc_children[0];
|
||||
for (i = 0; i < epb; i++) {
|
||||
zrl_init(&dnh[i].dnh_zrlock);
|
||||
dnh[i].dnh_dnode = NULL;
|
||||
}
|
||||
if ((winner = dmu_buf_set_user(&db->db, children_dnodes,
|
||||
dnode_buf_pageout))) {
|
||||
dmu_buf_init_user(&children_dnodes->dnc_dbu,
|
||||
dnode_buf_pageout, NULL);
|
||||
winner = dmu_buf_set_user(&db->db, &children_dnodes->dnc_dbu);
|
||||
if (winner != NULL) {
|
||||
|
||||
for (i = 0; i < epb; i++) {
|
||||
zrl_destroy(&dnh[i].dnh_zrlock);
|
||||
@@ -1132,17 +1161,11 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
|
||||
|
||||
dnh = &children_dnodes->dnc_children[idx];
|
||||
zrl_add(&dnh->dnh_zrlock);
|
||||
if ((dn = dnh->dnh_dnode) == NULL) {
|
||||
dn = dnh->dnh_dnode;
|
||||
if (dn == NULL) {
|
||||
dnode_phys_t *phys = (dnode_phys_t *)db->db.db_data+idx;
|
||||
dnode_t *winner;
|
||||
|
||||
dn = dnode_create(os, phys, db, object, dnh);
|
||||
winner = atomic_cas_ptr(&dnh->dnh_dnode, NULL, dn);
|
||||
if (winner != NULL) {
|
||||
zrl_add(&dnh->dnh_zrlock);
|
||||
dnode_destroy(dn); /* implicit zrl_remove() */
|
||||
dn = winner;
|
||||
}
|
||||
}
|
||||
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
@@ -1156,10 +1179,10 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
|
||||
dbuf_rele(db, FTAG);
|
||||
return (type == DMU_OT_NONE ? ENOENT : EEXIST);
|
||||
}
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
|
||||
if (refcount_add(&dn->dn_holds, tag) == 1)
|
||||
dbuf_add_ref(db, dnh);
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
|
||||
/* Now we can rely on the hold to prevent the dnode from moving. */
|
||||
zrl_remove(&dnh->dnh_zrlock);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user