Illumos 5056 - ZFS deadlock on db_mtx and dn_holds

5056 ZFS deadlock on db_mtx and dn_holds
Author: Justin Gibbs <justing@spectralogic.com>
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>

References:
  https://www.illumos.org/issues/5056
  https://github.com/illumos/illumos-gate/commit/bc9014e

Porting Notes:

sa_handle_get_from_db():
  - the original patch includes an otherwise unmentioned fix for a
    possible usage of an uninitialised variable

dmu_objset_open_impl():
  - Under Illumos list_link_init() is the same as filling a list_node_t
    with NULLs, so they don't notice if they miss doing list_link_init()
    on a zero'd containing structure (e.g. allocated with kmem_zalloc as
    here). Under Linux, not so much: an uninitialised list_node_t goes
    "Boom!" some time later when it's used or destroyed.

dmu_objset_evict_dbufs():
  - reduce stack usage using kmem_alloc()

Ported-by: Chris Dunlop <chris@onthe.net.au>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
Justin T. Gibbs
2015-04-02 14:44:32 +11:00
committed by Brian Behlendorf
parent d683ddbb72
commit 0c66c32d1d
35 changed files with 645 additions and 316 deletions
+28 -10
View File
@@ -23,6 +23,7 @@
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013 Martin Matuska. All rights reserved.
* Copyright (c) 2014 Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#include <sys/dmu.h>
@@ -126,14 +127,15 @@ extern inline dsl_dir_phys_t *dsl_dir_phys(dsl_dir_t *dd);
static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
/* ARGSUSED */
static void
dsl_dir_evict(dmu_buf_t *db, void *arg)
dsl_dir_evict(void *dbu)
{
dsl_dir_t *dd = arg;
dsl_dir_t *dd = dbu;
int t;
ASSERTV(dsl_pool_t *dp = dd->dd_pool);
dd->dd_dbuf = NULL;
for (t = 0; t < TXG_SIZE; t++) {
ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t));
ASSERT(dd->dd_tempreserved[t] == 0);
@@ -141,9 +143,9 @@ dsl_dir_evict(dmu_buf_t *db, void *arg)
}
if (dd->dd_parent)
dsl_dir_rele(dd->dd_parent, dd);
dsl_dir_async_rele(dd->dd_parent, dd);
spa_close(dd->dd_pool->dp_spa, dd);
spa_async_close(dd->dd_pool->dp_spa, dd);
/*
* The props callback list should have been cleaned up by
@@ -239,8 +241,9 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
dmu_buf_rele(origin_bonus, FTAG);
}
winner = dmu_buf_set_user_ie(dbuf, dd, dsl_dir_evict);
if (winner) {
dmu_buf_init_user(&dd->dd_dbu, dsl_dir_evict, &dd->dd_dbuf);
winner = dmu_buf_set_user_ie(dbuf, &dd->dd_dbu);
if (winner != NULL) {
if (dd->dd_parent)
dsl_dir_rele(dd->dd_parent, dd);
mutex_destroy(&dd->dd_lock);
@@ -284,6 +287,21 @@ dsl_dir_rele(dsl_dir_t *dd, void *tag)
dmu_buf_rele(dd->dd_dbuf, tag);
}
/*
* Remove a reference to the given dsl dir that is being asynchronously
* released. Async releases occur from a taskq performing eviction of
* dsl datasets and dirs. This process is identical to a normal release
* with the exception of using the async API for releasing the reference on
* the spa.
*/
void
dsl_dir_async_rele(dsl_dir_t *dd, void *tag)
{
dprintf_dd(dd, "%s\n", "");
spa_async_close(dd->dd_pool->dp_spa, tag);
dmu_buf_rele(dd->dd_dbuf, tag);
}
/* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */
void
dsl_dir_name(dsl_dir_t *dd, char *buf)
@@ -417,7 +435,7 @@ dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
}
while (next != NULL) {
dsl_dir_t *child_ds;
dsl_dir_t *child_dd;
err = getcomponent(next, buf, &nextnext);
if (err != 0)
break;
@@ -436,11 +454,11 @@ dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
break;
}
err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_ds);
err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_dd);
if (err != 0)
break;
dsl_dir_rele(dd, tag);
dd = child_ds;
dd = child_dd;
next = nextnext;
}