mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-24 19:28:53 +03:00
Illumos 5056 - ZFS deadlock on db_mtx and dn_holds
5056 ZFS deadlock on db_mtx and dn_holds

Author: Justin Gibbs <justing@spectralogic.com>
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>

References:
  https://www.illumos.org/issues/5056
  https://github.com/illumos/illumos-gate/commit/bc9014e

Porting Notes:

sa_handle_get_from_db():
- The original patch includes an otherwise unmentioned fix for a
  possible use of an uninitialised variable.

dmu_objset_open_impl():
- Under Illumos, list_link_init() is the same as filling a list_node_t
  with NULLs, so they don't notice if they miss doing list_link_init()
  on a zeroed containing structure (e.g. one allocated with kmem_zalloc,
  as here). Under Linux, not so much: an uninitialised list_node_t goes
  "Boom!" some time later when it's used or destroyed.

dmu_objset_evict_dbufs():
- Reduce stack usage by using kmem_alloc().

Ported-by: Chris Dunlop <chris@onthe.net.au>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
committed by
Brian Behlendorf
parent
d683ddbb72
commit
0c66c32d1d
+74
-43
@@ -23,6 +23,7 @@
|
||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
*/
|
||||
|
||||
/* Portions Copyright 2010 Robert Milkowski */
|
||||
@@ -347,7 +348,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
|
||||
zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
|
||||
secondary_cache_changed_cb, os);
|
||||
}
|
||||
if (!dsl_dataset_is_snapshot(ds)) {
|
||||
if (!ds->ds_is_snapshot) {
|
||||
if (err == 0) {
|
||||
err = dsl_prop_register(ds,
|
||||
zfs_prop_to_name(ZFS_PROP_CHECKSUM),
|
||||
@@ -404,7 +405,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
|
||||
os->os_secondary_cache = ZFS_CACHE_ALL;
|
||||
}
|
||||
|
||||
if (ds == NULL || !dsl_dataset_is_snapshot(ds))
|
||||
if (ds == NULL || !ds->ds_is_snapshot)
|
||||
os->os_zil_header = os->os_phys->os_zil_header;
|
||||
os->os_zil = zil_alloc(os, &os->os_zil_header);
|
||||
|
||||
@@ -419,20 +420,19 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
|
||||
list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
|
||||
offsetof(dmu_buf_impl_t, db_link));
|
||||
|
||||
list_link_init(&os->os_evicting_node);
|
||||
|
||||
mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
|
||||
DMU_META_DNODE(os) = dnode_special_open(os,
|
||||
&os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT,
|
||||
&os->os_meta_dnode);
|
||||
dnode_special_open(os, &os->os_phys->os_meta_dnode,
|
||||
DMU_META_DNODE_OBJECT, &os->os_meta_dnode);
|
||||
if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
|
||||
DMU_USERUSED_DNODE(os) = dnode_special_open(os,
|
||||
&os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT,
|
||||
&os->os_userused_dnode);
|
||||
DMU_GROUPUSED_DNODE(os) = dnode_special_open(os,
|
||||
&os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT,
|
||||
&os->os_groupused_dnode);
|
||||
dnode_special_open(os, &os->os_phys->os_userused_dnode,
|
||||
DMU_USERUSED_OBJECT, &os->os_userused_dnode);
|
||||
dnode_special_open(os, &os->os_phys->os_groupused_dnode,
|
||||
DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode);
|
||||
}
|
||||
|
||||
*osp = os;
|
||||
@@ -520,7 +520,7 @@ dmu_objset_own(const char *name, dmu_objset_type_t type,
|
||||
} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
|
||||
dsl_dataset_disown(ds, tag);
|
||||
return (SET_ERROR(EINVAL));
|
||||
} else if (!readonly && dsl_dataset_is_snapshot(ds)) {
|
||||
} else if (!readonly && ds->ds_is_snapshot) {
|
||||
dsl_dataset_disown(ds, tag);
|
||||
return (SET_ERROR(EROFS));
|
||||
}
|
||||
@@ -576,41 +576,57 @@ dmu_objset_disown(objset_t *os, void *tag)
|
||||
void
|
||||
dmu_objset_evict_dbufs(objset_t *os)
|
||||
{
|
||||
dnode_t *dn_marker;
|
||||
dnode_t *dn;
|
||||
|
||||
dn_marker = kmem_alloc(sizeof (dnode_t), KM_SLEEP);
|
||||
|
||||
mutex_enter(&os->os_lock);
|
||||
dn = list_head(&os->os_dnodes);
|
||||
while (dn != NULL) {
|
||||
/*
|
||||
* Skip dnodes without holds. We have to do this dance
|
||||
* because dnode_add_ref() only works if there is already a
|
||||
* hold. If the dnode has no holds, then it has no dbufs.
|
||||
*/
|
||||
if (dnode_add_ref(dn, FTAG)) {
|
||||
list_insert_after(&os->os_dnodes, dn, dn_marker);
|
||||
mutex_exit(&os->os_lock);
|
||||
|
||||
/* process the mdn last, since the other dnodes have holds on it */
|
||||
list_remove(&os->os_dnodes, DMU_META_DNODE(os));
|
||||
list_insert_tail(&os->os_dnodes, DMU_META_DNODE(os));
|
||||
dnode_evict_dbufs(dn);
|
||||
dnode_rele(dn, FTAG);
|
||||
|
||||
/*
|
||||
* Find the first dnode with holds. We have to do this dance
|
||||
* because dnode_add_ref() only works if you already have a
|
||||
* hold. If there are no holds then it has no dbufs so OK to
|
||||
* skip.
|
||||
*/
|
||||
for (dn = list_head(&os->os_dnodes);
|
||||
dn && !dnode_add_ref(dn, FTAG);
|
||||
dn = list_next(&os->os_dnodes, dn))
|
||||
continue;
|
||||
|
||||
while (dn) {
|
||||
dnode_t *next_dn = dn;
|
||||
|
||||
do {
|
||||
next_dn = list_next(&os->os_dnodes, next_dn);
|
||||
} while (next_dn && !dnode_add_ref(next_dn, FTAG));
|
||||
|
||||
mutex_exit(&os->os_lock);
|
||||
dnode_evict_dbufs(dn);
|
||||
dnode_rele(dn, FTAG);
|
||||
mutex_enter(&os->os_lock);
|
||||
dn = next_dn;
|
||||
mutex_enter(&os->os_lock);
|
||||
dn = list_next(&os->os_dnodes, dn_marker);
|
||||
list_remove(&os->os_dnodes, dn_marker);
|
||||
} else {
|
||||
dn = list_next(&os->os_dnodes, dn);
|
||||
}
|
||||
}
|
||||
mutex_exit(&os->os_lock);
|
||||
|
||||
kmem_free(dn_marker, sizeof (dnode_t));
|
||||
|
||||
if (DMU_USERUSED_DNODE(os) != NULL) {
|
||||
dnode_evict_dbufs(DMU_GROUPUSED_DNODE(os));
|
||||
dnode_evict_dbufs(DMU_USERUSED_DNODE(os));
|
||||
}
|
||||
dnode_evict_dbufs(DMU_META_DNODE(os));
|
||||
}
|
||||
|
||||
/*
|
||||
* Objset eviction processing is split into two pieces.
|
||||
* The first marks the objset as evicting, evicts any dbufs that
|
||||
* have a refcount of zero, and then queues up the objset for the
|
||||
* second phase of eviction. Once os->os_dnodes has been cleared by
|
||||
* dnode_buf_pageout()->dnode_destroy(), the second phase is executed.
|
||||
* The second phase closes the special dnodes, dequeues the objset from
|
||||
* the list of those undergoing eviction, and finally frees the objset.
|
||||
*
|
||||
* NOTE: Due to asynchronous eviction processing (invocation of
|
||||
* dnode_buf_pageout()), it is possible for the meta dnode for the
|
||||
* objset to have no holds even though os->os_dnodes is not empty.
|
||||
*/
|
||||
void
|
||||
dmu_objset_evict(objset_t *os)
|
||||
{
|
||||
@@ -622,7 +638,7 @@ dmu_objset_evict(objset_t *os)
|
||||
ASSERT(!dmu_objset_is_dirty(os, t));
|
||||
|
||||
if (ds) {
|
||||
if (!dsl_dataset_is_snapshot(ds)) {
|
||||
if (!ds->ds_is_snapshot) {
|
||||
VERIFY0(dsl_prop_unregister(ds,
|
||||
zfs_prop_to_name(ZFS_PROP_CHECKSUM),
|
||||
checksum_changed_cb, os));
|
||||
@@ -656,8 +672,24 @@ dmu_objset_evict(objset_t *os)
|
||||
if (os->os_sa)
|
||||
sa_tear_down(os);
|
||||
|
||||
os->os_evicting = B_TRUE;
|
||||
dmu_objset_evict_dbufs(os);
|
||||
|
||||
mutex_enter(&os->os_lock);
|
||||
spa_evicting_os_register(os->os_spa, os);
|
||||
if (list_is_empty(&os->os_dnodes)) {
|
||||
mutex_exit(&os->os_lock);
|
||||
dmu_objset_evict_done(os);
|
||||
} else {
|
||||
mutex_exit(&os->os_lock);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
dmu_objset_evict_done(objset_t *os)
|
||||
{
|
||||
ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
|
||||
|
||||
dnode_special_close(&os->os_meta_dnode);
|
||||
if (DMU_USERUSED_DNODE(os)) {
|
||||
dnode_special_close(&os->os_userused_dnode);
|
||||
@@ -665,8 +697,6 @@ dmu_objset_evict(objset_t *os)
|
||||
}
|
||||
zil_free(os->os_zil);
|
||||
|
||||
ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
|
||||
|
||||
VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf));
|
||||
|
||||
/*
|
||||
@@ -681,6 +711,7 @@ dmu_objset_evict(objset_t *os)
|
||||
mutex_destroy(&os->os_lock);
|
||||
mutex_destroy(&os->os_obj_lock);
|
||||
mutex_destroy(&os->os_user_ptr_lock);
|
||||
spa_evicting_os_deregister(os->os_spa, os);
|
||||
kmem_free(os, sizeof (objset_t));
|
||||
}
|
||||
|
||||
@@ -888,7 +919,7 @@ dmu_objset_clone_check(void *arg, dmu_tx_t *tx)
|
||||
}
|
||||
|
||||
/* You can only clone snapshots, not the head datasets. */
|
||||
if (!dsl_dataset_is_snapshot(origin)) {
|
||||
if (!origin->ds_is_snapshot) {
|
||||
dsl_dataset_rele(origin, FTAG);
|
||||
return (SET_ERROR(EINVAL));
|
||||
}
|
||||
@@ -1453,7 +1484,7 @@ int
|
||||
dmu_objset_is_snapshot(objset_t *os)
|
||||
{
|
||||
if (os->os_dsl_dataset != NULL)
|
||||
return (dsl_dataset_is_snapshot(os->os_dsl_dataset));
|
||||
return (os->os_dsl_dataset->ds_is_snapshot);
|
||||
else
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user