Fix dnode_hold_impl() soft lockup

Soft lockups could happen when multiple threads trying
to get zrl on the same dnode handle in order to allocate
and initialize the dnode marked as DN_SLOT_ALLOCATED.

Don't loop from beginning when we can't get zrl, otherwise
we would increase the zrl refcount and nobody can actually
lock it.

Reviewed by: Tom Caputi <tcaputi@datto.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Li Dongyang <dongyangli@ddn.com>
Closes #8433
This commit is contained in:
lidongyang 2019-02-23 04:48:37 +11:00 committed by Brian Behlendorf
parent f8bb2a7e0c
commit 8d9e51c084
2 changed files with 53 additions and 56 deletions

View File

@ -236,6 +236,7 @@ extern kthread_t *zk_thread_create(void (*func)(void *), void *arg,
#define kpreempt_disable() ((void)0)
#define kpreempt_enable() ((void)0)
#define cond_resched() sched_yield()
/*
* Mutexes

View File

@ -1153,8 +1153,10 @@ dnode_free_interior_slots(dnode_t *dn)
ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
while (!dnode_slots_tryenter(children, idx, slots))
while (!dnode_slots_tryenter(children, idx, slots)) {
DNODE_STAT_BUMP(dnode_free_interior_lock_retry);
cond_resched();
}
dnode_set_slots(children, idx, slots, DN_SLOT_FREE);
dnode_slots_rele(children, idx, slots);
@ -1401,18 +1403,15 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
}
ASSERT(dnc->dnc_count == epb);
dn = DN_SLOT_UNINIT;
if (flag & DNODE_MUST_BE_ALLOCATED) {
slots = 1;
while (dn == DN_SLOT_UNINIT) {
dnode_slots_hold(dnc, idx, slots);
dnh = &dnc->dnc_children[idx];
if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) {
dn = dnh->dnh_dnode;
break;
} else if (dnh->dnh_dnode == DN_SLOT_INTERIOR) {
DNODE_STAT_BUMP(dnode_hold_alloc_interior);
dnode_slots_rele(dnc, idx, slots);
@ -1423,12 +1422,11 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
dnode_slots_rele(dnc, idx, slots);
dbuf_rele(db, FTAG);
return (SET_ERROR(ENOENT));
}
} else {
dnode_slots_rele(dnc, idx, slots);
if (!dnode_slots_tryenter(dnc, idx, slots)) {
while (!dnode_slots_tryenter(dnc, idx, slots)) {
DNODE_STAT_BUMP(dnode_hold_alloc_lock_retry);
continue;
cond_resched();
}
/*
@ -1463,7 +1461,6 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
return (SET_ERROR(ENOSPC));
}
while (dn == DN_SLOT_UNINIT) {
dnode_slots_hold(dnc, idx, slots);
if (!dnode_check_slots_free(dnc, idx, slots)) {
@ -1474,9 +1471,9 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
}
dnode_slots_rele(dnc, idx, slots);
if (!dnode_slots_tryenter(dnc, idx, slots)) {
while (!dnode_slots_tryenter(dnc, idx, slots)) {
DNODE_STAT_BUMP(dnode_hold_free_lock_retry);
continue;
cond_resched();
}
if (!dnode_check_slots_free(dnc, idx, slots)) {
@ -1502,7 +1499,6 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
dn = dnode_create(os, dn_block + idx, db,
object, dnh);
}
}
mutex_enter(&dn->dn_mtx);
if (!zfs_refcount_is_zero(&dn->dn_holds) || dn->dn_free_txg) {