Fix dnode_hold_impl() soft lockup

Soft lockups could happen when multiple threads are trying
to get the zrl on the same dnode handle in order to allocate
and initialize a dnode marked as DN_SLOT_ALLOCATED.

Don't loop from the beginning when we can't get the zrl; otherwise
we would increase the zrl refcount and nobody could actually
lock it.

Reviewed-by: Tom Caputi <tcaputi@datto.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Li Dongyang <dongyangli@ddn.com>
Closes #8433
This commit is contained in:
lidongyang 2019-02-23 04:48:37 +11:00 committed by Brian Behlendorf
parent f8bb2a7e0c
commit 8d9e51c084
2 changed files with 53 additions and 56 deletions

View File

@ -236,6 +236,7 @@ extern kthread_t *zk_thread_create(void (*func)(void *), void *arg,
#define kpreempt_disable() ((void)0) #define kpreempt_disable() ((void)0)
#define kpreempt_enable() ((void)0) #define kpreempt_enable() ((void)0)
#define cond_resched() sched_yield()
/* /*
* Mutexes * Mutexes

View File

@ -1153,8 +1153,10 @@ dnode_free_interior_slots(dnode_t *dn)
ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK); ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
while (!dnode_slots_tryenter(children, idx, slots)) while (!dnode_slots_tryenter(children, idx, slots)) {
DNODE_STAT_BUMP(dnode_free_interior_lock_retry); DNODE_STAT_BUMP(dnode_free_interior_lock_retry);
cond_resched();
}
dnode_set_slots(children, idx, slots, DN_SLOT_FREE); dnode_set_slots(children, idx, slots, DN_SLOT_FREE);
dnode_slots_rele(children, idx, slots); dnode_slots_rele(children, idx, slots);
@ -1401,34 +1403,30 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
} }
ASSERT(dnc->dnc_count == epb); ASSERT(dnc->dnc_count == epb);
dn = DN_SLOT_UNINIT;
if (flag & DNODE_MUST_BE_ALLOCATED) { if (flag & DNODE_MUST_BE_ALLOCATED) {
slots = 1; slots = 1;
while (dn == DN_SLOT_UNINIT) { dnode_slots_hold(dnc, idx, slots);
dnode_slots_hold(dnc, idx, slots); dnh = &dnc->dnc_children[idx];
dnh = &dnc->dnc_children[idx];
if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) {
dn = dnh->dnh_dnode;
break;
} else if (dnh->dnh_dnode == DN_SLOT_INTERIOR) {
DNODE_STAT_BUMP(dnode_hold_alloc_interior);
dnode_slots_rele(dnc, idx, slots);
dbuf_rele(db, FTAG);
return (SET_ERROR(EEXIST));
} else if (dnh->dnh_dnode != DN_SLOT_ALLOCATED) {
DNODE_STAT_BUMP(dnode_hold_alloc_misses);
dnode_slots_rele(dnc, idx, slots);
dbuf_rele(db, FTAG);
return (SET_ERROR(ENOENT));
}
if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) {
dn = dnh->dnh_dnode;
} else if (dnh->dnh_dnode == DN_SLOT_INTERIOR) {
DNODE_STAT_BUMP(dnode_hold_alloc_interior);
dnode_slots_rele(dnc, idx, slots); dnode_slots_rele(dnc, idx, slots);
if (!dnode_slots_tryenter(dnc, idx, slots)) { dbuf_rele(db, FTAG);
return (SET_ERROR(EEXIST));
} else if (dnh->dnh_dnode != DN_SLOT_ALLOCATED) {
DNODE_STAT_BUMP(dnode_hold_alloc_misses);
dnode_slots_rele(dnc, idx, slots);
dbuf_rele(db, FTAG);
return (SET_ERROR(ENOENT));
} else {
dnode_slots_rele(dnc, idx, slots);
while (!dnode_slots_tryenter(dnc, idx, slots)) {
DNODE_STAT_BUMP(dnode_hold_alloc_lock_retry); DNODE_STAT_BUMP(dnode_hold_alloc_lock_retry);
continue; cond_resched();
} }
/* /*
@ -1463,45 +1461,43 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
return (SET_ERROR(ENOSPC)); return (SET_ERROR(ENOSPC));
} }
while (dn == DN_SLOT_UNINIT) { dnode_slots_hold(dnc, idx, slots);
dnode_slots_hold(dnc, idx, slots);
if (!dnode_check_slots_free(dnc, idx, slots)) {
DNODE_STAT_BUMP(dnode_hold_free_misses);
dnode_slots_rele(dnc, idx, slots);
dbuf_rele(db, FTAG);
return (SET_ERROR(ENOSPC));
}
if (!dnode_check_slots_free(dnc, idx, slots)) {
DNODE_STAT_BUMP(dnode_hold_free_misses);
dnode_slots_rele(dnc, idx, slots); dnode_slots_rele(dnc, idx, slots);
if (!dnode_slots_tryenter(dnc, idx, slots)) { dbuf_rele(db, FTAG);
DNODE_STAT_BUMP(dnode_hold_free_lock_retry); return (SET_ERROR(ENOSPC));
continue; }
}
if (!dnode_check_slots_free(dnc, idx, slots)) { dnode_slots_rele(dnc, idx, slots);
DNODE_STAT_BUMP(dnode_hold_free_lock_misses); while (!dnode_slots_tryenter(dnc, idx, slots)) {
dnode_slots_rele(dnc, idx, slots); DNODE_STAT_BUMP(dnode_hold_free_lock_retry);
dbuf_rele(db, FTAG); cond_resched();
return (SET_ERROR(ENOSPC)); }
}
/* if (!dnode_check_slots_free(dnc, idx, slots)) {
* Allocated but otherwise free dnodes which would DNODE_STAT_BUMP(dnode_hold_free_lock_misses);
* be in the interior of a multi-slot dnodes need dnode_slots_rele(dnc, idx, slots);
* to be freed. Single slot dnodes can be safely dbuf_rele(db, FTAG);
* re-purposed as a performance optimization. return (SET_ERROR(ENOSPC));
*/ }
if (slots > 1)
dnode_reclaim_slots(dnc, idx + 1, slots - 1);
dnh = &dnc->dnc_children[idx]; /*
if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) { * Allocated but otherwise free dnodes which would
dn = dnh->dnh_dnode; * be in the interior of a multi-slot dnodes need
} else { * to be freed. Single slot dnodes can be safely
dn = dnode_create(os, dn_block + idx, db, * re-purposed as a performance optimization.
object, dnh); */
} if (slots > 1)
dnode_reclaim_slots(dnc, idx + 1, slots - 1);
dnh = &dnc->dnc_children[idx];
if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) {
dn = dnh->dnh_dnode;
} else {
dn = dnode_create(os, dn_block + idx, db,
object, dnh);
} }
mutex_enter(&dn->dn_mtx); mutex_enter(&dn->dn_mtx);