Fix zil replay panic when TX_REMOVE followed by TX_CREATE

If TX_REMOVE is followed by TX_CREATE on the same object id, we need to
make sure the object removal is completely finished before creation. The
current implementation relies on dnode_hold_impl with
DNODE_MUST_BE_ALLOCATED returning ENOENT. While this check seems to work
fine before, in current version it does not guarantee the object removal
is completed.

We fix this by checking if DNODE_MUST_BE_FREE returns successful
instead. Also add test and remove dead code in dnode_hold_impl.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Tom Caputi <tcaputi@datto.com>
Signed-off-by: Chunwei Chen <david.chen@nutanix.com>
Closes #7151
Closes #8910
Closes #9123
Closes #9145
This commit is contained in:
Chunwei Chen
2019-08-28 10:42:02 -07:00
committed by Brian Behlendorf
parent 9c9dcd6e04
commit 035e96118b
7 changed files with 184 additions and 24 deletions
+37 -12
View File
@@ -55,7 +55,6 @@ dnode_stats_t dnode_stats = {
{ "dnode_hold_free_lock_retry", KSTAT_DATA_UINT64 },
{ "dnode_hold_free_overflow", KSTAT_DATA_UINT64 },
{ "dnode_hold_free_refcount", KSTAT_DATA_UINT64 },
{ "dnode_hold_free_txg", KSTAT_DATA_UINT64 },
{ "dnode_free_interior_lock_retry", KSTAT_DATA_UINT64 },
{ "dnode_allocate", KSTAT_DATA_UINT64 },
{ "dnode_reallocate", KSTAT_DATA_UINT64 },
@@ -1263,6 +1262,10 @@ dnode_buf_evict_async(void *dbu)
* as an extra dnode slot by an large dnode, in which case it returns
* ENOENT.
*
* If the DNODE_DRY_RUN flag is set, we don't actually hold the dnode, just
* return whether the hold would succeed or not. tag and dnp should set to
* NULL in this case.
*
* errors:
* EINVAL - Invalid object number or flags.
* ENOSPC - Hole too small to fulfill "slots" request (DNODE_MUST_BE_FREE)
@@ -1291,6 +1294,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
ASSERT(!(flag & DNODE_MUST_BE_ALLOCATED) || (slots == 0));
ASSERT(!(flag & DNODE_MUST_BE_FREE) || (slots > 0));
IMPLY(flag & DNODE_DRY_RUN, (tag == NULL) && (dnp == NULL));
/*
* If you are holding the spa config lock as writer, you shouldn't
@@ -1320,8 +1324,11 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
if ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE)
return (SET_ERROR(EEXIST));
DNODE_VERIFY(dn);
(void) zfs_refcount_add(&dn->dn_holds, tag);
*dnp = dn;
/* Don't actually hold if dry run, just return 0 */
if (!(flag & DNODE_DRY_RUN)) {
(void) zfs_refcount_add(&dn->dn_holds, tag);
*dnp = dn;
}
return (0);
}
@@ -1462,6 +1469,14 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
return (SET_ERROR(ENOENT));
}
/* Don't actually hold if dry run, just return 0 */
if (flag & DNODE_DRY_RUN) {
mutex_exit(&dn->dn_mtx);
dnode_slots_rele(dnc, idx, slots);
dbuf_rele(db, FTAG);
return (0);
}
DNODE_STAT_BUMP(dnode_hold_alloc_hits);
} else if (flag & DNODE_MUST_BE_FREE) {
@@ -1519,6 +1534,14 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
return (SET_ERROR(EEXIST));
}
/* Don't actually hold if dry run, just return 0 */
if (flag & DNODE_DRY_RUN) {
mutex_exit(&dn->dn_mtx);
dnode_slots_rele(dnc, idx, slots);
dbuf_rele(db, FTAG);
return (0);
}
dnode_set_slots(dnc, idx + 1, slots - 1, DN_SLOT_INTERIOR);
DNODE_STAT_BUMP(dnode_hold_free_hits);
} else {
@@ -1526,15 +1549,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
return (SET_ERROR(EINVAL));
}
if (dn->dn_free_txg) {
DNODE_STAT_BUMP(dnode_hold_free_txg);
type = dn->dn_type;
mutex_exit(&dn->dn_mtx);
dnode_slots_rele(dnc, idx, slots);
dbuf_rele(db, FTAG);
return (SET_ERROR((flag & DNODE_MUST_BE_ALLOCATED) ?
ENOENT : EEXIST));
}
ASSERT0(dn->dn_free_txg);
if (zfs_refcount_add(&dn->dn_holds, tag) == 1)
dbuf_add_ref(db, dnh);
@@ -1625,6 +1640,16 @@ dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting)
}
}
/*
* Test whether we can create a dnode at the specified location.
*/
int
dnode_try_claim(objset_t *os, uint64_t object, int slots)
{
return (dnode_hold_impl(os, object, DNODE_MUST_BE_FREE | DNODE_DRY_RUN,
slots, NULL, NULL));
}
void
dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
{
+4 -4
View File
@@ -337,8 +337,8 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap)
xva.xva_vattr.va_nblocks = lr->lr_gen;
xva.xva_vattr.va_fsid = dnodesize;
error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL);
if (error != ENOENT)
error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT);
if (error)
goto bail;
if (lr->lr_common.lrc_txtype & TX_CI)
@@ -473,8 +473,8 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap)
xva.xva_vattr.va_nblocks = lr->lr_gen;
xva.xva_vattr.va_fsid = dnodesize;
error = dmu_object_info(zfsvfs->z_os, objid, NULL);
if (error != ENOENT)
error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT);
if (error)
goto out;
if (lr->lr_common.lrc_txtype & TX_CI)