Fix problems receiving reallocated dnodes

This is a port of 047116ac - Raw sends must be able to decrease nlevels,
to the zfs-0.7-stable branch.  It includes the various fixes to the
problem of receiving incremental streams which include reallocated dnodes
in which the number of dnode slots has changed but excludes the parts
which are related to raw streams.

From 047116ac:

    Currently, when a raw zfs send file includes a
    DRR_OBJECT record that would decrease the number of
    levels of an existing object, the object is reallocated
    with dmu_object_reclaim() which creates the new dnode
    using the old object's nlevels. For non-raw sends this
    doesn't really matter, but raw sends require that
    nlevels on the receive side match that of the send
    side so that the checksum-of-MAC tree can be properly
    maintained. This patch corrects the issue by freeing
    the object completely before allocating it again in
    this case.

    This patch also corrects several issues with
    dnode_hold_impl() and related functions that prevented
    dnodes (particularly multi-slot dnodes) from being
    reallocated properly due to the fact that existing
    dnodes were not being fully cleaned up when they
    were freed.

    This patch adds a test to make sure that zfs recv
    functions properly with incremental streams containing
    dnodes of different sizes.

This also includes a one-liner fix from loli10K to fix a test failure:
https://github.com/zfsonlinux/zfs/pull/7792#discussion_r212769264

Authored-by: Tom Caputi <tcaputi@datto.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tim Chase <tim@chase2k.com>
Ported-by: Tim Chase <tim@chase2k.com>

Closes #6821
Closes #6864

NOTE: This is the first of the port of 3 related patches patches to the
zfs-0.7-release branch of ZoL.  The other two patches should immediately
follow this one.
This commit is contained in:
Tim Chase
2018-08-27 10:28:32 -04:00
committed by Tony Hutter
parent 3ea1f7f193
commit d2c8103a68
10 changed files with 258 additions and 15 deletions
-1
View File
@@ -275,7 +275,6 @@ dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
return (err);
}
int
dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
{
+46 -5
View File
@@ -2156,10 +2156,8 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
}
err = dmu_object_info(rwa->os, drro->drr_object, &doi);
if (err != 0 && err != ENOENT)
if (err != 0 && err != ENOENT && err != EEXIST)
return (SET_ERROR(EINVAL));
object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT;
if (drro->drr_object > rwa->max_object)
rwa->max_object = drro->drr_object;
@@ -2175,13 +2173,56 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
nblkptr = deduce_nblkptr(drro->drr_bonustype,
drro->drr_bonuslen);
object = drro->drr_object;
if (drro->drr_blksz != doi.doi_data_block_size ||
nblkptr < doi.doi_nblkptr) {
nblkptr < doi.doi_nblkptr ||
drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
err = dmu_free_long_range(rwa->os, drro->drr_object,
0, DMU_OBJECT_END);
if (err != 0)
return (SET_ERROR(EINVAL));
}
} else if (err == EEXIST) {
/*
* The object requested is currently an interior slot of a
* multi-slot dnode. This will be resolved when the next txg
* is synced out, since the send stream will have told us
* to free this slot when we freed the associated dnode
* earlier in the stream.
*/
txg_wait_synced(dmu_objset_pool(rwa->os), 0);
object = drro->drr_object;
} else {
/* object is free and we are about to allocate a new one */
object = DMU_NEW_OBJECT;
}
/*
* If this is a multi-slot dnode there is a chance that this
* object will expand into a slot that is already used by
* another object from the previous snapshot. We must free
* these objects before we attempt to allocate the new dnode.
*/
if (drro->drr_dn_slots > 1) {
for (uint64_t slot = drro->drr_object + 1;
slot < drro->drr_object + drro->drr_dn_slots;
slot++) {
dmu_object_info_t slot_doi;
err = dmu_object_info(rwa->os, slot, &slot_doi);
if (err == ENOENT || err == EEXIST)
continue;
else if (err != 0)
return (err);
err = dmu_free_long_object(rwa->os, slot);
if (err != 0)
return (err);
}
txg_wait_synced(dmu_objset_pool(rwa->os), 0);
}
tx = dmu_tx_create(rwa->os);
@@ -2732,7 +2773,7 @@ receive_read_record(struct receive_arg *ra)
* See receive_read_prefetch for an explanation why we're
* storing this object in the ignore_obj_list.
*/
if (err == ENOENT ||
if (err == ENOENT || err == EEXIST ||
(err == 0 && doi.doi_data_block_size != drro->drr_blksz)) {
objlist_insert(&ra->ignore_objlist, drro->drr_object);
err = 0;
+78 -6
View File
@@ -55,6 +55,7 @@ dnode_stats_t dnode_stats = {
{ "dnode_hold_free_overflow", KSTAT_DATA_UINT64 },
{ "dnode_hold_free_refcount", KSTAT_DATA_UINT64 },
{ "dnode_hold_free_txg", KSTAT_DATA_UINT64 },
{ "dnode_free_interior_lock_retry", KSTAT_DATA_UINT64 },
{ "dnode_allocate", KSTAT_DATA_UINT64 },
{ "dnode_reallocate", KSTAT_DATA_UINT64 },
{ "dnode_buf_evict", KSTAT_DATA_UINT64 },
@@ -516,7 +517,8 @@ dnode_destroy(dnode_t *dn)
mutex_exit(&os->os_lock);
/* the dnode can no longer move, so we can release the handle */
zrl_remove(&dn->dn_handle->dnh_zrlock);
if (!zrl_is_locked(&dn->dn_handle->dnh_zrlock))
zrl_remove(&dn->dn_handle->dnh_zrlock);
dn->dn_allocated_txg = 0;
dn->dn_free_txg = 0;
@@ -662,6 +664,8 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(dn->dn_objset))));
dn_slots = dn_slots > 0 ? dn_slots : DNODE_MIN_SLOTS;
dnode_free_interior_slots(dn);
DNODE_STAT_BUMP(dnode_reallocate);
/* clean up any unreferenced dbufs */
@@ -1062,19 +1066,73 @@ dnode_set_slots(dnode_children_t *children, int idx, int slots, void *ptr)
}
static boolean_t
dnode_check_slots(dnode_children_t *children, int idx, int slots, void *ptr)
dnode_check_slots_free(dnode_children_t *children, int idx, int slots)
{
ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
for (int i = idx; i < idx + slots; i++) {
dnode_handle_t *dnh = &children->dnc_children[i];
if (dnh->dnh_dnode != ptr)
dnode_t *dn = dnh->dnh_dnode;
if (dn == DN_SLOT_FREE) {
continue;
} else if (DN_SLOT_IS_PTR(dn)) {
mutex_enter(&dn->dn_mtx);
dmu_object_type_t type = dn->dn_type;
mutex_exit(&dn->dn_mtx);
if (type != DMU_OT_NONE)
return (B_FALSE);
continue;
} else {
return (B_FALSE);
}
return (B_FALSE);
}
return (B_TRUE);
}
static void
dnode_reclaim_slots(dnode_children_t *children, int idx, int slots)
{
ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
for (int i = idx; i < idx + slots; i++) {
dnode_handle_t *dnh = &children->dnc_children[i];
ASSERT(zrl_is_locked(&dnh->dnh_zrlock));
if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) {
ASSERT3S(dnh->dnh_dnode->dn_type, ==, DMU_OT_NONE);
dnode_destroy(dnh->dnh_dnode);
dnh->dnh_dnode = DN_SLOT_FREE;
}
}
}
void
dnode_free_interior_slots(dnode_t *dn)
{
dnode_children_t *children = dmu_buf_get_user(&dn->dn_dbuf->db);
int epb = dn->dn_dbuf->db.db_size >> DNODE_SHIFT;
int idx = (dn->dn_object & (epb - 1)) + 1;
int slots = dn->dn_num_slots - 1;
if (slots == 0)
return;
ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
while (!dnode_slots_tryenter(children, idx, slots))
DNODE_STAT_BUMP(dnode_free_interior_lock_retry);
dnode_set_slots(children, idx, slots, DN_SLOT_FREE);
dnode_slots_rele(children, idx, slots);
}
void
dnode_special_close(dnode_handle_t *dnh)
{
@@ -1355,7 +1413,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
while (dn == DN_SLOT_UNINIT) {
dnode_slots_hold(dnc, idx, slots);
if (!dnode_check_slots(dnc, idx, slots, DN_SLOT_FREE)) {
if (!dnode_check_slots_free(dnc, idx, slots)) {
DNODE_STAT_BUMP(dnode_hold_free_misses);
dnode_slots_rele(dnc, idx, slots);
dbuf_rele(db, FTAG);
@@ -1368,15 +1426,29 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
continue;
}
if (!dnode_check_slots(dnc, idx, slots, DN_SLOT_FREE)) {
if (!dnode_check_slots_free(dnc, idx, slots)) {
DNODE_STAT_BUMP(dnode_hold_free_lock_misses);
dnode_slots_rele(dnc, idx, slots);
dbuf_rele(db, FTAG);
return (SET_ERROR(ENOSPC));
}
/*
* Allocated but otherwise free dnodes which would
* be in the interior of a multi-slot dnodes need
* to be freed. Single slot dnodes can be safely
* re-purposed as a performance optimization.
*/
if (slots > 1)
dnode_reclaim_slots(dnc, idx + 1, slots - 1);
dnh = &dnc->dnc_children[idx];
dn = dnode_create(os, dn_block + idx, db, object, dnh);
if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) {
dn = dnh->dnh_dnode;
} else {
dn = dnode_create(os, dn_block + idx, db,
object, dnh);
}
}
mutex_enter(&dn->dn_mtx);
+2
View File
@@ -533,6 +533,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
if (dn->dn_allocated_txg != dn->dn_free_txg)
dmu_buf_will_dirty(&dn->dn_dbuf->db, tx);
bzero(dn->dn_phys, sizeof (dnode_phys_t) * dn->dn_num_slots);
dnode_free_interior_slots(dn);
mutex_enter(&dn->dn_mtx);
dn->dn_type = DMU_OT_NONE;
@@ -540,6 +541,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
dn->dn_allocated_txg = 0;
dn->dn_free_txg = 0;
dn->dn_have_spill = B_FALSE;
dn->dn_num_slots = 1;
mutex_exit(&dn->dn_mtx);
ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);