mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Fix ENOSPC in "Handle zap_add() failures in ..."
Commit cc63068 caused an ENOSPC error when copying a large number of files between two directories. The reason is that the patch limits zap leaf expansion to 2 retries and returns ENOSPC when they fail. The intent of limiting retries is to prevent pointlessly growing the table to its maximum size when adding a block full of entries with the same name in different case in mixed mode. However, it turns out we cannot use any limit on the retries. When we copy files from one directory in readdir order, we are copying in hash order, one leaf block at a time. This means that if the leaf block in the source directory has expanded 6 times, and you copy the entries in that block, then by the time you need to expand the leaf in the destination directory, you need to expand it 6 times in one go. So any limit on the retries will result in an error where there shouldn't be one. Note that while we do use a different salt for different directories, it seems that the salt/hash function doesn't provide enough randomization of the hash distance to prevent this from happening. Since cc63068 has already been reverted, this patch adds it back and removes the retry limit. Also, as it turns out, failing on zap_add() has a serious side effect for mzap_upgrade(). When upgrading from micro zap to fat zap, it will call zap_add() to transfer entries one at a time. If it hits any error halfway through, the remaining entries will be lost, causing those files to become orphaned. This patch adds a VERIFY to catch it. Reviewed-by: Sanjeev Bagewadi <sanjeev.bagewadi@gmail.com> Reviewed-by: Richard Yao <ryao@gentoo.org> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Albert Lee <trisk@forkgnu.org> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed by: Matthew Ahrens <mahrens@delphix.com> Signed-off-by: Chunwei Chen <david.chen@nutanix.com> Closes #7401 Closes #7421
This commit is contained in:
committed by
Brian Behlendorf
parent
b0ee5946aa
commit
599b864813
+9
-1
@@ -852,8 +852,16 @@ retry:
|
||||
} else if (err == EAGAIN) {
|
||||
err = zap_expand_leaf(zn, l, tag, tx, &l);
|
||||
zap = zn->zn_zap; /* zap_expand_leaf() may change zap */
|
||||
if (err == 0)
|
||||
if (err == 0) {
|
||||
goto retry;
|
||||
} else if (err == ENOSPC) {
|
||||
/*
|
||||
* If we failed to expand the leaf, then bailout
|
||||
* as there is no point trying
|
||||
* zap_put_leaf_maybe_grow_ptrtbl().
|
||||
*/
|
||||
return (err);
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
|
||||
@@ -53,7 +53,7 @@ static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);
|
||||
((h) >> \
|
||||
(64 - ZAP_LEAF_HASH_SHIFT(l) - zap_leaf_phys(l)->l_hdr.lh_prefix_len)))
|
||||
|
||||
#define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])
|
||||
#define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])
|
||||
|
||||
extern inline zap_leaf_phys_t *zap_leaf_phys(zap_leaf_t *l);
|
||||
|
||||
|
||||
+41
-6
@@ -363,6 +363,41 @@ mze_find_unused_cd(zap_t *zap, uint64_t hash)
|
||||
return (cd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Each mzap entry requires at max : 4 chunks
|
||||
* 3 chunks for names + 1 chunk for value.
|
||||
*/
|
||||
#define MZAP_ENT_CHUNKS (1 + ZAP_LEAF_ARRAY_NCHUNKS(MZAP_NAME_LEN) + \
|
||||
ZAP_LEAF_ARRAY_NCHUNKS(sizeof (uint64_t)))
|
||||
|
||||
/*
|
||||
* Check if the current entry keeps the colliding entries under the fatzap leaf
|
||||
* size.
|
||||
*/
|
||||
static boolean_t
|
||||
mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash)
|
||||
{
|
||||
zap_t *zap = zn->zn_zap;
|
||||
mzap_ent_t mze_tofind;
|
||||
mzap_ent_t *mze;
|
||||
avl_index_t idx;
|
||||
avl_tree_t *avl = &zap->zap_m.zap_avl;
|
||||
uint32_t mzap_ents = 0;
|
||||
|
||||
mze_tofind.mze_hash = hash;
|
||||
mze_tofind.mze_cd = 0;
|
||||
|
||||
for (mze = avl_find(avl, &mze_tofind, &idx);
|
||||
mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
|
||||
mzap_ents++;
|
||||
}
|
||||
|
||||
/* Include the new entry being added */
|
||||
mzap_ents++;
|
||||
|
||||
return (ZAP_LEAF_NUMCHUNKS_DEF > (mzap_ents * MZAP_ENT_CHUNKS));
|
||||
}
|
||||
|
||||
static void
|
||||
mze_remove(zap_t *zap, mzap_ent_t *mze)
|
||||
{
|
||||
@@ -638,16 +673,15 @@ mzap_upgrade(zap_t **zapp, void *tag, dmu_tx_t *tx, zap_flags_t flags)
|
||||
dprintf("adding %s=%llu\n",
|
||||
mze->mze_name, mze->mze_value);
|
||||
zn = zap_name_alloc(zap, mze->mze_name, 0);
|
||||
err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd,
|
||||
tag, tx);
|
||||
/* If we fail here, we would end up losing entries */
|
||||
VERIFY0(fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd,
|
||||
tag, tx));
|
||||
zap = zn->zn_zap; /* fzap_add_cd() may change zap */
|
||||
zap_name_free(zn);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
vmem_free(mzp, sz);
|
||||
*zapp = zap;
|
||||
return (err);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1190,7 +1224,8 @@ zap_add_impl(zap_t *zap, const char *key,
|
||||
err = fzap_add(zn, integer_size, num_integers, val, tag, tx);
|
||||
zap = zn->zn_zap; /* fzap_add() may change zap */
|
||||
} else if (integer_size != 8 || num_integers != 1 ||
|
||||
strlen(key) >= MZAP_NAME_LEN) {
|
||||
strlen(key) >= MZAP_NAME_LEN ||
|
||||
!mze_canfit_fzap_leaf(zn, zn->zn_hash)) {
|
||||
err = mzap_upgrade(&zn->zn_zap, tag, tx, 0);
|
||||
if (err == 0) {
|
||||
err = fzap_add(zn, integer_size, num_integers, val,
|
||||
|
||||
+23
-6
@@ -742,7 +742,11 @@ zfs_dirent(znode_t *zp, uint64_t mode)
|
||||
}
|
||||
|
||||
/*
|
||||
* Link zp into dl. Can only fail if zp has been unlinked.
|
||||
* Link zp into dl. Can fail in the following cases :
|
||||
* - if zp has been unlinked.
|
||||
* - if the number of entries with the same hash (aka. colliding entries)
|
||||
* exceed the capacity of a leaf-block of fatzap and splitting of the
|
||||
* leaf-block does not help.
|
||||
*/
|
||||
int
|
||||
zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
|
||||
@@ -776,6 +780,24 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
|
||||
NULL, &links, sizeof (links));
|
||||
}
|
||||
}
|
||||
|
||||
value = zfs_dirent(zp, zp->z_mode);
|
||||
error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, 8, 1,
|
||||
&value, tx);
|
||||
|
||||
/*
|
||||
* zap_add could fail to add the entry if it exceeds the capacity of the
|
||||
* leaf-block and zap_leaf_split() failed to help.
|
||||
* The caller of this routine is responsible for failing the transaction
|
||||
* which will rollback the SA updates done above.
|
||||
*/
|
||||
if (error != 0) {
|
||||
if (!(flag & ZRENAMING) && !(flag & ZNEW))
|
||||
drop_nlink(ZTOI(zp));
|
||||
mutex_exit(&zp->z_lock);
|
||||
return (error);
|
||||
}
|
||||
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
|
||||
&dzp->z_id, sizeof (dzp->z_id));
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
|
||||
@@ -813,11 +835,6 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
|
||||
ASSERT(error == 0);
|
||||
mutex_exit(&dzp->z_lock);
|
||||
|
||||
value = zfs_dirent(zp, zp->z_mode);
|
||||
error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name,
|
||||
8, 1, &value, tx);
|
||||
ASSERT(error == 0);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
||||
+56
-18
@@ -1450,6 +1450,7 @@ top:
|
||||
dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
|
||||
0, acl_ids.z_aclp->z_acl_bytes);
|
||||
}
|
||||
|
||||
error = dmu_tx_assign(tx,
|
||||
(waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
|
||||
if (error) {
|
||||
@@ -1467,10 +1468,22 @@ top:
|
||||
}
|
||||
zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
|
||||
|
||||
error = zfs_link_create(dl, zp, tx, ZNEW);
|
||||
if (error != 0) {
|
||||
/*
|
||||
* Since, we failed to add the directory entry for it,
|
||||
* delete the newly created dnode.
|
||||
*/
|
||||
zfs_znode_delete(zp, tx);
|
||||
remove_inode_hash(ZTOI(zp));
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
dmu_tx_commit(tx);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_sync(zfsvfs, tx);
|
||||
|
||||
(void) zfs_link_create(dl, zp, tx, ZNEW);
|
||||
txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
|
||||
if (flag & FIGNORECASE)
|
||||
txtype |= TX_CI;
|
||||
@@ -2064,13 +2077,18 @@ top:
|
||||
*/
|
||||
zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
|
||||
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_sync(zfsvfs, tx);
|
||||
|
||||
/*
|
||||
* Now put new name in parent dir.
|
||||
*/
|
||||
(void) zfs_link_create(dl, zp, tx, ZNEW);
|
||||
error = zfs_link_create(dl, zp, tx, ZNEW);
|
||||
if (error != 0) {
|
||||
zfs_znode_delete(zp, tx);
|
||||
remove_inode_hash(ZTOI(zp));
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_sync(zfsvfs, tx);
|
||||
|
||||
*ipp = ZTOI(zp);
|
||||
|
||||
@@ -2080,6 +2098,7 @@ top:
|
||||
zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp,
|
||||
acl_ids.z_fuidp, vap);
|
||||
|
||||
out:
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
|
||||
dmu_tx_commit(tx);
|
||||
@@ -2089,10 +2108,14 @@ top:
|
||||
if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
|
||||
zil_commit(zilog, 0);
|
||||
|
||||
zfs_inode_update(dzp);
|
||||
zfs_inode_update(zp);
|
||||
if (error != 0) {
|
||||
iput(ZTOI(zp));
|
||||
} else {
|
||||
zfs_inode_update(dzp);
|
||||
zfs_inode_update(zp);
|
||||
}
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (0);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -3950,6 +3973,13 @@ top:
|
||||
VERIFY3U(zfs_link_destroy(tdl, szp, tx,
|
||||
ZRENAMING, NULL), ==, 0);
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* If we had removed the existing target, subsequent
|
||||
* call to zfs_link_create() to add back the same entry
|
||||
* but, the new dnode (szp) should not fail.
|
||||
*/
|
||||
ASSERT(tzp == NULL);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4120,14 +4150,18 @@ top:
|
||||
/*
|
||||
* Insert the new object into the directory.
|
||||
*/
|
||||
(void) zfs_link_create(dl, zp, tx, ZNEW);
|
||||
error = zfs_link_create(dl, zp, tx, ZNEW);
|
||||
if (error != 0) {
|
||||
zfs_znode_delete(zp, tx);
|
||||
remove_inode_hash(ZTOI(zp));
|
||||
} else {
|
||||
if (flags & FIGNORECASE)
|
||||
txtype |= TX_CI;
|
||||
zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
|
||||
|
||||
if (flags & FIGNORECASE)
|
||||
txtype |= TX_CI;
|
||||
zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
|
||||
|
||||
zfs_inode_update(dzp);
|
||||
zfs_inode_update(zp);
|
||||
zfs_inode_update(dzp);
|
||||
zfs_inode_update(zp);
|
||||
}
|
||||
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
|
||||
@@ -4135,10 +4169,14 @@ top:
|
||||
|
||||
zfs_dirent_unlock(dl);
|
||||
|
||||
*ipp = ZTOI(zp);
|
||||
if (error == 0) {
|
||||
*ipp = ZTOI(zp);
|
||||
|
||||
if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
|
||||
zil_commit(zilog, 0);
|
||||
if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
|
||||
zil_commit(zilog, 0);
|
||||
} else {
|
||||
iput(ZTOI(zp));
|
||||
}
|
||||
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (error);
|
||||
|
||||
Reference in New Issue
Block a user