mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-12-26 03:09:34 +03:00
Handle zap_add() failures in mixed case mode
With "casesensitivity=mixed", zap_add() could fail when the number of files/directories with the same name (varying in case) exceed the capacity of the leaf node of a Fatzap. This results in a ASSERT() failure as zfs_link_create() does not expect zap_add() to fail. The fix is to handle these failures and rollback the transactions. Reviewed by: Matt Ahrens <mahrens@delphix.com> Reviewed-by: Chunwei Chen <david.chen@nutanix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Sanjeev Bagewadi <sanjeev.bagewadi@gmail.com> Closes #7011 Closes #7054
This commit is contained in:
parent
eb9c4532dd
commit
cc63068e95
@ -46,10 +46,15 @@ struct zap_stats;
|
||||
* block size (1<<l->l_bs) - hash entry size (2) * number of hash
|
||||
* entries - header space (2*chunksize)
|
||||
*/
|
||||
#define ZAP_LEAF_NUMCHUNKS(l) \
|
||||
(((1<<(l)->l_bs) - 2*ZAP_LEAF_HASH_NUMENTRIES(l)) / \
|
||||
#define ZAP_LEAF_NUMCHUNKS_BS(bs) \
|
||||
(((1<<(bs)) - 2*ZAP_LEAF_HASH_NUMENTRIES_BS(bs)) / \
|
||||
ZAP_LEAF_CHUNKSIZE - 2)
|
||||
|
||||
#define ZAP_LEAF_NUMCHUNKS(l) (ZAP_LEAF_NUMCHUNKS_BS(((l)->l_bs)))
|
||||
|
||||
#define ZAP_LEAF_NUMCHUNKS_DEF \
|
||||
(ZAP_LEAF_NUMCHUNKS_BS(fzap_default_block_shift))
|
||||
|
||||
/*
|
||||
* The amount of space within the chunk available for the array is:
|
||||
* chunk size - space for type (1) - space for next pointer (2)
|
||||
@ -74,8 +79,10 @@ struct zap_stats;
|
||||
* which is less than block size / CHUNKSIZE (24) / minimum number of
|
||||
* chunks per entry (3).
|
||||
*/
|
||||
#define ZAP_LEAF_HASH_SHIFT(l) ((l)->l_bs - 5)
|
||||
#define ZAP_LEAF_HASH_NUMENTRIES(l) (1 << ZAP_LEAF_HASH_SHIFT(l))
|
||||
#define ZAP_LEAF_HASH_SHIFT_BS(bs) ((bs) - 5)
|
||||
#define ZAP_LEAF_HASH_NUMENTRIES_BS(bs) (1 << ZAP_LEAF_HASH_SHIFT_BS(bs))
|
||||
#define ZAP_LEAF_HASH_SHIFT(l) (ZAP_LEAF_HASH_SHIFT_BS(((l)->l_bs)))
|
||||
#define ZAP_LEAF_HASH_NUMENTRIES(l) (ZAP_LEAF_HASH_NUMENTRIES_BS(((l)->l_bs)))
|
||||
|
||||
/*
|
||||
* The chunks start immediately after the hash table. The end of the
|
||||
|
@ -818,15 +818,19 @@ fzap_lookup(zap_name_t *zn,
|
||||
return (err);
|
||||
}
|
||||
|
||||
#define MAX_EXPAND_RETRIES 2
|
||||
|
||||
int
|
||||
fzap_add_cd(zap_name_t *zn,
|
||||
uint64_t integer_size, uint64_t num_integers,
|
||||
const void *val, uint32_t cd, void *tag, dmu_tx_t *tx)
|
||||
{
|
||||
zap_leaf_t *l;
|
||||
zap_leaf_t *prev_l = NULL;
|
||||
int err;
|
||||
zap_entry_handle_t zeh;
|
||||
zap_t *zap = zn->zn_zap;
|
||||
int expand_retries = 0;
|
||||
|
||||
ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
|
||||
ASSERT(!zap->zap_ismicro);
|
||||
@ -850,10 +854,29 @@ retry:
|
||||
if (err == 0) {
|
||||
zap_increment_num_entries(zap, 1, tx);
|
||||
} else if (err == EAGAIN) {
|
||||
/*
|
||||
* If the last two expansions did not help, there is no point
|
||||
* trying to expand again
|
||||
*/
|
||||
if (expand_retries > MAX_EXPAND_RETRIES && prev_l == l) {
|
||||
err = SET_ERROR(ENOSPC);
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = zap_expand_leaf(zn, l, tag, tx, &l);
|
||||
zap = zn->zn_zap; /* zap_expand_leaf() may change zap */
|
||||
if (err == 0)
|
||||
if (err == 0) {
|
||||
prev_l = l;
|
||||
expand_retries++;
|
||||
goto retry;
|
||||
} else if (err == ENOSPC) {
|
||||
/*
|
||||
* If we failed to expand the leaf, then bailout
|
||||
* as there is no point trying
|
||||
* zap_put_leaf_maybe_grow_ptrtbl().
|
||||
*/
|
||||
return (err);
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
|
@ -53,7 +53,7 @@ static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);
|
||||
((h) >> \
|
||||
(64 - ZAP_LEAF_HASH_SHIFT(l) - zap_leaf_phys(l)->l_hdr.lh_prefix_len)))
|
||||
|
||||
#define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])
|
||||
#define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])
|
||||
|
||||
extern inline zap_leaf_phys_t *zap_leaf_phys(zap_leaf_t *l);
|
||||
|
||||
|
@ -363,6 +363,41 @@ mze_find_unused_cd(zap_t *zap, uint64_t hash)
|
||||
return (cd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Each mzap entry requires at max : 4 chunks
|
||||
* 3 chunks for names + 1 chunk for value.
|
||||
*/
|
||||
#define MZAP_ENT_CHUNKS (1 + ZAP_LEAF_ARRAY_NCHUNKS(MZAP_NAME_LEN) + \
|
||||
ZAP_LEAF_ARRAY_NCHUNKS(sizeof (uint64_t)))
|
||||
|
||||
/*
|
||||
* Check if the current entry keeps the colliding entries under the fatzap leaf
|
||||
* size.
|
||||
*/
|
||||
static boolean_t
|
||||
mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash)
|
||||
{
|
||||
zap_t *zap = zn->zn_zap;
|
||||
mzap_ent_t mze_tofind;
|
||||
mzap_ent_t *mze;
|
||||
avl_index_t idx;
|
||||
avl_tree_t *avl = &zap->zap_m.zap_avl;
|
||||
uint32_t mzap_ents = 0;
|
||||
|
||||
mze_tofind.mze_hash = hash;
|
||||
mze_tofind.mze_cd = 0;
|
||||
|
||||
for (mze = avl_find(avl, &mze_tofind, &idx);
|
||||
mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
|
||||
mzap_ents++;
|
||||
}
|
||||
|
||||
/* Include the new entry being added */
|
||||
mzap_ents++;
|
||||
|
||||
return (ZAP_LEAF_NUMCHUNKS_DEF > (mzap_ents * MZAP_ENT_CHUNKS));
|
||||
}
|
||||
|
||||
static void
|
||||
mze_remove(zap_t *zap, mzap_ent_t *mze)
|
||||
{
|
||||
@ -1190,7 +1225,8 @@ zap_add_impl(zap_t *zap, const char *key,
|
||||
err = fzap_add(zn, integer_size, num_integers, val, tag, tx);
|
||||
zap = zn->zn_zap; /* fzap_add() may change zap */
|
||||
} else if (integer_size != 8 || num_integers != 1 ||
|
||||
strlen(key) >= MZAP_NAME_LEN) {
|
||||
strlen(key) >= MZAP_NAME_LEN ||
|
||||
!mze_canfit_fzap_leaf(zn, zn->zn_hash)) {
|
||||
err = mzap_upgrade(&zn->zn_zap, tag, tx, 0);
|
||||
if (err == 0) {
|
||||
err = fzap_add(zn, integer_size, num_integers, val,
|
||||
|
@ -742,7 +742,11 @@ zfs_dirent(znode_t *zp, uint64_t mode)
|
||||
}
|
||||
|
||||
/*
|
||||
* Link zp into dl. Can only fail if zp has been unlinked.
|
||||
* Link zp into dl. Can fail in the following cases :
|
||||
* - if zp has been unlinked.
|
||||
* - if the number of entries with the same hash (aka. colliding entries)
|
||||
* exceed the capacity of a leaf-block of fatzap and splitting of the
|
||||
* leaf-block does not help.
|
||||
*/
|
||||
int
|
||||
zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
|
||||
@ -776,6 +780,24 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
|
||||
NULL, &links, sizeof (links));
|
||||
}
|
||||
}
|
||||
|
||||
value = zfs_dirent(zp, zp->z_mode);
|
||||
error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, 8, 1,
|
||||
&value, tx);
|
||||
|
||||
/*
|
||||
* zap_add could fail to add the entry if it exceeds the capacity of the
|
||||
* leaf-block and zap_leaf_split() failed to help.
|
||||
* The caller of this routine is responsible for failing the transaction
|
||||
* which will rollback the SA updates done above.
|
||||
*/
|
||||
if (error != 0) {
|
||||
if (!(flag & ZRENAMING) && !(flag & ZNEW))
|
||||
drop_nlink(ZTOI(zp));
|
||||
mutex_exit(&zp->z_lock);
|
||||
return (error);
|
||||
}
|
||||
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
|
||||
&dzp->z_id, sizeof (dzp->z_id));
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
|
||||
@ -813,11 +835,6 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
|
||||
ASSERT(error == 0);
|
||||
mutex_exit(&dzp->z_lock);
|
||||
|
||||
value = zfs_dirent(zp, zp->z_mode);
|
||||
error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name,
|
||||
8, 1, &value, tx);
|
||||
ASSERT(error == 0);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
@ -1429,6 +1429,7 @@ top:
|
||||
dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
|
||||
0, acl_ids.z_aclp->z_acl_bytes);
|
||||
}
|
||||
|
||||
error = dmu_tx_assign(tx,
|
||||
(waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
|
||||
if (error) {
|
||||
@ -1446,10 +1447,22 @@ top:
|
||||
}
|
||||
zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
|
||||
|
||||
error = zfs_link_create(dl, zp, tx, ZNEW);
|
||||
if (error != 0) {
|
||||
/*
|
||||
* Since, we failed to add the directory entry for it,
|
||||
* delete the newly created dnode.
|
||||
*/
|
||||
zfs_znode_delete(zp, tx);
|
||||
remove_inode_hash(ZTOI(zp));
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
dmu_tx_commit(tx);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_sync(zfsvfs, tx);
|
||||
|
||||
(void) zfs_link_create(dl, zp, tx, ZNEW);
|
||||
txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
|
||||
if (flag & FIGNORECASE)
|
||||
txtype |= TX_CI;
|
||||
@ -2040,13 +2053,18 @@ top:
|
||||
*/
|
||||
zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
|
||||
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_sync(zfsvfs, tx);
|
||||
|
||||
/*
|
||||
* Now put new name in parent dir.
|
||||
*/
|
||||
(void) zfs_link_create(dl, zp, tx, ZNEW);
|
||||
error = zfs_link_create(dl, zp, tx, ZNEW);
|
||||
if (error != 0) {
|
||||
zfs_znode_delete(zp, tx);
|
||||
remove_inode_hash(ZTOI(zp));
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_sync(zfsvfs, tx);
|
||||
|
||||
*ipp = ZTOI(zp);
|
||||
|
||||
@ -2056,6 +2074,7 @@ top:
|
||||
zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp,
|
||||
acl_ids.z_fuidp, vap);
|
||||
|
||||
out:
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
|
||||
dmu_tx_commit(tx);
|
||||
@ -2065,10 +2084,14 @@ top:
|
||||
if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
|
||||
zil_commit(zilog, 0);
|
||||
|
||||
zfs_inode_update(dzp);
|
||||
zfs_inode_update(zp);
|
||||
if (error != 0) {
|
||||
iput(ZTOI(zp));
|
||||
} else {
|
||||
zfs_inode_update(dzp);
|
||||
zfs_inode_update(zp);
|
||||
}
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (0);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3686,6 +3709,13 @@ top:
|
||||
VERIFY3U(zfs_link_destroy(tdl, szp, tx,
|
||||
ZRENAMING, NULL), ==, 0);
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* If we had removed the existing target, subsequent
|
||||
* call to zfs_link_create() to add back the same entry
|
||||
* but, the new dnode (szp) should not fail.
|
||||
*/
|
||||
ASSERT(tzp == NULL);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3856,14 +3886,18 @@ top:
|
||||
/*
|
||||
* Insert the new object into the directory.
|
||||
*/
|
||||
(void) zfs_link_create(dl, zp, tx, ZNEW);
|
||||
error = zfs_link_create(dl, zp, tx, ZNEW);
|
||||
if (error != 0) {
|
||||
zfs_znode_delete(zp, tx);
|
||||
remove_inode_hash(ZTOI(zp));
|
||||
} else {
|
||||
if (flags & FIGNORECASE)
|
||||
txtype |= TX_CI;
|
||||
zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
|
||||
|
||||
if (flags & FIGNORECASE)
|
||||
txtype |= TX_CI;
|
||||
zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
|
||||
|
||||
zfs_inode_update(dzp);
|
||||
zfs_inode_update(zp);
|
||||
zfs_inode_update(dzp);
|
||||
zfs_inode_update(zp);
|
||||
}
|
||||
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
|
||||
@ -3871,10 +3905,14 @@ top:
|
||||
|
||||
zfs_dirent_unlock(dl);
|
||||
|
||||
*ipp = ZTOI(zp);
|
||||
if (error == 0) {
|
||||
*ipp = ZTOI(zp);
|
||||
|
||||
if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
|
||||
zil_commit(zilog, 0);
|
||||
if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
|
||||
zil_commit(zilog, 0);
|
||||
} else {
|
||||
iput(ZTOI(zp));
|
||||
}
|
||||
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (error);
|
||||
|
@ -59,7 +59,7 @@ tags = ['functional', 'cachefile']
|
||||
# 'mixed_none_lookup', 'mixed_none_lookup_ci', 'mixed_none_delete',
|
||||
# 'mixed_formd_lookup', 'mixed_formd_lookup_ci', 'mixed_formd_delete']
|
||||
[tests/functional/casenorm]
|
||||
tests = ['case_all_values', 'norm_all_values']
|
||||
tests = ['case_all_values', 'norm_all_values', 'mixed_create_failure']
|
||||
tags = ['functional', 'casenorm']
|
||||
|
||||
[tests/functional/channel_program/lua_core]
|
||||
|
@ -9,6 +9,7 @@ dist_pkgdata_SCRIPTS = \
|
||||
insensitive_formd_lookup.ksh \
|
||||
insensitive_none_delete.ksh \
|
||||
insensitive_none_lookup.ksh \
|
||||
mixed_create_failure.ksh \
|
||||
mixed_formd_delete.ksh \
|
||||
mixed_formd_lookup_ci.ksh \
|
||||
mixed_formd_lookup.ksh \
|
||||
|
136
tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh
Executable file
136
tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh
Executable file
@ -0,0 +1,136 @@
|
||||
#!/bin/ksh -p
|
||||
#
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
#
|
||||
# Copyright 2018 Nutanix Inc. All rights reserved.
|
||||
#
|
||||
|
||||
. $STF_SUITE/tests/functional/casenorm/casenorm.kshlib
|
||||
|
||||
# DESCRIPTION:
|
||||
# For the filesystem with casesensitivity=mixed, normalization=none,
|
||||
# when multiple files with the same name (differing only in case) are created,
|
||||
# the number of files is limited to what can fit in a fatzap leaf-block.
|
||||
# And beyond that, it fails with ENOSPC.
|
||||
#
|
||||
# Ensure that the create/rename operations fail gracefully and not trigger an
|
||||
# ASSERT.
|
||||
#
|
||||
# STRATEGY:
|
||||
# Repeat the below steps for objects: files, directories, symlinks and hardlinks
|
||||
# 1. Create objects with same name but varying in case.
|
||||
# E.g. 'abcdefghijklmnop', 'Abcdefghijklmnop', 'ABcdefghijklmnop' etc.
|
||||
# The create should fail with ENOSPC.
|
||||
# 2. Create an object with name 'tmp_obj' and try to rename it to name that we
|
||||
# failed to add in step 1 above.
|
||||
# This should fail as well.
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
function cleanup
|
||||
{
|
||||
destroy_testfs
|
||||
}
|
||||
|
||||
log_onexit cleanup
|
||||
log_assert "With mixed mode: ensure create fails with ENOSPC beyond a certain limit"
|
||||
|
||||
create_testfs "-o casesensitivity=mixed -o normalization=none"
|
||||
|
||||
# Different object types
|
||||
obj_type=('file' 'dir' 'symlink' 'hardlink')
|
||||
|
||||
# Commands to create different object types
|
||||
typeset -A ops
|
||||
ops['file']='touch'
|
||||
ops['dir']='mkdir'
|
||||
ops['symlink']='ln -s'
|
||||
ops['hardlink']='ln'
|
||||
|
||||
# This function tests the following for a give object type :
|
||||
# - Create multiple objects with the same name (varying only in case).
|
||||
# Ensure that it eventually fails once the leaf-block limit is exceeded.
|
||||
# - Create another object with a different name. And attempt rename it to the
|
||||
# name (for which the create had failed in the previous step).
|
||||
# This should fail as well.
|
||||
# Args :
|
||||
# $1 - object type (file/dir/symlink/hardlink)
|
||||
# $2 - test directory
|
||||
#
|
||||
function test_ops
|
||||
{
|
||||
typeset obj_type=$1
|
||||
typeset testdir=$2
|
||||
|
||||
target_obj='target-file'
|
||||
|
||||
op="${ops[$obj_type]}"
|
||||
|
||||
log_note "The op : $op"
|
||||
log_note "testdir=$testdir obj_type=$obj_type"
|
||||
|
||||
test_path="$testdir/$obj_type"
|
||||
mkdir $test_path
|
||||
log_note "Created test dir $test_path"
|
||||
|
||||
if [[ $obj_type = "symlink" || $obj_type = "hardlink" ]]; then
|
||||
touch $test_path/$target_obj
|
||||
log_note "Created target: $test_path/$target_obj"
|
||||
op="$op $test_path/$target_obj"
|
||||
fi
|
||||
|
||||
log_note "op : $op"
|
||||
names='{a,A}{b,B}{c,C}{d,D}{e,E}{f,F}{g,G}{h,H}{i,I}{j,J}{k,K}{l,L}'
|
||||
for name in $names; do
|
||||
cmd="$op $test_path/$name"
|
||||
out=$($cmd 2>&1)
|
||||
ret=$?
|
||||
log_note "cmd: $cmd ret: $ret out=$out"
|
||||
if (($ret != 0)); then
|
||||
if [[ $out = *@(No space left on device)* ]]; then
|
||||
save_name="$test_path/$name"
|
||||
break;
|
||||
else
|
||||
log_err "$cmd failed with unexpected error : $out"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
log_note 'Test rename \"sample_name\" rename'
|
||||
TMP_OBJ="$test_path/tmp_obj"
|
||||
cmd="$op $TMP_OBJ"
|
||||
out=$($cmd 2>&1)
|
||||
ret=$?
|
||||
if (($ret != 0)); then
|
||||
log_err "cmd:$cmd failed out:$out"
|
||||
fi
|
||||
|
||||
# Now, try to rename the tmp_obj to the name which we failed to add earlier.
|
||||
# This should fail as well.
|
||||
out=$(mv $TMP_OBJ $save_name 2>&1)
|
||||
ret=$?
|
||||
if (($ret != 0)); then
|
||||
if [[ $out = *@(No space left on device)* ]]; then
|
||||
log_note "$cmd failed as expected : $out"
|
||||
else
|
||||
log_err "$cmd failed with : $out"
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
for obj_type in ${obj_type[*]};
|
||||
do
|
||||
log_note "Testing create of $obj_type"
|
||||
test_ops $obj_type $TESTDIR
|
||||
done
|
||||
|
||||
log_pass "Mixed mode FS: Ops on large number of colliding names fail gracefully"
|
Loading…
Reference in New Issue
Block a user