Fix several bugs in the FreeBSD rename VOP implementation

- To avoid a use-after-free, zfsvfs->z_log needs to be loaded after the
  teardown lock is acquired with ZFS_ENTER().
- Avoid leaking vnode locks in zfs_rename_relock() and zfs_rename_()
  when the ZFS_ENTER() macros forces an early return.

Refactor the rename implementation so that ZFS_ENTER() can be used
safely.  As a bonus, this lets us use the ZFS_VERIFY_ZP() macro instead
of open-coding its implementation.

Reported-by: Peter Holm <pho@FreeBSD.org>
Tested-by: Peter Holm <pho@FreeBSD.org>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Reviewed-by: Tony Nguyen <tony.nguyen@delphix.com>
Signed-off-by: Mark Johnston <markj@FreeBSD.org>
Sponsored-by: The FreeBSD Foundation
Closes #12717
This commit is contained in:
Mark Johnston 2021-11-19 17:26:39 -05:00 committed by Tony Hutter
parent b96737b83e
commit 19337332cc

View File

@ -2935,48 +2935,18 @@ out2:
}
/*
* We acquire all but fdvp locks using non-blocking acquisitions. If we
* fail to acquire any lock in the path we will drop all held locks,
* acquire the new lock in a blocking fashion, and then release it and
* restart the rename. This acquire/release step ensures that we do not
* spin on a lock waiting for release. On error release all vnode locks
* and decrement references the way tmpfs_rename() would do.
* Look up the directory entries corresponding to the source and target
* directory/name pairs.
*/
static int
zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
struct vnode *tdvp, struct vnode **tvpp,
const struct componentname *scnp, const struct componentname *tcnp)
zfs_rename_relock_lookup(znode_t *sdzp, const struct componentname *scnp,
znode_t **szpp, znode_t *tdzp, const struct componentname *tcnp,
znode_t **tzpp)
{
zfsvfs_t *zfsvfs;
struct vnode *nvp, *svp, *tvp;
znode_t *sdzp, *tdzp, *szp, *tzp;
const char *snm = scnp->cn_nameptr;
const char *tnm = tcnp->cn_nameptr;
znode_t *szp, *tzp;
int error;
VOP_UNLOCK1(tdvp);
if (*tvpp != NULL && *tvpp != tdvp)
VOP_UNLOCK1(*tvpp);
relock:
error = vn_lock(sdvp, LK_EXCLUSIVE);
if (error)
goto out;
sdzp = VTOZ(sdvp);
error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
if (error != 0) {
VOP_UNLOCK1(sdvp);
if (error != EBUSY)
goto out;
error = vn_lock(tdvp, LK_EXCLUSIVE);
if (error)
goto out;
VOP_UNLOCK1(tdvp);
goto relock;
}
tdzp = VTOZ(tdvp);
/*
* Before using sdzp and tdzp we must ensure that they are live.
* As a porting legacy from illumos we have two things to worry
@ -2991,59 +2961,86 @@ relock:
zfsvfs = sdzp->z_zfsvfs;
ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
ZFS_ENTER(zfsvfs);
/*
* We can not use ZFS_VERIFY_ZP() here because it could directly return
* bypassing the cleanup code in the case of an error.
*/
if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) {
ZFS_EXIT(zfsvfs);
VOP_UNLOCK1(sdvp);
VOP_UNLOCK1(tdvp);
error = SET_ERROR(EIO);
goto out;
}
ZFS_VERIFY_ZP(sdzp);
ZFS_VERIFY_ZP(tdzp);
/*
* Re-resolve svp to be certain it still exists and fetch the
* correct vnode.
*/
error = zfs_dirent_lookup(sdzp, snm, &szp, ZEXISTS);
error = zfs_dirent_lookup(sdzp, scnp->cn_nameptr, &szp, ZEXISTS);
if (error != 0) {
/* Source entry invalid or not there. */
ZFS_EXIT(zfsvfs);
VOP_UNLOCK1(sdvp);
VOP_UNLOCK1(tdvp);
if ((scnp->cn_flags & ISDOTDOT) != 0 ||
(scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.'))
error = SET_ERROR(EINVAL);
goto out;
}
svp = ZTOV(szp);
*szpp = szp;
/*
* Re-resolve tvp, if it disappeared we just carry on.
*/
error = zfs_dirent_lookup(tdzp, tnm, &tzp, 0);
error = zfs_dirent_lookup(tdzp, tcnp->cn_nameptr, &tzp, 0);
if (error != 0) {
ZFS_EXIT(zfsvfs);
VOP_UNLOCK1(sdvp);
VOP_UNLOCK1(tdvp);
vrele(svp);
vrele(ZTOV(szp));
if ((tcnp->cn_flags & ISDOTDOT) != 0)
error = SET_ERROR(EINVAL);
goto out;
}
if (tzp != NULL)
tvp = ZTOV(tzp);
else
tvp = NULL;
/*
* At present the vnode locks must be acquired before z_teardown_lock,
* although it would be more logical to use the opposite order.
*/
*tzpp = tzp;
out:
ZFS_EXIT(zfsvfs);
return (error);
}
/*
* We acquire all but fdvp locks using non-blocking acquisitions. If we
* fail to acquire any lock in the path we will drop all held locks,
* acquire the new lock in a blocking fashion, and then release it and
* restart the rename. This acquire/release step ensures that we do not
* spin on a lock waiting for release. On error release all vnode locks
* and decrement references the way tmpfs_rename() would do.
*/
static int
zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
struct vnode *tdvp, struct vnode **tvpp,
const struct componentname *scnp, const struct componentname *tcnp)
{
struct vnode *nvp, *svp, *tvp;
znode_t *sdzp, *tdzp, *szp, *tzp;
int error;
VOP_UNLOCK1(tdvp);
if (*tvpp != NULL && *tvpp != tdvp)
VOP_UNLOCK1(*tvpp);
relock:
error = vn_lock(sdvp, LK_EXCLUSIVE);
if (error)
goto out;
error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
if (error != 0) {
VOP_UNLOCK1(sdvp);
if (error != EBUSY)
goto out;
error = vn_lock(tdvp, LK_EXCLUSIVE);
if (error)
goto out;
VOP_UNLOCK1(tdvp);
goto relock;
}
tdzp = VTOZ(tdvp);
sdzp = VTOZ(sdvp);
error = zfs_rename_relock_lookup(sdzp, scnp, &szp, tdzp, tcnp, &tzp);
if (error != 0) {
VOP_UNLOCK1(sdvp);
VOP_UNLOCK1(tdvp);
goto out;
}
svp = ZTOV(szp);
tvp = tzp != NULL ? ZTOV(tzp) : NULL;
/*
* Now try acquire locks on svp and tvp.
@ -3180,17 +3177,22 @@ cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
}
#endif
static int
zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
cred_t *cr);
/*
* Move an entry from the provided source directory to the target
* directory. Change the entry name as indicated.
*
* IN: sdvp - Source directory containing the "old entry".
* snm - Old entry name.
* scnp - Old entry name.
* tdvp - Target directory to contain the "new entry".
* tnm - New entry name.
* tcnp - New entry name.
* cr - credentials of caller.
* ct - caller context
* flags - case flags
* INOUT: svpp - Source file
* tvpp - Target file, may point to NULL initially
*
* RETURN: 0 on success, error code on failure.
*
@ -3199,18 +3201,15 @@ cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
*/
/*ARGSUSED*/
static int
zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
zfs_do_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
cred_t *cr, int log)
cred_t *cr)
{
zfsvfs_t *zfsvfs;
znode_t *sdzp, *tdzp, *szp, *tzp;
zilog_t *zilog = NULL;
dmu_tx_t *tx;
const char *snm = scnp->cn_nameptr;
const char *tnm = tcnp->cn_nameptr;
int error = 0;
bool want_seqc_end __maybe_unused = false;
int error;
ASSERT_VOP_ELOCKED(tdvp, __func__);
if (*tvpp != NULL)
ASSERT_VOP_ELOCKED(*tvpp, __func__);
/* Reject renames across filesystems. */
if ((*svpp)->v_mount != tdvp->v_mount ||
@ -3233,51 +3232,64 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
return (error);
}
error = zfs_do_rename_impl(sdvp, svpp, scnp, tdvp, tvpp, tcnp, cr);
VOP_UNLOCK1(sdvp);
VOP_UNLOCK1(*svpp);
out:
if (*tvpp != NULL)
VOP_UNLOCK1(*tvpp);
if (tdvp != *tvpp)
VOP_UNLOCK1(tdvp);
return (error);
}
static int
zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
cred_t *cr)
{
dmu_tx_t *tx;
zfsvfs_t *zfsvfs;
zilog_t *zilog;
znode_t *tdzp, *sdzp, *tzp, *szp;
const char *snm = scnp->cn_nameptr;
const char *tnm = tcnp->cn_nameptr;
int error;
tdzp = VTOZ(tdvp);
sdzp = VTOZ(sdvp);
zfsvfs = tdzp->z_zfsvfs;
zilog = zfsvfs->z_log;
/*
* After we re-enter ZFS_ENTER() we will have to revalidate all
* znodes involved.
*/
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(tdzp);
ZFS_VERIFY_ZP(sdzp);
zilog = zfsvfs->z_log;
if (zfsvfs->z_utf8 && u8_validate(tnm,
strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
error = SET_ERROR(EILSEQ);
goto unlockout;
goto out;
}
/* If source and target are the same file, there is nothing to do. */
if ((*svpp) == (*tvpp)) {
error = 0;
goto unlockout;
goto out;
}
if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) ||
((*tvpp) != NULL && (*tvpp)->v_type == VDIR &&
(*tvpp)->v_mountedhere != NULL)) {
error = SET_ERROR(EXDEV);
goto unlockout;
}
/*
* We can not use ZFS_VERIFY_ZP() here because it could directly return
* bypassing the cleanup code in the case of an error.
*/
if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) {
error = SET_ERROR(EIO);
goto unlockout;
goto out;
}
szp = VTOZ(*svpp);
ZFS_VERIFY_ZP(szp);
tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp);
if (szp->z_sa_hdl == NULL || (tzp != NULL && tzp->z_sa_hdl == NULL)) {
error = SET_ERROR(EIO);
goto unlockout;
}
if (tzp != NULL)
ZFS_VERIFY_ZP(tzp);
/*
* This is to prevent the creation of links into attribute space
@ -3286,7 +3298,7 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
*/
if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
error = SET_ERROR(EINVAL);
goto unlockout;
goto out;
}
/*
@ -3299,7 +3311,7 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
if (tdzp->z_pflags & ZFS_PROJINHERIT &&
tdzp->z_projid != szp->z_projid) {
error = SET_ERROR(EXDEV);
goto unlockout;
goto out;
}
/*
@ -3309,7 +3321,7 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
* done in a single check.
*/
if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)))
goto unlockout;
goto out;
if ((*svpp)->v_type == VDIR) {
/*
@ -3319,7 +3331,7 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
sdzp == szp ||
(scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
error = EINVAL;
goto unlockout;
goto out;
}
/*
@ -3327,7 +3339,7 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
* Can't do a move like this: /usr/a/b to /usr/a/b/c/d
*/
if ((error = zfs_rename_check(szp, sdzp, tdzp)))
goto unlockout;
goto out;
}
/*
@ -3340,7 +3352,7 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
if ((*svpp)->v_type == VDIR) {
if ((*tvpp)->v_type != VDIR) {
error = SET_ERROR(ENOTDIR);
goto unlockout;
goto out;
} else {
cache_purge(tdvp);
if (sdvp != tdvp)
@ -3349,7 +3361,7 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
} else {
if ((*tvpp)->v_type == VDIR) {
error = SET_ERROR(EISDIR);
goto unlockout;
goto out;
}
}
}
@ -3360,9 +3372,7 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
vn_seqc_write_begin(*tvpp);
if (tdvp != *tvpp)
vn_seqc_write_begin(tdvp);
#if __FreeBSD_version >= 1300102
want_seqc_end = true;
#endif
vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct);
if (tzp)
vnevent_rename_dest(*tvpp, tdvp, tnm, ct);
@ -3394,10 +3404,9 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
goto unlockout;
goto out_seq;
}
if (tzp) /* Attempt to remove the existing target */
error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL);
@ -3444,30 +3453,19 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
dmu_tx_commit(tx);
unlockout: /* all 4 vnodes are locked, ZFS_ENTER called */
if (want_seqc_end) {
out_seq:
vn_seqc_write_end(*svpp);
vn_seqc_write_end(sdvp);
if (*tvpp != NULL)
vn_seqc_write_end(*tvpp);
if (tdvp != *tvpp)
vn_seqc_write_end(tdvp);
want_seqc_end = false;
}
VOP_UNLOCK1(*svpp);
VOP_UNLOCK1(sdvp);
out:
if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
ZFS_EXIT(zfsvfs);
out: /* original two vnodes are locked */
MPASS(!want_seqc_end);
if (*tvpp != NULL)
VOP_UNLOCK1(*tvpp);
if (tdvp != *tvpp)
VOP_UNLOCK1(tdvp);
return (error);
}
@ -3499,7 +3497,7 @@ zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname,
goto fail;
}
error = zfs_rename_(sdvp, &svp, &scn, tdvp, &tvp, &tcn, cr, 0);
error = zfs_do_rename(sdvp, &svp, &scn, tdvp, &tvp, &tcn, cr);
fail:
if (svp != NULL)
vrele(svp);
@ -4979,8 +4977,8 @@ zfs_freebsd_rename(struct vop_rename_args *ap)
ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));
error = zfs_rename_(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
ap->a_tcnp, ap->a_fcnp->cn_cred, 1);
error = zfs_do_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
ap->a_tcnp, ap->a_fcnp->cn_cred);
vrele(fdvp);
vrele(fvp);