mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 18:40:43 +03:00
zfs_rename: support RENAME_* flags
Implement support for Linux's RENAME_* flags (for renameat2). Aside from being quite useful for userspace (providing race-free ways to exchange paths and implement mv --no-clobber), they are used by overlayfs and are thus required in order to use overlayfs-on-ZFS. In order for us to represent the new renameat2(2) flags in the ZIL, we create two new transaction types for the two flags which need transactional-level support (RENAME_EXCHANGE and RENAME_WHITEOUT). RENAME_NOREPLACE does not need any ZIL support because we know that if the operation succeeded before creating the ZIL entry, there was no file to be clobbered and thus it can be treated as a regular TX_RENAME. Reviewed-by: Ryan Moeller <ryan@iXsystems.com> Reviewed-by: Alexander Motin <mav@FreeBSD.org> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Pavel Snajdr <snajpa@snajpa.net> Signed-off-by: Aleksa Sarai <cyphar@cyphar.com> Closes #12209 Closes #14070
This commit is contained in:
committed by
Brian Behlendorf
parent
e015d6cc0b
commit
dbf6108b4d
+81
-5
@@ -494,11 +494,8 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
||||
zil_itx_assign(zilog, itx, tx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handles TX_RENAME transactions.
|
||||
*/
|
||||
void
|
||||
zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp,
|
||||
static void
|
||||
do_zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp,
|
||||
const char *sname, znode_t *tdzp, const char *dname, znode_t *szp)
|
||||
{
|
||||
itx_t *itx;
|
||||
@@ -520,6 +517,85 @@ zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp,
|
||||
zil_itx_assign(zilog, itx, tx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handles TX_RENAME transactions.
|
||||
*/
|
||||
void
|
||||
zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp,
|
||||
const char *sname, znode_t *tdzp, const char *dname, znode_t *szp)
|
||||
{
|
||||
txtype |= TX_RENAME;
|
||||
do_zfs_log_rename(zilog, tx, txtype, sdzp, sname, tdzp, dname, szp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handles TX_RENAME_EXCHANGE transactions.
|
||||
*/
|
||||
void
|
||||
zfs_log_rename_exchange(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
||||
znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname,
|
||||
znode_t *szp)
|
||||
{
|
||||
txtype |= TX_RENAME_EXCHANGE;
|
||||
do_zfs_log_rename(zilog, tx, txtype, sdzp, sname, tdzp, dname, szp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handles TX_RENAME_WHITEOUT transactions.
|
||||
*
|
||||
* Unfortunately we cannot reuse do_zfs_log_rename because we we need to call
|
||||
* zfs_mknode() on replay which requires stashing bits as with TX_CREATE.
|
||||
*/
|
||||
void
|
||||
zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
||||
znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname,
|
||||
znode_t *szp, znode_t *wzp)
|
||||
{
|
||||
itx_t *itx;
|
||||
lr_rename_whiteout_t *lr;
|
||||
size_t snamesize = strlen(sname) + 1;
|
||||
size_t dnamesize = strlen(dname) + 1;
|
||||
|
||||
if (zil_replaying(zilog, tx))
|
||||
return;
|
||||
|
||||
txtype |= TX_RENAME_WHITEOUT;
|
||||
itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize);
|
||||
lr = (lr_rename_whiteout_t *)&itx->itx_lr;
|
||||
lr->lr_rename.lr_sdoid = sdzp->z_id;
|
||||
lr->lr_rename.lr_tdoid = tdzp->z_id;
|
||||
|
||||
/*
|
||||
* RENAME_WHITEOUT will create an entry at the source znode, so we need
|
||||
* to store the same data that the equivalent call to zfs_log_create()
|
||||
* would.
|
||||
*/
|
||||
lr->lr_wfoid = wzp->z_id;
|
||||
LR_FOID_SET_SLOTS(lr->lr_wfoid, wzp->z_dnodesize >> DNODE_SHIFT);
|
||||
(void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(wzp)), &lr->lr_wgen,
|
||||
sizeof (uint64_t));
|
||||
(void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(wzp)),
|
||||
lr->lr_wcrtime, sizeof (uint64_t) * 2);
|
||||
lr->lr_wmode = wzp->z_mode;
|
||||
lr->lr_wuid = (uint64_t)KUID_TO_SUID(ZTOUID(wzp));
|
||||
lr->lr_wgid = (uint64_t)KGID_TO_SGID(ZTOGID(wzp));
|
||||
|
||||
/*
|
||||
* This rdev will always be makdevice(0, 0) but because the ZIL log and
|
||||
* replay code needs to be platform independent (and there is no
|
||||
* platform independent makdev()) we need to copy the one created
|
||||
* during the rename operation.
|
||||
*/
|
||||
(void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_RDEV(ZTOZSB(wzp)), &lr->lr_wrdev,
|
||||
sizeof (lr->lr_wrdev));
|
||||
|
||||
memcpy((char *)(lr + 1), sname, snamesize);
|
||||
memcpy((char *)(lr + 1) + snamesize, dname, dnamesize);
|
||||
itx->itx_oid = szp->z_id;
|
||||
|
||||
zil_itx_assign(zilog, itx, tx);
|
||||
}
|
||||
|
||||
/*
|
||||
* zfs_log_write() handles TX_WRITE transactions. The specified callback is
|
||||
* called as soon as the write is on stable storage (be it via a DMU sync or a
|
||||
|
||||
+96
-10
@@ -643,18 +643,21 @@ zfs_replay_link(void *arg1, void *arg2, boolean_t byteswap)
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap)
|
||||
do_zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, char *sname,
|
||||
char *tname, uint64_t rflags, vattr_t *wo_vap)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = arg1;
|
||||
lr_rename_t *lr = arg2;
|
||||
char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
|
||||
char *tname = sname + strlen(sname) + 1;
|
||||
znode_t *sdzp, *tdzp;
|
||||
int error;
|
||||
int vflg = 0;
|
||||
int error, vflg = 0;
|
||||
|
||||
if (byteswap)
|
||||
byteswap_uint64_array(lr, sizeof (*lr));
|
||||
/* Only Linux currently supports RENAME_* flags. */
|
||||
#ifdef __linux__
|
||||
VERIFY0(rflags & ~(RENAME_EXCHANGE | RENAME_WHITEOUT));
|
||||
|
||||
/* wo_vap must be non-NULL iff. we're doing RENAME_WHITEOUT */
|
||||
VERIFY_EQUIV(rflags & RENAME_WHITEOUT, wo_vap != NULL);
|
||||
#else
|
||||
VERIFY0(rflags);
|
||||
#endif
|
||||
|
||||
if ((error = zfs_zget(zfsvfs, lr->lr_sdoid, &sdzp)) != 0)
|
||||
return (error);
|
||||
@@ -667,13 +670,94 @@ zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap)
|
||||
if (lr->lr_common.lrc_txtype & TX_CI)
|
||||
vflg |= FIGNORECASE;
|
||||
|
||||
error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg, NULL);
|
||||
error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg, rflags,
|
||||
wo_vap, NULL);
|
||||
|
||||
zrele(tdzp);
|
||||
zrele(sdzp);
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = arg1;
|
||||
lr_rename_t *lr = arg2;
|
||||
char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
|
||||
char *tname = sname + strlen(sname) + 1;
|
||||
|
||||
if (byteswap)
|
||||
byteswap_uint64_array(lr, sizeof (*lr));
|
||||
|
||||
return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, 0, NULL));
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_replay_rename_exchange(void *arg1, void *arg2, boolean_t byteswap)
|
||||
{
|
||||
#ifdef __linux__
|
||||
zfsvfs_t *zfsvfs = arg1;
|
||||
lr_rename_t *lr = arg2;
|
||||
char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
|
||||
char *tname = sname + strlen(sname) + 1;
|
||||
|
||||
if (byteswap)
|
||||
byteswap_uint64_array(lr, sizeof (*lr));
|
||||
|
||||
return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, RENAME_EXCHANGE,
|
||||
NULL));
|
||||
#else
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
#endif
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_replay_rename_whiteout(void *arg1, void *arg2, boolean_t byteswap)
|
||||
{
|
||||
#ifdef __linux__
|
||||
zfsvfs_t *zfsvfs = arg1;
|
||||
lr_rename_whiteout_t *lr = arg2;
|
||||
int error;
|
||||
/* sname and tname follow lr_rename_whiteout_t */
|
||||
char *sname = (char *)(lr + 1);
|
||||
char *tname = sname + strlen(sname) + 1;
|
||||
/* For the whiteout file. */
|
||||
xvattr_t xva;
|
||||
uint64_t objid;
|
||||
uint64_t dnodesize;
|
||||
|
||||
if (byteswap)
|
||||
byteswap_uint64_array(lr, sizeof (*lr));
|
||||
|
||||
objid = LR_FOID_GET_OBJ(lr->lr_wfoid);
|
||||
dnodesize = LR_FOID_GET_SLOTS(lr->lr_wfoid) << DNODE_SHIFT;
|
||||
|
||||
xva_init(&xva);
|
||||
zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID,
|
||||
lr->lr_wmode, lr->lr_wuid, lr->lr_wgid, lr->lr_wrdev, objid);
|
||||
|
||||
/*
|
||||
* As with TX_CREATE, RENAME_WHITEOUT ends up in zfs_mknode(), which
|
||||
* assigns the object's creation time, generation number, and dnode
|
||||
* slot count. The generic zfs_rename() has no concept of these
|
||||
* attributes, so we smuggle the values inside the vattr's otherwise
|
||||
* unused va_ctime, va_nblocks, and va_fsid fields.
|
||||
*/
|
||||
ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_wcrtime);
|
||||
xva.xva_vattr.va_nblocks = lr->lr_wgen;
|
||||
xva.xva_vattr.va_fsid = dnodesize;
|
||||
|
||||
error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
return (do_zfs_replay_rename(zfsvfs, &lr->lr_rename, sname, tname,
|
||||
RENAME_WHITEOUT, &xva.xva_vattr));
|
||||
#else
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
#endif
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_replay_write(void *arg1, void *arg2, boolean_t byteswap)
|
||||
{
|
||||
@@ -1069,4 +1153,6 @@ zil_replay_func_t *const zfs_replay_vector[TX_MAX_TYPE] = {
|
||||
zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */
|
||||
zfs_replay_write2, /* TX_WRITE2 */
|
||||
zfs_replay_setsaxattr, /* TX_SETSAXATTR */
|
||||
zfs_replay_rename_exchange, /* TX_RENAME_EXCHANGE */
|
||||
zfs_replay_rename_whiteout, /* TX_RENAME_WHITEOUT */
|
||||
};
|
||||
|
||||
+2
-4
@@ -759,11 +759,9 @@ zil_commit_activate_saxattr_feature(zilog_t *zilog)
|
||||
uint64_t txg = 0;
|
||||
dmu_tx_t *tx = NULL;
|
||||
|
||||
if (spa_feature_is_enabled(zilog->zl_spa,
|
||||
SPA_FEATURE_ZILSAXATTR) &&
|
||||
if (spa_feature_is_enabled(zilog->zl_spa, SPA_FEATURE_ZILSAXATTR) &&
|
||||
dmu_objset_type(zilog->zl_os) != DMU_OST_ZVOL &&
|
||||
!dsl_dataset_feature_is_active(ds,
|
||||
SPA_FEATURE_ZILSAXATTR)) {
|
||||
!dsl_dataset_feature_is_active(ds, SPA_FEATURE_ZILSAXATTR)) {
|
||||
tx = dmu_tx_create(zilog->zl_os);
|
||||
VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
|
||||
dsl_dataset_dirty(ds, tx);
|
||||
|
||||
@@ -514,6 +514,8 @@ zil_replay_func_t *const zvol_replay_vector[TX_MAX_TYPE] = {
|
||||
zvol_replay_err, /* TX_MKDIR_ACL_ATTR */
|
||||
zvol_replay_err, /* TX_WRITE2 */
|
||||
zvol_replay_err, /* TX_SETSAXATTR */
|
||||
zvol_replay_err, /* TX_RENAME_EXCHANGE */
|
||||
zvol_replay_err, /* TX_RENAME_WHITEOUT */
|
||||
};
|
||||
|
||||
/*
|
||||
|
||||
Reference in New Issue
Block a user