zfs_rename: support RENAME_* flags

Implement support for Linux's RENAME_* flags (for renameat2). Aside from
being quite useful for userspace (providing race-free ways to exchange
paths and implement mv --no-clobber), they are used by overlayfs and are
thus required in order to use overlayfs-on-ZFS.

In order for us to represent the new renameat2(2) flags in the ZIL, we
create two new transaction types for the two flags which need
transactional-level support (RENAME_EXCHANGE and RENAME_WHITEOUT).
RENAME_NOREPLACE does not need any ZIL support because we know that if
the operation succeeded before creating the ZIL entry, there was no file
to be clobbered and thus it can be treated as a regular TX_RENAME.

Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Pavel Snajdr <snajpa@snajpa.net>
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
Closes #12209
Closes #14070
This commit is contained in:
Aleksa Sarai
2019-06-22 10:35:11 +10:00
committed by Brian Behlendorf
parent e015d6cc0b
commit dbf6108b4d
33 changed files with 932 additions and 74 deletions
+81 -5
View File
@@ -494,11 +494,8 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
zil_itx_assign(zilog, itx, tx);
}
/*
* Handles TX_RENAME transactions.
*/
void
zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp,
static void
do_zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp,
const char *sname, znode_t *tdzp, const char *dname, znode_t *szp)
{
itx_t *itx;
@@ -520,6 +517,85 @@ zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp,
zil_itx_assign(zilog, itx, tx);
}
/*
* Handles TX_RENAME transactions.
*/
void
zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp,
const char *sname, znode_t *tdzp, const char *dname, znode_t *szp)
{
txtype |= TX_RENAME;
do_zfs_log_rename(zilog, tx, txtype, sdzp, sname, tdzp, dname, szp);
}
/*
* Handles TX_RENAME_EXCHANGE transactions.
*/
void
zfs_log_rename_exchange(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname,
znode_t *szp)
{
txtype |= TX_RENAME_EXCHANGE;
do_zfs_log_rename(zilog, tx, txtype, sdzp, sname, tdzp, dname, szp);
}
/*
* Handles TX_RENAME_WHITEOUT transactions.
*
* Unfortunately we cannot reuse do_zfs_log_rename because we we need to call
* zfs_mknode() on replay which requires stashing bits as with TX_CREATE.
*/
void
zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname,
znode_t *szp, znode_t *wzp)
{
itx_t *itx;
lr_rename_whiteout_t *lr;
size_t snamesize = strlen(sname) + 1;
size_t dnamesize = strlen(dname) + 1;
if (zil_replaying(zilog, tx))
return;
txtype |= TX_RENAME_WHITEOUT;
itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize);
lr = (lr_rename_whiteout_t *)&itx->itx_lr;
lr->lr_rename.lr_sdoid = sdzp->z_id;
lr->lr_rename.lr_tdoid = tdzp->z_id;
/*
* RENAME_WHITEOUT will create an entry at the source znode, so we need
* to store the same data that the equivalent call to zfs_log_create()
* would.
*/
lr->lr_wfoid = wzp->z_id;
LR_FOID_SET_SLOTS(lr->lr_wfoid, wzp->z_dnodesize >> DNODE_SHIFT);
(void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(wzp)), &lr->lr_wgen,
sizeof (uint64_t));
(void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(wzp)),
lr->lr_wcrtime, sizeof (uint64_t) * 2);
lr->lr_wmode = wzp->z_mode;
lr->lr_wuid = (uint64_t)KUID_TO_SUID(ZTOUID(wzp));
lr->lr_wgid = (uint64_t)KGID_TO_SGID(ZTOGID(wzp));
/*
* This rdev will always be makdevice(0, 0) but because the ZIL log and
* replay code needs to be platform independent (and there is no
* platform independent makdev()) we need to copy the one created
* during the rename operation.
*/
(void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_RDEV(ZTOZSB(wzp)), &lr->lr_wrdev,
sizeof (lr->lr_wrdev));
memcpy((char *)(lr + 1), sname, snamesize);
memcpy((char *)(lr + 1) + snamesize, dname, dnamesize);
itx->itx_oid = szp->z_id;
zil_itx_assign(zilog, itx, tx);
}
/*
* zfs_log_write() handles TX_WRITE transactions. The specified callback is
* called as soon as the write is on stable storage (be it via a DMU sync or a