Fix out-of-order ZIL txtype lost on hardlinked files

We should only call zil_remove_async when an object is removed. However,
in current implementation, it is called whenever TX_REMOVE is called. In
the case of hardlinked file, every unlink will generate TX_REMOVE and
causing operations to be dropped even when the object is not removed.

We fix this by only calling zil_remove_async when the file is fully
unlinked.

Reviewed-by: George Wilson <gwilson@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Signed-off-by: Chunwei Chen <david.chen@nutanix.com>
Closes #8769
Closes #9061
This commit is contained in:
Chunwei Chen 2019-08-13 20:21:27 -07:00 committed by Tony Hutter
parent 6d1599c1e1
commit 569f5d5d05
5 changed files with 27 additions and 15 deletions

View File

@ -371,7 +371,7 @@ extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp, extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp,
vattr_t *vap); vattr_t *vap);
extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *dzp, char *name, uint64_t foid); znode_t *dzp, char *name, uint64_t foid, boolean_t unlinked);
#define ZFS_NO_OBJECT 0 /* no object id */ #define ZFS_NO_OBJECT 0 /* no object id */
extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *dzp, znode_t *zp, char *name); znode_t *dzp, znode_t *zp, char *name);

View File

@ -380,12 +380,14 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
zil_itx_assign(zilog, itx, tx); zil_itx_assign(zilog, itx, tx);
} }
void zil_remove_async(zilog_t *zilog, uint64_t oid);
/* /*
* Handles both TX_REMOVE and TX_RMDIR transactions. * Handles both TX_REMOVE and TX_RMDIR transactions.
*/ */
void void
zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *dzp, char *name, uint64_t foid) znode_t *dzp, char *name, uint64_t foid, boolean_t unlinked)
{ {
itx_t *itx; itx_t *itx;
lr_remove_t *lr; lr_remove_t *lr;
@ -401,6 +403,17 @@ zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
itx->itx_oid = foid; itx->itx_oid = foid;
/*
* Object ids can be re-instantiated in the next txg so
* remove any async transactions to avoid future leaks.
* This can happen if a fsync occurs on the re-instantiated
* object for a WR_INDIRECT or WR_NEED_COPY write, which gets
* the new file data and flushes a write record for the old object.
*/
if (unlinked) {
ASSERT((txtype & ~TX_CI) == TX_REMOVE);
zil_remove_async(zilog, foid);
}
zil_itx_assign(zilog, itx, tx); zil_itx_assign(zilog, itx, tx);
} }

View File

@ -1886,7 +1886,7 @@ top:
txtype = TX_REMOVE; txtype = TX_REMOVE;
if (flags & FIGNORECASE) if (flags & FIGNORECASE)
txtype |= TX_CI; txtype |= TX_CI;
zfs_log_remove(zilog, tx, txtype, dzp, name, obj); zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);
dmu_tx_commit(tx); dmu_tx_commit(tx);
out: out:
@ -2219,7 +2219,8 @@ top:
uint64_t txtype = TX_RMDIR; uint64_t txtype = TX_RMDIR;
if (flags & FIGNORECASE) if (flags & FIGNORECASE)
txtype |= TX_CI; txtype |= TX_CI;
zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT,
B_FALSE);
} }
dmu_tx_commit(tx); dmu_tx_commit(tx);

View File

@ -1824,7 +1824,7 @@ zil_aitx_compare(const void *x1, const void *x2)
/* /*
* Remove all async itx with the given oid. * Remove all async itx with the given oid.
*/ */
static void void
zil_remove_async(zilog_t *zilog, uint64_t oid) zil_remove_async(zilog_t *zilog, uint64_t oid)
{ {
uint64_t otxg, txg; uint64_t otxg, txg;
@ -1876,16 +1876,6 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx)
itxg_t *itxg; itxg_t *itxg;
itxs_t *itxs, *clean = NULL; itxs_t *itxs, *clean = NULL;
/*
* Object ids can be re-instantiated in the next txg so
* remove any async transactions to avoid future leaks.
* This can happen if a fsync occurs on the re-instantiated
* object for a WR_INDIRECT or WR_NEED_COPY write, which gets
* the new file data and flushes a write record for the old object.
*/
if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_REMOVE)
zil_remove_async(zilog, itx->itx_oid);
/* /*
* Ensure the data of a renamed file is committed before the rename. * Ensure the data of a renamed file is committed before the rename.
*/ */

View File

@ -160,6 +160,14 @@ log_must attr -qs fileattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.file
log_must attr -qs tmpattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.file log_must attr -qs tmpattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.file
log_must attr -qr tmpattr /$TESTPOOL/$TESTFS/xattr.file log_must attr -qr tmpattr /$TESTPOOL/$TESTFS/xattr.file
# TX_WRITE, TX_LINK, TX_REMOVE
# Make sure TX_REMOVE won't affect TX_WRITE if file is not destroyed
log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/link_and_unlink bs=128k \
count=8
log_must ln /$TESTPOOL/$TESTFS/link_and_unlink \
/$TESTPOOL/$TESTFS/link_and_unlink.link
log_must rm /$TESTPOOL/$TESTFS/link_and_unlink.link
# #
# 4. Copy TESTFS to temporary location (TESTDIR/copy) # 4. Copy TESTFS to temporary location (TESTDIR/copy)
# #