Improve block cloning transactions accounting

Previous dmu_tx_count_clone() was broken, stating that cloning is
similar to free.  While they might be from some points, cloning
is not net-free.  It will likely consume space and memory, and
unlike free it will do it no matter whether the destination has
the blocks or not (usually not, so previous code did nothing).

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by:	Alexander Motin <mav@FreeBSD.org>
Sponsored by:	iXsystems, Inc.
Closes #17431
This commit is contained in:
Alexander Motin 2025-06-11 14:59:16 -04:00 committed by GitHub
parent 66ec7fb269
commit e0ef4d2768
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 45 additions and 13 deletions

View File

@ -855,7 +855,7 @@ void dmu_tx_hold_append(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
void dmu_tx_hold_append_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
int len);
void dmu_tx_hold_clone_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
int len);
uint64_t len, uint_t blksz);
void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
uint64_t len);
void dmu_tx_hold_free_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,

View File

@ -3901,6 +3901,7 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
ASSERT(blkid != DMU_BONUS_BLKID);
ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
if (!fail_sparse)
ASSERT3U(dn->dn_nlevels, >, level);
*dbp = NULL;

View File

@ -36,6 +36,7 @@
#include <sys/dsl_pool.h>
#include <sys/zap_impl.h>
#include <sys/spa.h>
#include <sys/brt_impl.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
#include <sys/zfs_context.h>
@ -547,17 +548,45 @@ dmu_tx_hold_free_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
}
static void
dmu_tx_count_clone(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
dmu_tx_count_clone(dmu_tx_hold_t *txh, uint64_t off, uint64_t len,
uint_t blksz)
{
dmu_tx_t *tx = txh->txh_tx;
dnode_t *dn = txh->txh_dnode;
int err;
/*
* Reuse dmu_tx_count_free(), it does exactly what we need for clone.
*/
dmu_tx_count_free(txh, off, len);
ASSERT0(tx->tx_txg);
ASSERT(dn->dn_indblkshift != 0);
ASSERT(blksz != 0);
ASSERT0(off % blksz);
(void) zfs_refcount_add_many(&txh->txh_memory_tohold,
len / blksz * sizeof (brt_entry_t), FTAG);
int shift = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
uint64_t start = off / blksz >> shift;
uint64_t end = (off + len) / blksz >> shift;
(void) zfs_refcount_add_many(&txh->txh_space_towrite,
(end - start + 1) << dn->dn_indblkshift, FTAG);
zio_t *zio = zio_root(tx->tx_pool->dp_spa,
NULL, NULL, ZIO_FLAG_CANFAIL);
for (uint64_t i = start; i <= end; i++) {
err = dmu_tx_check_ioerr(zio, dn, 1, i);
if (err != 0) {
tx->tx_err = err;
break;
}
}
err = zio_wait(zio);
if (err != 0)
tx->tx_err = err;
}
void
dmu_tx_hold_clone_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len)
dmu_tx_hold_clone_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
uint64_t len, uint_t blksz)
{
dmu_tx_hold_t *txh;
@ -567,7 +596,7 @@ dmu_tx_hold_clone_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len)
txh = dmu_tx_hold_dnode_impl(tx, dn, THT_CLONE, off, len);
if (txh != NULL) {
dmu_tx_count_dnode(txh);
dmu_tx_count_clone(txh, off, len);
dmu_tx_count_clone(txh, off, len, blksz);
}
}

View File

@ -1836,7 +1836,8 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
dmu_tx_hold_sa(tx, outzp->z_sa_hdl, B_FALSE);
db = (dmu_buf_impl_t *)sa_get_db(outzp->z_sa_hdl);
DB_DNODE_ENTER(db);
dmu_tx_hold_clone_by_dnode(tx, DB_DNODE(db), outoff, size);
dmu_tx_hold_clone_by_dnode(tx, DB_DNODE(db), outoff, size,
inblksz);
DB_DNODE_EXIT(db);
zfs_sa_upgrade_txholds(tx, outzp);
error = dmu_tx_assign(tx, DMU_TX_WAIT);
@ -2003,7 +2004,7 @@ zfs_clone_range_replay(znode_t *zp, uint64_t off, uint64_t len, uint64_t blksz,
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl);
DB_DNODE_ENTER(db);
dmu_tx_hold_clone_by_dnode(tx, DB_DNODE(db), off, len);
dmu_tx_hold_clone_by_dnode(tx, DB_DNODE(db), off, len, blksz);
DB_DNODE_EXIT(db);
zfs_sa_upgrade_txholds(tx, zp);
error = dmu_tx_assign(tx, DMU_TX_WAIT);

View File

@ -577,7 +577,7 @@ zvol_replay_clone_range(void *arg1, void *arg2, boolean_t byteswap)
if (error != 0 || !zv->zv_dn)
return (error);
tx = dmu_tx_create(os);
dmu_tx_hold_clone_by_dnode(tx, zv->zv_dn, off, len);
dmu_tx_hold_clone_by_dnode(tx, zv->zv_dn, off, len, blksz);
error = dmu_tx_assign(tx, DMU_TX_WAIT);
if (error != 0) {
dmu_tx_abort(tx);
@ -742,7 +742,8 @@ zvol_clone_range(zvol_state_t *zv_src, uint64_t inoff, zvol_state_t *zv_dst,
}
tx = dmu_tx_create(zv_dst->zv_objset);
dmu_tx_hold_clone_by_dnode(tx, zv_dst->zv_dn, outoff, size);
dmu_tx_hold_clone_by_dnode(tx, zv_dst->zv_dn, outoff, size,
zv_src->zv_volblocksize);
error = dmu_tx_assign(tx, DMU_TX_WAIT);
if (error != 0) {
dmu_tx_abort(tx);