Implement physical rewrites

Building on the previous commit, this implements the `zfs rewrite -P`
flag, which makes ZFS keep blocks' logical birth times while rewriting
files.  This should exclude the rewritten blocks from incremental
sends, snapshot diffs, etc.  At the same time, snapshot space usage
will reflect the additional space used by the newly allocated blocks.

Since this begins to use the new "rewrite" flag in block pointers,
this commit introduces a new read-compatible per-dataset feature,
physical_rewrite.  It must be enabled or the command will fail;
it is activated on first use and deactivated on deletion of the
last affected dataset.

Reviewed-by: Rob Norris <robn@despairlabs.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by:  Alexander Motin <alexander.motin@TrueNAS.com>
Closes #17565
This commit is contained in:
Alexander Motin
2025-07-23 15:51:00 -04:00
committed by Brian Behlendorf
parent 4ae8bf406b
commit 60f714e6e2
19 changed files with 270 additions and 18 deletions
+57
View File
@@ -2160,6 +2160,12 @@ dbuf_redirty(dbuf_dirty_record_t *dr)
ASSERT(arc_released(db->db_buf));
arc_buf_thaw(db->db_buf);
}
/*
* Clear the rewrite flag since this is now a logical
* modification.
*/
dr->dt.dl.dr_rewrite = B_FALSE;
}
}
@@ -2707,6 +2713,38 @@ dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
dmu_buf_will_dirty_flags(db_fake, tx, DMU_READ_NO_PREFETCH);
}
/*
 * Dirty a held dbuf in tx's txg so that it gets written out again, and
 * tag the resulting level-0 dirty record as a rewrite (i.e. one that
 * should preserve the block's logical birth time).
 *
 * db_fake must be held and tx must already be assigned to a txg; both
 * conditions are asserted.  If the dbuf already has a dirty record for
 * this txg the function returns without touching that record.
 */
void
dmu_buf_will_rewrite(dmu_buf_t *db_fake, dmu_tx_t *tx)
{
	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
	dbuf_dirty_record_t *dr;

	ASSERT(tx->tx_txg != 0);
	ASSERT(!zfs_refcount_is_zero(&db->db_holds));

	/*
	 * A dbuf that is already dirty in this txg is going to be written
	 * regardless, so leave its existing dirty record alone.
	 */
	mutex_enter(&db->db_mtx);
	dr = dbuf_find_dirty_eq(db, tx->tx_txg);
	mutex_exit(&db->db_mtx);
	if (dr != NULL)
		return;

	/*
	 * Clean so far: dirty it now.  db_mtx is not held across this call.
	 * NOTE(review): a dirty record created by someone else in this
	 * window would also be picked up by the lookup below -- confirm
	 * that callers exclude concurrent modification.
	 */
	dmu_buf_will_dirty_flags(db_fake, tx, DMU_READ_NO_PREFETCH);

	/*
	 * Look the dirty record up again and flag it for rewrite.  Only
	 * level-0 dbufs get the flag.
	 */
	mutex_enter(&db->db_mtx);
	dr = dbuf_find_dirty_eq(db, tx->tx_txg);
	if (db->db_level == 0 && dr != NULL)
		dr->dt.dl.dr_rewrite = B_TRUE;
	mutex_exit(&db->db_mtx);
}
boolean_t
dmu_buf_is_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
{
@@ -5338,6 +5376,24 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
/*
* Set rewrite properties for zfs_rewrite() operations.
*/
if (db->db_level == 0 && dr->dt.dl.dr_rewrite) {
zp.zp_rewrite = B_TRUE;
/*
* Mark physical rewrite feature for activation.
* This will be activated automatically during dataset sync.
*/
dsl_dataset_t *ds = os->os_dsl_dataset;
if (!dsl_dataset_feature_is_active(ds,
SPA_FEATURE_PHYSICAL_REWRITE)) {
ds->ds_feature_activation[
SPA_FEATURE_PHYSICAL_REWRITE] = (void *)B_TRUE;
}
}
/*
* We copy the blkptr now (rather than when we instantiate the dirty
* record), because its value can change between open context and
@@ -5408,6 +5464,7 @@ EXPORT_SYMBOL(dbuf_release_bp);
EXPORT_SYMBOL(dbuf_dirty);
EXPORT_SYMBOL(dmu_buf_set_crypt_params);
EXPORT_SYMBOL(dmu_buf_will_dirty);
EXPORT_SYMBOL(dmu_buf_will_rewrite);
EXPORT_SYMBOL(dmu_buf_is_dirty);
EXPORT_SYMBOL(dmu_buf_will_clone_or_dio);
EXPORT_SYMBOL(dmu_buf_will_not_fill);