mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Implement physical rewrites
Based on the previous commit, this implements the `zfs rewrite -P` flag, which makes ZFS keep the blocks' logical birth times while rewriting files. This should exclude the rewritten blocks from incremental sends, snapshot diffs, etc. At the same time, snapshot space usage will reflect the additional space used by the newly allocated blocks. Since this begins to use the new "rewrite" flag in the block pointers, this commit introduces a new read-compatible per-dataset feature, physical_rewrite. It must be enabled for the command not to fail; it is activated on first use and deactivated on deletion of the last affected dataset. Reviewed-by: Rob Norris <robn@despairlabs.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Alexander Motin <alexander.motin@TrueNAS.com> Closes #17565
This commit is contained in:
committed by
Brian Behlendorf
parent
4ae8bf406b
commit
60f714e6e2
@@ -2160,6 +2160,12 @@ dbuf_redirty(dbuf_dirty_record_t *dr)
|
||||
ASSERT(arc_released(db->db_buf));
|
||||
arc_buf_thaw(db->db_buf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear the rewrite flag since this is now a logical
|
||||
* modification.
|
||||
*/
|
||||
dr->dt.dl.dr_rewrite = B_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2707,6 +2713,38 @@ dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
|
||||
dmu_buf_will_dirty_flags(db_fake, tx, DMU_READ_NO_PREFETCH);
|
||||
}
|
||||
|
||||
void
|
||||
dmu_buf_will_rewrite(dmu_buf_t *db_fake, dmu_tx_t *tx)
|
||||
{
|
||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
|
||||
|
||||
ASSERT(tx->tx_txg != 0);
|
||||
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
|
||||
|
||||
/*
|
||||
* If the dbuf is already dirty in this txg, it will be written
|
||||
* anyway, so there's nothing to do.
|
||||
*/
|
||||
mutex_enter(&db->db_mtx);
|
||||
if (dbuf_find_dirty_eq(db, tx->tx_txg) != NULL) {
|
||||
mutex_exit(&db->db_mtx);
|
||||
return;
|
||||
}
|
||||
mutex_exit(&db->db_mtx);
|
||||
|
||||
/*
|
||||
* The dbuf is not dirty, so we need to make it dirty and
|
||||
* mark it for rewrite (preserve logical birth time).
|
||||
*/
|
||||
dmu_buf_will_dirty_flags(db_fake, tx, DMU_READ_NO_PREFETCH);
|
||||
|
||||
mutex_enter(&db->db_mtx);
|
||||
dbuf_dirty_record_t *dr = dbuf_find_dirty_eq(db, tx->tx_txg);
|
||||
if (dr != NULL && db->db_level == 0)
|
||||
dr->dt.dl.dr_rewrite = B_TRUE;
|
||||
mutex_exit(&db->db_mtx);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
dmu_buf_is_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
|
||||
{
|
||||
@@ -5338,6 +5376,24 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
|
||||
|
||||
dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
|
||||
|
||||
/*
|
||||
* Set rewrite properties for zfs_rewrite() operations.
|
||||
*/
|
||||
if (db->db_level == 0 && dr->dt.dl.dr_rewrite) {
|
||||
zp.zp_rewrite = B_TRUE;
|
||||
|
||||
/*
|
||||
* Mark physical rewrite feature for activation.
|
||||
* This will be activated automatically during dataset sync.
|
||||
*/
|
||||
dsl_dataset_t *ds = os->os_dsl_dataset;
|
||||
if (!dsl_dataset_feature_is_active(ds,
|
||||
SPA_FEATURE_PHYSICAL_REWRITE)) {
|
||||
ds->ds_feature_activation[
|
||||
SPA_FEATURE_PHYSICAL_REWRITE] = (void *)B_TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We copy the blkptr now (rather than when we instantiate the dirty
|
||||
* record), because its value can change between open context and
|
||||
@@ -5408,6 +5464,7 @@ EXPORT_SYMBOL(dbuf_release_bp);
|
||||
EXPORT_SYMBOL(dbuf_dirty);
|
||||
EXPORT_SYMBOL(dmu_buf_set_crypt_params);
|
||||
EXPORT_SYMBOL(dmu_buf_will_dirty);
|
||||
EXPORT_SYMBOL(dmu_buf_will_rewrite);
|
||||
EXPORT_SYMBOL(dmu_buf_is_dirty);
|
||||
EXPORT_SYMBOL(dmu_buf_will_clone_or_dio);
|
||||
EXPORT_SYMBOL(dmu_buf_will_not_fill);
|
||||
|
||||
@@ -2508,6 +2508,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
|
||||
zp->zp_encrypt = encrypt;
|
||||
zp->zp_byteorder = ZFS_HOST_BYTEORDER;
|
||||
zp->zp_direct_write = (wp & WP_DIRECT_WR) ? B_TRUE : B_FALSE;
|
||||
zp->zp_rewrite = B_FALSE;
|
||||
memset(zp->zp_salt, 0, ZIO_DATA_SALT_LEN);
|
||||
memset(zp->zp_iv, 0, ZIO_DATA_IV_LEN);
|
||||
memset(zp->zp_mac, 0, ZIO_DATA_MAC_LEN);
|
||||
|
||||
+14
-2
@@ -49,6 +49,7 @@
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/dsl_crypt.h>
|
||||
#include <sys/dsl_dataset.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/dbuf.h>
|
||||
@@ -1101,13 +1102,21 @@ zfs_rewrite(znode_t *zp, uint64_t off, uint64_t len, uint64_t flags,
|
||||
{
|
||||
int error;
|
||||
|
||||
if (flags != 0 || arg != 0)
|
||||
if ((flags & ~ZFS_REWRITE_PHYSICAL) != 0 || arg != 0)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
zfsvfs_t *zfsvfs = ZTOZSB(zp);
|
||||
if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
|
||||
return (error);
|
||||
|
||||
/* Check if physical rewrite is allowed */
|
||||
spa_t *spa = zfsvfs->z_os->os_spa;
|
||||
if ((flags & ZFS_REWRITE_PHYSICAL) &&
|
||||
!spa_feature_is_enabled(spa, SPA_FEATURE_PHYSICAL_REWRITE)) {
|
||||
zfs_exit(zfsvfs, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
|
||||
if (zfs_is_readonly(zfsvfs)) {
|
||||
zfs_exit(zfsvfs, FTAG);
|
||||
return (SET_ERROR(EROFS));
|
||||
@@ -1195,7 +1204,10 @@ zfs_rewrite(znode_t *zp, uint64_t off, uint64_t len, uint64_t flags,
|
||||
if (dmu_buf_is_dirty(dbp[i], tx))
|
||||
continue;
|
||||
nw += dbp[i]->db_size;
|
||||
dmu_buf_will_dirty(dbp[i], tx);
|
||||
if (flags & ZFS_REWRITE_PHYSICAL)
|
||||
dmu_buf_will_rewrite(dbp[i], tx);
|
||||
else
|
||||
dmu_buf_will_dirty(dbp[i], tx);
|
||||
}
|
||||
dmu_buf_rele_array(dbp, numbufs, FTAG);
|
||||
|
||||
|
||||
@@ -3923,6 +3923,23 @@ zio_ddt_write(zio_t *zio)
|
||||
* then we can just use them as-is.
|
||||
*/
|
||||
if (have_dvas >= need_dvas) {
|
||||
/*
|
||||
* For rewrite operations, try preserving the original
|
||||
* logical birth time. If the result matches the
|
||||
* original BP, this becomes a NOP.
|
||||
*/
|
||||
if (zp->zp_rewrite) {
|
||||
uint64_t orig_logical_birth =
|
||||
BP_GET_LOGICAL_BIRTH(&zio->io_bp_orig);
|
||||
ddt_bp_fill(ddp, v, bp, orig_logical_birth);
|
||||
if (BP_EQUAL(bp, &zio->io_bp_orig)) {
|
||||
/* We can skip accounting. */
|
||||
zio->io_flags |= ZIO_FLAG_NOPWRITE;
|
||||
ddt_exit(ddt);
|
||||
return (zio);
|
||||
}
|
||||
}
|
||||
|
||||
ddt_bp_fill(ddp, v, bp, txg);
|
||||
ddt_phys_addref(ddp, v);
|
||||
ddt_exit(ddt);
|
||||
@@ -4355,6 +4372,15 @@ again:
|
||||
error);
|
||||
}
|
||||
zio->io_error = error;
|
||||
} else if (zio->io_prop.zp_rewrite) {
|
||||
/*
|
||||
* For rewrite operations, preserve the logical birth time
|
||||
* but set the physical birth time to the current txg.
|
||||
*/
|
||||
uint64_t logical_birth = BP_GET_LOGICAL_BIRTH(&zio->io_bp_orig);
|
||||
ASSERT3U(logical_birth, <=, zio->io_txg);
|
||||
BP_SET_BIRTH(zio->io_bp, logical_birth, zio->io_txg);
|
||||
BP_SET_REWRITE(zio->io_bp, 1);
|
||||
}
|
||||
|
||||
return (zio);
|
||||
|
||||
Reference in New Issue
Block a user