diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 4152bbf8d..1917a0f39 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -439,8 +439,8 @@ get_usage(zfs_help_t idx) return (gettext("\tredact " " ...\n")); case HELP_REWRITE: - return (gettext("\trewrite [-Prvx] [-o ] [-l ] " - "\n")); + return (gettext("\trewrite [-CPSrvx] [-o ] " + "[-l ] \n")); case HELP_JAIL: return (gettext("\tjail \n")); case HELP_UNJAIL: @@ -9080,11 +9080,17 @@ zfs_do_rewrite(int argc, char **argv) zfs_rewrite_args_t args; memset(&args, 0, sizeof (args)); - while ((c = getopt(argc, argv, "Pl:o:rvx")) != -1) { + while ((c = getopt(argc, argv, "CPSl:o:rvx")) != -1) { switch (c) { + case 'C': + args.flags |= ZFS_REWRITE_SKIP_BRT; + break; case 'P': args.flags |= ZFS_REWRITE_PHYSICAL; break; + case 'S': + args.flags |= ZFS_REWRITE_SKIP_SNAPSHOT; + break; case 'l': args.len = strtoll(optarg, NULL, 0); break; diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h index 492be2920..a7653582f 100644 --- a/include/sys/dmu_objset.h +++ b/include/sys/dmu_objset.h @@ -236,6 +236,7 @@ int dmu_objset_find_dp(struct dsl_pool *dp, uint64_t ddobj, void *arg, int flags); void dmu_objset_evict_dbufs(objset_t *os); inode_timespec_t dmu_objset_snap_cmtime(objset_t *os); +boolean_t dmu_objset_block_is_shared(objset_t *os, const blkptr_t *bp); /* called from dsl */ void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx); diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 507d1fa2d..ca929ed51 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -1638,7 +1638,9 @@ typedef struct zfs_rewrite_args { } zfs_rewrite_args_t; /* zfs_rewrite_args flags */ -#define ZFS_REWRITE_PHYSICAL 0x1 /* Preserve logical birth time. */ +#define ZFS_REWRITE_PHYSICAL 0x1 /* Preserve logical birth time. */ +#define ZFS_REWRITE_SKIP_SNAPSHOT 0x2 /* Skip snapshot-shared blocks. */ +#define ZFS_REWRITE_SKIP_BRT 0x4 /* Skip BRT-cloned blocks. */ #define ZFS_IOC_REWRITE _IOW(0x83, 3, zfs_rewrite_args_t) diff --git a/man/man8/zfs-rewrite.8 b/man/man8/zfs-rewrite.8 index ae0a15882..daec277d8 100644 --- a/man/man8/zfs-rewrite.8 +++ b/man/man8/zfs-rewrite.8 @@ -32,7 +32,7 @@ .Sh SYNOPSIS .Nm zfs .Cm rewrite -.Oo Fl Prvx Ns Oc +.Oo Fl CPSrvx Ns Oc .Op Fl l Ar length .Op Fl o Ar offset .Ar file Ns | Ns Ar directory Ns … @@ -45,6 +45,11 @@ as if they were atomically read and written back. .No See Sx NOTES . for more information about property changes that may be applied during rewrite. .Bl -tag -width "-r" +.It Fl C +Skip blocks that are shared via block cloning (BRT). +Cloned blocks are referenced by multiple files or datasets. +Rewriting these blocks would create separate copies and increase space usage. +This flag prevents such expansion by skipping cloned blocks. .It Fl P Perform physical rewrite, preserving logical birth time of blocks. By default, rewrite updates logical birth times, making blocks appear @@ -54,6 +59,12 @@ inclusion in incremental streams. Physical rewrite requires the .Sy physical_rewrite feature to be enabled on the pool. +.It Fl S +Skip blocks that are shared with snapshots. +Blocks created before the most recent snapshot are shared with that snapshot. +Rewriting these blocks would create new copies, leaving the old copies for +the snapshot and increasing space usage. +This flag prevents such expansion by skipping snapshot-shared blocks. .It Fl l Ar length Rewrite at most this number of bytes. .It Fl o Ar offset @@ -82,6 +93,12 @@ will have no effect. .Pp Rewrite of cloned blocks and blocks that are part of any snapshots, same as some property changes may increase pool space usage. +Use the +.Fl C +and +.Fl S +flags to skip cloned and snapshot-shared blocks respectively to prevent +this expansion. Holes that were never written or were previously zero-compressed are not rewritten and will remain holes even if compression is disabled. .Pp diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 5a815b59e..ed81647bc 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -3046,6 +3046,24 @@ dmu_objset_willuse_space(objset_t *os, int64_t space, dmu_tx_t *tx) dsl_pool_dirty_space(dmu_tx_pool(tx), space, tx); } +/* + * Check if a block is shared with a snapshot in this objset. + * Returns B_TRUE if block was created before or at the time of the + * previous snapshot, B_FALSE otherwise. + */ +boolean_t +dmu_objset_block_is_shared(objset_t *os, const blkptr_t *bp) +{ + if (BP_IS_HOLE(bp)) + return (B_FALSE); + + dsl_dataset_t *ds = os->os_dsl_dataset; + if (ds == NULL) + return (B_FALSE); + + return (BP_GET_BIRTH(bp) <= dsl_dataset_phys(ds)->ds_prev_snap_txg); +} + #if defined(_KERNEL) EXPORT_SYMBOL(dmu_objset_zil); EXPORT_SYMBOL(dmu_objset_pool); @@ -3090,4 +3108,5 @@ EXPORT_SYMBOL(dmu_objset_projectquota_enabled); EXPORT_SYMBOL(dmu_objset_projectquota_present); EXPORT_SYMBOL(dmu_objset_projectquota_upgradable); EXPORT_SYMBOL(dmu_objset_id_quota_upgrade); +EXPORT_SYMBOL(dmu_objset_block_is_shared); #endif diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 7bb9ba57c..8f29474ac 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include @@ -1095,6 +1096,34 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) return (0); } +/* + * Check if a block should be skipped during rewrite. + * Returns B_TRUE if block should be skipped. + */ +static boolean_t +zfs_rewrite_skip(dmu_buf_t *db, objset_t *os, uint64_t flags) +{ + /* + * This may be slightly stale and racy, but should be OK for + * the advisory use. + */ + blkptr_t *bp = dmu_buf_get_blkptr(db); + if (bp == NULL) + return (B_TRUE); + + if (flags & ZFS_REWRITE_SKIP_SNAPSHOT) { + if (dmu_objset_block_is_shared(os, bp)) + return (B_TRUE); + } + + if (flags & ZFS_REWRITE_SKIP_BRT) { + if (brt_maybe_exists(os->os_spa, bp)) + return (B_TRUE); + } + + return (B_FALSE); +} + /* * Rewrite a range of file as-is without modification. * @@ -1113,7 +1142,11 @@ zfs_rewrite(znode_t *zp, uint64_t off, uint64_t len, uint64_t flags, { int error; - if ((flags & ~ZFS_REWRITE_PHYSICAL) != 0 || arg != 0) +#define ZFS_REWRITE_VALID_FLAGS \ + (ZFS_REWRITE_PHYSICAL | ZFS_REWRITE_SKIP_SNAPSHOT | \ + ZFS_REWRITE_SKIP_BRT) + + if ((flags & ~ZFS_REWRITE_VALID_FLAGS) != 0 || arg != 0) return (SET_ERROR(EINVAL)); zfsvfs_t *zfsvfs = ZTOZSB(zp); @@ -1214,6 +1247,10 @@ zfs_rewrite(znode_t *zp, uint64_t off, uint64_t len, uint64_t flags, nr += dbp[i]->db_size; if (dmu_buf_is_dirty(dbp[i], tx)) continue; + + if (zfs_rewrite_skip(dbp[i], zfsvfs->z_os, flags)) + continue; + nw += dbp[i]->db_size; if (flags & ZFS_REWRITE_PHYSICAL) dmu_buf_will_rewrite(dbp[i], tx); diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index b83f50a4b..19df29ec3 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -309,7 +309,8 @@ tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos'] tags = ['functional', 'cli_root', 'zfs_reservation'] [tests/functional/cli_root/zfs_rewrite] -tests = ['zfs_rewrite', 'zfs_rewrite_physical'] +tests = ['zfs_rewrite', 'zfs_rewrite_physical', 'zfs_rewrite_skip_clone', + 'zfs_rewrite_skip_snapshot'] tags = ['functional', 'cli_root', 'zfs_rewrite'] [tests/functional/cli_root/zfs_rollback] diff --git a/tests/runfiles/sanity.run b/tests/runfiles/sanity.run index 95bdf05dc..dad51d2e9 100644 --- a/tests/runfiles/sanity.run +++ b/tests/runfiles/sanity.run @@ -195,7 +195,8 @@ tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos'] tags = ['functional', 'cli_root', 'zfs_reservation'] [tests/functional/cli_root/zfs_rewrite] -tests = ['zfs_rewrite', 'zfs_rewrite_physical'] +tests = ['zfs_rewrite', 'zfs_rewrite_physical', 'zfs_rewrite_skip_clone', + 'zfs_rewrite_skip_snapshot'] tags = ['functional', 'cli_root', 'zfs_rewrite'] [tests/functional/cli_root/zfs_rollback] diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index 3d697726a..096d6b18e 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -3943,4 +3943,26 @@ function pop_coredump_pattern esac } +# +# get_same_blocks dataset1 path/to/file1 dataset2 path/to/file2 [key] +# +# Returns a space-separated list of the indexes (starting at 0) of the L0 +# blocks that are shared between both files (by first DVA and checksum). +# +function get_same_blocks # dataset1 file1 dataset2 file2 [key] +{ + typeset KEY=$5 + if [ ${#KEY} -gt 0 ]; then + KEY="--key=$KEY" + fi + typeset zdbout1=$(mktemp) + typeset zdbout2=$(mktemp) + zdb $KEY -vvvvv $1 -O $2 | \ + awk '/ L0 / { print l++ " " $3 " " $7 }' > $zdbout1 + zdb $KEY -vvvvv $3 -O $4 | \ + awk '/ L0 / { print l++ " " $3 " " $7 }' > $zdbout2 + echo $(sort -n $zdbout1 $zdbout2 | uniq -d | cut -f1 -d' ') + rm -f $zdbout1 $zdbout2 +} + . ${STF_SUITE}/include/kstat.shlib diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 19b31d2ea..9bb39b012 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -876,6 +876,8 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zfs_rewrite/setup.ksh \ functional/cli_root/zfs_rewrite/zfs_rewrite.ksh \ functional/cli_root/zfs_rewrite/zfs_rewrite_physical.ksh \ + functional/cli_root/zfs_rewrite/zfs_rewrite_skip_clone.ksh \ + functional/cli_root/zfs_rewrite/zfs_rewrite_skip_snapshot.ksh \ functional/cli_root/zfs_rollback/cleanup.ksh \ functional/cli_root/zfs_rollback/setup.ksh \ functional/cli_root/zfs_rollback/zfs_rollback_001_pos.ksh \ diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib b/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib index b2a2b09f5..67f4b109b 100644 --- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib @@ -35,27 +35,3 @@ function have_same_content log_must [ "$hash1" = "$hash2" ] } -# -# get_same_blocks dataset1 path/to/file1 dataset2 path/to/file2 -# -# Returns a space-separated list of the indexes (starting at 0) of the L0 -# blocks that are shared between both files (by first DVA and checksum). -# Assumes that the two files have the same content, use have_same_content to -# confirm that. -# -function get_same_blocks -{ - KEY=$5 - if [ ${#KEY} -gt 0 ]; then - KEY="--key=$KEY" - fi - typeset zdbout1=$(mktemp) - typeset zdbout2=$(mktemp) - zdb $KEY -vvvvv $1 -O $2 | \ - awk '/ L0 / { print l++ " " $3 " " $7 }' > $zdbout1 - zdb $KEY -vvvvv $3 -O $4 | \ - awk '/ L0 / { print l++ " " $3 " " $7 }' > $zdbout2 - echo $(sort -n $zdbout1 $zdbout2 | uniq -d | cut -f1 -d' ') - rm -f $zdbout1 $zdbout2 -} - diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_rewrite/zfs_rewrite_skip_clone.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_rewrite/zfs_rewrite_skip_clone.ksh new file mode 100755 index 000000000..464f41535 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_rewrite/zfs_rewrite_skip_clone.ksh @@ -0,0 +1,83 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2026, iXsystems, Inc. +# + +# DESCRIPTION: +# Verify zfs rewrite -C flag skips BRT-cloned blocks. +# +# STRATEGY: +# 1. Create a test file and sync it. +# 2. Clone the file using block cloning to share blocks via BRT. +# 3. Rewrite clone with -C flag and verify blocks are NOT rewritten. +# 4. Rewrite clone without -C flag and verify blocks ARE rewritten. + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "global" + +function cleanup +{ + rm -rf $TESTDIR/* +} + +log_assert "zfs rewrite -C flag skips BRT-cloned blocks" + +log_onexit cleanup + +log_must zfs set recordsize=128k $TESTPOOL/$TESTFS + +# Create source file (4 x 128KB = 4 blocks) +log_must dd if=/dev/urandom of=$TESTDIR/source bs=128k count=4 +log_must sync_pool $TESTPOOL + +# Clone the file using block cloning +log_must clonefile -f $TESTDIR/source $TESTDIR/clone +log_must sync_pool $TESTPOOL + +# Verify blocks are actually shared initially +typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS source \ + $TESTPOOL/$TESTFS clone) +log_must [ "$blocks" = "0 1 2 3" ] + +# Test 1: Rewrite clone WITH -C flag (should skip all cloned blocks) +log_must zfs rewrite -C $TESTDIR/clone +log_must sync_pool $TESTPOOL + +# Blocks should still be shared (all blocks were skipped) +typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS source \ + $TESTPOOL/$TESTFS clone) +log_must [ "$blocks" = "0 1 2 3" ] + +# Test 2: Rewrite clone WITHOUT -C flag (should rewrite all blocks) +log_must zfs rewrite $TESTDIR/clone +log_must sync_pool $TESTPOOL + +# No blocks should be shared (clone has new blocks) +typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS source \ + $TESTPOOL/$TESTFS clone) +log_must [ -z "$blocks" ] + +log_pass diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_rewrite/zfs_rewrite_skip_snapshot.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_rewrite/zfs_rewrite_skip_snapshot.ksh new file mode 100755 index 000000000..9fc039ba2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_rewrite/zfs_rewrite_skip_snapshot.ksh @@ -0,0 +1,74 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2026, iXsystems, Inc. +# + +# DESCRIPTION: +# Verify zfs rewrite -S flag skips snapshot-shared blocks. +# +# STRATEGY: +# 1. Create a test file and sync it. +# 2. Take a snapshot to share the blocks. +# 3. Rewrite with -S flag and verify blocks are NOT rewritten. +# 4. Rewrite without -S flag and verify blocks ARE rewritten. + +. $STF_SUITE/include/libtest.shlib + +function cleanup +{ + rm -rf $TESTDIR/* + zfs destroy -R $TESTPOOL/$TESTFS@snap1 2>/dev/null || true +} + +log_assert "zfs rewrite -S flag skips snapshot-shared blocks" + +log_onexit cleanup + +log_must zfs set recordsize=128k $TESTPOOL/$TESTFS + +# Create test file (4 x 128KB = 4 blocks) and snapshot +log_must dd if=/dev/urandom of=$TESTDIR/testfile bs=128k count=4 +log_must sync_pool $TESTPOOL +log_must zfs snapshot $TESTPOOL/$TESTFS@snap1 + +# Test 1: Rewrite WITH -S flag (should skip all snapshot-shared blocks) +log_must zfs rewrite -S $TESTDIR/testfile +log_must sync_pool $TESTPOOL + +# All blocks should still be shared (all blocks were skipped) +typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS testfile \ + $TESTPOOL/$TESTFS@snap1 testfile) +log_must [ "$blocks" = "0 1 2 3" ] + +# Test 2: Rewrite WITHOUT -S flag (should rewrite all blocks) +log_must zfs rewrite $TESTDIR/testfile +log_must sync_pool $TESTPOOL + +# No blocks should be shared (all blocks were rewritten) +typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS testfile \ + $TESTPOOL/$TESTFS@snap1 testfile) +log_must [ -z "$blocks" ] + +log_pass