From 582e7847f6ef47283c1c15850df261a52f4834d8 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 25 Jul 2025 07:42:23 -0700 Subject: [PATCH] Default to zfs_bclone_wait_dirty=1 Update the default FICLONE and FICLONERANGE ioctl behavior to wait on dirty blocks. While this does remove some control from the application, in practice ZFS is better positioned to the optimial thing and immediately force a TXG sync. Reviewed-by: Rob Norris Reviewed-by: Alexander Motin Reviewed-by: George Melikov Signed-off-by: Brian Behlendorf Closes #17455 --- man/man4/zfs.4 | 15 ++++++------- module/zfs/zfs_vnops.c | 13 ++++++------ ...loning_copyfilerange_fallback_same_txg.ksh | 21 +++++++++++++++++++ .../functional/cp_files/cp_files_002_pos.ksh | 4 +++- 4 files changed, 39 insertions(+), 14 deletions(-) diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index 730236481..f0c15bd75 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -1384,14 +1384,15 @@ If this setting is 0, then even if feature@block_cloning is enabled, using functions and system calls that attempt to clone blocks will act as though the feature is disabled. . -.It Sy zfs_bclone_wait_dirty Ns = Ns Sy 0 Ns | Ns 1 Pq int -When set to 1 the FICLONE and FICLONERANGE ioctls wait for dirty data to be -written to disk. -This allows the clone operation to reliably succeed when a file is +.It Sy zfs_bclone_wait_dirty Ns = Ns Sy 1 Ns | Ns 0 Pq int +When set to 1 the FICLONE and FICLONERANGE ioctls will wait for any dirty +data to be written to disk before proceeding. +This ensures that the clone operation reliably succeeds, even if a file is modified and then immediately cloned. -For small files this may be slower than making a copy of the file. -Therefore, this setting defaults to 0 which causes a clone operation to -immediately fail when encountering a dirty block. +Note that for small files this may be slower than simply copying the file. +When set to 0 the clone operation will immediately fail if it encounters +any dirty blocks. +By default waiting is enabled. . .It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string Select a BLAKE3 implementation. diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 7bf630b55..4ef391ed7 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -67,13 +67,14 @@ int zfs_bclone_enabled = 1; /* - * When set zfs_clone_range() waits for dirty data to be written to disk. - * This allows the clone operation to reliably succeed when a file is modified - * and then immediately cloned. For small files this may be slower than making - * a copy of the file and is therefore not the default. However, in certain - * scenarios this behavior may be desirable so a tunable is provided. + * When set to 1 the FICLONE and FICLONERANGE ioctls will wait for any dirty + * data to be written to disk before proceeding. This ensures that the clone + * operation reliably succeeds, even if a file is modified and then immediately + * cloned. Note that for small files this may be slower than simply copying + * the file. When set to 0 the clone operation will immediately fail if it + * encounters any dirty blocks. By default waiting is enabled. */ -int zfs_bclone_wait_dirty = 0; +int zfs_bclone_wait_dirty = 1; /* * Enable Direct I/O. If this setting is 0, then all I/O requests will be diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh index 54ffdc756..4cede26b9 100755 --- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh @@ -41,16 +41,22 @@ function cleanup { datasetexists $TESTPOOL && destroy_pool $TESTPOOL set_tunable64 TXG_TIMEOUT $timeout + log_must restore_tunable BCLONE_WAIT_DIRTY } log_onexit cleanup +log_must save_tunable BCLONE_WAIT_DIRTY + log_must set_tunable64 TXG_TIMEOUT 5000 log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS log_must sync_pool $TESTPOOL true +# Verify fallback to copy when there are dirty blocks +log_must set_tunable32 BCLONE_WAIT_DIRTY 0 + log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=128K count=4 log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 0 0 524288 @@ -61,5 +67,20 @@ log_must have_same_content /$TESTPOOL/file /$TESTPOOL/clone typeset blocks=$(get_same_blocks $TESTPOOL file $TESTPOOL clone) log_must [ "$blocks" = "" ] +log_must rm /$TESTPOOL/file /$TESTPOOL/clone + +# Verify blocks are cloned even when there are dirty blocks +log_must set_tunable32 BCLONE_WAIT_DIRTY 1 + +log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=128K count=4 +log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 0 0 524288 + +log_must sync_pool $TESTPOOL + +log_must have_same_content /$TESTPOOL/file /$TESTPOOL/clone + +typeset blocks=$(get_same_blocks $TESTPOOL file $TESTPOOL clone) +log_must [ "$blocks" = "0 1 2 3" ] + log_pass $claim diff --git a/tests/zfs-tests/tests/functional/cp_files/cp_files_002_pos.ksh b/tests/zfs-tests/tests/functional/cp_files/cp_files_002_pos.ksh index 8f3e6d12e..449dedacb 100755 --- a/tests/zfs-tests/tests/functional/cp_files/cp_files_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/cp_files/cp_files_002_pos.ksh @@ -56,7 +56,7 @@ function cleanup { datasetexists $TESTPOOL/cp-reflink && \ destroy_dataset $$TESTPOOL/cp-reflink -f - log_must set_tunable32 BCLONE_WAIT_DIRTY 0 + log_must restore_tunable BCLONE_WAIT_DIRTY } function verify_copy @@ -81,6 +81,8 @@ SRC_SIZE=$((1024 + $RANDOM % 1024)) # A smaller recordsize is used merely to speed up the test. RECORDSIZE=4096 +log_must save_tunable BCLONE_WAIT_DIRTY + log_must zfs create -o recordsize=$RECORDSIZE $TESTPOOL/cp-reflink CP_TESTDIR=$(get_prop mountpoint $TESTPOOL/cp-reflink)