Default to zfs_bclone_wait_dirty=1

Update the default FICLONE and FICLONERANGE ioctl behavior to wait
on dirty blocks.  While this does remove some control from the
application, in practice ZFS is better positioned to do the optimal
thing and immediately force a TXG sync.

Reviewed-by: Rob Norris <robn@despairlabs.com>
Reviewed-by: Alexander Motin <alexander.motin@TrueNAS.com>
Reviewed-by: George Melikov <mail@gmelikov.ru>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #17455
This commit is contained in:
Brian Behlendorf 2025-07-25 07:42:23 -07:00 committed by Alexander Motin
parent 6d378564b4
commit 582e7847f6
4 changed files with 39 additions and 14 deletions

View File

@ -1384,14 +1384,15 @@ If this setting is 0, then even if feature@block_cloning is enabled,
using functions and system calls that attempt to clone blocks will act as
though the feature is disabled.
.
.It Sy zfs_bclone_wait_dirty Ns = Ns Sy 0 Ns | Ns 1 Pq int
When set to 1 the FICLONE and FICLONERANGE ioctls wait for dirty data to be
written to disk.
This allows the clone operation to reliably succeed when a file is
.It Sy zfs_bclone_wait_dirty Ns = Ns Sy 1 Ns | Ns 0 Pq int
When set to 1 the FICLONE and FICLONERANGE ioctls will wait for any dirty
data to be written to disk before proceeding.
This ensures that the clone operation reliably succeeds, even if a file is
modified and then immediately cloned.
For small files this may be slower than making a copy of the file.
Therefore, this setting defaults to 0 which causes a clone operation to
immediately fail when encountering a dirty block.
Note that for small files this may be slower than simply copying the file.
When set to 0 the clone operation will immediately fail if it encounters
any dirty blocks.
By default waiting is enabled.
.
.It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string
Select a BLAKE3 implementation.

View File

@ -67,13 +67,14 @@
int zfs_bclone_enabled = 1;
/*
* When set zfs_clone_range() waits for dirty data to be written to disk.
* This allows the clone operation to reliably succeed when a file is modified
* and then immediately cloned. For small files this may be slower than making
* a copy of the file and is therefore not the default. However, in certain
* scenarios this behavior may be desirable so a tunable is provided.
* When set to 1 the FICLONE and FICLONERANGE ioctls will wait for any dirty
* data to be written to disk before proceeding. This ensures that the clone
* operation reliably succeeds, even if a file is modified and then immediately
* cloned. Note that for small files this may be slower than simply copying
* the file. When set to 0 the clone operation will immediately fail if it
* encounters any dirty blocks. By default waiting is enabled.
*/
int zfs_bclone_wait_dirty = 0;
int zfs_bclone_wait_dirty = 1;
/*
* Enable Direct I/O. If this setting is 0, then all I/O requests will be

View File

@ -41,16 +41,22 @@ function cleanup
{
datasetexists $TESTPOOL && destroy_pool $TESTPOOL
set_tunable64 TXG_TIMEOUT $timeout
log_must restore_tunable BCLONE_WAIT_DIRTY
}
log_onexit cleanup
log_must save_tunable BCLONE_WAIT_DIRTY
log_must set_tunable64 TXG_TIMEOUT 5000
log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS
log_must sync_pool $TESTPOOL true
# Verify fallback to copy when there are dirty blocks
log_must set_tunable32 BCLONE_WAIT_DIRTY 0
log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=128K count=4
log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 0 0 524288
@ -61,5 +67,20 @@ log_must have_same_content /$TESTPOOL/file /$TESTPOOL/clone
typeset blocks=$(get_same_blocks $TESTPOOL file $TESTPOOL clone)
log_must [ "$blocks" = "" ]
log_must rm /$TESTPOOL/file /$TESTPOOL/clone
# Verify blocks are cloned even when there are dirty blocks
log_must set_tunable32 BCLONE_WAIT_DIRTY 1
log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=128K count=4
log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 0 0 524288
log_must sync_pool $TESTPOOL
log_must have_same_content /$TESTPOOL/file /$TESTPOOL/clone
typeset blocks=$(get_same_blocks $TESTPOOL file $TESTPOOL clone)
log_must [ "$blocks" = "0 1 2 3" ]
log_pass $claim

View File

@ -56,7 +56,7 @@ function cleanup
{
datasetexists $TESTPOOL/cp-reflink && \
destroy_dataset $$TESTPOOL/cp-reflink -f
log_must set_tunable32 BCLONE_WAIT_DIRTY 0
log_must restore_tunable BCLONE_WAIT_DIRTY
}
function verify_copy
@ -81,6 +81,8 @@ SRC_SIZE=$((1024 + $RANDOM % 1024))
# A smaller recordsize is used merely to speed up the test.
RECORDSIZE=4096
log_must save_tunable BCLONE_WAIT_DIRTY
log_must zfs create -o recordsize=$RECORDSIZE $TESTPOOL/cp-reflink
CP_TESTDIR=$(get_prop mountpoint $TESTPOOL/cp-reflink)