BRT: Fix FICLONE/FICLONERANGE shortened copy

On Linux the ioctl_ficlonerange() and ioctl_ficlone() system calls
are expected to either fully clone the specified range or return an
error.  The range may be for an entire file.  While internally ZFS
supports cloning partial ranges there's no way to return the length
cloned to the caller so we need to make this all or nothing.

As part of this change support for the REMAP_FILE_CAN_SHORTEN flag
has been added.  When REMAP_FILE_CAN_SHORTEN is set zfs_clone_range()
will return a shortened range when encountering pending dirty records.
When it's clear zfs_clone_range() will block and wait for the records
to be written out allowing the blocks to be cloned.

Furthermore, the file range lock is held over the region being cloned
to prevent it from being modified while cloning.  This doesn't quite
provide an atomic semantics since if an error is encountered only a
portion of the range may be cloned.  This will be converted to an
error if REMAP_FILE_CAN_SHORTEN was not provided and returned to the
caller.  However, the destination file range is left in an undefined
state.

A test case has been added which exercises this functionality by
verifying that `cp --reflink=never|auto|always` works correctly.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #15728
Closes #15842
This commit is contained in:
Brian Behlendorf
2024-02-05 16:44:45 -08:00
committed by GitHub
parent a0d3fe72bf
commit 6dccdf501e
13 changed files with 243 additions and 39 deletions
+1
View File
@@ -94,6 +94,7 @@ VOL_MODE vol.mode zvol_volmode
VOL_RECURSIVE vol.recursive UNSUPPORTED
VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq
BCLONE_ENABLED zfs_bclone_enabled zfs_bclone_enabled
BCLONE_WAIT_DIRTY zfs_bclone_wait_dirty zfs_bclone_wait_dirty
XATTR_COMPAT xattr_compat zfs_xattr_compat
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max
+1
View File
@@ -1394,6 +1394,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/compression/setup.ksh \
functional/cp_files/cleanup.ksh \
functional/cp_files/cp_files_001_pos.ksh \
functional/cp_files/cp_files_002_pos.ksh \
functional/cp_files/cp_stress.ksh \
functional/cp_files/setup.ksh \
functional/crtime/cleanup.ksh \
@@ -0,0 +1,161 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2024 by Lawrence Livermore National Security, LLC.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib
#
# DESCRIPTION:
# Verify all cp --reflink modes work with modified file.
#
# STRATEGY:
# 1. Verify "cp --reflink=never|auto|always" behaves as expected.
# Two different modes of operation are tested.
#
# a. zfs_bclone_wait_dirty=0: FICLONE and FICLONERANGE fail with EINVAL
# when there are dirty blocks which cannot be immediately cloned.
# This is the default behavior.
#
# b. zfs_bclone_wait_dirty=1: FICLONE and FICLONERANGE wait for
# dirty blocks to be written to disk allowing the clone to succeed.
# The downside to this is it may be slow which depending on the
# situtation may defeat the point of making a clone.
#
verify_runnable "global"
verify_block_cloning
if ! is_linux; then
log_unsupported "cp --reflink is a GNU coreutils option"
fi
function cleanup
{
datasetexists $TESTPOOL/cp-reflink && \
destroy_dataset $$TESTPOOL/cp-reflink -f
log_must set_tunable32 BCLONE_WAIT_DIRTY 0
}
function verify_copy
{
src_cksum=$(sha256digest $1)
dst_cksum=$(sha256digest $2)
if [[ "$src_cksum" != "$dst_cksum" ]]; then
log_must ls -l $CP_TESTDIR
log_fail "checksum mismatch ($src_cksum != $dst_cksum)"
fi
}
log_assert "Verify all cp --reflink modes work with modified file"
log_onexit cleanup
SRC_FILE=src.data
DST_FILE=dst.data
SRC_SIZE=$(($RANDOM % 2048))
# A smaller recordsize is used merely to speed up the test.
RECORDSIZE=4096
log_must zfs create -o recordsize=$RECORDSIZE $TESTPOOL/cp-reflink
CP_TESTDIR=$(get_prop mountpoint $TESTPOOL/cp-reflink)
log_must cd $CP_TESTDIR
# Never wait on dirty blocks (zfs_bclone_wait_dirty=0)
log_must set_tunable32 BCLONE_WAIT_DIRTY 0
for mode in "never" "auto" "always"; do
log_note "Checking 'cp --reflink=$mode'"
# Create a new file and immediately copy it.
log_must dd if=/dev/urandom of=$SRC_FILE bs=$RECORDSIZE count=$SRC_SIZE
if [[ "$mode" == "always" ]]; then
log_mustnot cp --reflink=$mode $SRC_FILE $DST_FILE
log_must ls -l $CP_TESTDIR
else
log_must cp --reflink=$mode $SRC_FILE $DST_FILE
verify_copy $SRC_FILE $DST_FILE
fi
log_must rm -f $DST_FILE
# Append to an existing file and immediately copy it.
sync_pool $TESTPOOL
log_must dd if=/dev/urandom of=$SRC_FILE bs=$RECORDSIZE seek=$SRC_SIZE \
count=1 conv=notrunc
if [[ "$mode" == "always" ]]; then
log_mustnot cp --reflink=$mode $SRC_FILE $DST_FILE
log_must ls -l $CP_TESTDIR
else
log_must cp --reflink=$mode $SRC_FILE $DST_FILE
verify_copy $SRC_FILE $DST_FILE
fi
log_must rm -f $DST_FILE
# Overwrite a random range of an existing file and immediately copy it.
sync_pool $TESTPOOL
log_must dd if=/dev/urandom of=$SRC_FILE bs=$((RECORDSIZE / 2)) \
seek=$(($RANDOM % $SRC_SIZE)) count=$(($RANDOM % 16)) conv=notrunc
if [[ "$mode" == "always" ]]; then
log_mustnot cp --reflink=$mode $SRC_FILE $DST_FILE
log_must ls -l $CP_TESTDIR
else
log_must cp --reflink=$mode $SRC_FILE $DST_FILE
verify_copy $SRC_FILE $DST_FILE
fi
log_must rm -f $SRC_FILE $DST_FILE
done
# Wait on dirty blocks (zfs_bclone_wait_dirty=1)
log_must set_tunable32 BCLONE_WAIT_DIRTY 1
for mode in "never" "auto" "always"; do
log_note "Checking 'cp --reflink=$mode'"
# Create a new file and immediately copy it.
log_must dd if=/dev/urandom of=$SRC_FILE bs=$RECORDSIZE count=$SRC_SIZE
log_must cp --reflink=$mode $SRC_FILE $DST_FILE
verify_copy $SRC_FILE $DST_FILE
log_must rm -f $DST_FILE
# Append to an existing file and immediately copy it.
log_must dd if=/dev/urandom of=$SRC_FILE bs=$RECORDSIZE seek=$SRC_SIZE \
count=1 conv=notrunc
log_must cp --reflink=$mode $SRC_FILE $DST_FILE
verify_copy $SRC_FILE $DST_FILE
log_must rm -f $DST_FILE
# Overwrite a random range of an existing file and immediately copy it.
log_must dd if=/dev/urandom of=$SRC_FILE bs=$((RECORDSIZE / 2)) \
seek=$(($RANDOM % $SRC_SIZE)) count=$(($RANDOM % 16)) conv=notrunc
log_must cp --reflink=$mode $SRC_FILE $DST_FILE
verify_copy $SRC_FILE $DST_FILE
log_must rm -f $SRC_FILE $DST_FILE
done
log_pass