mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-25 01:16:34 +03:00
linux: implement filesystem-side copy/clone functions
This implements the Linux VFS ops required to service the file copy/clone APIs: .copy_file_range (4.5+) .clone_file_range (4.5-4.19) .dedupe_file_range (4.5-4.19) .remap_file_range (4.20+) Note that dedupe_file_range() and remap_file_range(REMAP_FILE_DEDUP) are hooked up here, but are not implemented yet. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Kay Pedersen <mail@mkwg.de> Signed-off-by: Rob Norris <rob.norris@klarasystems.com> Sponsored-By: OpenDrives Inc. Sponsored-By: Klara Inc. Closes #15050
This commit is contained in:
parent
f6facd2429
commit
5a35c68b67
164
config/kernel-vfs-file_range.m4
Normal file
164
config/kernel-vfs-file_range.m4
Normal file
@ -0,0 +1,164 @@
|
||||
dnl #
|
||||
dnl # The *_file_range APIs have a long history:
|
||||
dnl #
|
||||
dnl # 2.6.29: BTRFS_IOC_CLONE and BTRFS_IOC_CLONE_RANGE ioctl introduced
|
||||
dnl # 3.12: BTRFS_IOC_FILE_EXTENT_SAME ioctl introduced
|
||||
dnl #
|
||||
dnl # 4.5: copy_file_range() syscall introduced, added to VFS
|
||||
dnl # 4.5: BTRFS_IOC_CLONE and BTRFS_IOC_CLONE_RANGE renamed to FICLONE and
|
||||
dnl # FICLONERANGE, added to VFS as clone_file_range()
|
||||
dnl # 4.5: BTRFS_IOC_FILE_EXTENT_SAME renamed to FIDEDUPERANGE, added to VFS
|
||||
dnl # as dedupe_file_range()
|
||||
dnl #
|
||||
dnl # 4.20: VFS clone_file_range() and dedupe_file_range() replaced by
|
||||
dnl # remap_file_range()
|
||||
dnl #
|
||||
dnl # 5.3: VFS copy_file_range() expected to do its own fallback,
|
||||
dnl # generic_copy_file_range() added to support it
|
||||
dnl #
|
||||
dnl #
dnl # Register the compile test "vfs_copy_file_range". The test source
dnl # assigns a six-argument handler returning ssize_t to the
dnl # .copy_file_range member of struct file_operations.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_COPY_FILE_RANGE], [
	ZFS_LINUX_TEST_SRC([vfs_copy_file_range], [
		#include <linux/fs.h>

		static ssize_t
		test_copy_file_range(struct file *src_file, loff_t src_off,
		    struct file *dst_file, loff_t dst_off, size_t len,
		    unsigned int flags)
		{
			(void) src_file;
			(void) src_off;
			(void) dst_file;
			(void) dst_off;
			(void) len;
			(void) flags;
			return (0);
		}

		static const struct file_operations
		    fops __attribute__ ((unused)) = {
			.copy_file_range	= test_copy_file_range,
		};
	], [])
])
|
||||
dnl #
dnl # Report the outcome of the "vfs_copy_file_range" test and define
dnl # HAVE_VFS_COPY_FILE_RANGE when it passed.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_VFS_COPY_FILE_RANGE], [
	AC_MSG_CHECKING([whether fops->copy_file_range() is available])
	ZFS_LINUX_TEST_RESULT([vfs_copy_file_range], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_VFS_COPY_FILE_RANGE], [1],
		    [fops->copy_file_range() is available])
	], [
		AC_MSG_RESULT([no])
	])
])
|
||||
|
||||
dnl #
dnl # Register the compile test "generic_copy_file_range". The test body
dnl # calls generic_copy_file_range() with the same six arguments that a
dnl # copy_file_range handler receives.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GENERIC_COPY_FILE_RANGE], [
	ZFS_LINUX_TEST_SRC([generic_copy_file_range], [
		#include <linux/fs.h>
	], [
		struct file *src_file __attribute__ ((unused)) = NULL;
		struct file *dst_file __attribute__ ((unused)) = NULL;
		loff_t src_off __attribute__ ((unused)) = 0;
		loff_t dst_off __attribute__ ((unused)) = 0;
		size_t len __attribute__ ((unused)) = 0;
		unsigned int flags __attribute__ ((unused)) = 0;

		generic_copy_file_range(src_file, src_off, dst_file, dst_off,
		    len, flags);
	])
])
|
||||
dnl #
dnl # Report the outcome of the "generic_copy_file_range" test, which is
dnl # evaluated together with the generic_copy_file_range symbol from
dnl # fs/read_write.c, and define HAVE_VFS_GENERIC_COPY_FILE_RANGE when
dnl # it passed.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_VFS_GENERIC_COPY_FILE_RANGE], [
	AC_MSG_CHECKING([whether generic_copy_file_range() is available])
	ZFS_LINUX_TEST_RESULT_SYMBOL([generic_copy_file_range],
	    [generic_copy_file_range], [fs/read_write.c], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_VFS_GENERIC_COPY_FILE_RANGE], [1],
		    [generic_copy_file_range() is available])
	], [
		AC_MSG_RESULT([no])
	])
])
|
||||
|
||||
dnl #
dnl # Register the compile test "vfs_clone_file_range". The test source
dnl # assigns a five-argument handler returning int to the
dnl # .clone_file_range member of struct file_operations.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_CLONE_FILE_RANGE], [
	ZFS_LINUX_TEST_SRC([vfs_clone_file_range], [
		#include <linux/fs.h>

		static int
		test_clone_file_range(struct file *src_file, loff_t src_off,
		    struct file *dst_file, loff_t dst_off, u64 len)
		{
			(void) src_file;
			(void) src_off;
			(void) dst_file;
			(void) dst_off;
			(void) len;
			return (0);
		}

		static const struct file_operations
		    fops __attribute__ ((unused)) = {
			.clone_file_range	= test_clone_file_range,
		};
	], [])
])
|
||||
dnl #
dnl # Report the outcome of the "vfs_clone_file_range" test and define
dnl # HAVE_VFS_CLONE_FILE_RANGE when it passed.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_VFS_CLONE_FILE_RANGE], [
	AC_MSG_CHECKING([whether fops->clone_file_range() is available])
	ZFS_LINUX_TEST_RESULT([vfs_clone_file_range], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_VFS_CLONE_FILE_RANGE], [1],
		    [fops->clone_file_range() is available])
	], [
		AC_MSG_RESULT([no])
	])
])
|
||||
|
||||
dnl #
dnl # Register the compile test "vfs_dedupe_file_range". The test source
dnl # assigns a five-argument handler returning int to the
dnl # .dedupe_file_range member of struct file_operations.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_DEDUPE_FILE_RANGE], [
	ZFS_LINUX_TEST_SRC([vfs_dedupe_file_range], [
		#include <linux/fs.h>

		static int
		test_dedupe_file_range(struct file *src_file, loff_t src_off,
		    struct file *dst_file, loff_t dst_off, u64 len)
		{
			(void) src_file;
			(void) src_off;
			(void) dst_file;
			(void) dst_off;
			(void) len;
			return (0);
		}

		static const struct file_operations
		    fops __attribute__ ((unused)) = {
			.dedupe_file_range	= test_dedupe_file_range,
		};
	], [])
])
|
||||
dnl #
dnl # Report the outcome of the "vfs_dedupe_file_range" test and define
dnl # HAVE_VFS_DEDUPE_FILE_RANGE when it passed.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_VFS_DEDUPE_FILE_RANGE], [
	AC_MSG_CHECKING([whether fops->dedupe_file_range() is available])
	ZFS_LINUX_TEST_RESULT([vfs_dedupe_file_range], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_VFS_DEDUPE_FILE_RANGE], [1],
		    [fops->dedupe_file_range() is available])
	], [
		AC_MSG_RESULT([no])
	])
])
|
||||
|
||||
dnl #
dnl # Register the compile test "vfs_remap_file_range". The test source
dnl # assigns a six-argument handler returning loff_t to the
dnl # .remap_file_range member of struct file_operations.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_REMAP_FILE_RANGE], [
	ZFS_LINUX_TEST_SRC([vfs_remap_file_range], [
		#include <linux/fs.h>

		static loff_t
		test_remap_file_range(struct file *src_file, loff_t src_off,
		    struct file *dst_file, loff_t dst_off, loff_t len,
		    unsigned int flags)
		{
			(void) src_file;
			(void) src_off;
			(void) dst_file;
			(void) dst_off;
			(void) len;
			(void) flags;
			return (0);
		}

		static const struct file_operations
		    fops __attribute__ ((unused)) = {
			.remap_file_range	= test_remap_file_range,
		};
	], [])
])
|
||||
|
||||
dnl #
dnl # Report the outcome of the "vfs_remap_file_range" test and define
dnl # HAVE_VFS_REMAP_FILE_RANGE when it passed.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_VFS_REMAP_FILE_RANGE], [
	AC_MSG_CHECKING([whether fops->remap_file_range() is available])
	ZFS_LINUX_TEST_RESULT([vfs_remap_file_range], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_VFS_REMAP_FILE_RANGE], [1],
		    [fops->remap_file_range() is available])
	], [
		AC_MSG_RESULT([no])
	])
])
|
@ -116,6 +116,11 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
|
||||
ZFS_AC_KERNEL_SRC_VFS_RW_ITERATE
|
||||
ZFS_AC_KERNEL_SRC_VFS_GENERIC_WRITE_CHECKS
|
||||
ZFS_AC_KERNEL_SRC_VFS_IOV_ITER
|
||||
ZFS_AC_KERNEL_SRC_VFS_COPY_FILE_RANGE
|
||||
ZFS_AC_KERNEL_SRC_VFS_GENERIC_COPY_FILE_RANGE
|
||||
ZFS_AC_KERNEL_SRC_VFS_REMAP_FILE_RANGE
|
||||
ZFS_AC_KERNEL_SRC_VFS_CLONE_FILE_RANGE
|
||||
ZFS_AC_KERNEL_SRC_VFS_DEDUPE_FILE_RANGE
|
||||
ZFS_AC_KERNEL_SRC_KMAP_ATOMIC_ARGS
|
||||
ZFS_AC_KERNEL_SRC_FOLLOW_DOWN_ONE
|
||||
ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN
|
||||
@ -249,6 +254,11 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
|
||||
ZFS_AC_KERNEL_VFS_RW_ITERATE
|
||||
ZFS_AC_KERNEL_VFS_GENERIC_WRITE_CHECKS
|
||||
ZFS_AC_KERNEL_VFS_IOV_ITER
|
||||
ZFS_AC_KERNEL_VFS_COPY_FILE_RANGE
|
||||
ZFS_AC_KERNEL_VFS_GENERIC_COPY_FILE_RANGE
|
||||
ZFS_AC_KERNEL_VFS_REMAP_FILE_RANGE
|
||||
ZFS_AC_KERNEL_VFS_CLONE_FILE_RANGE
|
||||
ZFS_AC_KERNEL_VFS_DEDUPE_FILE_RANGE
|
||||
ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS
|
||||
ZFS_AC_KERNEL_FOLLOW_DOWN_ONE
|
||||
ZFS_AC_KERNEL_MAKE_REQUEST_FN
|
||||
|
@ -180,6 +180,20 @@ zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx)
|
||||
}
|
||||
#endif /* HAVE_VFS_ITERATE */
|
||||
|
||||
|
||||
/* zpl_file_range.c */
|
||||
|
||||
/* handlers for file_operations of the same name */
|
||||
extern ssize_t zpl_copy_file_range(struct file *src_file, loff_t src_off,
|
||||
struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags);
|
||||
extern loff_t zpl_remap_file_range(struct file *src_file, loff_t src_off,
|
||||
struct file *dst_file, loff_t dst_off, loff_t len, unsigned int flags);
|
||||
extern int zpl_clone_file_range(struct file *src_file, loff_t src_off,
|
||||
struct file *dst_file, loff_t dst_off, uint64_t len);
|
||||
extern int zpl_dedupe_file_range(struct file *src_file, loff_t src_off,
|
||||
struct file *dst_file, loff_t dst_off, uint64_t len);
|
||||
|
||||
|
||||
#if defined(HAVE_INODE_TIMESTAMP_TRUNCATE)
|
||||
#define zpl_inode_timestamp_truncate(ts, ip) timestamp_truncate(ts, ip)
|
||||
#elif defined(HAVE_INODE_TIMESPEC64_TIMES)
|
||||
|
@ -461,6 +461,7 @@ ZFS_OBJS_OS := \
|
||||
zpl_ctldir.o \
|
||||
zpl_export.o \
|
||||
zpl_file.o \
|
||||
zpl_file_range.o \
|
||||
zpl_inode.o \
|
||||
zpl_super.o \
|
||||
zpl_xattr.o \
|
||||
|
@ -1283,7 +1283,6 @@ zpl_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
}
|
||||
#endif /* CONFIG_COMPAT */
|
||||
|
||||
|
||||
const struct address_space_operations zpl_address_space_operations = {
|
||||
#ifdef HAVE_VFS_READPAGES
|
||||
.readpages = zpl_readpages,
|
||||
@ -1333,6 +1332,18 @@ const struct file_operations zpl_file_operations = {
|
||||
.aio_fsync = zpl_aio_fsync,
|
||||
#endif
|
||||
.fallocate = zpl_fallocate,
|
||||
#ifdef HAVE_VFS_COPY_FILE_RANGE
|
||||
.copy_file_range = zpl_copy_file_range,
|
||||
#endif
|
||||
#ifdef HAVE_VFS_REMAP_FILE_RANGE
|
||||
.remap_file_range = zpl_remap_file_range,
|
||||
#endif
|
||||
#ifdef HAVE_VFS_CLONE_FILE_RANGE
|
||||
.clone_file_range = zpl_clone_file_range,
|
||||
#endif
|
||||
#ifdef HAVE_VFS_DEDUPE_FILE_RANGE
|
||||
.dedupe_file_range = zpl_dedupe_file_range,
|
||||
#endif
|
||||
#ifdef HAVE_FILE_FADVISE
|
||||
.fadvise = zpl_fadvise,
|
||||
#endif
|
||||
|
183
module/os/linux/zfs/zpl_file_range.c
Normal file
183
module/os/linux/zfs/zpl_file_range.c
Normal file
@ -0,0 +1,183 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or https://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2023, Klara Inc.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
#include <linux/compat.h>
|
||||
#endif
|
||||
#include <linux/fs.h>
|
||||
#include <sys/file.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#include <sys/zfs_vnops.h>
|
||||
#include <sys/zfeature.h>
|
||||
|
||||
/*
|
||||
* Clone part of a file via block cloning.
|
||||
*
|
||||
* Note that we are not required to update file offsets; the kernel will take
|
||||
* care of that depending on how it was called.
|
||||
*/
|
||||
static ssize_t
|
||||
__zpl_clone_file_range(struct file *src_file, loff_t src_off,
|
||||
struct file *dst_file, loff_t dst_off, size_t len)
|
||||
{
|
||||
struct inode *src_i = file_inode(src_file);
|
||||
struct inode *dst_i = file_inode(dst_file);
|
||||
uint64_t src_off_o = (uint64_t)src_off;
|
||||
uint64_t dst_off_o = (uint64_t)dst_off;
|
||||
uint64_t len_o = (uint64_t)len;
|
||||
cred_t *cr = CRED();
|
||||
fstrans_cookie_t cookie;
|
||||
int err;
|
||||
|
||||
if (!spa_feature_is_enabled(
|
||||
dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
|
||||
return (-EOPNOTSUPP);
|
||||
|
||||
if (src_i != dst_i)
|
||||
spl_inode_lock_shared(src_i);
|
||||
spl_inode_lock(dst_i);
|
||||
|
||||
crhold(cr);
|
||||
cookie = spl_fstrans_mark();
|
||||
|
||||
err = -zfs_clone_range(ITOZ(src_i), &src_off_o, ITOZ(dst_i),
|
||||
&dst_off_o, &len_o, cr);
|
||||
|
||||
spl_fstrans_unmark(cookie);
|
||||
crfree(cr);
|
||||
|
||||
spl_inode_unlock(dst_i);
|
||||
if (src_i != dst_i)
|
||||
spl_inode_unlock_shared(src_i);
|
||||
|
||||
if (err < 0)
|
||||
return (err);
|
||||
|
||||
return ((ssize_t)len_o);
|
||||
}
|
||||
|
||||
#ifdef HAVE_VFS_COPY_FILE_RANGE
/*
 * Entry point for copy_file_range(). Copy len bytes from src_off in src_file
 * to dst_off in dst_file. We are permitted to do this however we like, so we
 * try to just clone the blocks, and if we can't support it, arrange for a
 * plain byte copy to happen instead.
 *
 * Returns the number of bytes copied, or a negative errno. No flags are
 * supported; any non-zero flags value yields -EINVAL.
 */
ssize_t
zpl_copy_file_range(struct file *src_file, loff_t src_off,
    struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags)
{
	ssize_t ret;

	if (flags != 0)
		return (-EINVAL);

	/* Try to do it via zfs_clone_range() */
	ret = __zpl_clone_file_range(src_file, src_off,
	    dst_file, dst_off, len);

#ifdef HAVE_VFS_GENERIC_COPY_FILE_RANGE
	/*
	 * Since Linux 5.3 the filesystem driver is responsible for executing
	 * an appropriate fallback, and a generic fallback function is provided.
	 */
	if (ret == -EOPNOTSUPP || ret == -EXDEV)
		ret = generic_copy_file_range(src_file, src_off, dst_file,
		    dst_off, len, flags);
#else
	/*
	 * Before Linux 5.3 the kernel does the fallback copy itself, but only
	 * when the filesystem returns -EOPNOTSUPP. Translate the other
	 * "cannot clone this" result so it is not handed back to the caller
	 * as a hard error.
	 */
	if (ret == -EXDEV)
		ret = -EOPNOTSUPP;
#endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE */

	return (ret);
}
#endif /* HAVE_VFS_COPY_FILE_RANGE */
|
||||
|
||||
#ifdef HAVE_VFS_REMAP_FILE_RANGE
/*
 * Entry point for FICLONE/FICLONERANGE/FIDEDUPERANGE.
 *
 * FICLONE and FICLONERANGE behave much like copy_file_range(), with the
 * difference that they must clone; falling back to a copy is not allowed.
 * FICLONE is simply FICLONERANGE over the whole file, and the kernel sorts
 * out which one was requested, so both are handled identically here.
 *
 * FIDEDUPERANGE turns a non-clone into a clone: the range is compared in
 * both files and, if identical, made to share the same storage. That is
 * not implemented yet, so REMAP_FILE_DEDUP requests get -EOPNOTSUPP.
 *
 * Returns whatever __zpl_clone_file_range() returns, or -EINVAL if flags
 * contains anything other than REMAP_FILE_DEDUP or REMAP_FILE_CAN_SHORTEN.
 */
loff_t
zpl_remap_file_range(struct file *src_file, loff_t src_off,
    struct file *dst_file, loff_t dst_off, loff_t len, unsigned int flags)
{
	const unsigned int accepted =
	    REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN;
	loff_t count = len;

	if ((flags & ~accepted) != 0)
		return (-EINVAL);

	/*
	 * REMAP_FILE_CAN_SHORTEN tells us we may clone less than the given
	 * range if we want. It's designed for filesystems that make data past
	 * EOF available, and don't want it to be visible in both files. ZFS
	 * doesn't do that, so the flag is accepted and otherwise ignored.
	 */

	/* No support for dedup yet */
	if ((flags & REMAP_FILE_DEDUP) != 0)
		return (-EOPNOTSUPP);

	/* Zero length means to clone everything to the end of the file */
	if (count == 0)
		count = i_size_read(file_inode(src_file)) - src_off;

	return (__zpl_clone_file_range(src_file, src_off,
	    dst_file, dst_off, count));
}
#endif /* HAVE_VFS_REMAP_FILE_RANGE */
|
||||
|
||||
#ifdef HAVE_VFS_CLONE_FILE_RANGE
/*
 * Entry point for FICLONE and FICLONERANGE, before Linux 4.20.
 *
 * Unlike remap_file_range(), this hook returns an int status rather than a
 * byte count: 0 when the whole range was cloned, or a negative errno. The
 * byte count from __zpl_clone_file_range() must therefore not be returned
 * directly; it would be seen as a non-zero (failed) result and could also
 * be truncated when narrowed to int.
 */
int
zpl_clone_file_range(struct file *src_file, loff_t src_off,
    struct file *dst_file, loff_t dst_off, uint64_t len)
{
	ssize_t cloned;

	/* Zero length means to clone everything to the end of the file */
	if (len == 0)
		len = i_size_read(file_inode(src_file)) - src_off;

	cloned = __zpl_clone_file_range(src_file, src_off,
	    dst_file, dst_off, len);
	if (cloned < 0)
		return ((int)cloned);

	/*
	 * There is no way to report a partial clone through this interface,
	 * so anything short of the full length is an error.
	 */
	if ((uint64_t)cloned != len)
		return (-EINVAL);

	return (0);
}
#endif /* HAVE_VFS_CLONE_FILE_RANGE */
|
||||
|
||||
#ifdef HAVE_VFS_DEDUPE_FILE_RANGE
/*
 * Entry point for FIDEDUPERANGE, before Linux 4.20.
 *
 * Dedup is not implemented yet, so every request is refused with
 * -EOPNOTSUPP and none of the arguments are examined.
 */
int
zpl_dedupe_file_range(struct file *src_file, loff_t src_off,
    struct file *dst_file, loff_t dst_off, uint64_t len)
{
	(void) src_file;
	(void) src_off;
	(void) dst_file;
	(void) dst_off;
	(void) len;

	/* No support for dedup yet */
	return (-EOPNOTSUPP);
}
#endif /* HAVE_VFS_DEDUPE_FILE_RANGE */
|
Loading…
Reference in New Issue
Block a user