mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-24 11:18:52 +03:00
Implementation of block cloning for ZFS
Block cloning allows a file (or a subset of its blocks) to be manually cloned into another (or the same) file by creating additional references to the data blocks instead of copying the data itself. Those references are kept in the Block Reference Tables (BRTs). The whole design of block cloning is documented in module/zfs/brt.c. Reviewed-by: Alexander Motin <mav@FreeBSD.org> Reviewed-by: Christian Schwarz <christian.schwarz@nutanix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Rich Ercolani <rincebrain@gmail.com> Signed-off-by: Pawel Jakub Dawidek <pawel@dawidek.net> Closes #13392
This commit is contained in:
committed by
GitHub
parent
da19d919a8
commit
67a1b03791
@@ -97,6 +97,8 @@ __FBSDID("$FreeBSD$");
|
||||
SYSCTL_DECL(_vfs_zfs);
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, arc, CTLFLAG_RW, 0,
|
||||
"ZFS adaptive replacement cache");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, brt, CTLFLAG_RW, 0,
|
||||
"ZFS Block Reference Table");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, condense, CTLFLAG_RW, 0, "ZFS condense");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, dbuf, CTLFLAG_RW, 0, "ZFS disk buf cache");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, dbuf_cache, CTLFLAG_RW, 0,
|
||||
|
||||
@@ -153,7 +153,12 @@ struct vfsops zfs_vfsops = {
|
||||
.vfs_quotactl = zfs_quotactl,
|
||||
};
|
||||
|
||||
VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN);
|
||||
/*
 * Register zfs_vfsops under the name "zfs". When the kernel headers define
 * VFCF_CROSS_COPY_FILE_RANGE, that flag is added to the registration;
 * otherwise only VFCF_DELEGADMIN and VFCF_JAIL are used.
 * NOTE(review): the flag presumably lets copy_file_range() reach ZFS for
 * files on different mounts -- confirm against the FreeBSD VFS headers.
 */
#ifdef VFCF_CROSS_COPY_FILE_RANGE
|
||||
VFS_SET(zfs_vfsops, zfs,
|
||||
VFCF_DELEGADMIN | VFCF_JAIL | VFCF_CROSS_COPY_FILE_RANGE);
|
||||
#else
|
||||
VFS_SET(zfs_vfsops, zfs, VFCF_DELEGADMIN | VFCF_JAIL);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We need to keep a count of active fs's.
|
||||
|
||||
@@ -30,7 +30,6 @@
|
||||
/* Portions Copyright 2010 Robert Milkowski */
|
||||
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/systm.h>
|
||||
@@ -84,6 +83,12 @@
|
||||
#include <vm/vm_param.h>
|
||||
#include <sys/zil.h>
|
||||
#include <sys/zfs_vnops.h>
|
||||
#include <sys/module.h>
|
||||
#include <sys/sysent.h>
|
||||
#include <security/mac/mac_framework.h>
|
||||
#include <sys/dmu_impl.h>
|
||||
#include <sys/brt.h>
|
||||
#include <sys/zfeature.h>
|
||||
|
||||
#include <vm/vm_object.h>
|
||||
|
||||
@@ -6209,6 +6214,93 @@ zfs_deallocate(struct vop_deallocate_args *ap)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef _SYS_SYSPROTO_H_
|
||||
/*
 * Argument block for the copy_file_range vnode operation. Offsets and length
 * are passed by pointer.
 */
struct vop_copy_file_range_args {
	struct vnode *a_invp;		/* vnode to copy from */
	off_t *a_inoffp;		/* offset within a_invp */
	struct vnode *a_outvp;		/* vnode to copy to */
	off_t *a_outoffp;		/* offset within a_outvp */
	size_t *a_lenp;			/* number of bytes requested */
	unsigned int a_flags;
	struct ucred *a_incred;		/* credentials for a_invp */
	struct ucred *a_outcred;	/* credentials for a_outvp */
	struct thread *a_fsizetd;	/* thread for the file size limit */
};
|
||||
#endif
|
||||
/*
|
||||
* TODO: FreeBSD will only call file system-specific copy_file_range() if both
|
||||
* files resides under the same mountpoint. In case of ZFS we want to be called
|
||||
* even is files are in different datasets (but on the same pools, but we need
|
||||
* to check that ourselves).
|
||||
*/
|
||||
static int
|
||||
zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
|
||||
{
|
||||
struct vnode *invp = ap->a_invp;
|
||||
struct vnode *outvp = ap->a_outvp;
|
||||
struct mount *mp;
|
||||
struct uio io;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* TODO: If offset/length is not aligned to recordsize, use
|
||||
* vn_generic_copy_file_range() on this fragment.
|
||||
* It would be better to do this after we lock the vnodes, but then we
|
||||
* need something else than vn_generic_copy_file_range().
|
||||
*/
|
||||
|
||||
/* Lock both vnodes, avoiding risk of deadlock. */
|
||||
do {
|
||||
mp = NULL;
|
||||
error = vn_start_write(outvp, &mp, V_WAIT);
|
||||
if (error == 0) {
|
||||
error = vn_lock(outvp, LK_EXCLUSIVE);
|
||||
if (error == 0) {
|
||||
if (invp == outvp)
|
||||
break;
|
||||
error = vn_lock(invp, LK_SHARED | LK_NOWAIT);
|
||||
if (error == 0)
|
||||
break;
|
||||
VOP_UNLOCK(outvp);
|
||||
if (mp != NULL)
|
||||
vn_finished_write(mp);
|
||||
mp = NULL;
|
||||
error = vn_lock(invp, LK_SHARED);
|
||||
if (error == 0)
|
||||
VOP_UNLOCK(invp);
|
||||
}
|
||||
}
|
||||
if (mp != NULL)
|
||||
vn_finished_write(mp);
|
||||
} while (error == 0);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
#ifdef MAC
|
||||
error = mac_vnode_check_write(curthread->td_ucred, ap->a_outcred,
|
||||
outvp);
|
||||
if (error != 0)
|
||||
goto unlock;
|
||||
#endif
|
||||
|
||||
io.uio_offset = *ap->a_outoffp;
|
||||
io.uio_resid = *ap->a_lenp;
|
||||
error = vn_rlimit_fsize(outvp, &io, ap->a_fsizetd);
|
||||
if (error != 0)
|
||||
goto unlock;
|
||||
|
||||
error = zfs_clone_range(VTOZ(invp), ap->a_inoffp, VTOZ(outvp),
|
||||
ap->a_outoffp, ap->a_lenp, ap->a_fsizetd->td_ucred);
|
||||
|
||||
unlock:
|
||||
if (invp != outvp)
|
||||
VOP_UNLOCK(invp);
|
||||
VOP_UNLOCK(outvp);
|
||||
if (mp != NULL)
|
||||
vn_finished_write(mp);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
struct vop_vector zfs_vnodeops;
|
||||
struct vop_vector zfs_fifoops;
|
||||
struct vop_vector zfs_shareops;
|
||||
@@ -6272,6 +6364,7 @@ struct vop_vector zfs_vnodeops = {
|
||||
#if __FreeBSD_version >= 1400043
|
||||
.vop_add_writecount = vop_stdadd_writecount_nomsync,
|
||||
#endif
|
||||
.vop_copy_file_range = zfs_freebsd_copy_file_range,
|
||||
};
|
||||
VFS_VOP_VECTOR_REGISTER(zfs_vnodeops);
|
||||
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
#include <sys/systm.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/resourcevar.h>
|
||||
#include <sys/mntent.h>
|
||||
#include <sys/u8_textprep.h>
|
||||
#include <sys/dsl_dataset.h>
|
||||
@@ -2113,3 +2114,28 @@ zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf)
|
||||
return (err);
|
||||
}
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#ifdef _KERNEL
|
||||
int
|
||||
zfs_rlimit_fsize(off_t fsize)
|
||||
{
|
||||
struct thread *td = curthread;
|
||||
off_t lim;
|
||||
|
||||
if (td == NULL)
|
||||
return (0);
|
||||
|
||||
lim = lim_cur(td, RLIMIT_FSIZE);
|
||||
if (__predict_true((uoff_t)fsize <= lim))
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* The limit is reached.
|
||||
*/
|
||||
PROC_LOCK(td->td_proc);
|
||||
kern_psignal(td->td_proc, SIGXFSZ);
|
||||
PROC_UNLOCK(td->td_proc);
|
||||
|
||||
return (EFBIG);
|
||||
}
|
||||
#endif /* _KERNEL */
|
||||
|
||||
Reference in New Issue
Block a user