From bbdc6ae49518a4be7230ab673370e9231e2f72e7 Mon Sep 17 00:00:00 2001
From: Etienne Dechamps
Date: Mon, 3 Sep 2012 14:56:26 +0200
Subject: [PATCH] Add interface for file hole punching.

This adds an interface to "punch holes" (deallocate space) in VFS
files. The interface is identical to the Solaris VOP_SPACE interface.
This interface is necessary for TRIM support on file vdevs.

This is implemented using Linux fallocate(FALLOC_FL_PUNCH_HOLE), which
was introduced in 2.6.38. For a brief time before 2.6.38 this was done
using the truncate_range inode operation, which was quickly deprecated.
This patch only supports FALLOC_FL_PUNCH_HOLE.

This adds support for the truncate_range() inode operation to
VOP_SPACE() for file hole punching. This API is deprecated and removed
in 3.5, so it's only useful for old kernels.

On tmpfs, the truncate_range() inode operation translates to
shmem_truncate_range(). Unfortunately, this function expects the end
offset to be inclusive and aligned to the end of a page. If it is not,
the kernel will stop with a BUG_ON().

This patch fixes the issue by adapting to the constraints set forth by
shmem_truncate_range().
Signed-off-by: Brian Behlendorf
Closes #168
---
 config/spl-build.m4    | 22 ++++++++++++++
 include/sys/vnode.h    |  6 ++++
 module/spl/spl-vnode.c | 66 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 94 insertions(+)

diff --git a/config/spl-build.m4 b/config/spl-build.m4
index b7ab54d32..0c7a03cf1 100644
--- a/config/spl-build.m4
+++ b/config/spl-build.m4
@@ -72,6 +72,7 @@ AC_DEFUN([SPL_AC_CONFIG_KERNEL], [
 	SPL_AC_4ARGS_VFS_RENAME
 	SPL_AC_VFS_FSYNC
 	SPL_AC_2ARGS_VFS_FSYNC
+	SPL_AC_INODE_TRUNCATE_RANGE
 	SPL_AC_FS_STRUCT_SPINLOCK
 	SPL_AC_CRED_STRUCT
 	SPL_AC_GROUPS_SEARCH
@@ -1992,6 +1993,27 @@ AC_DEFUN([SPL_AC_2ARGS_VFS_FSYNC], [
 	])
 ])
 
+dnl #
+dnl # 3.5 API change,
+dnl # inode_operations.truncate_range removed
+dnl # (deprecated in favor of FALLOC_FL_PUNCH_HOLE)
+dnl #
+AC_DEFUN([SPL_AC_INODE_TRUNCATE_RANGE], [
+	AC_MSG_CHECKING([whether truncate_range() inode operation is available])
+	SPL_LINUX_TRY_COMPILE([
+		#include <linux/fs.h>
+	],[
+		struct inode_operations ops;
+		ops.truncate_range = NULL;
+	],[
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_INODE_TRUNCATE_RANGE, 1,
+			[truncate_range() inode operation is available])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+
 dnl #
 dnl # 2.6.33 API change. Also backported in RHEL5 as of 2.6.18-190.el5.
 dnl # Earlier versions of rwsem_is_locked() were inline and had a race
diff --git a/include/sys/vnode.h b/include/sys/vnode.h
index 03462bd70..99614d55b 100644
--- a/include/sys/vnode.h
+++ b/include/sys/vnode.h
@@ -66,6 +66,9 @@
 #define FNODSYNC	0x10000 /* fsync pseudo flag */
 #define FNOFOLLOW	0x20000 /* don't follow symlinks */
 
+#define F_FREESP	11 /* Free file space */
+
+
 /*
  * The vnode AT_ flags are mapped to the Linux ATTR_* flags.
  * This allows them to be used safely with an iattr structure.
@@ -185,6 +188,8 @@ extern int vn_remove(const char *path, uio_seg_t seg, int flags);
 extern int vn_rename(const char *path1, const char *path2, int x1);
 extern int vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4);
 extern int vn_fsync(vnode_t *vp, int flags, void *x3, void *x4);
+extern int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag,
+    offset_t offset, void *x6, void *x7);
 extern file_t *vn_getf(int fd);
 extern void vn_releasef(int fd);
 extern int vn_set_pwd(const char *filename);
@@ -197,6 +202,7 @@ void spl_vn_fini(void);
 #define VOP_SEEK				vn_seek
 #define VOP_GETATTR				vn_getattr
 #define VOP_FSYNC				vn_fsync
+#define VOP_SPACE				vn_space
 #define VOP_PUTPAGE(vp, o, s, f, x1, x2)	((void)0)
 #define vn_is_readonly(vp)			0
 #define getf					vn_getf
diff --git a/module/spl/spl-vnode.c b/module/spl/spl-vnode.c
index 2e55b007b..f5fc65d26 100644
--- a/module/spl/spl-vnode.c
+++ b/module/spl/spl-vnode.c
@@ -25,6 +25,7 @@
 \*****************************************************************************/
 
 #include <sys/vnode.h>
+#include <linux/falloc.h>
 #include <spl-debug.h>
 
 #ifdef SS_DEBUG_SUBSYS
@@ -510,6 +511,71 @@ int vn_fsync(vnode_t *vp, int flags, void *x3, void *x4)
 } /* vn_fsync() */
 EXPORT_SYMBOL(vn_fsync);
 
+/*
+ * Free (deallocate) the byte range described by bfp in the file backing vp.
+ *
+ * Only cmd == F_FREESP with bfp->l_whence == 0 is handled; any other
+ * request returns EOPNOTSUPP.  The flag, offset, x6 and x7 arguments are
+ * not used.
+ *
+ * fallocate(FALLOC_FL_PUNCH_HOLE) is tried first.  If it is unavailable
+ * or fails, the deprecated truncate_range() inode operation is used when
+ * the kernel still provides it; that path always reports success.
+ *
+ * Returns 0 on success, otherwise a positive errno.
+ */
+int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag,
+    offset_t offset, void *x6, void *x7)
+{
+	int error = EOPNOTSUPP;
+	SENTRY;
+
+	if (cmd != F_FREESP || bfp->l_whence != 0)
+		SRETURN(EOPNOTSUPP);
+
+	ASSERT(vp);
+	ASSERT(vp->v_file);
+	ASSERT(bfp->l_start >= 0 && bfp->l_len > 0);
+
+#ifdef FALLOC_FL_PUNCH_HOLE
+	if (vp->v_file->f_op->fallocate) {
+		error = -vp->v_file->f_op->fallocate(vp->v_file,
+		    FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+		    bfp->l_start, bfp->l_len);
+		if (!error)
+			SRETURN(0);
+	}
+#endif
+
+#ifdef HAVE_INODE_TRUNCATE_RANGE
+	if (vp->v_file->f_dentry && vp->v_file->f_dentry->d_inode &&
+	    vp->v_file->f_dentry->d_inode->i_op &&
+	    vp->v_file->f_dentry->d_inode->i_op->truncate_range) {
+		off_t end = bfp->l_start + bfp->l_len;
+		/*
+		 * Judging from the code in shmem_truncate_range(),
+		 * it seems the kernel expects the end offset to be
+		 * inclusive and aligned to the end of a page.
+		 */
+		if (end % PAGE_SIZE != 0) {
+			end &= ~(off_t)(PAGE_SIZE - 1);
+			if (end <= bfp->l_start)
+				SRETURN(0);
+		}
+		--end;
+
+		vp->v_file->f_dentry->d_inode->i_op->truncate_range(
+			vp->v_file->f_dentry->d_inode,
+			bfp->l_start, end
+		);
+		SRETURN(0);
+	}
+#endif
+
+	SRETURN(error);
+}
+EXPORT_SYMBOL(vn_space);
+
 /* Function must be called while holding the vn_file_lock */
 static file_t *
 file_find(int fd)