diff --git a/configure.ac b/configure.ac index 867c3351d..79246833d 100644 --- a/configure.ac +++ b/configure.ac @@ -325,6 +325,7 @@ AC_CONFIG_FILES([ tests/zfs-tests/tests/functional/devices/Makefile tests/zfs-tests/tests/functional/events/Makefile tests/zfs-tests/tests/functional/exec/Makefile + tests/zfs-tests/tests/functional/fallocate/Makefile tests/zfs-tests/tests/functional/fault/Makefile tests/zfs-tests/tests/functional/features/Makefile tests/zfs-tests/tests/functional/features/async_destroy/Makefile diff --git a/include/os/linux/zfs/sys/zfs_vfsops.h b/include/os/linux/zfs/sys/zfs_vfsops.h index 0cc659918..4e60a081a 100644 --- a/include/os/linux/zfs/sys/zfs_vfsops.h +++ b/include/os/linux/zfs/sys/zfs_vfsops.h @@ -210,7 +210,7 @@ extern int zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent); extern void zfs_preumount(struct super_block *sb); extern int zfs_umount(struct super_block *sb); extern int zfs_remount(struct super_block *sb, int *flags, zfs_mnt_t *zm); -extern int zfs_statvfs(struct dentry *dentry, struct kstatfs *statp); +extern int zfs_statvfs(struct inode *ip, struct kstatfs *statp); extern int zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp); extern int zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects); diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 1ab592390..687b85d0b 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -1658,6 +1658,25 @@ as a percentage of \fBzfs_dirty_data_max\fR. This should be less than Default value: \fB20\fR% of \fBzfs_dirty_data_max\fR. .RE +.sp +.ne 2 +.na +\fBzfs_fallocate_reserve_percent\fR (uint) +.ad +.RS 12n +Since ZFS is a copy-on-write filesystem with snapshots, blocks cannot be +preallocated for a file in order to guarantee that later writes will not +run out of space. 
Instead, fallocate() space preallocation only checks +that sufficient space is currently available in the pool or the user's +project quota allocation, and then creates a sparse file of the requested +size. The requested space is multiplied by \fBzfs_fallocate_reserve_percent\fR +to allow additional space for indirect blocks and other internal metadata. +Setting this value to 0 disables only fallocate(2) space preallocation, which +then returns EOPNOTSUPP again; hole punching is not affected. +.sp +Default value: \fB110\fR% +.RE + .sp .ne 2 .na diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c index ea5971b0c..9561960bc 100644 --- a/module/os/linux/zfs/zfs_vfsops.c +++ b/module/os/linux/zfs/zfs_vfsops.c @@ -1088,9 +1088,9 @@ objs: } int -zfs_statvfs(struct dentry *dentry, struct kstatfs *statp) +zfs_statvfs(struct inode *ip, struct kstatfs *statp) { - zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info; + zfsvfs_t *zfsvfs = ITOZSB(ip); uint64_t refdbytes, availbytes, usedobjs, availobjs; int err = 0; @@ -1148,7 +1148,7 @@ zfs_statvfs(struct dentry *dentry, struct kstatfs *statp) if (dmu_objset_projectquota_enabled(zfsvfs->z_os) && dmu_objset_projectquota_present(zfsvfs->z_os)) { - znode_t *zp = ITOZ(dentry->d_inode); + znode_t *zp = ITOZ(ip); if (zp->z_pflags & ZFS_PROJINHERIT && zp->z_projid && zpl_is_valid_projid(zp->z_projid)) diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c index 0fad63a4f..c26ed5d09 100644 --- a/module/os/linux/zfs/zpl_file.c +++ b/module/os/linux/zfs/zpl_file.c @@ -34,6 +34,11 @@ #include #include +/* + * When using fallocate(2) to preallocate space, inflate the requested + * capacity check by 10% to account for the required metadata blocks.
+ */ +unsigned int zfs_fallocate_reserve_percent = 110; static int zpl_open(struct inode *ip, struct file *filp) @@ -721,20 +726,23 @@ zpl_writepage(struct page *pp, struct writeback_control *wbc) } /* - * The only flag combination which matches the behavior of zfs_space() - * is FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE. The FALLOC_FL_PUNCH_HOLE + * The flag combination which matches the behavior of zfs_space() is + * FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE. The FALLOC_FL_PUNCH_HOLE * flag was introduced in the 2.6.38 kernel. + * + * The original mode=0 (allocate space) behavior can be reasonably emulated + * by checking if enough space exists and creating a sparse file, as real + * persistent space reservation is not possible due to COW, snapshots, etc. */ static long zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len) { cred_t *cr = CRED(); - flock64_t bf; loff_t olen; fstrans_cookie_t cookie; - int error; + int error = 0; - if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + if ((mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) != 0) return (-EOPNOTSUPP); if (offset < 0 || len <= 0) @@ -743,21 +751,54 @@ zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len) spl_inode_lock(ip); olen = i_size_read(ip); - if (offset > olen) { - spl_inode_unlock(ip); - return (0); - } - if (offset + len > olen) - len = olen - offset; - bf.l_type = F_WRLCK; - bf.l_whence = SEEK_SET; - bf.l_start = offset; - bf.l_len = len; - bf.l_pid = 0; - crhold(cr); cookie = spl_fstrans_mark(); - error = -zfs_space(ITOZ(ip), F_FREESP, &bf, O_RDWR, offset, cr); + if (mode & FALLOC_FL_PUNCH_HOLE) { + flock64_t bf; + + if (offset > olen) + goto out_unmark; + + if (offset + len > olen) + len = olen - offset; + bf.l_type = F_WRLCK; + bf.l_whence = SEEK_SET; + bf.l_start = offset; + bf.l_len = len; + bf.l_pid = 0; + + error = -zfs_space(ITOZ(ip), F_FREESP, &bf, O_RDWR, offset, cr); + } else if ((mode & ~FALLOC_FL_KEEP_SIZE) == 0) { + 
unsigned int percent = zfs_fallocate_reserve_percent; + struct kstatfs statfs; + + /* Legacy mode, disable fallocate compatibility. */ + if (percent == 0) { + error = -EOPNOTSUPP; + goto out_unmark; + } + + /* + * Use zfs_statvfs() instead of dmu_objset_space() since it + * also checks project quota limits, which are relevant here. + */ + error = zfs_statvfs(ip, &statfs); + if (error) + goto out_unmark; + + /* + * Shrink available space a bit to account for overhead/races. + * We know the product previously fit into availbytes from + * dmu_objset_space(), so the smaller product will also fit. + */ + if (len > statfs.f_bavail * (statfs.f_bsize * 100 / percent)) { + error = -ENOSPC; + goto out_unmark; + } + if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > olen) + error = zfs_freesp(ITOZ(ip), offset + len, 0, 0, FALSE); + } +out_unmark: spl_fstrans_unmark(cookie); spl_inode_unlock(ip); @@ -1030,3 +1071,9 @@ const struct file_operations zpl_dir_file_operations = { .compat_ioctl = zpl_compat_ioctl, #endif }; + +/* BEGIN CSTYLED */ +module_param(zfs_fallocate_reserve_percent, uint, 0644); +MODULE_PARM_DESC(zfs_fallocate_reserve_percent, + "Percentage of length to use for the available capacity check"); +/* END CSTYLED */ diff --git a/module/os/linux/zfs/zpl_super.c b/module/os/linux/zfs/zpl_super.c index 08cf75862..75adff517 100644 --- a/module/os/linux/zfs/zpl_super.c +++ b/module/os/linux/zfs/zpl_super.c @@ -138,7 +138,7 @@ zpl_statfs(struct dentry *dentry, struct kstatfs *statp) int error; cookie = spl_fstrans_mark(); - error = -zfs_statvfs(dentry, statp); + error = -zfs_statvfs(dentry->d_inode, statp); spl_fstrans_unmark(cookie); ASSERT3S(error, <=, 0); diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 897a6a955..a800e6bb8 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -94,6 +94,10 @@ tags = ['functional', 'devices'] tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter'] tags = ['functional', 'events'] 
+[tests/functional/fallocate:Linux] +tests = ['fallocate_prealloc', 'fallocate_punch-hole'] +tags = ['functional', 'fallocate'] + [tests/functional/fault:Linux] tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_replace_001_pos', 'auto_spare_001_pos', 'auto_spare_002_pos', 'auto_spare_multiple', diff --git a/tests/zfs-tests/tests/functional/Makefile.am b/tests/zfs-tests/tests/functional/Makefile.am index 776222f08..2df78d260 100644 --- a/tests/zfs-tests/tests/functional/Makefile.am +++ b/tests/zfs-tests/tests/functional/Makefile.am @@ -22,6 +22,7 @@ SUBDIRS = \ devices \ events \ exec \ + fallocate \ fault \ features \ grow \ diff --git a/tests/zfs-tests/tests/functional/fallocate/Makefile.am b/tests/zfs-tests/tests/functional/fallocate/Makefile.am new file mode 100644 index 000000000..5ff366d24 --- /dev/null +++ b/tests/zfs-tests/tests/functional/fallocate/Makefile.am @@ -0,0 +1,6 @@ +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/fallocate +dist_pkgdata_SCRIPTS = \ + setup.ksh \ + cleanup.ksh \ + fallocate_prealloc.ksh \ + fallocate_punch-hole.ksh diff --git a/tests/zfs-tests/tests/functional/fallocate/cleanup.ksh b/tests/zfs-tests/tests/functional/fallocate/cleanup.ksh new file mode 100755 index 000000000..bdfa61471 --- /dev/null +++ b/tests/zfs-tests/tests/functional/fallocate/cleanup.ksh @@ -0,0 +1,27 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright (c) 2020 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/tests/zfs-tests/tests/functional/fallocate/fallocate_prealloc.ksh b/tests/zfs-tests/tests/functional/fallocate/fallocate_prealloc.ksh new file mode 100755 index 000000000..7bb020fe5 --- /dev/null +++ b/tests/zfs-tests/tests/functional/fallocate/fallocate_prealloc.ksh @@ -0,0 +1,63 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2020 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Test fallocate(2) preallocation. +# +# STRATEGY: +# 1. Verify mode 0 fallocate is supported. +# 2. Verify default 10% reserve space is honored by setting a quota. 
+# + +verify_runnable "global" + +FILE=$TESTDIR/$TESTFILE0 + +function cleanup +{ + log_must zfs set quota=none $TESTPOOL + + [[ -e $TESTDIR ]] && log_must rm -Rf $TESTDIR/* +} + +log_assert "Ensure sparse files can be preallocated" + +log_onexit cleanup + +# Pre-allocate a sparse 1GB file. +log_must fallocate -l $((1024 * 1024 * 1024)) $FILE +log_must rm -Rf $TESTDIR/* + +# Verify that an additional ~10% reserve space is required. +log_must zfs set quota=100M $TESTPOOL +log_mustnot fallocate -l $((150 * 1024 * 1024)) $FILE +log_mustnot fallocate -l $((110 * 1024 * 1024)) $FILE +log_must fallocate -l $((90 * 1024 * 1024)) $FILE + +log_pass "Ensure sparse files can be preallocated" diff --git a/tests/zfs-tests/tests/functional/fallocate/fallocate_punch-hole.ksh b/tests/zfs-tests/tests/functional/fallocate/fallocate_punch-hole.ksh new file mode 100755 index 000000000..6a8faa487 --- /dev/null +++ b/tests/zfs-tests/tests/functional/fallocate/fallocate_punch-hole.ksh @@ -0,0 +1,97 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2020 by Lawrence Livermore National Security, LLC. +# + +. 
$STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Test `fallocate --punch-hole` +# +# STRATEGY: +# 1. Create a dense file +# 2. Punch an assortment of holes in the file and verify the result. +# + +verify_runnable "global" + +FILE=$TESTDIR/$TESTFILE0 +BLKSZ=$(get_prop recordsize $TESTPOOL) + +function cleanup +{ + [[ -e $TESTDIR ]] && log_must rm -f $FILE +} + +function check_disk_size +{ + typeset expected_size=$1 + + disk_size=$(du $TESTDIR/file | awk '{print $1}') + if [ $disk_size -ne $expected_size ]; then + log_fail "Incorrect size: $disk_size != $expected_size" + fi +} + +function check_apparent_size +{ + typeset expected_size=$1 + + apparent_size=$(stat_size) + if [ $apparent_size -ne $expected_size ]; then + log_fail "Incorrect size: $apparent_size != $expected_size" + fi +} + +log_assert "Ensure holes can be punched in files making them sparse" + +log_onexit cleanup + +# Create a dense file and check it is the correct size. +log_must file_write -o create -f $FILE -b $BLKSZ -c 8 +log_must check_disk_size $((131072 * 8)) + +# Punch a hole for the first full block. +log_must fallocate --punch-hole --offset 0 --length $BLKSZ $FILE +log_must check_disk_size $((131072 * 7)) + +# Partially punch a hole in the second block. +log_must fallocate --punch-hole --offset $BLKSZ --length $((BLKSZ / 2)) $FILE +log_must check_disk_size $((131072 * 7)) + +# Punch a hole which overlaps the third and fourth block. +log_must fallocate --punch-hole --offset $(((BLKSZ * 2) + (BLKSZ / 2))) \ + --length $((BLKSZ)) $FILE +log_must check_disk_size $((131072 * 7)) + +# Punch a hole from the fifth block past the end of file. The apparent +# file size should not change since --keep-size is implied.
+apparent_size=$(stat_size $FILE) +log_must fallocate --punch-hole --offset $((BLKSZ * 4)) \ + --length $((BLKSZ * 10)) $FILE +log_must check_disk_size $((131072 * 4)) +log_must check_apparent_size $apparent_size + +log_pass "Ensure holes can be punched in files making them sparse" diff --git a/tests/zfs-tests/tests/functional/fallocate/setup.ksh b/tests/zfs-tests/tests/functional/fallocate/setup.ksh new file mode 100755 index 000000000..32334d396 --- /dev/null +++ b/tests/zfs-tests/tests/functional/fallocate/setup.ksh @@ -0,0 +1,29 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END + +# +# Copyright (c) 2020 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib + +DISK=${DISKS%% *} +default_setup $DISK