diff --git a/include/sys/zpl.h b/include/sys/zpl.h
index eb0e9f057..2d82d1ada 100644
--- a/include/sys/zpl.h
+++ b/include/sys/zpl.h
@@ -52,8 +52,10 @@ extern ssize_t zpl_read_common(struct inode *ip, const char *buf,
 extern ssize_t zpl_write_common(struct inode *ip, const char *buf,
     size_t len, loff_t *ppos, uio_seg_t segment, int flags,
     cred_t *cr);
+#if defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE)
 extern long zpl_fallocate_common(struct inode *ip, int mode,
     loff_t offset, loff_t len);
+#endif /* defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE) */
 
 extern const struct address_space_operations zpl_address_space_operations;
 extern const struct file_operations zpl_file_operations;
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 18b2564a2..ddd997fae 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -2565,8 +2565,6 @@ top:
 		if (err)
 			goto out3;
 
-		truncate_setsize(ip, vap->va_size);
-
 		/*
 		 * XXX - Note, we are not providing any open
 		 * mode flags here (like FNDELAY), so we may
diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
index 5fcb9e930..f2e305f7a 100644
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
@@ -1344,6 +1344,50 @@ zfs_extend(znode_t *zp, uint64_t end)
 	return (0);
 }
 
+/*
+ * zfs_zero_partial_page - Modeled after update_pages() but
+ * with different arguments and semantics for use by zfs_freesp().
+ *
+ * Zeroes a piece of a single page cache entry for zp at offset
+ * start and length len.
+ *
+ * Caller must acquire a range lock on the file for the region
+ * being zeroed in order that the ARC and page cache stay in sync.
+ */
+static void
+zfs_zero_partial_page(znode_t *zp, uint64_t start, uint64_t len)
+{
+	struct address_space *mp = ZTOI(zp)->i_mapping;
+	struct page *pp;
+	int64_t	off;
+	void *pb;
+
+	ASSERT((start & PAGE_CACHE_MASK) ==
+	    ((start + len - 1) & PAGE_CACHE_MASK));
+
+	off = start & (PAGE_CACHE_SIZE - 1);
+	start &= PAGE_CACHE_MASK;
+
+	pp = find_lock_page(mp, start >> PAGE_CACHE_SHIFT);
+	if (pp) {
+		if (mapping_writably_mapped(mp))
+			flush_dcache_page(pp);
+
+		pb = kmap(pp);
+		bzero(pb + off, len);
+		kunmap(pp);
+
+		if (mapping_writably_mapped(mp))
+			flush_dcache_page(pp);
+
+		mark_page_accessed(pp);
+		SetPageUptodate(pp);
+		ClearPageError(pp);
+		unlock_page(pp);
+		page_cache_release(pp);
+	}
+}
+
 /*
  * Free space in a file.
  *
@@ -1378,6 +1422,40 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
 
 	error = dmu_free_long_range(zsb->z_os, zp->z_id, off, len);
 
+	/*
+	 * Zero partial page cache entries.  This must be done under a
+	 * range lock in order to keep the ARC and page cache in sync.
+	 */
+	if (zp->z_is_mapped) {
+		loff_t first_page, last_page, page_len;
+		loff_t first_page_offset, last_page_offset;
+
+		/* first possible full page in hole */
+		first_page = (off + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+		/* last page of hole */
+		last_page = (off + len) >> PAGE_CACHE_SHIFT;
+
+		/* offset of first_page */
+		first_page_offset = first_page << PAGE_CACHE_SHIFT;
+		/* offset of last_page */
+		last_page_offset = last_page << PAGE_CACHE_SHIFT;
+
+		if (first_page > last_page) {
+			/* entire punched area within a single page */
+			zfs_zero_partial_page(zp, off, len);
+		} else {
+			/* beginning of punched area at the end of a page */
+			page_len = first_page_offset - off;
+			if (page_len > 0)
+				zfs_zero_partial_page(zp, off, page_len);
+
+			/* end of punched area at the beginning of a page */
+			page_len = off + len - last_page_offset;
+			if (page_len > 0)
+				zfs_zero_partial_page(zp, last_page_offset,
+				    page_len);
+		}
+	}
 	zfs_range_unlock(rl);
 
 	return (error);
@@ -1479,8 +1557,7 @@ zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
 		error = zfs_extend(zp, off+len);
 		if (error == 0 && log)
 			goto log;
-		else
-			return (error);
+		goto out;
 	}
 
 	/*
@@ -1500,7 +1577,7 @@ zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
 		error = zfs_extend(zp, off+len);
 	}
 	if (error || !log)
-		return (error);
+		goto out;
 log:
 	tx = dmu_tx_create(zsb->z_os);
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
@@ -1508,7 +1585,7 @@ log:
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
-		return (error);
+		goto out;
 	}
 
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, mtime, 16);
@@ -1522,8 +1599,40 @@ log:
 	zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
 
 	dmu_tx_commit(tx);
+
 	zfs_inode_update(zp);
-	return (0);
+	error = 0;
+
+out:
+	/*
+	 * Truncate the page cache - for file truncate operations, use
+	 * the purpose-built API for truncations.  For punching operations,
+	 * truncate only whole pages within the region; partial pages are
+	 * zeroed under a range lock in zfs_free_range().
+	 */
+	if (len == 0)
+		truncate_setsize(ZTOI(zp), off);
+	else if (zp->z_is_mapped) {
+		loff_t first_page, last_page;
+		loff_t first_page_offset, last_page_offset;
+
+		/* first possible full page in hole */
+		first_page = (off + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+		/* last page of hole */
+		last_page = (off + len) >> PAGE_CACHE_SHIFT;
+
+		/* offset of first_page */
+		first_page_offset = first_page << PAGE_CACHE_SHIFT;
+		/* offset of last_page */
+		last_page_offset = last_page << PAGE_CACHE_SHIFT;
+
+		/* truncate whole pages */
+		if (last_page_offset > first_page_offset) {
+			truncate_inode_pages_range(ZTOI(zp)->i_mapping,
+			    first_page_offset, last_page_offset - 1);
+		}
+	}
+	return (error);
 }
 
 void
diff --git a/module/zfs/zpl_file.c b/module/zfs/zpl_file.c
index 5ea892320..c72d5c947 100644
--- a/module/zfs/zpl_file.c
+++ b/module/zfs/zpl_file.c
@@ -558,38 +558,53 @@ zpl_writepage(struct page *pp, struct writeback_control *wbc)
 
 /*
  * The only flag combination which matches the behavior of zfs_space()
- * is FALLOC_FL_PUNCH_HOLE.  This flag was introduced in the 2.6.38 kernel.
+ * is FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE.  The FALLOC_FL_PUNCH_HOLE
+ * flag was introduced in the 2.6.38 kernel.
  */
+#if defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE)
 long
 zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
 {
-	cred_t *cr = CRED();
 	int error = -EOPNOTSUPP;
 
-	if (mode & FALLOC_FL_KEEP_SIZE)
-		return (-EOPNOTSUPP);
+#if defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
+	cred_t *cr = CRED();
+	flock64_t bf;
+	loff_t olen;
+
+	if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+		return (error);
 
 	crhold(cr);
 
-#ifdef FALLOC_FL_PUNCH_HOLE
-	if (mode & FALLOC_FL_PUNCH_HOLE) {
-		flock64_t bf;
+	if (offset < 0 || len <= 0)
+		return (-EINVAL);
 
-		bf.l_type = F_WRLCK;
-		bf.l_whence = 0;
-		bf.l_start = offset;
-		bf.l_len = len;
-		bf.l_pid = 0;
+	spl_inode_lock(ip);
+	olen = i_size_read(ip);
 
-		error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr);
+	if (offset > olen) {
+		spl_inode_unlock(ip);
+		return (0);
 	}
-#endif /* FALLOC_FL_PUNCH_HOLE */
+	if (offset + len > olen)
+		len = olen - offset;
+	bf.l_type = F_WRLCK;
+	bf.l_whence = 0;
+	bf.l_start = offset;
+	bf.l_len = len;
+	bf.l_pid = 0;
+
+	error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr);
+	spl_inode_unlock(ip);
 
 	crfree(cr);
+#endif /* defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE) */
 
 	ASSERT3S(error, <=, 0);
 	return (error);
 }
+#endif /* defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE) */
 
 #ifdef HAVE_FILE_FALLOCATE
 static long
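---

Editorial notes (not part of the patch itself). The page arithmetic shared by
zfs_free_range() and zfs_freesp() above splits a punched region into up to
three pieces: a partial head page and a partial tail page, which are zeroed in
place under the range lock, and any whole pages in between, which are dropped
from the page cache. A minimal user-space sketch of that arithmetic, assuming
4096-byte pages; the program name and punch_regions() helper are hypothetical:

#include <inttypes.h>
#include <stdio.h>

#define	EXAMPLE_PAGE_SIZE	4096ULL

/* Print the regions the patch would zero in place or truncate for a punch. */
static void
punch_regions(uint64_t off, uint64_t len)
{
	/* first possible full page in hole (round up) */
	uint64_t first_page = (off + EXAMPLE_PAGE_SIZE - 1) / EXAMPLE_PAGE_SIZE;
	/* last page of hole (round down) */
	uint64_t last_page = (off + len) / EXAMPLE_PAGE_SIZE;
	uint64_t first_page_offset = first_page * EXAMPLE_PAGE_SIZE;
	uint64_t last_page_offset = last_page * EXAMPLE_PAGE_SIZE;

	if (first_page > last_page) {
		/* entire punched area within a single page */
		printf("zero [%" PRIu64 ", %" PRIu64 ")\n", off, off + len);
		return;
	}
	if (first_page_offset > off)
		printf("zero head [%" PRIu64 ", %" PRIu64 ")\n", off,
		    first_page_offset);
	if (off + len > last_page_offset)
		printf("zero tail [%" PRIu64 ", %" PRIu64 ")\n",
		    last_page_offset, off + len);
	if (last_page_offset > first_page_offset)
		printf("truncate [%" PRIu64 ", %" PRIu64 ")\n",
		    first_page_offset, last_page_offset);
}

int
main(void)
{
	punch_regions(1000, 7000);	/* partial head and tail, no whole page */
	punch_regions(0, 4096);		/* exactly one whole page, nothing zeroed */
	punch_regions(100, 50);		/* hole entirely within a single page */
	return (0);
}

For example, punch_regions(1000, 7000) reports that bytes [1000, 4096) and
[4096, 8000) are zeroed and no whole page is truncated, which is exactly how
the kernel code above handles that region.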
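With the zpl_file.c hunk applied, the new path is reached from user space via
fallocate(2); the patch accepts exactly the combination FALLOC_FL_PUNCH_HOLE |
FALLOC_FL_KEEP_SIZE and rejects everything else with EOPNOTSUPP. A minimal
sketch, using a placeholder path for a file on a ZFS dataset (older glibc may
additionally need <linux/falloc.h> for the FALLOC_FL_* flags):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("/tank/fs/testfile", O_RDWR);	/* placeholder path */

	if (fd == -1) {
		perror("open");
		return (1);
	}

	/* Punch a 1 MiB hole at offset 4096 without changing the file size. */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
	    4096, 1048576) == -1)
		perror("fallocate");

	close(fd);
	return (0);
}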