mirror_zfs/include/linux/vfs_compat.h

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
 */

#ifndef _ZFS_VFS_H
#define _ZFS_VFS_H

/*
 * 2.6.28 API change,
 * Added insert_inode_locked() helper function, prior to this most callers
 * used insert_inode_hash().  The older method doesn't check for collisions
 * in the inode_hashtable but it is still acceptable for use.
 */
#ifndef HAVE_INSERT_INODE_LOCKED
/*
 * Compatibility shim: hash the inode with insert_inode_hash() and always
 * report success.  No collision is ever reported to the caller because
 * nothing from insert_inode_hash() is consulted here.
 *
 * Returns 0 unconditionally.
 */
static inline int
insert_inode_locked(struct inode *ip)
{
	const int rc = 0;

	insert_inode_hash(ip);

	return (rc);
}
#endif /* HAVE_INSERT_INODE_LOCKED */

/*
 * 2.6.35 API change,
 * Add truncate_setsize() if it is not exported by the Linux kernel.
 *
 * Truncate the inode and pages associated with the inode. The pages are
 * unmapped and removed from cache.
 */
#ifndef HAVE_TRUNCATE_SETSIZE
/*
 * Compatibility shim: set the inode size to 'new' and drop the cached
 * pages beyond it.
 *
 * The size is published with i_size_write() first.  The mapping is then
 * unmapped starting at 'new + PAGE_SIZE - 1', the page cache is truncated
 * at 'new', and the same unmap is issued once more afterwards.
 * NOTE(review): the second unmap presumably catches mappings established
 * while the truncate was in progress; confirm against the kernel's own
 * truncate_pagecache().
 */
static inline void
truncate_setsize(struct inode *ip, loff_t new)
{
	struct address_space *as = ip->i_mapping;
	loff_t holebegin;

	i_size_write(ip, new);

	/* Same start offset is used for both unmap passes. */
	holebegin = new + PAGE_SIZE - 1;

	unmap_mapping_range(as, holebegin, 0, 1);
	truncate_inode_pages(as, new);
	unmap_mapping_range(as, holebegin, 0, 1);
}
#endif /* HAVE_TRUNCATE_SETSIZE */

#if defined(HAVE_BDI) && !defined(HAVE_BDI_SETUP_AND_REGISTER)
/*
 * 2.6.34 API change,
 * Add bdi_setup_and_register() function if not yet provided by kernel.
 * It is used to quickly initialize and register a BDI for the filesystem.
 */
extern atomic_long_t zfs_bdi_seq;

/*
 * Compatibility shim: initialize 'bdi' and register it under a name
 * derived from 'name'.
 *
 * bdi  - backing_dev_info to set up; its name and capabilities fields are
 *        overwritten with the arguments before bdi_init() is called.
 * name - base name; only the first 28 characters are used for registration.
 * cap  - value stored in bdi->capabilities.
 *
 * Returns 0 on success, otherwise the error from bdi_init() or
 * bdi_register().  When registration fails the bdi is torn down again with
 * bdi_destroy() before returning.
 */
static inline int
bdi_setup_and_register(struct backing_dev_info *bdi,char *name,unsigned int cap)
{
	char fmt[32];
	int rc;

	bdi->name = name;
	bdi->capabilities = cap;

	rc = bdi_init(bdi);
	if (rc != 0)
		return (rc);

	/*
	 * Build the template "<name>-%d": at most 28 characters of name,
	 * the 3 character suffix and the terminating NUL fit the 32 byte
	 * buffer exactly.  The "%d" is left in place and handed on to
	 * bdi_register() together with the next zfs_bdi_seq value.
	 */
	sprintf(fmt, "%.28s%s", name, "-%d");
	rc = bdi_register(bdi, NULL, fmt,
	    atomic_long_inc_return(&zfs_bdi_seq));
	if (rc != 0)
		bdi_destroy(bdi);

	return (rc);
}
#endif /* HAVE_BDI && !HAVE_BDI_SETUP_AND_REGISTER */

/*
 * 3.2-rc1 API change,
 * Add set_nlink() if it is not exported by the Linux kernel.
 *
 * i_nlink is read-only in Linux 3.2, but it can be set directly in
 * earlier kernels.
 */
#ifndef HAVE_SET_NLINK
/*
 * Compatibility shim: store the requested link count directly in the
 * inode's i_nlink field.
 *
 * inode - inode whose link count is updated.
 * nlink - new link count value.
 */
static inline void
set_nlink(struct inode *inode, unsigned int nlink)
{
	/* Direct assignment; only compiled when HAVE_SET_NLINK is unset. */
	inode->i_nlink = nlink;
}
#endif /* HAVE_SET_NLINK */

/*
 * 3.3 API change,
 * The VFS .create, .mkdir and .mknod callbacks were updated to take a
 * umode_t type rather than an int.  To cleanly handle both definitions
 * the zpl_umode_t type is introduced and set accordingly.
 */
#ifdef HAVE_CREATE_UMODE_T
/* The callbacks take umode_t, so zpl_umode_t aliases it. */
typedef	umode_t		zpl_umode_t;
#else
/* The callbacks take a plain int mode argument. */
typedef	int		zpl_umode_t;
#endif /* HAVE_CREATE_UMODE_T */

/*
 * 3.5 API change,
 * The clear_inode() function replaces end_writeback() and introduces an
 * ordering change regarding when the inode_sync_wait() occurs.  See the
 * configure check in config/kernel-clear-inode.m4 for full details.
 */
#if defined(HAVE_EVICT_INODE) && !defined(HAVE_CLEAR_INODE)
/*
 * HAVE_EVICT_INODE is set but HAVE_CLEAR_INODE is not: route every
 * clear_inode() call to end_writeback() with the same argument.
 */
#define clear_inode(ip)		end_writeback(ip)
#endif /* HAVE_EVICT_INODE && !HAVE_CLEAR_INODE */

#endif /* _ZFS_VFS_H */
Linux 2.6.35 compat, fops->fsync() The fsync() callback in the file_operations structure used to take 3 arguments. The callback now only takes 2 arguments because the dentry argument was determined to be unused by all consumers. To handle this a compatibility prototype was added to ensure the right prototype is used. Our implementation never used the dentry argument either so it's just a matter of using the right prototype. 2011-02-11 19:58:55 +03:00			`/*`
			`* CDDL HEADER START`
			`*`
			`* The contents of this file are subject to the terms of the`
			`* Common Development and Distribution License (the "License").`
			`* You may not use this file except in compliance with the License.`
			`*`
			`* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE`
			`* or http://www.opensolaris.org/os/licensing.`
			`* See the License for the specific language governing permissions`
			`* and limitations under the License.`
			`*`
			`* When distributing Covered Code, include this CDDL HEADER in each`
			`* file and include the License file at usr/src/OPENSOLARIS.LICENSE.`
			`* If applicable, add the following below this CDDL HEADER, with the`
			`* fields enclosed by brackets "[]" replaced with your own identifying`
			`* information: Portions Copyright [yyyy] [name of copyright owner]`
			`*`
			`* CDDL HEADER END`
			`*/`

			`/*`
			`* Copyright (C) 2011 Lawrence Livermore National Security, LLC.`
			`*/`

			`#ifndef _ZFS_VFS_H`
			`#define _ZFS_VFS_H`

Linux 2.6.28 compat, insert_inode_locked() Added insert_inode_locked() helper function, prior to this most callers used insert_inode_hash(). The older method doesn't check for collisions in the inode_hashtable but it is still acceptable for use. Fallback to using insert_inode_hash() when insert_inode_locked() is unavailable. 2011-03-22 19:55:09 +03:00			`/*`
			`* 2.6.28 API change,`
			`* Added insert_inode_locked() helper function, prior to this most callers`
			`* used insert_inode_hash(). The older method doesn't check for collisions`
			`* in the inode_hashtable but it is still acceptable for use.`
			`*/`
			`#ifndef HAVE_INSERT_INODE_LOCKED`
			`static inline int`
			`insert_inode_locked(struct inode *ip)`
			`{`
			`insert_inode_hash(ip);`
			`return (0);`
			`}`
			`#endif /* HAVE_INSERT_INODE_LOCKED */`
Linux 2.6.35 compat, fops->fsync() The fsync() callback in the file_operations structure used to take 3 arguments. The callback now only takes 2 arguments because the dentry argument was determined to be unused by all consumers. To handle this a compatibility prototype was added to ensure the right prototype is used. Our implementation never used the dentry argument either so it's just a matter of using the right prototype. 2011-02-11 19:58:55 +03:00
Tear down and flush the mmap region The inode eviction should unmap the pages associated with the inode. These pages should also be flushed to disk to avoid the data loss. Therefore, use truncate_setsize() in evict_inode() to release the pagecache. The API truncate_setsize() was added in 2.6.35 kernel. To ensure compatibility with the old kernel, the patch defines its own truncate_setsize function. Signed-off-by: Prasad Joshi <pjoshi@stec-inc.com> Closes #255 2011-06-25 16:30:29 +04:00			`/*`
			`* 2.6.35 API change,`
			`* Add truncate_setsize() if it is not exported by the Linux kernel.`
			`*`
			`* Truncate the inode and pages associated with the inode. The pages are`
			`* unmapped and removed from cache.`
			`*/`
			`#ifndef HAVE_TRUNCATE_SETSIZE`
			`static inline void`
			`truncate_setsize(struct inode *ip, loff_t new)`
			`{`
			`struct address_space *mapping = ip->i_mapping;`

			`i_size_write(ip, new);`

			`unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);`
			`truncate_inode_pages(mapping, new);`
			`unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);`
			`}`
			`#endif /* HAVE_TRUNCATE_SETSIZE */`

Simplify BDI integration Update the code to use the bdi_setup_and_register() helper to simplify the bdi integration code. The updated code now just registers the bdi during mount and destroys it during unmount. The only complication is that for 2.6.32 - 2.6.33 kernels the helper wasn't available so in these cases the zfs code must provide it. Luckily the bdi_setup_and_register() function is trivial. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #367 2011-11-08 04:39:03 +04:00			`#if defined(HAVE_BDI) && !defined(HAVE_BDI_SETUP_AND_REGISTER)`
Add backing_device_info per-filesystem For a long time now the kernel has been moving away from using the pdflush daemon to write 'old' dirty pages to disk. The primary reason for this is because the pdflush daemon is single threaded and can be a limiting factor for performance. Since pdflush sequentially walks the dirty inode list for each super block any delay in processing can slow down dirty page writeback for all filesystems. The replacement for pdflush is called bdi (backing device info). The bdi system involves creating a per-filesystem control structure each with its own private sets of queues to manage writeback. The advantage is greater parallelism which improves performance and prevents a single filesystem from slowing writeback to the others. For a long time both systems co-existed in the kernel so it wasn't strictly required to implement the bdi scheme. However, as of Linux 2.6.36 kernels the pdflush functionality has been retired. Since ZFS already bypasses the page cache for most I/O this is only an issue for mmap(2) writes which must go through the page cache. Even then adding this missing support for newer kernels was overlooked because there are other mechanisms which can trigger writeback. However, there is one critical case where not implementing the bdi functionality can cause problems. If an application handles a page fault it can enter the balance_dirty_pages() callpath. This will result in the application hanging until the number of dirty pages in the system drops below the dirty ratio. Without a registered backing_device_info for the filesystem the dirty pages will not get written out. Thus the application will hang. As mentioned above this was less of an issue with older kernels because pdflush would eventually write out the dirty pages. This change adds a backing_device_info structure to the zfs_sb_t which is already allocated per-super block. It is then registered when the filesystem mounted and unregistered on unmount. 
It will not be registered for mounted snapshots which are read-only. This change will result in flush-<pool> thread being dynamically created and destroyed per-mounted filesystem for writeback. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #174 2011-08-02 05:24:40 +04:00			`/*`
Simplify BDI integration Update the code to use the bdi_setup_and_register() helper to simplify the bdi integration code. The updated code now just registers the bdi during mount and destroys it during unmount. The only complication is that for 2.6.32 - 2.6.33 kernels the helper wasn't available so in these cases the zfs code must provide it. Luckily the bdi_setup_and_register() function is trivial. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #367 2011-11-08 04:39:03 +04:00			`* 2.6.34 API change,`
			`* Add bdi_setup_and_register() function if not yet provided by kernel.`
			`* It is used to quickly initialize and register a BDI for the filesystem.`
Add backing_device_info per-filesystem For a long time now the kernel has been moving away from using the pdflush daemon to write 'old' dirty pages to disk. The primary reason for this is because the pdflush daemon is single threaded and can be a limiting factor for performance. Since pdflush sequentially walks the dirty inode list for each super block any delay in processing can slow down dirty page writeback for all filesystems. The replacement for pdflush is called bdi (backing device info). The bdi system involves creating a per-filesystem control structure each with its own private sets of queues to manage writeback. The advantage is greater parallelism which improves performance and prevents a single filesystem from slowing writeback to the others. For a long time both systems co-existed in the kernel so it wasn't strictly required to implement the bdi scheme. However, as of Linux 2.6.36 kernels the pdflush functionality has been retired. Since ZFS already bypasses the page cache for most I/O this is only an issue for mmap(2) writes which must go through the page cache. Even then adding this missing support for newer kernels was overlooked because there are other mechanisms which can trigger writeback. However, there is one critical case where not implementing the bdi functionality can cause problems. If an application handles a page fault it can enter the balance_dirty_pages() callpath. This will result in the application hanging until the number of dirty pages in the system drops below the dirty ratio. Without a registered backing_device_info for the filesystem the dirty pages will not get written out. Thus the application will hang. As mentioned above this was less of an issue with older kernels because pdflush would eventually write out the dirty pages. This change adds a backing_device_info structure to the zfs_sb_t which is already allocated per-super block. It is then registered when the filesystem mounted and unregistered on unmount. 
It will not be registered for mounted snapshots which are read-only. This change will result in flush-<pool> thread being dynamically created and destroyed per-mounted filesystem for writeback. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #174 2011-08-02 05:24:40 +04:00			`*/`
Simplify BDI integration Update the code to use the bdi_setup_and_register() helper to simplify the bdi integration code. The updated code now just registers the bdi during mount and destroys it during unmount. The only complication is that for 2.6.32 - 2.6.33 kernels the helper wasn't available so in these cases the zfs code must provide it. Luckily the bdi_setup_and_register() function is trivial. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #367 2011-11-08 04:39:03 +04:00			`extern atomic_long_t zfs_bdi_seq;`

			`static inline int`
			`bdi_setup_and_register(struct backing_dev_info *bdi,char *name,unsigned int cap)`
			`{`
			`char tmp[32];`
			`int error;`

			`bdi->name = name;`
			`bdi->capabilities = cap;`
			`error = bdi_init(bdi);`
			`if (error)`
			`return (error);`

			`sprintf(tmp, "%.28s%s", name, "-%d");`
			`error = bdi_register(bdi, NULL, tmp,`
			`atomic_long_inc_return(&zfs_bdi_seq));`
			`if (error) {`
			`bdi_destroy(bdi);`
			`return (error);`
			`}`

			`return (error);`
			`}`
			`#endif /* HAVE_BDI && !HAVE_BDI_SETUP_AND_REGISTER */`
Add backing_device_info per-filesystem For a long time now the kernel has been moving away from using the pdflush daemon to write 'old' dirty pages to disk. The primary reason for this is because the pdflush daemon is single threaded and can be a limiting factor for performance. Since pdflush sequentially walks the dirty inode list for each super block any delay in processing can slow down dirty page writeback for all filesystems. The replacement for pdflush is called bdi (backing device info). The bdi system involves creating a per-filesystem control structure each with its own private sets of queues to manage writeback. The advantage is greater parallelism which improves performance and prevents a single filesystem from slowing writeback to the others. For a long time both systems co-existed in the kernel so it wasn't strictly required to implement the bdi scheme. However, as of Linux 2.6.36 kernels the pdflush functionality has been retired. Since ZFS already bypasses the page cache for most I/O this is only an issue for mmap(2) writes which must go through the page cache. Even then adding this missing support for newer kernels was overlooked because there are other mechanisms which can trigger writeback. However, there is one critical case where not implementing the bdi functionality can cause problems. If an application handles a page fault it can enter the balance_dirty_pages() callpath. This will result in the application hanging until the number of dirty pages in the system drops below the dirty ratio. Without a registered backing_device_info for the filesystem the dirty pages will not get written out. Thus the application will hang. As mentioned above this was less of an issue with older kernels because pdflush would eventually write out the dirty pages. This change adds a backing_device_info structure to the zfs_sb_t which is already allocated per-super block. It is then registered when the filesystem mounted and unregistered on unmount. 
It will not be registered for mounted snapshots which are read-only. This change will result in flush-<pool> thread being dynamically created and destroyed per-mounted filesystem for writeback. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #174 2011-08-02 05:24:40 +04:00
Linux 3.2 compat: set_nlink() Directly changing inode->i_nlink is deprecated in Linux 3.2 by commit SHA: bfe8684869601dacfcb2cd69ef8cfd9045f62170 Use the new set_nlink() kernel function instead. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes: #462 2011-12-17 01:15:12 +04:00			`/*`
			`* 3.2-rc1 API change,`
			`* Add set_nlink() if it is not exported by the Linux kernel.`
			`*`
			`* i_nlink is read-only in Linux 3.2, but it can be set directly in`
			`* earlier kernels.`
			`*/`
			`#ifndef HAVE_SET_NLINK`
			`static inline void`
			`set_nlink(struct inode *inode, unsigned int nlink)`
			`{`
			`inode->i_nlink = nlink;`
			`}`
			`#endif /* HAVE_SET_NLINK */`

Linux 3.3 compat, iops->create()/mkdir()/mknod() The mode argument of iops->create()/mkdir()/mknod() was changed from an 'int' to a 'umode_t'. To prevent a compiler warning an autoconf check was added to detect the API change and then correctly set a zpl_umode_t typedef. There is no functional change. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #701 2012-04-30 23:01:49 +04:00			`/*`
			`* 3.3 API change,`
			`* The VFS .create, .mkdir and .mknod callbacks were updated to take a`
			`* umode_t type rather than an int. To cleanly handle both definitions`
			`* the zpl_umode_t type is introduced and set accordingly.`
			`*/`
			`#ifdef HAVE_CREATE_UMODE_T`
			`typedef umode_t zpl_umode_t;`
			`#else`
			`typedef int zpl_umode_t;`
			`#endif`

Linux 3.5 compat, end_writeback() changed to clear_inode() The end_writeback() function was changed by moving the call to inode_sync_wait() earlier into evict(). This effectively changes the ordering of the sync but it does not impact the details of the zfs implementation. However, as part of this change end_writeback() was renamed to clear_inode() to reflect the new semantics. This change does impact us and clear_inode() now maps to end_writeback() for kernels prior to 3.5. Signed-off-by: Richard Yao <ryao@cs.stonybrook.edu> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #784 2012-07-23 22:39:25 +04:00			`/*`
			`* 3.5 API change,`
			`* The clear_inode() function replaces end_writeback() and introduces an`
			`* ordering change regarding when the inode_sync_wait() occurs. See the`
			`* configure check in config/kernel-clear-inode.m4 for full details.`
			`*/`
			`#if defined(HAVE_EVICT_INODE) && !defined(HAVE_CLEAR_INODE)`
			`#define clear_inode(ip) end_writeback(ip)`
			`#endif /* HAVE_EVICT_INODE && !HAVE_CLEAR_INODE */`

Linux 2.6.35 compat, fops->fsync() The fsync() callback in the file_operations structure used to take 3 arguments. The callback now only takes 2 arguments because the dentry argument was determined to be unused by all consumers. To handle this a compatibility prototype was added to ensure the right prototype is used. Our implementation never used the dentry argument either so it's just a matter of using the right prototype. 2011-02-11 19:58:55 +03:00			`#endif /* _ZFS_VFS_H */`