mirror_zfs/include/os/linux/zfs/sys/zpl.h

209 lines
6.1 KiB
C
Raw Normal View History

/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2011, Lawrence Livermore National Security, LLC.
*/
#ifndef _SYS_ZPL_H
#define _SYS_ZPL_H
#include <sys/mntent.h>
#include <sys/vfs.h>
#include <linux/aio.h>
Fix 'zfs rollback' on mounted file systems Rolling back a mounted filesystem with open file handles and cached dentries+inodes never worked properly in ZoL. The major issue was that Linux provides no easy mechanism for modules to invalidate the inode cache for a file system. Because of this it was possible that an inode from the previous filesystem would not get properly dropped from the cache during rolling back. Then a new inode with the same inode number would be create and collide with the existing cached inode. Ideally this would trigger an VERIFY() but in practice the error wasn't handled and it would just NULL reference. Luckily, this issue can be resolved by sprucing up the existing Solaris zfs_rezget() functionality for the Linux VFS. The way it works now is that when a file system is rolled back all the cached inodes will be traversed and refetched from disk. If a version of the cached inode exists on disk the in-core copy will be updated accordingly. If there is no match for that object on disk it will be unhashed from the inode cache and marked as stale. This will effectively make the inode unfindable for lookups allowing the inode number to be immediately recycled. The inode will then only be accessible from the cached dentries. Subsequent dentry lookups which reference a stale inode will result in the dentry being invalidated. Once invalidated the dentry will drop its reference on the inode allowing it to be safely pruned from the cache. Special care is taken for negative dentries since they do not reference any inode. These dentires will be invalidate based on when they were added to the dentry cache. Entries added before the last rollback will be invalidate to prevent them from masking real files in the dataset. Two nice side effects of this fix are: * Removes the dependency on spl_invalidate_inodes(), it can now be safely removed from the SPL when we choose to do so. * zfs_znode_alloc() no longer requires a dentry to be passed. This effectively reverts this portition of the code to its upstream counterpart. The dentry is not instantiated more correctly in the Linux ZPL layer. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Ned Bass <bass6@llnl.gov> Closes #795
2013-01-16 04:41:09 +04:00
#include <linux/dcache_compat.h>
#include <linux/exportfs.h>
#include <linux/falloc.h>
#include <linux/parser.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/vfs_compat.h>
#include <linux/writeback.h>
#include <linux/xattr_compat.h>
/* zpl_inode.c */
extern void zpl_vap_init(vattr_t *vap, struct inode *dir,
umode_t mode, cred_t *cr, zuserns_t *mnt_ns);
extern const struct inode_operations zpl_inode_operations;
#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER
extern const struct inode_operations_wrapper zpl_dir_inode_operations;
#else
extern const struct inode_operations zpl_dir_inode_operations;
#endif
extern const struct inode_operations zpl_symlink_inode_operations;
extern const struct inode_operations zpl_special_inode_operations;
/* zpl_file.c */
extern const struct address_space_operations zpl_address_space_operations;
extern const struct file_operations zpl_file_operations;
extern const struct file_operations zpl_dir_file_operations;
/* zpl_super.c */
Restructure per-filesystem reclaim Originally when the ARC prune callback was introduced the idea was to register a single callback for the ZPL. The ARC could invoke this call back if it needed the ZPL to drop dentries, inodes, or other cache objects which might be pinning buffers in the ARC. The ZPL would iterate over all ZFS super blocks and perform the reclaim. For the most part this design has worked well but due to limitations in 2.6.35 and earlier kernels there were some problems. This patch is designed to address those issues. 1) iterate_supers_type() is not provided by all kernels which makes it impossible to safely iterate over all zpl_fs_type filesystems in a single callback. The most straight forward and portable way to resolve this is to register a callback per-filesystem during mount. The arc_*_prune_callback() functions have always supported multiple callbacks so this is functionally a very small change. 2) Commit 050d22b removed the non-portable shrink_dcache_memory() and shrink_icache_memory() functions and didn't replace them with equivalent functionality. This meant that for Linux 3.1 and older kernels the ARC had no mechanism to drop dentries and inodes from the caches if needed. This patch adds that missing functionality by calling shrink_dcache_parent() to release dentries which may be pinning inodes. This will result in all unused cache entries being dropped which is a bit heavy handed but it's the only interface available for old kernels. 3) A zpl_drop_inode() callback is registered for kernels older than 2.6.35 which do not support the .evict_inode callback. This ensures that when the last reference on an inode is dropped it is immediately removed from the cache. If this isn't done than inode can end up on the global unused LRU with no mechanism available to ZFS to drop them. Since the ARC buffers are not dropped the hottest inodes can still be recreated without performing disk IO. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Pavel Snajdr <snajpa@snajpa.net> Issue #3160
2015-03-18 01:07:47 +03:00
extern void zpl_prune_sb(int64_t nr_to_scan, void *arg);
Linux 3.1 compat, super_block->s_shrink The Linux 3.1 kernel has introduced the concept of per-filesystem shrinkers which are directly assoicated with a super block. Prior to this change there was one shared global shrinker. The zfs code relied on being able to call the global shrinker when the arc_meta_limit was exceeded. This would cause the VFS to drop references on a fraction of the dentries in the dcache. The ARC could then safely reclaim the memory used by these entries and honor the arc_meta_limit. Unfortunately, when per-filesystem shrinkers were added the old interfaces were made unavailable. This change adds support to use the new per-filesystem shrinker interface so we can continue to honor the arc_meta_limit. The major benefit of the new interface is that we can now target only the zfs filesystem for dentry and inode pruning. Thus we can minimize any impact on the caching of other filesystems. In the context of making this change several other important issues related to managing the ARC were addressed, they include: * The dnlc_reduce_cache() function which was called by the ARC to drop dentries for the Posix layer was replaced with a generic zfs_prune_t callback. The ZPL layer now registers a callback to drop these dentries removing a layering violation which dates back to the Solaris code. This callback can also be used by other ARC consumers such as Lustre. arc_add_prune_callback() arc_remove_prune_callback() * The arc_reduce_dnlc_percent module option has been changed to arc_meta_prune for clarity. The dnlc functions are specific to Solaris's VFS and have already been largely eliminated already. The replacement tunable now represents the number of bytes the prune callback will request when invoked. * Less aggressively invoke the prune callback. We used to call this whenever we exceeded the arc_meta_limit however that's not strictly correct since it results in over zeleous reclaim of dentries and inodes. It is now only called once the arc_meta_limit is exceeded and every effort has been made to evict other data from the ARC cache. * More promptly manage exceeding the arc_meta_limit. When reading meta data in to the cache if a buffer was unable to be recycled notify the arc_reclaim thread to invoke the required prune. * Added arcstat_prune kstat which is incremented when the ARC is forced to request that a consumer prune its cache. Remember this will only occur when the ARC has no other choice. If it can evict buffers safely without invoking the prune callback it will. * This change is also expected to resolve the unexpect collapses of the ARC cache. This would occur because when exceeded just the arc_meta_limit reclaim presure would be excerted on the arc_c value via arc_shrink(). This effectively shrunk the entire cache when really we just needed to reclaim meta data. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #466 Closes #292
2011-12-23 00:20:43 +04:00
extern const struct super_operations zpl_super_operations;
extern const struct export_operations zpl_export_operations;
extern struct file_system_type zpl_fs_type;
/* zpl_xattr.c */
extern ssize_t zpl_xattr_list(struct dentry *dentry, char *buf, size_t size);
extern int zpl_xattr_security_init(struct inode *ip, struct inode *dip,
const struct qstr *qstr);
#if defined(CONFIG_FS_POSIX_ACL)
Linux compat: Minimum kernel version 3.10 Increase the minimum supported kernel version from 2.6.32 to 3.10. This removes support for the following Linux enterprise distributions. Distribution | Kernel | End of Life ---------------- | ------ | ------------- Ubuntu 12.04 LTS | 3.2 | Apr 28, 2017 SLES 11 | 3.0 | Mar 32, 2019 RHEL / CentOS 6 | 2.6.32 | Nov 30, 2020 The following changes were made as part of removing support. * Updated `configure` to enforce a minimum kernel version as specified in the META file (Linux-Minimum: 3.10). configure: error: *** Cannot build against kernel version 2.6.32. *** The minimum supported kernel version is 3.10. * Removed all `configure` kABI checks and matching C code for interfaces which solely predate the Linux 3.10 kernel. * Updated all `configure` kABI checks to fail when an interface is missing which was in the 3.10 kernel up to the latest 5.1 kernel. Removed the HAVE_* preprocessor defines for these checks and updated the code to unconditionally use the verified interface. * Inverted the detection logic in several kABI checks to match the new interface as it appears in 3.10 and newer and not the legacy interface. * Consolidated the following checks in to individual files. Due the large number of changes in the checks it made sense to handle this now. It would be desirable to group other related checks in the same fashion, but this as left as future work. - config/kernel-blkdev.m4 - Block device kABI checks - config/kernel-blk-queue.m4 - Block queue kABI checks - config/kernel-bio.m4 - Bio interface kABI checks * Removed the kABI checks for sops->nr_cached_objects() and sops->free_cached_objects(). These interfaces are currently unused. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #9566
2019-11-12 19:59:06 +03:00
#if defined(HAVE_SET_ACL)
#if defined(HAVE_SET_ACL_USERNS)
extern int zpl_set_acl(struct user_namespace *userns, struct inode *ip,
struct posix_acl *acl, int type);
#elif defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2)
extern int zpl_set_acl(struct user_namespace *userns, struct dentry *dentry,
struct posix_acl *acl, int type);
#else
extern int zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type);
#endif /* HAVE_SET_ACL_USERNS */
Linux compat: Minimum kernel version 3.10 Increase the minimum supported kernel version from 2.6.32 to 3.10. This removes support for the following Linux enterprise distributions. Distribution | Kernel | End of Life ---------------- | ------ | ------------- Ubuntu 12.04 LTS | 3.2 | Apr 28, 2017 SLES 11 | 3.0 | Mar 32, 2019 RHEL / CentOS 6 | 2.6.32 | Nov 30, 2020 The following changes were made as part of removing support. * Updated `configure` to enforce a minimum kernel version as specified in the META file (Linux-Minimum: 3.10). configure: error: *** Cannot build against kernel version 2.6.32. *** The minimum supported kernel version is 3.10. * Removed all `configure` kABI checks and matching C code for interfaces which solely predate the Linux 3.10 kernel. * Updated all `configure` kABI checks to fail when an interface is missing which was in the 3.10 kernel up to the latest 5.1 kernel. Removed the HAVE_* preprocessor defines for these checks and updated the code to unconditionally use the verified interface. * Inverted the detection logic in several kABI checks to match the new interface as it appears in 3.10 and newer and not the legacy interface. * Consolidated the following checks in to individual files. Due the large number of changes in the checks it made sense to handle this now. It would be desirable to group other related checks in the same fashion, but this as left as future work. - config/kernel-blkdev.m4 - Block device kABI checks - config/kernel-blk-queue.m4 - Block queue kABI checks - config/kernel-bio.m4 - Bio interface kABI checks * Removed the kABI checks for sops->nr_cached_objects() and sops->free_cached_objects(). These interfaces are currently unused. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #9566
2019-11-12 19:59:06 +03:00
#endif /* HAVE_SET_ACL */
#if defined(HAVE_GET_ACL_RCU) || defined(HAVE_GET_INODE_ACL)
extern struct posix_acl *zpl_get_acl(struct inode *ip, int type, bool rcu);
#elif defined(HAVE_GET_ACL)
extern struct posix_acl *zpl_get_acl(struct inode *ip, int type);
#endif
extern int zpl_init_acl(struct inode *ip, struct inode *dir);
extern int zpl_chmod_acl(struct inode *ip);
#else
static inline int
zpl_init_acl(struct inode *ip, struct inode *dir)
{
return (0);
}
static inline int
zpl_chmod_acl(struct inode *ip)
{
return (0);
}
#endif /* CONFIG_FS_POSIX_ACL */
extern xattr_handler_t *zpl_xattr_handlers[];
/* zpl_ctldir.c */
extern const struct file_operations zpl_fops_root;
extern const struct inode_operations zpl_ops_root;
extern const struct file_operations zpl_fops_snapdir;
extern const struct inode_operations zpl_ops_snapdir;
extern const struct file_operations zpl_fops_shares;
extern const struct inode_operations zpl_ops_shares;
#if defined(HAVE_VFS_ITERATE) || defined(HAVE_VFS_ITERATE_SHARED)
#define ZPL_DIR_CONTEXT_INIT(_dirent, _actor, _pos) { \
.actor = _actor, \
.pos = _pos, \
}
typedef struct dir_context zpl_dir_context_t;
#define zpl_dir_emit dir_emit
#define zpl_dir_emit_dot dir_emit_dot
#define zpl_dir_emit_dotdot dir_emit_dotdot
#define zpl_dir_emit_dots dir_emit_dots
#else
typedef struct zpl_dir_context {
void *dirent;
const filldir_t actor;
loff_t pos;
} zpl_dir_context_t;
#define ZPL_DIR_CONTEXT_INIT(_dirent, _actor, _pos) { \
.dirent = _dirent, \
.actor = _actor, \
.pos = _pos, \
}
static inline bool
zpl_dir_emit(zpl_dir_context_t *ctx, const char *name, int namelen,
uint64_t ino, unsigned type)
{
return (!ctx->actor(ctx->dirent, name, namelen, ctx->pos, ino, type));
}
static inline bool
zpl_dir_emit_dot(struct file *file, zpl_dir_context_t *ctx)
{
return (ctx->actor(ctx->dirent, ".", 1, ctx->pos,
file_inode(file)->i_ino, DT_DIR) == 0);
}
static inline bool
zpl_dir_emit_dotdot(struct file *file, zpl_dir_context_t *ctx)
{
return (ctx->actor(ctx->dirent, "..", 2, ctx->pos,
parent_ino(file_dentry(file)), DT_DIR) == 0);
}
static inline bool
zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx)
{
if (ctx->pos == 0) {
if (!zpl_dir_emit_dot(file, ctx))
return (false);
ctx->pos = 1;
}
if (ctx->pos == 1) {
if (!zpl_dir_emit_dotdot(file, ctx))
return (false);
ctx->pos = 2;
}
return (true);
}
#endif /* HAVE_VFS_ITERATE */
#if defined(HAVE_INODE_TIMESTAMP_TRUNCATE)
#define zpl_inode_timestamp_truncate(ts, ip) timestamp_truncate(ts, ip)
#elif defined(HAVE_INODE_TIMESPEC64_TIMES)
#define zpl_inode_timestamp_truncate(ts, ip) \
timespec64_trunc(ts, (ip)->i_sb->s_time_gran)
#else
#define zpl_inode_timestamp_truncate(ts, ip) \
timespec_trunc(ts, (ip)->i_sb->s_time_gran)
#endif
#if defined(HAVE_INODE_OWNER_OR_CAPABLE)
#define zpl_inode_owner_or_capable(ns, ip) inode_owner_or_capable(ip)
#elif defined(HAVE_INODE_OWNER_OR_CAPABLE_IDMAPPED)
#define zpl_inode_owner_or_capable(ns, ip) inode_owner_or_capable(ns, ip)
#else
#error "Unsupported kernel"
#endif
#ifdef HAVE_SETATTR_PREPARE_USERNS
#define zpl_setattr_prepare(ns, dentry, ia) setattr_prepare(ns, dentry, ia)
#else
/*
* Use kernel-provided version, or our own from
* linux/vfs_compat.h
*/
#define zpl_setattr_prepare(ns, dentry, ia) setattr_prepare(dentry, ia)
#endif
#endif /* _SYS_ZPL_H */