mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-13 03:30:34 +03:00
278bee9319
Re-factor the .zfs/snapshot auto-mounting code to take into account changes made to the upstream kernels, and to lay the groundwork for enabling access to .zfs snapshots via NFS clients. This patch makes the following core improvements. * All actively auto-mounted snapshots are now tracked in two global trees which are indexed by snapshot name and objset id respectively. This allows for fast lookups of any auto-mounted snapshot without needing access to the parent dataset. * Snapshot entries are added to the tree in zfsctl_snapshot_mount(). However, they are now removed from the tree in the context of the unmount process. This eliminates the need for complicated error logic in zfsctl_snapshot_unmount() to handle unmount failures. * References are now taken on the snapshot entries in the tree to ensure they always remain valid while a task is outstanding. * The MNT_SHRINKABLE flag is set on the snapshot vfsmount_t right after the auto-mount succeeds. This allows the kernel to unmount idle auto-mounted snapshots if needed, removing the need for the zfsctl_unmount_snapshots() function. * Snapshots in active use will not be automatically unmounted. As long as at least one dentry is revalidated every zfs_expire_snapshot/2 seconds the auto-unmount expiration timer will be extended. * Commit torvalds/linux@bafc9b7 caused snapshots auto-mounted by ZFS to be immediately unmounted when the dentry was revalidated. This was a consequence of ZFS invalidating all snapdir dentries to ensure that negative dentries didn't mask new snapshots. This patch modifies the behavior such that only negative dentries are invalidated. This solves the issue and may result in a performance improvement. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #3589 Closes #3344 Closes #3295 Closes #3257 Closes #3243 Closes #3030 Closes #2841
543 lines
12 KiB
C
543 lines
12 KiB
C
/*
|
|
* CDDL HEADER START
|
|
*
|
|
* The contents of this file are subject to the terms of the
|
|
* Common Development and Distribution License (the "License").
|
|
* You may not use this file except in compliance with the License.
|
|
*
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
* or http://www.opensolaris.org/os/licensing.
|
|
* See the License for the specific language governing permissions
|
|
* and limitations under the License.
|
|
*
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
*
|
|
* CDDL HEADER END
|
|
*/
|
|
/*
|
|
* Copyright (C) 2011 Lawrence Livermore National Security, LLC.
|
|
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
|
|
* LLNL-CODE-403049.
|
|
* Rewritten for Linux by:
|
|
* Rohan Puri <rohan.puri15@gmail.com>
|
|
* Brian Behlendorf <behlendorf1@llnl.gov>
|
|
*/
|
|
|
|
#include <sys/zfs_vfsops.h>
|
|
#include <sys/zfs_vnops.h>
|
|
#include <sys/zfs_znode.h>
|
|
#include <sys/zfs_ctldir.h>
|
|
#include <sys/zpl.h>
|
|
|
|
/*
|
|
* Common open routine. Disallow any write access.
|
|
*/
|
|
/* ARGSUSED */
|
|
static int
|
|
zpl_common_open(struct inode *ip, struct file *filp)
|
|
{
|
|
if (filp->f_mode & FMODE_WRITE)
|
|
return (-EACCES);
|
|
|
|
return (generic_file_open(ip, filp));
|
|
}
|
|
|
|
/*
|
|
* Get root directory contents.
|
|
*/
|
|
static int
|
|
zpl_root_iterate(struct file *filp, struct dir_context *ctx)
|
|
{
|
|
zfs_sb_t *zsb = ITOZSB(filp->f_path.dentry->d_inode);
|
|
int error = 0;
|
|
|
|
ZFS_ENTER(zsb);
|
|
|
|
if (!dir_emit_dots(filp, ctx))
|
|
goto out;
|
|
|
|
if (ctx->pos == 2) {
|
|
if (!dir_emit(ctx, ZFS_SNAPDIR_NAME, strlen(ZFS_SNAPDIR_NAME),
|
|
ZFSCTL_INO_SNAPDIR, DT_DIR))
|
|
goto out;
|
|
|
|
ctx->pos++;
|
|
}
|
|
|
|
if (ctx->pos == 3) {
|
|
if (!dir_emit(ctx, ZFS_SHAREDIR_NAME, strlen(ZFS_SHAREDIR_NAME),
|
|
ZFSCTL_INO_SHARES, DT_DIR))
|
|
goto out;
|
|
|
|
ctx->pos++;
|
|
}
|
|
out:
|
|
ZFS_EXIT(zsb);
|
|
|
|
return (error);
|
|
}
|
|
|
|
#if !defined(HAVE_VFS_ITERATE)
|
|
static int
|
|
zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
|
|
{
|
|
struct dir_context ctx = DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
|
|
int error;
|
|
|
|
error = zpl_root_iterate(filp, &ctx);
|
|
filp->f_pos = ctx.pos;
|
|
|
|
return (error);
|
|
}
|
|
#endif /* HAVE_VFS_ITERATE */
|
|
|
|
/*
|
|
* Get root directory attributes.
|
|
*/
|
|
/* ARGSUSED */
|
|
static int
|
|
zpl_root_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
|
struct kstat *stat)
|
|
{
|
|
int error;
|
|
|
|
error = simple_getattr(mnt, dentry, stat);
|
|
stat->atime = CURRENT_TIME;
|
|
|
|
return (error);
|
|
}
|
|
|
|
static struct dentry *
|
|
#ifdef HAVE_LOOKUP_NAMEIDATA
|
|
zpl_root_lookup(struct inode *dip, struct dentry *dentry, struct nameidata *nd)
|
|
#else
|
|
zpl_root_lookup(struct inode *dip, struct dentry *dentry, unsigned int flags)
|
|
#endif
|
|
{
|
|
cred_t *cr = CRED();
|
|
struct inode *ip;
|
|
int error;
|
|
|
|
crhold(cr);
|
|
error = -zfsctl_root_lookup(dip, dname(dentry), &ip, 0, cr, NULL, NULL);
|
|
ASSERT3S(error, <=, 0);
|
|
crfree(cr);
|
|
|
|
if (error) {
|
|
if (error == -ENOENT)
|
|
return (d_splice_alias(NULL, dentry));
|
|
else
|
|
return (ERR_PTR(error));
|
|
}
|
|
|
|
return (d_splice_alias(ip, dentry));
|
|
}
|
|
|
|
/*
|
|
* The '.zfs' control directory file and inode operations.
|
|
*/
|
|
const struct file_operations zpl_fops_root = {
|
|
.open = zpl_common_open,
|
|
.llseek = generic_file_llseek,
|
|
.read = generic_read_dir,
|
|
#ifdef HAVE_VFS_ITERATE
|
|
.iterate = zpl_root_iterate,
|
|
#else
|
|
.readdir = zpl_root_readdir,
|
|
#endif
|
|
};
|
|
|
|
const struct inode_operations zpl_ops_root = {
|
|
.lookup = zpl_root_lookup,
|
|
.getattr = zpl_root_getattr,
|
|
};
|
|
|
|
#ifdef HAVE_AUTOMOUNT
/*
 * Automount callback for snapshot dentries.  Asks
 * zfsctl_snapshot_mount() to mount the snapshot at the given path and
 * returns an ERR_PTR if that fails.
 */
static struct vfsmount *
zpl_snapdir_automount(struct path *path)
{
	int rc = -zfsctl_snapshot_mount(path, 0);

	/*
	 * On success NULL is returned, which signals a mount collision,
	 * instead of handing back the snapshot's new vfsmount.  The user
	 * space mount has already gone through do_add_mount(), which added
	 * the vfsmount to the name space; returning it here would add it to
	 * the vfsmount list a second time and corrupt that list.
	 */
	return (rc ? ERR_PTR(rc) : NULL);
}
#endif /* HAVE_AUTOMOUNT */
|
|
|
|
/*
|
|
* Negative dentries must always be revalidated so newly created snapshots
|
|
* can be detected and automounted. Normal dentries should be kept because
|
|
* as of the 3.18 kernel revaliding the mountpoint dentry will result in
|
|
* the snapshot being immediately unmounted.
|
|
*/
|
|
static int
|
|
#ifdef HAVE_D_REVALIDATE_NAMEIDATA
|
|
zpl_snapdir_revalidate(struct dentry *dentry, struct nameidata *i)
|
|
#else
|
|
zpl_snapdir_revalidate(struct dentry *dentry, unsigned int flags)
|
|
#endif
|
|
{
|
|
return (!!dentry->d_inode);
|
|
}
|
|
|
|
dentry_operations_t zpl_dops_snapdirs = {
|
|
/*
|
|
* Auto mounting of snapshots is only supported for 2.6.37 and
|
|
* newer kernels. Prior to this kernel the ops->follow_link()
|
|
* callback was used as a hack to trigger the mount. The
|
|
* resulting vfsmount was then explicitly grafted in to the
|
|
* name space. While it might be possible to add compatibility
|
|
* code to accomplish this it would require considerable care.
|
|
*/
|
|
#ifdef HAVE_AUTOMOUNT
|
|
.d_automount = zpl_snapdir_automount,
|
|
#endif /* HAVE_AUTOMOUNT */
|
|
.d_revalidate = zpl_snapdir_revalidate,
|
|
};
|
|
|
|
static struct dentry *
|
|
#ifdef HAVE_LOOKUP_NAMEIDATA
|
|
zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
|
|
struct nameidata *nd)
|
|
#else
|
|
zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
|
|
unsigned int flags)
|
|
#endif
|
|
|
|
{
|
|
fstrans_cookie_t cookie;
|
|
cred_t *cr = CRED();
|
|
struct inode *ip = NULL;
|
|
int error;
|
|
|
|
crhold(cr);
|
|
cookie = spl_fstrans_mark();
|
|
error = -zfsctl_snapdir_lookup(dip, dname(dentry), &ip,
|
|
0, cr, NULL, NULL);
|
|
ASSERT3S(error, <=, 0);
|
|
spl_fstrans_unmark(cookie);
|
|
crfree(cr);
|
|
|
|
if (error && error != -ENOENT)
|
|
return (ERR_PTR(error));
|
|
|
|
ASSERT(error == 0 || ip == NULL);
|
|
d_clear_d_op(dentry);
|
|
d_set_d_op(dentry, &zpl_dops_snapdirs);
|
|
#ifdef HAVE_AUTOMOUNT
|
|
dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
|
|
#endif
|
|
|
|
return (d_splice_alias(ip, dentry));
|
|
}
|
|
|
|
static int
|
|
zpl_snapdir_iterate(struct file *filp, struct dir_context *ctx)
|
|
{
|
|
zfs_sb_t *zsb = ITOZSB(filp->f_path.dentry->d_inode);
|
|
fstrans_cookie_t cookie;
|
|
char snapname[MAXNAMELEN];
|
|
boolean_t case_conflict;
|
|
uint64_t id, pos;
|
|
int error = 0;
|
|
|
|
ZFS_ENTER(zsb);
|
|
cookie = spl_fstrans_mark();
|
|
|
|
if (!dir_emit_dots(filp, ctx))
|
|
goto out;
|
|
|
|
pos = ctx->pos;
|
|
while (error == 0) {
|
|
dsl_pool_config_enter(dmu_objset_pool(zsb->z_os), FTAG);
|
|
error = -dmu_snapshot_list_next(zsb->z_os, MAXNAMELEN,
|
|
snapname, &id, &pos, &case_conflict);
|
|
dsl_pool_config_exit(dmu_objset_pool(zsb->z_os), FTAG);
|
|
if (error)
|
|
goto out;
|
|
|
|
if (!dir_emit(ctx, snapname, strlen(snapname),
|
|
ZFSCTL_INO_SHARES - id, DT_DIR))
|
|
goto out;
|
|
|
|
ctx->pos = pos;
|
|
}
|
|
out:
|
|
spl_fstrans_unmark(cookie);
|
|
ZFS_EXIT(zsb);
|
|
|
|
if (error == -ENOENT)
|
|
return (0);
|
|
|
|
return (error);
|
|
}
|
|
|
|
#if !defined(HAVE_VFS_ITERATE)
|
|
static int
|
|
zpl_snapdir_readdir(struct file *filp, void *dirent, filldir_t filldir)
|
|
{
|
|
struct dir_context ctx = DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
|
|
int error;
|
|
|
|
error = zpl_snapdir_iterate(filp, &ctx);
|
|
filp->f_pos = ctx.pos;
|
|
|
|
return (error);
|
|
}
|
|
#endif /* HAVE_VFS_ITERATE */
|
|
|
|
int
|
|
zpl_snapdir_rename(struct inode *sdip, struct dentry *sdentry,
|
|
struct inode *tdip, struct dentry *tdentry)
|
|
{
|
|
cred_t *cr = CRED();
|
|
int error;
|
|
|
|
crhold(cr);
|
|
error = -zfsctl_snapdir_rename(sdip, dname(sdentry),
|
|
tdip, dname(tdentry), cr, 0);
|
|
ASSERT3S(error, <=, 0);
|
|
crfree(cr);
|
|
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
zpl_snapdir_rmdir(struct inode *dip, struct dentry *dentry)
|
|
{
|
|
cred_t *cr = CRED();
|
|
int error;
|
|
|
|
crhold(cr);
|
|
error = -zfsctl_snapdir_remove(dip, dname(dentry), cr, 0);
|
|
ASSERT3S(error, <=, 0);
|
|
crfree(cr);
|
|
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, zpl_umode_t mode)
|
|
{
|
|
cred_t *cr = CRED();
|
|
vattr_t *vap;
|
|
struct inode *ip;
|
|
int error;
|
|
|
|
crhold(cr);
|
|
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
|
|
zpl_vap_init(vap, dip, mode | S_IFDIR, cr);
|
|
|
|
error = -zfsctl_snapdir_mkdir(dip, dname(dentry), vap, &ip, cr, 0);
|
|
if (error == 0) {
|
|
d_clear_d_op(dentry);
|
|
d_set_d_op(dentry, &zpl_dops_snapdirs);
|
|
d_instantiate(dentry, ip);
|
|
}
|
|
|
|
kmem_free(vap, sizeof (vattr_t));
|
|
ASSERT3S(error, <=, 0);
|
|
crfree(cr);
|
|
|
|
return (error);
|
|
}
|
|
|
|
/*
|
|
* Get snapshot directory attributes.
|
|
*/
|
|
/* ARGSUSED */
|
|
static int
|
|
zpl_snapdir_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
|
struct kstat *stat)
|
|
{
|
|
zfs_sb_t *zsb = ITOZSB(dentry->d_inode);
|
|
int error;
|
|
|
|
ZFS_ENTER(zsb);
|
|
error = simple_getattr(mnt, dentry, stat);
|
|
stat->nlink = stat->size = 2;
|
|
stat->ctime = stat->mtime = dmu_objset_snap_cmtime(zsb->z_os);
|
|
stat->atime = CURRENT_TIME;
|
|
ZFS_EXIT(zsb);
|
|
|
|
return (error);
|
|
}
|
|
|
|
/*
|
|
* The '.zfs/snapshot' directory file operations. These mainly control
|
|
* generating the list of available snapshots when doing an 'ls' in the
|
|
* directory. See zpl_snapdir_readdir().
|
|
*/
|
|
const struct file_operations zpl_fops_snapdir = {
|
|
.open = zpl_common_open,
|
|
.llseek = generic_file_llseek,
|
|
.read = generic_read_dir,
|
|
#ifdef HAVE_VFS_ITERATE
|
|
.iterate = zpl_snapdir_iterate,
|
|
#else
|
|
.readdir = zpl_snapdir_readdir,
|
|
#endif
|
|
|
|
};
|
|
|
|
/*
|
|
* The '.zfs/snapshot' directory inode operations. These mainly control
|
|
* creating an inode for a snapshot directory and initializing the needed
|
|
* infrastructure to automount the snapshot. See zpl_snapdir_lookup().
|
|
*/
|
|
const struct inode_operations zpl_ops_snapdir = {
|
|
.lookup = zpl_snapdir_lookup,
|
|
.getattr = zpl_snapdir_getattr,
|
|
.rename = zpl_snapdir_rename,
|
|
.rmdir = zpl_snapdir_rmdir,
|
|
.mkdir = zpl_snapdir_mkdir,
|
|
};
|
|
|
|
static struct dentry *
|
|
#ifdef HAVE_LOOKUP_NAMEIDATA
|
|
zpl_shares_lookup(struct inode *dip, struct dentry *dentry,
|
|
struct nameidata *nd)
|
|
#else
|
|
zpl_shares_lookup(struct inode *dip, struct dentry *dentry,
|
|
unsigned int flags)
|
|
#endif
|
|
{
|
|
fstrans_cookie_t cookie;
|
|
cred_t *cr = CRED();
|
|
struct inode *ip = NULL;
|
|
int error;
|
|
|
|
crhold(cr);
|
|
cookie = spl_fstrans_mark();
|
|
error = -zfsctl_shares_lookup(dip, dname(dentry), &ip,
|
|
0, cr, NULL, NULL);
|
|
ASSERT3S(error, <=, 0);
|
|
spl_fstrans_unmark(cookie);
|
|
crfree(cr);
|
|
|
|
if (error) {
|
|
if (error == -ENOENT)
|
|
return (d_splice_alias(NULL, dentry));
|
|
else
|
|
return (ERR_PTR(error));
|
|
}
|
|
|
|
return (d_splice_alias(ip, dentry));
|
|
}
|
|
|
|
static int
|
|
zpl_shares_iterate(struct file *filp, struct dir_context *ctx)
|
|
{
|
|
fstrans_cookie_t cookie;
|
|
cred_t *cr = CRED();
|
|
zfs_sb_t *zsb = ITOZSB(filp->f_path.dentry->d_inode);
|
|
znode_t *dzp;
|
|
int error = 0;
|
|
|
|
ZFS_ENTER(zsb);
|
|
cookie = spl_fstrans_mark();
|
|
|
|
if (zsb->z_shares_dir == 0) {
|
|
dir_emit_dots(filp, ctx);
|
|
goto out;
|
|
}
|
|
|
|
error = -zfs_zget(zsb, zsb->z_shares_dir, &dzp);
|
|
if (error)
|
|
goto out;
|
|
|
|
crhold(cr);
|
|
error = -zfs_readdir(ZTOI(dzp), ctx, cr);
|
|
crfree(cr);
|
|
|
|
iput(ZTOI(dzp));
|
|
out:
|
|
spl_fstrans_unmark(cookie);
|
|
ZFS_EXIT(zsb);
|
|
ASSERT3S(error, <=, 0);
|
|
|
|
return (error);
|
|
}
|
|
|
|
#if !defined(HAVE_VFS_ITERATE)
|
|
static int
|
|
zpl_shares_readdir(struct file *filp, void *dirent, filldir_t filldir)
|
|
{
|
|
struct dir_context ctx = DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
|
|
int error;
|
|
|
|
error = zpl_shares_iterate(filp, &ctx);
|
|
filp->f_pos = ctx.pos;
|
|
|
|
return (error);
|
|
}
|
|
#endif /* HAVE_VFS_ITERATE */
|
|
|
|
/* ARGSUSED */
|
|
static int
|
|
zpl_shares_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
|
struct kstat *stat)
|
|
{
|
|
struct inode *ip = dentry->d_inode;
|
|
zfs_sb_t *zsb = ITOZSB(ip);
|
|
znode_t *dzp;
|
|
int error;
|
|
|
|
ZFS_ENTER(zsb);
|
|
|
|
if (zsb->z_shares_dir == 0) {
|
|
error = simple_getattr(mnt, dentry, stat);
|
|
stat->nlink = stat->size = 2;
|
|
stat->atime = CURRENT_TIME;
|
|
ZFS_EXIT(zsb);
|
|
return (error);
|
|
}
|
|
|
|
error = -zfs_zget(zsb, zsb->z_shares_dir, &dzp);
|
|
if (error == 0) {
|
|
error = -zfs_getattr_fast(ZTOI(dzp), stat);
|
|
iput(ZTOI(dzp));
|
|
}
|
|
|
|
ZFS_EXIT(zsb);
|
|
ASSERT3S(error, <=, 0);
|
|
|
|
return (error);
|
|
}
|
|
|
|
/*
|
|
* The '.zfs/shares' directory file operations.
|
|
*/
|
|
const struct file_operations zpl_fops_shares = {
|
|
.open = zpl_common_open,
|
|
.llseek = generic_file_llseek,
|
|
.read = generic_read_dir,
|
|
#ifdef HAVE_VFS_ITERATE
|
|
.iterate = zpl_shares_iterate,
|
|
#else
|
|
.readdir = zpl_shares_readdir,
|
|
#endif
|
|
|
|
};
|
|
|
|
/*
|
|
* The '.zfs/shares' directory inode operations.
|
|
*/
|
|
const struct inode_operations zpl_ops_shares = {
|
|
.lookup = zpl_shares_lookup,
|
|
.getattr = zpl_shares_getattr,
|
|
};
|