mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-02 14:29:35 +03:00
9f0a21e641
Add the FreeBSD platform code to the OpenZFS repository. As of this commit the source can be compiled and tested on FreeBSD 11 and 12. Subsequent commits are now required to compile on FreeBSD and Linux. Additionally, they must pass the ZFS Test Suite on FreeBSD which is being run by the CI. As of this commit 1230 tests pass on FreeBSD and there are no unexpected failures. Reviewed-by: Sean Eric Fagan <sef@ixsystems.com> Reviewed-by: Jorgen Lundman <lundman@lundman.net> Reviewed-by: Richard Laager <rlaager@wiktel.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Co-authored-by: Ryan Moeller <ryan@iXsystems.com> Signed-off-by: Matt Macy <mmacy@FreeBSD.org> Signed-off-by: Ryan Moeller <ryan@iXsystems.com> Closes #898 Closes #8987
2449 lines
62 KiB
C
2449 lines
62 KiB
C
/*
|
|
* CDDL HEADER START
|
|
*
|
|
* The contents of this file are subject to the terms of the
|
|
* Common Development and Distribution License (the "License").
|
|
* You may not use this file except in compliance with the License.
|
|
*
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
* or http://www.opensolaris.org/os/licensing.
|
|
* See the License for the specific language governing permissions
|
|
* and limitations under the License.
|
|
*
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
*
|
|
* CDDL HEADER END
|
|
*/
|
|
/*
|
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
* Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
|
|
* All rights reserved.
|
|
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
|
* Copyright (c) 2014 Integros [integros.com]
|
|
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
|
|
*/
|
|
|
|
/* Portions Copyright 2010 Robert Milkowski */
|
|
|
|
#include <sys/types.h>
|
|
#include <sys/param.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/sysmacros.h>
|
|
#include <sys/kmem.h>
|
|
#include <sys/acl.h>
|
|
#include <sys/vnode.h>
|
|
#include <sys/vfs.h>
|
|
#include <sys/mntent.h>
|
|
#include <sys/mount.h>
|
|
#include <sys/cmn_err.h>
|
|
#include <sys/zfs_znode.h>
|
|
#include <sys/zfs_dir.h>
|
|
#include <sys/zil.h>
|
|
#include <sys/fs/zfs.h>
|
|
#include <sys/dmu.h>
|
|
#include <sys/dsl_prop.h>
|
|
#include <sys/dsl_dataset.h>
|
|
#include <sys/dsl_deleg.h>
|
|
#include <sys/spa.h>
|
|
#include <sys/zap.h>
|
|
#include <sys/sa.h>
|
|
#include <sys/sa_impl.h>
|
|
#include <sys/policy.h>
|
|
#include <sys/atomic.h>
|
|
#include <sys/zfs_ioctl.h>
|
|
#include <sys/zfs_ctldir.h>
|
|
#include <sys/zfs_fuid.h>
|
|
#include <sys/sunddi.h>
|
|
#include <sys/dmu_objset.h>
|
|
#include <sys/dsl_dir.h>
|
|
#include <sys/spa_boot.h>
|
|
#include <sys/jail.h>
|
|
#include <ufs/ufs/quota.h>
|
|
#include <sys/zfs_quota.h>
|
|
|
|
#include "zfs_comutil.h"
|
|
|
|
/*
 * Fallback values used only when the system headers do not already
 * define these MNTK_* constants.
 */
#if !defined(MNTK_VMSETSIZE_BUG)
#define	MNTK_VMSETSIZE_BUG	0
#endif

#if !defined(MNTK_NOMSYNC)
#define	MNTK_NOMSYNC	8
#endif
|
|
|
|
/* BEGIN CSTYLED */
|
|
struct mtx zfs_debug_mtx;
|
|
MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF);
|
|
|
|
SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system");
|
|
|
|
int zfs_super_owner;
|
|
SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0,
|
|
"File system owner can perform privileged operation on his file systems");
|
|
|
|
int zfs_debug_level;
|
|
SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
|
|
"Debug level");
|
|
|
|
SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions");
|
|
static int zfs_version_acl = ZFS_ACL_VERSION;
|
|
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0,
|
|
"ZFS_ACL_VERSION");
|
|
static int zfs_version_spa = SPA_VERSION;
|
|
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0,
|
|
"SPA_VERSION");
|
|
static int zfs_version_zpl = ZPL_VERSION;
|
|
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0,
|
|
"ZPL_VERSION");
|
|
/* END CSTYLED */
|
|
|
|
static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg);
|
|
static int zfs_mount(vfs_t *vfsp);
|
|
static int zfs_umount(vfs_t *vfsp, int fflag);
|
|
static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp);
|
|
static int zfs_statfs(vfs_t *vfsp, struct statfs *statp);
|
|
static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp);
|
|
static int zfs_sync(vfs_t *vfsp, int waitfor);
|
|
static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
|
|
struct ucred **credanonp, int *numsecflavors, int **secflavors);
|
|
static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp);
|
|
static void zfs_freevfs(vfs_t *vfsp);
|
|
|
|
struct vfsops zfs_vfsops = {
|
|
.vfs_mount = zfs_mount,
|
|
.vfs_unmount = zfs_umount,
|
|
#if __FreeBSD_version >= 1300049
|
|
.vfs_root = vfs_cache_root,
|
|
.vfs_cachedroot = zfs_root,
|
|
#else
|
|
.vfs_root = zfs_root,
|
|
#endif
|
|
.vfs_statfs = zfs_statfs,
|
|
.vfs_vget = zfs_vget,
|
|
.vfs_sync = zfs_sync,
|
|
.vfs_checkexp = zfs_checkexp,
|
|
.vfs_fhtovp = zfs_fhtovp,
|
|
.vfs_quotactl = zfs_quotactl,
|
|
};
|
|
|
|
VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN);
|
|
|
|
/*
|
|
* We need to keep a count of active fs's.
|
|
* This is necessary to prevent our module
|
|
* from being unloaded after a umount -f
|
|
*/
|
|
static uint32_t zfs_active_fs_count = 0;
|
|
|
|
int
|
|
zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val,
|
|
char *setpoint)
|
|
{
|
|
int error;
|
|
zfsvfs_t *zfvp;
|
|
vfs_t *vfsp;
|
|
objset_t *os;
|
|
uint64_t tmp = *val;
|
|
|
|
error = dmu_objset_from_ds(ds, &os);
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
error = getzfsvfs_impl(os, &zfvp);
|
|
if (error != 0)
|
|
return (error);
|
|
if (zfvp == NULL)
|
|
return (ENOENT);
|
|
vfsp = zfvp->z_vfs;
|
|
switch (zfs_prop) {
|
|
case ZFS_PROP_ATIME:
|
|
if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
|
|
tmp = 0;
|
|
if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL))
|
|
tmp = 1;
|
|
break;
|
|
case ZFS_PROP_DEVICES:
|
|
if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
|
|
tmp = 0;
|
|
if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL))
|
|
tmp = 1;
|
|
break;
|
|
case ZFS_PROP_EXEC:
|
|
if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL))
|
|
tmp = 0;
|
|
if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL))
|
|
tmp = 1;
|
|
break;
|
|
case ZFS_PROP_SETUID:
|
|
if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
|
|
tmp = 0;
|
|
if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL))
|
|
tmp = 1;
|
|
break;
|
|
case ZFS_PROP_READONLY:
|
|
if (vfs_optionisset(vfsp, MNTOPT_RW, NULL))
|
|
tmp = 0;
|
|
if (vfs_optionisset(vfsp, MNTOPT_RO, NULL))
|
|
tmp = 1;
|
|
break;
|
|
case ZFS_PROP_XATTR:
|
|
if (zfvp->z_flags & ZSB_XATTR)
|
|
tmp = zfvp->z_xattr;
|
|
break;
|
|
case ZFS_PROP_NBMAND:
|
|
if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL))
|
|
tmp = 0;
|
|
if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL))
|
|
tmp = 1;
|
|
break;
|
|
default:
|
|
vfs_unbusy(vfsp);
|
|
return (ENOENT);
|
|
}
|
|
|
|
vfs_unbusy(vfsp);
|
|
if (tmp != *val) {
|
|
(void) strcpy(setpoint, "temporary");
|
|
*val = tmp;
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
zfs_getquota(zfsvfs_t *zfsvfs, uid_t id, int isgroup, struct dqblk64 *dqp)
|
|
{
|
|
int error = 0;
|
|
char buf[32];
|
|
uint64_t usedobj, quotaobj;
|
|
uint64_t quota, used = 0;
|
|
timespec_t now;
|
|
|
|
usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
|
|
quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
|
|
|
|
if (quotaobj == 0 || zfsvfs->z_replay) {
|
|
error = ENOENT;
|
|
goto done;
|
|
}
|
|
(void) sprintf(buf, "%llx", (longlong_t)id);
|
|
if ((error = zap_lookup(zfsvfs->z_os, quotaobj,
|
|
buf, sizeof (quota), 1, "a)) != 0) {
|
|
dprintf("%s(%d): quotaobj lookup failed\n",
|
|
__FUNCTION__, __LINE__);
|
|
goto done;
|
|
}
|
|
/*
|
|
* quota(8) uses bsoftlimit as "quoota", and hardlimit as "limit".
|
|
* So we set them to be the same.
|
|
*/
|
|
dqp->dqb_bsoftlimit = dqp->dqb_bhardlimit = btodb(quota);
|
|
error = zap_lookup(zfsvfs->z_os, usedobj, buf, sizeof (used), 1, &used);
|
|
if (error && error != ENOENT) {
|
|
dprintf("%s(%d): usedobj failed; %d\n",
|
|
__FUNCTION__, __LINE__, error);
|
|
goto done;
|
|
}
|
|
dqp->dqb_curblocks = btodb(used);
|
|
dqp->dqb_ihardlimit = dqp->dqb_isoftlimit = 0;
|
|
vfs_timestamp(&now);
|
|
/*
|
|
* Setting this to 0 causes FreeBSD quota(8) to print
|
|
* the number of days since the epoch, which isn't
|
|
* particularly useful.
|
|
*/
|
|
dqp->dqb_btime = dqp->dqb_itime = now.tv_sec;
|
|
done:
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg)
|
|
{
|
|
zfsvfs_t *zfsvfs = vfsp->vfs_data;
|
|
struct thread *td;
|
|
int cmd, type, error = 0;
|
|
int bitsize;
|
|
zfs_userquota_prop_t quota_type;
|
|
struct dqblk64 dqblk = { 0 };
|
|
|
|
td = curthread;
|
|
cmd = cmds >> SUBCMDSHIFT;
|
|
type = cmds & SUBCMDMASK;
|
|
|
|
ZFS_ENTER(zfsvfs);
|
|
if (id == -1) {
|
|
switch (type) {
|
|
case USRQUOTA:
|
|
id = td->td_ucred->cr_ruid;
|
|
break;
|
|
case GRPQUOTA:
|
|
id = td->td_ucred->cr_rgid;
|
|
break;
|
|
default:
|
|
error = EINVAL;
|
|
if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF)
|
|
vfs_unbusy(vfsp);
|
|
goto done;
|
|
}
|
|
}
|
|
/*
|
|
* Map BSD type to:
|
|
* ZFS_PROP_USERUSED,
|
|
* ZFS_PROP_USERQUOTA,
|
|
* ZFS_PROP_GROUPUSED,
|
|
* ZFS_PROP_GROUPQUOTA
|
|
*/
|
|
switch (cmd) {
|
|
case Q_SETQUOTA:
|
|
case Q_SETQUOTA32:
|
|
if (type == USRQUOTA)
|
|
quota_type = ZFS_PROP_USERQUOTA;
|
|
else if (type == GRPQUOTA)
|
|
quota_type = ZFS_PROP_GROUPQUOTA;
|
|
else
|
|
error = EINVAL;
|
|
break;
|
|
case Q_GETQUOTA:
|
|
case Q_GETQUOTA32:
|
|
if (type == USRQUOTA)
|
|
quota_type = ZFS_PROP_USERUSED;
|
|
else if (type == GRPQUOTA)
|
|
quota_type = ZFS_PROP_GROUPUSED;
|
|
else
|
|
error = EINVAL;
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* Depending on the cmd, we may need to get
|
|
* the ruid and domain (see fuidstr_to_sid?),
|
|
* the fuid (how?), or other information.
|
|
* Create fuid using zfs_fuid_create(zfsvfs, id,
|
|
* ZFS_OWNER or ZFS_GROUP, cr, &fuidp)?
|
|
* I think I can use just the id?
|
|
*
|
|
* Look at zfs_id_overquota() to look up a quota.
|
|
* zap_lookup(something, quotaobj, fuidstring,
|
|
* sizeof (long long), 1, "a)
|
|
*
|
|
* See zfs_set_userquota() to set a quota.
|
|
*/
|
|
if ((uint32_t)type >= MAXQUOTAS) {
|
|
error = EINVAL;
|
|
goto done;
|
|
}
|
|
|
|
switch (cmd) {
|
|
case Q_GETQUOTASIZE:
|
|
bitsize = 64;
|
|
error = copyout(&bitsize, arg, sizeof (int));
|
|
break;
|
|
case Q_QUOTAON:
|
|
// As far as I can tell, you can't turn quotas on or off on zfs
|
|
error = 0;
|
|
vfs_unbusy(vfsp);
|
|
break;
|
|
case Q_QUOTAOFF:
|
|
error = ENOTSUP;
|
|
vfs_unbusy(vfsp);
|
|
break;
|
|
case Q_SETQUOTA:
|
|
error = copyin(&dqblk, arg, sizeof (dqblk));
|
|
if (error == 0)
|
|
error = zfs_set_userquota(zfsvfs, quota_type,
|
|
"", id, dbtob(dqblk.dqb_bhardlimit));
|
|
break;
|
|
case Q_GETQUOTA:
|
|
error = zfs_getquota(zfsvfs, id, type == GRPQUOTA, &dqblk);
|
|
if (error == 0)
|
|
error = copyout(&dqblk, arg, sizeof (dqblk));
|
|
break;
|
|
default:
|
|
error = EINVAL;
|
|
break;
|
|
}
|
|
done:
|
|
ZFS_EXIT(zfsvfs);
|
|
return (error);
|
|
}
|
|
|
|
|
|
boolean_t
|
|
zfs_is_readonly(zfsvfs_t *zfsvfs)
|
|
{
|
|
return (!!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY));
|
|
}
|
|
|
|
/*ARGSUSED*/
|
|
static int
|
|
zfs_sync(vfs_t *vfsp, int waitfor)
|
|
{
|
|
|
|
/*
|
|
* Data integrity is job one. We don't want a compromised kernel
|
|
* writing to the storage pool, so we never sync during panic.
|
|
*/
|
|
if (panicstr)
|
|
return (0);
|
|
|
|
/*
|
|
* Ignore the system syncher. ZFS already commits async data
|
|
* at zfs_txg_timeout intervals.
|
|
*/
|
|
if (waitfor == MNT_LAZY)
|
|
return (0);
|
|
|
|
if (vfsp != NULL) {
|
|
/*
|
|
* Sync a specific filesystem.
|
|
*/
|
|
zfsvfs_t *zfsvfs = vfsp->vfs_data;
|
|
dsl_pool_t *dp;
|
|
int error;
|
|
|
|
error = vfs_stdsync(vfsp, waitfor);
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
ZFS_ENTER(zfsvfs);
|
|
dp = dmu_objset_pool(zfsvfs->z_os);
|
|
|
|
/*
|
|
* If the system is shutting down, then skip any
|
|
* filesystems which may exist on a suspended pool.
|
|
*/
|
|
if (rebooting && spa_suspended(dp->dp_spa)) {
|
|
ZFS_EXIT(zfsvfs);
|
|
return (0);
|
|
}
|
|
|
|
if (zfsvfs->z_log != NULL)
|
|
zil_commit(zfsvfs->z_log, 0);
|
|
|
|
ZFS_EXIT(zfsvfs);
|
|
} else {
|
|
/*
|
|
* Sync all ZFS filesystems. This is what happens when you
|
|
* run sync(1M). Unlike other filesystems, ZFS honors the
|
|
* request by waiting for all pools to commit all dirty data.
|
|
*/
|
|
spa_sync_allpools();
|
|
}
|
|
|
|
return (0);
|
|
}
|
|
|
|
static void
|
|
atime_changed_cb(void *arg, uint64_t newval)
|
|
{
|
|
zfsvfs_t *zfsvfs = arg;
|
|
|
|
if (newval == TRUE) {
|
|
zfsvfs->z_atime = TRUE;
|
|
zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME;
|
|
vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
|
|
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
|
|
} else {
|
|
zfsvfs->z_atime = FALSE;
|
|
zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME;
|
|
vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
|
|
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
|
|
}
|
|
}
|
|
|
|
static void
|
|
xattr_changed_cb(void *arg, uint64_t newval)
|
|
{
|
|
zfsvfs_t *zfsvfs = arg;
|
|
|
|
if (newval == ZFS_XATTR_OFF) {
|
|
zfsvfs->z_flags &= ~ZSB_XATTR;
|
|
} else {
|
|
zfsvfs->z_flags |= ZSB_XATTR;
|
|
|
|
if (newval == ZFS_XATTR_SA)
|
|
zfsvfs->z_xattr_sa = B_TRUE;
|
|
else
|
|
zfsvfs->z_xattr_sa = B_FALSE;
|
|
}
|
|
}
|
|
|
|
static void
|
|
blksz_changed_cb(void *arg, uint64_t newval)
|
|
{
|
|
zfsvfs_t *zfsvfs = arg;
|
|
ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os)));
|
|
ASSERT3U(newval, >=, SPA_MINBLOCKSIZE);
|
|
ASSERT(ISP2(newval));
|
|
|
|
zfsvfs->z_max_blksz = newval;
|
|
zfsvfs->z_vfs->mnt_stat.f_iosize = newval;
|
|
}
|
|
|
|
static void
|
|
readonly_changed_cb(void *arg, uint64_t newval)
|
|
{
|
|
zfsvfs_t *zfsvfs = arg;
|
|
|
|
if (newval) {
|
|
/* XXX locking on vfs_flag? */
|
|
zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
|
|
vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
|
|
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
|
|
} else {
|
|
/* XXX locking on vfs_flag? */
|
|
zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
|
|
vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
|
|
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
|
|
}
|
|
}
|
|
|
|
static void
|
|
setuid_changed_cb(void *arg, uint64_t newval)
|
|
{
|
|
zfsvfs_t *zfsvfs = arg;
|
|
|
|
if (newval == FALSE) {
|
|
zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
|
|
vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
|
|
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
|
|
} else {
|
|
zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
|
|
vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
|
|
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
|
|
}
|
|
}
|
|
|
|
static void
|
|
exec_changed_cb(void *arg, uint64_t newval)
|
|
{
|
|
zfsvfs_t *zfsvfs = arg;
|
|
|
|
if (newval == FALSE) {
|
|
zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
|
|
vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
|
|
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
|
|
} else {
|
|
zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
|
|
vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
|
|
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* The nbmand mount option can be changed at mount time.
|
|
* We can't allow it to be toggled on live file systems or incorrect
|
|
* behavior may be seen from cifs clients
|
|
*
|
|
* This property isn't registered via dsl_prop_register(), but this callback
|
|
* will be called when a file system is first mounted
|
|
*/
|
|
static void
|
|
nbmand_changed_cb(void *arg, uint64_t newval)
|
|
{
|
|
zfsvfs_t *zfsvfs = arg;
|
|
if (newval == FALSE) {
|
|
vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND);
|
|
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0);
|
|
} else {
|
|
vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND);
|
|
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0);
|
|
}
|
|
}
|
|
|
|
static void
|
|
snapdir_changed_cb(void *arg, uint64_t newval)
|
|
{
|
|
zfsvfs_t *zfsvfs = arg;
|
|
|
|
zfsvfs->z_show_ctldir = newval;
|
|
}
|
|
|
|
static void
|
|
vscan_changed_cb(void *arg, uint64_t newval)
|
|
{
|
|
zfsvfs_t *zfsvfs = arg;
|
|
|
|
zfsvfs->z_vscan = newval;
|
|
}
|
|
|
|
static void
|
|
acl_mode_changed_cb(void *arg, uint64_t newval)
|
|
{
|
|
zfsvfs_t *zfsvfs = arg;
|
|
|
|
zfsvfs->z_acl_mode = newval;
|
|
}
|
|
|
|
static void
|
|
acl_inherit_changed_cb(void *arg, uint64_t newval)
|
|
{
|
|
zfsvfs_t *zfsvfs = arg;
|
|
|
|
zfsvfs->z_acl_inherit = newval;
|
|
}
|
|
|
|
static int
|
|
zfs_register_callbacks(vfs_t *vfsp)
|
|
{
|
|
struct dsl_dataset *ds = NULL;
|
|
objset_t *os = NULL;
|
|
zfsvfs_t *zfsvfs = NULL;
|
|
uint64_t nbmand;
|
|
boolean_t readonly = B_FALSE;
|
|
boolean_t do_readonly = B_FALSE;
|
|
boolean_t setuid = B_FALSE;
|
|
boolean_t do_setuid = B_FALSE;
|
|
boolean_t exec = B_FALSE;
|
|
boolean_t do_exec = B_FALSE;
|
|
boolean_t xattr = B_FALSE;
|
|
boolean_t atime = B_FALSE;
|
|
boolean_t do_atime = B_FALSE;
|
|
boolean_t do_xattr = B_FALSE;
|
|
int error = 0;
|
|
|
|
ASSERT(vfsp);
|
|
zfsvfs = vfsp->vfs_data;
|
|
ASSERT(zfsvfs);
|
|
os = zfsvfs->z_os;
|
|
|
|
/*
|
|
* This function can be called for a snapshot when we update snapshot's
|
|
* mount point, which isn't really supported.
|
|
*/
|
|
if (dmu_objset_is_snapshot(os))
|
|
return (EOPNOTSUPP);
|
|
|
|
/*
|
|
* The act of registering our callbacks will destroy any mount
|
|
* options we may have. In order to enable temporary overrides
|
|
* of mount options, we stash away the current values and
|
|
* restore them after we register the callbacks.
|
|
*/
|
|
if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) ||
|
|
!spa_writeable(dmu_objset_spa(os))) {
|
|
readonly = B_TRUE;
|
|
do_readonly = B_TRUE;
|
|
} else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
|
|
readonly = B_FALSE;
|
|
do_readonly = B_TRUE;
|
|
}
|
|
if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
|
|
setuid = B_FALSE;
|
|
do_setuid = B_TRUE;
|
|
} else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
|
|
setuid = B_TRUE;
|
|
do_setuid = B_TRUE;
|
|
}
|
|
if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
|
|
exec = B_FALSE;
|
|
do_exec = B_TRUE;
|
|
} else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
|
|
exec = B_TRUE;
|
|
do_exec = B_TRUE;
|
|
}
|
|
if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
|
|
zfsvfs->z_xattr = xattr = ZFS_XATTR_OFF;
|
|
do_xattr = B_TRUE;
|
|
} else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
|
|
zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR;
|
|
do_xattr = B_TRUE;
|
|
} else if (vfs_optionisset(vfsp, MNTOPT_DIRXATTR, NULL)) {
|
|
zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR;
|
|
do_xattr = B_TRUE;
|
|
} else if (vfs_optionisset(vfsp, MNTOPT_SAXATTR, NULL)) {
|
|
zfsvfs->z_xattr = xattr = ZFS_XATTR_SA;
|
|
do_xattr = B_TRUE;
|
|
}
|
|
if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
|
|
atime = B_FALSE;
|
|
do_atime = B_TRUE;
|
|
} else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
|
|
atime = B_TRUE;
|
|
do_atime = B_TRUE;
|
|
}
|
|
|
|
/*
|
|
* We need to enter pool configuration here, so that we can use
|
|
* dsl_prop_get_int_ds() to handle the special nbmand property below.
|
|
* dsl_prop_get_integer() can not be used, because it has to acquire
|
|
* spa_namespace_lock and we can not do that because we already hold
|
|
* z_teardown_lock. The problem is that spa_write_cachefile() is called
|
|
* with spa_namespace_lock held and the function calls ZFS vnode
|
|
* operations to write the cache file and thus z_teardown_lock is
|
|
* acquired after spa_namespace_lock.
|
|
*/
|
|
ds = dmu_objset_ds(os);
|
|
dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
|
|
|
|
/*
|
|
* nbmand is a special property. It can only be changed at
|
|
* mount time.
|
|
*
|
|
* This is weird, but it is documented to only be changeable
|
|
* at mount time.
|
|
*/
|
|
if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
|
|
nbmand = B_FALSE;
|
|
} else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
|
|
nbmand = B_TRUE;
|
|
} else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand) != 0)) {
|
|
dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
|
|
return (error);
|
|
}
|
|
|
|
/*
|
|
* Register property callbacks.
|
|
*
|
|
* It would probably be fine to just check for i/o error from
|
|
* the first prop_register(), but I guess I like to go
|
|
* overboard...
|
|
*/
|
|
error = dsl_prop_register(ds,
|
|
zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
|
|
error = error ? error : dsl_prop_register(ds,
|
|
zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
|
|
error = error ? error : dsl_prop_register(ds,
|
|
zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
|
|
error = error ? error : dsl_prop_register(ds,
|
|
zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
|
|
error = error ? error : dsl_prop_register(ds,
|
|
zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
|
|
error = error ? error : dsl_prop_register(ds,
|
|
zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
|
|
error = error ? error : dsl_prop_register(ds,
|
|
zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
|
|
error = error ? error : dsl_prop_register(ds,
|
|
zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs);
|
|
error = error ? error : dsl_prop_register(ds,
|
|
zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
|
|
zfsvfs);
|
|
error = error ? error : dsl_prop_register(ds,
|
|
zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs);
|
|
dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
|
|
if (error)
|
|
goto unregister;
|
|
|
|
/*
|
|
* Invoke our callbacks to restore temporary mount options.
|
|
*/
|
|
if (do_readonly)
|
|
readonly_changed_cb(zfsvfs, readonly);
|
|
if (do_setuid)
|
|
setuid_changed_cb(zfsvfs, setuid);
|
|
if (do_exec)
|
|
exec_changed_cb(zfsvfs, exec);
|
|
if (do_xattr)
|
|
xattr_changed_cb(zfsvfs, xattr);
|
|
if (do_atime)
|
|
atime_changed_cb(zfsvfs, atime);
|
|
|
|
nbmand_changed_cb(zfsvfs, nbmand);
|
|
|
|
return (0);
|
|
|
|
unregister:
|
|
dsl_prop_unregister_all(ds, zfsvfs);
|
|
return (error);
|
|
}
|
|
|
|
/*
|
|
* Associate this zfsvfs with the given objset, which must be owned.
|
|
* This will cache a bunch of on-disk state from the objset in the
|
|
* zfsvfs.
|
|
*/
|
|
static int
|
|
zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
|
|
{
|
|
int error;
|
|
uint64_t val;
|
|
|
|
zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;
|
|
zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
|
|
zfsvfs->z_os = os;
|
|
|
|
error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
|
|
if (error != 0)
|
|
return (error);
|
|
if (zfsvfs->z_version >
|
|
zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
|
|
(void) printf("Can't mount a version %lld file system "
|
|
"on a version %lld pool\n. Pool must be upgraded to mount "
|
|
"this file system.", (u_longlong_t)zfsvfs->z_version,
|
|
(u_longlong_t)spa_version(dmu_objset_spa(os)));
|
|
return (SET_ERROR(ENOTSUP));
|
|
}
|
|
error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val);
|
|
if (error != 0)
|
|
return (error);
|
|
zfsvfs->z_norm = (int)val;
|
|
|
|
error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val);
|
|
if (error != 0)
|
|
return (error);
|
|
zfsvfs->z_utf8 = (val != 0);
|
|
|
|
error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val);
|
|
if (error != 0)
|
|
return (error);
|
|
zfsvfs->z_case = (uint_t)val;
|
|
|
|
/*
|
|
* Fold case on file systems that are always or sometimes case
|
|
* insensitive.
|
|
*/
|
|
if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
|
|
zfsvfs->z_case == ZFS_CASE_MIXED)
|
|
zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
|
|
|
|
zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
|
|
zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
|
|
|
|
uint64_t sa_obj = 0;
|
|
if (zfsvfs->z_use_sa) {
|
|
/* should either have both of these objects or none */
|
|
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
|
|
&sa_obj);
|
|
if (error != 0)
|
|
return (error);
|
|
}
|
|
|
|
error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
|
|
&zfsvfs->z_attr_table);
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
if (zfsvfs->z_version >= ZPL_VERSION_SA)
|
|
sa_register_update_callback(os, zfs_sa_upgrade);
|
|
|
|
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
|
|
&zfsvfs->z_root);
|
|
if (error != 0)
|
|
return (error);
|
|
ASSERT(zfsvfs->z_root != 0);
|
|
|
|
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
|
|
&zfsvfs->z_unlinkedobj);
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
error = zap_lookup(os, MASTER_NODE_OBJ,
|
|
zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
|
|
8, 1, &zfsvfs->z_userquota_obj);
|
|
if (error == ENOENT)
|
|
zfsvfs->z_userquota_obj = 0;
|
|
else if (error != 0)
|
|
return (error);
|
|
|
|
error = zap_lookup(os, MASTER_NODE_OBJ,
|
|
zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
|
|
8, 1, &zfsvfs->z_groupquota_obj);
|
|
if (error == ENOENT)
|
|
zfsvfs->z_groupquota_obj = 0;
|
|
else if (error != 0)
|
|
return (error);
|
|
|
|
error = zap_lookup(os, MASTER_NODE_OBJ,
|
|
zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA],
|
|
8, 1, &zfsvfs->z_projectquota_obj);
|
|
if (error == ENOENT)
|
|
zfsvfs->z_projectquota_obj = 0;
|
|
else if (error != 0)
|
|
return (error);
|
|
|
|
error = zap_lookup(os, MASTER_NODE_OBJ,
|
|
zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA],
|
|
8, 1, &zfsvfs->z_userobjquota_obj);
|
|
if (error == ENOENT)
|
|
zfsvfs->z_userobjquota_obj = 0;
|
|
else if (error != 0)
|
|
return (error);
|
|
|
|
error = zap_lookup(os, MASTER_NODE_OBJ,
|
|
zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA],
|
|
8, 1, &zfsvfs->z_groupobjquota_obj);
|
|
if (error == ENOENT)
|
|
zfsvfs->z_groupobjquota_obj = 0;
|
|
else if (error != 0)
|
|
return (error);
|
|
|
|
error = zap_lookup(os, MASTER_NODE_OBJ,
|
|
zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA],
|
|
8, 1, &zfsvfs->z_projectobjquota_obj);
|
|
if (error == ENOENT)
|
|
zfsvfs->z_projectobjquota_obj = 0;
|
|
else if (error != 0)
|
|
return (error);
|
|
|
|
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
|
|
&zfsvfs->z_fuid_obj);
|
|
if (error == ENOENT)
|
|
zfsvfs->z_fuid_obj = 0;
|
|
else if (error != 0)
|
|
return (error);
|
|
|
|
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
|
|
&zfsvfs->z_shares_dir);
|
|
if (error == ENOENT)
|
|
zfsvfs->z_shares_dir = 0;
|
|
else if (error != 0)
|
|
return (error);
|
|
|
|
/*
|
|
* Only use the name cache if we are looking for a
|
|
* name on a file system that does not require normalization
|
|
* or case folding. We can also look there if we happen to be
|
|
* on a non-normalizing, mixed sensitivity file system IF we
|
|
* are looking for the exact name (which is always the case on
|
|
* FreeBSD).
|
|
*/
|
|
zfsvfs->z_use_namecache = !zfsvfs->z_norm ||
|
|
((zfsvfs->z_case == ZFS_CASE_MIXED) &&
|
|
!(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER));
|
|
|
|
return (0);
|
|
}
|
|
|
|
taskq_t *zfsvfs_taskq;
|
|
|
|
static void
|
|
zfsvfs_task_unlinked_drain(void *context, int pending __unused)
|
|
{
|
|
|
|
zfs_unlinked_drain((zfsvfs_t *)context);
|
|
}
|
|
|
|
int
|
|
zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp)
|
|
{
|
|
objset_t *os;
|
|
zfsvfs_t *zfsvfs;
|
|
int error;
|
|
boolean_t ro = (readonly || (strchr(osname, '@') != NULL));
|
|
|
|
/*
|
|
* XXX: Fix struct statfs so this isn't necessary!
|
|
*
|
|
* The 'osname' is used as the filesystem's special node, which means
|
|
* it must fit in statfs.f_mntfromname, or else it can't be
|
|
* enumerated, so libzfs_mnttab_find() returns NULL, which causes
|
|
* 'zfs unmount' to think it's not mounted when it is.
|
|
*/
|
|
if (strlen(osname) >= MNAMELEN)
|
|
return (SET_ERROR(ENAMETOOLONG));
|
|
|
|
zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
|
|
|
|
error = dmu_objset_own(osname, DMU_OST_ZFS, ro, B_TRUE, zfsvfs,
|
|
&os);
|
|
if (error != 0) {
|
|
kmem_free(zfsvfs, sizeof (zfsvfs_t));
|
|
return (error);
|
|
}
|
|
|
|
error = zfsvfs_create_impl(zfvp, zfsvfs, os);
|
|
|
|
return (error);
|
|
}
|
|
|
|
|
|
int
|
|
zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
|
|
{
|
|
int error;
|
|
|
|
zfsvfs->z_vfs = NULL;
|
|
zfsvfs->z_parent = zfsvfs;
|
|
|
|
mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
|
|
mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
|
|
list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
|
|
offsetof(znode_t, z_link_node));
|
|
TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0,
|
|
zfsvfs_task_unlinked_drain, zfsvfs);
|
|
#ifdef DIAGNOSTIC
|
|
rrm_init(&zfsvfs->z_teardown_lock, B_TRUE);
|
|
#else
|
|
rrm_init(&zfsvfs->z_teardown_lock, B_FALSE);
|
|
#endif
|
|
rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
|
|
rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
|
|
for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++)
|
|
mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
|
|
|
|
error = zfsvfs_init(zfsvfs, os);
|
|
if (error != 0) {
|
|
dmu_objset_disown(os, B_TRUE, zfsvfs);
|
|
*zfvp = NULL;
|
|
kmem_free(zfsvfs, sizeof (zfsvfs_t));
|
|
return (error);
|
|
}
|
|
|
|
*zfvp = zfsvfs;
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
|
|
{
|
|
int error;
|
|
|
|
/*
|
|
* Check for a bad on-disk format version now since we
|
|
* lied about owning the dataset readonly before.
|
|
*/
|
|
if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
|
|
dmu_objset_incompatible_encryption_version(zfsvfs->z_os))
|
|
return (SET_ERROR(EROFS));
|
|
|
|
error = zfs_register_callbacks(zfsvfs->z_vfs);
|
|
if (error)
|
|
return (error);
|
|
|
|
zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
|
|
|
|
/*
|
|
* If we are not mounting (ie: online recv), then we don't
|
|
* have to worry about replaying the log as we blocked all
|
|
* operations out since we closed the ZIL.
|
|
*/
|
|
if (mounting) {
|
|
boolean_t readonly;
|
|
|
|
/*
|
|
* During replay we remove the read only flag to
|
|
* allow replays to succeed.
|
|
*/
|
|
readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
|
|
if (readonly != 0) {
|
|
zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
|
|
} else {
|
|
dsl_dir_t *dd;
|
|
|
|
zfs_unlinked_drain(zfsvfs);
|
|
dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
|
|
dd->dd_activity_cancelled = B_FALSE;
|
|
}
|
|
|
|
/*
|
|
* Parse and replay the intent log.
|
|
*
|
|
* Because of ziltest, this must be done after
|
|
* zfs_unlinked_drain(). (Further note: ziltest
|
|
* doesn't use readonly mounts, where
|
|
* zfs_unlinked_drain() isn't called.) This is because
|
|
* ziltest causes spa_sync() to think it's committed,
|
|
* but actually it is not, so the intent log contains
|
|
* many txg's worth of changes.
|
|
*
|
|
* In particular, if object N is in the unlinked set in
|
|
* the last txg to actually sync, then it could be
|
|
* actually freed in a later txg and then reallocated
|
|
* in a yet later txg. This would write a "create
|
|
* object N" record to the intent log. Normally, this
|
|
* would be fine because the spa_sync() would have
|
|
* written out the fact that object N is free, before
|
|
* we could write the "create object N" intent log
|
|
* record.
|
|
*
|
|
* But when we are in ziltest mode, we advance the "open
|
|
* txg" without actually spa_sync()-ing the changes to
|
|
* disk. So we would see that object N is still
|
|
* allocated and in the unlinked set, and there is an
|
|
* intent log record saying to allocate it.
|
|
*/
|
|
if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
|
|
if (zil_replay_disable) {
|
|
zil_destroy(zfsvfs->z_log, B_FALSE);
|
|
} else {
|
|
boolean_t use_nc = zfsvfs->z_use_namecache;
|
|
zfsvfs->z_use_namecache = B_FALSE;
|
|
zfsvfs->z_replay = B_TRUE;
|
|
zil_replay(zfsvfs->z_os, zfsvfs,
|
|
zfs_replay_vector);
|
|
zfsvfs->z_replay = B_FALSE;
|
|
zfsvfs->z_use_namecache = use_nc;
|
|
}
|
|
}
|
|
|
|
/* restore readonly bit */
|
|
if (readonly != 0)
|
|
zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
|
|
}
|
|
|
|
/*
|
|
* Set the objset user_ptr to track its zfsvfs.
|
|
*/
|
|
mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
|
|
dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
|
|
mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
|
|
|
|
return (0);
|
|
}
|
|
|
|
extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */
|
|
|
|
void
|
|
zfsvfs_free(zfsvfs_t *zfsvfs)
|
|
{
|
|
int i;
|
|
|
|
/*
|
|
* This is a barrier to prevent the filesystem from going away in
|
|
* zfs_znode_move() until we can safely ensure that the filesystem is
|
|
* not unmounted. We consider the filesystem valid before the barrier
|
|
* and invalid after the barrier.
|
|
*/
|
|
rw_enter(&zfsvfs_lock, RW_READER);
|
|
rw_exit(&zfsvfs_lock);
|
|
|
|
zfs_fuid_destroy(zfsvfs);
|
|
|
|
mutex_destroy(&zfsvfs->z_znodes_lock);
|
|
mutex_destroy(&zfsvfs->z_lock);
|
|
ASSERT(zfsvfs->z_nr_znodes == 0);
|
|
list_destroy(&zfsvfs->z_all_znodes);
|
|
rrm_destroy(&zfsvfs->z_teardown_lock);
|
|
rw_destroy(&zfsvfs->z_teardown_inactive_lock);
|
|
rw_destroy(&zfsvfs->z_fuid_lock);
|
|
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
|
|
mutex_destroy(&zfsvfs->z_hold_mtx[i]);
|
|
kmem_free(zfsvfs, sizeof (zfsvfs_t));
|
|
}
|
|
|
|
static void
|
|
zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
|
|
{
|
|
zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
|
|
if (zfsvfs->z_vfs) {
|
|
if (zfsvfs->z_use_fuids) {
|
|
vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
|
|
vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
|
|
vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
|
|
vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
|
|
vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
|
|
vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
|
|
} else {
|
|
vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
|
|
vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
|
|
vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
|
|
vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
|
|
vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
|
|
vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
|
|
}
|
|
}
|
|
zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
|
|
}
|
|
|
|
static int
|
|
zfs_domount(vfs_t *vfsp, char *osname)
|
|
{
|
|
uint64_t recordsize, fsid_guid;
|
|
int error = 0;
|
|
zfsvfs_t *zfsvfs;
|
|
|
|
ASSERT(vfsp);
|
|
ASSERT(osname);
|
|
|
|
error = zfsvfs_create(osname, vfsp->mnt_flag & MNT_RDONLY, &zfsvfs);
|
|
if (error)
|
|
return (error);
|
|
zfsvfs->z_vfs = vfsp;
|
|
|
|
if ((error = dsl_prop_get_integer(osname,
|
|
"recordsize", &recordsize, NULL)))
|
|
goto out;
|
|
zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE;
|
|
zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize;
|
|
|
|
vfsp->vfs_data = zfsvfs;
|
|
vfsp->mnt_flag |= MNT_LOCAL;
|
|
vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
|
|
vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
|
|
vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
|
|
/*
|
|
* This can cause a loss of coherence between ARC and page cache
|
|
* on ZoF - unclear if the problem is in FreeBSD or ZoF
|
|
*/
|
|
vfsp->mnt_kern_flag |= MNTK_NO_IOPF; /* vn_io_fault can be used */
|
|
vfsp->mnt_kern_flag |= MNTK_NOMSYNC;
|
|
vfsp->mnt_kern_flag |= MNTK_VMSETSIZE_BUG;
|
|
|
|
/*
|
|
* The fsid is 64 bits, composed of an 8-bit fs type, which
|
|
* separates our fsid from any other filesystem types, and a
|
|
* 56-bit objset unique ID. The objset unique ID is unique to
|
|
* all objsets open on this system, provided by unique_create().
|
|
* The 8-bit fs type must be put in the low bits of fsid[1]
|
|
* because that's where other Solaris filesystems put it.
|
|
*/
|
|
fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
|
|
ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0);
|
|
vfsp->vfs_fsid.val[0] = fsid_guid;
|
|
vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) |
|
|
(vfsp->mnt_vfc->vfc_typenum & 0xFF);
|
|
|
|
/*
|
|
* Set features for file system.
|
|
*/
|
|
zfs_set_fuid_feature(zfsvfs);
|
|
if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
|
|
vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
|
|
vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
|
|
vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE);
|
|
} else if (zfsvfs->z_case == ZFS_CASE_MIXED) {
|
|
vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
|
|
vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
|
|
}
|
|
vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);
|
|
|
|
if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
|
|
uint64_t pval;
|
|
|
|
atime_changed_cb(zfsvfs, B_FALSE);
|
|
readonly_changed_cb(zfsvfs, B_TRUE);
|
|
if ((error = dsl_prop_get_integer(osname,
|
|
"xattr", &pval, NULL)))
|
|
goto out;
|
|
xattr_changed_cb(zfsvfs, pval);
|
|
zfsvfs->z_issnap = B_TRUE;
|
|
zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;
|
|
|
|
mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
|
|
dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
|
|
mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
|
|
} else {
|
|
if ((error = zfsvfs_setup(zfsvfs, B_TRUE)))
|
|
goto out;
|
|
}
|
|
|
|
vfs_mountedfrom(vfsp, osname);
|
|
|
|
if (!zfsvfs->z_issnap)
|
|
zfsctl_create(zfsvfs);
|
|
out:
|
|
if (error) {
|
|
dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
|
|
zfsvfs_free(zfsvfs);
|
|
} else {
|
|
atomic_inc_32(&zfs_active_fs_count);
|
|
}
|
|
|
|
return (error);
|
|
}
|
|
|
|
void
|
|
zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
|
|
{
|
|
objset_t *os = zfsvfs->z_os;
|
|
|
|
if (!dmu_objset_is_snapshot(os))
|
|
dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs);
|
|
}
|
|
|
|
#ifdef SECLABEL
|
|
/*
 * Convert a decimal digit string to a uint64_t integer.
 *
 * On success returns 0 and stores the value in *objnum.  Returns EINVAL,
 * leaving *objnum untouched, when the string contains a non-digit character
 * or when the value does not fit in 64 bits.  An empty string converts to 0.
 */
static int
str_to_uint64(char *str, uint64_t *objnum)
{
	uint64_t num = 0;

	for (; *str != '\0'; str++) {
		uint64_t digit;

		if (*str < '0' || *str > '9')
			return (SET_ERROR(EINVAL));
		digit = (uint64_t)(*str - '0');

		/*
		 * num * 10 + digit must not exceed UINT64_MAX; reject the
		 * input instead of silently wrapping around.
		 */
		if (num > (UINT64_MAX - digit) / 10)
			return (SET_ERROR(EINVAL));

		num = num * 10 + digit;
	}

	*objnum = num;
	return (0);
}
|
|
|
|
/*
|
|
* The boot path passed from the boot loader is in the form of
|
|
* "rootpool-name/root-filesystem-object-number'. Convert this
|
|
* string to a dataset name: "rootpool-name/root-filesystem-name".
|
|
*/
|
|
static int
|
|
zfs_parse_bootfs(char *bpath, char *outpath)
|
|
{
|
|
char *slashp;
|
|
uint64_t objnum;
|
|
int error;
|
|
|
|
if (*bpath == 0 || *bpath == '/')
|
|
return (SET_ERROR(EINVAL));
|
|
|
|
(void) strcpy(outpath, bpath);
|
|
|
|
slashp = strchr(bpath, '/');
|
|
|
|
/* if no '/', just return the pool name */
|
|
if (slashp == NULL) {
|
|
return (0);
|
|
}
|
|
|
|
/* if not a number, just return the root dataset name */
|
|
if (str_to_uint64(slashp+1, &objnum)) {
|
|
return (0);
|
|
}
|
|
|
|
*slashp = '\0';
|
|
error = dsl_dsobj_to_dsname(bpath, objnum, outpath);
|
|
*slashp = '/';
|
|
|
|
return (error);
|
|
}
|
|
|
|
/*
|
|
* Check that the hex label string is appropriate for the dataset being
|
|
* mounted into the global_zone proper.
|
|
*
|
|
* Return an error if the hex label string is not default or
|
|
* admin_low/admin_high. For admin_low labels, the corresponding
|
|
* dataset must be readonly.
|
|
*/
|
|
int
|
|
zfs_check_global_label(const char *dsname, const char *hexsl)
|
|
{
|
|
if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
|
|
return (0);
|
|
if (strcasecmp(hexsl, ADMIN_HIGH) == 0)
|
|
return (0);
|
|
if (strcasecmp(hexsl, ADMIN_LOW) == 0) {
|
|
/* must be readonly */
|
|
uint64_t rdonly;
|
|
|
|
if (dsl_prop_get_integer(dsname,
|
|
zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL))
|
|
return (SET_ERROR(EACCES));
|
|
return (rdonly ? 0 : EACCES);
|
|
}
|
|
return (SET_ERROR(EACCES));
|
|
}
|
|
|
|
/*
|
|
* Determine whether the mount is allowed according to MAC check.
|
|
* by comparing (where appropriate) label of the dataset against
|
|
* the label of the zone being mounted into. If the dataset has
|
|
* no label, create one.
|
|
*
|
|
* Returns 0 if access allowed, error otherwise (e.g. EACCES)
|
|
*/
|
|
static int
|
|
zfs_mount_label_policy(vfs_t *vfsp, char *osname)
|
|
{
|
|
int error, retv;
|
|
zone_t *mntzone = NULL;
|
|
ts_label_t *mnt_tsl;
|
|
bslabel_t *mnt_sl;
|
|
bslabel_t ds_sl;
|
|
char ds_hexsl[MAXNAMELEN];
|
|
|
|
retv = EACCES; /* assume the worst */
|
|
|
|
/*
|
|
* Start by getting the dataset label if it exists.
|
|
*/
|
|
error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
|
|
1, sizeof (ds_hexsl), &ds_hexsl, NULL);
|
|
if (error)
|
|
return (SET_ERROR(EACCES));
|
|
|
|
/*
|
|
* If labeling is NOT enabled, then disallow the mount of datasets
|
|
* which have a non-default label already. No other label checks
|
|
* are needed.
|
|
*/
|
|
if (!is_system_labeled()) {
|
|
if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
|
|
return (0);
|
|
return (SET_ERROR(EACCES));
|
|
}
|
|
|
|
/*
|
|
* Get the label of the mountpoint. If mounting into the global
|
|
* zone (i.e. mountpoint is not within an active zone and the
|
|
* zoned property is off), the label must be default or
|
|
* admin_low/admin_high only; no other checks are needed.
|
|
*/
|
|
mntzone = zone_find_by_any_path(vfsp->vfs_mntpt, B_FALSE);
|
|
if (mntzone->zone_id == GLOBAL_ZONEID) {
|
|
uint64_t zoned;
|
|
|
|
zone_rele(mntzone);
|
|
|
|
if (dsl_prop_get_integer(osname,
|
|
zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
|
|
return (SET_ERROR(EACCES));
|
|
if (!zoned)
|
|
return (zfs_check_global_label(osname, ds_hexsl));
|
|
else
|
|
/*
|
|
* This is the case of a zone dataset being mounted
|
|
* initially, before the zone has been fully created;
|
|
* allow this mount into global zone.
|
|
*/
|
|
return (0);
|
|
}
|
|
|
|
mnt_tsl = mntzone->zone_slabel;
|
|
ASSERT(mnt_tsl != NULL);
|
|
label_hold(mnt_tsl);
|
|
mnt_sl = label2bslabel(mnt_tsl);
|
|
|
|
if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) {
|
|
/*
|
|
* The dataset doesn't have a real label, so fabricate one.
|
|
*/
|
|
char *str = NULL;
|
|
|
|
if (l_to_str_internal(mnt_sl, &str) == 0 &&
|
|
dsl_prop_set_string(osname,
|
|
zfs_prop_to_name(ZFS_PROP_MLSLABEL),
|
|
ZPROP_SRC_LOCAL, str) == 0)
|
|
retv = 0;
|
|
if (str != NULL)
|
|
kmem_free(str, strlen(str) + 1);
|
|
} else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) {
|
|
/*
|
|
* Now compare labels to complete the MAC check. If the
|
|
* labels are equal then allow access. If the mountpoint
|
|
* label dominates the dataset label, allow readonly access.
|
|
* Otherwise, access is denied.
|
|
*/
|
|
if (blequal(mnt_sl, &ds_sl))
|
|
retv = 0;
|
|
else if (bldominates(mnt_sl, &ds_sl)) {
|
|
vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
|
|
retv = 0;
|
|
}
|
|
}
|
|
|
|
label_rele(mnt_tsl);
|
|
zone_rele(mntzone);
|
|
return (retv);
|
|
}
|
|
#endif /* SECLABEL */
|
|
|
|
/*
 * Copy the pool component of dataset name 'osname' (everything before the
 * first '/', or the whole name when there is no '/') into 'poolname' and
 * NUL-terminate it.
 *
 * Returns ENAMETOOLONG, without writing to 'poolname', when the component
 * is MAXNAMELEN characters or longer; 0 otherwise.
 */
static int
getpoolname(const char *osname, char *poolname)
{
	const char *slash = strchr(osname, '/');
	size_t len;

	if (slash != NULL)
		len = (size_t)(slash - osname);
	else
		len = strlen(osname);

	if (len >= MAXNAMELEN)
		return (ENAMETOOLONG);

	memcpy(poolname, osname, len);
	poolname[len] = '\0';
	return (0);
}
|
|
|
|
/*ARGSUSED*/
|
|
static int
|
|
zfs_mount(vfs_t *vfsp)
|
|
{
|
|
kthread_t *td = curthread;
|
|
vnode_t *mvp = vfsp->mnt_vnodecovered;
|
|
cred_t *cr = td->td_ucred;
|
|
char *osname;
|
|
int error = 0;
|
|
int canwrite;
|
|
|
|
if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL))
|
|
return (SET_ERROR(EINVAL));
|
|
|
|
/*
|
|
* If full-owner-access is enabled and delegated administration is
|
|
* turned on, we must set nosuid.
|
|
*/
|
|
if (zfs_super_owner &&
|
|
dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) {
|
|
secpolicy_fs_mount_clearopts(cr, vfsp);
|
|
}
|
|
|
|
/*
|
|
* Check for mount privilege?
|
|
*
|
|
* If we don't have privilege then see if
|
|
* we have local permission to allow it
|
|
*/
|
|
error = secpolicy_fs_mount(cr, mvp, vfsp);
|
|
if (error) {
|
|
if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0)
|
|
goto out;
|
|
|
|
if (!(vfsp->vfs_flag & MS_REMOUNT)) {
|
|
vattr_t vattr;
|
|
|
|
/*
|
|
* Make sure user is the owner of the mount point
|
|
* or has sufficient privileges.
|
|
*/
|
|
|
|
vattr.va_mask = AT_UID;
|
|
|
|
vn_lock(mvp, LK_SHARED | LK_RETRY);
|
|
if (VOP_GETATTR(mvp, &vattr, cr)) {
|
|
VOP_UNLOCK1(mvp);
|
|
goto out;
|
|
}
|
|
|
|
if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 &&
|
|
VOP_ACCESS(mvp, VWRITE, cr, td) != 0) {
|
|
VOP_UNLOCK1(mvp);
|
|
goto out;
|
|
}
|
|
VOP_UNLOCK1(mvp);
|
|
}
|
|
|
|
secpolicy_fs_mount_clearopts(cr, vfsp);
|
|
}
|
|
|
|
/*
|
|
* Refuse to mount a filesystem if we are in a local zone and the
|
|
* dataset is not visible.
|
|
*/
|
|
if (!INGLOBALZONE(curproc) &&
|
|
(!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
|
|
error = SET_ERROR(EPERM);
|
|
goto out;
|
|
}
|
|
|
|
#ifdef SECLABEL
|
|
error = zfs_mount_label_policy(vfsp, osname);
|
|
if (error)
|
|
goto out;
|
|
#endif
|
|
|
|
vfsp->vfs_flag |= MNT_NFS4ACLS;
|
|
|
|
/*
|
|
* When doing a remount, we simply refresh our temporary properties
|
|
* according to those options set in the current VFS options.
|
|
*/
|
|
if (vfsp->vfs_flag & MS_REMOUNT) {
|
|
zfsvfs_t *zfsvfs = vfsp->vfs_data;
|
|
|
|
/*
|
|
* Refresh mount options with z_teardown_lock blocking I/O while
|
|
* the filesystem is in an inconsistent state.
|
|
* The lock also serializes this code with filesystem
|
|
* manipulations between entry to zfs_suspend_fs() and return
|
|
* from zfs_resume_fs().
|
|
*/
|
|
rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
|
|
zfs_unregister_callbacks(zfsvfs);
|
|
error = zfs_register_callbacks(vfsp);
|
|
rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
|
|
goto out;
|
|
}
|
|
|
|
/* Initial root mount: try hard to import the requested root pool. */
|
|
if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 &&
|
|
(vfsp->vfs_flag & MNT_UPDATE) == 0) {
|
|
char pname[MAXNAMELEN];
|
|
|
|
error = getpoolname(osname, pname);
|
|
if (error == 0)
|
|
error = spa_import_rootpool(pname);
|
|
if (error)
|
|
goto out;
|
|
}
|
|
DROP_GIANT();
|
|
error = zfs_domount(vfsp, osname);
|
|
PICKUP_GIANT();
|
|
|
|
out:
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
zfs_statfs(vfs_t *vfsp, struct statfs *statp)
|
|
{
|
|
zfsvfs_t *zfsvfs = vfsp->vfs_data;
|
|
uint64_t refdbytes, availbytes, usedobjs, availobjs;
|
|
|
|
statp->f_version = STATFS_VERSION;
|
|
|
|
ZFS_ENTER(zfsvfs);
|
|
|
|
dmu_objset_space(zfsvfs->z_os,
|
|
&refdbytes, &availbytes, &usedobjs, &availobjs);
|
|
|
|
/*
|
|
* The underlying storage pool actually uses multiple block sizes.
|
|
* We report the fragsize as the smallest block size we support,
|
|
* and we report our blocksize as the filesystem's maximum blocksize.
|
|
*/
|
|
statp->f_bsize = SPA_MINBLOCKSIZE;
|
|
statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize;
|
|
|
|
/*
|
|
* The following report "total" blocks of various kinds in the
|
|
* file system, but reported in terms of f_frsize - the
|
|
* "fragment" size.
|
|
*/
|
|
|
|
statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
|
|
statp->f_bfree = availbytes / statp->f_bsize;
|
|
statp->f_bavail = statp->f_bfree; /* no root reservation */
|
|
|
|
/*
|
|
* statvfs() should really be called statufs(), because it assumes
|
|
* static metadata. ZFS doesn't preallocate files, so the best
|
|
* we can do is report the max that could possibly fit in f_files,
|
|
* and that minus the number actually used in f_ffree.
|
|
* For f_ffree, report the smaller of the number of object available
|
|
* and the number of blocks (each object will take at least a block).
|
|
*/
|
|
statp->f_ffree = MIN(availobjs, statp->f_bfree);
|
|
statp->f_files = statp->f_ffree + usedobjs;
|
|
|
|
/*
|
|
* We're a zfs filesystem.
|
|
*/
|
|
strlcpy(statp->f_fstypename, "zfs",
|
|
sizeof (statp->f_fstypename));
|
|
|
|
strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
|
|
sizeof (statp->f_mntfromname));
|
|
strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
|
|
sizeof (statp->f_mntonname));
|
|
|
|
statp->f_namemax = MAXNAMELEN - 1;
|
|
|
|
ZFS_EXIT(zfsvfs);
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
|
|
{
|
|
zfsvfs_t *zfsvfs = vfsp->vfs_data;
|
|
znode_t *rootzp;
|
|
int error;
|
|
|
|
ZFS_ENTER(zfsvfs);
|
|
|
|
error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
|
|
if (error == 0)
|
|
*vpp = ZTOV(rootzp);
|
|
|
|
ZFS_EXIT(zfsvfs);
|
|
|
|
if (error == 0) {
|
|
error = vn_lock(*vpp, flags);
|
|
if (error != 0) {
|
|
VN_RELE(*vpp);
|
|
*vpp = NULL;
|
|
}
|
|
}
|
|
return (error);
|
|
}
|
|
|
|
/*
|
|
* Teardown the zfsvfs::z_os.
|
|
*
|
|
* Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
|
|
* and 'z_teardown_inactive_lock' held.
|
|
*/
|
|
static int
|
|
zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
|
|
{
|
|
znode_t *zp;
|
|
dsl_dir_t *dd;
|
|
|
|
/*
|
|
* If someone has not already unmounted this file system,
|
|
* drain the zrele_taskq to ensure all active references to the
|
|
* zfsvfs_t have been handled only then can it be safely destroyed.
|
|
*/
|
|
if (zfsvfs->z_os) {
|
|
/*
|
|
* If we're unmounting we have to wait for the list to
|
|
* drain completely.
|
|
*
|
|
* If we're not unmounting there's no guarantee the list
|
|
* will drain completely, but zreles run from the taskq
|
|
* may add the parents of dir-based xattrs to the taskq
|
|
* so we want to wait for these.
|
|
*
|
|
* We can safely read z_nr_znodes without locking because the
|
|
* VFS has already blocked operations which add to the
|
|
* z_all_znodes list and thus increment z_nr_znodes.
|
|
*/
|
|
int round = 0;
|
|
while (zfsvfs->z_nr_znodes > 0) {
|
|
taskq_wait_outstanding(dsl_pool_zrele_taskq(
|
|
dmu_objset_pool(zfsvfs->z_os)), 0);
|
|
if (++round > 1 && !unmounting)
|
|
break;
|
|
}
|
|
}
|
|
rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
|
|
|
|
if (!unmounting) {
|
|
/*
|
|
* We purge the parent filesystem's vfsp as the parent
|
|
* filesystem and all of its snapshots have their vnode's
|
|
* v_vfsp set to the parent's filesystem's vfsp. Note,
|
|
* 'z_parent' is self referential for non-snapshots.
|
|
*/
|
|
#ifdef FREEBSD_NAMECACHE
|
|
cache_purgevfs(zfsvfs->z_parent->z_vfs, true);
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* Close the zil. NB: Can't close the zil while zfs_inactive
|
|
* threads are blocked as zil_close can call zfs_inactive.
|
|
*/
|
|
if (zfsvfs->z_log) {
|
|
zil_close(zfsvfs->z_log);
|
|
zfsvfs->z_log = NULL;
|
|
}
|
|
|
|
rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);
|
|
|
|
/*
|
|
* If we are not unmounting (ie: online recv) and someone already
|
|
* unmounted this file system while we were doing the switcheroo,
|
|
* or a reopen of z_os failed then just bail out now.
|
|
*/
|
|
if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
|
|
rw_exit(&zfsvfs->z_teardown_inactive_lock);
|
|
rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
|
|
return (SET_ERROR(EIO));
|
|
}
|
|
|
|
/*
|
|
* At this point there are no vops active, and any new vops will
|
|
* fail with EIO since we have z_teardown_lock for writer (only
|
|
* relavent for forced unmount).
|
|
*
|
|
* Release all holds on dbufs.
|
|
*/
|
|
mutex_enter(&zfsvfs->z_znodes_lock);
|
|
for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
|
|
zp = list_next(&zfsvfs->z_all_znodes, zp))
|
|
if (zp->z_sa_hdl) {
|
|
ASSERT(ZTOV(zp)->v_count >= 0);
|
|
zfs_znode_dmu_fini(zp);
|
|
}
|
|
mutex_exit(&zfsvfs->z_znodes_lock);
|
|
|
|
/*
|
|
* If we are unmounting, set the unmounted flag and let new vops
|
|
* unblock. zfs_inactive will have the unmounted behavior, and all
|
|
* other vops will fail with EIO.
|
|
*/
|
|
if (unmounting) {
|
|
zfsvfs->z_unmounted = B_TRUE;
|
|
rw_exit(&zfsvfs->z_teardown_inactive_lock);
|
|
rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
|
|
}
|
|
|
|
/*
|
|
* z_os will be NULL if there was an error in attempting to reopen
|
|
* zfsvfs, so just return as the properties had already been
|
|
* unregistered and cached data had been evicted before.
|
|
*/
|
|
if (zfsvfs->z_os == NULL)
|
|
return (0);
|
|
|
|
/*
|
|
* Unregister properties.
|
|
*/
|
|
zfs_unregister_callbacks(zfsvfs);
|
|
|
|
/*
|
|
* Evict cached data
|
|
*/
|
|
if (!zfs_is_readonly(zfsvfs))
|
|
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
|
|
dmu_objset_evict_dbufs(zfsvfs->z_os);
|
|
dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
|
|
dsl_dir_cancel_waiters(dd);
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*ARGSUSED*/
|
|
static int
|
|
zfs_umount(vfs_t *vfsp, int fflag)
|
|
{
|
|
kthread_t *td = curthread;
|
|
zfsvfs_t *zfsvfs = vfsp->vfs_data;
|
|
objset_t *os;
|
|
cred_t *cr = td->td_ucred;
|
|
int ret;
|
|
|
|
ret = secpolicy_fs_unmount(cr, vfsp);
|
|
if (ret) {
|
|
if (dsl_deleg_access((char *)vfsp->vfs_resource,
|
|
ZFS_DELEG_PERM_MOUNT, cr))
|
|
return (ret);
|
|
}
|
|
|
|
/*
|
|
* Unmount any snapshots mounted under .zfs before unmounting the
|
|
* dataset itself.
|
|
*/
|
|
if (zfsvfs->z_ctldir != NULL) {
|
|
if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
|
|
return (ret);
|
|
}
|
|
|
|
if (fflag & MS_FORCE) {
|
|
/*
|
|
* Mark file system as unmounted before calling
|
|
* vflush(FORCECLOSE). This way we ensure no future vnops
|
|
* will be called and risk operating on DOOMED vnodes.
|
|
*/
|
|
rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
|
|
zfsvfs->z_unmounted = B_TRUE;
|
|
rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
|
|
}
|
|
|
|
/*
|
|
* Flush all the files.
|
|
*/
|
|
ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td);
|
|
if (ret != 0)
|
|
return (ret);
|
|
while (taskqueue_cancel(zfsvfs_taskq->tq_queue,
|
|
&zfsvfs->z_unlinked_drain_task, NULL) != 0)
|
|
taskqueue_drain(zfsvfs_taskq->tq_queue,
|
|
&zfsvfs->z_unlinked_drain_task);
|
|
|
|
VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
|
|
os = zfsvfs->z_os;
|
|
|
|
/*
|
|
* z_os will be NULL if there was an error in
|
|
* attempting to reopen zfsvfs.
|
|
*/
|
|
if (os != NULL) {
|
|
/*
|
|
* Unset the objset user_ptr.
|
|
*/
|
|
mutex_enter(&os->os_user_ptr_lock);
|
|
dmu_objset_set_user(os, NULL);
|
|
mutex_exit(&os->os_user_ptr_lock);
|
|
|
|
/*
|
|
* Finally release the objset
|
|
*/
|
|
dmu_objset_disown(os, B_TRUE, zfsvfs);
|
|
}
|
|
|
|
/*
|
|
* We can now safely destroy the '.zfs' directory node.
|
|
*/
|
|
if (zfsvfs->z_ctldir != NULL)
|
|
zfsctl_destroy(zfsvfs);
|
|
zfs_freevfs(vfsp);
|
|
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
|
|
{
|
|
zfsvfs_t *zfsvfs = vfsp->vfs_data;
|
|
znode_t *zp;
|
|
int err;
|
|
|
|
/*
|
|
* zfs_zget() can't operate on virtual entries like .zfs/ or
|
|
* .zfs/snapshot/ directories, that's why we return EOPNOTSUPP.
|
|
* This will make NFS to switch to LOOKUP instead of using VGET.
|
|
*/
|
|
if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR ||
|
|
(zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir))
|
|
return (EOPNOTSUPP);
|
|
|
|
ZFS_ENTER(zfsvfs);
|
|
err = zfs_zget(zfsvfs, ino, &zp);
|
|
if (err == 0 && zp->z_unlinked) {
|
|
vrele(ZTOV(zp));
|
|
err = EINVAL;
|
|
}
|
|
if (err == 0)
|
|
*vpp = ZTOV(zp);
|
|
ZFS_EXIT(zfsvfs);
|
|
if (err == 0) {
|
|
err = vn_lock(*vpp, flags);
|
|
if (err != 0)
|
|
vrele(*vpp);
|
|
}
|
|
if (err != 0)
|
|
*vpp = NULL;
|
|
return (err);
|
|
}
|
|
|
|
static int
|
|
zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
|
|
struct ucred **credanonp, int *numsecflavors, int **secflavors)
|
|
{
|
|
zfsvfs_t *zfsvfs = vfsp->vfs_data;
|
|
|
|
/*
|
|
* If this is regular file system vfsp is the same as
|
|
* zfsvfs->z_parent->z_vfs, but if it is snapshot,
|
|
* zfsvfs->z_parent->z_vfs represents parent file system
|
|
* which we have to use here, because only this file system
|
|
* has mnt_export configured.
|
|
*/
|
|
return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp,
|
|
credanonp, numsecflavors, secflavors));
|
|
}
|
|
|
|
CTASSERT(SHORT_FID_LEN <= sizeof (struct fid));
|
|
CTASSERT(LONG_FID_LEN <= sizeof (struct fid));
|
|
|
|
static int
|
|
zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
|
|
{
|
|
struct componentname cn;
|
|
zfsvfs_t *zfsvfs = vfsp->vfs_data;
|
|
znode_t *zp;
|
|
vnode_t *dvp;
|
|
uint64_t object = 0;
|
|
uint64_t fid_gen = 0;
|
|
uint64_t gen_mask;
|
|
uint64_t zp_gen;
|
|
int i, err;
|
|
|
|
*vpp = NULL;
|
|
|
|
ZFS_ENTER(zfsvfs);
|
|
|
|
/*
|
|
* On FreeBSD we can get snapshot's mount point or its parent file
|
|
* system mount point depending if snapshot is already mounted or not.
|
|
*/
|
|
if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) {
|
|
zfid_long_t *zlfid = (zfid_long_t *)fidp;
|
|
uint64_t objsetid = 0;
|
|
uint64_t setgen = 0;
|
|
|
|
for (i = 0; i < sizeof (zlfid->zf_setid); i++)
|
|
objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
|
|
|
|
for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
|
|
setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
|
|
|
|
ZFS_EXIT(zfsvfs);
|
|
|
|
err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
|
|
if (err)
|
|
return (SET_ERROR(EINVAL));
|
|
ZFS_ENTER(zfsvfs);
|
|
}
|
|
|
|
if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
|
|
zfid_short_t *zfid = (zfid_short_t *)fidp;
|
|
|
|
for (i = 0; i < sizeof (zfid->zf_object); i++)
|
|
object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
|
|
|
|
for (i = 0; i < sizeof (zfid->zf_gen); i++)
|
|
fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
|
|
} else {
|
|
ZFS_EXIT(zfsvfs);
|
|
return (SET_ERROR(EINVAL));
|
|
}
|
|
|
|
/*
|
|
* A zero fid_gen means we are in .zfs or the .zfs/snapshot
|
|
* directory tree. If the object == zfsvfs->z_shares_dir, then
|
|
* we are in the .zfs/shares directory tree.
|
|
*/
|
|
if ((fid_gen == 0 &&
|
|
(object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) ||
|
|
(zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) {
|
|
ZFS_EXIT(zfsvfs);
|
|
VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp));
|
|
if (object == ZFSCTL_INO_SNAPDIR) {
|
|
cn.cn_nameptr = "snapshot";
|
|
cn.cn_namelen = strlen(cn.cn_nameptr);
|
|
cn.cn_nameiop = LOOKUP;
|
|
cn.cn_flags = ISLASTCN | LOCKLEAF;
|
|
cn.cn_lkflags = flags;
|
|
VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
|
|
vput(dvp);
|
|
} else if (object == zfsvfs->z_shares_dir) {
|
|
/*
|
|
* XXX This branch must not be taken,
|
|
* if it is, then the lookup below will
|
|
* explode.
|
|
*/
|
|
cn.cn_nameptr = "shares";
|
|
cn.cn_namelen = strlen(cn.cn_nameptr);
|
|
cn.cn_nameiop = LOOKUP;
|
|
cn.cn_flags = ISLASTCN;
|
|
cn.cn_lkflags = flags;
|
|
VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
|
|
vput(dvp);
|
|
} else {
|
|
*vpp = dvp;
|
|
}
|
|
return (err);
|
|
}
|
|
|
|
gen_mask = -1ULL >> (64 - 8 * i);
|
|
|
|
dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
|
|
if ((err = zfs_zget(zfsvfs, object, &zp))) {
|
|
ZFS_EXIT(zfsvfs);
|
|
return (err);
|
|
}
|
|
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
|
|
sizeof (uint64_t));
|
|
zp_gen = zp_gen & gen_mask;
|
|
if (zp_gen == 0)
|
|
zp_gen = 1;
|
|
if (zp->z_unlinked || zp_gen != fid_gen) {
|
|
dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
|
|
vrele(ZTOV(zp));
|
|
ZFS_EXIT(zfsvfs);
|
|
return (SET_ERROR(EINVAL));
|
|
}
|
|
|
|
*vpp = ZTOV(zp);
|
|
ZFS_EXIT(zfsvfs);
|
|
err = vn_lock(*vpp, flags);
|
|
if (err == 0)
|
|
vnode_create_vobject(*vpp, zp->z_size, curthread);
|
|
else
|
|
*vpp = NULL;
|
|
return (err);
|
|
}
|
|
|
|
/*
|
|
* Block out VOPs and close zfsvfs_t::z_os
|
|
*
|
|
* Note, if successful, then we return with the 'z_teardown_lock' and
|
|
* 'z_teardown_inactive_lock' write held. We leave ownership of the underlying
|
|
* dataset and objset intact so that they can be atomically handed off during
|
|
* a subsequent rollback or recv operation and the resume thereafter.
|
|
*/
|
|
int
|
|
zfs_suspend_fs(zfsvfs_t *zfsvfs)
|
|
{
|
|
int error;
|
|
|
|
if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
|
|
return (error);
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* Rebuild SA and release VOPs. Note that ownership of the underlying dataset
|
|
* is an invariant across any of the operations that can be performed while the
|
|
* filesystem was suspended. Whether it succeeded or failed, the preconditions
|
|
* are the same: the relevant objset and associated dataset are owned by
|
|
* zfsvfs, held, and long held on entry.
|
|
*/
|
|
int
|
|
zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
|
|
{
|
|
int err;
|
|
znode_t *zp;
|
|
|
|
ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
|
|
ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
|
|
|
|
/*
|
|
* We already own this, so just update the objset_t, as the one we
|
|
* had before may have been evicted.
|
|
*/
|
|
objset_t *os;
|
|
VERIFY3P(ds->ds_owner, ==, zfsvfs);
|
|
VERIFY(dsl_dataset_long_held(ds));
|
|
dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
|
|
dsl_pool_config_enter(dp, FTAG);
|
|
VERIFY0(dmu_objset_from_ds(ds, &os));
|
|
dsl_pool_config_exit(dp, FTAG);
|
|
|
|
err = zfsvfs_init(zfsvfs, os);
|
|
if (err != 0)
|
|
goto bail;
|
|
|
|
ds->ds_dir->dd_activity_cancelled = B_FALSE;
|
|
VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
|
|
|
|
zfs_set_fuid_feature(zfsvfs);
|
|
|
|
/*
|
|
* Attempt to re-establish all the active znodes with
|
|
* their dbufs. If a zfs_rezget() fails, then we'll let
|
|
* any potential callers discover that via ZFS_ENTER_VERIFY_VP
|
|
* when they try to use their znode.
|
|
*/
|
|
mutex_enter(&zfsvfs->z_znodes_lock);
|
|
for (zp = list_head(&zfsvfs->z_all_znodes); zp;
|
|
zp = list_next(&zfsvfs->z_all_znodes, zp)) {
|
|
(void) zfs_rezget(zp);
|
|
}
|
|
mutex_exit(&zfsvfs->z_znodes_lock);
|
|
|
|
bail:
|
|
/* release the VOPs */
|
|
rw_exit(&zfsvfs->z_teardown_inactive_lock);
|
|
rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
|
|
|
|
if (err) {
|
|
/*
|
|
* Since we couldn't setup the sa framework, try to force
|
|
* unmount this file system.
|
|
*/
|
|
if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) {
|
|
vfs_ref(zfsvfs->z_vfs);
|
|
(void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
|
|
}
|
|
}
|
|
return (err);
|
|
}
|
|
|
|
static void
|
|
zfs_freevfs(vfs_t *vfsp)
|
|
{
|
|
zfsvfs_t *zfsvfs = vfsp->vfs_data;
|
|
|
|
zfsvfs_free(zfsvfs);
|
|
|
|
atomic_dec_32(&zfs_active_fs_count);
|
|
}
|
|
|
|
#ifdef __i386__
|
|
static int desiredvnodes_backup;
|
|
#endif
|
|
|
|
/*
 * On i386 only: remember the current desiredvnodes value and, if it still
 * equals the computed default, lower it to three quarters of that value.
 * Does nothing on other architectures.
 */
static void
zfs_vnodes_adjust(void)
{
#ifdef __i386__
	int dflt_vnodes;

	desiredvnodes_backup = desiredvnodes;

	/*
	 * Recompute the default the same way vntblinit() does.  If it is
	 * equal to desiredvnodes, it means that the value wasn't tuned by
	 * the administrator and we can tune it down; a tuned value is left
	 * alone.
	 */
	dflt_vnodes = min(maxproc + vm_cnt.v_page_count / 4, 2 *
	    vm_kmem_size / (5 * (sizeof (struct vm_object) +
	    sizeof (struct vnode))));
	if (dflt_vnodes != desiredvnodes)
		return;

	desiredvnodes = (3 * dflt_vnodes) / 4;
#endif
}
|
|
|
|
/*
 * On i386 only: restore desiredvnodes to the value saved by
 * zfs_vnodes_adjust().  Does nothing on other architectures.
 */
static void
zfs_vnodes_adjust_back(void)
{
#ifdef __i386__
	desiredvnodes = desiredvnodes_backup;
#endif
}
|
|
|
|
void
|
|
zfs_init(void)
|
|
{
|
|
|
|
printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n");
|
|
|
|
/*
|
|
* Initialize .zfs directory structures
|
|
*/
|
|
zfsctl_init();
|
|
|
|
/*
|
|
* Initialize znode cache, vnode ops, etc...
|
|
*/
|
|
zfs_znode_init();
|
|
|
|
/*
|
|
* Reduce number of vnodes. Originally number of vnodes is calculated
|
|
* with UFS inode in mind. We reduce it here, because it's too big for
|
|
* ZFS/i386.
|
|
*/
|
|
zfs_vnodes_adjust();
|
|
|
|
dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
|
|
|
|
zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
|
|
}
|
|
|
|
void
|
|
zfs_fini(void)
|
|
{
|
|
taskq_destroy(zfsvfs_taskq);
|
|
zfsctl_fini();
|
|
zfs_znode_fini();
|
|
zfs_vnodes_adjust_back();
|
|
}
|
|
|
|
int
|
|
zfs_busy(void)
|
|
{
|
|
return (zfs_active_fs_count != 0);
|
|
}
|
|
|
|
/*
|
|
* Release VOPs and unmount a suspended filesystem.
|
|
*/
|
|
int
|
|
zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
|
|
{
|
|
ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
|
|
ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
|
|
|
|
/*
|
|
* We already own this, so just hold and rele it to update the
|
|
* objset_t, as the one we had before may have been evicted.
|
|
*/
|
|
objset_t *os;
|
|
VERIFY3P(ds->ds_owner, ==, zfsvfs);
|
|
VERIFY(dsl_dataset_long_held(ds));
|
|
dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
|
|
dsl_pool_config_enter(dp, FTAG);
|
|
VERIFY0(dmu_objset_from_ds(ds, &os));
|
|
dsl_pool_config_exit(dp, FTAG);
|
|
zfsvfs->z_os = os;
|
|
|
|
/* release the VOPs */
|
|
rw_exit(&zfsvfs->z_teardown_inactive_lock);
|
|
rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
|
|
|
|
/*
|
|
* Try to force unmount this file system.
|
|
*/
|
|
(void) zfs_umount(zfsvfs->z_vfs, 0);
|
|
zfsvfs->z_unmounted = B_TRUE;
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
|
|
{
|
|
int error;
|
|
objset_t *os = zfsvfs->z_os;
|
|
dmu_tx_t *tx;
|
|
|
|
if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
|
|
return (SET_ERROR(EINVAL));
|
|
|
|
if (newvers < zfsvfs->z_version)
|
|
return (SET_ERROR(EINVAL));
|
|
|
|
if (zfs_spa_version_map(newvers) >
|
|
spa_version(dmu_objset_spa(zfsvfs->z_os)))
|
|
return (SET_ERROR(ENOTSUP));
|
|
|
|
tx = dmu_tx_create(os);
|
|
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
|
|
if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
|
|
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
|
|
ZFS_SA_ATTRS);
|
|
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
|
|
}
|
|
error = dmu_tx_assign(tx, TXG_WAIT);
|
|
if (error) {
|
|
dmu_tx_abort(tx);
|
|
return (error);
|
|
}
|
|
|
|
error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
|
|
8, 1, &newvers, tx);
|
|
|
|
if (error) {
|
|
dmu_tx_commit(tx);
|
|
return (error);
|
|
}
|
|
|
|
if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
|
|
uint64_t sa_obj;
|
|
|
|
ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
|
|
SPA_VERSION_SA);
|
|
sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
|
|
DMU_OT_NONE, 0, tx);
|
|
|
|
error = zap_add(os, MASTER_NODE_OBJ,
|
|
ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
|
|
ASSERT0(error);
|
|
|
|
VERIFY(0 == sa_set_sa_object(os, sa_obj));
|
|
sa_register_update_callback(os, zfs_sa_upgrade);
|
|
}
|
|
|
|
spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
|
|
"from %lu to %lu", zfsvfs->z_version, newvers);
|
|
|
|
dmu_tx_commit(tx);
|
|
|
|
zfsvfs->z_version = newvers;
|
|
os->os_version = newvers;
|
|
|
|
zfs_set_fuid_feature(zfsvfs);
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* Read a property stored within the master node.
|
|
*/
|
|
int
|
|
zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
|
|
{
|
|
uint64_t *cached_copy = NULL;
|
|
|
|
/*
|
|
* Figure out where in the objset_t the cached copy would live, if it
|
|
* is available for the requested property.
|
|
*/
|
|
if (os != NULL) {
|
|
switch (prop) {
|
|
case ZFS_PROP_VERSION:
|
|
cached_copy = &os->os_version;
|
|
break;
|
|
case ZFS_PROP_NORMALIZE:
|
|
cached_copy = &os->os_normalization;
|
|
break;
|
|
case ZFS_PROP_UTF8ONLY:
|
|
cached_copy = &os->os_utf8only;
|
|
break;
|
|
case ZFS_PROP_CASE:
|
|
cached_copy = &os->os_casesensitivity;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
|
|
*value = *cached_copy;
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* If the property wasn't cached, look up the file system's value for
|
|
* the property. For the version property, we look up a slightly
|
|
* different string.
|
|
*/
|
|
const char *pname;
|
|
int error = ENOENT;
|
|
if (prop == ZFS_PROP_VERSION) {
|
|
pname = ZPL_VERSION_STR;
|
|
} else {
|
|
pname = zfs_prop_to_name(prop);
|
|
}
|
|
|
|
if (os != NULL) {
|
|
ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
|
|
error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
|
|
}
|
|
|
|
if (error == ENOENT) {
|
|
/* No value set, use the default value */
|
|
switch (prop) {
|
|
case ZFS_PROP_VERSION:
|
|
*value = ZPL_VERSION;
|
|
break;
|
|
case ZFS_PROP_NORMALIZE:
|
|
case ZFS_PROP_UTF8ONLY:
|
|
*value = 0;
|
|
break;
|
|
case ZFS_PROP_CASE:
|
|
*value = ZFS_CASE_SENSITIVE;
|
|
break;
|
|
default:
|
|
return (error);
|
|
}
|
|
error = 0;
|
|
}
|
|
|
|
/*
|
|
* If one of the methods for getting the property value above worked,
|
|
* copy it into the objset_t's cache.
|
|
*/
|
|
if (error == 0 && cached_copy != NULL) {
|
|
*cached_copy = *value;
|
|
}
|
|
|
|
return (error);
|
|
}
|
|
|
|
/*
|
|
* Return true if the coresponding vfs's unmounted flag is set.
|
|
* Otherwise return false.
|
|
* If this function returns true we know VFS unmount has been initiated.
|
|
*/
|
|
boolean_t
|
|
zfs_get_vfs_flag_unmounted(objset_t *os)
|
|
{
|
|
zfsvfs_t *zfvp;
|
|
boolean_t unmounted = B_FALSE;
|
|
|
|
ASSERT(dmu_objset_type(os) == DMU_OST_ZFS);
|
|
|
|
mutex_enter(&os->os_user_ptr_lock);
|
|
zfvp = dmu_objset_get_user(os);
|
|
if (zfvp != NULL && zfvp->z_vfs != NULL &&
|
|
(zfvp->z_vfs->mnt_kern_flag & MNTK_UNMOUNT))
|
|
unmounted = B_TRUE;
|
|
mutex_exit(&os->os_user_ptr_lock);
|
|
|
|
return (unmounted);
|
|
}
|
|
|
|
#ifdef _KERNEL
|
|
void
|
|
zfsvfs_update_fromname(const char *oldname, const char *newname)
|
|
{
|
|
char tmpbuf[MAXPATHLEN];
|
|
struct mount *mp;
|
|
char *fromname;
|
|
size_t oldlen;
|
|
|
|
oldlen = strlen(oldname);
|
|
|
|
mtx_lock(&mountlist_mtx);
|
|
TAILQ_FOREACH(mp, &mountlist, mnt_list) {
|
|
fromname = mp->mnt_stat.f_mntfromname;
|
|
if (strcmp(fromname, oldname) == 0) {
|
|
(void) strlcpy(fromname, newname,
|
|
sizeof (mp->mnt_stat.f_mntfromname));
|
|
continue;
|
|
}
|
|
if (strncmp(fromname, oldname, oldlen) == 0 &&
|
|
(fromname[oldlen] == '/' || fromname[oldlen] == '@')) {
|
|
(void) snprintf(tmpbuf, sizeof (tmpbuf), "%s%s",
|
|
newname, fromname + oldlen);
|
|
(void) strlcpy(fromname, tmpbuf,
|
|
sizeof (mp->mnt_stat.f_mntfromname));
|
|
continue;
|
|
}
|
|
}
|
|
mtx_unlock(&mountlist_mtx);
|
|
}
|
|
#endif
|