mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-29 18:24:11 +03:00
Update core ZFS code from build 121 to build 141.
This commit is contained in:
+350
-155
@@ -19,10 +19,11 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
/* Portions Copyright 2010 Robert Milkowski */
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
@@ -46,6 +47,7 @@
|
||||
#include <sys/dsl_deleg.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/sa.h>
|
||||
#include <sys/varargs.h>
|
||||
#include <sys/policy.h>
|
||||
#include <sys/atomic.h>
|
||||
@@ -60,6 +62,8 @@
|
||||
#include <sys/dnlc.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/spa_boot.h>
|
||||
#include <sys/sa.h>
|
||||
#include "zfs_comutil.h"
|
||||
|
||||
int zfsfstype;
|
||||
vfsops_t *zfs_vfsops = NULL;
|
||||
@@ -163,8 +167,7 @@ zfs_sync(vfs_t *vfsp, short flag, cred_t *cr)
|
||||
|
||||
if (zfsvfs->z_log != NULL)
|
||||
zil_commit(zfsvfs->z_log, UINT64_MAX, 0);
|
||||
else
|
||||
txg_wait_synced(dp, 0);
|
||||
|
||||
ZFS_EXIT(zfsvfs);
|
||||
} else {
|
||||
/*
|
||||
@@ -380,14 +383,6 @@ vscan_changed_cb(void *arg, uint64_t newval)
|
||||
zfsvfs->z_vscan = newval;
|
||||
}
|
||||
|
||||
static void
|
||||
acl_mode_changed_cb(void *arg, uint64_t newval)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = arg;
|
||||
|
||||
zfsvfs->z_acl_mode = newval;
|
||||
}
|
||||
|
||||
static void
|
||||
acl_inherit_changed_cb(void *arg, uint64_t newval)
|
||||
{
|
||||
@@ -517,8 +512,6 @@ zfs_register_callbacks(vfs_t *vfsp)
|
||||
"exec", exec_changed_cb, zfsvfs);
|
||||
error = error ? error : dsl_prop_register(ds,
|
||||
"snapdir", snapdir_changed_cb, zfsvfs);
|
||||
error = error ? error : dsl_prop_register(ds,
|
||||
"aclmode", acl_mode_changed_cb, zfsvfs);
|
||||
error = error ? error : dsl_prop_register(ds,
|
||||
"aclinherit", acl_inherit_changed_cb, zfsvfs);
|
||||
error = error ? error : dsl_prop_register(ds,
|
||||
@@ -560,7 +553,6 @@ unregister:
|
||||
(void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs);
|
||||
(void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs);
|
||||
(void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs);
|
||||
(void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs);
|
||||
(void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
|
||||
zfsvfs);
|
||||
(void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs);
|
||||
@@ -568,69 +560,59 @@ unregister:
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
uidacct(objset_t *os, boolean_t isgroup, uint64_t fuid,
|
||||
int64_t delta, dmu_tx_t *tx)
|
||||
static int
|
||||
zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
|
||||
uint64_t *userp, uint64_t *groupp)
|
||||
{
|
||||
uint64_t used = 0;
|
||||
char buf[32];
|
||||
int err;
|
||||
uint64_t obj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
|
||||
znode_phys_t *znp = data;
|
||||
int error = 0;
|
||||
|
||||
if (delta == 0)
|
||||
return;
|
||||
/*
|
||||
* Is it a valid type of object to track?
|
||||
*/
|
||||
if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
|
||||
return (ENOENT);
|
||||
|
||||
(void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)fuid);
|
||||
err = zap_lookup(os, obj, buf, 8, 1, &used);
|
||||
ASSERT(err == 0 || err == ENOENT);
|
||||
/* no underflow/overflow */
|
||||
ASSERT(delta > 0 || used >= -delta);
|
||||
ASSERT(delta < 0 || used + delta > used);
|
||||
used += delta;
|
||||
if (used == 0)
|
||||
err = zap_remove(os, obj, buf, tx);
|
||||
else
|
||||
err = zap_update(os, obj, buf, 8, 1, &used, tx);
|
||||
ASSERT(err == 0);
|
||||
}
|
||||
/*
|
||||
* If we have a NULL data pointer
|
||||
* then assume the id's aren't changing and
|
||||
* return EEXIST to the dmu to let it know to
|
||||
* use the same ids
|
||||
*/
|
||||
if (data == NULL)
|
||||
return (EEXIST);
|
||||
|
||||
static void
|
||||
zfs_space_delta_cb(objset_t *os, dmu_object_type_t bonustype,
|
||||
void *oldbonus, void *newbonus,
|
||||
uint64_t oldused, uint64_t newused, dmu_tx_t *tx)
|
||||
{
|
||||
znode_phys_t *oldznp = oldbonus;
|
||||
znode_phys_t *newznp = newbonus;
|
||||
|
||||
if (bonustype != DMU_OT_ZNODE)
|
||||
return;
|
||||
|
||||
/* We charge 512 for the dnode (if it's allocated). */
|
||||
if (oldznp->zp_gen != 0)
|
||||
oldused += DNODE_SIZE;
|
||||
if (newznp->zp_gen != 0)
|
||||
newused += DNODE_SIZE;
|
||||
|
||||
if (oldznp->zp_uid == newznp->zp_uid) {
|
||||
uidacct(os, B_FALSE, oldznp->zp_uid, newused-oldused, tx);
|
||||
if (bonustype == DMU_OT_ZNODE) {
|
||||
*userp = znp->zp_uid;
|
||||
*groupp = znp->zp_gid;
|
||||
} else {
|
||||
uidacct(os, B_FALSE, oldznp->zp_uid, -oldused, tx);
|
||||
uidacct(os, B_FALSE, newznp->zp_uid, newused, tx);
|
||||
}
|
||||
int hdrsize;
|
||||
|
||||
if (oldznp->zp_gid == newznp->zp_gid) {
|
||||
uidacct(os, B_TRUE, oldznp->zp_gid, newused-oldused, tx);
|
||||
} else {
|
||||
uidacct(os, B_TRUE, oldznp->zp_gid, -oldused, tx);
|
||||
uidacct(os, B_TRUE, newznp->zp_gid, newused, tx);
|
||||
ASSERT(bonustype == DMU_OT_SA);
|
||||
hdrsize = sa_hdrsize(data);
|
||||
|
||||
if (hdrsize != 0) {
|
||||
*userp = *((uint64_t *)((uintptr_t)data + hdrsize +
|
||||
SA_UID_OFFSET));
|
||||
*groupp = *((uint64_t *)((uintptr_t)data + hdrsize +
|
||||
SA_GID_OFFSET));
|
||||
} else {
|
||||
/*
|
||||
* This should only happen for newly created
|
||||
* files that haven't had the znode data filled
|
||||
* in yet.
|
||||
*/
|
||||
*userp = 0;
|
||||
*groupp = 0;
|
||||
}
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
static void
|
||||
fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr,
|
||||
char *domainbuf, int buflen, uid_t *ridp)
|
||||
{
|
||||
extern uint64_t strtonum(const char *str, char **nptr);
|
||||
uint64_t fuid;
|
||||
const char *domain;
|
||||
|
||||
@@ -811,7 +793,7 @@ zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
|
||||
}
|
||||
|
||||
boolean_t
|
||||
zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
|
||||
zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
|
||||
{
|
||||
char buf[32];
|
||||
uint64_t used, quota, usedobj, quotaobj;
|
||||
@@ -834,33 +816,57 @@ zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
|
||||
return (used >= quota);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup)
|
||||
{
|
||||
uint64_t fuid;
|
||||
uint64_t quotaobj;
|
||||
uid_t id;
|
||||
|
||||
quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
|
||||
|
||||
id = isgroup ? zp->z_gid : zp->z_uid;
|
||||
|
||||
if (quotaobj == 0 || zfsvfs->z_replay)
|
||||
return (B_FALSE);
|
||||
|
||||
if (IS_EPHEMERAL(id)) {
|
||||
VERIFY(0 == sa_lookup(zp->z_sa_hdl,
|
||||
isgroup ? SA_ZPL_GID(zfsvfs) : SA_ZPL_UID(zfsvfs),
|
||||
&fuid, sizeof (fuid)));
|
||||
} else {
|
||||
fuid = (uint64_t)id;
|
||||
}
|
||||
|
||||
return (zfs_fuid_overquota(zfsvfs, isgroup, fuid));
|
||||
}
|
||||
|
||||
int
|
||||
zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
|
||||
zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
|
||||
{
|
||||
objset_t *os;
|
||||
zfsvfs_t *zfsvfs;
|
||||
uint64_t zval;
|
||||
int i, error;
|
||||
uint64_t sa_obj;
|
||||
|
||||
if (error = dsl_prop_get_integer(osname, "readonly", &zval, NULL))
|
||||
zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
|
||||
|
||||
/*
|
||||
* We claim to always be readonly so we can open snapshots;
|
||||
* other ZPL code will prevent us from writing to snapshots.
|
||||
*/
|
||||
error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
|
||||
if (error) {
|
||||
kmem_free(zfsvfs, sizeof (zfsvfs_t));
|
||||
return (error);
|
||||
if (zval)
|
||||
mode |= DS_MODE_READONLY;
|
||||
|
||||
error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os);
|
||||
if (error == EROFS) {
|
||||
mode |= DS_MODE_READONLY;
|
||||
error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os);
|
||||
}
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
/*
|
||||
* Initialize the zfs-specific filesystem structure.
|
||||
* Should probably make this a kmem cache, shuffle fields,
|
||||
* and just bzero up to z_hold_mtx[].
|
||||
*/
|
||||
zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
|
||||
zfsvfs->z_vfs = NULL;
|
||||
zfsvfs->z_parent = zfsvfs;
|
||||
zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
|
||||
@@ -870,15 +876,15 @@ zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
|
||||
error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
|
||||
if (error) {
|
||||
goto out;
|
||||
} else if (zfsvfs->z_version > ZPL_VERSION) {
|
||||
(void) printf("Mismatched versions: File system "
|
||||
"is version %llu on-disk format, which is "
|
||||
"incompatible with this software version %lld!",
|
||||
(u_longlong_t)zfsvfs->z_version, ZPL_VERSION);
|
||||
} else if (zfsvfs->z_version >
|
||||
zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
|
||||
(void) printf("Can't mount a version %lld file system "
|
||||
"on a version %lld pool\n. Pool must be upgraded to mount "
|
||||
"this file system.", (u_longlong_t)zfsvfs->z_version,
|
||||
(u_longlong_t)spa_version(dmu_objset_spa(os)));
|
||||
error = ENOTSUP;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
|
||||
goto out;
|
||||
zfsvfs->z_norm = (int)zval;
|
||||
@@ -900,6 +906,26 @@ zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
|
||||
zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
|
||||
|
||||
zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
|
||||
zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
|
||||
|
||||
if (zfsvfs->z_use_sa) {
|
||||
/* should either have both of these objects or none */
|
||||
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
|
||||
&sa_obj);
|
||||
if (error)
|
||||
return (error);
|
||||
} else {
|
||||
/*
|
||||
* Pre SA versions file systems should never touch
|
||||
* either the attribute registration or layout objects.
|
||||
*/
|
||||
sa_obj = 0;
|
||||
}
|
||||
|
||||
zfsvfs->z_attr_table = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END);
|
||||
|
||||
if (zfsvfs->z_version >= ZPL_VERSION_SA)
|
||||
sa_register_update_callback(os, zfs_sa_upgrade);
|
||||
|
||||
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
|
||||
&zfsvfs->z_root);
|
||||
@@ -944,12 +970,12 @@ zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
|
||||
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
|
||||
mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
|
||||
|
||||
*zvp = zfsvfs;
|
||||
*zfvp = zfsvfs;
|
||||
return (0);
|
||||
|
||||
out:
|
||||
dmu_objset_close(os);
|
||||
*zvp = NULL;
|
||||
dmu_objset_disown(os, zfsvfs);
|
||||
*zfvp = NULL;
|
||||
kmem_free(zfsvfs, sizeof (zfsvfs_t));
|
||||
return (error);
|
||||
}
|
||||
@@ -966,15 +992,11 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
|
||||
/*
|
||||
* Set the objset user_ptr to track its zfsvfs.
|
||||
*/
|
||||
mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock);
|
||||
mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
|
||||
dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
|
||||
mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock);
|
||||
mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
|
||||
|
||||
zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
|
||||
if (zil_disable) {
|
||||
zil_destroy(zfsvfs->z_log, 0);
|
||||
zfsvfs->z_log = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we are not mounting (ie: online recv), then we don't
|
||||
@@ -994,34 +1016,36 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
|
||||
else
|
||||
zfs_unlinked_drain(zfsvfs);
|
||||
|
||||
if (zfsvfs->z_log) {
|
||||
/*
|
||||
* Parse and replay the intent log.
|
||||
*
|
||||
* Because of ziltest, this must be done after
|
||||
* zfs_unlinked_drain(). (Further note: ziltest
|
||||
* doesn't use readonly mounts, where
|
||||
* zfs_unlinked_drain() isn't called.) This is because
|
||||
* ziltest causes spa_sync() to think it's committed,
|
||||
* but actually it is not, so the intent log contains
|
||||
* many txg's worth of changes.
|
||||
*
|
||||
* In particular, if object N is in the unlinked set in
|
||||
* the last txg to actually sync, then it could be
|
||||
* actually freed in a later txg and then reallocated
|
||||
* in a yet later txg. This would write a "create
|
||||
* object N" record to the intent log. Normally, this
|
||||
* would be fine because the spa_sync() would have
|
||||
* written out the fact that object N is free, before
|
||||
* we could write the "create object N" intent log
|
||||
* record.
|
||||
*
|
||||
* But when we are in ziltest mode, we advance the "open
|
||||
* txg" without actually spa_sync()-ing the changes to
|
||||
* disk. So we would see that object N is still
|
||||
* allocated and in the unlinked set, and there is an
|
||||
* intent log record saying to allocate it.
|
||||
*/
|
||||
/*
|
||||
* Parse and replay the intent log.
|
||||
*
|
||||
* Because of ziltest, this must be done after
|
||||
* zfs_unlinked_drain(). (Further note: ziltest
|
||||
* doesn't use readonly mounts, where
|
||||
* zfs_unlinked_drain() isn't called.) This is because
|
||||
* ziltest causes spa_sync() to think it's committed,
|
||||
* but actually it is not, so the intent log contains
|
||||
* many txg's worth of changes.
|
||||
*
|
||||
* In particular, if object N is in the unlinked set in
|
||||
* the last txg to actually sync, then it could be
|
||||
* actually freed in a later txg and then reallocated
|
||||
* in a yet later txg. This would write a "create
|
||||
* object N" record to the intent log. Normally, this
|
||||
* would be fine because the spa_sync() would have
|
||||
* written out the fact that object N is free, before
|
||||
* we could write the "create object N" intent log
|
||||
* record.
|
||||
*
|
||||
* But when we are in ziltest mode, we advance the "open
|
||||
* txg" without actually spa_sync()-ing the changes to
|
||||
* disk. So we would see that object N is still
|
||||
* allocated and in the unlinked set, and there is an
|
||||
* intent log record saying to allocate it.
|
||||
*/
|
||||
if (zil_replay_disable) {
|
||||
zil_destroy(zfsvfs->z_log, B_FALSE);
|
||||
} else {
|
||||
zfsvfs->z_replay = B_TRUE;
|
||||
zil_replay(zfsvfs->z_os, zfsvfs, zfs_replay_vector);
|
||||
zfsvfs->z_replay = B_FALSE;
|
||||
@@ -1070,7 +1094,9 @@ zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
|
||||
vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
|
||||
vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
|
||||
vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
|
||||
vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
|
||||
}
|
||||
zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -1084,7 +1110,7 @@ zfs_domount(vfs_t *vfsp, char *osname)
|
||||
ASSERT(vfsp);
|
||||
ASSERT(osname);
|
||||
|
||||
error = zfsvfs_create(osname, DS_MODE_OWNER, &zfsvfs);
|
||||
error = zfsvfs_create(osname, &zfsvfs);
|
||||
if (error)
|
||||
return (error);
|
||||
zfsvfs->z_vfs = vfsp;
|
||||
@@ -1135,6 +1161,7 @@ zfs_domount(vfs_t *vfsp, char *osname)
|
||||
vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
|
||||
vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
|
||||
}
|
||||
vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);
|
||||
|
||||
if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
|
||||
uint64_t pval;
|
||||
@@ -1146,9 +1173,9 @@ zfs_domount(vfs_t *vfsp, char *osname)
|
||||
xattr_changed_cb(zfsvfs, pval);
|
||||
zfsvfs->z_issnap = B_TRUE;
|
||||
|
||||
mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock);
|
||||
mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
|
||||
dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
|
||||
mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock);
|
||||
mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
|
||||
} else {
|
||||
error = zfsvfs_setup(zfsvfs, B_TRUE);
|
||||
}
|
||||
@@ -1157,7 +1184,7 @@ zfs_domount(vfs_t *vfsp, char *osname)
|
||||
zfsctl_create(zfsvfs);
|
||||
out:
|
||||
if (error) {
|
||||
dmu_objset_close(zfsvfs->z_os);
|
||||
dmu_objset_disown(zfsvfs->z_os, zfsvfs);
|
||||
zfsvfs_free(zfsvfs);
|
||||
} else {
|
||||
atomic_add_32(&zfs_active_fs_count, 1);
|
||||
@@ -1201,9 +1228,6 @@ zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
|
||||
VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb,
|
||||
zfsvfs) == 0);
|
||||
|
||||
VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb,
|
||||
zfsvfs) == 0);
|
||||
|
||||
VERIFY(dsl_prop_unregister(ds, "aclinherit",
|
||||
acl_inherit_changed_cb, zfsvfs) == 0);
|
||||
|
||||
@@ -1267,6 +1291,139 @@ zfs_parse_bootfs(char *bpath, char *outpath)
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* zfs_check_global_label:
|
||||
* Check that the hex label string is appropriate for the dataset
|
||||
* being mounted into the global_zone proper.
|
||||
*
|
||||
* Return an error if the hex label string is not default or
|
||||
* admin_low/admin_high. For admin_low labels, the corresponding
|
||||
* dataset must be readonly.
|
||||
*/
|
||||
int
|
||||
zfs_check_global_label(const char *dsname, const char *hexsl)
|
||||
{
|
||||
if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
|
||||
return (0);
|
||||
if (strcasecmp(hexsl, ADMIN_HIGH) == 0)
|
||||
return (0);
|
||||
if (strcasecmp(hexsl, ADMIN_LOW) == 0) {
|
||||
/* must be readonly */
|
||||
uint64_t rdonly;
|
||||
|
||||
if (dsl_prop_get_integer(dsname,
|
||||
zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL))
|
||||
return (EACCES);
|
||||
return (rdonly ? 0 : EACCES);
|
||||
}
|
||||
return (EACCES);
|
||||
}
|
||||
|
||||
/*
|
||||
* zfs_mount_label_policy:
|
||||
* Determine whether the mount is allowed according to MAC check.
|
||||
* by comparing (where appropriate) label of the dataset against
|
||||
* the label of the zone being mounted into. If the dataset has
|
||||
* no label, create one.
|
||||
*
|
||||
* Returns:
|
||||
* 0 : access allowed
|
||||
* >0 : error code, such as EACCES
|
||||
*/
|
||||
static int
|
||||
zfs_mount_label_policy(vfs_t *vfsp, char *osname)
|
||||
{
|
||||
int error, retv;
|
||||
zone_t *mntzone = NULL;
|
||||
ts_label_t *mnt_tsl;
|
||||
bslabel_t *mnt_sl;
|
||||
bslabel_t ds_sl;
|
||||
char ds_hexsl[MAXNAMELEN];
|
||||
|
||||
retv = EACCES; /* assume the worst */
|
||||
|
||||
/*
|
||||
* Start by getting the dataset label if it exists.
|
||||
*/
|
||||
error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
|
||||
1, sizeof (ds_hexsl), &ds_hexsl, NULL);
|
||||
if (error)
|
||||
return (EACCES);
|
||||
|
||||
/*
|
||||
* If labeling is NOT enabled, then disallow the mount of datasets
|
||||
* which have a non-default label already. No other label checks
|
||||
* are needed.
|
||||
*/
|
||||
if (!is_system_labeled()) {
|
||||
if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
|
||||
return (0);
|
||||
return (EACCES);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the label of the mountpoint. If mounting into the global
|
||||
* zone (i.e. mountpoint is not within an active zone and the
|
||||
* zoned property is off), the label must be default or
|
||||
* admin_low/admin_high only; no other checks are needed.
|
||||
*/
|
||||
mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE);
|
||||
if (mntzone->zone_id == GLOBAL_ZONEID) {
|
||||
uint64_t zoned;
|
||||
|
||||
zone_rele(mntzone);
|
||||
|
||||
if (dsl_prop_get_integer(osname,
|
||||
zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
|
||||
return (EACCES);
|
||||
if (!zoned)
|
||||
return (zfs_check_global_label(osname, ds_hexsl));
|
||||
else
|
||||
/*
|
||||
* This is the case of a zone dataset being mounted
|
||||
* initially, before the zone has been fully created;
|
||||
* allow this mount into global zone.
|
||||
*/
|
||||
return (0);
|
||||
}
|
||||
|
||||
mnt_tsl = mntzone->zone_slabel;
|
||||
ASSERT(mnt_tsl != NULL);
|
||||
label_hold(mnt_tsl);
|
||||
mnt_sl = label2bslabel(mnt_tsl);
|
||||
|
||||
if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) {
|
||||
/*
|
||||
* The dataset doesn't have a real label, so fabricate one.
|
||||
*/
|
||||
char *str = NULL;
|
||||
|
||||
if (l_to_str_internal(mnt_sl, &str) == 0 &&
|
||||
dsl_prop_set(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
|
||||
ZPROP_SRC_LOCAL, 1, strlen(str) + 1, str) == 0)
|
||||
retv = 0;
|
||||
if (str != NULL)
|
||||
kmem_free(str, strlen(str) + 1);
|
||||
} else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) {
|
||||
/*
|
||||
* Now compare labels to complete the MAC check. If the
|
||||
* labels are equal then allow access. If the mountpoint
|
||||
* label dominates the dataset label, allow readonly access.
|
||||
* Otherwise, access is denied.
|
||||
*/
|
||||
if (blequal(mnt_sl, &ds_sl))
|
||||
retv = 0;
|
||||
else if (bldominates(mnt_sl, &ds_sl)) {
|
||||
vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
|
||||
retv = 0;
|
||||
}
|
||||
}
|
||||
|
||||
label_rele(mnt_tsl);
|
||||
zone_rele(mntzone);
|
||||
return (retv);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_mountroot(vfs_t *vfsp, enum whymountroot why)
|
||||
{
|
||||
@@ -1419,8 +1576,7 @@ zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
|
||||
*/
|
||||
error = secpolicy_fs_mount(cr, mvp, vfsp);
|
||||
if (error) {
|
||||
error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr);
|
||||
if (error == 0) {
|
||||
if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) == 0) {
|
||||
vattr_t vattr;
|
||||
|
||||
/*
|
||||
@@ -1430,16 +1586,14 @@ zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
|
||||
|
||||
vattr.va_mask = AT_UID;
|
||||
|
||||
if (error = VOP_GETATTR(mvp, &vattr, 0, cr, NULL)) {
|
||||
if (VOP_GETATTR(mvp, &vattr, 0, cr, NULL)) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (secpolicy_vnode_owner(cr, vattr.va_uid) != 0 &&
|
||||
VOP_ACCESS(mvp, VWRITE, 0, cr, NULL) != 0) {
|
||||
error = EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
secpolicy_fs_mount_clearopts(cr, vfsp);
|
||||
} else {
|
||||
goto out;
|
||||
@@ -1456,6 +1610,10 @@ zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
|
||||
goto out;
|
||||
}
|
||||
|
||||
error = zfs_mount_label_policy(vfsp, osname);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* When doing a remount, we simply refresh our temporary properties
|
||||
* according to those options set in the current VFS options.
|
||||
@@ -1617,7 +1775,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
|
||||
mutex_enter(&zfsvfs->z_znodes_lock);
|
||||
for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
|
||||
zp = list_next(&zfsvfs->z_all_znodes, zp))
|
||||
if (zp->z_dbuf) {
|
||||
if (zp->z_sa_hdl) {
|
||||
ASSERT(ZTOV(zp)->v_count > 0);
|
||||
zfs_znode_dmu_fini(zp);
|
||||
}
|
||||
@@ -1668,9 +1826,8 @@ zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr)
|
||||
|
||||
ret = secpolicy_fs_unmount(cr, vfsp);
|
||||
if (ret) {
|
||||
ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
|
||||
ZFS_DELEG_PERM_MOUNT, cr);
|
||||
if (ret)
|
||||
if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
|
||||
ZFS_DELEG_PERM_MOUNT, cr))
|
||||
return (ret);
|
||||
}
|
||||
|
||||
@@ -1725,14 +1882,14 @@ zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr)
|
||||
/*
|
||||
* Unset the objset user_ptr.
|
||||
*/
|
||||
mutex_enter(&os->os->os_user_ptr_lock);
|
||||
mutex_enter(&os->os_user_ptr_lock);
|
||||
dmu_objset_set_user(os, NULL);
|
||||
mutex_exit(&os->os->os_user_ptr_lock);
|
||||
mutex_exit(&os->os_user_ptr_lock);
|
||||
|
||||
/*
|
||||
* Finally release the objset
|
||||
*/
|
||||
dmu_objset_close(os);
|
||||
dmu_objset_disown(os, zfsvfs);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1813,7 +1970,9 @@ zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (err);
|
||||
}
|
||||
zp_gen = zp->z_phys->zp_gen & gen_mask;
|
||||
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
|
||||
sizeof (uint64_t));
|
||||
zp_gen = zp_gen & gen_mask;
|
||||
if (zp_gen == 0)
|
||||
zp_gen = 1;
|
||||
if (zp->z_unlinked || zp_gen != fid_gen) {
|
||||
@@ -1835,17 +1994,13 @@ zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
|
||||
* 'z_teardown_inactive_lock' write held.
|
||||
*/
|
||||
int
|
||||
zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *modep)
|
||||
zfs_suspend_fs(zfsvfs_t *zfsvfs)
|
||||
{
|
||||
int error;
|
||||
|
||||
if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
|
||||
return (error);
|
||||
|
||||
*modep = zfsvfs->z_os->os_mode;
|
||||
if (name)
|
||||
dmu_objset_name(zfsvfs->z_os, name);
|
||||
dmu_objset_close(zfsvfs->z_os);
|
||||
dmu_objset_disown(zfsvfs->z_os, zfsvfs);
|
||||
|
||||
return (0);
|
||||
}
|
||||
@@ -1854,18 +2009,30 @@ zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *modep)
|
||||
* Reopen zfsvfs_t::z_os and release VOPs.
|
||||
*/
|
||||
int
|
||||
zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode)
|
||||
zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname)
|
||||
{
|
||||
int err;
|
||||
int err, err2;
|
||||
|
||||
ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
|
||||
ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
|
||||
|
||||
err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
|
||||
err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs,
|
||||
&zfsvfs->z_os);
|
||||
if (err) {
|
||||
zfsvfs->z_os = NULL;
|
||||
} else {
|
||||
znode_t *zp;
|
||||
uint64_t sa_obj = 0;
|
||||
|
||||
err2 = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
|
||||
ZFS_SA_ATTRS, 8, 1, &sa_obj);
|
||||
|
||||
if ((err || err2) && zfsvfs->z_version >= ZPL_VERSION_SA)
|
||||
goto bail;
|
||||
|
||||
|
||||
zfsvfs->z_attr_table = sa_setup(zfsvfs->z_os, sa_obj,
|
||||
zfs_attr_table, ZPL_END);
|
||||
|
||||
VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
|
||||
|
||||
@@ -1884,6 +2051,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode)
|
||||
|
||||
}
|
||||
|
||||
bail:
|
||||
/* release the VOPs */
|
||||
rw_exit(&zfsvfs->z_teardown_inactive_lock);
|
||||
rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
|
||||
@@ -1906,9 +2074,11 @@ zfs_freevfs(vfs_t *vfsp)
|
||||
|
||||
/*
|
||||
* If this is a snapshot, we have an extra VFS_HOLD on our parent
|
||||
* from zfs_mount(). Release it here.
|
||||
* from zfs_mount(). Release it here. If we came through
|
||||
* zfs_mountroot() instead, we didn't grab an extra hold, so
|
||||
* skip the VFS_RELE for rootvfs.
|
||||
*/
|
||||
if (zfsvfs->z_issnap)
|
||||
if (zfsvfs->z_issnap && (vfsp != rootvfs))
|
||||
VFS_RELE(zfsvfs->z_parent->z_vfs);
|
||||
|
||||
zfsvfs_free(zfsvfs);
|
||||
@@ -2000,13 +2170,23 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
|
||||
if (newvers < zfsvfs->z_version)
|
||||
return (EINVAL);
|
||||
|
||||
if (zfs_spa_version_map(newvers) >
|
||||
spa_version(dmu_objset_spa(zfsvfs->z_os)))
|
||||
return (ENOTSUP);
|
||||
|
||||
tx = dmu_tx_create(os);
|
||||
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
|
||||
if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
|
||||
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
|
||||
ZFS_SA_ATTRS);
|
||||
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
|
||||
}
|
||||
error = dmu_tx_assign(tx, TXG_WAIT);
|
||||
if (error) {
|
||||
dmu_tx_abort(tx);
|
||||
return (error);
|
||||
}
|
||||
|
||||
error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
|
||||
8, 1, &newvers, tx);
|
||||
|
||||
@@ -2015,9 +2195,24 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
|
||||
return (error);
|
||||
}
|
||||
|
||||
spa_history_internal_log(LOG_DS_UPGRADE,
|
||||
dmu_objset_spa(os), tx, CRED(),
|
||||
"oldver=%llu newver=%llu dataset = %llu",
|
||||
if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
|
||||
uint64_t sa_obj;
|
||||
|
||||
ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
|
||||
SPA_VERSION_SA);
|
||||
sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
|
||||
DMU_OT_NONE, 0, tx);
|
||||
|
||||
error = zap_add(os, MASTER_NODE_OBJ,
|
||||
ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
|
||||
ASSERT3U(error, ==, 0);
|
||||
|
||||
VERIFY(0 == sa_set_sa_object(os, sa_obj));
|
||||
sa_register_update_callback(os, zfs_sa_upgrade);
|
||||
}
|
||||
|
||||
spa_history_log_internal(LOG_DS_UPGRADE,
|
||||
dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu",
|
||||
zfsvfs->z_version, newvers, dmu_objset_id(os));
|
||||
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
Reference in New Issue
Block a user