Update core ZFS code from build 121 to build 141.

This commit is contained in:
Brian Behlendorf
2010-05-28 13:45:14 -07:00
parent 6119cb885a
commit 428870ff73
174 changed files with 35763 additions and 14592 deletions
+350 -155
View File
@@ -19,10 +19,11 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
@@ -46,6 +47,7 @@
#include <sys/dsl_deleg.h>
#include <sys/spa.h>
#include <sys/zap.h>
#include <sys/sa.h>
#include <sys/varargs.h>
#include <sys/policy.h>
#include <sys/atomic.h>
@@ -60,6 +62,8 @@
#include <sys/dnlc.h>
#include <sys/dmu_objset.h>
#include <sys/spa_boot.h>
#include <sys/sa.h>
#include "zfs_comutil.h"
int zfsfstype;
vfsops_t *zfs_vfsops = NULL;
@@ -163,8 +167,7 @@ zfs_sync(vfs_t *vfsp, short flag, cred_t *cr)
if (zfsvfs->z_log != NULL)
zil_commit(zfsvfs->z_log, UINT64_MAX, 0);
else
txg_wait_synced(dp, 0);
ZFS_EXIT(zfsvfs);
} else {
/*
@@ -380,14 +383,6 @@ vscan_changed_cb(void *arg, uint64_t newval)
zfsvfs->z_vscan = newval;
}
static void
acl_mode_changed_cb(void *arg, uint64_t newval)
{
zfsvfs_t *zfsvfs = arg;
zfsvfs->z_acl_mode = newval;
}
static void
acl_inherit_changed_cb(void *arg, uint64_t newval)
{
@@ -517,8 +512,6 @@ zfs_register_callbacks(vfs_t *vfsp)
"exec", exec_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"snapdir", snapdir_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"aclmode", acl_mode_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"aclinherit", acl_inherit_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
@@ -560,7 +553,6 @@ unregister:
(void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
zfsvfs);
(void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs);
@@ -568,69 +560,59 @@ unregister:
}
static void
uidacct(objset_t *os, boolean_t isgroup, uint64_t fuid,
int64_t delta, dmu_tx_t *tx)
static int
zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
uint64_t *userp, uint64_t *groupp)
{
uint64_t used = 0;
char buf[32];
int err;
uint64_t obj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
znode_phys_t *znp = data;
int error = 0;
if (delta == 0)
return;
/*
* Is it a valid type of object to track?
*/
if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
return (ENOENT);
(void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)fuid);
err = zap_lookup(os, obj, buf, 8, 1, &used);
ASSERT(err == 0 || err == ENOENT);
/* no underflow/overflow */
ASSERT(delta > 0 || used >= -delta);
ASSERT(delta < 0 || used + delta > used);
used += delta;
if (used == 0)
err = zap_remove(os, obj, buf, tx);
else
err = zap_update(os, obj, buf, 8, 1, &used, tx);
ASSERT(err == 0);
}
/*
* If we have a NULL data pointer
* then assume the id's aren't changing and
* return EEXIST to the dmu to let it know to
* use the same ids
*/
if (data == NULL)
return (EEXIST);
static void
zfs_space_delta_cb(objset_t *os, dmu_object_type_t bonustype,
void *oldbonus, void *newbonus,
uint64_t oldused, uint64_t newused, dmu_tx_t *tx)
{
znode_phys_t *oldznp = oldbonus;
znode_phys_t *newznp = newbonus;
if (bonustype != DMU_OT_ZNODE)
return;
/* We charge 512 for the dnode (if it's allocated). */
if (oldznp->zp_gen != 0)
oldused += DNODE_SIZE;
if (newznp->zp_gen != 0)
newused += DNODE_SIZE;
if (oldznp->zp_uid == newznp->zp_uid) {
uidacct(os, B_FALSE, oldznp->zp_uid, newused-oldused, tx);
if (bonustype == DMU_OT_ZNODE) {
*userp = znp->zp_uid;
*groupp = znp->zp_gid;
} else {
uidacct(os, B_FALSE, oldznp->zp_uid, -oldused, tx);
uidacct(os, B_FALSE, newznp->zp_uid, newused, tx);
}
int hdrsize;
if (oldznp->zp_gid == newznp->zp_gid) {
uidacct(os, B_TRUE, oldznp->zp_gid, newused-oldused, tx);
} else {
uidacct(os, B_TRUE, oldznp->zp_gid, -oldused, tx);
uidacct(os, B_TRUE, newznp->zp_gid, newused, tx);
ASSERT(bonustype == DMU_OT_SA);
hdrsize = sa_hdrsize(data);
if (hdrsize != 0) {
*userp = *((uint64_t *)((uintptr_t)data + hdrsize +
SA_UID_OFFSET));
*groupp = *((uint64_t *)((uintptr_t)data + hdrsize +
SA_GID_OFFSET));
} else {
/*
* This should only happen for newly created
* files that haven't had the znode data filled
* in yet.
*/
*userp = 0;
*groupp = 0;
}
}
return (error);
}
static void
fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr,
char *domainbuf, int buflen, uid_t *ridp)
{
extern uint64_t strtonum(const char *str, char **nptr);
uint64_t fuid;
const char *domain;
@@ -811,7 +793,7 @@ zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
}
boolean_t
zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
{
char buf[32];
uint64_t used, quota, usedobj, quotaobj;
@@ -834,33 +816,57 @@ zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
return (used >= quota);
}
boolean_t
zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup)
{
uint64_t fuid;
uint64_t quotaobj;
uid_t id;
quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
id = isgroup ? zp->z_gid : zp->z_uid;
if (quotaobj == 0 || zfsvfs->z_replay)
return (B_FALSE);
if (IS_EPHEMERAL(id)) {
VERIFY(0 == sa_lookup(zp->z_sa_hdl,
isgroup ? SA_ZPL_GID(zfsvfs) : SA_ZPL_UID(zfsvfs),
&fuid, sizeof (fuid)));
} else {
fuid = (uint64_t)id;
}
return (zfs_fuid_overquota(zfsvfs, isgroup, fuid));
}
int
zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
{
objset_t *os;
zfsvfs_t *zfsvfs;
uint64_t zval;
int i, error;
uint64_t sa_obj;
if (error = dsl_prop_get_integer(osname, "readonly", &zval, NULL))
zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
/*
* We claim to always be readonly so we can open snapshots;
* other ZPL code will prevent us from writing to snapshots.
*/
error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
if (error) {
kmem_free(zfsvfs, sizeof (zfsvfs_t));
return (error);
if (zval)
mode |= DS_MODE_READONLY;
error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os);
if (error == EROFS) {
mode |= DS_MODE_READONLY;
error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os);
}
if (error)
return (error);
/*
* Initialize the zfs-specific filesystem structure.
* Should probably make this a kmem cache, shuffle fields,
* and just bzero up to z_hold_mtx[].
*/
zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
zfsvfs->z_vfs = NULL;
zfsvfs->z_parent = zfsvfs;
zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
@@ -870,15 +876,15 @@ zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
if (error) {
goto out;
} else if (zfsvfs->z_version > ZPL_VERSION) {
(void) printf("Mismatched versions: File system "
"is version %llu on-disk format, which is "
"incompatible with this software version %lld!",
(u_longlong_t)zfsvfs->z_version, ZPL_VERSION);
} else if (zfsvfs->z_version >
zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
(void) printf("Can't mount a version %lld file system "
"on a version %lld pool\n. Pool must be upgraded to mount "
"this file system.", (u_longlong_t)zfsvfs->z_version,
(u_longlong_t)spa_version(dmu_objset_spa(os)));
error = ENOTSUP;
goto out;
}
if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
goto out;
zfsvfs->z_norm = (int)zval;
@@ -900,6 +906,26 @@ zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
if (zfsvfs->z_use_sa) {
/* should either have both of these objects or none */
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
&sa_obj);
if (error)
return (error);
} else {
/*
* Pre SA versions file systems should never touch
* either the attribute registration or layout objects.
*/
sa_obj = 0;
}
zfsvfs->z_attr_table = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END);
if (zfsvfs->z_version >= ZPL_VERSION_SA)
sa_register_update_callback(os, zfs_sa_upgrade);
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
&zfsvfs->z_root);
@@ -944,12 +970,12 @@ zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
*zvp = zfsvfs;
*zfvp = zfsvfs;
return (0);
out:
dmu_objset_close(os);
*zvp = NULL;
dmu_objset_disown(os, zfsvfs);
*zfvp = NULL;
kmem_free(zfsvfs, sizeof (zfsvfs_t));
return (error);
}
@@ -966,15 +992,11 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
/*
* Set the objset user_ptr to track its zfsvfs.
*/
mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock);
mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock);
mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
if (zil_disable) {
zil_destroy(zfsvfs->z_log, 0);
zfsvfs->z_log = NULL;
}
/*
* If we are not mounting (ie: online recv), then we don't
@@ -994,34 +1016,36 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
else
zfs_unlinked_drain(zfsvfs);
if (zfsvfs->z_log) {
/*
* Parse and replay the intent log.
*
* Because of ziltest, this must be done after
* zfs_unlinked_drain(). (Further note: ziltest
* doesn't use readonly mounts, where
* zfs_unlinked_drain() isn't called.) This is because
* ziltest causes spa_sync() to think it's committed,
* but actually it is not, so the intent log contains
* many txg's worth of changes.
*
* In particular, if object N is in the unlinked set in
* the last txg to actually sync, then it could be
* actually freed in a later txg and then reallocated
* in a yet later txg. This would write a "create
* object N" record to the intent log. Normally, this
* would be fine because the spa_sync() would have
* written out the fact that object N is free, before
* we could write the "create object N" intent log
* record.
*
* But when we are in ziltest mode, we advance the "open
* txg" without actually spa_sync()-ing the changes to
* disk. So we would see that object N is still
* allocated and in the unlinked set, and there is an
* intent log record saying to allocate it.
*/
/*
* Parse and replay the intent log.
*
* Because of ziltest, this must be done after
* zfs_unlinked_drain(). (Further note: ziltest
* doesn't use readonly mounts, where
* zfs_unlinked_drain() isn't called.) This is because
* ziltest causes spa_sync() to think it's committed,
* but actually it is not, so the intent log contains
* many txg's worth of changes.
*
* In particular, if object N is in the unlinked set in
* the last txg to actually sync, then it could be
* actually freed in a later txg and then reallocated
* in a yet later txg. This would write a "create
* object N" record to the intent log. Normally, this
* would be fine because the spa_sync() would have
* written out the fact that object N is free, before
* we could write the "create object N" intent log
* record.
*
* But when we are in ziltest mode, we advance the "open
* txg" without actually spa_sync()-ing the changes to
* disk. So we would see that object N is still
* allocated and in the unlinked set, and there is an
* intent log record saying to allocate it.
*/
if (zil_replay_disable) {
zil_destroy(zfsvfs->z_log, B_FALSE);
} else {
zfsvfs->z_replay = B_TRUE;
zil_replay(zfsvfs->z_os, zfsvfs, zfs_replay_vector);
zfsvfs->z_replay = B_FALSE;
@@ -1070,7 +1094,9 @@ zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
}
zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
}
static int
@@ -1084,7 +1110,7 @@ zfs_domount(vfs_t *vfsp, char *osname)
ASSERT(vfsp);
ASSERT(osname);
error = zfsvfs_create(osname, DS_MODE_OWNER, &zfsvfs);
error = zfsvfs_create(osname, &zfsvfs);
if (error)
return (error);
zfsvfs->z_vfs = vfsp;
@@ -1135,6 +1161,7 @@ zfs_domount(vfs_t *vfsp, char *osname)
vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
}
vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);
if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
uint64_t pval;
@@ -1146,9 +1173,9 @@ zfs_domount(vfs_t *vfsp, char *osname)
xattr_changed_cb(zfsvfs, pval);
zfsvfs->z_issnap = B_TRUE;
mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock);
mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock);
mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
} else {
error = zfsvfs_setup(zfsvfs, B_TRUE);
}
@@ -1157,7 +1184,7 @@ zfs_domount(vfs_t *vfsp, char *osname)
zfsctl_create(zfsvfs);
out:
if (error) {
dmu_objset_close(zfsvfs->z_os);
dmu_objset_disown(zfsvfs->z_os, zfsvfs);
zfsvfs_free(zfsvfs);
} else {
atomic_add_32(&zfs_active_fs_count, 1);
@@ -1201,9 +1228,6 @@ zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb,
zfsvfs) == 0);
VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb,
zfsvfs) == 0);
VERIFY(dsl_prop_unregister(ds, "aclinherit",
acl_inherit_changed_cb, zfsvfs) == 0);
@@ -1267,6 +1291,139 @@ zfs_parse_bootfs(char *bpath, char *outpath)
return (error);
}
/*
* zfs_check_global_label:
* Check that the hex label string is appropriate for the dataset
* being mounted into the global_zone proper.
*
* Return an error if the hex label string is not default or
* admin_low/admin_high. For admin_low labels, the corresponding
* dataset must be readonly.
*/
int
zfs_check_global_label(const char *dsname, const char *hexsl)
{
if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
return (0);
if (strcasecmp(hexsl, ADMIN_HIGH) == 0)
return (0);
if (strcasecmp(hexsl, ADMIN_LOW) == 0) {
/* must be readonly */
uint64_t rdonly;
if (dsl_prop_get_integer(dsname,
zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL))
return (EACCES);
return (rdonly ? 0 : EACCES);
}
return (EACCES);
}
/*
* zfs_mount_label_policy:
* Determine whether the mount is allowed according to MAC check.
* by comparing (where appropriate) label of the dataset against
* the label of the zone being mounted into. If the dataset has
* no label, create one.
*
* Returns:
* 0 : access allowed
* >0 : error code, such as EACCES
*/
static int
zfs_mount_label_policy(vfs_t *vfsp, char *osname)
{
int error, retv;
zone_t *mntzone = NULL;
ts_label_t *mnt_tsl;
bslabel_t *mnt_sl;
bslabel_t ds_sl;
char ds_hexsl[MAXNAMELEN];
retv = EACCES; /* assume the worst */
/*
* Start by getting the dataset label if it exists.
*/
error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
1, sizeof (ds_hexsl), &ds_hexsl, NULL);
if (error)
return (EACCES);
/*
* If labeling is NOT enabled, then disallow the mount of datasets
* which have a non-default label already. No other label checks
* are needed.
*/
if (!is_system_labeled()) {
if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
return (0);
return (EACCES);
}
/*
* Get the label of the mountpoint. If mounting into the global
* zone (i.e. mountpoint is not within an active zone and the
* zoned property is off), the label must be default or
* admin_low/admin_high only; no other checks are needed.
*/
mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE);
if (mntzone->zone_id == GLOBAL_ZONEID) {
uint64_t zoned;
zone_rele(mntzone);
if (dsl_prop_get_integer(osname,
zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
return (EACCES);
if (!zoned)
return (zfs_check_global_label(osname, ds_hexsl));
else
/*
* This is the case of a zone dataset being mounted
* initially, before the zone has been fully created;
* allow this mount into global zone.
*/
return (0);
}
mnt_tsl = mntzone->zone_slabel;
ASSERT(mnt_tsl != NULL);
label_hold(mnt_tsl);
mnt_sl = label2bslabel(mnt_tsl);
if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) {
/*
* The dataset doesn't have a real label, so fabricate one.
*/
char *str = NULL;
if (l_to_str_internal(mnt_sl, &str) == 0 &&
dsl_prop_set(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
ZPROP_SRC_LOCAL, 1, strlen(str) + 1, str) == 0)
retv = 0;
if (str != NULL)
kmem_free(str, strlen(str) + 1);
} else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) {
/*
* Now compare labels to complete the MAC check. If the
* labels are equal then allow access. If the mountpoint
* label dominates the dataset label, allow readonly access.
* Otherwise, access is denied.
*/
if (blequal(mnt_sl, &ds_sl))
retv = 0;
else if (bldominates(mnt_sl, &ds_sl)) {
vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
retv = 0;
}
}
label_rele(mnt_tsl);
zone_rele(mntzone);
return (retv);
}
static int
zfs_mountroot(vfs_t *vfsp, enum whymountroot why)
{
@@ -1419,8 +1576,7 @@ zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
*/
error = secpolicy_fs_mount(cr, mvp, vfsp);
if (error) {
error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr);
if (error == 0) {
if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) == 0) {
vattr_t vattr;
/*
@@ -1430,16 +1586,14 @@ zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
vattr.va_mask = AT_UID;
if (error = VOP_GETATTR(mvp, &vattr, 0, cr, NULL)) {
if (VOP_GETATTR(mvp, &vattr, 0, cr, NULL)) {
goto out;
}
if (secpolicy_vnode_owner(cr, vattr.va_uid) != 0 &&
VOP_ACCESS(mvp, VWRITE, 0, cr, NULL) != 0) {
error = EPERM;
goto out;
}
secpolicy_fs_mount_clearopts(cr, vfsp);
} else {
goto out;
@@ -1456,6 +1610,10 @@ zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
goto out;
}
error = zfs_mount_label_policy(vfsp, osname);
if (error)
goto out;
/*
* When doing a remount, we simply refresh our temporary properties
* according to those options set in the current VFS options.
@@ -1617,7 +1775,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
mutex_enter(&zfsvfs->z_znodes_lock);
for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
zp = list_next(&zfsvfs->z_all_znodes, zp))
if (zp->z_dbuf) {
if (zp->z_sa_hdl) {
ASSERT(ZTOV(zp)->v_count > 0);
zfs_znode_dmu_fini(zp);
}
@@ -1668,9 +1826,8 @@ zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr)
ret = secpolicy_fs_unmount(cr, vfsp);
if (ret) {
ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
ZFS_DELEG_PERM_MOUNT, cr);
if (ret)
if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
ZFS_DELEG_PERM_MOUNT, cr))
return (ret);
}
@@ -1725,14 +1882,14 @@ zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr)
/*
* Unset the objset user_ptr.
*/
mutex_enter(&os->os->os_user_ptr_lock);
mutex_enter(&os->os_user_ptr_lock);
dmu_objset_set_user(os, NULL);
mutex_exit(&os->os->os_user_ptr_lock);
mutex_exit(&os->os_user_ptr_lock);
/*
* Finally release the objset
*/
dmu_objset_close(os);
dmu_objset_disown(os, zfsvfs);
}
/*
@@ -1813,7 +1970,9 @@ zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
ZFS_EXIT(zfsvfs);
return (err);
}
zp_gen = zp->z_phys->zp_gen & gen_mask;
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
sizeof (uint64_t));
zp_gen = zp_gen & gen_mask;
if (zp_gen == 0)
zp_gen = 1;
if (zp->z_unlinked || zp_gen != fid_gen) {
@@ -1835,17 +1994,13 @@ zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
* 'z_teardown_inactive_lock' write held.
*/
int
zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *modep)
zfs_suspend_fs(zfsvfs_t *zfsvfs)
{
int error;
if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
return (error);
*modep = zfsvfs->z_os->os_mode;
if (name)
dmu_objset_name(zfsvfs->z_os, name);
dmu_objset_close(zfsvfs->z_os);
dmu_objset_disown(zfsvfs->z_os, zfsvfs);
return (0);
}
@@ -1854,18 +2009,30 @@ zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *modep)
* Reopen zfsvfs_t::z_os and release VOPs.
*/
int
zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode)
zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname)
{
int err;
int err, err2;
ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs,
&zfsvfs->z_os);
if (err) {
zfsvfs->z_os = NULL;
} else {
znode_t *zp;
uint64_t sa_obj = 0;
err2 = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
ZFS_SA_ATTRS, 8, 1, &sa_obj);
if ((err || err2) && zfsvfs->z_version >= ZPL_VERSION_SA)
goto bail;
zfsvfs->z_attr_table = sa_setup(zfsvfs->z_os, sa_obj,
zfs_attr_table, ZPL_END);
VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
@@ -1884,6 +2051,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode)
}
bail:
/* release the VOPs */
rw_exit(&zfsvfs->z_teardown_inactive_lock);
rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
@@ -1906,9 +2074,11 @@ zfs_freevfs(vfs_t *vfsp)
/*
* If this is a snapshot, we have an extra VFS_HOLD on our parent
* from zfs_mount(). Release it here.
* from zfs_mount(). Release it here. If we came through
* zfs_mountroot() instead, we didn't grab an extra hold, so
* skip the VFS_RELE for rootvfs.
*/
if (zfsvfs->z_issnap)
if (zfsvfs->z_issnap && (vfsp != rootvfs))
VFS_RELE(zfsvfs->z_parent->z_vfs);
zfsvfs_free(zfsvfs);
@@ -2000,13 +2170,23 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
if (newvers < zfsvfs->z_version)
return (EINVAL);
if (zfs_spa_version_map(newvers) >
spa_version(dmu_objset_spa(zfsvfs->z_os)))
return (ENOTSUP);
tx = dmu_tx_create(os);
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
ZFS_SA_ATTRS);
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
}
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
return (error);
}
error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
8, 1, &newvers, tx);
@@ -2015,9 +2195,24 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
return (error);
}
spa_history_internal_log(LOG_DS_UPGRADE,
dmu_objset_spa(os), tx, CRED(),
"oldver=%llu newver=%llu dataset = %llu",
if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
uint64_t sa_obj;
ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
SPA_VERSION_SA);
sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
DMU_OT_NONE, 0, tx);
error = zap_add(os, MASTER_NODE_OBJ,
ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
ASSERT3U(error, ==, 0);
VERIFY(0 == sa_set_sa_object(os, sa_obj));
sa_register_update_callback(os, zfs_sa_upgrade);
}
spa_history_log_internal(LOG_DS_UPGRADE,
dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu",
zfsvfs->z_version, newvers, dmu_objset_id(os));
dmu_tx_commit(tx);