Merge branch 'zpl'

This commit is contained in:
Brian Behlendorf
2011-02-18 09:31:25 -08:00
128 changed files with 7825 additions and 6888 deletions
+1
View File
@@ -13,4 +13,5 @@ $(MODULE)-objs += @top_srcdir@/module/zcommon/zprop_common.o
$(MODULE)-objs += @top_srcdir@/module/zcommon/zfs_namecheck.o
$(MODULE)-objs += @top_srcdir@/module/zcommon/zfs_comutil.o
$(MODULE)-objs += @top_srcdir@/module/zcommon/zfs_fletcher.o
$(MODULE)-objs += @top_srcdir@/module/zcommon/zfs_uio.o
$(MODULE)-objs += @top_srcdir@/module/zcommon/zpool_prop.o
+255
View File
@@ -0,0 +1,255 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* University Copyright- Copyright (c) 1982, 1986, 1988
* The Regents of the University of California
* All Rights Reserved
*
* University Acknowledgment- Portions of this document are derived from
* software developed by the University of California, Berkeley, and its
* contributors.
*/
/*
* The uio support from OpenSolaris has been added as a short term
* work around. The hope is to adopt native Linux type and drop the
* use of uio's entirely. Under Linux they only add overhead and
* when possible we want to use native APIs for the ZPL layer.
*/
#ifdef _KERNEL
#include <sys/types.h>
#include <sys/uio_impl.h>
/*
* Move "n" bytes at byte address "p"; "rw" indicates the direction
* of the move, and the I/O parameters are provided in "uio", which is
* update to reflect the data which was moved. Returns 0 on success or
* a non-zero errno on failure.
*/
int
uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio)
{
struct iovec *iov;
ulong_t cnt;
while (n && uio->uio_resid) {
iov = uio->uio_iov;
cnt = MIN(iov->iov_len, n);
if (cnt == 0l) {
uio->uio_iov++;
uio->uio_iovcnt--;
continue;
}
switch (uio->uio_segflg) {
case UIO_USERSPACE:
case UIO_USERISPACE:
/* p = kernel data pointer
* iov->iov_base = user data pointer */
if (rw == UIO_READ) {
if (copy_to_user(iov->iov_base, p, cnt))
return EFAULT;
/* error = xcopyout_nta(p, iov->iov_base, cnt,
* (uio->uio_extflg & UIO_COPY_CACHED)); */
} else {
/* error = xcopyin_nta(iov->iov_base, p, cnt,
* (uio->uio_extflg & UIO_COPY_CACHED)); */
if (copy_from_user(p, iov->iov_base, cnt))
return EFAULT;
}
break;
case UIO_SYSSPACE:
if (rw == UIO_READ)
bcopy(p, iov->iov_base, cnt);
else
bcopy(iov->iov_base, p, cnt);
break;
}
iov->iov_base += cnt;
iov->iov_len -= cnt;
uio->uio_resid -= cnt;
uio->uio_loffset += cnt;
p = (caddr_t)p + cnt;
n -= cnt;
}
return (0);
}
EXPORT_SYMBOL(uiomove);
#define fuword8(uptr, vptr) get_user((*vptr), (uptr))
/*
* Fault in the pages of the first n bytes specified by the uio structure.
* 1 byte in each page is touched and the uio struct is unmodified. Any
* error will terminate the process as this is only a best attempt to get
* the pages resident.
*/
void
uio_prefaultpages(ssize_t n, struct uio *uio)
{
struct iovec *iov;
ulong_t cnt, incr;
caddr_t p;
uint8_t tmp;
int iovcnt;
iov = uio->uio_iov;
iovcnt = uio->uio_iovcnt;
while ((n > 0) && (iovcnt > 0)) {
cnt = MIN(iov->iov_len, n);
if (cnt == 0) {
/* empty iov entry */
iov++;
iovcnt--;
continue;
}
n -= cnt;
/*
* touch each page in this segment.
*/
p = iov->iov_base;
while (cnt) {
switch (uio->uio_segflg) {
case UIO_USERSPACE:
case UIO_USERISPACE:
if (fuword8((uint8_t *) p, &tmp))
return;
break;
case UIO_SYSSPACE:
bcopy(p, &tmp, 1);
break;
}
incr = MIN(cnt, PAGESIZE);
p += incr;
cnt -= incr;
}
/*
* touch the last byte in case it straddles a page.
*/
p--;
switch (uio->uio_segflg) {
case UIO_USERSPACE:
case UIO_USERISPACE:
if (fuword8((uint8_t *) p, &tmp))
return;
break;
case UIO_SYSSPACE:
bcopy(p, &tmp, 1);
break;
}
iov++;
iovcnt--;
}
}
EXPORT_SYMBOL(uio_prefaultpages);
/*
* same as uiomove() but doesn't modify uio structure.
* return in cbytes how many bytes were copied.
*/
int
uiocopy(void *p, size_t n, enum uio_rw rw, struct uio *uio, size_t *cbytes)
{
struct iovec *iov;
ulong_t cnt;
int iovcnt;
iovcnt = uio->uio_iovcnt;
*cbytes = 0;
for (iov = uio->uio_iov; n && iovcnt; iov++, iovcnt--) {
cnt = MIN(iov->iov_len, n);
if (cnt == 0)
continue;
switch (uio->uio_segflg) {
case UIO_USERSPACE:
case UIO_USERISPACE:
/* p = kernel data pointer
* iov->iov_base = user data pointer */
if (rw == UIO_READ) {
/* * UIO_READ = copy data from kernel to user * */
if (copy_to_user(iov->iov_base, p, cnt))
return EFAULT;
/* error = xcopyout_nta(p, iov->iov_base, cnt,
* (uio->uio_extflg & UIO_COPY_CACHED)); */
} else {
/* * UIO_WRITE = copy data from user to kernel * */
/* error = xcopyin_nta(iov->iov_base, p, cnt,
* (uio->uio_extflg & UIO_COPY_CACHED)); */
if (copy_from_user(p, iov->iov_base, cnt))
return EFAULT;
}
break;
case UIO_SYSSPACE:
if (rw == UIO_READ)
bcopy(p, iov->iov_base, cnt);
else
bcopy(iov->iov_base, p, cnt);
break;
}
p = (caddr_t)p + cnt;
n -= cnt;
*cbytes += cnt;
}
return (0);
}
EXPORT_SYMBOL(uiocopy);
/*
* Drop the next n chars out of *uiop.
*/
void
uioskip(uio_t *uiop, size_t n)
{
if (n > uiop->uio_resid)
return;
while (n != 0) {
iovec_t *iovp = uiop->uio_iov;
size_t niovb = MIN(iovp->iov_len, n);
if (niovb == 0) {
uiop->uio_iov++;
uiop->uio_iovcnt--;
continue;
}
iovp->iov_base += niovb;
uiop->uio_loffset += niovb;
iovp->iov_len -= niovb;
uiop->uio_resid -= niovb;
n -= niovb;
}
}
EXPORT_SYMBOL(uioskip);
#endif /* _KERNEL */
+4 -1
View File
@@ -64,7 +64,6 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/zap_leaf.o
$(MODULE)-objs += @top_srcdir@/module/zfs/zap_micro.o
$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_acl.o
$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_byteswap.o
$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_ctldir.o
$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_debug.o
$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_dir.o
$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_fm.o
@@ -84,5 +83,9 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/zio_checksum.o
$(MODULE)-objs += @top_srcdir@/module/zfs/zio_compress.o
$(MODULE)-objs += @top_srcdir@/module/zfs/zio_inject.o
$(MODULE)-objs += @top_srcdir@/module/zfs/zle.o
$(MODULE)-objs += @top_srcdir@/module/zfs/zpl_file.o
$(MODULE)-objs += @top_srcdir@/module/zfs/zpl_inode.o
$(MODULE)-objs += @top_srcdir@/module/zfs/zpl_super.o
$(MODULE)-objs += @top_srcdir@/module/zfs/zpl_xattr.o
$(MODULE)-objs += @top_srcdir@/module/zfs/zrlock.o
$(MODULE)-objs += @top_srcdir@/module/zfs/zvol.o
+3 -3
View File
@@ -2149,7 +2149,7 @@ arc_reclaim_thread(void)
/* block until needed, or one second, whichever is shorter */
CALLB_CPR_SAFE_BEGIN(&cpr);
(void) cv_timedwait(&arc_reclaim_thr_cv,
(void) cv_timedwait_interruptible(&arc_reclaim_thr_cv,
&arc_reclaim_thr_lock, (ddi_get_lbolt() + hz));
CALLB_CPR_SAFE_END(&cpr, &arc_reclaim_thr_lock);
}
@@ -4435,8 +4435,8 @@ l2arc_feed_thread(void)
while (l2arc_thread_exit == 0) {
CALLB_CPR_SAFE_BEGIN(&cpr);
(void) cv_timedwait(&l2arc_feed_thr_cv, &l2arc_feed_thr_lock,
next);
(void) cv_timedwait_interruptible(&l2arc_feed_thr_cv,
&l2arc_feed_thr_lock, next);
CALLB_CPR_SAFE_END(&cpr, &l2arc_feed_thr_lock);
next = ddi_get_lbolt() + hz;
+108 -59
View File
@@ -381,7 +381,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
}
nblks = 1;
}
dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP);
dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP | KM_NODEBUG);
if (dn->dn_objset->os_dsl_dataset)
dp = dn->dn_objset->os_dsl_dataset->ds_dir->dd_pool;
@@ -1122,9 +1122,113 @@ dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx)
dmu_buf_rele_array(dbp, numbufs, FTAG);
return (err);
}
#endif
#ifdef HAVE_ZPL
int
dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size)
{
dmu_buf_t **dbp;
int numbufs, i, err;
xuio_t *xuio = NULL;
/*
* NB: we could do this block-at-a-time, but it's nice
* to be reading in parallel.
*/
err = dmu_buf_hold_array(os, object, uio->uio_loffset, size, TRUE, FTAG,
&numbufs, &dbp);
if (err)
return (err);
for (i = 0; i < numbufs; i++) {
int tocpy;
int bufoff;
dmu_buf_t *db = dbp[i];
ASSERT(size > 0);
bufoff = uio->uio_loffset - db->db_offset;
tocpy = (int)MIN(db->db_size - bufoff, size);
if (xuio) {
dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db;
arc_buf_t *dbuf_abuf = dbi->db_buf;
arc_buf_t *abuf = dbuf_loan_arcbuf(dbi);
err = dmu_xuio_add(xuio, abuf, bufoff, tocpy);
if (!err) {
uio->uio_resid -= tocpy;
uio->uio_loffset += tocpy;
}
if (abuf == dbuf_abuf)
XUIOSTAT_BUMP(xuiostat_rbuf_nocopy);
else
XUIOSTAT_BUMP(xuiostat_rbuf_copied);
} else {
err = uiomove((char *)db->db_data + bufoff, tocpy,
UIO_READ, uio);
}
if (err)
break;
size -= tocpy;
}
dmu_buf_rele_array(dbp, numbufs, FTAG);
return (err);
}
static int
dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
{
dmu_buf_t **dbp;
int numbufs;
int err = 0;
int i;
err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size,
FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH);
if (err)
return (err);
for (i = 0; i < numbufs; i++) {
int tocpy;
int bufoff;
dmu_buf_t *db = dbp[i];
ASSERT(size > 0);
bufoff = uio->uio_loffset - db->db_offset;
tocpy = (int)MIN(db->db_size - bufoff, size);
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
if (tocpy == db->db_size)
dmu_buf_will_fill(db, tx);
else
dmu_buf_will_dirty(db, tx);
/*
* XXX uiomove could block forever (eg.nfs-backed
* pages). There needs to be a uiolockdown() function
* to lock the pages in memory, so that uiomove won't
* block.
*/
err = uiomove((char *)db->db_data + bufoff, tocpy,
UIO_WRITE, uio);
if (tocpy == db->db_size)
dmu_buf_fill_done(db, tx);
if (err)
break;
size -= tocpy;
}
dmu_buf_rele_array(dbp, numbufs, FTAG);
return (err);
}
int
dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size,
dmu_tx_t *tx)
@@ -1164,62 +1268,7 @@ dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size,
return (err);
}
int
dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
page_t *pp, dmu_tx_t *tx)
{
dmu_buf_t **dbp;
int numbufs, i;
int err;
if (size == 0)
return (0);
err = dmu_buf_hold_array(os, object, offset, size,
FALSE, FTAG, &numbufs, &dbp);
if (err)
return (err);
for (i = 0; i < numbufs; i++) {
int tocpy, copied, thiscpy;
int bufoff;
dmu_buf_t *db = dbp[i];
caddr_t va;
ASSERT(size > 0);
ASSERT3U(db->db_size, >=, PAGESIZE);
bufoff = offset - db->db_offset;
tocpy = (int)MIN(db->db_size - bufoff, size);
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
if (tocpy == db->db_size)
dmu_buf_will_fill(db, tx);
else
dmu_buf_will_dirty(db, tx);
for (copied = 0; copied < tocpy; copied += PAGESIZE) {
ASSERT3U(pp->p_offset, ==, db->db_offset + bufoff);
thiscpy = MIN(PAGESIZE, tocpy - copied);
va = zfs_map_page(pp, S_READ);
bcopy(va, (char *)db->db_data + bufoff, thiscpy);
zfs_unmap_page(pp, va);
pp = pp->p_next;
bufoff += PAGESIZE;
}
if (tocpy == db->db_size)
dmu_buf_fill_done(db, tx);
offset += tocpy;
size -= tocpy;
}
dmu_buf_rele_array(dbp, numbufs, FTAG);
return (err);
}
#endif
#endif /* _KERNEL */
/*
* Allocate a loaned anonymous arc buffer.
-2
View File
@@ -910,10 +910,8 @@ dmu_objset_snapshot_one(const char *name, void *arg)
* permission checks for the starting dataset have already been
* performed in zfs_secpolicy_snapshot()
*/
#ifdef HAVE_ZPL
if (sn->recursive && (err = zfs_secpolicy_snapshot_perms(name, CRED())))
return (err);
#endif
err = dmu_objset_hold(name, sn, &os);
if (err != 0)
-2
View File
@@ -2364,13 +2364,11 @@ dsl_snapshot_rename_one(const char *name, void *arg)
* For recursive snapshot renames the parent won't be changing
* so we just pass name for both the to/from argument.
*/
#ifdef HAVE_ZPL
err = zfs_secpolicy_rename_perms(snapname, snapname, CRED());
if (err != 0) {
strfree(snapname);
return (err == ENOENT ? 0 : err);
}
#endif
/* XXX: Ignore for SPL version until mounting the FS is supported */
#if defined(_KERNEL) && !defined(HAVE_SPL)
+4 -4
View File
@@ -92,7 +92,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
dp->dp_vnrele_taskq = taskq_create("zfs_vn_rele_taskq", 1, minclsyspri,
dp->dp_iput_taskq = taskq_create("zfs_iput_taskq", 1, minclsyspri,
1, 4, 0);
return (dp);
@@ -214,7 +214,7 @@ dsl_pool_close(dsl_pool_t *dp)
dsl_scan_fini(dp);
rw_destroy(&dp->dp_config_rwlock);
mutex_destroy(&dp->dp_lock);
taskq_destroy(dp->dp_vnrele_taskq);
taskq_destroy(dp->dp_iput_taskq);
if (dp->dp_blkstats)
kmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t));
kmem_free(dp, sizeof (dsl_pool_t));
@@ -738,9 +738,9 @@ dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx)
}
taskq_t *
dsl_pool_vnrele_taskq(dsl_pool_t *dp)
dsl_pool_iput_taskq(dsl_pool_t *dp)
{
return (dp->dp_vnrele_taskq);
return (dp->dp_iput_taskq);
}
/*
-4
View File
@@ -1436,7 +1436,6 @@ sa_lookup(sa_handle_t *hdl, sa_attr_type_t attr, void *buf, uint32_t buflen)
int
sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio)
{
#ifdef HAVE_ZPL
int error;
sa_bulk_attr_t bulk;
@@ -1453,9 +1452,6 @@ sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio)
}
mutex_exit(&hdl->sa_lock);
return (error);
#else
return ENOSYS;
#endif /* HAVE_ZPL */
}
#endif
-1
View File
@@ -179,7 +179,6 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
(void) vn_rename(temp, dp->scd_path, UIO_SYSSPACE);
}
(void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL);
VN_RELE(vp);
}
(void) vn_remove(temp, UIO_SYSSPACE, RMFILE);
+46
View File
@@ -33,6 +33,8 @@
#include <sys/zio.h>
#include <sys/sunldi.h>
char *zfs_vdev_scheduler = VDEV_SCHEDULER;
/*
* Virtual device vector for disks.
*/
@@ -102,6 +104,43 @@ vdev_disk_error(zio_t *zio)
#endif
}
/*
* Use the Linux 'noop' elevator for zfs managed block devices. This
* strikes the ideal balance by allowing the zfs elevator to do all
* request ordering and prioritization. While allowing the Linux
* elevator to do the maximum front/back merging allowed by the
* physical device. This yields the largest possible requests for
* the device with the lowest total overhead.
*
* Unfortunately we cannot directly call the elevator_switch() function
* because it is not exported from the block layer. This means we have
* to use the sysfs interface and a user space upcall. Pools will be
* automatically imported on module load so we must do this at device
* open time from the kernel.
*/
static int
vdev_elevator_switch(vdev_t *v, char *elevator, char *device)
{
char sh_path[] = "/bin/sh";
char sh_cmd[128];
char *argv[] = { sh_path, "-c", sh_cmd };
char *envp[] = { NULL };
int error;
if (!strncmp(elevator, "none", 4) && (strlen(elevator) == 4))
return (0);
sprintf(sh_cmd, "%s \"%s\" >/sys/block/%s/queue/scheduler",
"/bin/echo", elevator, device);
error = call_usermodehelper(sh_path, argv, envp, 1);
if (error)
printk("ZFS: Unable to set \"%s\" scheduler for %s (%s): %d\n",
elevator, v->vdev_path, device, error);
return (error);
}
static int
vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift)
{
@@ -167,6 +206,10 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift)
/* Based on the minimum sector size set the block size */
*ashift = highbit(MAX(block_size, SPA_MINBLOCKSIZE)) - 1;
/* Try to set the io scheduler elevator algorithm */
(void) vdev_elevator_switch(v, zfs_vdev_scheduler,
bdev->bd_disk->disk_name);
return 0;
}
@@ -702,3 +745,6 @@ vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config)
return 0;
}
module_param(zfs_vdev_scheduler, charp, 0644);
MODULE_PARM_DESC(zfs_vdev_scheduler, "IO Scheduler (noop)");
-1
View File
@@ -130,7 +130,6 @@ vdev_file_close(vdev_t *vd)
(void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred, NULL);
(void) VOP_CLOSE(vf->vf_vnode, spa_mode(vd->vdev_spa), 1, 0,
kcred, NULL);
VN_RELE(vf->vf_vnode);
}
vd->vdev_delayed_close = B_FALSE;
+287 -239
View File
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+146 -146
View File
@@ -22,7 +22,6 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifdef HAVE_ZPL
#include <sys/types.h>
#include <sys/param.h>
@@ -51,7 +50,6 @@
#include <sys/zap.h>
#include <sys/dmu.h>
#include <sys/atomic.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_fuid.h>
#include <sys/sa.h>
#include <sys/zfs_sa.h>
@@ -63,12 +61,12 @@
* of names after deciding which is the appropriate lookup interface.
*/
static int
zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, char *name, boolean_t exact,
zfs_match_find(zfs_sb_t *zsb, znode_t *dzp, char *name, boolean_t exact,
boolean_t update, int *deflags, pathname_t *rpnp, uint64_t *zoid)
{
int error;
if (zfsvfs->z_norm) {
if (zsb->z_norm) {
matchtype_t mt = MT_FIRST;
boolean_t conflict = B_FALSE;
size_t bufsz = 0;
@@ -84,17 +82,19 @@ zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, char *name, boolean_t exact,
* In the non-mixed case we only expect there would ever
* be one match, but we need to use the normalizing lookup.
*/
error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1,
error = zap_lookup_norm(zsb->z_os, dzp->z_id, name, 8, 1,
zoid, mt, buf, bufsz, &conflict);
if (!error && deflags)
*deflags = conflict ? ED_CASE_CONFLICT : 0;
} else {
error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid);
error = zap_lookup(zsb->z_os, dzp->z_id, name, 8, 1, zoid);
}
*zoid = ZFS_DIRENT_OBJ(*zoid);
#ifdef HAVE_DNLC
if (error == ENOENT && update)
dnlc_update(ZTOV(dzp), name, DNLC_NO_VNODE);
dnlc_update(ZTOI(dzp), name, DNLC_NO_VNODE);
#endif /* HAVE_DNLC */
return (error);
}
@@ -138,12 +138,14 @@ int
zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
int flag, int *direntflags, pathname_t *realpnp)
{
zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
zfs_sb_t *zsb = ZTOZSB(dzp);
zfs_dirlock_t *dl;
boolean_t update;
boolean_t exact;
uint64_t zoid;
#ifdef HAVE_DNLC
vnode_t *vp = NULL;
#endif /* HAVE_DNLC */
int error = 0;
int cmpflags;
@@ -153,15 +155,15 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
/*
* Verify that we are not trying to lock '.', '..', or '.zfs'
*/
if (name[0] == '.' &&
(name[1] == '\0' || (name[1] == '.' && name[2] == '\0')) ||
zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0)
if ((name[0] == '.' &&
(name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) ||
(zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0))
return (EEXIST);
/*
* Case sensitivity and normalization preferences are set when
* the file system is created. These are stored in the
* zfsvfs->z_case and zfsvfs->z_norm fields. These choices
* zsb->z_case and zsb->z_norm fields. These choices
* affect what vnodes can be cached in the DNLC, how we
* perform zap lookups, and the "width" of our dirlocks.
*
@@ -181,8 +183,8 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
* access.
*/
exact =
((zfsvfs->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) ||
((zfsvfs->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK));
((zsb->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) ||
((zsb->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK));
/*
* Only look in or update the DNLC if we are looking for the
@@ -194,9 +196,9 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
* Maybe can add TO-UPPERed version of name to dnlc in ci-only
* case for performance improvement?
*/
update = !zfsvfs->z_norm ||
((zfsvfs->z_case == ZFS_CASE_MIXED) &&
!(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));
update = !zsb->z_norm ||
((zsb->z_case == ZFS_CASE_MIXED) &&
!(zsb->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));
/*
* ZRENAMING indicates we are in a situation where we should
@@ -209,7 +211,7 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
if (flag & ZRENAMING)
cmpflags = 0;
else
cmpflags = zfsvfs->z_norm;
cmpflags = zsb->z_norm;
/*
* Wait until there are no locks on this name.
@@ -289,29 +291,34 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
* See if there's an object by this name; if so, put a hold on it.
*/
if (flag & ZXATTR) {
error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid,
error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zsb), &zoid,
sizeof (zoid));
if (error == 0)
error = (zoid == 0 ? ENOENT : 0);
} else {
#ifdef HAVE_DNLC
if (update)
vp = dnlc_lookup(ZTOV(dzp), name);
vp = dnlc_lookup(ZTOI(dzp), name);
if (vp == DNLC_NO_VNODE) {
VN_RELE(vp);
iput(vp);
error = ENOENT;
} else if (vp) {
if (flag & ZNEW) {
zfs_dirent_unlock(dl);
VN_RELE(vp);
iput(vp);
return (EEXIST);
}
*dlpp = dl;
*zpp = VTOZ(vp);
return (0);
} else {
error = zfs_match_find(zfsvfs, dzp, name, exact,
error = zfs_match_find(zsb, dzp, name, exact,
update, direntflags, realpnp, &zoid);
}
#else
error = zfs_match_find(zsb, dzp, name, exact,
update, direntflags, realpnp, &zoid);
#endif /* HAVE_DNLC */
}
if (error) {
if (error != ENOENT || (flag & ZEXISTS)) {
@@ -323,13 +330,15 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
zfs_dirent_unlock(dl);
return (EEXIST);
}
error = zfs_zget(zfsvfs, zoid, zpp);
error = zfs_zget(zsb, zoid, zpp);
if (error) {
zfs_dirent_unlock(dl);
return (error);
}
#ifdef HAVE_DNLC
if (!(flag & ZXATTR) && update)
dnlc_update(ZTOV(dzp), name, ZTOV(*zpp));
dnlc_update(ZTOI(dzp), name, ZTOI(*zpp));
#endif /* HAVE_DNLC */
}
*dlpp = dl;
@@ -378,7 +387,7 @@ zfs_dirent_unlock(zfs_dirlock_t *dl)
* special pseudo-directory.
*/
int
zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags,
zfs_dirlook(znode_t *dzp, char *name, struct inode **ipp, int flags,
int *deflg, pathname_t *rpnp)
{
zfs_dirlock_t *dl;
@@ -387,31 +396,35 @@ zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags,
uint64_t parent;
if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
*vpp = ZTOV(dzp);
VN_HOLD(*vpp);
*ipp = ZTOI(dzp);
igrab(*ipp);
} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
zfs_sb_t *zsb = ZTOZSB(dzp);
/*
* If we are a snapshot mounted under .zfs, return
* the vp for the snapshot directory.
*/
if ((error = sa_lookup(dzp->z_sa_hdl,
SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
SA_ZPL_PARENT(zsb), &parent, sizeof (parent))) != 0)
return (error);
if (parent == dzp->z_id && zfsvfs->z_parent != zfsvfs) {
error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir,
"snapshot", vpp, NULL, 0, NULL, kcred,
#ifdef HAVE_SNAPSHOT
if (parent == dzp->z_id && zsb->z_parent != zsb) {
error = zfsctl_root_lookup(zsb->z_parent->z_ctldir,
"snapshot", ipp, NULL, 0, NULL, kcred,
NULL, NULL, NULL);
return (error);
}
#endif /* HAVE_SNAPSHOT */
rw_enter(&dzp->z_parent_lock, RW_READER);
error = zfs_zget(zfsvfs, parent, &zp);
error = zfs_zget(zsb, parent, &zp);
if (error == 0)
*vpp = ZTOV(zp);
*ipp = ZTOI(zp);
rw_exit(&dzp->z_parent_lock);
#ifdef HAVE_SNAPSHOT
} else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) {
*vpp = zfsctl_root(dzp);
*ipp = zfsctl_root(dzp);
#endif /* HAVE_SNAPSHOT */
} else {
int zf;
@@ -421,7 +434,7 @@ zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags,
error = zfs_dirent_lock(&dl, dzp, name, &zp, zf, deflg, rpnp);
if (error == 0) {
*vpp = ZTOV(zp);
*ipp = ZTOI(zp);
zfs_dirent_unlock(dl);
dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
}
@@ -451,13 +464,13 @@ zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags,
void
zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
{
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
zfs_sb_t *zsb = ZTOZSB(zp);
ASSERT(zp->z_unlinked);
ASSERT(zp->z_links == 0);
VERIFY3U(0, ==,
zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
zap_add_int(zsb->z_os, zsb->z_unlinkedobj, zp->z_id, tx));
}
/*
@@ -465,7 +478,7 @@ zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
* (force) umounted the file system.
*/
void
zfs_unlinked_drain(zfsvfs_t *zfsvfs)
zfs_unlinked_drain(zfs_sb_t *zsb)
{
zap_cursor_t zc;
zap_attribute_t zap;
@@ -476,7 +489,7 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs)
/*
* Interate over the contents of the unlinked set.
*/
for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
for (zap_cursor_init(&zc, zsb->z_os, zsb->z_unlinkedobj);
zap_cursor_retrieve(&zc, &zap) == 0;
zap_cursor_advance(&zc)) {
@@ -484,8 +497,7 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs)
* See what kind of object we have in list
*/
error = dmu_object_info(zfsvfs->z_os,
zap.za_first_integer, &doi);
error = dmu_object_info(zsb->z_os, zap.za_first_integer, &doi);
if (error != 0)
continue;
@@ -495,7 +507,7 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs)
* We need to re-mark these list entries for deletion,
* so we pull them back into core and set zp->z_unlinked.
*/
error = zfs_zget(zfsvfs, zap.za_first_integer, &zp);
error = zfs_zget(zsb, zap.za_first_integer, &zp);
/*
* We may pick up znodes that are already marked for deletion.
@@ -507,7 +519,7 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs)
continue;
zp->z_unlinked = B_TRUE;
VN_RELE(ZTOV(zp));
iput(ZTOI(zp));
}
zap_cursor_fini(&zc);
}
@@ -530,35 +542,34 @@ zfs_purgedir(znode_t *dzp)
zap_attribute_t zap;
znode_t *xzp;
dmu_tx_t *tx;
zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
zfs_sb_t *zsb = ZTOZSB(dzp);
zfs_dirlock_t dl;
int skipped = 0;
int error;
for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
for (zap_cursor_init(&zc, zsb->z_os, dzp->z_id);
(error = zap_cursor_retrieve(&zc, &zap)) == 0;
zap_cursor_advance(&zc)) {
error = zfs_zget(zfsvfs,
error = zfs_zget(zsb,
ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
if (error) {
skipped += 1;
continue;
}
ASSERT((ZTOV(xzp)->v_type == VREG) ||
(ZTOV(xzp)->v_type == VLNK));
ASSERT(S_ISREG(ZTOI(xzp)->i_mode)||S_ISLNK(ZTOI(xzp)->i_mode));
tx = dmu_tx_create(zfsvfs->z_os);
tx = dmu_tx_create(zsb->z_os);
dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
/* Is this really needed ? */
zfs_sa_upgrade_txholds(tx, xzp);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
VN_RELE(ZTOV(xzp));
iput(ZTOI(xzp));
skipped += 1;
continue;
}
@@ -571,7 +582,7 @@ zfs_purgedir(znode_t *dzp)
skipped += 1;
dmu_tx_commit(tx);
VN_RELE(ZTOV(xzp));
iput(ZTOI(xzp));
}
zap_cursor_fini(&zc);
if (error != ENOENT)
@@ -582,8 +593,8 @@ zfs_purgedir(znode_t *dzp)
void
zfs_rmnode(znode_t *zp)
{
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
objset_t *os = zfsvfs->z_os;
zfs_sb_t *zsb = ZTOZSB(zp);
objset_t *os = zsb->z_os;
znode_t *xzp = NULL;
dmu_tx_t *tx;
uint64_t acl_obj;
@@ -591,19 +602,20 @@ zfs_rmnode(znode_t *zp)
int error;
ASSERT(zp->z_links == 0);
ASSERT(ZTOV(zp)->v_count == 0);
ASSERT(atomic_read(&ZTOI(zp)->i_count) == 0);
/*
* If this is an attribute directory, purge its contents.
*/
if (ZTOV(zp)->v_type == VDIR && (zp->z_pflags & ZFS_XATTR)) {
if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
if (zfs_purgedir(zp) != 0) {
/*
* Not enough space to delete some xattrs.
* Leave it in the unlinked set.
*/
zfs_znode_dmu_fini(zp);
zfs_znode_free(zp);
zfs_inode_destroy(ZTOI(zp));
return;
}
}
@@ -617,7 +629,7 @@ zfs_rmnode(znode_t *zp)
* Not enough space. Leave the file in the unlinked set.
*/
zfs_znode_dmu_fini(zp);
zfs_znode_free(zp);
zfs_inode_destroy(ZTOI(zp));
return;
}
@@ -625,10 +637,10 @@ zfs_rmnode(znode_t *zp)
* If the file has extended attributes, we're going to unlink
* the xattr dir.
*/
error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb),
&xattr_obj, sizeof (xattr_obj));
if (error == 0 && xattr_obj) {
error = zfs_zget(zfsvfs, xattr_obj, &xzp);
error = zfs_zget(zsb, xattr_obj, &xzp);
ASSERT(error == 0);
}
@@ -639,9 +651,9 @@ zfs_rmnode(znode_t *zp)
*/
tx = dmu_tx_create(os);
dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
if (xzp) {
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, TRUE, NULL);
dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
}
if (acl_obj)
@@ -657,7 +669,7 @@ zfs_rmnode(znode_t *zp)
*/
dmu_tx_abort(tx);
zfs_znode_dmu_fini(zp);
zfs_znode_free(zp);
zfs_inode_destroy(ZTOI(zp));
goto out;
}
@@ -666,7 +678,7 @@ zfs_rmnode(znode_t *zp)
mutex_enter(&xzp->z_lock);
xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */
xzp->z_links = 0; /* no more links to it */
VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zsb),
&xzp->z_links, sizeof (xzp->z_links), tx));
mutex_exit(&xzp->z_lock);
zfs_unlinked_add(xzp, tx);
@@ -674,14 +686,14 @@ zfs_rmnode(znode_t *zp)
/* Remove this znode from the unlinked set */
VERIFY3U(0, ==,
zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
zap_remove_int(zsb->z_os, zsb->z_unlinkedobj, zp->z_id, tx));
zfs_znode_delete(zp, tx);
dmu_tx_commit(tx);
out:
if (xzp)
VN_RELE(ZTOV(xzp));
iput(ZTOI(xzp));
}
static uint64_t
@@ -689,7 +701,7 @@ zfs_dirent(znode_t *zp, uint64_t mode)
{
uint64_t de = zp->z_id;
if (zp->z_zfsvfs->z_version >= ZPL_VERSION_DIRENT_TYPE)
if (ZTOZSB(zp)->z_version >= ZPL_VERSION_DIRENT_TYPE)
de |= IFTODT(mode) << 60;
return (de);
}
@@ -701,10 +713,9 @@ int
zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
{
znode_t *dzp = dl->dl_dzp;
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
vnode_t *vp = ZTOV(zp);
zfs_sb_t *zsb = ZTOZSB(zp);
uint64_t value;
int zp_is_dir = (vp->v_type == VDIR);
int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
sa_bulk_attr_t bulk[5];
uint64_t mtime[2], ctime[2];
int count = 0;
@@ -719,17 +730,17 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
return (ENOENT);
}
zp->z_links++;
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL,
&zp->z_links, sizeof (zp->z_links));
}
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL,
&dzp->z_id, sizeof (dzp->z_id));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
&zp->z_pflags, sizeof (zp->z_pflags));
if (!(flag & ZNEW)) {
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
ctime, sizeof (ctime));
zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
ctime, B_TRUE);
@@ -743,15 +754,15 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
dzp->z_size++;
dzp->z_links += zp_is_dir;
count = 0;
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL,
&dzp->z_size, sizeof (dzp->z_size));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL,
&dzp->z_links, sizeof (dzp->z_links));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL,
mtime, sizeof (mtime));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
ctime, sizeof (ctime));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
&dzp->z_pflags, sizeof (dzp->z_pflags));
zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
@@ -759,11 +770,13 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
mutex_exit(&dzp->z_lock);
value = zfs_dirent(zp, zp->z_mode);
error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name,
error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name,
8, 1, &value, tx);
ASSERT(error == 0);
dnlc_update(ZTOV(dzp), dl->dl_name, vp);
#ifdef HAVE_DNLC
dnlc_update(ZTOI(dzp), dl->dl_name, vp);
#endif /* HAVE_DNLC */
return (0);
}
@@ -774,18 +787,18 @@ zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx,
{
int error;
if (zp->z_zfsvfs->z_norm) {
if (((zp->z_zfsvfs->z_case == ZFS_CASE_INSENSITIVE) &&
if (ZTOZSB(zp)->z_norm) {
if (((ZTOZSB(zp)->z_case == ZFS_CASE_INSENSITIVE) &&
(flag & ZCIEXACT)) ||
((zp->z_zfsvfs->z_case == ZFS_CASE_MIXED) &&
((ZTOZSB(zp)->z_case == ZFS_CASE_MIXED) &&
!(flag & ZCILOOK)))
error = zap_remove_norm(zp->z_zfsvfs->z_os,
error = zap_remove_norm(ZTOZSB(zp)->z_os,
dzp->z_id, dl->dl_name, MT_EXACT, tx);
else
error = zap_remove_norm(zp->z_zfsvfs->z_os,
error = zap_remove_norm(ZTOZSB(zp)->z_os,
dzp->z_id, dl->dl_name, MT_FIRST, tx);
} else {
error = zap_remove(zp->z_zfsvfs->z_os,
error = zap_remove(ZTOZSB(zp)->z_os,
dzp->z_id, dl->dl_name, tx);
}
@@ -804,31 +817,23 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
boolean_t *unlinkedp)
{
znode_t *dzp = dl->dl_dzp;
zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
vnode_t *vp = ZTOV(zp);
int zp_is_dir = (vp->v_type == VDIR);
zfs_sb_t *zsb = ZTOZSB(dzp);
int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
boolean_t unlinked = B_FALSE;
sa_bulk_attr_t bulk[5];
uint64_t mtime[2], ctime[2];
int count = 0;
int error;
dnlc_remove(ZTOV(dzp), dl->dl_name);
#ifdef HAVE_DNLC
dnlc_remove(ZTOI(dzp), dl->dl_name);
#endif /* HAVE_DNLC */
if (!(flag & ZRENAMING)) {
if (vn_vfswlock(vp)) /* prevent new mounts on zp */
return (EBUSY);
if (vn_ismntpt(vp)) { /* don't remove mount point */
vn_vfsunlock(vp);
return (EBUSY);
}
mutex_enter(&zp->z_lock);
if (zp_is_dir && !zfs_dirempty(zp)) {
mutex_exit(&zp->z_lock);
vn_vfsunlock(vp);
return (EEXIST);
}
@@ -840,16 +845,13 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
error = zfs_dropname(dl, zp, dzp, tx, flag);
if (error != 0) {
mutex_exit(&zp->z_lock);
vn_vfsunlock(vp);
return (error);
}
if (zp->z_links <= zp_is_dir) {
zfs_panic_recover("zfs: link count on %s is %u, "
"should be at least %u",
zp->z_vnode->v_path ? zp->z_vnode->v_path :
"<unknown>", (int)zp->z_links,
zp_is_dir + 1);
zfs_panic_recover("zfs: link count on %lu is %u, "
"should be at least %u", zp->z_id,
(int)zp->z_links, zp_is_dir + 1);
zp->z_links = zp_is_dir + 1;
}
if (--zp->z_links == zp_is_dir) {
@@ -857,20 +859,19 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
zp->z_links = 0;
unlinked = B_TRUE;
} else {
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb),
NULL, &ctime, sizeof (ctime));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
NULL, &zp->z_pflags, sizeof (zp->z_pflags));
zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime,
B_TRUE);
}
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb),
NULL, &zp->z_links, sizeof (zp->z_links));
error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
count = 0;
ASSERT(error == 0);
mutex_exit(&zp->z_lock);
vn_vfsunlock(vp);
} else {
error = zfs_dropname(dl, zp, dzp, tx, flag);
if (error != 0)
@@ -880,15 +881,15 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
mutex_enter(&dzp->z_lock);
dzp->z_size--; /* one dirent removed */
dzp->z_links -= zp_is_dir; /* ".." link from zp */
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb),
NULL, &dzp->z_links, sizeof (dzp->z_links));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb),
NULL, &dzp->z_size, sizeof (dzp->z_size));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb),
NULL, ctime, sizeof (ctime));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb),
NULL, mtime, sizeof (mtime));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
NULL, &dzp->z_pflags, sizeof (dzp->z_pflags));
zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
@@ -915,38 +916,40 @@ zfs_dirempty(znode_t *dzp)
}
int
zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
zfs_make_xattrdir(znode_t *zp, vattr_t *vap, struct inode **xipp, cred_t *cr)
{
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
zfs_sb_t *zsb = ZTOZSB(zp);
znode_t *xzp;
dmu_tx_t *tx;
int error;
zfs_acl_ids_t acl_ids;
boolean_t fuid_dirtied;
#ifdef DEBUG
uint64_t parent;
#endif
*xvpp = NULL;
*xipp = NULL;
if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr))
if ((error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr)))
return (error);
if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
&acl_ids)) != 0)
return (error);
if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
if (zfs_acl_ids_overquota(zsb, &acl_ids)) {
zfs_acl_ids_free(&acl_ids);
return (EDQUOT);
}
top:
tx = dmu_tx_create(zfsvfs->z_os);
tx = dmu_tx_create(zsb->z_os);
dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
ZFS_SA_BASE_ATTR_SIZE);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
fuid_dirtied = zfsvfs->z_fuid_dirty;
fuid_dirtied = zsb->z_fuid_dirty;
if (fuid_dirtied)
zfs_fuid_txhold(zfsvfs, tx);
zfs_fuid_txhold(zsb, tx);
error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
if (error == ERESTART) {
@@ -961,24 +964,24 @@ top:
zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids);
if (fuid_dirtied)
zfs_fuid_sync(zfsvfs, tx);
zfs_fuid_sync(zsb, tx);
#ifdef DEBUG
error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zsb),
&parent, sizeof (parent));
ASSERT(error == 0 && parent == zp->z_id);
#endif
VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id,
VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xzp->z_id,
sizeof (xzp->z_id), tx));
(void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp,
(void) zfs_log_create(zsb->z_log, tx, TX_MKXATTR, zp,
xzp, "", NULL, acl_ids.z_fuidp, vap);
zfs_acl_ids_free(&acl_ids);
dmu_tx_commit(tx);
*xvpp = ZTOV(xzp);
*xipp = ZTOI(xzp);
return (0);
}
@@ -991,15 +994,15 @@ top:
* cr - credentials of caller
* flags - flags from the VOP_LOOKUP call
*
* OUT: xzpp - pointer to extended attribute znode
* OUT: xipp - pointer to extended attribute znode
*
* RETURN: 0 on success
* error number on failure
*/
int
zfs_get_xattrdir(znode_t *zp, vnode_t **xvpp, cred_t *cr, int flags)
zfs_get_xattrdir(znode_t *zp, struct inode **xipp, cred_t *cr, int flags)
{
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
zfs_sb_t *zsb = ZTOZSB(zp);
znode_t *xzp;
zfs_dirlock_t *dl;
vattr_t va;
@@ -1010,18 +1013,17 @@ top:
return (error);
if (xzp != NULL) {
*xvpp = ZTOV(xzp);
*xipp = ZTOI(xzp);
zfs_dirent_unlock(dl);
return (0);
}
if (!(flags & CREATE_XATTR_DIR)) {
zfs_dirent_unlock(dl);
return (ENOENT);
}
if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
if (zsb->z_vfs->mnt_flags & MNT_READONLY) {
zfs_dirent_unlock(dl);
return (EROFS);
}
@@ -1036,12 +1038,11 @@ top:
* Once in a directory the ability to read/write attributes
* is controlled by the permissions on the attribute file.
*/
va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
va.va_type = VDIR;
va.va_mask = ATTR_MODE | ATTR_UID | ATTR_GID;
va.va_mode = S_IFDIR | S_ISVTX | 0777;
zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid);
error = zfs_make_xattrdir(zp, &va, xvpp, cr);
error = zfs_make_xattrdir(zp, &va, xipp, cr);
zfs_dirent_unlock(dl);
if (error == ERESTART) {
@@ -1068,25 +1069,24 @@ top:
int
zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
{
uid_t uid;
uid_t uid;
uid_t downer;
uid_t fowner;
zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
zfs_sb_t *zsb = ZTOZSB(zdp);
if (zdp->z_zfsvfs->z_replay)
if (zsb->z_replay)
return (0);
if ((zdp->z_mode & S_ISVTX) == 0)
return (0);
downer = zfs_fuid_map_id(zfsvfs, zdp->z_uid, cr, ZFS_OWNER);
fowner = zfs_fuid_map_id(zfsvfs, zp->z_uid, cr, ZFS_OWNER);
downer = zfs_fuid_map_id(zsb, zdp->z_uid, cr, ZFS_OWNER);
fowner = zfs_fuid_map_id(zsb, zp->z_uid, cr, ZFS_OWNER);
if ((uid = crgetuid(cr)) == downer || uid == fowner ||
(ZTOV(zp)->v_type == VREG &&
(S_ISDIR(ZTOI(zp)->i_mode) &&
zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0))
return (0);
else
return (secpolicy_vnode_remove(cr));
}
#endif /* HAVE_ZPL */
+105 -88
View File
@@ -46,7 +46,7 @@
* two AVL trees are created. One tree is keyed by the index number
* and the other by the domain string. Nodes are never removed from
* trees, but new entries may be added. If a new entry is added then
* the zfsvfs->z_fuid_dirty flag is set to true and the caller will then
* the zsb->z_fuid_dirty flag is set to true and the caller will then
* be responsible for calling zfs_fuid_sync() to sync the changes to disk.
*
*/
@@ -192,39 +192,38 @@ zfs_fuid_idx_domain(avl_tree_t *idx_tree, uint32_t idx)
}
#ifdef _KERNEL
#ifdef HAVE_ZPL
/*
* Load the fuid table(s) into memory.
*/
static void
zfs_fuid_init(zfsvfs_t *zfsvfs)
zfs_fuid_init(zfs_sb_t *zsb)
{
rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);
rw_enter(&zsb->z_fuid_lock, RW_WRITER);
if (zfsvfs->z_fuid_loaded) {
rw_exit(&zfsvfs->z_fuid_lock);
if (zsb->z_fuid_loaded) {
rw_exit(&zsb->z_fuid_lock);
return;
}
zfs_fuid_avl_tree_create(&zfsvfs->z_fuid_idx, &zfsvfs->z_fuid_domain);
zfs_fuid_avl_tree_create(&zsb->z_fuid_idx, &zsb->z_fuid_domain);
(void) zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
ZFS_FUID_TABLES, 8, 1, &zfsvfs->z_fuid_obj);
if (zfsvfs->z_fuid_obj != 0) {
zfsvfs->z_fuid_size = zfs_fuid_table_load(zfsvfs->z_os,
zfsvfs->z_fuid_obj, &zfsvfs->z_fuid_idx,
&zfsvfs->z_fuid_domain);
(void) zap_lookup(zsb->z_os, MASTER_NODE_OBJ,
ZFS_FUID_TABLES, 8, 1, &zsb->z_fuid_obj);
if (zsb->z_fuid_obj != 0) {
zsb->z_fuid_size = zfs_fuid_table_load(zsb->z_os,
zsb->z_fuid_obj, &zsb->z_fuid_idx,
&zsb->z_fuid_domain);
}
zfsvfs->z_fuid_loaded = B_TRUE;
rw_exit(&zfsvfs->z_fuid_lock);
zsb->z_fuid_loaded = B_TRUE;
rw_exit(&zsb->z_fuid_lock);
}
/*
* sync out AVL trees to persistent storage.
*/
void
zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
zfs_fuid_sync(zfs_sb_t *zsb, dmu_tx_t *tx)
{
nvlist_t *nvp;
nvlist_t **fuids;
@@ -235,30 +234,30 @@ zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
int numnodes;
int i;
if (!zfsvfs->z_fuid_dirty) {
if (!zsb->z_fuid_dirty) {
return;
}
rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);
rw_enter(&zsb->z_fuid_lock, RW_WRITER);
/*
* First see if table needs to be created?
*/
if (zfsvfs->z_fuid_obj == 0) {
zfsvfs->z_fuid_obj = dmu_object_alloc(zfsvfs->z_os,
if (zsb->z_fuid_obj == 0) {
zsb->z_fuid_obj = dmu_object_alloc(zsb->z_os,
DMU_OT_FUID, 1 << 14, DMU_OT_FUID_SIZE,
sizeof (uint64_t), tx);
VERIFY(zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
VERIFY(zap_add(zsb->z_os, MASTER_NODE_OBJ,
ZFS_FUID_TABLES, sizeof (uint64_t), 1,
&zfsvfs->z_fuid_obj, tx) == 0);
&zsb->z_fuid_obj, tx) == 0);
}
VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
numnodes = avl_numnodes(&zfsvfs->z_fuid_idx);
numnodes = avl_numnodes(&zsb->z_fuid_idx);
fuids = kmem_alloc(numnodes * sizeof (void *), KM_SLEEP);
for (i = 0, domnode = avl_first(&zfsvfs->z_fuid_domain); domnode; i++,
domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode)) {
for (i = 0, domnode = avl_first(&zsb->z_fuid_domain); domnode; i++,
domnode = AVL_NEXT(&zsb->z_fuid_domain, domnode)) {
VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0);
VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
domnode->f_idx) == 0);
@@ -276,30 +275,29 @@ zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
VERIFY(nvlist_pack(nvp, &packed, &nvsize,
NV_ENCODE_XDR, KM_SLEEP) == 0);
nvlist_free(nvp);
zfsvfs->z_fuid_size = nvsize;
dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
zfsvfs->z_fuid_size, packed, tx);
kmem_free(packed, zfsvfs->z_fuid_size);
VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj,
zsb->z_fuid_size = nvsize;
dmu_write(zsb->z_os, zsb->z_fuid_obj, 0, zsb->z_fuid_size, packed, tx);
kmem_free(packed, zsb->z_fuid_size);
VERIFY(0 == dmu_bonus_hold(zsb->z_os, zsb->z_fuid_obj,
FTAG, &db));
dmu_buf_will_dirty(db, tx);
*(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
*(uint64_t *)db->db_data = zsb->z_fuid_size;
dmu_buf_rele(db, FTAG);
zfsvfs->z_fuid_dirty = B_FALSE;
rw_exit(&zfsvfs->z_fuid_lock);
zsb->z_fuid_dirty = B_FALSE;
rw_exit(&zsb->z_fuid_lock);
}
/*
* Query domain table for a given domain.
*
* If domain isn't found and addok is set, it is added to AVL trees and
* the zfsvfs->z_fuid_dirty flag will be set to TRUE. It will then be
* the zsb->z_fuid_dirty flag will be set to TRUE. It will then be
* necessary for the caller or another thread to detect the dirty table
* and sync out the changes.
*/
int
zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain,
zfs_fuid_find_by_domain(zfs_sb_t *zsb, const char *domain,
char **retdomain, boolean_t addok)
{
fuid_domain_t searchnode, *findnode;
@@ -320,23 +318,23 @@ zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain,
searchnode.f_ksid = ksid_lookupdomain(domain);
if (retdomain)
*retdomain = searchnode.f_ksid->kd_name;
if (!zfsvfs->z_fuid_loaded)
zfs_fuid_init(zfsvfs);
if (!zsb->z_fuid_loaded)
zfs_fuid_init(zsb);
retry:
rw_enter(&zfsvfs->z_fuid_lock, rw);
findnode = avl_find(&zfsvfs->z_fuid_domain, &searchnode, &loc);
rw_enter(&zsb->z_fuid_lock, rw);
findnode = avl_find(&zsb->z_fuid_domain, &searchnode, &loc);
if (findnode) {
rw_exit(&zfsvfs->z_fuid_lock);
rw_exit(&zsb->z_fuid_lock);
ksiddomain_rele(searchnode.f_ksid);
return (findnode->f_idx);
} else if (addok) {
fuid_domain_t *domnode;
uint64_t retidx;
if (rw == RW_READER && !rw_tryupgrade(&zfsvfs->z_fuid_lock)) {
rw_exit(&zfsvfs->z_fuid_lock);
if (rw == RW_READER && !rw_tryupgrade(&zsb->z_fuid_lock)) {
rw_exit(&zsb->z_fuid_lock);
rw = RW_WRITER;
goto retry;
}
@@ -344,15 +342,15 @@ retry:
domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);
domnode->f_ksid = searchnode.f_ksid;
retidx = domnode->f_idx = avl_numnodes(&zfsvfs->z_fuid_idx) + 1;
retidx = domnode->f_idx = avl_numnodes(&zsb->z_fuid_idx) + 1;
avl_add(&zfsvfs->z_fuid_domain, domnode);
avl_add(&zfsvfs->z_fuid_idx, domnode);
zfsvfs->z_fuid_dirty = B_TRUE;
rw_exit(&zfsvfs->z_fuid_lock);
avl_add(&zsb->z_fuid_domain, domnode);
avl_add(&zsb->z_fuid_idx, domnode);
zsb->z_fuid_dirty = B_TRUE;
rw_exit(&zsb->z_fuid_lock);
return (retidx);
} else {
rw_exit(&zfsvfs->z_fuid_lock);
rw_exit(&zsb->z_fuid_lock);
return (-1);
}
}
@@ -364,23 +362,23 @@ retry:
*
*/
const char *
zfs_fuid_find_by_idx(zfsvfs_t *zfsvfs, uint32_t idx)
zfs_fuid_find_by_idx(zfs_sb_t *zsb, uint32_t idx)
{
char *domain;
if (idx == 0 || !zfsvfs->z_use_fuids)
if (idx == 0 || !zsb->z_use_fuids)
return (NULL);
if (!zfsvfs->z_fuid_loaded)
zfs_fuid_init(zfsvfs);
if (!zsb->z_fuid_loaded)
zfs_fuid_init(zsb);
rw_enter(&zfsvfs->z_fuid_lock, RW_READER);
rw_enter(&zsb->z_fuid_lock, RW_READER);
if (zfsvfs->z_fuid_obj || zfsvfs->z_fuid_dirty)
domain = zfs_fuid_idx_domain(&zfsvfs->z_fuid_idx, idx);
if (zsb->z_fuid_obj || zsb->z_fuid_dirty)
domain = zfs_fuid_idx_domain(&zsb->z_fuid_idx, idx);
else
domain = nulldomain;
rw_exit(&zfsvfs->z_fuid_lock);
rw_exit(&zsb->z_fuid_lock);
ASSERT(domain);
return (domain);
@@ -389,14 +387,15 @@ zfs_fuid_find_by_idx(zfsvfs_t *zfsvfs, uint32_t idx)
void
zfs_fuid_map_ids(znode_t *zp, cred_t *cr, uid_t *uidp, uid_t *gidp)
{
*uidp = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER);
*gidp = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_gid, cr, ZFS_GROUP);
*uidp = zfs_fuid_map_id(ZTOZSB(zp), zp->z_uid, cr, ZFS_OWNER);
*gidp = zfs_fuid_map_id(ZTOZSB(zp), zp->z_gid, cr, ZFS_GROUP);
}
uid_t
zfs_fuid_map_id(zfsvfs_t *zfsvfs, uint64_t fuid,
zfs_fuid_map_id(zfs_sb_t *zsb, uint64_t fuid,
cred_t *cr, zfs_fuid_type_t type)
{
#ifdef HAVE_KSID
uint32_t index = FUID_INDEX(fuid);
const char *domain;
uid_t id;
@@ -404,7 +403,7 @@ zfs_fuid_map_id(zfsvfs_t *zfsvfs, uint64_t fuid,
if (index == 0)
return (fuid);
domain = zfs_fuid_find_by_idx(zfsvfs, index);
domain = zfs_fuid_find_by_idx(zsb, index);
ASSERT(domain != NULL);
if (type == ZFS_OWNER || type == ZFS_ACE_USER) {
@@ -415,6 +414,12 @@ zfs_fuid_map_id(zfsvfs_t *zfsvfs, uint64_t fuid,
FUID_RID(fuid), &id);
}
return (id);
#else
if(type == ZFS_OWNER || type == ZFS_ACE_USER)
return (crgetuid(cr));
else
return (crgetgid(cr));
#endif /* HAVE_KSID */
}
/*
@@ -483,6 +488,7 @@ zfs_fuid_node_add(zfs_fuid_info_t **fuidpp, const char *domain, uint32_t rid,
}
}
#ifdef HAVE_KSID
/*
* Create a file system FUID, based on information in the users cred
*
@@ -492,13 +498,13 @@ zfs_fuid_node_add(zfs_fuid_info_t **fuidpp, const char *domain, uint32_t rid,
* be used if it exists.
*/
uint64_t
zfs_fuid_create_cred(zfsvfs_t *zfsvfs, zfs_fuid_type_t type,
zfs_fuid_create_cred(zfs_sb_t *zsb, zfs_fuid_type_t type,
cred_t *cr, zfs_fuid_info_t **fuidp)
{
uint64_t idx;
ksid_t *ksid;
uint32_t rid;
char *kdomain;
char *kdomain;
const char *domain;
uid_t id;
@@ -506,7 +512,7 @@ zfs_fuid_create_cred(zfsvfs_t *zfsvfs, zfs_fuid_type_t type,
ksid = crgetsid(cr, (type == ZFS_OWNER) ? KSID_OWNER : KSID_GROUP);
if (!zfsvfs->z_use_fuids || (ksid == NULL)) {
if (!zsb->z_use_fuids || (ksid == NULL)) {
id = (type == ZFS_OWNER) ? crgetuid(cr) : crgetgid(cr);
if (IS_EPHEMERAL(id))
@@ -529,12 +535,13 @@ zfs_fuid_create_cred(zfsvfs_t *zfsvfs, zfs_fuid_type_t type,
rid = ksid_getrid(ksid);
domain = ksid_getdomain(ksid);
idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, B_TRUE);
idx = zfs_fuid_find_by_domain(zsb, domain, &kdomain, B_TRUE);
zfs_fuid_node_add(fuidp, kdomain, rid, idx, id, type);
return (FUID_ENCODE(idx, rid));
}
#endif /* HAVE_KSID */
/*
* Create a file system FUID for an ACL ace
@@ -546,12 +553,13 @@ zfs_fuid_create_cred(zfsvfs_t *zfsvfs, zfs_fuid_type_t type,
*
* During replay operations the domain+rid information is
* found in the zfs_fuid_info_t that the replay code has
* attached to the zfsvfs of the file system.
* attached to the zsb of the file system.
*/
uint64_t
zfs_fuid_create(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr,
zfs_fuid_create(zfs_sb_t *zsb, uint64_t id, cred_t *cr,
zfs_fuid_type_t type, zfs_fuid_info_t **fuidpp)
{
#ifdef HAVE_KSID
const char *domain;
char *kdomain;
uint32_t fuid_idx = FUID_INDEX(id);
@@ -569,11 +577,11 @@ zfs_fuid_create(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr,
* chmod.
*/
if (!zfsvfs->z_use_fuids || !IS_EPHEMERAL(id) || fuid_idx != 0)
if (!zsb->z_use_fuids || !IS_EPHEMERAL(id) || fuid_idx != 0)
return (id);
if (zfsvfs->z_replay) {
fuidp = zfsvfs->z_fuid_replay;
if (zsb->z_replay) {
fuidp = zsb->z_fuid_replay;
/*
* If we are passed an ephemeral id, but no
@@ -620,9 +628,9 @@ zfs_fuid_create(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr,
}
}
idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, B_TRUE);
idx = zfs_fuid_find_by_domain(zsb, domain, &kdomain, B_TRUE);
if (!zfsvfs->z_replay)
if (!zsb->z_replay)
zfs_fuid_node_add(fuidpp, kdomain,
rid, idx, id, type);
else if (zfuid != NULL) {
@@ -630,18 +638,24 @@ zfs_fuid_create(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr,
kmem_free(zfuid, sizeof (zfs_fuid_t));
}
return (FUID_ENCODE(idx, rid));
#else
/*
* The Linux port only supports POSIX IDs, use the passed id.
*/
return (id);
#endif
}
void
zfs_fuid_destroy(zfsvfs_t *zfsvfs)
zfs_fuid_destroy(zfs_sb_t *zsb)
{
rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);
if (!zfsvfs->z_fuid_loaded) {
rw_exit(&zfsvfs->z_fuid_lock);
rw_enter(&zsb->z_fuid_lock, RW_WRITER);
if (!zsb->z_fuid_loaded) {
rw_exit(&zsb->z_fuid_lock);
return;
}
zfs_fuid_table_destroy(&zfsvfs->z_fuid_idx, &zfsvfs->z_fuid_domain);
rw_exit(&zfsvfs->z_fuid_lock);
zfs_fuid_table_destroy(&zsb->z_fuid_idx, &zsb->z_fuid_domain);
rw_exit(&zsb->z_fuid_lock);
}
/*
@@ -695,14 +709,15 @@ zfs_fuid_info_free(zfs_fuid_info_t *fuidp)
* Will use a straight FUID compare when possible.
*/
boolean_t
zfs_groupmember(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr)
zfs_groupmember(zfs_sb_t *zsb, uint64_t id, cred_t *cr)
{
#ifdef HAVE_KSID
ksid_t *ksid = crgetsid(cr, KSID_GROUP);
ksidlist_t *ksidlist = crgetsidlist(cr);
uid_t gid;
if (ksid && ksidlist) {
int i;
int i;
ksid_t *ksid_groups;
uint32_t idx = FUID_INDEX(id);
uint32_t rid = FUID_RID(id);
@@ -718,7 +733,7 @@ zfs_groupmember(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr)
} else {
const char *domain;
domain = zfs_fuid_find_by_idx(zfsvfs, idx);
domain = zfs_fuid_find_by_idx(zsb, idx);
ASSERT(domain != NULL);
if (strcmp(domain,
@@ -736,23 +751,25 @@ zfs_groupmember(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr)
/*
* Not found in ksidlist, check posix groups
*/
gid = zfs_fuid_map_id(zfsvfs, id, cr, ZFS_GROUP);
gid = zfs_fuid_map_id(zsb, id, cr, ZFS_GROUP);
return (groupmember(gid, cr));
#else
return (B_TRUE);
#endif
}
void
zfs_fuid_txhold(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
zfs_fuid_txhold(zfs_sb_t *zsb, dmu_tx_t *tx)
{
if (zfsvfs->z_fuid_obj == 0) {
if (zsb->z_fuid_obj == 0) {
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
FUID_SIZE_ESTIMATE(zfsvfs));
FUID_SIZE_ESTIMATE(zsb));
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
} else {
dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
FUID_SIZE_ESTIMATE(zfsvfs));
dmu_tx_hold_bonus(tx, zsb->z_fuid_obj);
dmu_tx_hold_write(tx, zsb->z_fuid_obj, 0,
FUID_SIZE_ESTIMATE(zsb));
}
}
#endif /* HAVE_ZPL */
#endif
+107 -150
View File
@@ -58,7 +58,6 @@
#include <sys/mount.h>
#include <sys/sdt.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_dir.h>
#include <sys/zfs_onexit.h>
#include <sys/zvol.h>
@@ -433,7 +432,7 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
/*
* If the existing dataset label is nondefault, check if the
* dataset is mounted (label cannot be changed while mounted).
* Get the zfsvfs; if there isn't one, then the dataset isn't
* Get the zfs_sb_t; if there isn't one, then the dataset isn't
* mounted (or isn't a dataset, doesn't exist, ...).
*/
if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
@@ -592,7 +591,7 @@ zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)
return (error);
}
#ifdef HAVE_ZPL
#ifdef HAVE_SHARE
static int
zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr)
{
@@ -616,12 +615,12 @@ zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr)
return (dsl_deleg_access(zc->zc_name,
ZFS_DELEG_PERM_SHARE, cr));
}
#endif /* HAVE_ZPL */
#endif /* HAVE_SHARE */
int
zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
{
#ifdef HAVE_ZPL
#ifdef HAVE_SHARE
if (!INGLOBALZONE(curproc))
return (EPERM);
@@ -632,13 +631,13 @@ zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
}
#else
return (ENOTSUP);
#endif /* HAVE_ZPL */
#endif /* HAVE_SHARE */
}
int
zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr)
{
#ifdef HAVE_ZPL
#ifdef HAVE_SHARE
if (!INGLOBALZONE(curproc))
return (EPERM);
@@ -649,7 +648,7 @@ zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr)
}
#else
return (ENOTSUP);
#endif /* HAVE_ZPL */
#endif /* HAVE_SHARE */
}
static int
@@ -850,20 +849,6 @@ zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr)
return (error);
}
#ifdef HAVE_ZPL
static int
zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr)
{
int error;
error = secpolicy_fs_unmount(cr, NULL);
if (error) {
error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr);
}
return (error);
}
#endif /* HAVE_ZPL */
/*
* Policy for pool operations - create/destroy pools, add vdevs, etc. Requires
* SYS_CONFIG privilege, which is not available in a local zone.
@@ -1106,9 +1091,8 @@ put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
return (error);
}
#ifdef HAVE_ZPL
static int
getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
get_zfs_sb(const char *dsname, zfs_sb_t **zsbp)
{
objset_t *os;
int error;
@@ -1122,9 +1106,9 @@ getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
}
mutex_enter(&os->os_user_ptr_lock);
*zfvp = dmu_objset_get_user(os);
if (*zfvp) {
VFS_HOLD((*zfvp)->z_vfs);
*zsbp = dmu_objset_get_user(os);
if (*zsbp) {
mntget((*zsbp)->z_vfs);
} else {
error = ESRCH;
}
@@ -1132,52 +1116,45 @@ getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
dmu_objset_rele(os, FTAG);
return (error);
}
#endif
/*
* Find a zfsvfs_t for a mounted filesystem, or create our own, in which
* Find a zfs_sb_t for a mounted filesystem, or create our own, in which
* case its z_vfs will be NULL, and it will be opened as the owner.
*/
static int
zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
zfs_sb_hold(const char *name, void *tag, zfs_sb_t **zsbp, boolean_t writer)
{
#ifdef HAVE_ZPL
int error = 0;
if (getzfsvfs(name, zfvp) != 0)
error = zfsvfs_create(name, zfvp);
if (get_zfs_sb(name, zsbp) != 0)
error = zfs_sb_create(name, zsbp);
if (error == 0) {
rrw_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
rrw_enter(&(*zsbp)->z_teardown_lock, (writer) ? RW_WRITER :
RW_READER, tag);
if ((*zfvp)->z_unmounted) {
if ((*zsbp)->z_unmounted) {
/*
* XXX we could probably try again, since the unmounting
* thread should be just about to disassociate the
* objset from the zfsvfs.
*/
rrw_exit(&(*zfvp)->z_teardown_lock, tag);
rrw_exit(&(*zsbp)->z_teardown_lock, tag);
return (EBUSY);
}
}
return (error);
#else
return ENOTSUP;
#endif
}
static void
zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
zfs_sb_rele(zfs_sb_t *zsb, void *tag)
{
#ifdef HAVE_ZPL
rrw_exit(&zfsvfs->z_teardown_lock, tag);
rrw_exit(&zsb->z_teardown_lock, tag);
if (zfsvfs->z_vfs) {
VFS_RELE(zfsvfs->z_vfs);
if (zsb->z_vfs) {
mntput(zsb->z_vfs);
} else {
dmu_objset_disown(zfsvfs->z_os, zfsvfs);
zfsvfs_free(zfsvfs);
dmu_objset_disown(zsb->z_os, zsb);
zfs_sb_free(zsb);
}
#endif
}
static int
@@ -2087,7 +2064,6 @@ top:
static int
zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
{
#ifdef HAVE_ZPL
const char *propname = nvpair_name(pair);
uint64_t *valary;
unsigned int vallen;
@@ -2096,7 +2072,7 @@ zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
zfs_userquota_prop_t type;
uint64_t rid;
uint64_t quota;
zfsvfs_t *zfsvfs;
zfs_sb_t *zsb;
int err;
if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
@@ -2121,16 +2097,13 @@ zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
rid = valary[1];
quota = valary[2];
err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
err = zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE);
if (err == 0) {
err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
zfsvfs_rele(zfsvfs, FTAG);
err = zfs_set_userquota(zsb, type, domain, rid, quota);
zfs_sb_rele(zsb, FTAG);
}
return (err);
#else
return ENOTSUP;
#endif
}
/*
@@ -2186,15 +2159,13 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
break;
case ZFS_PROP_VERSION:
{
zfsvfs_t *zfsvfs;
zfs_sb_t *zsb;
if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
if ((err = zfs_sb_hold(dsname, FTAG, &zsb, B_TRUE)) != 0)
break;
#ifdef HAVE_ZPL
err = zfs_set_version(zfsvfs, intval);
#endif
zfsvfs_rele(zfsvfs, FTAG);
err = zfs_set_version(zsb, intval);
zfs_sb_rele(zsb, FTAG);
if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
zfs_cmd_t *zc;
@@ -2749,7 +2720,7 @@ zfs_ioc_get_fsacl(zfs_cmd_t *zc)
return (error);
}
#ifdef HAVE_ZPL
#ifdef HAVE_SNAPSHOT
/*
* Search the vfs list for a specified resource. Returns a pointer to it
* or NULL if no suitable entry is found. The caller of this routine
@@ -2765,7 +2736,7 @@ zfs_get_vfs(const char *resource)
vfsp = rootvfs;
do {
if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
VFS_HOLD(vfsp);
mntget(vfsp);
vfs_found = vfsp;
break;
}
@@ -2774,7 +2745,7 @@ zfs_get_vfs(const char *resource)
vfs_list_unlock();
return (vfs_found);
}
#endif /* HAVE_ZPL */
#endif /* HAVE_SNAPSHOT */
/* ARGSUSED */
static void
@@ -3129,7 +3100,7 @@ out:
int
zfs_unmount_snap(const char *name, void *arg)
{
#ifdef HAVE_ZPL
#ifdef HAVE_SNAPSHOT
vfs_t *vfsp = NULL;
if (arg) {
@@ -3149,14 +3120,14 @@ zfs_unmount_snap(const char *name, void *arg)
int err;
if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
VFS_RELE(vfsp);
mntput(vfsp);
return (err);
}
VFS_RELE(vfsp);
mntput(vfsp);
if ((err = dounmount(vfsp, flag, kcred)) != 0)
return (err);
}
#endif /* HAVE_ZPL */
#endif /* HAVE_SNAPSHOT */
return (0);
}
@@ -3216,10 +3187,9 @@ zfs_ioc_destroy(zfs_cmd_t *zc)
static int
zfs_ioc_rollback(zfs_cmd_t *zc)
{
#ifdef HAVE_ZPL
dsl_dataset_t *ds, *clone;
int error;
zfsvfs_t *zfsvfs;
zfs_sb_t *zsb;
char *clone_name;
error = dsl_dataset_hold(zc->zc_name, FTAG, &ds);
@@ -3253,8 +3223,8 @@ zfs_ioc_rollback(zfs_cmd_t *zc)
/*
* Do clone swap.
*/
if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
error = zfs_suspend_fs(zfsvfs);
if (get_zfs_sb(zc->zc_name, &zsb) == 0) {
error = zfs_suspend_fs(zsb);
if (error == 0) {
int resume_err;
@@ -3266,10 +3236,10 @@ zfs_ioc_rollback(zfs_cmd_t *zc)
} else {
error = EBUSY;
}
resume_err = zfs_resume_fs(zfsvfs, zc->zc_name);
resume_err = zfs_resume_fs(zsb, zc->zc_name);
error = error ? error : resume_err;
}
VFS_RELE(zfsvfs->z_vfs);
mntput(zsb->z_vfs);
} else {
if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
error = dsl_dataset_clone_swap(clone, ds, B_TRUE);
@@ -3290,9 +3260,6 @@ out:
if (ds)
dsl_dataset_rele(ds, FTAG);
return (error);
#else
return (ENOTSUP);
#endif /* HAVE_ZPL */
}
/*
@@ -3307,6 +3274,7 @@ static int
zfs_ioc_rename(zfs_cmd_t *zc)
{
boolean_t recursive = zc->zc_cookie & 1;
int err;
zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
@@ -3320,13 +3288,18 @@ zfs_ioc_rename(zfs_cmd_t *zc)
*/
if (!recursive && strchr(zc->zc_name, '@') != NULL &&
zc->zc_objset_type == DMU_OST_ZFS) {
int err = zfs_unmount_snap(zc->zc_name, NULL);
err = zfs_unmount_snap(zc->zc_name, NULL);
if (err)
return (err);
}
if (zc->zc_objset_type == DMU_OST_ZVOL)
err = dmu_objset_rename(zc->zc_name, zc->zc_value, recursive);
if ((err == 0) && (zc->zc_objset_type == DMU_OST_ZVOL)) {
(void) zvol_remove_minor(zc->zc_name);
return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
(void) zvol_create_minor(zc->zc_value);
}
return (err);
}
static int
@@ -3736,29 +3709,25 @@ zfs_ioc_recv(zfs_cmd_t *zc)
&zc->zc_action_handle);
if (error == 0) {
#ifdef HAVE_ZPL
zfsvfs_t *zfsvfs = NULL;
zfs_sb_t *zsb = NULL;
if (getzfsvfs(tofs, &zfsvfs) == 0) {
if (get_zfs_sb(tofs, &zsb) == 0) {
/* online recv */
int end_err;
error = zfs_suspend_fs(zfsvfs);
error = zfs_suspend_fs(zsb);
/*
* If the suspend fails, then the recv_end will
* likely also fail, and clean up after itself.
*/
end_err = dmu_recv_end(&drc);
if (error == 0)
error = zfs_resume_fs(zfsvfs, tofs);
error = zfs_resume_fs(zsb, tofs);
error = error ? error : end_err;
VFS_RELE(zfsvfs->z_vfs);
mntput(zsb->z_vfs);
} else {
error = dmu_recv_end(&drc);
}
#else
error = dmu_recv_end(&drc);
#endif /* HAVE_ZPL */
}
zc->zc_cookie = off - fp->f_offset;
@@ -4082,25 +4051,21 @@ zfs_ioc_promote(zfs_cmd_t *zc)
static int
zfs_ioc_userspace_one(zfs_cmd_t *zc)
{
#ifdef HAVE_ZPL
zfsvfs_t *zfsvfs;
zfs_sb_t *zsb;
int error;
if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
return (EINVAL);
error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
error = zfs_sb_hold(zc->zc_name, FTAG, &zsb, B_FALSE);
if (error)
return (error);
error = zfs_userspace_one(zfsvfs,
error = zfs_userspace_one(zsb,
zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
zfsvfs_rele(zfsvfs, FTAG);
zfs_sb_rele(zsb, FTAG);
return (error);
#else
return (ENOTSUP);
#endif /* HAVE_ZPL */
}
/*
@@ -4117,20 +4082,21 @@ zfs_ioc_userspace_one(zfs_cmd_t *zc)
static int
zfs_ioc_userspace_many(zfs_cmd_t *zc)
{
#ifdef HAVE_ZPL
zfsvfs_t *zfsvfs;
zfs_sb_t *zsb;
int bufsize = zc->zc_nvlist_dst_size;
int error;
void *buf;
if (bufsize <= 0)
return (ENOMEM);
int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
error = zfs_sb_hold(zc->zc_name, FTAG, &zsb, B_FALSE);
if (error)
return (error);
void *buf = kmem_alloc(bufsize, KM_SLEEP);
buf = kmem_alloc(bufsize, KM_SLEEP);
error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
error = zfs_userspace_many(zsb, zc->zc_objset_type, &zc->zc_cookie,
buf, &zc->zc_nvlist_dst_size);
if (error == 0) {
@@ -4139,12 +4105,9 @@ zfs_ioc_userspace_many(zfs_cmd_t *zc)
zc->zc_nvlist_dst_size);
}
kmem_free(buf, bufsize);
zfsvfs_rele(zfsvfs, FTAG);
zfs_sb_rele(zsb, FTAG);
return (error);
#else
return (ENOTSUP);
#endif /* HAVE_ZPL */
}
/*
@@ -4157,25 +4120,24 @@ zfs_ioc_userspace_many(zfs_cmd_t *zc)
static int
zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
{
#ifdef HAVE_ZPL
objset_t *os;
int error = 0;
zfsvfs_t *zfsvfs;
zfs_sb_t *zsb;
if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
if (get_zfs_sb(zc->zc_name, &zsb) == 0) {
if (!dmu_objset_userused_enabled(zsb->z_os)) {
/*
* If userused is not enabled, it may be because the
* objset needs to be closed & reopened (to grow the
* objset_phys_t). Suspend/resume the fs will do that.
*/
error = zfs_suspend_fs(zfsvfs);
error = zfs_suspend_fs(zsb);
if (error == 0)
error = zfs_resume_fs(zfsvfs, zc->zc_name);
error = zfs_resume_fs(zsb, zc->zc_name);
}
if (error == 0)
error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
VFS_RELE(zfsvfs->z_vfs);
error = dmu_objset_userspace_upgrade(zsb->z_os);
mntput(zsb->z_vfs);
} else {
/* XXX kind of reading contents without owning */
error = dmu_objset_hold(zc->zc_name, FTAG, &os);
@@ -4187,9 +4149,6 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
}
return (error);
#else
return (ENOTSUP);
#endif /* HAVE_ZPL */
}
/*
@@ -4199,7 +4158,7 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
* the first file system is shared.
* Neither sharefs, nfs or smbsrv are unloadable modules.
*/
#ifdef HAVE_ZPL
#ifdef HAVE_SHARE
int (*znfsexport_fs)(void *arg);
int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
int (*zsmbexport_fs)(void *arg, boolean_t add_share);
@@ -4231,12 +4190,12 @@ zfs_init_sharefs()
}
return (0);
}
#endif /* HAVE_ZPL */
#endif /* HAVE_SHARE */
static int
zfs_ioc_share(zfs_cmd_t *zc)
{
#ifdef HAVE_ZPL
#ifdef HAVE_SHARE
int error;
int opcode;
@@ -4328,7 +4287,7 @@ zfs_ioc_share(zfs_cmd_t *zc)
return (error);
#else
return (ENOTSUP);
#endif /* HAVE_ZPL */
#endif /* HAVE_SHARE */
}
ace_t full_access[] = {
@@ -4445,16 +4404,16 @@ zfs_ioc_diff(zfs_cmd_t *zc)
/*
* Remove all ACL files in shares dir
*/
#ifdef HAVE_ZPL
#ifdef HAVE_SHARE
static int
zfs_smb_acl_purge(znode_t *dzp)
{
zap_cursor_t zc;
zap_attribute_t zap;
zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
zfs_sb_t *zsb = ZTOZSB(dzp);
int error;
for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
for (zap_cursor_init(&zc, zsb->z_os, dzp->z_id);
(error = zap_cursor_retrieve(&zc, &zap)) == 0;
zap_cursor_advance(&zc)) {
if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
@@ -4464,17 +4423,17 @@ zfs_smb_acl_purge(znode_t *dzp)
zap_cursor_fini(&zc);
return (error);
}
#endif /* HAVE ZPL */
#endif /* HAVE SHARE */
static int
zfs_ioc_smb_acl(zfs_cmd_t *zc)
{
#ifdef HAVE_ZPL
#ifdef HAVE_SHARE
vnode_t *vp;
znode_t *dzp;
vnode_t *resourcevp = NULL;
znode_t *sharedir;
zfsvfs_t *zfsvfs;
zfs_sb_t *zsb;
nvlist_t *nvlist;
char *src, *target;
vattr_t vattr;
@@ -4495,17 +4454,17 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
}
dzp = VTOZ(vp);
zfsvfs = dzp->z_zfsvfs;
ZFS_ENTER(zfsvfs);
zsb = ZTOZSB(dzp);
ZFS_ENTER(zsb);
/*
* Create share dir if its missing.
*/
mutex_enter(&zfsvfs->z_lock);
if (zfsvfs->z_shares_dir == 0) {
mutex_enter(&zsb->z_lock);
if (zsb->z_shares_dir == 0) {
dmu_tx_t *tx;
tx = dmu_tx_create(zfsvfs->z_os);
tx = dmu_tx_create(zsb->z_os);
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
ZFS_SHARES_DIR);
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
@@ -4513,29 +4472,28 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
if (error) {
dmu_tx_abort(tx);
} else {
error = zfs_create_share_dir(zfsvfs, tx);
error = zfs_create_share_dir(zsb, tx);
dmu_tx_commit(tx);
}
if (error) {
mutex_exit(&zfsvfs->z_lock);
mutex_exit(&zsb->z_lock);
VN_RELE(vp);
ZFS_EXIT(zfsvfs);
ZFS_EXIT(zsb);
return (error);
}
}
mutex_exit(&zfsvfs->z_lock);
mutex_exit(&zsb->z_lock);
ASSERT(zfsvfs->z_shares_dir);
if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
ASSERT(zsb->z_shares_dir);
if ((error = zfs_zget(zsb, zsb->z_shares_dir, &sharedir)) != 0) {
VN_RELE(vp);
ZFS_EXIT(zfsvfs);
ZFS_EXIT(zsb);
return (error);
}
switch (zc->zc_cookie) {
case ZFS_SMB_ACL_ADD:
vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
vattr.va_type = VREG;
vattr.va_mode = S_IFREG|0777;
vattr.va_uid = 0;
vattr.va_gid = 0;
@@ -4560,7 +4518,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
if ((error = get_nvlist(zc->zc_nvlist_src,
zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
VN_RELE(vp);
ZFS_EXIT(zfsvfs);
ZFS_EXIT(zsb);
return (error);
}
if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
@@ -4568,7 +4526,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
&target)) {
VN_RELE(vp);
VN_RELE(ZTOV(sharedir));
ZFS_EXIT(zfsvfs);
ZFS_EXIT(zsb);
nvlist_free(nvlist);
return (error);
}
@@ -4589,12 +4547,12 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
VN_RELE(vp);
VN_RELE(ZTOV(sharedir));
ZFS_EXIT(zfsvfs);
ZFS_EXIT(zsb);
return (error);
#else
return (ENOTSUP);
#endif /* HAVE_ZPL */
#endif /* HAVE_SHARE */
}
/*
@@ -5190,10 +5148,8 @@ zfs_detach(void)
list_destroy(&zfsdev_state_list);
}
#ifdef HAVE_ZPL
uint_t zfs_fsyncer_key;
extern uint_t rrw_tsd_key;
#endif
#ifdef DEBUG
#define ZFS_DEBUG_STR " (DEBUG mode)"
@@ -5215,12 +5171,12 @@ _init(void)
if ((error = zfs_attach()) != 0)
goto out2;
#ifdef HAVE_ZPL
tsd_create(&zfs_fsyncer_key, NULL);
tsd_create(&rrw_tsd_key, NULL);
#ifdef HAVE_SHARE
mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
#endif /* HAVE_ZPL */
#endif /* HAVE_SHARE */
printk(KERN_NOTICE "ZFS: Loaded ZFS Filesystem v%s%s\n",
ZFS_META_VERSION, ZFS_DEBUG_STR);
@@ -5245,7 +5201,7 @@ _fini(void)
zvol_fini();
zfs_fini();
spa_fini();
#ifdef HAVE_ZPL
#ifdef HAVE_SHARE
if (zfs_nfsshare_inited)
(void) ddi_modclose(nfs_mod);
if (zfs_smbshare_inited)
@@ -5254,8 +5210,9 @@ _fini(void)
(void) ddi_modclose(sharefs_mod);
mutex_destroy(&zfs_share_lock);
#endif /* HAVE_SHARE */
tsd_destroy(&zfs_fsyncer_key);
#endif /* HAVE_ZPL */
tsd_destroy(&rrw_tsd_key);
printk(KERN_NOTICE "ZFS: Unloaded ZFS Filesystem v%s%s\n",
ZFS_META_VERSION, ZFS_DEBUG_STR);
+43 -24
View File
@@ -22,7 +22,6 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifdef HAVE_ZPL
#include <sys/types.h>
#include <sys/param.h>
@@ -70,7 +69,11 @@
int
zfs_log_create_txtype(zil_create_t type, vsecattr_t *vsecp, vattr_t *vap)
{
#ifdef HAVE_XVATTR
int isxvattr = (vap->va_mask & AT_XVATTR);
#else
int isxvattr = 0;
#endif /* HAVE_XVATTR */
switch (type) {
case Z_FILE:
if (vsecp == NULL && !isxvattr)
@@ -98,6 +101,7 @@ zfs_log_create_txtype(zil_create_t type, vsecattr_t *vsecp, vattr_t *vap)
return (TX_MAX_TYPE);
}
#ifdef HAVE_XVATTR
/*
* build up the log data necessary for logging xvattr_t
* First lr_attr_t is initialized. following the lr_attr_t
@@ -211,6 +215,7 @@ zfs_log_fuid_domains(zfs_fuid_info_t *fuidp, void *start)
}
return (start);
}
#endif /* HAVE_XVATTR */
/*
* zfs_log_create() is used to handle TX_CREATE, TX_CREATE_ATTR, TX_MKDIR,
@@ -239,11 +244,13 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
{
itx_t *itx;
lr_create_t *lr;
#ifdef HAVE_XVATTR
lr_acl_create_t *lracl;
xvattr_t *xvap = (xvattr_t *)vap;
#endif /* HAVE_XVATTR */
size_t aclsize;
size_t xvatsize = 0;
size_t txsize;
xvattr_t *xvap = (xvattr_t *)vap;
void *end;
size_t lrsize;
size_t namesize = strlen(name) + 1;
@@ -261,8 +268,10 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
fuidsz += fuidp->z_fuid_cnt * sizeof (uint64_t);
}
#ifdef HAVE_XVATTR
if (vap->va_mask & AT_XVATTR)
xvatsize = ZIL_XVAT_SIZE(xvap->xva_mapsize);
#endif /* HAVE_XVATTR */
if ((int)txtype == TX_CREATE_ATTR || (int)txtype == TX_MKDIR_ATTR ||
(int)txtype == TX_CREATE || (int)txtype == TX_MKDIR ||
@@ -293,18 +302,19 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
} else {
lr->lr_gid = fuidp->z_fuid_group;
}
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zp->z_zfsvfs), &lr->lr_gen,
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &lr->lr_gen,
sizeof (uint64_t));
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
lr->lr_crtime, sizeof (uint64_t) * 2);
if (sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zp->z_zfsvfs), &lr->lr_rdev,
if (sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(ZTOZSB(zp)), &lr->lr_rdev,
sizeof (lr->lr_rdev)) != 0)
lr->lr_rdev = 0;
/*
* Fill in xvattr info if any
*/
#ifdef HAVE_XVATTR
if (vap->va_mask & AT_XVATTR) {
zfs_log_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), xvap);
end = (caddr_t)lr + lrsize + xvatsize;
@@ -334,6 +344,9 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
end = zfs_log_fuid_ids(fuidp, end);
end = zfs_log_fuid_domains(fuidp, end);
}
#else
end = (caddr_t)lr + lrsize;
#endif /* HAVE_XVATTR */
/*
* Now place file name in log record
*/
@@ -411,9 +424,9 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
lr->lr_uid = zp->z_uid;
lr->lr_gid = zp->z_gid;
lr->lr_mode = zp->z_mode;
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zp->z_zfsvfs), &lr->lr_gen,
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &lr->lr_gen,
sizeof (uint64_t));
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
lr->lr_crtime, sizeof (uint64_t) * 2);
bcopy(name, (char *)(lr + 1), namesize);
bcopy(link, (char *)(lr + 1) + namesize, linksize);
@@ -496,7 +509,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
itx = zil_itx_create(txtype, sizeof (*lr) +
(write_state == WR_COPIED ? len : 0));
lr = (lr_write_t *)&itx->itx_lr;
if (write_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os,
if (write_state == WR_COPIED && dmu_read(ZTOZSB(zp)->z_os,
zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
zil_itx_destroy(itx);
itx = zil_itx_create(txtype, sizeof (*lr));
@@ -513,7 +526,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
lr->lr_blkoff = 0;
BP_ZERO(&lr->lr_blkptr);
itx->itx_private = zp->z_zfsvfs;
itx->itx_private = ZTOZSB(zp);
if (!(ioflag & (FSYNC | FDSYNC)) && (zp->z_sync_cnt == 0) &&
(fsync_cnt == 0))
@@ -553,12 +566,14 @@ zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
* zfs_log_setattr() handles TX_SETATTR transactions.
*/
void
zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp)
zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp,
struct iattr *attr, uint_t mask_applied, zfs_fuid_info_t *fuidp)
{
itx_t *itx;
lr_setattr_t *lr;
#ifdef HAVE_XVATTR
xvattr_t *xvap = (xvattr_t *)vap;
#endif /* HAVEXVATTR */
size_t recsize = sizeof (lr_setattr_t);
void *start;
@@ -570,32 +585,35 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
* for lr_attr_t + xvattr mask, mapsize and create time
* plus actual attribute values
*/
if (vap->va_mask & AT_XVATTR)
#ifdef HAVE_XVATTR
if (attr->ia_valid & AT_XVATTR)
recsize = sizeof (*lr) + ZIL_XVAT_SIZE(xvap->xva_mapsize);
if (fuidp)
recsize += fuidp->z_domain_str_sz;
#endif /* HAVE_XVATTR */
itx = zil_itx_create(txtype, recsize);
lr = (lr_setattr_t *)&itx->itx_lr;
lr->lr_foid = zp->z_id;
lr->lr_mask = (uint64_t)mask_applied;
lr->lr_mode = (uint64_t)vap->va_mode;
if ((mask_applied & AT_UID) && IS_EPHEMERAL(vap->va_uid))
lr->lr_mode = (uint64_t)attr->ia_mode;
if ((mask_applied & ATTR_UID) && IS_EPHEMERAL(attr->ia_uid))
lr->lr_uid = fuidp->z_fuid_owner;
else
lr->lr_uid = (uint64_t)vap->va_uid;
lr->lr_uid = (uint64_t)attr->ia_uid;
if ((mask_applied & AT_GID) && IS_EPHEMERAL(vap->va_gid))
if ((mask_applied & ATTR_GID) && IS_EPHEMERAL(attr->ia_gid))
lr->lr_gid = fuidp->z_fuid_group;
else
lr->lr_gid = (uint64_t)vap->va_gid;
lr->lr_gid = (uint64_t)attr->ia_gid;
lr->lr_size = (uint64_t)vap->va_size;
ZFS_TIME_ENCODE(&vap->va_atime, lr->lr_atime);
ZFS_TIME_ENCODE(&vap->va_mtime, lr->lr_mtime);
lr->lr_size = (uint64_t)attr->ia_size;
ZFS_TIME_ENCODE(&attr->ia_atime, lr->lr_atime);
ZFS_TIME_ENCODE(&attr->ia_mtime, lr->lr_mtime);
start = (lr_setattr_t *)(lr + 1);
if (vap->va_mask & AT_XVATTR) {
#ifdef HAVE_XVATTR
if (attr->ia_valid & ATTR_XVATTR) {
zfs_log_xvattr((lr_attr_t *)start, xvap);
start = (caddr_t)start + ZIL_XVAT_SIZE(xvap->xva_mapsize);
}
@@ -606,6 +624,7 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
if (fuidp)
(void) zfs_log_fuid_domains(fuidp, start);
#endif /* HAVE_XVATTR */
itx->itx_sync = (zp->z_sync_cnt != 0);
zil_itx_assign(zilog, itx, tx);
@@ -629,7 +648,7 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
if (zil_replaying(zilog, tx) || zp->z_unlinked)
return;
txtype = (zp->z_zfsvfs->z_version < ZPL_VERSION_FUID) ?
txtype = (ZTOZSB(zp)->z_version < ZPL_VERSION_FUID) ?
TX_ACL_V0 : TX_ACL;
if (txtype == TX_ACL)
@@ -667,14 +686,14 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
start = (caddr_t)start + ZIL_ACE_LENGTH(aclbytes);
#ifdef HAVE_XVATTR
if (fuidp) {
start = zfs_log_fuid_ids(fuidp, start);
(void) zfs_log_fuid_domains(fuidp, start);
}
#endif /* HAVE_XVATTR */
}
itx->itx_sync = (zp->z_sync_cnt != 0);
zil_itx_assign(zilog, itx, tx);
}
#endif /* HAVE_ZPL */
+180 -153
View File
@@ -22,8 +22,6 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifdef HAVE_ZPL
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
@@ -41,6 +39,7 @@
#include <sys/zfs_fuid.h>
#include <sys/spa.h>
#include <sys/zil.h>
#include <sys/zfs_vnops.h>
#include <sys/byteorder.h>
#include <sys/stat.h>
#include <sys/mode.h>
@@ -55,26 +54,24 @@
*/
static void
zfs_init_vattr(vattr_t *vap, uint64_t mask, uint64_t mode,
uint64_t uid, uint64_t gid, uint64_t rdev, uint64_t nodeid)
zfs_init_iattr(struct iattr *attr, uint64_t mask, uint64_t mode,
uint64_t uid, uint64_t gid)
{
bzero(vap, sizeof (*vap));
vap->va_mask = (uint_t)mask;
vap->va_type = IFTOVT(mode);
vap->va_mode = mode & MODEMASK;
vap->va_uid = (uid_t)(IS_EPHEMERAL(uid)) ? -1 : uid;
vap->va_gid = (gid_t)(IS_EPHEMERAL(gid)) ? -1 : gid;
vap->va_rdev = zfs_cmpldev(rdev);
vap->va_nodeid = nodeid;
memset(attr, 0, sizeof(*attr));
attr->ia_valid = (uint_t)mask;
attr->ia_mode = mode;
attr->ia_uid = (uid_t)(IS_EPHEMERAL(uid)) ? -1 : uid;
attr->ia_gid = (gid_t)(IS_EPHEMERAL(gid)) ? -1 : gid;
}
/* ARGSUSED */
static int
zfs_replay_error(zfsvfs_t *zfsvfs, lr_t *lr, boolean_t byteswap)
zfs_replay_error(zfs_sb_t *zsb, lr_t *lr, boolean_t byteswap)
{
return (ENOTSUP);
}
#ifdef HAVE_XVATTR
static void
zfs_replay_xvattr(lr_attr_t *lrattr, xvattr_t *xvap)
{
@@ -135,6 +132,7 @@ zfs_replay_xvattr(lr_attr_t *lrattr, xvattr_t *xvap)
if (XVA_ISSET_REQ(xvap, XAT_SPARSE))
xoap->xoa_sparse = ((*attrs & XAT0_SPARSE) != 0);
}
#endif /* HAVE_XVATTR */
static int
zfs_replay_domain_cnt(uint64_t uid, uint64_t gid)
@@ -263,16 +261,17 @@ zfs_replay_swap_attrs(lr_attr_t *lrattr)
* as option FUID information.
*/
static int
zfs_replay_create_acl(zfsvfs_t *zfsvfs,
lr_acl_create_t *lracl, boolean_t byteswap)
zfs_replay_create_acl(zfs_sb_t *zsb, lr_acl_create_t *lracl, boolean_t byteswap)
{
char *name = NULL; /* location determined later */
lr_create_t *lr = (lr_create_t *)lracl;
znode_t *dzp;
vnode_t *vp = NULL;
struct inode *ip = NULL;
#ifdef HAVE_XVATTR
xvattr_t xva;
int vflg = 0;
vsecattr_t vsec = { 0 };
#endif /* HAVE_XVATTR */
int vflg = 0;
lr_attr_t *lrattr;
void *aclstart;
void *fuidstart;
@@ -300,24 +299,26 @@ zfs_replay_create_acl(zfsvfs_t *zfsvfs,
}
}
if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
if ((error = zfs_zget(zsb, lr->lr_doid, &dzp)) != 0)
return (error);
#ifdef HAVE_XVATTR
xva_init(&xva);
zfs_init_vattr(&xva.xva_vattr, AT_TYPE | AT_MODE | AT_UID | AT_GID,
lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid);
zfs_init_iattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID,
lr->lr_mode, lr->lr_uid, lr->lr_gid, /*lr->lr_rdev, lr->lr_foid*/);
/*
* All forms of zfs create (create, mkdir, mkxattrdir, symlink)
* eventually end up in zfs_mknode(), which assigns the object's
* creation time and generation number. The generic VOP_CREATE()
* creation time and generation number. The generic zfs_create()
* doesn't have either concept, so we smuggle the values inside
* the vattr's otherwise unused va_ctime and va_nblocks fields.
*/
ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime);
xva.xva_vattr.va_nblocks = lr->lr_gen;
#endif /* HAVE_XVATTR */
error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL);
error = dmu_object_info(zsb->z_os, lr->lr_foid, NULL);
if (error != ENOENT)
goto bail;
@@ -328,11 +329,12 @@ zfs_replay_create_acl(zfsvfs_t *zfsvfs,
aclstart = (caddr_t)(lracl + 1);
fuidstart = (caddr_t)aclstart +
ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart,
zsb->z_fuid_replay = zfs_replay_fuids(fuidstart,
(void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
lr->lr_uid, lr->lr_gid);
/*FALLTHROUGH*/
case TX_CREATE_ACL_ATTR:
#ifdef HAVE_XVATTR
if (name == NULL) {
lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
@@ -344,27 +346,29 @@ zfs_replay_create_acl(zfsvfs_t *zfsvfs,
vsec.vsa_aclcnt = lracl->lr_aclcnt;
vsec.vsa_aclentsz = lracl->lr_acl_bytes;
vsec.vsa_aclflags = lracl->lr_acl_flags;
if (zfsvfs->z_fuid_replay == NULL) {
if (zsb->z_fuid_replay == NULL) {
fuidstart = (caddr_t)(lracl + 1) + xvatlen +
ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
zfsvfs->z_fuid_replay =
zsb->z_fuid_replay =
zfs_replay_fuids(fuidstart,
(void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
lr->lr_uid, lr->lr_gid);
}
error = VOP_CREATE(ZTOV(dzp), name, &xva.xva_vattr,
0, 0, &vp, kcred, vflg, NULL, &vsec);
error = zfs_create(ZTOI(dzp), name, &xva.xva_vattr,
0, 0, &ip, kcred, vflg, NULL, &vsec);
#endif /* HAVE_XVATTR */
break;
case TX_MKDIR_ACL:
aclstart = (caddr_t)(lracl + 1);
fuidstart = (caddr_t)aclstart +
ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart,
zsb->z_fuid_replay = zfs_replay_fuids(fuidstart,
(void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
lr->lr_uid, lr->lr_gid);
/*FALLTHROUGH*/
case TX_MKDIR_ACL_ATTR:
#ifdef HAVE_XVATTR
if (name == NULL) {
lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
@@ -375,47 +379,50 @@ zfs_replay_create_acl(zfsvfs_t *zfsvfs,
vsec.vsa_aclcnt = lracl->lr_aclcnt;
vsec.vsa_aclentsz = lracl->lr_acl_bytes;
vsec.vsa_aclflags = lracl->lr_acl_flags;
if (zfsvfs->z_fuid_replay == NULL) {
if (zsb->z_fuid_replay == NULL) {
fuidstart = (caddr_t)(lracl + 1) + xvatlen +
ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
zfsvfs->z_fuid_replay =
zsb->z_fuid_replay =
zfs_replay_fuids(fuidstart,
(void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
lr->lr_uid, lr->lr_gid);
}
error = VOP_MKDIR(ZTOV(dzp), name, &xva.xva_vattr,
&vp, kcred, NULL, vflg, &vsec);
error = zfs_mkdir(ZTOI(dzp), name, &xva.xva_vattr,
&ip, kcred, NULL, vflg, &vsec);
#endif /* HAVE_XVATTR */
break;
default:
error = ENOTSUP;
}
bail:
if (error == 0 && vp != NULL)
VN_RELE(vp);
if (error == 0 && ip != NULL)
iput(ip);
VN_RELE(ZTOV(dzp));
iput(ZTOI(dzp));
if (zfsvfs->z_fuid_replay)
zfs_fuid_info_free(zfsvfs->z_fuid_replay);
zfsvfs->z_fuid_replay = NULL;
if (zsb->z_fuid_replay)
zfs_fuid_info_free(zsb->z_fuid_replay);
zsb->z_fuid_replay = NULL;
return (error);
}
static int
zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap)
zfs_replay_create(zfs_sb_t *zsb, lr_create_t *lr, boolean_t byteswap)
{
char *name = NULL; /* location determined later */
char *link; /* symlink content follows name */
znode_t *dzp;
vnode_t *vp = NULL;
struct inode *ip = NULL;
xvattr_t xva;
int vflg = 0;
#ifdef HAVE_XVATTR
size_t lrsize = sizeof (lr_create_t);
lr_attr_t *lrattr;
void *start;
size_t xvatlen;
#endif /* HAVE_XVATTR */
void *start;
uint64_t txtype;
int error;
@@ -427,24 +434,26 @@ zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap)
}
if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
if ((error = zfs_zget(zsb, lr->lr_doid, &dzp)) != 0)
return (error);
#ifdef HAVE_XVATTR
xva_init(&xva);
zfs_init_vattr(&xva.xva_vattr, AT_TYPE | AT_MODE | AT_UID | AT_GID,
lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid);
zfs_init_iattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID,
lr->lr_mode, lr->lr_uid, lr->lr_gid, /*lr->lr_rdev, lr->lr_foid*/);
/*
* All forms of zfs create (create, mkdir, mkxattrdir, symlink)
* eventually end up in zfs_mknode(), which assigns the object's
* creation time and generation number. The generic VOP_CREATE()
* creation time and generation number. The generic zfs_create()
* doesn't have either concept, so we smuggle the values inside
* the vattr's otherwise unused va_ctime and va_nblocks fields.
*/
ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime);
xva.xva_vattr.va_nblocks = lr->lr_gen;
#endif /* HAVE_XVATTR */
error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL);
error = dmu_object_info(zsb->z_os, lr->lr_foid, NULL);
if (error != ENOENT)
goto out;
@@ -461,75 +470,79 @@ zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap)
(int)lr->lr_common.lrc_txtype != TX_MKDIR_ATTR &&
(int)lr->lr_common.lrc_txtype != TX_CREATE_ATTR) {
start = (lr + 1);
zfsvfs->z_fuid_replay =
zsb->z_fuid_replay =
zfs_replay_fuid_domain(start, &start,
lr->lr_uid, lr->lr_gid);
}
switch (txtype) {
case TX_CREATE_ATTR:
#ifdef HAVE_XVATTR
lrattr = (lr_attr_t *)(caddr_t)(lr + 1);
xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva);
start = (caddr_t)(lr + 1) + xvatlen;
zfsvfs->z_fuid_replay =
zsb->z_fuid_replay =
zfs_replay_fuid_domain(start, &start,
lr->lr_uid, lr->lr_gid);
name = (char *)start;
#endif /* HAVE_XVATTR */
/*FALLTHROUGH*/
case TX_CREATE:
if (name == NULL)
name = (char *)start;
error = VOP_CREATE(ZTOV(dzp), name, &xva.xva_vattr,
0, 0, &vp, kcred, vflg, NULL, NULL);
error = zfs_create(ZTOI(dzp), name, &xva.xva_vattr,
0, 0, &ip, kcred, vflg, NULL);
break;
case TX_MKDIR_ATTR:
#ifdef HAVE_XVATTR
lrattr = (lr_attr_t *)(caddr_t)(lr + 1);
xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva);
start = (caddr_t)(lr + 1) + xvatlen;
zfsvfs->z_fuid_replay =
zsb->z_fuid_replay =
zfs_replay_fuid_domain(start, &start,
lr->lr_uid, lr->lr_gid);
name = (char *)start;
#endif /* HAVE_XVATTR */
/*FALLTHROUGH*/
case TX_MKDIR:
if (name == NULL)
name = (char *)(lr + 1);
error = VOP_MKDIR(ZTOV(dzp), name, &xva.xva_vattr,
&vp, kcred, NULL, vflg, NULL);
error = zfs_mkdir(ZTOI(dzp), name, &xva.xva_vattr,
&ip, kcred, vflg, NULL);
break;
case TX_MKXATTR:
error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &vp, kcred);
error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &ip, kcred);
break;
case TX_SYMLINK:
name = (char *)(lr + 1);
link = name + strlen(name) + 1;
error = VOP_SYMLINK(ZTOV(dzp), name, &xva.xva_vattr,
link, kcred, NULL, vflg);
error = zfs_symlink(ZTOI(dzp), name, &xva.xva_vattr,
link, &ip, kcred, vflg);
break;
default:
error = ENOTSUP;
}
out:
if (error == 0 && vp != NULL)
VN_RELE(vp);
if (error == 0 && ip != NULL)
iput(ip);
VN_RELE(ZTOV(dzp));
iput(ZTOI(dzp));
if (zfsvfs->z_fuid_replay)
zfs_fuid_info_free(zfsvfs->z_fuid_replay);
zfsvfs->z_fuid_replay = NULL;
if (zsb->z_fuid_replay)
zfs_fuid_info_free(zsb->z_fuid_replay);
zsb->z_fuid_replay = NULL;
return (error);
}
static int
zfs_replay_remove(zfsvfs_t *zfsvfs, lr_remove_t *lr, boolean_t byteswap)
zfs_replay_remove(zfs_sb_t *zsb, lr_remove_t *lr, boolean_t byteswap)
{
char *name = (char *)(lr + 1); /* name follows lr_remove_t */
znode_t *dzp;
@@ -539,60 +552,63 @@ zfs_replay_remove(zfsvfs_t *zfsvfs, lr_remove_t *lr, boolean_t byteswap)
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
if ((error = zfs_zget(zsb, lr->lr_doid, &dzp)) != 0)
return (error);
#ifdef HAVE_PN_UTILS
if (lr->lr_common.lrc_txtype & TX_CI)
vflg |= FIGNORECASE;
#endif
switch ((int)lr->lr_common.lrc_txtype) {
case TX_REMOVE:
error = VOP_REMOVE(ZTOV(dzp), name, kcred, NULL, vflg);
error = zfs_remove(ZTOI(dzp), name, kcred);
break;
case TX_RMDIR:
error = VOP_RMDIR(ZTOV(dzp), name, NULL, kcred, NULL, vflg);
error = zfs_rmdir(ZTOI(dzp), name, NULL, kcred, vflg);
break;
default:
error = ENOTSUP;
}
VN_RELE(ZTOV(dzp));
iput(ZTOI(dzp));
return (error);
}
static int
zfs_replay_link(zfsvfs_t *zfsvfs, lr_link_t *lr, boolean_t byteswap)
zfs_replay_link(zfs_sb_t *zsb, lr_link_t *lr, boolean_t byteswap)
{
char *name = (char *)(lr + 1); /* name follows lr_link_t */
znode_t *dzp, *zp;
int error;
int vflg = 0;
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
if ((error = zfs_zget(zsb, lr->lr_doid, &dzp)) != 0)
return (error);
if ((error = zfs_zget(zfsvfs, lr->lr_link_obj, &zp)) != 0) {
VN_RELE(ZTOV(dzp));
if ((error = zfs_zget(zsb, lr->lr_link_obj, &zp)) != 0) {
iput(ZTOI(dzp));
return (error);
}
#ifdef HAVE_PN_UTILS
if (lr->lr_common.lrc_txtype & TX_CI)
vflg |= FIGNORECASE;
#endif
error = VOP_LINK(ZTOV(dzp), ZTOV(zp), name, kcred, NULL, vflg);
error = zfs_link(ZTOI(dzp), ZTOI(zp), name, kcred);
VN_RELE(ZTOV(zp));
VN_RELE(ZTOV(dzp));
iput(ZTOI(zp));
iput(ZTOI(dzp));
return (error);
}
static int
zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, boolean_t byteswap)
zfs_replay_rename(zfs_sb_t *zsb, lr_rename_t *lr, boolean_t byteswap)
{
char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
char *tname = sname + strlen(sname) + 1;
@@ -603,39 +619,39 @@ zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, boolean_t byteswap)
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
if ((error = zfs_zget(zfsvfs, lr->lr_sdoid, &sdzp)) != 0)
if ((error = zfs_zget(zsb, lr->lr_sdoid, &sdzp)) != 0)
return (error);
if ((error = zfs_zget(zfsvfs, lr->lr_tdoid, &tdzp)) != 0) {
VN_RELE(ZTOV(sdzp));
if ((error = zfs_zget(zsb, lr->lr_tdoid, &tdzp)) != 0) {
iput(ZTOI(sdzp));
return (error);
}
if (lr->lr_common.lrc_txtype & TX_CI)
vflg |= FIGNORECASE;
error = VOP_RENAME(ZTOV(sdzp), sname, ZTOV(tdzp), tname, kcred,
NULL, vflg);
error = zfs_rename(ZTOI(sdzp), sname, ZTOI(tdzp), tname, kcred, vflg);
VN_RELE(ZTOV(tdzp));
VN_RELE(ZTOV(sdzp));
iput(ZTOI(tdzp));
iput(ZTOI(sdzp));
return (error);
}
static int
zfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap)
zfs_replay_write(zfs_sb_t *zsb, lr_write_t *lr, boolean_t byteswap)
{
#if 0
char *data = (char *)(lr + 1); /* data follows lr_write_t */
#endif
znode_t *zp;
int error;
ssize_t resid;
uint64_t eod, offset, length;
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) {
if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0) {
/*
* As we can log writes out of order, it's possible the
* file has been removed. In this case just drop the write
@@ -658,10 +674,10 @@ zfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap)
* write needs to be there. So we write the whole block and
* reduce the eof. This needs to be done within the single dmu
* transaction created within vn_rdwr -> zfs_write. So a possible
* new end of file is passed through in zfsvfs->z_replay_eof
* new end of file is passed through in zsb->z_replay_eof
*/
zfsvfs->z_replay_eof = 0; /* 0 means don't change end of file */
zsb->z_replay_eof = 0; /* 0 means don't change end of file */
/* If it's a dmu_sync() block, write the whole block */
if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
@@ -671,14 +687,19 @@ zfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap)
length = blocksize;
}
if (zp->z_size < eod)
zfsvfs->z_replay_eof = eod;
zsb->z_replay_eof = eod;
}
error = vn_rdwr(UIO_WRITE, ZTOV(zp), data, length, offset,
/*
* XXX: Use zfs_write() once uio types are removed
*/
#if 0
error = vn_rdwr(UIO_WRITE, ZTOI(zp), data, length, offset,
UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
#endif
VN_RELE(ZTOV(zp));
zfsvfs->z_replay_eof = 0; /* safety */
iput(ZTOI(zp));
zsb->z_replay_eof = 0; /* safety */
return (error);
}
@@ -690,7 +711,7 @@ zfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap)
* the file is grown.
*/
static int
zfs_replay_write2(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap)
zfs_replay_write2(zfs_sb_t *zsb, lr_write_t *lr, boolean_t byteswap)
{
znode_t *zp;
int error;
@@ -699,19 +720,19 @@ zfs_replay_write2(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap)
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0)
return (error);
top:
end = lr->lr_offset + lr->lr_length;
if (end > zp->z_size) {
dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_t *tx = dmu_tx_create(zsb->z_os);
zp->z_size = end;
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
VN_RELE(ZTOV(zp));
iput(ZTOI(zp));
if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
@@ -720,22 +741,22 @@ top:
dmu_tx_abort(tx);
return (error);
}
(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zsb),
(void *)&zp->z_size, sizeof (uint64_t), tx);
/* Ensure the replayed seq is updated */
(void) zil_replaying(zfsvfs->z_log, tx);
(void) zil_replaying(zsb->z_log, tx);
dmu_tx_commit(tx);
}
VN_RELE(ZTOV(zp));
iput(ZTOI(zp));
return (error);
}
static int
zfs_replay_truncate(zfsvfs_t *zfsvfs, lr_truncate_t *lr, boolean_t byteswap)
zfs_replay_truncate(zfs_sb_t *zsb, lr_truncate_t *lr, boolean_t byteswap)
{
znode_t *zp;
flock64_t fl;
@@ -744,7 +765,7 @@ zfs_replay_truncate(zfsvfs_t *zfsvfs, lr_truncate_t *lr, boolean_t byteswap)
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0)
return (error);
bzero(&fl, sizeof (fl));
@@ -753,68 +774,75 @@ zfs_replay_truncate(zfsvfs_t *zfsvfs, lr_truncate_t *lr, boolean_t byteswap)
fl.l_start = lr->lr_offset;
fl.l_len = lr->lr_length;
error = VOP_SPACE(ZTOV(zp), F_FREESP, &fl, FWRITE | FOFFMAX,
lr->lr_offset, kcred, NULL);
error = zfs_space(ZTOI(zp), F_FREESP, &fl, FWRITE | FOFFMAX,
lr->lr_offset, kcred);
VN_RELE(ZTOV(zp));
iput(ZTOI(zp));
return (error);
}
static int
zfs_replay_setattr(zfsvfs_t *zfsvfs, lr_setattr_t *lr, boolean_t byteswap)
zfs_replay_setattr(zfs_sb_t *zsb, lr_setattr_t *lr, boolean_t byteswap)
{
znode_t *zp;
#ifdef HAVE_XVATTR
xvattr_t xva;
vattr_t *vap = &xva.xva_vattr;
#endif /* HAVE_XATTR */
struct iattr attr;
int error;
void *start;
#ifdef HAVE_XVATTR
xva_init(&xva);
#endif /* HAVE_XVATTR */
if (byteswap) {
byteswap_uint64_array(lr, sizeof (*lr));
#ifdef HAVE_XVATTR
if ((lr->lr_mask & AT_XVATTR) &&
zfsvfs->z_version >= ZPL_VERSION_INITIAL)
zsb->z_version >= ZPL_VERSION_INITIAL)
zfs_replay_swap_attrs((lr_attr_t *)(lr + 1));
#endif /* HAVE_XVATTR */
}
if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0)
return (error);
zfs_init_vattr(vap, lr->lr_mask, lr->lr_mode,
lr->lr_uid, lr->lr_gid, 0, lr->lr_foid);
zfs_init_iattr(&attr, lr->lr_mask, lr->lr_mode, lr->lr_uid, lr->lr_gid);
vap->va_size = lr->lr_size;
ZFS_TIME_DECODE(&vap->va_atime, lr->lr_atime);
ZFS_TIME_DECODE(&vap->va_mtime, lr->lr_mtime);
attr.ia_size = lr->lr_size;
ZFS_TIME_DECODE(&attr.ia_atime, lr->lr_atime);
ZFS_TIME_DECODE(&attr.ia_mtime, lr->lr_mtime);
/*
* Fill in xvattr_t portions if necessary.
*/
start = (lr_setattr_t *)(lr + 1);
if (vap->va_mask & AT_XVATTR) {
#ifdef HAVE_XVATTR
if (iattr->ia_mask & AT_XVATTR) {
zfs_replay_xvattr((lr_attr_t *)start, &xva);
start = (caddr_t)start +
ZIL_XVAT_SIZE(((lr_attr_t *)start)->lr_attr_masksize);
} else
xva.xva_vattr.va_mask &= ~AT_XVATTR;
#endif /* HAVE_XVATTR */
zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start,
zsb->z_fuid_replay = zfs_replay_fuid_domain(start, &start,
lr->lr_uid, lr->lr_gid);
error = VOP_SETATTR(ZTOV(zp), vap, 0, kcred, NULL);
error = zfs_setattr(ZTOI(zp), &attr, 0, kcred);
zfs_fuid_info_free(zfsvfs->z_fuid_replay);
zfsvfs->z_fuid_replay = NULL;
VN_RELE(ZTOV(zp));
zfs_fuid_info_free(zsb->z_fuid_replay);
zsb->z_fuid_replay = NULL;
iput(ZTOI(zp));
return (error);
}
static int
zfs_replay_acl_v0(zfsvfs_t *zfsvfs, lr_acl_v0_t *lr, boolean_t byteswap)
zfs_replay_acl_v0(zfs_sb_t *zsb, lr_acl_v0_t *lr, boolean_t byteswap)
{
ace_t *ace = (ace_t *)(lr + 1); /* ace array follows lr_acl_t */
vsecattr_t vsa;
@@ -826,7 +854,7 @@ zfs_replay_acl_v0(zfsvfs_t *zfsvfs, lr_acl_v0_t *lr, boolean_t byteswap)
zfs_oldace_byteswap(ace, lr->lr_aclcnt);
}
if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0)
return (error);
bzero(&vsa, sizeof (vsa));
@@ -836,9 +864,9 @@ zfs_replay_acl_v0(zfsvfs_t *zfsvfs, lr_acl_v0_t *lr, boolean_t byteswap)
vsa.vsa_aclflags = 0;
vsa.vsa_aclentp = ace;
error = VOP_SETSECATTR(ZTOV(zp), &vsa, 0, kcred, NULL);
error = zfs_setsecattr(ZTOI(zp), &vsa, 0, kcred);
VN_RELE(ZTOV(zp));
iput(ZTOI(zp));
return (error);
}
@@ -858,7 +886,7 @@ zfs_replay_acl_v0(zfsvfs_t *zfsvfs, lr_acl_v0_t *lr, boolean_t byteswap)
*
*/
static int
zfs_replay_acl(zfsvfs_t *zfsvfs, lr_acl_t *lr, boolean_t byteswap)
zfs_replay_acl(zfs_sb_t *zsb, lr_acl_t *lr, boolean_t byteswap)
{
ace_t *ace = (ace_t *)(lr + 1);
vsecattr_t vsa;
@@ -875,7 +903,7 @@ zfs_replay_acl(zfsvfs_t *zfsvfs, lr_acl_t *lr, boolean_t byteswap)
}
}
if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0)
return (error);
bzero(&vsa, sizeof (vsa));
@@ -889,18 +917,18 @@ zfs_replay_acl(zfsvfs_t *zfsvfs, lr_acl_t *lr, boolean_t byteswap)
void *fuidstart = (caddr_t)ace +
ZIL_ACE_LENGTH(lr->lr_acl_bytes);
zfsvfs->z_fuid_replay =
zsb->z_fuid_replay =
zfs_replay_fuids(fuidstart, &fuidstart,
lr->lr_fuidcnt, lr->lr_domcnt, 0, 0);
}
error = VOP_SETSECATTR(ZTOV(zp), &vsa, 0, kcred, NULL);
error = zfs_setsecattr(ZTOI(zp), &vsa, 0, kcred);
if (zfsvfs->z_fuid_replay)
zfs_fuid_info_free(zfsvfs->z_fuid_replay);
if (zsb->z_fuid_replay)
zfs_fuid_info_free(zsb->z_fuid_replay);
zfsvfs->z_fuid_replay = NULL;
VN_RELE(ZTOV(zp));
zsb->z_fuid_replay = NULL;
iput(ZTOI(zp));
return (error);
}
@@ -909,26 +937,25 @@ zfs_replay_acl(zfsvfs_t *zfsvfs, lr_acl_t *lr, boolean_t byteswap)
* Callback vectors for replaying records
*/
zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE] = {
zfs_replay_error, /* 0 no such transaction type */
zfs_replay_create, /* TX_CREATE */
zfs_replay_create, /* TX_MKDIR */
zfs_replay_create, /* TX_MKXATTR */
zfs_replay_create, /* TX_SYMLINK */
zfs_replay_remove, /* TX_REMOVE */
zfs_replay_remove, /* TX_RMDIR */
zfs_replay_link, /* TX_LINK */
zfs_replay_rename, /* TX_RENAME */
zfs_replay_write, /* TX_WRITE */
zfs_replay_truncate, /* TX_TRUNCATE */
zfs_replay_setattr, /* TX_SETATTR */
zfs_replay_acl_v0, /* TX_ACL_V0 */
zfs_replay_acl, /* TX_ACL */
zfs_replay_create_acl, /* TX_CREATE_ACL */
zfs_replay_create, /* TX_CREATE_ATTR */
zfs_replay_create_acl, /* TX_CREATE_ACL_ATTR */
zfs_replay_create_acl, /* TX_MKDIR_ACL */
zfs_replay_create, /* TX_MKDIR_ATTR */
zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */
zfs_replay_write2, /* TX_WRITE2 */
(zil_replay_func_t *)zfs_replay_error, /* no such type */
(zil_replay_func_t *)zfs_replay_create, /* TX_CREATE */
(zil_replay_func_t *)zfs_replay_create, /* TX_MKDIR */
(zil_replay_func_t *)zfs_replay_create, /* TX_MKXATTR */
(zil_replay_func_t *)zfs_replay_create, /* TX_SYMLINK */
(zil_replay_func_t *)zfs_replay_remove, /* TX_REMOVE */
(zil_replay_func_t *)zfs_replay_remove, /* TX_RMDIR */
(zil_replay_func_t *)zfs_replay_link, /* TX_LINK */
(zil_replay_func_t *)zfs_replay_rename, /* TX_RENAME */
(zil_replay_func_t *)zfs_replay_write, /* TX_WRITE */
(zil_replay_func_t *)zfs_replay_truncate, /* TX_TRUNCATE */
(zil_replay_func_t *)zfs_replay_setattr, /* TX_SETATTR */
(zil_replay_func_t *)zfs_replay_acl_v0, /* TX_ACL_V0 */
(zil_replay_func_t *)zfs_replay_acl, /* TX_ACL */
(zil_replay_func_t *)zfs_replay_create_acl, /* TX_CREATE_ACL */
(zil_replay_func_t *)zfs_replay_create, /* TX_CREATE_ATTR */
(zil_replay_func_t *)zfs_replay_create_acl, /* TX_CREATE_ACL_ATTR */
(zil_replay_func_t *)zfs_replay_create_acl, /* TX_MKDIR_ACL */
(zil_replay_func_t *)zfs_replay_create, /* TX_MKDIR_ATTR */
(zil_replay_func_t *)zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */
(zil_replay_func_t *)zfs_replay_write2, /* TX_WRITE2 */
};
#endif /* HAVE_ZPL */
+39 -26
View File
@@ -112,14 +112,14 @@ zfs_range_lock_writer(znode_t *zp, rl_t *new)
* Range locking is also used by zvol and uses a
* dummied up znode. However, for zvol, we don't need to
* append or grow blocksize, and besides we don't have
* a "sa" data or z_zfsvfs - so skip that processing.
* a "sa" data or zfs_sb_t - so skip that processing.
*
* Yes, this is ugly, and would be solved by not handling
* grow or append in range lock code. If that was done then
* we could make the range locking code generically available
* to other non-zfs consumers.
*/
if (zp->z_vnode) { /* caller is ZPL */
if (!zp->z_is_zvol) { /* caller is ZPL */
/*
* If in append mode pick up the current end of file.
* This is done under z_range_lock to avoid races.
@@ -134,7 +134,7 @@ zfs_range_lock_writer(znode_t *zp, rl_t *new)
*/
end_size = MAX(zp->z_size, new->r_off + len);
if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||
zp->z_blksz < zp->z_zfsvfs->z_max_blksz)) {
zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) {
new->r_off = 0;
new->r_len = UINT64_MAX;
}
@@ -453,6 +453,20 @@ zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type)
return (new);
}
static void
zfs_range_free(void *arg)
{
rl_t *rl = arg;
if (rl->r_write_wanted)
cv_destroy(&rl->r_wr_cv);
if (rl->r_read_wanted)
cv_destroy(&rl->r_rd_cv);
kmem_free(rl, sizeof (rl_t));
}
/*
* Unlock a reader lock
*/
@@ -472,14 +486,14 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
*/
if (remove->r_cnt == 1) {
avl_remove(tree, remove);
if (remove->r_write_wanted) {
mutex_exit(&zp->z_range_lock);
if (remove->r_write_wanted)
cv_broadcast(&remove->r_wr_cv);
cv_destroy(&remove->r_wr_cv);
}
if (remove->r_read_wanted) {
if (remove->r_read_wanted)
cv_broadcast(&remove->r_rd_cv);
cv_destroy(&remove->r_rd_cv);
}
taskq_dispatch(system_taskq, zfs_range_free, remove, 0);
} else {
ASSERT3U(remove->r_cnt, ==, 0);
ASSERT3U(remove->r_write_wanted, ==, 0);
@@ -505,19 +519,21 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
rl->r_cnt--;
if (rl->r_cnt == 0) {
avl_remove(tree, rl);
if (rl->r_write_wanted) {
if (rl->r_write_wanted)
cv_broadcast(&rl->r_wr_cv);
cv_destroy(&rl->r_wr_cv);
}
if (rl->r_read_wanted) {
if (rl->r_read_wanted)
cv_broadcast(&rl->r_rd_cv);
cv_destroy(&rl->r_rd_cv);
}
kmem_free(rl, sizeof (rl_t));
taskq_dispatch(system_taskq,
zfs_range_free, rl, 0);
}
}
mutex_exit(&zp->z_range_lock);
kmem_free(remove, sizeof (rl_t));
}
kmem_free(remove, sizeof (rl_t));
}
/*
@@ -537,22 +553,19 @@ zfs_range_unlock(rl_t *rl)
/* writer locks can't be shared or split */
avl_remove(&zp->z_range_avl, rl);
mutex_exit(&zp->z_range_lock);
if (rl->r_write_wanted) {
if (rl->r_write_wanted)
cv_broadcast(&rl->r_wr_cv);
cv_destroy(&rl->r_wr_cv);
}
if (rl->r_read_wanted) {
if (rl->r_read_wanted)
cv_broadcast(&rl->r_rd_cv);
cv_destroy(&rl->r_rd_cv);
}
kmem_free(rl, sizeof (rl_t));
taskq_dispatch(system_taskq, zfs_range_free, rl, 0);
} else {
/*
* lock may be shared, let zfs_range_unlock_reader()
* release the lock and free the rl_t
* release the zp->z_range_lock lock and free the rl_t
*/
zfs_range_unlock_reader(zp, rl);
mutex_exit(&zp->z_range_lock);
}
}
+53 -45
View File
@@ -67,7 +67,6 @@ sa_attr_reg_t zfs_attr_table[ZPL_END+1] = {
};
#ifdef _KERNEL
#ifdef HAVE_ZPL
int
zfs_sa_readlink(znode_t *zp, uio_t *uio)
{
@@ -82,7 +81,7 @@ zfs_sa_readlink(znode_t *zp, uio_t *uio)
MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio);
} else {
dmu_buf_t *dbp;
if ((error = dmu_buf_hold(zp->z_zfsvfs->z_os, zp->z_id,
if ((error = dmu_buf_hold(ZTOZSB(zp)->z_os, zp->z_id,
0, FTAG, &dbp, DMU_READ_NO_PREFETCH)) == 0) {
error = uiomove(dbp->db_data,
MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio);
@@ -108,7 +107,7 @@ zfs_sa_symlink(znode_t *zp, char *link, int len, dmu_tx_t *tx)
dmu_buf_t *dbp;
zfs_grow_blocksize(zp, len, tx);
VERIFY(0 == dmu_buf_hold(zp->z_zfsvfs->z_os,
VERIFY(0 == dmu_buf_hold(ZTOZSB(zp)->z_os,
zp->z_id, 0, FTAG, &dbp, DMU_READ_NO_PREFETCH));
dmu_buf_will_dirty(dbp, tx);
@@ -119,16 +118,17 @@ zfs_sa_symlink(znode_t *zp, char *link, int len, dmu_tx_t *tx)
}
}
#ifdef HAVE_SCANSTAMP
void
zfs_sa_get_scanstamp(znode_t *zp, xvattr_t *xvap)
{
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
zfs_sb_t *zsb = ZTOZSB(zp);
xoptattr_t *xoap;
ASSERT(MUTEX_HELD(&zp->z_lock));
VERIFY((xoap = xva_getxoptattr(xvap)) != NULL);
if (zp->z_is_sa) {
if (sa_lookup(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zfsvfs),
if (sa_lookup(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zsb),
&xoap->xoa_av_scanstamp,
sizeof (xoap->xoa_av_scanstamp)) != 0)
return;
@@ -156,13 +156,13 @@ zfs_sa_get_scanstamp(znode_t *zp, xvattr_t *xvap)
void
zfs_sa_set_scanstamp(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
{
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
zfs_sb_t *zsb = ZTOZSB(zp);
xoptattr_t *xoap;
ASSERT(MUTEX_HELD(&zp->z_lock));
VERIFY((xoap = xva_getxoptattr(xvap)) != NULL);
if (zp->z_is_sa)
VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zfsvfs),
VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zsb),
&xoap->xoa_av_scanstamp,
sizeof (xoap->xoa_av_scanstamp), tx));
else {
@@ -179,10 +179,11 @@ zfs_sa_set_scanstamp(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
xoap->xoa_av_scanstamp, sizeof (xoap->xoa_av_scanstamp));
zp->z_pflags |= ZFS_BONUS_SCANSTAMP;
VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zsb),
&zp->z_pflags, sizeof (uint64_t), tx));
}
}
#endif /* HAVE_SCANSTAMP */
/*
* I'm not convinced we should do any of this upgrade.
@@ -197,15 +198,16 @@ zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx)
{
dmu_buf_t *db = sa_get_db(hdl);
znode_t *zp = sa_get_userdata(hdl);
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
sa_bulk_attr_t bulk[20];
zfs_sb_t *zsb = ZTOZSB(zp);
int count = 0;
sa_bulk_attr_t sa_attrs[20] = { 0 };
sa_bulk_attr_t *bulk, *sa_attrs;
zfs_acl_locator_cb_t locate = { 0 };
uint64_t uid, gid, mode, rdev, xattr, parent;
uint64_t crtime[2], mtime[2], ctime[2];
zfs_acl_phys_t znode_acl;
#ifdef HAVE_SCANSTAMP
char scanstamp[AV_SCANSTAMP_SZ];
#endif /* HAVE_SCANSTAMP */
boolean_t drop_lock = B_FALSE;
/*
@@ -214,7 +216,7 @@ zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx)
* and ready the ACL would require special "locked"
* interfaces that would be messy
*/
if (zp->z_acl_cached == NULL || ZTOV(zp)->v_type == VLNK)
if (zp->z_acl_cached == NULL || S_ISLNK(ZTOI(zp)->i_mode))
return;
/*
@@ -234,83 +236,90 @@ zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx)
}
/* First do a bulk query of the attributes that aren't cached */
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_XATTR(zfsvfs), NULL, &xattr, 8);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, &rdev, 8);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
bulk = kmem_alloc(sizeof(sa_bulk_attr_t) * 20, KM_SLEEP);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, &mtime, 16);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, 16);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zsb), NULL, &crtime, 16);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &mode, 8);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL, &parent, 8);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_XATTR(zsb), NULL, &xattr, 8);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zsb), NULL, &rdev, 8);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, &uid, 8);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL, &gid, 8);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zsb), NULL,
&znode_acl, 88);
if (sa_bulk_lookup_locked(hdl, bulk, count) != 0)
if (sa_bulk_lookup_locked(hdl, bulk, count) != 0) {
kmem_free(bulk, sizeof(sa_bulk_attr_t) * 20);
goto done;
}
/*
* While the order here doesn't matter its best to try and organize
* it is such a way to pick up an already existing layout number
*/
count = 0;
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SIZE(zfsvfs), NULL,
sa_attrs = kmem_zalloc(sizeof(sa_bulk_attr_t) * 20, KM_SLEEP);
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MODE(zsb), NULL, &mode, 8);
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SIZE(zsb), NULL,
&zp->z_size, 8);
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GEN(zfsvfs),
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GEN(zsb),
NULL, &zp->z_gen, 8);
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8);
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8);
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_PARENT(zfsvfs),
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_UID(zsb), NULL, &uid, 8);
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GID(zsb), NULL, &gid, 8);
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_PARENT(zsb),
NULL, &parent, 8);
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_FLAGS(zfsvfs), NULL,
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_FLAGS(zsb), NULL,
&zp->z_pflags, 8);
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_ATIME(zfsvfs), NULL,
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_ATIME(zsb), NULL,
zp->z_atime, 16);
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MTIME(zfsvfs), NULL,
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MTIME(zsb), NULL,
&mtime, 16);
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CTIME(zfsvfs), NULL,
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CTIME(zsb), NULL,
&ctime, 16);
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CRTIME(zfsvfs), NULL,
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CRTIME(zsb), NULL,
&crtime, 16);
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_LINKS(zfsvfs), NULL,
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_LINKS(zsb), NULL,
&zp->z_links, 8);
if (zp->z_vnode->v_type == VBLK || zp->z_vnode->v_type == VCHR)
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_RDEV(zfsvfs), NULL,
if (S_ISBLK(ZTOI(zp)->i_mode) || S_ISCHR(ZTOI(zp)->i_mode))
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_RDEV(zsb), NULL,
&rdev, 8);
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_COUNT(zsb), NULL,
&zp->z_acl_cached->z_acl_count, 8);
if (zp->z_acl_cached->z_version < ZFS_ACL_VERSION_FUID)
zfs_acl_xform(zp, zp->z_acl_cached, CRED());
locate.cb_aclp = zp->z_acl_cached;
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_ACES(zfsvfs),
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_ACES(zsb),
zfs_acl_data_locator, &locate, zp->z_acl_cached->z_acl_bytes);
if (xattr)
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_XATTR(zfsvfs),
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_XATTR(zsb),
NULL, &xattr, 8);
#ifdef HAVE_SCANSTAMP
/* if scanstamp then add scanstamp */
if (zp->z_pflags & ZFS_BONUS_SCANSTAMP) {
bcopy((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE,
scanstamp, AV_SCANSTAMP_SZ);
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SCANSTAMP(zfsvfs),
SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SCANSTAMP(zsb),
NULL, scanstamp, AV_SCANSTAMP_SZ);
zp->z_pflags &= ~ZFS_BONUS_SCANSTAMP;
}
#endif /* HAVE_SCANSTAMP */
VERIFY(dmu_set_bonustype(db, DMU_OT_SA, tx) == 0);
VERIFY(sa_replace_all_by_template_locked(hdl, sa_attrs,
count, tx) == 0);
if (znode_acl.z_acl_extern_obj)
VERIFY(0 == dmu_object_free(zfsvfs->z_os,
VERIFY(0 == dmu_object_free(zsb->z_os,
znode_acl.z_acl_extern_obj, tx));
zp->z_is_sa = B_TRUE;
kmem_free(sa_attrs, sizeof(sa_bulk_attr_t) * 20);
kmem_free(bulk, sizeof(sa_bulk_attr_t) * 20);
done:
if (drop_lock)
mutex_exit(&zp->z_lock);
@@ -319,7 +328,7 @@ done:
void
zfs_sa_upgrade_txholds(dmu_tx_t *tx, znode_t *zp)
{
if (!zp->z_zfsvfs->z_use_sa || zp->z_is_sa)
if (!ZTOZSB(zp)->z_use_sa || zp->z_is_sa)
return;
@@ -331,5 +340,4 @@ zfs_sa_upgrade_txholds(dmu_tx_t *tx, znode_t *zp)
}
}
#endif /* HAVE_ZPL */
#endif
+514 -1216
View File
File diff suppressed because it is too large Load Diff
+854 -1721
View File
File diff suppressed because it is too large Load Diff
+310 -826
View File
File diff suppressed because it is too large Load Diff
+333
View File
@@ -0,0 +1,333 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2011, Lawrence Livermore National Security, LLC.
*/
#include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h>
#include <sys/zfs_znode.h>
#include <sys/zpl.h>
static int
zpl_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
struct dentry *dentry = filp->f_path.dentry;
cred_t *cr;
int error;
cr = (cred_t *)get_current_cred();
error = -zfs_readdir(dentry->d_inode, dirent, filldir,
&filp->f_pos, cr);
put_cred(cr);
ASSERT3S(error, <=, 0);
return (error);
}
ZPL_FSYNC_PROTO(zpl_fsync, filp, unused_dentry, datasync)
{
cred_t *cr;
int error;
cr = (cred_t *)get_current_cred();
error = -zfs_fsync(filp->f_path.dentry->d_inode, datasync, cr);
put_cred(cr);
ASSERT3S(error, <=, 0);
return (error);
}
ssize_t
zpl_read_common(struct inode *ip, const char *buf, size_t len, loff_t pos,
uio_seg_t segment, int flags, cred_t *cr)
{
int error;
struct iovec iov;
uio_t uio;
iov.iov_base = (void *)buf;
iov.iov_len = len;
uio.uio_iov = &iov;
uio.uio_resid = len;
uio.uio_iovcnt = 1;
uio.uio_loffset = pos;
uio.uio_limit = MAXOFFSET_T;
uio.uio_segflg = segment;
error = -zfs_read(ip, &uio, flags, cr);
if (error < 0)
return (error);
return (len - uio.uio_resid);
}
static ssize_t
zpl_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
cred_t *cr;
ssize_t read;
cr = (cred_t *)get_current_cred();
read = zpl_read_common(filp->f_mapping->host, buf, len, *ppos,
UIO_USERSPACE, filp->f_flags, cr);
put_cred(cr);
if (read < 0)
return (read);
*ppos += read;
return (read);
}
ssize_t
zpl_write_common(struct inode *ip, const char *buf, size_t len, loff_t pos,
uio_seg_t segment, int flags, cred_t *cr)
{
int error;
struct iovec iov;
uio_t uio;
iov.iov_base = (void *)buf;
iov.iov_len = len;
uio.uio_iov = &iov;
uio.uio_resid = len,
uio.uio_iovcnt = 1;
uio.uio_loffset = pos;
uio.uio_limit = MAXOFFSET_T;
uio.uio_segflg = segment;
error = -zfs_write(ip, &uio, flags, cr);
if (error < 0)
return (error);
return (len - uio.uio_resid);
}
static ssize_t
zpl_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
cred_t *cr;
ssize_t wrote;
cr = (cred_t *)get_current_cred();
wrote = zpl_write_common(filp->f_mapping->host, buf, len, *ppos,
UIO_USERSPACE, filp->f_flags, cr);
put_cred(cr);
if (wrote < 0)
return (wrote);
*ppos += wrote;
return (wrote);
}
/*
* It's worth taking a moment to describe how mmap is implemented
* for zfs because it differs considerably from other Linux filesystems.
* However, this issue is handled the same way under OpenSolaris.
*
* The issue is that by design zfs bypasses the Linux page cache and
* leaves all caching up to the ARC. This has been shown to work
* well for the common read(2)/write(2) case. However, mmap(2)
* is problem because it relies on being tightly integrated with the
* page cache. To handle this we cache mmap'ed files twice, once in
* the ARC and a second time in the page cache. The code is careful
* to keep both copies synchronized.
*
* When a file with an mmap'ed region is written to using write(2)
* both the data in the ARC and existing pages in the page cache
* are updated. For a read(2) data will be read first from the page
* cache then the ARC if needed. Neither a write(2) or read(2) will
* will ever result in new pages being added to the page cache.
*
* New pages are added to the page cache only via .readpage() which
* is called when the vfs needs to read a page off disk to back the
* virtual memory region. These pages may be modified without
* notifying the ARC and will be written out periodically via
* .writepage(). This will occur due to either a sync or the usual
* page aging behavior. Note because a read(2) of a mmap'ed file
* will always check the page cache first even when the ARC is out
* of date correct data will still be returned.
*
* While this implementation ensures correct behavior it does have
* have some drawbacks. The most obvious of which is that it
* increases the required memory footprint when access mmap'ed
* files. It also adds additional complexity to the code keeping
* both caches synchronized.
*
* Longer term it may be possible to cleanly resolve this wart by
* mapping page cache pages directly on to the ARC buffers. The
* Linux address space operations are flexible enough to allow
* selection of which pages back a particular index. The trick
* would be working out the details of which subsystem is in
* charge, the ARC, the page cache, or both. It may also prove
* helpful to move the ARC buffers to a scatter-gather lists
* rather than a vmalloc'ed region.
*/
static int
zpl_mmap(struct file *filp, struct vm_area_struct *vma)
{
znode_t *zp = ITOZ(filp->f_mapping->host);
int error;
error = generic_file_mmap(filp, vma);
if (error)
return (error);
mutex_enter(&zp->z_lock);
zp->z_is_mapped = 1;
mutex_exit(&zp->z_lock);
return (error);
}
/*
* Populate a page with data for the Linux page cache. This function is
* only used to support mmap(2). There will be an identical copy of the
* data in the ARC which is kept up to date via .write() and .writepage().
*
* Current this function relies on zpl_read_common() and the O_DIRECT
* flag to read in a page. This works but the more correct way is to
* update zfs_fillpage() to be Linux friendly and use that interface.
*/
static int
zpl_readpage(struct file *filp, struct page *pp)
{
struct inode *ip;
loff_t off, i_size;
size_t len, wrote;
cred_t *cr;
void *pb;
int error = 0;
ASSERT(PageLocked(pp));
ip = pp->mapping->host;
off = page_offset(pp);
i_size = i_size_read(ip);
ASSERT3S(off, <, i_size);
cr = (cred_t *)get_current_cred();
len = MIN(PAGE_CACHE_SIZE, i_size - off);
pb = kmap(pp);
/* O_DIRECT is passed to bypass the page cache and avoid deadlock. */
wrote = zpl_read_common(ip, pb, len, off, UIO_SYSSPACE, O_DIRECT, cr);
if (wrote != len)
error = -EIO;
if (!error && (len < PAGE_CACHE_SIZE))
memset(pb + len, 0, PAGE_CACHE_SIZE - len);
kunmap(pp);
put_cred(cr);
if (error) {
SetPageError(pp);
ClearPageUptodate(pp);
} else {
ClearPageError(pp);
SetPageUptodate(pp);
flush_dcache_page(pp);
}
unlock_page(pp);
return (error);
}
/*
* Write out dirty pages to the ARC, this function is only required to
* support mmap(2). Mapped pages may be dirtied by memory operations
* which never call .write(). These dirty pages are kept in sync with
* the ARC buffers via this hook.
*
* Currently this function relies on zpl_write_common() and the O_DIRECT
* flag to push out the page. This works but the more correct way is
* to update zfs_putapage() to be Linux friendly and use that interface.
*/
static int
zpl_writepage(struct page *pp, struct writeback_control *wbc)
{
struct inode *ip;
loff_t off, i_size;
size_t len, read;
cred_t *cr;
void *pb;
int error = 0;
ASSERT(PageLocked(pp));
ip = pp->mapping->host;
off = page_offset(pp);
i_size = i_size_read(ip);
cr = (cred_t *)get_current_cred();
len = MIN(PAGE_CACHE_SIZE, i_size - off);
pb = kmap(pp);
/* O_DIRECT is passed to bypass the page cache and avoid deadlock. */
read = zpl_write_common(ip, pb, len, off, UIO_SYSSPACE, O_DIRECT, cr);
if (read != len)
error = -EIO;
kunmap(pp);
put_cred(cr);
if (error) {
SetPageError(pp);
ClearPageUptodate(pp);
} else {
ClearPageError(pp);
SetPageUptodate(pp);
}
unlock_page(pp);
return (error);
}
const struct address_space_operations zpl_address_space_operations = {
.readpage = zpl_readpage,
.writepage = zpl_writepage,
};
const struct file_operations zpl_file_operations = {
.open = generic_file_open,
.llseek = generic_file_llseek,
.read = zpl_read,
.write = zpl_write,
.readdir = zpl_readdir,
.mmap = zpl_mmap,
.fsync = zpl_fsync,
};
const struct file_operations zpl_dir_file_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.readdir = zpl_readdir,
.fsync = zpl_fsync,
};
+363
View File
@@ -0,0 +1,363 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2011, Lawrence Livermore National Security, LLC.
*/
#include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h>
#include <sys/vfs.h>
#include <sys/zpl.h>
static struct dentry *
zpl_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
struct inode *ip;
cred_t *cr;
int error;
cr = (cred_t *)get_current_cred();
error = -zfs_lookup(dir, dname(dentry), &ip, 0, cr, NULL, NULL);
ASSERT3S(error, <=, 0);
put_cred(cr);
if (error) {
if (error == -ENOENT)
return d_splice_alias(NULL, dentry);
else
return ERR_PTR(error);
}
return d_splice_alias(ip, dentry);
}
static int
zpl_create(struct inode *dir, struct dentry *dentry, int mode,
struct nameidata *nd)
{
const struct cred *cred;
struct inode *ip;
vattr_t *vap;
int error;
cred = get_current_cred();
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
vap->va_mode = mode;
vap->va_mask = ATTR_MODE;
vap->va_uid = current_fsuid();
vap->va_gid = current_fsgid();
error = -zfs_create(dir, (char *)dentry->d_name.name,
vap, 0, mode, &ip, (struct cred *)cred, 0, NULL);
if (error)
goto out;
d_instantiate(dentry, ip);
out:
kmem_free(vap, sizeof(vattr_t));
put_cred(cred);
ASSERT3S(error, <=, 0);
return (error);
}
static int
zpl_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
{
const struct cred *cred;
struct inode *ip;
vattr_t *vap;
int error;
cred = get_current_cred();
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
vap->va_mode = mode;
vap->va_mask = ATTR_MODE;
vap->va_rdev = rdev;
vap->va_uid = current_fsuid();
vap->va_gid = current_fsgid();
error = -zfs_create(dir, (char *)dentry->d_name.name,
vap, 0, mode, &ip, (struct cred *)cred, 0, NULL);
if (error)
goto out;
d_instantiate(dentry, ip);
out:
kmem_free(vap, sizeof(vattr_t));
put_cred(cred);
ASSERT3S(error, <=, 0);
return (-error);
}
static int
zpl_unlink(struct inode *dir, struct dentry *dentry)
{
cred_t *cr;
int error;
cr = (cred_t *)get_current_cred();
error = -zfs_remove(dir, dname(dentry), cr);
put_cred(cr);
ASSERT3S(error, <=, 0);
return (error);
}
static int
zpl_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
cred_t *cr;
vattr_t *vap;
struct inode *ip;
int error;
cr = (cred_t *)get_current_cred();
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
vap->va_mode = S_IFDIR | mode;
vap->va_mask = ATTR_MODE;
vap->va_uid = current_fsuid();
vap->va_gid = current_fsgid();
error = -zfs_mkdir(dir, dname(dentry), vap, &ip, cr, 0, NULL);
if (error)
goto out;
d_instantiate(dentry, ip);
out:
kmem_free(vap, sizeof(vattr_t));
put_cred(cr);
ASSERT3S(error, <=, 0);
return (error);
}
static int
zpl_rmdir(struct inode * dir, struct dentry *dentry)
{
cred_t *cr;
int error;
cr = (cred_t *)get_current_cred();
error = -zfs_rmdir(dir, dname(dentry), NULL, cr, 0);
put_cred(cr);
ASSERT3S(error, <=, 0);
return (error);
}
static int
zpl_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
cred_t *cr;
int error;
cr = (cred_t *)get_current_cred();
error = -zfs_getattr(dentry->d_inode, stat, 0, cr);
put_cred(cr);
ASSERT3S(error, <=, 0);
return (error);
}
static int
zpl_setattr(struct dentry *dentry, struct iattr *attr)
{
cred_t *cr;
int error;
error = inode_change_ok(dentry->d_inode, attr);
if (error)
return (error);
cr = (cred_t *)get_current_cred();
error = -zfs_setattr(dentry->d_inode, attr, 0, cr);
put_cred(cr);
ASSERT3S(error, <=, 0);
return (-error);
}
static int
zpl_rename(struct inode *sdip, struct dentry *sdentry,
struct inode *tdip, struct dentry *tdentry)
{
cred_t *cr;
int error;
cr = (cred_t *)get_current_cred();
error = -zfs_rename(sdip, dname(sdentry), tdip, dname(tdentry), cr, 0);
put_cred(cr);
ASSERT3S(error, <=, 0);
return (error);
}
static int
zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
{
cred_t *cr;
vattr_t *vap;
struct inode *ip;
int error;
cr = (cred_t *)get_current_cred();
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
vap->va_mode = S_IFLNK | S_IRWXUGO;
vap->va_mask = ATTR_MODE;
vap->va_uid = current_fsuid();
vap->va_gid = current_fsgid();
error = -zfs_symlink(dir, dname(dentry), vap, (char *)name, &ip, cr, 0);
if (error)
goto out;
d_instantiate(dentry, ip);
out:
kmem_free(vap, sizeof(vattr_t));
put_cred(cr);
ASSERT3S(error, <=, 0);
return (error);
}
static void *
zpl_follow_link(struct dentry *dentry, struct nameidata *nd)
{
struct inode *ip = dentry->d_inode;
struct iovec iov;
uio_t uio;
char *link;
cred_t *cr;
int error;
cr = (cred_t *)get_current_cred();
iov.iov_len = MAXPATHLEN;
iov.iov_base = link = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
uio.uio_resid = (MAXPATHLEN - 1);
uio.uio_segflg = UIO_SYSSPACE;
error = -zfs_readlink(ip, &uio, cr);
if (error) {
kmem_free(link, MAXPATHLEN);
nd_set_link(nd, ERR_PTR(error));
} else {
nd_set_link(nd, link);
}
put_cred(cr);
return (NULL);
}
static void
zpl_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
{
char *link;
link = nd_get_link(nd);
if (!IS_ERR(link))
kmem_free(link, MAXPATHLEN);
}
static int
zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
{
struct inode *ip = old_dentry->d_inode;
cred_t *cr;
int error;
if (ip->i_nlink >= ZFS_LINK_MAX)
return -EMLINK;
cr = (cred_t *)get_current_cred();
ip->i_ctime = CURRENT_TIME_SEC;
igrab(ip); /* Use ihold() if available */
error = -zfs_link(dir, ip, dname(dentry), cr);
if (error) {
iput(ip);
goto out;
}
d_instantiate(dentry, ip);
out:
put_cred(cr);
ASSERT3S(error, <=, 0);
return (error);
}
const struct inode_operations zpl_inode_operations = {
.create = zpl_create,
.link = zpl_link,
.unlink = zpl_unlink,
.symlink = zpl_symlink,
.mkdir = zpl_mkdir,
.rmdir = zpl_rmdir,
.mknod = zpl_mknod,
.rename = zpl_rename,
.setattr = zpl_setattr,
.getattr = zpl_getattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
.listxattr = zpl_xattr_list,
};
const struct inode_operations zpl_dir_inode_operations = {
.create = zpl_create,
.lookup = zpl_lookup,
.link = zpl_link,
.unlink = zpl_unlink,
.symlink = zpl_symlink,
.mkdir = zpl_mkdir,
.rmdir = zpl_rmdir,
.mknod = zpl_mknod,
.rename = zpl_rename,
.setattr = zpl_setattr,
.getattr = zpl_getattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
.listxattr = zpl_xattr_list,
};
const struct inode_operations zpl_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = zpl_follow_link,
.put_link = zpl_put_link,
};
const struct inode_operations zpl_special_inode_operations = {
.setattr = zpl_setattr,
.getattr = zpl_getattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
.listxattr = zpl_xattr_list,
};
+204
View File
@@ -0,0 +1,204 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2011, Lawrence Livermore National Security, LLC.
*/
#include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h>
#include <sys/zfs_znode.h>
#include <sys/zpl.h>
static struct inode *
zpl_inode_alloc(struct super_block *sb)
{
struct inode *ip;
VERIFY3S(zfs_inode_alloc(sb, &ip), ==, 0);
ip->i_version = 1;
return (ip);
}
static void
zpl_inode_destroy(struct inode *ip)
{
ASSERT(atomic_read(&ip->i_count) == 0);
zfs_inode_destroy(ip);
}
/*
* When ->drop_inode() is called its return value indicates if the
* inode should be evicted from the inode cache. If the inode is
* unhashed and has no links the default policy is to evict it
* immediately.
*
* Prior to 2.6.36 this eviction was accomplished by the vfs calling
* ->delete_inode(). It was ->delete_inode()'s responsibility to
* truncate the inode pages and call clear_inode(). The call to
* clear_inode() synchronously invalidates all the buffers and
* calls ->clear_inode(). It was ->clear_inode()'s responsibility
* to cleanup and filesystem specific data before freeing the inode.
*
* This elaborate mechanism was replaced by ->evict_inode() which
* does the job of both ->delete_inode() and ->clear_inode(). It
* will be called exactly once, and when it returns the inode must
* be in a state where it can simply be freed. The ->evict_inode()
* callback must minimally truncate the inode pages, and call
* end_writeback() to complete all outstanding writeback for the
* inode. After this is complete evict inode can cleanup any
* remaining filesystem specific data.
*/
#ifdef HAVE_EVICT_INODE
static void
zpl_evict_inode(struct inode *ip)
{
truncate_inode_pages(&ip->i_data, 0);
end_writeback(ip);
zfs_inactive(ip);
}
#else
static void
zpl_clear_inode(struct inode *ip)
{
zfs_inactive(ip);
}
static void
zpl_inode_delete(struct inode *ip)
{
truncate_inode_pages(&ip->i_data, 0);
clear_inode(ip);
}
#endif /* HAVE_EVICT_INODE */
static void
zpl_put_super(struct super_block *sb)
{
int error;
error = -zfs_umount(sb);
ASSERT3S(error, <=, 0);
}
static int
zpl_statfs(struct dentry *dentry, struct kstatfs *statp)
{
int error;
error = -zfs_statvfs(dentry, statp);
ASSERT3S(error, <=, 0);
return (error);
}
static int
zpl_show_options(struct seq_file *seq, struct vfsmount *vfsp)
{
struct super_block *sb = vfsp->mnt_sb;
zfs_sb_t *zsb = sb->s_fs_info;
/*
* The Linux VFS automatically handles the following flags:
* MNT_NOSUID, MNT_NODEV, MNT_NOEXEC, MNT_NOATIME, MNT_READONLY
*/
if (zsb->z_flags & ZSB_XATTR_USER)
seq_printf(seq, ",%s", "xattr");
return (0);
}
static int
zpl_fill_super(struct super_block *sb, void *data, int silent)
{
int error;
error = -zfs_domount(sb, data, silent);
ASSERT3S(error, <=, 0);
return (error);
}
static int
zpl_get_sb(struct file_system_type *fs_type, int flags,
const char *osname, void *data, struct vfsmount *mnt)
{
zpl_mount_data_t zmd = { osname, data, mnt };
return get_sb_nodev(fs_type, flags, &zmd, zpl_fill_super, mnt);
}
static void
zpl_kill_sb(struct super_block *sb)
{
#ifdef HAVE_SNAPSHOT
zfs_sb_t *zsb = sb->s_fs_info;
if (zsb && dmu_objset_is_snapshot(zsb->z_os))
zfs_snap_destroy(zsb);
#endif /* HAVE_SNAPSHOT */
kill_anon_super(sb);
}
const struct super_operations zpl_super_operations = {
.alloc_inode = zpl_inode_alloc,
.destroy_inode = zpl_inode_destroy,
.dirty_inode = NULL,
.write_inode = NULL,
.drop_inode = NULL,
#ifdef HAVE_EVICT_INODE
.evict_inode = zpl_evict_inode,
#else
.clear_inode = zpl_clear_inode,
.delete_inode = zpl_inode_delete,
#endif /* HAVE_EVICT_INODE */
.put_super = zpl_put_super,
.write_super = NULL,
.sync_fs = NULL,
.freeze_fs = NULL,
.unfreeze_fs = NULL,
.statfs = zpl_statfs,
.remount_fs = NULL,
.show_options = zpl_show_options,
.show_stats = NULL,
};
#if 0
const struct export_operations zpl_export_operations = {
.fh_to_dentry = NULL,
.fh_to_parent = NULL,
.get_parent = NULL,
};
#endif
struct file_system_type zpl_fs_type = {
.owner = THIS_MODULE,
.name = ZFS_DRIVER,
.get_sb = zpl_get_sb,
.kill_sb = zpl_kill_sb,
};
+438
View File
@@ -0,0 +1,438 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2011, Lawrence Livermore National Security, LLC.
*
* Extended attributes (xattr) on Solaris are implemented as files
* which exist in a hidden xattr directory. These extended attributes
* can be accessed using the attropen() system call which opens
* the extended attribute. It can then be manipulated just like
* a standard file descriptor. This has a couple advantages such
* as practically no size limit on the file, and the extended
* attributes permissions may differ from those of the parent file.
* This interface is really quite clever, but it's also completely
* different than what is supported on Linux.
*
* Under Linux extended attributes are manipulated by the system
* calls getxattr(2), setxattr(2), and listxattr(2). They consider
* extended attributes to be name/value pairs where the name is a
* NULL terminated string. The name must also include one of the
* following name space prefixes:
*
* user - No restrictions and is available to user applications.
* trusted - Restricted to kernel and root (CAP_SYS_ADMIN) use.
* system - Used for access control lists (system.nfs4_acl, etc).
* security - Used by SELinux to store a files security context.
*
* This Linux interface is implemented internally using the more
* flexible Solaris style extended attributes. Every extended
* attribute is store as a file in a hidden directory associated
* with the parent file. This ensures on disk compatibility with
* zfs implementations on other platforms (Solaris, FreeBSD, MacOS).
*
* One consequence of this implementation is that when an extended
* attribute is manipulated an inode is created. This inode will
* exist in the Linux inode cache but there will be no associated
* entry in the dentry cache which references it. This is safe
* but it may result in some confusion.
*
* Longer term I would like to see the 'security.selinux' extended
* attribute moved to a SA. This should significantly improve
* performance on a SELinux enabled system by minimizing the
* number of seeks required to access a file. However, for now
* this xattr is still stored in a file because I'm pretty sure
* adding a new SA will break on-disk compatibility.
*/
#include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h>
#include <sys/zfs_znode.h>
#include <sys/vfs.h>
#include <sys/zpl.h>
typedef struct xattr_filldir {
size_t size;
size_t offset;
char *buf;
struct inode *inode;
} xattr_filldir_t;
static int
zpl_xattr_filldir(void *arg, const char *name, int name_len,
loff_t offset, uint64_t objnum, unsigned int d_type)
{
xattr_filldir_t *xf = arg;
if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
if (!(ITOZSB(xf->inode)->z_flags & ZSB_XATTR_USER))
return (0);
if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
if (!capable(CAP_SYS_ADMIN))
return (0);
/* When xf->buf is NULL only calculate the required size. */
if (xf->buf) {
if (xf->offset + name_len + 1 > xf->size)
return (-ERANGE);
memcpy(xf->buf + xf->offset, name, name_len);
xf->buf[xf->offset + name_len] = '\0';
}
xf->offset += (name_len + 1);
return (0);
}
ssize_t
zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
struct inode *ip = dentry->d_inode;
struct inode *dxip = NULL;
loff_t pos = 3; /* skip '.', '..', and '.zfs' entries. */
cred_t *cr;
int error;
xattr_filldir_t xf = { buffer_size, 0, buffer, ip };
cr = (cred_t *)get_current_cred();
/* Lookup the xattr directory */
error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR, cr, NULL, NULL);
if (error) {
if (error == -ENOENT)
error = 0;
goto out;
}
/* Fill provided buffer via zpl_zattr_filldir helper */
error = -zfs_readdir(dxip, (void *)&xf, zpl_xattr_filldir, &pos, cr);
if (error)
goto out;
error = xf.offset;
out:
if (dxip)
iput(dxip);
put_cred(cr);
return (error);
}
static int
zpl_xattr_get(struct inode *ip, const char *name, void *buf, size_t size)
{
struct inode *dxip = NULL;
struct inode *xip = NULL;
cred_t *cr;
int error;
cr = (cred_t *)get_current_cred();
/* Lookup the xattr directory */
error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR, cr, NULL, NULL);
if (error)
goto out;
/* Lookup a specific xattr name in the directory */
error = -zfs_lookup(dxip, (char *)name, &xip, 0, cr, NULL, NULL);
if (error)
goto out;
if (!size) {
error = i_size_read(xip);
goto out;
}
error = zpl_read_common(xip, buf, size, 0, UIO_SYSSPACE, 0, cr);
out:
if (xip)
iput(xip);
if (dxip)
iput(dxip);
put_cred(cr);
if (error == -ENOENT)
error = -ENODATA;
return (error);
}
static int
zpl_xattr_set(struct inode *ip, const char *name, const void *value,
size_t size, int flags)
{
struct inode *dxip = NULL;
struct inode *xip = NULL;
vattr_t *vap = NULL;
cred_t *cr;
ssize_t wrote;
int error;
cr = (cred_t *)get_current_cred();
/* Lookup the xattr directory and create it if required. */
error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR | CREATE_XATTR_DIR,
cr, NULL, NULL);
if (error)
goto out;
/*
* Lookup a specific xattr name in the directory, two failure modes:
* XATTR_CREATE: fail if xattr already exists
* XATTR_REMOVE: fail if xattr does not exist
*/
error = -zfs_lookup(dxip, (char *)name, &xip, 0, cr, NULL, NULL);
if (error) {
if (error != -ENOENT)
goto out;
if ((error == -ENOENT) && (flags & XATTR_REPLACE))
goto out;
} else {
error = -EEXIST;
if (flags & XATTR_CREATE)
goto out;
}
error = 0;
/* Remove a specific name xattr when value is set to NULL. */
if (value == NULL) {
if (xip)
error = -zfs_remove(dxip, (char *)name, cr);
goto out;
}
/* Lookup failed create a new xattr. */
if (xip == NULL) {
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
vap->va_mode = S_IFREG | 0644;
vap->va_mask = ATTR_MODE;
vap->va_uid = current_fsuid();
vap->va_gid = current_fsgid();
error = -zfs_create(dxip, (char *)name, vap, 0, 0644, &xip,
cr, 0, NULL);
if (error)
goto out;
}
ASSERT(xip != NULL);
wrote = zpl_write_common(xip, value, size, 0, UIO_SYSSPACE, 0, cr);
if (wrote < 0)
error = wrote;
out:
if (vap)
kmem_free(vap, sizeof(vattr_t));
if (xip)
iput(xip);
if (dxip)
iput(dxip);
put_cred(cr);
if (error == -ENOENT)
error = -ENODATA;
ASSERT3S(error, <=, 0);
return (error);
}
static int
__zpl_xattr_user_get(struct inode *ip, const char *name,
void *buffer, size_t size)
{
char *xattr_name;
int error;
if (strcmp(name, "") == 0)
return -EINVAL;
if (!(ITOZSB(ip)->z_flags & ZSB_XATTR_USER))
return -EOPNOTSUPP;
xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
error = zpl_xattr_get(ip, xattr_name, buffer, size);
strfree(xattr_name);
return (error);
}
ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get);
static int
__zpl_xattr_user_set(struct inode *ip, const char *name,
const void *value, size_t size, int flags)
{
char *xattr_name;
int error;
if (strcmp(name, "") == 0)
return -EINVAL;
if (!(ITOZSB(ip)->z_flags & ZSB_XATTR_USER))
return -EOPNOTSUPP;
xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
error = zpl_xattr_set(ip, xattr_name, value, size, flags);
strfree(xattr_name);
return (error);
}
ZPL_XATTR_SET_WRAPPER(zpl_xattr_user_set);
xattr_handler_t zpl_xattr_user_handler = {
.prefix = XATTR_USER_PREFIX,
.get = zpl_xattr_user_get,
.set = zpl_xattr_user_set,
};
static int
__zpl_xattr_trusted_get(struct inode *ip, const char *name,
void *buffer, size_t size)
{
char *xattr_name;
int error;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (strcmp(name, "") == 0)
return -EINVAL;
xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
error = zpl_xattr_get(ip, xattr_name, buffer, size);
strfree(xattr_name);
return (error);
}
ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get);
static int
__zpl_xattr_trusted_set(struct inode *ip, const char *name,
const void *value, size_t size, int flags)
{
char *xattr_name;
int error;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (strcmp(name, "") == 0)
return -EINVAL;
xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
error = zpl_xattr_set(ip, xattr_name, value, size, flags);
strfree(xattr_name);
return (error);
}
ZPL_XATTR_SET_WRAPPER(zpl_xattr_trusted_set);
xattr_handler_t zpl_xattr_trusted_handler = {
.prefix = XATTR_TRUSTED_PREFIX,
.get = zpl_xattr_trusted_get,
.set = zpl_xattr_trusted_set,
};
static int
__zpl_xattr_security_get(struct inode *ip, const char *name,
void *buffer, size_t size)
{
char *xattr_name;
int error;
if (strcmp(name, "") == 0)
return -EINVAL;
xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
error = zpl_xattr_get(ip, xattr_name, buffer, size);
strfree(xattr_name);
return (error);
}
ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);
static int
__zpl_xattr_security_set(struct inode *ip, const char *name,
const void *value, size_t size, int flags)
{
char *xattr_name;
int error;
if (strcmp(name, "") == 0)
return -EINVAL;
xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
error = zpl_xattr_set(ip, xattr_name, value, size, flags);
strfree(xattr_name);
return (error);
}
ZPL_XATTR_SET_WRAPPER(zpl_xattr_security_set);
int
zpl_xattr_security_init(struct inode *ip, struct inode *dip)
{
int error;
size_t len;
void *value;
char *name;
error = security_inode_init_security(ip, dip, &name, &value, &len);
if (error) {
if (error == -EOPNOTSUPP)
return 0;
return (error);
}
error = __zpl_xattr_security_set(ip, name, value, len, 0);
kfree(name);
kfree(value);
return (error);
}
xattr_handler_t zpl_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.get = zpl_xattr_security_get,
.set = zpl_xattr_security_set,
};
xattr_handler_t *zpl_xattr_handlers[] = {
&zpl_xattr_security_handler,
&zpl_xattr_trusted_handler,
&zpl_xattr_user_handler,
#ifdef HAVE_POSIX_ACLS
&zpl_xattr_acl_access_handler,
&zpl_xattr_acl_default_handler,
#endif /* HAVE_POSIX_ACLS */
};
+4 -1
View File
@@ -1062,6 +1062,8 @@ zvol_alloc(dev_t dev, const char *name)
mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL);
avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare,
sizeof (rl_t), offsetof(rl_t, r_node));
zv->zv_znode.z_is_zvol = TRUE;
spin_lock_init(&zv->zv_lock);
list_link_init(&zv->zv_next);
@@ -1228,7 +1230,8 @@ zvol_create_minors_cb(spa_t *spa, uint64_t dsobj,
if (strchr(dsname, '/') == NULL)
return 0;
return __zvol_create_minor(dsname);
(void) __zvol_create_minor(dsname);
return (0);
}
/*