Rebase master to b105

This commit is contained in:
Brian Behlendorf
2009-01-15 13:59:39 -08:00
parent 42bcb36c89
commit fb5f0bc833
50 changed files with 1602 additions and 849 deletions
+127 -51
View File
@@ -105,9 +105,7 @@
* (3) All range locks must be grabbed before calling dmu_tx_assign(),
* as they can span dmu_tx_assign() calls.
*
* (4) Always pass zfsvfs->z_assign as the second argument to dmu_tx_assign().
* In normal operation, this will be TXG_NOWAIT. During ZIL replay,
* it will be a specific txg. Either way, dmu_tx_assign() never blocks.
* (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign().
* This is critical because we don't want to block while holding locks.
* Note, in particular, that if a lock is sometimes acquired before
* the tx assigns, and sometimes after (e.g. z_lock), then failing to
@@ -124,6 +122,8 @@
* (5) If the operation succeeded, generate the intent log entry for it
* before dropping locks. This ensures that the ordering of events
* in the intent log matches the order in which they actually occurred.
* During ZIL replay the zfs_log_* functions will update the sequence
* number to indicate the zil transaction has replayed.
*
* (6) At the end of each vnode op, the DMU tx must always commit,
* regardless of whether there were any errors.
@@ -139,12 +139,12 @@
* rw_enter(...); // grab any other locks you need
* tx = dmu_tx_create(...); // get DMU tx
* dmu_tx_hold_*(); // hold each object you might modify
* error = dmu_tx_assign(tx, zfsvfs->z_assign); // try to assign
* error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign
* if (error) {
* rw_exit(...); // drop locks
* zfs_dirent_unlock(dl); // unlock directory entry
* VN_RELE(...); // release held vnodes
* if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
* if (error == ERESTART) {
* dmu_tx_wait(tx);
* dmu_tx_abort(tx);
* goto top;
@@ -698,10 +698,9 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_bonus(tx, zp->z_id);
dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
error = dmu_tx_assign(tx, zfsvfs->z_assign);
error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
if (error == ERESTART &&
zfsvfs->z_assign == TXG_NOWAIT) {
if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
continue;
@@ -807,7 +806,7 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
* If we're in replay mode, or we made no progress, return error.
* Otherwise, it's at least a partial write, so it's successful.
*/
if (zfsvfs->z_assign >= TXG_INITIAL || uio->uio_resid == start_resid) {
if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
ZFS_EXIT(zfsvfs);
return (error);
}
@@ -1233,11 +1232,10 @@ top:
dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
0, SPA_MAXBLOCKSIZE);
}
error = dmu_tx_assign(tx, zfsvfs->z_assign);
error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART &&
zfsvfs->z_assign == TXG_NOWAIT) {
if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
@@ -1449,11 +1447,11 @@ top:
/* charge as an update -- would be nice not to charge at all */
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
error = dmu_tx_assign(tx, zfsvfs->z_assign);
error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
VN_RELE(vp);
if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
@@ -1659,10 +1657,10 @@ top:
if ((dzp->z_phys->zp_flags & ZFS_INHERIT_ACE) || aclp)
dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
0, SPA_MAXBLOCKSIZE);
error = dmu_tx_assign(tx, zfsvfs->z_assign);
error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
@@ -1789,13 +1787,13 @@ top:
dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
dmu_tx_hold_bonus(tx, zp->z_id);
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
error = dmu_tx_assign(tx, zfsvfs->z_assign);
error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
rw_exit(&zp->z_parent_lock);
rw_exit(&zp->z_name_lock);
zfs_dirent_unlock(dl);
VN_RELE(vp);
if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
@@ -2342,6 +2340,7 @@ zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
zilog_t *zilog;
dmu_tx_t *tx;
vattr_t oldva;
xvattr_t tmpxvattr;
uint_t mask = vap->va_mask;
uint_t saved_mask;
int trim_mask = 0;
@@ -2396,6 +2395,8 @@ zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
*/
xoap = xva_getxoptattr(xvap);
xva_init(&tmpxvattr);
/*
* Immutable files can only alter immutable bit and atime
*/
@@ -2518,28 +2519,78 @@ top:
oldva.va_mode = pzp->zp_mode;
zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
if (mask & AT_XVATTR) {
if ((need_policy == FALSE) &&
(XVA_ISSET_REQ(xvap, XAT_APPENDONLY) &&
xoap->xoa_appendonly !=
((pzp->zp_flags & ZFS_APPENDONLY) != 0)) ||
(XVA_ISSET_REQ(xvap, XAT_NOUNLINK) &&
xoap->xoa_nounlink !=
((pzp->zp_flags & ZFS_NOUNLINK) != 0)) ||
(XVA_ISSET_REQ(xvap, XAT_IMMUTABLE) &&
xoap->xoa_immutable !=
((pzp->zp_flags & ZFS_IMMUTABLE) != 0)) ||
(XVA_ISSET_REQ(xvap, XAT_NODUMP) &&
xoap->xoa_nodump !=
((pzp->zp_flags & ZFS_NODUMP) != 0)) ||
(XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED) &&
xoap->xoa_av_modified !=
((pzp->zp_flags & ZFS_AV_MODIFIED) != 0)) ||
((XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED) &&
((vp->v_type != VREG && xoap->xoa_av_quarantined) ||
xoap->xoa_av_quarantined !=
((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0)))) ||
(XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ||
(XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
/*
* Update xvattr mask to include only those attributes
* that are actually changing.
*
* the bits will be restored prior to actually setting
* the attributes so the caller thinks they were set.
*/
if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
if (xoap->xoa_appendonly !=
((pzp->zp_flags & ZFS_APPENDONLY) != 0)) {
need_policy = TRUE;
} else {
XVA_CLR_REQ(xvap, XAT_APPENDONLY);
XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
}
}
if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
if (xoap->xoa_nounlink !=
((pzp->zp_flags & ZFS_NOUNLINK) != 0)) {
need_policy = TRUE;
} else {
XVA_CLR_REQ(xvap, XAT_NOUNLINK);
XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
}
}
if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
if (xoap->xoa_immutable !=
((pzp->zp_flags & ZFS_IMMUTABLE) != 0)) {
need_policy = TRUE;
} else {
XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
}
}
if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
if (xoap->xoa_nodump !=
((pzp->zp_flags & ZFS_NODUMP) != 0)) {
need_policy = TRUE;
} else {
XVA_CLR_REQ(xvap, XAT_NODUMP);
XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
}
}
if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
if (xoap->xoa_av_modified !=
((pzp->zp_flags & ZFS_AV_MODIFIED) != 0)) {
need_policy = TRUE;
} else {
XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
}
}
if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
if ((vp->v_type != VREG &&
xoap->xoa_av_quarantined) ||
xoap->xoa_av_quarantined !=
((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0)) {
need_policy = TRUE;
} else {
XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
}
}
if (need_policy == FALSE &&
(XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
need_policy = TRUE;
}
}
@@ -2649,7 +2700,7 @@ top:
dmu_tx_hold_bonus(tx, attrzp->z_id);
}
err = dmu_tx_assign(tx, zfsvfs->z_assign);
err = dmu_tx_assign(tx, TXG_NOWAIT);
if (err) {
if (attrzp)
VN_RELE(ZTOV(attrzp));
@@ -2659,7 +2710,7 @@ top:
aclp = NULL;
}
if (err == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
if (err == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
@@ -2732,6 +2783,31 @@ top:
*/
if (xoap && (mask & AT_XVATTR)) {
/*
* restore trimmed off masks
* so that return masks can be set for caller.
*/
if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
XVA_SET_REQ(xvap, XAT_APPENDONLY);
}
if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
XVA_SET_REQ(xvap, XAT_NOUNLINK);
}
if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
XVA_SET_REQ(xvap, XAT_IMMUTABLE);
}
if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
XVA_SET_REQ(xvap, XAT_NODUMP);
}
if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
}
if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
}
if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
size_t len;
dmu_object_info_t doi;
@@ -3104,7 +3180,7 @@ top:
if (tzp)
dmu_tx_hold_bonus(tx, tzp->z_id); /* parent changes */
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
error = dmu_tx_assign(tx, zfsvfs->z_assign);
error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
if (zl != NULL)
zfs_rename_unlock(&zl);
@@ -3113,7 +3189,7 @@ top:
VN_RELE(ZTOV(szp));
if (tzp)
VN_RELE(ZTOV(tzp));
if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
@@ -3242,10 +3318,10 @@ top:
FUID_SIZE_ESTIMATE(zfsvfs));
}
}
error = dmu_tx_assign(tx, zfsvfs->z_assign);
error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
@@ -3462,10 +3538,10 @@ top:
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_bonus(tx, szp->z_id);
dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
error = dmu_tx_assign(tx, zfsvfs->z_assign);
error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
@@ -3547,7 +3623,7 @@ zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp,
len = PAGESIZE;
/*
* If our blocksize is bigger than the page size, try to kluster
* muiltiple pages so that we write a full block (thus avoiding
* multiple pages so that we write a full block (thus avoiding
* a read-modify-write).
*/
if (off < filesz && zp->z_blksz > PAGESIZE) {
@@ -3589,9 +3665,9 @@ top:
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_write(tx, zp->z_id, off, len);
dmu_tx_hold_bonus(tx, zp->z_id);
err = dmu_tx_assign(tx, zfsvfs->z_assign);
err = dmu_tx_assign(tx, TXG_NOWAIT);
if (err != 0) {
if (err == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
if (err == ERESTART) {
zfs_range_unlock(rl);
dmu_tx_wait(tx);
dmu_tx_abort(tx);