3464 zfs synctask code needs restructuring
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Approved by: Garrett D'Amore <garrett@damore.org>

References:
  https://www.illumos.org/issues/3464
  illumos/illumos-gate@3b2aab1880

Ported-by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #1495
This commit is contained in:
Matthew Ahrens
2013-09-04 07:00:57 -05:00
committed by Brian Behlendorf
parent 6f1ffb0665
commit 13fe019870
86 changed files with 6425 additions and 6043 deletions
+2
View File
@@ -93,3 +93,5 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/zpl_super.o
$(MODULE)-objs += @top_srcdir@/module/zfs/zpl_xattr.o
$(MODULE)-objs += @top_srcdir@/module/zfs/zrlock.o
$(MODULE)-objs += @top_srcdir@/module/zfs/zvol.o
$(MODULE)-objs += @top_srcdir@/module/zfs/dsl_destroy.o
$(MODULE)-objs += @top_srcdir@/module/zfs/dsl_userhold.o
+5 -5
View File
@@ -1643,12 +1643,12 @@ arc_buf_free(arc_buf_t *buf, void *tag)
}
}
int
boolean_t
arc_buf_remove_ref(arc_buf_t *buf, void* tag)
{
arc_buf_hdr_t *hdr = buf->b_hdr;
kmutex_t *hash_lock = NULL;
int no_callback = (buf->b_efunc == NULL);
boolean_t no_callback = (buf->b_efunc == NULL);
if (hdr->b_state == arc_anon) {
ASSERT(hdr->b_datacnt == 1);
@@ -1854,7 +1854,7 @@ arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle,
ARCSTAT_INCR(arcstat_mutex_miss, missed);
/*
* We have just evicted some date into the ghost state, make
* We have just evicted some data into the ghost state, make
* sure we also adjust the ghost state size if necessary.
*/
if (arc_no_grow &&
@@ -2772,7 +2772,7 @@ arc_bcopy_func(zio_t *zio, arc_buf_t *buf, void *arg)
{
if (zio == NULL || zio->io_error == 0)
bcopy(buf->b_data, arg, buf->b_hdr->b_size);
VERIFY(arc_buf_remove_ref(buf, arg) == 1);
VERIFY(arc_buf_remove_ref(buf, arg));
}
/* a generic arc_done_func_t */
@@ -2781,7 +2781,7 @@ arc_getbuf_func(zio_t *zio, arc_buf_t *buf, void *arg)
{
arc_buf_t **bufp = arg;
if (zio && zio->io_error) {
VERIFY(arc_buf_remove_ref(buf, arg) == 1);
VERIFY(arc_buf_remove_ref(buf, arg));
*bufp = NULL;
} else {
*bufp = buf;
+8
View File
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/bplist.h>
@@ -52,6 +53,12 @@ bplist_append(bplist_t *bpl, const blkptr_t *bp)
mutex_exit(&bpl->bpl_lock);
}
/*
* To aid debugging, we keep the most recently removed entry. This way if
* we are in the callback, we can easily locate the entry.
*/
static bplist_entry_t *bplist_iterate_last_removed;
void
bplist_iterate(bplist_t *bpl, bplist_itor_t *func, void *arg, dmu_tx_t *tx)
{
@@ -59,6 +66,7 @@ bplist_iterate(bplist_t *bpl, bplist_itor_t *func, void *arg, dmu_tx_t *tx)
mutex_enter(&bpl->bpl_lock);
while ((bpe = list_head(&bpl->bpl_list))) {
bplist_iterate_last_removed = bpe;
list_remove(&bpl->bpl_list, bpe);
mutex_exit(&bpl->bpl_lock);
func(arg, &bpe->bpe_blk, tx);
+4
View File
@@ -366,6 +366,7 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
{
bpobj_t subbpo;
uint64_t used, comp, uncomp, subsubobjs;
ASSERTV(dmu_object_info_t doi);
ASSERT(bpo->bpo_havesubobj);
ASSERT(bpo->bpo_havecomp);
@@ -392,6 +393,9 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
DMU_OT_BPOBJ_SUBOBJ, SPA_MAXBLOCKSIZE, DMU_OT_NONE, 0, tx);
}
ASSERT0(dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi));
ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ);
mutex_enter(&bpo->bpo_lock);
dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
+35 -52
View File
@@ -64,7 +64,7 @@ static void __dbuf_hold_impl_init(struct dbuf_hold_impl_data *dh,
static int __dbuf_hold_impl(struct dbuf_hold_impl_data *dh);
static void dbuf_destroy(dmu_buf_impl_t *db);
static int dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
/*
@@ -546,7 +546,7 @@ dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb)
} else {
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
ASSERT3P(db->db_buf, ==, NULL);
VERIFY(arc_buf_remove_ref(buf, db) == 1);
VERIFY(arc_buf_remove_ref(buf, db));
db->db_state = DB_UNCACHED;
}
cv_broadcast(&db->db_changed);
@@ -875,10 +875,12 @@ dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx)
continue;
/* found a level 0 buffer in the range */
if (dbuf_undirty(db, tx))
continue;
mutex_enter(&db->db_mtx);
if (dbuf_undirty(db, tx)) {
/* mutex has been dropped and dbuf destroyed */
continue;
}
if (db->db_state == DB_UNCACHED ||
db->db_state == DB_NOFILL ||
db->db_state == DB_EVICTING) {
@@ -1005,7 +1007,7 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
mutex_enter(&db->db_mtx);
dbuf_set_data(db, buf);
VERIFY(arc_buf_remove_ref(obuf, db) == 1);
VERIFY(arc_buf_remove_ref(obuf, db));
db->db.db_size = size;
if (db->db_level == 0) {
@@ -1306,7 +1308,10 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
return (dr);
}
static int
/*
* Return TRUE if this evicted the dbuf.
*/
static boolean_t
dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
{
dnode_t *dn;
@@ -1315,18 +1320,17 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
ASSERT(txg != 0);
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
ASSERT0(db->db_level);
ASSERT(MUTEX_HELD(&db->db_mtx));
mutex_enter(&db->db_mtx);
/*
* If this buffer is not dirty, we're done.
*/
for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next)
if (dr->dr_txg <= txg)
break;
if (dr == NULL || dr->dr_txg < txg) {
mutex_exit(&db->db_mtx);
return (0);
}
if (dr == NULL || dr->dr_txg < txg)
return (B_FALSE);
ASSERT(dr->dr_txg == txg);
ASSERT(dr->dr_dbuf == db);
@@ -1334,24 +1338,12 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
dn = DB_DNODE(db);
/*
* If this buffer is currently held, we cannot undirty
* it, since one of the current holders may be in the
* middle of an update. Note that users of dbuf_undirty()
* should not place a hold on the dbuf before the call.
* Also note: we can get here with a spill block, so
* test for that similar to how dbuf_dirty does.
* Note: This code will probably work even if there are concurrent
* holders, but it is untested in that scenerio, as the ZPL and
* ztest have additional locking (the range locks) that prevents
* that type of concurrent access.
*/
if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
mutex_exit(&db->db_mtx);
/* Make sure we don't toss this buffer at sync phase */
if (db->db_blkid != DMU_SPILL_BLKID) {
mutex_enter(&dn->dn_mtx);
dnode_clear_range(dn, db->db_blkid, 1, tx);
mutex_exit(&dn->dn_mtx);
}
DB_DNODE_EXIT(db);
return (0);
}
ASSERT3U(refcount_count(&db->db_holds), ==, db->db_dirtycnt);
dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
@@ -1380,21 +1372,13 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
}
DB_DNODE_EXIT(db);
if (db->db_level == 0) {
if (db->db_state != DB_NOFILL) {
dbuf_unoverride(dr);
if (db->db_state != DB_NOFILL) {
dbuf_unoverride(dr);
ASSERT(db->db_buf != NULL);
ASSERT(dr->dt.dl.dr_data != NULL);
if (dr->dt.dl.dr_data != db->db_buf)
VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data,
db) == 1);
}
} else {
ASSERT(db->db_buf != NULL);
ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
mutex_destroy(&dr->dt.di.dr_mtx);
list_destroy(&dr->dt.di.dr_children);
ASSERT(dr->dt.dl.dr_data != NULL);
if (dr->dt.dl.dr_data != db->db_buf)
VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db));
}
kmem_free(dr, sizeof (dbuf_dirty_record_t));
@@ -1406,13 +1390,12 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
ASSERT(db->db_state == DB_NOFILL || arc_released(buf));
dbuf_set_data(db, NULL);
VERIFY(arc_buf_remove_ref(buf, db) == 1);
VERIFY(arc_buf_remove_ref(buf, db));
dbuf_evict(db);
return (1);
return (B_TRUE);
}
mutex_exit(&db->db_mtx);
return (0);
return (B_FALSE);
}
#pragma weak dmu_buf_will_dirty = dbuf_will_dirty
@@ -1511,7 +1494,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
mutex_exit(&db->db_mtx);
(void) dbuf_dirty(db, tx);
bcopy(buf->b_data, db->db.db_data, db->db.db_size);
VERIFY(arc_buf_remove_ref(buf, db) == 1);
VERIFY(arc_buf_remove_ref(buf, db));
xuio_stat_wbuf_copied();
return;
}
@@ -1529,10 +1512,10 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
arc_release(db->db_buf, db);
}
dr->dt.dl.dr_data = buf;
VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1);
VERIFY(arc_buf_remove_ref(db->db_buf, db));
} else if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) {
arc_release(db->db_buf, db);
VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1);
VERIFY(arc_buf_remove_ref(db->db_buf, db));
}
db->db_buf = NULL;
}
@@ -2168,10 +2151,10 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
* This dbuf has anonymous data associated with it.
*/
dbuf_set_data(db, NULL);
VERIFY(arc_buf_remove_ref(buf, db) == 1);
VERIFY(arc_buf_remove_ref(buf, db));
dbuf_evict(db);
} else {
VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0);
VERIFY(!arc_buf_remove_ref(db->db_buf, db));
/*
* A dbuf will be eligible for eviction if either the
@@ -2669,7 +2652,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
if (db->db_state != DB_NOFILL) {
if (dr->dt.dl.dr_data != db->db_buf)
VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data,
db) == 1);
db));
else if (!arc_released(db->db_buf))
arc_set_callback(db->db_buf, dbuf_do_evict, db);
}
+1 -1
View File
@@ -1382,7 +1382,7 @@ void
dmu_return_arcbuf(arc_buf_t *buf)
{
arc_return_buf(buf, FTAG);
VERIFY(arc_buf_remove_ref(buf, FTAG) == 1);
VERIFY(arc_buf_remove_ref(buf, FTAG));
}
/*
+42 -40
View File
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dmu.h>
@@ -155,51 +156,49 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
}
int
dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct vnode *vp, offset_t *offp)
dmu_diff(const char *tosnap_name, const char *fromsnap_name,
struct vnode *vp, offset_t *offp)
{
struct diffarg da;
dsl_dataset_t *ds = tosnap->os_dsl_dataset;
dsl_dataset_t *fromds = fromsnap->os_dsl_dataset;
dsl_dataset_t *findds;
dsl_dataset_t *relds;
int err = 0;
dsl_dataset_t *fromsnap;
dsl_dataset_t *tosnap;
dsl_pool_t *dp;
int error;
uint64_t fromtxg;
/* make certain we are looking at snapshots */
if (!dsl_dataset_is_snapshot(ds) || !dsl_dataset_is_snapshot(fromds))
if (strchr(tosnap_name, '@') == NULL ||
strchr(fromsnap_name, '@') == NULL)
return (EINVAL);
/* fromsnap must be earlier and from the same lineage as tosnap */
if (fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)
return (EXDEV);
error = dsl_pool_hold(tosnap_name, FTAG, &dp);
if (error != 0)
return (error);
relds = NULL;
findds = ds;
while (fromds->ds_dir != findds->ds_dir) {
dsl_pool_t *dp = ds->ds_dir->dd_pool;
if (!dsl_dir_is_clone(findds->ds_dir)) {
if (relds)
dsl_dataset_rele(relds, FTAG);
return (EXDEV);
}
rw_enter(&dp->dp_config_rwlock, RW_READER);
err = dsl_dataset_hold_obj(dp,
findds->ds_dir->dd_phys->dd_origin_obj, FTAG, &findds);
rw_exit(&dp->dp_config_rwlock);
if (relds)
dsl_dataset_rele(relds, FTAG);
if (err)
return (EXDEV);
relds = findds;
error = dsl_dataset_hold(dp, tosnap_name, FTAG, &tosnap);
if (error != 0) {
dsl_pool_rele(dp, FTAG);
return (error);
}
if (relds)
dsl_dataset_rele(relds, FTAG);
error = dsl_dataset_hold(dp, fromsnap_name, FTAG, &fromsnap);
if (error != 0) {
dsl_dataset_rele(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
return (error);
}
if (!dsl_dataset_is_before(tosnap, fromsnap)) {
dsl_dataset_rele(fromsnap, FTAG);
dsl_dataset_rele(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
return (EXDEV);
}
fromtxg = fromsnap->ds_phys->ds_creation_txg;
dsl_dataset_rele(fromsnap, FTAG);
dsl_dataset_long_hold(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
da.da_vp = vp;
da.da_offp = offp;
@@ -207,15 +206,18 @@ dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct vnode *vp, offset_t *offp)
da.da_ddr.ddr_first = da.da_ddr.ddr_last = 0;
da.da_err = 0;
err = traverse_dataset(ds, fromds->ds_phys->ds_creation_txg,
error = traverse_dataset(tosnap, fromtxg,
TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, diff_cb, &da);
if (err) {
da.da_err = err;
if (error != 0) {
da.da_err = error;
} else {
/* we set the da.da_err we return as side-effect */
(void) write_record(&da);
}
dsl_dataset_long_rele(tosnap, FTAG);
dsl_dataset_rele(tosnap, FTAG);
return (da.da_err);
}
+398 -499
View File
File diff suppressed because it is too large Load Diff
+472 -448
View File
File diff suppressed because it is too large Load Diff
+24 -18
View File
@@ -265,7 +265,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err)
if (err != 0)
return (err);
cbp = buf->b_data;
@@ -282,7 +282,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
zb->zb_level - 1,
zb->zb_blkid * epb + i);
err = traverse_visitbp(td, dnp, &cbp[i], &czb);
if (err) {
if (err != 0) {
if (!hard)
break;
lasterr = err;
@@ -295,7 +295,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err)
if (err != 0)
return (err);
dnp = buf->b_data;
@@ -308,7 +308,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
for (i = 0; i < epb; i++) {
err = traverse_dnode(td, &dnp[i], zb->zb_objset,
zb->zb_blkid * epb + i);
if (err) {
if (err != 0) {
if (!hard)
break;
lasterr = err;
@@ -321,7 +321,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err)
if (err != 0)
return (err);
osp = buf->b_data;
@@ -405,7 +405,7 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
for (j = 0; j < dnp->dn_nblkptr; j++) {
SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
err = traverse_visitbp(td, dnp, &dnp->dn_blkptr[j], &czb);
if (err) {
if (err != 0) {
if (!hard)
break;
lasterr = err;
@@ -415,7 +415,7 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
err = traverse_visitbp(td, dnp, &dnp->dn_spill, &czb);
if (err) {
if (err != 0) {
if (!hard)
return (err);
lasterr = err;
@@ -518,14 +518,20 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
cv_init(&pd->pd_cv, NULL, CV_DEFAULT, NULL);
/* See comment on ZIL traversal in dsl_scan_visitds. */
if (ds != NULL && !dsl_dataset_is_snapshot(ds)) {
objset_t *os;
if (ds != NULL && !dsl_dataset_is_snapshot(ds) && !BP_IS_HOLE(rootbp)) {
uint32_t flags = ARC_WAIT;
objset_phys_t *osp;
arc_buf_t *buf;
err = dmu_objset_from_ds(ds, &os);
if (err)
err = arc_read(NULL, td->td_spa, rootbp,
arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, NULL);
if (err != 0)
return (err);
traverse_zil(td, &os->os_zil_header);
osp = buf->b_data;
traverse_zil(td, &osp->os_zil_header);
(void) arc_buf_remove_ref(buf, &buf);
}
if (!(flags & TRAVERSE_PREFETCH_DATA) ||
@@ -591,7 +597,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
/* visit the MOS */
err = traverse_impl(spa, NULL, 0, spa_get_rootblkptr(spa),
txg_start, NULL, flags, func, arg);
if (err)
if (err != 0)
return (err);
/* visit each dataset */
@@ -600,7 +606,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
dmu_object_info_t doi;
err = dmu_object_info(mos, obj, &doi);
if (err) {
if (err != 0) {
if (!hard)
return (err);
lasterr = err;
@@ -611,10 +617,10 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
dsl_dataset_t *ds;
uint64_t txg = txg_start;
rw_enter(&dp->dp_config_rwlock, RW_READER);
dsl_pool_config_enter(dp, FTAG);
err = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);
rw_exit(&dp->dp_config_rwlock);
if (err) {
dsl_pool_config_exit(dp, FTAG);
if (err != 0) {
if (!hard)
return (err);
lasterr = err;
@@ -624,7 +630,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
txg = ds->ds_phys->ds_prev_snap_txg;
err = traverse_dataset(ds, txg, flags, func, arg);
dsl_dataset_rele(ds, FTAG);
if (err) {
if (err != 0) {
if (!hard)
return (err);
lasterr = err;
+16 -17
View File
@@ -917,7 +917,7 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
#endif
static int
dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how)
{
dmu_tx_hold_t *txh;
spa_t *spa = tx->tx_pool->dp_spa;
@@ -985,15 +985,6 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
fudge += txh->txh_fudge;
}
/*
* NB: This check must be after we've held the dnodes, so that
* the dmu_tx_unassign() logic will work properly
*/
if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg) {
DMU_TX_STAT_BUMP(dmu_tx_how);
return (ERESTART);
}
/*
* If a snapshot has been taken since we made our estimates,
* assume that we won't be able to free or overwrite anything.
@@ -1076,29 +1067,28 @@ dmu_tx_unassign(dmu_tx_t *tx)
*
* (1) TXG_WAIT. If the current open txg is full, waits until there's
* a new one. This should be used when you're not holding locks.
* If will only fail if we're truly out of space (or over quota).
* It will only fail if we're truly out of space (or over quota).
*
* (2) TXG_NOWAIT. If we can't assign into the current open txg without
* blocking, returns immediately with ERESTART. This should be used
* whenever you're holding locks. On an ERESTART error, the caller
* should drop locks, do a dmu_tx_wait(tx), and try again.
*
* (3) A specific txg. Use this if you need to ensure that multiple
* transactions all sync in the same txg. Like TXG_NOWAIT, it
* returns ERESTART if it can't assign you into the requested txg.
*/
int
dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how)
{
hrtime_t before, after;
int err;
ASSERT(tx->tx_txg == 0);
ASSERT(txg_how != 0);
ASSERT(txg_how == TXG_WAIT || txg_how == TXG_NOWAIT);
ASSERT(!dsl_pool_sync_context(tx->tx_pool));
before = gethrtime();
/* If we might wait, we must not hold the config lock. */
ASSERT(txg_how != TXG_WAIT || !dsl_pool_config_held(tx->tx_pool));
while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
dmu_tx_unassign(tx);
@@ -1124,6 +1114,7 @@ dmu_tx_wait(dmu_tx_t *tx)
spa_t *spa = tx->tx_pool->dp_spa;
ASSERT(tx->tx_txg == 0);
ASSERT(!dsl_pool_config_held(tx->tx_pool));
/*
* It's possible that the pool has become active after this thread
@@ -1250,6 +1241,14 @@ dmu_tx_get_txg(dmu_tx_t *tx)
return (tx->tx_txg);
}
dsl_pool_t *
dmu_tx_pool(dmu_tx_t *tx)
{
ASSERT(tx->tx_pool != NULL);
return (tx->tx_pool);
}
void
dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data)
{
+5 -1
View File
@@ -74,7 +74,11 @@ dnode_cons(void *arg, void *unused, int kmflag)
mutex_init(&dn->dn_dbufs_mtx, NULL, MUTEX_DEFAULT, NULL);
cv_init(&dn->dn_notxholds, NULL, CV_DEFAULT, NULL);
refcount_create(&dn->dn_holds);
/*
* Every dbuf has a reference, and dropping a tracked reference is
* O(number of references), so don't track dn_holds.
*/
refcount_create_untracked(&dn->dn_holds);
refcount_create(&dn->dn_tx_holds);
list_link_init(&dn->dn_link);
+1
View File
@@ -481,6 +481,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
dnode_evict_dbufs(dn);
ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL);
ASSERT3P(dn->dn_bonus, ==, NULL);
/*
* XXX - It would be nice to assert this, but we may still
+1175 -2584
View File
File diff suppressed because it is too large Load Diff
+86 -64
View File
@@ -147,28 +147,37 @@ dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr)
return (0);
}
static void
dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
nvlist_t *nvp = arg2;
objset_t *mos = dd->dd_pool->dp_meta_objset;
nvpair_t *whopair = NULL;
uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj;
typedef struct dsl_deleg_arg {
const char *dda_name;
nvlist_t *dda_nvlist;
} dsl_deleg_arg_t;
static void
dsl_deleg_set_sync(void *arg, dmu_tx_t *tx)
{
dsl_deleg_arg_t *dda = arg;
dsl_dir_t *dd;
dsl_pool_t *dp = dmu_tx_pool(tx);
objset_t *mos = dp->dp_meta_objset;
nvpair_t *whopair = NULL;
uint64_t zapobj;
VERIFY0(dsl_dir_hold(dp, dda->dda_name, FTAG, &dd, NULL));
zapobj = dd->dd_phys->dd_deleg_zapobj;
if (zapobj == 0) {
dmu_buf_will_dirty(dd->dd_dbuf, tx);
zapobj = dd->dd_phys->dd_deleg_zapobj = zap_create(mos,
DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx);
}
while ((whopair = nvlist_next_nvpair(nvp, whopair))) {
while ((whopair = nvlist_next_nvpair(dda->dda_nvlist, whopair))) {
const char *whokey = nvpair_name(whopair);
nvlist_t *perms;
nvpair_t *permpair = NULL;
uint64_t jumpobj;
VERIFY(nvpair_value_nvlist(whopair, &perms) == 0);
perms = fnvpair_value_nvlist(whopair);
if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) {
jumpobj = zap_create_link(mos, DMU_OT_DSL_PERMS,
@@ -185,21 +194,27 @@ dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
"%s %s", whokey, perm);
}
}
dsl_dir_rele(dd, FTAG);
}
static void
dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_deleg_unset_sync(void *arg, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
nvlist_t *nvp = arg2;
objset_t *mos = dd->dd_pool->dp_meta_objset;
dsl_deleg_arg_t *dda = arg;
dsl_dir_t *dd;
dsl_pool_t *dp = dmu_tx_pool(tx);
objset_t *mos = dp->dp_meta_objset;
nvpair_t *whopair = NULL;
uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj;
uint64_t zapobj;
if (zapobj == 0)
VERIFY0(dsl_dir_hold(dp, dda->dda_name, FTAG, &dd, NULL));
zapobj = dd->dd_phys->dd_deleg_zapobj;
if (zapobj == 0) {
dsl_dir_rele(dd, FTAG);
return;
}
while ((whopair = nvlist_next_nvpair(nvp, whopair))) {
while ((whopair = nvlist_next_nvpair(dda->dda_nvlist, whopair))) {
const char *whokey = nvpair_name(whopair);
nvlist_t *perms;
nvpair_t *permpair = NULL;
@@ -234,35 +249,40 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx)
"%s %s", whokey, perm);
}
}
dsl_dir_rele(dd, FTAG);
}
static int
dsl_deleg_check(void *arg, dmu_tx_t *tx)
{
dsl_deleg_arg_t *dda = arg;
dsl_dir_t *dd;
int error;
if (spa_version(dmu_tx_pool(tx)->dp_spa) <
SPA_VERSION_DELEGATED_PERMS) {
return (ENOTSUP);
}
error = dsl_dir_hold(dmu_tx_pool(tx), dda->dda_name, FTAG, &dd, NULL);
if (error == 0)
dsl_dir_rele(dd, FTAG);
return (error);
}
int
dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset)
{
dsl_dir_t *dd;
int error;
nvpair_t *whopair = NULL;
int blocks_modified = 0;
dsl_deleg_arg_t dda;
error = dsl_dir_open(ddname, FTAG, &dd, NULL);
if (error)
return (error);
/* nvp must already have been verified to be valid */
if (spa_version(dmu_objset_spa(dd->dd_pool->dp_meta_objset)) <
SPA_VERSION_DELEGATED_PERMS) {
dsl_dir_close(dd, FTAG);
return (ENOTSUP);
}
dda.dda_name = ddname;
dda.dda_nvlist = nvp;
while ((whopair = nvlist_next_nvpair(nvp, whopair)))
blocks_modified++;
error = dsl_sync_task_do(dd->dd_pool, NULL,
return (dsl_sync_task(ddname, dsl_deleg_check,
unset ? dsl_deleg_unset_sync : dsl_deleg_set_sync,
dd, nvp, blocks_modified);
dsl_dir_close(dd, FTAG);
return (error);
&dda, fnvlist_num_pairs(nvp)));
}
/*
@@ -293,10 +313,16 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp)
zap_attribute_t *baseza, *za;
char *source;
error = dsl_dir_open(ddname, FTAG, &startdd, NULL);
if (error)
error = dsl_pool_hold(ddname, FTAG, &dp);
if (error != 0)
return (error);
error = dsl_dir_hold(dp, ddname, FTAG, &startdd, NULL);
if (error != 0) {
dsl_pool_rele(dp, FTAG);
return (error);
}
dp = startdd->dd_pool;
mos = dp->dp_meta_objset;
@@ -307,20 +333,16 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp)
source = kmem_alloc(MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, KM_SLEEP);
VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
rw_enter(&dp->dp_config_rwlock, RW_READER);
for (dd = startdd; dd != NULL; dd = dd->dd_parent) {
nvlist_t *sp_nvp;
uint64_t n;
if (dd->dd_phys->dd_deleg_zapobj &&
(zap_count(mos, dd->dd_phys->dd_deleg_zapobj,
&n) == 0) && n) {
VERIFY(nvlist_alloc(&sp_nvp,
NV_UNIQUE_NAME, KM_SLEEP) == 0);
} else {
if (dd->dd_phys->dd_deleg_zapobj == 0 ||
zap_count(mos, dd->dd_phys->dd_deleg_zapobj, &n) != 0 ||
n == 0)
continue;
}
sp_nvp = fnvlist_alloc();
for (zap_cursor_init(basezc, mos,
dd->dd_phys->dd_deleg_zapobj);
zap_cursor_retrieve(basezc, baseza) == 0;
@@ -330,27 +352,23 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp)
ASSERT(baseza->za_integer_length == 8);
ASSERT(baseza->za_num_integers == 1);
VERIFY(nvlist_alloc(&perms_nvp,
NV_UNIQUE_NAME, KM_SLEEP) == 0);
perms_nvp = fnvlist_alloc();
for (zap_cursor_init(zc, mos, baseza->za_first_integer);
zap_cursor_retrieve(zc, za) == 0;
zap_cursor_advance(zc)) {
VERIFY(nvlist_add_boolean(perms_nvp,
za->za_name) == 0);
fnvlist_add_boolean(perms_nvp, za->za_name);
}
zap_cursor_fini(zc);
VERIFY(nvlist_add_nvlist(sp_nvp, baseza->za_name,
perms_nvp) == 0);
nvlist_free(perms_nvp);
fnvlist_add_nvlist(sp_nvp, baseza->za_name, perms_nvp);
fnvlist_free(perms_nvp);
}
zap_cursor_fini(basezc);
dsl_dir_name(dd, source);
VERIFY(nvlist_add_nvlist(*nvp, source, sp_nvp) == 0);
fnvlist_add_nvlist(*nvp, source, sp_nvp);
nvlist_free(sp_nvp);
}
rw_exit(&dp->dp_config_rwlock);
kmem_free(source, MAXNAMELEN + strlen(MOS_DIR_NAME) + 1);
kmem_free(baseza, sizeof(zap_attribute_t));
@@ -358,7 +376,8 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp)
kmem_free(za, sizeof(zap_attribute_t));
kmem_free(zc, sizeof(zap_cursor_t));
dsl_dir_close(startdd, FTAG);
dsl_dir_rele(startdd, FTAG);
dsl_pool_rele(dp, FTAG);
return (0);
}
@@ -564,7 +583,7 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr)
avl_create(&permsets, perm_set_compare, sizeof (perm_set_t),
offsetof(perm_set_t, p_node));
rw_enter(&dp->dp_config_rwlock, RW_READER);
ASSERT(dsl_pool_config_held(dp));
for (dd = ds->ds_dir; dd != NULL; dd = dd->dd_parent,
checkflag = ZFS_DELEG_DESCENDENT) {
uint64_t zapobj;
@@ -625,7 +644,6 @@ again:
}
error = EPERM;
success:
rw_exit(&dp->dp_config_rwlock);
cookie = NULL;
while ((setnode = avl_destroy_nodes(&permsets, &cookie)) != NULL)
@@ -637,15 +655,19 @@ success:
int
dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr)
{
dsl_pool_t *dp;
dsl_dataset_t *ds;
int error;
error = dsl_dataset_hold(dsname, FTAG, &ds);
if (error)
error = dsl_pool_hold(dsname, FTAG, &dp);
if (error != 0)
return (error);
error = dsl_deleg_access_impl(ds, perm, cr);
dsl_dataset_rele(ds, FTAG);
error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
if (error == 0) {
error = dsl_deleg_access_impl(ds, perm, cr);
dsl_dataset_rele(ds, FTAG);
}
dsl_pool_rele(dp, FTAG);
return (error);
}
+940
View File
@@ -0,0 +1,940 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/dsl_userhold.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_scan.h>
#include <sys/dmu_objset.h>
#include <sys/zap.h>
#include <sys/zfeature.h>
#include <sys/zfs_ioctl.h>
#include <sys/dsl_deleg.h>
typedef struct dmu_snapshots_destroy_arg {
nvlist_t *dsda_snaps;
nvlist_t *dsda_successful_snaps;
boolean_t dsda_defer;
nvlist_t *dsda_errlist;
} dmu_snapshots_destroy_arg_t;
/*
* ds must be owned.
*/
static int
dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
{
if (!dsl_dataset_is_snapshot(ds))
return (EINVAL);
if (dsl_dataset_long_held(ds))
return (EBUSY);
/*
* Only allow deferred destroy on pools that support it.
* NOTE: deferred destroy is only supported on snapshots.
*/
if (defer) {
if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
SPA_VERSION_USERREFS)
return (ENOTSUP);
return (0);
}
/*
* If this snapshot has an elevated user reference count,
* we can't destroy it yet.
*/
if (ds->ds_userrefs > 0)
return (EBUSY);
/*
* Can't delete a branch point.
*/
if (ds->ds_phys->ds_num_children > 1)
return (EEXIST);
return (0);
}
static int
dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx)
{
dmu_snapshots_destroy_arg_t *dsda = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
nvpair_t *pair;
int error = 0;
if (!dmu_tx_is_syncing(tx))
return (0);
for (pair = nvlist_next_nvpair(dsda->dsda_snaps, NULL);
pair != NULL; pair = nvlist_next_nvpair(dsda->dsda_snaps, pair)) {
dsl_dataset_t *ds;
error = dsl_dataset_hold(dp, nvpair_name(pair),
FTAG, &ds);
/*
* If the snapshot does not exist, silently ignore it
* (it's "already destroyed").
*/
if (error == ENOENT)
continue;
if (error == 0) {
error = dsl_destroy_snapshot_check_impl(ds,
dsda->dsda_defer);
dsl_dataset_rele(ds, FTAG);
}
if (error == 0) {
fnvlist_add_boolean(dsda->dsda_successful_snaps,
nvpair_name(pair));
} else {
fnvlist_add_int32(dsda->dsda_errlist,
nvpair_name(pair), error);
}
}
pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL);
if (pair != NULL)
return (fnvpair_value_int32(pair));
return (0);
}
struct process_old_arg {
dsl_dataset_t *ds;
dsl_dataset_t *ds_prev;
boolean_t after_branch_point;
zio_t *pio;
uint64_t used, comp, uncomp;
};
static int
process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
struct process_old_arg *poa = arg;
dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;
if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) {
dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx);
if (poa->ds_prev && !poa->after_branch_point &&
bp->blk_birth >
poa->ds_prev->ds_phys->ds_prev_snap_txg) {
poa->ds_prev->ds_phys->ds_unique_bytes +=
bp_get_dsize_sync(dp->dp_spa, bp);
}
} else {
poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
poa->comp += BP_GET_PSIZE(bp);
poa->uncomp += BP_GET_UCSIZE(bp);
dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
}
return (0);
}
static void
process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx)
{
struct process_old_arg poa = { 0 };
dsl_pool_t *dp = ds->ds_dir->dd_pool;
objset_t *mos = dp->dp_meta_objset;
uint64_t deadlist_obj;
ASSERT(ds->ds_deadlist.dl_oldfmt);
ASSERT(ds_next->ds_deadlist.dl_oldfmt);
poa.ds = ds;
poa.ds_prev = ds_prev;
poa.after_branch_point = after_branch_point;
poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
process_old_cb, &poa, tx));
VERIFY0(zio_wait(poa.pio));
ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes);
/* change snapused */
dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
-poa.used, -poa.comp, -poa.uncomp, tx);
/* swap next's deadlist to our deadlist */
dsl_deadlist_close(&ds->ds_deadlist);
dsl_deadlist_close(&ds_next->ds_deadlist);
deadlist_obj = ds->ds_phys->ds_deadlist_obj;
ds->ds_phys->ds_deadlist_obj = ds_next->ds_phys->ds_deadlist_obj;
ds_next->ds_phys->ds_deadlist_obj = deadlist_obj;
dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
dsl_deadlist_open(&ds_next->ds_deadlist, mos,
ds_next->ds_phys->ds_deadlist_obj);
}
static void
dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
{
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
zap_cursor_t zc;
zap_attribute_t za;
/*
* If it is the old version, dd_clones doesn't exist so we can't
* find the clones, but dsl_deadlist_remove_key() is a no-op so it
* doesn't matter.
*/
if (ds->ds_dir->dd_phys->dd_clones == 0)
return;
for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
dsl_dataset_t *clone;
VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
za.za_first_integer, FTAG, &clone));
if (clone->ds_dir->dd_origin_txg > mintxg) {
dsl_deadlist_remove_key(&clone->ds_deadlist,
mintxg, tx);
dsl_dataset_remove_clones_key(clone, mintxg, tx);
}
dsl_dataset_rele(clone, FTAG);
}
zap_cursor_fini(&zc);
}
void
dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
{
#ifdef ZFS_DEBUG
int err;
#endif
int after_branch_point = FALSE;
dsl_pool_t *dp = ds->ds_dir->dd_pool;
objset_t *mos = dp->dp_meta_objset;
dsl_dataset_t *ds_prev = NULL;
uint64_t obj, old_unique, used = 0, comp = 0, uncomp = 0;
dsl_dataset_t *ds_next, *ds_head, *hds;
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
ASSERT(refcount_is_zero(&ds->ds_longholds));
if (defer &&
(ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)) {
ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
spa_history_log_internal_ds(ds, "defer_destroy", tx, "");
return;
}
ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
/* We need to log before removing it from the namespace. */
spa_history_log_internal_ds(ds, "destroy", tx, "");
dsl_scan_ds_destroyed(ds, tx);
obj = ds->ds_object;
if (ds->ds_phys->ds_prev_snap_obj != 0) {
ASSERT3P(ds->ds_prev, ==, NULL);
VERIFY0(dsl_dataset_hold_obj(dp,
ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
after_branch_point =
(ds_prev->ds_phys->ds_next_snap_obj != obj);
dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
if (after_branch_point &&
ds_prev->ds_phys->ds_next_clones_obj != 0) {
dsl_dataset_remove_from_next_clones(ds_prev, obj, tx);
if (ds->ds_phys->ds_next_snap_obj != 0) {
VERIFY0(zap_add_int(mos,
ds_prev->ds_phys->ds_next_clones_obj,
ds->ds_phys->ds_next_snap_obj, tx));
}
}
if (!after_branch_point) {
ds_prev->ds_phys->ds_next_snap_obj =
ds->ds_phys->ds_next_snap_obj;
}
}
VERIFY0(dsl_dataset_hold_obj(dp,
ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
old_unique = ds_next->ds_phys->ds_unique_bytes;
dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
ds_next->ds_phys->ds_prev_snap_obj =
ds->ds_phys->ds_prev_snap_obj;
ds_next->ds_phys->ds_prev_snap_txg =
ds->ds_phys->ds_prev_snap_txg;
ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
if (ds_next->ds_deadlist.dl_oldfmt) {
process_old_deadlist(ds, ds_prev, ds_next,
after_branch_point, tx);
} else {
/* Adjust prev's unique space. */
if (ds_prev && !after_branch_point) {
dsl_deadlist_space_range(&ds_next->ds_deadlist,
ds_prev->ds_phys->ds_prev_snap_txg,
ds->ds_phys->ds_prev_snap_txg,
&used, &comp, &uncomp);
ds_prev->ds_phys->ds_unique_bytes += used;
}
/* Adjust snapused. */
dsl_deadlist_space_range(&ds_next->ds_deadlist,
ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
&used, &comp, &uncomp);
dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
-used, -comp, -uncomp, tx);
/* Move blocks to be freed to pool's free list. */
dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
&dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg,
tx);
dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
DD_USED_HEAD, used, comp, uncomp, tx);
/* Merge our deadlist into next's and free it. */
dsl_deadlist_merge(&ds_next->ds_deadlist,
ds->ds_phys->ds_deadlist_obj, tx);
}
dsl_deadlist_close(&ds->ds_deadlist);
dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
ds->ds_phys->ds_deadlist_obj = 0;
/* Collapse range in clone heads */
dsl_dataset_remove_clones_key(ds,
ds->ds_phys->ds_creation_txg, tx);
if (dsl_dataset_is_snapshot(ds_next)) {
dsl_dataset_t *ds_nextnext;
/*
* Update next's unique to include blocks which
* were previously shared by only this snapshot
* and it. Those blocks will be born after the
* prev snap and before this snap, and will have
* died after the next snap and before the one
* after that (ie. be on the snap after next's
* deadlist).
*/
VERIFY0(dsl_dataset_hold_obj(dp,
ds_next->ds_phys->ds_next_snap_obj, FTAG, &ds_nextnext));
dsl_deadlist_space_range(&ds_nextnext->ds_deadlist,
ds->ds_phys->ds_prev_snap_txg,
ds->ds_phys->ds_creation_txg,
&used, &comp, &uncomp);
ds_next->ds_phys->ds_unique_bytes += used;
dsl_dataset_rele(ds_nextnext, FTAG);
ASSERT3P(ds_next->ds_prev, ==, NULL);
/* Collapse range in this head. */
VERIFY0(dsl_dataset_hold_obj(dp,
ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &hds));
dsl_deadlist_remove_key(&hds->ds_deadlist,
ds->ds_phys->ds_creation_txg, tx);
dsl_dataset_rele(hds, FTAG);
} else {
ASSERT3P(ds_next->ds_prev, ==, ds);
dsl_dataset_rele(ds_next->ds_prev, ds_next);
ds_next->ds_prev = NULL;
if (ds_prev) {
VERIFY0(dsl_dataset_hold_obj(dp,
ds->ds_phys->ds_prev_snap_obj,
ds_next, &ds_next->ds_prev));
}
dsl_dataset_recalc_head_uniq(ds_next);
/*
* Reduce the amount of our unconsumed refreservation
* being charged to our parent by the amount of
* new unique data we have gained.
*/
if (old_unique < ds_next->ds_reserved) {
int64_t mrsdelta;
uint64_t new_unique =
ds_next->ds_phys->ds_unique_bytes;
ASSERT(old_unique <= new_unique);
mrsdelta = MIN(new_unique - old_unique,
ds_next->ds_reserved - old_unique);
dsl_dir_diduse_space(ds->ds_dir,
DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
}
}
dsl_dataset_rele(ds_next, FTAG);
/*
* This must be done after the dsl_traverse(), because it will
* re-open the objset.
*/
if (ds->ds_objset) {
dmu_objset_evict(ds->ds_objset);
ds->ds_objset = NULL;
}
/* remove from snapshot namespace */
ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
VERIFY0(dsl_dataset_hold_obj(dp,
ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
VERIFY0(dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
{
uint64_t val;
err = dsl_dataset_snap_lookup(ds_head,
ds->ds_snapname, &val);
ASSERT0(err);
ASSERT3U(val, ==, obj);
}
#endif
VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx));
dsl_dataset_rele(ds_head, FTAG);
if (ds_prev != NULL)
dsl_dataset_rele(ds_prev, FTAG);
spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
if (ds->ds_phys->ds_next_clones_obj != 0) {
ASSERTV(uint64_t count);
ASSERT0(zap_count(mos,
ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
VERIFY0(dmu_object_free(mos,
ds->ds_phys->ds_next_clones_obj, tx));
}
if (ds->ds_phys->ds_props_obj != 0)
VERIFY0(zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
if (ds->ds_phys->ds_userrefs_obj != 0)
VERIFY0(zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));
dsl_dir_rele(ds->ds_dir, ds);
ds->ds_dir = NULL;
VERIFY0(dmu_object_free(mos, obj, tx));
}
static void
dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx)
{
dmu_snapshots_destroy_arg_t *dsda = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
nvpair_t *pair;
for (pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, NULL);
pair != NULL;
pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, pair)) {
dsl_dataset_t *ds;
VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
dsl_destroy_snapshot_sync_impl(ds, dsda->dsda_defer, tx);
dsl_dataset_rele(ds, FTAG);
}
}
/*
* The semantics of this function are described in the comment above
* lzc_destroy_snaps(). To summarize:
*
* The snapshots must all be in the same pool.
*
* Snapshots that don't exist will be silently ignored (considered to be
* "already deleted").
*
* On success, all snaps will be destroyed and this will return 0.
* On failure, no snaps will be destroyed, the errlist will be filled in,
* and this will return an errno.
*/
int
dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer,
nvlist_t *errlist)
{
dmu_snapshots_destroy_arg_t dsda;
int error;
nvpair_t *pair;
pair = nvlist_next_nvpair(snaps, NULL);
if (pair == NULL)
return (0);
dsda.dsda_snaps = snaps;
VERIFY0(nvlist_alloc(&dsda.dsda_successful_snaps, NV_UNIQUE_NAME, KM_PUSHPAGE));
dsda.dsda_defer = defer;
dsda.dsda_errlist = errlist;
error = dsl_sync_task(nvpair_name(pair),
dsl_destroy_snapshot_check, dsl_destroy_snapshot_sync,
&dsda, 0);
fnvlist_free(dsda.dsda_successful_snaps);
return (error);
}
int
dsl_destroy_snapshot(const char *name, boolean_t defer)
{
int error;
nvlist_t *nvl;
nvlist_t *errlist;
VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE));
VERIFY0(nvlist_alloc(&errlist, NV_UNIQUE_NAME, KM_PUSHPAGE));
fnvlist_add_boolean(nvl, name);
error = dsl_destroy_snapshots_nvl(nvl, defer, errlist);
fnvlist_free(errlist);
fnvlist_free(nvl);
return (error);
}
struct killarg {
dsl_dataset_t *ds;
dmu_tx_t *tx;
};
/* ARGSUSED */
static int
kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
struct killarg *ka = arg;
dmu_tx_t *tx = ka->tx;
if (bp == NULL)
return (0);
if (zb->zb_level == ZB_ZIL_LEVEL) {
ASSERT(zilog != NULL);
/*
* It's a block in the intent log. It has no
* accounting, so just free it.
*/
dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
} else {
ASSERT(zilog == NULL);
ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
(void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
}
return (0);
}
static void
old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
{
struct killarg ka;
/*
* Free everything that we point to (that's born after
* the previous snapshot, if we are a clone)
*
* NB: this should be very quick, because we already
* freed all the objects in open context.
*/
ka.ds = ds;
ka.tx = tx;
VERIFY0(traverse_dataset(ds,
ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
kill_blkptr, &ka));
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
}
typedef struct dsl_destroy_head_arg {
const char *ddha_name;
} dsl_destroy_head_arg_t;
int
dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
{
int error;
uint64_t count;
objset_t *mos;
if (dsl_dataset_is_snapshot(ds))
return (EINVAL);
if (refcount_count(&ds->ds_longholds) != expected_holds)
return (EBUSY);
mos = ds->ds_dir->dd_pool->dp_meta_objset;
/*
* Can't delete a head dataset if there are snapshots of it.
* (Except if the only snapshots are from the branch we cloned
* from.)
*/
if (ds->ds_prev != NULL &&
ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
return (EBUSY);
/*
* Can't delete if there are children of this fs.
*/
error = zap_count(mos,
ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
if (error != 0)
return (error);
if (count != 0)
return (EEXIST);
if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) &&
ds->ds_prev->ds_phys->ds_num_children == 2 &&
ds->ds_prev->ds_userrefs == 0) {
/* We need to remove the origin snapshot as well. */
if (!refcount_is_zero(&ds->ds_prev->ds_longholds))
return (EBUSY);
}
return (0);
}
static int
dsl_destroy_head_check(void *arg, dmu_tx_t *tx)
{
dsl_destroy_head_arg_t *ddha = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
int error;
error = dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds);
if (error != 0)
return (error);
error = dsl_destroy_head_check_impl(ds, 0);
dsl_dataset_rele(ds, FTAG);
return (error);
}
static void
dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx)
{
dsl_dir_t *dd;
dsl_pool_t *dp = dmu_tx_pool(tx);
objset_t *mos = dp->dp_meta_objset;
dd_used_t t;
ASSERT(RRW_WRITE_HELD(&dmu_tx_pool(tx)->dp_config_rwlock));
VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd));
ASSERT0(dd->dd_phys->dd_head_dataset_obj);
/*
* Remove our reservation. The impl() routine avoids setting the
* actual property, which would require the (already destroyed) ds.
*/
dsl_dir_set_reservation_sync_impl(dd, 0, tx);
ASSERT0(dd->dd_phys->dd_used_bytes);
ASSERT0(dd->dd_phys->dd_reserved);
for (t = 0; t < DD_USED_NUM; t++)
ASSERT0(dd->dd_phys->dd_used_breakdown[t]);
VERIFY0(zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
VERIFY0(zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
VERIFY0(dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx));
VERIFY0(zap_remove(mos,
dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx));
dsl_dir_rele(dd, FTAG);
VERIFY0(dmu_object_free(mos, ddobj, tx));
}
void
dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
{
dsl_pool_t *dp = dmu_tx_pool(tx);
objset_t *mos = dp->dp_meta_objset;
uint64_t obj, ddobj, prevobj = 0;
boolean_t rmorigin;
zfeature_info_t *async_destroy;
objset_t *os;
ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
ASSERT(ds->ds_prev == NULL ||
ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
/* We need to log before removing it from the namespace. */
spa_history_log_internal_ds(ds, "destroy", tx, "");
rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
DS_IS_DEFER_DESTROY(ds->ds_prev) &&
ds->ds_prev->ds_phys->ds_num_children == 2 &&
ds->ds_prev->ds_userrefs == 0);
/* Remove our reservation */
if (ds->ds_reserved != 0) {
dsl_dataset_set_refreservation_sync_impl(ds,
(ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
0, tx);
ASSERT0(ds->ds_reserved);
}
dsl_scan_ds_destroyed(ds, tx);
obj = ds->ds_object;
if (ds->ds_phys->ds_prev_snap_obj != 0) {
/* This is a clone */
ASSERT(ds->ds_prev != NULL);
ASSERT3U(ds->ds_prev->ds_phys->ds_next_snap_obj, !=, obj);
ASSERT0(ds->ds_phys->ds_next_snap_obj);
dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
if (ds->ds_prev->ds_phys->ds_next_clones_obj != 0) {
dsl_dataset_remove_from_next_clones(ds->ds_prev,
obj, tx);
}
ASSERT3U(ds->ds_prev->ds_phys->ds_num_children, >, 1);
ds->ds_prev->ds_phys->ds_num_children--;
}
async_destroy =
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
/*
* Destroy the deadlist. Unless it's a clone, the
* deadlist should be empty. (If it's a clone, it's
* safe to ignore the deadlist contents.)
*/
dsl_deadlist_close(&ds->ds_deadlist);
dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
ds->ds_phys->ds_deadlist_obj = 0;
VERIFY0(dmu_objset_from_ds(ds, &os));
if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
old_synchronous_dataset_destroy(ds, tx);
} else {
/*
* Move the bptree into the pool's list of trees to
* clean up and update space accounting information.
*/
uint64_t used, comp, uncomp;
zil_destroy_sync(dmu_objset_zil(os), tx);
if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
spa_feature_incr(dp->dp_spa, async_destroy, tx);
dp->dp_bptree_obj = bptree_alloc(mos, tx);
VERIFY0(zap_add(mos,
DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
&dp->dp_bptree_obj, tx));
}
used = ds->ds_dir->dd_phys->dd_used_bytes;
comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
ds->ds_phys->ds_unique_bytes == used);
bptree_add(mos, dp->dp_bptree_obj,
&ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
used, comp, uncomp, tx);
dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
-used, -comp, -uncomp, tx);
dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
used, comp, uncomp, tx);
}
if (ds->ds_prev != NULL) {
if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
VERIFY0(zap_remove_int(mos,
ds->ds_prev->ds_dir->dd_phys->dd_clones,
ds->ds_object, tx));
}
prevobj = ds->ds_prev->ds_object;
dsl_dataset_rele(ds->ds_prev, ds);
ds->ds_prev = NULL;
}
/*
* This must be done after the dsl_traverse(), because it will
* re-open the objset.
*/
if (ds->ds_objset) {
dmu_objset_evict(ds->ds_objset);
ds->ds_objset = NULL;
}
/* Erase the link in the dir */
dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
ddobj = ds->ds_dir->dd_object;
ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
VERIFY0(zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx));
spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
ASSERT0(ds->ds_phys->ds_next_clones_obj);
ASSERT0(ds->ds_phys->ds_props_obj);
ASSERT0(ds->ds_phys->ds_userrefs_obj);
dsl_dir_rele(ds->ds_dir, ds);
ds->ds_dir = NULL;
VERIFY0(dmu_object_free(mos, obj, tx));
dsl_dir_destroy_sync(ddobj, tx);
if (rmorigin) {
dsl_dataset_t *prev;
VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev));
dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx);
dsl_dataset_rele(prev, FTAG);
}
}
static void
dsl_destroy_head_sync(void *arg, dmu_tx_t *tx)
{
dsl_destroy_head_arg_t *ddha = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
dsl_destroy_head_sync_impl(ds, tx);
dsl_dataset_rele(ds, FTAG);
}
static void
dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx)
{
dsl_destroy_head_arg_t *ddha = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
/* Mark it as inconsistent on-disk, in case we crash */
dmu_buf_will_dirty(ds->ds_dbuf, tx);
ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
spa_history_log_internal_ds(ds, "destroy begin", tx, "");
dsl_dataset_rele(ds, FTAG);
}
int
dsl_destroy_head(const char *name)
{
dsl_destroy_head_arg_t ddha;
int error;
spa_t *spa;
boolean_t isenabled;
#ifdef _KERNEL
zfs_destroy_unmount_origin(name);
#endif
error = spa_open(name, &spa, FTAG);
if (error != 0)
return (error);
isenabled = spa_feature_is_enabled(spa,
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]);
spa_close(spa, FTAG);
ddha.ddha_name = name;
if (!isenabled) {
objset_t *os;
error = dsl_sync_task(name, dsl_destroy_head_check,
dsl_destroy_head_begin_sync, &ddha, 0);
if (error != 0)
return (error);
/*
* Head deletion is processed in one txg on old pools;
* remove the objects from open context so that the txg sync
* is not too long.
*/
error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os);
if (error == 0) {
uint64_t obj;
uint64_t prev_snap_txg =
dmu_objset_ds(os)->ds_phys->ds_prev_snap_txg;
for (obj = 0; error == 0;
error = dmu_object_next(os, &obj, FALSE,
prev_snap_txg))
(void) dmu_free_object(os, obj);
/* sync out all frees */
txg_wait_synced(dmu_objset_pool(os), 0);
dmu_objset_disown(os, FTAG);
}
}
return (dsl_sync_task(name, dsl_destroy_head_check,
dsl_destroy_head_sync, &ddha, 0));
}
/*
* Note, this function is used as the callback for dmu_objset_find(). We
* always return 0 so that we will continue to find and process
* inconsistent datasets, even if we encounter an error trying to
* process one of them.
*/
/* ARGSUSED */
int
dsl_destroy_inconsistent(const char *dsname, void *arg)
{
objset_t *os;
if (dmu_objset_hold(dsname, FTAG, &os) == 0) {
boolean_t inconsistent = DS_IS_INCONSISTENT(dmu_objset_ds(os));
dmu_objset_rele(os, FTAG);
if (inconsistent)
(void) dsl_destroy_head(dsname);
}
return (0);
}
#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(dsl_destroy_head);
EXPORT_SYMBOL(dsl_destroy_head_sync_impl);
EXPORT_SYMBOL(dsl_dataset_user_hold_check_one);
EXPORT_SYMBOL(dsl_destroy_snapshot_sync_impl);
EXPORT_SYMBOL(dsl_destroy_inconsistent);
EXPORT_SYMBOL(dsl_dataset_user_release_tmp);
EXPORT_SYMBOL(dsl_destroy_head_check_impl);
#endif
+258 -335
View File
@@ -40,8 +40,6 @@
#include "zfs_namecheck.h"
static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
static void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd,
uint64_t value, dmu_tx_t *tx);
/* ARGSUSED */
static void
@@ -58,7 +56,7 @@ dsl_dir_evict(dmu_buf_t *db, void *arg)
}
if (dd->dd_parent)
dsl_dir_close(dd->dd_parent, dd);
dsl_dir_rele(dd->dd_parent, dd);
spa_close(dd->dd_pool->dp_spa, dd);
@@ -72,18 +70,17 @@ dsl_dir_evict(dmu_buf_t *db, void *arg)
}
int
dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
const char *tail, void *tag, dsl_dir_t **ddp)
{
dmu_buf_t *dbuf;
dsl_dir_t *dd;
int err;
ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
dsl_pool_sync_context(dp));
ASSERT(dsl_pool_config_held(dp));
err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
if (err)
if (err != 0)
return (err);
dd = dmu_buf_get_user(dbuf);
#ifdef ZFS_DEBUG
@@ -110,9 +107,9 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
dsl_dir_snap_cmtime_update(dd);
if (dd->dd_phys->dd_parent_obj) {
err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj,
err = dsl_dir_hold_obj(dp, dd->dd_phys->dd_parent_obj,
NULL, dd, &dd->dd_parent);
if (err)
if (err != 0)
goto errout;
if (tail) {
#ifdef ZFS_DEBUG
@@ -129,7 +126,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
dd->dd_parent->dd_phys->dd_child_dir_zapobj,
ddobj, 0, dd->dd_myname);
}
if (err)
if (err != 0)
goto errout;
} else {
(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
@@ -146,7 +143,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
*/
err = dmu_bonus_hold(dp->dp_meta_objset,
dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus);
if (err)
if (err != 0)
goto errout;
origin_phys = origin_bonus->db_data;
dd->dd_origin_txg =
@@ -158,7 +155,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
dsl_dir_evict);
if (winner) {
if (dd->dd_parent)
dsl_dir_close(dd->dd_parent, dd);
dsl_dir_rele(dd->dd_parent, dd);
mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t));
dd = winner;
@@ -185,7 +182,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
errout:
if (dd->dd_parent)
dsl_dir_close(dd->dd_parent, dd);
dsl_dir_rele(dd->dd_parent, dd);
mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t));
dmu_buf_rele(dbuf, tag);
@@ -193,7 +190,7 @@ errout:
}
void
dsl_dir_close(dsl_dir_t *dd, void *tag)
dsl_dir_rele(dsl_dir_t *dd, void *tag)
{
dprintf_dd(dd, "%s\n", "");
spa_close(dd->dd_pool->dp_spa, tag);
@@ -250,6 +247,7 @@ static int
getcomponent(const char *path, char *component, const char **nextp)
{
char *p;
if ((path == NULL) || (path[0] == '\0'))
return (ENOENT);
/* This would be a good place to reserve some namespace... */
@@ -272,10 +270,10 @@ getcomponent(const char *path, char *component, const char **nextp)
(void) strcpy(component, path);
p = NULL;
} else if (p[0] == '/') {
if (p-path >= MAXNAMELEN)
if (p - path >= MAXNAMELEN)
return (ENAMETOOLONG);
(void) strncpy(component, path, p - path);
component[p-path] = '\0';
component[p - path] = '\0';
p++;
} else if (p[0] == '@') {
/*
@@ -284,66 +282,57 @@ getcomponent(const char *path, char *component, const char **nextp)
*/
if (strchr(path, '/'))
return (EINVAL);
if (p-path >= MAXNAMELEN)
if (p - path >= MAXNAMELEN)
return (ENAMETOOLONG);
(void) strncpy(component, path, p - path);
component[p-path] = '\0';
component[p - path] = '\0';
} else {
ASSERT(!"invalid p");
panic("invalid p=%p", (void *)p);
}
*nextp = p;
return (0);
}
/*
* same as dsl_dir_open, ignore the first component of name and use the
* spa instead
* Return the dsl_dir_t, and possibly the last component which couldn't
* be found in *tail. The name must be in the specified dsl_pool_t. This
* thread must hold the dp_config_rwlock for the pool. Returns NULL if the
* path is bogus, or if tail==NULL and we couldn't parse the whole name.
* (*tail)[0] == '@' means that the last component is a snapshot.
*/
int
dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
dsl_dir_t **ddp, const char **tailp)
{
char *buf;
const char *next, *nextnext = NULL;
const char *spaname, *next, *nextnext = NULL;
int err;
dsl_dir_t *dd;
dsl_pool_t *dp;
uint64_t ddobj;
int openedspa = FALSE;
dprintf("%s\n", name);
buf = kmem_alloc(MAXNAMELEN, KM_PUSHPAGE);
err = getcomponent(name, buf, &next);
if (err)
if (err != 0)
goto error;
if (spa == NULL) {
err = spa_open(buf, &spa, FTAG);
if (err) {
dprintf("spa_open(%s) failed\n", buf);
goto error;
}
openedspa = TRUE;
/* XXX this assertion belongs in spa_open */
ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa)));
/* Make sure the name is in the specified pool. */
spaname = spa_name(dp->dp_spa);
if (strcmp(buf, spaname) != 0) {
err = EINVAL;
goto error;
}
dp = spa_get_dsl(spa);
ASSERT(dsl_pool_config_held(dp));
rw_enter(&dp->dp_config_rwlock, RW_READER);
err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
if (err) {
rw_exit(&dp->dp_config_rwlock);
if (openedspa)
spa_close(spa, FTAG);
err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
if (err != 0) {
goto error;
}
while (next != NULL) {
dsl_dir_t *child_ds;
err = getcomponent(next, buf, &nextnext);
if (err)
if (err != 0)
break;
ASSERT(next[0] != '\0');
if (next[0] == '@')
@@ -354,25 +343,22 @@ dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
err = zap_lookup(dp->dp_meta_objset,
dd->dd_phys->dd_child_dir_zapobj,
buf, sizeof (ddobj), 1, &ddobj);
if (err) {
if (err != 0) {
if (err == ENOENT)
err = 0;
break;
}
err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds);
if (err)
err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_ds);
if (err != 0)
break;
dsl_dir_close(dd, tag);
dsl_dir_rele(dd, tag);
dd = child_ds;
next = nextnext;
}
rw_exit(&dp->dp_config_rwlock);
if (err) {
dsl_dir_close(dd, tag);
if (openedspa)
spa_close(spa, FTAG);
if (err != 0) {
dsl_dir_rele(dd, tag);
goto error;
}
@@ -383,32 +369,18 @@ dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
if (next != NULL &&
(tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
/* bad path name */
dsl_dir_close(dd, tag);
dsl_dir_rele(dd, tag);
dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
err = ENOENT;
}
if (tailp)
if (tailp != NULL)
*tailp = next;
if (openedspa)
spa_close(spa, FTAG);
*ddp = dd;
error:
kmem_free(buf, MAXNAMELEN);
return (err);
}
/*
* Return the dsl_dir_t, and possibly the last component which couldn't
* be found in *tail. Return NULL if the path is bogus, or if
* tail==NULL and we couldn't parse the whole name. (*tail)[0] == '@'
* means that the last component is a snapshot.
*/
int
dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp)
{
return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp));
}
uint64_t
dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
dmu_tx_t *tx)
@@ -446,71 +418,6 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
return (ddobj);
}
/* ARGSUSED */
int
dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
dsl_pool_t *dp = dd->dd_pool;
objset_t *mos = dp->dp_meta_objset;
int err;
uint64_t count;
/*
* There should be exactly two holds, both from
* dsl_dataset_destroy: one on the dd directory, and one on its
* head ds. If there are more holds, then a concurrent thread is
* performing a lookup inside this dir while we're trying to destroy
* it. To minimize this possibility, we perform this check only
* in syncing context and fail the operation if we encounter
* additional holds. The dp_config_rwlock ensures that nobody else
* opens it after we check.
*/
if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 2)
return (EBUSY);
err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count);
if (err)
return (err);
if (count != 0)
return (EEXIST);
return (0);
}
void
dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
objset_t *mos = dd->dd_pool->dp_meta_objset;
uint64_t obj;
dd_used_t t;
ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock));
ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
/*
* Remove our reservation. The impl() routine avoids setting the
* actual property, which would require the (already destroyed) ds.
*/
dsl_dir_set_reservation_sync_impl(dd, 0, tx);
ASSERT0(dd->dd_phys->dd_used_bytes);
ASSERT0(dd->dd_phys->dd_reserved);
for (t = 0; t < DD_USED_NUM; t++)
ASSERT0(dd->dd_phys->dd_used_breakdown[t]);
VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
VERIFY(0 == dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx));
VERIFY(0 == zap_remove(mos,
dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx));
obj = dd->dd_object;
dsl_dir_close(dd, tag);
VERIFY(0 == dmu_object_free(mos, obj, tx));
}
boolean_t
dsl_dir_is_clone(dsl_dir_t *dd)
{
@@ -546,18 +453,16 @@ dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
}
mutex_exit(&dd->dd_lock);
rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
if (dsl_dir_is_clone(dd)) {
dsl_dataset_t *ds;
char buf[MAXNAMELEN];
VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
dd->dd_phys->dd_origin_obj, FTAG, &ds));
dsl_dataset_name(ds, buf);
dsl_dataset_rele(ds, FTAG);
dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
}
rw_exit(&dd->dd_pool->dp_config_rwlock);
}
void
@@ -567,7 +472,7 @@ dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx)
ASSERT(dd->dd_phys);
if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) {
if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg)) {
/* up the hold count until we can be written out */
dmu_buf_add_ref(dd->dd_dbuf, dd);
}
@@ -854,7 +759,7 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
FALSE, asize > usize, tr_list, tx, TRUE);
}
if (err)
if (err != 0)
dsl_dir_tempreserve_clear(tr_list, tx);
else
*tr_cookiep = tr_list;
@@ -1007,118 +912,123 @@ dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
mutex_exit(&dd->dd_lock);
}
typedef struct dsl_dir_set_qr_arg {
const char *ddsqra_name;
zprop_source_t ddsqra_source;
uint64_t ddsqra_value;
} dsl_dir_set_qr_arg_t;
static int
dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_set_quota_check(void *arg, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
dsl_dir_t *dd = ds->ds_dir;
dsl_prop_setarg_t *psa = arg2;
int err;
uint64_t towrite;
dsl_dir_set_qr_arg_t *ddsqra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
int error;
uint64_t towrite, newval;
if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
return (err);
error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
if (error != 0)
return (error);
if (psa->psa_effective_value == 0)
error = dsl_prop_predict(ds->ds_dir, "quota",
ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
if (error != 0) {
dsl_dataset_rele(ds, FTAG);
return (error);
}
if (newval == 0) {
dsl_dataset_rele(ds, FTAG);
return (0);
}
mutex_enter(&dd->dd_lock);
mutex_enter(&ds->ds_dir->dd_lock);
/*
* If we are doing the preliminary check in open context, and
* there are pending changes, then don't fail it, since the
* pending changes could under-estimate the amount of space to be
* freed up.
*/
towrite = dsl_dir_space_towrite(dd);
towrite = dsl_dir_space_towrite(ds->ds_dir);
if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
(psa->psa_effective_value < dd->dd_phys->dd_reserved ||
psa->psa_effective_value < dd->dd_phys->dd_used_bytes + towrite)) {
err = ENOSPC;
(newval < ds->ds_dir->dd_phys->dd_reserved ||
newval < ds->ds_dir->dd_phys->dd_used_bytes + towrite)) {
error = ENOSPC;
}
mutex_exit(&dd->dd_lock);
return (err);
mutex_exit(&ds->ds_dir->dd_lock);
dsl_dataset_rele(ds, FTAG);
return (error);
}
extern dsl_syncfunc_t dsl_prop_set_sync;
static void
dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_set_quota_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
dsl_dir_t *dd = ds->ds_dir;
dsl_prop_setarg_t *psa = arg2;
uint64_t effective_value = psa->psa_effective_value;
dsl_dir_set_qr_arg_t *ddsqra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
uint64_t newval;
dsl_prop_set_sync(ds, psa, tx);
DSL_PROP_CHECK_PREDICTION(dd, psa);
VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
dmu_buf_will_dirty(dd->dd_dbuf, tx);
dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_QUOTA),
ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
&ddsqra->ddsqra_value, tx);
mutex_enter(&dd->dd_lock);
dd->dd_phys->dd_quota = effective_value;
mutex_exit(&dd->dd_lock);
VERIFY0(dsl_prop_get_int_ds(ds,
zfs_prop_to_name(ZFS_PROP_QUOTA), &newval));
spa_history_log_internal_dd(dd, "set quota", tx,
"quota=%lld", (longlong_t)effective_value);
dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
mutex_enter(&ds->ds_dir->dd_lock);
ds->ds_dir->dd_phys->dd_quota = newval;
mutex_exit(&ds->ds_dir->dd_lock);
dsl_dataset_rele(ds, FTAG);
}
int
dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota)
{
dsl_dir_t *dd;
dsl_dataset_t *ds;
dsl_prop_setarg_t psa;
int err;
dsl_dir_set_qr_arg_t ddsqra;
dsl_prop_setarg_init_uint64(&psa, "quota", source, &quota);
ddsqra.ddsqra_name = ddname;
ddsqra.ddsqra_source = source;
ddsqra.ddsqra_value = quota;
err = dsl_dataset_hold(ddname, FTAG, &ds);
if (err)
return (err);
err = dsl_dir_open(ddname, FTAG, &dd, NULL);
if (err) {
dsl_dataset_rele(ds, FTAG);
return (err);
}
ASSERT(ds->ds_dir == dd);
/*
* If someone removes a file, then tries to set the quota, we want to
* make sure the file freeing takes effect.
*/
txg_wait_open(dd->dd_pool, 0);
err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check,
dsl_dir_set_quota_sync, ds, &psa, 0);
dsl_dir_close(dd, FTAG);
dsl_dataset_rele(ds, FTAG);
return (err);
return (dsl_sync_task(ddname, dsl_dir_set_quota_check,
dsl_dir_set_quota_sync, &ddsqra, 0));
}
int
dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_set_reservation_check(void *arg, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
dsl_dir_t *dd = ds->ds_dir;
dsl_prop_setarg_t *psa = arg2;
uint64_t effective_value;
uint64_t used, avail;
int err;
dsl_dir_set_qr_arg_t *ddsqra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
dsl_dir_t *dd;
uint64_t newval, used, avail;
int error;
if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
return (err);
effective_value = psa->psa_effective_value;
error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
if (error != 0)
return (error);
dd = ds->ds_dir;
/*
* If we are doing the preliminary check in open context, the
* space estimates may be inaccurate.
*/
if (!dmu_tx_is_syncing(tx))
if (!dmu_tx_is_syncing(tx)) {
dsl_dataset_rele(ds, FTAG);
return (0);
}
error = dsl_prop_predict(ds->ds_dir,
zfs_prop_to_name(ZFS_PROP_RESERVATION),
ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
if (error != 0) {
dsl_dataset_rele(ds, FTAG);
return (error);
}
mutex_enter(&dd->dd_lock);
used = dd->dd_phys->dd_used_bytes;
@@ -1131,21 +1041,21 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used;
}
if (MAX(used, effective_value) > MAX(used, dd->dd_phys->dd_reserved)) {
uint64_t delta = MAX(used, effective_value) -
if (MAX(used, newval) > MAX(used, dd->dd_phys->dd_reserved)) {
uint64_t delta = MAX(used, newval) -
MAX(used, dd->dd_phys->dd_reserved);
if (delta > avail)
return (ENOSPC);
if (dd->dd_phys->dd_quota > 0 &&
effective_value > dd->dd_phys->dd_quota)
return (ENOSPC);
if (delta > avail ||
(dd->dd_phys->dd_quota > 0 &&
newval > dd->dd_phys->dd_quota))
error = ENOSPC;
}
return (0);
dsl_dataset_rele(ds, FTAG);
return (error);
}
static void
void
dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)
{
uint64_t used;
@@ -1167,51 +1077,38 @@ dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)
}
static void
dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_set_reservation_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
dsl_dir_t *dd = ds->ds_dir;
dsl_prop_setarg_t *psa = arg2;
uint64_t value = psa->psa_effective_value;
dsl_dir_set_qr_arg_t *ddsqra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
uint64_t newval;
dsl_prop_set_sync(ds, psa, tx);
DSL_PROP_CHECK_PREDICTION(dd, psa);
VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_RESERVATION),
ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
&ddsqra->ddsqra_value, tx);
VERIFY0(dsl_prop_get_int_ds(ds,
zfs_prop_to_name(ZFS_PROP_RESERVATION), &newval));
dsl_dir_set_reservation_sync_impl(dd, value, tx);
spa_history_log_internal_dd(dd, "set reservation", tx,
"reservation=%lld", (longlong_t)value);
dsl_dir_set_reservation_sync_impl(ds->ds_dir, newval, tx);
dsl_dataset_rele(ds, FTAG);
}
int
dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
uint64_t reservation)
{
dsl_dir_t *dd;
dsl_dataset_t *ds;
dsl_prop_setarg_t psa;
int err;
dsl_dir_set_qr_arg_t ddsqra;
dsl_prop_setarg_init_uint64(&psa, "reservation", source, &reservation);
ddsqra.ddsqra_name = ddname;
ddsqra.ddsqra_source = source;
ddsqra.ddsqra_value = reservation;
err = dsl_dataset_hold(ddname, FTAG, &ds);
if (err)
return (err);
err = dsl_dir_open(ddname, FTAG, &dd, NULL);
if (err) {
dsl_dataset_rele(ds, FTAG);
return (err);
}
ASSERT(ds->ds_dir == dd);
err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check,
dsl_dir_set_reservation_sync, ds, &psa, 0);
dsl_dir_close(dd, FTAG);
dsl_dataset_rele(ds, FTAG);
return (err);
return (dsl_sync_task(ddname, dsl_dir_set_reservation_check,
dsl_dir_set_reservation_sync, &ddsqra, 0));
}
static dsl_dir_t *
@@ -1243,79 +1140,123 @@ would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor)
return (would_change(dd->dd_parent, delta, ancestor));
}
struct renamearg {
dsl_dir_t *newparent;
const char *mynewname;
};
typedef struct dsl_dir_rename_arg {
const char *ddra_oldname;
const char *ddra_newname;
} dsl_dir_rename_arg_t;
/* ARGSUSED */
static int
dsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
int *deltap = arg;
char namebuf[MAXNAMELEN];
dsl_dataset_name(ds, namebuf);
if (strlen(namebuf) + *deltap >= MAXNAMELEN)
return (ENAMETOOLONG);
return (0);
}
static int
dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_rename_check(void *arg, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
struct renamearg *ra = arg2;
dsl_pool_t *dp = dd->dd_pool;
objset_t *mos = dp->dp_meta_objset;
int err;
uint64_t val;
dsl_dir_rename_arg_t *ddra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dir_t *dd, *newparent;
const char *mynewname;
int error;
int delta = strlen(ddra->ddra_newname) - strlen(ddra->ddra_oldname);
/*
* There should only be one reference, from dmu_objset_rename().
* Fleeting holds are also possible (eg, from "zfs list" getting
* stats), but any that are present in open context will likely
* be gone by syncing context, so only fail from syncing
* context.
*/
if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 1)
return (EBUSY);
/* target dir should exist */
error = dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL);
if (error != 0)
return (error);
/* check for existing name */
err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
ra->mynewname, 8, 1, &val);
if (err == 0)
/* new parent should exist */
error = dsl_dir_hold(dp, ddra->ddra_newname, FTAG,
&newparent, &mynewname);
if (error != 0) {
dsl_dir_rele(dd, FTAG);
return (error);
}
/* can't rename to different pool */
if (dd->dd_pool != newparent->dd_pool) {
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
return (ENXIO);
}
/* new name should not already exist */
if (mynewname == NULL) {
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
return (EEXIST);
if (err != ENOENT)
return (err);
}
if (ra->newparent != dd->dd_parent) {
/* if the name length is growing, validate child name lengths */
if (delta > 0) {
error = dmu_objset_find_dp(dp, dd->dd_object, dsl_valid_rename,
&delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
if (error != 0) {
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
return (error);
}
}
if (newparent != dd->dd_parent) {
/* is there enough space? */
uint64_t myspace =
MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved);
/* no rename into our descendant */
if (closest_common_ancestor(dd, ra->newparent) == dd)
if (closest_common_ancestor(dd, newparent) == dd) {
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
return (EINVAL);
}
if ((err = dsl_dir_transfer_possible(dd->dd_parent,
ra->newparent, myspace)))
return (err);
error = dsl_dir_transfer_possible(dd->dd_parent,
newparent, myspace);
if (error != 0) {
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
return (error);
}
}
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
return (0);
}
static void
dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_rename_sync(void *arg, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
struct renamearg *ra = arg2;
dsl_pool_t *dp = dd->dd_pool;
dsl_dir_rename_arg_t *ddra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dir_t *dd, *newparent;
const char *mynewname;
int error;
objset_t *mos = dp->dp_meta_objset;
int err;
char namebuf[MAXNAMELEN];
ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2);
VERIFY0(dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL));
VERIFY0(dsl_dir_hold(dp, ddra->ddra_newname, FTAG, &newparent,
&mynewname));
/* Log this before we change the name. */
dsl_dir_name(ra->newparent, namebuf);
spa_history_log_internal_dd(dd, "rename", tx,
"-> %s/%s", namebuf, ra->mynewname);
"-> %s", ddra->ddra_newname);
if (ra->newparent != dd->dd_parent) {
if (newparent != dd->dd_parent) {
dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
-dd->dd_phys->dd_used_bytes,
-dd->dd_phys->dd_compressed_bytes,
-dd->dd_phys->dd_uncompressed_bytes, tx);
dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD,
dsl_dir_diduse_space(newparent, DD_USED_CHILD,
dd->dd_phys->dd_used_bytes,
dd->dd_phys->dd_compressed_bytes,
dd->dd_phys->dd_uncompressed_bytes, tx);
@@ -1326,7 +1267,7 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
-unused_rsrv, 0, 0, tx);
dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD_RSRV,
dsl_dir_diduse_space(newparent, DD_USED_CHILD_RSRV,
unused_rsrv, 0, 0, tx);
}
}
@@ -1334,52 +1275,36 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dmu_buf_will_dirty(dd->dd_dbuf, tx);
/* remove from old parent zapobj */
err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
error = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
dd->dd_myname, tx);
ASSERT0(err);
ASSERT0(error);
(void) strcpy(dd->dd_myname, ra->mynewname);
dsl_dir_close(dd->dd_parent, dd);
dd->dd_phys->dd_parent_obj = ra->newparent->dd_object;
VERIFY(0 == dsl_dir_open_obj(dd->dd_pool,
ra->newparent->dd_object, NULL, dd, &dd->dd_parent));
(void) strcpy(dd->dd_myname, mynewname);
dsl_dir_rele(dd->dd_parent, dd);
dd->dd_phys->dd_parent_obj = newparent->dd_object;
VERIFY0(dsl_dir_hold_obj(dp,
newparent->dd_object, NULL, dd, &dd->dd_parent));
/* add to new parent zapobj */
err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
dd->dd_myname, 8, 1, &dd->dd_object, tx);
ASSERT0(err);
VERIFY0(zap_add(mos, newparent->dd_phys->dd_child_dir_zapobj,
dd->dd_myname, 8, 1, &dd->dd_object, tx));
dsl_prop_notify_all(dd);
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
}
int
dsl_dir_rename(dsl_dir_t *dd, const char *newname)
dsl_dir_rename(const char *oldname, const char *newname)
{
struct renamearg ra;
int err;
dsl_dir_rename_arg_t ddra;
/* new parent should exist */
err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname);
if (err)
return (err);
ddra.ddra_oldname = oldname;
ddra.ddra_newname = newname;
/* can't rename to different pool */
if (dd->dd_pool != ra.newparent->dd_pool) {
err = ENXIO;
goto out;
}
/* new name should not already exist */
if (ra.mynewname == NULL) {
err = EEXIST;
goto out;
}
err = dsl_sync_task_do(dd->dd_pool,
dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3);
out:
dsl_dir_close(ra.newparent, FTAG);
return (err);
return (dsl_sync_task(oldname,
dsl_dir_rename_check, dsl_dir_rename_sync, &ddra, 3));
}
int
@@ -1424,6 +1349,4 @@ dsl_dir_snap_cmtime_update(dsl_dir_t *dd)
#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(dsl_dir_set_quota);
EXPORT_SYMBOL(dsl_dir_set_reservation);
EXPORT_SYMBOL(dsl_dir_open);
EXPORT_SYMBOL(dsl_dir_close);
#endif
+157 -61
View File
@@ -43,6 +43,7 @@
#include <sys/bptree.h>
#include <sys/zfeature.h>
#include <sys/zil_impl.h>
#include <sys/dsl_userhold.h>
int zfs_no_write_throttle = 0;
int zfs_write_limit_shift = 3; /* 1/8th of physical memory */
@@ -264,7 +265,7 @@ dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp)
if (err)
return (err);
return (dsl_dir_open_obj(dp, obj, name, dp, ddp));
return (dsl_dir_hold_obj(dp, obj, name, dp, ddp));
}
static dsl_pool_t *
@@ -276,7 +277,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP);
dp->dp_spa = spa;
dp->dp_meta_rootbp = *bp;
rw_init(&dp->dp_config_rwlock, NULL, RW_DEFAULT, NULL);
rrw_init(&dp->dp_config_rwlock, B_TRUE);
dp->dp_write_limit = zfs_write_limit_min;
txg_init(dp, txg);
@@ -287,7 +288,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
txg_list_create(&dp->dp_dirty_dirs,
offsetof(dsl_dir_t, dd_dirty_link));
txg_list_create(&dp->dp_sync_tasks,
offsetof(dsl_sync_task_group_t, dstg_node));
offsetof(dsl_sync_task_t, dst_node));
mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -324,14 +325,14 @@ dsl_pool_open(dsl_pool_t *dp)
dsl_dataset_t *ds;
uint64_t obj;
rw_enter(&dp->dp_config_rwlock, RW_WRITER);
rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
&dp->dp_root_dir_obj);
if (err)
goto out;
err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj,
err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj,
NULL, dp, &dp->dp_root_dir);
if (err)
goto out;
@@ -352,7 +353,7 @@ dsl_pool_open(dsl_pool_t *dp)
&dp->dp_origin_snap);
dsl_dataset_rele(ds, FTAG);
}
dsl_dir_close(dd, dp);
dsl_dir_rele(dd, dp);
if (err)
goto out;
}
@@ -367,7 +368,7 @@ dsl_pool_open(dsl_pool_t *dp)
DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj);
if (err)
goto out;
VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
VERIFY0(bpobj_open(&dp->dp_free_bpobj,
dp->dp_meta_objset, obj));
}
@@ -400,7 +401,7 @@ dsl_pool_open(dsl_pool_t *dp)
err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg);
out:
rw_exit(&dp->dp_config_rwlock);
rrw_exit(&dp->dp_config_rwlock, FTAG);
return (err);
}
@@ -415,13 +416,13 @@ dsl_pool_close(dsl_pool_t *dp)
* and not a hold, so just drop that here.
*/
if (dp->dp_origin_snap)
dsl_dataset_drop_ref(dp->dp_origin_snap, dp);
dsl_dataset_rele(dp->dp_origin_snap, dp);
if (dp->dp_mos_dir)
dsl_dir_close(dp->dp_mos_dir, dp);
dsl_dir_rele(dp->dp_mos_dir, dp);
if (dp->dp_free_dir)
dsl_dir_close(dp->dp_free_dir, dp);
dsl_dir_rele(dp->dp_free_dir, dp);
if (dp->dp_root_dir)
dsl_dir_close(dp->dp_root_dir, dp);
dsl_dir_rele(dp->dp_root_dir, dp);
bpobj_close(&dp->dp_free_bpobj);
@@ -439,7 +440,7 @@ dsl_pool_close(dsl_pool_t *dp)
dsl_scan_fini(dp);
dsl_pool_tx_assign_destroy(dp);
dsl_pool_txg_history_destroy(dp);
rw_destroy(&dp->dp_config_rwlock);
rrw_destroy(&dp->dp_config_rwlock);
mutex_destroy(&dp->dp_lock);
taskq_destroy(dp->dp_iput_taskq);
if (dp->dp_blkstats)
@@ -457,6 +458,8 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
dsl_dataset_t *ds;
uint64_t obj;
rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
/* create and open the MOS (meta-objset) */
dp->dp_meta_objset = dmu_objset_create_impl(spa,
NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx);
@@ -467,30 +470,30 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
ASSERT0(err);
/* Initialize scan structures */
VERIFY3U(0, ==, dsl_scan_init(dp, txg));
VERIFY0(dsl_scan_init(dp, txg));
/* create and open the root dir */
dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx);
VERIFY(0 == dsl_dir_open_obj(dp, dp->dp_root_dir_obj,
VERIFY0(dsl_dir_hold_obj(dp, dp->dp_root_dir_obj,
NULL, dp, &dp->dp_root_dir));
/* create and open the meta-objset dir */
(void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx);
VERIFY(0 == dsl_pool_open_special_dir(dp,
VERIFY0(dsl_pool_open_special_dir(dp,
MOS_DIR_NAME, &dp->dp_mos_dir));
if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
/* create and open the free dir */
(void) dsl_dir_create_sync(dp, dp->dp_root_dir,
FREE_DIR_NAME, tx);
VERIFY(0 == dsl_pool_open_special_dir(dp,
VERIFY0(dsl_pool_open_special_dir(dp,
FREE_DIR_NAME, &dp->dp_free_dir));
/* create and open the free_bplist */
obj = bpobj_alloc(dp->dp_meta_objset, SPA_MAXBLOCKSIZE, tx);
VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0);
VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
VERIFY0(bpobj_open(&dp->dp_free_bpobj,
dp->dp_meta_objset, obj));
}
@@ -501,7 +504,7 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx);
/* create the root objset */
VERIFY(0 == dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
VERIFY(NULL != (os = dmu_objset_create_impl(dp->dp_spa, ds,
dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx)));
#ifdef _KERNEL
@@ -511,6 +514,8 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
dmu_tx_commit(tx);
rrw_exit(&dp->dp_config_rwlock, FTAG);
return (dp);
}
@@ -533,10 +538,7 @@ static int
deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
dsl_deadlist_t *dl = arg;
dsl_pool_t *dp = dmu_objset_pool(dl->dl_os);
rw_enter(&dp->dp_config_rwlock, RW_READER);
dsl_deadlist_insert(dl, bp, tx);
rw_exit(&dp->dp_config_rwlock);
return (0);
}
@@ -558,7 +560,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
/*
* We need to copy dp_space_towrite() before doing
* dsl_sync_task_group_sync(), because
* dsl_sync_task_sync(), because
* dsl_dataset_snapshot_reserve_space() will increase
* dp_space_towrite but not actually write anything.
*/
@@ -673,14 +675,14 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
*/
DTRACE_PROBE(pool_sync__3task);
if (!txg_list_empty(&dp->dp_sync_tasks, txg)) {
dsl_sync_task_group_t *dstg;
dsl_sync_task_t *dst;
/*
* No more sync tasks should have been added while we
* were syncing.
*/
ASSERT(spa_sync_pass(dp->dp_spa) == 1);
while ((dstg = txg_list_remove(&dp->dp_sync_tasks, txg)))
dsl_sync_task_group_sync(dstg, tx);
while ((dst = txg_list_remove(&dp->dp_sync_tasks, txg)))
dsl_sync_task_sync(dst, tx);
}
dmu_tx_commit(tx);
@@ -857,14 +859,13 @@ dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx)
/* ARGSUSED */
static int
upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
{
dmu_tx_t *tx = arg;
dsl_dataset_t *ds, *prev = NULL;
int err;
dsl_pool_t *dp = spa_get_dsl(spa);
err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds);
if (err)
return (err);
@@ -890,7 +891,7 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
* The $ORIGIN can't have any data, or the accounting
* will be wrong.
*/
ASSERT(prev->ds_phys->ds_bp.blk_birth == 0);
ASSERT0(prev->ds_phys->ds_bp.blk_birth);
/* The origin doesn't get attached to itself */
if (ds->ds_object == prev->ds_object) {
@@ -910,13 +911,13 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
if (ds->ds_phys->ds_next_snap_obj == 0) {
ASSERT(ds->ds_prev == NULL);
VERIFY(0 == dsl_dataset_hold_obj(dp,
VERIFY0(dsl_dataset_hold_obj(dp,
ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));
}
}
ASSERT(ds->ds_dir->dd_phys->dd_origin_obj == prev->ds_object);
ASSERT(ds->ds_phys->ds_prev_snap_obj == prev->ds_object);
ASSERT3U(ds->ds_dir->dd_phys->dd_origin_obj, ==, prev->ds_object);
ASSERT3U(ds->ds_phys->ds_prev_snap_obj, ==, prev->ds_object);
if (prev->ds_phys->ds_next_clones_obj == 0) {
dmu_buf_will_dirty(prev->ds_dbuf, tx);
@@ -924,7 +925,7 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
zap_create(dp->dp_meta_objset,
DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
}
VERIFY(0 == zap_add_int(dp->dp_meta_objset,
VERIFY0(zap_add_int(dp->dp_meta_objset,
prev->ds_phys->ds_next_clones_obj, ds->ds_object, tx));
dsl_dataset_rele(ds, FTAG);
@@ -939,25 +940,21 @@ dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx)
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(dp->dp_origin_snap != NULL);
VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL, upgrade_clones_cb,
VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, upgrade_clones_cb,
tx, DS_FIND_CHILDREN));
}
/* ARGSUSED */
static int
upgrade_dir_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
upgrade_dir_clones_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
dmu_tx_t *tx = arg;
dsl_dataset_t *ds;
dsl_pool_t *dp = spa_get_dsl(spa);
objset_t *mos = dp->dp_meta_objset;
VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
if (ds->ds_dir->dd_phys->dd_origin_obj) {
if (ds->ds_dir->dd_phys->dd_origin_obj != 0) {
dsl_dataset_t *origin;
VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
VERIFY0(dsl_dataset_hold_obj(dp,
ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin));
if (origin->ds_dir->dd_phys->dd_clones == 0) {
@@ -966,13 +963,11 @@ upgrade_dir_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
}
VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
origin->ds_dir->dd_phys->dd_clones, dsobj, tx));
VERIFY0(zap_add_int(dp->dp_meta_objset,
origin->ds_dir->dd_phys->dd_clones, ds->ds_object, tx));
dsl_dataset_rele(origin, FTAG);
}
dsl_dataset_rele(ds, FTAG);
return (0);
}
@@ -984,7 +979,7 @@ dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx)
ASSERT(dmu_tx_is_syncing(tx));
(void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx);
VERIFY(0 == dsl_pool_open_special_dir(dp,
VERIFY0(dsl_pool_open_special_dir(dp,
FREE_DIR_NAME, &dp->dp_free_dir));
/*
@@ -994,12 +989,11 @@ dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx)
*/
obj = dmu_object_alloc(dp->dp_meta_objset, DMU_OT_BPOBJ,
SPA_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx);
VERIFY3U(0, ==, zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx));
VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
dp->dp_meta_objset, obj));
VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj));
VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL,
VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
upgrade_dir_clones_cb, tx, DS_FIND_CHILDREN));
}
@@ -1011,17 +1005,16 @@ dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx)
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(dp->dp_origin_snap == NULL);
ASSERT(rrw_held(&dp->dp_config_rwlock, RW_WRITER));
/* create the origin dir, ds, & snap-ds */
rw_enter(&dp->dp_config_rwlock, RW_WRITER);
dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME,
NULL, 0, kcred, tx);
VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
dsl_dataset_snapshot_sync(ds, ORIGIN_DIR_NAME, tx);
VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj,
VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
dsl_dataset_snapshot_sync_impl(ds, ORIGIN_DIR_NAME, tx);
VERIFY0(dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj,
dp, &dp->dp_origin_snap));
dsl_dataset_rele(ds, FTAG);
rw_exit(&dp->dp_config_rwlock);
}
taskq_t *
@@ -1056,7 +1049,7 @@ dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp)
*htag = '\0';
++htag;
dsobj = strtonum(za.za_name, NULL);
(void) dsl_dataset_user_release_tmp(dp, dsobj, htag, B_FALSE);
dsl_dataset_user_release_tmp(dp, dsobj, htag);
}
zap_cursor_fini(&zc);
}
@@ -1078,7 +1071,7 @@ dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx)
static int
dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj,
const char *tag, uint64_t *now, dmu_tx_t *tx, boolean_t holding)
const char *tag, uint64_t now, dmu_tx_t *tx, boolean_t holding)
{
objset_t *mos = dp->dp_meta_objset;
uint64_t zapobj = dp->dp_tmp_userrefs_obj;
@@ -1103,7 +1096,7 @@ dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj,
name = kmem_asprintf("%llx-%s", (u_longlong_t)dsobj, tag);
if (holding)
error = zap_add(mos, zapobj, name, 8, 1, now, tx);
error = zap_add(mos, zapobj, name, 8, 1, &now, tx);
else
error = zap_remove(mos, zapobj, name, tx);
strfree(name);
@@ -1116,7 +1109,7 @@ dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj,
*/
int
dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, const char *tag,
uint64_t *now, dmu_tx_t *tx)
uint64_t now, dmu_tx_t *tx)
{
return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, now, tx, B_TRUE));
}
@@ -1128,10 +1121,113 @@ int
dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, const char *tag,
dmu_tx_t *tx)
{
return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, NULL,
return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, 0,
tx, B_FALSE));
}
/*
* DSL Pool Configuration Lock
*
* The dp_config_rwlock protects against changes to DSL state (e.g. dataset
* creation / destruction / rename / property setting). It must be held for
* read to hold a dataset or dsl_dir. I.e. you must call
* dsl_pool_config_enter() or dsl_pool_hold() before calling
* dsl_{dataset,dir}_hold{_obj}. In most circumstances, the dp_config_rwlock
* must be held continuously until all datasets and dsl_dirs are released.
*
* The only exception to this rule is that if a "long hold" is placed on
* a dataset, then the dp_config_rwlock may be dropped while the dataset
* is still held. The long hold will prevent the dataset from being
* destroyed -- the destroy will fail with EBUSY. A long hold can be
* obtained by calling dsl_dataset_long_hold(), or by "owning" a dataset
* (by calling dsl_{dataset,objset}_{try}own{_obj}).
*
* Legitimate long-holders (including owners) should be long-running, cancelable
* tasks that should cause "zfs destroy" to fail. This includes DMU
* consumers (i.e. a ZPL filesystem being mounted or ZVOL being open),
* "zfs send", and "zfs diff". There are several other long-holders whose
* uses are suboptimal (e.g. "zfs promote", and zil_suspend()).
*
* The usual formula for long-holding would be:
* dsl_pool_hold()
* dsl_dataset_hold()
* ... perform checks ...
* dsl_dataset_long_hold()
* dsl_pool_rele()
* ... perform long-running task ...
* dsl_dataset_long_rele()
* dsl_dataset_rele()
*
* Note that when the long hold is released, the dataset is still held but
* the pool is not held. The dataset may change arbitrarily during this time
* (e.g. it could be destroyed). Therefore you shouldn't do anything to the
* dataset except release it.
*
* User-initiated operations (e.g. ioctls, zfs_ioc_*()) are either read-only
* or modifying operations.
*
* Modifying operations should generally use dsl_sync_task(). The synctask
* infrastructure enforces proper locking strategy with respect to the
* dp_config_rwlock. See the comment above dsl_sync_task() for details.
*
* Read-only operations will manually hold the pool, then the dataset, obtain
* information from the dataset, then release the pool and dataset.
* dmu_objset_{hold,rele}() are convenience routines that also do the pool
* hold/rele.
*/
int
dsl_pool_hold(const char *name, void *tag, dsl_pool_t **dp)
{
spa_t *spa;
int error;
error = spa_open(name, &spa, tag);
if (error == 0) {
*dp = spa_get_dsl(spa);
dsl_pool_config_enter(*dp, tag);
}
return (error);
}
void
dsl_pool_rele(dsl_pool_t *dp, void *tag)
{
dsl_pool_config_exit(dp, tag);
spa_close(dp->dp_spa, tag);
}
void
dsl_pool_config_enter(dsl_pool_t *dp, void *tag)
{
/*
* We use a "reentrant" reader-writer lock, but not reentrantly.
*
* The rrwlock can (with the track_all flag) track all reading threads,
* which is very useful for debugging which code path failed to release
* the lock, and for verifying that the *current* thread does hold
* the lock.
*
* (Unlike a rwlock, which knows that N threads hold it for
* read, but not *which* threads, so rw_held(RW_READER) returns TRUE
* if any thread holds it for read, even if this thread doesn't).
*/
ASSERT(!rrw_held(&dp->dp_config_rwlock, RW_READER));
rrw_enter(&dp->dp_config_rwlock, RW_READER, tag);
}
void
dsl_pool_config_exit(dsl_pool_t *dp, void *tag)
{
rrw_exit(&dp->dp_config_rwlock, tag);
}
boolean_t
dsl_pool_config_held(dsl_pool_t *dp)
{
return (RRW_LOCK_HELD(&dp->dp_config_rwlock));
}
#if defined(_KERNEL) && defined(HAVE_SPL)
module_param(zfs_no_write_throttle, int, 0644);
MODULE_PARM_DESC(zfs_no_write_throttle, "Disable write throttling");
+240 -249
View File
@@ -82,7 +82,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
char *inheritstr;
char *recvdstr;
ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
ASSERT(dsl_pool_config_held(dd->dd_pool));
if (setpoint)
setpoint[0] = '\0';
@@ -97,8 +97,6 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
* after this loop.
*/
for (; dd != NULL; dd = dd->dd_parent) {
ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
if (dd != target || snapshot) {
if (!inheritable)
break;
@@ -167,7 +165,7 @@ dsl_prop_get_ds(dsl_dataset_t *ds, const char *propname,
boolean_t snapshot;
uint64_t zapobj;
ASSERT(RW_LOCK_HELD(&ds->ds_dir->dd_pool->dp_config_rwlock));
ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop));
snapshot = (ds->ds_phys != NULL && dsl_dataset_is_snapshot(ds));
zapobj = (ds->ds_phys == NULL ? 0 : ds->ds_phys->ds_props_obj);
@@ -231,22 +229,16 @@ dsl_prop_register(dsl_dataset_t *ds, const char *propname,
dsl_prop_changed_cb_t *callback, void *cbarg)
{
dsl_dir_t *dd = ds->ds_dir;
dsl_pool_t *dp = dd->dd_pool;
uint64_t value;
dsl_prop_cb_record_t *cbr;
int err;
int need_rwlock;
ASSERTV(dsl_pool_t *dp = dd->dd_pool);
need_rwlock = !RW_WRITE_HELD(&dp->dp_config_rwlock);
if (need_rwlock)
rw_enter(&dp->dp_config_rwlock, RW_READER);
ASSERT(dsl_pool_config_held(dp));
err = dsl_prop_get_ds(ds, propname, 8, 1, &value, NULL);
if (err != 0) {
if (need_rwlock)
rw_exit(&dp->dp_config_rwlock);
err = dsl_prop_get_int_ds(ds, propname, &value);
if (err != 0)
return (err);
}
cbr = kmem_alloc(sizeof (dsl_prop_cb_record_t), KM_PUSHPAGE);
cbr->cbr_ds = ds;
@@ -259,9 +251,6 @@ dsl_prop_register(dsl_dataset_t *ds, const char *propname,
mutex_exit(&dd->dd_lock);
cbr->cbr_func(cbr->cbr_arg, value);
if (need_rwlock)
rw_exit(&dp->dp_config_rwlock);
return (0);
}
@@ -269,19 +258,18 @@ int
dsl_prop_get(const char *dsname, const char *propname,
int intsz, int numints, void *buf, char *setpoint)
{
dsl_dataset_t *ds;
int err;
objset_t *os;
int error;
err = dsl_dataset_hold(dsname, FTAG, &ds);
if (err)
return (err);
error = dmu_objset_hold(dsname, FTAG, &os);
if (error != 0)
return (error);
rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
err = dsl_prop_get_ds(ds, propname, intsz, numints, buf, setpoint);
rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
error = dsl_prop_get_ds(dmu_objset_ds(os), propname,
intsz, numints, buf, setpoint);
dsl_dataset_rele(ds, FTAG);
return (err);
dmu_objset_rele(os, FTAG);
return (error);
}
/*
@@ -299,17 +287,11 @@ dsl_prop_get_integer(const char *ddname, const char *propname,
return (dsl_prop_get(ddname, propname, 8, 1, valuep, setpoint));
}
void
dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname,
zprop_source_t source, uint64_t *value)
int
dsl_prop_get_int_ds(dsl_dataset_t *ds, const char *propname,
uint64_t *valuep)
{
psa->psa_name = propname;
psa->psa_source = source;
psa->psa_intsz = 8;
psa->psa_numints = 1;
psa->psa_value = value;
psa->psa_effective_value = -1ULL;
return (dsl_prop_get_ds(ds, propname, 8, 1, valuep, NULL));
}
/*
@@ -323,11 +305,10 @@ dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname,
* a property not handled by this function.
*/
int
dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa)
dsl_prop_predict(dsl_dir_t *dd, const char *propname,
zprop_source_t source, uint64_t value, uint64_t *newvalp)
{
const char *propname = psa->psa_name;
zfs_prop_t prop = zfs_name_to_prop(propname);
zprop_source_t source = psa->psa_source;
objset_t *mos;
uint64_t zapobj;
uint64_t version;
@@ -359,36 +340,33 @@ dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa)
switch ((int)source) {
case ZPROP_SRC_NONE:
/* Revert to the received value, if any. */
err = zap_lookup(mos, zapobj, recvdstr, 8, 1,
&psa->psa_effective_value);
err = zap_lookup(mos, zapobj, recvdstr, 8, 1, newvalp);
if (err == ENOENT)
psa->psa_effective_value = 0;
*newvalp = 0;
break;
case ZPROP_SRC_LOCAL:
psa->psa_effective_value = *(uint64_t *)psa->psa_value;
*newvalp = value;
break;
case ZPROP_SRC_RECEIVED:
/*
* If there's no local setting, then the new received value will
* be the effective value.
*/
err = zap_lookup(mos, zapobj, propname, 8, 1,
&psa->psa_effective_value);
err = zap_lookup(mos, zapobj, propname, 8, 1, newvalp);
if (err == ENOENT)
psa->psa_effective_value = *(uint64_t *)psa->psa_value;
*newvalp = value;
break;
case (ZPROP_SRC_NONE | ZPROP_SRC_RECEIVED):
/*
* We're clearing the received value, so the local setting (if
* it exists) remains the effective value.
*/
err = zap_lookup(mos, zapobj, propname, 8, 1,
&psa->psa_effective_value);
err = zap_lookup(mos, zapobj, propname, 8, 1, newvalp);
if (err == ENOENT)
psa->psa_effective_value = 0;
*newvalp = 0;
break;
default:
cmn_err(CE_PANIC, "unexpected property source: %d", source);
panic("unexpected property source: %d", source);
}
strfree(recvdstr);
@@ -399,39 +377,6 @@ dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa)
return (err);
}
#ifdef ZFS_DEBUG
void
dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa)
{
zfs_prop_t prop = zfs_name_to_prop(psa->psa_name);
uint64_t intval;
char setpoint[MAXNAMELEN];
uint64_t version = spa_version(dd->dd_pool->dp_spa);
int err;
if (version < SPA_VERSION_RECVD_PROPS) {
switch (prop) {
case ZFS_PROP_QUOTA:
case ZFS_PROP_RESERVATION:
return;
default:
break;
}
}
err = dsl_prop_get_dd(dd, psa->psa_name, 8, 1, &intval,
setpoint, B_FALSE);
if (err == 0 && intval != psa->psa_effective_value) {
cmn_err(CE_PANIC, "%s property, source: %x, "
"predicted effective value: %llu, "
"actual effective value: %llu (setpoint: %s)",
psa->psa_name, psa->psa_source,
(unsigned long long)psa->psa_effective_value,
(unsigned long long)intval, setpoint);
}
}
#endif
/*
* Unregister this callback. Return 0 on success, ENOENT if ddname is
* invalid, ENOMSG if no matching callback registered.
@@ -466,25 +411,57 @@ dsl_prop_unregister(dsl_dataset_t *ds, const char *propname,
return (0);
}
/*
* Return the number of callbacks that are registered for this dataset.
*/
int
dsl_prop_numcb(dsl_dataset_t *ds)
boolean_t
dsl_prop_hascb(dsl_dataset_t *ds)
{
dsl_dir_t *dd = ds->ds_dir;
boolean_t rv = B_FALSE;
dsl_prop_cb_record_t *cbr;
mutex_enter(&dd->dd_lock);
for (cbr = list_head(&dd->dd_prop_cbs); cbr;
cbr = list_next(&dd->dd_prop_cbs, cbr)) {
if (cbr->cbr_ds == ds) {
rv = B_TRUE;
break;
}
}
mutex_exit(&dd->dd_lock);
return (rv);
}
/* ARGSUSED */
static int
dsl_prop_notify_all_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
dsl_dir_t *dd = ds->ds_dir;
dsl_prop_cb_record_t *cbr;
int num = 0;
mutex_enter(&dd->dd_lock);
for (cbr = list_head(&dd->dd_prop_cbs);
cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) {
if (cbr->cbr_ds == ds)
num++;
for (cbr = list_head(&dd->dd_prop_cbs); cbr;
cbr = list_next(&dd->dd_prop_cbs, cbr)) {
uint64_t value;
if (dsl_prop_get_ds(cbr->cbr_ds, cbr->cbr_propname,
sizeof (value), 1, &value, NULL) == 0)
cbr->cbr_func(cbr->cbr_arg, value);
}
mutex_exit(&dd->dd_lock);
return (num);
return (0);
}
/*
* Update all property values for ddobj & its descendants. This is used
* when renaming the dir.
*/
void
dsl_prop_notify_all(dsl_dir_t *dd)
{
dsl_pool_t *dp = dd->dd_pool;
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
(void) dmu_objset_find_dp(dp, dd->dd_object, dsl_prop_notify_all_cb,
NULL, DS_FIND_CHILDREN);
}
static void
@@ -498,8 +475,8 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
zap_attribute_t *za;
int err;
ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
err = dsl_dir_open_obj(dp, ddobj, NULL, FTAG, &dd);
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd);
if (err)
return;
@@ -510,7 +487,7 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
*/
err = zap_contains(mos, dd->dd_phys->dd_props_zapobj, propname);
if (err == 0) {
dsl_dir_close(dd, FTAG);
dsl_dir_rele(dd, FTAG);
return;
}
ASSERT3U(err, ==, ENOENT);
@@ -545,26 +522,24 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
}
kmem_free(za, sizeof (zap_attribute_t));
zap_cursor_fini(&zc);
dsl_dir_close(dd, FTAG);
dsl_dir_rele(dd, FTAG);
}
void
dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
zprop_source_t source, int intsz, int numints, const void *value,
dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
dsl_prop_setarg_t *psa = arg2;
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
uint64_t zapobj, intval, dummy;
int isint;
char valbuf[32];
char *valstr = NULL;
const char *valstr = NULL;
char *inheritstr;
char *recvdstr;
char *tbuf = NULL;
int err;
uint64_t version = spa_version(ds->ds_dir->dd_pool->dp_spa);
const char *propname = psa->psa_name;
zprop_source_t source = psa->psa_source;
isint = (dodefault(propname, 8, 1, &intval) == 0);
@@ -614,8 +589,8 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
*/
err = zap_remove(mos, zapobj, inheritstr, tx);
ASSERT(err == 0 || err == ENOENT);
VERIFY(0 == zap_update(mos, zapobj, propname,
psa->psa_intsz, psa->psa_numints, psa->psa_value, tx));
VERIFY0(zap_update(mos, zapobj, propname,
intsz, numints, value, tx));
break;
case ZPROP_SRC_INHERITED:
/*
@@ -626,12 +601,10 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
err = zap_remove(mos, zapobj, propname, tx);
ASSERT(err == 0 || err == ENOENT);
if (version >= SPA_VERSION_RECVD_PROPS &&
dsl_prop_get_ds(ds, ZPROP_HAS_RECVD, 8, 1, &dummy,
NULL) == 0) {
dsl_prop_get_int_ds(ds, ZPROP_HAS_RECVD, &dummy) == 0) {
dummy = 0;
err = zap_update(mos, zapobj, inheritstr,
8, 1, &dummy, tx);
ASSERT(err == 0);
VERIFY0(zap_update(mos, zapobj, inheritstr,
8, 1, &dummy, tx));
}
break;
case ZPROP_SRC_RECEIVED:
@@ -639,7 +612,7 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
* set propname$recvd -> value
*/
err = zap_update(mos, zapobj, recvdstr,
psa->psa_intsz, psa->psa_numints, psa->psa_value, tx);
intsz, numints, value, tx);
ASSERT(err == 0);
break;
case (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED):
@@ -669,7 +642,7 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
strfree(recvdstr);
if (isint) {
VERIFY(0 == dsl_prop_get_ds(ds, propname, 8, 1, &intval, NULL));
VERIFY0(dsl_prop_get_int_ds(ds, propname, &intval));
if (ds->ds_phys != NULL && dsl_dataset_is_snapshot(ds)) {
dsl_prop_cb_record_t *cbr;
@@ -696,7 +669,7 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
valstr = valbuf;
} else {
if (source == ZPROP_SRC_LOCAL) {
valstr = (char *)psa->psa_value;
valstr = value;
} else {
tbuf = kmem_alloc(ZAP_MAXVALUELEN, KM_PUSHPAGE);
if (dsl_prop_get_ds(ds, propname, 1,
@@ -713,118 +686,73 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
kmem_free(tbuf, ZAP_MAXVALUELEN);
}
void
dsl_props_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
int
dsl_prop_set_int(const char *dsname, const char *propname,
zprop_source_t source, uint64_t value)
{
dsl_dataset_t *ds = arg1;
dsl_props_arg_t *pa = arg2;
nvlist_t *props = pa->pa_props;
dsl_prop_setarg_t psa;
nvpair_t *elem = NULL;
nvlist_t *nvl = fnvlist_alloc();
int error;
psa.psa_source = pa->pa_source;
while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
nvpair_t *pair = elem;
psa.psa_name = nvpair_name(pair);
if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
/*
* dsl_prop_get_all_impl() returns properties in this
* format.
*/
nvlist_t *attrs;
VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
&pair) == 0);
}
if (nvpair_type(pair) == DATA_TYPE_STRING) {
VERIFY(nvpair_value_string(pair,
(char **)&psa.psa_value) == 0);
psa.psa_intsz = 1;
psa.psa_numints = strlen(psa.psa_value) + 1;
} else {
uint64_t intval;
VERIFY(nvpair_value_uint64(pair, &intval) == 0);
psa.psa_intsz = sizeof (intval);
psa.psa_numints = 1;
psa.psa_value = &intval;
}
dsl_prop_set_sync(ds, &psa, tx);
}
fnvlist_add_uint64(nvl, propname, value);
error = dsl_props_set(dsname, source, nvl);
fnvlist_free(nvl);
return (error);
}
int
dsl_prop_set(const char *dsname, const char *propname, zprop_source_t source,
int intsz, int numints, const void *buf)
dsl_prop_set_string(const char *dsname, const char *propname,
zprop_source_t source, const char *value)
{
dsl_dataset_t *ds;
uint64_t version;
int err;
dsl_prop_setarg_t psa;
nvlist_t *nvl = fnvlist_alloc();
int error;
/*
* We must do these checks before we get to the syncfunc, since
* it can't fail.
*/
if (strlen(propname) >= ZAP_MAXNAMELEN)
return (ENAMETOOLONG);
err = dsl_dataset_hold(dsname, FTAG, &ds);
if (err)
return (err);
version = spa_version(ds->ds_dir->dd_pool->dp_spa);
if (intsz * numints >= (version < SPA_VERSION_STMF_PROP ?
ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) {
dsl_dataset_rele(ds, FTAG);
return (E2BIG);
}
if (dsl_dataset_is_snapshot(ds) &&
version < SPA_VERSION_SNAP_PROPS) {
dsl_dataset_rele(ds, FTAG);
return (ENOTSUP);
}
psa.psa_name = propname;
psa.psa_source = source;
psa.psa_intsz = intsz;
psa.psa_numints = numints;
psa.psa_value = buf;
psa.psa_effective_value = -1ULL;
err = dsl_sync_task_do(ds->ds_dir->dd_pool,
NULL, dsl_prop_set_sync, ds, &psa, 2);
dsl_dataset_rele(ds, FTAG);
return (err);
fnvlist_add_string(nvl, propname, value);
error = dsl_props_set(dsname, source, nvl);
fnvlist_free(nvl);
return (error);
}
int
dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props)
dsl_prop_inherit(const char *dsname, const char *propname,
zprop_source_t source)
{
nvlist_t *nvl = fnvlist_alloc();
int error;
fnvlist_add_boolean(nvl, propname);
error = dsl_props_set(dsname, source, nvl);
fnvlist_free(nvl);
return (error);
}
typedef struct dsl_props_set_arg {
const char *dpsa_dsname;
zprop_source_t dpsa_source;
nvlist_t *dpsa_props;
} dsl_props_set_arg_t;
static int
dsl_props_set_check(void *arg, dmu_tx_t *tx)
{
dsl_props_set_arg_t *dpsa = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
uint64_t version;
nvpair_t *elem = NULL;
dsl_props_arg_t pa;
int err;
if ((err = dsl_dataset_hold(dsname, FTAG, &ds)))
err = dsl_dataset_hold(dp, dpsa->dpsa_dsname, FTAG, &ds);
if (err != 0)
return (err);
/*
* Do these checks before the syncfunc, since it can't fail.
*/
version = spa_version(ds->ds_dir->dd_pool->dp_spa);
while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
while ((elem = nvlist_next_nvpair(dpsa->dpsa_props, elem)) != NULL) {
if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) {
dsl_dataset_rele(ds, FTAG);
return (ENAMETOOLONG);
}
if (nvpair_type(elem) == DATA_TYPE_STRING) {
char *valstr;
VERIFY(nvpair_value_string(elem, &valstr) == 0);
char *valstr = fnvpair_value_string(elem);
if (strlen(valstr) >= (version <
SPA_VERSION_STMF_PROP ?
ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) {
@@ -834,20 +762,83 @@ dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props)
}
}
if (dsl_dataset_is_snapshot(ds) &&
version < SPA_VERSION_SNAP_PROPS) {
if (dsl_dataset_is_snapshot(ds) && version < SPA_VERSION_SNAP_PROPS) {
dsl_dataset_rele(ds, FTAG);
return (ENOTSUP);
}
pa.pa_props = props;
pa.pa_source = source;
err = dsl_sync_task_do(ds->ds_dir->dd_pool,
NULL, dsl_props_set_sync, ds, &pa, 2);
dsl_dataset_rele(ds, FTAG);
return (err);
return (0);
}
void
dsl_props_set_sync_impl(dsl_dataset_t *ds, zprop_source_t source,
nvlist_t *props, dmu_tx_t *tx)
{
nvpair_t *elem = NULL;
while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
nvpair_t *pair = elem;
if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
/*
* dsl_prop_get_all_impl() returns properties in this
* format.
*/
nvlist_t *attrs = fnvpair_value_nvlist(pair);
pair = fnvlist_lookup_nvpair(attrs, ZPROP_VALUE);
}
if (nvpair_type(pair) == DATA_TYPE_STRING) {
const char *value = fnvpair_value_string(pair);
dsl_prop_set_sync_impl(ds, nvpair_name(pair),
source, 1, strlen(value) + 1, value, tx);
} else if (nvpair_type(pair) == DATA_TYPE_UINT64) {
uint64_t intval = fnvpair_value_uint64(pair);
dsl_prop_set_sync_impl(ds, nvpair_name(pair),
source, sizeof (intval), 1, &intval, tx);
} else if (nvpair_type(pair) == DATA_TYPE_BOOLEAN) {
dsl_prop_set_sync_impl(ds, nvpair_name(pair),
source, 0, 0, NULL, tx);
} else {
panic("invalid nvpair type");
}
}
}
static void
dsl_props_set_sync(void *arg, dmu_tx_t *tx)
{
dsl_props_set_arg_t *dpsa = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
VERIFY0(dsl_dataset_hold(dp, dpsa->dpsa_dsname, FTAG, &ds));
dsl_props_set_sync_impl(ds, dpsa->dpsa_source, dpsa->dpsa_props, tx);
dsl_dataset_rele(ds, FTAG);
}
/*
* All-or-nothing; if any prop can't be set, nothing will be modified.
*/
int
dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props)
{
dsl_props_set_arg_t dpsa;
int nblks = 0;
dpsa.dpsa_dsname = dsname;
dpsa.dpsa_source = source;
dpsa.dpsa_props = props;
/*
* If the source includes NONE, then we will only be removing entries
* from the ZAP object. In that case don't check for ENOSPC.
*/
if ((source & ZPROP_SRC_NONE) == 0)
nblks = 2 * fnvlist_num_pairs(props);
return (dsl_sync_task(dsname, dsl_props_set_check, dsl_props_set_sync,
&dpsa, nblks));
}
typedef enum dsl_prop_getflags {
@@ -997,7 +988,7 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp,
if (dsl_dataset_is_snapshot(ds))
flags |= DSL_PROP_GET_SNAPSHOT;
rw_enter(&dp->dp_config_rwlock, RW_READER);
ASSERT(dsl_pool_config_held(dp));
if (ds->ds_phys->ds_props_obj != 0) {
ASSERT(flags & DSL_PROP_GET_SNAPSHOT);
@@ -1022,58 +1013,51 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp,
break;
}
out:
rw_exit(&dp->dp_config_rwlock);
return (err);
}
boolean_t
dsl_prop_get_hasrecvd(objset_t *os)
dsl_prop_get_hasrecvd(const char *dsname)
{
dsl_dataset_t *ds = os->os_dsl_dataset;
int rc;
uint64_t dummy;
rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
rc = dsl_prop_get_ds(ds, ZPROP_HAS_RECVD, 8, 1, &dummy, NULL);
rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
ASSERT(rc != 0 || spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS);
return (rc == 0);
return (0 ==
dsl_prop_get_integer(dsname, ZPROP_HAS_RECVD, &dummy, NULL));
}
static void
dsl_prop_set_hasrecvd_impl(objset_t *os, zprop_source_t source)
static int
dsl_prop_set_hasrecvd_impl(const char *dsname, zprop_source_t source)
{
dsl_dataset_t *ds = os->os_dsl_dataset;
uint64_t dummy = 0;
dsl_prop_setarg_t psa;
uint64_t version;
spa_t *spa;
int error = 0;
if (spa_version(os->os_spa) < SPA_VERSION_RECVD_PROPS)
return;
VERIFY0(spa_open(dsname, &spa, FTAG));
version = spa_version(spa);
spa_close(spa, FTAG);
dsl_prop_setarg_init_uint64(&psa, ZPROP_HAS_RECVD, source, &dummy);
(void) dsl_sync_task_do(ds->ds_dir->dd_pool, NULL,
dsl_prop_set_sync, ds, &psa, 2);
if (version >= SPA_VERSION_RECVD_PROPS)
error = dsl_prop_set_int(dsname, ZPROP_HAS_RECVD, source, 0);
return (error);
}
/*
* Call after successfully receiving properties to ensure that only the first
* receive on or after SPA_VERSION_RECVD_PROPS blows away local properties.
*/
void
dsl_prop_set_hasrecvd(objset_t *os)
int
dsl_prop_set_hasrecvd(const char *dsname)
{
if (dsl_prop_get_hasrecvd(os)) {
ASSERT(spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS);
return;
}
dsl_prop_set_hasrecvd_impl(os, ZPROP_SRC_LOCAL);
int error = 0;
if (!dsl_prop_get_hasrecvd(dsname))
error = dsl_prop_set_hasrecvd_impl(dsname, ZPROP_SRC_LOCAL);
return (error);
}
void
dsl_prop_unset_hasrecvd(objset_t *os)
dsl_prop_unset_hasrecvd(const char *dsname)
{
dsl_prop_set_hasrecvd_impl(os, ZPROP_SRC_NONE);
VERIFY0(dsl_prop_set_hasrecvd_impl(dsname, ZPROP_SRC_NONE));
}
int
@@ -1083,16 +1067,25 @@ dsl_prop_get_all(objset_t *os, nvlist_t **nvp)
}
int
dsl_prop_get_received(objset_t *os, nvlist_t **nvp)
dsl_prop_get_received(const char *dsname, nvlist_t **nvp)
{
objset_t *os;
int error;
/*
* Received properties are not distinguishable from local properties
* until the dataset has received properties on or after
* SPA_VERSION_RECVD_PROPS.
*/
dsl_prop_getflags_t flags = (dsl_prop_get_hasrecvd(os) ?
dsl_prop_getflags_t flags = (dsl_prop_get_hasrecvd(dsname) ?
DSL_PROP_GET_RECEIVED : DSL_PROP_GET_LOCAL);
return (dsl_prop_get_all_ds(os->os_dsl_dataset, nvp, flags));
error = dmu_objset_hold(dsname, FTAG, &os);
if (error != 0)
return (error);
error = dsl_prop_get_all_ds(os->os_dsl_dataset, nvp, flags);
dmu_objset_rele(os, FTAG);
return (error);
}
void
@@ -1138,8 +1131,6 @@ dsl_prop_nvlist_add_string(nvlist_t *nv, zfs_prop_t prop, const char *value)
#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(dsl_prop_register);
EXPORT_SYMBOL(dsl_prop_unregister);
EXPORT_SYMBOL(dsl_prop_numcb);
EXPORT_SYMBOL(dsl_prop_set);
EXPORT_SYMBOL(dsl_prop_get);
EXPORT_SYMBOL(dsl_prop_get_integer);
EXPORT_SYMBOL(dsl_prop_get_all);
+56 -60
View File
@@ -53,7 +53,7 @@
typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *);
static scan_cb_t dsl_scan_scrub_cb;
static dsl_syncfunc_t dsl_scan_cancel_sync;
static void dsl_scan_cancel_sync(void *, dmu_tx_t *);
static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx);
int zfs_top_maxinflight = 32; /* maximum I/Os per top-level */
@@ -150,9 +150,9 @@ dsl_scan_fini(dsl_pool_t *dp)
/* ARGSUSED */
static int
dsl_scan_setup_check(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_scan_setup_check(void *arg, dmu_tx_t *tx)
{
dsl_scan_t *scn = arg1;
dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
if (scn->scn_phys.scn_state == DSS_SCANNING)
return (EBUSY);
@@ -160,12 +160,11 @@ dsl_scan_setup_check(void *arg1, void *arg2, dmu_tx_t *tx)
return (0);
}
/* ARGSUSED */
static void
dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
{
dsl_scan_t *scn = arg1;
pool_scan_func_t *funcp = arg2;
dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
pool_scan_func_t *funcp = arg;
dmu_object_type_t ot = 0;
dsl_pool_t *dp = scn->scn_dp;
spa_t *spa = dp->dp_spa;
@@ -312,9 +311,9 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
/* ARGSUSED */
static int
dsl_scan_cancel_check(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_scan_cancel_check(void *arg, dmu_tx_t *tx)
{
dsl_scan_t *scn = arg1;
dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
if (scn->scn_phys.scn_state != DSS_SCANNING)
return (ENOENT);
@@ -323,9 +322,9 @@ dsl_scan_cancel_check(void *arg1, void *arg2, dmu_tx_t *tx)
/* ARGSUSED */
static void
dsl_scan_cancel_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_scan_cancel_sync(void *arg, dmu_tx_t *tx)
{
dsl_scan_t *scn = arg1;
dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
dsl_scan_done(scn, B_FALSE, tx);
dsl_scan_sync_state(scn, tx);
@@ -334,12 +333,8 @@ dsl_scan_cancel_sync(void *arg1, void *arg2, dmu_tx_t *tx)
int
dsl_scan_cancel(dsl_pool_t *dp)
{
boolean_t complete = B_FALSE;
int err;
err = dsl_sync_task_do(dp, dsl_scan_cancel_check,
dsl_scan_cancel_sync, dp->dp_scan, &complete, 3);
return (err);
return (dsl_sync_task(spa_name(dp->dp_spa), dsl_scan_cancel_check,
dsl_scan_cancel_sync, NULL, 3));
}
static void dsl_scan_visitbp(blkptr_t *bp,
@@ -375,7 +370,7 @@ dsl_scan_ds_maxtxg(dsl_dataset_t *ds)
static void
dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx)
{
VERIFY(0 == zap_update(scn->scn_dp->dp_meta_objset,
VERIFY0(zap_update(scn->scn_dp->dp_meta_objset,
DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS,
&scn->scn_phys, tx));
@@ -959,33 +954,33 @@ struct enqueue_clones_arg {
/* ARGSUSED */
static int
enqueue_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
enqueue_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
{
struct enqueue_clones_arg *eca = arg;
dsl_dataset_t *ds;
int err;
dsl_pool_t *dp = spa->spa_dsl_pool;
dsl_scan_t *scn = dp->dp_scan;
err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
if (hds->ds_dir->dd_phys->dd_origin_obj != eca->originobj)
return (0);
err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds);
if (err)
return (err);
if (ds->ds_dir->dd_phys->dd_origin_obj == eca->originobj) {
while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) {
dsl_dataset_t *prev;
err = dsl_dataset_hold_obj(dp,
ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) {
dsl_dataset_t *prev;
err = dsl_dataset_hold_obj(dp,
ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
dsl_dataset_rele(ds, FTAG);
if (err)
return (err);
ds = prev;
}
VERIFY(zap_add_int_key(dp->dp_meta_objset,
scn->scn_phys.scn_queue_obj, ds->ds_object,
ds->ds_phys->ds_prev_snap_txg, eca->tx) == 0);
dsl_dataset_rele(ds, FTAG);
if (err)
return (err);
ds = prev;
}
VERIFY(zap_add_int_key(dp->dp_meta_objset,
scn->scn_phys.scn_queue_obj, ds->ds_object,
ds->ds_phys->ds_prev_snap_txg, eca->tx) == 0);
dsl_dataset_rele(ds, FTAG);
return (0);
}
@@ -1075,17 +1070,17 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
}
if (usenext) {
VERIFY(zap_join_key(dp->dp_meta_objset,
VERIFY0(zap_join_key(dp->dp_meta_objset,
ds->ds_phys->ds_next_clones_obj,
scn->scn_phys.scn_queue_obj,
ds->ds_phys->ds_creation_txg, tx) == 0);
ds->ds_phys->ds_creation_txg, tx));
} else {
struct enqueue_clones_arg eca;
eca.tx = tx;
eca.originobj = ds->ds_object;
(void) dmu_objset_find_spa(ds->ds_dir->dd_pool->dp_spa,
NULL, enqueue_clones_cb, &eca, DS_FIND_CHILDREN);
VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
enqueue_clones_cb, &eca, DS_FIND_CHILDREN));
}
}
@@ -1095,15 +1090,14 @@ out:
/* ARGSUSED */
static int
enqueue_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
enqueue_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
{
dmu_tx_t *tx = arg;
dsl_dataset_t *ds;
int err;
dsl_pool_t *dp = spa->spa_dsl_pool;
dsl_scan_t *scn = dp->dp_scan;
err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds);
if (err)
return (err);
@@ -1261,8 +1255,8 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
return;
if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) {
VERIFY(0 == dmu_objset_find_spa(dp->dp_spa,
NULL, enqueue_cb, tx, DS_FIND_CHILDREN));
VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
enqueue_cb, tx, DS_FIND_CHILDREN));
} else {
dsl_scan_visitds(scn,
dp->dp_origin_snap->ds_object, tx);
@@ -1402,7 +1396,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
func = POOL_SCAN_RESILVER;
zfs_dbgmsg("restarting scan func=%u txg=%llu",
func, tx->tx_txg);
dsl_scan_setup_sync(scn, &func, tx);
dsl_scan_setup_sync(&func, tx);
}
if (!dsl_scan_active(scn) ||
@@ -1436,21 +1430,21 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
err = bptree_iterate(dp->dp_meta_objset,
dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb,
scn, tx);
VERIFY3U(0, ==, zio_wait(scn->scn_zio_root));
if (err != 0)
return;
VERIFY0(zio_wait(scn->scn_zio_root));
/* disable async destroy feature */
spa_feature_decr(spa,
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY], tx);
ASSERT(!spa_feature_is_active(spa,
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]));
VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset,
DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_BPTREE_OBJ, tx));
VERIFY3U(0, ==, bptree_free(dp->dp_meta_objset,
dp->dp_bptree_obj, tx));
dp->dp_bptree_obj = 0;
if (err == 0) {
zfeature_info_t *feat = &spa_feature_table
[SPA_FEATURE_ASYNC_DESTROY];
/* finished; deactivate async destroy feature */
spa_feature_decr(spa, feat, tx);
ASSERT(!spa_feature_is_active(spa, feat));
VERIFY0(zap_remove(dp->dp_meta_objset,
DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_BPTREE_OBJ, tx));
VERIFY0(bptree_free(dp->dp_meta_objset,
dp->dp_bptree_obj, tx));
dp->dp_bptree_obj = 0;
}
}
if (scn->scn_visited_this_txg) {
zfs_dbgmsg("freed %llu blocks in %llums from "
@@ -1497,7 +1491,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
NULL, ZIO_FLAG_CANFAIL);
dsl_pool_config_enter(dp, FTAG);
dsl_scan_visit(scn, tx);
dsl_pool_config_exit(dp, FTAG);
(void) zio_wait(scn->scn_zio_root);
scn->scn_zio_root = NULL;
@@ -1734,8 +1730,8 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
spa->spa_scrub_reopen = B_FALSE;
(void) spa_vdev_state_exit(spa, NULL, 0);
return (dsl_sync_task_do(dp, dsl_scan_setup_check,
dsl_scan_setup_sync, dp->dp_scan, &func, 0));
return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check,
dsl_scan_setup_sync, &func, 0));
}
#if defined(_KERNEL) && defined(HAVE_SPL)
+88 -157
View File
@@ -34,138 +34,115 @@
/* ARGSUSED */
static int
dsl_null_checkfunc(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_null_checkfunc(void *arg, dmu_tx_t *tx)
{
return (0);
}
dsl_sync_task_group_t *
dsl_sync_task_group_create(dsl_pool_t *dp)
{
dsl_sync_task_group_t *dstg;
dstg = kmem_zalloc(sizeof (dsl_sync_task_group_t), KM_SLEEP);
list_create(&dstg->dstg_tasks, sizeof (dsl_sync_task_t),
offsetof(dsl_sync_task_t, dst_node));
dstg->dstg_pool = dp;
return (dstg);
}
void
dsl_sync_task_create(dsl_sync_task_group_t *dstg,
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
void *arg1, void *arg2, int blocks_modified)
{
dsl_sync_task_t *dst;
if (checkfunc == NULL)
checkfunc = dsl_null_checkfunc;
dst = kmem_zalloc(sizeof (dsl_sync_task_t), KM_SLEEP);
dst->dst_checkfunc = checkfunc;
dst->dst_syncfunc = syncfunc;
dst->dst_arg1 = arg1;
dst->dst_arg2 = arg2;
list_insert_tail(&dstg->dstg_tasks, dst);
dstg->dstg_space += blocks_modified << DST_AVG_BLKSHIFT;
}
/*
* Called from open context to perform a callback in syncing context. Waits
* for the operation to complete.
*
* The checkfunc will be called from open context as a preliminary check
* which can quickly fail. If it succeeds, it will be called again from
* syncing context. The checkfunc should generally be designed to work
* properly in either context, but if necessary it can check
* dmu_tx_is_syncing(tx).
*
* The synctask infrastructure enforces proper locking strategy with respect
* to the dp_config_rwlock -- the lock will always be held when the callbacks
* are called. It will be held for read during the open-context (preliminary)
* call to the checkfunc, and then held for write from syncing context during
* the calls to the check and sync funcs.
*
* A dataset or pool name can be passed as the first argument. Typically,
* the check func will hold, check the return value of the hold, and then
* release the dataset. The sync func will VERIFYO(hold()) the dataset.
* This is safe because no changes can be made between the check and sync funcs,
* and the sync func will only be called if the check func successfully opened
* the dataset.
*/
int
dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg)
dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified)
{
spa_t *spa;
dmu_tx_t *tx;
uint64_t txg;
dsl_sync_task_t *dst;
int err;
dsl_sync_task_t dst = { { { NULL } } };
dsl_pool_t *dp;
err = spa_open(pool, &spa, FTAG);
if (err != 0)
return (err);
dp = spa_get_dsl(spa);
top:
tx = dmu_tx_create_dd(dstg->dstg_pool->dp_mos_dir);
VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT));
tx = dmu_tx_create_dd(dp->dp_mos_dir);
VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
txg = dmu_tx_get_txg(tx);
dst.dst_pool = dp;
dst.dst_txg = dmu_tx_get_txg(tx);
dst.dst_space = blocks_modified << DST_AVG_BLKSHIFT;
dst.dst_checkfunc = checkfunc != NULL ? checkfunc : dsl_null_checkfunc;
dst.dst_syncfunc = syncfunc;
dst.dst_arg = arg;
dst.dst_error = 0;
dst.dst_nowaiter = B_FALSE;
/* Do a preliminary error check. */
dstg->dstg_err = 0;
#ifdef ZFS_DEBUG
/*
* Only check half the time, otherwise, the sync-context
* check will almost never fail.
*/
if (spa_get_random(2) == 0)
goto skip;
#endif
rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER);
for (dst = list_head(&dstg->dstg_tasks); dst;
dst = list_next(&dstg->dstg_tasks, dst)) {
dst->dst_err =
dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
if (dst->dst_err)
dstg->dstg_err = dst->dst_err;
}
rw_exit(&dstg->dstg_pool->dp_config_rwlock);
dsl_pool_config_enter(dp, FTAG);
err = dst.dst_checkfunc(arg, tx);
dsl_pool_config_exit(dp, FTAG);
if (dstg->dstg_err) {
if (err != 0) {
dmu_tx_commit(tx);
return (dstg->dstg_err);
spa_close(spa, FTAG);
return (err);
}
#ifdef ZFS_DEBUG
skip:
#endif
/*
* We don't generally have many sync tasks, so pay the price of
* add_tail to get the tasks executed in the right order.
*/
VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
dstg, txg));
VERIFY(txg_list_add_tail(&dp->dp_sync_tasks, &dst, dst.dst_txg));
dmu_tx_commit(tx);
txg_wait_synced(dstg->dstg_pool, txg);
txg_wait_synced(dp, dst.dst_txg);
if (dstg->dstg_err == EAGAIN) {
txg_wait_synced(dstg->dstg_pool, txg + TXG_DEFER_SIZE);
if (dst.dst_error == EAGAIN) {
txg_wait_synced(dp, dst.dst_txg + TXG_DEFER_SIZE);
goto top;
}
return (dstg->dstg_err);
spa_close(spa, FTAG);
return (dst.dst_error);
}
void
dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
dsl_sync_task_nowait(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
int blocks_modified, dmu_tx_t *tx)
{
uint64_t txg;
dsl_sync_task_t *dst = kmem_zalloc(sizeof (*dst), KM_SLEEP);
dstg->dstg_nowaiter = B_TRUE;
txg = dmu_tx_get_txg(tx);
/*
* We don't generally have many sync tasks, so pay the price of
* add_tail to get the tasks executed in the right order.
*/
VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
dstg, txg));
dst->dst_pool = dp;
dst->dst_txg = dmu_tx_get_txg(tx);
dst->dst_space = blocks_modified << DST_AVG_BLKSHIFT;
dst->dst_checkfunc = dsl_null_checkfunc;
dst->dst_syncfunc = syncfunc;
dst->dst_arg = arg;
dst->dst_error = 0;
dst->dst_nowaiter = B_TRUE;
VERIFY(txg_list_add_tail(&dp->dp_sync_tasks, dst, dst->dst_txg));
}
/*
* Called in syncing context to execute the synctask.
*/
void
dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg)
dsl_sync_task_sync(dsl_sync_task_t *dst, dmu_tx_t *tx)
{
dsl_sync_task_t *dst;
while ((dst = list_head(&dstg->dstg_tasks))) {
list_remove(&dstg->dstg_tasks, dst);
kmem_free(dst, sizeof (dsl_sync_task_t));
}
kmem_free(dstg, sizeof (dsl_sync_task_group_t));
}
void
dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
{
dsl_sync_task_t *dst;
dsl_pool_t *dp = dstg->dstg_pool;
dsl_pool_t *dp = dst->dst_pool;
uint64_t quota, used;
ASSERT0(dstg->dstg_err);
ASSERT0(dst->dst_error);
/*
* Check for sufficient space. We just check against what's
@@ -177,70 +154,24 @@ dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
metaslab_class_get_deferred(spa_normal_class(dp->dp_spa));
used = dp->dp_root_dir->dd_phys->dd_used_bytes;
/* MOS space is triple-dittoed, so we multiply by 3. */
if (dstg->dstg_space > 0 && used + dstg->dstg_space * 3 > quota) {
dstg->dstg_err = ENOSPC;
if (dst->dst_space > 0 && used + dst->dst_space * 3 > quota) {
dst->dst_error = ENOSPC;
if (dst->dst_nowaiter)
kmem_free(dst, sizeof (*dst));
return;
}
/*
* Check for errors by calling checkfuncs.
* Check for errors by calling checkfunc.
*/
rw_enter(&dp->dp_config_rwlock, RW_WRITER);
for (dst = list_head(&dstg->dstg_tasks); dst;
dst = list_next(&dstg->dstg_tasks, dst)) {
dst->dst_err =
dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
if (dst->dst_err)
dstg->dstg_err = dst->dst_err;
}
if (dstg->dstg_err == 0) {
/*
* Execute sync tasks.
*/
for (dst = list_head(&dstg->dstg_tasks); dst;
dst = list_next(&dstg->dstg_tasks, dst)) {
dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2, tx);
}
}
rw_exit(&dp->dp_config_rwlock);
if (dstg->dstg_nowaiter)
dsl_sync_task_group_destroy(dstg);
}
int
dsl_sync_task_do(dsl_pool_t *dp,
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
void *arg1, void *arg2, int blocks_modified)
{
dsl_sync_task_group_t *dstg;
int err;
ASSERT(spa_writeable(dp->dp_spa));
dstg = dsl_sync_task_group_create(dp);
dsl_sync_task_create(dstg, checkfunc, syncfunc,
arg1, arg2, blocks_modified);
err = dsl_sync_task_group_wait(dstg);
dsl_sync_task_group_destroy(dstg);
return (err);
}
void
dsl_sync_task_do_nowait(dsl_pool_t *dp,
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx)
{
dsl_sync_task_group_t *dstg;
dstg = dsl_sync_task_group_create(dp);
dsl_sync_task_create(dstg, checkfunc, syncfunc,
arg1, arg2, blocks_modified);
dsl_sync_task_group_nowait(dstg, tx);
rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
dst->dst_error = dst->dst_checkfunc(dst->dst_arg, tx);
if (dst->dst_error == 0)
dst->dst_syncfunc(dst->dst_arg, tx);
rrw_exit(&dp->dp_config_rwlock, FTAG);
if (dst->dst_nowaiter)
kmem_free(dst, sizeof (*dst));
}
#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(dsl_sync_task_do);
EXPORT_SYMBOL(dsl_sync_task_do_nowait);
#endif
+537
View File
@@ -0,0 +1,537 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/dsl_userhold.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_tx.h>
#include <sys/zfs_onexit.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
#include <sys/zfs_ioctl.h>
#include <sys/zap.h>
typedef struct dsl_dataset_user_hold_arg {
nvlist_t *dduha_holds;
nvlist_t *dduha_errlist;
minor_t dduha_minor;
} dsl_dataset_user_hold_arg_t;
/*
* If you add new checks here, you may need to add additional checks to the
* "temporary" case in snapshot_check() in dmu_objset.c.
*/
int
dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag,
boolean_t temphold, dmu_tx_t *tx)
{
dsl_pool_t *dp = dmu_tx_pool(tx);
objset_t *mos = dp->dp_meta_objset;
int error = 0;
if (strlen(htag) > MAXNAMELEN)
return (E2BIG);
/* Tempholds have a more restricted length */
if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
return (E2BIG);
/* tags must be unique (if ds already exists) */
if (ds != NULL) {
mutex_enter(&ds->ds_lock);
if (ds->ds_phys->ds_userrefs_obj != 0) {
uint64_t value;
error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
htag, 8, 1, &value);
if (error == 0)
error = EEXIST;
else if (error == ENOENT)
error = 0;
}
mutex_exit(&ds->ds_lock);
}
return (error);
}
static int
dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
{
dsl_dataset_user_hold_arg_t *dduha = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
nvpair_t *pair;
int rv = 0;
if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
return (ENOTSUP);
for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
int error = 0;
dsl_dataset_t *ds;
char *htag;
/* must be a snapshot */
if (strchr(nvpair_name(pair), '@') == NULL)
error = EINVAL;
if (error == 0)
error = nvpair_value_string(pair, &htag);
if (error == 0) {
error = dsl_dataset_hold(dp,
nvpair_name(pair), FTAG, &ds);
}
if (error == 0) {
error = dsl_dataset_user_hold_check_one(ds, htag,
dduha->dduha_minor != 0, tx);
dsl_dataset_rele(ds, FTAG);
}
if (error != 0) {
rv = error;
fnvlist_add_int32(dduha->dduha_errlist,
nvpair_name(pair), error);
}
}
return (rv);
}
void
dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
minor_t minor, uint64_t now, dmu_tx_t *tx)
{
dsl_pool_t *dp = ds->ds_dir->dd_pool;
objset_t *mos = dp->dp_meta_objset;
uint64_t zapobj;
mutex_enter(&ds->ds_lock);
if (ds->ds_phys->ds_userrefs_obj == 0) {
/*
* This is the first user hold for this dataset. Create
* the userrefs zap object.
*/
dmu_buf_will_dirty(ds->ds_dbuf, tx);
zapobj = ds->ds_phys->ds_userrefs_obj =
zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
} else {
zapobj = ds->ds_phys->ds_userrefs_obj;
}
ds->ds_userrefs++;
mutex_exit(&ds->ds_lock);
VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
if (minor != 0) {
VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
htag, now, tx));
dsl_register_onexit_hold_cleanup(ds, htag, minor);
}
spa_history_log_internal_ds(ds, "hold", tx,
"tag=%s temp=%d refs=%llu",
htag, minor != 0, ds->ds_userrefs);
}
static void
dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_user_hold_arg_t *dduha = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
nvpair_t *pair;
uint64_t now = gethrestime_sec();
for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
dsl_dataset_t *ds;
VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair),
dduha->dduha_minor, now, tx);
dsl_dataset_rele(ds, FTAG);
}
}
/*
* holds is nvl of snapname -> holdname
* errlist will be filled in with snapname -> error
* if cleanup_minor is not 0, the holds will be temporary, cleaned up
* when the process exits.
*
* if any fails, all will fail.
*/
int
dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
{
dsl_dataset_user_hold_arg_t dduha;
nvpair_t *pair;
pair = nvlist_next_nvpair(holds, NULL);
if (pair == NULL)
return (0);
dduha.dduha_holds = holds;
dduha.dduha_errlist = errlist;
dduha.dduha_minor = cleanup_minor;
return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds)));
}
typedef struct dsl_dataset_user_release_arg {
nvlist_t *ddura_holds;
nvlist_t *ddura_todelete;
nvlist_t *ddura_errlist;
} dsl_dataset_user_release_arg_t;
static int
dsl_dataset_user_release_check_one(dsl_dataset_t *ds,
nvlist_t *holds, boolean_t *todelete)
{
uint64_t zapobj;
nvpair_t *pair;
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
int error;
int numholds = 0;
*todelete = B_FALSE;
if (!dsl_dataset_is_snapshot(ds))
return (EINVAL);
zapobj = ds->ds_phys->ds_userrefs_obj;
if (zapobj == 0)
return (ESRCH);
for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(holds, pair)) {
/* Make sure the hold exists */
uint64_t tmp;
error = zap_lookup(mos, zapobj, nvpair_name(pair), 8, 1, &tmp);
if (error == ENOENT)
error = ESRCH;
if (error != 0)
return (error);
numholds++;
}
if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
ds->ds_userrefs == numholds) {
/* we need to destroy the snapshot as well */
if (dsl_dataset_long_held(ds))
return (EBUSY);
*todelete = B_TRUE;
}
return (0);
}
static int
dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
{
dsl_dataset_user_release_arg_t *ddura = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
nvpair_t *pair;
int rv = 0;
if (!dmu_tx_is_syncing(tx))
return (0);
for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
const char *name = nvpair_name(pair);
int error;
dsl_dataset_t *ds;
nvlist_t *holds;
error = nvpair_value_nvlist(pair, &holds);
if (error != 0)
return (EINVAL);
error = dsl_dataset_hold(dp, name, FTAG, &ds);
if (error == 0) {
boolean_t deleteme;
error = dsl_dataset_user_release_check_one(ds,
holds, &deleteme);
if (error == 0 && deleteme) {
fnvlist_add_boolean(ddura->ddura_todelete,
name);
}
dsl_dataset_rele(ds, FTAG);
}
if (error != 0) {
if (ddura->ddura_errlist != NULL) {
fnvlist_add_int32(ddura->ddura_errlist,
name, error);
}
rv = error;
}
}
return (rv);
}
static void
dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
dmu_tx_t *tx)
{
dsl_pool_t *dp = ds->ds_dir->dd_pool;
objset_t *mos = dp->dp_meta_objset;
uint64_t zapobj;
int error;
nvpair_t *pair;
for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(holds, pair)) {
ds->ds_userrefs--;
error = dsl_pool_user_release(dp, ds->ds_object,
nvpair_name(pair), tx);
VERIFY(error == 0 || error == ENOENT);
zapobj = ds->ds_phys->ds_userrefs_obj;
VERIFY0(zap_remove(mos, zapobj, nvpair_name(pair), tx));
spa_history_log_internal_ds(ds, "release", tx,
"tag=%s refs=%lld", nvpair_name(pair),
(longlong_t)ds->ds_userrefs);
}
}
static void
dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_user_release_arg_t *ddura = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
nvpair_t *pair;
for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
dsl_dataset_t *ds;
VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
dsl_dataset_user_release_sync_one(ds,
fnvpair_value_nvlist(pair), tx);
if (nvlist_exists(ddura->ddura_todelete,
nvpair_name(pair))) {
ASSERT(ds->ds_userrefs == 0 &&
ds->ds_phys->ds_num_children == 1 &&
DS_IS_DEFER_DESTROY(ds));
dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
}
dsl_dataset_rele(ds, FTAG);
}
}
/*
* holds is nvl of snapname -> { holdname, ... }
* errlist will be filled in with snapname -> error
*
* if any fails, all will fail.
*/
int
dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
{
dsl_dataset_user_release_arg_t ddura;
nvpair_t *pair;
int error;
pair = nvlist_next_nvpair(holds, NULL);
if (pair == NULL)
return (0);
ddura.ddura_holds = holds;
ddura.ddura_errlist = errlist;
ddura.ddura_todelete = fnvlist_alloc();
error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check,
dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds));
fnvlist_free(ddura.ddura_todelete);
return (error);
}
typedef struct dsl_dataset_user_release_tmp_arg {
uint64_t ddurta_dsobj;
nvlist_t *ddurta_holds;
boolean_t ddurta_deleteme;
} dsl_dataset_user_release_tmp_arg_t;
static int
dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx)
{
dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
int error;
if (!dmu_tx_is_syncing(tx))
return (0);
error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds);
if (error)
return (error);
error = dsl_dataset_user_release_check_one(ds,
ddurta->ddurta_holds, &ddurta->ddurta_deleteme);
dsl_dataset_rele(ds, FTAG);
return (error);
}
static void
dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds));
dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx);
if (ddurta->ddurta_deleteme) {
ASSERT(ds->ds_userrefs == 0 &&
ds->ds_phys->ds_num_children == 1 &&
DS_IS_DEFER_DESTROY(ds));
dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
}
dsl_dataset_rele(ds, FTAG);
}
/*
* Called at spa_load time to release a stale temporary user hold.
* Also called by the onexit code.
*/
void
dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag)
{
dsl_dataset_user_release_tmp_arg_t ddurta;
#ifdef _KERNEL
dsl_dataset_t *ds;
int error;
/* Make sure it is not mounted. */
dsl_pool_config_enter(dp, FTAG);
error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
if (error == 0) {
char name[MAXNAMELEN];
dsl_dataset_name(ds, name);
dsl_dataset_rele(ds, FTAG);
dsl_pool_config_exit(dp, FTAG);
zfs_unmount_snap(name);
} else {
dsl_pool_config_exit(dp, FTAG);
}
#endif
ddurta.ddurta_dsobj = dsobj;
ddurta.ddurta_holds = fnvlist_alloc();
fnvlist_add_boolean(ddurta.ddurta_holds, htag);
(void) dsl_sync_task(spa_name(dp->dp_spa),
dsl_dataset_user_release_tmp_check,
dsl_dataset_user_release_tmp_sync, &ddurta, 1);
fnvlist_free(ddurta.ddurta_holds);
}
typedef struct zfs_hold_cleanup_arg {
char zhca_spaname[MAXNAMELEN];
uint64_t zhca_spa_load_guid;
uint64_t zhca_dsobj;
char zhca_htag[MAXNAMELEN];
} zfs_hold_cleanup_arg_t;
static void
dsl_dataset_user_release_onexit(void *arg)
{
zfs_hold_cleanup_arg_t *ca = arg;
spa_t *spa;
int error;
error = spa_open(ca->zhca_spaname, &spa, FTAG);
if (error != 0) {
zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
"because pool is no longer loaded",
ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
return;
}
if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
"because pool is no longer loaded (guid doesn't match)",
ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
spa_close(spa, FTAG);
return;
}
dsl_dataset_user_release_tmp(spa_get_dsl(spa),
ca->zhca_dsobj, ca->zhca_htag);
kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
spa_close(spa, FTAG);
}
void
dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
minor_t minor)
{
zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
spa_t *spa = dsl_dataset_get_spa(ds);
(void) strlcpy(ca->zhca_spaname, spa_name(spa),
sizeof (ca->zhca_spaname));
ca->zhca_spa_load_guid = spa_load_guid(spa);
ca->zhca_dsobj = ds->ds_object;
(void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag));
VERIFY0(zfs_onexit_add_cb(minor,
dsl_dataset_user_release_onexit, ca, NULL));
}
int
dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
{
dsl_pool_t *dp;
dsl_dataset_t *ds;
int err;
err = dsl_pool_hold(dsname, FTAG, &dp);
if (err != 0)
return (err);
err = dsl_dataset_hold(dp, dsname, FTAG, &ds);
if (err != 0) {
dsl_pool_rele(dp, FTAG);
return (err);
}
if (ds->ds_phys->ds_userrefs_obj != 0) {
zap_attribute_t *za;
zap_cursor_t zc;
za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset,
ds->ds_phys->ds_userrefs_obj);
zap_cursor_retrieve(&zc, za) == 0;
zap_cursor_advance(&zc)) {
fnvlist_add_uint64(nvl, za->za_name,
za->za_first_integer);
}
zap_cursor_fini(&zc);
kmem_free(za, sizeof (zap_attribute_t));
}
dsl_dataset_rele(ds, FTAG);
dsl_pool_rele(dp, FTAG);
return (0);
}
+40
View File
@@ -1928,6 +1928,46 @@ void metaslab_fastwrite_unmark(spa_t *spa, const blkptr_t *bp)
spa_config_exit(spa, SCL_VDEV, FTAG);
}
static void
checkmap(space_map_t *sm, uint64_t off, uint64_t size)
{
space_seg_t *ss;
avl_index_t where;
mutex_enter(sm->sm_lock);
ss = space_map_find(sm, off, size, &where);
if (ss != NULL)
panic("freeing free block; ss=%p", (void *)ss);
mutex_exit(sm->sm_lock);
}
void
metaslab_check_free(spa_t *spa, const blkptr_t *bp)
{
int i, j;
if ((zfs_flags & ZFS_DEBUG_ZIO_FREE) == 0)
return;
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
for (i = 0; i < BP_GET_NDVAS(bp); i++) {
uint64_t vdid = DVA_GET_VDEV(&bp->blk_dva[i]);
vdev_t *vd = vdev_lookup_top(spa, vdid);
uint64_t off = DVA_GET_OFFSET(&bp->blk_dva[i]);
uint64_t size = DVA_GET_ASIZE(&bp->blk_dva[i]);
metaslab_t *ms = vd->vdev_ms[off >> vd->vdev_ms_shift];
if (ms->ms_map->sm_loaded)
checkmap(ms->ms_map, off, size);
for (j = 0; j < TXG_SIZE; j++)
checkmap(ms->ms_freemap[j], off, size);
for (j = 0; j < TXG_DEFER_SIZE; j++)
checkmap(ms->ms_defermap[j], off, size);
}
spa_config_exit(spa, SCL_VDEV, FTAG);
}
#if defined(_KERNEL) && defined(HAVE_SPL)
module_param(metaslab_debug, int, 0644);
MODULE_PARM_DESC(metaslab_debug, "keep space maps in core to verify frees");
+14 -7
View File
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -32,7 +33,7 @@ int reference_tracking_enable = FALSE; /* runs out of memory too easily */
#else
int reference_tracking_enable = TRUE;
#endif
int reference_history = 4; /* tunable */
int reference_history = 3; /* tunable */
static kmem_cache_t *reference_cache;
static kmem_cache_t *reference_history_cache;
@@ -64,6 +65,14 @@ refcount_create(refcount_t *rc)
offsetof(reference_t, ref_link));
rc->rc_count = 0;
rc->rc_removed_count = 0;
rc->rc_tracked = reference_tracking_enable;
}
void
refcount_create_untracked(refcount_t *rc)
{
refcount_create(rc);
rc->rc_tracked = B_FALSE;
}
void
@@ -96,14 +105,12 @@ refcount_destroy(refcount_t *rc)
int
refcount_is_zero(refcount_t *rc)
{
ASSERT(rc->rc_count >= 0);
return (rc->rc_count == 0);
}
int64_t
refcount_count(refcount_t *rc)
{
ASSERT(rc->rc_count >= 0);
return (rc->rc_count);
}
@@ -113,14 +120,14 @@ refcount_add_many(refcount_t *rc, uint64_t number, void *holder)
reference_t *ref = NULL;
int64_t count;
if (reference_tracking_enable) {
if (rc->rc_tracked) {
ref = kmem_cache_alloc(reference_cache, KM_PUSHPAGE);
ref->ref_holder = holder;
ref->ref_number = number;
}
mutex_enter(&rc->rc_mtx);
ASSERT(rc->rc_count >= 0);
if (reference_tracking_enable)
if (rc->rc_tracked)
list_insert_head(&rc->rc_list, ref);
rc->rc_count += number;
count = rc->rc_count;
@@ -144,7 +151,7 @@ refcount_remove_many(refcount_t *rc, uint64_t number, void *holder)
mutex_enter(&rc->rc_mtx);
ASSERT(rc->rc_count >= number);
if (!reference_tracking_enable) {
if (!rc->rc_tracked) {
rc->rc_count -= number;
count = rc->rc_count;
mutex_exit(&rc->rc_mtx);
@@ -161,7 +168,7 @@ refcount_remove_many(refcount_t *rc, uint64_t number, void *holder)
KM_PUSHPAGE);
list_insert_head(&rc->rc_removed, ref);
rc->rc_removed_count++;
if (rc->rc_removed_count >= reference_history) {
if (rc->rc_removed_count > reference_history) {
ref = list_tail(&rc->rc_removed);
list_remove(&rc->rc_removed, ref);
kmem_cache_free(reference_history_cache,
+26 -15
View File
@@ -75,8 +75,9 @@
uint_t rrw_tsd_key;
typedef struct rrw_node {
struct rrw_node *rn_next;
rrwlock_t *rn_rrl;
struct rrw_node *rn_next;
rrwlock_t *rn_rrl;
void *rn_tag;
} rrw_node_t;
static rrw_node_t *
@@ -98,13 +99,14 @@ rrn_find(rrwlock_t *rrl)
* Add a node to the head of the singly linked list.
*/
static void
rrn_add(rrwlock_t *rrl)
rrn_add(rrwlock_t *rrl, void *tag)
{
rrw_node_t *rn;
rn = kmem_alloc(sizeof (*rn), KM_SLEEP);
rn->rn_rrl = rrl;
rn->rn_next = tsd_get(rrw_tsd_key);
rn->rn_tag = tag;
VERIFY(tsd_set(rrw_tsd_key, rn) == 0);
}
@@ -113,7 +115,7 @@ rrn_add(rrwlock_t *rrl)
* thread's list and return TRUE; otherwise return FALSE.
*/
static boolean_t
rrn_find_and_remove(rrwlock_t *rrl)
rrn_find_and_remove(rrwlock_t *rrl, void *tag)
{
rrw_node_t *rn;
rrw_node_t *prev = NULL;
@@ -122,7 +124,7 @@ rrn_find_and_remove(rrwlock_t *rrl)
return (B_FALSE);
for (rn = tsd_get(rrw_tsd_key); rn != NULL; rn = rn->rn_next) {
if (rn->rn_rrl == rrl) {
if (rn->rn_rrl == rrl && rn->rn_tag == tag) {
if (prev)
prev->rn_next = rn->rn_next;
else
@@ -136,7 +138,7 @@ rrn_find_and_remove(rrwlock_t *rrl)
}
void
rrw_init(rrwlock_t *rrl)
rrw_init(rrwlock_t *rrl, boolean_t track_all)
{
mutex_init(&rrl->rr_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&rrl->rr_cv, NULL, CV_DEFAULT, NULL);
@@ -144,6 +146,7 @@ rrw_init(rrwlock_t *rrl)
refcount_create(&rrl->rr_anon_rcount);
refcount_create(&rrl->rr_linked_rcount);
rrl->rr_writer_wanted = B_FALSE;
rrl->rr_track_all = track_all;
}
void
@@ -156,12 +159,13 @@ rrw_destroy(rrwlock_t *rrl)
refcount_destroy(&rrl->rr_linked_rcount);
}
static void
void
rrw_enter_read(rrwlock_t *rrl, void *tag)
{
mutex_enter(&rrl->rr_lock);
#if !defined(DEBUG) && defined(_KERNEL)
if (!rrl->rr_writer && !rrl->rr_writer_wanted) {
if (rrl->rr_writer == NULL && !rrl->rr_writer_wanted &&
!rrl->rr_track_all) {
rrl->rr_anon_rcount.rc_count++;
mutex_exit(&rrl->rr_lock);
return;
@@ -171,14 +175,14 @@ rrw_enter_read(rrwlock_t *rrl, void *tag)
ASSERT(rrl->rr_writer != curthread);
ASSERT(refcount_count(&rrl->rr_anon_rcount) >= 0);
while (rrl->rr_writer || (rrl->rr_writer_wanted &&
while (rrl->rr_writer != NULL || (rrl->rr_writer_wanted &&
refcount_is_zero(&rrl->rr_anon_rcount) &&
rrn_find(rrl) == NULL))
cv_wait(&rrl->rr_cv, &rrl->rr_lock);
if (rrl->rr_writer_wanted) {
if (rrl->rr_writer_wanted || rrl->rr_track_all) {
/* may or may not be a re-entrant enter */
rrn_add(rrl);
rrn_add(rrl, tag);
(void) refcount_add(&rrl->rr_linked_rcount, tag);
} else {
(void) refcount_add(&rrl->rr_anon_rcount, tag);
@@ -187,7 +191,7 @@ rrw_enter_read(rrwlock_t *rrl, void *tag)
mutex_exit(&rrl->rr_lock);
}
static void
void
rrw_enter_write(rrwlock_t *rrl)
{
mutex_enter(&rrl->rr_lock);
@@ -233,10 +237,12 @@ rrw_exit(rrwlock_t *rrl, void *tag)
if (rrl->rr_writer == NULL) {
int64_t count;
if (rrn_find_and_remove(rrl))
if (rrn_find_and_remove(rrl, tag)) {
count = refcount_remove(&rrl->rr_linked_rcount, tag);
else
} else {
ASSERT(!rrl->rr_track_all);
count = refcount_remove(&rrl->rr_anon_rcount, tag);
}
if (count == 0)
cv_broadcast(&rrl->rr_cv);
} else {
@@ -249,6 +255,11 @@ rrw_exit(rrwlock_t *rrl, void *tag)
mutex_exit(&rrl->rr_lock);
}
/*
* If the lock was created with track_all, rrw_held(RW_READER) will return
* B_TRUE iff the current thread has the lock for reader. Otherwise it may
* return B_TRUE if any thread has the lock for reader.
*/
boolean_t
rrw_held(rrwlock_t *rrl, krw_t rw)
{
@@ -259,7 +270,7 @@ rrw_held(rrwlock_t *rrl, krw_t rw)
held = (rrl->rr_writer == curthread);
} else {
held = (!refcount_is_zero(&rrl->rr_anon_rcount) ||
!refcount_is_zero(&rrl->rr_linked_rcount));
rrn_find(rrl) != NULL);
}
mutex_exit(&rrl->rr_lock);
+3 -3
View File
@@ -1019,10 +1019,10 @@ sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count,
sa_attr_type_t *tb;
int error;
mutex_enter(&os->os_lock);
mutex_enter(&os->os_user_ptr_lock);
if (os->os_sa) {
mutex_enter(&os->os_sa->sa_lock);
mutex_exit(&os->os_lock);
mutex_exit(&os->os_user_ptr_lock);
tb = os->os_sa->sa_user_table;
mutex_exit(&os->os_sa->sa_lock);
*user_table = tb;
@@ -1035,7 +1035,7 @@ sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count,
os->os_sa = sa;
mutex_enter(&sa->sa_lock);
mutex_exit(&os->os_lock);
mutex_exit(&os->os_user_ptr_lock);
avl_create(&sa->sa_layout_num_tree, layout_num_compare,
sizeof (sa_lot_t), offsetof(sa_lot_t, lot_num_node));
avl_create(&sa->sa_layout_hash_tree, layout_hash_compare,
+44 -39
View File
@@ -64,6 +64,7 @@
#include <sys/zfs_ioctl.h>
#include <sys/dsl_scan.h>
#include <sys/zfeature.h>
#include <sys/dsl_destroy.h>
#include <sys/zvol.h>
#ifdef _KERNEL
@@ -131,10 +132,8 @@ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */
};
static dsl_syncfunc_t spa_sync_version;
static dsl_syncfunc_t spa_sync_props;
static dsl_checkfunc_t spa_change_guid_check;
static dsl_syncfunc_t spa_change_guid_sync;
static void spa_sync_version(void *arg, dmu_tx_t *tx);
static void spa_sync_props(void *arg, dmu_tx_t *tx);
static boolean_t spa_has_active_shared_spare(spa_t *spa);
static inline int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
@@ -329,10 +328,10 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
dsl_dataset_t *ds = NULL;
dp = spa_get_dsl(spa);
rw_enter(&dp->dp_config_rwlock, RW_READER);
dsl_pool_config_enter(dp, FTAG);
if ((err = dsl_dataset_hold_obj(dp,
za.za_first_integer, FTAG, &ds))) {
rw_exit(&dp->dp_config_rwlock);
dsl_pool_config_exit(dp, FTAG);
break;
}
@@ -341,7 +340,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
KM_PUSHPAGE);
dsl_dataset_name(ds, strval);
dsl_dataset_rele(ds, FTAG);
rw_exit(&dp->dp_config_rwlock);
dsl_pool_config_exit(dp, FTAG);
} else {
strval = NULL;
intval = za.za_first_integer;
@@ -495,9 +494,10 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
if (dmu_objset_type(os) != DMU_OST_ZFS) {
error = ENOTSUP;
} else if ((error = dsl_prop_get_integer(strval,
} else if ((error =
dsl_prop_get_int_ds(dmu_objset_ds(os),
zfs_prop_to_name(ZFS_PROP_COMPRESSION),
&compress, NULL)) == 0 &&
&compress)) == 0 &&
!BOOTFS_COMPRESS_VALID(compress)) {
error = ENOTSUP;
} else {
@@ -661,8 +661,8 @@ spa_prop_set(spa_t *spa, nvlist_t *nvp)
* read object, the features for write object, or the
* feature descriptions object.
*/
error = dsl_sync_task_do(spa_get_dsl(spa), NULL,
spa_sync_version, spa, &ver, 6);
error = dsl_sync_task(spa->spa_name, NULL,
spa_sync_version, &ver, 6);
if (error)
return (error);
continue;
@@ -673,8 +673,8 @@ spa_prop_set(spa_t *spa, nvlist_t *nvp)
}
if (need_sync) {
return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
spa, nvp, 6));
return (dsl_sync_task(spa->spa_name, NULL, spa_sync_props,
nvp, 6));
}
return (0);
@@ -696,12 +696,12 @@ spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
/*ARGSUSED*/
static int
spa_change_guid_check(void *arg1, void *arg2, dmu_tx_t *tx)
spa_change_guid_check(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
vdev_t *rvd = spa->spa_root_vdev;
uint64_t vdev_state;
ASSERTV(uint64_t *newguid = arg2);
ASSERTV(uint64_t *newguid = arg);
spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
vdev_state = rvd->vdev_state;
@@ -716,10 +716,10 @@ spa_change_guid_check(void *arg1, void *arg2, dmu_tx_t *tx)
}
static void
spa_change_guid_sync(void *arg1, void *arg2, dmu_tx_t *tx)
spa_change_guid_sync(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
uint64_t *newguid = arg2;
uint64_t *newguid = arg;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
uint64_t oldguid;
vdev_t *rvd = spa->spa_root_vdev;
@@ -753,8 +753,8 @@ spa_change_guid(spa_t *spa)
mutex_enter(&spa_namespace_lock);
guid = spa_generate_guid(NULL);
error = dsl_sync_task_do(spa_get_dsl(spa), spa_change_guid_check,
spa_change_guid_sync, spa, &guid, 5);
error = dsl_sync_task(spa->spa_name, spa_change_guid_check,
spa_change_guid_sync, &guid, 5);
if (error == 0) {
spa_config_sync(spa, B_FALSE, B_TRUE);
@@ -1729,23 +1729,24 @@ spa_config_valid(spa_t *spa, nvlist_t *config)
/*
* Check for missing log devices
*/
static int
static boolean_t
spa_check_logs(spa_t *spa)
{
boolean_t rv = B_FALSE;
switch (spa->spa_log_state) {
default:
break;
case SPA_LOG_MISSING:
/* need to recheck in case slog has been restored */
case SPA_LOG_UNKNOWN:
if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL,
DS_FIND_CHILDREN)) {
rv = (dmu_objset_find(spa->spa_name, zil_check_log_chain,
NULL, DS_FIND_CHILDREN) != 0);
if (rv)
spa_set_log_state(spa, SPA_LOG_MISSING);
return (1);
}
break;
}
return (0);
return (rv);
}
static boolean_t
@@ -1793,11 +1794,11 @@ spa_activate_log(spa_t *spa)
int
spa_offline_log(spa_t *spa)
{
int error = 0;
if ((error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
NULL, DS_FIND_CHILDREN)) == 0) {
int error;
error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
NULL, DS_FIND_CHILDREN);
if (error == 0) {
/*
* We successfully offlined the log device, sync out the
* current txg so that the "stubby" block can be removed
@@ -3610,7 +3611,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
if (props != NULL) {
spa_configfile_set(spa, props, B_FALSE);
spa_sync_props(spa, props, tx);
spa_sync_props(props, tx);
}
dmu_tx_commit(tx);
@@ -3844,7 +3845,7 @@ out:
* Import a non-root pool into the system.
*/
int
spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
{
spa_t *spa;
char *altroot = NULL;
@@ -5878,10 +5879,11 @@ spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
}
static void
spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx)
spa_sync_version(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
uint64_t version = *(uint64_t *)arg2;
uint64_t *versionp = arg;
uint64_t version = *versionp;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
/*
* Setting the version is special cased when first creating the pool.
@@ -5900,11 +5902,11 @@ spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx)
* Set zpool properties.
*/
static void
spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
spa_sync_props(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
nvlist_t *nvp = arg;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
objset_t *mos = spa->spa_meta_objset;
nvlist_t *nvp = arg2;
nvpair_t *elem = NULL;
mutex_enter(&spa->spa_props_lock);
@@ -6056,6 +6058,8 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
ASSERT(spa->spa_sync_pass == 1);
rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&
spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) {
dsl_pool_create_origin(dp, tx);
@@ -6081,6 +6085,7 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) {
spa_feature_create_zap_objects(spa, tx);
}
rrw_exit(&dp->dp_config_rwlock, FTAG);
}
/*
+10 -16
View File
@@ -197,10 +197,10 @@ spa_history_zone(void)
*/
/*ARGSUSED*/
static void
spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
spa_history_log_sync(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
nvlist_t *nvl = arg2;
nvlist_t *nvl = arg;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
objset_t *mos = spa->spa_meta_objset;
dmu_buf_t *dbp;
spa_history_phys_t *shpp;
@@ -222,7 +222,7 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
* Get the offset of where we need to write via the bonus buffer.
* Update the offset when the write completes.
*/
VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
VERIFY0(dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
shpp = dbp->db_data;
dmu_buf_will_dirty(dbp, tx);
@@ -326,8 +326,8 @@ spa_history_log_nvl(spa_t *spa, nvlist_t *nvl)
fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED()));
/* Kick this off asynchronously; errors are ignored. */
dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL,
spa_history_log_sync, spa, nvarg, 0, tx);
dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync,
nvarg, 0, tx);
dmu_tx_commit(tx);
/* spa_history_log_sync will free nvl */
@@ -465,10 +465,10 @@ log_internal(nvlist_t *nvl, const char *operation, spa_t *spa,
fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg);
if (dmu_tx_is_syncing(tx)) {
spa_history_log_sync(spa, nvl, tx);
spa_history_log_sync(nvl, tx);
} else {
dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL,
spa_history_log_sync, spa, nvl, 0, tx);
dsl_sync_task_nowait(spa_get_dsl(spa),
spa_history_log_sync, nvl, 0, tx);
}
/* spa_history_log_sync() will free nvl */
}
@@ -544,17 +544,11 @@ spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
void
spa_history_log_version(spa_t *spa, const char *operation)
{
#ifdef _KERNEL
uint64_t current_vers = spa_version(spa);
spa_history_log_internal(spa, operation, NULL,
"pool version %llu; software version %llu/%d; uts %s %s %s %s",
(u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION,
(u_longlong_t)spa_version(spa), SPA_VERSION, ZPL_VERSION,
utsname.nodename, utsname.release, utsname.version,
utsname.machine);
cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", operation,
(u_longlong_t)current_vers, spa_name(spa), SPA_VERSION);
#endif
}
#if defined(_KERNEL) && defined(HAVE_SPL)
+9 -10
View File
@@ -268,7 +268,7 @@ spa_config_lock_init(spa_t *spa)
spa_config_lock_t *scl = &spa->spa_config_lock[i];
mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL);
refcount_create(&scl->scl_count);
refcount_create_untracked(&scl->scl_count);
scl->scl_writer = NULL;
scl->scl_write_wanted = 0;
}
@@ -326,6 +326,8 @@ spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw)
int wlocks_held = 0;
int i;
ASSERT3U(SCL_LOCKS, <, sizeof (wlocks_held) * NBBY);
for (i = 0; i < SCL_LOCKS; i++) {
spa_config_lock_t *scl = &spa->spa_config_lock[i];
if (scl->scl_writer == curthread)
@@ -406,27 +408,22 @@ spa_lookup(const char *name)
static spa_t search; /* spa_t is large; don't allocate on stack */
spa_t *spa;
avl_index_t where;
char c = 0;
char *cp;
ASSERT(MUTEX_HELD(&spa_namespace_lock));
(void) strlcpy(search.spa_name, name, sizeof (search.spa_name));
/*
* If it's a full dataset name, figure out the pool name and
* just use that.
*/
cp = strpbrk(name, "/@");
if (cp) {
c = *cp;
cp = strpbrk(search.spa_name, "/@");
if (cp != NULL)
*cp = '\0';
}
(void) strlcpy(search.spa_name, name, sizeof (search.spa_name));
spa = avl_find(&spa_namespace_avl, &search, &where);
if (cp)
*cp = c;
return (spa);
}
@@ -539,6 +536,8 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
KM_SLEEP) == 0);
}
spa->spa_debug = ((zfs_flags & ZFS_DEBUG_SPA) != 0);
return (spa);
}
+21 -22
View File
@@ -102,7 +102,7 @@ void
space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
{
avl_index_t where;
space_seg_t ssearch, *ss_before, *ss_after, *ss;
space_seg_t *ss_before, *ss_after, *ss;
uint64_t end = start + size;
int merge_before, merge_after;
@@ -115,11 +115,8 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0);
VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
ssearch.ss_start = start;
ssearch.ss_end = end;
ss = avl_find(&sm->sm_root, &ssearch, &where);
if (ss != NULL && ss->ss_start <= start && ss->ss_end >= end) {
ss = space_map_find(sm, start, size, &where);
if (ss != NULL) {
zfs_panic_recover("zfs: allocating allocated segment"
"(offset=%llu size=%llu)\n",
(longlong_t)start, (longlong_t)size);
@@ -171,19 +168,12 @@ void
space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
{
avl_index_t where;
space_seg_t ssearch, *ss, *newseg;
space_seg_t *ss, *newseg;
uint64_t end = start + size;
int left_over, right_over;
ASSERT(MUTEX_HELD(sm->sm_lock));
VERIFY(!sm->sm_condensing);
VERIFY(size != 0);
VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0);
VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
ssearch.ss_start = start;
ssearch.ss_end = end;
ss = avl_find(&sm->sm_root, &ssearch, &where);
ss = space_map_find(sm, start, size, &where);
/* Make sure we completely overlap with someone */
if (ss == NULL) {
@@ -226,12 +216,11 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
sm->sm_space -= size;
}
boolean_t
space_map_contains(space_map_t *sm, uint64_t start, uint64_t size)
space_seg_t *
space_map_find(space_map_t *sm, uint64_t start, uint64_t size,
avl_index_t *wherep)
{
avl_index_t where;
space_seg_t ssearch, *ss;
uint64_t end = start + size;
ASSERT(MUTEX_HELD(sm->sm_lock));
VERIFY(size != 0);
@@ -239,10 +228,20 @@ space_map_contains(space_map_t *sm, uint64_t start, uint64_t size)
VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
ssearch.ss_start = start;
ssearch.ss_end = end;
ss = avl_find(&sm->sm_root, &ssearch, &where);
ssearch.ss_end = start + size;
ss = avl_find(&sm->sm_root, &ssearch, wherep);
return (ss != NULL && ss->ss_start <= start && ss->ss_end >= end);
if (ss != NULL && ss->ss_start <= start && ss->ss_end >= start + size)
return (ss);
return (NULL);
}
boolean_t
space_map_contains(space_map_t *sm, uint64_t start, uint64_t size)
{
avl_index_t where;
return (space_map_find(sm, start, size, &where) != 0);
}
void
+21 -16
View File
@@ -659,6 +659,8 @@ txg_wait_synced(dsl_pool_t *dp, uint64_t txg)
{
tx_state_t *tx = &dp->dp_tx;
ASSERT(!dsl_pool_config_held(dp));
mutex_enter(&tx->tx_sync_lock);
ASSERT(tx->tx_threads == 2);
if (txg == 0)
@@ -682,6 +684,8 @@ txg_wait_open(dsl_pool_t *dp, uint64_t txg)
{
tx_state_t *tx = &dp->dp_tx;
ASSERT(!dsl_pool_config_held(dp));
mutex_enter(&tx->tx_sync_lock);
ASSERT(tx->tx_threads == 2);
if (txg == 0)
@@ -747,42 +751,43 @@ txg_list_empty(txg_list_t *tl, uint64_t txg)
}
/*
* Add an entry to the list.
* Returns 0 if it's a new entry, 1 if it's already there.
* Add an entry to the list (unless it's already on the list).
* Returns B_TRUE if it was actually added.
*/
int
boolean_t
txg_list_add(txg_list_t *tl, void *p, uint64_t txg)
{
int t = txg & TXG_MASK;
txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
int already_on_list;
boolean_t add;
mutex_enter(&tl->tl_lock);
already_on_list = tn->tn_member[t];
if (!already_on_list) {
add = (tn->tn_member[t] == 0);
if (add) {
tn->tn_member[t] = 1;
tn->tn_next[t] = tl->tl_head[t];
tl->tl_head[t] = tn;
}
mutex_exit(&tl->tl_lock);
return (already_on_list);
return (add);
}
/*
* Add an entry to the end of the list (walks list to find end).
* Returns 0 if it's a new entry, 1 if it's already there.
* Add an entry to the end of the list, unless it's already on the list.
* (walks list to find end)
* Returns B_TRUE if it was actually added.
*/
int
boolean_t
txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg)
{
int t = txg & TXG_MASK;
txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
int already_on_list;
boolean_t add;
mutex_enter(&tl->tl_lock);
already_on_list = tn->tn_member[t];
if (!already_on_list) {
add = (tn->tn_member[t] == 0);
if (add) {
txg_node_t **tp;
for (tp = &tl->tl_head[t]; *tp != NULL; tp = &(*tp)->tn_next[t])
@@ -794,7 +799,7 @@ txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg)
}
mutex_exit(&tl->tl_lock);
return (already_on_list);
return (add);
}
/*
@@ -845,13 +850,13 @@ txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg)
return (NULL);
}
int
boolean_t
txg_list_member(txg_list_t *tl, void *p, uint64_t txg)
{
int t = txg & TXG_MASK;
txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
return (tn->tn_member[t]);
return (tn->tn_member[t] != 0);
}
/*
+24 -19
View File
@@ -80,6 +80,7 @@
#include <sys/zfs_vnops.h>
#include <sys/stat.h>
#include <sys/dmu.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_deleg.h>
#include <sys/mount.h>
#include <sys/zpl.h>
@@ -488,13 +489,13 @@ zfsctl_rename_snap(zfs_sb_t *zsb, zfs_snapentry_t *sep, const char *name)
*/
/*ARGSUSED*/
int
zfsctl_snapdir_rename(struct inode *sdip, char *sname,
struct inode *tdip, char *tname, cred_t *cr, int flags)
zfsctl_snapdir_rename(struct inode *sdip, char *snm,
struct inode *tdip, char *tnm, cred_t *cr, int flags)
{
zfs_sb_t *zsb = ITOZSB(sdip);
zfs_snapentry_t search, *sep;
avl_index_t where;
char *to, *from, *real;
char *to, *from, *real, *fsname;
int error;
ZFS_ENTER(zsb);
@@ -502,23 +503,26 @@ zfsctl_snapdir_rename(struct inode *sdip, char *sname,
to = kmem_alloc(MAXNAMELEN, KM_SLEEP);
from = kmem_alloc(MAXNAMELEN, KM_SLEEP);
real = kmem_alloc(MAXNAMELEN, KM_SLEEP);
fsname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
if (zsb->z_case == ZFS_CASE_INSENSITIVE) {
error = dmu_snapshot_realname(zsb->z_os, sname, real,
error = dmu_snapshot_realname(zsb->z_os, snm, real,
MAXNAMELEN, NULL);
if (error == 0) {
sname = real;
snm = real;
} else if (error != ENOTSUP) {
goto out;
}
}
error = zfsctl_snapshot_zname(sdip, sname, MAXNAMELEN, from);
if (!error)
error = zfsctl_snapshot_zname(tdip, tname, MAXNAMELEN, to);
if (!error)
dmu_objset_name(zsb->z_os, fsname);
error = zfsctl_snapshot_zname(sdip, snm, MAXNAMELEN, from);
if (error == 0)
error = zfsctl_snapshot_zname(tdip, tnm, MAXNAMELEN, to);
if (error == 0)
error = zfs_secpolicy_rename_perms(from, to, cr);
if (error)
if (error != 0)
goto out;
/*
@@ -532,21 +536,21 @@ zfsctl_snapdir_rename(struct inode *sdip, char *sname,
/*
* No-op when names are identical.
*/
if (strcmp(sname, tname) == 0) {
if (strcmp(snm, tnm) == 0) {
error = 0;
goto out;
}
mutex_enter(&zsb->z_ctldir_lock);
error = dmu_objset_rename(from, to, B_FALSE);
error = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE);
if (error)
goto out_unlock;
search.se_name = (char *)sname;
search.se_name = (char *)snm;
sep = avl_find(&zsb->z_ctldir_snaps, &search, &where);
if (sep)
zfsctl_rename_snap(zsb, sep, tname);
zfsctl_rename_snap(zsb, sep, tnm);
out_unlock:
mutex_exit(&zsb->z_ctldir_lock);
@@ -554,6 +558,7 @@ out:
kmem_free(from, MAXNAMELEN);
kmem_free(to, MAXNAMELEN);
kmem_free(real, MAXNAMELEN);
kmem_free(fsname, MAXNAMELEN);
ZFS_EXIT(zsb);
@@ -588,14 +593,14 @@ zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags)
}
error = zfsctl_snapshot_zname(dip, name, MAXNAMELEN, snapname);
if (!error)
if (error == 0)
error = zfs_secpolicy_destroy_perms(snapname, cr);
if (error)
if (error != 0)
goto out;
error = zfsctl_unmount_snapshot(zsb, name, MNT_FORCE);
if ((error == 0) || (error == ENOENT))
error = dmu_objset_destroy(snapname, B_FALSE);
error = dsl_destroy_snapshot(snapname, B_FALSE);
out:
kmem_free(snapname, MAXNAMELEN);
kmem_free(real, MAXNAMELEN);
@@ -628,12 +633,12 @@ zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap,
dmu_objset_name(zsb->z_os, dsname);
error = zfs_secpolicy_snapshot_perms(dsname, cr);
if (error)
if (error != 0)
goto out;
if (error == 0) {
error = dmu_objset_snapshot_one(dsname, dirname);
if (error)
if (error != 0)
goto out;
error = zfsctl_snapdir_lookup(dip, dirname, ipp,
+518 -642
View File
File diff suppressed because it is too large Load Diff
+39 -28
View File
@@ -248,28 +248,31 @@ zfs_register_callbacks(zfs_sb_t *zsb)
* overboard...
*/
ds = dmu_objset_ds(os);
dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
error = dsl_prop_register(ds,
"atime", atime_changed_cb, zsb);
zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zsb);
error = error ? error : dsl_prop_register(ds,
"xattr", xattr_changed_cb, zsb);
zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zsb);
error = error ? error : dsl_prop_register(ds,
"recordsize", blksz_changed_cb, zsb);
zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zsb);
error = error ? error : dsl_prop_register(ds,
"readonly", readonly_changed_cb, zsb);
zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zsb);
error = error ? error : dsl_prop_register(ds,
"devices", devices_changed_cb, zsb);
zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zsb);
error = error ? error : dsl_prop_register(ds,
"setuid", setuid_changed_cb, zsb);
zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zsb);
error = error ? error : dsl_prop_register(ds,
"exec", exec_changed_cb, zsb);
zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zsb);
error = error ? error : dsl_prop_register(ds,
"snapdir", snapdir_changed_cb, zsb);
zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zsb);
error = error ? error : dsl_prop_register(ds,
"aclinherit", acl_inherit_changed_cb, zsb);
zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
zsb);
error = error ? error : dsl_prop_register(ds,
"vscan", vscan_changed_cb, zsb);
zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zsb);
error = error ? error : dsl_prop_register(ds,
"nbmand", nbmand_changed_cb, zsb);
zfs_prop_to_name(ZFS_PROP_NBMAND), nbmand_changed_cb, zsb);
dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
if (error)
goto unregister;
@@ -284,18 +287,28 @@ unregister:
* registered, but this is OK; it will simply return ENOMSG,
* which we will ignore.
*/
(void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zsb);
(void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zsb);
(void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zsb);
(void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zsb);
(void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zsb);
(void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zsb);
(void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zsb);
(void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zsb);
(void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
zsb);
(void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zsb);
(void) dsl_prop_unregister(ds, "nbmand", nbmand_changed_cb, zsb);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ATIME),
atime_changed_cb, zsb);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_XATTR),
xattr_changed_cb, zsb);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
blksz_changed_cb, zsb);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_READONLY),
readonly_changed_cb, zsb);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_DEVICES),
devices_changed_cb, zsb);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SETUID),
setuid_changed_cb, zsb);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_EXEC),
exec_changed_cb, zsb);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SNAPDIR),
snapdir_changed_cb, zsb);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT),
acl_inherit_changed_cb, zsb);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_VSCAN),
vscan_changed_cb, zsb);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_NBMAND),
nbmand_changed_cb, zsb);
return (error);
}
@@ -305,8 +318,6 @@ static int
zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
uint64_t *userp, uint64_t *groupp)
{
int error = 0;
/*
* Is it a valid type of object to track?
*/
@@ -363,7 +374,7 @@ zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
*groupp = BSWAP_64(*groupp);
}
}
return (error);
return (0);
}
static void
@@ -726,7 +737,7 @@ zfs_sb_create(const char *osname, zfs_sb_t **zsbp)
mutex_init(&zsb->z_lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&zsb->z_all_znodes, sizeof (znode_t),
offsetof(znode_t, z_link_node));
rrw_init(&zsb->z_teardown_lock);
rrw_init(&zsb->z_teardown_lock, B_FALSE);
rw_init(&zsb->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
rw_init(&zsb->z_fuid_lock, NULL, RW_DEFAULT, NULL);
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
@@ -1138,7 +1149,7 @@ zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
if (dsl_dataset_is_dirty(dmu_objset_ds(zsb->z_os)) &&
!zfs_is_readonly(zsb))
txg_wait_synced(dmu_objset_pool(zsb->z_os), 0);
(void) dmu_objset_evict_dbufs(zsb->z_os);
dmu_objset_evict_dbufs(zsb->z_os);
return (0);
}
+106 -34
View File
@@ -257,7 +257,7 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst,
}
}
VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
VERIFY(arc_buf_remove_ref(abuf, &abuf));
}
return (error);
@@ -356,7 +356,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
break;
error = zil_read_log_block(zilog, &blk, &next_blk, lrbuf, &end);
if (error)
if (error != 0)
break;
for (lrp = lrbuf; lrp < end; lrp += reclen) {
@@ -492,7 +492,7 @@ zilog_dirty(zilog_t *zilog, uint64_t txg)
if (dsl_dataset_is_snapshot(ds))
panic("dirtying snapshot!");
if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg) == 0) {
if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg)) {
/* up the hold count until we can be written out */
dmu_buf_add_ref(ds->ds_dbuf, zilog);
}
@@ -658,8 +658,8 @@ zil_claim(const char *osname, void *txarg)
objset_t *os;
int error;
error = dmu_objset_hold(osname, FTAG, &os);
if (error) {
error = dmu_objset_own(osname, DMU_OST_ANY, B_FALSE, FTAG, &os);
if (error != 0) {
cmn_err(CE_WARN, "can't open objset for %s", osname);
return (0);
}
@@ -672,7 +672,7 @@ zil_claim(const char *osname, void *txarg)
zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log);
BP_ZERO(&zh->zh_log);
dsl_dataset_dirty(dmu_objset_ds(os), tx);
dmu_objset_rele(os, FTAG);
dmu_objset_disown(os, FTAG);
return (0);
}
@@ -697,7 +697,7 @@ zil_claim(const char *osname, void *txarg)
}
ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1));
dmu_objset_rele(os, FTAG);
dmu_objset_disown(os, FTAG);
return (0);
}
@@ -717,7 +717,7 @@ zil_check_log_chain(const char *osname, void *tx)
ASSERT(tx == NULL);
error = dmu_objset_hold(osname, FTAG, &os);
if (error) {
if (error != 0) {
cmn_err(CE_WARN, "can't open objset for %s", osname);
return (0);
}
@@ -1014,7 +1014,8 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
BP_ZERO(bp);
use_slog = USE_SLOG(zilog);
error = zio_alloc_zil(spa, txg, bp, zil_blksz, USE_SLOG(zilog));
error = zio_alloc_zil(spa, txg, bp, zil_blksz,
USE_SLOG(zilog));
if (use_slog)
{
ZIL_STAT_BUMP(zil_itx_metaslab_slog_count);
@@ -1025,7 +1026,7 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
ZIL_STAT_BUMP(zil_itx_metaslab_normal_count);
ZIL_STAT_INCR(zil_itx_metaslab_normal_bytes, lwb->lwb_nused);
}
if (!error) {
if (error == 0) {
ASSERT3U(bp->blk_birth, ==, txg);
bp->blk_cksum = lwb->lwb_blk.blk_cksum;
bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++;
@@ -1145,7 +1146,7 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
txg_wait_synced(zilog->zl_dmu_pool, txg);
return (lwb);
}
if (error) {
if (error != 0) {
ASSERT(error == ENOENT || error == EEXIST ||
error == EALREADY);
return (lwb);
@@ -1807,6 +1808,9 @@ zil_free(zilog_t *zilog)
zilog->zl_stop_sync = 1;
ASSERT0(zilog->zl_suspend);
ASSERT0(zilog->zl_suspending);
ASSERT(list_is_empty(&zilog->zl_lwb_list));
list_destroy(&zilog->zl_lwb_list);
@@ -1905,32 +1909,100 @@ zil_close(zilog_t *zilog)
mutex_exit(&zilog->zl_lock);
}
static char *suspend_tag = "zil suspending";
/*
* Suspend an intent log. While in suspended mode, we still honor
* synchronous semantics, but we rely on txg_wait_synced() to do it.
* We suspend the log briefly when taking a snapshot so that the snapshot
* contains all the data it's supposed to, and has an empty intent log.
* On old version pools, we suspend the log briefly when taking a
* snapshot so that it will have an empty intent log.
*
* Long holds are not really intended to be used the way we do here --
* held for such a short time. A concurrent caller of dsl_dataset_long_held()
* could fail. Therefore we take pains to only put a long hold if it is
* actually necessary. Fortunately, it will only be necessary if the
* objset is currently mounted (or the ZVOL equivalent). In that case it
* will already have a long hold, so we are not really making things any worse.
*
* Ideally, we would locate the existing long-holder (i.e. the zfsvfs_t or
* zvol_state_t), and use their mechanism to prevent their hold from being
* dropped (e.g. VFS_HOLD()). However, that would be even more pain for
* very little gain.
*
* if cookiep == NULL, this does both the suspend & resume.
* Otherwise, it returns with the dataset "long held", and the cookie
* should be passed into zil_resume().
*/
int
zil_suspend(zilog_t *zilog)
zil_suspend(const char *osname, void **cookiep)
{
const zil_header_t *zh = zilog->zl_header;
objset_t *os;
zilog_t *zilog;
const zil_header_t *zh;
int error;
error = dmu_objset_hold(osname, suspend_tag, &os);
if (error != 0)
return (error);
zilog = dmu_objset_zil(os);
mutex_enter(&zilog->zl_lock);
zh = zilog->zl_header;
if (zh->zh_flags & ZIL_REPLAY_NEEDED) { /* unplayed log */
mutex_exit(&zilog->zl_lock);
dmu_objset_rele(os, suspend_tag);
return (EBUSY);
}
if (zilog->zl_suspend++ != 0) {
/*
* Don't put a long hold in the cases where we can avoid it. This
* is when there is no cookie so we are doing a suspend & resume
* (i.e. called from zil_vdev_offline()), and there's nothing to do
* for the suspend because it's already suspended, or there's no ZIL.
*/
if (cookiep == NULL && !zilog->zl_suspending &&
(zilog->zl_suspend > 0 || BP_IS_HOLE(&zh->zh_log))) {
mutex_exit(&zilog->zl_lock);
dmu_objset_rele(os, suspend_tag);
return (0);
}
dsl_dataset_long_hold(dmu_objset_ds(os), suspend_tag);
dsl_pool_rele(dmu_objset_pool(os), suspend_tag);
zilog->zl_suspend++;
if (zilog->zl_suspend > 1) {
/*
* Someone else already began a suspend.
* Someone else is already suspending it.
* Just wait for them to finish.
*/
while (zilog->zl_suspending)
cv_wait(&zilog->zl_cv_suspend, &zilog->zl_lock);
mutex_exit(&zilog->zl_lock);
if (cookiep == NULL)
zil_resume(os);
else
*cookiep = os;
return (0);
}
/*
* If there is no pointer to an on-disk block, this ZIL must not
* be active (e.g. filesystem not mounted), so there's nothing
* to clean up.
*/
if (BP_IS_HOLE(&zh->zh_log)) {
ASSERT(cookiep != NULL); /* fast path already handled */
*cookiep = os;
mutex_exit(&zilog->zl_lock);
return (0);
}
zilog->zl_suspending = B_TRUE;
mutex_exit(&zilog->zl_lock);
@@ -1943,16 +2015,25 @@ zil_suspend(zilog_t *zilog)
cv_broadcast(&zilog->zl_cv_suspend);
mutex_exit(&zilog->zl_lock);
if (cookiep == NULL)
zil_resume(os);
else
*cookiep = os;
return (0);
}
void
zil_resume(zilog_t *zilog)
zil_resume(void *cookie)
{
objset_t *os = cookie;
zilog_t *zilog = dmu_objset_zil(os);
mutex_enter(&zilog->zl_lock);
ASSERT(zilog->zl_suspend != 0);
zilog->zl_suspend--;
mutex_exit(&zilog->zl_lock);
dsl_dataset_long_rele(dmu_objset_ds(os), suspend_tag);
dsl_dataset_rele(dmu_objset_ds(os), suspend_tag);
}
typedef struct zil_replay_arg {
@@ -2025,7 +2106,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
if (txtype == TX_WRITE && reclen == sizeof (lr_write_t)) {
error = zil_read_log_data(zilog, (lr_write_t *)lr,
zr->zr_lr + reclen);
if (error)
if (error != 0)
return (zil_replay_error(zilog, lr, error));
}
@@ -2046,7 +2127,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
* is updated if we are in replay mode.
*/
error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, zr->zr_byteswap);
if (error) {
if (error != 0) {
/*
* The DMU's dnode layer doesn't see removes until the txg
* commits, so a subsequent claim can spuriously fail with
@@ -2056,7 +2137,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
*/
txg_wait_synced(spa_get_dsl(zilog->zl_spa), 0);
error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, B_FALSE);
if (error)
if (error != 0)
return (zil_replay_error(zilog, lr, error));
}
return (0);
@@ -2128,21 +2209,12 @@ zil_replaying(zilog_t *zilog, dmu_tx_t *tx)
int
zil_vdev_offline(const char *osname, void *arg)
{
objset_t *os;
zilog_t *zilog;
int error;
error = dmu_objset_hold(osname, FTAG, &os);
if (error)
return (error);
zilog = dmu_objset_zil(os);
if (zil_suspend(zilog) != 0)
error = EEXIST;
else
zil_resume(zilog);
dmu_objset_rele(os, FTAG);
return (error);
error = zil_suspend(osname, NULL);
if (error != 0)
return (EEXIST);
return (0);
}
#if defined(_KERNEL) && defined(HAVE_SPL)
+6 -2
View File
@@ -767,6 +767,7 @@ zio_write_override(zio_t *zio, blkptr_t *bp, int copies)
void
zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp)
{
metaslab_check_free(spa, bp);
bplist_append(&spa->spa_free_bplist[txg & TXG_MASK], bp);
}
@@ -785,6 +786,8 @@ zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
arc_freed(spa, bp);
metaslab_check_free(spa, bp);
zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, flags,
NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_FREE_PIPELINE);
@@ -2060,7 +2063,7 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
bcmp(abuf->b_data, zio->io_orig_data,
zio->io_orig_size) != 0)
error = EEXIST;
VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
VERIFY(arc_buf_remove_ref(abuf, &abuf));
}
ddt_enter(ddt);
@@ -2656,8 +2659,9 @@ zio_vdev_io_assess(zio_t *zio)
* set vdev_cant_write so that we stop trying to allocate from it.
*/
if (zio->io_error == ENXIO && zio->io_type == ZIO_TYPE_WRITE &&
vd != NULL && !vd->vdev_ops->vdev_op_leaf)
vd != NULL && !vd->vdev_ops->vdev_op_leaf) {
vd->vdev_cant_write = B_TRUE;
}
if (zio->io_error)
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
+11 -6
View File
@@ -315,6 +315,13 @@ zvol_set_volsize(const char *name, uint64_t volsize)
uint64_t readonly;
int error;
error = dsl_prop_get_integer(name,
zfs_prop_to_name(ZFS_PROP_READONLY), &readonly, NULL);
if (error != 0)
return (error);
if (readonly)
return (EROFS);
mutex_enter(&zvol_state_lock);
zv = zvol_find_by_name(name);
@@ -1459,8 +1466,7 @@ zvol_remove_minor(const char *name)
}
static int
zvol_create_minors_cb(spa_t *spa, uint64_t dsobj,
const char *dsname, void *arg)
zvol_create_minors_cb(const char *dsname, void *arg)
{
if (strchr(dsname, '/') == NULL)
return 0;
@@ -1474,7 +1480,7 @@ zvol_create_minors_cb(spa_t *spa, uint64_t dsobj,
* for all available pools.
*/
int
zvol_create_minors(const char *pool)
zvol_create_minors(char *pool)
{
spa_t *spa = NULL;
int error = 0;
@@ -1484,13 +1490,12 @@ zvol_create_minors(const char *pool)
mutex_enter(&zvol_state_lock);
if (pool) {
error = dmu_objset_find_spa(NULL, pool, zvol_create_minors_cb,
error = dmu_objset_find(pool, zvol_create_minors_cb,
NULL, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
} else {
mutex_enter(&spa_namespace_lock);
while ((spa = spa_next(spa)) != NULL) {
error = dmu_objset_find_spa(NULL,
spa_name(spa), zvol_create_minors_cb, NULL,
error = dmu_objset_find(spa_name(spa), zvol_create_minors_cb, NULL,
DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
if (error)
break;