From 7b8518cb8d39aa340fecf559143763b27b212b0d Mon Sep 17 00:00:00 2001 From: Tim Haley Date: Tue, 26 Jul 2011 16:38:27 -0700 Subject: [PATCH 01/10] Illumos #xxx: zdb -vvv broken after zfs diff integration References to Illumos issue and patch: - https://github.com/illumos/illumos-gate/commit/163eb7ff Signed-off-by: Brian Behlendorf Issue #340 --- module/zfs/zfs_znode.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index ea8b4c505..dfbe11aca 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -1560,12 +1560,12 @@ zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) static int zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, - dmu_buf_t **db) + dmu_buf_t **db, void *tag) { dmu_object_info_t doi; int error; - if ((error = sa_buf_hold(osp, obj, FTAG, db)) != 0) + if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) return (error); dmu_object_info_from_db(*db, &doi); @@ -1573,13 +1573,13 @@ zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, doi.doi_bonus_type != DMU_OT_ZNODE) || (doi.doi_bonus_type == DMU_OT_ZNODE && doi.doi_bonus_size < sizeof (znode_phys_t))) { - sa_buf_rele(*db, FTAG); + sa_buf_rele(*db, tag); return (ENOTSUP); } error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); if (error != 0) { - sa_buf_rele(*db, FTAG); + sa_buf_rele(*db, tag); return (error); } @@ -1587,10 +1587,10 @@ zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, } void -zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db) +zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag) { sa_handle_destroy(hdl); - sa_buf_rele(db, FTAG); + sa_buf_rele(db, tag); } /* @@ -1667,7 +1667,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, int is_xattrdir; if (prevdb) - zfs_release_sa_handle(prevhdl, prevdb); + zfs_release_sa_handle(prevhdl, prevdb, FTAG); if ((error = zfs_obj_to_pobj(sa_hdl, sa_table, &pobj, &is_xattrdir)) != 0) @@ -1699,7 +1699,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, prevhdl = sa_hdl; prevdb = sa_db; } - error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db); + error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG); if (error != 0) { sa_hdl = prevhdl; sa_db = prevdb; @@ -1709,7 +1709,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, if (sa_hdl != NULL && sa_hdl != hdl) { ASSERT(sa_db != NULL); - zfs_release_sa_handle(sa_hdl, sa_db); + zfs_release_sa_handle(sa_hdl, sa_db, FTAG); } if (error == 0) @@ -1730,13 +1730,13 @@ zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) if (error != 0) return (error); - error = zfs_grab_sa_handle(osp, obj, &hdl, &db); + error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); if (error != 0) return (error); error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); - zfs_release_sa_handle(hdl, db); + zfs_release_sa_handle(hdl, db, FTAG); return (error); } @@ -1756,19 +1756,19 @@ zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, if (error != 0) return (error); - error = zfs_grab_sa_handle(osp, obj, &hdl, &db); + error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); if (error != 0) return (error); error = zfs_obj_to_stats_impl(hdl, sa_table, sb); if (error != 0) { - zfs_release_sa_handle(hdl, db); + zfs_release_sa_handle(hdl, db, FTAG); return (error); } error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); - zfs_release_sa_handle(hdl, db); + zfs_release_sa_handle(hdl, db, 
FTAG); return (error); } From ef3c1dea7024b07b4ace6115de9f22a99c1394d8 Mon Sep 17 00:00:00 2001 From: Gordon Ross Date: Tue, 26 Jul 2011 11:37:06 -0700 Subject: [PATCH 02/10] Illumos #764: panic in zfs:dbuf_sync_list Hypothesis about what's going on here. At some time in the past, something, i.e. dnode_reallocate() calls one of: dbuf_rm_spill(dn, tx); These will do: dbuf_rm_spill(dnode_t *dn, dmu_tx_t *tx) dbuf_free_range(dn, DMU_SPILL_BLKID, DMU_SPILL_BLKID, tx) dbuf_undirty(db, tx) Currently dbuf_undirty can leave a spill block in dn_dirty_records[], (it having been put there previously by dbuf_dirty) and free it. Sometime later, dbuf_sync_list trips over this reference to free'd (and typically reused) memory. Also, dbuf_undirty can call dnode_clear_range with a bogus block ID. It needs to test for DMU_SPILL_BLKID, similar to how dnode_clear_range is called in dbuf_dirty(). References to Illumos issue and patch: - https://www.illumos.org/issues/764 - https://github.com/illumos/illumos-gate/commit/3f2366c2bb Reviewed by: George Wilson Reviewed by: Mark.Maybe@oracle.com Reviewed by: Albert Lee Signed-off-by: Brian Behlendorf Issue #340 --- module/zfs/dbuf.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index e166c81df..34ce2f62b 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ #include @@ -1347,13 +1348,17 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) * it, since one of the current holders may be in the * middle of an update. Note that users of dbuf_undirty() * should not place a hold on the dbuf before the call. + * Also note: we can get here with a spill block, so + * test for that similar to how dbuf_dirty does. */ if (refcount_count(&db->db_holds) > db->db_dirtycnt) { mutex_exit(&db->db_mtx); /* Make sure we don't toss this buffer at sync phase */ - mutex_enter(&dn->dn_mtx); - dnode_clear_range(dn, db->db_blkid, 1, tx); - mutex_exit(&dn->dn_mtx); + if (db->db_blkid != DMU_SPILL_BLKID) { + mutex_enter(&dn->dn_mtx); + dnode_clear_range(dn, db->db_blkid, 1, tx); + mutex_exit(&dn->dn_mtx); + } DB_DNODE_EXIT(db); return (0); } @@ -1366,11 +1371,18 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) *drp = dr->dr_next; + /* + * Note that there are three places in dbuf_dirty() + * where this dirty record may be put on a list. + * Make sure to do a list_remove corresponding to + * every one of those list_insert calls. + */ if (dr->dr_parent) { mutex_enter(&dr->dr_parent->dt.di.dr_mtx); list_remove(&dr->dr_parent->dt.di.dr_children, dr); mutex_exit(&dr->dr_parent->dt.di.dr_mtx); - } else if (db->db_level+1 == dn->dn_nlevels) { + } else if (db->db_blkid == DMU_SPILL_BLKID || + db->db_level+1 == dn->dn_nlevels) { ASSERT(db->db_blkptr == NULL || db->db_parent == dn->dn_dbuf); mutex_enter(&dn->dn_mtx); list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr); From 2cc6c8db12eaf8d1a1c17a13fbc6d25d8244486e Mon Sep 17 00:00:00 2001 From: Garrett D'Amore Date: Fri, 22 Apr 2011 00:49:41 -0700 Subject: [PATCH 03/10] Illumos #175: zfs vdev cache consumes excessive memory Note that with the current ZFS code, it turns out that the vdev cache is not helpful, and in some cases actually harmful. It is better if we disable this. Once some time has passed, we should actually remove this to simplify the code. 
For now we just disable it by setting the zfs_vdev_cache_size to zero. Note that Solaris 11 has made these same changes. References to Illumos issue and patch: - https://www.illumos.org/issues/175 - https://github.com/illumos/illumos-gate/commit/b68a40a845 Reviewed by: George Wilson Reviewed by: Eric Schrock Approved by: Richard Lowe Signed-off-by: Brian Behlendorf Issue #340 --- module/zfs/vdev_cache.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/module/zfs/vdev_cache.c b/module/zfs/vdev_cache.c index 0d1fe7d2c..e2f8040d1 100644 --- a/module/zfs/vdev_cache.c +++ b/module/zfs/vdev_cache.c @@ -71,9 +71,16 @@ * 1< Date: Tue, 26 Jul 2011 11:53:09 -0700 Subject: [PATCH 04/10] Illumos #510: 'zfs get' enhancement - mountpoint as an argument The 'zfs get' command should be able to deal with mountpoint as an argument. It already works with 'zfs list' command: # zfs list /export/home/estibi NAME USED AVAIL REFER MOUNTPOINT rpool/export/home/estibi 1.14G 3.86G 1.14G /export/home/estibi but it fails with 'zfs get': # zfs get all /export/home/estibi cannot open '/export/home/estibi': invalid dataset name Reviewed by: Eric Schrock Reviewed by: Deano Reviewed by: Garrett D'Amore Approved by: Garrett D'Amore References to Illumos issue and patch: - https://www.illumos.org/issues/510 - https://github.com/illumos/illumos-gate/commit/5ead3ed965 Signed-off-by: Brian Behlendorf Issue #340 --- cmd/zfs/zfs_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index b6dc6d49d..69a6736a6 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ #include @@ -1274,7 +1275,7 @@ static int zfs_do_get(int argc, char **argv) { zprop_get_cbdata_t cb = { 0 }; - int i, c, flags = 0; + int i, c, flags = ZFS_ITER_ARGS_CAN_BE_PATHS; char *value, *fields; int ret; int limit = 0; From 6d974228ef05366c546bb04198dafcb38785c16d Mon Sep 17 00:00:00 2001 From: George Wilson Date: Tue, 26 Jul 2011 12:08:52 -0700 Subject: [PATCH 05/10] Illumos #1051: zfs should handle imbalanced luns Today zfs tries to allocate blocks evenly across all devices. This means when devices are imbalanced zfs will use lots of CPU searching for space on devices which tend to be pretty full. It should instead fail quickly on the full LUNs and move onto devices which have more availability. Reviewed by: Eric Schrock Reviewed by: Matt Ahrens Reviewed by: Adam Leventhal Reviewed by: Albert Lee Reviewed by: Gordon Ross Approved by: Garrett D'Amore References to Illumos issue and patch: - https://www.illumos.org/issues/510 - https://github.com/illumos/illumos-gate/commit/5ead3ed965 Signed-off-by: Brian Behlendorf Issue #340 --- cmd/ztest/ztest.c | 2 + include/sys/metaslab.h | 3 ++ include/sys/metaslab_impl.h | 2 + include/sys/spa.h | 8 +++ include/sys/spa_impl.h | 2 + module/zfs/metaslab.c | 105 ++++++++++++++++++++++++++---------- module/zfs/spa_misc.c | 7 +++ module/zfs/zio.c | 22 +++++++- 8 files changed, 123 insertions(+), 28 deletions(-) diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 6acba5290..235bf56ef 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. 
*/ /* @@ -5300,6 +5301,7 @@ ztest_run(ztest_shared_t *zs) */ kernel_init(FREAD | FWRITE); VERIFY(spa_open(zs->zs_pool, &spa, FTAG) == 0); + spa->spa_debug = B_TRUE; zs->zs_spa = spa; spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN; diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h index 583d6303b..2cf4d2b48 100644 --- a/include/sys/metaslab.h +++ b/include/sys/metaslab.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #ifndef _SYS_METASLAB_H @@ -47,6 +48,8 @@ extern void metaslab_sync_reassess(metaslab_group_t *mg); #define METASLAB_HINTBP_FAVOR 0x0 #define METASLAB_HINTBP_AVOID 0x1 #define METASLAB_GANG_HEADER 0x2 +#define METASLAB_GANG_CHILD 0x4 +#define METASLAB_GANG_AVOID 0x8 extern int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp, int ncopies, uint64_t txg, blkptr_t *hintbp, int flags); diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h index 07988dd51..6c670a162 100644 --- a/include/sys/metaslab_impl.h +++ b/include/sys/metaslab_impl.h @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #ifndef _SYS_METASLAB_IMPL_H @@ -52,6 +53,7 @@ struct metaslab_group { avl_tree_t mg_metaslab_tree; uint64_t mg_aliquot; uint64_t mg_bonus_area; + uint64_t mg_alloc_failures; int64_t mg_bias; int64_t mg_activation_count; metaslab_class_t *mg_class; diff --git a/include/sys/spa.h b/include/sys/spa.h index 52737ebc2..c9028fb09 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #ifndef _SYS_SPA_H @@ -698,6 +699,13 @@ _NOTE(CONSTCOND) } while (0) #define dprintf_bp(bp, fmt, ...) #endif +extern boolean_t spa_debug_enabled(spa_t *spa); +#define spa_dbgmsg(spa, ...) \ +{ \ + if (spa_debug_enabled(spa)) \ + zfs_dbgmsg(__VA_ARGS__); \ +} + extern int spa_mode_global; /* mode, e.g. FREAD | FWRITE */ #ifdef __cplusplus diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 1c34873b6..3f5cd9a73 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #ifndef _SYS_SPA_IMPL_H @@ -196,6 +197,7 @@ struct spa { kcondvar_t spa_suspend_cv; /* notification of resume */ uint8_t spa_suspended; /* pool is suspended */ uint8_t spa_claiming; /* pool is doing zil_claim() */ + boolean_t spa_debug; /* debug enabled? */ boolean_t spa_is_root; /* pool is root */ int spa_minref; /* num refs when first opened */ int spa_mode; /* FREAD | FWRITE */ diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 56c46100d..b089f1eac 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include @@ -30,11 +31,30 @@ #include #include -#define WITH_NDF_BLOCK_ALLOCATOR +#define WITH_DF_BLOCK_ALLOCATOR + +/* + * Allow allocations to switch to gang blocks quickly. We do this to + * avoid having to load lots of space_maps in a given txg. 
There are, + * however, some cases where we want to avoid "fast" ganging and instead + * we want to do an exhaustive search of all metaslabs on this device. + * Currently we don't allow any gang or dump device related allocations + * to "fast" gang. + */ +#define CAN_FASTGANG(flags) \ + (!((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER | \ + METASLAB_GANG_AVOID))) uint64_t metaslab_aliquot = 512ULL << 10; uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */ +/* + * This value defines the number of allowed allocation failures per vdev. + * If a device reaches this threshold in a given txg then we consider skipping + * allocations on that device. + */ +int zfs_mg_alloc_failures; + /* * Metaslab debugging: when set, keeps all space maps in core to verify frees. */ @@ -865,7 +885,7 @@ metaslab_prefetch(metaslab_group_t *mg) } static int -metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size) +metaslab_activate(metaslab_t *msp, uint64_t activation_weight) { metaslab_group_t *mg = msp->ms_group; space_map_t *sm = &msp->ms_map; @@ -899,13 +919,6 @@ metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size) mutex_exit(&mg->mg_lock); } - /* - * If we were able to load the map then make sure - * that this map is still able to satisfy our request. - */ - if (msp->ms_weight < size) - return (ENOSPC); - metaslab_group_sort(msp->ms_group, msp, msp->ms_weight | activation_weight); } @@ -1123,6 +1136,7 @@ void metaslab_sync_reassess(metaslab_group_t *mg) { vdev_t *vd = mg->mg_vd; + int64_t failures = mg->mg_alloc_failures; int m; /* @@ -1140,6 +1154,8 @@ metaslab_sync_reassess(metaslab_group_t *mg) mutex_exit(&msp->ms_lock); } + atomic_add_64(&mg->mg_alloc_failures, -failures); + /* * Prefetch the next potential metaslabs */ @@ -1164,9 +1180,10 @@ metaslab_distance(metaslab_t *msp, dva_t *dva) } static uint64_t -metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, - uint64_t min_distance, dva_t *dva, int d) +metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize, + uint64_t txg, uint64_t min_distance, dva_t *dva, int d, int flags) { + spa_t *spa = mg->mg_vd->vdev_spa; metaslab_t *msp = NULL; uint64_t offset = -1ULL; avl_tree_t *t = &mg->mg_metaslab_tree; @@ -1187,11 +1204,17 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, mutex_enter(&mg->mg_lock); for (msp = avl_first(t); msp; msp = AVL_NEXT(t, msp)) { - if (msp->ms_weight < size) { + if (msp->ms_weight < asize) { + spa_dbgmsg(spa, "%s: failed to meet weight " + "requirement: vdev %llu, txg %llu, mg %p, " + "msp %p, psize %llu, asize %llu, " + "failures %llu, weight %llu", + spa_name(spa), mg->mg_vd->vdev_id, txg, + mg, msp, psize, asize, + mg->mg_alloc_failures, msp->ms_weight); mutex_exit(&mg->mg_lock); return (-1ULL); } - was_active = msp->ms_weight & METASLAB_ACTIVE_MASK; if (activation_weight == METASLAB_WEIGHT_PRIMARY) break; @@ -1210,6 +1233,25 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, if (msp == NULL) return (-1ULL); + /* + * If we've already reached the allowable number of failed + * allocation attempts on this metaslab group then we + * consider skipping it. We skip it only if we're allowed + * to "fast" gang, the physical size is larger than + * a gang block, and we're attempting to allocate from + * the primary metaslab. 
+ */ + if (mg->mg_alloc_failures > zfs_mg_alloc_failures && + CAN_FASTGANG(flags) && psize > SPA_GANGBLOCKSIZE && + activation_weight == METASLAB_WEIGHT_PRIMARY) { + spa_dbgmsg(spa, "%s: skipping metaslab group: " + "vdev %llu, txg %llu, mg %p, psize %llu, " + "asize %llu, failures %llu", spa_name(spa), + mg->mg_vd->vdev_id, txg, mg, psize, asize, + mg->mg_alloc_failures); + return (-1ULL); + } + mutex_enter(&msp->ms_lock); /* @@ -1218,7 +1260,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, * another thread may have changed the weight while we * were blocked on the metaslab lock. */ - if (msp->ms_weight < size || (was_active && + if (msp->ms_weight < asize || (was_active && !(msp->ms_weight & METASLAB_ACTIVE_MASK) && activation_weight == METASLAB_WEIGHT_PRIMARY)) { mutex_exit(&msp->ms_lock); @@ -1233,14 +1275,16 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, continue; } - if (metaslab_activate(msp, activation_weight, size) != 0) { + if (metaslab_activate(msp, activation_weight) != 0) { mutex_exit(&msp->ms_lock); continue; } - if ((offset = space_map_alloc(&msp->ms_map, size)) != -1ULL) + if ((offset = space_map_alloc(&msp->ms_map, asize)) != -1ULL) break; + atomic_inc_64(&mg->mg_alloc_failures); + metaslab_passivate(msp, space_map_maxsize(&msp->ms_map)); mutex_exit(&msp->ms_lock); @@ -1249,7 +1293,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0) vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg); - space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size); + space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, asize); mutex_exit(&msp->ms_lock); @@ -1376,7 +1420,8 @@ top: asize = vdev_psize_to_asize(vd, psize); ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0); - offset = metaslab_group_alloc(mg, asize, txg, distance, dva, d); + offset = metaslab_group_alloc(mg, psize, asize, txg, distance, + dva, d, flags); if (offset != -1ULL) { /* * If we've just selected this metaslab group, @@ -1388,18 +1433,24 @@ top: vdev_stat_t *vs = &vd->vdev_stat; int64_t vu, cu; - /* - * Determine percent used in units of 0..1024. - * (This is just to avoid floating point.) - */ - vu = (vs->vs_alloc << 10) / (vs->vs_space + 1); - cu = (mc->mc_alloc << 10) / (mc->mc_space + 1); + vu = (vs->vs_alloc * 100) / (vs->vs_space + 1); + cu = (mc->mc_alloc * 100) / (mc->mc_space + 1); /* - * Bias by at most +/- 25% of the aliquot. + * Calculate how much more or less we should + * try to allocate from this device during + * this iteration around the rotor. + * For example, if a device is 80% full + * and the pool is 20% full then we should + * reduce allocations by 60% on this device. + * + * mg_bias = (20 - 80) * 512K / 100 = -307K + * + * This reduces allocations by 307K for this + * iteration. 
*/ mg->mg_bias = ((cu - vu) * - (int64_t)mg->mg_aliquot) / (1024 * 4); + (int64_t)mg->mg_aliquot) / 100; } if (atomic_add_64_nv(&mc->mc_aliquot, asize) >= @@ -1513,7 +1564,7 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg) mutex_enter(&msp->ms_lock); if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map.sm_loaded) - error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY, 0); + error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY); if (error == 0 && !space_map_contains(&msp->ms_map, offset, size)) error = ENOENT; diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 487a76d71..e4e0c35f0 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include @@ -1680,6 +1681,12 @@ spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps) return (0); } +boolean_t +spa_debug_enabled(spa_t *spa) +{ + return (spa->spa_debug); +} + #if defined(_KERNEL) && defined(HAVE_SPL) /* Namespace manipulation */ EXPORT_SYMBOL(spa_lookup); diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 0fa823687..0022c64cc 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include @@ -79,6 +80,7 @@ int zio_delay_max = ZIO_DELAY_MAX; #ifdef _KERNEL extern vmem_t *zio_alloc_arena; #endif +extern int zfs_mg_alloc_failures; /* * An allocating zio is one that either currently has the DVA allocate @@ -158,6 +160,12 @@ zio_init(void) zio_data_buf_cache[c - 1] = zio_data_buf_cache[c]; } + /* + * The zio write taskqs have 1 thread per cpu, allow 1/2 of the taskqs + * to fail 3 times per txg or 8 failures, whichever is greater. + */ + zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8); + zio_inject_init(); } @@ -2151,6 +2159,7 @@ zio_dva_allocate(zio_t *zio) metaslab_class_t *mc = spa_normal_class(spa); blkptr_t *bp = zio->io_bp; int error; + int flags = 0; if (zio->io_gang_leader == NULL) { ASSERT(zio->io_child_type > ZIO_CHILD_GANG); @@ -2163,10 +2172,21 @@ zio_dva_allocate(zio_t *zio) ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa)); ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp)); + /* + * The dump device does not support gang blocks so allocation on + * behalf of the dump device (i.e. ZIO_FLAG_NODATA) must avoid + * the "fast" gang feature. + */ + flags |= (zio->io_flags & ZIO_FLAG_NODATA) ? METASLAB_GANG_AVOID : 0; + flags |= (zio->io_flags & ZIO_FLAG_GANG_CHILD) ? + METASLAB_GANG_CHILD : 0; error = metaslab_alloc(spa, mc, zio->io_size, bp, - zio->io_prop.zp_copies, zio->io_txg, NULL, 0); + zio->io_prop.zp_copies, zio->io_txg, NULL, flags); if (error) { + spa_dbgmsg(spa, "%s: metaslab allocation failure: zio %p, " + "size %llu, error %d", spa_name(spa), zio, zio->io_size, + error); if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE) return (zio_write_gang_block(zio)); zio->io_error = error; From f5fc4acaa77e2c1782a9495bbf1a39884b4c3940 Mon Sep 17 00:00:00 2001 From: Matt Ahrens Date: Tue, 26 Jul 2011 12:23:00 -0700 Subject: [PATCH 06/10] Illumos #1092: zfs refratio property Add a "REFRATIO" property, which is the compression ratio based on data referenced. For snapshots, this is the same as COMPRESSRATIO, but for filesystems/volumes, the COMPRESSRATIO is based on the data "USED" (ie, includes blocks in children, but not blocks shared with the origin). 
This is needed to figure out how much space a filesystem would use if it were not compressed (ignoring snapshots). Reviewed by: George Wilson Reviewed by: Adam Leventhal Reviewed by: Dan McDonald Reviewed by: Richard Elling Reviewed by: Mark Musante Reviewed by: Garrett D'Amore Approved by: Garrett D'Amore References to Illumos issue and patch: - https://www.illumos.org/issues/1092 - https://github.com/illumos/illumos-gate/commit/187d6ac08a Signed-off-by: Brian Behlendorf Issue #340 --- include/sys/fs/zfs.h | 2 ++ lib/libzfs/libzfs_dataset.c | 2 ++ man/man8/zfs.8 | 17 +++++++++++++++-- module/zcommon/zfs_prop.c | 4 ++++ module/zfs/dsl_dataset.c | 13 ++++++++----- 5 files changed, 31 insertions(+), 7 deletions(-) diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index a2b68eddf..2ac84f645 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -122,6 +123,7 @@ typedef enum { ZFS_PROP_DEDUP, ZFS_PROP_MLSLABEL, ZFS_PROP_SYNC, + ZFS_PROP_REFRATIO, ZFS_NUM_PROPS } zfs_prop_t; diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 5f8847a93..996bae2d7 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include @@ -2025,6 +2026,7 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, } break; + case ZFS_PROP_REFRATIO: case ZFS_PROP_COMPRESSRATIO: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index 3da2e44a7..26dc6cadc 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -360,7 +360,7 @@ This property can also be referred to by its shortened column name, \fBavail\fR. .ad .sp .6 .RS 4n -The compression ratio achieved for this dataset, expressed as a multiplier. Compression can be turned on by running: \fBzfs set compression=on \fIdataset\fR\fR. The default value is \fBoff\fR. +For non-snapshots, the compression ratio achieved for the \fBused\fR space of this dataset, expressed as a multiplier. The \fBused\fR property includes descendant datasets, and, for clones, does not include the space shared with the origin snapshot. For snapshots, the \fBcompressratio\fR is the same as the \fBrefcompressratio\fR property. Compression can be turned on by running: \fBzfs set compression=on \fIdataset\fR\fR. The default value is \fBoff\fR. .RE .sp @@ -420,6 +420,19 @@ The amount of data that is accessible by this dataset, which may or may not be s This property can also be referred to by its shortened column name, \fBrefer\fR. .RE +.sp +.ne 2 +.mk +.na +\fB\fBrefcompressratio\fR\fR +.ad +.sp .6 +.RS 4n +The compression ratio achieved for the \fBreferenced\fR space of this +dataset, expressed as a multiplier. See also the \fBcompressratio\fR +property. +.RE + .sp .ne 2 .mk @@ -1235,7 +1248,7 @@ Recursively destroy all dependents, including cloned file systems outside the ta Force an unmount of any file systems using the \fBunmount -f\fR command. This option has no effect on non-file systems or unmounted file systems. 
.RE -Extreme care should be taken when applying either the \fB-r\fR or the \fB-f\fR options, as they can destroy large portions of a pool and cause unexpected behavior for mounted file systems in use. +Extreme care should be taken when applying either the \fB-r\fR or the \fB-R\fR options, as they can destroy large portions of a pool and cause unexpected behavior for mounted file systems in use. .RE .sp diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index ce03a498c..9d65e35de 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -311,6 +312,9 @@ zfs_prop_init(void) zprop_register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0, PROP_READONLY, ZFS_TYPE_DATASET, "<1.00x or higher if compressed>", "RATIO"); + zprop_register_number(ZFS_PROP_REFRATIO, "refcompressratio", 0, + PROP_READONLY, ZFS_TYPE_DATASET, + "<1.00x or higher if compressed>", "REFRATIO"); zprop_register_number(ZFS_PROP_VOLBLOCKSIZE, "volblocksize", ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME, ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK"); diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index c34ac2a76..26362c95c 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include @@ -2153,7 +2154,7 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) void dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) { - uint64_t refd, avail, uobjs, aobjs; + uint64_t refd, avail, uobjs, aobjs, ratio; dsl_dir_stats(ds->ds_dir, nv); @@ -2180,6 +2181,11 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, DS_IS_DEFER_DESTROY(ds) ? 1 : 0); + ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : + (ds->ds_phys->ds_uncompressed_bytes * 100 / + ds->ds_phys->ds_compressed_bytes); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); + if (ds->ds_phys->ds_next_snap_obj) { /* * This is a snapshot; override the dd's space used with @@ -2187,10 +2193,7 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) */ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, ds->ds_phys->ds_unique_bytes); - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, - ds->ds_phys->ds_compressed_bytes == 0 ? 100 : - (ds->ds_phys->ds_uncompressed_bytes * 100 / - ds->ds_phys->ds_compressed_bytes)); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); } } From 3e31d2b080b4e6665a93691d171a13d7e29a768a Mon Sep 17 00:00:00 2001 From: Eric Schrock Date: Tue, 26 Jul 2011 12:41:53 -0700 Subject: [PATCH 07/10] Illumos #883: ZIL reuse during remount corruption Moving the zil_free() cleanup to zil_close() prevents this problem from occurring in the first place. There is a very good description of the issue and fix in Illumus #883. 
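As a rough sketch of the failure window (illustrative only, not part of the patch), the remount sequence that the new ztest_zil_remount() case below exercises looks like this; the helper name remount_cycle() is hypothetical, while zil_close(), zil_open(), zil_replay(), ztest_get_data and ztest_replay_vector are the same symbols used in the ztest diff:

    /*
     * Illustrative sketch of one unmount/mount cycle on an open dataset.
     * Before this change zil_close() left the last lwb on zl_lwb_list
     * and relied on zil_free() to reclaim it; a remount never calls
     * zil_free(), so the reopened log could reuse a stale lwb.  With
     * this change zil_close() frees that lwb itself and zil_open() can
     * assert that zl_lwb_list is empty.
     */
    static void
    remount_cycle(objset_t *os, ztest_ds_t *zd)
    {
            /* zfsvfs_teardown(): unmount path */
            zil_close(zd->zd_zilog);

            /* zfsvfs_setup(): mount path */
            VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog);
            zil_replay(os, zd, ztest_replay_vector);
    }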
Reviewed by: Matt Ahrens Reviewed by: Adam Leventhal Reviewed by: Albert Lee Reviewed by: Gordon Ross Reviewed by: Garrett D'Amore Reivewed by: Dan McDonald Approved by: Gordon Ross References to Illumos issue and patch: - https://www.illumos.org/issues/883 - https://github.com/illumos/illumos-gate/commit/c9ba2a43cb Signed-off-by: Brian Behlendorf Issue #340 --- cmd/ztest/ztest.c | 36 ++++++++++++++++++++++++++++++++++++ module/zfs/zil.c | 40 ++++++++++++++++++++++++---------------- 2 files changed, 60 insertions(+), 16 deletions(-) diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 235bf56ef..715815859 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -206,6 +206,7 @@ typedef struct ztest_od { */ typedef struct ztest_ds { objset_t *zd_os; + krwlock_t zd_zilog_lock; zilog_t *zd_zilog; uint64_t zd_seq; ztest_od_t *zd_od; /* debugging aid */ @@ -239,6 +240,7 @@ ztest_func_t ztest_dmu_commit_callbacks; ztest_func_t ztest_zap; ztest_func_t ztest_zap_parallel; ztest_func_t ztest_zil_commit; +ztest_func_t ztest_zil_remount; ztest_func_t ztest_dmu_read_write_zcopy; ztest_func_t ztest_dmu_objset_create_destroy; ztest_func_t ztest_dmu_prealloc; @@ -274,6 +276,7 @@ ztest_info_t ztest_info[] = { { ztest_zap_parallel, 100, &zopt_always }, { ztest_split_pool, 1, &zopt_always }, { ztest_zil_commit, 1, &zopt_incessant }, + { ztest_zil_remount, 1, &zopt_sometimes }, { ztest_dmu_read_write_zcopy, 1, &zopt_often }, { ztest_dmu_objset_create_destroy, 1, &zopt_often }, { ztest_dsl_prop_get_set, 1, &zopt_often }, @@ -1007,6 +1010,7 @@ ztest_zd_init(ztest_ds_t *zd, objset_t *os) dmu_objset_name(os, zd->zd_name); int l; + rw_init(&zd->zd_zilog_lock, NULL, RW_DEFAULT, NULL); mutex_init(&zd->zd_dirobj_lock, NULL, MUTEX_DEFAULT, NULL); for (l = 0; l < ZTEST_OBJECT_LOCKS; l++) @@ -1022,6 +1026,7 @@ ztest_zd_fini(ztest_ds_t *zd) int l; mutex_destroy(&zd->zd_dirobj_lock); + rw_destroy(&zd->zd_zilog_lock); for (l = 0; l < ZTEST_OBJECT_LOCKS; l++) ztest_rll_destroy(&zd->zd_object_lock[l]); @@ -1993,6 +1998,8 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) if (ztest_random(2) == 0) io_type = ZTEST_IO_WRITE_TAG; + (void) rw_enter(&zd->zd_zilog_lock, RW_READER); + switch (io_type) { case ZTEST_IO_WRITE_TAG: @@ -2030,6 +2037,8 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) break; } + (void) rw_exit(&zd->zd_zilog_lock); + umem_free(data, blocksize); } @@ -2084,6 +2093,8 @@ ztest_zil_commit(ztest_ds_t *zd, uint64_t id) { zilog_t *zilog = zd->zd_zilog; + (void) rw_enter(&zd->zd_zilog_lock, RW_READER); + zil_commit(zilog, ztest_random(ZTEST_OBJECTS)); /* @@ -2095,6 +2106,31 @@ ztest_zil_commit(ztest_ds_t *zd, uint64_t id) ASSERT(zd->zd_seq <= zilog->zl_commit_lr_seq); zd->zd_seq = zilog->zl_commit_lr_seq; mutex_exit(&zilog->zl_lock); + + (void) rw_exit(&zd->zd_zilog_lock); +} + +/* + * This function is designed to simulate the operations that occur during a + * mount/unmount operation. We hold the dataset across these operations in an + * attempt to expose any implicit assumptions about ZIL management. 
+ */ +/* ARGSUSED */ +void +ztest_zil_remount(ztest_ds_t *zd, uint64_t id) +{ + objset_t *os = zd->zd_os; + + (void) rw_enter(&zd->zd_zilog_lock, RW_WRITER); + + /* zfsvfs_teardown() */ + zil_close(zd->zd_zilog); + + /* zfsvfs_setup() */ + VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog); + zil_replay(os, zd, ztest_replay_vector); + + (void) rw_exit(&zd->zd_zilog_lock); } /* diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 8aa811db2..5296b38be 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -562,7 +563,7 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first) if (!list_is_empty(&zilog->zl_lwb_list)) { ASSERT(zh->zh_claim_txg == 0); - ASSERT(!keep_first); + VERIFY(!keep_first); while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) { list_remove(&zilog->zl_lwb_list, lwb); if (lwb->lwb_buf != NULL) @@ -1665,21 +1666,11 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys) void zil_free(zilog_t *zilog) { - lwb_t *head_lwb; int i; zilog->zl_stop_sync = 1; - /* - * After zil_close() there should only be one lwb with a buffer. - */ - head_lwb = list_head(&zilog->zl_lwb_list); - if (head_lwb) { - ASSERT(head_lwb == list_tail(&zilog->zl_lwb_list)); - list_remove(&zilog->zl_lwb_list, head_lwb); - zio_buf_free(head_lwb->lwb_buf, head_lwb->lwb_sz); - kmem_cache_free(zil_lwb_cache, head_lwb); - } + ASSERT(list_is_empty(&zilog->zl_lwb_list)); list_destroy(&zilog->zl_lwb_list); avl_destroy(&zilog->zl_vdev_tree); @@ -1719,6 +1710,10 @@ zil_open(objset_t *os, zil_get_data_t *get_data) { zilog_t *zilog = dmu_objset_zil(os); + ASSERT(zilog->zl_clean_taskq == NULL); + ASSERT(zilog->zl_get_data == NULL); + ASSERT(list_is_empty(&zilog->zl_lwb_list)); + zilog->zl_get_data = get_data; zilog->zl_clean_taskq = taskq_create("zil_clean", 1, minclsyspri, 2, 2, TASKQ_PREPOPULATE); @@ -1732,7 +1727,7 @@ zil_open(objset_t *os, zil_get_data_t *get_data) void zil_close(zilog_t *zilog) { - lwb_t *tail_lwb; + lwb_t *lwb; uint64_t txg = 0; zil_commit(zilog, 0); /* commit all itx */ @@ -1744,9 +1739,9 @@ zil_close(zilog_t *zilog) * destroy the zl_clean_taskq. */ mutex_enter(&zilog->zl_lock); - tail_lwb = list_tail(&zilog->zl_lwb_list); - if (tail_lwb != NULL) - txg = tail_lwb->lwb_max_txg; + lwb = list_tail(&zilog->zl_lwb_list); + if (lwb != NULL) + txg = lwb->lwb_max_txg; mutex_exit(&zilog->zl_lock); if (txg) txg_wait_synced(zilog->zl_dmu_pool, txg); @@ -1754,6 +1749,19 @@ zil_close(zilog_t *zilog) taskq_destroy(zilog->zl_clean_taskq); zilog->zl_clean_taskq = NULL; zilog->zl_get_data = NULL; + + /* + * We should have only one LWB left on the list; remove it now. + */ + mutex_enter(&zilog->zl_lock); + lwb = list_head(&zilog->zl_lwb_list); + if (lwb != NULL) { + ASSERT(lwb == list_tail(&zilog->zl_lwb_list)); + list_remove(&zilog->zl_lwb_list, lwb); + zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); + kmem_cache_free(zil_lwb_cache, lwb); + } + mutex_exit(&zilog->zl_lock); } /* From ca5252204aa25f81e9f19084917e0a46fdd470b0 Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Tue, 26 Jul 2011 13:08:02 -0700 Subject: [PATCH 08/10] Illumos #1043: Recursive zfs snapshot destroy fails Prior to revision 11314 if a user was recursively destroying snapshots of a dataset the target dataset was not required to exist. 
The zfs_secpolicy_destroy_snaps() function introduced the security check on the target dataset, so since then if the target dataset does not exist, the recursive destroy is not performed. Before 11314, only a delete permission check on the snapshot's master dataset was performed. Steps to reproduce: zfs create pool/a zfs snapshot pool/a@s1 zfs destroy -r pool@s1 Therefore I suggest to fallback to the old security check, if the target snapshot does not exist and continue with the destroy. References to Illumos issue and patch: - https://www.illumos.org/issues/1043 - https://www.illumos.org/attachments/217/recursive_dataset_destroy.patch Signed-off-by: Brian Behlendorf Issue #340 --- module/zfs/zfs_ioctl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 693ffc0c8..088c64b27 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -701,6 +701,9 @@ zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr) * and destroying snapshots requires descendent permissions, a successfull * check of the top level snapshot applies to snapshots of all descendent * datasets as well. + * + * The target snapshot may not exist when doing a recursive destroy. + * In this case fallback to permissions of the parent dataset. */ static int zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr) @@ -711,6 +714,8 @@ zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr) dsname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value); error = zfs_secpolicy_destroy_perms(dsname, cr); + if (error == ENOENT) + error = zfs_secpolicy_destroy_perms(zc->zc_name, cr); strfree(dsname); return (error); From 0b7936d5c2337bc976ac831c1c38de563844c36b Mon Sep 17 00:00:00 2001 From: Alexander Stetsenko Date: Tue, 26 Jul 2011 15:44:36 -0700 Subject: [PATCH 09/10] Illumos #278: get rid zfs of python and pyzfs dependencies Remove all python and pyzfs dependencies for consistency and to ensure full functionality even in a mimimalist environment. 
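For reference, a minimal sketch (illustrative only, not part of the patch) of the libzfs callback interface the new C implementation of 'zfs userspace' is built on; print_cb(), the "tank/home" dataset name and the surrounding snippet are hypothetical, while zfs_open(), zfs_userspace(), zfs_nicenum() and ZFS_PROP_USERUSED are the interfaces used by the zfs_main.c changes below:

    /* ARGSUSED */
    static int
    print_cb(void *arg, const char *domain, uid_t rid, uint64_t space)
    {
            char buf[32];

            /* space is raw bytes; format it the way the real command does */
            zfs_nicenum(space, buf, sizeof (buf));
            (void) printf("%s%s%u\t%s\n",
                (domain == NULL) ? "" : domain,
                (domain == NULL || domain[0] == '\0') ? "" : "-",
                (unsigned int)rid, buf);
            return (0);
    }

            /* e.g. in a command handler, after libzfs_init() */
            zfs_handle_t *zhp = zfs_open(g_zfs, "tank/home", ZFS_TYPE_DATASET);
            if (zhp != NULL) {
                    (void) zfs_userspace(zhp, ZFS_PROP_USERUSED, print_cb, NULL);
                    zfs_close(zhp);
            }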
Reviewed by: gordon.w.ross@gmail.com Reviewed by: trisk@opensolaris.org Reviewed by: alexander.r.eremin@gmail.com Reviewed by: jerry.jelinek@joyent.com Approved by: garrett@nexenta.com References to Illumos issue and patch: - https://www.illumos.org/issues/278 - https://github.com/illumos/illumos-gate/commit/1af68beac3 Signed-off-by: Brian Behlendorf Issue #340 Issue #160 Signed-off-by: Brian Behlendorf --- cmd/zfs/zfs_main.c | 2273 ++++++++++++++++++++++++++++++++++- include/libzfs.h | 9 +- include/zfs_deleg.h | 2 + lib/libzfs/libzfs_dataset.c | 189 +++ module/zcommon/zfs_deleg.c | 3 +- 5 files changed, 2441 insertions(+), 35 deletions(-) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 69a6736a6..54d057b1e 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -52,7 +53,13 @@ #include #include +#include +#include #include +#ifdef HAVE_IDMAP +#include +#include +#endif /* HAVE_IDMAP */ #include "zfs_iter.h" #include "zfs_util.h" @@ -62,7 +69,6 @@ libzfs_handle_t *g_zfs; static FILE *mnttab_file; static char history_str[HIS_MAX_RECORD_LEN]; -const char *pypath = "/usr/lib/zfs/pyzfs.py"; static int zfs_do_clone(int argc, char **argv); static int zfs_do_create(int argc, char **argv); @@ -83,8 +89,10 @@ static int zfs_do_send(int argc, char **argv); static int zfs_do_receive(int argc, char **argv); static int zfs_do_promote(int argc, char **argv); static int zfs_do_userspace(int argc, char **argv); -static int zfs_do_python(int argc, char **argv); +static int zfs_do_allow(int argc, char **argv); +static int zfs_do_unallow(int argc, char **argv); static int zfs_do_hold(int argc, char **argv); +static int zfs_do_holds(int argc, char **argv); static int zfs_do_release(int argc, char **argv); static int zfs_do_diff(int argc, char **argv); @@ -177,12 +185,12 @@ static zfs_command_t command_table[] = { { "send", zfs_do_send, HELP_SEND }, { "receive", zfs_do_receive, HELP_RECEIVE }, { NULL }, - { "allow", zfs_do_python, HELP_ALLOW }, + { "allow", zfs_do_allow, HELP_ALLOW }, { NULL }, - { "unallow", zfs_do_python, HELP_UNALLOW }, + { "unallow", zfs_do_unallow, HELP_UNALLOW }, { NULL }, { "hold", zfs_do_hold, HELP_HOLD }, - { "holds", zfs_do_python, HELP_HOLDS }, + { "holds", zfs_do_holds, HELP_HOLDS }, { "release", zfs_do_release, HELP_RELEASE }, { "diff", zfs_do_diff, HELP_DIFF }, }; @@ -1825,71 +1833,731 @@ zfs_do_upgrade(int argc, char **argv) return (ret); } +#define USTYPE_USR_BIT (0) +#define USTYPE_GRP_BIT (1) +#define USTYPE_PSX_BIT (2) +#define USTYPE_SMB_BIT (3) + +#define USTYPE_USR (1 << USTYPE_USR_BIT) +#define USTYPE_GRP (1 << USTYPE_GRP_BIT) + +#define USTYPE_PSX (1 << USTYPE_PSX_BIT) +#define USTYPE_SMB (1 << USTYPE_SMB_BIT) + +#define USTYPE_PSX_USR (USTYPE_PSX | USTYPE_USR) +#define USTYPE_SMB_USR (USTYPE_SMB | USTYPE_USR) +#define USTYPE_PSX_GRP (USTYPE_PSX | USTYPE_GRP) +#define USTYPE_SMB_GRP (USTYPE_SMB | USTYPE_GRP) +#define USTYPE_ALL (USTYPE_PSX_USR | USTYPE_SMB_USR \ + | USTYPE_PSX_GRP | USTYPE_SMB_GRP) + + +#define USPROP_USED_BIT (0) +#define USPROP_QUOTA_BIT (1) + +#define USPROP_USED (1 << USPROP_USED_BIT) +#define USPROP_QUOTA (1 << USPROP_QUOTA_BIT) + +typedef struct us_node { + nvlist_t *usn_nvl; + uu_avl_node_t usn_avlnode; + uu_list_node_t usn_listnode; +} us_node_t; + +typedef struct us_cbdata { + nvlist_t **cb_nvlp; + uu_avl_pool_t *cb_avl_pool; + uu_avl_t *cb_avl; + boolean_t cb_numname; + boolean_t cb_nicenum; + boolean_t cb_sid2posix; + zfs_userquota_prop_t cb_prop; 
+ zfs_sort_column_t *cb_sortcol; + size_t cb_max_typelen; + size_t cb_max_namelen; + size_t cb_max_usedlen; + size_t cb_max_quotalen; +} us_cbdata_t; + +typedef struct { + zfs_sort_column_t *si_sortcol; + boolean_t si_num_name; + boolean_t si_parsable; +} us_sort_info_t; + +static int +us_compare(const void *larg, const void *rarg, void *unused) +{ + const us_node_t *l = larg; + const us_node_t *r = rarg; + int rc = 0; + us_sort_info_t *si = (us_sort_info_t *)unused; + zfs_sort_column_t *sortcol = si->si_sortcol; + boolean_t num_name = si->si_num_name; + nvlist_t *lnvl = l->usn_nvl; + nvlist_t *rnvl = r->usn_nvl; + + for (; sortcol != NULL; sortcol = sortcol->sc_next) { + char *lvstr = ""; + char *rvstr = ""; + uint32_t lv32 = 0; + uint32_t rv32 = 0; + uint64_t lv64 = 0; + uint64_t rv64 = 0; + zfs_prop_t prop = sortcol->sc_prop; + const char *propname = NULL; + boolean_t reverse = sortcol->sc_reverse; + + switch (prop) { + case ZFS_PROP_TYPE: + propname = "type"; + (void) nvlist_lookup_uint32(lnvl, propname, &lv32); + (void) nvlist_lookup_uint32(rnvl, propname, &rv32); + if (rv32 != lv32) + rc = (rv32 > lv32) ? 1 : -1; + break; + case ZFS_PROP_NAME: + propname = "name"; + if (num_name) { + (void) nvlist_lookup_uint32(lnvl, propname, + &lv32); + (void) nvlist_lookup_uint32(rnvl, propname, + &rv32); + if (rv32 != lv32) + rc = (rv32 > lv32) ? 1 : -1; + } else { + (void) nvlist_lookup_string(lnvl, propname, + &lvstr); + (void) nvlist_lookup_string(rnvl, propname, + &rvstr); + rc = strcmp(lvstr, rvstr); + } + break; + + case ZFS_PROP_USED: + case ZFS_PROP_QUOTA: + if (ZFS_PROP_USED == prop) + propname = "used"; + else + propname = "quota"; + (void) nvlist_lookup_uint64(lnvl, propname, &lv64); + (void) nvlist_lookup_uint64(rnvl, propname, &rv64); + if (rv64 != lv64) + rc = (rv64 > lv64) ? 1 : -1; + default: + break; + } + + if (rc) { + if (rc < 0) + return (reverse ? 1 : -1); + else + return (reverse ? 
-1 : 1); + } + } + + return (rc); +} + +static inline const char * +us_type2str(unsigned field_type) +{ + switch (field_type) { + case USTYPE_PSX_USR: + return ("POSIX User"); + case USTYPE_PSX_GRP: + return ("POSIX Group"); + case USTYPE_SMB_USR: + return ("SMB User"); + case USTYPE_SMB_GRP: + return ("SMB Group"); + default: + return ("Undefined"); + } +} + /* * zfs userspace */ static int userspace_cb(void *arg, const char *domain, uid_t rid, uint64_t space) { - zfs_userquota_prop_t *typep = arg; - zfs_userquota_prop_t p = *typep; + us_cbdata_t *cb = (us_cbdata_t *)arg; + zfs_userquota_prop_t prop = cb->cb_prop; char *name = NULL; - char *ug, *propname; + char *propname; char namebuf[32]; char sizebuf[32]; + us_node_t *node; + uu_avl_pool_t *avl_pool = cb->cb_avl_pool; + uu_avl_t *avl = cb->cb_avl; + uu_avl_index_t idx; + nvlist_t *props; + us_node_t *n; + zfs_sort_column_t *sortcol = cb->cb_sortcol; + unsigned type; + const char *typestr; + size_t namelen; + size_t typelen; + size_t sizelen; + us_sort_info_t sortinfo = { sortcol, cb->cb_numname }; if (domain == NULL || domain[0] == '\0') { - if (p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA) { + /* POSIX */ + if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) { + type = USTYPE_PSX_GRP; struct group *g = getgrgid(rid); if (g) name = g->gr_name; } else { + type = USTYPE_PSX_USR; struct passwd *p = getpwuid(rid); if (p) name = p->pw_name; } + } else { +#ifdef HAVE_IDMAP + char sid[ZFS_MAXNAMELEN+32]; + uid_t id; + uint64_t classes; + int err; + directory_error_t e; + + (void) snprintf(sid, sizeof (sid), "%s-%u", domain, rid); + /* SMB */ + if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) { + type = USTYPE_SMB_GRP; + err = sid_to_id(sid, B_FALSE, &id); + } else { + type = USTYPE_SMB_USR; + err = sid_to_id(sid, B_TRUE, &id); + } + + if (err == 0) { + rid = id; + + e = directory_name_from_sid(NULL, sid, &name, &classes); + if (e != NULL) { + directory_error_free(e); + return (NULL); + } + + if (name == NULL) + name = sid; + } +#else + return (-1); +#endif /* HAVE_IDMAP */ } - if (p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA) - ug = "group"; - else - ug = "user"; +/* + * if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) + * ug = "group"; + * else + * ug = "user"; + */ - if (p == ZFS_PROP_USERUSED || p == ZFS_PROP_GROUPUSED) + if (prop == ZFS_PROP_USERUSED || prop == ZFS_PROP_GROUPUSED) propname = "used"; else propname = "quota"; - if (name == NULL) { - (void) snprintf(namebuf, sizeof (namebuf), - "%llu", (longlong_t)rid); + (void) snprintf(namebuf, sizeof (namebuf), "%u", rid); + if (name == NULL) name = namebuf; - } - zfs_nicenum(space, sizebuf, sizeof (sizebuf)); - (void) printf("%s %s %s%c%s %s\n", propname, ug, domain, - domain[0] ? 
'-' : ' ', name, sizebuf); + if (cb->cb_nicenum) + zfs_nicenum(space, sizebuf, sizeof (sizebuf)); + else + (void) sprintf(sizebuf, "%llu", (u_longlong_t)space); + + node = safe_malloc(sizeof (us_node_t)); + uu_avl_node_init(node, &node->usn_avlnode, avl_pool); + + if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) { + free(node); + return (-1); + } + + if (nvlist_add_uint32(props, "type", type) != 0) + nomem(); + + if (cb->cb_numname) { + if (nvlist_add_uint32(props, "name", rid) != 0) + nomem(); + namelen = strlen(namebuf); + } else { + if (nvlist_add_string(props, "name", name) != 0) + nomem(); + namelen = strlen(name); + } + + typestr = us_type2str(type); + typelen = strlen(gettext(typestr)); + if (typelen > cb->cb_max_typelen) + cb->cb_max_typelen = typelen; + + if (namelen > cb->cb_max_namelen) + cb->cb_max_namelen = namelen; + + sizelen = strlen(sizebuf); + if (0 == strcmp(propname, "used")) { + if (sizelen > cb->cb_max_usedlen) + cb->cb_max_usedlen = sizelen; + } else { + if (sizelen > cb->cb_max_quotalen) + cb->cb_max_quotalen = sizelen; + } + + node->usn_nvl = props; + + n = uu_avl_find(avl, node, &sortinfo, &idx); + if (n == NULL) + uu_avl_insert(avl, node, idx); + else { + nvlist_free(props); + free(node); + node = n; + props = node->usn_nvl; + } + + if (nvlist_add_uint64(props, propname, space) != 0) + nomem(); return (0); } +static inline boolean_t +usprop_check(zfs_userquota_prop_t p, unsigned types, unsigned props) +{ + unsigned type; + unsigned prop; + + switch (p) { + case ZFS_PROP_USERUSED: + type = USTYPE_USR; + prop = USPROP_USED; + break; + case ZFS_PROP_USERQUOTA: + type = USTYPE_USR; + prop = USPROP_QUOTA; + break; + case ZFS_PROP_GROUPUSED: + type = USTYPE_GRP; + prop = USPROP_USED; + break; + case ZFS_PROP_GROUPQUOTA: + type = USTYPE_GRP; + prop = USPROP_QUOTA; + break; + default: /* ALL */ + return (B_TRUE); + }; + + return (type & types && prop & props); +} + +#define USFIELD_TYPE (1 << 0) +#define USFIELD_NAME (1 << 1) +#define USFIELD_USED (1 << 2) +#define USFIELD_QUOTA (1 << 3) +#define USFIELD_ALL (USFIELD_TYPE | USFIELD_NAME | USFIELD_USED | USFIELD_QUOTA) + +static int +parsefields(unsigned *fieldsp, char **names, unsigned *bits, size_t len) +{ + char *field = optarg; + char *delim; + + do { + int i; + boolean_t found = B_FALSE; + delim = strchr(field, ','); + if (delim != NULL) + *delim = '\0'; + + for (i = 0; i < len; i++) + if (0 == strcmp(field, names[i])) { + found = B_TRUE; + *fieldsp |= bits[i]; + break; + } + + if (!found) { + (void) fprintf(stderr, gettext("invalid type '%s'" + "for -t option\n"), field); + return (-1); + } + + field = delim + 1; + } while (delim); + + return (0); +} + + +static char *type_names[] = { "posixuser", "smbuser", "posixgroup", "smbgroup", + "all" }; +static unsigned type_bits[] = { + USTYPE_PSX_USR, + USTYPE_SMB_USR, + USTYPE_PSX_GRP, + USTYPE_SMB_GRP, + USTYPE_ALL +}; + +static char *us_field_names[] = { "type", "name", "used", "quota" }; +static unsigned us_field_bits[] = { + USFIELD_TYPE, + USFIELD_NAME, + USFIELD_USED, + USFIELD_QUOTA +}; + +static void +print_us_node(boolean_t scripted, boolean_t parseable, unsigned fields, + size_t type_width, size_t name_width, size_t used_width, + size_t quota_width, us_node_t *node) +{ + nvlist_t *nvl = node->usn_nvl; + nvpair_t *nvp = NULL; + char valstr[ZFS_MAXNAMELEN]; + boolean_t first = B_TRUE; + boolean_t quota_found = B_FALSE; + + if (fields & USFIELD_QUOTA && !nvlist_exists(nvl, "quota")) + if (nvlist_add_string(nvl, "quota", "none") != 0) + nomem(); + + while ((nvp = 
nvlist_next_nvpair(nvl, nvp)) != NULL) { + char *pname = nvpair_name(nvp); + data_type_t type = nvpair_type(nvp); + uint32_t val32 = 0; + uint64_t val64 = 0; + char *strval = NULL; + unsigned field = 0; + unsigned width = 0; + int i; + for (i = 0; i < 4; i++) { + if (0 == strcmp(pname, us_field_names[i])) { + field = us_field_bits[i]; + break; + } + } + + if (!(field & fields)) + continue; + + switch (type) { + case DATA_TYPE_UINT32: + (void) nvpair_value_uint32(nvp, &val32); + break; + case DATA_TYPE_UINT64: + (void) nvpair_value_uint64(nvp, &val64); + break; + case DATA_TYPE_STRING: + (void) nvpair_value_string(nvp, &strval); + break; + default: + (void) fprintf(stderr, "Invalid data type\n"); + } + + if (!first) { + if (scripted) + (void) printf("\t"); + else + (void) printf(" "); + } + + switch (field) { + case USFIELD_TYPE: + strval = (char *)us_type2str(val32); + width = type_width; + break; + case USFIELD_NAME: + if (type == DATA_TYPE_UINT64) { + (void) sprintf(valstr, "%llu", + (u_longlong_t) val64); + strval = valstr; + } + width = name_width; + break; + case USFIELD_USED: + case USFIELD_QUOTA: + if (type == DATA_TYPE_UINT64) { + (void) nvpair_value_uint64(nvp, &val64); + if (parseable) + (void) sprintf(valstr, "%llu", + (u_longlong_t) val64); + else + zfs_nicenum(val64, valstr, + sizeof (valstr)); + strval = valstr; + } + + if (field == USFIELD_USED) + width = used_width; + else { + quota_found = B_FALSE; + width = quota_width; + } + + break; + } + + if (field == USFIELD_QUOTA && !quota_found) + (void) printf("%*s", width, strval); + else { + if (type == DATA_TYPE_STRING) + (void) printf("%-*s", width, strval); + else + (void) printf("%*s", width, strval); + } + + first = B_FALSE; + + } + + (void) printf("\n"); +} + +static void +print_us(boolean_t scripted, boolean_t parsable, unsigned fields, + unsigned type_width, unsigned name_width, unsigned used_width, + unsigned quota_width, boolean_t rmnode, uu_avl_t *avl) +{ + static char *us_field_hdr[] = { "TYPE", "NAME", "USED", "QUOTA" }; + us_node_t *node; + const char *col; + int i; + int width[4] = { type_width, name_width, used_width, quota_width }; + + if (!scripted) { + boolean_t first = B_TRUE; + for (i = 0; i < 4; i++) { + unsigned field = us_field_bits[i]; + if (!(field & fields)) + continue; + + col = gettext(us_field_hdr[i]); + if (field == USFIELD_TYPE || field == USFIELD_NAME) + (void) printf(first?"%-*s":" %-*s", width[i], + col); + else + (void) printf(first?"%*s":" %*s", width[i], + col); + first = B_FALSE; + } + (void) printf("\n"); + } + + for (node = uu_avl_first(avl); node != NULL; + node = uu_avl_next(avl, node)) { + print_us_node(scripted, parsable, fields, type_width, + name_width, used_width, used_width, node); + if (rmnode) + nvlist_free(node->usn_nvl); + } +} + static int zfs_do_userspace(int argc, char **argv) { zfs_handle_t *zhp; zfs_userquota_prop_t p; + uu_avl_pool_t *avl_pool; + uu_avl_t *avl_tree; + uu_avl_walk_t *walk; + + char *cmd; + boolean_t scripted = B_FALSE; + boolean_t prtnum = B_FALSE; + boolean_t parseable = B_FALSE; + boolean_t sid2posix = B_FALSE; int error; + int c; + zfs_sort_column_t *default_sortcol = NULL; + zfs_sort_column_t *sortcol = NULL; + unsigned types = USTYPE_PSX_USR | USTYPE_SMB_USR; + unsigned fields = 0; + unsigned props = USPROP_USED | USPROP_QUOTA; + us_cbdata_t cb; + us_node_t *node; + boolean_t resort_avl = B_FALSE; + + if (argc < 2) + usage(B_FALSE); + + cmd = argv[0]; + if (0 == strcmp(cmd, "groupspace")) + /* toggle default group types */ + types = USTYPE_PSX_GRP | 
USTYPE_SMB_GRP; + + /* check options */ + while ((c = getopt(argc, argv, "nHpo:s:S:t:i")) != -1) { + switch (c) { + case 'n': + prtnum = B_TRUE; + break; + case 'H': + scripted = B_TRUE; + break; + case 'p': + parseable = B_TRUE; + break; + case 'o': + if (parsefields(&fields, us_field_names, us_field_bits, + 4) != 0) + return (1); + break; + case 's': + if (zfs_add_sort_column(&sortcol, optarg, + B_FALSE) != 0) { + (void) fprintf(stderr, + gettext("invalid property '%s'\n"), optarg); + usage(B_FALSE); + } + break; + case 'S': + if (zfs_add_sort_column(&sortcol, optarg, + B_TRUE) != 0) { + (void) fprintf(stderr, + gettext("invalid property '%s'\n"), optarg); + usage(B_FALSE); + } + break; + case 't': + if (parsefields(&types, type_names, type_bits, 5)) + return (1); + break; + case 'i': + sid2posix = B_TRUE; + break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + usage(B_FALSE); + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + /* ok, now we have sorted by default colums (type,name) avl tree */ + if (sortcol) { + zfs_sort_column_t *sc; + for (sc = sortcol; sc; sc = sc->sc_next) { + if (sc->sc_prop == ZFS_PROP_QUOTA) { + resort_avl = B_TRUE; + break; + } + } + } + + if (!fields) + fields = USFIELD_ALL; if ((zhp = zfs_open(g_zfs, argv[argc-1], ZFS_TYPE_DATASET)) == NULL) return (1); - (void) printf("PROP TYPE NAME VALUE\n"); + if ((avl_pool = uu_avl_pool_create("us_avl_pool", sizeof (us_node_t), + offsetof(us_node_t, usn_avlnode), + us_compare, UU_DEFAULT)) == NULL) + nomem(); + if ((avl_tree = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) + nomem(); + + if (sortcol && !resort_avl) + cb.cb_sortcol = sortcol; + else { + (void) zfs_add_sort_column(&default_sortcol, "type", B_FALSE); + (void) zfs_add_sort_column(&default_sortcol, "name", B_FALSE); + cb.cb_sortcol = default_sortcol; + } + cb.cb_numname = prtnum; + cb.cb_nicenum = !parseable; + cb.cb_avl_pool = avl_pool; + cb.cb_avl = avl_tree; + cb.cb_sid2posix = sid2posix; + cb.cb_max_typelen = strlen(gettext("TYPE")); + cb.cb_max_namelen = strlen(gettext("NAME")); + cb.cb_max_usedlen = strlen(gettext("USED")); + cb.cb_max_quotalen = strlen(gettext("QUOTA")); for (p = 0; p < ZFS_NUM_USERQUOTA_PROPS; p++) { - error = zfs_userspace(zhp, p, userspace_cb, &p); + if (!usprop_check(p, types, props)) + continue; + + cb.cb_prop = p; + error = zfs_userspace(zhp, p, userspace_cb, &cb); if (error) break; } + + + if (resort_avl) { + us_node_t *node; + us_node_t *rmnode; + uu_list_pool_t *listpool; + uu_list_t *list; + uu_avl_index_t idx = 0; + uu_list_index_t idx2 = 0; + listpool = uu_list_pool_create("tmplist", sizeof (us_node_t), + offsetof(us_node_t, usn_listnode), NULL, + UU_DEFAULT); + list = uu_list_create(listpool, NULL, UU_DEFAULT); + + node = uu_avl_first(avl_tree); + uu_list_node_init(node, &node->usn_listnode, listpool); + while (node != NULL) { + rmnode = node; + node = uu_avl_next(avl_tree, node); + uu_avl_remove(avl_tree, rmnode); + if (uu_list_find(list, rmnode, NULL, &idx2) == NULL) { + uu_list_insert(list, rmnode, idx2); + } + } + + for (node = uu_list_first(list); node != NULL; + node = uu_list_next(list, node)) { + us_sort_info_t sortinfo = { sortcol, cb.cb_numname }; + if (uu_avl_find(avl_tree, node, &sortinfo, &idx) == + NULL) + uu_avl_insert(avl_tree, node, idx); + } + + uu_list_destroy(list); + } + + /* print & free node`s nvlist memory */ + print_us(scripted, 
parseable, fields, cb.cb_max_typelen, + cb.cb_max_namelen, cb.cb_max_usedlen, + cb.cb_max_quotalen, B_TRUE, cb.cb_avl); + + if (sortcol) + zfs_free_sort_columns(sortcol); + zfs_free_sort_columns(default_sortcol); + + /* + * Finally, clean up the AVL tree. + */ + if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL) + nomem(); + + while ((node = uu_avl_walk_next(walk)) != NULL) { + uu_avl_remove(cb.cb_avl, node); + free(node); + } + + uu_avl_walk_end(walk); + uu_avl_destroy(avl_tree); + uu_avl_pool_destroy(avl_pool); + return (error); } @@ -2821,6 +3489,1362 @@ zfs_do_receive(int argc, char **argv) return (err != 0); } +/* + * allow/unallow stuff + */ +/* copied from zfs/sys/dsl_deleg.h */ +#define ZFS_DELEG_PERM_CREATE "create" +#define ZFS_DELEG_PERM_DESTROY "destroy" +#define ZFS_DELEG_PERM_SNAPSHOT "snapshot" +#define ZFS_DELEG_PERM_ROLLBACK "rollback" +#define ZFS_DELEG_PERM_CLONE "clone" +#define ZFS_DELEG_PERM_PROMOTE "promote" +#define ZFS_DELEG_PERM_RENAME "rename" +#define ZFS_DELEG_PERM_MOUNT "mount" +#define ZFS_DELEG_PERM_SHARE "share" +#define ZFS_DELEG_PERM_SEND "send" +#define ZFS_DELEG_PERM_RECEIVE "receive" +#define ZFS_DELEG_PERM_ALLOW "allow" +#define ZFS_DELEG_PERM_USERPROP "userprop" +#define ZFS_DELEG_PERM_VSCAN "vscan" /* ??? */ +#define ZFS_DELEG_PERM_USERQUOTA "userquota" +#define ZFS_DELEG_PERM_GROUPQUOTA "groupquota" +#define ZFS_DELEG_PERM_USERUSED "userused" +#define ZFS_DELEG_PERM_GROUPUSED "groupused" +#define ZFS_DELEG_PERM_HOLD "hold" +#define ZFS_DELEG_PERM_RELEASE "release" +#define ZFS_DELEG_PERM_DIFF "diff" + +#define ZFS_NUM_DELEG_NOTES ZFS_DELEG_NOTE_NONE + +static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = { + { ZFS_DELEG_PERM_ALLOW, ZFS_DELEG_NOTE_ALLOW }, + { ZFS_DELEG_PERM_CLONE, ZFS_DELEG_NOTE_CLONE }, + { ZFS_DELEG_PERM_CREATE, ZFS_DELEG_NOTE_CREATE }, + { ZFS_DELEG_PERM_DESTROY, ZFS_DELEG_NOTE_DESTROY }, + { ZFS_DELEG_PERM_DIFF, ZFS_DELEG_NOTE_DIFF}, + { ZFS_DELEG_PERM_HOLD, ZFS_DELEG_NOTE_HOLD }, + { ZFS_DELEG_PERM_MOUNT, ZFS_DELEG_NOTE_MOUNT }, + { ZFS_DELEG_PERM_PROMOTE, ZFS_DELEG_NOTE_PROMOTE }, + { ZFS_DELEG_PERM_RECEIVE, ZFS_DELEG_NOTE_RECEIVE }, + { ZFS_DELEG_PERM_RELEASE, ZFS_DELEG_NOTE_RELEASE }, + { ZFS_DELEG_PERM_RENAME, ZFS_DELEG_NOTE_RENAME }, + { ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK }, + { ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND }, + { ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE }, + { ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT }, + + { ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA }, + { ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED }, + { ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP }, + { ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA }, + { ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED }, + { NULL, ZFS_DELEG_NOTE_NONE } +}; + +/* permission structure */ +typedef struct deleg_perm { + zfs_deleg_who_type_t dp_who_type; + const char *dp_name; + boolean_t dp_local; + boolean_t dp_descend; +} deleg_perm_t; + +/* */ +typedef struct deleg_perm_node { + deleg_perm_t dpn_perm; + + uu_avl_node_t dpn_avl_node; +} deleg_perm_node_t; + +typedef struct fs_perm fs_perm_t; + +/* permissions set */ +typedef struct who_perm { + zfs_deleg_who_type_t who_type; + const char *who_name; /* id */ + char who_ug_name[256]; /* user/group name */ + fs_perm_t *who_fsperm; /* uplink */ + + uu_avl_t *who_deleg_perm_avl; /* permissions */ +} who_perm_t; + +/* */ +typedef struct who_perm_node { + who_perm_t who_perm; + uu_avl_node_t who_avl_node; +} who_perm_node_t; + +typedef struct fs_perm_set 
fs_perm_set_t; +/* fs permissions */ +struct fs_perm { + const char *fsp_name; + + uu_avl_t *fsp_sc_avl; /* sets,create */ + uu_avl_t *fsp_uge_avl; /* user,group,everyone */ + + fs_perm_set_t *fsp_set; /* uplink */ +}; + +/* */ +typedef struct fs_perm_node { + fs_perm_t fspn_fsperm; + uu_avl_t *fspn_avl; + + uu_list_node_t fspn_list_node; +} fs_perm_node_t; + +/* top level structure */ +struct fs_perm_set { + uu_list_pool_t *fsps_list_pool; + uu_list_t *fsps_list; /* list of fs_perms */ + + uu_avl_pool_t *fsps_named_set_avl_pool; + uu_avl_pool_t *fsps_who_perm_avl_pool; + uu_avl_pool_t *fsps_deleg_perm_avl_pool; +}; + +static inline const char * +deleg_perm_type(zfs_deleg_note_t note) +{ + /* subcommands */ + switch (note) { + /* SUBCOMMANDS */ + /* OTHER */ + case ZFS_DELEG_NOTE_GROUPQUOTA: + case ZFS_DELEG_NOTE_GROUPUSED: + case ZFS_DELEG_NOTE_USERPROP: + case ZFS_DELEG_NOTE_USERQUOTA: + case ZFS_DELEG_NOTE_USERUSED: + /* other */ + return (gettext("other")); + default: + return (gettext("subcommand")); + } +} + +static int inline +who_type2weight(zfs_deleg_who_type_t who_type) +{ + int res; + switch (who_type) { + case ZFS_DELEG_NAMED_SET_SETS: + case ZFS_DELEG_NAMED_SET: + res = 0; + break; + case ZFS_DELEG_CREATE_SETS: + case ZFS_DELEG_CREATE: + res = 1; + break; + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_USER: + res = 2; + break; + case ZFS_DELEG_GROUP_SETS: + case ZFS_DELEG_GROUP: + res = 3; + break; + case ZFS_DELEG_EVERYONE_SETS: + case ZFS_DELEG_EVERYONE: + res = 4; + break; + default: + res = -1; + } + + return (res); +} + +/* ARGSUSED */ +static int +who_perm_compare(const void *larg, const void *rarg, void *unused) +{ + const who_perm_node_t *l = larg; + const who_perm_node_t *r = rarg; + zfs_deleg_who_type_t ltype = l->who_perm.who_type; + zfs_deleg_who_type_t rtype = r->who_perm.who_type; + int lweight = who_type2weight(ltype); + int rweight = who_type2weight(rtype); + int res = lweight - rweight; + if (res == 0) + res = strncmp(l->who_perm.who_name, r->who_perm.who_name, + ZFS_MAX_DELEG_NAME-1); + + if (res == 0) + return (0); + if (res > 0) + return (1); + else + return (-1); +} + +/* ARGSUSED */ +static int +deleg_perm_compare(const void *larg, const void *rarg, void *unused) +{ + const deleg_perm_node_t *l = larg; + const deleg_perm_node_t *r = rarg; + int res = strncmp(l->dpn_perm.dp_name, r->dpn_perm.dp_name, + ZFS_MAX_DELEG_NAME-1); + + if (res == 0) + return (0); + + if (res > 0) + return (1); + else + return (-1); +} + +static inline void +fs_perm_set_init(fs_perm_set_t *fspset) +{ + bzero(fspset, sizeof (fs_perm_set_t)); + + if ((fspset->fsps_list_pool = uu_list_pool_create("fsps_list_pool", + sizeof (fs_perm_node_t), offsetof(fs_perm_node_t, fspn_list_node), + NULL, UU_DEFAULT)) == NULL) + nomem(); + if ((fspset->fsps_list = uu_list_create(fspset->fsps_list_pool, NULL, + UU_DEFAULT)) == NULL) + nomem(); + + if ((fspset->fsps_named_set_avl_pool = uu_avl_pool_create( + "named_set_avl_pool", sizeof (who_perm_node_t), offsetof( + who_perm_node_t, who_avl_node), who_perm_compare, + UU_DEFAULT)) == NULL) + nomem(); + + if ((fspset->fsps_who_perm_avl_pool = uu_avl_pool_create( + "who_perm_avl_pool", sizeof (who_perm_node_t), offsetof( + who_perm_node_t, who_avl_node), who_perm_compare, + UU_DEFAULT)) == NULL) + nomem(); + + if ((fspset->fsps_deleg_perm_avl_pool = uu_avl_pool_create( + "deleg_perm_avl_pool", sizeof (deleg_perm_node_t), offsetof( + deleg_perm_node_t, dpn_avl_node), deleg_perm_compare, UU_DEFAULT)) + == NULL) + nomem(); +} + +static inline void 
fs_perm_fini(fs_perm_t *); +static inline void who_perm_fini(who_perm_t *); + +static inline void +fs_perm_set_fini(fs_perm_set_t *fspset) +{ + fs_perm_node_t *node = uu_list_first(fspset->fsps_list); + + while (node != NULL) { + fs_perm_node_t *next_node = + uu_list_next(fspset->fsps_list, node); + fs_perm_t *fsperm = &node->fspn_fsperm; + fs_perm_fini(fsperm); + uu_list_remove(fspset->fsps_list, node); + free(node); + node = next_node; + } + + uu_avl_pool_destroy(fspset->fsps_named_set_avl_pool); + uu_avl_pool_destroy(fspset->fsps_who_perm_avl_pool); + uu_avl_pool_destroy(fspset->fsps_deleg_perm_avl_pool); +} + +static inline void +deleg_perm_init(deleg_perm_t *deleg_perm, zfs_deleg_who_type_t type, + const char *name) +{ + deleg_perm->dp_who_type = type; + deleg_perm->dp_name = name; +} + +static inline void +who_perm_init(who_perm_t *who_perm, fs_perm_t *fsperm, + zfs_deleg_who_type_t type, const char *name) +{ + uu_avl_pool_t *pool; + pool = fsperm->fsp_set->fsps_deleg_perm_avl_pool; + + bzero(who_perm, sizeof (who_perm_t)); + + if ((who_perm->who_deleg_perm_avl = uu_avl_create(pool, NULL, + UU_DEFAULT)) == NULL) + nomem(); + + who_perm->who_type = type; + who_perm->who_name = name; + who_perm->who_fsperm = fsperm; +} + +static inline void +who_perm_fini(who_perm_t *who_perm) +{ + deleg_perm_node_t *node = uu_avl_first(who_perm->who_deleg_perm_avl); + + while (node != NULL) { + deleg_perm_node_t *next_node = + uu_avl_next(who_perm->who_deleg_perm_avl, node); + + uu_avl_remove(who_perm->who_deleg_perm_avl, node); + free(node); + node = next_node; + } + + uu_avl_destroy(who_perm->who_deleg_perm_avl); +} + +static inline void +fs_perm_init(fs_perm_t *fsperm, fs_perm_set_t *fspset, const char *fsname) +{ + uu_avl_pool_t *nset_pool = fspset->fsps_named_set_avl_pool; + uu_avl_pool_t *who_pool = fspset->fsps_who_perm_avl_pool; + + bzero(fsperm, sizeof (fs_perm_t)); + + if ((fsperm->fsp_sc_avl = uu_avl_create(nset_pool, NULL, UU_DEFAULT)) + == NULL) + nomem(); + + if ((fsperm->fsp_uge_avl = uu_avl_create(who_pool, NULL, UU_DEFAULT)) + == NULL) + nomem(); + + fsperm->fsp_set = fspset; + fsperm->fsp_name = fsname; +} + +static inline void +fs_perm_fini(fs_perm_t *fsperm) +{ + who_perm_node_t *node = uu_avl_first(fsperm->fsp_sc_avl); + while (node != NULL) { + who_perm_node_t *next_node = uu_avl_next(fsperm->fsp_sc_avl, + node); + who_perm_t *who_perm = &node->who_perm; + who_perm_fini(who_perm); + uu_avl_remove(fsperm->fsp_sc_avl, node); + free(node); + node = next_node; + } + + node = uu_avl_first(fsperm->fsp_uge_avl); + while (node != NULL) { + who_perm_node_t *next_node = uu_avl_next(fsperm->fsp_uge_avl, + node); + who_perm_t *who_perm = &node->who_perm; + who_perm_fini(who_perm); + uu_avl_remove(fsperm->fsp_uge_avl, node); + free(node); + node = next_node; + } + + uu_avl_destroy(fsperm->fsp_sc_avl); + uu_avl_destroy(fsperm->fsp_uge_avl); +} + +static void inline +set_deleg_perm_node(uu_avl_t *avl, deleg_perm_node_t *node, + zfs_deleg_who_type_t who_type, const char *name, char locality) +{ + uu_avl_index_t idx = 0; + + deleg_perm_node_t *found_node = NULL; + deleg_perm_t *deleg_perm = &node->dpn_perm; + + deleg_perm_init(deleg_perm, who_type, name); + + if ((found_node = uu_avl_find(avl, node, NULL, &idx)) + == NULL) + uu_avl_insert(avl, node, idx); + else { + node = found_node; + deleg_perm = &node->dpn_perm; + } + + + switch (locality) { + case ZFS_DELEG_LOCAL: + deleg_perm->dp_local = B_TRUE; + break; + case ZFS_DELEG_DESCENDENT: + deleg_perm->dp_descend = B_TRUE; + break; + case 
ZFS_DELEG_NA: + break; + default: + assert(B_FALSE); /* invalid locality */ + } +} + +static inline int +parse_who_perm(who_perm_t *who_perm, nvlist_t *nvl, char locality) +{ + nvpair_t *nvp = NULL; + fs_perm_set_t *fspset = who_perm->who_fsperm->fsp_set; + uu_avl_t *avl = who_perm->who_deleg_perm_avl; + zfs_deleg_who_type_t who_type = who_perm->who_type; + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + const char *name = nvpair_name(nvp); + data_type_t type = nvpair_type(nvp); + uu_avl_pool_t *avl_pool = fspset->fsps_deleg_perm_avl_pool; + deleg_perm_node_t *node = + safe_malloc(sizeof (deleg_perm_node_t)); + + VERIFY(type == DATA_TYPE_BOOLEAN); + + uu_avl_node_init(node, &node->dpn_avl_node, avl_pool); + set_deleg_perm_node(avl, node, who_type, name, locality); + } + + return (0); +} + +static inline int +parse_fs_perm(fs_perm_t *fsperm, nvlist_t *nvl) +{ + nvpair_t *nvp = NULL; + fs_perm_set_t *fspset = fsperm->fsp_set; + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + nvlist_t *nvl2 = NULL; + const char *name = nvpair_name(nvp); + uu_avl_t *avl = NULL; + uu_avl_pool_t *avl_pool = NULL; + zfs_deleg_who_type_t perm_type = name[0]; + char perm_locality = name[1]; + const char *perm_name = name + 3; + boolean_t is_set = B_TRUE; + who_perm_t *who_perm = NULL; + + assert('$' == name[2]); + + if (nvpair_value_nvlist(nvp, &nvl2) != 0) + return (-1); + + switch (perm_type) { + case ZFS_DELEG_CREATE: + case ZFS_DELEG_CREATE_SETS: + case ZFS_DELEG_NAMED_SET: + case ZFS_DELEG_NAMED_SET_SETS: + avl_pool = fspset->fsps_named_set_avl_pool; + avl = fsperm->fsp_sc_avl; + break; + case ZFS_DELEG_USER: + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_GROUP: + case ZFS_DELEG_GROUP_SETS: + case ZFS_DELEG_EVERYONE: + case ZFS_DELEG_EVERYONE_SETS: + avl_pool = fspset->fsps_who_perm_avl_pool; + avl = fsperm->fsp_uge_avl; + break; + default: + break; + } + + if (is_set) { + who_perm_node_t *found_node = NULL; + who_perm_node_t *node = safe_malloc( + sizeof (who_perm_node_t)); + who_perm = &node->who_perm; + uu_avl_index_t idx = 0; + + uu_avl_node_init(node, &node->who_avl_node, avl_pool); + who_perm_init(who_perm, fsperm, perm_type, perm_name); + + if ((found_node = uu_avl_find(avl, node, NULL, &idx)) + == NULL) { + if (avl == fsperm->fsp_uge_avl) { + uid_t rid = 0; + struct passwd *p = NULL; + struct group *g = NULL; + const char *nice_name = NULL; + + switch (perm_type) { + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_USER: + rid = atoi(perm_name); + p = getpwuid(rid); + if (p) + nice_name = p->pw_name; + break; + case ZFS_DELEG_GROUP_SETS: + case ZFS_DELEG_GROUP: + rid = atoi(perm_name); + g = getgrgid(rid); + if (g) + nice_name = g->gr_name; + break; + default: + break; + } + + if (nice_name != NULL) + (void) strlcpy( + node->who_perm.who_ug_name, + nice_name, 256); + } + + uu_avl_insert(avl, node, idx); + } else { + node = found_node; + who_perm = &node->who_perm; + } + } + + (void) parse_who_perm(who_perm, nvl2, perm_locality); + } + + return (0); +} + +static inline int +parse_fs_perm_set(fs_perm_set_t *fspset, nvlist_t *nvl) +{ + nvpair_t *nvp = NULL; + uu_avl_index_t idx = 0; + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + nvlist_t *nvl2 = NULL; + const char *fsname = nvpair_name(nvp); + data_type_t type = nvpair_type(nvp); + fs_perm_t *fsperm = NULL; + fs_perm_node_t *node = safe_malloc(sizeof (fs_perm_node_t)); + if (node == NULL) + nomem(); + + fsperm = &node->fspn_fsperm; + + VERIFY(DATA_TYPE_NVLIST == type); + + uu_list_node_init(node, &node->fspn_list_node, + 
fspset->fsps_list_pool); + + idx = uu_list_numnodes(fspset->fsps_list); + fs_perm_init(fsperm, fspset, fsname); + + if (nvpair_value_nvlist(nvp, &nvl2) != 0) + return (-1); + + (void) parse_fs_perm(fsperm, nvl2); + + uu_list_insert(fspset->fsps_list, node, idx); + } + + return (0); +} + +static inline const char * +deleg_perm_comment(zfs_deleg_note_t note) +{ + const char *str = ""; + + /* subcommands */ + switch (note) { + /* SUBCOMMANDS */ + case ZFS_DELEG_NOTE_ALLOW: + str = gettext("Must also have the permission that is being" + "\n\t\t\t\tallowed"); + break; + case ZFS_DELEG_NOTE_CLONE: + str = gettext("Must also have the 'create' ability and 'mount'" + "\n\t\t\t\tability in the origin file system"); + break; + case ZFS_DELEG_NOTE_CREATE: + str = gettext("Must also have the 'mount' ability"); + break; + case ZFS_DELEG_NOTE_DESTROY: + str = gettext("Must also have the 'mount' ability"); + break; + case ZFS_DELEG_NOTE_DIFF: + str = gettext("Allows lookup of paths within a dataset;" + "\n\t\t\t\tgiven an object number. Ordinary users need this" + "\n\t\t\t\tin order to use zfs diff"); + break; + case ZFS_DELEG_NOTE_HOLD: + str = gettext("Allows adding a user hold to a snapshot"); + break; + case ZFS_DELEG_NOTE_MOUNT: + str = gettext("Allows mount/umount of ZFS datasets"); + break; + case ZFS_DELEG_NOTE_PROMOTE: + str = gettext("Must also have the 'mount'\n\t\t\t\tand" + " 'promote' ability in the origin file system"); + break; + case ZFS_DELEG_NOTE_RECEIVE: + str = gettext("Must also have the 'mount' and 'create'" + " ability"); + break; + case ZFS_DELEG_NOTE_RELEASE: + str = gettext("Allows releasing a user hold which\n\t\t\t\t" + "might destroy the snapshot"); + break; + case ZFS_DELEG_NOTE_RENAME: + str = gettext("Must also have the 'mount' and 'create'" + "\n\t\t\t\tability in the new parent"); + break; + case ZFS_DELEG_NOTE_ROLLBACK: + str = gettext(""); + break; + case ZFS_DELEG_NOTE_SEND: + str = gettext(""); + break; + case ZFS_DELEG_NOTE_SHARE: + str = gettext("Allows sharing file systems over NFS or SMB" + "\n\t\t\t\tprotocols"); + break; + case ZFS_DELEG_NOTE_SNAPSHOT: + str = gettext(""); + break; +/* + * case ZFS_DELEG_NOTE_VSCAN: + * str = gettext(""); + * break; + */ + /* OTHER */ + case ZFS_DELEG_NOTE_GROUPQUOTA: + str = gettext("Allows accessing any groupquota@... property"); + break; + case ZFS_DELEG_NOTE_GROUPUSED: + str = gettext("Allows reading any groupused@... property"); + break; + case ZFS_DELEG_NOTE_USERPROP: + str = gettext("Allows changing any user property"); + break; + case ZFS_DELEG_NOTE_USERQUOTA: + str = gettext("Allows accessing any userquota@... property"); + break; + case ZFS_DELEG_NOTE_USERUSED: + str = gettext("Allows reading any userused@... 
property"); + break; + /* other */ + default: + str = ""; + } + + return (str); +} + +struct allow_opts { + boolean_t local; + boolean_t descend; + boolean_t user; + boolean_t group; + boolean_t everyone; + boolean_t create; + boolean_t set; + boolean_t recursive; /* unallow only */ + boolean_t prt_usage; + + boolean_t prt_perms; + char *who; + char *perms; + const char *dataset; +}; + +static inline int +prop_cmp(const void *a, const void *b) +{ + const char *str1 = *(const char **)a; + const char *str2 = *(const char **)b; + return (strcmp(str1, str2)); +} + +static void +allow_usage(boolean_t un, boolean_t requested, const char *msg) +{ + const char *opt_desc[] = { + "-h", gettext("show this help message and exit"), + "-l", gettext("set permission locally"), + "-d", gettext("set permission for descents"), + "-u", gettext("set permission for user"), + "-g", gettext("set permission for group"), + "-e", gettext("set permission for everyone"), + "-c", gettext("set create time permission"), + "-s", gettext("define permission set"), + /* unallow only */ + "-r", gettext("remove permissions recursively"), + }; + size_t unallow_size = sizeof (opt_desc) / sizeof (char *); + size_t allow_size = unallow_size - 2; + const char *props[ZFS_NUM_PROPS]; + int i; + size_t count = 0; + FILE *fp = requested ? stdout : stderr; + zprop_desc_t *pdtbl = zfs_prop_get_table(); + const char *fmt = gettext("%-16s %-14s\t%s\n"); + + (void) fprintf(fp, gettext("Usage: %s\n"), get_usage(un ? HELP_UNALLOW : + HELP_ALLOW)); + (void) fprintf(fp, gettext("Options:\n")); + for (i = 0; i < (un ? unallow_size : allow_size); i++) { + const char *opt = opt_desc[i++]; + const char *optdsc = opt_desc[i]; + (void) fprintf(fp, gettext(" %-10s %s\n"), opt, optdsc); + } + + (void) fprintf(fp, gettext("\nThe following permissions are " + "supported:\n\n")); + (void) fprintf(fp, fmt, gettext("NAME"), gettext("TYPE"), + gettext("NOTES")); + for (i = 0; i < ZFS_NUM_DELEG_NOTES; i++) { + const char *perm_name = zfs_deleg_perm_tbl[i].z_perm; + zfs_deleg_note_t perm_note = zfs_deleg_perm_tbl[i].z_note; + const char *perm_type = deleg_perm_type(perm_note); + const char *perm_comment = deleg_perm_comment(perm_note); + (void) fprintf(fp, fmt, perm_name, perm_type, perm_comment); + } + + for (i = 0; i < ZFS_NUM_PROPS; i++) { + zprop_desc_t *pd = &pdtbl[i]; + if (pd->pd_visible != B_TRUE) + continue; + + if (pd->pd_attr == PROP_READONLY) + continue; + + props[count++] = pd->pd_name; + } + props[count] = NULL; + + qsort(props, count, sizeof (char *), prop_cmp); + + for (i = 0; i < count; i++) + (void) fprintf(fp, fmt, props[i], gettext("property"), ""); + + if (msg != NULL) + (void) fprintf(fp, gettext("\nzfs: error: %s"), msg); + + exit(requested ? 0 : 2); +} + +static inline const char * +munge_args(int argc, char **argv, boolean_t un, size_t expected_argc, + char **permsp) +{ + if (un && argc == expected_argc - 1) + *permsp = NULL; + else if (argc == expected_argc) + *permsp = argv[argc - 2]; + else + allow_usage(un, B_FALSE, + gettext("wrong number of parameters\n")); + + return (argv[argc - 1]); +} + +static void +parse_allow_args(int argc, char **argv, boolean_t un, struct allow_opts *opts) +{ + int uge_sum = opts->user + opts->group + opts->everyone; + int csuge_sum = opts->create + opts->set + uge_sum; + int ldcsuge_sum = csuge_sum + opts->local + opts->descend; + int all_sum = un ? 
ldcsuge_sum + opts->recursive : ldcsuge_sum; + + if (uge_sum > 1) + allow_usage(un, B_FALSE, + gettext("-u, -g, and -e are mutually exclusive\n")); + + if (opts->prt_usage) { + if (argc == 0 && all_sum == 0) + allow_usage(un, B_TRUE, NULL); + else + usage(B_FALSE); + } + + if (opts->set) { + if (csuge_sum > 1) + allow_usage(un, B_FALSE, + gettext("invalid options combined with -s\n")); + + opts->dataset = munge_args(argc, argv, un, 3, &opts->perms); + if (argv[0][0] != '@') + allow_usage(un, B_FALSE, + gettext("invalid set name: missing '@' prefix\n")); + opts->who = argv[0]; + } else if (opts->create) { + if (ldcsuge_sum > 1) + allow_usage(un, B_FALSE, + gettext("invalid options combined with -c\n")); + opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); + } else if (opts->everyone) { + if (csuge_sum > 1) + allow_usage(un, B_FALSE, + gettext("invalid options combined with -e\n")); + opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); + } else if (uge_sum == 0 && argc > 0 && strcmp(argv[0], "everyone") + == 0) { + opts->everyone = B_TRUE; + argc--; + argv++; + opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); + } else if (argc == 1) { + opts->prt_perms = B_TRUE; + opts->dataset = argv[argc-1]; + } else { + opts->dataset = munge_args(argc, argv, un, 3, &opts->perms); + opts->who = argv[0]; + } + + if (!opts->local && !opts->descend) { + opts->local = B_TRUE; + opts->descend = B_TRUE; + } +} + +static void +store_allow_perm(zfs_deleg_who_type_t type, boolean_t local, boolean_t descend, + const char *who, char *perms, nvlist_t *top_nvl) +{ + int i; + char ld[2] = { '\0', '\0' }; + char who_buf[ZFS_MAXNAMELEN+32]; + char base_type = ZFS_DELEG_WHO_UNKNOWN; + char set_type = ZFS_DELEG_WHO_UNKNOWN; + nvlist_t *base_nvl = NULL; + nvlist_t *set_nvl = NULL; + nvlist_t *nvl; + + if (nvlist_alloc(&base_nvl, NV_UNIQUE_NAME, 0) != 0) + nomem(); + if (nvlist_alloc(&set_nvl, NV_UNIQUE_NAME, 0) != 0) + nomem(); + + switch (type) { + case ZFS_DELEG_NAMED_SET_SETS: + case ZFS_DELEG_NAMED_SET: + set_type = ZFS_DELEG_NAMED_SET_SETS; + base_type = ZFS_DELEG_NAMED_SET; + ld[0] = ZFS_DELEG_NA; + break; + case ZFS_DELEG_CREATE_SETS: + case ZFS_DELEG_CREATE: + set_type = ZFS_DELEG_CREATE_SETS; + base_type = ZFS_DELEG_CREATE; + ld[0] = ZFS_DELEG_NA; + break; + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_USER: + set_type = ZFS_DELEG_USER_SETS; + base_type = ZFS_DELEG_USER; + if (local) + ld[0] = ZFS_DELEG_LOCAL; + if (descend) + ld[1] = ZFS_DELEG_DESCENDENT; + break; + case ZFS_DELEG_GROUP_SETS: + case ZFS_DELEG_GROUP: + set_type = ZFS_DELEG_GROUP_SETS; + base_type = ZFS_DELEG_GROUP; + if (local) + ld[0] = ZFS_DELEG_LOCAL; + if (descend) + ld[1] = ZFS_DELEG_DESCENDENT; + break; + case ZFS_DELEG_EVERYONE_SETS: + case ZFS_DELEG_EVERYONE: + set_type = ZFS_DELEG_EVERYONE_SETS; + base_type = ZFS_DELEG_EVERYONE; + if (local) + ld[0] = ZFS_DELEG_LOCAL; + if (descend) + ld[1] = ZFS_DELEG_DESCENDENT; + default: + break; + } + + if (perms != NULL) { + char *curr = perms; + char *end = curr + strlen(perms); + + while (curr < end) { + char *delim = strchr(curr, ','); + if (delim == NULL) + delim = end; + else + *delim = '\0'; + + if (curr[0] == '@') + nvl = set_nvl; + else + nvl = base_nvl; + + (void) nvlist_add_boolean(nvl, curr); + if (delim != end) + *delim = ','; + curr = delim + 1; + } + + for (i = 0; i < 2; i++) { + char locality = ld[i]; + if (locality == 0) + continue; + + if (!nvlist_empty(base_nvl)) { + if (who != NULL) + (void) snprintf(who_buf, + sizeof (who_buf), "%c%c$%s", + base_type, 
locality, who); + else + (void) snprintf(who_buf, + sizeof (who_buf), "%c%c$", + base_type, locality); + + (void) nvlist_add_nvlist(top_nvl, who_buf, + base_nvl); + } + + + if (!nvlist_empty(set_nvl)) { + if (who != NULL) + (void) snprintf(who_buf, + sizeof (who_buf), "%c%c$%s", + set_type, locality, who); + else + (void) snprintf(who_buf, + sizeof (who_buf), "%c%c$", + set_type, locality); + + (void) nvlist_add_nvlist(top_nvl, who_buf, + set_nvl); + } + } + } else { + for (i = 0; i < 2; i++) { + char locality = ld[i]; + if (locality == 0) + continue; + + if (who != NULL) + (void) snprintf(who_buf, sizeof (who_buf), + "%c%c$%s", base_type, locality, who); + else + (void) snprintf(who_buf, sizeof (who_buf), + "%c%c$", base_type, locality); + (void) nvlist_add_boolean(top_nvl, who_buf); + + if (who != NULL) + (void) snprintf(who_buf, sizeof (who_buf), + "%c%c$%s", set_type, locality, who); + else + (void) snprintf(who_buf, sizeof (who_buf), + "%c%c$", set_type, locality); + (void) nvlist_add_boolean(top_nvl, who_buf); + } + } +} + +static int +construct_fsacl_list(boolean_t un, struct allow_opts *opts, nvlist_t **nvlp) +{ + if (nvlist_alloc(nvlp, NV_UNIQUE_NAME, 0) != 0) + nomem(); + + if (opts->set) { + store_allow_perm(ZFS_DELEG_NAMED_SET, opts->local, + opts->descend, opts->who, opts->perms, *nvlp); + } else if (opts->create) { + store_allow_perm(ZFS_DELEG_CREATE, opts->local, + opts->descend, NULL, opts->perms, *nvlp); + } else if (opts->everyone) { + store_allow_perm(ZFS_DELEG_EVERYONE, opts->local, + opts->descend, NULL, opts->perms, *nvlp); + } else { + char *curr = opts->who; + char *end = curr + strlen(curr); + + while (curr < end) { + const char *who; + zfs_deleg_who_type_t who_type; + char *endch; + char *delim = strchr(curr, ','); + char errbuf[256]; + char id[64]; + struct passwd *p = NULL; + struct group *g = NULL; + + uid_t rid; + if (delim == NULL) + delim = end; + else + *delim = '\0'; + + rid = (uid_t)strtol(curr, &endch, 0); + if (opts->user) { + who_type = ZFS_DELEG_USER; + if (*endch != '\0') + p = getpwnam(curr); + else + p = getpwuid(rid); + + if (p != NULL) + rid = p->pw_uid; + else { + (void) snprintf(errbuf, 256, gettext( + "invalid user %s"), curr); + allow_usage(un, B_TRUE, errbuf); + } + } else if (opts->group) { + who_type = ZFS_DELEG_GROUP; + if (*endch != '\0') + g = getgrnam(curr); + else + g = getgrgid(rid); + + if (g != NULL) + rid = g->gr_gid; + else { + (void) snprintf(errbuf, 256, gettext( + "invalid group %s"), curr); + allow_usage(un, B_TRUE, errbuf); + } + } else { + if (*endch != '\0') { + p = getpwnam(curr); + } else { + p = getpwuid(rid); + } + + if (p == NULL) { + if (*endch != '\0') { + g = getgrnam(curr); + } else { + g = getgrgid(rid); + } + } + + if (p != NULL) { + who_type = ZFS_DELEG_USER; + rid = p->pw_uid; + } else if (g != NULL) { + who_type = ZFS_DELEG_GROUP; + rid = g->gr_gid; + } else { + (void) snprintf(errbuf, 256, gettext( + "invalid user/group %s"), curr); + allow_usage(un, B_TRUE, errbuf); + } + } + + (void) sprintf(id, "%u", rid); + who = id; + + store_allow_perm(who_type, opts->local, + opts->descend, who, opts->perms, *nvlp); + curr = delim + 1; + } + } + + return (0); +} + +static void +print_set_creat_perms(uu_avl_t *who_avl) +{ + const char *sc_title[] = { + gettext("Permission sets:\n"), + gettext("Create time permissions:\n"), + NULL + }; + const char **title_ptr = sc_title; + who_perm_node_t *who_node = NULL; + int prev_weight = -1; + + for (who_node = uu_avl_first(who_avl); who_node != NULL; + who_node = 
uu_avl_next(who_avl, who_node)) { + uu_avl_t *avl = who_node->who_perm.who_deleg_perm_avl; + zfs_deleg_who_type_t who_type = who_node->who_perm.who_type; + const char *who_name = who_node->who_perm.who_name; + int weight = who_type2weight(who_type); + boolean_t first = B_TRUE; + deleg_perm_node_t *deleg_node; + + if (prev_weight != weight) { + (void) printf(*title_ptr++); + prev_weight = weight; + } + + if (who_name == NULL || strnlen(who_name, 1) == 0) + (void) printf("\t"); + else + (void) printf("\t%s ", who_name); + + for (deleg_node = uu_avl_first(avl); deleg_node != NULL; + deleg_node = uu_avl_next(avl, deleg_node)) { + if (first) { + (void) printf("%s", + deleg_node->dpn_perm.dp_name); + first = B_FALSE; + } else + (void) printf(",%s", + deleg_node->dpn_perm.dp_name); + } + + (void) printf("\n"); + } +} + +static void inline +print_uge_deleg_perms(uu_avl_t *who_avl, boolean_t local, boolean_t descend, + const char *title) +{ + who_perm_node_t *who_node = NULL; + boolean_t prt_title = B_TRUE; + uu_avl_walk_t *walk; + + if ((walk = uu_avl_walk_start(who_avl, UU_WALK_ROBUST)) == NULL) + nomem(); + + while ((who_node = uu_avl_walk_next(walk)) != NULL) { + const char *who_name = who_node->who_perm.who_name; + const char *nice_who_name = who_node->who_perm.who_ug_name; + uu_avl_t *avl = who_node->who_perm.who_deleg_perm_avl; + zfs_deleg_who_type_t who_type = who_node->who_perm.who_type; + char delim = ' '; + deleg_perm_node_t *deleg_node; + boolean_t prt_who = B_TRUE; + + for (deleg_node = uu_avl_first(avl); + deleg_node != NULL; + deleg_node = uu_avl_next(avl, deleg_node)) { + if (local != deleg_node->dpn_perm.dp_local || + descend != deleg_node->dpn_perm.dp_descend) + continue; + + if (prt_who) { + const char *who = NULL; + if (prt_title) { + prt_title = B_FALSE; + (void) printf(title); + } + + switch (who_type) { + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_USER: + who = gettext("user"); + if (nice_who_name) + who_name = nice_who_name; + break; + case ZFS_DELEG_GROUP_SETS: + case ZFS_DELEG_GROUP: + who = gettext("group"); + if (nice_who_name) + who_name = nice_who_name; + break; + case ZFS_DELEG_EVERYONE_SETS: + case ZFS_DELEG_EVERYONE: + who = gettext("everyone"); + who_name = NULL; + default: + break; + } + + prt_who = B_FALSE; + if (who_name == NULL) + (void) printf("\t%s", who); + else + (void) printf("\t%s %s", who, who_name); + } + + (void) printf("%c%s", delim, + deleg_node->dpn_perm.dp_name); + delim = ','; + } + + if (!prt_who) + (void) printf("\n"); + } + + uu_avl_walk_end(walk); +} + +static void +print_fs_perms(fs_perm_set_t *fspset) +{ + fs_perm_node_t *node = NULL; + char buf[ZFS_MAXNAMELEN+32]; + const char *dsname = buf; + + for (node = uu_list_first(fspset->fsps_list); node != NULL; + node = uu_list_next(fspset->fsps_list, node)) { + uu_avl_t *sc_avl = node->fspn_fsperm.fsp_sc_avl; + uu_avl_t *uge_avl = node->fspn_fsperm.fsp_uge_avl; + int left = 0; + + (void) snprintf(buf, ZFS_MAXNAMELEN+32, + gettext("---- Permissions on %s "), + node->fspn_fsperm.fsp_name); + (void) printf(dsname); + left = 70 - strlen(buf); + while (left-- > 0) + (void) printf("-"); + (void) printf("\n"); + + print_set_creat_perms(sc_avl); + print_uge_deleg_perms(uge_avl, B_TRUE, B_FALSE, + gettext("Local permissions:\n")); + print_uge_deleg_perms(uge_avl, B_FALSE, B_TRUE, + gettext("Descendent permissions:\n")); + print_uge_deleg_perms(uge_avl, B_TRUE, B_TRUE, + gettext("Local+Descendent permissions:\n")); + } +} + +static fs_perm_set_t fs_perm_set = { NULL, NULL, NULL, NULL }; + +struct 
deleg_perms { + boolean_t un; + nvlist_t *nvl; +}; + +static int +set_deleg_perms(zfs_handle_t *zhp, void *data) +{ + struct deleg_perms *perms = (struct deleg_perms *)data; + zfs_type_t zfs_type = zfs_get_type(zhp); + + if (zfs_type != ZFS_TYPE_FILESYSTEM && zfs_type != ZFS_TYPE_VOLUME) + return (0); + + return (zfs_set_fsacl(zhp, perms->un, perms->nvl)); +} + +static int +zfs_do_allow_unallow_impl(int argc, char **argv, boolean_t un) +{ + zfs_handle_t *zhp; + nvlist_t *perm_nvl = NULL; + nvlist_t *update_perm_nvl = NULL; + int error = 1; + int c; + struct allow_opts opts = { 0 }; + + const char *optstr = un ? "ldugecsrh" : "ldugecsh"; + + /* check opts */ + while ((c = getopt(argc, argv, optstr)) != -1) { + switch (c) { + case 'l': + opts.local = B_TRUE; + break; + case 'd': + opts.descend = B_TRUE; + break; + case 'u': + opts.user = B_TRUE; + break; + case 'g': + opts.group = B_TRUE; + break; + case 'e': + opts.everyone = B_TRUE; + break; + case 's': + opts.set = B_TRUE; + break; + case 'c': + opts.create = B_TRUE; + break; + case 'r': + opts.recursive = B_TRUE; + break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + usage(B_FALSE); + break; + case 'h': + opts.prt_usage = B_TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + /* check arguments */ + parse_allow_args(argc, argv, un, &opts); + + /* try to open the dataset */ + if ((zhp = zfs_open(g_zfs, opts.dataset, ZFS_TYPE_FILESYSTEM)) + == NULL) { + (void) fprintf(stderr, "Failed to open Dataset *%s*\n", + opts.dataset); + return (-1); + } + + if (zfs_get_fsacl(zhp, &perm_nvl) != 0) + goto cleanup2; + + fs_perm_set_init(&fs_perm_set); + if (parse_fs_perm_set(&fs_perm_set, perm_nvl) != 0) { + (void) fprintf(stderr, "Failed to parse fsacl permissionsn"); + goto cleanup1; + } + + if (opts.prt_perms) + print_fs_perms(&fs_perm_set); + else { + (void) construct_fsacl_list(un, &opts, &update_perm_nvl); + if (zfs_set_fsacl(zhp, un, update_perm_nvl) != 0) + goto cleanup0; + + if (un && opts.recursive) { + struct deleg_perms data = { un, update_perm_nvl }; + if (zfs_iter_filesystems(zhp, set_deleg_perms, + &data) != 0) + goto cleanup0; + } + } + + error = 0; + +cleanup0: + nvlist_free(perm_nvl); + if (update_perm_nvl != NULL) + nvlist_free(update_perm_nvl); +cleanup1: + fs_perm_set_fini(&fs_perm_set); +cleanup2: + zfs_close(zhp); + + return (error); +} + +/* + * zfs allow [-r] [-t] ... + * + * -r Recursively hold + * -t Temporary hold (hidden option) + * + * Apply a user-hold with the given tag to the list of snapshots. + */ +static int +zfs_do_allow(int argc, char **argv) +{ + return (zfs_do_allow_unallow_impl(argc, argv, B_FALSE)); +} + +/* + * zfs unallow [-r] [-t] ... + * + * -r Recursively hold + * -t Temporary hold (hidden option) + * + * Apply a user-hold with the given tag to the list of snapshots. 
+ */ +static int +zfs_do_unallow(int argc, char **argv) +{ + return (zfs_do_allow_unallow_impl(argc, argv, B_TRUE)); +} + static int zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding) { @@ -2928,6 +4952,200 @@ zfs_do_release(int argc, char **argv) return (zfs_do_hold_rele_impl(argc, argv, B_FALSE)); } +typedef struct holds_cbdata { + boolean_t cb_recursive; + const char *cb_snapname; + nvlist_t **cb_nvlp; + size_t cb_max_namelen; + size_t cb_max_taglen; +} holds_cbdata_t; + +#define STRFTIME_FMT_STR "%a %b %e %k:%M %Y" +#define DATETIME_BUF_LEN (32) +/* + * + */ +static void +print_holds(boolean_t scripted, int nwidth, int tagwidth, nvlist_t *nvl) +{ + int i; + nvpair_t *nvp = NULL; + char *hdr_cols[] = { "NAME", "TAG", "TIMESTAMP" }; + const char *col; + + if (!scripted) { + for (i = 0; i < 3; i++) { + col = gettext(hdr_cols[i]); + if (i < 2) + (void) printf("%-*s ", i ? tagwidth : nwidth, + col); + else + (void) printf("%s\n", col); + } + } + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + char *zname = nvpair_name(nvp); + nvlist_t *nvl2; + nvpair_t *nvp2 = NULL; + (void) nvpair_value_nvlist(nvp, &nvl2); + while ((nvp2 = nvlist_next_nvpair(nvl2, nvp2)) != NULL) { + char tsbuf[DATETIME_BUF_LEN]; + char *tagname = nvpair_name(nvp2); + uint64_t val = 0; + time_t time; + struct tm t; + char sep = scripted ? '\t' : ' '; + int sepnum = scripted ? 1 : 2; + + (void) nvpair_value_uint64(nvp2, &val); + time = (time_t)val; + (void) localtime_r(&time, &t); + (void) strftime(tsbuf, DATETIME_BUF_LEN, + gettext(STRFTIME_FMT_STR), &t); + + (void) printf("%-*s%*c%-*s%*c%s\n", nwidth, zname, + sepnum, sep, tagwidth, tagname, sepnum, sep, tsbuf); + } + } +} + +/* + * Generic callback function to list a dataset or snapshot. + */ +static int +holds_callback(zfs_handle_t *zhp, void *data) +{ + holds_cbdata_t *cbp = data; + nvlist_t *top_nvl = *cbp->cb_nvlp; + nvlist_t *nvl = NULL; + nvpair_t *nvp = NULL; + const char *zname = zfs_get_name(zhp); + size_t znamelen = strnlen(zname, ZFS_MAXNAMELEN); + + if (cbp->cb_recursive) { + const char *snapname; + char *delim = strchr(zname, '@'); + if (delim == NULL) + return (0); + + snapname = delim + 1; + if (strcmp(cbp->cb_snapname, snapname)) + return (0); + } + + if (zfs_get_holds(zhp, &nvl) != 0) + return (-1); + + if (znamelen > cbp->cb_max_namelen) + cbp->cb_max_namelen = znamelen; + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + const char *tag = nvpair_name(nvp); + size_t taglen = strnlen(tag, MAXNAMELEN); + if (taglen > cbp->cb_max_taglen) + cbp->cb_max_taglen = taglen; + } + + return (nvlist_add_nvlist(top_nvl, zname, nvl)); +} + +/* + * zfs holds [-r] ... 
+ * + * -r Recursively hold + */ +static int +zfs_do_holds(int argc, char **argv) +{ + int errors = 0; + int c; + int i; + boolean_t scripted = B_FALSE; + boolean_t recursive = B_FALSE; + const char *opts = "rH"; + nvlist_t *nvl; + + int types = ZFS_TYPE_SNAPSHOT; + holds_cbdata_t cb = { 0 }; + + int limit = 0; + int ret; + int flags = 0; + + /* check options */ + while ((c = getopt(argc, argv, opts)) != -1) { + switch (c) { + case 'r': + recursive = B_TRUE; + break; + case 'H': + scripted = B_TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + if (recursive) { + types |= ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME; + flags |= ZFS_ITER_RECURSE; + } + + argc -= optind; + argv += optind; + + /* check number of arguments */ + if (argc < 1) + usage(B_FALSE); + + if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) + nomem(); + + for (i = 0; i < argc; ++i) { + char *snapshot = argv[i]; + const char *delim; + const char *snapname; + + delim = strchr(snapshot, '@'); + if (delim == NULL) { + (void) fprintf(stderr, + gettext("'%s' is not a snapshot\n"), snapshot); + ++errors; + continue; + } + snapname = delim + 1; + if (recursive) + snapshot[delim - snapshot] = '\0'; + + cb.cb_recursive = recursive; + cb.cb_snapname = snapname; + cb.cb_nvlp = &nvl; + + /* + * 1. collect holds data, set format options + */ + ret = zfs_for_each(argc, argv, flags, types, NULL, NULL, limit, + holds_callback, &cb); + if (ret != 0) + ++errors; + } + + /* + * 2. print holds data + */ + print_holds(scripted, cb.cb_max_namelen, cb.cb_max_taglen, nvl); + + if (nvlist_empty(nvl)) + (void) printf(gettext("no datasets available\n")); + + nvlist_free(nvl); + + return (0 != errors); +} + #define CHECK_SPINNER 30 #define SPINNER_TIME 3 /* seconds */ #define MOUNT_TIME 5 /* seconds */ @@ -3809,15 +6027,6 @@ zfs_do_unshare(int argc, char **argv) return (unshare_unmount(OP_SHARE, argc, argv)); } -/* ARGSUSED */ -static int -zfs_do_python(int argc, char **argv) -{ - (void) execv(pypath, argv-1); - (void) printf("internal error: %s not found\n", pypath); - return (-1); -} - static int find_command_idx(char *command, int *idx) { diff --git a/include/libzfs.h b/include/libzfs.h index 23422b2c9..26b1ce302 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 
*/ #ifndef _LIBZFS_H @@ -572,13 +573,17 @@ extern int zfs_promote(zfs_handle_t *); extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t, boolean_t, boolean_t, int, uint64_t, uint64_t); extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); +extern int zfs_get_holds(zfs_handle_t *, nvlist_t **); extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *); typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain, uid_t rid, uint64_t space); -extern int zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type, - zfs_userspace_cb_t func, void *arg); +extern int zfs_userspace(zfs_handle_t *, zfs_userquota_prop_t, + zfs_userspace_cb_t, void *); + +extern int zfs_get_fsacl(zfs_handle_t *, nvlist_t **); +extern int zfs_set_fsacl(zfs_handle_t *, boolean_t, nvlist_t *); typedef struct recvflags { /* print informational messages (ie, -v was specified) */ diff --git a/include/zfs_deleg.h b/include/zfs_deleg.h index b4cb8e2b4..9997dffae 100644 --- a/include/zfs_deleg.h +++ b/include/zfs_deleg.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. */ #ifndef _ZFS_DELEG_H @@ -51,6 +52,7 @@ typedef enum { ZFS_DELEG_NOTE_CLONE, ZFS_DELEG_NOTE_PROMOTE, ZFS_DELEG_NOTE_RENAME, + ZFS_DELEG_NOTE_SEND, ZFS_DELEG_NOTE_RECEIVE, ZFS_DELEG_NOTE_ALLOW, ZFS_DELEG_NOTE_USERPROP, diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 996bae2d7..74015139a 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. 
*/ @@ -95,6 +96,7 @@ zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, namecheck_err_t why; char what; + (void) zfs_prop_get_table(); if (dataset_namecheck(path, &why, &what) != 0) { if (hdl != NULL) { switch (why) { @@ -4313,6 +4315,193 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, return (0); } +int +zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl) +{ + zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + libzfs_handle_t *hdl = zhp->zfs_hdl; + int nvsz = 2048; + void *nvbuf; + int err = 0; + char errbuf[ZFS_MAXNAMELEN+32]; + + assert(zhp->zfs_type == ZFS_TYPE_VOLUME || + zhp->zfs_type == ZFS_TYPE_FILESYSTEM); + +tryagain: + + nvbuf = malloc(nvsz); + if (nvbuf == NULL) { + err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno))); + goto out; + } + + zc.zc_nvlist_dst_size = nvsz; + zc.zc_nvlist_dst = (uintptr_t)nvbuf; + + (void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN); + + if (zfs_ioctl(hdl, ZFS_IOC_GET_FSACL, &zc) != 0) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot get permissions on '%s'"), + zc.zc_name); + switch (errno) { + case ENOMEM: + free(nvbuf); + nvsz = zc.zc_nvlist_dst_size; + goto tryagain; + + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgraded")); + err = zfs_error(hdl, EZFS_BADVERSION, errbuf); + break; + case EINVAL: + err = zfs_error(hdl, EZFS_BADTYPE, errbuf); + break; + case ENOENT: + err = zfs_error(hdl, EZFS_NOENT, errbuf); + break; + default: + err = zfs_standard_error_fmt(hdl, errno, errbuf); + break; + } + } else { + /* success */ + int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0); + if (rc) { + (void) snprintf(errbuf, sizeof (errbuf), dgettext( + TEXT_DOMAIN, "cannot get permissions on '%s'"), + zc.zc_name); + err = zfs_standard_error_fmt(hdl, rc, errbuf); + } + } + + free(nvbuf); +out: + return (err); +} + +int +zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl) +{ + zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + libzfs_handle_t *hdl = zhp->zfs_hdl; + char *nvbuf; + char errbuf[ZFS_MAXNAMELEN+32]; + size_t nvsz; + int err; + + assert(zhp->zfs_type == ZFS_TYPE_VOLUME || + zhp->zfs_type == ZFS_TYPE_FILESYSTEM); + + err = nvlist_size(nvl, &nvsz, NV_ENCODE_NATIVE); + assert(err == 0); + + nvbuf = malloc(nvsz); + + err = nvlist_pack(nvl, &nvbuf, &nvsz, NV_ENCODE_NATIVE, 0); + assert(err == 0); + + zc.zc_nvlist_src_size = nvsz; + zc.zc_nvlist_src = (uintptr_t)nvbuf; + zc.zc_perm_action = un; + + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + + if (zfs_ioctl(hdl, ZFS_IOC_SET_FSACL, &zc) != 0) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot set permissions on '%s'"), + zc.zc_name); + switch (errno) { + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgraded")); + err = zfs_error(hdl, EZFS_BADVERSION, errbuf); + break; + case EINVAL: + err = zfs_error(hdl, EZFS_BADTYPE, errbuf); + break; + case ENOENT: + err = zfs_error(hdl, EZFS_NOENT, errbuf); + break; + default: + err = zfs_standard_error_fmt(hdl, errno, errbuf); + break; + } + } + + free(nvbuf); + + return (err); +} + +int +zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl) +{ + zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + libzfs_handle_t *hdl = zhp->zfs_hdl; + int nvsz = 2048; + void *nvbuf; + int err = 0; + char errbuf[ZFS_MAXNAMELEN+32]; + + assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); + +tryagain: + + nvbuf = malloc(nvsz); + if (nvbuf == NULL) { + err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno))); + goto out; + 
} + + zc.zc_nvlist_dst_size = nvsz; + zc.zc_nvlist_dst = (uintptr_t)nvbuf; + + (void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN); + + if (zfs_ioctl(hdl, ZFS_IOC_GET_HOLDS, &zc) != 0) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"), + zc.zc_name); + switch (errno) { + case ENOMEM: + free(nvbuf); + nvsz = zc.zc_nvlist_dst_size; + goto tryagain; + + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgraded")); + err = zfs_error(hdl, EZFS_BADVERSION, errbuf); + break; + case EINVAL: + err = zfs_error(hdl, EZFS_BADTYPE, errbuf); + break; + case ENOENT: + err = zfs_error(hdl, EZFS_NOENT, errbuf); + break; + default: + err = zfs_standard_error_fmt(hdl, errno, errbuf); + break; + } + } else { + /* success */ + int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0); + if (rc) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"), + zc.zc_name); + err = zfs_standard_error_fmt(hdl, rc, errbuf); + } + } + + free(nvbuf); +out: + return (err); +} + uint64_t zvol_volsize_to_reservation(uint64_t volsize, nvlist_t *props) { diff --git a/module/zcommon/zfs_deleg.c b/module/zcommon/zfs_deleg.c index 6754ab84b..9de61790c 100644 --- a/module/zcommon/zfs_deleg.c +++ b/module/zcommon/zfs_deleg.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. */ #if defined(_KERNEL) @@ -60,7 +61,7 @@ zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = { {ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK }, {ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT }, {ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE }, - {ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_NONE }, + {ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND }, {ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP }, {ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA }, {ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA }, From cddafdcbc55a38cdbdd3dc8c58f447b22bd847ee Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Mon, 1 Aug 2011 10:34:06 -0700 Subject: [PATCH 10/10] Illumos #1313: Integer overflow in txg_delay() The function txg_delay() is used to delay txg (transaction group) threads in ZFS. The timeout value for this function is calculated using: int timeout = ddi_get_lbolt() + ticks; Later, the actual wait is performed: while (ddi_get_lbolt() < timeout && tx->tx_syncing_txg < txg-1 && !txg_stalled(dp)) (void) cv_timedwait(&tx->tx_quiesce_more_cv, &tx->tx_sync_lock, timeout - ddi_get_lbolt()); The ddi_get_lbolt() function returns current uptime in clock ticks and is typed as clock_t. The clock_t type on 64-bit architectures is int64_t. The "timeout" variable will overflow depending on the tick frequency (e.g. for 1000 it will overflow in 28.855 days). This will make the expression "ddi_get_lbolt() < timeout" always false - txg threads will not be delayed anymore at all. This leads to a slowdown in ZFS writes. The attached patch initializes timeout as clock_t to match the return value of ddi_get_lbolt(). 
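For illustration only (this sketch is not part of the patch): a minimal standalone C program, assuming a 1000 Hz tick rate and a 64-bit clock_t, that reproduces the truncation described above. At 1000 ticks per second a signed 32-bit counter is exhausted after 2^31 / 1000 seconds, roughly 24.9 days of uptime; once the simulated lbolt value passes INT_MAX, the "int" sum wraps negative and the "now < timeout" test can never be true, so the txg threads are never delayed. The type name lbolt_t and the constants used here are invented for the example.

    /* Illustration only, not ZFS code: shows why "int timeout" breaks txg_delay(). */
    #include <stdio.h>
    #include <stdint.h>
    #include <limits.h>

    typedef int64_t lbolt_t;        /* stand-in for clock_t on a 64-bit system */

    int
    main(void)
    {
            lbolt_t lbolt = (lbolt_t)INT_MAX + 100;  /* uptime past the 32-bit limit */
            int ticks = 500;

            int bad_timeout = (int)(lbolt + ticks);  /* old code: truncates, wraps negative */
            lbolt_t good_timeout = lbolt + ticks;    /* fixed code: full 64-bit arithmetic */

            /* prints 0: the overflowed timeout never delays the txg thread */
            (void) printf("now < bad timeout:  %d\n", lbolt < bad_timeout);
            /* prints 1: the clock_t timeout keeps delaying as intended */
            (void) printf("now < good timeout: %d\n", lbolt < good_timeout);
            return (0);
    }
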
Signed-off-by: Brian Behlendorf Issue #352 --- module/zfs/txg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/zfs/txg.c b/module/zfs/txg.c index 340c42ae8..d0d2b1716 100644 --- a/module/zfs/txg.c +++ b/module/zfs/txg.c @@ -506,7 +506,7 @@ void txg_delay(dsl_pool_t *dp, uint64_t txg, int ticks) { tx_state_t *tx = &dp->dp_tx; - int timeout = ddi_get_lbolt() + ticks; + clock_t timeout = ddi_get_lbolt() + ticks; /* don't delay if this txg could transition to quiesing immediately */ if (tx->tx_open_txg > txg ||