Add support for user/group dnode accounting & quota

This patch tracks dnode usage for each user/group in the
DMU_USER/GROUPUSED_OBJECT ZAPs. ZAP entries dedicated to dnode
accounting have the key prefixed with "obj-" followed by the UID/GID
in string format (as done for the block accounting).
A new SPA feature has been added for dnode accounting as well as
a new ZPL version. The SPA feature must be enabled in the pool
before upgrading the zfs filesystem. During the zfs version upgrade,
a "quotacheck" will be executed by marking all dnode as dirty.

ZoL-bug-id: https://github.com/zfsonlinux/zfs/issues/3500

Signed-off-by: Jinshan Xiong <jinshan.xiong@intel.com>
Signed-off-by: Johann Lombardi <johann.lombardi@intel.com>
This commit is contained in:
Jinshan Xiong
2016-10-04 11:46:10 -07:00
committed by Brian Behlendorf
parent af322debaa
commit 1de321e626
43 changed files with 1119 additions and 98 deletions
+171 -9
View File
@@ -31,6 +31,7 @@
/* Portions Copyright 2010 Robert Milkowski */
#include <sys/zfeature.h>
#include <sys/cred.h>
#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
@@ -53,6 +54,7 @@
#include <sys/dsl_destroy.h>
#include <sys/vdev.h>
#include <sys/policy.h>
#include <sys/spa_impl.h>
/*
* Needed to close a window in dnode_move() that allows the objset to be freed
@@ -77,6 +79,9 @@ int dmu_rescan_dnode_threshold = 1 << DN_MAX_INDBLKSHIFT;
static void dmu_objset_find_dp_cb(void *arg);
static void dmu_objset_upgrade(objset_t *os, dmu_objset_upgrade_cb_t cb);
static void dmu_objset_upgrade_stop(objset_t *os);
void
dmu_objset_init(void)
{
@@ -519,6 +524,8 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode);
}
mutex_init(&os->os_upgrade_lock, NULL, MUTEX_DEFAULT, NULL);
*osp = os;
return (0);
}
@@ -625,6 +632,9 @@ dmu_objset_own(const char *name, dmu_objset_type_t type,
err = dmu_objset_own_impl(ds, type, readonly, tag, osp);
dsl_pool_rele(dp, FTAG);
if (err == 0 && dmu_objset_userobjspace_upgradable(*osp))
dmu_objset_userobjspace_upgrade(*osp);
return (err);
}
@@ -685,6 +695,10 @@ dmu_objset_refresh_ownership(objset_t *os, void *tag)
void
dmu_objset_disown(objset_t *os, void *tag)
{
/*
* Stop upgrading thread
*/
dmu_objset_upgrade_stop(os);
dsl_dataset_disown(os->os_dsl_dataset, tag);
}
@@ -859,6 +873,12 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
os->os_phys->os_type = type;
if (dmu_objset_userused_enabled(os)) {
os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
if (dmu_objset_userobjused_enabled(os)) {
ds->ds_feature_activation_needed[
SPA_FEATURE_USEROBJ_ACCOUNTING] = B_TRUE;
os->os_phys->os_flags |=
OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE;
}
os->os_flags = os->os_phys->os_flags;
}
@@ -1067,6 +1087,60 @@ dmu_objset_snapshot_one(const char *fsname, const char *snapname)
return (err);
}
static void
dmu_objset_upgrade_task_cb(void *data)
{
objset_t *os = data;
mutex_enter(&os->os_upgrade_lock);
os->os_upgrade_status = EINTR;
if (!os->os_upgrade_exit) {
mutex_exit(&os->os_upgrade_lock);
os->os_upgrade_status = os->os_upgrade_cb(os);
mutex_enter(&os->os_upgrade_lock);
}
os->os_upgrade_exit = B_TRUE;
os->os_upgrade_id = 0;
mutex_exit(&os->os_upgrade_lock);
}
static void
dmu_objset_upgrade(objset_t *os, dmu_objset_upgrade_cb_t cb)
{
if (os->os_upgrade_id != 0)
return;
mutex_enter(&os->os_upgrade_lock);
if (os->os_upgrade_id == 0 && os->os_upgrade_status == 0) {
os->os_upgrade_exit = B_FALSE;
os->os_upgrade_cb = cb;
os->os_upgrade_id = taskq_dispatch(
os->os_spa->spa_upgrade_taskq,
dmu_objset_upgrade_task_cb, os, TQ_SLEEP);
if (os->os_upgrade_id == 0)
os->os_upgrade_status = ENOMEM;
}
mutex_exit(&os->os_upgrade_lock);
}
static void
dmu_objset_upgrade_stop(objset_t *os)
{
mutex_enter(&os->os_upgrade_lock);
os->os_upgrade_exit = B_TRUE;
if (os->os_upgrade_id != 0) {
taskqid_t id = os->os_upgrade_id;
os->os_upgrade_id = 0;
mutex_exit(&os->os_upgrade_lock);
taskq_cancel_id(os->os_spa->spa_upgrade_taskq, id);
} else {
mutex_exit(&os->os_upgrade_lock);
}
}
static void
dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
{
@@ -1257,6 +1331,13 @@ dmu_objset_userused_enabled(objset_t *os)
DMU_USERUSED_DNODE(os) != NULL);
}
boolean_t
dmu_objset_userobjused_enabled(objset_t *os)
{
return (dmu_objset_userused_enabled(os) &&
spa_feature_is_enabled(os->os_spa, SPA_FEATURE_USEROBJ_ACCOUNTING));
}
static void
do_userquota_update(objset_t *os, uint64_t used, uint64_t flags,
uint64_t user, uint64_t group, boolean_t subtract, dmu_tx_t *tx)
@@ -1272,6 +1353,25 @@ do_userquota_update(objset_t *os, uint64_t used, uint64_t flags,
}
}
static void
do_userobjquota_update(objset_t *os, uint64_t flags, uint64_t user,
uint64_t group, boolean_t subtract, dmu_tx_t *tx)
{
if (flags & DNODE_FLAG_USEROBJUSED_ACCOUNTED) {
char name[20 + DMU_OBJACCT_PREFIX_LEN];
(void) snprintf(name, sizeof (name), DMU_OBJACCT_PREFIX "%llx",
(longlong_t)user);
VERIFY0(zap_increment(os, DMU_USERUSED_OBJECT, name,
subtract ? -1 : 1, tx));
(void) snprintf(name, sizeof (name), DMU_OBJACCT_PREFIX "%llx",
(longlong_t)group);
VERIFY0(zap_increment(os, DMU_GROUPUSED_OBJECT, name,
subtract ? -1 : 1, tx));
}
}
void
dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
{
@@ -1310,11 +1410,15 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
if (flags & DN_ID_OLD_EXIST) {
do_userquota_update(os, dn->dn_oldused, dn->dn_oldflags,
dn->dn_olduid, dn->dn_oldgid, B_TRUE, tx);
do_userobjquota_update(os, dn->dn_oldflags,
dn->dn_olduid, dn->dn_oldgid, B_TRUE, tx);
}
if (flags & DN_ID_NEW_EXIST) {
do_userquota_update(os, DN_USED_BYTES(dn->dn_phys),
dn->dn_phys->dn_flags, dn->dn_newuid,
dn->dn_newgid, B_FALSE, tx);
do_userobjquota_update(os, dn->dn_phys->dn_flags,
dn->dn_newuid, dn->dn_newgid, B_FALSE, tx);
}
mutex_enter(&dn->dn_mtx);
@@ -1486,19 +1590,19 @@ dmu_objset_userspace_present(objset_t *os)
OBJSET_FLAG_USERACCOUNTING_COMPLETE);
}
int
dmu_objset_userspace_upgrade(objset_t *os)
boolean_t
dmu_objset_userobjspace_present(objset_t *os)
{
return (os->os_phys->os_flags &
OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE);
}
static int
dmu_objset_space_upgrade(objset_t *os)
{
uint64_t obj;
int err = 0;
if (dmu_objset_userspace_present(os))
return (0);
if (!dmu_objset_userused_enabled(os))
return (SET_ERROR(ENOTSUP));
if (dmu_objset_is_snapshot(os))
return (SET_ERROR(EINVAL));
/*
* We simply need to mark every object dirty, so that it will be
* synced out and now accounted. If this is called
@@ -1512,6 +1616,13 @@ dmu_objset_userspace_upgrade(objset_t *os)
dmu_buf_t *db;
int objerr;
mutex_enter(&os->os_upgrade_lock);
if (os->os_upgrade_exit)
err = SET_ERROR(EINTR);
mutex_exit(&os->os_upgrade_lock);
if (err != 0)
return (err);
if (issig(JUSTLOOKING) && issig(FORREAL))
return (SET_ERROR(EINTR));
@@ -1529,12 +1640,60 @@ dmu_objset_userspace_upgrade(objset_t *os)
dmu_buf_rele(db, FTAG);
dmu_tx_commit(tx);
}
return (0);
}
int
dmu_objset_userspace_upgrade(objset_t *os)
{
int err = 0;
if (dmu_objset_userspace_present(os))
return (0);
if (dmu_objset_is_snapshot(os))
return (SET_ERROR(EINVAL));
if (!dmu_objset_userused_enabled(os))
return (SET_ERROR(ENOTSUP));
err = dmu_objset_space_upgrade(os);
if (err)
return (err);
os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
txg_wait_synced(dmu_objset_pool(os), 0);
return (0);
}
static int
dmu_objset_userobjspace_upgrade_cb(objset_t *os)
{
int err = 0;
if (dmu_objset_userobjspace_present(os))
return (0);
if (dmu_objset_is_snapshot(os))
return (SET_ERROR(EINVAL));
if (!dmu_objset_userobjused_enabled(os))
return (SET_ERROR(ENOTSUP));
dmu_objset_ds(os)->ds_feature_activation_needed[
SPA_FEATURE_USEROBJ_ACCOUNTING] = B_TRUE;
err = dmu_objset_space_upgrade(os);
if (err)
return (err);
os->os_flags |= OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE;
txg_wait_synced(dmu_objset_pool(os), 0);
return (0);
}
void
dmu_objset_userobjspace_upgrade(objset_t *os)
{
dmu_objset_upgrade(os, dmu_objset_userobjspace_upgrade_cb);
}
void
dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp)
@@ -2096,4 +2255,7 @@ EXPORT_SYMBOL(dmu_objset_userquota_get_ids);
EXPORT_SYMBOL(dmu_objset_userused_enabled);
EXPORT_SYMBOL(dmu_objset_userspace_upgrade);
EXPORT_SYMBOL(dmu_objset_userspace_present);
EXPORT_SYMBOL(dmu_objset_userobjused_enabled);
EXPORT_SYMBOL(dmu_objset_userobjspace_upgrade);
EXPORT_SYMBOL(dmu_objset_userobjspace_present);
#endif