mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Illumos #3740
3740 Poor ZFS send / receive performance due to snapshot
hold / release processing
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Approved by: Christopher Siden <christopher.siden@delphix.com>
References:
https://www.illumos.org/issues/3740
illumos/illumos-gate@a7a845e4bf
Ported-by: Richard Yao <ryao@gentoo.org>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #1775
Porting notes:
1. 13fe019870 introduced a merge conflict
in dsl_dataset_user_release_tmp where some variables were moved
outside of the preprocessor directive.
2. dea9dfefdd747534b3846845629d2200f0616dad made the previous merge
conflict worse by switching KM_SLEEP to KM_PUSHPAGE. This is notable
because this commit refactors the code, adding a new KM_SLEEP
allocation. It is not clear to me whether this should be converted
to KM_PUSHPAGE.
3. We had a merge conflict in libzfs_sendrecv.c because of copyright
notices.
4. Several small C99 compatibility fixes were made.
This commit is contained in:
committed by
Brian Behlendorf
parent
7bc7f25040
commit
95fd54a1c5
@@ -21,6 +21,7 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 Steven Hartland. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@@ -127,6 +128,10 @@ dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx)
|
||||
pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL);
|
||||
if (pair != NULL)
|
||||
return (fnvpair_value_int32(pair));
|
||||
|
||||
if (nvlist_empty(dsda->dsda_successful_snaps))
|
||||
return (SET_ERROR(ENOENT));
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
||||
+15
-3
@@ -21,6 +21,7 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 Steven Hartland. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/dsl_pool.h>
|
||||
@@ -840,23 +841,34 @@ dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp)
|
||||
zap_cursor_t zc;
|
||||
objset_t *mos = dp->dp_meta_objset;
|
||||
uint64_t zapobj = dp->dp_tmp_userrefs_obj;
|
||||
nvlist_t *holds;
|
||||
|
||||
if (zapobj == 0)
|
||||
return;
|
||||
ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
|
||||
|
||||
holds = fnvlist_alloc();
|
||||
|
||||
for (zap_cursor_init(&zc, mos, zapobj);
|
||||
zap_cursor_retrieve(&zc, &za) == 0;
|
||||
zap_cursor_advance(&zc)) {
|
||||
char *htag;
|
||||
uint64_t dsobj;
|
||||
nvlist_t *tags;
|
||||
|
||||
htag = strchr(za.za_name, '-');
|
||||
*htag = '\0';
|
||||
++htag;
|
||||
dsobj = strtonum(za.za_name, NULL);
|
||||
dsl_dataset_user_release_tmp(dp, dsobj, htag);
|
||||
if (nvlist_lookup_nvlist(holds, za.za_name, &tags) != 0) {
|
||||
tags = fnvlist_alloc();
|
||||
fnvlist_add_boolean(tags, htag);
|
||||
fnvlist_add_nvlist(holds, za.za_name, tags);
|
||||
fnvlist_free(tags);
|
||||
} else {
|
||||
fnvlist_add_boolean(tags, htag);
|
||||
}
|
||||
}
|
||||
dsl_dataset_user_release_tmp(dp, holds);
|
||||
fnvlist_free(holds);
|
||||
zap_cursor_fini(&zc);
|
||||
}
|
||||
|
||||
|
||||
+485
-345
@@ -21,6 +21,7 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 Steven Hartland. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@@ -37,6 +38,7 @@
|
||||
|
||||
typedef struct dsl_dataset_user_hold_arg {
|
||||
nvlist_t *dduha_holds;
|
||||
nvlist_t *dduha_chkholds;
|
||||
nvlist_t *dduha_errlist;
|
||||
minor_t dduha_minor;
|
||||
} dsl_dataset_user_hold_arg_t;
|
||||
@@ -53,25 +55,24 @@ dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag,
|
||||
objset_t *mos = dp->dp_meta_objset;
|
||||
int error = 0;
|
||||
|
||||
ASSERT(dsl_pool_config_held(dp));
|
||||
|
||||
if (strlen(htag) > MAXNAMELEN)
|
||||
return (E2BIG);
|
||||
return (SET_ERROR(E2BIG));
|
||||
/* Tempholds have a more restricted length */
|
||||
if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
|
||||
return (E2BIG);
|
||||
return (SET_ERROR(E2BIG));
|
||||
|
||||
/* tags must be unique (if ds already exists) */
|
||||
if (ds != NULL) {
|
||||
mutex_enter(&ds->ds_lock);
|
||||
if (ds->ds_phys->ds_userrefs_obj != 0) {
|
||||
uint64_t value;
|
||||
error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
|
||||
htag, 8, 1, &value);
|
||||
if (error == 0)
|
||||
error = SET_ERROR(EEXIST);
|
||||
else if (error == ENOENT)
|
||||
error = 0;
|
||||
}
|
||||
mutex_exit(&ds->ds_lock);
|
||||
if (ds != NULL && ds->ds_phys->ds_userrefs_obj != 0) {
|
||||
uint64_t value;
|
||||
|
||||
error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
|
||||
htag, 8, 1, &value);
|
||||
if (error == 0)
|
||||
error = SET_ERROR(EEXIST);
|
||||
else if (error == ENOENT)
|
||||
error = 0;
|
||||
}
|
||||
|
||||
return (error);
|
||||
@@ -83,51 +84,67 @@ dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
|
||||
dsl_dataset_user_hold_arg_t *dduha = arg;
|
||||
dsl_pool_t *dp = dmu_tx_pool(tx);
|
||||
nvpair_t *pair;
|
||||
int rv = 0;
|
||||
|
||||
if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
|
||||
for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
|
||||
pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
|
||||
int error = 0;
|
||||
if (!dmu_tx_is_syncing(tx))
|
||||
return (0);
|
||||
|
||||
for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL);
|
||||
pair != NULL; pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
|
||||
dsl_dataset_t *ds;
|
||||
char *htag;
|
||||
int error = 0;
|
||||
char *htag, *name;
|
||||
|
||||
/* must be a snapshot */
|
||||
if (strchr(nvpair_name(pair), '@') == NULL)
|
||||
name = nvpair_name(pair);
|
||||
if (strchr(name, '@') == NULL)
|
||||
error = SET_ERROR(EINVAL);
|
||||
|
||||
if (error == 0)
|
||||
error = nvpair_value_string(pair, &htag);
|
||||
if (error == 0) {
|
||||
error = dsl_dataset_hold(dp,
|
||||
nvpair_name(pair), FTAG, &ds);
|
||||
}
|
||||
|
||||
if (error == 0)
|
||||
error = dsl_dataset_hold(dp, name, FTAG, &ds);
|
||||
|
||||
if (error == 0) {
|
||||
error = dsl_dataset_user_hold_check_one(ds, htag,
|
||||
dduha->dduha_minor != 0, tx);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
}
|
||||
|
||||
if (error != 0) {
|
||||
rv = error;
|
||||
fnvlist_add_int32(dduha->dduha_errlist,
|
||||
nvpair_name(pair), error);
|
||||
if (error == 0) {
|
||||
fnvlist_add_string(dduha->dduha_chkholds, name, htag);
|
||||
} else {
|
||||
/*
|
||||
* We register ENOENT errors so they can be correctly
|
||||
* reported if needed, such as when all holds fail.
|
||||
*/
|
||||
fnvlist_add_int32(dduha->dduha_errlist, name, error);
|
||||
if (error != ENOENT)
|
||||
return (error);
|
||||
}
|
||||
}
|
||||
return (rv);
|
||||
|
||||
/* Return ENOENT if no holds would be created. */
|
||||
if (nvlist_empty(dduha->dduha_chkholds))
|
||||
return (SET_ERROR(ENOENT));
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
|
||||
minor_t minor, uint64_t now, dmu_tx_t *tx)
|
||||
|
||||
static void
|
||||
dsl_dataset_user_hold_sync_one_impl(nvlist_t *tmpholds, dsl_dataset_t *ds,
|
||||
const char *htag, minor_t minor, uint64_t now, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_pool_t *dp = ds->ds_dir->dd_pool;
|
||||
objset_t *mos = dp->dp_meta_objset;
|
||||
uint64_t zapobj;
|
||||
|
||||
mutex_enter(&ds->ds_lock);
|
||||
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
|
||||
|
||||
if (ds->ds_phys->ds_userrefs_obj == 0) {
|
||||
/*
|
||||
* This is the first user hold for this dataset. Create
|
||||
@@ -140,14 +157,26 @@ dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
|
||||
zapobj = ds->ds_phys->ds_userrefs_obj;
|
||||
}
|
||||
ds->ds_userrefs++;
|
||||
mutex_exit(&ds->ds_lock);
|
||||
|
||||
VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
|
||||
|
||||
if (minor != 0) {
|
||||
char name[MAXNAMELEN];
|
||||
nvlist_t *tags;
|
||||
|
||||
VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
|
||||
htag, now, tx));
|
||||
dsl_register_onexit_hold_cleanup(ds, htag, minor);
|
||||
(void) snprintf(name, sizeof (name), "%llx",
|
||||
(u_longlong_t)ds->ds_object);
|
||||
|
||||
if (nvlist_lookup_nvlist(tmpholds, name, &tags) != 0) {
|
||||
tags = fnvlist_alloc();
|
||||
fnvlist_add_boolean(tags, htag);
|
||||
fnvlist_add_nvlist(tmpholds, name, tags);
|
||||
fnvlist_free(tags);
|
||||
} else {
|
||||
fnvlist_add_boolean(tags, htag);
|
||||
}
|
||||
}
|
||||
|
||||
spa_history_log_internal_ds(ds, "hold", tx,
|
||||
@@ -155,306 +184,10 @@ dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
|
||||
htag, minor != 0, ds->ds_userrefs);
|
||||
}
|
||||
|
||||
static void
|
||||
dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dataset_user_hold_arg_t *dduha = arg;
|
||||
dsl_pool_t *dp = dmu_tx_pool(tx);
|
||||
nvpair_t *pair;
|
||||
uint64_t now = gethrestime_sec();
|
||||
|
||||
for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
|
||||
pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
|
||||
dsl_dataset_t *ds;
|
||||
VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
|
||||
dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair),
|
||||
dduha->dduha_minor, now, tx);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* holds is nvl of snapname -> holdname
|
||||
* errlist will be filled in with snapname -> error
|
||||
* if cleanup_minor is not 0, the holds will be temporary, cleaned up
|
||||
* when the process exits.
|
||||
*
|
||||
* if any fails, all will fail.
|
||||
*/
|
||||
int
|
||||
dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
|
||||
{
|
||||
dsl_dataset_user_hold_arg_t dduha;
|
||||
nvpair_t *pair;
|
||||
|
||||
pair = nvlist_next_nvpair(holds, NULL);
|
||||
if (pair == NULL)
|
||||
return (0);
|
||||
|
||||
dduha.dduha_holds = holds;
|
||||
dduha.dduha_errlist = errlist;
|
||||
dduha.dduha_minor = cleanup_minor;
|
||||
|
||||
return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
|
||||
dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds)));
|
||||
}
|
||||
|
||||
typedef struct dsl_dataset_user_release_arg {
|
||||
nvlist_t *ddura_holds;
|
||||
nvlist_t *ddura_todelete;
|
||||
nvlist_t *ddura_errlist;
|
||||
} dsl_dataset_user_release_arg_t;
|
||||
|
||||
static int
|
||||
dsl_dataset_user_release_check_one(dsl_dataset_t *ds,
|
||||
nvlist_t *holds, boolean_t *todelete)
|
||||
{
|
||||
uint64_t zapobj;
|
||||
nvpair_t *pair;
|
||||
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
|
||||
int error;
|
||||
int numholds = 0;
|
||||
|
||||
*todelete = B_FALSE;
|
||||
|
||||
if (!dsl_dataset_is_snapshot(ds))
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
zapobj = ds->ds_phys->ds_userrefs_obj;
|
||||
if (zapobj == 0)
|
||||
return (SET_ERROR(ESRCH));
|
||||
|
||||
for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
|
||||
pair = nvlist_next_nvpair(holds, pair)) {
|
||||
/* Make sure the hold exists */
|
||||
uint64_t tmp;
|
||||
error = zap_lookup(mos, zapobj, nvpair_name(pair), 8, 1, &tmp);
|
||||
if (error == ENOENT)
|
||||
error = SET_ERROR(ESRCH);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
numholds++;
|
||||
}
|
||||
|
||||
if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
|
||||
ds->ds_userrefs == numholds) {
|
||||
/* we need to destroy the snapshot as well */
|
||||
|
||||
if (dsl_dataset_long_held(ds))
|
||||
return (SET_ERROR(EBUSY));
|
||||
*todelete = B_TRUE;
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dataset_user_release_arg_t *ddura = arg;
|
||||
dsl_pool_t *dp = dmu_tx_pool(tx);
|
||||
nvpair_t *pair;
|
||||
int rv = 0;
|
||||
|
||||
if (!dmu_tx_is_syncing(tx))
|
||||
return (0);
|
||||
|
||||
for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
|
||||
pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
|
||||
const char *name = nvpair_name(pair);
|
||||
int error;
|
||||
dsl_dataset_t *ds;
|
||||
nvlist_t *holds;
|
||||
|
||||
error = nvpair_value_nvlist(pair, &holds);
|
||||
if (error != 0)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
error = dsl_dataset_hold(dp, name, FTAG, &ds);
|
||||
if (error == 0) {
|
||||
boolean_t deleteme;
|
||||
error = dsl_dataset_user_release_check_one(ds,
|
||||
holds, &deleteme);
|
||||
if (error == 0 && deleteme) {
|
||||
fnvlist_add_boolean(ddura->ddura_todelete,
|
||||
name);
|
||||
}
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
}
|
||||
if (error != 0) {
|
||||
if (ddura->ddura_errlist != NULL) {
|
||||
fnvlist_add_int32(ddura->ddura_errlist,
|
||||
name, error);
|
||||
}
|
||||
rv = error;
|
||||
}
|
||||
}
|
||||
return (rv);
|
||||
}
|
||||
|
||||
static void
|
||||
dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
|
||||
dmu_tx_t *tx)
|
||||
{
|
||||
dsl_pool_t *dp = ds->ds_dir->dd_pool;
|
||||
objset_t *mos = dp->dp_meta_objset;
|
||||
uint64_t zapobj;
|
||||
int error;
|
||||
nvpair_t *pair;
|
||||
|
||||
for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
|
||||
pair = nvlist_next_nvpair(holds, pair)) {
|
||||
ds->ds_userrefs--;
|
||||
error = dsl_pool_user_release(dp, ds->ds_object,
|
||||
nvpair_name(pair), tx);
|
||||
VERIFY(error == 0 || error == ENOENT);
|
||||
zapobj = ds->ds_phys->ds_userrefs_obj;
|
||||
VERIFY0(zap_remove(mos, zapobj, nvpair_name(pair), tx));
|
||||
|
||||
spa_history_log_internal_ds(ds, "release", tx,
|
||||
"tag=%s refs=%lld", nvpair_name(pair),
|
||||
(longlong_t)ds->ds_userrefs);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dataset_user_release_arg_t *ddura = arg;
|
||||
dsl_pool_t *dp = dmu_tx_pool(tx);
|
||||
nvpair_t *pair;
|
||||
|
||||
for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
|
||||
pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
|
||||
dsl_dataset_t *ds;
|
||||
|
||||
VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
|
||||
dsl_dataset_user_release_sync_one(ds,
|
||||
fnvpair_value_nvlist(pair), tx);
|
||||
if (nvlist_exists(ddura->ddura_todelete,
|
||||
nvpair_name(pair))) {
|
||||
ASSERT(ds->ds_userrefs == 0 &&
|
||||
ds->ds_phys->ds_num_children == 1 &&
|
||||
DS_IS_DEFER_DESTROY(ds));
|
||||
dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
|
||||
}
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* holds is nvl of snapname -> { holdname, ... }
|
||||
* errlist will be filled in with snapname -> error
|
||||
*
|
||||
* if any fails, all will fail.
|
||||
*/
|
||||
int
|
||||
dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
|
||||
{
|
||||
dsl_dataset_user_release_arg_t ddura;
|
||||
nvpair_t *pair;
|
||||
int error;
|
||||
|
||||
pair = nvlist_next_nvpair(holds, NULL);
|
||||
if (pair == NULL)
|
||||
return (0);
|
||||
|
||||
ddura.ddura_holds = holds;
|
||||
ddura.ddura_errlist = errlist;
|
||||
ddura.ddura_todelete = fnvlist_alloc();
|
||||
|
||||
error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check,
|
||||
dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds));
|
||||
fnvlist_free(ddura.ddura_todelete);
|
||||
return (error);
|
||||
}
|
||||
|
||||
typedef struct dsl_dataset_user_release_tmp_arg {
|
||||
uint64_t ddurta_dsobj;
|
||||
nvlist_t *ddurta_holds;
|
||||
boolean_t ddurta_deleteme;
|
||||
} dsl_dataset_user_release_tmp_arg_t;
|
||||
|
||||
static int
|
||||
dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
|
||||
dsl_pool_t *dp = dmu_tx_pool(tx);
|
||||
dsl_dataset_t *ds;
|
||||
int error;
|
||||
|
||||
if (!dmu_tx_is_syncing(tx))
|
||||
return (0);
|
||||
|
||||
error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
error = dsl_dataset_user_release_check_one(ds,
|
||||
ddurta->ddurta_holds, &ddurta->ddurta_deleteme);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (error);
|
||||
}
|
||||
|
||||
static void
|
||||
dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
|
||||
dsl_pool_t *dp = dmu_tx_pool(tx);
|
||||
dsl_dataset_t *ds;
|
||||
|
||||
VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds));
|
||||
dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx);
|
||||
if (ddurta->ddurta_deleteme) {
|
||||
ASSERT(ds->ds_userrefs == 0 &&
|
||||
ds->ds_phys->ds_num_children == 1 &&
|
||||
DS_IS_DEFER_DESTROY(ds));
|
||||
dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
|
||||
}
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called at spa_load time to release a stale temporary user hold.
|
||||
* Also called by the onexit code.
|
||||
*/
|
||||
void
|
||||
dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag)
|
||||
{
|
||||
dsl_dataset_user_release_tmp_arg_t ddurta;
|
||||
|
||||
#ifdef _KERNEL
|
||||
dsl_dataset_t *ds;
|
||||
int error;
|
||||
|
||||
/* Make sure it is not mounted. */
|
||||
dsl_pool_config_enter(dp, FTAG);
|
||||
error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
|
||||
if (error == 0) {
|
||||
char name[MAXNAMELEN];
|
||||
dsl_dataset_name(ds, name);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
dsl_pool_config_exit(dp, FTAG);
|
||||
(void) zfs_unmount_snap(name);
|
||||
} else {
|
||||
dsl_pool_config_exit(dp, FTAG);
|
||||
}
|
||||
#endif
|
||||
|
||||
ddurta.ddurta_dsobj = dsobj;
|
||||
ddurta.ddurta_holds = fnvlist_alloc();
|
||||
fnvlist_add_boolean(ddurta.ddurta_holds, htag);
|
||||
|
||||
(void) dsl_sync_task(spa_name(dp->dp_spa),
|
||||
dsl_dataset_user_release_tmp_check,
|
||||
dsl_dataset_user_release_tmp_sync, &ddurta, 1);
|
||||
fnvlist_free(ddurta.ddurta_holds);
|
||||
}
|
||||
|
||||
typedef struct zfs_hold_cleanup_arg {
|
||||
char zhca_spaname[MAXNAMELEN];
|
||||
uint64_t zhca_spa_load_guid;
|
||||
uint64_t zhca_dsobj;
|
||||
char zhca_htag[MAXNAMELEN];
|
||||
nvlist_t *zhca_holds;
|
||||
} zfs_hold_cleanup_arg_t;
|
||||
|
||||
static void
|
||||
@@ -466,40 +199,447 @@ dsl_dataset_user_release_onexit(void *arg)
|
||||
|
||||
error = spa_open(ca->zhca_spaname, &spa, FTAG);
|
||||
if (error != 0) {
|
||||
zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
|
||||
zfs_dbgmsg("couldn't release holds on pool=%s "
|
||||
"because pool is no longer loaded",
|
||||
ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
|
||||
ca->zhca_spaname);
|
||||
return;
|
||||
}
|
||||
if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
|
||||
zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
|
||||
zfs_dbgmsg("couldn't release holds on pool=%s "
|
||||
"because pool is no longer loaded (guid doesn't match)",
|
||||
ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
|
||||
ca->zhca_spaname);
|
||||
spa_close(spa, FTAG);
|
||||
return;
|
||||
}
|
||||
|
||||
dsl_dataset_user_release_tmp(spa_get_dsl(spa),
|
||||
ca->zhca_dsobj, ca->zhca_htag);
|
||||
(void) dsl_dataset_user_release_tmp(spa_get_dsl(spa), ca->zhca_holds);
|
||||
fnvlist_free(ca->zhca_holds);
|
||||
kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
|
||||
spa_close(spa, FTAG);
|
||||
}
|
||||
|
||||
void
|
||||
dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
|
||||
minor_t minor)
|
||||
static void
|
||||
dsl_onexit_hold_cleanup(spa_t *spa, nvlist_t *holds, minor_t minor)
|
||||
{
|
||||
zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_PUSHPAGE);
|
||||
spa_t *spa = dsl_dataset_get_spa(ds);
|
||||
zfs_hold_cleanup_arg_t *ca;
|
||||
|
||||
if (minor == 0 || nvlist_empty(holds)) {
|
||||
fnvlist_free(holds);
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT(spa != NULL);
|
||||
ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
|
||||
|
||||
(void) strlcpy(ca->zhca_spaname, spa_name(spa),
|
||||
sizeof (ca->zhca_spaname));
|
||||
ca->zhca_spa_load_guid = spa_load_guid(spa);
|
||||
ca->zhca_dsobj = ds->ds_object;
|
||||
(void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag));
|
||||
ca->zhca_holds = holds;
|
||||
VERIFY0(zfs_onexit_add_cb(minor,
|
||||
dsl_dataset_user_release_onexit, ca, NULL));
|
||||
}
|
||||
|
||||
void
|
||||
dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
|
||||
minor_t minor, uint64_t now, dmu_tx_t *tx)
|
||||
{
|
||||
nvlist_t *tmpholds;
|
||||
|
||||
if (minor != 0)
|
||||
tmpholds = fnvlist_alloc();
|
||||
else
|
||||
tmpholds = NULL;
|
||||
dsl_dataset_user_hold_sync_one_impl(tmpholds, ds, htag, minor, now, tx);
|
||||
dsl_onexit_hold_cleanup(dsl_dataset_get_spa(ds), tmpholds, minor);
|
||||
}
|
||||
|
||||
static void
|
||||
dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dataset_user_hold_arg_t *dduha = arg;
|
||||
dsl_pool_t *dp = dmu_tx_pool(tx);
|
||||
nvlist_t *tmpholds;
|
||||
nvpair_t *pair;
|
||||
uint64_t now = gethrestime_sec();
|
||||
|
||||
if (dduha->dduha_minor != 0)
|
||||
tmpholds = fnvlist_alloc();
|
||||
else
|
||||
tmpholds = NULL;
|
||||
for (pair = nvlist_next_nvpair(dduha->dduha_chkholds, NULL);
|
||||
pair != NULL;
|
||||
pair = nvlist_next_nvpair(dduha->dduha_chkholds, pair)) {
|
||||
dsl_dataset_t *ds;
|
||||
|
||||
VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
|
||||
dsl_dataset_user_hold_sync_one_impl(tmpholds, ds,
|
||||
fnvpair_value_string(pair), dduha->dduha_minor, now, tx);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
}
|
||||
dsl_onexit_hold_cleanup(dp->dp_spa, tmpholds, dduha->dduha_minor);
|
||||
}
|
||||
|
||||
/*
|
||||
* The full semantics of this function are described in the comment above
|
||||
* lzc_hold().
|
||||
*
|
||||
* To summarize:
|
||||
* holds is nvl of snapname -> holdname
|
||||
* errlist will be filled in with snapname -> error
|
||||
*
|
||||
* The snapshots must all be in the same pool.
|
||||
*
|
||||
* Holds for snapshots that don't exist will be skipped.
|
||||
*
|
||||
* If none of the snapshots for requested holds exist then ENOENT will be
|
||||
* returned.
|
||||
*
|
||||
* If cleanup_minor is not 0, the holds will be temporary, which will be cleaned
|
||||
* up when the process exits.
|
||||
*
|
||||
* On success all the holds, for snapshots that existed, will be created and 0
|
||||
* will be returned.
|
||||
*
|
||||
* On failure no holds will be created, the errlist will be filled in,
|
||||
* and an errno will be returned.
|
||||
*
|
||||
* In all cases the errlist will contain entries for holds where the snapshot
|
||||
* didn't exist.
|
||||
*/
|
||||
int
|
||||
dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
|
||||
{
|
||||
dsl_dataset_user_hold_arg_t dduha;
|
||||
nvpair_t *pair;
|
||||
int ret;
|
||||
|
||||
pair = nvlist_next_nvpair(holds, NULL);
|
||||
if (pair == NULL)
|
||||
return (0);
|
||||
|
||||
dduha.dduha_holds = holds;
|
||||
dduha.dduha_chkholds = fnvlist_alloc();
|
||||
dduha.dduha_errlist = errlist;
|
||||
dduha.dduha_minor = cleanup_minor;
|
||||
|
||||
ret = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
|
||||
dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds));
|
||||
fnvlist_free(dduha.dduha_chkholds);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
typedef int (dsl_holdfunc_t)(dsl_pool_t *dp, const char *name, void *tag,
|
||||
dsl_dataset_t **dsp);
|
||||
|
||||
typedef struct dsl_dataset_user_release_arg {
|
||||
dsl_holdfunc_t *ddura_holdfunc;
|
||||
nvlist_t *ddura_holds;
|
||||
nvlist_t *ddura_todelete;
|
||||
nvlist_t *ddura_errlist;
|
||||
nvlist_t *ddura_chkholds;
|
||||
} dsl_dataset_user_release_arg_t;
|
||||
|
||||
/* Place a dataset hold on the snapshot identified by passed dsobj string */
|
||||
static int
|
||||
dsl_dataset_hold_obj_string(dsl_pool_t *dp, const char *dsobj, void *tag,
|
||||
dsl_dataset_t **dsp)
|
||||
{
|
||||
return (dsl_dataset_hold_obj(dp, strtonum(dsobj, NULL), tag, dsp));
|
||||
}
|
||||
|
||||
static int
|
||||
dsl_dataset_user_release_check_one(dsl_dataset_user_release_arg_t *ddura,
|
||||
dsl_dataset_t *ds, nvlist_t *holds, const char *snapname)
|
||||
{
|
||||
uint64_t zapobj;
|
||||
nvlist_t *holds_found;
|
||||
nvpair_t *pair;
|
||||
objset_t *mos;
|
||||
int numholds;
|
||||
|
||||
if (!dsl_dataset_is_snapshot(ds))
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
if (nvlist_empty(holds))
|
||||
return (0);
|
||||
|
||||
numholds = 0;
|
||||
mos = ds->ds_dir->dd_pool->dp_meta_objset;
|
||||
zapobj = ds->ds_phys->ds_userrefs_obj;
|
||||
holds_found = fnvlist_alloc();
|
||||
|
||||
for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
|
||||
pair = nvlist_next_nvpair(holds, pair)) {
|
||||
uint64_t tmp;
|
||||
int error;
|
||||
const char *holdname = nvpair_name(pair);
|
||||
|
||||
if (zapobj != 0)
|
||||
error = zap_lookup(mos, zapobj, holdname, 8, 1, &tmp);
|
||||
else
|
||||
error = SET_ERROR(ENOENT);
|
||||
|
||||
/*
|
||||
* Non-existent holds are put on the errlist, but don't
|
||||
* cause an overall failure.
|
||||
*/
|
||||
if (error == ENOENT) {
|
||||
if (ddura->ddura_errlist != NULL) {
|
||||
char *errtag = kmem_asprintf("%s#%s",
|
||||
snapname, holdname);
|
||||
fnvlist_add_int32(ddura->ddura_errlist, errtag,
|
||||
ENOENT);
|
||||
strfree(errtag);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (error != 0) {
|
||||
fnvlist_free(holds_found);
|
||||
return (error);
|
||||
}
|
||||
|
||||
fnvlist_add_boolean(holds_found, holdname);
|
||||
numholds++;
|
||||
}
|
||||
|
||||
if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
|
||||
ds->ds_userrefs == numholds) {
|
||||
/* we need to destroy the snapshot as well */
|
||||
if (dsl_dataset_long_held(ds)) {
|
||||
fnvlist_free(holds_found);
|
||||
return (SET_ERROR(EBUSY));
|
||||
}
|
||||
fnvlist_add_boolean(ddura->ddura_todelete, snapname);
|
||||
}
|
||||
|
||||
if (numholds != 0) {
|
||||
fnvlist_add_nvlist(ddura->ddura_chkholds, snapname,
|
||||
holds_found);
|
||||
}
|
||||
fnvlist_free(holds_found);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dataset_user_release_arg_t *ddura;
|
||||
dsl_holdfunc_t *holdfunc;
|
||||
dsl_pool_t *dp;
|
||||
nvpair_t *pair;
|
||||
|
||||
if (!dmu_tx_is_syncing(tx))
|
||||
return (0);
|
||||
|
||||
dp = dmu_tx_pool(tx);
|
||||
|
||||
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
|
||||
|
||||
ddura = arg;
|
||||
holdfunc = ddura->ddura_holdfunc;
|
||||
|
||||
for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL);
|
||||
pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
|
||||
int error;
|
||||
dsl_dataset_t *ds;
|
||||
nvlist_t *holds;
|
||||
const char *snapname = nvpair_name(pair);
|
||||
|
||||
error = nvpair_value_nvlist(pair, &holds);
|
||||
if (error != 0)
|
||||
error = (SET_ERROR(EINVAL));
|
||||
else
|
||||
error = holdfunc(dp, snapname, FTAG, &ds);
|
||||
if (error == 0) {
|
||||
error = dsl_dataset_user_release_check_one(ddura, ds,
|
||||
holds, snapname);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
}
|
||||
if (error != 0) {
|
||||
if (ddura->ddura_errlist != NULL) {
|
||||
fnvlist_add_int32(ddura->ddura_errlist,
|
||||
snapname, error);
|
||||
}
|
||||
/*
|
||||
* Non-existent snapshots are put on the errlist,
|
||||
* but don't cause an overall failure.
|
||||
*/
|
||||
if (error != ENOENT)
|
||||
return (error);
|
||||
}
|
||||
}
|
||||
|
||||
/* Return ENOENT if none of the holds existed. */
|
||||
if (nvlist_empty(ddura->ddura_chkholds))
|
||||
return (SET_ERROR(ENOENT));
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
|
||||
dmu_tx_t *tx)
|
||||
{
|
||||
dsl_pool_t *dp = ds->ds_dir->dd_pool;
|
||||
objset_t *mos = dp->dp_meta_objset;
|
||||
nvpair_t *pair;
|
||||
|
||||
for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
|
||||
pair = nvlist_next_nvpair(holds, pair)) {
|
||||
int error;
|
||||
const char *holdname = nvpair_name(pair);
|
||||
|
||||
/* Remove temporary hold if one exists. */
|
||||
error = dsl_pool_user_release(dp, ds->ds_object, holdname, tx);
|
||||
VERIFY(error == 0 || error == ENOENT);
|
||||
|
||||
VERIFY0(zap_remove(mos, ds->ds_phys->ds_userrefs_obj, holdname,
|
||||
tx));
|
||||
ds->ds_userrefs--;
|
||||
|
||||
spa_history_log_internal_ds(ds, "release", tx,
|
||||
"tag=%s refs=%lld", holdname, (longlong_t)ds->ds_userrefs);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dataset_user_release_arg_t *ddura = arg;
|
||||
dsl_holdfunc_t *holdfunc = ddura->ddura_holdfunc;
|
||||
dsl_pool_t *dp = dmu_tx_pool(tx);
|
||||
nvpair_t *pair;
|
||||
|
||||
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
|
||||
|
||||
for (pair = nvlist_next_nvpair(ddura->ddura_chkholds, NULL);
|
||||
pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_chkholds,
|
||||
pair)) {
|
||||
dsl_dataset_t *ds;
|
||||
const char *name = nvpair_name(pair);
|
||||
|
||||
VERIFY0(holdfunc(dp, name, FTAG, &ds));
|
||||
|
||||
dsl_dataset_user_release_sync_one(ds,
|
||||
fnvpair_value_nvlist(pair), tx);
|
||||
if (nvlist_exists(ddura->ddura_todelete, name)) {
|
||||
ASSERT(ds->ds_userrefs == 0 &&
|
||||
ds->ds_phys->ds_num_children == 1 &&
|
||||
DS_IS_DEFER_DESTROY(ds));
|
||||
dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
|
||||
}
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The full semantics of this function are described in the comment above
|
||||
* lzc_release().
|
||||
*
|
||||
* To summarize:
|
||||
* Releases holds specified in the nvl holds.
|
||||
*
|
||||
* holds is nvl of snapname -> { holdname, ... }
|
||||
* errlist will be filled in with snapname -> error
|
||||
*
|
||||
* If tmpdp is not NULL the names for holds should be the dsobj's of snapshots,
|
||||
* otherwise they should be the names of snapshots.
|
||||
*
|
||||
* As a release may cause snapshots to be destroyed this tries to ensure they
|
||||
* aren't mounted.
|
||||
*
|
||||
* Releases of non-existent holds are skipped.
|
||||
*
|
||||
* At least one hold must have been released for this function to succeed
|
||||
* and return 0.
|
||||
*/
|
||||
static int
|
||||
dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist,
|
||||
dsl_pool_t *tmpdp)
|
||||
{
|
||||
dsl_dataset_user_release_arg_t ddura;
|
||||
nvpair_t *pair;
|
||||
char *pool;
|
||||
int error;
|
||||
|
||||
pair = nvlist_next_nvpair(holds, NULL);
|
||||
if (pair == NULL)
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* The release may cause snapshots to be destroyed; make sure they
|
||||
* are not mounted.
|
||||
*/
|
||||
if (tmpdp != NULL) {
|
||||
/* Temporary holds are specified by dsobj string. */
|
||||
ddura.ddura_holdfunc = dsl_dataset_hold_obj_string;
|
||||
pool = spa_name(tmpdp->dp_spa);
|
||||
#ifdef _KERNEL
|
||||
dsl_pool_config_enter(tmpdp, FTAG);
|
||||
for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
|
||||
pair = nvlist_next_nvpair(holds, pair)) {
|
||||
dsl_dataset_t *ds;
|
||||
|
||||
error = dsl_dataset_hold_obj_string(tmpdp,
|
||||
nvpair_name(pair), FTAG, &ds);
|
||||
if (error == 0) {
|
||||
char name[MAXNAMELEN];
|
||||
dsl_dataset_name(ds, name);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
(void) zfs_unmount_snap(name);
|
||||
}
|
||||
}
|
||||
dsl_pool_config_exit(tmpdp, FTAG);
|
||||
#endif
|
||||
} else {
|
||||
/* Non-temporary holds are specified by name. */
|
||||
ddura.ddura_holdfunc = dsl_dataset_hold;
|
||||
pool = nvpair_name(pair);
|
||||
#ifdef _KERNEL
|
||||
for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
|
||||
pair = nvlist_next_nvpair(holds, pair)) {
|
||||
(void) zfs_unmount_snap(nvpair_name(pair));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
ddura.ddura_holds = holds;
|
||||
ddura.ddura_errlist = errlist;
|
||||
ddura.ddura_todelete = fnvlist_alloc();
|
||||
ddura.ddura_chkholds = fnvlist_alloc();
|
||||
|
||||
error = dsl_sync_task(pool, dsl_dataset_user_release_check,
|
||||
dsl_dataset_user_release_sync, &ddura,
|
||||
fnvlist_num_pairs(holds));
|
||||
fnvlist_free(ddura.ddura_todelete);
|
||||
fnvlist_free(ddura.ddura_chkholds);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* holds is nvl of snapname -> { holdname, ... }
|
||||
* errlist will be filled in with snapname -> error
|
||||
*/
|
||||
int
|
||||
dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
|
||||
{
|
||||
return (dsl_dataset_user_release_impl(holds, errlist, NULL));
|
||||
}
|
||||
|
||||
/*
|
||||
* holds is nvl of snapdsobj -> { holdname, ... }
|
||||
*/
|
||||
void
|
||||
dsl_dataset_user_release_tmp(struct dsl_pool *dp, nvlist_t *holds)
|
||||
{
|
||||
ASSERT(dp != NULL);
|
||||
(void) dsl_dataset_user_release_impl(holds, NULL, dp);
|
||||
}
|
||||
|
||||
int
|
||||
dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
|
||||
{
|
||||
|
||||
+1
-14
@@ -28,6 +28,7 @@
|
||||
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||
* Copyright (c) 2013 Steven Hartland. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
@@ -4867,20 +4868,6 @@ zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
|
||||
static int
|
||||
zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
|
||||
{
|
||||
nvpair_t *pair;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* The release may cause the snapshot to be destroyed; make sure it
|
||||
* is not mounted.
|
||||
*/
|
||||
for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
|
||||
pair = nvlist_next_nvpair(holds, pair)) {
|
||||
err = zfs_unmount_snap(nvpair_name(pair));
|
||||
if (err != 0)
|
||||
return (err);
|
||||
}
|
||||
|
||||
return (dsl_dataset_user_release(holds, errlist));
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user