mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-24 03:08:51 +03:00
Fast Clone Deletion
Deleting a clone requires finding the blocks that are clone-only, not shared with the snapshot. This was done by traversing the entire block tree, which results in a large performance penalty for sparsely written clones. This new method keeps track of clone blocks when they are modified in a "Livelist" so that, when it’s time to delete, the clone-specific blocks are already at hand. We see performance improvements because now deletion work is proportional to the number of clone-modified blocks, not the size of the original dataset. Reviewed-by: Sean Eric Fagan <sef@ixsystems.com> Reviewed-by: Matt Ahrens <matt@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Serapheim Dimitropoulos <serapheim@delphix.com> Signed-off-by: Sara Hartse <sara.hartse@delphix.com> Closes #8416
This commit is contained in:
committed by
Brian Behlendorf
parent
d274ac5460
commit
37f03da8ba
+103
-1
@@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 Martin Matuska. All rights reserved.
|
||||
* Copyright (c) 2014 Joyent, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
@@ -48,6 +48,7 @@
|
||||
#include <sys/policy.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#include <sys/zvol.h>
|
||||
#include <sys/zthr.h>
|
||||
#include "zfs_namecheck.h"
|
||||
#include "zfs_prop.h"
|
||||
|
||||
@@ -155,6 +156,9 @@ dsl_dir_evict_async(void *dbu)
|
||||
|
||||
spa_async_close(dd->dd_pool->dp_spa, dd);
|
||||
|
||||
if (dsl_deadlist_is_open(&dd->dd_livelist))
|
||||
dsl_dir_livelist_close(dd);
|
||||
|
||||
dsl_prop_fini(dd);
|
||||
mutex_destroy(&dd->dd_lock);
|
||||
kmem_free(dd, sizeof (dsl_dir_t));
|
||||
@@ -255,6 +259,16 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
|
||||
dd->dd_origin_txg =
|
||||
origin_phys->ds_creation_txg;
|
||||
dmu_buf_rele(origin_bonus, FTAG);
|
||||
if (dsl_dir_is_zapified(dd)) {
|
||||
uint64_t obj;
|
||||
err = zap_lookup(dp->dp_meta_objset,
|
||||
dd->dd_object, DD_FIELD_LIVELIST,
|
||||
sizeof (uint64_t), 1, &obj);
|
||||
if (err == 0)
|
||||
dsl_dir_livelist_open(dd, obj);
|
||||
else if (err != ENOENT)
|
||||
goto errout;
|
||||
}
|
||||
}
|
||||
|
||||
dmu_buf_init_user(&dd->dd_dbu, NULL, dsl_dir_evict_async,
|
||||
@@ -263,6 +277,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
|
||||
if (winner != NULL) {
|
||||
if (dd->dd_parent)
|
||||
dsl_dir_rele(dd->dd_parent, dd);
|
||||
if (dsl_deadlist_is_open(&dd->dd_livelist))
|
||||
dsl_dir_livelist_close(dd);
|
||||
dsl_prop_fini(dd);
|
||||
mutex_destroy(&dd->dd_lock);
|
||||
kmem_free(dd, sizeof (dsl_dir_t));
|
||||
@@ -291,6 +307,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
|
||||
errout:
|
||||
if (dd->dd_parent)
|
||||
dsl_dir_rele(dd->dd_parent, dd);
|
||||
if (dsl_deadlist_is_open(&dd->dd_livelist))
|
||||
dsl_dir_livelist_close(dd);
|
||||
dsl_prop_fini(dd);
|
||||
mutex_destroy(&dd->dd_lock);
|
||||
kmem_free(dd, sizeof (dsl_dir_t));
|
||||
@@ -2178,6 +2196,90 @@ dsl_dir_is_zapified(dsl_dir_t *dd)
|
||||
return (doi.doi_type == DMU_OTN_ZAP_METADATA);
|
||||
}
|
||||
|
||||
void
|
||||
dsl_dir_livelist_open(dsl_dir_t *dd, uint64_t obj)
|
||||
{
|
||||
objset_t *mos = dd->dd_pool->dp_meta_objset;
|
||||
ASSERT(spa_feature_is_active(dd->dd_pool->dp_spa,
|
||||
SPA_FEATURE_LIVELIST));
|
||||
dsl_deadlist_open(&dd->dd_livelist, mos, obj);
|
||||
bplist_create(&dd->dd_pending_allocs);
|
||||
bplist_create(&dd->dd_pending_frees);
|
||||
}
|
||||
|
||||
void
|
||||
dsl_dir_livelist_close(dsl_dir_t *dd)
|
||||
{
|
||||
dsl_deadlist_close(&dd->dd_livelist);
|
||||
bplist_destroy(&dd->dd_pending_allocs);
|
||||
bplist_destroy(&dd->dd_pending_frees);
|
||||
}
|
||||
|
||||
void
|
||||
dsl_dir_remove_livelist(dsl_dir_t *dd, dmu_tx_t *tx, boolean_t total)
|
||||
{
|
||||
uint64_t obj;
|
||||
dsl_pool_t *dp = dmu_tx_pool(tx);
|
||||
spa_t *spa = dp->dp_spa;
|
||||
livelist_condense_entry_t to_condense = spa->spa_to_condense;
|
||||
|
||||
if (!dsl_deadlist_is_open(&dd->dd_livelist))
|
||||
return;
|
||||
|
||||
/*
|
||||
* If the livelist being removed is set to be condensed, stop the
|
||||
* condense zthr and indicate the cancellation in the spa_to_condense
|
||||
* struct in case the condense no-wait synctask has already started
|
||||
*/
|
||||
zthr_t *ll_condense_thread = spa->spa_livelist_condense_zthr;
|
||||
if (ll_condense_thread != NULL &&
|
||||
(to_condense.ds != NULL) && (to_condense.ds->ds_dir == dd)) {
|
||||
/*
|
||||
* We use zthr_wait_cycle_done instead of zthr_cancel
|
||||
* because we don't want to destroy the zthr, just have
|
||||
* it skip its current task.
|
||||
*/
|
||||
spa->spa_to_condense.cancelled = B_TRUE;
|
||||
zthr_wait_cycle_done(ll_condense_thread);
|
||||
/*
|
||||
* If we've returned from zthr_wait_cycle_done without
|
||||
* clearing the to_condense data structure it's either
|
||||
* because the no-wait synctask has started (which is
|
||||
* indicated by 'syncing' field of to_condense) and we
|
||||
* can expect it to clear to_condense on its own.
|
||||
* Otherwise, we returned before the zthr ran. The
|
||||
* checkfunc will now fail as cancelled == B_TRUE so we
|
||||
* can safely NULL out ds, allowing a different dir's
|
||||
* livelist to be condensed.
|
||||
*
|
||||
* We can be sure that the to_condense struct will not
|
||||
* be repopulated at this stage because both this
|
||||
* function and dsl_livelist_try_condense execute in
|
||||
* syncing context.
|
||||
*/
|
||||
if ((spa->spa_to_condense.ds != NULL) &&
|
||||
!spa->spa_to_condense.syncing) {
|
||||
dmu_buf_rele(spa->spa_to_condense.ds->ds_dbuf,
|
||||
spa);
|
||||
spa->spa_to_condense.ds = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
dsl_dir_livelist_close(dd);
|
||||
int err = zap_lookup(dp->dp_meta_objset, dd->dd_object,
|
||||
DD_FIELD_LIVELIST, sizeof (uint64_t), 1, &obj);
|
||||
if (err == 0) {
|
||||
VERIFY0(zap_remove(dp->dp_meta_objset, dd->dd_object,
|
||||
DD_FIELD_LIVELIST, tx));
|
||||
if (total) {
|
||||
dsl_deadlist_free(dp->dp_meta_objset, obj, tx);
|
||||
spa_feature_decr(spa, SPA_FEATURE_LIVELIST, tx);
|
||||
}
|
||||
} else {
|
||||
ASSERT3U(err, !=, ENOENT);
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(_KERNEL)
|
||||
EXPORT_SYMBOL(dsl_dir_set_quota);
|
||||
EXPORT_SYMBOL(dsl_dir_set_reservation);
|
||||
|
||||
Reference in New Issue
Block a user