Fast Clone Deletion

Deleting a clone requires finding blocks are clone-only, not shared
with the snapshot. This was done by traversing the entire block tree
which results in a large performance penalty for sparsely
written clones.

This is new method keeps track of clone blocks when they are
modified in a "Livelist" so that, when it’s time to delete,
the clone-specific blocks are already at hand.

We see performance improvements because now deletion work is
proportional to the number of clone-modified blocks, not the size
of the original dataset.

Reviewed-by: Sean Eric Fagan <sef@ixsystems.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Serapheim Dimitropoulos <serapheim@delphix.com>
Signed-off-by: Sara Hartse <sara.hartse@delphix.com>
Closes #8416
This commit is contained in:
Sara Hartse
2019-07-26 10:54:14 -07:00
committed by Brian Behlendorf
parent d274ac5460
commit 37f03da8ba
38 changed files with 2583 additions and 205 deletions
+15 -4
View File
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2015, 2019 by Delphix. All rights reserved.
*/
#ifndef _SYS_BPOBJ_H
@@ -31,6 +31,7 @@
#include <sys/txg.h>
#include <sys/zio.h>
#include <sys/zfs_context.h>
#include <sys/bplist.h>
#ifdef __cplusplus
extern "C" {
@@ -48,10 +49,12 @@ typedef struct bpobj_phys {
uint64_t bpo_uncomp;
uint64_t bpo_subobjs;
uint64_t bpo_num_subobjs;
uint64_t bpo_num_freed;
} bpobj_phys_t;
#define BPOBJ_SIZE_V0 (2 * sizeof (uint64_t))
#define BPOBJ_SIZE_V1 (4 * sizeof (uint64_t))
#define BPOBJ_SIZE_V2 (6 * sizeof (uint64_t))
typedef struct bpobj {
kmutex_t bpo_lock;
@@ -60,12 +63,14 @@ typedef struct bpobj {
int bpo_epb;
uint8_t bpo_havecomp;
uint8_t bpo_havesubobj;
uint8_t bpo_havefreed;
bpobj_phys_t *bpo_phys;
dmu_buf_t *bpo_dbuf;
dmu_buf_t *bpo_cached_dbuf;
} bpobj_t;
typedef int bpobj_itor_t(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
typedef int bpobj_itor_t(void *arg, const blkptr_t *bp, boolean_t bp_freed,
dmu_tx_t *tx);
uint64_t bpobj_alloc(objset_t *mos, int blocksize, dmu_tx_t *tx);
uint64_t bpobj_alloc_empty(objset_t *os, int blocksize, dmu_tx_t *tx);
@@ -77,10 +82,13 @@ void bpobj_close(bpobj_t *bpo);
boolean_t bpobj_is_open(const bpobj_t *bpo);
int bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx);
int bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *, dmu_tx_t *);
int bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *, uint64_t *);
int livelist_bpobj_iterate_from_nofree(bpobj_t *bpo, bpobj_itor_t func,
void *arg, int64_t start);
void bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx);
void bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx);
void bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, boolean_t bp_freed,
dmu_tx_t *tx);
int bpobj_space(bpobj_t *bpo,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
@@ -88,6 +96,9 @@ int bpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
boolean_t bpobj_is_empty(bpobj_t *bpo);
int bplist_append_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
dmu_tx_t *tx);
#ifdef __cplusplus
}
#endif