Several sorted scrub optimizations

- Reduce size and comparison complexity of q_exts_by_size B-tree.
Previous code used two 64-bit divisions and many other operations to
compare two B-tree elements.  It created enormous overhead.  This
implementation moves the math to the upper level and stores the score
in the B-tree elements themselves.  Since all that we need to store in
that B-tree is the extent score and offset, those can fit into single
8 byte value instead of 24 bytes of q_exts_by_addr element and can be
compared with single operation.
 - Better decouple secondary tree logic from main range_tree by moving
rt_btree_ops and related functions into dsl_scan.c as ext_size_ops.
Those functions are very small to worry about the code duplication and
range_tree does not need to know details such as rt_btree_compare.
 - Instead of accounting number of pending bytes per pool, that needs
atomic on global variable per block, account the number of non-empty
per-vdev queues, that change much more rarely.
 - When extent scan is interrupted by TXG end, continue it in the next
TXG instead of selecting next best extent.  It allows to avoid leaving
one truncated (and so likely not the best any more) extent each TXG.

On top of some other optimizations this saves about 1.5 minutes out of
10 to scrub pool of 12 SSDs, storing 1.5TB of 4KB zvol blocks.

Reviewed-by: Paul Dagnelie <pcd@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Tom Caputi <caputit1@tcnj.edu>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored-By: iXsystems, Inc.
Closes #13576
This commit is contained in:
Alexander Motin
2022-06-24 12:50:37 -04:00
committed by GitHub
parent 83691bebf0
commit 1c0c729ab4
4 changed files with 156 additions and 203 deletions
+3 -74
View File
@@ -188,10 +188,8 @@ range_tree_seg_gap_compare(const void *x1, const void *x2)
}
range_tree_t *
range_tree_create_impl(const range_tree_ops_t *ops, range_seg_type_t type,
void *arg, uint64_t start, uint64_t shift,
int (*zfs_btree_compare) (const void *, const void *),
uint64_t gap)
range_tree_create_gap(const range_tree_ops_t *ops, range_seg_type_t type,
void *arg, uint64_t start, uint64_t shift, uint64_t gap)
{
range_tree_t *rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);
@@ -223,7 +221,6 @@ range_tree_create_impl(const range_tree_ops_t *ops, range_seg_type_t type,
rt->rt_type = type;
rt->rt_start = start;
rt->rt_shift = shift;
rt->rt_btree_compare = zfs_btree_compare;
if (rt->rt_ops != NULL && rt->rt_ops->rtop_create != NULL)
rt->rt_ops->rtop_create(rt, rt->rt_arg);
@@ -235,7 +232,7 @@ range_tree_t *
range_tree_create(const range_tree_ops_t *ops, range_seg_type_t type,
void *arg, uint64_t start, uint64_t shift)
{
return (range_tree_create_impl(ops, type, arg, start, shift, NULL, 0));
return (range_tree_create_gap(ops, type, arg, start, shift, 0));
}
void
@@ -741,74 +738,6 @@ range_tree_is_empty(range_tree_t *rt)
return (range_tree_space(rt) == 0);
}
void
rt_btree_create(range_tree_t *rt, void *arg)
{
zfs_btree_t *size_tree = arg;
size_t size;
switch (rt->rt_type) {
case RANGE_SEG32:
size = sizeof (range_seg32_t);
break;
case RANGE_SEG64:
size = sizeof (range_seg64_t);
break;
case RANGE_SEG_GAP:
size = sizeof (range_seg_gap_t);
break;
default:
panic("Invalid range seg type %d", rt->rt_type);
}
zfs_btree_create(size_tree, rt->rt_btree_compare, size);
}
void
rt_btree_destroy(range_tree_t *rt, void *arg)
{
(void) rt;
zfs_btree_t *size_tree = arg;
ASSERT0(zfs_btree_numnodes(size_tree));
zfs_btree_destroy(size_tree);
}
void
rt_btree_add(range_tree_t *rt, range_seg_t *rs, void *arg)
{
(void) rt;
zfs_btree_t *size_tree = arg;
zfs_btree_add(size_tree, rs);
}
void
rt_btree_remove(range_tree_t *rt, range_seg_t *rs, void *arg)
{
(void) rt;
zfs_btree_t *size_tree = arg;
zfs_btree_remove(size_tree, rs);
}
void
rt_btree_vacate(range_tree_t *rt, void *arg)
{
zfs_btree_t *size_tree = arg;
zfs_btree_clear(size_tree);
zfs_btree_destroy(size_tree);
rt_btree_create(rt, arg);
}
const range_tree_ops_t rt_btree_ops = {
.rtop_create = rt_btree_create,
.rtop_destroy = rt_btree_destroy,
.rtop_add = rt_btree_add,
.rtop_remove = rt_btree_remove,
.rtop_vacate = rt_btree_vacate
};
/*
* Remove any overlapping ranges between the given segment [start, end)
* from removefrom. Add non-overlapping leftovers to addto.