diff --git a/include/sys/dsl_scan.h b/include/sys/dsl_scan.h
index fb1f1d65b..d716510f8 100644
--- a/include/sys/dsl_scan.h
+++ b/include/sys/dsl_scan.h
@@ -155,7 +155,7 @@ typedef struct dsl_scan {
 	dsl_scan_phys_t scn_phys; /* on disk representation of scan */
 	dsl_scan_phys_t scn_phys_cached;
 	avl_tree_t scn_queue; /* queue of datasets to scan */
-	uint64_t scn_bytes_pending; /* outstanding data to issue */
+	uint64_t scn_queues_pending; /* outstanding data to issue */
 } dsl_scan_t;
 
 typedef struct dsl_scan_io_queue dsl_scan_io_queue_t;
diff --git a/include/sys/range_tree.h b/include/sys/range_tree.h
index 895d80257..daa39e20d 100644
--- a/include/sys/range_tree.h
+++ b/include/sys/range_tree.h
@@ -64,11 +64,7 @@ typedef struct range_tree {
 	uint8_t		rt_shift;
 	uint64_t	rt_start;
 	const range_tree_ops_t *rt_ops;
-
-	/* rt_btree_compare should only be set if rt_arg is a b-tree */
 	void		*rt_arg;
-	int (*rt_btree_compare)(const void *, const void *);
-
 	uint64_t	rt_gap;		/* allowable inter-segment gap */
 
 	/*
@@ -278,9 +274,9 @@ rs_set_fill(range_seg_t *rs, range_tree_t *rt, uint64_t fill)
 
 typedef void range_tree_func_t(void *arg, uint64_t start, uint64_t size);
 
-range_tree_t *range_tree_create_impl(const range_tree_ops_t *ops,
+range_tree_t *range_tree_create_gap(const range_tree_ops_t *ops,
     range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
-    int (*zfs_btree_compare) (const void *, const void *), uint64_t gap);
+    uint64_t gap);
 range_tree_t *range_tree_create(const range_tree_ops_t *ops,
     range_seg_type_t type, void *arg, uint64_t start, uint64_t shift);
 void range_tree_destroy(range_tree_t *rt);
@@ -316,13 +312,6 @@ void range_tree_remove_xor_add_segment(uint64_t start, uint64_t end,
 void range_tree_remove_xor_add(range_tree_t *rt, range_tree_t *removefrom,
     range_tree_t *addto);
 
-void rt_btree_create(range_tree_t *rt, void *arg);
-void rt_btree_destroy(range_tree_t *rt, void *arg);
-void rt_btree_add(range_tree_t *rt, range_seg_t *rs, void *arg);
-void rt_btree_remove(range_tree_t *rt, range_seg_t *rs, void *arg);
-void rt_btree_vacate(range_tree_t *rt, void *arg);
-extern const range_tree_ops_t rt_btree_ops;
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
index d8152939d..62209f4da 100644
--- a/module/zfs/dsl_scan.c
+++ b/module/zfs/dsl_scan.c
@@ -220,9 +220,9 @@ typedef struct {
 
 /*
  * This controls what conditions are placed on dsl_scan_sync_state():
- * SYNC_OPTIONAL) write out scn_phys iff scn_bytes_pending == 0
- * SYNC_MANDATORY) write out scn_phys always. scn_bytes_pending must be 0.
- * SYNC_CACHED) if scn_bytes_pending == 0, write out scn_phys. Otherwise
+ * SYNC_OPTIONAL) write out scn_phys iff scn_queues_pending == 0
+ * SYNC_MANDATORY) write out scn_phys always. scn_queues_pending must be 0.
+ * SYNC_CACHED) if scn_queues_pending == 0, write out scn_phys. Otherwise
  * write out the scn_phys_cached version.
  * See dsl_scan_sync_state for details.
  */
@@ -284,9 +284,10 @@ struct dsl_scan_io_queue {
 
 	/* trees used for sorting I/Os and extents of I/Os */
 	range_tree_t	*q_exts_by_addr;
-	zfs_btree_t		q_exts_by_size;
+	zfs_btree_t	q_exts_by_size;
 	avl_tree_t	q_sios_by_addr;
 	uint64_t	q_sio_memused;
+	uint64_t	q_last_ext_addr;
 
 	/* members for zio rate limiting */
 	uint64_t	q_maxinflight_bytes;
@@ -644,7 +645,7 @@ dsl_scan_is_paused_scrub(const dsl_scan_t *scn)
  * Because we can be running in the block sorting algorithm, we do not always
  * want to write out the record, only when it is "safe" to do so. This safety
 * condition is achieved by making sure that the sorting queues are empty
- * (scn_bytes_pending == 0). When this condition is not true, the sync'd state
+ * (scn_queues_pending == 0). When this condition is not true, the sync'd state
  * is inconsistent with how much actual scanning progress has been made. The
  * kind of sync to be performed is specified by the sync_type argument. If the
  * sync is optional, we only sync if the queues are empty. If the sync is
@@ -667,8 +668,8 @@ dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx, state_sync_type_t sync_type)
 	int i;
 	spa_t *spa = scn->scn_dp->dp_spa;
 
-	ASSERT(sync_type != SYNC_MANDATORY || scn->scn_bytes_pending == 0);
-	if (scn->scn_bytes_pending == 0) {
+	ASSERT(sync_type != SYNC_MANDATORY || scn->scn_queues_pending == 0);
+	if (scn->scn_queues_pending == 0) {
 		for (i = 0; i < spa->spa_root_vdev->vdev_children; i++) {
 			vdev_t *vd = spa->spa_root_vdev->vdev_child[i];
 			dsl_scan_io_queue_t *q = vd->vdev_scan_io_queue;
@@ -1206,7 +1207,7 @@ scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx)
 	dmu_object_type_t ot = (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) ?
 	    DMU_OT_SCAN_QUEUE : DMU_OT_ZAP_OTHER;
 
-	ASSERT0(scn->scn_bytes_pending);
+	ASSERT0(scn->scn_queues_pending);
 	ASSERT(scn->scn_phys.scn_queue_obj != 0);
 
 	VERIFY0(dmu_object_free(dp->dp_meta_objset,
@@ -1278,11 +1279,12 @@ dsl_scan_should_clear(dsl_scan_t *scn)
 		queue = tvd->vdev_scan_io_queue;
 		if (queue != NULL) {
 			/*
-			 * # of extents in exts_by_size = # in exts_by_addr.
+			 * # of extents in exts_by_addr = # in exts_by_size.
 			 * B-tree efficiency is ~75%, but can be as low as 50%.
 			 */
 			mused += zfs_btree_numnodes(&queue->q_exts_by_size) *
-			    3 * sizeof (range_seg_gap_t) + queue->q_sio_memused;
+			    ((sizeof (range_seg_gap_t) + sizeof (uint64_t)) *
+			    3 / 2) + queue->q_sio_memused;
 		}
 		mutex_exit(&tvd->vdev_scan_io_queue_lock);
 	}
@@ -1290,7 +1292,7 @@ dsl_scan_should_clear(dsl_scan_t *scn)
 	dprintf("current scan memory usage: %llu bytes\n", (longlong_t)mused);
 
 	if (mused == 0)
-		ASSERT0(scn->scn_bytes_pending);
+		ASSERT0(scn->scn_queues_pending);
 
 	/*
 	 * If we are above our hard limit, we need to clear out memory.
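
The revised estimate in dsl_scan_should_clear() above charges each queued extent for one range_seg_gap_t in q_exts_by_addr plus one uint64_t key in q_exts_by_size, inflated by 3/2 to cover B-tree fill efficiency (~75%, as low as 50%). A minimal userland sketch of that arithmetic follows; the structure sizes are illustrative placeholders, not the real sizeof values.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for sizeof (range_seg_gap_t) and sizeof (uint64_t). */
#define	RANGE_SEG_GAP_SIZE	32
#define	BTREE_ELEM_SIZE		8

/*
 * Mirrors the estimate above: per-extent cost in both trees, scaled by 3/2
 * for B-tree fill efficiency, plus the separately tracked scan_io_t memory.
 */
static uint64_t
queue_mem_estimate(uint64_t nextents, uint64_t sio_memused)
{
	return (nextents * ((RANGE_SEG_GAP_SIZE + BTREE_ELEM_SIZE) * 3 / 2) +
	    sio_memused);
}

int
main(void)
{
	/* e.g. one million queued extents and 64 MiB of scan_io_t memory */
	printf("%llu bytes\n",
	    (unsigned long long)queue_mem_estimate(1000000, 64ULL << 20));
	return (0);
}
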
@@ -2855,7 +2857,6 @@ scan_io_queue_issue(dsl_scan_io_queue_t *queue, list_t *io_list)
 {
 	dsl_scan_t *scn = queue->q_scn;
 	scan_io_t *sio;
-	int64_t bytes_issued = 0;
 	boolean_t suspended = B_FALSE;
 
 	while ((sio = list_head(io_list)) != NULL) {
@@ -2867,16 +2868,12 @@ scan_io_queue_issue(dsl_scan_io_queue_t *queue, list_t *io_list)
 		}
 
 		sio2bp(sio, &bp);
-		bytes_issued += SIO_GET_ASIZE(sio);
 		scan_exec_io(scn->scn_dp, &bp, sio->sio_flags,
 		    &sio->sio_zb, queue);
 		(void) list_remove_head(io_list);
 		scan_io_queues_update_zio_stats(queue, &bp);
 		sio_free(sio);
 	}
-
-	atomic_add_64(&scn->scn_bytes_pending, -bytes_issued);
-
 	return (suspended);
 }
 
@@ -2921,6 +2918,8 @@ scan_io_queue_gather(dsl_scan_io_queue_t *queue, range_seg_t *rs, list_t *list)
 
 		next_sio = AVL_NEXT(&queue->q_sios_by_addr, sio);
 		avl_remove(&queue->q_sios_by_addr, sio);
+		if (avl_is_empty(&queue->q_sios_by_addr))
+			atomic_add_64(&queue->q_scn->scn_queues_pending, -1);
 		queue->q_sio_memused -= SIO_GET_MUSED(sio);
 
 		bytes_issued += SIO_GET_ASIZE(sio);
@@ -2942,12 +2941,13 @@ scan_io_queue_gather(dsl_scan_io_queue_t *queue, range_seg_t *rs, list_t *list)
 		range_tree_resize_segment(queue->q_exts_by_addr, rs,
 		    SIO_GET_OFFSET(sio), rs_get_end(rs,
 		    queue->q_exts_by_addr) - SIO_GET_OFFSET(sio));
-
+		queue->q_last_ext_addr = SIO_GET_OFFSET(sio);
 		return (B_TRUE);
 	} else {
 		uint64_t rstart = rs_get_start(rs, queue->q_exts_by_addr);
 		uint64_t rend = rs_get_end(rs, queue->q_exts_by_addr);
 		range_tree_remove(queue->q_exts_by_addr, rstart, rend - rstart);
+		queue->q_last_ext_addr = -1;
 		return (B_FALSE);
 	}
 }
@@ -2972,31 +2972,8 @@ scan_io_queue_fetch_ext(dsl_scan_io_queue_t *queue)
 	ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock));
 	ASSERT(scn->scn_is_sorted);
 
-	/* handle tunable overrides */
-	if (scn->scn_checkpointing || scn->scn_clearing) {
-		if (zfs_scan_issue_strategy == 1) {
-			return (range_tree_first(rt));
-		} else if (zfs_scan_issue_strategy == 2) {
-			/*
-			 * We need to get the original entry in the by_addr
-			 * tree so we can modify it.
-			 */
-			range_seg_t *size_rs =
-			    zfs_btree_first(&queue->q_exts_by_size, NULL);
-			if (size_rs == NULL)
-				return (NULL);
-			uint64_t start = rs_get_start(size_rs, rt);
-			uint64_t size = rs_get_end(size_rs, rt) - start;
-			range_seg_t *addr_rs = range_tree_find(rt, start,
-			    size);
-			ASSERT3P(addr_rs, !=, NULL);
-			ASSERT3U(rs_get_start(size_rs, rt), ==,
-			    rs_get_start(addr_rs, rt));
-			ASSERT3U(rs_get_end(size_rs, rt), ==,
-			    rs_get_end(addr_rs, rt));
-			return (addr_rs);
-		}
-	}
+	if (!scn->scn_checkpointing && !scn->scn_clearing)
+		return (NULL);
 
 	/*
 	 * During normal clearing, we want to issue our largest segments
@@ -3007,28 +2984,42 @@ scan_io_queue_fetch_ext(dsl_scan_io_queue_t *queue)
 	 * so the way we are sorted now is as good as it will ever get.
 	 * In this case, we instead switch to issuing extents in LBA order.
 	 */
-	if (scn->scn_checkpointing) {
+	if ((zfs_scan_issue_strategy < 1 && scn->scn_checkpointing) ||
+	    zfs_scan_issue_strategy == 1)
 		return (range_tree_first(rt));
-	} else if (scn->scn_clearing) {
-		/*
-		 * We need to get the original entry in the by_addr
-		 * tree so we can modify it.
-		 */
-		range_seg_t *size_rs = zfs_btree_first(&queue->q_exts_by_size,
-		    NULL);
-		if (size_rs == NULL)
-			return (NULL);
-		uint64_t start = rs_get_start(size_rs, rt);
-		uint64_t size = rs_get_end(size_rs, rt) - start;
-		range_seg_t *addr_rs = range_tree_find(rt, start, size);
-		ASSERT3P(addr_rs, !=, NULL);
-		ASSERT3U(rs_get_start(size_rs, rt), ==, rs_get_start(addr_rs,
-		    rt));
-		ASSERT3U(rs_get_end(size_rs, rt), ==, rs_get_end(addr_rs, rt));
-		return (addr_rs);
-	} else {
-		return (NULL);
-	}
+
+	/*
+	 * Try to continue the previous extent if it is not completed yet.
+	 * After a shrink in scan_io_queue_gather() it may no longer be the
+	 * best, but otherwise we would leave a shorter remnant every txg.
+	 */
+	uint64_t start;
+	uint64_t size = 1 << rt->rt_shift;
+	range_seg_t *addr_rs;
+	if (queue->q_last_ext_addr != -1) {
+		start = queue->q_last_ext_addr;
+		addr_rs = range_tree_find(rt, start, size);
+		if (addr_rs != NULL)
+			return (addr_rs);
+	}
+
+	/*
+	 * Nothing to continue, so find the new best extent.
+	 */
+	uint64_t *v = zfs_btree_first(&queue->q_exts_by_size, NULL);
+	if (v == NULL)
+		return (NULL);
+	queue->q_last_ext_addr = start = *v << rt->rt_shift;
+
+	/*
+	 * We need to get the original entry in the by_addr tree so we can
+	 * modify it.
+	 */
+	addr_rs = range_tree_find(rt, start, size);
+	ASSERT3P(addr_rs, !=, NULL);
+	ASSERT3U(rs_get_start(addr_rs, rt), ==, start);
+	ASSERT3U(rs_get_end(addr_rs, rt), >, start);
+	return (addr_rs);
 }
 
 static void
@@ -3064,12 +3055,12 @@ scan_io_queues_run_one(void *arg)
 	/* loop until we run out of time or sios */
 	while ((rs = scan_io_queue_fetch_ext(queue)) != NULL) {
 		uint64_t seg_start = 0, seg_end = 0;
-		boolean_t more_left = B_TRUE;
+		boolean_t more_left;
 
 		ASSERT(list_is_empty(&sio_list));
 
 		/* loop while we still have sios left to process in this rs */
-		while (more_left) {
+		do {
 			scan_io_t *first_sio, *last_sio;
 
 			/*
@@ -3098,7 +3089,7 @@ scan_io_queues_run_one(void *arg)
 
 			if (suspended)
 				break;
-		}
+		} while (more_left);
 
 		/* update statistics for debugging purposes */
 		scan_io_queues_update_seg_stats(queue, seg_start, seg_end);
@@ -3139,7 +3130,7 @@ scan_io_queues_run(dsl_scan_t *scn)
 	ASSERT(scn->scn_is_sorted);
 	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
 
-	if (scn->scn_bytes_pending == 0)
+	if (scn->scn_queues_pending == 0)
 		return;
 
 	if (scn->scn_taskq == NULL) {
@@ -3771,7 +3762,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
 			    spa->spa_name,
 			    (longlong_t)tx->tx_txg);
 		}
-	} else if (scn->scn_is_sorted && scn->scn_bytes_pending != 0) {
+	} else if (scn->scn_is_sorted && scn->scn_queues_pending != 0) {
 		ASSERT(scn->scn_clearing);
 
 		/* need to issue scrubbing IOs from per-vdev queues */
@@ -3801,7 +3792,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
 			    spa->spa_name);
 			ASSERT3U(scn->scn_done_txg, !=, 0);
 			ASSERT0(spa->spa_scrub_inflight);
-			ASSERT0(scn->scn_bytes_pending);
+			ASSERT0(scn->scn_queues_pending);
 			dsl_scan_done(scn, B_TRUE, tx);
 			sync_type = SYNC_MANDATORY;
 		}
@@ -3892,20 +3883,21 @@
 static void
 scan_io_queue_insert_impl(dsl_scan_io_queue_t *queue, scan_io_t *sio)
 {
 	avl_index_t idx;
-	int64_t asize = SIO_GET_ASIZE(sio);
 	dsl_scan_t *scn = queue->q_scn;
 
 	ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock));
+	if (unlikely(avl_is_empty(&queue->q_sios_by_addr)))
+		atomic_add_64(&scn->scn_queues_pending, 1);
 	if (avl_find(&queue->q_sios_by_addr, sio, &idx) != NULL) {
 		/* block is already scheduled for reading */
-		atomic_add_64(&scn->scn_bytes_pending, -asize);
 		sio_free(sio);
 		return;
 	}
 	avl_insert(&queue->q_sios_by_addr, sio, idx);
 	queue->q_sio_memused += SIO_GET_MUSED(sio);
-	range_tree_add(queue->q_exts_by_addr, SIO_GET_OFFSET(sio), asize);
+	range_tree_add(queue->q_exts_by_addr, SIO_GET_OFFSET(sio),
+	    SIO_GET_ASIZE(sio));
 }
 
 /*
@@ -3918,7 +3910,6 @@ static void
 scan_io_queue_insert(dsl_scan_io_queue_t *queue, const blkptr_t *bp, int dva_i,
     int zio_flags, const zbookmark_phys_t *zb)
 {
-	dsl_scan_t *scn = queue->q_scn;
 	scan_io_t *sio = sio_alloc(BP_GET_NDVAS(bp));
 
 	ASSERT0(BP_IS_GANG(bp));
@@ -3928,13 +3919,7 @@ scan_io_queue_insert(dsl_scan_io_queue_t *queue, const blkptr_t *bp, int dva_i,
 	sio->sio_flags = zio_flags;
 	sio->sio_zb = *zb;
 
-	/*
-	 * Increment the bytes pending counter now so that we can't
-	 * get an integer underflow in case the worker processes the
-	 * zio before we get to incrementing this counter.
-	 */
-	atomic_add_64(&scn->scn_bytes_pending, SIO_GET_ASIZE(sio));
-
+	queue->q_last_ext_addr = -1;
 	scan_io_queue_insert_impl(queue, sio);
 }
 
@@ -4020,8 +4005,9 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
 		 * Keep track of how much data we've examined so that
 		 * zpool(8) status can make useful progress reports.
 		 */
-		scn->scn_phys.scn_examined += DVA_GET_ASIZE(dva);
-		spa->spa_scan_pass_exam += DVA_GET_ASIZE(dva);
+		uint64_t asize = DVA_GET_ASIZE(dva);
+		scn->scn_phys.scn_examined += asize;
+		spa->spa_scan_pass_exam += asize;
 
 		/* if it's a resilver, this may not be in the target range */
 		if (!needs_io)
@@ -4142,33 +4128,88 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
  * extents that are more completely filled (in a 3:2 ratio) vs just larger.
  * Note that as an optimization, we replace multiplication and division by
  * 100 with bitshifting by 7 (which effectively multiplies and divides by 128).
+ *
+ * Since we do not care if one extent is only a few percent better than
+ * another, compress the score into 6 bits via binary logarithm (highbit64())
+ * and store it in the high bits of the offset, which are otherwise unused
+ * due to ashift.  This reduces q_exts_by_size B-tree elements to only 64
+ * bits, comparable with a single operation, makes scrubs more sequential,
+ * and reduces the chance that a minor extent change moves it in the B-tree.
 */
 static int
 ext_size_compare(const void *x, const void *y)
 {
-	const range_seg_gap_t *rsa = x, *rsb = y;
+	const uint64_t *a = x, *b = y;
 
-	uint64_t sa = rsa->rs_end - rsa->rs_start;
-	uint64_t sb = rsb->rs_end - rsb->rs_start;
-	uint64_t score_a, score_b;
-
-	score_a = rsa->rs_fill + ((((rsa->rs_fill << 7) / sa) *
-	    fill_weight * rsa->rs_fill) >> 7);
-	score_b = rsb->rs_fill + ((((rsb->rs_fill << 7) / sb) *
-	    fill_weight * rsb->rs_fill) >> 7);
-
-	if (score_a > score_b)
-		return (-1);
-	if (score_a == score_b) {
-		if (rsa->rs_start < rsb->rs_start)
-			return (-1);
-		if (rsa->rs_start == rsb->rs_start)
-			return (0);
-		return (1);
-	}
-	return (1);
+	return (TREE_CMP(*a, *b));
 }
+
+static void
+ext_size_create(range_tree_t *rt, void *arg)
+{
+	(void) rt;
+	zfs_btree_t *size_tree = arg;
+
+	zfs_btree_create(size_tree, ext_size_compare, sizeof (uint64_t));
+}
+
+static void
+ext_size_destroy(range_tree_t *rt, void *arg)
+{
+	(void) rt;
+	zfs_btree_t *size_tree = arg;
+	ASSERT0(zfs_btree_numnodes(size_tree));
+
+	zfs_btree_destroy(size_tree);
+}
+
+static uint64_t
+ext_size_value(range_tree_t *rt, range_seg_gap_t *rsg)
+{
+	(void) rt;
+	uint64_t size = rsg->rs_end - rsg->rs_start;
+	uint64_t score = rsg->rs_fill + ((((rsg->rs_fill << 7) / size) *
+	    fill_weight * rsg->rs_fill) >> 7);
+	ASSERT3U(rt->rt_shift, >=, 8);
+	return (((uint64_t)(64 - highbit64(score)) << 56) | rsg->rs_start);
+}
+
+static void
+ext_size_add(range_tree_t *rt, range_seg_t *rs, void *arg)
+{
+	zfs_btree_t *size_tree = arg;
+	ASSERT3U(rt->rt_type, ==, RANGE_SEG_GAP);
+	uint64_t v = ext_size_value(rt, (range_seg_gap_t *)rs);
+	zfs_btree_add(size_tree, &v);
+}
+
+static void
+ext_size_remove(range_tree_t *rt, range_seg_t *rs, void *arg)
+{
+	zfs_btree_t *size_tree = arg;
+	ASSERT3U(rt->rt_type, ==, RANGE_SEG_GAP);
+	uint64_t v = ext_size_value(rt, (range_seg_gap_t *)rs);
+	zfs_btree_remove(size_tree, &v);
+}
+
+static void
+ext_size_vacate(range_tree_t *rt, void *arg)
+{
+	zfs_btree_t *size_tree = arg;
+	zfs_btree_clear(size_tree);
+	zfs_btree_destroy(size_tree);
+
+	ext_size_create(rt, arg);
+}
+
+static const range_tree_ops_t ext_size_ops = {
+	.rtop_create = ext_size_create,
+	.rtop_destroy = ext_size_destroy,
+	.rtop_add = ext_size_add,
+	.rtop_remove = ext_size_remove,
+	.rtop_vacate = ext_size_vacate
+};
+
 /*
  * Comparator for the q_sios_by_addr tree. Sorting is simply performed
  * based on LBA-order (from lowest to highest).
@@ -4191,9 +4232,10 @@ scan_io_queue_create(vdev_t *vd)
 	q->q_scn = scn;
 	q->q_vd = vd;
 	q->q_sio_memused = 0;
+	q->q_last_ext_addr = -1;
 	cv_init(&q->q_zio_cv, NULL, CV_DEFAULT, NULL);
-	q->q_exts_by_addr = range_tree_create_impl(&rt_btree_ops, RANGE_SEG_GAP,
-	    &q->q_exts_by_size, 0, 0, ext_size_compare, zfs_scan_max_ext_gap);
+	q->q_exts_by_addr = range_tree_create_gap(&ext_size_ops, RANGE_SEG_GAP,
+	    &q->q_exts_by_size, 0, vd->vdev_ashift, zfs_scan_max_ext_gap);
 	avl_create(&q->q_sios_by_addr, sio_addr_compare, sizeof (scan_io_t),
 	    offsetof(scan_io_t, sio_nodes.sio_addr_node));
 
@@ -4211,21 +4253,20 @@ dsl_scan_io_queue_destroy(dsl_scan_io_queue_t *queue)
 	dsl_scan_t *scn = queue->q_scn;
 	scan_io_t *sio;
 	void *cookie = NULL;
-	int64_t bytes_dequeued = 0;
 
 	ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock));
 
+	if (!avl_is_empty(&queue->q_sios_by_addr))
+		atomic_add_64(&scn->scn_queues_pending, -1);
 	while ((sio = avl_destroy_nodes(&queue->q_sios_by_addr, &cookie)) !=
 	    NULL) {
 		ASSERT(range_tree_contains(queue->q_exts_by_addr,
 		    SIO_GET_OFFSET(sio), SIO_GET_ASIZE(sio)));
-		bytes_dequeued += SIO_GET_ASIZE(sio);
 		queue->q_sio_memused -= SIO_GET_MUSED(sio);
 		sio_free(sio);
 	}
 
 	ASSERT0(queue->q_sio_memused);
-	atomic_add_64(&scn->scn_bytes_pending, -bytes_dequeued);
 	range_tree_vacate(queue->q_exts_by_addr, NULL, queue);
 	range_tree_destroy(queue->q_exts_by_addr);
 	avl_destroy(&queue->q_sios_by_addr);
@@ -4321,25 +4362,19 @@ dsl_scan_freed_dva(spa_t *spa, const blkptr_t *bp, int dva_i)
 	sio_free(srch_sio);
 
 	if (sio != NULL) {
-		int64_t asize = SIO_GET_ASIZE(sio);
 		blkptr_t tmpbp;
 
 		/* Got it while it was cold in the queue */
 		ASSERT3U(start, ==, SIO_GET_OFFSET(sio));
-		ASSERT3U(size, ==, asize);
+		ASSERT3U(size, ==, SIO_GET_ASIZE(sio));
 		avl_remove(&queue->q_sios_by_addr, sio);
+		if (avl_is_empty(&queue->q_sios_by_addr))
+			atomic_add_64(&scn->scn_queues_pending, -1);
 		queue->q_sio_memused -= SIO_GET_MUSED(sio);
 
 		ASSERT(range_tree_contains(queue->q_exts_by_addr, start, size));
 		range_tree_remove_fill(queue->q_exts_by_addr, start, size);
 
-		/*
-		 * We only update scn_bytes_pending in the cold path,
-		 * otherwise it will already have been accounted for as
-		 * part of the zio's execution.
-		 */
-		atomic_add_64(&scn->scn_bytes_pending, -asize);
-
 		/* count the block as though we issued it */
 		sio2bp(sio, &tmpbp);
 		count_block(scn, dp->dp_blkstats, &tmpbp);
diff --git a/module/zfs/range_tree.c b/module/zfs/range_tree.c
index fe4bf616c..1ffb48cb5 100644
--- a/module/zfs/range_tree.c
+++ b/module/zfs/range_tree.c
@@ -188,10 +188,8 @@ range_tree_seg_gap_compare(const void *x1, const void *x2)
 }
 
 range_tree_t *
-range_tree_create_impl(const range_tree_ops_t *ops, range_seg_type_t type,
-    void *arg, uint64_t start, uint64_t shift,
-    int (*zfs_btree_compare) (const void *, const void *),
-    uint64_t gap)
+range_tree_create_gap(const range_tree_ops_t *ops, range_seg_type_t type,
+    void *arg, uint64_t start, uint64_t shift, uint64_t gap)
 {
 	range_tree_t *rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);
 
@@ -223,7 +221,6 @@ range_tree_create_impl(const range_tree_ops_t *ops, range_seg_type_t type,
 	rt->rt_type = type;
 	rt->rt_start = start;
 	rt->rt_shift = shift;
-	rt->rt_btree_compare = zfs_btree_compare;
 
 	if (rt->rt_ops != NULL && rt->rt_ops->rtop_create != NULL)
 		rt->rt_ops->rtop_create(rt, rt->rt_arg);
@@ -235,7 +232,7 @@ range_tree_t *
 range_tree_create(const range_tree_ops_t *ops, range_seg_type_t type,
     void *arg, uint64_t start, uint64_t shift)
 {
-	return (range_tree_create_impl(ops, type, arg, start, shift, NULL, 0));
+	return (range_tree_create_gap(ops, type, arg, start, shift, 0));
 }
 
 void
@@ -741,74 +738,6 @@ range_tree_is_empty(range_tree_t *rt)
 	return (range_tree_space(rt) == 0);
 }
 
-void
-rt_btree_create(range_tree_t *rt, void *arg)
-{
-	zfs_btree_t *size_tree = arg;
-
-	size_t size;
-	switch (rt->rt_type) {
-	case RANGE_SEG32:
-		size = sizeof (range_seg32_t);
-		break;
-	case RANGE_SEG64:
-		size = sizeof (range_seg64_t);
-		break;
-	case RANGE_SEG_GAP:
-		size = sizeof (range_seg_gap_t);
-		break;
-	default:
-		panic("Invalid range seg type %d", rt->rt_type);
-	}
-	zfs_btree_create(size_tree, rt->rt_btree_compare, size);
-}
-
-void
-rt_btree_destroy(range_tree_t *rt, void *arg)
-{
-	(void) rt;
-	zfs_btree_t *size_tree = arg;
-	ASSERT0(zfs_btree_numnodes(size_tree));
-
-	zfs_btree_destroy(size_tree);
-}
-
-void
-rt_btree_add(range_tree_t *rt, range_seg_t *rs, void *arg)
-{
-	(void) rt;
-	zfs_btree_t *size_tree = arg;
-
-	zfs_btree_add(size_tree, rs);
-}
-
-void
-rt_btree_remove(range_tree_t *rt, range_seg_t *rs, void *arg)
-{
-	(void) rt;
-	zfs_btree_t *size_tree = arg;
-
-	zfs_btree_remove(size_tree, rs);
-}
-
-void
-rt_btree_vacate(range_tree_t *rt, void *arg)
-{
-	zfs_btree_t *size_tree = arg;
-	zfs_btree_clear(size_tree);
-	zfs_btree_destroy(size_tree);
-
-	rt_btree_create(rt, arg);
-}
-
-const range_tree_ops_t rt_btree_ops = {
-	.rtop_create = rt_btree_create,
-	.rtop_destroy = rt_btree_destroy,
-	.rtop_add = rt_btree_add,
-	.rtop_remove = rt_btree_remove,
-	.rtop_vacate = rt_btree_vacate
-};
-
 /*
  * Remove any overlapping ranges between the given segment [start, end)
  * from removefrom. Add non-overlapping leftovers to addto.
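
The new q_exts_by_size ordering can be illustrated outside the kernel. The sketch below packs the inverted log2 of the fill-based score into the top bits of a 64-bit key and the extent's start offset into the low bits, so an ordinary integer comparison (what TREE_CMP() amounts to on the B-tree side) issues better-filled extents first and breaks ties in LBA order. highbit64() and the sizes used here are local stand-ins, not the kernel implementations, and the key layout is simplified relative to the ashift handling in ext_size_value().

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's highbit64(): 1-based index of the highest set bit, 0 for 0. */
static int
highbit64(uint64_t v)
{
	int h = 0;
	while (v != 0) {
		h++;
		v >>= 1;
	}
	return (h);
}

/*
 * Build a 64-bit size-tree key: inverted log2 of the 3:2 fill-weighted score
 * in the top 8 bits, extent start offset in the low bits.  Smaller key means
 * higher score (issued first); equal scores fall back to ascending LBA.
 * size must be nonzero.
 */
static uint64_t
ext_size_key(uint64_t start, uint64_t size, uint64_t fill, uint64_t fill_weight)
{
	uint64_t score = fill + ((((fill << 7) / size) * fill_weight * fill) >> 7);
	return (((uint64_t)(64 - highbit64(score)) << 56) | start);
}

int
main(void)
{
	/* A fully filled 1 MiB extent vs. a half-filled 4 MiB extent. */
	uint64_t a = ext_size_key(1 << 20, 1 << 20, 1 << 20, 3);
	uint64_t b = ext_size_key(8 << 20, 4 << 20, 1 << 19, 3);
	printf("issue %s first\n", a < b ? "a" : "b");
	return (0);
}
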