Performance optimization of AVL tree comparator functions

perf: 2.75x faster ddt_entry_compare()
    The first 256 bits of ddt_key_t are a block checksum, which is expected
to be close to random data. Hence, on average, the comparison only needs to
look at the first few bytes of the keys. To reduce the number of conditional
jump instructions, the result is computed as: sign(memcmp(k1, k2)).

The sign of an integer 'a' can be obtained as: `(0 < a) - (a < 0)` := {-1, 0, 1},
which is computed efficiently. Synthetic performance evaluation of the
original and new algorithms over 1G random keys on a 2.6GHz Intel(R) Xeon(R)
CPU E5-2660 v3:

old	6.85789 s
new	2.49089 s

perf: 2.8x faster vdev_queue_offset_compare() and vdev_queue_timestamp_compare()
    Compute the result directly instead of using conditionals

perf: zfs_range_compare()
    Speedup between 1.1x - 2.5x, depending on compiler version and
optimization level.

perf: spa_error_entry_compare()
    `bcmp()` is not suitable for comparator use: it only reports whether the
buffers differ, not how they are ordered. Use `memcmp()` instead.

perf: 2.8x faster metaslab_compare() and metaslab_rangesize_compare()
perf: 2.8x faster zil_bp_compare()
perf: 2.8x faster mze_compare()
perf: faster dbuf_compare()
perf: faster compares in spa_misc
perf: 2.8x faster layout_hash_compare()
perf: 2.8x faster space_reftree_compare()
perf: libzfs: faster avl tree comparators
perf: guid_compare()
perf: dsl_deadlist_compare()
perf: perm_set_compare()
perf: 2x faster range_tree_seg_compare()
perf: faster unique_compare()
perf: faster vdev_cache_offset_compare() and vdev_cache_lastused_compare()
perf: faster vdev_uberblock_compare()
perf: faster fuid idx_compare() and domain_compare()
perf: faster zfs_znode_hold_compare()

Signed-off-by: Gvozden Neskovic <neskovic@gmail.com>
Signed-off-by: Richard Elling <richard.elling@gmail.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #5033
This commit is contained in:
Gvozden Neskovic 2016-08-27 20:12:53 +02:00 committed by Brian Behlendorf
parent 9d69e9b268
commit ee36c709c3
25 changed files with 145 additions and 271 deletions

View File

@ -105,6 +105,13 @@ extern "C" {
* as is needed for any linked list implementation.
*/
/*
* AVL comparator helpers
*
* Branch-free building blocks for three-way comparators. Each macro
* evaluates to an int that is -1, 0 or 1.
*
* AVL_ISIGN(a)   - sign of 'a': 1 if a > 0, -1 if a < 0, otherwise 0.
*                  Used to normalize results such as those of strcmp() or
*                  memcmp().
* AVL_CMP(a, b)  - three-way comparison of 'a' and 'b' using the < and >
*                  operators of their (promoted) types.
* AVL_PCMP(a, b) - three-way comparison of two pointers by address; both
*                  are converted to uintptr_t before comparing.
*
* NOTE: every macro expands each argument twice, so arguments must not
* have side effects.
*/
#define AVL_ISIGN(a) (((a) > 0) - ((a) < 0))
#define AVL_CMP(a, b) (((a) > (b)) - ((a) < (b)))
#define AVL_PCMP(a, b) \
(((uintptr_t)(a) > (uintptr_t)(b)) - ((uintptr_t)(a) < (uintptr_t)(b)))
/*
* Type used for the root of the AVL tree.

View File

@ -131,6 +131,7 @@
*/
#define noinline __attribute__((noinline))
/*
* Branch-prediction hint: tell the compiler that 'x' is expected to be true.
* The argument is normalized with !! so that any non-zero value (for example
* a three-way comparison result of -1) matches the expected value of 1;
* without it the hint would only cover x == 1. Evaluates to 1 when 'x' is
* non-zero and 0 otherwise, so it is meant for use in boolean context.
*/
#define likely(x) __builtin_expect(!!(x), 1)
/*
* Debugging

View File

@ -674,15 +674,13 @@ typedef struct mnttab_node {
static int
libzfs_mnttab_cache_compare(const void *arg1, const void *arg2)
{
const mnttab_node_t *mtn1 = arg1;
const mnttab_node_t *mtn2 = arg2;
const mnttab_node_t *mtn1 = (const mnttab_node_t *)arg1;
const mnttab_node_t *mtn2 = (const mnttab_node_t *)arg2;
int rv;
rv = strcmp(mtn1->mtn_mt.mnt_special, mtn2->mtn_mt.mnt_special);
if (rv == 0)
return (0);
return (rv > 0 ? 1 : -1);
return (AVL_ISIGN(rv));
}
void

View File

@ -272,12 +272,7 @@ zfs_snapshot_compare(const void *larg, const void *rarg)
lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG);
rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG);
if (lcreate < rcreate)
return (-1);
else if (lcreate > rcreate)
return (+1);
else
return (0);
return (AVL_CMP(lcreate, rcreate));
}
int

View File

@ -475,15 +475,10 @@ typedef struct fsavl_node {
static int
fsavl_compare(const void *arg1, const void *arg2)
{
const fsavl_node_t *fn1 = arg1;
const fsavl_node_t *fn2 = arg2;
const fsavl_node_t *fn1 = (const fsavl_node_t *)arg1;
const fsavl_node_t *fn2 = (const fsavl_node_t *)arg2;
if (fn1->fn_guid > fn2->fn_guid)
return (+1);
else if (fn1->fn_guid < fn2->fn_guid)
return (-1);
else
return (0);
return (AVL_CMP(fn1->fn_guid, fn2->fn_guid));
}
/*

View File

@ -811,23 +811,32 @@ ddt_prefetch(spa_t *spa, const blkptr_t *bp)
}
}
/*
* Opaque struct used for ddt_key comparison
*
* Overlays a ddt_key_t with an array of uint16_t words so that
* ddt_entry_compare() can subtract corresponding words as int32_t values
* without overflow. DDT_KEY_CMP_LEN is the number of uint16_t words in a
* ddt_key_t.
*
* NOTE(review): words are read in native byte order, so the resulting
* ordering need not match a byte-wise memcmp() on little-endian hosts.
*/
#define DDT_KEY_CMP_LEN (sizeof (ddt_key_t) / sizeof (uint16_t))
typedef struct ddt_key_cmp {
uint16_t u16[DDT_KEY_CMP_LEN];
} ddt_key_cmp_t;
int
ddt_entry_compare(const void *x1, const void *x2)
{
const ddt_entry_t *dde1 = x1;
const ddt_entry_t *dde2 = x2;
const uint64_t *u1 = (const uint64_t *)&dde1->dde_key;
const uint64_t *u2 = (const uint64_t *)&dde2->dde_key;
const ddt_key_cmp_t *k1 = (const ddt_key_cmp_t *)&dde1->dde_key;
const ddt_key_cmp_t *k2 = (const ddt_key_cmp_t *)&dde2->dde_key;
int32_t cmp = 0;
int i;
for (i = 0; i < DDT_KEY_WORDS; i++) {
if (u1[i] < u2[i])
return (-1);
if (u1[i] > u2[i])
return (1);
for (i = 0; i < DDT_KEY_CMP_LEN; i++) {
cmp = (int32_t)k1->u16[i] - (int32_t)k2->u16[i];
if (likely(cmp))
break;
}
return (0);
return (AVL_ISIGN(cmp));
}
static ddt_t *

View File

@ -1793,14 +1793,10 @@ typedef struct guid_map_entry {
static int
guid_compare(const void *arg1, const void *arg2)
{
const guid_map_entry_t *gmep1 = arg1;
const guid_map_entry_t *gmep2 = arg2;
const guid_map_entry_t *gmep1 = (const guid_map_entry_t *)arg1;
const guid_map_entry_t *gmep2 = (const guid_map_entry_t *)arg2;
if (gmep1->guid < gmep2->guid)
return (-1);
else if (gmep1->guid > gmep2->guid)
return (1);
return (0);
return (AVL_CMP(gmep1->guid, gmep2->guid));
}
static void

View File

@ -69,19 +69,13 @@ dbuf_compare(const void *x1, const void *x2)
const dmu_buf_impl_t *d1 = x1;
const dmu_buf_impl_t *d2 = x2;
if (d1->db_level < d2->db_level) {
return (-1);
}
if (d1->db_level > d2->db_level) {
return (1);
}
int cmp = AVL_CMP(d1->db_level, d2->db_level);
if (likely(cmp))
return (cmp);
if (d1->db_blkid < d2->db_blkid) {
return (-1);
}
if (d1->db_blkid > d2->db_blkid) {
return (1);
}
cmp = AVL_CMP(d1->db_blkid, d2->db_blkid);
if (likely(cmp))
return (cmp);
if (d1->db_state == DB_SEARCH) {
ASSERT3S(d2->db_state, !=, DB_SEARCH);
@ -91,13 +85,7 @@ dbuf_compare(const void *x1, const void *x2)
return (1);
}
if ((uintptr_t)d1 < (uintptr_t)d2) {
return (-1);
}
if ((uintptr_t)d1 > (uintptr_t)d2) {
return (1);
}
return (0);
return (AVL_PCMP(d1, d2));
}
/* ARGSUSED */

View File

@ -54,15 +54,10 @@
static int
dsl_deadlist_compare(const void *arg1, const void *arg2)
{
const dsl_deadlist_entry_t *dle1 = arg1;
const dsl_deadlist_entry_t *dle2 = arg2;
const dsl_deadlist_entry_t *dle1 = (const dsl_deadlist_entry_t *)arg1;
const dsl_deadlist_entry_t *dle2 = (const dsl_deadlist_entry_t *)arg2;
if (dle1->dle_mintxg < dle2->dle_mintxg)
return (-1);
else if (dle1->dle_mintxg > dle2->dle_mintxg)
return (+1);
else
return (0);
return (AVL_CMP(dle1->dle_mintxg, dle2->dle_mintxg));
}
static void

View File

@ -393,14 +393,13 @@ typedef struct perm_set {
static int
perm_set_compare(const void *arg1, const void *arg2)
{
const perm_set_t *node1 = arg1;
const perm_set_t *node2 = arg2;
const perm_set_t *node1 = (const perm_set_t *)arg1;
const perm_set_t *node2 = (const perm_set_t *)arg2;
int val;
val = strcmp(node1->p_setname, node2->p_setname);
if (val == 0)
return (0);
return (val > 0 ? 1 : -1);
return (AVL_ISIGN(val));
}
/*

View File

@ -399,25 +399,16 @@ metaslab_class_expandable_space(metaslab_class_t *mc)
static int
metaslab_compare(const void *x1, const void *x2)
{
const metaslab_t *m1 = x1;
const metaslab_t *m2 = x2;
const metaslab_t *m1 = (const metaslab_t *)x1;
const metaslab_t *m2 = (const metaslab_t *)x2;
if (m1->ms_weight < m2->ms_weight)
return (1);
if (m1->ms_weight > m2->ms_weight)
return (-1);
int cmp = AVL_CMP(m2->ms_weight, m1->ms_weight);
if (likely(cmp))
return (cmp);
/*
* If the weights are identical, use the offset to force uniqueness.
*/
if (m1->ms_start < m2->ms_start)
return (-1);
if (m1->ms_start > m2->ms_start)
return (1);
IMPLY(AVL_CMP(m1->ms_start, m2->ms_start) == 0, m1 == m2);
ASSERT3P(m1, ==, m2);
return (0);
return (AVL_CMP(m1->ms_start, m2->ms_start));
}
/*
@ -795,18 +786,11 @@ metaslab_rangesize_compare(const void *x1, const void *x2)
uint64_t rs_size1 = r1->rs_end - r1->rs_start;
uint64_t rs_size2 = r2->rs_end - r2->rs_start;
if (rs_size1 < rs_size2)
return (-1);
if (rs_size1 > rs_size2)
return (1);
int cmp = AVL_CMP(rs_size1, rs_size2);
if (likely(cmp))
return (cmp);
if (r1->rs_start < r2->rs_start)
return (-1);
if (r1->rs_start > r2->rs_start)
return (1);
return (0);
return (AVL_CMP(r1->rs_start, r2->rs_start));
}
/*

View File

@ -111,20 +111,13 @@ range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
static int
range_tree_seg_compare(const void *x1, const void *x2)
{
const range_seg_t *r1 = x1;
const range_seg_t *r2 = x2;
const range_seg_t *r1 = (const range_seg_t *)x1;
const range_seg_t *r2 = (const range_seg_t *)x2;
if (r1->rs_start < r2->rs_start) {
if (r1->rs_end > r2->rs_start)
return (0);
return (-1);
}
if (r1->rs_start > r2->rs_start) {
if (r1->rs_start < r2->rs_end)
return (0);
return (1);
}
return (0);
ASSERT3U(r1->rs_start, <=, r1->rs_end);
ASSERT3U(r2->rs_start, <=, r2->rs_end);
return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}
range_tree_t *

View File

@ -241,31 +241,23 @@ sa_cache_fini(void)
static int
layout_num_compare(const void *arg1, const void *arg2)
{
const sa_lot_t *node1 = arg1;
const sa_lot_t *node2 = arg2;
const sa_lot_t *node1 = (const sa_lot_t *)arg1;
const sa_lot_t *node2 = (const sa_lot_t *)arg2;
if (node1->lot_num > node2->lot_num)
return (1);
else if (node1->lot_num < node2->lot_num)
return (-1);
return (0);
return (AVL_CMP(node1->lot_num, node2->lot_num));
}
static int
layout_hash_compare(const void *arg1, const void *arg2)
{
const sa_lot_t *node1 = arg1;
const sa_lot_t *node2 = arg2;
const sa_lot_t *node1 = (const sa_lot_t *)arg1;
const sa_lot_t *node2 = (const sa_lot_t *)arg2;
if (node1->lot_hash > node2->lot_hash)
return (1);
if (node1->lot_hash < node2->lot_hash)
return (-1);
if (node1->lot_instance > node2->lot_instance)
return (1);
if (node1->lot_instance < node2->lot_instance)
return (-1);
return (0);
int cmp = AVL_CMP(node1->lot_hash, node2->lot_hash);
if (likely(cmp))
return (cmp);
return (AVL_CMP(node1->lot_instance, node2->lot_instance));
}
boolean_t

View File

@ -822,19 +822,14 @@ spa_change_guid(spa_t *spa)
static int
spa_error_entry_compare(const void *a, const void *b)
{
spa_error_entry_t *sa = (spa_error_entry_t *)a;
spa_error_entry_t *sb = (spa_error_entry_t *)b;
const spa_error_entry_t *sa = (const spa_error_entry_t *)a;
const spa_error_entry_t *sb = (const spa_error_entry_t *)b;
int ret;
ret = bcmp(&sa->se_bookmark, &sb->se_bookmark,
ret = memcmp(&sa->se_bookmark, &sb->se_bookmark,
sizeof (zbookmark_phys_t));
if (ret < 0)
return (-1);
else if (ret > 0)
return (1);
else
return (0);
return (AVL_ISIGN(ret));
}
/*

View File

@ -797,18 +797,13 @@ typedef struct spa_aux {
int aux_count;
} spa_aux_t;
static int
static inline int
spa_aux_compare(const void *a, const void *b)
{
const spa_aux_t *sa = a;
const spa_aux_t *sb = b;
const spa_aux_t *sa = (const spa_aux_t *)a;
const spa_aux_t *sb = (const spa_aux_t *)b;
if (sa->aux_guid < sb->aux_guid)
return (-1);
else if (sa->aux_guid > sb->aux_guid)
return (1);
else
return (0);
return (AVL_CMP(sa->aux_guid, sb->aux_guid));
}
void
@ -1774,11 +1769,8 @@ spa_name_compare(const void *a1, const void *a2)
int s;
s = strcmp(s1->spa_name, s2->spa_name);
if (s > 0)
return (1);
if (s < 0)
return (-1);
return (0);
return (AVL_ISIGN(s));
}
void

View File

@ -54,20 +54,14 @@
static int
space_reftree_compare(const void *x1, const void *x2)
{
const space_ref_t *sr1 = x1;
const space_ref_t *sr2 = x2;
const space_ref_t *sr1 = (const space_ref_t *)x1;
const space_ref_t *sr2 = (const space_ref_t *)x2;
if (sr1->sr_offset < sr2->sr_offset)
return (-1);
if (sr1->sr_offset > sr2->sr_offset)
return (1);
int cmp = AVL_CMP(sr1->sr_offset, sr2->sr_offset);
if (likely(cmp))
return (cmp);
if (sr1 < sr2)
return (-1);
if (sr1 > sr2)
return (1);
return (0);
return (AVL_PCMP(sr1, sr2));
}
void

View File

@ -42,14 +42,10 @@ typedef struct unique {
static int
unique_compare(const void *a, const void *b)
{
const unique_t *una = a;
const unique_t *unb = b;
const unique_t *una = (const unique_t *)a;
const unique_t *unb = (const unique_t *)b;
if (una->un_value < unb->un_value)
return (-1);
if (una->un_value > unb->un_value)
return (+1);
return (0);
return (AVL_CMP(una->un_value, unb->un_value));
}
void

View File

@ -104,29 +104,24 @@ static vdc_stats_t vdc_stats = {
#define VDCSTAT_BUMP(stat) atomic_inc_64(&vdc_stats.stat.value.ui64);
static int
static inline int
vdev_cache_offset_compare(const void *a1, const void *a2)
{
const vdev_cache_entry_t *ve1 = a1;
const vdev_cache_entry_t *ve2 = a2;
const vdev_cache_entry_t *ve1 = (const vdev_cache_entry_t *)a1;
const vdev_cache_entry_t *ve2 = (const vdev_cache_entry_t *)a2;
if (ve1->ve_offset < ve2->ve_offset)
return (-1);
if (ve1->ve_offset > ve2->ve_offset)
return (1);
return (0);
return (AVL_CMP(ve1->ve_offset, ve2->ve_offset));
}
static int
vdev_cache_lastused_compare(const void *a1, const void *a2)
{
const vdev_cache_entry_t *ve1 = a1;
const vdev_cache_entry_t *ve2 = a2;
const vdev_cache_entry_t *ve1 = (const vdev_cache_entry_t *)a1;
const vdev_cache_entry_t *ve2 = (const vdev_cache_entry_t *)a2;
if (ddi_time_before(ve1->ve_lastused, ve2->ve_lastused))
return (-1);
if (ddi_time_after(ve1->ve_lastused, ve2->ve_lastused))
return (1);
int cmp = AVL_CMP(ve1->ve_lastused, ve2->ve_lastused);
if (likely(cmp))
return (cmp);
/*
* Among equally old entries, sort by offset to ensure uniqueness.

View File

@ -1015,19 +1015,13 @@ retry:
* among uberblocks with equal txg, choose the one with the latest timestamp.
*/
static int
vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2)
vdev_uberblock_compare(const uberblock_t *ub1, const uberblock_t *ub2)
{
if (ub1->ub_txg < ub2->ub_txg)
return (-1);
if (ub1->ub_txg > ub2->ub_txg)
return (1);
int cmp = AVL_CMP(ub1->ub_txg, ub2->ub_txg);
if (likely(cmp))
return (cmp);
if (ub1->ub_timestamp < ub2->ub_timestamp)
return (-1);
if (ub1->ub_timestamp > ub2->ub_timestamp)
return (1);
return (0);
return (AVL_CMP(ub1->ub_timestamp, ub2->ub_timestamp));
}
struct ubl_cbdata {

View File

@ -174,20 +174,15 @@ int zfs_vdev_write_gap_limit = 4 << 10;
int
vdev_queue_offset_compare(const void *x1, const void *x2)
{
const zio_t *z1 = x1;
const zio_t *z2 = x2;
const zio_t *z1 = (const zio_t *)x1;
const zio_t *z2 = (const zio_t *)x2;
if (z1->io_offset < z2->io_offset)
return (-1);
if (z1->io_offset > z2->io_offset)
return (1);
int cmp = AVL_CMP(z1->io_offset, z2->io_offset);
if (z1 < z2)
return (-1);
if (z1 > z2)
return (1);
if (likely(cmp))
return (cmp);
return (0);
return (AVL_PCMP(z1, z2));
}
static inline avl_tree_t *
@ -209,20 +204,15 @@ vdev_queue_type_tree(vdev_queue_t *vq, zio_type_t t)
int
vdev_queue_timestamp_compare(const void *x1, const void *x2)
{
const zio_t *z1 = x1;
const zio_t *z2 = x2;
const zio_t *z1 = (const zio_t *)x1;
const zio_t *z2 = (const zio_t *)x2;
if (z1->io_timestamp < z2->io_timestamp)
return (-1);
if (z1->io_timestamp > z2->io_timestamp)
return (1);
int cmp = AVL_CMP(z1->io_timestamp, z2->io_timestamp);
if (z1 < z2)
return (-1);
if (z1 > z2)
return (1);
if (likely(cmp))
return (cmp);
return (0);
return (AVL_PCMP(z1, z2));
}
static int

View File

@ -257,15 +257,11 @@ mze_compare(const void *arg1, const void *arg2)
const mzap_ent_t *mze1 = arg1;
const mzap_ent_t *mze2 = arg2;
if (mze1->mze_hash > mze2->mze_hash)
return (+1);
if (mze1->mze_hash < mze2->mze_hash)
return (-1);
if (mze1->mze_cd > mze2->mze_cd)
return (+1);
if (mze1->mze_cd < mze2->mze_cd)
return (-1);
return (0);
int cmp = AVL_CMP(mze1->mze_hash, mze2->mze_hash);
if (likely(cmp))
return (cmp);
return (AVL_CMP(mze1->mze_cd, mze2->mze_cd));
}
static void

View File

@ -71,14 +71,10 @@ static char *nulldomain = "";
static int
idx_compare(const void *arg1, const void *arg2)
{
const fuid_domain_t *node1 = arg1;
const fuid_domain_t *node2 = arg2;
const fuid_domain_t *node1 = (const fuid_domain_t *)arg1;
const fuid_domain_t *node2 = (const fuid_domain_t *)arg2;
if (node1->f_idx < node2->f_idx)
return (-1);
else if (node1->f_idx > node2->f_idx)
return (1);
return (0);
return (AVL_CMP(node1->f_idx, node2->f_idx));
}
/*
@ -87,14 +83,13 @@ idx_compare(const void *arg1, const void *arg2)
static int
domain_compare(const void *arg1, const void *arg2)
{
const fuid_domain_t *node1 = arg1;
const fuid_domain_t *node2 = arg2;
const fuid_domain_t *node1 = (const fuid_domain_t *)arg1;
const fuid_domain_t *node2 = (const fuid_domain_t *)arg2;
int val;
val = strcmp(node1->f_ksid->kd_name, node2->f_ksid->kd_name);
if (val == 0)
return (0);
return (val > 0 ? 1 : -1);
return (AVL_ISIGN(val));
}
void

View File

@ -617,14 +617,10 @@ zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len)
int
zfs_range_compare(const void *arg1, const void *arg2)
{
const rl_t *rl1 = arg1;
const rl_t *rl2 = arg2;
const rl_t *rl1 = (const rl_t *)arg1;
const rl_t *rl2 = (const rl_t *)arg2;
if (rl1->r_off > rl2->r_off)
return (1);
if (rl1->r_off < rl2->r_off)
return (-1);
return (0);
return (AVL_CMP(rl1->r_off, rl2->r_off));
}
#ifdef _KERNEL

View File

@ -228,15 +228,10 @@ zfs_znode_fini(void)
int
zfs_znode_hold_compare(const void *a, const void *b)
{
const znode_hold_t *zh_a = a;
const znode_hold_t *zh_b = b;
const znode_hold_t *zh_a = (const znode_hold_t *)a;
const znode_hold_t *zh_b = (const znode_hold_t *)b;
if (zh_a->zh_obj < zh_b->zh_obj)
return (-1);
else if (zh_a->zh_obj > zh_b->zh_obj)
return (1);
else
return (0);
return (AVL_CMP(zh_a->zh_obj, zh_b->zh_obj));
}
boolean_t

View File

@ -123,17 +123,11 @@ zil_bp_compare(const void *x1, const void *x2)
const dva_t *dva1 = &((zil_bp_node_t *)x1)->zn_dva;
const dva_t *dva2 = &((zil_bp_node_t *)x2)->zn_dva;
if (DVA_GET_VDEV(dva1) < DVA_GET_VDEV(dva2))
return (-1);
if (DVA_GET_VDEV(dva1) > DVA_GET_VDEV(dva2))
return (1);
int cmp = AVL_CMP(DVA_GET_VDEV(dva1), DVA_GET_VDEV(dva2));
if (likely(cmp))
return (cmp);
if (DVA_GET_OFFSET(dva1) < DVA_GET_OFFSET(dva2))
return (-1);
if (DVA_GET_OFFSET(dva1) > DVA_GET_OFFSET(dva2))
return (1);
return (0);
return (AVL_CMP(DVA_GET_OFFSET(dva1), DVA_GET_OFFSET(dva2)));
}
static void
@ -786,12 +780,7 @@ zil_vdev_compare(const void *x1, const void *x2)
const uint64_t v1 = ((zil_vdev_node_t *)x1)->zv_vdev;
const uint64_t v2 = ((zil_vdev_node_t *)x2)->zv_vdev;
if (v1 < v2)
return (-1);
if (v1 > v2)
return (1);
return (0);
return (AVL_CMP(v1, v2));
}
void
@ -1257,12 +1246,7 @@ zil_aitx_compare(const void *x1, const void *x2)
const uint64_t o1 = ((itx_async_node_t *)x1)->ia_foid;
const uint64_t o2 = ((itx_async_node_t *)x2)->ia_foid;
if (o1 < o2)
return (-1);
if (o1 > o2)
return (1);
return (0);
return (AVL_CMP(o1, o2));
}
/*