From f3969ea78b5f54935474d49455cf9c4d6a1e107a Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 22 Jun 2021 19:35:23 -0400 Subject: [PATCH] Optimize small random numbers generation In all places except two, spa_get_random() is used for small values, and the consumers do not require well-seeded, high-quality values. Switch those two exceptions directly to random_get_pseudo_bytes() and optimize spa_get_random(), renaming it to random_in_range(), since it is not related to SPA or ZFS in general. On FreeBSD, directly map random_in_range() to the new prng32_bounded() KPI added in FreeBSD 13. On Linux and in user space, just reduce the type used to uint32_t to avoid the more expensive 64-bit division. Reviewed-by: Ryan Moeller Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored-By: iXsystems, Inc. Closes #12183 --- include/os/freebsd/spl/sys/random.h | 22 +++++++++++++++++++++ include/os/linux/spl/sys/random.h | 15 +++++++++++++++ include/sys/spa.h | 1 - include/sys/zfs_context.h | 15 +++++++++++++++ module/os/linux/zfs/arc_os.c | 4 ++-- module/zfs/metaslab.c | 2 +- module/zfs/mmp.c | 4 ++-- module/zfs/multilist.c | 5 +---- module/zfs/spa.c | 6 +++--- module/zfs/spa_misc.c | 30 +++++++++-------------------- module/zfs/space_map.c | 2 +- module/zfs/vdev_indirect.c | 4 ++-- module/zfs/vdev_mirror.c | 2 +- module/zfs/zil.c | 6 ++++-- module/zfs/zio_compress.c | 2 +- module/zfs/zio_inject.c | 8 ++++---- 16 files changed, 83 insertions(+), 45 deletions(-) diff --git a/include/os/freebsd/spl/sys/random.h b/include/os/freebsd/spl/sys/random.h index b3c9115f5..746275e53 100644 --- a/include/os/freebsd/spl/sys/random.h +++ b/include/os/freebsd/spl/sys/random.h @@ -30,6 +30,9 @@ #define _OPENSOLARIS_SYS_RANDOM_H_ #include_next <sys/random.h> +#if __FreeBSD_version >= 1300108 +#include <sys/prng.h> +#endif static inline int random_get_bytes(uint8_t *p, size_t s) @@ -45,4 +48,23 @@ random_get_pseudo_bytes(uint8_t *p, size_t s) return (0); } +static inline uint32_t +random_in_range(uint32_t range) 
+{ +#if __FreeBSD_version >= 1300108 + return (prng32_bounded(range)); +#else + uint32_t r; + + ASSERT(range != 0); + + if (range == 1) + return (0); + + (void) random_get_pseudo_bytes((void *)&r, sizeof (r)); + + return (r % range); +#endif +} + #endif /* !_OPENSOLARIS_SYS_RANDOM_H_ */ diff --git a/include/os/linux/spl/sys/random.h b/include/os/linux/spl/sys/random.h index 1b8cb60d0..2c446e155 100644 --- a/include/os/linux/spl/sys/random.h +++ b/include/os/linux/spl/sys/random.h @@ -36,4 +36,19 @@ random_get_bytes(uint8_t *ptr, size_t len) extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len); +static __inline__ uint32_t +random_in_range(uint32_t range) +{ + uint32_t r; + + ASSERT(range != 0); + + if (range == 1) + return (0); + + (void) random_get_pseudo_bytes((void *)&r, sizeof (r)); + + return (r % range); +} + #endif /* _SPL_RANDOM_H */ diff --git a/include/sys/spa.h b/include/sys/spa.h index 9dd47a1eb..532926e12 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -1086,7 +1086,6 @@ extern spa_t *spa_by_guid(uint64_t pool_guid, uint64_t device_guid); extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid); extern char *spa_strdup(const char *); extern void spa_strfree(char *); -extern uint64_t spa_get_random(uint64_t range); extern uint64_t spa_generate_guid(spa_t *spa); extern void snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp); extern void spa_freeze(spa_t *spa); diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index ffb20e1fe..dbeb323ba 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -640,6 +640,21 @@ extern int lowbit64(uint64_t i); extern int random_get_bytes(uint8_t *ptr, size_t len); extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len); +static __inline__ uint32_t +random_in_range(uint32_t range) +{ + uint32_t r; + + ASSERT(range != 0); + + if (range == 1) + return (0); + + (void) random_get_pseudo_bytes((void *)&r, sizeof (r)); + + return (r % range); +} 
+ extern void kernel_init(int mode); extern void kernel_fini(void); extern void random_init(void); diff --git a/module/os/linux/zfs/arc_os.c b/module/os/linux/zfs/arc_os.c index b03ad8318..415cfc281 100644 --- a/module/os/linux/zfs/arc_os.c +++ b/module/os/linux/zfs/arc_os.c @@ -437,7 +437,7 @@ arc_available_memory(void) int64_t lowest = INT64_MAX; /* Every 100 calls, free a small amount */ - if (spa_get_random(100) == 0) + if (random_in_range(100) == 0) lowest = -1024; return (lowest); @@ -458,7 +458,7 @@ arc_all_memory(void) uint64_t arc_free_memory(void) { - return (spa_get_random(arc_all_memory() * 20 / 100)); + return (random_in_range(arc_all_memory() * 20 / 100)); } void diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 08d7a5635..23f3e2989 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -5084,7 +5084,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, * damage can result in extremely long reconstruction times. This * will also test spilling from special to normal. 
*/ - if (psize >= metaslab_force_ganging && (spa_get_random(100) < 3)) { + if (psize >= metaslab_force_ganging && (random_in_range(100) < 3)) { metaslab_trace_add(zal, NULL, NULL, psize, d, TRACE_FORCE_GANG, allocator); return (SET_ERROR(ENOSPC)); diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c index d9ed457a7..f67a4eb22 100644 --- a/module/zfs/mmp.c +++ b/module/zfs/mmp.c @@ -524,9 +524,9 @@ mmp_write_uberblock(spa_t *spa) mutex_exit(&mmp->mmp_io_lock); offset = VDEV_UBERBLOCK_OFFSET(vd, VDEV_UBERBLOCK_COUNT(vd) - - MMP_BLOCKS_PER_LABEL + spa_get_random(MMP_BLOCKS_PER_LABEL)); + MMP_BLOCKS_PER_LABEL + random_in_range(MMP_BLOCKS_PER_LABEL)); - label = spa_get_random(VDEV_LABELS); + label = random_in_range(VDEV_LABELS); vdev_label_write(zio, vd, label, ub_abd, offset, VDEV_UBERBLOCK_SIZE(vd), mmp_write_done, mmp, flags | ZIO_FLAG_DONT_PROPAGATE); diff --git a/module/zfs/multilist.c b/module/zfs/multilist.c index eeac73bd7..8bbc9b376 100644 --- a/module/zfs/multilist.c +++ b/module/zfs/multilist.c @@ -20,9 +20,6 @@ #include <sys/multilist.h> #include <sys/trace_zfs.h> -/* needed for spa_get_random() */ -#include <sys/spa.h> - /* * This overrides the number of sublists in each multilist_t, which defaults * to the number of CPUs in the system (see multilist_create()). 
@@ -275,7 +272,7 @@ multilist_get_num_sublists(multilist_t *ml) unsigned int multilist_get_random_index(multilist_t *ml) { - return (spa_get_random(ml->ml_num_sublists)); + return (random_in_range(ml->ml_num_sublists)); } /* Lock and return the sublist specified at the given index */ diff --git a/module/zfs/spa.c b/module/zfs/spa.c index bacd04fc0..f6dce076d 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -3186,7 +3186,7 @@ spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config) import_delay = spa_activity_check_duration(spa, ub); /* Add a small random factor in case of simultaneous imports (0-25%) */ - import_delay += import_delay * spa_get_random(250) / 1000; + import_delay += import_delay * random_in_range(250) / 1000; import_expire = gethrtime() + import_delay; @@ -4633,7 +4633,7 @@ spa_ld_checkpoint_rewind(spa_t *spa) vdev_t *svd[SPA_SYNC_MIN_VDEVS] = { NULL }; int svdcount = 0; int children = rvd->vdev_children; - int c0 = spa_get_random(children); + int c0 = random_in_range(children); for (int c = 0; c < children; c++) { vdev_t *vd = rvd->vdev_child[(c0 + c) % children]; @@ -9132,7 +9132,7 @@ spa_sync_rewrite_vdev_config(spa_t *spa, dmu_tx_t *tx) vdev_t *svd[SPA_SYNC_MIN_VDEVS] = { NULL }; int svdcount = 0; int children = rvd->vdev_children; - int c0 = spa_get_random(children); + int c0 = random_in_range(children); for (int c = 0; c < children; c++) { vdev_t *vd = diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 65b0988d6..e2523231d 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -1494,32 +1494,21 @@ spa_strfree(char *s) kmem_free(s, strlen(s) + 1); } -uint64_t -spa_get_random(uint64_t range) -{ - uint64_t r; - - ASSERT(range != 0); - - if (range == 1) - return (0); - - (void) random_get_pseudo_bytes((void *)&r, sizeof (uint64_t)); - - return (r % range); -} - uint64_t spa_generate_guid(spa_t *spa) { - uint64_t guid = spa_get_random(-1ULL); + uint64_t guid; if (spa != NULL) { - while (guid == 0 || 
spa_guid_exists(spa_guid(spa), guid)) - guid = spa_get_random(-1ULL); + do { + (void) random_get_pseudo_bytes((void *)&guid, + sizeof (guid)); + } while (guid == 0 || spa_guid_exists(spa_guid(spa), guid)); } else { - while (guid == 0 || spa_guid_exists(guid, 0)) - guid = spa_get_random(-1ULL); + do { + (void) random_get_pseudo_bytes((void *)&guid, + sizeof (guid)); + } while (guid == 0 || spa_guid_exists(guid, 0)); } return (guid); @@ -2888,7 +2877,6 @@ EXPORT_SYMBOL(spa_maxdnodesize); EXPORT_SYMBOL(spa_guid_exists); EXPORT_SYMBOL(spa_strdup); EXPORT_SYMBOL(spa_strfree); -EXPORT_SYMBOL(spa_get_random); EXPORT_SYMBOL(spa_generate_guid); EXPORT_SYMBOL(snprintf_blkptr); EXPORT_SYMBOL(spa_freeze); diff --git a/module/zfs/space_map.c b/module/zfs/space_map.c index 138e6c75e..11d479892 100644 --- a/module/zfs/space_map.c +++ b/module/zfs/space_map.c @@ -726,7 +726,7 @@ space_map_write_impl(space_map_t *sm, range_tree_t *rt, maptype_t maptype, length > SM_RUN_MAX || vdev_id != SM_NO_VDEVID || (zfs_force_some_double_word_sm_entries && - spa_get_random(100) == 0))) + random_in_range(100) == 0))) words = 2; space_map_write_seg(sm, rs_get_start(rs, rt), rs_get_end(rs, diff --git a/module/zfs/vdev_indirect.c b/module/zfs/vdev_indirect.c index 1b05ff03a..ffe592b2a 100644 --- a/module/zfs/vdev_indirect.c +++ b/module/zfs/vdev_indirect.c @@ -1578,7 +1578,7 @@ vdev_indirect_splits_enumerate_randomly(indirect_vsd_t *iv, zio_t *zio) indirect_child_t *ic = list_head(&is->is_unique_child); int children = is->is_unique_children; - for (int i = spa_get_random(children); i > 0; i--) + for (int i = random_in_range(children); i > 0; i--) ic = list_next(&is->is_unique_child, ic); ASSERT3P(ic, !=, NULL); @@ -1742,7 +1742,7 @@ vdev_indirect_reconstruct_io_done(zio_t *zio) * Known_good will be TRUE when reconstruction is known to be possible. 
*/ if (zfs_reconstruct_indirect_damage_fraction != 0 && - spa_get_random(zfs_reconstruct_indirect_damage_fraction) == 0) + random_in_range(zfs_reconstruct_indirect_damage_fraction) == 0) known_good = (vdev_indirect_splits_damage(iv, zio) == 0); /* diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c index 106678a87..5eb331046 100644 --- a/module/zfs/vdev_mirror.c +++ b/module/zfs/vdev_mirror.c @@ -496,7 +496,7 @@ vdev_mirror_preferred_child_randomize(zio_t *zio) int p; if (mm->mm_root) { - p = spa_get_random(mm->mm_preferred_cnt); + p = random_in_range(mm->mm_preferred_cnt); return (vdev_mirror_dva_select(zio, p)); } diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 7f11c3913..78d0711cc 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -205,8 +205,10 @@ zil_init_log_chain(zilog_t *zilog, blkptr_t *bp) { zio_cksum_t *zc = &bp->blk_cksum; - zc->zc_word[ZIL_ZC_GUID_0] = spa_get_random(-1ULL); - zc->zc_word[ZIL_ZC_GUID_1] = spa_get_random(-1ULL); + (void) random_get_pseudo_bytes((void *)&zc->zc_word[ZIL_ZC_GUID_0], + sizeof (zc->zc_word[ZIL_ZC_GUID_0])); + (void) random_get_pseudo_bytes((void *)&zc->zc_word[ZIL_ZC_GUID_1], + sizeof (zc->zc_word[ZIL_ZC_GUID_1])); zc->zc_word[ZIL_ZC_OBJSET] = dmu_objset_id(zilog->zl_os); zc->zc_word[ZIL_ZC_SEQ] = 1ULL; } diff --git a/module/zfs/zio_compress.c b/module/zfs/zio_compress.c index 2db3cec35..33602bd47 100644 --- a/module/zfs/zio_compress.c +++ b/module/zfs/zio_compress.c @@ -201,7 +201,7 @@ zio_decompress_data(enum zio_compress c, abd_t *src, void *dst, * in non-ECC RAM), we handle this error (and test it). 
*/ if (zio_decompress_fail_fraction != 0 && - spa_get_random(zio_decompress_fail_fraction) == 0) + random_in_range(zio_decompress_fail_fraction) == 0) ret = SET_ERROR(EINVAL); return (ret); diff --git a/module/zfs/zio_inject.c b/module/zfs/zio_inject.c index e56ea8868..feaf41dc6 100644 --- a/module/zfs/zio_inject.c +++ b/module/zfs/zio_inject.c @@ -117,7 +117,7 @@ freq_triggered(uint32_t frequency) */ uint32_t maximum = (frequency <= 100) ? 100 : ZI_PERCENTAGE_MAX; - return (spa_get_random(maximum) < frequency); + return (random_in_range(maximum) < frequency); } /* @@ -347,12 +347,12 @@ zio_inject_bitflip_cb(void *data, size_t len, void *private) { zio_t *zio __maybe_unused = private; uint8_t *buffer = data; - uint_t byte = spa_get_random(len); + uint_t byte = random_in_range(len); ASSERT(zio->io_type == ZIO_TYPE_READ); /* flip a single random bit in an abd data buffer */ - buffer[byte] ^= 1 << spa_get_random(8); + buffer[byte] ^= 1 << random_in_range(8); return (1); /* stop after first flip */ } @@ -493,7 +493,7 @@ zio_handle_ignored_writes(zio_t *zio) } /* Have a "problem" writing 60% of the time */ - if (spa_get_random(100) < 60) + if (random_in_range(100) < 60) zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; break; }