Reduce fletcher4 and raidz benchmark times

At module load time, all of the available fletcher4 and raidz
implementations are benchmarked for a fixed amount of time to
determine the fastest available implementation.  Manual testing has
shown that this time can be significantly reduced with negligible
effect on the final results.

This commit changes the benchmark time to 1ms, which can reduce the
module load time by over a second on x86_64.  On an x86_64 system
with sse3, ssse3, and avx2 instructions, the benchmark times change
as follows:

    Fletcher4    603ms   -> 15ms
    RAIDZ        1,322ms -> 64ms

Reviewed-by: Matthew Macy <mmacy@freebsd.org>
Reviewed-by: George Melikov <mail@gmelikov.ru>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #11282
This commit is contained in:
Brian Behlendorf 2020-12-06 09:57:20 -08:00
parent f21d1f8fad
commit 07ca433973
2 changed files with 3 additions and 3 deletions

View File

@ -660,7 +660,7 @@ fletcher_4_kstat_addr(kstat_t *ksp, loff_t n)
fletcher_4_fastest_impl.compute_ ## type = src->compute_ ## type; \ fletcher_4_fastest_impl.compute_ ## type = src->compute_ ## type; \
} }
#define FLETCHER_4_BENCH_NS (MSEC2NSEC(50)) /* 50ms */ #define FLETCHER_4_BENCH_NS (MSEC2NSEC(1)) /* 1ms */
typedef void fletcher_checksum_func_t(const void *, uint64_t, const void *, typedef void fletcher_checksum_func_t(const void *, uint64_t, const void *,
zio_cksum_t *); zio_cksum_t *);

View File

@ -360,7 +360,7 @@ raidz_math_kstat_addr(kstat_t *ksp, loff_t n)
#define BENCH_D_COLS (8ULL) #define BENCH_D_COLS (8ULL)
#define BENCH_COLS (BENCH_D_COLS + PARITY_PQR) #define BENCH_COLS (BENCH_D_COLS + PARITY_PQR)
#define BENCH_ZIO_SIZE (1ULL << SPA_OLD_MAXBLOCKSHIFT) /* 128 kiB */ #define BENCH_ZIO_SIZE (1ULL << SPA_OLD_MAXBLOCKSHIFT) /* 128 kiB */
#define BENCH_NS MSEC2NSEC(25) /* 25ms */ #define BENCH_NS MSEC2NSEC(1) /* 1ms */
typedef void (*benchmark_fn)(raidz_map_t *rm, const int fn); typedef void (*benchmark_fn)(raidz_map_t *rm, const int fn);
@ -410,7 +410,7 @@ benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn)
t_start = gethrtime(); t_start = gethrtime();
do { do {
for (i = 0; i < 25; i++, run_cnt++) for (i = 0; i < 5; i++, run_cnt++)
bench_fn(bench_rm, fn); bench_fn(bench_rm, fn);
t_diff = gethrtime() - t_start; t_diff = gethrtime() - t_start;