Fixes and enhancements of SIMD raidz parity

- Implementation lock replaced with atomic variable

- Trailing whitespace is removed from user specified parameter, to enhance
experience when using commands that add newline, e.g. `echo`

- raidz_test: remove dependency on `getrusage()` and RUSAGE_THREAD, Issue #4813

- silence `cppcheck` in vdev_raidz, partial solution of Issue #1392

- Minor fixes and cleanups

- Enable use of original parity methods in [fastest] configuration.
New opaque original ops structure, representing native methods, is added
to supported raidz methods. Original parity methods are executed if selected
implementation has NULL fn pointer.

Signed-off-by: Gvozden Neskovic <neskovic@gmail.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #4813
Issue #1392
This commit is contained in:
Gvozden Neskovic
2016-07-17 19:41:11 +02:00
committed by Brian Behlendorf
parent 1d9b3bd8fb
commit c9187d867f
9 changed files with 191 additions and 178 deletions
+10 -14
View File
@@ -458,8 +458,8 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols,
zio->io_vsd = rm;
zio->io_vsd_ops = &vdev_raidz_vsd_ops;
/* RAIDZ ops init */
vdev_raidz_math_get_ops(rm);
/* init RAIDZ parity ops */
rm->rm_ops = vdev_raidz_math_get_ops();
return (rm);
}
@@ -611,10 +611,9 @@ vdev_raidz_generate_parity_pqr(raidz_map_t *rm)
void
vdev_raidz_generate_parity(raidz_map_t *rm)
{
if (rm->rm_ops) {
vdev_raidz_math_generate(rm);
/* Generate using the new math implementation */
if (vdev_raidz_math_generate(rm) != RAIDZ_ORIGINAL_IMPL)
return;
}
switch (rm->rm_firstdatacol) {
case 1:
@@ -1284,7 +1283,7 @@ vdev_raidz_reconstruct(raidz_map_t *rm, const int *t, int nt)
{
int tgts[VDEV_RAIDZ_MAXPARITY], *dt;
int ntgts;
int i, c;
int i, c, ret;
int code;
int nbadparity, nbaddata;
int parity_valid[VDEV_RAIDZ_MAXPARITY];
@@ -1322,14 +1321,11 @@ vdev_raidz_reconstruct(raidz_map_t *rm, const int *t, int nt)
dt = &tgts[nbadparity];
/*
* Reconstruct using the new math implementation if
* rm_ops is set.
*/
if (rm->rm_ops) {
return (vdev_raidz_math_reconstruct(rm, parity_valid, dt,
nbaddata));
}
/* Reconstruct using the new math implementation */
ret = vdev_raidz_math_reconstruct(rm, parity_valid, dt, nbaddata);
if (ret != RAIDZ_ORIGINAL_IMPL)
return (ret);
/*
* See if we can use any of our optimized reconstruction routines.
+151 -124
View File
@@ -31,8 +31,22 @@
#include <sys/vdev_raidz.h>
#include <sys/vdev_raidz_impl.h>
extern boolean_t raidz_will_scalar_work(void);
/* Opaque implementation with NULL methods to represent original methods */
static const raidz_impl_ops_t vdev_raidz_original_impl = {
.name = "original",
.is_supported = raidz_will_scalar_work,
};
/* RAIDZ parity op that contain the fastest methods */
static raidz_impl_ops_t vdev_raidz_fastest_impl = {
.name = "fastest"
};
/* All compiled in implementations */
const raidz_impl_ops_t *raidz_all_maths[] = {
&vdev_raidz_original_impl,
&vdev_raidz_scalar_impl,
#if defined(__x86_64) && defined(HAVE_SSE2) /* only x86_64 for now */
&vdev_raidz_sse2_impl,
@@ -49,30 +63,19 @@ const raidz_impl_ops_t *raidz_all_maths[] = {
static boolean_t raidz_math_initialized = B_FALSE;
/* Select raidz implementation */
static enum vdev_raidz_impl_sel {
IMPL_FASTEST = -1,
IMPL_ORIGINAL = -2,
IMPL_CYCLE = -3,
IMPL_SCALAR = 0,
} zfs_vdev_raidz_impl = IMPL_SCALAR;
#define IMPL_FASTEST (UINT32_MAX)
#define IMPL_CYCLE (UINT32_MAX - 1)
#define IMPL_ORIGINAL (0)
#define IMPL_SCALAR (1)
/* selected implementation and its lock */
static krwlock_t vdev_raidz_impl_lock;
static raidz_impl_ops_t *vdev_raidz_used_impl =
(raidz_impl_ops_t *) &vdev_raidz_scalar_impl;
static boolean_t vdev_raidz_impl_user_set = B_FALSE;
#define RAIDZ_IMPL_READ(i) (*(volatile uint32_t *) &(i))
/* RAIDZ op that contain the fastest routines */
static raidz_impl_ops_t vdev_raidz_fastest_impl = {
.name = "fastest"
};
static uint32_t zfs_vdev_raidz_impl = IMPL_SCALAR;
static uint32_t user_sel_impl = IMPL_FASTEST;
/* Hold all supported implementations */
size_t raidz_supp_impl_cnt = 1;
raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths) + 1] = {
(raidz_impl_ops_t *) &vdev_raidz_scalar_impl, /* scalar is supported */
NULL
};
static size_t raidz_supp_impl_cnt = 0;
static raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths)];
/*
* kstats values for supported impl & original methods
@@ -87,33 +90,52 @@ static kstat_t *raidz_math_kstat = NULL;
* Selects the raidz operation for raidz_map
* If rm_ops is set to NULL original raidz implementation will be used
*/
void
vdev_raidz_math_get_ops(raidz_map_t *rm)
raidz_impl_ops_t *
vdev_raidz_math_get_ops()
{
rw_enter(&vdev_raidz_impl_lock, RW_READER);
rm->rm_ops = vdev_raidz_used_impl;
raidz_impl_ops_t *ops = NULL;
const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
switch (impl) {
case IMPL_FASTEST:
ASSERT(raidz_math_initialized);
ops = &vdev_raidz_fastest_impl;
break;
#if !defined(_KERNEL)
if (zfs_vdev_raidz_impl == IMPL_CYCLE) {
case IMPL_CYCLE:
{
ASSERT(raidz_math_initialized);
ASSERT3U(raidz_supp_impl_cnt, >, 0);
/* Cycle through all supported implementations */
static size_t cycle_impl_idx = 0;
size_t idx;
/*
* Cycle through all supported new implementations, and
* when idx == raidz_supp_impl_cnt, use the original
*/
idx = (++cycle_impl_idx) % (raidz_supp_impl_cnt + 1);
rm->rm_ops = raidz_supp_impl[idx];
size_t idx = (++cycle_impl_idx) % raidz_supp_impl_cnt;
ops = raidz_supp_impl[idx];
}
break;
#endif
case IMPL_ORIGINAL:
ops = (raidz_impl_ops_t *) &vdev_raidz_original_impl;
break;
case IMPL_SCALAR:
ops = (raidz_impl_ops_t *) &vdev_raidz_scalar_impl;
break;
default:
ASSERT(raidz_math_initialized);
ASSERT3U(impl, <, raidz_supp_impl_cnt);
ASSERT3U(raidz_supp_impl_cnt, >, 0);
ops = raidz_supp_impl[impl];
break;
}
rw_exit(&vdev_raidz_impl_lock);
ASSERT3P(ops, !=, NULL);
return (ops);
}
/*
* Select parity generation method for raidz_map
*/
void
int
vdev_raidz_math_generate(raidz_map_t *rm)
{
raidz_gen_f gen_parity = NULL;
@@ -135,13 +157,17 @@ vdev_raidz_math_generate(raidz_map_t *rm)
break;
}
ASSERT(gen_parity != NULL);
/* if method is NULL execute the original implementation */
if (gen_parity == NULL)
return (RAIDZ_ORIGINAL_IMPL);
gen_parity(rm);
return (0);
}
static raidz_rec_f
_reconstruct_fun_raidz1(raidz_map_t *rm, const int *parity_valid,
reconstruct_fun_p_sel(raidz_map_t *rm, const int *parity_valid,
const int nbaddata)
{
if (nbaddata == 1 && parity_valid[CODE_P]) {
@@ -151,7 +177,7 @@ _reconstruct_fun_raidz1(raidz_map_t *rm, const int *parity_valid,
}
static raidz_rec_f
_reconstruct_fun_raidz2(raidz_map_t *rm, const int *parity_valid,
reconstruct_fun_pq_sel(raidz_map_t *rm, const int *parity_valid,
const int nbaddata)
{
if (nbaddata == 1) {
@@ -168,7 +194,7 @@ _reconstruct_fun_raidz2(raidz_map_t *rm, const int *parity_valid,
}
static raidz_rec_f
_reconstruct_fun_raidz3(raidz_map_t *rm, const int *parity_valid,
reconstruct_fun_pqr_sel(raidz_map_t *rm, const int *parity_valid,
const int nbaddata)
{
if (nbaddata == 1) {
@@ -208,27 +234,25 @@ vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid,
raidz_rec_f rec_data = NULL;
switch (raidz_parity(rm)) {
case 1:
rec_data = _reconstruct_fun_raidz1(rm, parity_valid,
nbaddata);
break;
case 2:
rec_data = _reconstruct_fun_raidz2(rm, parity_valid,
nbaddata);
break;
case 3:
rec_data = _reconstruct_fun_raidz3(rm, parity_valid,
nbaddata);
break;
default:
cmn_err(CE_PANIC, "invalid RAID-Z configuration %d",
raidz_parity(rm));
break;
case PARITY_P:
rec_data = reconstruct_fun_p_sel(rm, parity_valid, nbaddata);
break;
case PARITY_PQ:
rec_data = reconstruct_fun_pq_sel(rm, parity_valid, nbaddata);
break;
case PARITY_PQR:
rec_data = reconstruct_fun_pqr_sel(rm, parity_valid, nbaddata);
break;
default:
cmn_err(CE_PANIC, "invalid RAID-Z configuration %d",
raidz_parity(rm));
break;
}
ASSERT(rec_data != NULL);
return (rec_data(rm, dt));
if (rec_data == NULL)
return (RAIDZ_ORIGINAL_IMPL);
else
return (rec_data(rm, dt));
}
const char *raidz_gen_name[] = {
@@ -309,13 +333,10 @@ benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn)
uint64_t run_cnt, speed, best_speed = 0;
hrtime_t t_start, t_diff;
raidz_impl_ops_t *curr_impl;
raidz_impl_kstat_t * fstat = &raidz_impl_kstats[raidz_supp_impl_cnt];
int impl, i;
/*
* Use the sentinel (NULL) from the end of raidz_supp_impl_cnt
* to run "original" implementation (bench_rm->rm_ops = NULL)
*/
for (impl = 0; impl <= raidz_supp_impl_cnt; impl++) {
for (impl = 0; impl < raidz_supp_impl_cnt; impl++) {
/* set an implementation to benchmark */
curr_impl = raidz_supp_impl[impl];
bench_rm->rm_ops = curr_impl;
@@ -338,16 +359,19 @@ benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn)
else
raidz_impl_kstats[impl].rec[fn].value.ui64 = speed;
/* if curr_impl==NULL the original impl is benchmarked */
if (curr_impl != NULL && speed > best_speed) {
/* Update fastest implementation method */
if (speed > best_speed) {
best_speed = speed;
if (bench_fn == benchmark_gen_impl)
if (bench_fn == benchmark_gen_impl) {
vdev_raidz_fastest_impl.gen[fn] =
curr_impl->gen[fn];
else
fstat->gen[fn].value.ui64 = speed;
} else {
vdev_raidz_fastest_impl.rec[fn] =
curr_impl->rec[fn];
fstat->rec[fn].value.ui64 = speed;
}
}
}
}
@@ -361,9 +385,6 @@ vdev_raidz_math_init(void)
uint64_t bench_parity;
int i, c, fn;
/* init & vdev_raidz_impl_lock */
rw_init(&vdev_raidz_impl_lock, NULL, RW_DEFAULT, NULL);
/* move supported impl into raidz_supp_impl */
for (i = 0, c = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
curr_impl = (raidz_impl_ops_t *) raidz_all_maths[i];
@@ -379,20 +400,16 @@ vdev_raidz_math_init(void)
raidz_supp_impl[c++] = (raidz_impl_ops_t *) curr_impl;
}
}
membar_producer(); /* complete raidz_supp_impl[] init */
raidz_supp_impl_cnt = c; /* number of supported impl */
raidz_supp_impl[c] = NULL; /* sentinel */
/* init kstat for original routines */
init_raidz_kstat(&(raidz_impl_kstats[raidz_supp_impl_cnt]), "original");
init_raidz_kstat(&(raidz_impl_kstats[raidz_supp_impl_cnt]), "fastest");
#if !defined(_KERNEL)
/*
* Skip benchmarking and use last implementation as fastest
*/
/* Skip benchmarking and use last implementation as fastest */
memcpy(&vdev_raidz_fastest_impl, raidz_supp_impl[raidz_supp_impl_cnt-1],
sizeof (vdev_raidz_fastest_impl));
vdev_raidz_fastest_impl.name = "fastest";
strcpy(vdev_raidz_fastest_impl.name, "fastest");
raidz_math_initialized = B_TRUE;
@@ -407,12 +424,13 @@ vdev_raidz_math_init(void)
bench_zio->io_size = BENCH_ZIO_SIZE; /* only data columns */
bench_zio->io_data = zio_data_buf_alloc(BENCH_ZIO_SIZE);
VERIFY(bench_zio->io_data);
memset(bench_zio->io_data, 0xAA, BENCH_ZIO_SIZE); /* warm up */
/* Benchmark parity generation methods */
for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
bench_parity = fn + 1;
/* New raidz_map is needed for each generate_p/q/r */
bench_rm = vdev_raidz_map_alloc(bench_zio, 9,
bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
BENCH_D_COLS + bench_parity, bench_parity);
benchmark_raidz_impl(bench_rm, fn, benchmark_gen_impl);
@@ -421,7 +439,8 @@ vdev_raidz_math_init(void)
}
/* Benchmark data reconstruction methods */
bench_rm = vdev_raidz_map_alloc(bench_zio, 9, BENCH_COLS, PARITY_PQR);
bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
BENCH_COLS, PARITY_PQR);
for (fn = 0; fn < RAIDZ_REC_NUM; fn++)
benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl);
@@ -444,9 +463,8 @@ vdev_raidz_math_init(void)
}
/* Finish initialization */
atomic_swap_32(&zfs_vdev_raidz_impl, user_sel_impl);
raidz_math_initialized = B_TRUE;
if (!vdev_raidz_impl_user_set)
VERIFY0(vdev_raidz_impl_set("fastest"));
}
void
@@ -460,33 +478,33 @@ vdev_raidz_math_fini(void)
raidz_math_kstat = NULL;
}
rw_destroy(&vdev_raidz_impl_lock);
/* fini impl */
for (i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
curr_impl = raidz_all_maths[i];
if (curr_impl->fini)
curr_impl->fini();
}
}
static const
struct {
char *name;
raidz_impl_ops_t *impl;
enum vdev_raidz_impl_sel sel;
static const struct {
char *name;
uint32_t sel;
} math_impl_opts[] = {
{ "fastest", &vdev_raidz_fastest_impl, IMPL_FASTEST },
{ "original", NULL, IMPL_ORIGINAL },
#if !defined(_KERNEL)
{ "cycle", NULL, IMPL_CYCLE },
{ "cycle", IMPL_CYCLE },
#endif
{ "fastest", IMPL_FASTEST },
{ "original", IMPL_ORIGINAL },
{ "scalar", IMPL_SCALAR }
};
/*
* Function sets desired raidz implementation.
* If called after module_init(), vdev_raidz_impl_lock must be held for writing.
*
* If we are called before init(), user preference will be saved in
* user_sel_impl, and applied in later init() call. This occurs when module
* parameter is specified on module load. Otherwise, directly update
* zfs_vdev_raidz_impl.
*
* @val Name of raidz implementation to use
* @param Unused.
@@ -494,42 +512,58 @@ struct {
static int
zfs_vdev_raidz_impl_set(const char *val, struct kernel_param *kp)
{
int err = -EINVAL;
char req_name[RAIDZ_IMPL_NAME_MAX];
uint32_t impl = RAIDZ_IMPL_READ(user_sel_impl);
size_t i;
/* sanitize input */
i = strnlen(val, RAIDZ_IMPL_NAME_MAX);
if (i == 0 || i == RAIDZ_IMPL_NAME_MAX)
return (err);
strlcpy(req_name, val, RAIDZ_IMPL_NAME_MAX);
while (i > 0 && !!isspace(req_name[i-1]))
i--;
req_name[i] = '\0';
/* Check mandatory options */
for (i = 0; i < ARRAY_SIZE(math_impl_opts); i++) {
if (strcmp(val, math_impl_opts[i].name) == 0) {
zfs_vdev_raidz_impl = math_impl_opts[i].sel;
vdev_raidz_used_impl = math_impl_opts[i].impl;
vdev_raidz_impl_user_set = B_TRUE;
return (0);
if (strcmp(req_name, math_impl_opts[i].name) == 0) {
impl = math_impl_opts[i].sel;
err = 0;
break;
}
}
/* check all supported implementations */
for (i = 0; i < raidz_supp_impl_cnt; i++) {
if (strcmp(val, raidz_supp_impl[i]->name) == 0) {
zfs_vdev_raidz_impl = i;
vdev_raidz_used_impl = raidz_supp_impl[i];
vdev_raidz_impl_user_set = B_TRUE;
return (0);
/* check all supported impl if init() was already called */
if (err != 0 && raidz_math_initialized) {
/* check all supported implementations */
for (i = 0; i < raidz_supp_impl_cnt; i++) {
if (strcmp(req_name, raidz_supp_impl[i]->name) == 0) {
impl = i;
err = 0;
break;
}
}
}
return (-EINVAL);
if (err == 0) {
if (raidz_math_initialized)
atomic_swap_32(&zfs_vdev_raidz_impl, impl);
else
atomic_swap_32(&user_sel_impl, impl);
}
return (err);
}
int
vdev_raidz_impl_set(const char *val)
{
int err;
ASSERT(raidz_math_initialized);
rw_enter(&vdev_raidz_impl_lock, RW_WRITER);
err = zfs_vdev_raidz_impl_set(val, NULL);
rw_exit(&vdev_raidz_impl_lock);
return (err);
return (zfs_vdev_raidz_impl_set(val, NULL));
}
#if defined(_KERNEL) && defined(HAVE_SPL)
@@ -538,29 +572,22 @@ zfs_vdev_raidz_impl_get(char *buffer, struct kernel_param *kp)
{
int i, cnt = 0;
char *fmt;
const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
ASSERT(raidz_math_initialized);
rw_enter(&vdev_raidz_impl_lock, RW_READER);
/* list mandatory options */
for (i = 0; i < ARRAY_SIZE(math_impl_opts); i++) {
if (math_impl_opts[i].sel == zfs_vdev_raidz_impl)
fmt = "[%s] ";
else
fmt = "%s ";
for (i = 0; i < ARRAY_SIZE(math_impl_opts) - 2; i++) {
fmt = (impl == math_impl_opts[i].sel) ? "[%s] " : "%s ";
cnt += sprintf(buffer + cnt, fmt, math_impl_opts[i].name);
}
/* list all supported implementations */
for (i = 0; i < raidz_supp_impl_cnt; i++) {
fmt = (i == zfs_vdev_raidz_impl) ? "[%s] " : "%s ";
fmt = (i == impl) ? "[%s] " : "%s ";
cnt += sprintf(buffer + cnt, fmt, raidz_supp_impl[i]->name);
}
rw_exit(&vdev_raidz_impl_lock);
return (cnt);
}
+4 -4
View File
@@ -47,10 +47,10 @@
#define VR1(r...) VR1_(r)
#define VR2(r...) VR2_(r, 1)
#define VR3(r...) VR3_(r, 1, 2)
#define VR4(r...) VR4_(r, 1)
#define VR5(r...) VR5_(r, 1, 2)
#define VR6(r...) VR6_(r, 1, 2, 3)
#define VR7(r...) VR7_(r, 1, 2, 3, 4)
#define VR4(r...) VR4_(r, 1, 2)
#define VR5(r...) VR5_(r, 1, 2, 3)
#define VR6(r...) VR6_(r, 1, 2, 3, 4)
#define VR7(r...) VR7_(r, 1, 2, 3, 4, 5)
#define R_01(REG1, REG2, ...) REG1, REG2
#define _R_23(_0, _1, REG2, REG3, ...) REG2, REG3
+1 -1
View File
@@ -222,7 +222,7 @@ static const struct {
DEFINE_GEN_METHODS(scalar);
DEFINE_REC_METHODS(scalar);
static boolean_t
boolean_t
raidz_will_scalar_work(void)
{
return (B_TRUE); /* always */
+4 -4
View File
@@ -47,10 +47,10 @@
#define VR1(r...) VR1_(r)
#define VR2(r...) VR2_(r, 1)
#define VR3(r...) VR3_(r, 1, 2)
#define VR4(r...) VR4_(r, 1)
#define VR5(r...) VR5_(r, 1, 2)
#define VR6(r...) VR6_(r, 1, 2, 3)
#define VR7(r...) VR7_(r, 1, 2, 3, 4)
#define VR4(r...) VR4_(r, 1, 2)
#define VR5(r...) VR5_(r, 1, 2, 3)
#define VR6(r...) VR6_(r, 1, 2, 3, 4)
#define VR7(r...) VR7_(r, 1, 2, 3, 4, 5)
#define R_01(REG1, REG2, ...) REG1, REG2
#define _R_23(_0, _1, REG2, REG3, ...) REG2, REG3