mirror_zfs/module/zfs/vdev_raidz_math.c
Gvozden Neskovic 590c9a0994 Allow building with CFLAGS="-O0"
If compiled with -O0, gcc doesn't do any stack frame coalescing
and -Wframe-larger-than=1024 is triggered in debug mode.
Starting with gcc 4.8, new opt level -Og is introduced for debugging, which
does not trigger this warning.

Fix bench zio size, using SPA_OLD_MAXBLOCKSHIFT

Signed-off-by: Gvozden Neskovic <neskovic@gmail.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #4799
2016-07-11 16:53:02 -07:00

572 lines
15 KiB
C

/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (C) 2016 Gvozden Nešković. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/types.h>
#include <sys/zio.h>
#include <sys/debug.h>
#include <sys/zfs_debug.h>
#include <sys/vdev_raidz.h>
#include <sys/vdev_raidz_impl.h>
extern const raidz_impl_ops_t vdev_raidz_scalar_impl;
extern const raidz_impl_ops_t vdev_raidz_sse_impl;
extern const raidz_impl_ops_t vdev_raidz_avx2_impl;
/* All compiled in implementations */
const raidz_impl_ops_t *raidz_all_maths[] = {
&vdev_raidz_scalar_impl,
#if defined(__x86_64) && defined(HAVE_SSSE3) /* only x86_64 for now */
&vdev_raidz_sse_impl,
#endif
#if defined(__x86_64) && defined(HAVE_AVX2) /* only x86_64 for now */
&vdev_raidz_avx2_impl
#endif
};
/* Indicate that benchmark has been completed */
static boolean_t raidz_math_initialized = B_FALSE;
/* Select raidz implementation */
static enum vdev_raidz_impl_sel {
IMPL_FASTEST = -1,
IMPL_ORIGINAL = -2,
IMPL_CYCLE = -3,
IMPL_SCALAR = 0,
} zfs_vdev_raidz_impl = IMPL_SCALAR;
/* selected implementation and its lock */
static krwlock_t vdev_raidz_impl_lock;
static raidz_impl_ops_t *vdev_raidz_used_impl =
(raidz_impl_ops_t *) &vdev_raidz_scalar_impl;
static boolean_t vdev_raidz_impl_user_set = B_FALSE;
/* RAIDZ op that contain the fastest routines */
static raidz_impl_ops_t vdev_raidz_fastest_impl = {
.name = "fastest"
};
/* Hold all supported implementations */
size_t raidz_supp_impl_cnt = 1;
raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths) + 1] = {
(raidz_impl_ops_t *) &vdev_raidz_scalar_impl, /* scalar is supported */
NULL
};
/*
* kstats values for supported impl & original methods
* Values represent per disk throughput of 8 disk+parity raidz vdev (Bps)
*/
static raidz_impl_kstat_t raidz_impl_kstats[ARRAY_SIZE(raidz_all_maths) + 1];
/* kstat for benchmarked implementations */
static kstat_t *raidz_math_kstat = NULL;
/*
* Selects the raidz operation for raidz_map
* If rm_ops is set to NULL original raidz implementation will be used
*/
void
vdev_raidz_math_get_ops(raidz_map_t *rm)
{
rw_enter(&vdev_raidz_impl_lock, RW_READER);
rm->rm_ops = vdev_raidz_used_impl;
#if !defined(_KERNEL)
if (zfs_vdev_raidz_impl == IMPL_CYCLE) {
static size_t cycle_impl_idx = 0;
size_t idx;
/*
* Cycle through all supported new implementations, and
* when idx == raidz_supp_impl_cnt, use the original
*/
idx = (++cycle_impl_idx) % (raidz_supp_impl_cnt + 1);
rm->rm_ops = raidz_supp_impl[idx];
}
#endif
rw_exit(&vdev_raidz_impl_lock);
}
/*
* Select parity generation method for raidz_map
*/
void
vdev_raidz_math_generate(raidz_map_t *rm)
{
raidz_gen_f gen_parity = NULL;
switch (raidz_parity(rm)) {
case 1:
gen_parity = rm->rm_ops->gen[RAIDZ_GEN_P];
break;
case 2:
gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQ];
break;
case 3:
gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQR];
break;
default:
gen_parity = NULL;
cmn_err(CE_PANIC, "invalid RAID-Z configuration %d",
raidz_parity(rm));
break;
}
ASSERT(gen_parity != NULL);
gen_parity(rm);
}
static raidz_rec_f
_reconstruct_fun_raidz1(raidz_map_t *rm, const int *parity_valid,
const int nbaddata)
{
if (nbaddata == 1 && parity_valid[CODE_P]) {
return (rm->rm_ops->rec[RAIDZ_REC_P]);
}
return ((raidz_rec_f) NULL);
}
static raidz_rec_f
_reconstruct_fun_raidz2(raidz_map_t *rm, const int *parity_valid,
const int nbaddata)
{
if (nbaddata == 1) {
if (parity_valid[CODE_P]) {
return (rm->rm_ops->rec[RAIDZ_REC_P]);
} else if (parity_valid[CODE_Q]) {
return (rm->rm_ops->rec[RAIDZ_REC_Q]);
}
} else if (nbaddata == 2 &&
parity_valid[CODE_P] && parity_valid[CODE_Q]) {
return (rm->rm_ops->rec[RAIDZ_REC_PQ]);
}
return ((raidz_rec_f) NULL);
}
static raidz_rec_f
_reconstruct_fun_raidz3(raidz_map_t *rm, const int *parity_valid,
const int nbaddata)
{
if (nbaddata == 1) {
if (parity_valid[CODE_P]) {
return (rm->rm_ops->rec[RAIDZ_REC_P]);
} else if (parity_valid[CODE_Q]) {
return (rm->rm_ops->rec[RAIDZ_REC_Q]);
} else if (parity_valid[CODE_R]) {
return (rm->rm_ops->rec[RAIDZ_REC_R]);
}
} else if (nbaddata == 2) {
if (parity_valid[CODE_P] && parity_valid[CODE_Q]) {
return (rm->rm_ops->rec[RAIDZ_REC_PQ]);
} else if (parity_valid[CODE_P] && parity_valid[CODE_R]) {
return (rm->rm_ops->rec[RAIDZ_REC_PR]);
} else if (parity_valid[CODE_Q] && parity_valid[CODE_R]) {
return (rm->rm_ops->rec[RAIDZ_REC_QR]);
}
} else if (nbaddata == 3 &&
parity_valid[CODE_P] && parity_valid[CODE_Q] &&
parity_valid[CODE_R]) {
return (rm->rm_ops->rec[RAIDZ_REC_PQR]);
}
return ((raidz_rec_f) NULL);
}
/*
* Select data reconstruction method for raidz_map
* @parity_valid - Parity validity flag
* @dt - Failed data index array
* @nbaddata - Number of failed data columns
*/
int
vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid,
const int *dt, const int nbaddata)
{
raidz_rec_f rec_data = NULL;
switch (raidz_parity(rm)) {
case 1:
rec_data = _reconstruct_fun_raidz1(rm, parity_valid,
nbaddata);
break;
case 2:
rec_data = _reconstruct_fun_raidz2(rm, parity_valid,
nbaddata);
break;
case 3:
rec_data = _reconstruct_fun_raidz3(rm, parity_valid,
nbaddata);
break;
default:
cmn_err(CE_PANIC, "invalid RAID-Z configuration %d",
raidz_parity(rm));
break;
}
ASSERT(rec_data != NULL);
return (rec_data(rm, dt));
}
const char *raidz_gen_name[] = {
"gen_p", "gen_pq", "gen_pqr"
};
const char *raidz_rec_name[] = {
"rec_p", "rec_q", "rec_r",
"rec_pq", "rec_pr", "rec_qr", "rec_pqr"
};
static void
init_raidz_kstat(raidz_impl_kstat_t *rs, const char *name)
{
int i;
const size_t impl_name_len = strnlen(name, KSTAT_STRLEN);
const size_t op_name_max = (KSTAT_STRLEN - 2) > impl_name_len ?
KSTAT_STRLEN - impl_name_len - 2 : 0;
for (i = 0; i < RAIDZ_GEN_NUM; i++) {
strncpy(rs->gen[i].name, name, impl_name_len);
strncpy(rs->gen[i].name + impl_name_len, "_", 1);
strncpy(rs->gen[i].name + impl_name_len + 1,
raidz_gen_name[i], op_name_max);
rs->gen[i].data_type = KSTAT_DATA_UINT64;
rs->gen[i].value.ui64 = 0;
}
for (i = 0; i < RAIDZ_REC_NUM; i++) {
strncpy(rs->rec[i].name, name, impl_name_len);
strncpy(rs->rec[i].name + impl_name_len, "_", 1);
strncpy(rs->rec[i].name + impl_name_len + 1,
raidz_rec_name[i], op_name_max);
rs->rec[i].data_type = KSTAT_DATA_UINT64;
rs->rec[i].value.ui64 = 0;
}
}
#define BENCH_D_COLS (8ULL)
#define BENCH_COLS (BENCH_D_COLS + PARITY_PQR)
#define BENCH_ZIO_SIZE (1ULL << SPA_OLD_MAXBLOCKSHIFT) /* 128 kiB */
#define BENCH_NS MSEC2NSEC(25) /* 25ms */
typedef void (*benchmark_fn)(raidz_map_t *rm, const int fn);
static void
benchmark_gen_impl(raidz_map_t *rm, const int fn)
{
(void) fn;
vdev_raidz_generate_parity(rm);
}
static void
benchmark_rec_impl(raidz_map_t *rm, const int fn)
{
static const int rec_tgt[7][3] = {
{1, 2, 3}, /* rec_p: bad QR & D[0] */
{0, 2, 3}, /* rec_q: bad PR & D[0] */
{0, 1, 3}, /* rec_r: bad PQ & D[0] */
{2, 3, 4}, /* rec_pq: bad R & D[0][1] */
{1, 3, 4}, /* rec_pr: bad Q & D[0][1] */
{0, 3, 4}, /* rec_qr: bad P & D[0][1] */
{3, 4, 5} /* rec_pqr: bad & D[0][1][2] */
};
vdev_raidz_reconstruct(rm, rec_tgt[fn], 3);
}
/*
* Benchmarking of all supported implementations (raidz_supp_impl_cnt)
* is performed by setting the rm_ops pointer and calling the top level
* generate/reconstruct methods of bench_rm.
*/
static void
benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn)
{
uint64_t run_cnt, speed, best_speed = 0;
hrtime_t t_start, t_diff;
raidz_impl_ops_t *curr_impl;
int impl, i;
/*
* Use the sentinel (NULL) from the end of raidz_supp_impl_cnt
* to run "original" implementation (bench_rm->rm_ops = NULL)
*/
for (impl = 0; impl <= raidz_supp_impl_cnt; impl++) {
/* set an implementation to benchmark */
curr_impl = raidz_supp_impl[impl];
bench_rm->rm_ops = curr_impl;
run_cnt = 0;
t_start = gethrtime();
do {
for (i = 0; i < 25; i++, run_cnt++)
bench_fn(bench_rm, fn);
t_diff = gethrtime() - t_start;
} while (t_diff < BENCH_NS);
speed = run_cnt * BENCH_ZIO_SIZE * NANOSEC;
speed /= (t_diff * BENCH_COLS);
if (bench_fn == benchmark_gen_impl)
raidz_impl_kstats[impl].gen[fn].value.ui64 = speed;
else
raidz_impl_kstats[impl].rec[fn].value.ui64 = speed;
/* if curr_impl==NULL the original impl is benchmarked */
if (curr_impl != NULL && speed > best_speed) {
best_speed = speed;
if (bench_fn == benchmark_gen_impl)
vdev_raidz_fastest_impl.gen[fn] =
curr_impl->gen[fn];
else
vdev_raidz_fastest_impl.rec[fn] =
curr_impl->rec[fn];
}
}
}
void
vdev_raidz_math_init(void)
{
raidz_impl_ops_t *curr_impl;
zio_t *bench_zio = NULL;
raidz_map_t *bench_rm = NULL;
uint64_t bench_parity;
int i, c, fn;
/* init & vdev_raidz_impl_lock */
rw_init(&vdev_raidz_impl_lock, NULL, RW_DEFAULT, NULL);
/* move supported impl into raidz_supp_impl */
for (i = 0, c = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
curr_impl = (raidz_impl_ops_t *) raidz_all_maths[i];
/* initialize impl */
if (curr_impl->init)
curr_impl->init();
if (curr_impl->is_supported()) {
/* init kstat */
init_raidz_kstat(&raidz_impl_kstats[c],
curr_impl->name);
raidz_supp_impl[c++] = (raidz_impl_ops_t *) curr_impl;
}
}
raidz_supp_impl_cnt = c; /* number of supported impl */
raidz_supp_impl[c] = NULL; /* sentinel */
/* init kstat for original routines */
init_raidz_kstat(&(raidz_impl_kstats[raidz_supp_impl_cnt]), "original");
#if !defined(_KERNEL)
/*
* Skip benchmarking and use last implementation as fastest
*/
memcpy(&vdev_raidz_fastest_impl, raidz_supp_impl[raidz_supp_impl_cnt-1],
sizeof (vdev_raidz_fastest_impl));
vdev_raidz_fastest_impl.name = "fastest";
raidz_math_initialized = B_TRUE;
/* Use 'cycle' math selection method for userspace */
VERIFY0(vdev_raidz_impl_set("cycle"));
return;
#endif
/* Fake an zio and run the benchmark on it */
bench_zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP);
bench_zio->io_offset = 0;
bench_zio->io_size = BENCH_ZIO_SIZE; /* only data columns */
bench_zio->io_data = zio_data_buf_alloc(BENCH_ZIO_SIZE);
VERIFY(bench_zio->io_data);
/* Benchmark parity generation methods */
for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
bench_parity = fn + 1;
/* New raidz_map is needed for each generate_p/q/r */
bench_rm = vdev_raidz_map_alloc(bench_zio, 9,
BENCH_D_COLS + bench_parity, bench_parity);
benchmark_raidz_impl(bench_rm, fn, benchmark_gen_impl);
vdev_raidz_map_free(bench_rm);
}
/* Benchmark data reconstruction methods */
bench_rm = vdev_raidz_map_alloc(bench_zio, 9, BENCH_COLS, PARITY_PQR);
for (fn = 0; fn < RAIDZ_REC_NUM; fn++)
benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl);
vdev_raidz_map_free(bench_rm);
/* cleanup the bench zio */
zio_data_buf_free(bench_zio->io_data, BENCH_ZIO_SIZE);
kmem_free(bench_zio, sizeof (zio_t));
/* install kstats for all impl */
raidz_math_kstat = kstat_create("zfs", 0, "vdev_raidz_bench",
"misc", KSTAT_TYPE_NAMED,
sizeof (raidz_impl_kstat_t) / sizeof (kstat_named_t) *
(raidz_supp_impl_cnt + 1), KSTAT_FLAG_VIRTUAL);
if (raidz_math_kstat != NULL) {
raidz_math_kstat->ks_data = raidz_impl_kstats;
kstat_install(raidz_math_kstat);
}
/* Finish initialization */
raidz_math_initialized = B_TRUE;
if (!vdev_raidz_impl_user_set)
VERIFY0(vdev_raidz_impl_set("fastest"));
}
void
vdev_raidz_math_fini(void)
{
raidz_impl_ops_t const *curr_impl;
int i;
if (raidz_math_kstat != NULL) {
kstat_delete(raidz_math_kstat);
raidz_math_kstat = NULL;
}
rw_destroy(&vdev_raidz_impl_lock);
/* fini impl */
for (i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
curr_impl = raidz_all_maths[i];
if (curr_impl->fini)
curr_impl->fini();
}
}
static const
struct {
char *name;
raidz_impl_ops_t *impl;
enum vdev_raidz_impl_sel sel;
} math_impl_opts[] = {
{ "fastest", &vdev_raidz_fastest_impl, IMPL_FASTEST },
{ "original", NULL, IMPL_ORIGINAL },
#if !defined(_KERNEL)
{ "cycle", NULL, IMPL_CYCLE },
#endif
};
/*
* Function sets desired raidz implementation.
* If called after module_init(), vdev_raidz_impl_lock must be held for writing.
*
* @val Name of raidz implementation to use
* @param Unused.
*/
static int
zfs_vdev_raidz_impl_set(const char *val, struct kernel_param *kp)
{
size_t i;
/* Check mandatory options */
for (i = 0; i < ARRAY_SIZE(math_impl_opts); i++) {
if (strcmp(val, math_impl_opts[i].name) == 0) {
zfs_vdev_raidz_impl = math_impl_opts[i].sel;
vdev_raidz_used_impl = math_impl_opts[i].impl;
vdev_raidz_impl_user_set = B_TRUE;
return (0);
}
}
/* check all supported implementations */
for (i = 0; i < raidz_supp_impl_cnt; i++) {
if (strcmp(val, raidz_supp_impl[i]->name) == 0) {
zfs_vdev_raidz_impl = i;
vdev_raidz_used_impl = raidz_supp_impl[i];
vdev_raidz_impl_user_set = B_TRUE;
return (0);
}
}
return (-EINVAL);
}
int
vdev_raidz_impl_set(const char *val)
{
int err;
ASSERT(raidz_math_initialized);
rw_enter(&vdev_raidz_impl_lock, RW_WRITER);
err = zfs_vdev_raidz_impl_set(val, NULL);
rw_exit(&vdev_raidz_impl_lock);
return (err);
}
#if defined(_KERNEL) && defined(HAVE_SPL)
static int
zfs_vdev_raidz_impl_get(char *buffer, struct kernel_param *kp)
{
int i, cnt = 0;
char *fmt;
ASSERT(raidz_math_initialized);
rw_enter(&vdev_raidz_impl_lock, RW_READER);
/* list mandatory options */
for (i = 0; i < ARRAY_SIZE(math_impl_opts); i++) {
if (math_impl_opts[i].sel == zfs_vdev_raidz_impl)
fmt = "[%s] ";
else
fmt = "%s ";
cnt += sprintf(buffer + cnt, fmt, math_impl_opts[i].name);
}
/* list all supported implementations */
for (i = 0; i < raidz_supp_impl_cnt; i++) {
fmt = (i == zfs_vdev_raidz_impl) ? "[%s] " : "%s ";
cnt += sprintf(buffer + cnt, fmt, raidz_supp_impl[i]->name);
}
rw_exit(&vdev_raidz_impl_lock);
return (cnt);
}
module_param_call(zfs_vdev_raidz_impl, zfs_vdev_raidz_impl_set,
zfs_vdev_raidz_impl_get, NULL, 0644);
MODULE_PARM_DESC(zfs_vdev_raidz_impl, "Select raidz implementation.");
#endif