mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-23 02:44:41 +03:00
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leverages
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
This commit is contained in:
@@ -83,8 +83,17 @@ run_gen_bench_impl(const char *impl)
|
||||
/* create suitable raidz_map */
|
||||
ncols = rto_opts.rto_dcols + fn + 1;
|
||||
zio_bench.io_size = 1ULL << ds;
|
||||
rm_bench = vdev_raidz_map_alloc(&zio_bench,
|
||||
BENCH_ASHIFT, ncols, fn+1);
|
||||
|
||||
if (rto_opts.rto_expand) {
|
||||
rm_bench = vdev_raidz_map_alloc_expanded(
|
||||
zio_bench.io_abd,
|
||||
zio_bench.io_size, zio_bench.io_offset,
|
||||
rto_opts.rto_ashift, ncols+1, ncols,
|
||||
fn+1, rto_opts.rto_expand_offset);
|
||||
} else {
|
||||
rm_bench = vdev_raidz_map_alloc(&zio_bench,
|
||||
BENCH_ASHIFT, ncols, fn+1);
|
||||
}
|
||||
|
||||
/* estimate iteration count */
|
||||
iter_cnt = GEN_BENCH_MEMORY;
|
||||
@@ -163,8 +172,16 @@ run_rec_bench_impl(const char *impl)
|
||||
(1ULL << BENCH_ASHIFT))
|
||||
continue;
|
||||
|
||||
rm_bench = vdev_raidz_map_alloc(&zio_bench,
|
||||
BENCH_ASHIFT, ncols, PARITY_PQR);
|
||||
if (rto_opts.rto_expand) {
|
||||
rm_bench = vdev_raidz_map_alloc_expanded(
|
||||
zio_bench.io_abd,
|
||||
zio_bench.io_size, zio_bench.io_offset,
|
||||
BENCH_ASHIFT, ncols+1, ncols,
|
||||
PARITY_PQR, rto_opts.rto_expand_offset);
|
||||
} else {
|
||||
rm_bench = vdev_raidz_map_alloc(&zio_bench,
|
||||
BENCH_ASHIFT, ncols, PARITY_PQR);
|
||||
}
|
||||
|
||||
/* estimate iteration count */
|
||||
iter_cnt = (REC_BENCH_MEMORY);
|
||||
|
||||
+285
-45
@@ -77,16 +77,20 @@ static void print_opts(raidz_test_opts_t *opts, boolean_t force)
|
||||
(void) fprintf(stdout, DBLSEP "Running with options:\n"
|
||||
" (-a) zio ashift : %zu\n"
|
||||
" (-o) zio offset : 1 << %zu\n"
|
||||
" (-e) expanded map : %s\n"
|
||||
" (-r) reflow offset : %llx\n"
|
||||
" (-d) number of raidz data columns : %zu\n"
|
||||
" (-s) size of DATA : 1 << %zu\n"
|
||||
" (-S) sweep parameters : %s \n"
|
||||
" (-v) verbose : %s \n\n",
|
||||
opts->rto_ashift, /* -a */
|
||||
ilog2(opts->rto_offset), /* -o */
|
||||
opts->rto_dcols, /* -d */
|
||||
ilog2(opts->rto_dsize), /* -s */
|
||||
opts->rto_sweep ? "yes" : "no", /* -S */
|
||||
verbose); /* -v */
|
||||
opts->rto_ashift, /* -a */
|
||||
ilog2(opts->rto_offset), /* -o */
|
||||
opts->rto_expand ? "yes" : "no", /* -e */
|
||||
(u_longlong_t)opts->rto_expand_offset, /* -r */
|
||||
opts->rto_dcols, /* -d */
|
||||
ilog2(opts->rto_dsize), /* -s */
|
||||
opts->rto_sweep ? "yes" : "no", /* -S */
|
||||
verbose); /* -v */
|
||||
}
|
||||
}
|
||||
|
||||
@@ -104,6 +108,8 @@ static void usage(boolean_t requested)
|
||||
"\t[-S parameter sweep (default: %s)]\n"
|
||||
"\t[-t timeout for parameter sweep test]\n"
|
||||
"\t[-B benchmark all raidz implementations]\n"
|
||||
"\t[-e use expanded raidz map (default: %s)]\n"
|
||||
"\t[-r expanded raidz map reflow offset (default: %llx)]\n"
|
||||
"\t[-v increase verbosity (default: %zu)]\n"
|
||||
"\t[-h (print help)]\n"
|
||||
"\t[-T test the test, see if failure would be detected]\n"
|
||||
@@ -114,6 +120,8 @@ static void usage(boolean_t requested)
|
||||
o->rto_dcols, /* -d */
|
||||
ilog2(o->rto_dsize), /* -s */
|
||||
rto_opts.rto_sweep ? "yes" : "no", /* -S */
|
||||
rto_opts.rto_expand ? "yes" : "no", /* -e */
|
||||
(u_longlong_t)o->rto_expand_offset, /* -r */
|
||||
o->rto_v); /* -d */
|
||||
|
||||
exit(requested ? 0 : 1);
|
||||
@@ -128,7 +136,7 @@ static void process_options(int argc, char **argv)
|
||||
|
||||
bcopy(&rto_opts_defaults, o, sizeof (*o));
|
||||
|
||||
while ((opt = getopt(argc, argv, "TDBSvha:o:d:s:t:")) != -1) {
|
||||
while ((opt = getopt(argc, argv, "TDBSvha:er:o:d:s:t:")) != -1) {
|
||||
value = 0;
|
||||
|
||||
switch (opt) {
|
||||
@@ -136,6 +144,12 @@ static void process_options(int argc, char **argv)
|
||||
value = strtoull(optarg, NULL, 0);
|
||||
o->rto_ashift = MIN(13, MAX(9, value));
|
||||
break;
|
||||
case 'e':
|
||||
o->rto_expand = 1;
|
||||
break;
|
||||
case 'r':
|
||||
o->rto_expand_offset = strtoull(optarg, NULL, 0);
|
||||
break;
|
||||
case 'o':
|
||||
value = strtoull(optarg, NULL, 0);
|
||||
o->rto_offset = ((1ULL << MIN(12, value)) >> 9) << 9;
|
||||
@@ -179,25 +193,34 @@ static void process_options(int argc, char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
#define DATA_COL(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_abd)
|
||||
#define DATA_COL_SIZE(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_size)
|
||||
#define DATA_COL(rr, i) ((rr)->rr_col[rr->rr_firstdatacol + (i)].rc_abd)
|
||||
#define DATA_COL_SIZE(rr, i) ((rr)->rr_col[rr->rr_firstdatacol + (i)].rc_size)
|
||||
|
||||
#define CODE_COL(rm, i) ((rm)->rm_col[(i)].rc_abd)
|
||||
#define CODE_COL_SIZE(rm, i) ((rm)->rm_col[(i)].rc_size)
|
||||
#define CODE_COL(rr, i) ((rr)->rr_col[(i)].rc_abd)
|
||||
#define CODE_COL_SIZE(rr, i) ((rr)->rr_col[(i)].rc_size)
|
||||
|
||||
static int
|
||||
cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
|
||||
{
|
||||
int i, ret = 0;
|
||||
int r, i, ret = 0;
|
||||
|
||||
VERIFY(parity >= 1 && parity <= 3);
|
||||
|
||||
for (i = 0; i < parity; i++) {
|
||||
if (abd_cmp(CODE_COL(rm, i), CODE_COL(opts->rm_golden, i))
|
||||
!= 0) {
|
||||
ret++;
|
||||
LOG_OPT(D_DEBUG, opts,
|
||||
"\nParity block [%d] different!\n", i);
|
||||
for (r = 0; r < rm->rm_nrows; r++) {
|
||||
raidz_row_t * const rr = rm->rm_row[r];
|
||||
raidz_row_t * const rrg = opts->rm_golden->rm_row[r];
|
||||
for (i = 0; i < parity; i++) {
|
||||
if (CODE_COL_SIZE(rrg, i) == 0) {
|
||||
VERIFY0(CODE_COL_SIZE(rr, i));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (abd_cmp(CODE_COL(rr, i),
|
||||
CODE_COL(rrg, i)) != 0) {
|
||||
ret++;
|
||||
LOG_OPT(D_DEBUG, opts,
|
||||
"\nParity block [%d] different!\n", i);
|
||||
}
|
||||
}
|
||||
}
|
||||
return (ret);
|
||||
@@ -206,16 +229,26 @@ cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
|
||||
static int
|
||||
cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
|
||||
{
|
||||
int i, ret = 0;
|
||||
int dcols = opts->rm_golden->rm_cols - raidz_parity(opts->rm_golden);
|
||||
int r, i, dcols, ret = 0;
|
||||
|
||||
for (i = 0; i < dcols; i++) {
|
||||
if (abd_cmp(DATA_COL(opts->rm_golden, i), DATA_COL(rm, i))
|
||||
!= 0) {
|
||||
ret++;
|
||||
for (r = 0; r < rm->rm_nrows; r++) {
|
||||
raidz_row_t *rr = rm->rm_row[r];
|
||||
raidz_row_t *rrg = opts->rm_golden->rm_row[r];
|
||||
dcols = opts->rm_golden->rm_row[0]->rr_cols -
|
||||
raidz_parity(opts->rm_golden);
|
||||
for (i = 0; i < dcols; i++) {
|
||||
if (DATA_COL_SIZE(rrg, i) == 0) {
|
||||
VERIFY0(DATA_COL_SIZE(rr, i));
|
||||
continue;
|
||||
}
|
||||
|
||||
LOG_OPT(D_DEBUG, opts,
|
||||
"\nData block [%d] different!\n", i);
|
||||
if (abd_cmp(DATA_COL(rrg, i),
|
||||
DATA_COL(rr, i)) != 0) {
|
||||
ret++;
|
||||
|
||||
LOG_OPT(D_DEBUG, opts,
|
||||
"\nData block [%d] different!\n", i);
|
||||
}
|
||||
}
|
||||
}
|
||||
return (ret);
|
||||
@@ -236,12 +269,13 @@ init_rand(void *data, size_t size, void *private)
|
||||
static void
|
||||
corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt)
|
||||
{
|
||||
int i;
|
||||
raidz_col_t *col;
|
||||
|
||||
for (i = 0; i < cnt; i++) {
|
||||
col = &rm->rm_col[tgts[i]];
|
||||
abd_iterate_func(col->rc_abd, 0, col->rc_size, init_rand, NULL);
|
||||
for (int r = 0; r < rm->rm_nrows; r++) {
|
||||
raidz_row_t *rr = rm->rm_row[r];
|
||||
for (int i = 0; i < cnt; i++) {
|
||||
raidz_col_t *col = &rr->rr_col[tgts[i]];
|
||||
abd_iterate_func(col->rc_abd, 0, col->rc_size,
|
||||
init_rand, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -288,10 +322,22 @@ init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
|
||||
|
||||
VERIFY0(vdev_raidz_impl_set("original"));
|
||||
|
||||
opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
|
||||
opts->rto_ashift, total_ncols, parity);
|
||||
rm_test = vdev_raidz_map_alloc(zio_test,
|
||||
opts->rto_ashift, total_ncols, parity);
|
||||
if (opts->rto_expand) {
|
||||
opts->rm_golden =
|
||||
vdev_raidz_map_alloc_expanded(opts->zio_golden->io_abd,
|
||||
opts->zio_golden->io_size, opts->zio_golden->io_offset,
|
||||
opts->rto_ashift, total_ncols+1, total_ncols,
|
||||
parity, opts->rto_expand_offset);
|
||||
rm_test = vdev_raidz_map_alloc_expanded(zio_test->io_abd,
|
||||
zio_test->io_size, zio_test->io_offset,
|
||||
opts->rto_ashift, total_ncols+1, total_ncols,
|
||||
parity, opts->rto_expand_offset);
|
||||
} else {
|
||||
opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
|
||||
opts->rto_ashift, total_ncols, parity);
|
||||
rm_test = vdev_raidz_map_alloc(zio_test,
|
||||
opts->rto_ashift, total_ncols, parity);
|
||||
}
|
||||
|
||||
VERIFY(opts->zio_golden);
|
||||
VERIFY(opts->rm_golden);
|
||||
@@ -312,6 +358,188 @@ init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* If reflow is not in progress, reflow_offset should be UINT64_MAX.
|
||||
* For each row, if the row is entirely before reflow_offset, it will
|
||||
* come from the new location. Otherwise this row will come from the
|
||||
* old location. Therefore, rows that straddle the reflow_offset will
|
||||
* come from the old location.
|
||||
*
|
||||
* NOTE: Until raidz expansion is implemented this function is only
|
||||
* needed by raidz_test.c to the multi-row raid_map_t functionality.
|
||||
*/
|
||||
raidz_map_t *
|
||||
vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset,
|
||||
uint64_t ashift, uint64_t physical_cols, uint64_t logical_cols,
|
||||
uint64_t nparity, uint64_t reflow_offset)
|
||||
{
|
||||
/* The zio's size in units of the vdev's minimum sector size. */
|
||||
uint64_t s = size >> ashift;
|
||||
uint64_t q, r, bc, devidx, asize = 0, tot;
|
||||
|
||||
/*
|
||||
* "Quotient": The number of data sectors for this stripe on all but
|
||||
* the "big column" child vdevs that also contain "remainder" data.
|
||||
* AKA "full rows"
|
||||
*/
|
||||
q = s / (logical_cols - nparity);
|
||||
|
||||
/*
|
||||
* "Remainder": The number of partial stripe data sectors in this I/O.
|
||||
* This will add a sector to some, but not all, child vdevs.
|
||||
*/
|
||||
r = s - q * (logical_cols - nparity);
|
||||
|
||||
/* The number of "big columns" - those which contain remainder data. */
|
||||
bc = (r == 0 ? 0 : r + nparity);
|
||||
|
||||
/*
|
||||
* The total number of data and parity sectors associated with
|
||||
* this I/O.
|
||||
*/
|
||||
tot = s + nparity * (q + (r == 0 ? 0 : 1));
|
||||
|
||||
/* How many rows contain data (not skip) */
|
||||
uint64_t rows = howmany(tot, logical_cols);
|
||||
int cols = MIN(tot, logical_cols);
|
||||
|
||||
raidz_map_t *rm = kmem_zalloc(offsetof(raidz_map_t, rm_row[rows]),
|
||||
KM_SLEEP);
|
||||
rm->rm_nrows = rows;
|
||||
|
||||
for (uint64_t row = 0; row < rows; row++) {
|
||||
raidz_row_t *rr = kmem_alloc(offsetof(raidz_row_t,
|
||||
rr_col[cols]), KM_SLEEP);
|
||||
rm->rm_row[row] = rr;
|
||||
|
||||
/* The starting RAIDZ (parent) vdev sector of the row. */
|
||||
uint64_t b = (offset >> ashift) + row * logical_cols;
|
||||
|
||||
/*
|
||||
* If we are in the middle of a reflow, and any part of this
|
||||
* row has not been copied, then use the old location of
|
||||
* this row.
|
||||
*/
|
||||
int row_phys_cols = physical_cols;
|
||||
if (b + (logical_cols - nparity) > reflow_offset >> ashift)
|
||||
row_phys_cols--;
|
||||
|
||||
/* starting child of this row */
|
||||
uint64_t child_id = b % row_phys_cols;
|
||||
/* The starting byte offset on each child vdev. */
|
||||
uint64_t child_offset = (b / row_phys_cols) << ashift;
|
||||
|
||||
/*
|
||||
* We set cols to the entire width of the block, even
|
||||
* if this row is shorter. This is needed because parity
|
||||
* generation (for Q and R) needs to know the entire width,
|
||||
* because it treats the short row as though it was
|
||||
* full-width (and the "phantom" sectors were zero-filled).
|
||||
*
|
||||
* Another approach to this would be to set cols shorter
|
||||
* (to just the number of columns that we might do i/o to)
|
||||
* and have another mechanism to tell the parity generation
|
||||
* about the "entire width". Reconstruction (at least
|
||||
* vdev_raidz_reconstruct_general()) would also need to
|
||||
* know about the "entire width".
|
||||
*/
|
||||
rr->rr_cols = cols;
|
||||
rr->rr_bigcols = bc;
|
||||
rr->rr_missingdata = 0;
|
||||
rr->rr_missingparity = 0;
|
||||
rr->rr_firstdatacol = nparity;
|
||||
rr->rr_abd_copy = NULL;
|
||||
rr->rr_abd_empty = NULL;
|
||||
rr->rr_nempty = 0;
|
||||
|
||||
for (int c = 0; c < rr->rr_cols; c++, child_id++) {
|
||||
if (child_id >= row_phys_cols) {
|
||||
child_id -= row_phys_cols;
|
||||
child_offset += 1ULL << ashift;
|
||||
}
|
||||
rr->rr_col[c].rc_devidx = child_id;
|
||||
rr->rr_col[c].rc_offset = child_offset;
|
||||
rr->rr_col[c].rc_gdata = NULL;
|
||||
rr->rr_col[c].rc_orig_data = NULL;
|
||||
rr->rr_col[c].rc_error = 0;
|
||||
rr->rr_col[c].rc_tried = 0;
|
||||
rr->rr_col[c].rc_skipped = 0;
|
||||
rr->rr_col[c].rc_need_orig_restore = B_FALSE;
|
||||
|
||||
uint64_t dc = c - rr->rr_firstdatacol;
|
||||
if (c < rr->rr_firstdatacol) {
|
||||
rr->rr_col[c].rc_size = 1ULL << ashift;
|
||||
rr->rr_col[c].rc_abd =
|
||||
abd_alloc_linear(rr->rr_col[c].rc_size,
|
||||
B_TRUE);
|
||||
} else if (row == rows - 1 && bc != 0 && c >= bc) {
|
||||
/*
|
||||
* Past the end, this for parity generation.
|
||||
*/
|
||||
rr->rr_col[c].rc_size = 0;
|
||||
rr->rr_col[c].rc_abd = NULL;
|
||||
} else {
|
||||
/*
|
||||
* "data column" (col excluding parity)
|
||||
* Add an ASCII art diagram here
|
||||
*/
|
||||
uint64_t off;
|
||||
|
||||
if (c < bc || r == 0) {
|
||||
off = dc * rows + row;
|
||||
} else {
|
||||
off = r * rows +
|
||||
(dc - r) * (rows - 1) + row;
|
||||
}
|
||||
rr->rr_col[c].rc_size = 1ULL << ashift;
|
||||
rr->rr_col[c].rc_abd =
|
||||
abd_get_offset(abd, off << ashift);
|
||||
}
|
||||
|
||||
asize += rr->rr_col[c].rc_size;
|
||||
}
|
||||
/*
|
||||
* If all data stored spans all columns, there's a danger that
|
||||
* parity will always be on the same device and, since parity
|
||||
* isn't read during normal operation, that that device's I/O
|
||||
* bandwidth won't be used effectively. We therefore switch
|
||||
* the parity every 1MB.
|
||||
*
|
||||
* ...at least that was, ostensibly, the theory. As a practical
|
||||
* matter unless we juggle the parity between all devices
|
||||
* evenly, we won't see any benefit. Further, occasional writes
|
||||
* that aren't a multiple of the LCM of the number of children
|
||||
* and the minimum stripe width are sufficient to avoid pessimal
|
||||
* behavior. Unfortunately, this decision created an implicit
|
||||
* on-disk format requirement that we need to support for all
|
||||
* eternity, but only for single-parity RAID-Z.
|
||||
*
|
||||
* If we intend to skip a sector in the zeroth column for
|
||||
* padding we must make sure to note this swap. We will never
|
||||
* intend to skip the first column since at least one data and
|
||||
* one parity column must appear in each row.
|
||||
*/
|
||||
if (rr->rr_firstdatacol == 1 && rr->rr_cols > 1 &&
|
||||
(offset & (1ULL << 20))) {
|
||||
ASSERT(rr->rr_cols >= 2);
|
||||
ASSERT(rr->rr_col[0].rc_size == rr->rr_col[1].rc_size);
|
||||
devidx = rr->rr_col[0].rc_devidx;
|
||||
uint64_t o = rr->rr_col[0].rc_offset;
|
||||
rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx;
|
||||
rr->rr_col[0].rc_offset = rr->rr_col[1].rc_offset;
|
||||
rr->rr_col[1].rc_devidx = devidx;
|
||||
rr->rr_col[1].rc_offset = o;
|
||||
}
|
||||
|
||||
}
|
||||
ASSERT3U(asize, ==, tot << ashift);
|
||||
|
||||
/* init RAIDZ parity ops */
|
||||
rm->rm_ops = vdev_raidz_math_get_ops();
|
||||
|
||||
return (rm);
|
||||
}
|
||||
|
||||
static raidz_map_t *
|
||||
init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
|
||||
{
|
||||
@@ -330,8 +558,15 @@ init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
|
||||
(*zio)->io_abd = raidz_alloc(alloc_dsize);
|
||||
init_zio_abd(*zio);
|
||||
|
||||
rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
|
||||
total_ncols, parity);
|
||||
if (opts->rto_expand) {
|
||||
rm = vdev_raidz_map_alloc_expanded((*zio)->io_abd,
|
||||
(*zio)->io_size, (*zio)->io_offset,
|
||||
opts->rto_ashift, total_ncols+1, total_ncols,
|
||||
parity, opts->rto_expand_offset);
|
||||
} else {
|
||||
rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
|
||||
total_ncols, parity);
|
||||
}
|
||||
VERIFY(rm);
|
||||
|
||||
/* Make sure code columns are destroyed */
|
||||
@@ -420,7 +655,7 @@ run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
|
||||
if (fn < RAIDZ_REC_PQ) {
|
||||
/* can reconstruct 1 failed data disk */
|
||||
for (x0 = 0; x0 < opts->rto_dcols; x0++) {
|
||||
if (x0 >= rm->rm_cols - raidz_parity(rm))
|
||||
if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
|
||||
continue;
|
||||
|
||||
/* Check if should stop */
|
||||
@@ -445,10 +680,11 @@ run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
|
||||
} else if (fn < RAIDZ_REC_PQR) {
|
||||
/* can reconstruct 2 failed data disk */
|
||||
for (x0 = 0; x0 < opts->rto_dcols; x0++) {
|
||||
if (x0 >= rm->rm_cols - raidz_parity(rm))
|
||||
if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
|
||||
continue;
|
||||
for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
|
||||
if (x1 >= rm->rm_cols - raidz_parity(rm))
|
||||
if (x1 >= rm->rm_row[0]->rr_cols -
|
||||
raidz_parity(rm))
|
||||
continue;
|
||||
|
||||
/* Check if should stop */
|
||||
@@ -475,14 +711,15 @@ run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
|
||||
} else {
|
||||
/* can reconstruct 3 failed data disk */
|
||||
for (x0 = 0; x0 < opts->rto_dcols; x0++) {
|
||||
if (x0 >= rm->rm_cols - raidz_parity(rm))
|
||||
if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
|
||||
continue;
|
||||
for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
|
||||
if (x1 >= rm->rm_cols - raidz_parity(rm))
|
||||
if (x1 >= rm->rm_row[0]->rr_cols -
|
||||
raidz_parity(rm))
|
||||
continue;
|
||||
for (x2 = x1 + 1; x2 < opts->rto_dcols; x2++) {
|
||||
if (x2 >=
|
||||
rm->rm_cols - raidz_parity(rm))
|
||||
if (x2 >= rm->rm_row[0]->rr_cols -
|
||||
raidz_parity(rm))
|
||||
continue;
|
||||
|
||||
/* Check if should stop */
|
||||
@@ -700,6 +937,8 @@ run_sweep(void)
|
||||
opts->rto_dcols = dcols_v[d];
|
||||
opts->rto_offset = (1 << ashift_v[a]) * rand();
|
||||
opts->rto_dsize = size_v[s];
|
||||
opts->rto_expand = rto_opts.rto_expand;
|
||||
opts->rto_expand_offset = rto_opts.rto_expand_offset;
|
||||
opts->rto_v = 0; /* be quiet */
|
||||
|
||||
VERIFY3P(thread_create(NULL, 0, sweep_thread, (void *) opts,
|
||||
@@ -732,6 +971,7 @@ exit:
|
||||
return (sweep_state == SWEEP_ERROR ? SWEEP_ERROR : 0);
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
|
||||
@@ -44,13 +44,15 @@ static const char *raidz_impl_names[] = {
|
||||
|
||||
typedef struct raidz_test_opts {
|
||||
size_t rto_ashift;
|
||||
size_t rto_offset;
|
||||
uint64_t rto_offset;
|
||||
size_t rto_dcols;
|
||||
size_t rto_dsize;
|
||||
size_t rto_v;
|
||||
size_t rto_sweep;
|
||||
size_t rto_sweep_timeout;
|
||||
size_t rto_benchmark;
|
||||
size_t rto_expand;
|
||||
uint64_t rto_expand_offset;
|
||||
size_t rto_sanity;
|
||||
size_t rto_gdb;
|
||||
|
||||
@@ -69,6 +71,8 @@ static const raidz_test_opts_t rto_opts_defaults = {
|
||||
.rto_v = 0,
|
||||
.rto_sweep = 0,
|
||||
.rto_benchmark = 0,
|
||||
.rto_expand = 0,
|
||||
.rto_expand_offset = -1ULL,
|
||||
.rto_sanity = 0,
|
||||
.rto_gdb = 0,
|
||||
.rto_should_stop = B_FALSE
|
||||
@@ -113,4 +117,7 @@ void init_zio_abd(zio_t *zio);
|
||||
|
||||
void run_raidz_benchmark(void);
|
||||
|
||||
struct raidz_map *vdev_raidz_map_alloc_expanded(abd_t *, uint64_t, uint64_t,
|
||||
uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
|
||||
|
||||
#endif /* RAIDZ_TEST_H */
|
||||
|
||||
Reference in New Issue
Block a user