mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 10:37:35 +03:00
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
This commit is contained in:
+210
-71
@@ -104,6 +104,7 @@
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zil.h>
|
||||
#include <sys/zil_impl.h>
|
||||
#include <sys/vdev_draid.h>
|
||||
#include <sys/vdev_impl.h>
|
||||
#include <sys/vdev_file.h>
|
||||
#include <sys/vdev_initialize.h>
|
||||
@@ -167,8 +168,11 @@ typedef struct ztest_shared_opts {
|
||||
size_t zo_vdev_size;
|
||||
int zo_ashift;
|
||||
int zo_mirrors;
|
||||
int zo_raidz;
|
||||
int zo_raidz_parity;
|
||||
int zo_raid_children;
|
||||
int zo_raid_parity;
|
||||
char zo_raid_type[8];
|
||||
int zo_draid_data;
|
||||
int zo_draid_spares;
|
||||
int zo_datasets;
|
||||
int zo_threads;
|
||||
uint64_t zo_passtime;
|
||||
@@ -191,9 +195,12 @@ static const ztest_shared_opts_t ztest_opts_defaults = {
|
||||
.zo_vdevs = 5,
|
||||
.zo_ashift = SPA_MINBLOCKSHIFT,
|
||||
.zo_mirrors = 2,
|
||||
.zo_raidz = 4,
|
||||
.zo_raidz_parity = 1,
|
||||
.zo_raid_children = 4,
|
||||
.zo_raid_parity = 1,
|
||||
.zo_raid_type = VDEV_TYPE_RAIDZ,
|
||||
.zo_vdev_size = SPA_MINDEVSIZE * 4, /* 256m default size */
|
||||
.zo_draid_data = 4, /* data drives */
|
||||
.zo_draid_spares = 1, /* distributed spares */
|
||||
.zo_datasets = 7,
|
||||
.zo_threads = 23,
|
||||
.zo_passtime = 60, /* 60 seconds */
|
||||
@@ -232,7 +239,7 @@ static ztest_shared_ds_t *ztest_shared_ds;
|
||||
|
||||
#define BT_MAGIC 0x123456789abcdefULL
|
||||
#define MAXFAULTS(zs) \
|
||||
(MAX((zs)->zs_mirrors, 1) * (ztest_opts.zo_raidz_parity + 1) - 1)
|
||||
(MAX((zs)->zs_mirrors, 1) * (ztest_opts.zo_raid_parity + 1) - 1)
|
||||
|
||||
enum ztest_io_type {
|
||||
ZTEST_IO_WRITE_TAG,
|
||||
@@ -689,8 +696,11 @@ usage(boolean_t requested)
|
||||
"\t[-s size_of_each_vdev (default: %s)]\n"
|
||||
"\t[-a alignment_shift (default: %d)] use 0 for random\n"
|
||||
"\t[-m mirror_copies (default: %d)]\n"
|
||||
"\t[-r raidz_disks (default: %d)]\n"
|
||||
"\t[-R raidz_parity (default: %d)]\n"
|
||||
"\t[-r raidz_disks / draid_disks (default: %d)]\n"
|
||||
"\t[-R raid_parity (default: %d)]\n"
|
||||
"\t[-K raid_kind (default: random)] raidz|draid|random\n"
|
||||
"\t[-D draid_data (default: %d)] in config\n"
|
||||
"\t[-S draid_spares (default: %d)]\n"
|
||||
"\t[-d datasets (default: %d)]\n"
|
||||
"\t[-t threads (default: %d)]\n"
|
||||
"\t[-g gang_block_threshold (default: %s)]\n"
|
||||
@@ -716,8 +726,10 @@ usage(boolean_t requested)
|
||||
nice_vdev_size, /* -s */
|
||||
zo->zo_ashift, /* -a */
|
||||
zo->zo_mirrors, /* -m */
|
||||
zo->zo_raidz, /* -r */
|
||||
zo->zo_raidz_parity, /* -R */
|
||||
zo->zo_raid_children, /* -r */
|
||||
zo->zo_raid_parity, /* -R */
|
||||
zo->zo_draid_data, /* -D */
|
||||
zo->zo_draid_spares, /* -S */
|
||||
zo->zo_datasets, /* -d */
|
||||
zo->zo_threads, /* -t */
|
||||
nice_force_ganging, /* -g */
|
||||
@@ -731,6 +743,21 @@ usage(boolean_t requested)
|
||||
exit(requested ? 0 : 1);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
ztest_random(uint64_t range)
|
||||
{
|
||||
uint64_t r;
|
||||
|
||||
ASSERT3S(ztest_fd_rand, >=, 0);
|
||||
|
||||
if (range == 0)
|
||||
return (0);
|
||||
|
||||
if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r))
|
||||
fatal(1, "short read from /dev/urandom");
|
||||
|
||||
return (r % range);
|
||||
}
|
||||
|
||||
static void
|
||||
ztest_parse_name_value(const char *input, ztest_shared_opts_t *zo)
|
||||
@@ -780,11 +807,12 @@ process_options(int argc, char **argv)
|
||||
int opt;
|
||||
uint64_t value;
|
||||
char altdir[MAXNAMELEN] = { 0 };
|
||||
char raid_kind[8] = { "random" };
|
||||
|
||||
bcopy(&ztest_opts_defaults, zo, sizeof (*zo));
|
||||
|
||||
while ((opt = getopt(argc, argv,
|
||||
"v:s:a:m:r:R:d:t:g:i:k:p:f:MVET:P:hF:B:C:o:G")) != EOF) {
|
||||
"v:s:a:m:r:R:K:D:S:d:t:g:i:k:p:f:MVET:P:hF:B:C:o:G")) != EOF) {
|
||||
value = 0;
|
||||
switch (opt) {
|
||||
case 'v':
|
||||
@@ -793,6 +821,8 @@ process_options(int argc, char **argv)
|
||||
case 'm':
|
||||
case 'r':
|
||||
case 'R':
|
||||
case 'D':
|
||||
case 'S':
|
||||
case 'd':
|
||||
case 't':
|
||||
case 'g':
|
||||
@@ -817,10 +847,19 @@ process_options(int argc, char **argv)
|
||||
zo->zo_mirrors = value;
|
||||
break;
|
||||
case 'r':
|
||||
zo->zo_raidz = MAX(1, value);
|
||||
zo->zo_raid_children = MAX(1, value);
|
||||
break;
|
||||
case 'R':
|
||||
zo->zo_raidz_parity = MIN(MAX(value, 1), 3);
|
||||
zo->zo_raid_parity = MIN(MAX(value, 1), 3);
|
||||
break;
|
||||
case 'K':
|
||||
(void) strlcpy(raid_kind, optarg, sizeof (raid_kind));
|
||||
break;
|
||||
case 'D':
|
||||
zo->zo_draid_data = MAX(1, value);
|
||||
break;
|
||||
case 'S':
|
||||
zo->zo_draid_spares = MAX(1, value);
|
||||
break;
|
||||
case 'd':
|
||||
zo->zo_datasets = MAX(1, value);
|
||||
@@ -895,7 +934,54 @@ process_options(int argc, char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
zo->zo_raidz_parity = MIN(zo->zo_raidz_parity, zo->zo_raidz - 1);
|
||||
/* When raid choice is 'random' add a draid pool 50% of the time */
|
||||
if (strcmp(raid_kind, "random") == 0) {
|
||||
(void) strlcpy(raid_kind, (ztest_random(2) == 0) ?
|
||||
"draid" : "raidz", sizeof (raid_kind));
|
||||
|
||||
if (ztest_opts.zo_verbose >= 3)
|
||||
(void) printf("choosing RAID type '%s'\n", raid_kind);
|
||||
}
|
||||
|
||||
if (strcmp(raid_kind, "draid") == 0) {
|
||||
uint64_t min_devsize;
|
||||
|
||||
/* With fewer disk use 256M, otherwise 128M is OK */
|
||||
min_devsize = (ztest_opts.zo_raid_children < 16) ?
|
||||
(256ULL << 20) : (128ULL << 20);
|
||||
|
||||
/* No top-level mirrors with dRAID for now */
|
||||
zo->zo_mirrors = 0;
|
||||
|
||||
/* Use more appropriate defaults for dRAID */
|
||||
if (zo->zo_vdevs == ztest_opts_defaults.zo_vdevs)
|
||||
zo->zo_vdevs = 1;
|
||||
if (zo->zo_raid_children ==
|
||||
ztest_opts_defaults.zo_raid_children)
|
||||
zo->zo_raid_children = 16;
|
||||
if (zo->zo_ashift < 12)
|
||||
zo->zo_ashift = 12;
|
||||
if (zo->zo_vdev_size < min_devsize)
|
||||
zo->zo_vdev_size = min_devsize;
|
||||
|
||||
if (zo->zo_draid_data + zo->zo_raid_parity >
|
||||
zo->zo_raid_children - zo->zo_draid_spares) {
|
||||
(void) fprintf(stderr, "error: too few draid "
|
||||
"children (%d) for stripe width (%d)\n",
|
||||
zo->zo_raid_children,
|
||||
zo->zo_draid_data + zo->zo_raid_parity);
|
||||
usage(B_FALSE);
|
||||
}
|
||||
|
||||
(void) strlcpy(zo->zo_raid_type, VDEV_TYPE_DRAID,
|
||||
sizeof (zo->zo_raid_type));
|
||||
|
||||
} else /* using raidz */ {
|
||||
ASSERT0(strcmp(raid_kind, "raidz"));
|
||||
|
||||
zo->zo_raid_parity = MIN(zo->zo_raid_parity,
|
||||
zo->zo_raid_children - 1);
|
||||
}
|
||||
|
||||
zo->zo_vdevtime =
|
||||
(zo->zo_vdevs > 0 ? zo->zo_time * NANOSEC / zo->zo_vdevs :
|
||||
@@ -966,22 +1052,6 @@ ztest_kill(ztest_shared_t *zs)
|
||||
(void) kill(getpid(), SIGKILL);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
ztest_random(uint64_t range)
|
||||
{
|
||||
uint64_t r;
|
||||
|
||||
ASSERT3S(ztest_fd_rand, >=, 0);
|
||||
|
||||
if (range == 0)
|
||||
return (0);
|
||||
|
||||
if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r))
|
||||
fatal(1, "short read from /dev/urandom");
|
||||
|
||||
return (r % range);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static void
|
||||
ztest_record_enospc(const char *s)
|
||||
@@ -997,12 +1067,27 @@ ztest_get_ashift(void)
|
||||
return (ztest_opts.zo_ashift);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
ztest_is_draid_spare(const char *name)
|
||||
{
|
||||
uint64_t spare_id = 0, parity = 0, vdev_id = 0;
|
||||
|
||||
if (sscanf(name, VDEV_TYPE_DRAID "%llu-%llu-%llu",
|
||||
(u_longlong_t *)&parity, (u_longlong_t *)&vdev_id,
|
||||
(u_longlong_t *)&spare_id) == 3) {
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
static nvlist_t *
|
||||
make_vdev_file(char *path, char *aux, char *pool, size_t size, uint64_t ashift)
|
||||
{
|
||||
char *pathbuf;
|
||||
uint64_t vdev;
|
||||
nvlist_t *file;
|
||||
boolean_t draid_spare = B_FALSE;
|
||||
|
||||
pathbuf = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
|
||||
|
||||
@@ -1024,9 +1109,11 @@ make_vdev_file(char *path, char *aux, char *pool, size_t size, uint64_t ashift)
|
||||
ztest_dev_template, ztest_opts.zo_dir,
|
||||
pool == NULL ? ztest_opts.zo_pool : pool, vdev);
|
||||
}
|
||||
} else {
|
||||
draid_spare = ztest_is_draid_spare(path);
|
||||
}
|
||||
|
||||
if (size != 0) {
|
||||
if (size != 0 && !draid_spare) {
|
||||
int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666);
|
||||
if (fd == -1)
|
||||
fatal(1, "can't open %s", path);
|
||||
@@ -1035,20 +1122,21 @@ make_vdev_file(char *path, char *aux, char *pool, size_t size, uint64_t ashift)
|
||||
(void) close(fd);
|
||||
}
|
||||
|
||||
VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
|
||||
VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0);
|
||||
VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0);
|
||||
VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0);
|
||||
VERIFY0(nvlist_alloc(&file, NV_UNIQUE_NAME, 0));
|
||||
VERIFY0(nvlist_add_string(file, ZPOOL_CONFIG_TYPE,
|
||||
draid_spare ? VDEV_TYPE_DRAID_SPARE : VDEV_TYPE_FILE));
|
||||
VERIFY0(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path));
|
||||
VERIFY0(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift));
|
||||
umem_free(pathbuf, MAXPATHLEN);
|
||||
|
||||
return (file);
|
||||
}
|
||||
|
||||
static nvlist_t *
|
||||
make_vdev_raidz(char *path, char *aux, char *pool, size_t size,
|
||||
make_vdev_raid(char *path, char *aux, char *pool, size_t size,
|
||||
uint64_t ashift, int r)
|
||||
{
|
||||
nvlist_t *raidz, **child;
|
||||
nvlist_t *raid, **child;
|
||||
int c;
|
||||
|
||||
if (r < 2)
|
||||
@@ -1058,20 +1146,41 @@ make_vdev_raidz(char *path, char *aux, char *pool, size_t size,
|
||||
for (c = 0; c < r; c++)
|
||||
child[c] = make_vdev_file(path, aux, pool, size, ashift);
|
||||
|
||||
VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0);
|
||||
VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE,
|
||||
VDEV_TYPE_RAIDZ) == 0);
|
||||
VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY,
|
||||
ztest_opts.zo_raidz_parity) == 0);
|
||||
VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN,
|
||||
child, r) == 0);
|
||||
VERIFY0(nvlist_alloc(&raid, NV_UNIQUE_NAME, 0));
|
||||
VERIFY0(nvlist_add_string(raid, ZPOOL_CONFIG_TYPE,
|
||||
ztest_opts.zo_raid_type));
|
||||
VERIFY0(nvlist_add_uint64(raid, ZPOOL_CONFIG_NPARITY,
|
||||
ztest_opts.zo_raid_parity));
|
||||
VERIFY0(nvlist_add_nvlist_array(raid, ZPOOL_CONFIG_CHILDREN,
|
||||
child, r));
|
||||
|
||||
if (strcmp(ztest_opts.zo_raid_type, VDEV_TYPE_DRAID) == 0) {
|
||||
uint64_t ndata = ztest_opts.zo_draid_data;
|
||||
uint64_t nparity = ztest_opts.zo_raid_parity;
|
||||
uint64_t nspares = ztest_opts.zo_draid_spares;
|
||||
uint64_t children = ztest_opts.zo_raid_children;
|
||||
uint64_t ngroups = 1;
|
||||
|
||||
/*
|
||||
* Calculate the minimum number of groups required to fill a
|
||||
* slice. This is the LCM of the stripe width (data + parity)
|
||||
* and the number of data drives (children - spares).
|
||||
*/
|
||||
while (ngroups * (ndata + nparity) % (children - nspares) != 0)
|
||||
ngroups++;
|
||||
|
||||
/* Store the basic dRAID configuration. */
|
||||
fnvlist_add_uint64(raid, ZPOOL_CONFIG_DRAID_NDATA, ndata);
|
||||
fnvlist_add_uint64(raid, ZPOOL_CONFIG_DRAID_NSPARES, nspares);
|
||||
fnvlist_add_uint64(raid, ZPOOL_CONFIG_DRAID_NGROUPS, ngroups);
|
||||
}
|
||||
|
||||
for (c = 0; c < r; c++)
|
||||
nvlist_free(child[c]);
|
||||
|
||||
umem_free(child, r * sizeof (nvlist_t *));
|
||||
|
||||
return (raidz);
|
||||
return (raid);
|
||||
}
|
||||
|
||||
static nvlist_t *
|
||||
@@ -1082,12 +1191,12 @@ make_vdev_mirror(char *path, char *aux, char *pool, size_t size,
|
||||
int c;
|
||||
|
||||
if (m < 1)
|
||||
return (make_vdev_raidz(path, aux, pool, size, ashift, r));
|
||||
return (make_vdev_raid(path, aux, pool, size, ashift, r));
|
||||
|
||||
child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL);
|
||||
|
||||
for (c = 0; c < m; c++)
|
||||
child[c] = make_vdev_raidz(path, aux, pool, size, ashift, r);
|
||||
child[c] = make_vdev_raid(path, aux, pool, size, ashift, r);
|
||||
|
||||
VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0);
|
||||
VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE,
|
||||
@@ -2809,6 +2918,10 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
|
||||
if (ztest_opts.zo_mmp_test)
|
||||
return;
|
||||
|
||||
/* dRAID added after feature flags, skip upgrade test. */
|
||||
if (strcmp(ztest_opts.zo_raid_type, VDEV_TYPE_DRAID) == 0)
|
||||
return;
|
||||
|
||||
mutex_enter(&ztest_vdev_lock);
|
||||
name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool);
|
||||
|
||||
@@ -2818,13 +2931,13 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
|
||||
(void) spa_destroy(name);
|
||||
|
||||
nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0,
|
||||
NULL, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1);
|
||||
NULL, ztest_opts.zo_raid_children, ztest_opts.zo_mirrors, 1);
|
||||
|
||||
/*
|
||||
* If we're configuring a RAIDZ device then make sure that the
|
||||
* initial version is capable of supporting that feature.
|
||||
*/
|
||||
switch (ztest_opts.zo_raidz_parity) {
|
||||
switch (ztest_opts.zo_raid_parity) {
|
||||
case 0:
|
||||
case 1:
|
||||
initial_version = SPA_VERSION_INITIAL;
|
||||
@@ -2970,7 +3083,8 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
|
||||
return;
|
||||
|
||||
mutex_enter(&ztest_vdev_lock);
|
||||
leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;
|
||||
leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) *
|
||||
ztest_opts.zo_raid_children;
|
||||
|
||||
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
|
||||
|
||||
@@ -3024,7 +3138,8 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
|
||||
*/
|
||||
nvroot = make_vdev_root(NULL, NULL, NULL,
|
||||
ztest_opts.zo_vdev_size, 0, (ztest_random(4) == 0) ?
|
||||
"log" : NULL, ztest_opts.zo_raidz, zs->zs_mirrors, 1);
|
||||
"log" : NULL, ztest_opts.zo_raid_children, zs->zs_mirrors,
|
||||
1);
|
||||
|
||||
error = spa_vdev_add(spa, nvroot);
|
||||
nvlist_free(nvroot);
|
||||
@@ -3078,14 +3193,15 @@ ztest_vdev_class_add(ztest_ds_t *zd, uint64_t id)
|
||||
return;
|
||||
}
|
||||
|
||||
leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;
|
||||
leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) *
|
||||
ztest_opts.zo_raid_children;
|
||||
|
||||
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
|
||||
ztest_shared->zs_vdev_next_leaf = spa_num_top_vdevs(spa) * leaves;
|
||||
spa_config_exit(spa, SCL_VDEV, FTAG);
|
||||
|
||||
nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0,
|
||||
class, ztest_opts.zo_raidz, zs->zs_mirrors, 1);
|
||||
class, ztest_opts.zo_raid_children, zs->zs_mirrors, 1);
|
||||
|
||||
error = spa_vdev_add(spa, nvroot);
|
||||
nvlist_free(nvroot);
|
||||
@@ -3134,7 +3250,7 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
|
||||
char *aux;
|
||||
char *path;
|
||||
uint64_t guid = 0;
|
||||
int error;
|
||||
int error, ignore_err = 0;
|
||||
|
||||
if (ztest_opts.zo_mmp_test)
|
||||
return;
|
||||
@@ -3157,7 +3273,13 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
|
||||
/*
|
||||
* Pick a random device to remove.
|
||||
*/
|
||||
guid = sav->sav_vdevs[ztest_random(sav->sav_count)]->vdev_guid;
|
||||
vdev_t *svd = sav->sav_vdevs[ztest_random(sav->sav_count)];
|
||||
|
||||
/* dRAID spares cannot be removed; try anyways to see ENOTSUP */
|
||||
if (strstr(svd->vdev_path, VDEV_TYPE_DRAID) != NULL)
|
||||
ignore_err = ENOTSUP;
|
||||
|
||||
guid = svd->vdev_guid;
|
||||
} else {
|
||||
/*
|
||||
* Find an unused device we can add.
|
||||
@@ -3214,7 +3336,9 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
|
||||
case ZFS_ERR_DISCARDING_CHECKPOINT:
|
||||
break;
|
||||
default:
|
||||
fatal(0, "spa_vdev_remove(%llu) = %d", guid, error);
|
||||
if (error != ignore_err)
|
||||
fatal(0, "spa_vdev_remove(%llu) = %d", guid,
|
||||
error);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3243,7 +3367,7 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id)
|
||||
mutex_enter(&ztest_vdev_lock);
|
||||
|
||||
/* ensure we have a usable config; mirrors of raidz aren't supported */
|
||||
if (zs->zs_mirrors < 3 || ztest_opts.zo_raidz > 1) {
|
||||
if (zs->zs_mirrors < 3 || ztest_opts.zo_raid_children > 1) {
|
||||
mutex_exit(&ztest_vdev_lock);
|
||||
return;
|
||||
}
|
||||
@@ -3343,6 +3467,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
|
||||
int replacing;
|
||||
int oldvd_has_siblings = B_FALSE;
|
||||
int newvd_is_spare = B_FALSE;
|
||||
int newvd_is_dspare = B_FALSE;
|
||||
int oldvd_is_log;
|
||||
int error, expected_error;
|
||||
|
||||
@@ -3353,7 +3478,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
|
||||
newpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
|
||||
|
||||
mutex_enter(&ztest_vdev_lock);
|
||||
leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;
|
||||
leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raid_children;
|
||||
|
||||
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
|
||||
|
||||
@@ -3393,14 +3518,17 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
|
||||
if (zs->zs_mirrors >= 1) {
|
||||
ASSERT(oldvd->vdev_ops == &vdev_mirror_ops);
|
||||
ASSERT(oldvd->vdev_children >= zs->zs_mirrors);
|
||||
oldvd = oldvd->vdev_child[leaf / ztest_opts.zo_raidz];
|
||||
oldvd = oldvd->vdev_child[leaf / ztest_opts.zo_raid_children];
|
||||
}
|
||||
|
||||
/* pick a child out of the raidz group */
|
||||
if (ztest_opts.zo_raidz > 1) {
|
||||
ASSERT(oldvd->vdev_ops == &vdev_raidz_ops);
|
||||
ASSERT(oldvd->vdev_children == ztest_opts.zo_raidz);
|
||||
oldvd = oldvd->vdev_child[leaf % ztest_opts.zo_raidz];
|
||||
if (ztest_opts.zo_raid_children > 1) {
|
||||
if (strcmp(oldvd->vdev_ops->vdev_op_type, "raidz") == 0)
|
||||
ASSERT(oldvd->vdev_ops == &vdev_raidz_ops);
|
||||
else
|
||||
ASSERT(oldvd->vdev_ops == &vdev_draid_ops);
|
||||
ASSERT(oldvd->vdev_children == ztest_opts.zo_raid_children);
|
||||
oldvd = oldvd->vdev_child[leaf % ztest_opts.zo_raid_children];
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -3447,6 +3575,10 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
|
||||
if (sav->sav_count != 0 && ztest_random(3) == 0) {
|
||||
newvd = sav->sav_vdevs[ztest_random(sav->sav_count)];
|
||||
newvd_is_spare = B_TRUE;
|
||||
|
||||
if (newvd->vdev_ops == &vdev_draid_spare_ops)
|
||||
newvd_is_dspare = B_TRUE;
|
||||
|
||||
(void) strcpy(newpath, newvd->vdev_path);
|
||||
} else {
|
||||
(void) snprintf(newpath, MAXPATHLEN, ztest_dev_template,
|
||||
@@ -3480,6 +3612,9 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
|
||||
* If newvd is already part of the pool, it should fail with EBUSY.
|
||||
*
|
||||
* If newvd is too small, it should fail with EOVERFLOW.
|
||||
*
|
||||
* If newvd is a distributed spare and it's being attached to a
|
||||
* dRAID which is not its parent it should fail with EINVAL.
|
||||
*/
|
||||
if (pvd->vdev_ops != &vdev_mirror_ops &&
|
||||
pvd->vdev_ops != &vdev_root_ops && (!replacing ||
|
||||
@@ -3492,10 +3627,12 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
|
||||
expected_error = replacing ? 0 : EBUSY;
|
||||
else if (vdev_lookup_by_path(rvd, newpath) != NULL)
|
||||
expected_error = EBUSY;
|
||||
else if (newsize < oldsize)
|
||||
else if (!newvd_is_dspare && newsize < oldsize)
|
||||
expected_error = EOVERFLOW;
|
||||
else if (ashift > oldvd->vdev_top->vdev_ashift)
|
||||
expected_error = EDOM;
|
||||
else if (newvd_is_dspare && pvd != vdev_draid_spare_get_parent(newvd))
|
||||
expected_error = ENOTSUP;
|
||||
else
|
||||
expected_error = 0;
|
||||
|
||||
@@ -4880,13 +5017,13 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
|
||||
void *packcheck = umem_alloc(packsize, UMEM_NOFAIL);
|
||||
void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);
|
||||
|
||||
VERIFY(0 == dmu_read(os, packobj, packoff,
|
||||
VERIFY0(dmu_read(os, packobj, packoff,
|
||||
packsize, packcheck, DMU_READ_PREFETCH));
|
||||
VERIFY(0 == dmu_read(os, bigobj, bigoff,
|
||||
VERIFY0(dmu_read(os, bigobj, bigoff,
|
||||
bigsize, bigcheck, DMU_READ_PREFETCH));
|
||||
|
||||
ASSERT(bcmp(packbuf, packcheck, packsize) == 0);
|
||||
ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0);
|
||||
ASSERT0(bcmp(packbuf, packcheck, packsize));
|
||||
ASSERT0(bcmp(bigbuf, bigcheck, bigsize));
|
||||
|
||||
umem_free(packcheck, packsize);
|
||||
umem_free(bigcheck, bigsize);
|
||||
@@ -5761,7 +5898,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
|
||||
}
|
||||
|
||||
maxfaults = MAXFAULTS(zs);
|
||||
leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;
|
||||
leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raid_children;
|
||||
mirror_save = zs->zs_mirrors;
|
||||
mutex_exit(&ztest_vdev_lock);
|
||||
|
||||
@@ -6011,7 +6148,7 @@ out:
|
||||
/*
|
||||
* By design ztest will never inject uncorrectable damage in to the pool.
|
||||
* Issue a scrub, wait for it to complete, and verify there is never any
|
||||
* any persistent damage.
|
||||
* persistent damage.
|
||||
*
|
||||
* Only after a full scrub has been completed is it safe to start injecting
|
||||
* data corruption. See the comment in zfs_fault_inject().
|
||||
@@ -7347,7 +7484,7 @@ ztest_init(ztest_shared_t *zs)
|
||||
zs->zs_splits = 0;
|
||||
zs->zs_mirrors = ztest_opts.zo_mirrors;
|
||||
nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0,
|
||||
NULL, ztest_opts.zo_raidz, zs->zs_mirrors, 1);
|
||||
NULL, ztest_opts.zo_raid_children, zs->zs_mirrors, 1);
|
||||
props = make_random_props();
|
||||
|
||||
/*
|
||||
@@ -7683,10 +7820,12 @@ main(int argc, char **argv)
|
||||
|
||||
if (ztest_opts.zo_verbose >= 1) {
|
||||
(void) printf("%llu vdevs, %d datasets, %d threads,"
|
||||
" %llu seconds...\n",
|
||||
"%d %s disks, %llu seconds...\n\n",
|
||||
(u_longlong_t)ztest_opts.zo_vdevs,
|
||||
ztest_opts.zo_datasets,
|
||||
ztest_opts.zo_threads,
|
||||
ztest_opts.zo_raid_children,
|
||||
ztest_opts.zo_raid_type,
|
||||
(u_longlong_t)ztest_opts.zo_time);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user