Pool allocation classes

Allocation Classes add the ability to have allocation classes in a
pool that are dedicated to serving specific block categories, such
as DDT data, metadata, and small file blocks. A pool can opt-in to
this feature by adding a 'special' or 'dedup' top-level VDEV.

Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed-by: Richard Laager <rlaager@wiktel.com>
Reviewed-by: Alek Pinchuk <apinchuk@datto.com>
Reviewed-by: Håkan Johansson <f96hajo@chalmers.se>
Reviewed-by: Andreas Dilger <andreas.dilger@chamcloud.com>
Reviewed-by: DHE <git@dehacked.net>
Reviewed-by: Richard Elling <Richard.Elling@RichardElling.com>
Reviewed-by: Gregor Kopka <gregor@kopka.net>
Reviewed-by: Kash Pande <kash@tripleback.net>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Don Brady <don.brady@delphix.com>
Closes #5182
This commit is contained in:
Don Brady
2018-09-05 19:33:36 -06:00
committed by Brian Behlendorf
parent cfa37548eb
commit cc99f275a2
57 changed files with 2326 additions and 282 deletions
+103 -10
View File
@@ -21,8 +21,8 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2015 by Delphix. All rights reserved.
* Copyright (c) 2016 Intel Corporation.
* Copyright (c) 2013, 2018 by Delphix. All rights reserved.
* Copyright (c) 2016, 2017 Intel Corporation.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
*/
@@ -684,6 +684,9 @@ make_leaf_vdev(nvlist_t *props, const char *arg, uint64_t is_log)
verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0);
verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0);
verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_LOG, is_log) == 0);
if (is_log)
verify(nvlist_add_string(vdev, ZPOOL_CONFIG_ALLOCATION_BIAS,
VDEV_ALLOC_BIAS_LOG) == 0);
if (strcmp(type, VDEV_TYPE_DISK) == 0)
verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
(uint64_t)wholedisk) == 0);
@@ -742,6 +745,9 @@ make_leaf_vdev(nvlist_t *props, const char *arg, uint64_t is_log)
*
* Otherwise, make sure that the current spec (if there is one) and the new
* spec have consistent replication levels.
*
* If there is no current spec (create), make sure new spec has at least
* one general purpose vdev.
*/
typedef struct replication_level {
char *zprl_type;
@@ -965,7 +971,7 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
/*
* At this point, we have the replication of the last toplevel
* vdev in 'rep'. Compare it to 'lastrep' to see if its
* vdev in 'rep'. Compare it to 'lastrep' to see if it is
* different.
*/
if (lastrep.zprl_type != NULL) {
@@ -1466,6 +1472,13 @@ is_grouping(const char *type, int *mindev, int *maxdev)
return (VDEV_TYPE_LOG);
}
if (strcmp(type, VDEV_ALLOC_BIAS_SPECIAL) == 0 ||
strcmp(type, VDEV_ALLOC_BIAS_DEDUP) == 0) {
if (mindev != NULL)
*mindev = 1;
return (type);
}
if (strcmp(type, "cache") == 0) {
if (mindev != NULL)
*mindev = 1;
@@ -1487,7 +1500,7 @@ construct_spec(nvlist_t *props, int argc, char **argv)
nvlist_t *nvroot, *nv, **top, **spares, **l2cache;
int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache;
const char *type;
uint64_t is_log;
uint64_t is_log, is_special, is_dedup;
boolean_t seen_logs;
top = NULL;
@@ -1497,7 +1510,7 @@ construct_spec(nvlist_t *props, int argc, char **argv)
nspares = 0;
nlogs = 0;
nl2cache = 0;
is_log = B_FALSE;
is_log = is_special = is_dedup = B_FALSE;
seen_logs = B_FALSE;
nvroot = NULL;
@@ -1520,7 +1533,7 @@ construct_spec(nvlist_t *props, int argc, char **argv)
"specified only once\n"));
goto spec_out;
}
is_log = B_FALSE;
is_log = is_special = is_dedup = B_FALSE;
}
if (strcmp(type, VDEV_TYPE_LOG) == 0) {
@@ -1533,6 +1546,8 @@ construct_spec(nvlist_t *props, int argc, char **argv)
}
seen_logs = B_TRUE;
is_log = B_TRUE;
is_special = B_FALSE;
is_dedup = B_FALSE;
argc--;
argv++;
/*
@@ -1542,6 +1557,24 @@ construct_spec(nvlist_t *props, int argc, char **argv)
continue;
}
if (strcmp(type, VDEV_ALLOC_BIAS_SPECIAL) == 0) {
is_special = B_TRUE;
is_log = B_FALSE;
is_dedup = B_FALSE;
argc--;
argv++;
continue;
}
if (strcmp(type, VDEV_ALLOC_BIAS_DEDUP) == 0) {
is_dedup = B_TRUE;
is_log = B_FALSE;
is_special = B_FALSE;
argc--;
argv++;
continue;
}
if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
if (l2cache != NULL) {
(void) fprintf(stderr,
@@ -1550,15 +1583,16 @@ construct_spec(nvlist_t *props, int argc, char **argv)
"specified only once\n"));
goto spec_out;
}
is_log = B_FALSE;
is_log = is_special = is_dedup = B_FALSE;
}
if (is_log) {
if (is_log || is_special || is_dedup) {
if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
(void) fprintf(stderr,
gettext("invalid vdev "
"specification: unsupported 'log' "
"device: %s\n"), type);
"specification: unsupported '%s' "
"device: %s\n"), is_log ? "log" :
"special", type);
goto spec_out;
}
nlogs++;
@@ -1615,12 +1649,27 @@ construct_spec(nvlist_t *props, int argc, char **argv)
nl2cache = children;
continue;
} else {
/* create a top-level vdev with children */
verify(nvlist_alloc(&nv, NV_UNIQUE_NAME,
0) == 0);
verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
type) == 0);
verify(nvlist_add_uint64(nv,
ZPOOL_CONFIG_IS_LOG, is_log) == 0);
if (is_log)
verify(nvlist_add_string(nv,
ZPOOL_CONFIG_ALLOCATION_BIAS,
VDEV_ALLOC_BIAS_LOG) == 0);
if (is_special) {
verify(nvlist_add_string(nv,
ZPOOL_CONFIG_ALLOCATION_BIAS,
VDEV_ALLOC_BIAS_SPECIAL) == 0);
}
if (is_dedup) {
verify(nvlist_add_string(nv,
ZPOOL_CONFIG_ALLOCATION_BIAS,
VDEV_ALLOC_BIAS_DEDUP) == 0);
}
if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
verify(nvlist_add_uint64(nv,
ZPOOL_CONFIG_NPARITY,
@@ -1645,6 +1694,16 @@ construct_spec(nvlist_t *props, int argc, char **argv)
if (is_log)
nlogs++;
if (is_special) {
verify(nvlist_add_string(nv,
ZPOOL_CONFIG_ALLOCATION_BIAS,
VDEV_ALLOC_BIAS_SPECIAL) == 0);
}
if (is_dedup) {
verify(nvlist_add_string(nv,
ZPOOL_CONFIG_ALLOCATION_BIAS,
VDEV_ALLOC_BIAS_DEDUP) == 0);
}
argc--;
argv++;
}
@@ -1745,6 +1804,30 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
return (newroot);
}
static int
num_normal_vdevs(nvlist_t *nvroot)
{
nvlist_t **top;
uint_t t, toplevels, normal = 0;
verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
&top, &toplevels) == 0);
for (t = 0; t < toplevels; t++) {
uint64_t log = B_FALSE;
(void) nvlist_lookup_uint64(top[t], ZPOOL_CONFIG_IS_LOG, &log);
if (log)
continue;
if (nvlist_exists(top[t], ZPOOL_CONFIG_ALLOCATION_BIAS))
continue;
normal++;
}
return (normal);
}
/*
* Get and validate the contents of the given vdev specification. This ensures
* that the nvlist returned is well-formed, that all the devices exist, and that
@@ -1797,6 +1880,16 @@ make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep,
return (NULL);
}
/*
* On pool create the new vdev spec must have one normal vdev.
*/
if (poolconfig == NULL && num_normal_vdevs(newroot) == 0) {
vdev_error(gettext("at least one general top-level vdev must "
"be specified\n"));
nvlist_free(newroot);
return (NULL);
}
/*
* Run through the vdev specification and label any whole disks found.
*/