Pool allocation classes

Allocation Classes add the ability to have allocation classes in a
pool that are dedicated to serving specific block categories, such
as DDT data, metadata, and small file blocks. A pool can opt-in to
this feature by adding a 'special' or 'dedup' top-level VDEV.

Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed-by: Richard Laager <rlaager@wiktel.com>
Reviewed-by: Alek Pinchuk <apinchuk@datto.com>
Reviewed-by: Håkan Johansson <f96hajo@chalmers.se>
Reviewed-by: Andreas Dilger <andreas.dilger@chamcloud.com>
Reviewed-by: DHE <git@dehacked.net>
Reviewed-by: Richard Elling <Richard.Elling@RichardElling.com>
Reviewed-by: Gregor Kopka <gregor@kopka.net>
Reviewed-by: Kash Pande <kash@tripleback.net>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Don Brady <don.brady@delphix.com>
Closes #5182
This commit is contained in:
Don Brady
2018-09-05 19:33:36 -06:00
committed by Brian Behlendorf
parent cfa37548eb
commit cc99f275a2
57 changed files with 2326 additions and 282 deletions
+206 -83
View File
@@ -22,13 +22,14 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright (c) 2012 by Frederik Wessels. All rights reserved.
* Copyright (c) 2012 by Cyril Plisko. All rights reserved.
* Copyright (c) 2013 by Prasad Joshi (sTec). All rights reserved.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
* Copyright (c) 2017 Datto Inc.
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
* Copyright (c) 2017, Intel Corporation.
*/
#include <assert.h>
@@ -291,6 +292,8 @@ static zpool_command_t command_table[] = {
#define NCOMMAND (ARRAY_SIZE(command_table))
#define VDEV_ALLOC_CLASS_LOGS "logs"
static zpool_command_t *current_command;
static char history_str[HIS_MAX_RECORD_LEN];
static boolean_t log_history = B_TRUE;
@@ -393,7 +396,7 @@ print_prop_cb(int prop, void *cb)
{
FILE *fp = cb;
(void) fprintf(fp, "\t%-15s ", zpool_prop_to_name(prop));
(void) fprintf(fp, "\t%-19s ", zpool_prop_to_name(prop));
if (zpool_prop_readonly(prop))
(void) fprintf(fp, " NO ");
@@ -445,14 +448,14 @@ usage(boolean_t requested)
(void) fprintf(fp,
gettext("\nthe following properties are supported:\n"));
(void) fprintf(fp, "\n\t%-15s %s %s\n\n",
(void) fprintf(fp, "\n\t%-19s %s %s\n\n",
"PROPERTY", "EDIT", "VALUES");
/* Iterate over all properties */
(void) zprop_iter(print_prop_cb, fp, B_FALSE, B_TRUE,
ZFS_TYPE_POOL);
(void) fprintf(fp, "\t%-15s ", "feature@...");
(void) fprintf(fp, "\t%-19s ", "feature@...");
(void) fprintf(fp, "YES disabled | enabled | active\n");
(void) fprintf(fp, gettext("\nThe feature@ properties must be "
@@ -470,32 +473,45 @@ usage(boolean_t requested)
exit(requested ? 0 : 2);
}
void
/*
* print a pool vdev config for dry runs
*/
static void
print_vdev_tree(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int indent,
boolean_t print_logs, int name_flags)
const char *match, int name_flags)
{
nvlist_t **child;
uint_t c, children;
char *vname;
if (name != NULL)
(void) printf("\t%*s%s\n", indent, "", name);
boolean_t printed = B_FALSE;
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0)
&child, &children) != 0) {
if (name != NULL)
(void) printf("\t%*s%s\n", indent, "", name);
return;
}
for (c = 0; c < children; c++) {
uint64_t is_log = B_FALSE;
char *class = "";
(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
&is_log);
if ((is_log && !print_logs) || (!is_log && print_logs))
if (is_log)
class = VDEV_ALLOC_BIAS_LOG;
(void) nvlist_lookup_string(child[c],
ZPOOL_CONFIG_ALLOCATION_BIAS, &class);
if (strcmp(match, class) != 0)
continue;
if (!printed && name != NULL) {
(void) printf("\t%*s%s\n", indent, "", name);
printed = B_TRUE;
}
vname = zpool_vdev_name(g_zfs, zhp, child[c], name_flags);
print_vdev_tree(zhp, vname, child[c], indent + 2,
B_FALSE, name_flags);
print_vdev_tree(zhp, vname, child[c], indent + 2, "",
name_flags);
free(vname);
}
}
@@ -734,20 +750,25 @@ zpool_do_add(int argc, char **argv)
"configuration:\n"), zpool_get_name(zhp));
/* print original main pool and new tree */
print_vdev_tree(zhp, poolname, poolnvroot, 0, B_FALSE,
name_flags);
print_vdev_tree(zhp, NULL, nvroot, 0, B_FALSE, name_flags);
print_vdev_tree(zhp, poolname, poolnvroot, 0, "",
name_flags | VDEV_NAME_TYPE_ID);
print_vdev_tree(zhp, NULL, nvroot, 0, "", name_flags);
/* Do the same for the logs */
if (num_logs(poolnvroot) > 0) {
print_vdev_tree(zhp, "logs", poolnvroot, 0, B_TRUE,
name_flags);
print_vdev_tree(zhp, NULL, nvroot, 0, B_TRUE,
name_flags);
} else if (num_logs(nvroot) > 0) {
print_vdev_tree(zhp, "logs", nvroot, 0, B_TRUE,
name_flags);
}
/* print other classes: 'dedup', 'special', and 'log' */
print_vdev_tree(zhp, "dedup", poolnvroot, 0,
VDEV_ALLOC_BIAS_DEDUP, name_flags);
print_vdev_tree(zhp, NULL, nvroot, 0, VDEV_ALLOC_BIAS_DEDUP,
name_flags);
print_vdev_tree(zhp, "special", poolnvroot, 0,
VDEV_ALLOC_BIAS_SPECIAL, name_flags);
print_vdev_tree(zhp, NULL, nvroot, 0, VDEV_ALLOC_BIAS_SPECIAL,
name_flags);
print_vdev_tree(zhp, "logs", poolnvroot, 0, VDEV_ALLOC_BIAS_LOG,
name_flags);
print_vdev_tree(zhp, NULL, nvroot, 0, VDEV_ALLOC_BIAS_LOG,
name_flags);
/* Do the same for the caches */
if (nvlist_lookup_nvlist_array(poolnvroot, ZPOOL_CONFIG_L2CACHE,
@@ -1305,9 +1326,13 @@ zpool_do_create(int argc, char **argv)
(void) printf(gettext("would create '%s' with the "
"following layout:\n\n"), poolname);
print_vdev_tree(NULL, poolname, nvroot, 0, B_FALSE, 0);
if (num_logs(nvroot) > 0)
print_vdev_tree(NULL, "logs", nvroot, 0, B_TRUE, 0);
print_vdev_tree(NULL, poolname, nvroot, 0, "", 0);
print_vdev_tree(NULL, "dedup", nvroot, 0,
VDEV_ALLOC_BIAS_DEDUP, 0);
print_vdev_tree(NULL, "special", nvroot, 0,
VDEV_ALLOC_BIAS_SPECIAL, 0);
print_vdev_tree(NULL, "logs", nvroot, 0,
VDEV_ALLOC_BIAS_LOG, 0);
ret = 0;
} else {
@@ -1865,6 +1890,10 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
&ishole);
if (islog || ishole)
continue;
/* Only print normal classes here */
if (nvlist_exists(child[c], ZPOOL_CONFIG_ALLOCATION_BIAS))
continue;
vname = zpool_vdev_name(g_zfs, zhp, child[c],
cb->cb_name_flags | VDEV_NAME_TYPE_ID);
print_status_config(zhp, cb, vname, child[c], depth + 2,
@@ -1951,6 +1980,8 @@ print_import_config(status_cbdata_t *cb, const char *name, nvlist_t *nv,
&is_log);
if (is_log)
continue;
if (nvlist_exists(child[c], ZPOOL_CONFIG_ALLOCATION_BIAS))
continue;
vname = zpool_vdev_name(g_zfs, NULL, child[c],
cb->cb_name_flags | VDEV_NAME_TYPE_ID);
@@ -1982,34 +2013,56 @@ print_import_config(status_cbdata_t *cb, const char *name, nvlist_t *nv,
}
/*
* Print log vdevs.
* Logs are recorded as top level vdevs in the main pool child array
* but with "is_log" set to 1. We use either print_status_config() or
* print_import_config() to print the top level logs then any log
* children (eg mirrored slogs) are printed recursively - which
* works because only the top level vdev is marked "is_log"
* Print specialized class vdevs.
*
* These are recorded as top level vdevs in the main pool child array
* but with "is_log" set to 1 or an "alloc_bias" string. We use either
* print_status_config() or print_import_config() to print the top level
* class vdevs then any of their children (eg mirrored slogs) are printed
* recursively - which works because only the top level vdev is marked.
*/
static void
print_logs(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t *nv)
print_class_vdevs(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t *nv,
const char *class)
{
uint_t c, children;
nvlist_t **child;
boolean_t printed = B_FALSE;
assert(zhp != NULL || !cb->cb_verbose);
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child,
&children) != 0)
return;
(void) printf(gettext("\tlogs\n"));
for (c = 0; c < children; c++) {
uint64_t is_log = B_FALSE;
char *name;
char *bias = NULL;
char *type = NULL;
(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
&is_log);
if (!is_log)
if (is_log) {
bias = VDEV_ALLOC_CLASS_LOGS;
} else {
(void) nvlist_lookup_string(child[c],
ZPOOL_CONFIG_ALLOCATION_BIAS, &bias);
(void) nvlist_lookup_string(child[c],
ZPOOL_CONFIG_TYPE, &type);
}
if (bias == NULL || strcmp(bias, class) != 0)
continue;
name = zpool_vdev_name(g_zfs, zhp, child[c],
if (!is_log && strcmp(type, VDEV_TYPE_INDIRECT) == 0)
continue;
if (!printed) {
(void) printf("\t%s\t\n", gettext(class));
printed = B_TRUE;
}
char *name = zpool_vdev_name(g_zfs, zhp, child[c],
cb->cb_name_flags | VDEV_NAME_TYPE_ID);
if (cb->cb_print_status)
print_status_config(zhp, cb, name, child[c], 2,
@@ -2313,8 +2366,10 @@ show_import(nvlist_t *config)
cb.cb_namewidth = 10;
print_import_config(&cb, name, nvroot, 0);
if (num_logs(nvroot) > 0)
print_logs(NULL, &cb, nvroot);
print_class_vdevs(NULL, &cb, nvroot, VDEV_ALLOC_BIAS_DEDUP);
print_class_vdevs(NULL, &cb, nvroot, VDEV_ALLOC_BIAS_SPECIAL);
print_class_vdevs(NULL, &cb, nvroot, VDEV_ALLOC_CLASS_LOGS);
if (reason == ZPOOL_STATUS_BAD_GUID_SUM) {
(void) printf(gettext("\n\tAdditional devices are known to "
@@ -3810,6 +3865,12 @@ print_iostat_default(vdev_stat_t *vs, iostat_cbdata_t *cb, double scale)
format, column_width, cb->cb_scripted);
}
static const char *class_name[] = {
VDEV_ALLOC_BIAS_DEDUP,
VDEV_ALLOC_BIAS_SPECIAL,
VDEV_ALLOC_CLASS_LOGS
};
/*
* Print out all the statistics for the given vdev. This can either be the
* toplevel configuration, or called recursively. If 'name' is NULL, then this
@@ -3817,7 +3878,7 @@ print_iostat_default(vdev_stat_t *vs, iostat_cbdata_t *cb, double scale)
*
* Returns the number of stat lines printed.
*/
unsigned int
static unsigned int
print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
nvlist_t *newnv, iostat_cbdata_t *cb, int depth)
{
@@ -3945,6 +4006,9 @@ children:
children = MIN(oldchildren, children);
}
/*
* print normal top-level devices
*/
for (c = 0; c < children; c++) {
uint64_t ishole = B_FALSE, islog = B_FALSE;
@@ -3957,6 +4021,9 @@ children:
if (ishole || islog)
continue;
if (nvlist_exists(newchild[c], ZPOOL_CONFIG_ALLOCATION_BIAS))
continue;
vname = zpool_vdev_name(g_zfs, zhp, newchild[c],
cb->cb_name_flags);
ret += print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL,
@@ -3965,31 +4032,47 @@ children:
}
/*
* Log device section
* print all other top-level devices
*/
if (num_logs(newnv) > 0) {
if ((!(cb->cb_flags & IOS_ANYHISTO_M)) && !cb->cb_scripted &&
!cb->cb_vdev_names) {
print_iostat_dashes(cb, 0, "logs");
}
printf("\n");
for (uint_t n = 0; n < 3; n++) {
boolean_t printed = B_FALSE;
for (c = 0; c < children; c++) {
uint64_t islog = B_FALSE;
char *bias = NULL;
char *type = NULL;
(void) nvlist_lookup_uint64(newchild[c],
ZPOOL_CONFIG_IS_LOG, &islog);
if (islog) {
vname = zpool_vdev_name(g_zfs, zhp, newchild[c],
cb->cb_name_flags);
ret += print_vdev_stats(zhp, vname, oldnv ?
oldchild[c] : NULL, newchild[c],
cb, depth + 2);
free(vname);
bias = VDEV_ALLOC_CLASS_LOGS;
} else {
(void) nvlist_lookup_string(newchild[c],
ZPOOL_CONFIG_ALLOCATION_BIAS, &bias);
(void) nvlist_lookup_string(newchild[c],
ZPOOL_CONFIG_TYPE, &type);
}
}
if (bias == NULL || strcmp(bias, class_name[n]) != 0)
continue;
if (!islog && strcmp(type, VDEV_TYPE_INDIRECT) == 0)
continue;
if (!printed) {
if ((!(cb->cb_flags & IOS_ANYHISTO_M)) &&
!cb->cb_scripted && !cb->cb_vdev_names) {
print_iostat_dashes(cb, 0,
class_name[n]);
}
printf("\n");
printed = B_TRUE;
}
vname = zpool_vdev_name(g_zfs, zhp, newchild[c],
cb->cb_name_flags);
ret += print_vdev_stats(zhp, vname, oldnv ?
oldchild[c] : NULL, newchild[c], cb, depth + 2);
free(vname);
}
}
/*
@@ -4937,6 +5020,7 @@ typedef struct list_cbdata {
boolean_t cb_literal;
} list_cbdata_t;
/*
* Given a list of columns to display, output appropriate headers for each one.
*/
@@ -4991,7 +5075,7 @@ print_header(list_cbdata_t *cb)
/*
* Given a pool and a list of properties, print out all the properties according
* to the described layout.
* to the described layout. Used by zpool_do_list().
*/
static void
print_pool(zpool_handle_t *zhp, list_cbdata_t *cb)
@@ -5087,12 +5171,14 @@ print_one_column(zpool_prop_t prop, uint64_t value, boolean_t scripted,
}
break;
case ZPOOL_PROP_CAPACITY:
/* capacity value is in parts-per-10,000 (aka permyriad) */
if (format == ZFS_NICENUM_RAW)
(void) snprintf(propval, sizeof (propval), "%llu",
(unsigned long long)value);
(unsigned long long)value / 100);
else
(void) snprintf(propval, sizeof (propval), "%llu%%",
(unsigned long long)value);
(void) snprintf(propval, sizeof (propval),
value < 1000 ? "%1.2f%%" : value < 10000 ?
"%2.1f%%" : "%3.0f%%", value / 100.0);
break;
default:
zfs_nicenum_format(value, propval, sizeof (propval), format);
@@ -5107,6 +5193,10 @@ print_one_column(zpool_prop_t prop, uint64_t value, boolean_t scripted,
(void) printf(" %*s", (int)width, propval);
}
/*
* print static default line per vdev
* not compatible with '-o' <proplist> option
*/
void
print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
list_cbdata_t *cb, int depth)
@@ -5117,7 +5207,6 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
char *vname;
boolean_t scripted = cb->cb_scripted;
uint64_t islog = B_FALSE;
boolean_t haslog = B_FALSE;
char *dashes = "%-*s - - - - - -\n";
verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
@@ -5165,7 +5254,7 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
(vs->vs_fragmentation != ZFS_FRAG_INVALID && toplevel),
format);
cap = (vs->vs_space == 0) ? 0 :
(vs->vs_alloc * 100 / vs->vs_space);
(vs->vs_alloc * 10000 / vs->vs_space);
print_one_column(ZPOOL_PROP_CAPACITY, cap, scripted, toplevel,
format);
(void) printf("\n");
@@ -5175,6 +5264,7 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
&child, &children) != 0)
return;
/* list the normal vdevs first */
for (c = 0; c < children; c++) {
uint64_t ishole = B_FALSE;
@@ -5183,10 +5273,11 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
continue;
if (nvlist_lookup_uint64(child[c],
ZPOOL_CONFIG_IS_LOG, &islog) == 0 && islog) {
haslog = B_TRUE;
ZPOOL_CONFIG_IS_LOG, &islog) == 0 && islog)
continue;
if (nvlist_exists(child[c], ZPOOL_CONFIG_ALLOCATION_BIAS))
continue;
}
vname = zpool_vdev_name(g_zfs, zhp, child[c],
cb->cb_name_flags);
@@ -5194,13 +5285,34 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
free(vname);
}
if (haslog == B_TRUE) {
/* LINTED E_SEC_PRINTF_VAR_FMT */
(void) printf(dashes, cb->cb_namewidth, "log");
/* list the classes: 'logs', 'dedup', and 'special' */
for (uint_t n = 0; n < 3; n++) {
boolean_t printed = B_FALSE;
for (c = 0; c < children; c++) {
char *bias = NULL;
char *type = NULL;
if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
&islog) != 0 || !islog)
&islog) == 0 && islog) {
bias = VDEV_ALLOC_CLASS_LOGS;
} else {
(void) nvlist_lookup_string(child[c],
ZPOOL_CONFIG_ALLOCATION_BIAS, &bias);
(void) nvlist_lookup_string(child[c],
ZPOOL_CONFIG_TYPE, &type);
}
if (bias == NULL || strcmp(bias, class_name[n]) != 0)
continue;
if (!islog && strcmp(type, VDEV_TYPE_INDIRECT) == 0)
continue;
if (!printed) {
/* LINTED E_SEC_PRINTF_VAR_FMT */
(void) printf(dashes, cb->cb_namewidth,
class_name[n]);
printed = B_TRUE;
}
vname = zpool_vdev_name(g_zfs, zhp, child[c],
cb->cb_name_flags);
print_list_stats(zhp, vname, child[c], cb, depth + 2);
@@ -5233,7 +5345,6 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
}
}
/*
* Generic callback function to list a pool.
*/
@@ -5246,13 +5357,21 @@ list_callback(zpool_handle_t *zhp, void *data)
config = zpool_get_config(zhp, NULL);
print_pool(zhp, cbp);
if (!cbp->cb_verbose)
return (0);
if (cbp->cb_verbose) {
config = zpool_get_config(zhp, NULL);
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) == 0);
print_list_stats(zhp, NULL, nvroot, cbp, 0);
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) == 0);
}
if (cbp->cb_verbose)
cbp->cb_namewidth = max_width(zhp, nvroot, 0, 0,
cbp->cb_name_flags);
print_pool(zhp, cbp);
if (cbp->cb_verbose)
print_list_stats(zhp, NULL, nvroot, cbp, 0);
return (0);
}
@@ -5315,6 +5434,7 @@ zpool_do_list(int argc, char **argv)
break;
case 'v':
cb.cb_verbose = B_TRUE;
cb.cb_namewidth = 8; /* 8 until precalc is avail */
break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
@@ -5716,7 +5836,7 @@ zpool_do_split(int argc, char **argv)
if (flags.dryrun) {
(void) printf(gettext("would create '%s' with the "
"following layout:\n\n"), newpool);
print_vdev_tree(NULL, newpool, config, 0, B_FALSE,
print_vdev_tree(NULL, newpool, config, 0, "",
flags.name_flags);
}
}
@@ -6965,11 +7085,14 @@ status_callback(zpool_handle_t *zhp, void *data)
print_cmd_columns(cbp->vcdl, 0);
printf("\n");
print_status_config(zhp, cbp, zpool_get_name(zhp), nvroot, 0,
B_FALSE);
if (num_logs(nvroot) > 0)
print_logs(zhp, cbp, nvroot);
print_class_vdevs(zhp, cbp, nvroot, VDEV_ALLOC_BIAS_DEDUP);
print_class_vdevs(zhp, cbp, nvroot, VDEV_ALLOC_BIAS_SPECIAL);
print_class_vdevs(zhp, cbp, nvroot, VDEV_ALLOC_CLASS_LOGS);
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
&l2cache, &nl2cache) == 0)
print_l2cache(zhp, cbp, l2cache, nl2cache);
+103 -10
View File
@@ -21,8 +21,8 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2015 by Delphix. All rights reserved.
* Copyright (c) 2016 Intel Corporation.
* Copyright (c) 2013, 2018 by Delphix. All rights reserved.
* Copyright (c) 2016, 2017 Intel Corporation.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
*/
@@ -684,6 +684,9 @@ make_leaf_vdev(nvlist_t *props, const char *arg, uint64_t is_log)
verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0);
verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0);
verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_LOG, is_log) == 0);
if (is_log)
verify(nvlist_add_string(vdev, ZPOOL_CONFIG_ALLOCATION_BIAS,
VDEV_ALLOC_BIAS_LOG) == 0);
if (strcmp(type, VDEV_TYPE_DISK) == 0)
verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
(uint64_t)wholedisk) == 0);
@@ -742,6 +745,9 @@ make_leaf_vdev(nvlist_t *props, const char *arg, uint64_t is_log)
*
* Otherwise, make sure that the current spec (if there is one) and the new
* spec have consistent replication levels.
*
* If there is no current spec (create), make sure new spec has at least
* one general purpose vdev.
*/
typedef struct replication_level {
char *zprl_type;
@@ -965,7 +971,7 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
/*
* At this point, we have the replication of the last toplevel
* vdev in 'rep'. Compare it to 'lastrep' to see if its
* vdev in 'rep'. Compare it to 'lastrep' to see if it is
* different.
*/
if (lastrep.zprl_type != NULL) {
@@ -1466,6 +1472,13 @@ is_grouping(const char *type, int *mindev, int *maxdev)
return (VDEV_TYPE_LOG);
}
if (strcmp(type, VDEV_ALLOC_BIAS_SPECIAL) == 0 ||
strcmp(type, VDEV_ALLOC_BIAS_DEDUP) == 0) {
if (mindev != NULL)
*mindev = 1;
return (type);
}
if (strcmp(type, "cache") == 0) {
if (mindev != NULL)
*mindev = 1;
@@ -1487,7 +1500,7 @@ construct_spec(nvlist_t *props, int argc, char **argv)
nvlist_t *nvroot, *nv, **top, **spares, **l2cache;
int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache;
const char *type;
uint64_t is_log;
uint64_t is_log, is_special, is_dedup;
boolean_t seen_logs;
top = NULL;
@@ -1497,7 +1510,7 @@ construct_spec(nvlist_t *props, int argc, char **argv)
nspares = 0;
nlogs = 0;
nl2cache = 0;
is_log = B_FALSE;
is_log = is_special = is_dedup = B_FALSE;
seen_logs = B_FALSE;
nvroot = NULL;
@@ -1520,7 +1533,7 @@ construct_spec(nvlist_t *props, int argc, char **argv)
"specified only once\n"));
goto spec_out;
}
is_log = B_FALSE;
is_log = is_special = is_dedup = B_FALSE;
}
if (strcmp(type, VDEV_TYPE_LOG) == 0) {
@@ -1533,6 +1546,8 @@ construct_spec(nvlist_t *props, int argc, char **argv)
}
seen_logs = B_TRUE;
is_log = B_TRUE;
is_special = B_FALSE;
is_dedup = B_FALSE;
argc--;
argv++;
/*
@@ -1542,6 +1557,24 @@ construct_spec(nvlist_t *props, int argc, char **argv)
continue;
}
if (strcmp(type, VDEV_ALLOC_BIAS_SPECIAL) == 0) {
is_special = B_TRUE;
is_log = B_FALSE;
is_dedup = B_FALSE;
argc--;
argv++;
continue;
}
if (strcmp(type, VDEV_ALLOC_BIAS_DEDUP) == 0) {
is_dedup = B_TRUE;
is_log = B_FALSE;
is_special = B_FALSE;
argc--;
argv++;
continue;
}
if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
if (l2cache != NULL) {
(void) fprintf(stderr,
@@ -1550,15 +1583,16 @@ construct_spec(nvlist_t *props, int argc, char **argv)
"specified only once\n"));
goto spec_out;
}
is_log = B_FALSE;
is_log = is_special = is_dedup = B_FALSE;
}
if (is_log) {
if (is_log || is_special || is_dedup) {
if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
(void) fprintf(stderr,
gettext("invalid vdev "
"specification: unsupported 'log' "
"device: %s\n"), type);
"specification: unsupported '%s' "
"device: %s\n"), is_log ? "log" :
"special", type);
goto spec_out;
}
nlogs++;
@@ -1615,12 +1649,27 @@ construct_spec(nvlist_t *props, int argc, char **argv)
nl2cache = children;
continue;
} else {
/* create a top-level vdev with children */
verify(nvlist_alloc(&nv, NV_UNIQUE_NAME,
0) == 0);
verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
type) == 0);
verify(nvlist_add_uint64(nv,
ZPOOL_CONFIG_IS_LOG, is_log) == 0);
if (is_log)
verify(nvlist_add_string(nv,
ZPOOL_CONFIG_ALLOCATION_BIAS,
VDEV_ALLOC_BIAS_LOG) == 0);
if (is_special) {
verify(nvlist_add_string(nv,
ZPOOL_CONFIG_ALLOCATION_BIAS,
VDEV_ALLOC_BIAS_SPECIAL) == 0);
}
if (is_dedup) {
verify(nvlist_add_string(nv,
ZPOOL_CONFIG_ALLOCATION_BIAS,
VDEV_ALLOC_BIAS_DEDUP) == 0);
}
if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
verify(nvlist_add_uint64(nv,
ZPOOL_CONFIG_NPARITY,
@@ -1645,6 +1694,16 @@ construct_spec(nvlist_t *props, int argc, char **argv)
if (is_log)
nlogs++;
if (is_special) {
verify(nvlist_add_string(nv,
ZPOOL_CONFIG_ALLOCATION_BIAS,
VDEV_ALLOC_BIAS_SPECIAL) == 0);
}
if (is_dedup) {
verify(nvlist_add_string(nv,
ZPOOL_CONFIG_ALLOCATION_BIAS,
VDEV_ALLOC_BIAS_DEDUP) == 0);
}
argc--;
argv++;
}
@@ -1745,6 +1804,30 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
return (newroot);
}
static int
num_normal_vdevs(nvlist_t *nvroot)
{
nvlist_t **top;
uint_t t, toplevels, normal = 0;
verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
&top, &toplevels) == 0);
for (t = 0; t < toplevels; t++) {
uint64_t log = B_FALSE;
(void) nvlist_lookup_uint64(top[t], ZPOOL_CONFIG_IS_LOG, &log);
if (log)
continue;
if (nvlist_exists(top[t], ZPOOL_CONFIG_ALLOCATION_BIAS))
continue;
normal++;
}
return (normal);
}
/*
* Get and validate the contents of the given vdev specification. This ensures
* that the nvlist returned is well-formed, that all the devices exist, and that
@@ -1797,6 +1880,16 @@ make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep,
return (NULL);
}
/*
* On pool create the new vdev spec must have one normal vdev.
*/
if (poolconfig == NULL && num_normal_vdevs(newroot) == 0) {
vdev_error(gettext("at least one general top-level vdev must "
"be specified\n"));
nvlist_free(newroot);
return (NULL);
}
/*
* Run through the vdev specification and label any whole disks found.
*/