Pool allocation classes

Allocation Classes add the ability to have allocation classes in a
pool that are dedicated to serving specific block categories, such
as DDT data, metadata, and small file blocks. A pool can opt-in to
this feature by adding a 'special' or 'dedup' top-level VDEV.

Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed-by: Richard Laager <rlaager@wiktel.com>
Reviewed-by: Alek Pinchuk <apinchuk@datto.com>
Reviewed-by: Håkan Johansson <f96hajo@chalmers.se>
Reviewed-by: Andreas Dilger <andreas.dilger@chamcloud.com>
Reviewed-by: DHE <git@dehacked.net>
Reviewed-by: Richard Elling <Richard.Elling@RichardElling.com>
Reviewed-by: Gregor Kopka <gregor@kopka.net>
Reviewed-by: Kash Pande <kash@tripleback.net>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Don Brady <don.brady@delphix.com>
Closes #5182
This commit is contained in:
Don Brady
2018-09-05 19:33:36 -06:00
committed by Brian Behlendorf
parent cfa37548eb
commit cc99f275a2
57 changed files with 2326 additions and 282 deletions
+53 -12
View File
@@ -31,6 +31,7 @@
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
* Copyright (c) 2017 Datto Inc.
* Copyright 2017 Joyent, Inc.
* Copyright (c) 2017, Intel Corporation.
*/
/*
@@ -272,8 +273,14 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
ASSERT(MUTEX_HELD(&spa->spa_props_lock));
if (rvd != NULL) {
alloc = metaslab_class_get_alloc(spa_normal_class(spa));
size = metaslab_class_get_space(spa_normal_class(spa));
alloc = metaslab_class_get_alloc(mc);
alloc += metaslab_class_get_alloc(spa_special_class(spa));
alloc += metaslab_class_get_alloc(spa_dedup_class(spa));
size = metaslab_class_get_space(mc);
size += metaslab_class_get_space(spa_special_class(spa));
size += metaslab_class_get_space(spa_dedup_class(spa));
spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src);
spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src);
@@ -1173,6 +1180,8 @@ spa_activate(spa_t *spa, int mode)
spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops);
spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops);
spa->spa_special_class = metaslab_class_create(spa, zfs_metaslab_ops);
spa->spa_dedup_class = metaslab_class_create(spa, zfs_metaslab_ops);
/* Try to create a covering process */
mutex_enter(&spa->spa_proc_lock);
@@ -1320,6 +1329,12 @@ spa_deactivate(spa_t *spa)
metaslab_class_destroy(spa->spa_log_class);
spa->spa_log_class = NULL;
metaslab_class_destroy(spa->spa_special_class);
spa->spa_special_class = NULL;
metaslab_class_destroy(spa->spa_dedup_class);
spa->spa_dedup_class = NULL;
/*
* If this was part of an import or the open otherwise failed, we may
* still have errors left in the queues. Empty them just in case.
@@ -4988,7 +5003,8 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
char *poolname;
nvlist_t *nvl;
if (nvlist_lookup_string(props, "tname", &poolname) != 0)
if (props == NULL ||
nvlist_lookup_string(props, "tname", &poolname) != 0)
poolname = (char *)pool;
/*
@@ -5092,9 +5108,15 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
(error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
(error = spa_validate_aux(spa, nvroot, txg,
VDEV_ALLOC_ADD)) == 0) {
for (int c = 0; c < rvd->vdev_children; c++) {
vdev_metaslab_set_size(rvd->vdev_child[c]);
vdev_expand(rvd->vdev_child[c], txg);
/*
* instantiate the metaslab groups (this will dirty the vdevs)
* we can no longer error exit past this point
*/
for (int c = 0; error == 0 && c < rvd->vdev_children; c++) {
vdev_t *vd = rvd->vdev_child[c];
vdev_metaslab_set_size(vd);
vdev_expand(vd, txg);
}
}
@@ -6940,8 +6962,14 @@ spa_async_thread(void *arg)
mutex_enter(&spa_namespace_lock);
old_space = metaslab_class_get_space(spa_normal_class(spa));
old_space += metaslab_class_get_space(spa_special_class(spa));
old_space += metaslab_class_get_space(spa_dedup_class(spa));
spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
new_space = metaslab_class_get_space(spa_normal_class(spa));
new_space += metaslab_class_get_space(spa_special_class(spa));
new_space += metaslab_class_get_space(spa_dedup_class(spa));
mutex_exit(&spa_namespace_lock);
/*
@@ -7630,6 +7658,9 @@ spa_sync(spa_t *spa, uint64_t txg)
dsl_pool_t *dp = spa->spa_dsl_pool;
objset_t *mos = spa->spa_meta_objset;
bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK];
metaslab_class_t *normal = spa_normal_class(spa);
metaslab_class_t *special = spa_special_class(spa);
metaslab_class_t *dedup = spa_dedup_class(spa);
vdev_t *rvd = spa->spa_root_vdev;
vdev_t *vd;
dmu_tx_t *tx;
@@ -7723,9 +7754,13 @@ spa_sync(spa_t *spa, uint64_t txg)
for (int c = 0; c < rvd->vdev_children; c++) {
vdev_t *tvd = rvd->vdev_child[c];
metaslab_group_t *mg = tvd->vdev_mg;
metaslab_class_t *mc;
if (mg == NULL || mg->mg_class != spa_normal_class(spa) ||
!metaslab_group_initialized(mg))
if (mg == NULL || !metaslab_group_initialized(mg))
continue;
mc = mg->mg_class;
if (mc != normal && mc != special && mc != dedup)
continue;
/*
@@ -7743,12 +7778,18 @@ spa_sync(spa_t *spa, uint64_t txg)
}
slots_per_allocator += zfs_vdev_def_queue_depth;
}
metaslab_class_t *mc = spa_normal_class(spa);
for (int i = 0; i < spa->spa_alloc_count; i++) {
ASSERT0(refcount_count(&mc->mc_alloc_slots[i]));
mc->mc_alloc_max_slots[i] = slots_per_allocator;
ASSERT0(refcount_count(&normal->mc_alloc_slots[i]));
ASSERT0(refcount_count(&special->mc_alloc_slots[i]));
ASSERT0(refcount_count(&dedup->mc_alloc_slots[i]));
normal->mc_alloc_max_slots[i] = slots_per_allocator;
special->mc_alloc_max_slots[i] = slots_per_allocator;
dedup->mc_alloc_max_slots[i] = slots_per_allocator;
}
mc->mc_alloc_throttle_enabled = zio_dva_throttle_enabled;
normal->mc_alloc_throttle_enabled = zio_dva_throttle_enabled;
special->mc_alloc_throttle_enabled = zio_dva_throttle_enabled;
dedup->mc_alloc_throttle_enabled = zio_dva_throttle_enabled;
for (int c = 0; c < rvd->vdev_children; c++) {
vdev_t *vd = rvd->vdev_child[c];