Pool allocation classes

Allocation Classes add the ability to have allocation classes in a
pool that are dedicated to serving specific block categories, such
as DDT data, metadata, and small file blocks. A pool can opt-in to
this feature by adding a 'special' or 'dedup' top-level VDEV.

Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed-by: Richard Laager <rlaager@wiktel.com>
Reviewed-by: Alek Pinchuk <apinchuk@datto.com>
Reviewed-by: Håkan Johansson <f96hajo@chalmers.se>
Reviewed-by: Andreas Dilger <andreas.dilger@chamcloud.com>
Reviewed-by: DHE <git@dehacked.net>
Reviewed-by: Richard Elling <Richard.Elling@RichardElling.com>
Reviewed-by: Gregor Kopka <gregor@kopka.net>
Reviewed-by: Kash Pande <kash@tripleback.net>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Don Brady <don.brady@delphix.com>
Closes #5182
This commit is contained in:
Don Brady
2018-09-05 19:33:36 -06:00
committed by Brian Behlendorf
parent cfa37548eb
commit cc99f275a2
57 changed files with 2326 additions and 282 deletions
+109
View File
@@ -25,6 +25,7 @@
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2017 Datto Inc.
* Copyright (c) 2017, Intel Corporation.
*/
#include <sys/zfs_context.h>
@@ -408,6 +409,19 @@ spa_load_note(spa_t *spa, const char *fmt, ...)
spa->spa_trust_config ? "trusted" : "untrusted", buf);
}
/*
* By default dedup and user data indirects land in the special class
*/
int zfs_ddt_data_is_special = B_TRUE;
int zfs_user_indirect_is_special = B_TRUE;
/*
* The percentage of special class final space reserved for metadata only.
* Once we allocate 100 - zfs_special_class_metadata_reserve_pct we only
* let metadata into the class.
*/
int zfs_special_class_metadata_reserve_pct = 25;
/*
* ==========================================================================
* SPA config locking
@@ -1159,6 +1173,8 @@ spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag)
*/
ASSERT(metaslab_class_validate(spa_normal_class(spa)) == 0);
ASSERT(metaslab_class_validate(spa_log_class(spa)) == 0);
ASSERT(metaslab_class_validate(spa_special_class(spa)) == 0);
ASSERT(metaslab_class_validate(spa_dedup_class(spa)) == 0);
spa_config_exit(spa, SCL_ALL, spa);
@@ -1554,6 +1570,16 @@ zfs_strtonum(const char *str, char **nptr)
return (val);
}
void
spa_activate_allocation_classes(spa_t *spa, dmu_tx_t *tx)
{
/*
* We bump the feature refcount for each special vdev added to the pool
*/
ASSERT(spa_feature_is_enabled(spa, SPA_FEATURE_ALLOCATION_CLASSES));
spa_feature_incr(spa, SPA_FEATURE_ALLOCATION_CLASSES, tx);
}
/*
* ==========================================================================
* Accessor functions
@@ -1811,6 +1837,79 @@ spa_log_class(spa_t *spa)
return (spa->spa_log_class);
}
metaslab_class_t *
spa_special_class(spa_t *spa)
{
return (spa->spa_special_class);
}
metaslab_class_t *
spa_dedup_class(spa_t *spa)
{
return (spa->spa_dedup_class);
}
/*
* Locate an appropriate allocation class
*/
metaslab_class_t *
spa_preferred_class(spa_t *spa, uint64_t size, dmu_object_type_t objtype,
uint_t level, uint_t special_smallblk)
{
if (DMU_OT_IS_ZIL(objtype)) {
if (spa->spa_log_class->mc_groups != 0)
return (spa_log_class(spa));
else
return (spa_normal_class(spa));
}
boolean_t has_special_class = spa->spa_special_class->mc_groups != 0;
if (DMU_OT_IS_DDT(objtype)) {
if (spa->spa_dedup_class->mc_groups != 0)
return (spa_dedup_class(spa));
else if (has_special_class && zfs_ddt_data_is_special)
return (spa_special_class(spa));
else
return (spa_normal_class(spa));
}
/* Indirect blocks for user data can land in special if allowed */
if (level > 0 && (DMU_OT_IS_FILE(objtype) || objtype == DMU_OT_ZVOL)) {
if (has_special_class && zfs_user_indirect_is_special)
return (spa_special_class(spa));
else
return (spa_normal_class(spa));
}
if (DMU_OT_IS_METADATA(objtype) || level > 0) {
if (has_special_class)
return (spa_special_class(spa));
else
return (spa_normal_class(spa));
}
/*
* Allow small file blocks in special class in some cases (like
* for the dRAID vdev feature). But always leave a reserve of
* zfs_special_class_metadata_reserve_pct exclusively for metadata.
*/
if (DMU_OT_IS_FILE(objtype) &&
has_special_class && size < special_smallblk) {
metaslab_class_t *special = spa_special_class(spa);
uint64_t alloc = metaslab_class_get_alloc(special);
uint64_t space = metaslab_class_get_space(special);
uint64_t limit =
(space * (100 - zfs_special_class_metadata_reserve_pct))
/ 100;
if (alloc < limit)
return (special);
}
return (spa_normal_class(spa));
}
void
spa_evicting_os_register(spa_t *spa, objset_t *os)
{
@@ -2500,6 +2599,8 @@ EXPORT_SYMBOL(spa_update_dspace);
EXPORT_SYMBOL(spa_deflate);
EXPORT_SYMBOL(spa_normal_class);
EXPORT_SYMBOL(spa_log_class);
EXPORT_SYMBOL(spa_special_class);
EXPORT_SYMBOL(spa_preferred_class);
EXPORT_SYMBOL(spa_max_replication);
EXPORT_SYMBOL(spa_prev_software_version);
EXPORT_SYMBOL(spa_get_failmode);
@@ -2579,5 +2680,13 @@ MODULE_PARM_DESC(spa_asize_inflation,
module_param(spa_slop_shift, int, 0644);
MODULE_PARM_DESC(spa_slop_shift, "Reserved free space in pool");
module_param(zfs_ddt_data_is_special, int, 0644);
MODULE_PARM_DESC(zfs_ddt_data_is_special,
"Place DDT data into the special class");
module_param(zfs_user_indirect_is_special, int, 0644);
MODULE_PARM_DESC(zfs_user_indirect_is_special,
"Place user data indirect blocks into the special class");
/* END CSTYLED */
#endif