mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-23 19:04:45 +03:00
Pool allocation classes
Allocation Classes add the ability to have allocation classes in a pool that are dedicated to serving specific block categories, such as DDT data, metadata, and small file blocks. A pool can opt-in to this feature by adding a 'special' or 'dedup' top-level VDEV. Reviewed-by: Pavel Zakharov <pavel.zakharov@delphix.com> Reviewed-by: Richard Laager <rlaager@wiktel.com> Reviewed-by: Alek Pinchuk <apinchuk@datto.com> Reviewed-by: Håkan Johansson <f96hajo@chalmers.se> Reviewed-by: Andreas Dilger <andreas.dilger@chamcloud.com> Reviewed-by: DHE <git@dehacked.net> Reviewed-by: Richard Elling <Richard.Elling@RichardElling.com> Reviewed-by: Gregor Kopka <gregor@kopka.net> Reviewed-by: Kash Pande <kash@tripleback.net> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Matthew Ahrens <mahrens@delphix.com> Signed-off-by: Don Brady <don.brady@delphix.com> Closes #5182
This commit is contained in:
committed by
Brian Behlendorf
parent
cfa37548eb
commit
cc99f275a2
+105
-23
@@ -21,7 +21,7 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2014 Integros [integros.com]
|
||||
* Copyright 2016 Nexenta Systems, Inc.
|
||||
* Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC.
|
||||
@@ -932,13 +932,23 @@ dump_metaslab(metaslab_t *msp)
|
||||
static void
|
||||
print_vdev_metaslab_header(vdev_t *vd)
|
||||
{
|
||||
(void) printf("\tvdev %10llu\n\t%-10s%5llu %-19s %-15s %-10s\n",
|
||||
(u_longlong_t)vd->vdev_id,
|
||||
vdev_alloc_bias_t alloc_bias = vd->vdev_alloc_bias;
|
||||
const char *bias_str;
|
||||
|
||||
bias_str = (alloc_bias == VDEV_BIAS_LOG || vd->vdev_islog) ?
|
||||
VDEV_ALLOC_BIAS_LOG :
|
||||
(alloc_bias == VDEV_BIAS_SPECIAL) ? VDEV_ALLOC_BIAS_SPECIAL :
|
||||
(alloc_bias == VDEV_BIAS_DEDUP) ? VDEV_ALLOC_BIAS_DEDUP :
|
||||
vd->vdev_islog ? "log" : "";
|
||||
|
||||
(void) printf("\tvdev %10llu %s\n"
|
||||
"\t%-10s%5llu %-19s %-15s %-12s\n",
|
||||
(u_longlong_t)vd->vdev_id, bias_str,
|
||||
"metaslabs", (u_longlong_t)vd->vdev_ms_count,
|
||||
"offset", "spacemap", "free");
|
||||
(void) printf("\t%15s %19s %15s %10s\n",
|
||||
(void) printf("\t%15s %19s %15s %12s\n",
|
||||
"---------------", "-------------------",
|
||||
"---------------", "-------------");
|
||||
"---------------", "------------");
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -954,7 +964,7 @@ dump_metaslab_groups(spa_t *spa)
|
||||
vdev_t *tvd = rvd->vdev_child[c];
|
||||
metaslab_group_t *mg = tvd->vdev_mg;
|
||||
|
||||
if (mg->mg_class != mc)
|
||||
if (mg == NULL || mg->mg_class != mc)
|
||||
continue;
|
||||
|
||||
metaslab_group_histogram_verify(mg);
|
||||
@@ -3158,6 +3168,7 @@ typedef struct zdb_blkstats {
|
||||
uint64_t zb_count;
|
||||
uint64_t zb_gangs;
|
||||
uint64_t zb_ditto_samevdev;
|
||||
uint64_t zb_ditto_same_ms;
|
||||
uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE];
|
||||
} zdb_blkstats_t;
|
||||
|
||||
@@ -3197,6 +3208,16 @@ typedef struct zdb_cb {
|
||||
uint32_t **zcb_vd_obsolete_counts;
|
||||
} zdb_cb_t;
|
||||
|
||||
/* test if two DVA offsets from same vdev are within the same metaslab */
|
||||
static boolean_t
|
||||
same_metaslab(spa_t *spa, uint64_t vdev, uint64_t off1, uint64_t off2)
|
||||
{
|
||||
vdev_t *vd = vdev_lookup_top(spa, vdev);
|
||||
uint64_t ms_shift = vd->vdev_ms_shift;
|
||||
|
||||
return ((off1 >> ms_shift) == (off2 >> ms_shift));
|
||||
}
|
||||
|
||||
static void
|
||||
zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||
dmu_object_type_t type)
|
||||
@@ -3209,6 +3230,8 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||
if (zilog && zil_bp_tree_add(zilog, bp) != 0)
|
||||
return;
|
||||
|
||||
spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER);
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
|
||||
int t = (i & 1) ? type : ZDB_OT_TOTAL;
|
||||
@@ -3234,8 +3257,15 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||
switch (BP_GET_NDVAS(bp)) {
|
||||
case 2:
|
||||
if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
|
||||
DVA_GET_VDEV(&bp->blk_dva[1]))
|
||||
DVA_GET_VDEV(&bp->blk_dva[1])) {
|
||||
zb->zb_ditto_samevdev++;
|
||||
|
||||
if (same_metaslab(zcb->zcb_spa,
|
||||
DVA_GET_VDEV(&bp->blk_dva[0]),
|
||||
DVA_GET_OFFSET(&bp->blk_dva[0]),
|
||||
DVA_GET_OFFSET(&bp->blk_dva[1])))
|
||||
zb->zb_ditto_same_ms++;
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
equal = (DVA_GET_VDEV(&bp->blk_dva[0]) ==
|
||||
@@ -3244,13 +3274,37 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||
DVA_GET_VDEV(&bp->blk_dva[2])) +
|
||||
(DVA_GET_VDEV(&bp->blk_dva[1]) ==
|
||||
DVA_GET_VDEV(&bp->blk_dva[2]));
|
||||
if (equal != 0)
|
||||
if (equal != 0) {
|
||||
zb->zb_ditto_samevdev++;
|
||||
|
||||
if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
|
||||
DVA_GET_VDEV(&bp->blk_dva[1]) &&
|
||||
same_metaslab(zcb->zcb_spa,
|
||||
DVA_GET_VDEV(&bp->blk_dva[0]),
|
||||
DVA_GET_OFFSET(&bp->blk_dva[0]),
|
||||
DVA_GET_OFFSET(&bp->blk_dva[1])))
|
||||
zb->zb_ditto_same_ms++;
|
||||
else if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
|
||||
DVA_GET_VDEV(&bp->blk_dva[2]) &&
|
||||
same_metaslab(zcb->zcb_spa,
|
||||
DVA_GET_VDEV(&bp->blk_dva[0]),
|
||||
DVA_GET_OFFSET(&bp->blk_dva[0]),
|
||||
DVA_GET_OFFSET(&bp->blk_dva[2])))
|
||||
zb->zb_ditto_same_ms++;
|
||||
else if (DVA_GET_VDEV(&bp->blk_dva[1]) ==
|
||||
DVA_GET_VDEV(&bp->blk_dva[2]) &&
|
||||
same_metaslab(zcb->zcb_spa,
|
||||
DVA_GET_VDEV(&bp->blk_dva[1]),
|
||||
DVA_GET_OFFSET(&bp->blk_dva[1]),
|
||||
DVA_GET_OFFSET(&bp->blk_dva[2])))
|
||||
zb->zb_ditto_same_ms++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
spa_config_exit(zcb->zcb_spa, SCL_CONFIG, FTAG);
|
||||
|
||||
if (BP_IS_EMBEDDED(bp)) {
|
||||
zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++;
|
||||
zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)]
|
||||
@@ -4103,6 +4157,8 @@ dump_block_stats(spa_t *spa)
|
||||
flags |= TRAVERSE_PREFETCH_DATA;
|
||||
|
||||
zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa));
|
||||
zcb.zcb_totalasize += metaslab_class_get_alloc(spa_special_class(spa));
|
||||
zcb.zcb_totalasize += metaslab_class_get_alloc(spa_dedup_class(spa));
|
||||
zcb.zcb_start = zcb.zcb_lastprint = gethrtime();
|
||||
err = traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
|
||||
|
||||
@@ -4147,7 +4203,10 @@ dump_block_stats(spa_t *spa)
|
||||
norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
|
||||
norm_space = metaslab_class_get_space(spa_normal_class(spa));
|
||||
|
||||
total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
|
||||
total_alloc = norm_alloc +
|
||||
metaslab_class_get_alloc(spa_log_class(spa)) +
|
||||
metaslab_class_get_alloc(spa_special_class(spa)) +
|
||||
metaslab_class_get_alloc(spa_dedup_class(spa));
|
||||
total_found = tzb->zb_asize - zcb.zcb_dedup_asize +
|
||||
zcb.zcb_removing_size + zcb.zcb_checkpoint_size;
|
||||
|
||||
@@ -4169,31 +4228,50 @@ dump_block_stats(spa_t *spa)
|
||||
return (2);
|
||||
|
||||
(void) printf("\n");
|
||||
(void) printf("\tbp count: %10llu\n",
|
||||
(void) printf("\t%-16s %14llu\n", "bp count:",
|
||||
(u_longlong_t)tzb->zb_count);
|
||||
(void) printf("\tganged count: %10llu\n",
|
||||
(void) printf("\t%-16s %14llu\n", "ganged count:",
|
||||
(longlong_t)tzb->zb_gangs);
|
||||
(void) printf("\tbp logical: %10llu avg: %6llu\n",
|
||||
(void) printf("\t%-16s %14llu avg: %6llu\n", "bp logical:",
|
||||
(u_longlong_t)tzb->zb_lsize,
|
||||
(u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
|
||||
(void) printf("\tbp physical: %10llu avg:"
|
||||
" %6llu compression: %6.2f\n",
|
||||
(u_longlong_t)tzb->zb_psize,
|
||||
(void) printf("\t%-16s %14llu avg: %6llu compression: %6.2f\n",
|
||||
"bp physical:", (u_longlong_t)tzb->zb_psize,
|
||||
(u_longlong_t)(tzb->zb_psize / tzb->zb_count),
|
||||
(double)tzb->zb_lsize / tzb->zb_psize);
|
||||
(void) printf("\tbp allocated: %10llu avg:"
|
||||
" %6llu compression: %6.2f\n",
|
||||
(u_longlong_t)tzb->zb_asize,
|
||||
(void) printf("\t%-16s %14llu avg: %6llu compression: %6.2f\n",
|
||||
"bp allocated:", (u_longlong_t)tzb->zb_asize,
|
||||
(u_longlong_t)(tzb->zb_asize / tzb->zb_count),
|
||||
(double)tzb->zb_lsize / tzb->zb_asize);
|
||||
(void) printf("\tbp deduped: %10llu ref>1:"
|
||||
" %6llu deduplication: %6.2f\n",
|
||||
(u_longlong_t)zcb.zcb_dedup_asize,
|
||||
(void) printf("\t%-16s %14llu ref>1: %6llu deduplication: %6.2f\n",
|
||||
"bp deduped:", (u_longlong_t)zcb.zcb_dedup_asize,
|
||||
(u_longlong_t)zcb.zcb_dedup_blocks,
|
||||
(double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
|
||||
(void) printf("\tSPA allocated: %10llu used: %5.2f%%\n",
|
||||
(void) printf("\t%-16s %14llu used: %5.2f%%\n", "Normal class:",
|
||||
(u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
|
||||
|
||||
if (spa_special_class(spa)->mc_rotor != NULL) {
|
||||
uint64_t alloc = metaslab_class_get_alloc(
|
||||
spa_special_class(spa));
|
||||
uint64_t space = metaslab_class_get_space(
|
||||
spa_special_class(spa));
|
||||
|
||||
(void) printf("\t%-16s %14llu used: %5.2f%%\n",
|
||||
"Special class", (u_longlong_t)alloc,
|
||||
100.0 * alloc / space);
|
||||
}
|
||||
|
||||
if (spa_dedup_class(spa)->mc_rotor != NULL) {
|
||||
uint64_t alloc = metaslab_class_get_alloc(
|
||||
spa_dedup_class(spa));
|
||||
uint64_t space = metaslab_class_get_space(
|
||||
spa_dedup_class(spa));
|
||||
|
||||
(void) printf("\t%-16s %14llu used: %5.2f%%\n",
|
||||
"Dedup class", (u_longlong_t)alloc,
|
||||
100.0 * alloc / space);
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
|
||||
if (zcb.zcb_embedded_blocks[i] == 0)
|
||||
continue;
|
||||
@@ -4215,6 +4293,10 @@ dump_block_stats(spa_t *spa)
|
||||
(void) printf("\tDittoed blocks on same vdev: %llu\n",
|
||||
(longlong_t)tzb->zb_ditto_samevdev);
|
||||
}
|
||||
if (tzb->zb_ditto_same_ms != 0) {
|
||||
(void) printf("\tDittoed blocks in same metaslab: %llu\n",
|
||||
(longlong_t)tzb->zb_ditto_same_ms);
|
||||
}
|
||||
|
||||
for (uint64_t v = 0; v < spa->spa_root_vdev->vdev_children; v++) {
|
||||
vdev_t *vd = spa->spa_root_vdev->vdev_child[v];
|
||||
|
||||
Reference in New Issue
Block a user