mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 18:40:43 +03:00
Illumos 4976-4984 - metaslab improvements
4976 zfs should only avoid writing to a failing non-redundant top-level vdev 4978 ztest fails in get_metaslab_refcount() 4979 extend free space histogram to device and pool 4980 metaslabs should have a fragmentation metric 4981 remove fragmented ops vector from block allocator 4982 space_map object should proactively upgrade when feature is enabled 4983 need to collect metaslab information via mdb 4984 device selection should use fragmentation metric Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: Adam Leventhal <adam.leventhal@delphix.com> Reviewed by: Christopher Siden <christopher.siden@delphix.com> Approved by: Garrett D'Amore <garrett@damore.org> References: https://www.illumos.org/issues/4976 https://www.illumos.org/issues/4978 https://www.illumos.org/issues/4979 https://www.illumos.org/issues/4980 https://www.illumos.org/issues/4981 https://www.illumos.org/issues/4982 https://www.illumos.org/issues/4983 https://www.illumos.org/issues/4984 https://github.com/illumos/illumos-gate/commit/2e4c998 Notes: The "zdb -M" option has been re-tasked to display the new metaslab fragmentation metric and the new "zdb -I" option is used to control the maximum number of in-flight I/Os. The new fragmentation metric is derived from the space map histogram which has been rolled up to the vdev and pool level and is presented to the user via "zpool list". Add a number of module parameters related to the new metaslab weighting logic. Ported by: Tim Chase <tim@chase2k.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #2595
This commit is contained in:
committed by
Brian Behlendorf
parent
f67d709080
commit
f3a7f6610f
+60
-14
@@ -110,11 +110,11 @@ static void
|
||||
usage(void)
|
||||
{
|
||||
(void) fprintf(stderr,
|
||||
"Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
|
||||
"[-U config] [-M inflight I/Os] poolname [object...]\n"
|
||||
"Usage: %s [-CumMdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
|
||||
"[-U config] [-I inflight I/Os] poolname [object...]\n"
|
||||
" %s [-divPA] [-e -p path...] [-U config] dataset "
|
||||
"[object...]\n"
|
||||
" %s -m [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
|
||||
" %s -mM [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
|
||||
"poolname [vdev [metaslab...]]\n"
|
||||
" %s -R [-A] [-e [-p path...]] poolname "
|
||||
"vdev:offset:size[:flags]\n"
|
||||
@@ -137,6 +137,7 @@ usage(void)
|
||||
(void) fprintf(stderr, " -h pool history\n");
|
||||
(void) fprintf(stderr, " -b block statistics\n");
|
||||
(void) fprintf(stderr, " -m metaslabs\n");
|
||||
(void) fprintf(stderr, " -M metaslab groups\n");
|
||||
(void) fprintf(stderr, " -c checksum all metadata (twice for "
|
||||
"all data) blocks\n");
|
||||
(void) fprintf(stderr, " -s report stats on zdb's I/O\n");
|
||||
@@ -165,7 +166,7 @@ usage(void)
|
||||
(void) fprintf(stderr, " -P print numbers in parseable form\n");
|
||||
(void) fprintf(stderr, " -t <txg> -- highest txg to use when "
|
||||
"searching for uberblocks\n");
|
||||
(void) fprintf(stderr, " -M <number of inflight I/Os> -- "
|
||||
(void) fprintf(stderr, " -I <number of inflight I/Os> -- "
|
||||
"specify the maximum number of checksumming I/Os "
|
||||
"[default is 200]\n");
|
||||
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
|
||||
@@ -547,7 +548,7 @@ get_metaslab_refcount(vdev_t *vd)
|
||||
int refcount = 0;
|
||||
int c, m;
|
||||
|
||||
if (vd->vdev_top == vd) {
|
||||
if (vd->vdev_top == vd && !vd->vdev_removing) {
|
||||
for (m = 0; m < vd->vdev_ms_count; m++) {
|
||||
space_map_t *sm = vd->vdev_ms[m]->ms_sm;
|
||||
|
||||
@@ -685,9 +686,10 @@ dump_metaslab(metaslab_t *msp)
|
||||
* The space map histogram represents free space in chunks
|
||||
* of sm_shift (i.e. bucket 0 refers to 2^sm_shift).
|
||||
*/
|
||||
(void) printf("\tOn-disk histogram:\n");
|
||||
(void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n",
|
||||
(u_longlong_t)msp->ms_fragmentation);
|
||||
dump_histogram(sm->sm_phys->smp_histogram,
|
||||
SPACE_MAP_HISTOGRAM_SIZE(sm), sm->sm_shift);
|
||||
SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
|
||||
}
|
||||
|
||||
if (dump_opt['d'] > 5 || dump_opt['m'] > 3) {
|
||||
@@ -711,6 +713,48 @@ print_vdev_metaslab_header(vdev_t *vd)
|
||||
"---------------", "-------------");
|
||||
}
|
||||
|
||||
static void
|
||||
dump_metaslab_groups(spa_t *spa)
|
||||
{
|
||||
vdev_t *rvd = spa->spa_root_vdev;
|
||||
metaslab_class_t *mc = spa_normal_class(spa);
|
||||
uint64_t fragmentation;
|
||||
int c;
|
||||
|
||||
metaslab_class_histogram_verify(mc);
|
||||
|
||||
for (c = 0; c < rvd->vdev_children; c++) {
|
||||
vdev_t *tvd = rvd->vdev_child[c];
|
||||
metaslab_group_t *mg = tvd->vdev_mg;
|
||||
|
||||
if (mg->mg_class != mc)
|
||||
continue;
|
||||
|
||||
metaslab_group_histogram_verify(mg);
|
||||
mg->mg_fragmentation = metaslab_group_fragmentation(mg);
|
||||
|
||||
(void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t"
|
||||
"fragmentation",
|
||||
(u_longlong_t)tvd->vdev_id,
|
||||
(u_longlong_t)tvd->vdev_ms_count);
|
||||
if (mg->mg_fragmentation == ZFS_FRAG_INVALID) {
|
||||
(void) printf("%3s\n", "-");
|
||||
} else {
|
||||
(void) printf("%3llu%%\n",
|
||||
(u_longlong_t)mg->mg_fragmentation);
|
||||
}
|
||||
dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
|
||||
}
|
||||
|
||||
(void) printf("\tpool %s\tfragmentation", spa_name(spa));
|
||||
fragmentation = metaslab_class_fragmentation(mc);
|
||||
if (fragmentation == ZFS_FRAG_INVALID)
|
||||
(void) printf("\t%3s\n", "-");
|
||||
else
|
||||
(void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation);
|
||||
dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
dump_metaslabs(spa_t *spa)
|
||||
{
|
||||
@@ -2381,8 +2425,7 @@ zdb_leak(void *arg, uint64_t start, uint64_t size)
|
||||
}
|
||||
|
||||
static metaslab_ops_t zdb_metaslab_ops = {
|
||||
NULL, /* alloc */
|
||||
NULL /* fragmented */
|
||||
NULL /* alloc */
|
||||
};
|
||||
|
||||
static void
|
||||
@@ -2874,6 +2917,8 @@ dump_zpool(spa_t *spa)
|
||||
|
||||
if (dump_opt['d'] > 2 || dump_opt['m'])
|
||||
dump_metaslabs(spa);
|
||||
if (dump_opt['M'])
|
||||
dump_metaslab_groups(spa);
|
||||
|
||||
if (dump_opt['d'] || dump_opt['i']) {
|
||||
dump_dir(dp->dp_meta_objset);
|
||||
@@ -3363,7 +3408,7 @@ main(int argc, char **argv)
|
||||
int flags = ZFS_IMPORT_MISSING_LOG;
|
||||
int rewind = ZPOOL_NEVER_REWIND;
|
||||
char *spa_config_path_env;
|
||||
const char *opts = "bcdhilmM:suCDRSAFLVXevp:t:U:P";
|
||||
const char *opts = "bcdhilmMI:suCDRSAFLXevp:t:U:P";
|
||||
|
||||
(void) setrlimit(RLIMIT_NOFILE, &rl);
|
||||
(void) enable_extended_FILE_stdio(-1, -1);
|
||||
@@ -3392,6 +3437,7 @@ main(int argc, char **argv)
|
||||
case 'u':
|
||||
case 'C':
|
||||
case 'D':
|
||||
case 'M':
|
||||
case 'R':
|
||||
case 'S':
|
||||
dump_opt[c]++;
|
||||
@@ -3408,10 +3454,7 @@ main(int argc, char **argv)
|
||||
case 'V':
|
||||
flags = ZFS_IMPORT_VERBATIM;
|
||||
break;
|
||||
case 'v':
|
||||
verbose++;
|
||||
break;
|
||||
case 'M':
|
||||
case 'I':
|
||||
max_inflight = strtoull(optarg, NULL, 0);
|
||||
if (max_inflight == 0) {
|
||||
(void) fprintf(stderr, "maximum number "
|
||||
@@ -3446,6 +3489,9 @@ main(int argc, char **argv)
|
||||
case 'U':
|
||||
spa_config_path = optarg;
|
||||
break;
|
||||
case 'v':
|
||||
verbose++;
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
break;
|
||||
|
||||
+13
-5
@@ -2998,10 +2998,16 @@ print_one_column(zpool_prop_t prop, uint64_t value, boolean_t scripted)
|
||||
boolean_t fixed;
|
||||
size_t width = zprop_width(prop, &fixed, ZFS_TYPE_POOL);
|
||||
|
||||
zfs_nicenum(value, propval, sizeof (propval));
|
||||
|
||||
if (prop == ZPOOL_PROP_EXPANDSZ && value == 0)
|
||||
(void) strlcpy(propval, "-", sizeof (propval));
|
||||
else if (prop == ZPOOL_PROP_FRAGMENTATION && value == ZFS_FRAG_INVALID)
|
||||
(void) strlcpy(propval, "-", sizeof (propval));
|
||||
else if (prop == ZPOOL_PROP_FRAGMENTATION)
|
||||
(void) snprintf(propval, sizeof (propval), "%llu%%",
|
||||
(unsigned long long)value);
|
||||
else
|
||||
zfs_nicenum(value, propval, sizeof (propval));
|
||||
|
||||
if (scripted)
|
||||
(void) printf("\t%s", propval);
|
||||
@@ -3034,9 +3040,9 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
|
||||
/* only toplevel vdevs have capacity stats */
|
||||
if (vs->vs_space == 0) {
|
||||
if (scripted)
|
||||
(void) printf("\t-\t-\t-");
|
||||
(void) printf("\t-\t-\t-\t-");
|
||||
else
|
||||
(void) printf(" - - -");
|
||||
(void) printf(" - - - -");
|
||||
} else {
|
||||
print_one_column(ZPOOL_PROP_SIZE, vs->vs_space,
|
||||
scripted);
|
||||
@@ -3044,6 +3050,8 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
|
||||
scripted);
|
||||
print_one_column(ZPOOL_PROP_FREE,
|
||||
vs->vs_space - vs->vs_alloc, scripted);
|
||||
print_one_column(ZPOOL_PROP_FRAGMENTATION,
|
||||
vs->vs_fragmentation, scripted);
|
||||
}
|
||||
print_one_column(ZPOOL_PROP_EXPANDSZ, vs->vs_esize,
|
||||
scripted);
|
||||
@@ -3128,8 +3136,8 @@ zpool_do_list(int argc, char **argv)
|
||||
int ret = 0;
|
||||
list_cbdata_t cb = { 0 };
|
||||
static char default_props[] =
|
||||
"name,size,allocated,free,capacity,dedupratio,"
|
||||
"health,altroot";
|
||||
"name,size,allocated,free,fragmentation,capacity,"
|
||||
"dedupratio,health,altroot";
|
||||
char *props = default_props;
|
||||
unsigned long interval = 0, count = 0;
|
||||
zpool_list_t *list;
|
||||
|
||||
Reference in New Issue
Block a user