mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Add allocation profile export and zhack subcommand for import
When attempting to debug performance problems on large systems, one of the major factors that affect performance is free space fragmentation. This heavily affects the allocation process, which is an area of active development in ZFS. Unfortunately, fragmenting a large pool for testing purposes is time consuming; it usually involves filling the pool and then repeatedly overwriting data until the free space becomes fragmented, which can take many hours. And even if the time is available, artificial workloads rarely generate the same fragmentation patterns as the natural workloads they're attempting to mimic. This patch has two parts. First, in zdb, we add the ability to export the full allocation map of the pool. It iterates over each vdev, printing every allocated segment in the ms_allocatable range tree. This can be done while the pool is online, though in that case the allocation map may actually be from several different TXGs as new ones are loaded on demand. The second is a new subcommand for zhack, zhack metaslab leak (and its supporting kernel changes). This is a zhack subcommand that imports a pool and then modifies the range trees of the metaslabs, allowing the sync process to write them out normally. It does not currently store those allocations anywhere to make them reversible, and there is no corresponding free subcommand (which would be extremely dangerous); this is an irreversible process, only intended for performance testing. The only way to reclaim the space afterwards is to destroy the pool or roll back to a checkpoint. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Paul Dagnelie <paul.dagnelie@klarasystems.com> Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Closes #17576
This commit is contained in:
committed by
Brian Behlendorf
parent
92ca3ae56a
commit
8f15d2e4d5
+39
-6
@@ -107,7 +107,9 @@ extern uint_t zfs_reconstruct_indirect_combinations_max;
|
||||
extern uint_t zfs_btree_verify_intensity;
|
||||
|
||||
static const char cmdname[] = "zdb";
|
||||
uint8_t dump_opt[256];
|
||||
uint8_t dump_opt[512];
|
||||
|
||||
#define ALLOCATED_OPT 256
|
||||
|
||||
typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
|
||||
|
||||
@@ -1666,6 +1668,16 @@ dump_metaslab_stats(metaslab_t *msp)
|
||||
dump_histogram(rt->rt_histogram, ZFS_RANGE_TREE_HISTOGRAM_SIZE, 0);
|
||||
}
|
||||
|
||||
/*
 * Segment callback that prints the gap preceding each visited segment.
 *
 * arg points to a uint64_t cursor holding the offset at which the previous
 * segment ended.  If the visited segment does not begin exactly at the
 * cursor, the range [cursor, start) is printed as an "ALLOC:" line giving
 * its offset and length.  The cursor is then advanced to the end of the
 * visited segment (start + size).
 */
static void
dump_allocated(void *arg, uint64_t start, uint64_t size)
{
	uint64_t *cursor = arg;
	uint64_t gap = start - *cursor;

	/* A zero-length gap means this segment abuts the previous one. */
	if (gap != 0) {
		(void) printf("ALLOC: %"PRIu64" %"PRIu64"\n", *cursor, gap);
	}

	*cursor = start + size;
}
|
||||
|
||||
static void
|
||||
dump_metaslab(metaslab_t *msp)
|
||||
{
|
||||
@@ -1682,13 +1694,24 @@ dump_metaslab(metaslab_t *msp)
|
||||
(u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
|
||||
(u_longlong_t)space_map_object(sm), freebuf);
|
||||
|
||||
if (dump_opt['m'] > 2 && !dump_opt['L']) {
|
||||
if (dump_opt[ALLOCATED_OPT] ||
|
||||
(dump_opt['m'] > 2 && !dump_opt['L'])) {
|
||||
mutex_enter(&msp->ms_lock);
|
||||
VERIFY0(metaslab_load(msp));
|
||||
}
|
||||
|
||||
if (dump_opt['m'] > 2 && !dump_opt['L']) {
|
||||
zfs_range_tree_stat_verify(msp->ms_allocatable);
|
||||
dump_metaslab_stats(msp);
|
||||
metaslab_unload(msp);
|
||||
mutex_exit(&msp->ms_lock);
|
||||
}
|
||||
|
||||
/*
 * Emit the allocated map for this metaslab: walk ms_allocatable and let
 * dump_allocated() print every gap between the visited segments, starting
 * from the beginning of the metaslab.
 */
if (dump_opt[ALLOCATED_OPT]) {
	uint64_t off = msp->ms_start;
	uint64_t ms_end = msp->ms_start + msp->ms_size;

	zfs_range_tree_walk(msp->ms_allocatable, dump_allocated,
	    &off);
	/*
	 * Anything between the last visited segment and the end of the
	 * metaslab is one more gap.  "off" is an absolute offset, so the
	 * length must be measured from the metaslab's end offset rather
	 * than from ms_size alone, which would underflow whenever
	 * ms_start is non-zero.
	 */
	if (off != ms_end)
		(void) printf("ALLOC: %"PRIu64" %"PRIu64"\n", off,
		    ms_end - off);
}
|
||||
|
||||
if (dump_opt['m'] > 1 && sm != NULL &&
|
||||
@@ -1703,6 +1726,12 @@ dump_metaslab(metaslab_t *msp)
|
||||
SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
|
||||
}
|
||||
|
||||
if (dump_opt[ALLOCATED_OPT] ||
|
||||
(dump_opt['m'] > 2 && !dump_opt['L'])) {
|
||||
metaslab_unload(msp);
|
||||
mutex_exit(&msp->ms_lock);
|
||||
}
|
||||
|
||||
if (vd->vdev_ops == &vdev_draid_ops)
|
||||
ASSERT3U(msp->ms_size, <=, 1ULL << vd->vdev_ms_shift);
|
||||
else
|
||||
@@ -1739,8 +1768,9 @@ print_vdev_metaslab_header(vdev_t *vd)
|
||||
}
|
||||
}
|
||||
|
||||
(void) printf("\tvdev %10llu %s",
|
||||
(u_longlong_t)vd->vdev_id, bias_str);
|
||||
(void) printf("\tvdev %10llu\t%s metaslab shift %4llu",
|
||||
(u_longlong_t)vd->vdev_id, bias_str,
|
||||
(u_longlong_t)vd->vdev_ms_shift);
|
||||
|
||||
if (ms_flush_data_obj != 0) {
|
||||
(void) printf(" ms_unflushed_phys object %llu",
|
||||
@@ -9375,6 +9405,8 @@ main(int argc, char **argv)
|
||||
{"all-reconstruction", no_argument, NULL, 'Y'},
|
||||
{"livelist", no_argument, NULL, 'y'},
|
||||
{"zstd-headers", no_argument, NULL, 'Z'},
|
||||
{"allocated-map", no_argument, NULL,
|
||||
ALLOCATED_OPT},
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
|
||||
@@ -9405,6 +9437,7 @@ main(int argc, char **argv)
|
||||
case 'u':
|
||||
case 'y':
|
||||
case 'Z':
|
||||
case ALLOCATED_OPT:
|
||||
dump_opt[c]++;
|
||||
dump_all = 0;
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user