Illumos 5243 - zdb -b could be much faster

5243 zdb -b could be much faster
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Richard Elling <richard.elling@gmail.com>
Approved by: Dan McDonald <danmcd@omniti.com>

References:
  https://www.illumos.org/issues/5243
  https://github.com/illumos/illumos-gate/commit/f7950bf

Ported-by: Don Brady <don.brady@intel.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3414
This commit is contained in:
Matthew Ahrens 2015-05-14 17:41:29 -06:00 committed by Brian Behlendorf
parent b1b85c8772
commit e5fd1dd682
2 changed files with 47 additions and 21 deletions

View File

@ -76,9 +76,11 @@
#ifndef lint #ifndef lint
extern int zfs_recover; extern int zfs_recover;
extern uint64_t zfs_arc_max, zfs_arc_meta_limit; extern uint64_t zfs_arc_max, zfs_arc_meta_limit;
extern int zfs_vdev_async_read_max_active;
#else #else
int zfs_recover; int zfs_recover;
uint64_t zfs_arc_max, zfs_arc_meta_limit; uint64_t zfs_arc_max, zfs_arc_meta_limit;
int zfs_vdev_async_read_max_active;
#endif #endif
const char cmdname[] = "zdb"; const char cmdname[] = "zdb";
@ -2513,8 +2515,14 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
zcb->zcb_readfails = 0; zcb->zcb_readfails = 0;
if (dump_opt['b'] < 5 && /* only call gethrtime() every 100 blocks */
gethrtime() > zcb->zcb_lastprint + NANOSEC) { static int iters;
if (++iters > 100)
iters = 0;
else
return (0);
if (dump_opt['b'] < 5 && gethrtime() > zcb->zcb_lastprint + NANOSEC) {
uint64_t now = gethrtime(); uint64_t now = gethrtime();
char buf[10]; char buf[10];
uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize; uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize;
@ -2625,6 +2633,14 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
(longlong_t)vd->vdev_ms_count); (longlong_t)vd->vdev_ms_count);
msp->ms_ops = &zdb_metaslab_ops; msp->ms_ops = &zdb_metaslab_ops;
/*
* We don't want to spend the CPU
* manipulating the size-ordered
* tree, so clear the range_tree
* ops.
*/
msp->ms_tree->rt_ops = NULL;
VERIFY0(space_map_load(msp->ms_sm, VERIFY0(space_map_load(msp->ms_sm,
msp->ms_tree, SM_ALLOC)); msp->ms_tree, SM_ALLOC));
msp->ms_loaded = B_TRUE; msp->ms_loaded = B_TRUE;
@ -3675,6 +3691,13 @@ main(int argc, char **argv)
zfs_arc_max = zfs_arc_meta_limit = 256 * 1024 * 1024; zfs_arc_max = zfs_arc_meta_limit = 256 * 1024 * 1024;
#endif #endif
/*
* "zdb -c" uses checksum-verifying scrub i/os which are async reads.
* "zdb -b" uses traversal prefetch which uses async reads.
* For good performance, let several of them be active at once.
*/
zfs_vdev_async_read_max_active = 10;
kernel_init(FREAD); kernel_init(FREAD);
if ((g_zfs = libzfs_init()) == NULL) if ((g_zfs = libzfs_init()) == NULL)
return (1); return (1);

View File

@ -58,6 +58,7 @@ typedef struct traverse_data {
int td_flags; int td_flags;
prefetch_data_t *td_pfd; prefetch_data_t *td_pfd;
boolean_t td_paused; boolean_t td_paused;
uint64_t td_hole_birth_enabled_txg;
blkptr_cb_t *td_func; blkptr_cb_t *td_func;
void *td_arg; void *td_arg;
} traverse_data_t; } traverse_data_t;
@ -226,25 +227,20 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
} }
if (bp->blk_birth == 0) { if (bp->blk_birth == 0) {
if (spa_feature_is_active(td->td_spa, SPA_FEATURE_HOLE_BIRTH)) { /*
/* * Since this block has a birth time of 0 it must be a
* Since this block has a birth time of 0 it must be a * hole created before the SPA_FEATURE_HOLE_BIRTH
* hole created before the SPA_FEATURE_HOLE_BIRTH * feature was enabled. If SPA_FEATURE_HOLE_BIRTH
* feature was enabled. If SPA_FEATURE_HOLE_BIRTH * was enabled before the min_txg for this traversal we
* was enabled before the min_txg for this traversal we * know the hole must have been created before the
* know the hole must have been created before the * min_txg for this traversal, so we can skip it. If
* min_txg for this traversal, so we can skip it. If * SPA_FEATURE_HOLE_BIRTH was enabled after the min_txg
* SPA_FEATURE_HOLE_BIRTH was enabled after the min_txg * for this traversal we cannot tell if the hole was
* for this traversal we cannot tell if the hole was * created before or after the min_txg for this
* created before or after the min_txg for this * traversal, so we cannot skip it.
* traversal, so we cannot skip it. */
*/ if (td->td_hole_birth_enabled_txg < td->td_min_txg)
uint64_t hole_birth_enabled_txg; return (0);
VERIFY(spa_feature_enabled_txg(td->td_spa,
SPA_FEATURE_HOLE_BIRTH, &hole_birth_enabled_txg));
if (hole_birth_enabled_txg < td->td_min_txg)
return (0);
}
} else if (bp->blk_birth <= td->td_min_txg) { } else if (bp->blk_birth <= td->td_min_txg) {
return (0); return (0);
} }
@ -533,6 +529,13 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
td->td_flags = flags; td->td_flags = flags;
td->td_paused = B_FALSE; td->td_paused = B_FALSE;
if (spa_feature_is_active(spa, SPA_FEATURE_HOLE_BIRTH)) {
VERIFY(spa_feature_enabled_txg(spa,
SPA_FEATURE_HOLE_BIRTH, &td->td_hole_birth_enabled_txg));
} else {
td->td_hole_birth_enabled_txg = 0;
}
pd->pd_flags = flags; pd->pd_flags = flags;
mutex_init(&pd->pd_mtx, NULL, MUTEX_DEFAULT, NULL); mutex_init(&pd->pd_mtx, NULL, MUTEX_DEFAULT, NULL);
cv_init(&pd->pd_cv, NULL, CV_DEFAULT, NULL); cv_init(&pd->pd_cv, NULL, CV_DEFAULT, NULL);