mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-25 03:37:45 +03:00
fat zap should prefetch when iterating
When iterating over a ZAP object, we're almost always certain to iterate over the entire object. If there are multiple leaf blocks, we can realize a performance win by issuing reads for all the leaf blocks in parallel when the iteration begins.

For example, if we have 10,000 snapshots, "zfs destroy -nv pool/fs@1%9999" can take 30 minutes when the cache is cold. This change provides a >3x performance improvement, by issuing the reads for all ~64 blocks of each ZAP object in parallel.

Reviewed-by: Andreas Dilger <andreas.dilger@whamcloud.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Matthew Ahrens <mahrens@delphix.com>
External-issue: DLPX-58347
Closes #8862
This commit is contained in:
committed by
Brian Behlendorf
parent
d9cd66e45f
commit
d9b4bf0665
+55
-1
@@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
*/
|
||||
|
||||
@@ -49,6 +49,36 @@
|
||||
#include <sys/zap_impl.h>
|
||||
#include <sys/zap_leaf.h>
|
||||
|
||||
/*
|
||||
* If zap_iterate_prefetch is set, we will prefetch the entire ZAP object
|
||||
* (all leaf blocks) when we start iterating over it.
|
||||
*
|
||||
* For zap_cursor_init(), the callers all intend to iterate through all the
|
||||
* entries. There are a few cases where an error (typically i/o error) could
|
||||
* cause it to bail out early.
|
||||
*
|
||||
* For zap_cursor_init_serialized(), there are callers that do the iteration
|
||||
* outside of ZFS. Typically they would iterate over everything, but we
|
||||
* don't have control of that. E.g. zfs_ioc_snapshot_list_next(),
|
||||
* zcp_snapshots_iter(), and other iterators over things in the MOS - these
|
||||
* are called by /sbin/zfs and channel programs. The other example is
|
||||
* zfs_readdir() which iterates over directory entries for the getdents()
|
||||
* syscall. /sbin/ls iterates to the end (unless it receives a signal), but
|
||||
* userland doesn't have to.
|
||||
*
|
||||
* Given that the ZAP entries aren't returned in a specific order, the only
|
||||
* legitimate use cases for partial iteration would be:
|
||||
*
|
||||
* 1. Pagination: e.g. you only want to display 100 entries at a time, so you
|
||||
* get the first 100 and then wait for the user to hit "next page", which
|
||||
* they may never do.
|
||||
*
|
||||
* 2. You want to know if there are more than X entries, without relying on
|
||||
* the zfs-specific implementation of the directory's st_size (which is
|
||||
* the number of entries).
|
||||
*/
|
||||
int zap_iterate_prefetch = B_TRUE;	/* tunable; module_param in kernel */
|
||||
|
||||
int fzap_default_block_shift = 14; /* log2 of block size: 1 << 14 = 16k */
|
||||
|
||||
extern inline zap_phys_t *zap_f_phys(zap_t *zap);
|
||||
@@ -1189,6 +1219,21 @@ fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za)
|
||||
/* retrieve the next entry at or after zc_hash/zc_cd */
|
||||
/* if no entry, return ENOENT */
|
||||
|
||||
/*
|
||||
* If we are reading from the beginning, we're almost certain to
|
||||
* iterate over the entire ZAP object. If there are multiple leaf
|
||||
* blocks (freeblk > 2), prefetch the whole object (up to
|
||||
* dmu_prefetch_max bytes), so that we read the leaf blocks
|
||||
* concurrently. (Unless noprefetch was requested via
|
||||
* zap_cursor_init_noprefetch()).
|
||||
*/
|
||||
if (zc->zc_hash == 0 && zap_iterate_prefetch &&
|
||||
zc->zc_prefetch && zap_f_phys(zap)->zap_freeblk > 2) {
|
||||
dmu_prefetch(zc->zc_objset, zc->zc_zapobj, 0, 0,
|
||||
zap_f_phys(zap)->zap_freeblk << FZAP_BLOCK_SHIFT(zap),
|
||||
ZIO_PRIORITY_ASYNC_READ);
|
||||
}
|
||||
|
||||
if (zc->zc_leaf &&
|
||||
(ZAP_HASH_IDX(zc->zc_hash,
|
||||
zap_leaf_phys(zc->zc_leaf)->l_hdr.lh_prefix_len) !=
|
||||
@@ -1333,3 +1378,12 @@ fzap_get_stats(zap_t *zap, zap_stats_t *zs)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(_KERNEL)
|
||||
/* BEGIN CSTYLED */
|
||||
module_param(zap_iterate_prefetch, int, 0644);
|
||||
MODULE_PARM_DESC(zap_iterate_prefetch,
|
||||
"When iterating ZAP object, prefetch it");
|
||||
|
||||
/* END CSTYLED */
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user