mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-13 11:40:25 +03:00
Improve zfs destroy performance with zio_t-free zio_free()
When "zfs destroy" is run, it completes quickly, and in the background we locate the blocks to free and free them. This background activity can be observed with `zpool get freeing` and `zpool wait -t free ...`.

This background activity is processed by a single thread (the spa_sync thread) which calls zio_free() on each of the blocks to free. With even modest storage performance, the CPU consumption of zio_free() can be the performance bottleneck.

Performance of zio_free() can be improved by not actually creating a zio_t in the common case (non-dedup, non-gang), instead calling metaslab_free() directly. This avoids the CPU cost of allocating the zio_t, and more importantly the cost of adding and later removing this zio_t from the parent zio's child list.

The result is that performance of background freeing more than doubles, from 0.6 million blocks per second to 1.3 million blocks per second.

Reviewed-by: Paul Dagnelie <pcd@delphix.com>
Reviewed-by: Serapheim Dimitropoulos <serapheim@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: George Wilson <gwilson@delphix.com>
Signed-off-by: Matthew Ahrens <mahrens@delphix.com>
Closes #10034
This commit is contained in:
parent
6c0abcfddd
commit
9cdf7b1f6b
@ -6913,7 +6913,7 @@ zdb_read_block(char *thing, spa_t *spa)
|
||||
*/
|
||||
if ((flags & ZDB_FLAG_CHECKSUM) && !(flags & ZDB_FLAG_RAW) &&
|
||||
!(flags & ZDB_FLAG_GBH)) {
|
||||
zio_t *czio, *cio;
|
||||
zio_t *czio;
|
||||
(void) printf("\n");
|
||||
for (enum zio_checksum ck = ZIO_CHECKSUM_LABEL;
|
||||
ck < ZIO_CHECKSUM_FUNCTIONS; ck++) {
|
||||
@ -6929,12 +6929,11 @@ zdb_read_block(char *thing, spa_t *spa)
|
||||
czio->io_bp = bp;
|
||||
|
||||
if (vd == vd->vdev_top) {
|
||||
cio = zio_read(czio, spa, bp, pabd, psize,
|
||||
zio_nowait(zio_read(czio, spa, bp, pabd, psize,
|
||||
NULL, NULL,
|
||||
ZIO_PRIORITY_SYNC_READ,
|
||||
ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
|
||||
ZIO_FLAG_DONT_RETRY, NULL);
|
||||
zio_nowait(cio);
|
||||
ZIO_FLAG_DONT_RETRY, NULL));
|
||||
} else {
|
||||
zio_nowait(zio_vdev_child_io(czio, bp, vd,
|
||||
offset, pabd, psize, ZIO_TYPE_READ,
|
||||
|
@ -8396,7 +8396,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
||||
|
||||
mutex_exit(hash_lock);
|
||||
|
||||
(void) zio_nowait(wzio);
|
||||
zio_nowait(wzio);
|
||||
}
|
||||
|
||||
multilist_sublist_unlock(mls);
|
||||
|
@ -1714,8 +1714,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
|
||||
while ((dr = list_head(list)) != NULL) {
|
||||
ASSERT0(dr->dr_dbuf->db_level);
|
||||
list_remove(list, dr);
|
||||
if (dr->dr_zio)
|
||||
zio_nowait(dr->dr_zio);
|
||||
zio_nowait(dr->dr_zio);
|
||||
}
|
||||
|
||||
/* Enable dnode backfill if enough objects have been freed. */
|
||||
|
@ -678,8 +678,7 @@ mmp_thread(void *arg)
|
||||
}
|
||||
|
||||
/* Outstanding writes are allowed to complete. */
|
||||
if (mmp->mmp_zio_root)
|
||||
zio_wait(mmp->mmp_zio_root);
|
||||
zio_wait(mmp->mmp_zio_root);
|
||||
|
||||
mmp->mmp_zio_root = NULL;
|
||||
mmp_thread_exit(mmp, &mmp->mmp_thread, &cpr);
|
||||
|
@ -8125,10 +8125,10 @@ bpobj_enqueue_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
static int
|
||||
spa_free_sync_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
{
|
||||
zio_t *zio = arg;
|
||||
zio_t *pio = arg;
|
||||
|
||||
zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp,
|
||||
zio->io_flags));
|
||||
zio_nowait(zio_free_sync(pio, pio->io_spa, dmu_tx_get_txg(tx), bp,
|
||||
pio->io_flags));
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
@ -1194,40 +1194,46 @@ zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp)
|
||||
!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))) {
|
||||
bplist_append(&spa->spa_free_bplist[txg & TXG_MASK], bp);
|
||||
} else {
|
||||
VERIFY0(zio_wait(zio_free_sync(NULL, spa, txg, bp, 0)));
|
||||
VERIFY3P(zio_free_sync(NULL, spa, txg, bp, 0), ==, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* To improve performance, this function may return NULL if we were able
|
||||
* to do the free immediately. This avoids the cost of creating a zio
|
||||
* (and linking it to the parent, etc).
|
||||
*/
|
||||
zio_t *
|
||||
zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
|
||||
enum zio_flag flags)
|
||||
{
|
||||
zio_t *zio;
|
||||
enum zio_stage stage = ZIO_FREE_PIPELINE;
|
||||
|
||||
ASSERT(!BP_IS_HOLE(bp));
|
||||
ASSERT(spa_syncing_txg(spa) == txg);
|
||||
|
||||
if (BP_IS_EMBEDDED(bp))
|
||||
return (zio_null(pio, spa, NULL, NULL, NULL, 0));
|
||||
return (NULL);
|
||||
|
||||
metaslab_check_free(spa, bp);
|
||||
arc_freed(spa, bp);
|
||||
dsl_scan_freed(spa, bp);
|
||||
|
||||
/*
|
||||
* GANG and DEDUP blocks can induce a read (for the gang block header,
|
||||
* or the DDT), so issue them asynchronously so that this thread is
|
||||
* not tied up.
|
||||
*/
|
||||
if (BP_IS_GANG(bp) || BP_GET_DEDUP(bp))
|
||||
stage |= ZIO_STAGE_ISSUE_ASYNC;
|
||||
if (BP_IS_GANG(bp) || BP_GET_DEDUP(bp)) {
|
||||
/*
|
||||
* GANG and DEDUP blocks can induce a read (for the gang block
|
||||
* header, or the DDT), so issue them asynchronously so that
|
||||
* this thread is not tied up.
|
||||
*/
|
||||
enum zio_stage stage =
|
||||
ZIO_FREE_PIPELINE | ZIO_STAGE_ISSUE_ASYNC;
|
||||
|
||||
zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
|
||||
BP_GET_PSIZE(bp), NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_NOW,
|
||||
flags, NULL, 0, NULL, ZIO_STAGE_OPEN, stage);
|
||||
|
||||
return (zio);
|
||||
return (zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
|
||||
BP_GET_PSIZE(bp), NULL, NULL,
|
||||
ZIO_TYPE_FREE, ZIO_PRIORITY_NOW,
|
||||
flags, NULL, 0, NULL, ZIO_STAGE_OPEN, stage));
|
||||
} else {
|
||||
metaslab_free(spa, bp, txg, B_FALSE);
|
||||
return (NULL);
|
||||
}
|
||||
}
|
||||
|
||||
zio_t *
|
||||
@ -2165,6 +2171,15 @@ __zio_execute(zio_t *zio)
|
||||
int
|
||||
zio_wait(zio_t *zio)
|
||||
{
|
||||
/*
|
||||
* Some routines, like zio_free_sync(), may return a NULL zio
|
||||
* to avoid the performance overhead of creating and then destroying
|
||||
* an unneeded zio. For the callers' simplicity, we accept a NULL
|
||||
* zio and ignore it.
|
||||
*/
|
||||
if (zio == NULL)
|
||||
return (0);
|
||||
|
||||
long timeout = MSEC_TO_TICK(zfs_deadman_ziotime_ms);
|
||||
int error;
|
||||
|
||||
@ -2202,6 +2217,12 @@ zio_wait(zio_t *zio)
|
||||
void
|
||||
zio_nowait(zio_t *zio)
|
||||
{
|
||||
/*
|
||||
* See comment in zio_wait().
|
||||
*/
|
||||
if (zio == NULL)
|
||||
return;
|
||||
|
||||
ASSERT3P(zio->io_executor, ==, NULL);
|
||||
|
||||
if (zio->io_child_type == ZIO_CHILD_LOGICAL &&
|
||||
@ -2489,8 +2510,13 @@ static zio_t *
|
||||
zio_free_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data,
|
||||
uint64_t offset)
|
||||
{
|
||||
return (zio_free_sync(pio, pio->io_spa, pio->io_txg, bp,
|
||||
ZIO_GANG_CHILD_FLAGS(pio)));
|
||||
zio_t *zio = zio_free_sync(pio, pio->io_spa, pio->io_txg, bp,
|
||||
ZIO_GANG_CHILD_FLAGS(pio));
|
||||
if (zio == NULL) {
|
||||
zio = zio_null(pio, pio->io_spa,
|
||||
NULL, NULL, NULL, ZIO_GANG_CHILD_FLAGS(pio));
|
||||
}
|
||||
return (zio);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
@ -3273,8 +3299,7 @@ zio_ddt_write(zio_t *zio)
|
||||
|
||||
ddt_exit(ddt);
|
||||
|
||||
if (cio)
|
||||
zio_nowait(cio);
|
||||
zio_nowait(cio);
|
||||
|
||||
return (zio);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user