mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-24 11:18:52 +03:00
Prefetch on deadlists merge
During snapshot deletion, ZFS may issue several reads for each deadlist in order to merge them into the next snapshot's or the pool's bpobj. The number of deadlists increases with the number of snapshots. On HDD pools this may take significant time, during which the sync thread is blocked. This patch introduces prescient prefetch of the required blocks for up to 128 deadlists ahead. Tests show that the time required to delete a dataset with 720 snapshots and a randomly overwritten file on a wide HDD pool drops from 75-85 to 22-28 seconds. Reviewed-by: Allan Jude <allan@klarasystems.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored by: iXsystems, Inc. Issue #14276 Closes #14402
This commit is contained in:
+63
-2
@@ -663,14 +663,13 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
|
||||
}
|
||||
|
||||
VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj));
|
||||
VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));
|
||||
|
||||
if (bpobj_is_empty(&subbpo)) {
|
||||
/* No point in having an empty subobj. */
|
||||
bpobj_close(&subbpo);
|
||||
bpobj_free(bpo->bpo_os, subobj, tx);
|
||||
return;
|
||||
}
|
||||
VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));
|
||||
|
||||
mutex_enter(&bpo->bpo_lock);
|
||||
dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
|
||||
@@ -780,6 +779,68 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Prefetch metadata required for bpobj_enqueue_subobj().
|
||||
*/
|
||||
void
|
||||
bpobj_prefetch_subobj(bpobj_t *bpo, uint64_t subobj)
|
||||
{
|
||||
dmu_object_info_t doi;
|
||||
bpobj_t subbpo;
|
||||
uint64_t subsubobjs;
|
||||
boolean_t copy_subsub = B_TRUE;
|
||||
boolean_t copy_bps = B_TRUE;
|
||||
|
||||
ASSERT(bpobj_is_open(bpo));
|
||||
ASSERT(subobj != 0);
|
||||
|
||||
if (subobj == dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj)
|
||||
return;
|
||||
|
||||
if (bpobj_open(&subbpo, bpo->bpo_os, subobj) != 0)
|
||||
return;
|
||||
if (bpobj_is_empty(&subbpo)) {
|
||||
bpobj_close(&subbpo);
|
||||
return;
|
||||
}
|
||||
subsubobjs = subbpo.bpo_phys->bpo_subobjs;
|
||||
bpobj_close(&subbpo);
|
||||
|
||||
if (subsubobjs != 0) {
|
||||
if (dmu_object_info(bpo->bpo_os, subsubobjs, &doi) != 0)
|
||||
return;
|
||||
if (doi.doi_max_offset > doi.doi_data_block_size)
|
||||
copy_subsub = B_FALSE;
|
||||
}
|
||||
|
||||
if (dmu_object_info(bpo->bpo_os, subobj, &doi) != 0)
|
||||
return;
|
||||
if (doi.doi_max_offset > doi.doi_data_block_size || !copy_subsub)
|
||||
copy_bps = B_FALSE;
|
||||
|
||||
if (copy_subsub && subsubobjs != 0) {
|
||||
if (bpo->bpo_phys->bpo_subobjs) {
|
||||
dmu_prefetch(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, 0,
|
||||
bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), 1,
|
||||
ZIO_PRIORITY_ASYNC_READ);
|
||||
}
|
||||
dmu_prefetch(bpo->bpo_os, subsubobjs, 0, 0, 1,
|
||||
ZIO_PRIORITY_ASYNC_READ);
|
||||
}
|
||||
|
||||
if (copy_bps) {
|
||||
dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0,
|
||||
bpo->bpo_phys->bpo_num_blkptrs * sizeof (blkptr_t), 1,
|
||||
ZIO_PRIORITY_ASYNC_READ);
|
||||
dmu_prefetch(bpo->bpo_os, subobj, 0, 0, 1,
|
||||
ZIO_PRIORITY_ASYNC_READ);
|
||||
} else if (bpo->bpo_phys->bpo_subobjs) {
|
||||
dmu_prefetch(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, 0,
|
||||
bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), 1,
|
||||
ZIO_PRIORITY_ASYNC_READ);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, boolean_t bp_freed,
|
||||
dmu_tx_t *tx)
|
||||
|
||||
Reference in New Issue
Block a user