mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 18:40:43 +03:00
Remove fastwrite mechanism.
Fastwrite was introduced many years ago to improve the spread of ZIL writes across multiple top-level vdevs by tracking the number of allocated but not yet written blocks and choosing the vdev with the smallest count. It was supposed to reduce the ZIL's knowledge about allocation, but it actually made the ZIL report its allocations to the allocation code even more actively, complicating both the ZIL and the metaslab code. On top of that, it seems the ZIO_FLAG_FASTWRITE setting in dmu_sync(), which was one of the declared benefits, was lost many years ago. In addition, the introduction of the embedded log metaslab class solved another problem, the allocation rotor accounting for both normal and log allocations, since in most cases those are now in different metaslab classes. After all that, I'd prefer to simplify the already too complicated ZIL, ZIO and metaslab code if the benefit of the complexity is not obvious. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: George Wilson <george.wilson@delphix.com> Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored by: iXsystems, Inc. Closes #15107
This commit is contained in:
+2
-65
@@ -5101,7 +5101,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
|
||||
zio_alloc_list_t *zal, int allocator)
|
||||
{
|
||||
metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];
|
||||
metaslab_group_t *mg, *fast_mg, *rotor;
|
||||
metaslab_group_t *mg, *rotor;
|
||||
vdev_t *vd;
|
||||
boolean_t try_hard = B_FALSE;
|
||||
|
||||
@@ -5164,15 +5164,6 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
|
||||
} else if (d != 0) {
|
||||
vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1]));
|
||||
mg = vd->vdev_mg->mg_next;
|
||||
} else if (flags & METASLAB_FASTWRITE) {
|
||||
mg = fast_mg = mca->mca_rotor;
|
||||
|
||||
do {
|
||||
if (fast_mg->mg_vd->vdev_pending_fastwrite <
|
||||
mg->mg_vd->vdev_pending_fastwrite)
|
||||
mg = fast_mg;
|
||||
} while ((fast_mg = fast_mg->mg_next) != mca->mca_rotor);
|
||||
|
||||
} else {
|
||||
ASSERT(mca->mca_rotor != NULL);
|
||||
mg = mca->mca_rotor;
|
||||
@@ -5297,7 +5288,7 @@ top:
|
||||
mg->mg_bias = 0;
|
||||
}
|
||||
|
||||
if ((flags & METASLAB_FASTWRITE) ||
|
||||
if ((flags & METASLAB_ZIL) ||
|
||||
atomic_add_64_nv(&mca->mca_aliquot, asize) >=
|
||||
mg->mg_aliquot + mg->mg_bias) {
|
||||
mca->mca_rotor = mg->mg_next;
|
||||
@@ -5310,11 +5301,6 @@ top:
|
||||
((flags & METASLAB_GANG_HEADER) ? 1 : 0));
|
||||
DVA_SET_ASIZE(&dva[d], asize);
|
||||
|
||||
if (flags & METASLAB_FASTWRITE) {
|
||||
atomic_add_64(&vd->vdev_pending_fastwrite,
|
||||
psize);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
next:
|
||||
@@ -5950,55 +5936,6 @@ metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg)
|
||||
return (error);
|
||||
}
|
||||
|
||||
void
|
||||
metaslab_fastwrite_mark(spa_t *spa, const blkptr_t *bp)
|
||||
{
|
||||
const dva_t *dva = bp->blk_dva;
|
||||
int ndvas = BP_GET_NDVAS(bp);
|
||||
uint64_t psize = BP_GET_PSIZE(bp);
|
||||
int d;
|
||||
vdev_t *vd;
|
||||
|
||||
ASSERT(!BP_IS_HOLE(bp));
|
||||
ASSERT(!BP_IS_EMBEDDED(bp));
|
||||
ASSERT(psize > 0);
|
||||
|
||||
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
|
||||
|
||||
for (d = 0; d < ndvas; d++) {
|
||||
if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL)
|
||||
continue;
|
||||
atomic_add_64(&vd->vdev_pending_fastwrite, psize);
|
||||
}
|
||||
|
||||
spa_config_exit(spa, SCL_VDEV, FTAG);
|
||||
}
|
||||
|
||||
void
|
||||
metaslab_fastwrite_unmark(spa_t *spa, const blkptr_t *bp)
|
||||
{
|
||||
const dva_t *dva = bp->blk_dva;
|
||||
int ndvas = BP_GET_NDVAS(bp);
|
||||
uint64_t psize = BP_GET_PSIZE(bp);
|
||||
int d;
|
||||
vdev_t *vd;
|
||||
|
||||
ASSERT(!BP_IS_HOLE(bp));
|
||||
ASSERT(!BP_IS_EMBEDDED(bp));
|
||||
ASSERT(psize > 0);
|
||||
|
||||
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
|
||||
|
||||
for (d = 0; d < ndvas; d++) {
|
||||
if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL)
|
||||
continue;
|
||||
ASSERT3U(vd->vdev_pending_fastwrite, >=, psize);
|
||||
atomic_sub_64(&vd->vdev_pending_fastwrite, psize);
|
||||
}
|
||||
|
||||
spa_config_exit(spa, SCL_VDEV, FTAG);
|
||||
}
|
||||
|
||||
static void
|
||||
metaslab_check_free_impl_cb(uint64_t inner, vdev_t *vd, uint64_t offset,
|
||||
uint64_t size, void *arg)
|
||||
|
||||
@@ -1192,7 +1192,6 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
|
||||
|
||||
ASSERT(tvd == tvd->vdev_top);
|
||||
|
||||
tvd->vdev_pending_fastwrite = svd->vdev_pending_fastwrite;
|
||||
tvd->vdev_ms_array = svd->vdev_ms_array;
|
||||
tvd->vdev_ms_shift = svd->vdev_ms_shift;
|
||||
tvd->vdev_ms_count = svd->vdev_ms_count;
|
||||
@@ -1655,7 +1654,6 @@ vdev_metaslab_fini(vdev_t *vd)
|
||||
}
|
||||
}
|
||||
ASSERT0(vd->vdev_ms_count);
|
||||
ASSERT3U(vd->vdev_pending_fastwrite, ==, 0);
|
||||
}
|
||||
|
||||
typedef struct vdev_probe_stats {
|
||||
|
||||
+5
-37
@@ -761,15 +761,13 @@ zil_lwb_vdev_compare(const void *x1, const void *x2)
|
||||
}
|
||||
|
||||
static lwb_t *
|
||||
zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, boolean_t slog, uint64_t txg,
|
||||
boolean_t fastwrite)
|
||||
zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, boolean_t slog, uint64_t txg)
|
||||
{
|
||||
lwb_t *lwb;
|
||||
|
||||
lwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP);
|
||||
lwb->lwb_zilog = zilog;
|
||||
lwb->lwb_blk = *bp;
|
||||
lwb->lwb_fastwrite = fastwrite;
|
||||
lwb->lwb_slog = slog;
|
||||
lwb->lwb_indirect = B_FALSE;
|
||||
if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) {
|
||||
@@ -916,7 +914,6 @@ zil_create(zilog_t *zilog)
|
||||
dmu_tx_t *tx = NULL;
|
||||
blkptr_t blk;
|
||||
int error = 0;
|
||||
boolean_t fastwrite = FALSE;
|
||||
boolean_t slog = FALSE;
|
||||
dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os);
|
||||
|
||||
@@ -949,8 +946,6 @@ zil_create(zilog_t *zilog)
|
||||
|
||||
error = zio_alloc_zil(zilog->zl_spa, zilog->zl_os, txg, &blk,
|
||||
ZIL_MIN_BLKSZ, &slog);
|
||||
fastwrite = TRUE;
|
||||
|
||||
if (error == 0)
|
||||
zil_init_log_chain(zilog, &blk);
|
||||
}
|
||||
@@ -959,7 +954,7 @@ zil_create(zilog_t *zilog)
|
||||
* Allocate a log write block (lwb) for the first log block.
|
||||
*/
|
||||
if (error == 0)
|
||||
lwb = zil_alloc_lwb(zilog, &blk, slog, txg, fastwrite);
|
||||
lwb = zil_alloc_lwb(zilog, &blk, slog, txg);
|
||||
|
||||
/*
|
||||
* If we just allocated the first log block, commit our transaction
|
||||
@@ -1044,9 +1039,6 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first)
|
||||
ASSERT(zh->zh_claim_txg == 0);
|
||||
VERIFY(!keep_first);
|
||||
while ((lwb = list_remove_head(&zilog->zl_lwb_list)) != NULL) {
|
||||
if (lwb->lwb_fastwrite)
|
||||
metaslab_fastwrite_unmark(zilog->zl_spa,
|
||||
&lwb->lwb_blk);
|
||||
if (lwb->lwb_buf != NULL)
|
||||
zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
|
||||
zio_free(zilog->zl_spa, txg, &lwb->lwb_blk);
|
||||
@@ -1551,7 +1543,6 @@ zil_lwb_write_done(zio_t *zio)
|
||||
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_ISSUED);
|
||||
lwb->lwb_state = LWB_STATE_WRITE_DONE;
|
||||
lwb->lwb_write_zio = NULL;
|
||||
lwb->lwb_fastwrite = FALSE;
|
||||
nlwb = list_next(&zilog->zl_lwb_list, lwb);
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
|
||||
@@ -1718,20 +1709,12 @@ zil_lwb_write_open(zilog_t *zilog, lwb_t *lwb)
|
||||
ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
|
||||
lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]);
|
||||
|
||||
/* Lock so zil_sync() doesn't fastwrite_unmark after zio is created */
|
||||
mutex_enter(&zilog->zl_lock);
|
||||
if (!lwb->lwb_fastwrite) {
|
||||
metaslab_fastwrite_mark(zilog->zl_spa, &lwb->lwb_blk);
|
||||
lwb->lwb_fastwrite = 1;
|
||||
}
|
||||
|
||||
lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, zilog->zl_spa, 0,
|
||||
&lwb->lwb_blk, lwb_abd, BP_GET_LSIZE(&lwb->lwb_blk),
|
||||
zil_lwb_write_done, lwb, prio,
|
||||
ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb);
|
||||
zil_lwb_write_done, lwb, prio, ZIO_FLAG_CANFAIL, &zb);
|
||||
|
||||
mutex_enter(&zilog->zl_lock);
|
||||
lwb->lwb_state = LWB_STATE_OPENED;
|
||||
|
||||
zil_lwb_set_zio_dependency(zilog, lwb);
|
||||
zilog->zl_last_lwb_opened = lwb;
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
@@ -1864,7 +1847,7 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, list_t *ilwbs)
|
||||
/*
|
||||
* Allocate a new log write block (lwb).
|
||||
*/
|
||||
nlwb = zil_alloc_lwb(zilog, bp, slog, txg, TRUE);
|
||||
nlwb = zil_alloc_lwb(zilog, bp, slog, txg);
|
||||
}
|
||||
|
||||
lwb->lwb_state = LWB_STATE_ISSUED;
|
||||
@@ -3651,18 +3634,6 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
|
||||
BP_ZERO(&zh->zh_log);
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove fastwrite on any blocks that have been pre-allocated for
|
||||
* the next commit. This prevents fastwrite counter pollution by
|
||||
* unused, long-lived LWBs.
|
||||
*/
|
||||
for (; lwb != NULL; lwb = list_next(&zilog->zl_lwb_list, lwb)) {
|
||||
if (lwb->lwb_fastwrite && !lwb->lwb_write_zio) {
|
||||
metaslab_fastwrite_unmark(zilog->zl_spa, &lwb->lwb_blk);
|
||||
lwb->lwb_fastwrite = 0;
|
||||
}
|
||||
}
|
||||
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
}
|
||||
|
||||
@@ -3895,9 +3866,6 @@ zil_close(zilog_t *zilog)
|
||||
ASSERT(list_is_empty(&zilog->zl_lwb_list));
|
||||
ASSERT3S(lwb->lwb_state, !=, LWB_STATE_ISSUED);
|
||||
|
||||
if (lwb->lwb_fastwrite)
|
||||
metaslab_fastwrite_unmark(zilog->zl_spa, &lwb->lwb_blk);
|
||||
|
||||
zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
|
||||
zil_free_lwb(zilog, lwb);
|
||||
}
|
||||
|
||||
+1
-13
@@ -3024,11 +3024,6 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
|
||||
*/
|
||||
pio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
|
||||
|
||||
/*
|
||||
* We didn't allocate this bp, so make sure it doesn't get unmarked.
|
||||
*/
|
||||
pio->io_flags &= ~ZIO_FLAG_FASTWRITE;
|
||||
|
||||
zio_nowait(zio);
|
||||
|
||||
return (pio);
|
||||
@@ -3616,7 +3611,6 @@ zio_dva_allocate(zio_t *zio)
|
||||
ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa));
|
||||
ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));
|
||||
|
||||
flags |= (zio->io_flags & ZIO_FLAG_FASTWRITE) ? METASLAB_FASTWRITE : 0;
|
||||
if (zio->io_flags & ZIO_FLAG_NODATA)
|
||||
flags |= METASLAB_DONT_THROTTLE;
|
||||
if (zio->io_flags & ZIO_FLAG_GANG_CHILD)
|
||||
@@ -3776,7 +3770,7 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
|
||||
* of, so we just hash the objset ID to pick the allocator to get
|
||||
* some parallelism.
|
||||
*/
|
||||
int flags = METASLAB_FASTWRITE | METASLAB_ZIL;
|
||||
int flags = METASLAB_ZIL;
|
||||
int allocator = (uint_t)cityhash4(0, 0, 0,
|
||||
os->os_dsl_dataset->ds_object) % spa->spa_alloc_count;
|
||||
error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
|
||||
@@ -4931,12 +4925,6 @@ zio_done(zio_t *zio)
|
||||
zfs_ereport_free_checksum(zcr);
|
||||
}
|
||||
|
||||
if (zio->io_flags & ZIO_FLAG_FASTWRITE && zio->io_bp &&
|
||||
!BP_IS_HOLE(zio->io_bp) && !BP_IS_EMBEDDED(zio->io_bp) &&
|
||||
!(zio->io_flags & ZIO_FLAG_NOPWRITE)) {
|
||||
metaslab_fastwrite_unmark(zio->io_spa, zio->io_bp);
|
||||
}
|
||||
|
||||
/*
|
||||
* It is the responsibility of the done callback to ensure that this
|
||||
* particular zio is no longer discoverable for adoption, and as
|
||||
|
||||
Reference in New Issue
Block a user