ZIL: Revert zl_lock scope reduction.

While I have no reports of it, I suspect possible use-after-free
scenario when zil_commit_waiter() tries to dereference zcw_lwb
for lwb already freed by zil_sync(), while zcw_done is not set.
Extension of zl_lock scope as it was originally should block
zil_sync() from freeing the lwb, closing this race.

This reverts #14959 and couple chunks of #14841.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by:	Alexander Motin <mav@FreeBSD.org>
Sponsored by:	iXsystems, Inc.
Closes #15228
This commit is contained in:
Alexander Motin 2023-09-01 20:13:52 -04:00 committed by GitHub
parent bbcf18c293
commit b1b99e10a6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1411,15 +1411,9 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
zilog_t *zilog = lwb->lwb_zilog; zilog_t *zilog = lwb->lwb_zilog;
zil_commit_waiter_t *zcw; zil_commit_waiter_t *zcw;
itx_t *itx; itx_t *itx;
uint64_t txg;
list_t itxs, waiters;
spa_config_exit(zilog->zl_spa, SCL_STATE, lwb); spa_config_exit(zilog->zl_spa, SCL_STATE, lwb);
list_create(&itxs, sizeof (itx_t), offsetof(itx_t, itx_node));
list_create(&waiters, sizeof (zil_commit_waiter_t),
offsetof(zil_commit_waiter_t, zcw_node));
hrtime_t t = gethrtime() - lwb->lwb_issued_timestamp; hrtime_t t = gethrtime() - lwb->lwb_issued_timestamp;
mutex_enter(&zilog->zl_lock); mutex_enter(&zilog->zl_lock);
@ -1428,6 +1422,9 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
lwb->lwb_root_zio = NULL; lwb->lwb_root_zio = NULL;
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE);
lwb->lwb_state = LWB_STATE_FLUSH_DONE;
if (zilog->zl_last_lwb_opened == lwb) { if (zilog->zl_last_lwb_opened == lwb) {
/* /*
* Remember the highest committed log sequence number * Remember the highest committed log sequence number
@ -1438,22 +1435,13 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
zilog->zl_commit_lr_seq = zilog->zl_lr_seq; zilog->zl_commit_lr_seq = zilog->zl_lr_seq;
} }
list_move_tail(&itxs, &lwb->lwb_itxs); while ((itx = list_remove_head(&lwb->lwb_itxs)) != NULL)
list_move_tail(&waiters, &lwb->lwb_waiters);
txg = lwb->lwb_issued_txg;
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE);
lwb->lwb_state = LWB_STATE_FLUSH_DONE;
mutex_exit(&zilog->zl_lock);
while ((itx = list_remove_head(&itxs)) != NULL)
zil_itx_destroy(itx); zil_itx_destroy(itx);
list_destroy(&itxs);
while ((zcw = list_remove_head(&waiters)) != NULL) { while ((zcw = list_remove_head(&lwb->lwb_waiters)) != NULL) {
mutex_enter(&zcw->zcw_lock); mutex_enter(&zcw->zcw_lock);
ASSERT3P(zcw->zcw_lwb, ==, lwb);
zcw->zcw_lwb = NULL; zcw->zcw_lwb = NULL;
/* /*
* We expect any ZIO errors from child ZIOs to have been * We expect any ZIO errors from child ZIOs to have been
@ -1478,7 +1466,11 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
mutex_exit(&zcw->zcw_lock); mutex_exit(&zcw->zcw_lock);
} }
list_destroy(&waiters);
uint64_t txg = lwb->lwb_issued_txg;
/* Once we drop the lock, lwb may be freed by zil_sync(). */
mutex_exit(&zilog->zl_lock);
mutex_enter(&zilog->zl_lwb_io_lock); mutex_enter(&zilog->zl_lwb_io_lock);
ASSERT3U(zilog->zl_lwb_inflight[txg & TXG_MASK], >, 0); ASSERT3U(zilog->zl_lwb_inflight[txg & TXG_MASK], >, 0);