ZIL: Fix race introduced by f63811f072.

We are not allowed to access the lwb after setting its state to
LWB_STATE_FLUSH_DONE and dropping zl_lock, since it may then be freed
by zil_sync().  To free the itxs and waiters after dropping the lock,
we need to move the elements of the lwb_itxs and lwb_waiters lists to
local storage.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by:	Alexander Motin <mav@FreeBSD.org>
Sponsored by:	iXsystems, Inc.
Closes #14957
Closes #14959
This commit is contained in:
Alexander Motin 2023-06-09 13:08:05 -04:00 committed by GitHub
parent 6c96269024
commit 55b1842f92
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1393,9 +1393,14 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
zil_commit_waiter_t *zcw;
itx_t *itx;
uint64_t txg;
list_t itxs, waiters;
spa_config_exit(zilog->zl_spa, SCL_STATE, lwb);
list_create(&itxs, sizeof (itx_t), offsetof(itx_t, itx_node));
list_create(&waiters, sizeof (zil_commit_waiter_t),
offsetof(zil_commit_waiter_t, zcw_node));
hrtime_t t = gethrtime() - lwb->lwb_issued_timestamp;
mutex_enter(&zilog->zl_lock);
@ -1404,9 +1409,6 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
lwb->lwb_root_zio = NULL;
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE);
lwb->lwb_state = LWB_STATE_FLUSH_DONE;
if (zilog->zl_last_lwb_opened == lwb) {
/*
* Remember the highest committed log sequence number
@ -1417,15 +1419,21 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
zilog->zl_commit_lr_seq = zilog->zl_lr_seq;
}
list_move_tail(&itxs, &lwb->lwb_itxs);
list_move_tail(&waiters, &lwb->lwb_waiters);
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE);
lwb->lwb_state = LWB_STATE_FLUSH_DONE;
mutex_exit(&zilog->zl_lock);
while ((itx = list_remove_head(&lwb->lwb_itxs)) != NULL)
while ((itx = list_remove_head(&itxs)) != NULL)
zil_itx_destroy(itx);
list_destroy(&itxs);
while ((zcw = list_remove_head(&lwb->lwb_waiters)) != NULL) {
while ((zcw = list_remove_head(&waiters)) != NULL) {
mutex_enter(&zcw->zcw_lock);
ASSERT3P(zcw->zcw_lwb, ==, lwb);
zcw->zcw_lwb = NULL;
/*
* We expect any ZIO errors from child ZIOs to have been
@ -1450,6 +1458,7 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
mutex_exit(&zcw->zcw_lock);
}
list_destroy(&waiters);
mutex_enter(&zilog->zl_lwb_io_lock);
txg = lwb->lwb_issued_txg;