mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-03 23:09:35 +03:00
zvol: call zil_replaying() during replay
zil_replaying(zil, tx) has the side-effect of informing the ZIL that an entry has been replayed in the (still open) tx. The ZIL uses that information to record the replay progress in the ZIL header when that tx's txg syncs. ZPL log entries are not idempotent and are logically dependent on each other, and thus calling zil_replaying() is necessary for correctness. For ZVOLs the question of correctness is more nuanced: ZVOL logs only TX_WRITE and TX_TRUNCATE, both of which are idempotent. Logical dependencies between two records exist only if the write or discard request had sync semantics or if the ranges affected by the records overlap. Thus, at first glance, it would be correct to restart replay from the beginning if we crash before replay completes. But this does not address the following scenario: Assume one log record per LWB. The chain on disk is HDR -> 1:W(1, "A") -> 2:W(1, "B") -> 3:W(2, "X") -> 4:W(3, "Z") where N:W(O, C) represents log entry number N which is a TX_WRITE of C to offset O. We replay 1, 2 and 3 in one txg, sync that txg, then crash. Bit flips corrupt 2, 3, and 4. We come up again and restart replay from the beginning because we did not call zil_replaying() during replay. We replay 1 again, then interpret 2's invalid checksum as the end of the ZIL chain and call replay done. The replayed zvol content is "AX". If we had called zil_replaying(), the HDR would have pointed to 3 and our resumed replay would not have replayed anything because 3 was corrupted, resulting in zvol content "BX". If 3 logically depends on 2, then the replay corrupted the ZVOL_OBJ's contents. This patch adds the zil_replaying() calls to the replay functions. Since the callbacks in the replay function need the zilog_t* pointer so that they can call zil_replaying(), we open the ZIL while replaying in zvol_create_minor(). We also verify that replay has been done when on-demand-opening the ZIL on the first modifying bio.
Reviewed-by: Matthew Ahrens <mahrens@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Christian Schwarz <me@cschwarz.com> Closes #11667
This commit is contained in:
parent
0ccffb2634
commit
abb485a34a
@ -1157,6 +1157,9 @@ zvol_ensure_zilog(zvol_state_t *zv)
|
||||
zv->zv_zilog = zil_open(zv->zv_objset,
|
||||
zvol_get_data);
|
||||
zv->zv_flags |= ZVOL_WRITTEN_TO;
|
||||
/* replay / destroy done in zvol_create_minor_impl() */
|
||||
VERIFY0((zv->zv_zilog->zl_header->zh_flags &
|
||||
ZIL_REPLAY_NEEDED));
|
||||
}
|
||||
rw_downgrade(&zv->zv_suspend_lock);
|
||||
}
|
||||
@ -1381,12 +1384,16 @@ zvol_create_minor_impl(const char *name)
|
||||
zv->zv_volsize = volsize;
|
||||
zv->zv_objset = os;
|
||||
|
||||
ASSERT3P(zv->zv_zilog, ==, NULL);
|
||||
zv->zv_zilog = zil_open(os, zvol_get_data);
|
||||
if (spa_writeable(dmu_objset_spa(os))) {
|
||||
if (zil_replay_disable)
|
||||
zil_destroy(dmu_objset_zil(os), B_FALSE);
|
||||
zil_destroy(zv->zv_zilog, B_FALSE);
|
||||
else
|
||||
zil_replay(os, zv, zvol_replay_vector);
|
||||
}
|
||||
zil_close(zv->zv_zilog);
|
||||
zv->zv_zilog = NULL;
|
||||
ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL);
|
||||
dataset_kstats_create(&zv->zv_kstat, zv->zv_objset);
|
||||
|
||||
|
@ -357,6 +357,9 @@ zvol_request(struct request_queue *q, struct bio *bio)
|
||||
zv->zv_zilog = zil_open(zv->zv_objset,
|
||||
zvol_get_data);
|
||||
zv->zv_flags |= ZVOL_WRITTEN_TO;
|
||||
/* replay / destroy done in zvol_create_minor */
|
||||
VERIFY0((zv->zv_zilog->zl_header->zh_flags &
|
||||
ZIL_REPLAY_NEEDED));
|
||||
}
|
||||
rw_downgrade(&zv->zv_suspend_lock);
|
||||
}
|
||||
@ -947,12 +950,16 @@ zvol_os_create_minor(const char *name)
|
||||
blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, zv->zv_zso->zvo_queue);
|
||||
#endif
|
||||
|
||||
ASSERT3P(zv->zv_zilog, ==, NULL);
|
||||
zv->zv_zilog = zil_open(os, zvol_get_data);
|
||||
if (spa_writeable(dmu_objset_spa(os))) {
|
||||
if (zil_replay_disable)
|
||||
zil_destroy(dmu_objset_zil(os), B_FALSE);
|
||||
zil_destroy(zv->zv_zilog, B_FALSE);
|
||||
else
|
||||
zil_replay(os, zv, zvol_replay_vector);
|
||||
}
|
||||
zil_close(zv->zv_zilog);
|
||||
zv->zv_zilog = NULL;
|
||||
ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL);
|
||||
dataset_kstats_create(&zv->zv_kstat, zv->zv_objset);
|
||||
|
||||
|
@ -473,7 +473,19 @@ zvol_replay_truncate(void *arg1, void *arg2, boolean_t byteswap)
|
||||
offset = lr->lr_offset;
|
||||
length = lr->lr_length;
|
||||
|
||||
return (dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length));
|
||||
dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
|
||||
dmu_tx_mark_netfree(tx);
|
||||
int error = dmu_tx_assign(tx, TXG_WAIT);
|
||||
if (error != 0) {
|
||||
dmu_tx_abort(tx);
|
||||
} else {
|
||||
zil_replaying(zv->zv_zilog, tx);
|
||||
dmu_tx_commit(tx);
|
||||
error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset,
|
||||
length);
|
||||
}
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -513,6 +525,7 @@ zvol_replay_write(void *arg1, void *arg2, boolean_t byteswap)
|
||||
dmu_tx_abort(tx);
|
||||
} else {
|
||||
dmu_write(os, ZVOL_OBJ, offset, length, data, tx);
|
||||
zil_replaying(zv->zv_zilog, tx);
|
||||
dmu_tx_commit(tx);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user