ZIL: Cleanup sync and commit handling

ZVOL:
 - Mark all ZVOL ZIL transactions as sync.  Since ZVOLs have only
one object, it makes no sense to maintain an async queue and merge it
into the sync queue on each commit.  A single sync queue is simply
cheaper, and it changes nothing until an actual commit request arrives.
 - Remove zsd_sync_cnt and the zil_async_to_sync() calls since we
are no longer switching between sync and async queues.

ZFS:
 - Mark write transactions as sync based only on the number of sync
opens (z_sync_cnt).  We cannot randomly jump between sync and
async unless we want data corruption due to write reordering.
 - When a file is first opened with O_SYNC (z_sync_cnt incremented
to 1), call zil_async_to_sync() for it to preserve correct ordering
between past and future writes.
 - Drop the zfs_fsyncer_key logic.  It looks like it was an optimization
for workloads heavily intermixing async writes with tons of fsyncs.
But first, it was broken 8 years ago because the Linux tsd implementation
does not allow data storage between syscalls, and second, I doubt it
is safe to switch from async to sync so often and without calling
zil_async_to_sync().

 - Rename the sync argument of *_log_write() to commit; it now only
signals the caller's intent to call zil_commit() soon after.  This
allows WR_COPIED optimizations without carrying any other meaning.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Signed-off-by:	Alexander Motin <mav@FreeBSD.org>
Sponsored by:	iXsystems, Inc.
Closes #15366
This commit is contained in:
Alexander Motin
2023-10-30 17:51:56 -04:00
committed by GitHub
parent 043c6ee3b6
commit c3773de168
12 changed files with 65 additions and 82 deletions
-3
View File
@@ -238,7 +238,6 @@ uint64_t zfs_max_nvlist_src_size = 0;
*/
static uint64_t zfs_history_output_max = 1024 * 1024;
uint_t zfs_fsyncer_key;
uint_t zfs_allow_log_key;
/* DATA_TYPE_ANY is used when zkey_type can vary. */
@@ -7882,7 +7881,6 @@ zfs_kmod_init(void)
if ((error = zfsdev_attach()) != 0)
goto out;
tsd_create(&zfs_fsyncer_key, NULL);
tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
@@ -7919,7 +7917,6 @@ zfs_kmod_fini(void)
spa_fini();
zvol_fini();
tsd_destroy(&zfs_fsyncer_key);
tsd_destroy(&rrw_tsd_key);
tsd_destroy(&zfs_allow_log_key);
}
+3 -11
View File
@@ -606,13 +606,12 @@ static int64_t zfs_immediate_write_sz = 32768;
void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, offset_t off, ssize_t resid, int ioflag,
znode_t *zp, offset_t off, ssize_t resid, boolean_t commit,
zil_callback_t callback, void *callback_data)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl);
uint32_t blocksize = zp->z_blksz;
itx_wr_state_t write_state;
uintptr_t fsync_cnt;
uint64_t gen = 0;
ssize_t size = resid;
@@ -628,15 +627,11 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
else if (!spa_has_slogs(zilog->zl_spa) &&
resid >= zfs_immediate_write_sz)
write_state = WR_INDIRECT;
else if (ioflag & (O_SYNC | O_DSYNC))
else if (commit)
write_state = WR_COPIED;
else
write_state = WR_NEED_COPY;
if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) {
(void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
}
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &gen,
sizeof (gen));
@@ -687,12 +682,9 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
BP_ZERO(&lr->lr_blkptr);
itx->itx_private = ZTOZSB(zp);
itx->itx_sync = (zp->z_sync_cnt != 0);
itx->itx_gen = gen;
if (!(ioflag & (O_SYNC | O_DSYNC)) && (zp->z_sync_cnt == 0) &&
(fsync_cnt == 0))
itx->itx_sync = B_FALSE;
itx->itx_callback = callback;
itx->itx_callback_data = callback_data;
zil_itx_assign(zilog, itx, tx);
+5 -11
View File
@@ -58,27 +58,20 @@
#include <sys/zfs_znode.h>
static ulong_t zfs_fsync_sync_cnt = 4;
int
zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
{
int error = 0;
zfsvfs_t *zfsvfs = ZTOZSB(zp);
(void) tsd_set(zfs_fsyncer_key, (void *)(uintptr_t)zfs_fsync_sync_cnt);
if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
goto out;
return (error);
atomic_inc_32(&zp->z_sync_writes_cnt);
zil_commit(zfsvfs->z_log, zp->z_id);
atomic_dec_32(&zp->z_sync_writes_cnt);
zfs_exit(zfsvfs, FTAG);
}
out:
tsd_set(zfs_fsyncer_key, NULL);
return (error);
}
@@ -520,6 +513,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
uint64_t end_size = MAX(zp->z_size, woff + n);
zilog_t *zilog = zfsvfs->z_log;
boolean_t commit = (ioflag & (O_SYNC | O_DSYNC)) ||
(zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS);
const uint64_t uid = KUID_TO_SUID(ZTOUID(zp));
const uint64_t gid = KGID_TO_SGID(ZTOGID(zp));
@@ -741,7 +736,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
* zfs_clear_setid_bits_if_necessary must precede any of
* the TX_WRITE records logged here.
*/
zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag,
zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, commit,
NULL, NULL);
dmu_tx_commit(tx);
@@ -767,8 +762,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
return (error);
}
if (ioflag & (O_SYNC | O_DSYNC) ||
zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
if (commit)
zil_commit(zilog, zp->z_id);
const int64_t nwritten = start_resid - zfs_uio_resid(uio);
+3 -6
View File
@@ -583,7 +583,7 @@ static const ssize_t zvol_immediate_write_sz = 32768;
void
zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
uint64_t size, int sync)
uint64_t size, boolean_t commit)
{
uint32_t blocksize = zv->zv_volblocksize;
zilog_t *zilog = zv->zv_zilog;
@@ -598,7 +598,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
else if (!spa_has_slogs(zilog->zl_spa) &&
size >= blocksize && blocksize > zvol_immediate_write_sz)
write_state = WR_INDIRECT;
else if (sync)
else if (commit)
write_state = WR_COPIED;
else
write_state = WR_NEED_COPY;
@@ -633,7 +633,6 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
BP_ZERO(&lr->lr_blkptr);
itx->itx_private = zv;
itx->itx_sync = sync;
(void) zil_itx_assign(zilog, itx, tx);
@@ -650,8 +649,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
* Log a DKIOCFREE/free-long-range to the ZIL with TX_TRUNCATE.
*/
void
zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len,
boolean_t sync)
zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len)
{
itx_t *itx;
lr_truncate_t *lr;
@@ -666,7 +664,6 @@ zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len,
lr->lr_offset = off;
lr->lr_length = len;
itx->itx_sync = sync;
zil_itx_assign(zilog, itx, tx);
}