diff --git a/include/os/freebsd/zfs/sys/zfs_vfsops_os.h b/include/os/freebsd/zfs/sys/zfs_vfsops_os.h index 24bb03575..7f0f24325 100644 --- a/include/os/freebsd/zfs/sys/zfs_vfsops_os.h +++ b/include/os/freebsd/zfs/sys/zfs_vfsops_os.h @@ -284,7 +284,6 @@ typedef struct zfid_long { #define SHORT_FID_LEN (sizeof (zfid_short_t) - sizeof (uint16_t)) #define LONG_FID_LEN (sizeof (zfid_long_t) - sizeof (uint16_t)) -extern uint_t zfs_fsyncer_key; extern int zfs_super_owner; extern void zfs_init(void); diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h index 91439d4b7..26dfe9760 100644 --- a/include/sys/zfs_ioctl.h +++ b/include/sys/zfs_ioctl.h @@ -575,7 +575,6 @@ typedef struct zfsdev_state { extern void *zfsdev_get_state(minor_t minor, enum zfsdev_state_type which); extern int zfsdev_getminor(zfs_file_t *fp, minor_t *minorp); -extern uint_t zfs_fsyncer_key; extern uint_t zfs_allow_log_key; #endif /* _KERNEL */ diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index 2f266f532..d71144807 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -307,7 +307,7 @@ extern void zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname, znode_t *szp, znode_t *wzp); extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, - znode_t *zp, offset_t off, ssize_t len, int ioflag, + znode_t *zp, offset_t off, ssize_t len, boolean_t commit, zil_callback_t callback, void *callback_data); extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp, uint64_t off, uint64_t len); diff --git a/include/sys/zvol_impl.h b/include/sys/zvol_impl.h index 3243917bc..810320862 100644 --- a/include/sys/zvol_impl.h +++ b/include/sys/zvol_impl.h @@ -81,9 +81,9 @@ void zvol_remove_minors_impl(const char *name); void zvol_last_close(zvol_state_t *zv); void zvol_insert(zvol_state_t *zv); void zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, - uint64_t len, boolean_t sync); + uint64_t len); void zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset, - uint64_t size, int sync); + uint64_t size, boolean_t commit); int zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio); int zvol_init_impl(void); diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c index ab72e91b4..c37f543ce 100644 --- a/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/module/os/freebsd/zfs/zfs_vnops_os.c @@ -244,9 +244,15 @@ zfs_open(vnode_t **vpp, int flag, cred_t *cr) return (SET_ERROR(EPERM)); } - /* Keep a count of the synchronous opens in the znode */ - if (flag & O_SYNC) - atomic_inc_32(&zp->z_sync_cnt); + /* + * Keep a count of the synchronous opens in the znode. On first + * synchronous open we must convert all previous async transactions + * into sync to keep correct ordering. + */ + if (flag & O_SYNC) { + if (atomic_inc_32_nv(&zp->z_sync_cnt) == 1) + zil_async_to_sync(zfsvfs->z_log, zp->z_id); + } zfs_exit(zfsvfs, FTAG); return (0); @@ -4201,6 +4207,10 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, } zfs_vmobject_wunlock(object); + boolean_t commit = (flags & (zfs_vm_pagerput_sync | + zfs_vm_pagerput_inval)) != 0 || + zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS; + if (ncount == 0) goto out; @@ -4253,7 +4263,7 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, * but that would make the locking messier */ zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, - len, 0, NULL, NULL); + len, commit, NULL, NULL); zfs_vmobject_wlock(object); for (i = 0; i < ncount; i++) { @@ -4268,8 +4278,7 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, out: zfs_rangelock_exit(lr); - if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 || - zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + if (commit) zil_commit(zfsvfs->z_log, zp->z_id); dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, len); diff --git a/module/os/freebsd/zfs/zvol_os.c b/module/os/freebsd/zfs/zvol_os.c index 2520507b9..0830e1c26 100644 --- a/module/os/freebsd/zfs/zvol_os.c +++ b/module/os/freebsd/zfs/zvol_os.c @@ -123,7 +123,6 @@ struct zvol_state_os { /* volmode=dev */ struct zvol_state_dev { struct cdev *zsd_cdev; - uint64_t zsd_sync_cnt; struct selinfo zsd_selinfo; } _zso_dev; @@ -669,7 +668,7 @@ zvol_geom_bio_strategy(struct bio *bp) int error = 0; boolean_t doread = B_FALSE; boolean_t is_dumpified; - boolean_t sync; + boolean_t commit; if (bp->bio_to) zv = bp->bio_to->private; @@ -696,7 +695,7 @@ zvol_geom_bio_strategy(struct bio *bp) } zvol_ensure_zilog(zv); if (bp->bio_cmd == BIO_FLUSH) - goto sync; + goto commit; break; default: error = SET_ERROR(EOPNOTSUPP); @@ -718,7 +717,7 @@ zvol_geom_bio_strategy(struct bio *bp) } is_dumpified = B_FALSE; - sync = !doread && !is_dumpified && + commit = !doread && !is_dumpified && zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; /* @@ -734,7 +733,7 @@ zvol_geom_bio_strategy(struct bio *bp) if (error != 0) { dmu_tx_abort(tx); } else { - zvol_log_truncate(zv, tx, off, resid, sync); + zvol_log_truncate(zv, tx, off, resid); dmu_tx_commit(tx); error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, off, resid); @@ -755,7 +754,7 @@ zvol_geom_bio_strategy(struct bio *bp) dmu_tx_abort(tx); } else { dmu_write(os, ZVOL_OBJ, off, size, addr, tx); - zvol_log_write(zv, tx, off, size, sync); + zvol_log_write(zv, tx, off, size, commit); dmu_tx_commit(tx); } } @@ -793,8 +792,8 @@ unlock: break; } - if (sync) { -sync: + if (commit) { +commit: zil_commit(zv->zv_zilog, ZVOL_OBJ); } resume: @@ -866,7 +865,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag) uint64_t volsize; zfs_locked_range_t *lr; int error = 0; - boolean_t sync; + boolean_t commit; zfs_uio_t uio; zv = dev->si_drv2; @@ -880,7 +879,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag) return (SET_ERROR(EIO)); ssize_t start_resid = zfs_uio_resid(&uio); - sync = (ioflag & IO_SYNC) || + commit = (ioflag & IO_SYNC) || (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS); rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); @@ -904,7 +903,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag) } error = dmu_write_uio_dnode(zv->zv_dn, &uio, bytes, tx); if (error == 0) - zvol_log_write(zv, tx, off, bytes, sync); + zvol_log_write(zv, tx, off, bytes, commit); dmu_tx_commit(tx); if (error) @@ -913,7 +912,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag) zfs_rangelock_exit(lr); int64_t nwritten = start_resid - zfs_uio_resid(&uio); dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten); - if (sync) + if (commit) zil_commit(zv->zv_zilog, ZVOL_OBJ); rw_exit(&zv->zv_suspend_lock); return (error); @@ -923,7 +922,6 @@ static int zvol_cdev_open(struct cdev *dev, int flags, int fmt, struct thread *td) { zvol_state_t *zv; - struct zvol_state_dev *zsd; int err = 0; boolean_t drop_suspend = B_FALSE; @@ -1017,13 +1015,6 @@ retry: } zv->zv_open_count++; - if (flags & O_SYNC) { - zsd = &zv->zv_zso->zso_dev; - zsd->zsd_sync_cnt++; - if (zsd->zsd_sync_cnt == 1 && - (zv->zv_flags & ZVOL_WRITTEN_TO) != 0) - zil_async_to_sync(zv->zv_zilog, ZVOL_OBJ); - } out_opened: if (zv->zv_open_count == 0) { zvol_last_close(zv); @@ -1041,7 +1032,6 @@ static int zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td) { zvol_state_t *zv; - struct zvol_state_dev *zsd; boolean_t drop_suspend = B_TRUE; rw_enter(&zvol_state_lock, ZVOL_RW_READER); @@ -1091,10 +1081,6 @@ zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td) * You may get multiple opens, but only one close. */ zv->zv_open_count--; - if (flags & O_SYNC) { - zsd = &zv->zv_zso->zso_dev; - zsd->zsd_sync_cnt--; - } if (zv->zv_open_count == 0) { ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock)); @@ -1163,7 +1149,7 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data, dmu_tx_abort(tx); } else { sync = (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS); - zvol_log_truncate(zv, tx, offset, length, sync); + zvol_log_truncate(zv, tx, offset, length); dmu_tx_commit(tx); error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length); diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index 1770e2372..6ad75ace0 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -192,9 +192,15 @@ zfs_open(struct inode *ip, int mode, int flag, cred_t *cr) return (SET_ERROR(EPERM)); } - /* Keep a count of the synchronous opens in the znode */ - if (flag & O_SYNC) - atomic_inc_32(&zp->z_sync_cnt); + /* + * Keep a count of the synchronous opens in the znode. On first + * synchronous open we must convert all previous async transactions + * into sync to keep correct ordering. + */ + if (flag & O_SYNC) { + if (atomic_inc_32_nv(&zp->z_sync_cnt) == 1) + zil_async_to_sync(zfsvfs->z_log, zp->z_id); + } zfs_exit(zfsvfs, FTAG); return (0); @@ -3826,21 +3832,14 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx); - zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0, - for_sync ? zfs_putpage_sync_commit_cb : - zfs_putpage_async_commit_cb, pp); - - dmu_tx_commit(tx); - - zfs_rangelock_exit(lr); - + boolean_t commit = B_FALSE; if (wbc->sync_mode != WB_SYNC_NONE) { /* * Note that this is rarely called under writepages(), because * writepages() normally handles the entire commit for * performance reasons. */ - zil_commit(zfsvfs->z_log, zp->z_id); + commit = B_TRUE; } else if (!for_sync && atomic_load_32(&zp->z_sync_writes_cnt) > 0) { /* * If the caller does not intend to wait synchronously @@ -3850,9 +3849,20 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, * our writeback to complete. Refer to the comment in * zpl_fsync() (when HAVE_FSYNC_RANGE is defined) for details. */ - zil_commit(zfsvfs->z_log, zp->z_id); + commit = B_TRUE; } + zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, commit, + for_sync ? zfs_putpage_sync_commit_cb : + zfs_putpage_async_commit_cb, pp); + + dmu_tx_commit(tx); + + zfs_rangelock_exit(lr); + + if (commit) + zil_commit(zfsvfs->z_log, zp->z_id); + dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, pglen); zfs_exit(zfsvfs, FTAG); diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c index f94ce69fb..3dc0bb388 100644 --- a/module/os/linux/zfs/zvol_os.c +++ b/module/os/linux/zfs/zvol_os.c @@ -387,7 +387,7 @@ zvol_discard(zv_request_t *zvr) if (error != 0) { dmu_tx_abort(tx); } else { - zvol_log_truncate(zv, tx, start, size, B_TRUE); + zvol_log_truncate(zv, tx, start, size); dmu_tx_commit(tx); error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, start, size); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 2738385e2..bfada2894 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -238,7 +238,6 @@ uint64_t zfs_max_nvlist_src_size = 0; */ static uint64_t zfs_history_output_max = 1024 * 1024; -uint_t zfs_fsyncer_key; uint_t zfs_allow_log_key; /* DATA_TYPE_ANY is used when zkey_type can vary. */ @@ -7882,7 +7881,6 @@ zfs_kmod_init(void) if ((error = zfsdev_attach()) != 0) goto out; - tsd_create(&zfs_fsyncer_key, NULL); tsd_create(&rrw_tsd_key, rrw_tsd_destroy); tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy); @@ -7919,7 +7917,6 @@ zfs_kmod_fini(void) spa_fini(); zvol_fini(); - tsd_destroy(&zfs_fsyncer_key); tsd_destroy(&rrw_tsd_key); tsd_destroy(&zfs_allow_log_key); } diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c index 50325907b..433a653e5 100644 --- a/module/zfs/zfs_log.c +++ b/module/zfs/zfs_log.c @@ -606,13 +606,12 @@ static int64_t zfs_immediate_write_sz = 32768; void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, - znode_t *zp, offset_t off, ssize_t resid, int ioflag, + znode_t *zp, offset_t off, ssize_t resid, boolean_t commit, zil_callback_t callback, void *callback_data) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl); uint32_t blocksize = zp->z_blksz; itx_wr_state_t write_state; - uintptr_t fsync_cnt; uint64_t gen = 0; ssize_t size = resid; @@ -628,15 +627,11 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, else if (!spa_has_slogs(zilog->zl_spa) && resid >= zfs_immediate_write_sz) write_state = WR_INDIRECT; - else if (ioflag & (O_SYNC | O_DSYNC)) + else if (commit) write_state = WR_COPIED; else write_state = WR_NEED_COPY; - if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) { - (void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1)); - } - (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &gen, sizeof (gen)); @@ -687,12 +682,9 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, BP_ZERO(&lr->lr_blkptr); itx->itx_private = ZTOZSB(zp); + itx->itx_sync = (zp->z_sync_cnt != 0); itx->itx_gen = gen; - if (!(ioflag & (O_SYNC | O_DSYNC)) && (zp->z_sync_cnt == 0) && - (fsync_cnt == 0)) - itx->itx_sync = B_FALSE; - itx->itx_callback = callback; itx->itx_callback_data = callback_data; zil_itx_assign(zilog, itx, tx); diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 40d6c87a7..2d0d858cd 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -58,27 +58,20 @@ #include -static ulong_t zfs_fsync_sync_cnt = 4; - int zfs_fsync(znode_t *zp, int syncflag, cred_t *cr) { int error = 0; zfsvfs_t *zfsvfs = ZTOZSB(zp); - (void) tsd_set(zfs_fsyncer_key, (void *)(uintptr_t)zfs_fsync_sync_cnt); - if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) - goto out; + return (error); atomic_inc_32(&zp->z_sync_writes_cnt); zil_commit(zfsvfs->z_log, zp->z_id); atomic_dec_32(&zp->z_sync_writes_cnt); zfs_exit(zfsvfs, FTAG); } -out: - tsd_set(zfs_fsyncer_key, NULL); - return (error); } @@ -520,6 +513,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) uint64_t end_size = MAX(zp->z_size, woff + n); zilog_t *zilog = zfsvfs->z_log; + boolean_t commit = (ioflag & (O_SYNC | O_DSYNC)) || + (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS); const uint64_t uid = KUID_TO_SUID(ZTOUID(zp)); const uint64_t gid = KGID_TO_SGID(ZTOGID(zp)); @@ -741,7 +736,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) * zfs_clear_setid_bits_if_necessary must precede any of * the TX_WRITE records logged here. */ - zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag, + zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, commit, NULL, NULL); dmu_tx_commit(tx); @@ -767,8 +762,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) return (error); } - if (ioflag & (O_SYNC | O_DSYNC) || - zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + if (commit) zil_commit(zilog, zp->z_id); const int64_t nwritten = start_resid - zfs_uio_resid(uio); diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index cc11fd806..28f8510be 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -583,7 +583,7 @@ static const ssize_t zvol_immediate_write_sz = 32768; void zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset, - uint64_t size, int sync) + uint64_t size, boolean_t commit) { uint32_t blocksize = zv->zv_volblocksize; zilog_t *zilog = zv->zv_zilog; @@ -598,7 +598,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset, else if (!spa_has_slogs(zilog->zl_spa) && size >= blocksize && blocksize > zvol_immediate_write_sz) write_state = WR_INDIRECT; - else if (sync) + else if (commit) write_state = WR_COPIED; else write_state = WR_NEED_COPY; @@ -633,7 +633,6 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset, BP_ZERO(&lr->lr_blkptr); itx->itx_private = zv; - itx->itx_sync = sync; (void) zil_itx_assign(zilog, itx, tx); @@ -650,8 +649,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset, * Log a DKIOCFREE/free-long-range to the ZIL with TX_TRUNCATE. */ void -zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len, - boolean_t sync) +zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len) { itx_t *itx; lr_truncate_t *lr; @@ -666,7 +664,6 @@ zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len, lr->lr_offset = off; lr->lr_length = len; - itx->itx_sync = sync; zil_itx_assign(zilog, itx, tx); }