mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Fix lseek(SEEK_DATA/SEEK_HOLE) mmap consistency
When using lseek(2) to report data/holes, memory-mapped regions of the file were ignored, which could produce incorrect results. To handle this, zfs_holey_common() was updated to asynchronously write back any dirty mmap(2) regions prior to reporting holes. Additionally, while not strictly required, the dn_struct_rwlock is now held over the dirty check to prevent the dnode structure from changing. This ensures that a clean dnode can't be dirtied before the data/hole is located. The range lock is now also taken to ensure the call cannot race with zfs_write(). Furthermore, the code was refactored to provide a dnode_is_dirty() helper function which checks the dnode for any dirty records to determine its dirtiness. Reviewed-by: Matthew Ahrens <mahrens@delphix.com> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Rich Ercolani <rincebrain@gmail.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Issue #11900 Closes #12724
This commit is contained in:
committed by
Tony Hutter
parent
4ba1a6227a
commit
a524f8d6af
+26
-27
@@ -2045,42 +2045,41 @@ int
|
||||
dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
|
||||
{
|
||||
dnode_t *dn;
|
||||
int i, err;
|
||||
boolean_t clean = B_TRUE;
|
||||
int err;
|
||||
|
||||
restart:
|
||||
err = dnode_hold(os, object, FTAG, &dn);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
/*
|
||||
* Check if dnode is dirty
|
||||
*/
|
||||
for (i = 0; i < TXG_SIZE; i++) {
|
||||
if (multilist_link_active(&dn->dn_dirty_link[i])) {
|
||||
clean = B_FALSE;
|
||||
break;
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
|
||||
if (dnode_is_dirty(dn)) {
|
||||
/*
|
||||
* If the zfs_dmu_offset_next_sync module option is enabled
|
||||
* then strict hole reporting has been requested. Dirty
|
||||
* dnodes must be synced to disk to accurately report all
|
||||
* holes. When disabled (the default) dirty dnodes are
|
||||
* reported to not have any holes which is always safe.
|
||||
*
|
||||
* When called by zfs_holey_common() the zp->z_rangelock
|
||||
* is held to prevent zfs_write() and mmap writeback from
|
||||
* re-dirtying the dnode after txg_wait_synced().
|
||||
*/
|
||||
if (zfs_dmu_offset_next_sync) {
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
dnode_rele(dn, FTAG);
|
||||
txg_wait_synced(dmu_objset_pool(os), 0);
|
||||
goto restart;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If compatibility option is on, sync any current changes before
|
||||
* we go trundling through the block pointers.
|
||||
*/
|
||||
if (!clean && zfs_dmu_offset_next_sync) {
|
||||
clean = B_TRUE;
|
||||
dnode_rele(dn, FTAG);
|
||||
txg_wait_synced(dmu_objset_pool(os), 0);
|
||||
err = dnode_hold(os, object, FTAG, &dn);
|
||||
if (err)
|
||||
return (err);
|
||||
}
|
||||
|
||||
if (clean)
|
||||
err = dnode_next_offset(dn,
|
||||
(hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0);
|
||||
else
|
||||
err = SET_ERROR(EBUSY);
|
||||
} else {
|
||||
err = dnode_next_offset(dn, DNODE_FIND_HAVELOCK |
|
||||
(hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0);
|
||||
}
|
||||
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
dnode_rele(dn, FTAG);
|
||||
|
||||
return (err);
|
||||
|
||||
@@ -1653,6 +1653,26 @@ dnode_try_claim(objset_t *os, uint64_t object, int slots)
|
||||
slots, NULL, NULL));
|
||||
}
|
||||
|
||||
/*
|
||||
* Checks if the dnode contains any uncommitted dirty records.
|
||||
*/
|
||||
boolean_t
|
||||
dnode_is_dirty(dnode_t *dn)
|
||||
{
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
|
||||
for (int i = 0; i < TXG_SIZE; i++) {
|
||||
if (list_head(&dn->dn_dirty_records[i]) != NULL) {
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
return (B_TRUE);
|
||||
}
|
||||
}
|
||||
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
void
|
||||
dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
|
||||
{
|
||||
|
||||
@@ -85,6 +85,7 @@ zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
|
||||
static int
|
||||
zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
|
||||
{
|
||||
zfs_locked_range_t *lr;
|
||||
uint64_t noff = (uint64_t)*off; /* new offset */
|
||||
uint64_t file_sz;
|
||||
int error;
|
||||
@@ -100,12 +101,18 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
|
||||
else
|
||||
hole = B_FALSE;
|
||||
|
||||
/* Flush any mmap()'d data to disk */
|
||||
if (zn_has_cached_data(zp))
|
||||
zn_flush_cached_data(zp, B_FALSE);
|
||||
|
||||
lr = zfs_rangelock_enter(&zp->z_rangelock, 0, file_sz, RL_READER);
|
||||
error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff);
|
||||
zfs_rangelock_exit(lr);
|
||||
|
||||
if (error == ESRCH)
|
||||
return (SET_ERROR(ENXIO));
|
||||
|
||||
/* file was dirty, so fall back to using generic logic */
|
||||
/* File was dirty, so fall back to using generic logic */
|
||||
if (error == EBUSY) {
|
||||
if (hole)
|
||||
*off = file_sz;
|
||||
|
||||
Reference in New Issue
Block a user