mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-13 11:40:25 +03:00
Update SAs when an inode is dirtied
Revert the portion of commit d3aa3ea
which always resulted in the
SAs being update when an mmap()'ed file was closed. That change
accidentally resulted in unexpected ctime updates which upset tools
like git. That was always a horrible hack and I'm happy it will
never make it in to a tagged release.
The right fix is something I initially resisted doing because I
was worried about the additional overhead. However, in hindsight
the overhead isn't as bad as I feared.
This patch implemented the sops->dirty_inode() callback which is
unsurprisingly called when an inode is dirtied. We leverage this
callback to keep the znode SAs strictly in sync with the inode.
However, for now we're going to go slowly to avoid introducing
any new unexpected issues by only updating the atime, mtime, and
ctime. This will cover the callpath of most concern to us.
->filemap_page_mkwrite->file_update_time->update_time->
mark_inode_dirty_sync->__mark_inode_dirty->dirty_inode
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #764
Closes #1140
This commit is contained in:
parent
bd192c4f48
commit
8780c53961
23
config/kernel-dirty-inode.m4
Normal file
23
config/kernel-dirty-inode.m4
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
dnl #
|
||||||
|
dnl # 3.0 API change
|
||||||
|
dnl # The sops->dirty_inode() callbacks were updated to take a flags
|
||||||
|
dnl # argument. This allows the greater control over whether the
|
||||||
|
dnl # filesystem needs to push out a transaction or not.
|
||||||
|
dnl #
|
||||||
|
AC_DEFUN([ZFS_AC_KERNEL_DIRTY_INODE_WITH_FLAGS], [
|
||||||
|
AC_MSG_CHECKING([whether sops->dirty_inode() wants flags])
|
||||||
|
ZFS_LINUX_TRY_COMPILE([
|
||||||
|
#include <linux/fs.h>
|
||||||
|
],[
|
||||||
|
void (*dirty_inode) (struct inode *, int) = NULL;
|
||||||
|
struct super_operations sops __attribute__ ((unused));
|
||||||
|
|
||||||
|
sops.dirty_inode = dirty_inode;
|
||||||
|
],[
|
||||||
|
AC_MSG_RESULT([yes])
|
||||||
|
AC_DEFINE(HAVE_DIRTY_INODE_WITH_FLAGS, 1,
|
||||||
|
[sops->dirty_inode() wants flags])
|
||||||
|
],[
|
||||||
|
AC_MSG_RESULT([no])
|
||||||
|
])
|
||||||
|
])
|
@ -47,6 +47,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
|
|||||||
ZFS_AC_KERNEL_SHOW_OPTIONS
|
ZFS_AC_KERNEL_SHOW_OPTIONS
|
||||||
ZFS_AC_KERNEL_FSYNC
|
ZFS_AC_KERNEL_FSYNC
|
||||||
ZFS_AC_KERNEL_EVICT_INODE
|
ZFS_AC_KERNEL_EVICT_INODE
|
||||||
|
ZFS_AC_KERNEL_DIRTY_INODE_WITH_FLAGS
|
||||||
ZFS_AC_KERNEL_NR_CACHED_OBJECTS
|
ZFS_AC_KERNEL_NR_CACHED_OBJECTS
|
||||||
ZFS_AC_KERNEL_FREE_CACHED_OBJECTS
|
ZFS_AC_KERNEL_FREE_CACHED_OBJECTS
|
||||||
ZFS_AC_KERNEL_FALLOCATE
|
ZFS_AC_KERNEL_FALLOCATE
|
||||||
|
@ -75,6 +75,7 @@ extern int zfs_setsecattr(struct inode *ip, vsecattr_t *vsecp, int flag,
|
|||||||
extern int zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages);
|
extern int zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages);
|
||||||
extern int zfs_putpage(struct inode *ip, struct page *pp,
|
extern int zfs_putpage(struct inode *ip, struct page *pp,
|
||||||
struct writeback_control *wbc);
|
struct writeback_control *wbc);
|
||||||
|
extern int zfs_dirty_inode(struct inode *ip, int flags);
|
||||||
extern int zfs_map(struct inode *ip, offset_t off, caddr_t *addrp,
|
extern int zfs_map(struct inode *ip, offset_t off, caddr_t *addrp,
|
||||||
size_t len, unsigned long vm_flags);
|
size_t len, unsigned long vm_flags);
|
||||||
|
|
||||||
|
@ -218,31 +218,10 @@ zfs_close(struct inode *ip, int flag, cred_t *cr)
|
|||||||
{
|
{
|
||||||
znode_t *zp = ITOZ(ip);
|
znode_t *zp = ITOZ(ip);
|
||||||
zfs_sb_t *zsb = ITOZSB(ip);
|
zfs_sb_t *zsb = ITOZSB(ip);
|
||||||
int error = 0;
|
|
||||||
|
|
||||||
ZFS_ENTER(zsb);
|
ZFS_ENTER(zsb);
|
||||||
ZFS_VERIFY_ZP(zp);
|
ZFS_VERIFY_ZP(zp);
|
||||||
|
|
||||||
/*
|
|
||||||
* When closing an mmap()'ed file ensure the inode atime, mtime, and
|
|
||||||
* ctime are written to disk. These values may have been updated in
|
|
||||||
* memory by filemap_page_mkwrite() bit are not yet reflected in the
|
|
||||||
* znode since writepage() may occur after the close.
|
|
||||||
*/
|
|
||||||
if (zp->z_is_mapped) {
|
|
||||||
vattr_t *vap;
|
|
||||||
|
|
||||||
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
|
|
||||||
vap->va_mask = ATTR_ATIME | ATTR_MTIME | ATTR_CTIME;
|
|
||||||
vap->va_atime = ip->i_atime;
|
|
||||||
vap->va_mtime = ip->i_mtime;
|
|
||||||
vap->va_ctime = ip->i_ctime;
|
|
||||||
|
|
||||||
error = zfs_setattr(ip, vap, 0, cr);
|
|
||||||
|
|
||||||
kmem_free(vap, sizeof(vattr_t));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Zero the synchronous opens in the znode. Under Linux the
|
* Zero the synchronous opens in the znode. Under Linux the
|
||||||
* zfs_close() hook is not symmetric with zfs_open(), it is
|
* zfs_close() hook is not symmetric with zfs_open(), it is
|
||||||
@ -256,7 +235,7 @@ zfs_close(struct inode *ip, int flag, cred_t *cr)
|
|||||||
VERIFY(zfs_vscan(ip, cr, 1) == 0);
|
VERIFY(zfs_vscan(ip, cr, 1) == 0);
|
||||||
|
|
||||||
ZFS_EXIT(zsb);
|
ZFS_EXIT(zsb);
|
||||||
return (error);
|
return (0);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(zfs_close);
|
EXPORT_SYMBOL(zfs_close);
|
||||||
|
|
||||||
@ -3920,6 +3899,56 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
|
|||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Update the system attributes when the inode has been dirtied. For the
|
||||||
|
* moment we're conservative and only update the atime, mtime, and ctime.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
zfs_dirty_inode(struct inode *ip, int flags)
|
||||||
|
{
|
||||||
|
znode_t *zp = ITOZ(ip);
|
||||||
|
zfs_sb_t *zsb = ITOZSB(ip);
|
||||||
|
dmu_tx_t *tx;
|
||||||
|
uint64_t atime[2], mtime[2], ctime[2];
|
||||||
|
sa_bulk_attr_t bulk[3];
|
||||||
|
int error;
|
||||||
|
int cnt = 0;
|
||||||
|
|
||||||
|
ZFS_ENTER(zsb);
|
||||||
|
ZFS_VERIFY_ZP(zp);
|
||||||
|
|
||||||
|
tx = dmu_tx_create(zsb->z_os);
|
||||||
|
|
||||||
|
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
|
||||||
|
zfs_sa_upgrade_txholds(tx, zp);
|
||||||
|
|
||||||
|
error = dmu_tx_assign(tx, TXG_WAIT);
|
||||||
|
if (error) {
|
||||||
|
dmu_tx_abort(tx);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
mutex_enter(&zp->z_lock);
|
||||||
|
SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(zsb), NULL, &atime, 16);
|
||||||
|
SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zsb), NULL, &mtime, 16);
|
||||||
|
SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zsb), NULL, &ctime, 16);
|
||||||
|
|
||||||
|
/* Preserve the mtime and ctime provided by the inode */
|
||||||
|
ZFS_TIME_ENCODE(&ip->i_atime, atime);
|
||||||
|
ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
|
||||||
|
ZFS_TIME_ENCODE(&ip->i_ctime, ctime);
|
||||||
|
zp->z_atime_dirty = 0;
|
||||||
|
|
||||||
|
error = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);
|
||||||
|
mutex_exit(&zp->z_lock);
|
||||||
|
|
||||||
|
dmu_tx_commit(tx);
|
||||||
|
out:
|
||||||
|
ZFS_EXIT(zsb);
|
||||||
|
return (error);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(zfs_dirty_inode);
|
||||||
|
|
||||||
/*ARGSUSED*/
|
/*ARGSUSED*/
|
||||||
void
|
void
|
||||||
zfs_inactive(struct inode *ip)
|
zfs_inactive(struct inode *ip)
|
||||||
|
@ -48,6 +48,25 @@ zpl_inode_destroy(struct inode *ip)
|
|||||||
zfs_inode_destroy(ip);
|
zfs_inode_destroy(ip);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called from __mark_inode_dirty() to reflect that something in the
|
||||||
|
* inode has changed. We use it to ensure the znode system attributes
|
||||||
|
* are always strictly update to date with respect to the inode.
|
||||||
|
*/
|
||||||
|
#ifdef HAVE_DIRTY_INODE_WITH_FLAGS
|
||||||
|
static void
|
||||||
|
zpl_dirty_inode(struct inode *ip, int flags)
|
||||||
|
{
|
||||||
|
zfs_dirty_inode(ip, flags);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static void
|
||||||
|
zpl_dirty_inode(struct inode *ip)
|
||||||
|
{
|
||||||
|
zfs_dirty_inode(ip, 0);
|
||||||
|
}
|
||||||
|
#endif /* HAVE_DIRTY_INODE_WITH_FLAGS */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When ->drop_inode() is called its return value indicates if the
|
* When ->drop_inode() is called its return value indicates if the
|
||||||
* inode should be evicted from the inode cache. If the inode is
|
* inode should be evicted from the inode cache. If the inode is
|
||||||
@ -306,7 +325,7 @@ zpl_free_cached_objects(struct super_block *sb, int nr_to_scan)
|
|||||||
const struct super_operations zpl_super_operations = {
|
const struct super_operations zpl_super_operations = {
|
||||||
.alloc_inode = zpl_inode_alloc,
|
.alloc_inode = zpl_inode_alloc,
|
||||||
.destroy_inode = zpl_inode_destroy,
|
.destroy_inode = zpl_inode_destroy,
|
||||||
.dirty_inode = NULL,
|
.dirty_inode = zpl_dirty_inode,
|
||||||
.write_inode = NULL,
|
.write_inode = NULL,
|
||||||
.drop_inode = NULL,
|
.drop_inode = NULL,
|
||||||
#ifdef HAVE_EVICT_INODE
|
#ifdef HAVE_EVICT_INODE
|
||||||
|
Loading…
Reference in New Issue
Block a user