From 312bdab0f5027d290c684d05f49d8d888e32f841 Mon Sep 17 00:00:00 2001 From: Andrew Walker Date: Mon, 5 Jan 2026 16:06:28 -0600 Subject: [PATCH] Add handling for STATX_CHANGE_COOKIE This commit adds handling for the STATX_CHANGE_COOKIE so that we can properly surface the ZFS znode sequence to NFS clients via knfsd. If knfsd does not have STATX_CHANGE_COOKIE in the statx result then it will synthesize the NFS change_info4 structure and related change4id values algorithmically based on the ctime value of the file. Since internally ZFS is using ktime_get_coarse_real_ts64() for the timestamp calculation here it introduces the possibility that a change will not increment the change4id of directories / files, causing a failure in the client to invalidate its attr cache (among other things). See RFC 8881 Section 10.8 for discussion of how clients may implement name and directory caching. Notable in this commit is that we are not initializing the inode->i_version to the znode->z_seq number. The reason for this is that we're intentionally not setting `SB_I_VERSION`. This indicates that the filesystem manages its own i_version and so it is not populated by generic_fillattr(). The following compares a tight loop of setattr over the NFSv4 protocol while tracing nfsd4_change_attribute. 
Before change: inode, change_attribute 4723, 7590032215978780890 4723, 7590032215978780890 4723, 7590032215978780890 4723, 7590032215982780865 4723, 7590032215982780865 After change: inode, change_attribute 7602, 7590032992517123951 7602, 7590032992517123952 7602, 7590032992517123953 7602, 7590032992517123954 7602, 7590032992517123955 Reviewed-by: Ameer Hamza Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Andrew Walker Closes #18097 --- module/os/linux/zfs/zfs_vnops_os.c | 13 ++++++++++++- module/os/linux/zfs/zpl_inode.c | 26 ++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index da09faba1..6ec154504 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -2581,8 +2581,19 @@ top: if (fuid_dirtied) zfs_fuid_sync(zfsvfs, tx); - if (mask != 0) + if (mask != 0) { zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); + /* + * Ensure that the z_seq is always incremented on setattr + * operation. This is required for change accounting for + * NFS clients. + * + * ATTR_MODE already increments via zfs_acl_chmod_setattr. + * ATTR_SIZE already increments via zfs_freesp. + */ + if (!(mask & (ATTR_MODE | ATTR_SIZE))) + zp->z_seq++; + } mutex_exit(&zp->z_lock); if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) diff --git a/module/os/linux/zfs/zpl_inode.c b/module/os/linux/zfs/zpl_inode.c index f97662d05..e4e15c824 100644 --- a/module/os/linux/zfs/zpl_inode.c +++ b/module/os/linux/zfs/zpl_inode.c @@ -506,6 +506,32 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask, } #endif +#ifdef STATX_CHANGE_COOKIE + if (request_mask & STATX_CHANGE_COOKIE) { + /* + * knfsd uses the STATX_CHANGE_COOKIE to surface to clients + * change_info4 data, which is used to implement NFS client + * name caching (see RFC 8881 Section 10.8). 
This number + * should always increase with changes and should not be + * reused. We cannot simply present ctime here because + * ZFS uses a coarse timer to set them, which may cause + * clients to fail to detect changes and invalidate cache. + * + * ZFS always increments znode z_seq number, but this is + * uint_t and so we mask in ctime to upper bits. + * + * STATX_ATTR_CHANGE_MONOTONIC is advertised + * to prevent knfsd from generating the change cookie + * based on ctime. C.f. nfsd4_change_attribute in + * fs/nfsd/nfsfh.c. + */ + stat->change_cookie = + ((u64)stat->ctime.tv_sec << 32) | zp->z_seq; + stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC; + stat->result_mask |= STATX_CHANGE_COOKIE; + } +#endif + #ifdef STATX_DIOALIGN if (request_mask & STATX_DIOALIGN) { uint64_t align;