diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 23303d741..c54721155 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -147,6 +147,7 @@ typedef enum { ZFS_PROP_SELINUX_FSCONTEXT, ZFS_PROP_SELINUX_DEFCONTEXT, ZFS_PROP_SELINUX_ROOTCONTEXT, + ZFS_PROP_RELATIME, ZFS_NUM_PROPS } zfs_prop_t; diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h index 7dca3369d..eeeffbe4c 100644 --- a/include/sys/zfs_vfsops.h +++ b/include/sys/zfs_vfsops.h @@ -65,6 +65,7 @@ typedef struct zfs_sb { boolean_t z_utf8; /* utf8-only */ int z_norm; /* normalization flags */ boolean_t z_atime; /* enable atimes mount option */ + boolean_t z_relatime; /* enable relatime mount option */ boolean_t z_unmounted; /* unmounted */ rrwlock_t z_teardown_lock; krwlock_t z_teardown_inactive_lock; diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 0acfa7923..e517e06bc 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -1421,6 +1421,7 @@ zfs_is_namespace_prop(zfs_prop_t prop) switch (prop) { case ZFS_PROP_ATIME: + case ZFS_PROP_RELATIME: case ZFS_PROP_DEVICES: case ZFS_PROP_EXEC: case ZFS_PROP_SETUID: @@ -1756,6 +1757,11 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src, mntopt_off = MNTOPT_NOATIME; break; + case ZFS_PROP_RELATIME: + mntopt_on = MNTOPT_RELATIME; + mntopt_off = MNTOPT_NORELATIME; + break; + case ZFS_PROP_DEVICES: mntopt_on = MNTOPT_DEVICES; mntopt_off = MNTOPT_NODEVICES; @@ -1816,6 +1822,7 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src, switch (prop) { case ZFS_PROP_ATIME: + case ZFS_PROP_RELATIME: case ZFS_PROP_DEVICES: case ZFS_PROP_EXEC: case ZFS_PROP_READONLY: diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index 2c540f059..a45f64085 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -738,7 +738,7 @@ the \fBxattr=sa\fR property. See the \fBxattr\fR property for more details. 
.ad .sp .6 .RS 4n -Controls whether the access time for files is updated when they are read. Turning this property off avoids producing write traffic when reading files and can result in significant performance gains, though it might confuse mailers and other similar utilities. The default value is \fBon\fR. +Controls whether the access time for files is updated when they are read. Turning this property off avoids producing write traffic when reading files and can result in significant performance gains, though it might confuse mailers and other similar utilities. The default value is \fBon\fR. See also \fBrelatime\fR below. .RE .sp @@ -1021,6 +1021,17 @@ If \fBrefreservation\fR is set, a snapshot is only allowed if there is enough fr This property can also be referred to by its shortened column name, \fBrefreserv\fR. .RE +.sp +.ne 2 +.mk +.na +\fB\fBrelatime\fR=\fBon\fR | \fBoff\fR\fR +.ad +.sp .6 +.RS 4n +Controls the manner in which the access time is updated when \fBatime=on\fR is set. Turning this property \fBon\fR causes the access time to be updated relative to the modify or change time. Access time is only updated if the previous access time was earlier than the current modify or change time or if the existing access time hasn't been updated within the past 24 hours. The default value is \fBoff\fR. 
+.RE + .sp .ne 2 .mk @@ -3184,6 +3195,7 @@ pool/home/bob usedbyrefreservation 0 - pool/home/bob logbias latency default pool/home/bob dedup off default pool/home/bob mlslabel none default +pool/home/bob relatime off default .fi .in -2 .sp diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index 121b1eb57..dd456b59a 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -263,6 +263,8 @@ zfs_prop_init(void) /* inherit index (boolean) properties */ zprop_register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off", "ATIME", boolean_table); + zprop_register_index(ZFS_PROP_RELATIME, "relatime", 0, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM, "on | off", "RELATIME", boolean_table); zprop_register_index(ZFS_PROP_DEVICES, "devices", 1, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "DEVICES", boolean_table); diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index f331a36ea..ec59bfbd8 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -137,6 +137,12 @@ atime_changed_cb(void *arg, uint64_t newval) ((zfs_sb_t *)arg)->z_atime = newval; } +static void +relatime_changed_cb(void *arg, uint64_t newval) +{ + ((zfs_sb_t *)arg)->z_relatime = newval; +} + static void xattr_changed_cb(void *arg, uint64_t newval) { @@ -275,6 +281,8 @@ zfs_register_callbacks(zfs_sb_t *zsb) dsl_pool_config_enter(dmu_objset_pool(os), FTAG); error = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zsb); + error = error ? error : dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_RELATIME), relatime_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zsb); error = error ? 
error : dsl_prop_register(ds, @@ -314,6 +322,8 @@ unregister: */ (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zsb); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RELATIME), + relatime_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE), @@ -914,6 +924,9 @@ zfs_unregister_callbacks(zfs_sb_t *zsb) VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, zsb) == 0); + VERIFY(dsl_prop_unregister(ds, "relatime", relatime_changed_cb, + zsb) == 0); + VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zsb) == 0); diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index 2a4b1c648..5e9941034 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -1109,25 +1109,89 @@ zfs_zinactive(znode_t *zp) ZFS_OBJ_HOLD_EXIT(zsb, z_id); } +static inline int +zfs_compare_timespec(struct timespec *t1, struct timespec *t2) +{ + if (t1->tv_sec < t2->tv_sec) + return (-1); + + if (t1->tv_sec > t2->tv_sec) + return (1); + + return (t1->tv_nsec - t2->tv_nsec); +} + +/* + * Determine whether the znode's atime must be updated. The logic mostly + * duplicates the Linux kernel's relatime_need_update() functionality. + * This function is only called if the underlying filesystem actually has + * atime updates enabled. + */ +static inline boolean_t +zfs_atime_need_update(znode_t *zp, timestruc_t *now) +{ + if (!ZTOZSB(zp)->z_relatime) + return (B_TRUE); + + /* + * In relatime mode, only update the atime if the previous atime + * is earlier than either the ctime or mtime or if at least a day + * has passed since the last update of atime. 
+ */ + if (zfs_compare_timespec(&ZTOI(zp)->i_mtime, &ZTOI(zp)->i_atime) >= 0) + return (B_TRUE); + + if (zfs_compare_timespec(&ZTOI(zp)->i_ctime, &ZTOI(zp)->i_atime) >= 0) + return (B_TRUE); + + if ((long)now->tv_sec - ZTOI(zp)->i_atime.tv_sec >= 24*60*60) + return (B_TRUE); + + return (B_FALSE); +} + +/* + * Prepare to update znode time stamps. + * + * IN: zp - znode requiring timestamp update + * flag - ATTR_MTIME, ATTR_CTIME, ATTR_ATIME flags + * have_tx - true if caller is creating a new txg + * + * OUT: zp - new atime (via underlying inode's i_atime) + * mtime - new mtime + * ctime - new ctime + * + * NOTE: The arguments are somewhat redundant. The following condition + * is always true: + * + * have_tx == !(flag & ATTR_ATIME) + */ void zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], uint64_t ctime[2], boolean_t have_tx) { timestruc_t now; + ASSERT(have_tx == !(flag & ATTR_ATIME)); gethrestime(&now); - if (have_tx) { /* will sa_bulk_update happen really soon? */ + /* + * NOTE: The following test intentionally does not update z_atime_dirty + * in the case where an ATIME update has been requested but for which + * the update is omitted due to relatime logic. The rationale being + * that if the flag was set somewhere else, we should leave it alone + * here. + */ + if (flag & ATTR_ATIME) { + if (zfs_atime_need_update(zp, &now)) { + ZFS_TIME_ENCODE(&now, zp->z_atime); + ZTOI(zp)->i_atime.tv_sec = zp->z_atime[0]; + ZTOI(zp)->i_atime.tv_nsec = zp->z_atime[1]; + zp->z_atime_dirty = 1; + } + } else { zp->z_atime_dirty = 0; zp->z_seq++; - } else { - zp->z_atime_dirty = 1; - } - - if (flag & ATTR_ATIME) { - ZFS_TIME_ENCODE(&now, zp->z_atime); - ZTOI(zp)->i_atime.tv_sec = zp->z_atime[0]; - ZTOI(zp)->i_atime.tv_nsec = zp->z_atime[1]; } if (flag & ATTR_MTIME) {