Fix zfs set atime|relatime=off|on behavior on inherited datasets

`zfs set atime|relatime=off|on` doesn't disable or enable the property
on read for datasets whose property was inherited from parent, until
a dataset is once unmounted and mounted again.

(The properties start to work properly if a dataset is once unmounted
and mounted again. The difference comes from regular mount process,
e.g. via zpool import, uses mount options based on properties read
from ondisk layout for each dataset, whereas
`zfs set atime|relatime=off|on` just remounts a specified dataset.)

--
 # zpool create p1 <device>
 # zfs create p1/f1
 # zfs set atime=off p1
 # echo test > /p1/f1/test
 # sync
 # zfs list
 NAME    USED  AVAIL     REFER  MOUNTPOINT
 p1      176K  18.9G     25.5K  /p1
 p1/f1    26K  18.9G       26K  /p1/f1
 # zfs get atime
 NAME   PROPERTY  VALUE  SOURCE
 p1     atime     off    local
 p1/f1  atime     off    inherited from p1
 # stat /p1/f1/test | grep Access | tail -1
 Access: 2019-04-26 23:32:33.741205192 +0900
 # cat /p1/f1/test
 test
 # stat /p1/f1/test | grep Access | tail -1
 Access: 2019-04-26 23:32:50.173231861 +0900
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ changed by read(2)
--

The problem is that zfsvfs::z_atime which was probably intended to keep
incore atime state just gets updated by a callback function of "atime"
property change, atime_changed_cb(), and never used for anything else.

Since now that all file read and atime update use a common function
zpl_iter_read_common() -> file_accessed(), and whether to update atime
via ->dirty_inode() is determined by atime_needs_update(),
atime_needs_update() needs to return false once atime is turned off.
It currently continues to return true on `zfs set atime=off`.

Fix atime_changed_cb() by setting or dropping SB_NOATIME in VFS super
block depending on a new atime value, so that atime_needs_update() works
as expected after property change.

The same problem applies to "relatime" except that a self contained
relatime test is needed. This is because relatime_need_update() is based
on a mount option flag MNT_RELATIME, which doesn't exist in datasets
with inherited "relatime" property via `zfs set relatime=...`, hence it
needs its own relatime test zfs_relatime_need_update().

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tomohiro Kusumi <kusumi.tomohiro@gmail.com>
Closes #8674 
Closes #8675
This commit is contained in:
Tomohiro Kusumi 2019-05-08 02:06:30 +09:00 committed by Brian Behlendorf
parent 75346937de
commit 9c53e51616
14 changed files with 333 additions and 18 deletions

View File

@ -207,6 +207,10 @@ zpl_bdi_destroy(struct super_block *sb)
#define SB_MANDLOCK MS_MANDLOCK #define SB_MANDLOCK MS_MANDLOCK
#endif #endif
#ifndef SB_NOATIME
#define SB_NOATIME MS_NOATIME
#endif
/* /*
* 2.6.38 API change, * 2.6.38 API change,
* LOOKUP_RCU flag introduced to distinguish rcu-walk from ref-walk cases. * LOOKUP_RCU flag introduced to distinguish rcu-walk from ref-walk cases.

View File

@ -98,7 +98,6 @@ struct zfsvfs {
zfs_case_t z_case; /* case-sense */ zfs_case_t z_case; /* case-sense */
boolean_t z_utf8; /* utf8-only */ boolean_t z_utf8; /* utf8-only */
int z_norm; /* normalization flags */ int z_norm; /* normalization flags */
boolean_t z_atime; /* enable atimes mount option */
boolean_t z_relatime; /* enable relatime mount option */ boolean_t z_relatime; /* enable relatime mount option */
boolean_t z_unmounted; /* unmounted */ boolean_t z_unmounted; /* unmounted */
rrmlock_t z_teardown_lock; rrmlock_t z_teardown_lock;

View File

@ -363,6 +363,7 @@ extern int zfs_inode_alloc(struct super_block *, struct inode **ip);
extern void zfs_inode_destroy(struct inode *); extern void zfs_inode_destroy(struct inode *);
extern void zfs_inode_update(znode_t *); extern void zfs_inode_update(znode_t *);
extern void zfs_mark_inode_dirty(struct inode *); extern void zfs_mark_inode_dirty(struct inode *);
extern boolean_t zfs_relatime_need_update(const struct inode *);
extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *dzp, znode_t *zp, char *name, vsecattr_t *, zfs_fuid_info_t *, znode_t *dzp, znode_t *zp, char *name, vsecattr_t *, zfs_fuid_info_t *,

View File

@ -303,7 +303,21 @@ zfs_sync(struct super_block *sb, int wait, cred_t *cr)
static void static void
atime_changed_cb(void *arg, uint64_t newval) atime_changed_cb(void *arg, uint64_t newval)
{ {
((zfsvfs_t *)arg)->z_atime = newval; zfsvfs_t *zfsvfs = arg;
struct super_block *sb = zfsvfs->z_sb;
if (sb == NULL)
return;
/*
* Update SB_NOATIME bit in VFS super block. Since atime update is
* determined by atime_needs_update(), atime_needs_update() needs to
* return false if atime is turned off, and not unconditionally return
* false if atime is turned on.
*/
if (newval)
sb->s_flags &= ~SB_NOATIME;
else
sb->s_flags |= SB_NOATIME;
} }
static void static void

View File

@ -1345,16 +1345,39 @@ zfs_zinactive(znode_t *zp)
zfs_znode_hold_exit(zfsvfs, zh); zfs_znode_hold_exit(zfsvfs, zh);
} }
static inline int #if defined(HAVE_INODE_TIMESPEC64_TIMES)
zfs_compare_timespec(struct timespec *t1, struct timespec *t2) #define zfs_compare_timespec timespec64_compare
#else
#define zfs_compare_timespec timespec_compare
#endif
/*
* Determine whether the znode's atime must be updated. The logic mostly
* duplicates the Linux kernel's relatime_need_update() functionality.
* This function is only called if the underlying filesystem actually has
* atime updates enabled.
*/
boolean_t
zfs_relatime_need_update(const struct inode *ip)
{ {
if (t1->tv_sec < t2->tv_sec) inode_timespec_t now;
return (-1);
if (t1->tv_sec > t2->tv_sec) gethrestime(&now);
return (1); /*
* In relatime mode, only update the atime if the previous atime
* is earlier than either the ctime or mtime or if at least a day
* has passed since the last update of atime.
*/
if (zfs_compare_timespec(&ip->i_mtime, &ip->i_atime) >= 0)
return (B_TRUE);
return (t1->tv_nsec - t2->tv_nsec); if (zfs_compare_timespec(&ip->i_ctime, &ip->i_atime) >= 0)
return (B_TRUE);
if ((hrtime_t)now.tv_sec - (hrtime_t)ip->i_atime.tv_sec >= 24*60*60)
return (B_TRUE);
return (B_FALSE);
} }
/* /*

View File

@ -289,6 +289,8 @@ zpl_iter_read_common(struct kiocb *kiocb, const struct iovec *iovp,
{ {
cred_t *cr = CRED(); cred_t *cr = CRED();
struct file *filp = kiocb->ki_filp; struct file *filp = kiocb->ki_filp;
struct inode *ip = filp->f_mapping->host;
zfsvfs_t *zfsvfs = ZTOZSB(ITOZ(ip));
ssize_t read; ssize_t read;
unsigned int f_flags = filp->f_flags; unsigned int f_flags = filp->f_flags;
@ -298,7 +300,20 @@ zpl_iter_read_common(struct kiocb *kiocb, const struct iovec *iovp,
nr_segs, &kiocb->ki_pos, seg, f_flags, cr, skip); nr_segs, &kiocb->ki_pos, seg, f_flags, cr, skip);
crfree(cr); crfree(cr);
/*
* If relatime is enabled, call file_accessed() only if
* zfs_relatime_need_update() is true. This is needed since datasets
* with inherited "relatime" property aren't necessarily mounted with
* MNT_RELATIME flag (e.g. after `zfs set relatime=...`), which is what
* relatime test in VFS by relatime_need_update() is based on.
*/
if (!IS_NOATIME(ip) && zfsvfs->z_relatime) {
if (zfs_relatime_need_update(ip))
file_accessed(filp); file_accessed(filp);
} else {
file_accessed(filp);
}
return (read); return (read);
} }

View File

@ -37,7 +37,8 @@ tests = ['dbufstats_001_pos', 'dbufstats_002_pos']
tags = ['functional', 'arc'] tags = ['functional', 'arc']
[tests/functional/atime] [tests/functional/atime]
tests = ['atime_001_pos', 'atime_002_neg', 'atime_003_pos'] tests = ['atime_001_pos', 'atime_002_neg', 'atime_003_pos', 'root_atime_off',
'root_atime_on', 'root_relatime_on']
tags = ['functional', 'atime'] tags = ['functional', 'atime']
[tests/functional/bootfs] [tests/functional/bootfs]

View File

@ -214,6 +214,13 @@ function default_setup
log_pass log_pass
} }
function default_setup_no_mountpoint
{
default_setup_noexit "$1" "$2" "$3" "yes"
log_pass
}
# #
# Given a list of disks, setup storage pools and datasets. # Given a list of disks, setup storage pools and datasets.
# #
@ -222,6 +229,7 @@ function default_setup_noexit
typeset disklist=$1 typeset disklist=$1
typeset container=$2 typeset container=$2
typeset volume=$3 typeset volume=$3
typeset no_mountpoint=$4
log_note begin default_setup_noexit log_note begin default_setup_noexit
if is_global_zone; then if is_global_zone; then
@ -238,7 +246,9 @@ function default_setup_noexit
mkdir -p $TESTDIR || log_unresolved Could not create $TESTDIR mkdir -p $TESTDIR || log_unresolved Could not create $TESTDIR
log_must zfs create $TESTPOOL/$TESTFS log_must zfs create $TESTPOOL/$TESTFS
if [[ -z $no_mountpoint ]]; then
log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS
fi
if [[ -n $container ]]; then if [[ -n $container ]]; then
rm -rf $TESTDIR1 || \ rm -rf $TESTDIR1 || \
@ -249,9 +259,11 @@ function default_setup_noexit
log_must zfs create $TESTPOOL/$TESTCTR log_must zfs create $TESTPOOL/$TESTCTR
log_must zfs set canmount=off $TESTPOOL/$TESTCTR log_must zfs set canmount=off $TESTPOOL/$TESTCTR
log_must zfs create $TESTPOOL/$TESTCTR/$TESTFS1 log_must zfs create $TESTPOOL/$TESTCTR/$TESTFS1
if [[ -z $no_mountpoint ]]; then
log_must zfs set mountpoint=$TESTDIR1 \ log_must zfs set mountpoint=$TESTDIR1 \
$TESTPOOL/$TESTCTR/$TESTFS1 $TESTPOOL/$TESTCTR/$TESTFS1
fi fi
fi
if [[ -n $volume ]]; then if [[ -n $volume ]]; then
if is_global_zone ; then if is_global_zone ; then

View File

@ -4,7 +4,10 @@ dist_pkgdata_SCRIPTS = \
setup.ksh \ setup.ksh \
atime_001_pos.ksh \ atime_001_pos.ksh \
atime_002_neg.ksh \ atime_002_neg.ksh \
atime_003_pos.ksh atime_003_pos.ksh \
root_atime_off.ksh \
root_atime_on.ksh \
root_relatime_on.ksh
dist_pkgdata_DATA = \ dist_pkgdata_DATA = \
atime.cfg \ atime.cfg \

View File

@ -68,13 +68,28 @@ function check_atime_updated
function setup_snap_clone function setup_snap_clone
{ {
if [ -d "/$TESTPOOL/$TESTFS" ]; then
# No mountpoint case (default).
log_must touch /$TESTPOOL/$TESTFS/$TESTFILE
else
# Create two file to verify snapshot. # Create two file to verify snapshot.
log_must touch $TESTDIR/$TESTFILE log_must touch $TESTDIR/$TESTFILE
fi
create_snapshot $TESTPOOL/$TESTFS $TESTSNAP create_snapshot $TESTPOOL/$TESTFS $TESTSNAP
create_clone $TESTPOOL/$TESTFS@$TESTSNAP $TESTPOOL/$TESTCLONE create_clone $TESTPOOL/$TESTFS@$TESTSNAP $TESTPOOL/$TESTCLONE
} }
function reset_atime
{
zfs inherit atime $TESTPOOL
zfs inherit atime $TESTPOOL/$TESTFS
zfs inherit atime $TESTPOOL/$TESTCLONE
zfs inherit relatime $TESTPOOL
zfs inherit relatime $TESTPOOL/$TESTFS
zfs inherit relatime $TESTPOOL/$TESTCLONE
}
function cleanup function cleanup
{ {
destroy_clone $TESTPOOL/$TESTCLONE destroy_clone $TESTPOOL/$TESTCLONE

View File

@ -0,0 +1,74 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
# Copyright (c) 2016 by Delphix. All rights reserved.
# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved.
#
. $STF_SUITE/tests/functional/atime/atime_common.kshlib
#
# DESCRIPTION:
# When atime=off, verify the access time for files is not updated when read.
# It is available to pool, fs snapshot and clone.
#
# STRATEGY:
# 1. Create pool, fs.
# 2. Create '$TESTFILE' for fs.
# 3. Create snapshot and clone.
# 4. Setting atime=off on dataset and read '$TESTFILE'.
# 5. Verify the access time is not updated.
#
verify_runnable "both"
log_assert "Setting atime=off, the access time for files will not be updated \
when read."
log_onexit cleanup
#
# Create $TESTFILE, snapshot and clone.
# Same as 002 except that atime applies to root dataset (ZoL#8675).
#
setup_snap_clone
reset_atime
for dst in $TESTPOOL/$TESTFS $TESTPOOL/$TESTCLONE $TESTPOOL/$TESTFS@$TESTSNAP
do
typeset mtpt=$(get_prop mountpoint $dst)
if [[ $dst == $TESTPOOL/$TESTFS@$TESTSNAP ]]; then
mtpt=$(snapshot_mountpoint $dst)
else
log_must zfs set atime=off $(dirname $dst)
fi
log_mustnot check_atime_updated $mtpt/$TESTFILE
done
log_pass "Verify the property atime=off passed."

View File

@ -0,0 +1,78 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
# Copyright (c) 2016 by Delphix. All rights reserved.
# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved.
#
. $STF_SUITE/tests/functional/atime/atime_common.kshlib
#
# DESCRIPTION:
# When atime=on, verify the access time for files is updated when read. It
# is available to fs and clone. To snapshot, it is unavailable.
#
# STRATEGY:
# 1. Create pool and fs.
# 2. Create '$TESTFILE' for fs.
# 3. Create snapshot and clone.
# 4. Setting atime=on on datasets except snapshot, and read '$TESTFILE'.
# 5. Expect the access time is updated on datasets except snapshot.
#
verify_runnable "both"
log_assert "Setting atime=on, the access time for files is updated when read."
log_onexit cleanup
#
# Create $TESTFILE, snapshot and clone.
# Same as 001 except that atime/relatime applies to root dataset (ZoL#8675).
#
setup_snap_clone
reset_atime
for dst in $TESTPOOL/$TESTFS $TESTPOOL/$TESTCLONE $TESTPOOL/$TESTFS@$TESTSNAP
do
typeset mtpt=$(get_prop mountpoint $dst)
if [[ $dst == $TESTPOOL/$TESTFS@$TESTSNAP ]]; then
mtpt=$(snapshot_mountpoint $dst)
log_mustnot check_atime_updated $mtpt/$TESTFILE
else
if is_linux; then
log_must zfs set relatime=off $(dirname $dst)
fi
log_must zfs set atime=on $(dirname $dst)
log_must check_atime_updated $mtpt/$TESTFILE
log_must check_atime_updated $mtpt/$TESTFILE
fi
done
log_pass "Verify the property atime=on passed."

View File

@ -0,0 +1,76 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved.
#
. $STF_SUITE/tests/functional/atime/atime_common.kshlib
#
# DESCRIPTION:
# When relatime=on, verify the access time for files is updated when first
# read but not on second.
# It is available to fs and clone. To snapshot, it is unavailable.
#
# STRATEGY:
# 1. Create pool and fs.
# 2. Create '$TESTFILE' for fs.
# 3. Create snapshot and clone.
# 4. Setting atime=on and relatime=on on datasets.
# 5. Expect the access time is updated for first read but not on second.
#
verify_runnable "both"
log_assert "Setting relatime=on, the access time for files is updated when \
when read the first time, but not second time."
log_onexit cleanup
#
# Create $TESTFILE, snapshot and clone.
# Same as 003 except that atime/relatime applies to root dataset (ZoL#8675).
#
setup_snap_clone
reset_atime
for dst in $TESTPOOL/$TESTFS $TESTPOOL/$TESTCLONE $TESTPOOL/$TESTFS@$TESTSNAP
do
typeset mtpt=$(get_prop mountpoint $dst)
if [[ $dst == $TESTPOOL/$TESTFS@$TESTSNAP ]]; then
mtpt=$(snapshot_mountpoint $dst)
log_mustnot check_atime_updated $mtpt/$TESTFILE
else
log_must zfs set atime=on $(dirname $dst)
log_must zfs set relatime=on $(dirname $dst)
log_must check_atime_updated $mtpt/$TESTFILE
log_mustnot check_atime_updated $mtpt/$TESTFILE
fi
done
log_pass "Verify the property relatime=on passed."

View File

@ -28,4 +28,4 @@
. $STF_SUITE/include/libtest.shlib . $STF_SUITE/include/libtest.shlib
DISK=${DISKS%% *} DISK=${DISKS%% *}
default_setup $DISK default_setup_no_mountpoint $DISK