mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-06-25 02:28:01 +03:00
FreeBSD: Add posix_fadvise(POSIX_FADV_WILLNEED) support
As commit 320f0c6 did for Linux, connect POSIX_FADV_WILLNEED up to
dmu_prefetch() on FreeBSD.
While there, fix portability problems in tests/functional/fadvise.
1. Instead of relying on the numerical values of POSIX_FADV_XXX macros,
accept macro names as arguments to the file_fadvise program. (The
numbers happen to match on Linux and FreeBSD, but future systems may
vary and it seems a little strange/raw to count on that.)
2. For implementation reasons, SEQUENTIAL doesn't reach ZFS via FreeBSD
VFS currently (perhaps something that should be investigated in
FreeBSD). Since on Linux we're treating SEQUENTIAL and WILLNEED the
same, it doesn't really matter which one we use, so switch the test
over to WILLNEED to exercise the new prefetch code on both OSes the
same way.
Reviewed-by: Mateusz Guzik <mjg@FreeBSD.org>
Reviewed-by: Fedor Uporov <fuporov.vstack@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Thomas Munro <tmunro@FreeBSD.org>
Co-authored-by: Alexander Motin <mav@FreeBSD.org>
Closes #17379
This commit is contained in:
parent
00360efa35
commit
fa697b94e6
@ -6055,6 +6055,78 @@ zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap)
|
|||||||
return (EOPNOTSUPP);
|
return (EOPNOTSUPP);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef _SYS_SYSPROTO_H_
|
||||||
|
struct vop_advise_args {
|
||||||
|
struct vnode *a_vp;
|
||||||
|
off_t a_start;
|
||||||
|
off_t a_end;
|
||||||
|
int a_advice;
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static int
|
||||||
|
zfs_freebsd_advise(struct vop_advise_args *ap)
|
||||||
|
{
|
||||||
|
vnode_t *vp = ap->a_vp;
|
||||||
|
off_t start = ap->a_start;
|
||||||
|
off_t end = ap->a_end;
|
||||||
|
int advice = ap->a_advice;
|
||||||
|
off_t len;
|
||||||
|
znode_t *zp;
|
||||||
|
zfsvfs_t *zfsvfs;
|
||||||
|
objset_t *os;
|
||||||
|
int error = 0;
|
||||||
|
|
||||||
|
if (end < start)
|
||||||
|
return (EINVAL);
|
||||||
|
|
||||||
|
error = vn_lock(vp, LK_SHARED);
|
||||||
|
if (error)
|
||||||
|
return (error);
|
||||||
|
|
||||||
|
zp = VTOZ(vp);
|
||||||
|
zfsvfs = zp->z_zfsvfs;
|
||||||
|
os = zp->z_zfsvfs->z_os;
|
||||||
|
|
||||||
|
if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
|
||||||
|
goto out_unlock;
|
||||||
|
|
||||||
|
/* kern_posix_fadvise points to the last byte, we want one past */
|
||||||
|
if (end != OFF_MAX)
|
||||||
|
end += 1;
|
||||||
|
len = end - start;
|
||||||
|
|
||||||
|
switch (advice) {
|
||||||
|
case POSIX_FADV_WILLNEED:
|
||||||
|
/*
|
||||||
|
* Pass on the caller's size directly, but note that
|
||||||
|
* dmu_prefetch_max will effectively cap it. If there really
|
||||||
|
* is a larger sequential access pattern, perhaps dmu_zfetch
|
||||||
|
* will detect it.
|
||||||
|
*/
|
||||||
|
dmu_prefetch(os, zp->z_id, 0, start, len,
|
||||||
|
ZIO_PRIORITY_ASYNC_READ);
|
||||||
|
break;
|
||||||
|
case POSIX_FADV_NORMAL:
|
||||||
|
case POSIX_FADV_RANDOM:
|
||||||
|
case POSIX_FADV_SEQUENTIAL:
|
||||||
|
case POSIX_FADV_DONTNEED:
|
||||||
|
case POSIX_FADV_NOREUSE:
|
||||||
|
/* ignored for now */
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
error = EINVAL;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
zfs_exit(zfsvfs, FTAG);
|
||||||
|
|
||||||
|
out_unlock:
|
||||||
|
VOP_UNLOCK(vp);
|
||||||
|
|
||||||
|
return (error);
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
zfs_vptocnp(struct vop_vptocnp_args *ap)
|
zfs_vptocnp(struct vop_vptocnp_args *ap)
|
||||||
{
|
{
|
||||||
@ -6293,6 +6365,7 @@ struct vop_vector zfs_vnodeops = {
|
|||||||
.vop_link = zfs_freebsd_link,
|
.vop_link = zfs_freebsd_link,
|
||||||
.vop_symlink = zfs_freebsd_symlink,
|
.vop_symlink = zfs_freebsd_symlink,
|
||||||
.vop_readlink = zfs_freebsd_readlink,
|
.vop_readlink = zfs_freebsd_readlink,
|
||||||
|
.vop_advise = zfs_freebsd_advise,
|
||||||
.vop_read = zfs_freebsd_read,
|
.vop_read = zfs_freebsd_read,
|
||||||
.vop_write = zfs_freebsd_write,
|
.vop_write = zfs_freebsd_write,
|
||||||
.vop_remove = zfs_freebsd_remove,
|
.vop_remove = zfs_freebsd_remove,
|
||||||
|
@ -730,6 +730,18 @@ dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset,
|
|||||||
*/
|
*/
|
||||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||||
if (dn->dn_datablkshift != 0) {
|
if (dn->dn_datablkshift != 0) {
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Limit prefetch to present blocks.
|
||||||
|
*/
|
||||||
|
uint64_t size = (dn->dn_maxblkid + 1) << dn->dn_datablkshift;
|
||||||
|
if (offset >= size) {
|
||||||
|
rw_exit(&dn->dn_struct_rwlock);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (offset + len < offset || offset + len > size)
|
||||||
|
len = size - offset;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The object has multiple blocks. Calculate the full range
|
* The object has multiple blocks. Calculate the full range
|
||||||
* of blocks [start, end2) and then split it into two parts,
|
* of blocks [start, end2) and then split it into two parts,
|
||||||
|
@ -717,6 +717,10 @@ tags = ['functional', 'direct']
|
|||||||
tests = ['exec_001_pos', 'exec_002_neg']
|
tests = ['exec_001_pos', 'exec_002_neg']
|
||||||
tags = ['functional', 'exec']
|
tags = ['functional', 'exec']
|
||||||
|
|
||||||
|
[tests/functional/fadvise]
|
||||||
|
tests = ['fadvise_willneed']
|
||||||
|
tags = ['functional', 'fadvise']
|
||||||
|
|
||||||
[tests/functional/failmode]
|
[tests/functional/failmode]
|
||||||
tests = ['failmode_dmu_tx_wait', 'failmode_dmu_tx_continue']
|
tests = ['failmode_dmu_tx_wait', 'failmode_dmu_tx_continue']
|
||||||
tags = ['functional', 'failmode']
|
tags = ['functional', 'failmode']
|
||||||
|
@ -112,10 +112,6 @@ tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter', 'zed_fd_spill',
|
|||||||
'zed_slow_io', 'zed_slow_io_many_vdevs', 'zed_diagnose_multiple']
|
'zed_slow_io', 'zed_slow_io_many_vdevs', 'zed_diagnose_multiple']
|
||||||
tags = ['functional', 'events']
|
tags = ['functional', 'events']
|
||||||
|
|
||||||
[tests/functional/fadvise:Linux]
|
|
||||||
tests = ['fadvise_sequential']
|
|
||||||
tags = ['functional', 'fadvise']
|
|
||||||
|
|
||||||
[tests/functional/fallocate:Linux]
|
[tests/functional/fallocate:Linux]
|
||||||
tests = ['fallocate_prealloc', 'fallocate_zero-range']
|
tests = ['fallocate_prealloc', 'fallocate_zero-range']
|
||||||
tags = ['functional', 'fallocate']
|
tags = ['functional', 'fallocate']
|
||||||
|
@ -140,7 +140,7 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/read_dos_attributes %D%/write_dos_attribu
|
|||||||
|
|
||||||
scripts_zfs_tests_bin_PROGRAMS += %D%/randfree_file
|
scripts_zfs_tests_bin_PROGRAMS += %D%/randfree_file
|
||||||
%C%_randfree_file_SOURCES = %D%/file/randfree_file.c
|
%C%_randfree_file_SOURCES = %D%/file/randfree_file.c
|
||||||
|
endif
|
||||||
|
|
||||||
scripts_zfs_tests_bin_PROGRAMS += %D%/file_fadvise
|
scripts_zfs_tests_bin_PROGRAMS += %D%/file_fadvise
|
||||||
%C%_file_fadvise_SOURCES = %D%/file/file_fadvise.c
|
%C%_file_fadvise_SOURCES = %D%/file/file_fadvise.c
|
||||||
endif
|
|
||||||
|
@ -44,21 +44,41 @@ static void
|
|||||||
usage(void)
|
usage(void)
|
||||||
{
|
{
|
||||||
(void) fprintf(stderr,
|
(void) fprintf(stderr,
|
||||||
"usage: %s -f filename -a advise \n", execname);
|
"usage: %s -f filename -a advice \n", execname);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef struct advice_name {
|
||||||
|
const char *name;
|
||||||
|
int value;
|
||||||
|
} advice_name;
|
||||||
|
|
||||||
|
static const struct advice_name table[] = {
|
||||||
|
#define ADV(name) {#name, name}
|
||||||
|
ADV(POSIX_FADV_NORMAL),
|
||||||
|
ADV(POSIX_FADV_RANDOM),
|
||||||
|
ADV(POSIX_FADV_SEQUENTIAL),
|
||||||
|
ADV(POSIX_FADV_WILLNEED),
|
||||||
|
ADV(POSIX_FADV_DONTNEED),
|
||||||
|
ADV(POSIX_FADV_NOREUSE),
|
||||||
|
{NULL}
|
||||||
|
};
|
||||||
|
|
||||||
int
|
int
|
||||||
main(int argc, char *argv[])
|
main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
char *filename = NULL;
|
char *filename = NULL;
|
||||||
int advise = 0;
|
int advice = POSIX_FADV_NORMAL;
|
||||||
int fd, ch;
|
int fd, ch;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
|
||||||
while ((ch = getopt(argc, argv, "a:f:")) != EOF) {
|
while ((ch = getopt(argc, argv, "a:f:")) != EOF) {
|
||||||
switch (ch) {
|
switch (ch) {
|
||||||
case 'a':
|
case 'a':
|
||||||
advise = atoll(optarg);
|
advice = -1;
|
||||||
|
for (const advice_name *p = table; p->name; ++p) {
|
||||||
|
if (strcmp(p->name, optarg) == 0)
|
||||||
|
advice = p->value;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case 'f':
|
case 'f':
|
||||||
filename = optarg;
|
filename = optarg;
|
||||||
@ -75,8 +95,8 @@ main(int argc, char *argv[])
|
|||||||
err++;
|
err++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (advise < POSIX_FADV_NORMAL || advise > POSIX_FADV_NOREUSE) {
|
if (advice == -1) {
|
||||||
(void) printf("advise is invalid\n");
|
(void) printf("advice is invalid\n");
|
||||||
err++;
|
err++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -90,7 +110,7 @@ main(int argc, char *argv[])
|
|||||||
return (1);
|
return (1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (posix_fadvise(fd, 0, 0, advise) != 0) {
|
if (posix_fadvise(fd, 0, 0, advice) != 0) {
|
||||||
perror("posix_fadvise");
|
perror("posix_fadvise");
|
||||||
close(fd);
|
close(fd);
|
||||||
return (1);
|
return (1);
|
||||||
|
@ -1526,7 +1526,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
|||||||
functional/exec/exec_002_neg.ksh \
|
functional/exec/exec_002_neg.ksh \
|
||||||
functional/exec/setup.ksh \
|
functional/exec/setup.ksh \
|
||||||
functional/fadvise/cleanup.ksh \
|
functional/fadvise/cleanup.ksh \
|
||||||
functional/fadvise/fadvise_sequential.ksh \
|
functional/fadvise/fadvise_willneed.ksh \
|
||||||
functional/fadvise/setup.ksh \
|
functional/fadvise/setup.ksh \
|
||||||
functional/failmode/cleanup.ksh \
|
functional/failmode/cleanup.ksh \
|
||||||
functional/failmode/failmode_dmu_tx_wait.ksh \
|
functional/failmode/failmode_dmu_tx_wait.ksh \
|
||||||
|
@ -42,7 +42,7 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
# NOTE: if HAVE_FILE_FADVISE is not defined former data_size
|
# NOTE: if HAVE_FILE_FADVISE is not defined former data_size
|
||||||
# should less or eaqul to latter one
|
# should less or equal to latter one
|
||||||
|
|
||||||
verify_runnable "global"
|
verify_runnable "global"
|
||||||
|
|
||||||
@ -66,7 +66,7 @@ sync_pool $TESTPOOL
|
|||||||
|
|
||||||
data_size1=$(kstat arcstats.data_size)
|
data_size1=$(kstat arcstats.data_size)
|
||||||
|
|
||||||
log_must file_fadvise -f $FILE -a 2
|
log_must file_fadvise -f $FILE -a POSIX_FADV_WILLNEED
|
||||||
sleep 10
|
sleep 10
|
||||||
|
|
||||||
data_size2=$(kstat arcstats.data_size)
|
data_size2=$(kstat arcstats.data_size)
|
Loading…
Reference in New Issue
Block a user