FreeBSD: Add posix_fadvise(POSIX_FADV_WILLNEED) support

As commit 320f0c6 did for Linux, connect POSIX_FADV_WILLNEED
up to dmu_prefetch() on FreeBSD.

While there, fix portability problems in tests/functional/fadvise.

1.  Instead of relying on the numerical values of POSIX_FADV_XXX macros,
    accept macro names as arguments to the file_fadvise program.  (The
    numbers happen to match on Linux and FreeBSD, but future systems may
    vary and it seems a little strange/raw to count on that.)

2.  For implementation reasons, SEQUENTIAL doesn't reach ZFS via FreeBSD
    VFS currently (perhaps something that should be investigated in
    FreeBSD).  Since on Linux we're treating SEQUENTIAL and WILLNEED the
    same, it doesn't really matter which one we use, so switch the test
    over to WILLNEED exercise the new prefetch code on both OSes the
    same way.

Reviewed-by: Mateusz Guzik <mjg@FreeBSD.org>
Reviewed-by: Fedor Uporov <fuporov.vstack@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Thomas Munro <tmunro@FreeBSD.org>
Co-authored-by: Alexander Motin <mav@FreeBSD.org>
Closes #17379
This commit is contained in:
Alexander Motin 2025-05-29 09:34:07 -04:00 committed by GitHub
parent 00360efa35
commit fa697b94e6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 119 additions and 14 deletions

View File

@ -6055,6 +6055,78 @@ zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap)
return (EOPNOTSUPP); return (EOPNOTSUPP);
} }
#ifndef _SYS_SYSPROTO_H_
struct vop_advise_args {
struct vnode *a_vp;
off_t a_start;
off_t a_end;
int a_advice;
};
#endif
static int
zfs_freebsd_advise(struct vop_advise_args *ap)
{
vnode_t *vp = ap->a_vp;
off_t start = ap->a_start;
off_t end = ap->a_end;
int advice = ap->a_advice;
off_t len;
znode_t *zp;
zfsvfs_t *zfsvfs;
objset_t *os;
int error = 0;
if (end < start)
return (EINVAL);
error = vn_lock(vp, LK_SHARED);
if (error)
return (error);
zp = VTOZ(vp);
zfsvfs = zp->z_zfsvfs;
os = zp->z_zfsvfs->z_os;
if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
goto out_unlock;
/* kern_posix_fadvise points to the last byte, we want one past */
if (end != OFF_MAX)
end += 1;
len = end - start;
switch (advice) {
case POSIX_FADV_WILLNEED:
/*
* Pass on the caller's size directly, but note that
* dmu_prefetch_max will effectively cap it. If there really
* is a larger sequential access pattern, perhaps dmu_zfetch
* will detect it.
*/
dmu_prefetch(os, zp->z_id, 0, start, len,
ZIO_PRIORITY_ASYNC_READ);
break;
case POSIX_FADV_NORMAL:
case POSIX_FADV_RANDOM:
case POSIX_FADV_SEQUENTIAL:
case POSIX_FADV_DONTNEED:
case POSIX_FADV_NOREUSE:
/* ignored for now */
break;
default:
error = EINVAL;
break;
}
zfs_exit(zfsvfs, FTAG);
out_unlock:
VOP_UNLOCK(vp);
return (error);
}
static int static int
zfs_vptocnp(struct vop_vptocnp_args *ap) zfs_vptocnp(struct vop_vptocnp_args *ap)
{ {
@ -6293,6 +6365,7 @@ struct vop_vector zfs_vnodeops = {
.vop_link = zfs_freebsd_link, .vop_link = zfs_freebsd_link,
.vop_symlink = zfs_freebsd_symlink, .vop_symlink = zfs_freebsd_symlink,
.vop_readlink = zfs_freebsd_readlink, .vop_readlink = zfs_freebsd_readlink,
.vop_advise = zfs_freebsd_advise,
.vop_read = zfs_freebsd_read, .vop_read = zfs_freebsd_read,
.vop_write = zfs_freebsd_write, .vop_write = zfs_freebsd_write,
.vop_remove = zfs_freebsd_remove, .vop_remove = zfs_freebsd_remove,

View File

@ -730,6 +730,18 @@ dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset,
*/ */
rw_enter(&dn->dn_struct_rwlock, RW_READER); rw_enter(&dn->dn_struct_rwlock, RW_READER);
if (dn->dn_datablkshift != 0) { if (dn->dn_datablkshift != 0) {
/*
* Limit prefetch to present blocks.
*/
uint64_t size = (dn->dn_maxblkid + 1) << dn->dn_datablkshift;
if (offset >= size) {
rw_exit(&dn->dn_struct_rwlock);
return;
}
if (offset + len < offset || offset + len > size)
len = size - offset;
/* /*
* The object has multiple blocks. Calculate the full range * The object has multiple blocks. Calculate the full range
* of blocks [start, end2) and then split it into two parts, * of blocks [start, end2) and then split it into two parts,

View File

@ -717,6 +717,10 @@ tags = ['functional', 'direct']
tests = ['exec_001_pos', 'exec_002_neg'] tests = ['exec_001_pos', 'exec_002_neg']
tags = ['functional', 'exec'] tags = ['functional', 'exec']
[tests/functional/fadvise]
tests = ['fadvise_willneed']
tags = ['functional', 'fadvise']
[tests/functional/failmode] [tests/functional/failmode]
tests = ['failmode_dmu_tx_wait', 'failmode_dmu_tx_continue'] tests = ['failmode_dmu_tx_wait', 'failmode_dmu_tx_continue']
tags = ['functional', 'failmode'] tags = ['functional', 'failmode']

View File

@ -112,10 +112,6 @@ tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter', 'zed_fd_spill',
'zed_slow_io', 'zed_slow_io_many_vdevs', 'zed_diagnose_multiple'] 'zed_slow_io', 'zed_slow_io_many_vdevs', 'zed_diagnose_multiple']
tags = ['functional', 'events'] tags = ['functional', 'events']
[tests/functional/fadvise:Linux]
tests = ['fadvise_sequential']
tags = ['functional', 'fadvise']
[tests/functional/fallocate:Linux] [tests/functional/fallocate:Linux]
tests = ['fallocate_prealloc', 'fallocate_zero-range'] tests = ['fallocate_prealloc', 'fallocate_zero-range']
tags = ['functional', 'fallocate'] tags = ['functional', 'fallocate']

View File

@ -140,7 +140,7 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/read_dos_attributes %D%/write_dos_attribu
scripts_zfs_tests_bin_PROGRAMS += %D%/randfree_file scripts_zfs_tests_bin_PROGRAMS += %D%/randfree_file
%C%_randfree_file_SOURCES = %D%/file/randfree_file.c %C%_randfree_file_SOURCES = %D%/file/randfree_file.c
endif
scripts_zfs_tests_bin_PROGRAMS += %D%/file_fadvise scripts_zfs_tests_bin_PROGRAMS += %D%/file_fadvise
%C%_file_fadvise_SOURCES = %D%/file/file_fadvise.c %C%_file_fadvise_SOURCES = %D%/file/file_fadvise.c
endif

View File

@ -44,21 +44,41 @@ static void
usage(void) usage(void)
{ {
(void) fprintf(stderr, (void) fprintf(stderr,
"usage: %s -f filename -a advise \n", execname); "usage: %s -f filename -a advice \n", execname);
} }
typedef struct advice_name {
const char *name;
int value;
} advice_name;
static const struct advice_name table[] = {
#define ADV(name) {#name, name}
ADV(POSIX_FADV_NORMAL),
ADV(POSIX_FADV_RANDOM),
ADV(POSIX_FADV_SEQUENTIAL),
ADV(POSIX_FADV_WILLNEED),
ADV(POSIX_FADV_DONTNEED),
ADV(POSIX_FADV_NOREUSE),
{NULL}
};
int int
main(int argc, char *argv[]) main(int argc, char *argv[])
{ {
char *filename = NULL; char *filename = NULL;
int advise = 0; int advice = POSIX_FADV_NORMAL;
int fd, ch; int fd, ch;
int err = 0; int err = 0;
while ((ch = getopt(argc, argv, "a:f:")) != EOF) { while ((ch = getopt(argc, argv, "a:f:")) != EOF) {
switch (ch) { switch (ch) {
case 'a': case 'a':
advise = atoll(optarg); advice = -1;
for (const advice_name *p = table; p->name; ++p) {
if (strcmp(p->name, optarg) == 0)
advice = p->value;
}
break; break;
case 'f': case 'f':
filename = optarg; filename = optarg;
@ -75,8 +95,8 @@ main(int argc, char *argv[])
err++; err++;
} }
if (advise < POSIX_FADV_NORMAL || advise > POSIX_FADV_NOREUSE) { if (advice == -1) {
(void) printf("advise is invalid\n"); (void) printf("advice is invalid\n");
err++; err++;
} }
@ -90,7 +110,7 @@ main(int argc, char *argv[])
return (1); return (1);
} }
if (posix_fadvise(fd, 0, 0, advise) != 0) { if (posix_fadvise(fd, 0, 0, advice) != 0) {
perror("posix_fadvise"); perror("posix_fadvise");
close(fd); close(fd);
return (1); return (1);

View File

@ -1526,7 +1526,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/exec/exec_002_neg.ksh \ functional/exec/exec_002_neg.ksh \
functional/exec/setup.ksh \ functional/exec/setup.ksh \
functional/fadvise/cleanup.ksh \ functional/fadvise/cleanup.ksh \
functional/fadvise/fadvise_sequential.ksh \ functional/fadvise/fadvise_willneed.ksh \
functional/fadvise/setup.ksh \ functional/fadvise/setup.ksh \
functional/failmode/cleanup.ksh \ functional/failmode/cleanup.ksh \
functional/failmode/failmode_dmu_tx_wait.ksh \ functional/failmode/failmode_dmu_tx_wait.ksh \

View File

@ -42,7 +42,7 @@
# #
# NOTE: if HAVE_FILE_FADVISE is not defined former data_size # NOTE: if HAVE_FILE_FADVISE is not defined former data_size
# should less or eaqul to latter one # should less or equal to latter one
verify_runnable "global" verify_runnable "global"
@ -66,7 +66,7 @@ sync_pool $TESTPOOL
data_size1=$(kstat arcstats.data_size) data_size1=$(kstat arcstats.data_size)
log_must file_fadvise -f $FILE -a 2 log_must file_fadvise -f $FILE -a POSIX_FADV_WILLNEED
sleep 10 sleep 10
data_size2=$(kstat arcstats.data_size) data_size2=$(kstat arcstats.data_size)