From fa697b94e65c7b35dfaa6a3bedb6c8ea079d3abc Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Thu, 29 May 2025 09:34:07 -0400 Subject: [PATCH] FreeBSD: Add posix_fadvise(POSIX_FADV_WILLNEED) support As commit 320f0c6 did for Linux, connect POSIX_FADV_WILLNEED up to dmu_prefetch() on FreeBSD. While there, fix portability problems in tests/functional/fadvise. 1. Instead of relying on the numerical values of POSIX_FADV_XXX macros, accept macro names as arguments to the file_fadvise program. (The numbers happen to match on Linux and FreeBSD, but future systems may vary and it seems a little strange/raw to count on that.) 2. For implementation reasons, SEQUENTIAL doesn't reach ZFS via FreeBSD VFS currently (perhaps something that should be investigated in FreeBSD). Since on Linux we're treating SEQUENTIAL and WILLNEED the same, it doesn't really matter which one we use, so switch the test over to WILLNEED exercise the new prefetch code on both OSes the same way. Reviewed-by: Mateusz Guzik Reviewed-by: Fedor Uporov Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Thomas Munro Co-authored-by: Alexander Motin Closes #17379 --- module/os/freebsd/zfs/zfs_vnops_os.c | 73 +++++++++++++++++++ module/zfs/dmu.c | 12 +++ tests/runfiles/common.run | 4 + tests/runfiles/linux.run | 4 - tests/zfs-tests/cmd/Makefile.am | 2 +- tests/zfs-tests/cmd/file/file_fadvise.c | 32 ++++++-- tests/zfs-tests/tests/Makefile.am | 2 +- ...se_sequential.ksh => fadvise_willneed.ksh} | 4 +- 8 files changed, 119 insertions(+), 14 deletions(-) rename tests/zfs-tests/tests/functional/fadvise/{fadvise_sequential.ksh => fadvise_willneed.ksh} (95%) diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c index 3c7141916..9a63ff4de 100644 --- a/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/module/os/freebsd/zfs/zfs_vnops_os.c @@ -6055,6 +6055,78 @@ zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap) return (EOPNOTSUPP); } +#ifndef _SYS_SYSPROTO_H_ +struct vop_advise_args { + struct vnode *a_vp; + off_t a_start; + off_t a_end; + int a_advice; +}; +#endif + +static int +zfs_freebsd_advise(struct vop_advise_args *ap) +{ + vnode_t *vp = ap->a_vp; + off_t start = ap->a_start; + off_t end = ap->a_end; + int advice = ap->a_advice; + off_t len; + znode_t *zp; + zfsvfs_t *zfsvfs; + objset_t *os; + int error = 0; + + if (end < start) + return (EINVAL); + + error = vn_lock(vp, LK_SHARED); + if (error) + return (error); + + zp = VTOZ(vp); + zfsvfs = zp->z_zfsvfs; + os = zp->z_zfsvfs->z_os; + + if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) + goto out_unlock; + + /* kern_posix_fadvise points to the last byte, we want one past */ + if (end != OFF_MAX) + end += 1; + len = end - start; + + switch (advice) { + case POSIX_FADV_WILLNEED: + /* + * Pass on the caller's size directly, but note that + * dmu_prefetch_max will effectively cap it. If there really + * is a larger sequential access pattern, perhaps dmu_zfetch + * will detect it. + */ + dmu_prefetch(os, zp->z_id, 0, start, len, + ZIO_PRIORITY_ASYNC_READ); + break; + case POSIX_FADV_NORMAL: + case POSIX_FADV_RANDOM: + case POSIX_FADV_SEQUENTIAL: + case POSIX_FADV_DONTNEED: + case POSIX_FADV_NOREUSE: + /* ignored for now */ + break; + default: + error = EINVAL; + break; + } + + zfs_exit(zfsvfs, FTAG); + +out_unlock: + VOP_UNLOCK(vp); + + return (error); +} + static int zfs_vptocnp(struct vop_vptocnp_args *ap) { @@ -6293,6 +6365,7 @@ struct vop_vector zfs_vnodeops = { .vop_link = zfs_freebsd_link, .vop_symlink = zfs_freebsd_symlink, .vop_readlink = zfs_freebsd_readlink, + .vop_advise = zfs_freebsd_advise, .vop_read = zfs_freebsd_read, .vop_write = zfs_freebsd_write, .vop_remove = zfs_freebsd_remove, diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 9f2e2f695..96ac2cc28 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -730,6 +730,18 @@ dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset, */ rw_enter(&dn->dn_struct_rwlock, RW_READER); if (dn->dn_datablkshift != 0) { + + /* + * Limit prefetch to present blocks. + */ + uint64_t size = (dn->dn_maxblkid + 1) << dn->dn_datablkshift; + if (offset >= size) { + rw_exit(&dn->dn_struct_rwlock); + return; + } + if (offset + len < offset || offset + len > size) + len = size - offset; + /* * The object has multiple blocks. Calculate the full range * of blocks [start, end2) and then split it into two parts, diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index c6b995d4e..41e285a03 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -717,6 +717,10 @@ tags = ['functional', 'direct'] tests = ['exec_001_pos', 'exec_002_neg'] tags = ['functional', 'exec'] +[tests/functional/fadvise] +tests = ['fadvise_willneed'] +tags = ['functional', 'fadvise'] + [tests/functional/failmode] tests = ['failmode_dmu_tx_wait', 'failmode_dmu_tx_continue'] tags = ['functional', 'failmode'] diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 8d3cb0607..1682d5ad0 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -112,10 +112,6 @@ tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter', 'zed_fd_spill', 'zed_slow_io', 'zed_slow_io_many_vdevs', 'zed_diagnose_multiple'] tags = ['functional', 'events'] -[tests/functional/fadvise:Linux] -tests = ['fadvise_sequential'] -tags = ['functional', 'fadvise'] - [tests/functional/fallocate:Linux] tests = ['fallocate_prealloc', 'fallocate_zero-range'] tags = ['functional', 'fallocate'] diff --git a/tests/zfs-tests/cmd/Makefile.am b/tests/zfs-tests/cmd/Makefile.am index 4498c9a73..909a72c43 100644 --- a/tests/zfs-tests/cmd/Makefile.am +++ b/tests/zfs-tests/cmd/Makefile.am @@ -140,7 +140,7 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/read_dos_attributes %D%/write_dos_attribu scripts_zfs_tests_bin_PROGRAMS += %D%/randfree_file %C%_randfree_file_SOURCES = %D%/file/randfree_file.c +endif scripts_zfs_tests_bin_PROGRAMS += %D%/file_fadvise %C%_file_fadvise_SOURCES = %D%/file/file_fadvise.c -endif diff --git a/tests/zfs-tests/cmd/file/file_fadvise.c b/tests/zfs-tests/cmd/file/file_fadvise.c index 690b89eea..5ad981502 100644 --- a/tests/zfs-tests/cmd/file/file_fadvise.c +++ b/tests/zfs-tests/cmd/file/file_fadvise.c @@ -44,21 +44,41 @@ static void usage(void) { (void) fprintf(stderr, - "usage: %s -f filename -a advise \n", execname); + "usage: %s -f filename -a advice \n", execname); } +typedef struct advice_name { + const char *name; + int value; +} advice_name; + +static const struct advice_name table[] = { +#define ADV(name) {#name, name} + ADV(POSIX_FADV_NORMAL), + ADV(POSIX_FADV_RANDOM), + ADV(POSIX_FADV_SEQUENTIAL), + ADV(POSIX_FADV_WILLNEED), + ADV(POSIX_FADV_DONTNEED), + ADV(POSIX_FADV_NOREUSE), + {NULL} +}; + int main(int argc, char *argv[]) { char *filename = NULL; - int advise = 0; + int advice = POSIX_FADV_NORMAL; int fd, ch; int err = 0; while ((ch = getopt(argc, argv, "a:f:")) != EOF) { switch (ch) { case 'a': - advise = atoll(optarg); + advice = -1; + for (const advice_name *p = table; p->name; ++p) { + if (strcmp(p->name, optarg) == 0) + advice = p->value; + } break; case 'f': filename = optarg; @@ -75,8 +95,8 @@ main(int argc, char *argv[]) err++; } - if (advise < POSIX_FADV_NORMAL || advise > POSIX_FADV_NOREUSE) { - (void) printf("advise is invalid\n"); + if (advice == -1) { + (void) printf("advice is invalid\n"); err++; } @@ -90,7 +110,7 @@ main(int argc, char *argv[]) return (1); } - if (posix_fadvise(fd, 0, 0, advise) != 0) { + if (posix_fadvise(fd, 0, 0, advice) != 0) { perror("posix_fadvise"); close(fd); return (1); diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 5a5008bfa..c6a6c709f 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -1526,7 +1526,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/exec/exec_002_neg.ksh \ functional/exec/setup.ksh \ functional/fadvise/cleanup.ksh \ - functional/fadvise/fadvise_sequential.ksh \ + functional/fadvise/fadvise_willneed.ksh \ functional/fadvise/setup.ksh \ functional/failmode/cleanup.ksh \ functional/failmode/failmode_dmu_tx_wait.ksh \ diff --git a/tests/zfs-tests/tests/functional/fadvise/fadvise_sequential.ksh b/tests/zfs-tests/tests/functional/fadvise/fadvise_willneed.ksh similarity index 95% rename from tests/zfs-tests/tests/functional/fadvise/fadvise_sequential.ksh rename to tests/zfs-tests/tests/functional/fadvise/fadvise_willneed.ksh index f72c21f56..3505cd169 100755 --- a/tests/zfs-tests/tests/functional/fadvise/fadvise_sequential.ksh +++ b/tests/zfs-tests/tests/functional/fadvise/fadvise_willneed.ksh @@ -42,7 +42,7 @@ # # NOTE: if HAVE_FILE_FADVISE is not defined former data_size -# should less or eaqul to latter one +# should less or equal to latter one verify_runnable "global" @@ -66,7 +66,7 @@ sync_pool $TESTPOOL data_size1=$(kstat arcstats.data_size) -log_must file_fadvise -f $FILE -a 2 +log_must file_fadvise -f $FILE -a POSIX_FADV_WILLNEED sleep 10 data_size2=$(kstat arcstats.data_size)