diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c index 3c7141916..9a63ff4de 100644 --- a/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/module/os/freebsd/zfs/zfs_vnops_os.c @@ -6055,6 +6055,78 @@ zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap) return (EOPNOTSUPP); } +#ifndef _SYS_SYSPROTO_H_ +struct vop_advise_args { + struct vnode *a_vp; + off_t a_start; + off_t a_end; + int a_advice; +}; +#endif + +static int +zfs_freebsd_advise(struct vop_advise_args *ap) +{ + vnode_t *vp = ap->a_vp; + off_t start = ap->a_start; + off_t end = ap->a_end; + int advice = ap->a_advice; + off_t len; + znode_t *zp; + zfsvfs_t *zfsvfs; + objset_t *os; + int error = 0; + + if (end < start) + return (EINVAL); + + error = vn_lock(vp, LK_SHARED); + if (error) + return (error); + + zp = VTOZ(vp); + zfsvfs = zp->z_zfsvfs; + os = zp->z_zfsvfs->z_os; + + if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) + goto out_unlock; + + /* kern_posix_fadvise points to the last byte, we want one past */ + if (end != OFF_MAX) + end += 1; + len = end - start; + + switch (advice) { + case POSIX_FADV_WILLNEED: + /* + * Pass on the caller's size directly, but note that + * dmu_prefetch_max will effectively cap it. If there really + * is a larger sequential access pattern, perhaps dmu_zfetch + * will detect it. + */ + dmu_prefetch(os, zp->z_id, 0, start, len, + ZIO_PRIORITY_ASYNC_READ); + break; + case POSIX_FADV_NORMAL: + case POSIX_FADV_RANDOM: + case POSIX_FADV_SEQUENTIAL: + case POSIX_FADV_DONTNEED: + case POSIX_FADV_NOREUSE: + /* ignored for now */ + break; + default: + error = EINVAL; + break; + } + + zfs_exit(zfsvfs, FTAG); + +out_unlock: + VOP_UNLOCK(vp); + + return (error); +} + static int zfs_vptocnp(struct vop_vptocnp_args *ap) { @@ -6293,6 +6365,7 @@ struct vop_vector zfs_vnodeops = { .vop_link = zfs_freebsd_link, .vop_symlink = zfs_freebsd_symlink, .vop_readlink = zfs_freebsd_readlink, + .vop_advise = zfs_freebsd_advise, .vop_read = zfs_freebsd_read, .vop_write = zfs_freebsd_write, .vop_remove = zfs_freebsd_remove, diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 9f2e2f695..96ac2cc28 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -730,6 +730,18 @@ dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset, */ rw_enter(&dn->dn_struct_rwlock, RW_READER); if (dn->dn_datablkshift != 0) { + + /* + * Limit prefetch to present blocks. + */ + uint64_t size = (dn->dn_maxblkid + 1) << dn->dn_datablkshift; + if (offset >= size) { + rw_exit(&dn->dn_struct_rwlock); + return; + } + if (offset + len < offset || offset + len > size) + len = size - offset; + /* * The object has multiple blocks. Calculate the full range * of blocks [start, end2) and then split it into two parts, diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index c6b995d4e..41e285a03 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -717,6 +717,10 @@ tags = ['functional', 'direct'] tests = ['exec_001_pos', 'exec_002_neg'] tags = ['functional', 'exec'] +[tests/functional/fadvise] +tests = ['fadvise_willneed'] +tags = ['functional', 'fadvise'] + [tests/functional/failmode] tests = ['failmode_dmu_tx_wait', 'failmode_dmu_tx_continue'] tags = ['functional', 'failmode'] diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 8d3cb0607..1682d5ad0 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -112,10 +112,6 @@ tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter', 'zed_fd_spill', 'zed_slow_io', 'zed_slow_io_many_vdevs', 'zed_diagnose_multiple'] tags = ['functional', 'events'] -[tests/functional/fadvise:Linux] -tests = ['fadvise_sequential'] -tags = ['functional', 'fadvise'] - [tests/functional/fallocate:Linux] tests = ['fallocate_prealloc', 'fallocate_zero-range'] tags = ['functional', 'fallocate'] diff --git a/tests/zfs-tests/cmd/Makefile.am b/tests/zfs-tests/cmd/Makefile.am index 4498c9a73..909a72c43 100644 --- a/tests/zfs-tests/cmd/Makefile.am +++ b/tests/zfs-tests/cmd/Makefile.am @@ -140,7 +140,7 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/read_dos_attributes %D%/write_dos_attribu scripts_zfs_tests_bin_PROGRAMS += %D%/randfree_file %C%_randfree_file_SOURCES = %D%/file/randfree_file.c +endif scripts_zfs_tests_bin_PROGRAMS += %D%/file_fadvise %C%_file_fadvise_SOURCES = %D%/file/file_fadvise.c -endif diff --git a/tests/zfs-tests/cmd/file/file_fadvise.c b/tests/zfs-tests/cmd/file/file_fadvise.c index 690b89eea..5ad981502 100644 --- a/tests/zfs-tests/cmd/file/file_fadvise.c +++ b/tests/zfs-tests/cmd/file/file_fadvise.c @@ -44,21 +44,41 @@ static void usage(void) { (void) fprintf(stderr, - "usage: %s -f filename -a advise \n", execname); + "usage: %s -f filename -a advice \n", execname); } +typedef struct advice_name { + const char *name; + int value; +} advice_name; + +static const struct advice_name table[] = { +#define ADV(name) {#name, name} + ADV(POSIX_FADV_NORMAL), + ADV(POSIX_FADV_RANDOM), + ADV(POSIX_FADV_SEQUENTIAL), + ADV(POSIX_FADV_WILLNEED), + ADV(POSIX_FADV_DONTNEED), + ADV(POSIX_FADV_NOREUSE), + {NULL} +}; + int main(int argc, char *argv[]) { char *filename = NULL; - int advise = 0; + int advice = POSIX_FADV_NORMAL; int fd, ch; int err = 0; while ((ch = getopt(argc, argv, "a:f:")) != EOF) { switch (ch) { case 'a': - advise = atoll(optarg); + advice = -1; + for (const advice_name *p = table; p->name; ++p) { + if (strcmp(p->name, optarg) == 0) + advice = p->value; + } break; case 'f': filename = optarg; @@ -75,8 +95,8 @@ main(int argc, char *argv[]) err++; } - if (advise < POSIX_FADV_NORMAL || advise > POSIX_FADV_NOREUSE) { - (void) printf("advise is invalid\n"); + if (advice == -1) { + (void) printf("advice is invalid\n"); err++; } @@ -90,7 +110,7 @@ main(int argc, char *argv[]) return (1); } - if (posix_fadvise(fd, 0, 0, advise) != 0) { + if (posix_fadvise(fd, 0, 0, advice) != 0) { perror("posix_fadvise"); close(fd); return (1); diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 5a5008bfa..c6a6c709f 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -1526,7 +1526,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/exec/exec_002_neg.ksh \ functional/exec/setup.ksh \ functional/fadvise/cleanup.ksh \ - functional/fadvise/fadvise_sequential.ksh \ + functional/fadvise/fadvise_willneed.ksh \ functional/fadvise/setup.ksh \ functional/failmode/cleanup.ksh \ functional/failmode/failmode_dmu_tx_wait.ksh \ diff --git a/tests/zfs-tests/tests/functional/fadvise/fadvise_sequential.ksh b/tests/zfs-tests/tests/functional/fadvise/fadvise_willneed.ksh similarity index 95% rename from tests/zfs-tests/tests/functional/fadvise/fadvise_sequential.ksh rename to tests/zfs-tests/tests/functional/fadvise/fadvise_willneed.ksh index f72c21f56..3505cd169 100755 --- a/tests/zfs-tests/tests/functional/fadvise/fadvise_sequential.ksh +++ b/tests/zfs-tests/tests/functional/fadvise/fadvise_willneed.ksh @@ -42,7 +42,7 @@ # # NOTE: if HAVE_FILE_FADVISE is not defined former data_size -# should less or eaqul to latter one +# should less or equal to latter one verify_runnable "global" @@ -66,7 +66,7 @@ sync_pool $TESTPOOL data_size1=$(kstat arcstats.data_size) -log_must file_fadvise -f $FILE -a 2 +log_must file_fadvise -f $FILE -a POSIX_FADV_WILLNEED sleep 10 data_size2=$(kstat arcstats.data_size)