mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-25 11:47:43 +03:00
Wire O_DIRECT also to Uncached I/O (#17218)
Before Direct I/O was implemented, I've implemented lighter version I called Uncached I/O. It uses normal DMU/ARC data path with some optimizations, but evicts data from caches as soon as possible and reasonable. Originally I wired it only to a primarycache property, but now completing the integration all the way up to the VFS. While Direct I/O has the lowest possible memory bandwidth usage, it also has a significant number of limitations. It require I/Os to be page aligned, does not allow speculative prefetch, etc. The Uncached I/O does not have those limitations, but instead require additional memory copy, though still one less than regular cached I/O. As such it should fill the gap in between. Considering this I've disabled annoying EINVAL errors on misaligned requests, adding a tunable for those who wants to test their applications. To pass the information between the layers I had to change a number of APIs. But as side effect upper layers can now control not only the caching, but also speculative prefetch. I haven't wired it to VFS yet, since it require looking on some OS specifics. But while there I've implemented speculative prefetch of indirect blocks for Direct I/O, controllable via all the same mechanisms. Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored by: iXsystems, Inc. Fixes #17027 Reviewed-by: Rob Norris <robn@despairlabs.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
@@ -297,7 +297,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
}
|
||||
|
||||
if (BP_GET_LEVEL(bp) > 0) {
|
||||
uint32_t flags = ARC_FLAG_WAIT;
|
||||
arc_flags_t flags = ARC_FLAG_WAIT;
|
||||
int32_t i, ptidx, pidx;
|
||||
uint32_t prefetchlimit;
|
||||
int32_t epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
|
||||
@@ -364,8 +364,8 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
kmem_free(czb, sizeof (zbookmark_phys_t));
|
||||
|
||||
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
|
||||
uint32_t flags = ARC_FLAG_WAIT;
|
||||
uint32_t zio_flags = ZIO_FLAG_CANFAIL;
|
||||
arc_flags_t flags = ARC_FLAG_WAIT;
|
||||
zio_flag_t zio_flags = ZIO_FLAG_CANFAIL;
|
||||
int32_t i;
|
||||
int32_t epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
|
||||
dnode_phys_t *child_dnp;
|
||||
@@ -397,7 +397,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
break;
|
||||
}
|
||||
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
|
||||
uint32_t zio_flags = ZIO_FLAG_CANFAIL;
|
||||
zio_flag_t zio_flags = ZIO_FLAG_CANFAIL;
|
||||
arc_flags_t flags = ARC_FLAG_WAIT;
|
||||
objset_phys_t *osp;
|
||||
|
||||
@@ -669,7 +669,7 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
|
||||
/* See comment on ZIL traversal in dsl_scan_visitds. */
|
||||
if (ds != NULL && !ds->ds_is_snapshot && !BP_IS_HOLE(rootbp)) {
|
||||
zio_flag_t zio_flags = ZIO_FLAG_CANFAIL;
|
||||
uint32_t flags = ARC_FLAG_WAIT;
|
||||
arc_flags_t flags = ARC_FLAG_WAIT;
|
||||
objset_phys_t *osp;
|
||||
arc_buf_t *buf;
|
||||
ASSERT(!BP_IS_REDACTED(rootbp));
|
||||
|
||||
Reference in New Issue
Block a user