mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-25 03:37:45 +03:00
Wire O_DIRECT also to Uncached I/O (#17218)
Before Direct I/O was implemented, I implemented a lighter version that I called Uncached I/O. It uses the normal DMU/ARC data path with some optimizations, but evicts data from caches as soon as possible and reasonable. Originally I wired it only to the primarycache property, but I am now completing the integration all the way up to the VFS. While Direct I/O has the lowest possible memory bandwidth usage, it also has a significant number of limitations. It requires I/Os to be page aligned, does not allow speculative prefetch, etc. Uncached I/O does not have those limitations, but instead requires an additional memory copy, though still one less than regular cached I/O. As such it should fill the gap in between. Considering this, I've disabled the annoying EINVAL errors on misaligned requests, adding a tunable for those who want to test their applications. To pass the information between the layers I had to change a number of APIs. But as a side effect, upper layers can now control not only the caching, but also speculative prefetch. I haven't wired the prefetch control to the VFS yet, since it requires looking into some OS specifics. But while there, I've implemented speculative prefetch of indirect blocks for Direct I/O, controllable via all the same mechanisms. Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored by: iXsystems, Inc. Fixes #17027 Reviewed-by: Rob Norris <robn@despairlabs.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
@@ -208,7 +208,7 @@ dmu_write_direct(zio_t *pio, dmu_buf_impl_t *db, abd_t *data, dmu_tx_t *tx)
|
||||
|
||||
int
|
||||
dmu_write_abd(dnode_t *dn, uint64_t offset, uint64_t size,
|
||||
abd_t *data, uint32_t flags, dmu_tx_t *tx)
|
||||
abd_t *data, dmu_flags_t flags, dmu_tx_t *tx)
|
||||
{
|
||||
dmu_buf_t **dbp;
|
||||
spa_t *spa = dn->dn_objset->os_spa;
|
||||
@@ -247,7 +247,7 @@ dmu_write_abd(dnode_t *dn, uint64_t offset, uint64_t size,
|
||||
|
||||
int
|
||||
dmu_read_abd(dnode_t *dn, uint64_t offset, uint64_t size,
|
||||
abd_t *data, uint32_t flags)
|
||||
abd_t *data, dmu_flags_t flags)
|
||||
{
|
||||
objset_t *os = dn->dn_objset;
|
||||
spa_t *spa = os->os_spa;
|
||||
@@ -351,7 +351,8 @@ error:
|
||||
|
||||
#ifdef _KERNEL
|
||||
int
|
||||
dmu_read_uio_direct(dnode_t *dn, zfs_uio_t *uio, uint64_t size)
|
||||
dmu_read_uio_direct(dnode_t *dn, zfs_uio_t *uio, uint64_t size,
|
||||
dmu_flags_t flags)
|
||||
{
|
||||
offset_t offset = zfs_uio_offset(uio);
|
||||
offset_t page_index = (offset - zfs_uio_soffset(uio)) >> PAGESHIFT;
|
||||
@@ -362,7 +363,7 @@ dmu_read_uio_direct(dnode_t *dn, zfs_uio_t *uio, uint64_t size)
|
||||
|
||||
abd_t *data = abd_alloc_from_pages(&uio->uio_dio.pages[page_index],
|
||||
offset & (PAGESIZE - 1), size);
|
||||
err = dmu_read_abd(dn, offset, size, data, DMU_DIRECTIO);
|
||||
err = dmu_read_abd(dn, offset, size, data, flags);
|
||||
abd_free(data);
|
||||
|
||||
if (err == 0)
|
||||
@@ -372,7 +373,8 @@ dmu_read_uio_direct(dnode_t *dn, zfs_uio_t *uio, uint64_t size)
|
||||
}
|
||||
|
||||
int
|
||||
dmu_write_uio_direct(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx)
|
||||
dmu_write_uio_direct(dnode_t *dn, zfs_uio_t *uio, uint64_t size,
|
||||
dmu_flags_t flags, dmu_tx_t *tx)
|
||||
{
|
||||
offset_t offset = zfs_uio_offset(uio);
|
||||
offset_t page_index = (offset - zfs_uio_soffset(uio)) >> PAGESHIFT;
|
||||
@@ -383,7 +385,7 @@ dmu_write_uio_direct(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx)
|
||||
|
||||
abd_t *data = abd_alloc_from_pages(&uio->uio_dio.pages[page_index],
|
||||
offset & (PAGESIZE - 1), size);
|
||||
err = dmu_write_abd(dn, offset, size, data, DMU_DIRECTIO, tx);
|
||||
err = dmu_write_abd(dn, offset, size, data, flags, tx);
|
||||
abd_free(data);
|
||||
|
||||
if (err == 0)
|
||||
|
||||
Reference in New Issue
Block a user