mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-25 11:47:43 +03:00
Wire O_DIRECT also to Uncached I/O (#17218)
Before Direct I/O was implemented, I implemented a lighter version that I called Uncached I/O. It uses the normal DMU/ARC data path with some optimizations, but evicts data from the caches as soon as possible and reasonable. Originally I wired it only to the primarycache property, but now I am completing the integration all the way up to the VFS. While Direct I/O has the lowest possible memory bandwidth usage, it also has a significant number of limitations. It requires I/Os to be page aligned, does not allow speculative prefetch, etc. Uncached I/O does not have those limitations, but instead requires an additional memory copy, though still one fewer than regular cached I/O. As such it should fill the gap in between. Considering this, I've disabled the annoying EINVAL errors on misaligned requests, adding a tunable for those who want to test their applications. To pass the information between the layers I had to change a number of APIs. But as a side effect, upper layers can now control not only the caching, but also speculative prefetch. I haven't wired the latter to the VFS yet, since it requires looking into some OS specifics. But while there, I've implemented speculative prefetch of indirect blocks for Direct I/O, controllable via all the same mechanisms. Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored by: iXsystems, Inc. Fixes #17027 Reviewed-by: Rob Norris <robn@despairlabs.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
+67
-38
@@ -89,6 +89,12 @@ static int zfs_dio_enabled = 0;
|
||||
static int zfs_dio_enabled = 1;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Strictly enforce alignment for Direct I/O requests, returning EINVAL
|
||||
* if not page-aligned instead of silently falling back to uncached I/O.
|
||||
*/
|
||||
static int zfs_dio_strict = 0;
|
||||
|
||||
|
||||
/*
|
||||
* Maximum bytes to read per chunk in zfs_read().
|
||||
@@ -243,46 +249,54 @@ zfs_setup_direct(struct znode *zp, zfs_uio_t *uio, zfs_uio_rw_t rw,
|
||||
int ioflag = *ioflagp;
|
||||
int error = 0;
|
||||
|
||||
if (!zfs_dio_enabled || os->os_direct == ZFS_DIRECT_DISABLED ||
|
||||
zn_has_cached_data(zp, zfs_uio_offset(uio),
|
||||
if (os->os_direct == ZFS_DIRECT_ALWAYS) {
|
||||
/* Force either direct or uncached I/O. */
|
||||
ioflag |= O_DIRECT;
|
||||
}
|
||||
|
||||
if ((ioflag & O_DIRECT) == 0)
|
||||
goto out;
|
||||
|
||||
if (!zfs_dio_enabled || os->os_direct == ZFS_DIRECT_DISABLED) {
|
||||
/*
|
||||
* Direct I/O is disabled. The I/O request will be directed
|
||||
* through the ARC as uncached I/O.
|
||||
*/
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!zfs_uio_page_aligned(uio) ||
|
||||
!zfs_uio_aligned(uio, PAGE_SIZE)) {
|
||||
/*
|
||||
* Misaligned requests can be executed through the ARC as
|
||||
* uncached I/O. But if O_DIRECT was set by user and we
|
||||
* were set to be strict, then it is a failure.
|
||||
*/
|
||||
if ((*ioflagp & O_DIRECT) && zfs_dio_strict)
|
||||
error = SET_ERROR(EINVAL);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (zn_has_cached_data(zp, zfs_uio_offset(uio),
|
||||
zfs_uio_offset(uio) + zfs_uio_resid(uio) - 1)) {
|
||||
/*
|
||||
* Direct I/O is disabled or the region is mmap'ed. In either
|
||||
* case the I/O request will just directed through the ARC.
|
||||
* The region is mmap'ed. The I/O request will be directed
|
||||
* through the ARC as uncached I/O.
|
||||
*/
|
||||
ioflag &= ~O_DIRECT;
|
||||
goto out;
|
||||
} else if (os->os_direct == ZFS_DIRECT_ALWAYS &&
|
||||
zfs_uio_page_aligned(uio) &&
|
||||
zfs_uio_aligned(uio, PAGE_SIZE)) {
|
||||
if ((rw == UIO_WRITE && zfs_uio_resid(uio) >= zp->z_blksz) ||
|
||||
(rw == UIO_READ)) {
|
||||
ioflag |= O_DIRECT;
|
||||
}
|
||||
} else if (os->os_direct == ZFS_DIRECT_ALWAYS && (ioflag & O_DIRECT)) {
|
||||
/*
|
||||
* Direct I/O was requested through the direct=always, but it
|
||||
* is not properly PAGE_SIZE aligned. The request will be
|
||||
* directed through the ARC.
|
||||
*/
|
||||
ioflag &= ~O_DIRECT;
|
||||
}
|
||||
|
||||
if (ioflag & O_DIRECT) {
|
||||
if (!zfs_uio_page_aligned(uio) ||
|
||||
!zfs_uio_aligned(uio, PAGE_SIZE)) {
|
||||
error = SET_ERROR(EINVAL);
|
||||
goto out;
|
||||
}
|
||||
/*
|
||||
* For short writes the page mapping of Direct I/O makes no sense.
|
||||
* Direct them through the ARC as uncached I/O.
|
||||
*/
|
||||
if (rw == UIO_WRITE && zfs_uio_resid(uio) < zp->z_blksz)
|
||||
goto out;
|
||||
|
||||
error = zfs_uio_get_dio_pages_alloc(uio, rw);
|
||||
if (error) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
IMPLY(ioflag & O_DIRECT, uio->uio_extflg & UIO_DIRECT);
|
||||
ASSERT0(error);
|
||||
error = zfs_uio_get_dio_pages_alloc(uio, rw);
|
||||
if (error)
|
||||
goto out;
|
||||
ASSERT(uio->uio_extflg & UIO_DIRECT);
|
||||
|
||||
out:
|
||||
*ioflagp = ioflag;
|
||||
@@ -392,6 +406,9 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
|
||||
ssize_t start_resid = n;
|
||||
ssize_t dio_remaining_resid = 0;
|
||||
|
||||
dmu_flags_t dflags = DMU_READ_PREFETCH;
|
||||
if (ioflag & O_DIRECT)
|
||||
dflags |= DMU_UNCACHEDIO;
|
||||
if (uio->uio_extflg & UIO_DIRECT) {
|
||||
/*
|
||||
* All pages for an O_DIRECT request have already been mapped
|
||||
@@ -414,6 +431,7 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
|
||||
dio_remaining_resid = n - P2ALIGN_TYPED(n, PAGE_SIZE, ssize_t);
|
||||
if (dio_remaining_resid != 0)
|
||||
n -= dio_remaining_resid;
|
||||
dflags |= DMU_DIRECTIO;
|
||||
}
|
||||
|
||||
while (n > 0) {
|
||||
@@ -429,7 +447,7 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
|
||||
error = mappedread(zp, nbytes, uio);
|
||||
} else {
|
||||
error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
|
||||
uio, nbytes);
|
||||
uio, nbytes, dflags);
|
||||
}
|
||||
|
||||
if (error) {
|
||||
@@ -479,15 +497,17 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
|
||||
* remainder of the file can be read using the ARC.
|
||||
*/
|
||||
uio->uio_extflg &= ~UIO_DIRECT;
|
||||
dflags &= ~DMU_DIRECTIO;
|
||||
|
||||
if (zn_has_cached_data(zp, zfs_uio_offset(uio),
|
||||
zfs_uio_offset(uio) + dio_remaining_resid - 1)) {
|
||||
error = mappedread(zp, dio_remaining_resid, uio);
|
||||
} else {
|
||||
error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), uio,
|
||||
dio_remaining_resid);
|
||||
dio_remaining_resid, dflags);
|
||||
}
|
||||
uio->uio_extflg |= UIO_DIRECT;
|
||||
dflags |= DMU_DIRECTIO;
|
||||
|
||||
if (error != 0)
|
||||
n += dio_remaining_resid;
|
||||
@@ -859,12 +879,18 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
|
||||
zfs_rangelock_reduce(lr, woff, n);
|
||||
}
|
||||
|
||||
dmu_flags_t dflags = DMU_READ_PREFETCH;
|
||||
if (ioflag & O_DIRECT)
|
||||
dflags |= DMU_UNCACHEDIO;
|
||||
if (uio->uio_extflg & UIO_DIRECT)
|
||||
dflags |= DMU_DIRECTIO;
|
||||
|
||||
ssize_t tx_bytes;
|
||||
if (abuf == NULL) {
|
||||
tx_bytes = zfs_uio_resid(uio);
|
||||
zfs_uio_fault_disable(uio, B_TRUE);
|
||||
error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
|
||||
uio, nbytes, tx);
|
||||
uio, nbytes, tx, dflags);
|
||||
zfs_uio_fault_disable(uio, B_FALSE);
|
||||
#ifdef __linux__
|
||||
if (error == EFAULT) {
|
||||
@@ -903,7 +929,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
|
||||
* arc buffer to a dbuf.
|
||||
*/
|
||||
error = dmu_assign_arcbuf_by_dbuf(
|
||||
sa_get_db(zp->z_sa_hdl), woff, abuf, tx);
|
||||
sa_get_db(zp->z_sa_hdl), woff, abuf, tx, dflags);
|
||||
if (error != 0) {
|
||||
/*
|
||||
* XXX This might not be necessary if
|
||||
@@ -1329,7 +1355,7 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf,
|
||||
error = SET_ERROR(ENOENT);
|
||||
} else {
|
||||
error = dmu_read(os, object, offset, size, buf,
|
||||
DMU_READ_NO_PREFETCH);
|
||||
DMU_READ_NO_PREFETCH | DMU_KEEP_CACHING);
|
||||
}
|
||||
ASSERT(error == 0 || error == ENOENT);
|
||||
} else { /* indirect write */
|
||||
@@ -2019,3 +2045,6 @@ ZFS_MODULE_PARAM(zfs, zfs_, bclone_wait_dirty, INT, ZMOD_RW,
|
||||
|
||||
ZFS_MODULE_PARAM(zfs, zfs_, dio_enabled, INT, ZMOD_RW,
|
||||
"Enable Direct I/O");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs, zfs_, dio_strict, INT, ZMOD_RW,
|
||||
"Return errors on misaligned Direct I/O");
|
||||
|
||||
Reference in New Issue
Block a user