mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Wire O_DIRECT also to Uncached I/O (#17218)
Before Direct I/O was implemented, I've implemented lighter version I called Uncached I/O. It uses normal DMU/ARC data path with some optimizations, but evicts data from caches as soon as possible and reasonable. Originally I wired it only to a primarycache property, but now completing the integration all the way up to the VFS. While Direct I/O has the lowest possible memory bandwidth usage, it also has a significant number of limitations. It require I/Os to be page aligned, does not allow speculative prefetch, etc. The Uncached I/O does not have those limitations, but instead require additional memory copy, though still one less than regular cached I/O. As such it should fill the gap in between. Considering this I've disabled annoying EINVAL errors on misaligned requests, adding a tunable for those who wants to test their applications. To pass the information between the layers I had to change a number of APIs. But as side effect upper layers can now control not only the caching, but also speculative prefetch. I haven't wired it to VFS yet, since it require looking on some OS specifics. But while there I've implemented speculative prefetch of indirect blocks for Direct I/O, controllable via all the same mechanisms. Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored by: iXsystems, Inc. Fixes #17027 Reviewed-by: Rob Norris <robn@despairlabs.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
+12
-12
@@ -1993,7 +1993,8 @@ ztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr)
|
||||
|
||||
if (write_state == WR_COPIED &&
|
||||
dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length,
|
||||
((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH) != 0) {
|
||||
((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH |
|
||||
DMU_KEEP_CACHING) != 0) {
|
||||
zil_itx_destroy(itx);
|
||||
itx = zil_itx_create(TX_WRITE, sizeof (*lr));
|
||||
write_state = WR_NEED_COPY;
|
||||
@@ -2265,19 +2266,19 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap)
|
||||
ASSERT(doi.doi_data_block_size);
|
||||
ASSERT0(offset % doi.doi_data_block_size);
|
||||
if (ztest_random(4) != 0) {
|
||||
int prefetch = ztest_random(2) ?
|
||||
dmu_flags_t flags = ztest_random(2) ?
|
||||
DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH;
|
||||
|
||||
/*
|
||||
* We will randomly set when to do O_DIRECT on a read.
|
||||
*/
|
||||
if (ztest_random(4) == 0)
|
||||
prefetch |= DMU_DIRECTIO;
|
||||
flags |= DMU_DIRECTIO;
|
||||
|
||||
ztest_block_tag_t rbt;
|
||||
|
||||
VERIFY(dmu_read(os, lr->lr_foid, offset,
|
||||
sizeof (rbt), &rbt, prefetch) == 0);
|
||||
sizeof (rbt), &rbt, flags) == 0);
|
||||
if (rbt.bt_magic == BT_MAGIC) {
|
||||
ztest_bt_verify(&rbt, os, lr->lr_foid, 0,
|
||||
offset, gen, txg, crtxg);
|
||||
@@ -2308,7 +2309,7 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap)
|
||||
dmu_write(os, lr->lr_foid, offset, length, data, tx);
|
||||
} else {
|
||||
memcpy(abuf->b_data, data, length);
|
||||
VERIFY0(dmu_assign_arcbuf_by_dbuf(db, offset, abuf, tx));
|
||||
VERIFY0(dmu_assign_arcbuf_by_dbuf(db, offset, abuf, tx, 0));
|
||||
}
|
||||
|
||||
(void) ztest_log_write(zd, tx, lr);
|
||||
@@ -2533,7 +2534,7 @@ ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
|
||||
object, offset, size, ZTRL_READER);
|
||||
|
||||
error = dmu_read(os, object, offset, size, buf,
|
||||
DMU_READ_NO_PREFETCH);
|
||||
DMU_READ_NO_PREFETCH | DMU_KEEP_CACHING);
|
||||
ASSERT0(error);
|
||||
} else {
|
||||
ASSERT3P(zio, !=, NULL);
|
||||
@@ -2549,7 +2550,6 @@ ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
|
||||
object, offset, size, ZTRL_READER);
|
||||
|
||||
error = dmu_buf_hold_noread(os, object, offset, zgd, &db);
|
||||
|
||||
if (error == 0) {
|
||||
blkptr_t *bp = &lr->lr_blkptr;
|
||||
|
||||
@@ -2826,7 +2826,7 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
|
||||
enum ztest_io_type io_type;
|
||||
uint64_t blocksize;
|
||||
void *data;
|
||||
uint32_t dmu_read_flags = DMU_READ_NO_PREFETCH;
|
||||
dmu_flags_t dmu_read_flags = DMU_READ_NO_PREFETCH;
|
||||
|
||||
/*
|
||||
* We will randomly set when to do O_DIRECT on a read.
|
||||
@@ -5065,7 +5065,7 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
|
||||
uint64_t stride = 123456789ULL;
|
||||
uint64_t width = 40;
|
||||
int free_percent = 5;
|
||||
uint32_t dmu_read_flags = DMU_READ_PREFETCH;
|
||||
dmu_flags_t dmu_read_flags = DMU_READ_PREFETCH;
|
||||
|
||||
/*
|
||||
* We will randomly set when to do O_DIRECT on a read.
|
||||
@@ -5541,13 +5541,13 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
|
||||
}
|
||||
if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
|
||||
VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db,
|
||||
off, bigbuf_arcbufs[j], tx));
|
||||
off, bigbuf_arcbufs[j], tx, 0));
|
||||
} else {
|
||||
VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db,
|
||||
off, bigbuf_arcbufs[2 * j], tx));
|
||||
off, bigbuf_arcbufs[2 * j], tx, 0));
|
||||
VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db,
|
||||
off + chunksize / 2,
|
||||
bigbuf_arcbufs[2 * j + 1], tx));
|
||||
bigbuf_arcbufs[2 * j + 1], tx, 0));
|
||||
}
|
||||
if (i == 1) {
|
||||
dmu_buf_rele(dbt, FTAG);
|
||||
|
||||
Reference in New Issue
Block a user