mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-05-23 15:04:59 +03:00
Wire O_DIRECT also to Uncached I/O (#17218)
Before Direct I/O was implemented, I had implemented a lighter version that I called Uncached I/O. It uses the normal DMU/ARC data path with some optimizations, but evicts data from the caches as soon as is possible and reasonable. Originally I wired it only to the primarycache property, but now I am completing the integration all the way up to the VFS. While Direct I/O has the lowest possible memory bandwidth usage, it also has a significant number of limitations: it requires I/Os to be page-aligned, does not allow speculative prefetch, etc. Uncached I/O does not have those limitations, but instead requires an additional memory copy, though still one less than regular cached I/O. As such, it should fill the gap in between. Considering this, I've disabled the annoying EINVAL errors on misaligned requests, adding a tunable for those who want to test their applications. To pass the information between the layers I had to change a number of APIs. As a side effect, upper layers can now control not only the caching, but also speculative prefetch. I haven't wired that to the VFS yet, since it requires looking into some OS specifics. But while there, I've implemented speculative prefetch of indirect blocks for Direct I/O, controllable via all the same mechanisms. Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored by: iXsystems, Inc. Fixes #17027 Reviewed-by: Rob Norris <robn@despairlabs.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
parent
e2ba0f7643
commit
734eba251d
24
cmd/ztest.c
24
cmd/ztest.c
@ -1993,7 +1993,8 @@ ztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr)
|
|||||||
|
|
||||||
if (write_state == WR_COPIED &&
|
if (write_state == WR_COPIED &&
|
||||||
dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length,
|
dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length,
|
||||||
((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH) != 0) {
|
((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH |
|
||||||
|
DMU_KEEP_CACHING) != 0) {
|
||||||
zil_itx_destroy(itx);
|
zil_itx_destroy(itx);
|
||||||
itx = zil_itx_create(TX_WRITE, sizeof (*lr));
|
itx = zil_itx_create(TX_WRITE, sizeof (*lr));
|
||||||
write_state = WR_NEED_COPY;
|
write_state = WR_NEED_COPY;
|
||||||
@ -2265,19 +2266,19 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap)
|
|||||||
ASSERT(doi.doi_data_block_size);
|
ASSERT(doi.doi_data_block_size);
|
||||||
ASSERT0(offset % doi.doi_data_block_size);
|
ASSERT0(offset % doi.doi_data_block_size);
|
||||||
if (ztest_random(4) != 0) {
|
if (ztest_random(4) != 0) {
|
||||||
int prefetch = ztest_random(2) ?
|
dmu_flags_t flags = ztest_random(2) ?
|
||||||
DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH;
|
DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We will randomly set when to do O_DIRECT on a read.
|
* We will randomly set when to do O_DIRECT on a read.
|
||||||
*/
|
*/
|
||||||
if (ztest_random(4) == 0)
|
if (ztest_random(4) == 0)
|
||||||
prefetch |= DMU_DIRECTIO;
|
flags |= DMU_DIRECTIO;
|
||||||
|
|
||||||
ztest_block_tag_t rbt;
|
ztest_block_tag_t rbt;
|
||||||
|
|
||||||
VERIFY(dmu_read(os, lr->lr_foid, offset,
|
VERIFY(dmu_read(os, lr->lr_foid, offset,
|
||||||
sizeof (rbt), &rbt, prefetch) == 0);
|
sizeof (rbt), &rbt, flags) == 0);
|
||||||
if (rbt.bt_magic == BT_MAGIC) {
|
if (rbt.bt_magic == BT_MAGIC) {
|
||||||
ztest_bt_verify(&rbt, os, lr->lr_foid, 0,
|
ztest_bt_verify(&rbt, os, lr->lr_foid, 0,
|
||||||
offset, gen, txg, crtxg);
|
offset, gen, txg, crtxg);
|
||||||
@ -2308,7 +2309,7 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap)
|
|||||||
dmu_write(os, lr->lr_foid, offset, length, data, tx);
|
dmu_write(os, lr->lr_foid, offset, length, data, tx);
|
||||||
} else {
|
} else {
|
||||||
memcpy(abuf->b_data, data, length);
|
memcpy(abuf->b_data, data, length);
|
||||||
VERIFY0(dmu_assign_arcbuf_by_dbuf(db, offset, abuf, tx));
|
VERIFY0(dmu_assign_arcbuf_by_dbuf(db, offset, abuf, tx, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
(void) ztest_log_write(zd, tx, lr);
|
(void) ztest_log_write(zd, tx, lr);
|
||||||
@ -2533,7 +2534,7 @@ ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
|
|||||||
object, offset, size, ZTRL_READER);
|
object, offset, size, ZTRL_READER);
|
||||||
|
|
||||||
error = dmu_read(os, object, offset, size, buf,
|
error = dmu_read(os, object, offset, size, buf,
|
||||||
DMU_READ_NO_PREFETCH);
|
DMU_READ_NO_PREFETCH | DMU_KEEP_CACHING);
|
||||||
ASSERT0(error);
|
ASSERT0(error);
|
||||||
} else {
|
} else {
|
||||||
ASSERT3P(zio, !=, NULL);
|
ASSERT3P(zio, !=, NULL);
|
||||||
@ -2549,7 +2550,6 @@ ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
|
|||||||
object, offset, size, ZTRL_READER);
|
object, offset, size, ZTRL_READER);
|
||||||
|
|
||||||
error = dmu_buf_hold_noread(os, object, offset, zgd, &db);
|
error = dmu_buf_hold_noread(os, object, offset, zgd, &db);
|
||||||
|
|
||||||
if (error == 0) {
|
if (error == 0) {
|
||||||
blkptr_t *bp = &lr->lr_blkptr;
|
blkptr_t *bp = &lr->lr_blkptr;
|
||||||
|
|
||||||
@ -2826,7 +2826,7 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
|
|||||||
enum ztest_io_type io_type;
|
enum ztest_io_type io_type;
|
||||||
uint64_t blocksize;
|
uint64_t blocksize;
|
||||||
void *data;
|
void *data;
|
||||||
uint32_t dmu_read_flags = DMU_READ_NO_PREFETCH;
|
dmu_flags_t dmu_read_flags = DMU_READ_NO_PREFETCH;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We will randomly set when to do O_DIRECT on a read.
|
* We will randomly set when to do O_DIRECT on a read.
|
||||||
@ -5065,7 +5065,7 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
|
|||||||
uint64_t stride = 123456789ULL;
|
uint64_t stride = 123456789ULL;
|
||||||
uint64_t width = 40;
|
uint64_t width = 40;
|
||||||
int free_percent = 5;
|
int free_percent = 5;
|
||||||
uint32_t dmu_read_flags = DMU_READ_PREFETCH;
|
dmu_flags_t dmu_read_flags = DMU_READ_PREFETCH;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We will randomly set when to do O_DIRECT on a read.
|
* We will randomly set when to do O_DIRECT on a read.
|
||||||
@ -5541,13 +5541,13 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
|
|||||||
}
|
}
|
||||||
if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
|
if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
|
||||||
VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db,
|
VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db,
|
||||||
off, bigbuf_arcbufs[j], tx));
|
off, bigbuf_arcbufs[j], tx, 0));
|
||||||
} else {
|
} else {
|
||||||
VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db,
|
VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db,
|
||||||
off, bigbuf_arcbufs[2 * j], tx));
|
off, bigbuf_arcbufs[2 * j], tx, 0));
|
||||||
VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db,
|
VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db,
|
||||||
off + chunksize / 2,
|
off + chunksize / 2,
|
||||||
bigbuf_arcbufs[2 * j + 1], tx));
|
bigbuf_arcbufs[2 * j + 1], tx, 0));
|
||||||
}
|
}
|
||||||
if (i == 1) {
|
if (i == 1) {
|
||||||
dmu_buf_rele(dbt, FTAG);
|
dmu_buf_rele(dbt, FTAG);
|
||||||
|
@ -45,20 +45,6 @@ extern "C" {
|
|||||||
|
|
||||||
#define IN_DMU_SYNC 2
|
#define IN_DMU_SYNC 2
|
||||||
|
|
||||||
/*
|
|
||||||
* define flags for dbuf_read
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define DB_RF_MUST_SUCCEED (1 << 0)
|
|
||||||
#define DB_RF_CANFAIL (1 << 1)
|
|
||||||
#define DB_RF_HAVESTRUCT (1 << 2)
|
|
||||||
#define DB_RF_NOPREFETCH (1 << 3)
|
|
||||||
#define DB_RF_NEVERWAIT (1 << 4)
|
|
||||||
#define DB_RF_CACHED (1 << 5)
|
|
||||||
#define DB_RF_NO_DECRYPT (1 << 6)
|
|
||||||
#define DB_RF_PARTIAL_FIRST (1 << 7)
|
|
||||||
#define DB_RF_PARTIAL_MORE (1 << 8)
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The simplified state transition diagram for dbufs looks like:
|
* The simplified state transition diagram for dbufs looks like:
|
||||||
*
|
*
|
||||||
@ -389,12 +375,15 @@ void dbuf_rele_and_unlock(dmu_buf_impl_t *db, const void *tag,
|
|||||||
dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level,
|
dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level,
|
||||||
uint64_t blkid, uint64_t *hash_out);
|
uint64_t blkid, uint64_t *hash_out);
|
||||||
|
|
||||||
int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags);
|
int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, dmu_flags_t flags);
|
||||||
void dmu_buf_will_clone_or_dio(dmu_buf_t *db, dmu_tx_t *tx);
|
void dmu_buf_will_clone_or_dio(dmu_buf_t *db, dmu_tx_t *tx);
|
||||||
void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx);
|
void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx);
|
||||||
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx, boolean_t canfail);
|
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx, boolean_t canfail);
|
||||||
|
void dmu_buf_will_fill_flags(dmu_buf_t *db, dmu_tx_t *tx, boolean_t canfail,
|
||||||
|
dmu_flags_t flags);
|
||||||
boolean_t dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx, boolean_t failed);
|
boolean_t dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx, boolean_t failed);
|
||||||
void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx);
|
void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx,
|
||||||
|
dmu_flags_t flags);
|
||||||
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||||
dbuf_dirty_record_t *dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid,
|
dbuf_dirty_record_t *dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid,
|
||||||
dmu_tx_t *tx);
|
dmu_tx_t *tx);
|
||||||
@ -475,10 +464,10 @@ dbuf_find_dirty_eq(dmu_buf_impl_t *db, uint64_t txg)
|
|||||||
#define DBUF_GET_BUFC_TYPE(_db) \
|
#define DBUF_GET_BUFC_TYPE(_db) \
|
||||||
(dbuf_is_metadata(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)
|
(dbuf_is_metadata(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)
|
||||||
|
|
||||||
#define DBUF_IS_CACHEABLE(_db) \
|
#define DBUF_IS_CACHEABLE(_db) (!(_db)->db_pending_evict && \
|
||||||
((_db)->db_objset->os_primary_cache == ZFS_CACHE_ALL || \
|
((_db)->db_objset->os_primary_cache == ZFS_CACHE_ALL || \
|
||||||
(dbuf_is_metadata(_db) && \
|
(dbuf_is_metadata(_db) && \
|
||||||
((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA)))
|
((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA))))
|
||||||
|
|
||||||
boolean_t dbuf_is_l2cacheable(dmu_buf_impl_t *db, blkptr_t *db_bp);
|
boolean_t dbuf_is_l2cacheable(dmu_buf_impl_t *db, blkptr_t *db_bp);
|
||||||
|
|
||||||
|
@ -532,6 +532,26 @@ void dmu_redact(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
|||||||
void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
|
void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
|
||||||
struct zio_prop *zp);
|
struct zio_prop *zp);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* DB_RF_* are to be used for dbuf_read() or in limited other cases.
|
||||||
|
*/
|
||||||
|
typedef enum dmu_flags {
|
||||||
|
DB_RF_MUST_SUCCEED = 0, /* Suspend on I/O errors. */
|
||||||
|
DB_RF_CANFAIL = 1 << 0, /* Return on I/O errors. */
|
||||||
|
DB_RF_HAVESTRUCT = 1 << 1, /* dn_struct_rwlock is locked. */
|
||||||
|
DB_RF_NEVERWAIT = 1 << 2,
|
||||||
|
DMU_READ_PREFETCH = 0, /* Try speculative prefetch. */
|
||||||
|
DMU_READ_NO_PREFETCH = 1 << 3, /* Don't prefetch speculatively. */
|
||||||
|
DB_RF_NOPREFETCH = DMU_READ_NO_PREFETCH,
|
||||||
|
DMU_READ_NO_DECRYPT = 1 << 4, /* Don't decrypt. */
|
||||||
|
DB_RF_NO_DECRYPT = DMU_READ_NO_DECRYPT,
|
||||||
|
DMU_DIRECTIO = 1 << 5, /* Bypass ARC. */
|
||||||
|
DMU_UNCACHEDIO = 1 << 6, /* Reduce caching. */
|
||||||
|
DMU_PARTIAL_FIRST = 1 << 7, /* First partial access. */
|
||||||
|
DMU_PARTIAL_MORE = 1 << 8, /* Following partial access. */
|
||||||
|
DMU_KEEP_CACHING = 1 << 9, /* Don't affect caching. */
|
||||||
|
} dmu_flags_t;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The bonus data is accessed more or less like a regular buffer.
|
* The bonus data is accessed more or less like a regular buffer.
|
||||||
* You must dmu_bonus_hold() to get the buffer, which will give you a
|
* You must dmu_bonus_hold() to get the buffer, which will give you a
|
||||||
@ -547,7 +567,7 @@ void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
|
|||||||
int dmu_bonus_hold(objset_t *os, uint64_t object, const void *tag,
|
int dmu_bonus_hold(objset_t *os, uint64_t object, const void *tag,
|
||||||
dmu_buf_t **dbp);
|
dmu_buf_t **dbp);
|
||||||
int dmu_bonus_hold_by_dnode(dnode_t *dn, const void *tag, dmu_buf_t **dbp,
|
int dmu_bonus_hold_by_dnode(dnode_t *dn, const void *tag, dmu_buf_t **dbp,
|
||||||
uint32_t flags);
|
dmu_flags_t flags);
|
||||||
int dmu_bonus_max(void);
|
int dmu_bonus_max(void);
|
||||||
int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
|
int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
|
||||||
int dmu_set_bonustype(dmu_buf_t *, dmu_object_type_t, dmu_tx_t *);
|
int dmu_set_bonustype(dmu_buf_t *, dmu_object_type_t, dmu_tx_t *);
|
||||||
@ -558,9 +578,9 @@ int dmu_rm_spill(objset_t *, uint64_t, dmu_tx_t *);
|
|||||||
* Special spill buffer support used by "SA" framework
|
* Special spill buffer support used by "SA" framework
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, uint32_t flags, const void *tag,
|
int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, dmu_flags_t flags,
|
||||||
dmu_buf_t **dbp);
|
const void *tag, dmu_buf_t **dbp);
|
||||||
int dmu_spill_hold_by_dnode(dnode_t *dn, uint32_t flags,
|
int dmu_spill_hold_by_dnode(dnode_t *dn, dmu_flags_t flags,
|
||||||
const void *tag, dmu_buf_t **dbp);
|
const void *tag, dmu_buf_t **dbp);
|
||||||
int dmu_spill_hold_existing(dmu_buf_t *bonus, const void *tag, dmu_buf_t **dbp);
|
int dmu_spill_hold_existing(dmu_buf_t *bonus, const void *tag, dmu_buf_t **dbp);
|
||||||
|
|
||||||
@ -579,17 +599,17 @@ int dmu_spill_hold_existing(dmu_buf_t *bonus, const void *tag, dmu_buf_t **dbp);
|
|||||||
* The object number must be a valid, allocated object number.
|
* The object number must be a valid, allocated object number.
|
||||||
*/
|
*/
|
||||||
int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
|
int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
|
||||||
const void *tag, dmu_buf_t **, int flags);
|
const void *tag, dmu_buf_t **, dmu_flags_t flags);
|
||||||
int dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
|
int dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
|
||||||
uint64_t length, int read, const void *tag, int *numbufsp,
|
uint64_t length, int read, const void *tag, int *numbufsp,
|
||||||
dmu_buf_t ***dbpp);
|
dmu_buf_t ***dbpp);
|
||||||
int dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
|
int dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
|
||||||
const void *tag, dmu_buf_t **dbp);
|
const void *tag, dmu_buf_t **dbp);
|
||||||
int dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
|
int dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
|
||||||
const void *tag, dmu_buf_t **dbp, int flags);
|
const void *tag, dmu_buf_t **dbp, dmu_flags_t flags);
|
||||||
int dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset,
|
int dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset,
|
||||||
uint64_t length, boolean_t read, const void *tag, int *numbufsp,
|
uint64_t length, boolean_t read, const void *tag, int *numbufsp,
|
||||||
dmu_buf_t ***dbpp, uint32_t flags);
|
dmu_buf_t ***dbpp, dmu_flags_t flags);
|
||||||
int dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset, const void *tag,
|
int dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset, const void *tag,
|
||||||
dmu_buf_t **dbp);
|
dmu_buf_t **dbp);
|
||||||
|
|
||||||
@ -781,6 +801,7 @@ struct blkptr *dmu_buf_get_blkptr(dmu_buf_t *db);
|
|||||||
* (ie. you've called dmu_tx_hold_object(tx, db->db_object)).
|
* (ie. you've called dmu_tx_hold_object(tx, db->db_object)).
|
||||||
*/
|
*/
|
||||||
void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
|
void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
|
||||||
|
void dmu_buf_will_dirty_flags(dmu_buf_t *db, dmu_tx_t *tx, dmu_flags_t flags);
|
||||||
boolean_t dmu_buf_is_dirty(dmu_buf_t *db, dmu_tx_t *tx);
|
boolean_t dmu_buf_is_dirty(dmu_buf_t *db, dmu_tx_t *tx);
|
||||||
void dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder,
|
void dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder,
|
||||||
const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx);
|
const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx);
|
||||||
@ -874,40 +895,36 @@ int dmu_free_long_object(objset_t *os, uint64_t object);
|
|||||||
* Canfail routines will return 0 on success, or an errno if there is a
|
* Canfail routines will return 0 on success, or an errno if there is a
|
||||||
* nonrecoverable I/O error.
|
* nonrecoverable I/O error.
|
||||||
*/
|
*/
|
||||||
#define DMU_READ_PREFETCH 0 /* prefetch */
|
|
||||||
#define DMU_READ_NO_PREFETCH 1 /* don't prefetch */
|
|
||||||
#define DMU_READ_NO_DECRYPT 2 /* don't decrypt */
|
|
||||||
#define DMU_DIRECTIO 4 /* use Direct I/O */
|
|
||||||
|
|
||||||
int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||||
void *buf, uint32_t flags);
|
void *buf, dmu_flags_t flags);
|
||||||
int dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf,
|
int dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf,
|
||||||
uint32_t flags);
|
dmu_flags_t flags);
|
||||||
void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||||
const void *buf, dmu_tx_t *tx);
|
const void *buf, dmu_tx_t *tx);
|
||||||
int dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size,
|
int dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size,
|
||||||
const void *buf, dmu_tx_t *tx);
|
const void *buf, dmu_tx_t *tx, dmu_flags_t flags);
|
||||||
int dmu_write_by_dnode_flags(dnode_t *dn, uint64_t offset, uint64_t size,
|
|
||||||
const void *buf, dmu_tx_t *tx, uint32_t flags);
|
|
||||||
void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||||
dmu_tx_t *tx);
|
dmu_tx_t *tx);
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
int dmu_read_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size);
|
int dmu_read_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size,
|
||||||
int dmu_read_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size);
|
dmu_flags_t flags);
|
||||||
int dmu_read_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size);
|
int dmu_read_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size,
|
||||||
|
dmu_flags_t flags);
|
||||||
|
int dmu_read_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size,
|
||||||
|
dmu_flags_t flags);
|
||||||
int dmu_write_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size,
|
int dmu_write_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size,
|
||||||
dmu_tx_t *tx);
|
dmu_tx_t *tx, dmu_flags_t flags);
|
||||||
int dmu_write_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size,
|
int dmu_write_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size,
|
||||||
dmu_tx_t *tx);
|
dmu_tx_t *tx, dmu_flags_t flags);
|
||||||
int dmu_write_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size,
|
int dmu_write_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size,
|
||||||
dmu_tx_t *tx);
|
dmu_tx_t *tx, dmu_flags_t flags);
|
||||||
#endif
|
#endif
|
||||||
struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
|
struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
|
||||||
void dmu_return_arcbuf(struct arc_buf *buf);
|
void dmu_return_arcbuf(struct arc_buf *buf);
|
||||||
int dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset,
|
int dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset,
|
||||||
struct arc_buf *buf, dmu_tx_t *tx);
|
struct arc_buf *buf, dmu_tx_t *tx, dmu_flags_t flags);
|
||||||
int dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset,
|
int dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset,
|
||||||
struct arc_buf *buf, dmu_tx_t *tx);
|
struct arc_buf *buf, dmu_tx_t *tx, dmu_flags_t flags);
|
||||||
#define dmu_assign_arcbuf dmu_assign_arcbuf_by_dbuf
|
#define dmu_assign_arcbuf dmu_assign_arcbuf_by_dbuf
|
||||||
extern uint_t zfs_max_recordsize;
|
extern uint_t zfs_max_recordsize;
|
||||||
|
|
||||||
|
@ -270,11 +270,13 @@ void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *);
|
|||||||
void dmu_object_free_zapified(objset_t *, uint64_t, dmu_tx_t *);
|
void dmu_object_free_zapified(objset_t *, uint64_t, dmu_tx_t *);
|
||||||
|
|
||||||
int dmu_write_direct(zio_t *, dmu_buf_impl_t *, abd_t *, dmu_tx_t *);
|
int dmu_write_direct(zio_t *, dmu_buf_impl_t *, abd_t *, dmu_tx_t *);
|
||||||
int dmu_read_abd(dnode_t *, uint64_t, uint64_t, abd_t *, uint32_t flags);
|
int dmu_read_abd(dnode_t *, uint64_t, uint64_t, abd_t *, dmu_flags_t);
|
||||||
int dmu_write_abd(dnode_t *, uint64_t, uint64_t, abd_t *, uint32_t, dmu_tx_t *);
|
int dmu_write_abd(dnode_t *, uint64_t, uint64_t, abd_t *, dmu_flags_t,
|
||||||
|
dmu_tx_t *);
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
int dmu_read_uio_direct(dnode_t *, zfs_uio_t *, uint64_t);
|
int dmu_read_uio_direct(dnode_t *, zfs_uio_t *, uint64_t, dmu_flags_t);
|
||||||
int dmu_write_uio_direct(dnode_t *, zfs_uio_t *, uint64_t, dmu_tx_t *);
|
int dmu_write_uio_direct(dnode_t *, zfs_uio_t *, uint64_t, dmu_flags_t,
|
||||||
|
dmu_tx_t *);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
@ -81,9 +81,10 @@ void dmu_zfetch_init(zfetch_t *, struct dnode *);
|
|||||||
void dmu_zfetch_fini(zfetch_t *);
|
void dmu_zfetch_fini(zfetch_t *);
|
||||||
zstream_t *dmu_zfetch_prepare(zfetch_t *, uint64_t, uint64_t, boolean_t,
|
zstream_t *dmu_zfetch_prepare(zfetch_t *, uint64_t, uint64_t, boolean_t,
|
||||||
boolean_t);
|
boolean_t);
|
||||||
void dmu_zfetch_run(zfetch_t *, zstream_t *, boolean_t, boolean_t);
|
void dmu_zfetch_run(zfetch_t *, zstream_t *, boolean_t, boolean_t,
|
||||||
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t, boolean_t,
|
|
||||||
boolean_t);
|
boolean_t);
|
||||||
|
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t, boolean_t,
|
||||||
|
boolean_t, boolean_t);
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
@ -981,9 +981,9 @@ extern void spa_iostats_trim_add(spa_t *spa, trim_type_t type,
|
|||||||
uint64_t extents_skipped, uint64_t bytes_skipped,
|
uint64_t extents_skipped, uint64_t bytes_skipped,
|
||||||
uint64_t extents_failed, uint64_t bytes_failed);
|
uint64_t extents_failed, uint64_t bytes_failed);
|
||||||
extern void spa_iostats_read_add(spa_t *spa, uint64_t size, uint64_t iops,
|
extern void spa_iostats_read_add(spa_t *spa, uint64_t size, uint64_t iops,
|
||||||
uint32_t flags);
|
dmu_flags_t flags);
|
||||||
extern void spa_iostats_write_add(spa_t *spa, uint64_t size, uint64_t iops,
|
extern void spa_iostats_write_add(spa_t *spa, uint64_t size, uint64_t iops,
|
||||||
uint32_t flags);
|
dmu_flags_t flags);
|
||||||
extern void spa_import_progress_add(spa_t *spa);
|
extern void spa_import_progress_add(spa_t *spa);
|
||||||
extern void spa_import_progress_remove(uint64_t spa_guid);
|
extern void spa_import_progress_remove(uint64_t spa_guid);
|
||||||
extern int spa_import_progress_set_mmp_check(uint64_t pool_guid,
|
extern int spa_import_progress_set_mmp_check(uint64_t pool_guid,
|
||||||
|
@ -33,7 +33,9 @@
|
|||||||
/*
|
/*
|
||||||
* Platform-dependent resource accounting hooks
|
* Platform-dependent resource accounting hooks
|
||||||
*/
|
*/
|
||||||
void zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags);
|
void zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops,
|
||||||
void zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags);
|
dmu_flags_t flags);
|
||||||
|
void zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops,
|
||||||
|
dmu_flags_t flags);
|
||||||
|
|
||||||
#endif /* _SYS_ZFS_RACCT_H */
|
#endif /* _SYS_ZFS_RACCT_H */
|
||||||
|
@ -27,13 +27,13 @@
|
|||||||
#include <sys/zfs_racct.h>
|
#include <sys/zfs_racct.h>
|
||||||
|
|
||||||
void
|
void
|
||||||
zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags)
|
zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
(void) spa, (void) size, (void) iops, (void) flags;
|
(void) spa, (void) size, (void) iops, (void) flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags)
|
zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
(void) spa, (void) size, (void) iops, (void) flags;
|
(void) spa, (void) size, (void) iops, (void) flags;
|
||||||
}
|
}
|
||||||
|
@ -304,7 +304,7 @@ Default dnode block size as a power of 2.
|
|||||||
.It Sy zfs_default_ibs Ns = Ns Sy 17 Po 128 KiB Pc Pq int
|
.It Sy zfs_default_ibs Ns = Ns Sy 17 Po 128 KiB Pc Pq int
|
||||||
Default dnode indirect block size as a power of 2.
|
Default dnode indirect block size as a power of 2.
|
||||||
.
|
.
|
||||||
.It Sy zfs_dio_enabled Ns = Ns Sy 0 Ns | Ns 1 Pq int
|
.It Sy zfs_dio_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
|
||||||
Enable Direct I/O.
|
Enable Direct I/O.
|
||||||
If this setting is 0, then all I/O requests will be directed through the ARC
|
If this setting is 0, then all I/O requests will be directed through the ARC
|
||||||
acting as though the dataset property
|
acting as though the dataset property
|
||||||
@ -312,6 +312,11 @@ acting as though the dataset property
|
|||||||
was set to
|
was set to
|
||||||
.Sy disabled .
|
.Sy disabled .
|
||||||
.
|
.
|
||||||
|
.It Sy zfs_dio_strict Ns = Ns Sy 0 Ns | Ns 1 Pq int
|
||||||
|
Strictly enforce alignment for Direct I/O requests, returning
|
||||||
|
.Sy EINVAL
|
||||||
|
if not page-aligned instead of silently falling back to uncached I/O.
|
||||||
|
.
|
||||||
.It Sy zfs_history_output_max Ns = Ns Sy 1048576 Ns B Po 1 MiB Pc Pq u64
|
.It Sy zfs_history_output_max Ns = Ns Sy 1048576 Ns B Po 1 MiB Pc Pq u64
|
||||||
When attempting to log an output nvlist of an ioctl in the on-disk history,
|
When attempting to log an output nvlist of an ioctl in the on-disk history,
|
||||||
the output will not be stored if it is larger than this size (in bytes).
|
the output will not be stored if it is larger than this size (in bytes).
|
||||||
|
@ -41,7 +41,6 @@
|
|||||||
#include <sys/dsl_pool.h>
|
#include <sys/dsl_pool.h>
|
||||||
#include <sys/dsl_synctask.h>
|
#include <sys/dsl_synctask.h>
|
||||||
#include <sys/dsl_prop.h>
|
#include <sys/dsl_prop.h>
|
||||||
#include <sys/dmu_zfetch.h>
|
|
||||||
#include <sys/zfs_ioctl.h>
|
#include <sys/zfs_ioctl.h>
|
||||||
#include <sys/zap.h>
|
#include <sys/zap.h>
|
||||||
#include <sys/zio_checksum.h>
|
#include <sys/zio_checksum.h>
|
||||||
@ -71,6 +70,7 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
|||||||
struct sf_buf *sf;
|
struct sf_buf *sf;
|
||||||
int numbufs, i;
|
int numbufs, i;
|
||||||
int err;
|
int err;
|
||||||
|
dmu_flags_t flags = 0;
|
||||||
|
|
||||||
if (size == 0)
|
if (size == 0)
|
||||||
return (0);
|
return (0);
|
||||||
@ -94,10 +94,17 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
|||||||
|
|
||||||
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
|
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
|
||||||
|
|
||||||
if (tocpy == db->db_size)
|
if (tocpy == db->db_size) {
|
||||||
dmu_buf_will_fill(db, tx, B_FALSE);
|
dmu_buf_will_fill(db, tx, B_FALSE);
|
||||||
|
} else {
|
||||||
|
if (i == numbufs - 1 && bufoff + tocpy < db->db_size) {
|
||||||
|
if (bufoff == 0)
|
||||||
|
flags |= DMU_PARTIAL_FIRST;
|
||||||
else
|
else
|
||||||
dmu_buf_will_dirty(db, tx);
|
flags |= DMU_PARTIAL_MORE;
|
||||||
|
}
|
||||||
|
dmu_buf_will_dirty_flags(db, tx, flags);
|
||||||
|
}
|
||||||
|
|
||||||
for (copied = 0; copied < tocpy; copied += PAGESIZE) {
|
for (copied = 0; copied < tocpy; copied += PAGESIZE) {
|
||||||
ASSERT3U(ptoa((*ma)->pindex), ==,
|
ASSERT3U(ptoa((*ma)->pindex), ==,
|
||||||
|
@ -28,7 +28,7 @@
|
|||||||
#include <sys/racct.h>
|
#include <sys/racct.h>
|
||||||
|
|
||||||
void
|
void
|
||||||
zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags)
|
zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
curthread->td_ru.ru_inblock += iops;
|
curthread->td_ru.ru_inblock += iops;
|
||||||
#ifdef RACCT
|
#ifdef RACCT
|
||||||
@ -46,7 +46,7 @@ zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags)
|
zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
curthread->td_ru.ru_oublock += iops;
|
curthread->td_ru.ru_oublock += iops;
|
||||||
#ifdef RACCT
|
#ifdef RACCT
|
||||||
|
@ -530,7 +530,7 @@ mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
|
|||||||
page_unhold(pp);
|
page_unhold(pp);
|
||||||
} else {
|
} else {
|
||||||
error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
|
error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
|
||||||
uio, bytes);
|
uio, bytes, DMU_READ_PREFETCH);
|
||||||
}
|
}
|
||||||
len -= bytes;
|
len -= bytes;
|
||||||
off = 0;
|
off = 0;
|
||||||
|
@ -679,7 +679,7 @@ zvol_strategy_impl(zv_request_t *zvr)
|
|||||||
while (resid != 0 && off < volsize) {
|
while (resid != 0 && off < volsize) {
|
||||||
size_t size = MIN(resid, zvol_maxphys);
|
size_t size = MIN(resid, zvol_maxphys);
|
||||||
if (doread) {
|
if (doread) {
|
||||||
error = dmu_read(os, ZVOL_OBJ, off, size, addr,
|
error = dmu_read_by_dnode(zv->zv_dn, off, size, addr,
|
||||||
DMU_READ_PREFETCH);
|
DMU_READ_PREFETCH);
|
||||||
} else {
|
} else {
|
||||||
dmu_tx_t *tx = dmu_tx_create(os);
|
dmu_tx_t *tx = dmu_tx_create(os);
|
||||||
@ -688,7 +688,8 @@ zvol_strategy_impl(zv_request_t *zvr)
|
|||||||
if (error) {
|
if (error) {
|
||||||
dmu_tx_abort(tx);
|
dmu_tx_abort(tx);
|
||||||
} else {
|
} else {
|
||||||
dmu_write(os, ZVOL_OBJ, off, size, addr, tx);
|
dmu_write_by_dnode(zv->zv_dn, off, size, addr,
|
||||||
|
tx, DMU_READ_PREFETCH);
|
||||||
zvol_log_write(zv, tx, off, size, commit);
|
zvol_log_write(zv, tx, off, size, commit);
|
||||||
dmu_tx_commit(tx);
|
dmu_tx_commit(tx);
|
||||||
}
|
}
|
||||||
@ -834,7 +835,8 @@ zvol_cdev_read(struct cdev *dev, struct uio *uio_s, int ioflag)
|
|||||||
if (bytes > volsize - zfs_uio_offset(&uio))
|
if (bytes > volsize - zfs_uio_offset(&uio))
|
||||||
bytes = volsize - zfs_uio_offset(&uio);
|
bytes = volsize - zfs_uio_offset(&uio);
|
||||||
|
|
||||||
error = dmu_read_uio_dnode(zv->zv_dn, &uio, bytes);
|
error = dmu_read_uio_dnode(zv->zv_dn, &uio, bytes,
|
||||||
|
DMU_READ_PREFETCH);
|
||||||
if (error) {
|
if (error) {
|
||||||
/* Convert checksum errors into IO errors. */
|
/* Convert checksum errors into IO errors. */
|
||||||
if (error == ECKSUM)
|
if (error == ECKSUM)
|
||||||
@ -893,7 +895,8 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag)
|
|||||||
dmu_tx_abort(tx);
|
dmu_tx_abort(tx);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
error = dmu_write_uio_dnode(zv->zv_dn, &uio, bytes, tx);
|
error = dmu_write_uio_dnode(zv->zv_dn, &uio, bytes, tx,
|
||||||
|
DMU_READ_PREFETCH);
|
||||||
if (error == 0)
|
if (error == 0)
|
||||||
zvol_log_write(zv, tx, off, bytes, commit);
|
zvol_log_write(zv, tx, off, bytes, commit);
|
||||||
dmu_tx_commit(tx);
|
dmu_tx_commit(tx);
|
||||||
|
@ -30,14 +30,14 @@
|
|||||||
#include <linux/task_io_accounting_ops.h>
|
#include <linux/task_io_accounting_ops.h>
|
||||||
|
|
||||||
void
|
void
|
||||||
zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags)
|
zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
task_io_account_read(size);
|
task_io_account_read(size);
|
||||||
spa_iostats_read_add(spa, size, iops, flags);
|
spa_iostats_read_add(spa, size, iops, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags)
|
zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
task_io_account_write(size);
|
task_io_account_write(size);
|
||||||
spa_iostats_write_add(spa, size, iops, flags);
|
spa_iostats_write_add(spa, size, iops, flags);
|
||||||
@ -46,13 +46,13 @@ zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags)
|
|||||||
#else
|
#else
|
||||||
|
|
||||||
void
|
void
|
||||||
zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags)
|
zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
(void) spa, (void) size, (void) iops, (void) flags;
|
(void) spa, (void) size, (void) iops, (void) flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags)
|
zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
(void) spa, (void) size, (void) iops, (void) flags;
|
(void) spa, (void) size, (void) iops, (void) flags;
|
||||||
}
|
}
|
||||||
|
@ -329,7 +329,7 @@ mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
|
|||||||
put_page(pp);
|
put_page(pp);
|
||||||
} else {
|
} else {
|
||||||
error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
|
error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
|
||||||
uio, bytes);
|
uio, bytes, DMU_READ_PREFETCH);
|
||||||
}
|
}
|
||||||
|
|
||||||
len -= bytes;
|
len -= bytes;
|
||||||
|
@ -258,7 +258,8 @@ zvol_write(zv_request_t *zvr)
|
|||||||
dmu_tx_abort(tx);
|
dmu_tx_abort(tx);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
error = dmu_write_uio_dnode(zv->zv_dn, &uio, bytes, tx);
|
error = dmu_write_uio_dnode(zv->zv_dn, &uio, bytes, tx,
|
||||||
|
DMU_READ_PREFETCH);
|
||||||
if (error == 0) {
|
if (error == 0) {
|
||||||
zvol_log_write(zv, tx, off, bytes, sync);
|
zvol_log_write(zv, tx, off, bytes, sync);
|
||||||
}
|
}
|
||||||
@ -428,7 +429,8 @@ zvol_read(zv_request_t *zvr)
|
|||||||
if (bytes > volsize - uio.uio_loffset)
|
if (bytes > volsize - uio.uio_loffset)
|
||||||
bytes = volsize - uio.uio_loffset;
|
bytes = volsize - uio.uio_loffset;
|
||||||
|
|
||||||
error = dmu_read_uio_dnode(zv->zv_dn, &uio, bytes);
|
error = dmu_read_uio_dnode(zv->zv_dn, &uio, bytes,
|
||||||
|
DMU_READ_PREFETCH);
|
||||||
if (error) {
|
if (error) {
|
||||||
/* convert checksum errors into IO errors */
|
/* convert checksum errors into IO errors */
|
||||||
if (error == ECKSUM)
|
if (error == ECKSUM)
|
||||||
|
@ -6103,7 +6103,9 @@ top:
|
|||||||
ARCSTAT_CONDSTAT(!(*arc_flags & ARC_FLAG_PREFETCH),
|
ARCSTAT_CONDSTAT(!(*arc_flags & ARC_FLAG_PREFETCH),
|
||||||
demand, prefetch, !HDR_ISTYPE_METADATA(hdr), data,
|
demand, prefetch, !HDR_ISTYPE_METADATA(hdr), data,
|
||||||
metadata, misses);
|
metadata, misses);
|
||||||
zfs_racct_read(spa, size, 1, 0);
|
zfs_racct_read(spa, size, 1,
|
||||||
|
(*arc_flags & ARC_FLAG_UNCACHED) ?
|
||||||
|
DMU_UNCACHEDIO : 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check if the spa even has l2 configured */
|
/* Check if the spa even has l2 configured */
|
||||||
|
@ -1499,7 +1499,8 @@ dbuf_read_hole(dmu_buf_impl_t *db, dnode_t *dn, blkptr_t *bp)
|
|||||||
* decrypt / authenticate them when we need to read an encrypted bonus buffer.
|
* decrypt / authenticate them when we need to read an encrypted bonus buffer.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, dnode_t *dn, uint32_t flags)
|
dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, dnode_t *dn,
|
||||||
|
dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
objset_t *os = db->db_objset;
|
objset_t *os = db->db_objset;
|
||||||
dmu_buf_impl_t *dndb;
|
dmu_buf_impl_t *dndb;
|
||||||
@ -1507,7 +1508,7 @@ dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, dnode_t *dn, uint32_t flags)
|
|||||||
zbookmark_phys_t zb;
|
zbookmark_phys_t zb;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
if ((flags & DB_RF_NO_DECRYPT) != 0 ||
|
if ((flags & DMU_READ_NO_DECRYPT) != 0 ||
|
||||||
!os->os_encrypted || os->os_raw_receive ||
|
!os->os_encrypted || os->os_raw_receive ||
|
||||||
(dndb = dn->dn_dbuf) == NULL)
|
(dndb = dn->dn_dbuf) == NULL)
|
||||||
return (0);
|
return (0);
|
||||||
@ -1561,7 +1562,7 @@ dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, dnode_t *dn, uint32_t flags)
|
|||||||
* returning.
|
* returning.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags,
|
dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, dmu_flags_t flags,
|
||||||
db_lock_type_t dblt, blkptr_t *bp, const void *tag)
|
db_lock_type_t dblt, blkptr_t *bp, const void *tag)
|
||||||
{
|
{
|
||||||
zbookmark_phys_t zb;
|
zbookmark_phys_t zb;
|
||||||
@ -1627,7 +1628,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags,
|
|||||||
zio_flags = (flags & DB_RF_CANFAIL) ?
|
zio_flags = (flags & DB_RF_CANFAIL) ?
|
||||||
ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED;
|
ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED;
|
||||||
|
|
||||||
if ((flags & DB_RF_NO_DECRYPT) && BP_IS_PROTECTED(bp))
|
if ((flags & DMU_READ_NO_DECRYPT) && BP_IS_PROTECTED(bp))
|
||||||
zio_flags |= ZIO_FLAG_RAW;
|
zio_flags |= ZIO_FLAG_RAW;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1728,7 +1729,7 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
|
|||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
dbuf_read(dmu_buf_impl_t *db, zio_t *pio, uint32_t flags)
|
dbuf_read(dmu_buf_impl_t *db, zio_t *pio, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
dnode_t *dn;
|
dnode_t *dn;
|
||||||
boolean_t miss = B_TRUE, need_wait = B_FALSE, prefetch;
|
boolean_t miss = B_TRUE, need_wait = B_FALSE, prefetch;
|
||||||
@ -1748,12 +1749,14 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *pio, uint32_t flags)
|
|||||||
goto done;
|
goto done;
|
||||||
|
|
||||||
prefetch = db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID &&
|
prefetch = db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID &&
|
||||||
(flags & DB_RF_NOPREFETCH) == 0;
|
(flags & DMU_READ_NO_PREFETCH) == 0;
|
||||||
|
|
||||||
mutex_enter(&db->db_mtx);
|
mutex_enter(&db->db_mtx);
|
||||||
if (flags & DB_RF_PARTIAL_FIRST)
|
if (!(flags & (DMU_UNCACHEDIO | DMU_KEEP_CACHING)))
|
||||||
|
db->db_pending_evict = B_FALSE;
|
||||||
|
if (flags & DMU_PARTIAL_FIRST)
|
||||||
db->db_partial_read = B_TRUE;
|
db->db_partial_read = B_TRUE;
|
||||||
else if (!(flags & DB_RF_PARTIAL_MORE))
|
else if (!(flags & (DMU_PARTIAL_MORE | DMU_KEEP_CACHING)))
|
||||||
db->db_partial_read = B_FALSE;
|
db->db_partial_read = B_FALSE;
|
||||||
miss = (db->db_state != DB_CACHED);
|
miss = (db->db_state != DB_CACHED);
|
||||||
|
|
||||||
@ -1794,7 +1797,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *pio, uint32_t flags)
|
|||||||
* unauthenticated blocks, which will verify their MAC if
|
* unauthenticated blocks, which will verify their MAC if
|
||||||
* the key is now available.
|
* the key is now available.
|
||||||
*/
|
*/
|
||||||
if ((flags & DB_RF_NO_DECRYPT) == 0 && db->db_buf != NULL &&
|
if ((flags & DMU_READ_NO_DECRYPT) == 0 && db->db_buf != NULL &&
|
||||||
(arc_is_encrypted(db->db_buf) ||
|
(arc_is_encrypted(db->db_buf) ||
|
||||||
arc_is_unauthenticated(db->db_buf) ||
|
arc_is_unauthenticated(db->db_buf) ||
|
||||||
arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF)) {
|
arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF)) {
|
||||||
@ -1842,7 +1845,8 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *pio, uint32_t flags)
|
|||||||
|
|
||||||
if (err == 0 && prefetch) {
|
if (err == 0 && prefetch) {
|
||||||
dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE, miss,
|
dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE, miss,
|
||||||
flags & DB_RF_HAVESTRUCT);
|
flags & DB_RF_HAVESTRUCT, (flags & DMU_UNCACHEDIO) ||
|
||||||
|
db->db_pending_evict);
|
||||||
}
|
}
|
||||||
DB_DNODE_EXIT(db);
|
DB_DNODE_EXIT(db);
|
||||||
|
|
||||||
@ -1874,11 +1878,14 @@ done:
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
dbuf_noread(dmu_buf_impl_t *db)
|
dbuf_noread(dmu_buf_impl_t *db, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
|
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
|
||||||
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
|
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
|
||||||
mutex_enter(&db->db_mtx);
|
mutex_enter(&db->db_mtx);
|
||||||
|
if (!(flags & (DMU_UNCACHEDIO | DMU_KEEP_CACHING)))
|
||||||
|
db->db_pending_evict = B_FALSE;
|
||||||
|
db->db_partial_read = B_FALSE;
|
||||||
while (db->db_state == DB_READ || db->db_state == DB_FILL)
|
while (db->db_state == DB_READ || db->db_state == DB_FILL)
|
||||||
cv_wait(&db->db_changed, &db->db_mtx);
|
cv_wait(&db->db_changed, &db->db_mtx);
|
||||||
if (db->db_state == DB_UNCACHED) {
|
if (db->db_state == DB_UNCACHED) {
|
||||||
@ -2191,8 +2198,8 @@ dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx)
|
|||||||
kmem_free(dr, sizeof (*dr));
|
kmem_free(dr, sizeof (*dr));
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
int err = dbuf_read(parent_db, NULL,
|
int err = dbuf_read(parent_db, NULL, DB_RF_CANFAIL |
|
||||||
(DB_RF_NOPREFETCH | DB_RF_CANFAIL));
|
DMU_READ_NO_PREFETCH);
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
dbuf_rele(parent_db, FTAG);
|
dbuf_rele(parent_db, FTAG);
|
||||||
kmem_free(dr, sizeof (*dr));
|
kmem_free(dr, sizeof (*dr));
|
||||||
@ -2620,8 +2627,8 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
|||||||
return (B_FALSE);
|
return (B_FALSE);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
void
|
||||||
dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx)
|
dmu_buf_will_dirty_flags(dmu_buf_t *db_fake, dmu_tx_t *tx, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
|
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
|
||||||
boolean_t undirty = B_FALSE;
|
boolean_t undirty = B_FALSE;
|
||||||
@ -2673,7 +2680,7 @@ dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx)
|
|||||||
* not the underlying block that is being replaced. dbuf_undirty() will
|
* not the underlying block that is being replaced. dbuf_undirty() will
|
||||||
* do brt_pending_remove() before removing the dirty record.
|
* do brt_pending_remove() before removing the dirty record.
|
||||||
*/
|
*/
|
||||||
(void) dbuf_read(db, NULL, flags);
|
(void) dbuf_read(db, NULL, flags | DB_RF_MUST_SUCCEED);
|
||||||
if (undirty) {
|
if (undirty) {
|
||||||
mutex_enter(&db->db_mtx);
|
mutex_enter(&db->db_mtx);
|
||||||
VERIFY(!dbuf_undirty(db, tx));
|
VERIFY(!dbuf_undirty(db, tx));
|
||||||
@ -2685,8 +2692,7 @@ dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx)
|
|||||||
void
|
void
|
||||||
dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
|
dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
|
||||||
{
|
{
|
||||||
dmu_buf_will_dirty_impl(db_fake,
|
dmu_buf_will_dirty_flags(db_fake, tx, DMU_READ_NO_PREFETCH);
|
||||||
DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH, tx);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean_t
|
boolean_t
|
||||||
@ -2850,7 +2856,7 @@ dmu_buf_will_clone_or_dio(dmu_buf_t *db_fake, dmu_tx_t *tx)
|
|||||||
DBUF_VERIFY(db);
|
DBUF_VERIFY(db);
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
|
|
||||||
dbuf_noread(db);
|
dbuf_noread(db, DMU_KEEP_CACHING);
|
||||||
(void) dbuf_dirty(db, tx);
|
(void) dbuf_dirty(db, tx);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2864,12 +2870,13 @@ dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
|
|||||||
DTRACE_SET_STATE(db, "allocating NOFILL buffer");
|
DTRACE_SET_STATE(db, "allocating NOFILL buffer");
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
|
|
||||||
dbuf_noread(db);
|
dbuf_noread(db, DMU_KEEP_CACHING);
|
||||||
(void) dbuf_dirty(db, tx);
|
(void) dbuf_dirty(db, tx);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail)
|
dmu_buf_will_fill_flags(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail,
|
||||||
|
dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
|
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
|
||||||
|
|
||||||
@ -2891,7 +2898,7 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail)
|
|||||||
*/
|
*/
|
||||||
if (canfail && dr) {
|
if (canfail && dr) {
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
dmu_buf_will_dirty(db_fake, tx);
|
dmu_buf_will_dirty_flags(db_fake, tx, flags);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
@ -2907,10 +2914,16 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail)
|
|||||||
}
|
}
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
|
|
||||||
dbuf_noread(db);
|
dbuf_noread(db, flags);
|
||||||
(void) dbuf_dirty(db, tx);
|
(void) dbuf_dirty(db, tx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail)
|
||||||
|
{
|
||||||
|
dmu_buf_will_fill_flags(db_fake, tx, canfail, DMU_READ_NO_PREFETCH);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This function is effectively the same as dmu_buf_will_dirty(), but
|
* This function is effectively the same as dmu_buf_will_dirty(), but
|
||||||
* indicates the caller expects raw encrypted data in the db, and provides
|
* indicates the caller expects raw encrypted data in the db, and provides
|
||||||
@ -2933,8 +2946,8 @@ dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder,
|
|||||||
ASSERT0(db->db_level);
|
ASSERT0(db->db_level);
|
||||||
ASSERT(db->db_objset->os_raw_receive);
|
ASSERT(db->db_objset->os_raw_receive);
|
||||||
|
|
||||||
dmu_buf_will_dirty_impl(db_fake,
|
dmu_buf_will_dirty_flags(db_fake, tx,
|
||||||
DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH | DB_RF_NO_DECRYPT, tx);
|
DMU_READ_NO_PREFETCH | DMU_READ_NO_DECRYPT);
|
||||||
|
|
||||||
dr = dbuf_find_dirty_eq(db, tx->tx_txg);
|
dr = dbuf_find_dirty_eq(db, tx->tx_txg);
|
||||||
|
|
||||||
@ -3076,7 +3089,8 @@ dmu_buf_redact(dmu_buf_t *dbuf, dmu_tx_t *tx)
|
|||||||
* by anybody except our caller. Otherwise copy arcbuf's contents to dbuf.
|
* by anybody except our caller. Otherwise copy arcbuf's contents to dbuf.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
|
dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx,
|
||||||
|
dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
|
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
|
||||||
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
|
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
|
||||||
@ -3090,6 +3104,9 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
|
|||||||
ASSERT(arc_released(buf));
|
ASSERT(arc_released(buf));
|
||||||
|
|
||||||
mutex_enter(&db->db_mtx);
|
mutex_enter(&db->db_mtx);
|
||||||
|
if (!(flags & (DMU_UNCACHEDIO | DMU_KEEP_CACHING)))
|
||||||
|
db->db_pending_evict = B_FALSE;
|
||||||
|
db->db_partial_read = B_FALSE;
|
||||||
|
|
||||||
while (db->db_state == DB_READ || db->db_state == DB_FILL)
|
while (db->db_state == DB_READ || db->db_state == DB_FILL)
|
||||||
cv_wait(&db->db_changed, &db->db_mtx);
|
cv_wait(&db->db_changed, &db->db_mtx);
|
||||||
@ -3344,8 +3361,8 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
|
|||||||
|
|
||||||
if (err)
|
if (err)
|
||||||
return (err);
|
return (err);
|
||||||
err = dbuf_read(*parentp, NULL,
|
err = dbuf_read(*parentp, NULL, DB_RF_CANFAIL |
|
||||||
(DB_RF_HAVESTRUCT | DB_RF_NOPREFETCH | DB_RF_CANFAIL));
|
DB_RF_HAVESTRUCT | DMU_READ_NO_PREFETCH);
|
||||||
if (err) {
|
if (err) {
|
||||||
dbuf_rele(*parentp, NULL);
|
dbuf_rele(*parentp, NULL);
|
||||||
*parentp = NULL;
|
*parentp = NULL;
|
||||||
@ -3404,7 +3421,8 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
|
|||||||
db->db_user = NULL;
|
db->db_user = NULL;
|
||||||
db->db_user_immediate_evict = FALSE;
|
db->db_user_immediate_evict = FALSE;
|
||||||
db->db_freed_in_flight = FALSE;
|
db->db_freed_in_flight = FALSE;
|
||||||
db->db_pending_evict = FALSE;
|
db->db_pending_evict = TRUE;
|
||||||
|
db->db_partial_read = FALSE;
|
||||||
|
|
||||||
if (blkid == DMU_BONUS_BLKID) {
|
if (blkid == DMU_BONUS_BLKID) {
|
||||||
ASSERT3P(parent, ==, dn->dn_dbuf);
|
ASSERT3P(parent, ==, dn->dn_dbuf);
|
||||||
@ -3615,8 +3633,8 @@ dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb,
|
|||||||
dbuf_prefetch_fini(dpa, B_TRUE);
|
dbuf_prefetch_fini(dpa, B_TRUE);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
(void) dbuf_read(db, NULL,
|
(void) dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_HAVESTRUCT |
|
||||||
DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH | DB_RF_HAVESTRUCT);
|
DMU_READ_NO_PREFETCH);
|
||||||
dbuf_rele(db, FTAG);
|
dbuf_rele(db, FTAG);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4002,6 +4020,7 @@ dbuf_create_bonus(dnode_t *dn)
|
|||||||
ASSERT(dn->dn_bonus == NULL);
|
ASSERT(dn->dn_bonus == NULL);
|
||||||
dn->dn_bonus = dbuf_create(dn, 0, DMU_BONUS_BLKID, dn->dn_dbuf, NULL,
|
dn->dn_bonus = dbuf_create(dn, 0, DMU_BONUS_BLKID, dn->dn_dbuf, NULL,
|
||||||
dbuf_hash(dn->dn_objset, dn->dn_object, 0, DMU_BONUS_BLKID));
|
dbuf_hash(dn->dn_objset, dn->dn_object, 0, DMU_BONUS_BLKID));
|
||||||
|
dn->dn_bonus->db_pending_evict = FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
@ -4167,8 +4186,11 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, const void *tag, boolean_t evicting)
|
|||||||
* This dbuf has anonymous data associated with it.
|
* This dbuf has anonymous data associated with it.
|
||||||
*/
|
*/
|
||||||
dbuf_destroy(db);
|
dbuf_destroy(db);
|
||||||
} else if (!(DBUF_IS_CACHEABLE(db) || db->db_partial_read) ||
|
} else if (!db->db_partial_read && !DBUF_IS_CACHEABLE(db)) {
|
||||||
db->db_pending_evict) {
|
/*
|
||||||
|
* We don't expect more accesses to the dbuf, and it
|
||||||
|
* is either not cacheable or was marked for eviction.
|
||||||
|
*/
|
||||||
dbuf_destroy(db);
|
dbuf_destroy(db);
|
||||||
} else if (!multilist_link_active(&db->db_cache_link)) {
|
} else if (!multilist_link_active(&db->db_cache_link)) {
|
||||||
ASSERT3U(db->db_caching_status, ==, DB_NO_CACHE);
|
ASSERT3U(db->db_caching_status, ==, DB_NO_CACHE);
|
||||||
|
181
module/zfs/dmu.c
181
module/zfs/dmu.c
@ -222,20 +222,14 @@ dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
|
|||||||
|
|
||||||
int
|
int
|
||||||
dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
|
dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
|
||||||
const void *tag, dmu_buf_t **dbp, int flags)
|
const void *tag, dmu_buf_t **dbp, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
int db_flags = DB_RF_CANFAIL;
|
|
||||||
|
|
||||||
if (flags & DMU_READ_NO_PREFETCH)
|
|
||||||
db_flags |= DB_RF_NOPREFETCH;
|
|
||||||
if (flags & DMU_READ_NO_DECRYPT)
|
|
||||||
db_flags |= DB_RF_NO_DECRYPT;
|
|
||||||
|
|
||||||
err = dmu_buf_hold_noread_by_dnode(dn, offset, tag, dbp);
|
err = dmu_buf_hold_noread_by_dnode(dn, offset, tag, dbp);
|
||||||
if (err == 0) {
|
if (err == 0) {
|
||||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)(*dbp);
|
dmu_buf_impl_t *db = (dmu_buf_impl_t *)(*dbp);
|
||||||
err = dbuf_read(db, NULL, db_flags);
|
err = dbuf_read(db, NULL, flags | DB_RF_CANFAIL);
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
dbuf_rele(db, tag);
|
dbuf_rele(db, tag);
|
||||||
*dbp = NULL;
|
*dbp = NULL;
|
||||||
@ -247,20 +241,14 @@ dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
|
|||||||
|
|
||||||
int
|
int
|
||||||
dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
|
dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
|
||||||
const void *tag, dmu_buf_t **dbp, int flags)
|
const void *tag, dmu_buf_t **dbp, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
int db_flags = DB_RF_CANFAIL;
|
|
||||||
|
|
||||||
if (flags & DMU_READ_NO_PREFETCH)
|
|
||||||
db_flags |= DB_RF_NOPREFETCH;
|
|
||||||
if (flags & DMU_READ_NO_DECRYPT)
|
|
||||||
db_flags |= DB_RF_NO_DECRYPT;
|
|
||||||
|
|
||||||
err = dmu_buf_hold_noread(os, object, offset, tag, dbp);
|
err = dmu_buf_hold_noread(os, object, offset, tag, dbp);
|
||||||
if (err == 0) {
|
if (err == 0) {
|
||||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)(*dbp);
|
dmu_buf_impl_t *db = (dmu_buf_impl_t *)(*dbp);
|
||||||
err = dbuf_read(db, NULL, db_flags);
|
err = dbuf_read(db, NULL, flags | DB_RF_CANFAIL);
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
dbuf_rele(db, tag);
|
dbuf_rele(db, tag);
|
||||||
*dbp = NULL;
|
*dbp = NULL;
|
||||||
@ -358,16 +346,10 @@ dmu_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx)
|
|||||||
* Returns ENOENT, EIO, or 0.
|
* Returns ENOENT, EIO, or 0.
|
||||||
*/
|
*/
|
||||||
int dmu_bonus_hold_by_dnode(dnode_t *dn, const void *tag, dmu_buf_t **dbp,
|
int dmu_bonus_hold_by_dnode(dnode_t *dn, const void *tag, dmu_buf_t **dbp,
|
||||||
uint32_t flags)
|
dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
dmu_buf_impl_t *db;
|
dmu_buf_impl_t *db;
|
||||||
int error;
|
int error;
|
||||||
uint32_t db_flags = DB_RF_MUST_SUCCEED;
|
|
||||||
|
|
||||||
if (flags & DMU_READ_NO_PREFETCH)
|
|
||||||
db_flags |= DB_RF_NOPREFETCH;
|
|
||||||
if (flags & DMU_READ_NO_DECRYPT)
|
|
||||||
db_flags |= DB_RF_NO_DECRYPT;
|
|
||||||
|
|
||||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||||
if (dn->dn_bonus == NULL) {
|
if (dn->dn_bonus == NULL) {
|
||||||
@ -393,7 +375,7 @@ int dmu_bonus_hold_by_dnode(dnode_t *dn, const void *tag, dmu_buf_t **dbp,
|
|||||||
*/
|
*/
|
||||||
rw_exit(&dn->dn_struct_rwlock);
|
rw_exit(&dn->dn_struct_rwlock);
|
||||||
|
|
||||||
error = dbuf_read(db, NULL, db_flags);
|
error = dbuf_read(db, NULL, flags | DB_RF_CANFAIL);
|
||||||
if (error) {
|
if (error) {
|
||||||
dnode_evict_bonus(dn);
|
dnode_evict_bonus(dn);
|
||||||
dbuf_rele(db, tag);
|
dbuf_rele(db, tag);
|
||||||
@ -431,7 +413,7 @@ dmu_bonus_hold(objset_t *os, uint64_t object, const void *tag, dmu_buf_t **dbp)
|
|||||||
* dmu_spill_hold_existing() should be used.
|
* dmu_spill_hold_existing() should be used.
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
dmu_spill_hold_by_dnode(dnode_t *dn, uint32_t flags, const void *tag,
|
dmu_spill_hold_by_dnode(dnode_t *dn, dmu_flags_t flags, const void *tag,
|
||||||
dmu_buf_t **dbp)
|
dmu_buf_t **dbp)
|
||||||
{
|
{
|
||||||
dmu_buf_impl_t *db = NULL;
|
dmu_buf_impl_t *db = NULL;
|
||||||
@ -489,18 +471,14 @@ dmu_spill_hold_existing(dmu_buf_t *bonus, const void *tag, dmu_buf_t **dbp)
|
|||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
dmu_spill_hold_by_bonus(dmu_buf_t *bonus, uint32_t flags, const void *tag,
|
dmu_spill_hold_by_bonus(dmu_buf_t *bonus, dmu_flags_t flags, const void *tag,
|
||||||
dmu_buf_t **dbp)
|
dmu_buf_t **dbp)
|
||||||
{
|
{
|
||||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)bonus;
|
dmu_buf_impl_t *db = (dmu_buf_impl_t *)bonus;
|
||||||
int err;
|
int err;
|
||||||
uint32_t db_flags = DB_RF_CANFAIL;
|
|
||||||
|
|
||||||
if (flags & DMU_READ_NO_DECRYPT)
|
|
||||||
db_flags |= DB_RF_NO_DECRYPT;
|
|
||||||
|
|
||||||
DB_DNODE_ENTER(db);
|
DB_DNODE_ENTER(db);
|
||||||
err = dmu_spill_hold_by_dnode(DB_DNODE(db), db_flags, tag, dbp);
|
err = dmu_spill_hold_by_dnode(DB_DNODE(db), flags, tag, dbp);
|
||||||
DB_DNODE_EXIT(db);
|
DB_DNODE_EXIT(db);
|
||||||
|
|
||||||
return (err);
|
return (err);
|
||||||
@ -515,12 +493,12 @@ dmu_spill_hold_by_bonus(dmu_buf_t *bonus, uint32_t flags, const void *tag,
|
|||||||
int
|
int
|
||||||
dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
|
dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
|
||||||
boolean_t read, const void *tag, int *numbufsp, dmu_buf_t ***dbpp,
|
boolean_t read, const void *tag, int *numbufsp, dmu_buf_t ***dbpp,
|
||||||
uint32_t flags)
|
dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
dmu_buf_t **dbp;
|
dmu_buf_t **dbp;
|
||||||
zstream_t *zs = NULL;
|
zstream_t *zs = NULL;
|
||||||
uint64_t blkid, nblks, i;
|
uint64_t blkid, nblks, i;
|
||||||
uint32_t dbuf_flags;
|
dmu_flags_t dbuf_flags;
|
||||||
int err;
|
int err;
|
||||||
zio_t *zio = NULL;
|
zio_t *zio = NULL;
|
||||||
boolean_t missed = B_FALSE;
|
boolean_t missed = B_FALSE;
|
||||||
@ -532,11 +510,8 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
|
|||||||
* we can tell it about the multi-block read. dbuf_read() only knows
|
* we can tell it about the multi-block read. dbuf_read() only knows
|
||||||
* about the one block it is accessing.
|
* about the one block it is accessing.
|
||||||
*/
|
*/
|
||||||
dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT |
|
dbuf_flags = (flags & ~DMU_READ_PREFETCH) | DMU_READ_NO_PREFETCH |
|
||||||
DB_RF_NOPREFETCH;
|
DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT;
|
||||||
|
|
||||||
if ((flags & DMU_READ_NO_DECRYPT) != 0)
|
|
||||||
dbuf_flags |= DB_RF_NO_DECRYPT;
|
|
||||||
|
|
||||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||||
if (dn->dn_datablkshift) {
|
if (dn->dn_datablkshift) {
|
||||||
@ -569,15 +544,15 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
|
|||||||
* that if multiple threads block on same indirect block, we
|
* that if multiple threads block on same indirect block, we
|
||||||
* base predictions on the original less racy request order.
|
* base predictions on the original less racy request order.
|
||||||
*/
|
*/
|
||||||
zs = dmu_zfetch_prepare(&dn->dn_zfetch, blkid, nblks, read,
|
zs = dmu_zfetch_prepare(&dn->dn_zfetch, blkid, nblks,
|
||||||
B_TRUE);
|
read && !(flags & DMU_DIRECTIO), B_TRUE);
|
||||||
}
|
}
|
||||||
for (i = 0; i < nblks; i++) {
|
for (i = 0; i < nblks; i++) {
|
||||||
dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag);
|
dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag);
|
||||||
if (db == NULL) {
|
if (db == NULL) {
|
||||||
if (zs) {
|
if (zs) {
|
||||||
dmu_zfetch_run(&dn->dn_zfetch, zs, missed,
|
dmu_zfetch_run(&dn->dn_zfetch, zs, missed,
|
||||||
B_TRUE);
|
B_TRUE, (flags & DMU_UNCACHEDIO));
|
||||||
}
|
}
|
||||||
rw_exit(&dn->dn_struct_rwlock);
|
rw_exit(&dn->dn_struct_rwlock);
|
||||||
dmu_buf_rele_array(dbp, nblks, tag);
|
dmu_buf_rele_array(dbp, nblks, tag);
|
||||||
@ -599,9 +574,9 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
|
|||||||
offset + length < db->db.db_offset +
|
offset + length < db->db.db_offset +
|
||||||
db->db.db_size) {
|
db->db.db_size) {
|
||||||
if (offset <= db->db.db_offset)
|
if (offset <= db->db.db_offset)
|
||||||
dbuf_flags |= DB_RF_PARTIAL_FIRST;
|
dbuf_flags |= DMU_PARTIAL_FIRST;
|
||||||
else
|
else
|
||||||
dbuf_flags |= DB_RF_PARTIAL_MORE;
|
dbuf_flags |= DMU_PARTIAL_MORE;
|
||||||
}
|
}
|
||||||
(void) dbuf_read(db, zio, dbuf_flags);
|
(void) dbuf_read(db, zio, dbuf_flags);
|
||||||
if (db->db_state != DB_CACHED)
|
if (db->db_state != DB_CACHED)
|
||||||
@ -621,8 +596,10 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
|
|||||||
if (!read && ((flags & DMU_DIRECTIO) == 0))
|
if (!read && ((flags & DMU_DIRECTIO) == 0))
|
||||||
zfs_racct_write(dn->dn_objset->os_spa, length, nblks, flags);
|
zfs_racct_write(dn->dn_objset->os_spa, length, nblks, flags);
|
||||||
|
|
||||||
if (zs)
|
if (zs) {
|
||||||
dmu_zfetch_run(&dn->dn_zfetch, zs, missed, B_TRUE);
|
dmu_zfetch_run(&dn->dn_zfetch, zs, missed, B_TRUE,
|
||||||
|
(flags & DMU_UNCACHEDIO));
|
||||||
|
}
|
||||||
rw_exit(&dn->dn_struct_rwlock);
|
rw_exit(&dn->dn_struct_rwlock);
|
||||||
|
|
||||||
if (read) {
|
if (read) {
|
||||||
@ -1170,7 +1147,7 @@ dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
|
|||||||
|
|
||||||
static int
|
static int
|
||||||
dmu_read_impl(dnode_t *dn, uint64_t offset, uint64_t size,
|
dmu_read_impl(dnode_t *dn, uint64_t offset, uint64_t size,
|
||||||
void *buf, uint32_t flags)
|
void *buf, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
dmu_buf_t **dbp;
|
dmu_buf_t **dbp;
|
||||||
int numbufs, err = 0;
|
int numbufs, err = 0;
|
||||||
@ -1198,6 +1175,7 @@ dmu_read_impl(dnode_t *dn, uint64_t offset, uint64_t size,
|
|||||||
abd_free(data);
|
abd_free(data);
|
||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
|
flags &= ~DMU_DIRECTIO;
|
||||||
|
|
||||||
while (size > 0) {
|
while (size > 0) {
|
||||||
uint64_t mylen = MIN(size, DMU_MAX_ACCESS / 2);
|
uint64_t mylen = MIN(size, DMU_MAX_ACCESS / 2);
|
||||||
@ -1236,7 +1214,7 @@ dmu_read_impl(dnode_t *dn, uint64_t offset, uint64_t size,
|
|||||||
|
|
||||||
int
|
int
|
||||||
dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||||
void *buf, uint32_t flags)
|
void *buf, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
dnode_t *dn;
|
dnode_t *dn;
|
||||||
int err;
|
int err;
|
||||||
@ -1252,14 +1230,14 @@ dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
|||||||
|
|
||||||
int
|
int
|
||||||
dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf,
|
dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf,
|
||||||
uint32_t flags)
|
dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
return (dmu_read_impl(dn, offset, size, buf, flags));
|
return (dmu_read_impl(dn, offset, size, buf, flags));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
dmu_write_impl(dmu_buf_t **dbp, int numbufs, uint64_t offset, uint64_t size,
|
dmu_write_impl(dmu_buf_t **dbp, int numbufs, uint64_t offset, uint64_t size,
|
||||||
const void *buf, dmu_tx_t *tx)
|
const void *buf, dmu_tx_t *tx, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
@ -1275,10 +1253,17 @@ dmu_write_impl(dmu_buf_t **dbp, int numbufs, uint64_t offset, uint64_t size,
|
|||||||
|
|
||||||
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
|
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
|
||||||
|
|
||||||
if (tocpy == db->db_size)
|
if (tocpy == db->db_size) {
|
||||||
dmu_buf_will_fill(db, tx, B_FALSE);
|
dmu_buf_will_fill_flags(db, tx, B_FALSE, flags);
|
||||||
|
} else {
|
||||||
|
if (i == numbufs - 1 && bufoff + tocpy < db->db_size) {
|
||||||
|
if (bufoff == 0)
|
||||||
|
flags |= DMU_PARTIAL_FIRST;
|
||||||
else
|
else
|
||||||
dmu_buf_will_dirty(db, tx);
|
flags |= DMU_PARTIAL_MORE;
|
||||||
|
}
|
||||||
|
dmu_buf_will_dirty_flags(db, tx, flags);
|
||||||
|
}
|
||||||
|
|
||||||
ASSERT(db->db_data != NULL);
|
ASSERT(db->db_data != NULL);
|
||||||
(void) memcpy((char *)db->db_data + bufoff, buf, tocpy);
|
(void) memcpy((char *)db->db_data + bufoff, buf, tocpy);
|
||||||
@ -1304,17 +1289,13 @@ dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
|||||||
|
|
||||||
VERIFY0(dmu_buf_hold_array(os, object, offset, size,
|
VERIFY0(dmu_buf_hold_array(os, object, offset, size,
|
||||||
FALSE, FTAG, &numbufs, &dbp));
|
FALSE, FTAG, &numbufs, &dbp));
|
||||||
dmu_write_impl(dbp, numbufs, offset, size, buf, tx);
|
dmu_write_impl(dbp, numbufs, offset, size, buf, tx, DMU_READ_PREFETCH);
|
||||||
dmu_buf_rele_array(dbp, numbufs, FTAG);
|
dmu_buf_rele_array(dbp, numbufs, FTAG);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* This interface is not used internally by ZFS but is provided for
|
|
||||||
* use by Lustre which is built on the DMU interfaces.
|
|
||||||
*/
|
|
||||||
int
|
int
|
||||||
dmu_write_by_dnode_flags(dnode_t *dn, uint64_t offset, uint64_t size,
|
dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size,
|
||||||
const void *buf, dmu_tx_t *tx, uint32_t flags)
|
const void *buf, dmu_tx_t *tx, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
dmu_buf_t **dbp;
|
dmu_buf_t **dbp;
|
||||||
int numbufs;
|
int numbufs;
|
||||||
@ -1327,25 +1308,19 @@ dmu_write_by_dnode_flags(dnode_t *dn, uint64_t offset, uint64_t size,
|
|||||||
if ((flags & DMU_DIRECTIO) && zfs_dio_page_aligned((void *)buf) &&
|
if ((flags & DMU_DIRECTIO) && zfs_dio_page_aligned((void *)buf) &&
|
||||||
zfs_dio_aligned(offset, size, dn->dn_datablksz)) {
|
zfs_dio_aligned(offset, size, dn->dn_datablksz)) {
|
||||||
abd_t *data = abd_get_from_buf((void *)buf, size);
|
abd_t *data = abd_get_from_buf((void *)buf, size);
|
||||||
error = dmu_write_abd(dn, offset, size, data, DMU_DIRECTIO, tx);
|
error = dmu_write_abd(dn, offset, size, data, flags, tx);
|
||||||
abd_free(data);
|
abd_free(data);
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
flags &= ~DMU_DIRECTIO;
|
||||||
|
|
||||||
VERIFY0(dmu_buf_hold_array_by_dnode(dn, offset, size,
|
VERIFY0(dmu_buf_hold_array_by_dnode(dn, offset, size,
|
||||||
FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH));
|
FALSE, FTAG, &numbufs, &dbp, flags));
|
||||||
dmu_write_impl(dbp, numbufs, offset, size, buf, tx);
|
dmu_write_impl(dbp, numbufs, offset, size, buf, tx, flags);
|
||||||
dmu_buf_rele_array(dbp, numbufs, FTAG);
|
dmu_buf_rele_array(dbp, numbufs, FTAG);
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
|
||||||
dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size,
|
|
||||||
const void *buf, dmu_tx_t *tx)
|
|
||||||
{
|
|
||||||
return (dmu_write_by_dnode_flags(dn, offset, size, buf, tx, 0));
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||||
dmu_tx_t *tx)
|
dmu_tx_t *tx)
|
||||||
@ -1402,20 +1377,22 @@ dmu_redact(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
|||||||
|
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
int
|
int
|
||||||
dmu_read_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size)
|
dmu_read_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size,
|
||||||
|
dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
dmu_buf_t **dbp;
|
dmu_buf_t **dbp;
|
||||||
int numbufs, i, err;
|
int numbufs, i, err;
|
||||||
|
|
||||||
if (uio->uio_extflg & UIO_DIRECT)
|
if (uio->uio_extflg & UIO_DIRECT)
|
||||||
return (dmu_read_uio_direct(dn, uio, size));
|
return (dmu_read_uio_direct(dn, uio, size, flags));
|
||||||
|
flags &= ~DMU_DIRECTIO;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* NB: we could do this block-at-a-time, but it's nice
|
* NB: we could do this block-at-a-time, but it's nice
|
||||||
* to be reading in parallel.
|
* to be reading in parallel.
|
||||||
*/
|
*/
|
||||||
err = dmu_buf_hold_array_by_dnode(dn, zfs_uio_offset(uio), size,
|
err = dmu_buf_hold_array_by_dnode(dn, zfs_uio_offset(uio), size,
|
||||||
TRUE, FTAG, &numbufs, &dbp, 0);
|
TRUE, FTAG, &numbufs, &dbp, flags);
|
||||||
if (err)
|
if (err)
|
||||||
return (err);
|
return (err);
|
||||||
|
|
||||||
@ -1453,7 +1430,8 @@ dmu_read_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size)
|
|||||||
* because we don't have to find the dnode_t for the object.
|
* because we don't have to find the dnode_t for the object.
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
dmu_read_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size)
|
dmu_read_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size,
|
||||||
|
dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb;
|
dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb;
|
||||||
int err;
|
int err;
|
||||||
@ -1462,7 +1440,7 @@ dmu_read_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size)
|
|||||||
return (0);
|
return (0);
|
||||||
|
|
||||||
DB_DNODE_ENTER(db);
|
DB_DNODE_ENTER(db);
|
||||||
err = dmu_read_uio_dnode(DB_DNODE(db), uio, size);
|
err = dmu_read_uio_dnode(DB_DNODE(db), uio, size, flags);
|
||||||
DB_DNODE_EXIT(db);
|
DB_DNODE_EXIT(db);
|
||||||
|
|
||||||
return (err);
|
return (err);
|
||||||
@ -1474,7 +1452,8 @@ dmu_read_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size)
|
|||||||
* Starting at offset zfs_uio_offset(uio).
|
* Starting at offset zfs_uio_offset(uio).
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
dmu_read_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size)
|
dmu_read_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size,
|
||||||
|
dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
dnode_t *dn;
|
dnode_t *dn;
|
||||||
int err;
|
int err;
|
||||||
@ -1486,7 +1465,7 @@ dmu_read_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size)
|
|||||||
if (err)
|
if (err)
|
||||||
return (err);
|
return (err);
|
||||||
|
|
||||||
err = dmu_read_uio_dnode(dn, uio, size);
|
err = dmu_read_uio_dnode(dn, uio, size, flags);
|
||||||
|
|
||||||
dnode_rele(dn, FTAG);
|
dnode_rele(dn, FTAG);
|
||||||
|
|
||||||
@ -1494,12 +1473,14 @@ dmu_read_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size)
|
|||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
dmu_write_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx)
|
dmu_write_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx,
|
||||||
|
dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
dmu_buf_t **dbp;
|
dmu_buf_t **dbp;
|
||||||
int numbufs;
|
int numbufs;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
uint64_t write_size;
|
uint64_t write_size;
|
||||||
|
dmu_flags_t oflags = flags;
|
||||||
|
|
||||||
top:
|
top:
|
||||||
write_size = size;
|
write_size = size;
|
||||||
@ -1512,13 +1493,14 @@ top:
|
|||||||
(write_size >= dn->dn_datablksz)) {
|
(write_size >= dn->dn_datablksz)) {
|
||||||
if (zfs_dio_aligned(zfs_uio_offset(uio), write_size,
|
if (zfs_dio_aligned(zfs_uio_offset(uio), write_size,
|
||||||
dn->dn_datablksz)) {
|
dn->dn_datablksz)) {
|
||||||
return (dmu_write_uio_direct(dn, uio, size, tx));
|
return (dmu_write_uio_direct(dn, uio, size, flags, tx));
|
||||||
} else if (write_size > dn->dn_datablksz &&
|
} else if (write_size > dn->dn_datablksz &&
|
||||||
zfs_dio_offset_aligned(zfs_uio_offset(uio),
|
zfs_dio_offset_aligned(zfs_uio_offset(uio),
|
||||||
dn->dn_datablksz)) {
|
dn->dn_datablksz)) {
|
||||||
write_size =
|
write_size =
|
||||||
dn->dn_datablksz * (write_size / dn->dn_datablksz);
|
dn->dn_datablksz * (write_size / dn->dn_datablksz);
|
||||||
err = dmu_write_uio_direct(dn, uio, write_size, tx);
|
err = dmu_write_uio_direct(dn, uio, write_size, flags,
|
||||||
|
tx);
|
||||||
if (err == 0) {
|
if (err == 0) {
|
||||||
size -= write_size;
|
size -= write_size;
|
||||||
goto top;
|
goto top;
|
||||||
@ -1530,9 +1512,10 @@ top:
|
|||||||
P2PHASE(zfs_uio_offset(uio), dn->dn_datablksz);
|
P2PHASE(zfs_uio_offset(uio), dn->dn_datablksz);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
flags &= ~DMU_DIRECTIO;
|
||||||
|
|
||||||
err = dmu_buf_hold_array_by_dnode(dn, zfs_uio_offset(uio), write_size,
|
err = dmu_buf_hold_array_by_dnode(dn, zfs_uio_offset(uio), write_size,
|
||||||
FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH);
|
FALSE, FTAG, &numbufs, &dbp, flags);
|
||||||
if (err)
|
if (err)
|
||||||
return (err);
|
return (err);
|
||||||
|
|
||||||
@ -1549,10 +1532,17 @@ top:
|
|||||||
|
|
||||||
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
|
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
|
||||||
|
|
||||||
if (tocpy == db->db_size)
|
if (tocpy == db->db_size) {
|
||||||
dmu_buf_will_fill(db, tx, B_TRUE);
|
dmu_buf_will_fill_flags(db, tx, B_TRUE, flags);
|
||||||
|
} else {
|
||||||
|
if (i == numbufs - 1 && bufoff + tocpy < db->db_size) {
|
||||||
|
if (bufoff == 0)
|
||||||
|
flags |= DMU_PARTIAL_FIRST;
|
||||||
else
|
else
|
||||||
dmu_buf_will_dirty(db, tx);
|
flags |= DMU_PARTIAL_MORE;
|
||||||
|
}
|
||||||
|
dmu_buf_will_dirty_flags(db, tx, flags);
|
||||||
|
}
|
||||||
|
|
||||||
ASSERT(db->db_data != NULL);
|
ASSERT(db->db_data != NULL);
|
||||||
err = zfs_uio_fault_move((char *)db->db_data + bufoff,
|
err = zfs_uio_fault_move((char *)db->db_data + bufoff,
|
||||||
@ -1575,6 +1565,7 @@ top:
|
|||||||
dmu_buf_rele_array(dbp, numbufs, FTAG);
|
dmu_buf_rele_array(dbp, numbufs, FTAG);
|
||||||
|
|
||||||
if ((uio->uio_extflg & UIO_DIRECT) && size > 0) {
|
if ((uio->uio_extflg & UIO_DIRECT) && size > 0) {
|
||||||
|
flags = oflags;
|
||||||
goto top;
|
goto top;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1592,7 +1583,7 @@ top:
|
|||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
dmu_write_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size,
|
dmu_write_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size,
|
||||||
dmu_tx_t *tx)
|
dmu_tx_t *tx, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb;
|
dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb;
|
||||||
int err;
|
int err;
|
||||||
@ -1601,7 +1592,7 @@ dmu_write_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size,
|
|||||||
return (0);
|
return (0);
|
||||||
|
|
||||||
DB_DNODE_ENTER(db);
|
DB_DNODE_ENTER(db);
|
||||||
err = dmu_write_uio_dnode(DB_DNODE(db), uio, size, tx);
|
err = dmu_write_uio_dnode(DB_DNODE(db), uio, size, tx, flags);
|
||||||
DB_DNODE_EXIT(db);
|
DB_DNODE_EXIT(db);
|
||||||
|
|
||||||
return (err);
|
return (err);
|
||||||
@ -1614,7 +1605,7 @@ dmu_write_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size,
|
|||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
dmu_write_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size,
|
dmu_write_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size,
|
||||||
dmu_tx_t *tx)
|
dmu_tx_t *tx, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
dnode_t *dn;
|
dnode_t *dn;
|
||||||
int err;
|
int err;
|
||||||
@ -1626,7 +1617,7 @@ dmu_write_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size,
|
|||||||
if (err)
|
if (err)
|
||||||
return (err);
|
return (err);
|
||||||
|
|
||||||
err = dmu_write_uio_dnode(dn, uio, size, tx);
|
err = dmu_write_uio_dnode(dn, uio, size, tx, flags);
|
||||||
|
|
||||||
dnode_rele(dn, FTAG);
|
dnode_rele(dn, FTAG);
|
||||||
|
|
||||||
@ -1796,11 +1787,10 @@ dmu_lightweight_write_by_dnode(dnode_t *dn, uint64_t offset, abd_t *abd,
|
|||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf,
|
dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf,
|
||||||
dmu_tx_t *tx)
|
dmu_tx_t *tx, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
dmu_buf_impl_t *db;
|
dmu_buf_impl_t *db;
|
||||||
objset_t *os = dn->dn_objset;
|
objset_t *os = dn->dn_objset;
|
||||||
uint64_t object = dn->dn_object;
|
|
||||||
uint32_t blksz = (uint32_t)arc_buf_lsize(buf);
|
uint32_t blksz = (uint32_t)arc_buf_lsize(buf);
|
||||||
uint64_t blkid;
|
uint64_t blkid;
|
||||||
|
|
||||||
@ -1816,8 +1806,8 @@ dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf,
|
|||||||
* same size as the dbuf.
|
* same size as the dbuf.
|
||||||
*/
|
*/
|
||||||
if (offset == db->db.db_offset && blksz == db->db.db_size) {
|
if (offset == db->db.db_offset && blksz == db->db.db_size) {
|
||||||
zfs_racct_write(os->os_spa, blksz, 1, 0);
|
zfs_racct_write(os->os_spa, blksz, 1, flags);
|
||||||
dbuf_assign_arcbuf(db, buf, tx);
|
dbuf_assign_arcbuf(db, buf, tx, flags);
|
||||||
dbuf_rele(db, FTAG);
|
dbuf_rele(db, FTAG);
|
||||||
} else {
|
} else {
|
||||||
/* compressed bufs must always be assignable to their dbuf */
|
/* compressed bufs must always be assignable to their dbuf */
|
||||||
@ -1825,7 +1815,7 @@ dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf,
|
|||||||
ASSERT(!(buf->b_flags & ARC_BUF_FLAG_COMPRESSED));
|
ASSERT(!(buf->b_flags & ARC_BUF_FLAG_COMPRESSED));
|
||||||
|
|
||||||
dbuf_rele(db, FTAG);
|
dbuf_rele(db, FTAG);
|
||||||
dmu_write(os, object, offset, blksz, buf->b_data, tx);
|
dmu_write_by_dnode(dn, offset, blksz, buf->b_data, tx, flags);
|
||||||
dmu_return_arcbuf(buf);
|
dmu_return_arcbuf(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1834,13 +1824,13 @@ dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf,
|
|||||||
|
|
||||||
int
|
int
|
||||||
dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
|
dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
|
||||||
dmu_tx_t *tx)
|
dmu_tx_t *tx, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle;
|
dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle;
|
||||||
|
|
||||||
DB_DNODE_ENTER(db);
|
DB_DNODE_ENTER(db);
|
||||||
err = dmu_assign_arcbuf_by_dnode(DB_DNODE(db), offset, buf, tx);
|
err = dmu_assign_arcbuf_by_dnode(DB_DNODE(db), offset, buf, tx, flags);
|
||||||
DB_DNODE_EXIT(db);
|
DB_DNODE_EXIT(db);
|
||||||
|
|
||||||
return (err);
|
return (err);
|
||||||
@ -1985,7 +1975,7 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd,
|
|||||||
int error;
|
int error;
|
||||||
|
|
||||||
error = dbuf_read((dmu_buf_impl_t *)zgd->zgd_db, NULL,
|
error = dbuf_read((dmu_buf_impl_t *)zgd->zgd_db, NULL,
|
||||||
DB_RF_CANFAIL | DB_RF_NOPREFETCH);
|
DB_RF_CANFAIL | DMU_READ_NO_PREFETCH | DMU_KEEP_CACHING);
|
||||||
if (error != 0)
|
if (error != 0)
|
||||||
return (error);
|
return (error);
|
||||||
|
|
||||||
@ -2928,7 +2918,6 @@ EXPORT_SYMBOL(dmu_read_uio_dbuf);
|
|||||||
EXPORT_SYMBOL(dmu_read_uio_dnode);
|
EXPORT_SYMBOL(dmu_read_uio_dnode);
|
||||||
EXPORT_SYMBOL(dmu_write);
|
EXPORT_SYMBOL(dmu_write);
|
||||||
EXPORT_SYMBOL(dmu_write_by_dnode);
|
EXPORT_SYMBOL(dmu_write_by_dnode);
|
||||||
EXPORT_SYMBOL(dmu_write_by_dnode_flags);
|
|
||||||
EXPORT_SYMBOL(dmu_write_uio);
|
EXPORT_SYMBOL(dmu_write_uio);
|
||||||
EXPORT_SYMBOL(dmu_write_uio_dbuf);
|
EXPORT_SYMBOL(dmu_write_uio_dbuf);
|
||||||
EXPORT_SYMBOL(dmu_write_uio_dnode);
|
EXPORT_SYMBOL(dmu_write_uio_dnode);
|
||||||
|
@ -208,7 +208,7 @@ dmu_write_direct(zio_t *pio, dmu_buf_impl_t *db, abd_t *data, dmu_tx_t *tx)
|
|||||||
|
|
||||||
int
|
int
|
||||||
dmu_write_abd(dnode_t *dn, uint64_t offset, uint64_t size,
|
dmu_write_abd(dnode_t *dn, uint64_t offset, uint64_t size,
|
||||||
abd_t *data, uint32_t flags, dmu_tx_t *tx)
|
abd_t *data, dmu_flags_t flags, dmu_tx_t *tx)
|
||||||
{
|
{
|
||||||
dmu_buf_t **dbp;
|
dmu_buf_t **dbp;
|
||||||
spa_t *spa = dn->dn_objset->os_spa;
|
spa_t *spa = dn->dn_objset->os_spa;
|
||||||
@ -247,7 +247,7 @@ dmu_write_abd(dnode_t *dn, uint64_t offset, uint64_t size,
|
|||||||
|
|
||||||
int
|
int
|
||||||
dmu_read_abd(dnode_t *dn, uint64_t offset, uint64_t size,
|
dmu_read_abd(dnode_t *dn, uint64_t offset, uint64_t size,
|
||||||
abd_t *data, uint32_t flags)
|
abd_t *data, dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
objset_t *os = dn->dn_objset;
|
objset_t *os = dn->dn_objset;
|
||||||
spa_t *spa = os->os_spa;
|
spa_t *spa = os->os_spa;
|
||||||
@ -351,7 +351,8 @@ error:
|
|||||||
|
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
int
|
int
|
||||||
dmu_read_uio_direct(dnode_t *dn, zfs_uio_t *uio, uint64_t size)
|
dmu_read_uio_direct(dnode_t *dn, zfs_uio_t *uio, uint64_t size,
|
||||||
|
dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
offset_t offset = zfs_uio_offset(uio);
|
offset_t offset = zfs_uio_offset(uio);
|
||||||
offset_t page_index = (offset - zfs_uio_soffset(uio)) >> PAGESHIFT;
|
offset_t page_index = (offset - zfs_uio_soffset(uio)) >> PAGESHIFT;
|
||||||
@ -362,7 +363,7 @@ dmu_read_uio_direct(dnode_t *dn, zfs_uio_t *uio, uint64_t size)
|
|||||||
|
|
||||||
abd_t *data = abd_alloc_from_pages(&uio->uio_dio.pages[page_index],
|
abd_t *data = abd_alloc_from_pages(&uio->uio_dio.pages[page_index],
|
||||||
offset & (PAGESIZE - 1), size);
|
offset & (PAGESIZE - 1), size);
|
||||||
err = dmu_read_abd(dn, offset, size, data, DMU_DIRECTIO);
|
err = dmu_read_abd(dn, offset, size, data, flags);
|
||||||
abd_free(data);
|
abd_free(data);
|
||||||
|
|
||||||
if (err == 0)
|
if (err == 0)
|
||||||
@ -372,7 +373,8 @@ dmu_read_uio_direct(dnode_t *dn, zfs_uio_t *uio, uint64_t size)
|
|||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
dmu_write_uio_direct(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx)
|
dmu_write_uio_direct(dnode_t *dn, zfs_uio_t *uio, uint64_t size,
|
||||||
|
dmu_flags_t flags, dmu_tx_t *tx)
|
||||||
{
|
{
|
||||||
offset_t offset = zfs_uio_offset(uio);
|
offset_t offset = zfs_uio_offset(uio);
|
||||||
offset_t page_index = (offset - zfs_uio_soffset(uio)) >> PAGESHIFT;
|
offset_t page_index = (offset - zfs_uio_soffset(uio)) >> PAGESHIFT;
|
||||||
@ -383,7 +385,7 @@ dmu_write_uio_direct(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx)
|
|||||||
|
|
||||||
abd_t *data = abd_alloc_from_pages(&uio->uio_dio.pages[page_index],
|
abd_t *data = abd_alloc_from_pages(&uio->uio_dio.pages[page_index],
|
||||||
offset & (PAGESIZE - 1), size);
|
offset & (PAGESIZE - 1), size);
|
||||||
err = dmu_write_abd(dn, offset, size, data, DMU_DIRECTIO, tx);
|
err = dmu_write_abd(dn, offset, size, data, flags, tx);
|
||||||
abd_free(data);
|
abd_free(data);
|
||||||
|
|
||||||
if (err == 0)
|
if (err == 0)
|
||||||
|
@ -2332,12 +2332,11 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
|
|||||||
data = DN_BONUS(dn->dn_phys);
|
data = DN_BONUS(dn->dn_phys);
|
||||||
}
|
}
|
||||||
} else if (dn->dn_bonuslen == 0 && dn->dn_bonustype == DMU_OT_SA) {
|
} else if (dn->dn_bonuslen == 0 && dn->dn_bonustype == DMU_OT_SA) {
|
||||||
int rf = 0;
|
dmu_flags_t rf = DB_RF_MUST_SUCCEED;
|
||||||
|
|
||||||
if (RW_WRITE_HELD(&dn->dn_struct_rwlock))
|
if (RW_WRITE_HELD(&dn->dn_struct_rwlock))
|
||||||
rf |= DB_RF_HAVESTRUCT;
|
rf |= DB_RF_HAVESTRUCT;
|
||||||
error = dmu_spill_hold_by_dnode(dn,
|
error = dmu_spill_hold_by_dnode(dn, rf,
|
||||||
rf | DB_RF_MUST_SUCCEED,
|
|
||||||
FTAG, (dmu_buf_t **)&db);
|
FTAG, (dmu_buf_t **)&db);
|
||||||
ASSERT(error == 0);
|
ASSERT(error == 0);
|
||||||
mutex_enter(&db->db_mtx);
|
mutex_enter(&db->db_mtx);
|
||||||
|
@ -2135,7 +2135,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
|
|||||||
if (data != NULL) {
|
if (data != NULL) {
|
||||||
dmu_buf_t *db;
|
dmu_buf_t *db;
|
||||||
dnode_t *dn;
|
dnode_t *dn;
|
||||||
uint32_t flags = DMU_READ_NO_PREFETCH;
|
dmu_flags_t flags = DMU_READ_NO_PREFETCH;
|
||||||
|
|
||||||
if (rwa->raw)
|
if (rwa->raw)
|
||||||
flags |= DMU_READ_NO_DECRYPT;
|
flags |= DMU_READ_NO_DECRYPT;
|
||||||
@ -2277,14 +2277,18 @@ flush_write_batch_impl(struct receive_writer_arg *rwa)
|
|||||||
dmu_write_by_dnode(dn,
|
dmu_write_by_dnode(dn,
|
||||||
drrw->drr_offset,
|
drrw->drr_offset,
|
||||||
drrw->drr_logical_size,
|
drrw->drr_logical_size,
|
||||||
abd_to_buf(decomp_abd), tx);
|
abd_to_buf(decomp_abd), tx,
|
||||||
|
DMU_READ_NO_PREFETCH |
|
||||||
|
DMU_UNCACHEDIO);
|
||||||
}
|
}
|
||||||
abd_free(decomp_abd);
|
abd_free(decomp_abd);
|
||||||
} else {
|
} else {
|
||||||
dmu_write_by_dnode(dn,
|
dmu_write_by_dnode(dn,
|
||||||
drrw->drr_offset,
|
drrw->drr_offset,
|
||||||
drrw->drr_logical_size,
|
drrw->drr_logical_size,
|
||||||
abd_to_buf(abd), tx);
|
abd_to_buf(abd), tx,
|
||||||
|
DMU_READ_NO_PREFETCH |
|
||||||
|
DMU_UNCACHEDIO);
|
||||||
}
|
}
|
||||||
if (err == 0)
|
if (err == 0)
|
||||||
abd_free(abd);
|
abd_free(abd);
|
||||||
@ -2407,10 +2411,10 @@ receive_process_write_record(struct receive_writer_arg *rwa,
|
|||||||
if (rwa->heal) {
|
if (rwa->heal) {
|
||||||
blkptr_t *bp;
|
blkptr_t *bp;
|
||||||
dmu_buf_t *dbp;
|
dmu_buf_t *dbp;
|
||||||
int flags = DB_RF_CANFAIL;
|
dmu_flags_t flags = DB_RF_CANFAIL;
|
||||||
|
|
||||||
if (rwa->raw)
|
if (rwa->raw)
|
||||||
flags |= DB_RF_NO_DECRYPT;
|
flags |= DMU_READ_NO_DECRYPT;
|
||||||
|
|
||||||
if (rwa->byteswap) {
|
if (rwa->byteswap) {
|
||||||
dmu_object_byteswap_t byteswap =
|
dmu_object_byteswap_t byteswap =
|
||||||
@ -2567,8 +2571,8 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
|
|||||||
rwa->max_object = drrs->drr_object;
|
rwa->max_object = drrs->drr_object;
|
||||||
|
|
||||||
VERIFY0(dmu_bonus_hold(rwa->os, drrs->drr_object, FTAG, &db));
|
VERIFY0(dmu_bonus_hold(rwa->os, drrs->drr_object, FTAG, &db));
|
||||||
if ((err = dmu_spill_hold_by_bonus(db, DMU_READ_NO_DECRYPT, FTAG,
|
if ((err = dmu_spill_hold_by_bonus(db, DMU_READ_NO_DECRYPT |
|
||||||
&db_spill)) != 0) {
|
DB_RF_CANFAIL, FTAG, &db_spill)) != 0) {
|
||||||
dmu_buf_rele(db, FTAG);
|
dmu_buf_rele(db, FTAG);
|
||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
@ -2621,7 +2625,8 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
|
|||||||
|
|
||||||
memcpy(abuf->b_data, abd_to_buf(abd), DRR_SPILL_PAYLOAD_SIZE(drrs));
|
memcpy(abuf->b_data, abd_to_buf(abd), DRR_SPILL_PAYLOAD_SIZE(drrs));
|
||||||
abd_free(abd);
|
abd_free(abd);
|
||||||
dbuf_assign_arcbuf((dmu_buf_impl_t *)db_spill, abuf, tx);
|
dbuf_assign_arcbuf((dmu_buf_impl_t *)db_spill, abuf, tx,
|
||||||
|
DMU_UNCACHEDIO);
|
||||||
|
|
||||||
dmu_buf_rele(db, FTAG);
|
dmu_buf_rele(db, FTAG);
|
||||||
dmu_buf_rele(db_spill, FTAG);
|
dmu_buf_rele(db_spill, FTAG);
|
||||||
|
@ -297,7 +297,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (BP_GET_LEVEL(bp) > 0) {
|
if (BP_GET_LEVEL(bp) > 0) {
|
||||||
uint32_t flags = ARC_FLAG_WAIT;
|
arc_flags_t flags = ARC_FLAG_WAIT;
|
||||||
int32_t i, ptidx, pidx;
|
int32_t i, ptidx, pidx;
|
||||||
uint32_t prefetchlimit;
|
uint32_t prefetchlimit;
|
||||||
int32_t epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
|
int32_t epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
|
||||||
@ -364,8 +364,8 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
|||||||
kmem_free(czb, sizeof (zbookmark_phys_t));
|
kmem_free(czb, sizeof (zbookmark_phys_t));
|
||||||
|
|
||||||
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
|
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
|
||||||
uint32_t flags = ARC_FLAG_WAIT;
|
arc_flags_t flags = ARC_FLAG_WAIT;
|
||||||
uint32_t zio_flags = ZIO_FLAG_CANFAIL;
|
zio_flag_t zio_flags = ZIO_FLAG_CANFAIL;
|
||||||
int32_t i;
|
int32_t i;
|
||||||
int32_t epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
|
int32_t epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
|
||||||
dnode_phys_t *child_dnp;
|
dnode_phys_t *child_dnp;
|
||||||
@ -397,7 +397,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
|
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
|
||||||
uint32_t zio_flags = ZIO_FLAG_CANFAIL;
|
zio_flag_t zio_flags = ZIO_FLAG_CANFAIL;
|
||||||
arc_flags_t flags = ARC_FLAG_WAIT;
|
arc_flags_t flags = ARC_FLAG_WAIT;
|
||||||
objset_phys_t *osp;
|
objset_phys_t *osp;
|
||||||
|
|
||||||
@ -669,7 +669,7 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
|
|||||||
/* See comment on ZIL traversal in dsl_scan_visitds. */
|
/* See comment on ZIL traversal in dsl_scan_visitds. */
|
||||||
if (ds != NULL && !ds->ds_is_snapshot && !BP_IS_HOLE(rootbp)) {
|
if (ds != NULL && !ds->ds_is_snapshot && !BP_IS_HOLE(rootbp)) {
|
||||||
zio_flag_t zio_flags = ZIO_FLAG_CANFAIL;
|
zio_flag_t zio_flags = ZIO_FLAG_CANFAIL;
|
||||||
uint32_t flags = ARC_FLAG_WAIT;
|
arc_flags_t flags = ARC_FLAG_WAIT;
|
||||||
objset_phys_t *osp;
|
objset_phys_t *osp;
|
||||||
arc_buf_t *buf;
|
arc_buf_t *buf;
|
||||||
ASSERT(!BP_IS_REDACTED(rootbp));
|
ASSERT(!BP_IS_REDACTED(rootbp));
|
||||||
|
@ -222,8 +222,8 @@ dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid)
|
|||||||
* PARTIAL_FIRST allows caching for uncacheable blocks. It will
|
* PARTIAL_FIRST allows caching for uncacheable blocks. It will
|
||||||
* be cleared after dmu_buf_will_dirty() call dbuf_read() again.
|
* be cleared after dmu_buf_will_dirty() call dbuf_read() again.
|
||||||
*/
|
*/
|
||||||
err = dbuf_read(db, zio, DB_RF_CANFAIL | DB_RF_NOPREFETCH |
|
err = dbuf_read(db, zio, DB_RF_CANFAIL | DMU_READ_NO_PREFETCH |
|
||||||
(level == 0 ? DB_RF_PARTIAL_FIRST : 0));
|
(level == 0 ? (DMU_UNCACHEDIO | DMU_PARTIAL_FIRST) : 0));
|
||||||
dbuf_rele(db, FTAG);
|
dbuf_rele(db, FTAG);
|
||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
|
@ -690,7 +690,7 @@ prescient:
|
|||||||
|
|
||||||
void
|
void
|
||||||
dmu_zfetch_run(zfetch_t *zf, zstream_t *zs, boolean_t missed,
|
dmu_zfetch_run(zfetch_t *zf, zstream_t *zs, boolean_t missed,
|
||||||
boolean_t have_lock)
|
boolean_t have_lock, boolean_t uncached)
|
||||||
{
|
{
|
||||||
int64_t pf_start, pf_end, ipf_start, ipf_end;
|
int64_t pf_start, pf_end, ipf_start, ipf_end;
|
||||||
int epbs, issued;
|
int epbs, issued;
|
||||||
@ -745,7 +745,8 @@ dmu_zfetch_run(zfetch_t *zf, zstream_t *zs, boolean_t missed,
|
|||||||
issued = 0;
|
issued = 0;
|
||||||
for (int64_t blk = pf_start; blk < pf_end; blk++) {
|
for (int64_t blk = pf_start; blk < pf_end; blk++) {
|
||||||
issued += dbuf_prefetch_impl(zf->zf_dnode, 0, blk,
|
issued += dbuf_prefetch_impl(zf->zf_dnode, 0, blk,
|
||||||
ZIO_PRIORITY_ASYNC_READ, 0, dmu_zfetch_done, zs);
|
ZIO_PRIORITY_ASYNC_READ, uncached ?
|
||||||
|
ARC_FLAG_UNCACHED : 0, dmu_zfetch_done, zs);
|
||||||
}
|
}
|
||||||
for (int64_t iblk = ipf_start; iblk < ipf_end; iblk++) {
|
for (int64_t iblk = ipf_start; iblk < ipf_end; iblk++) {
|
||||||
issued += dbuf_prefetch_impl(zf->zf_dnode, 1, iblk,
|
issued += dbuf_prefetch_impl(zf->zf_dnode, 1, iblk,
|
||||||
@ -761,13 +762,13 @@ dmu_zfetch_run(zfetch_t *zf, zstream_t *zs, boolean_t missed,
|
|||||||
|
|
||||||
void
|
void
|
||||||
dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
|
dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
|
||||||
boolean_t missed, boolean_t have_lock)
|
boolean_t missed, boolean_t have_lock, boolean_t uncached)
|
||||||
{
|
{
|
||||||
zstream_t *zs;
|
zstream_t *zs;
|
||||||
|
|
||||||
zs = dmu_zfetch_prepare(zf, blkid, nblks, fetch_data, have_lock);
|
zs = dmu_zfetch_prepare(zf, blkid, nblks, fetch_data, have_lock);
|
||||||
if (zs)
|
if (zs)
|
||||||
dmu_zfetch_run(zf, zs, missed, have_lock);
|
dmu_zfetch_run(zf, zs, missed, have_lock, uncached);
|
||||||
}
|
}
|
||||||
|
|
||||||
ZFS_MODULE_PARAM(zfs_prefetch, zfs_prefetch_, disable, INT, ZMOD_RW,
|
ZFS_MODULE_PARAM(zfs_prefetch, zfs_prefetch_, disable, INT, ZMOD_RW,
|
||||||
|
@ -1510,7 +1510,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
|
|||||||
* if we get the encrypted or decrypted version.
|
* if we get the encrypted or decrypted version.
|
||||||
*/
|
*/
|
||||||
err = dbuf_read(db, NULL, DB_RF_CANFAIL |
|
err = dbuf_read(db, NULL, DB_RF_CANFAIL |
|
||||||
DB_RF_NO_DECRYPT | DB_RF_NOPREFETCH);
|
DMU_READ_NO_PREFETCH | DMU_READ_NO_DECRYPT);
|
||||||
if (err) {
|
if (err) {
|
||||||
DNODE_STAT_BUMP(dnode_hold_dbuf_read);
|
DNODE_STAT_BUMP(dnode_hold_dbuf_read);
|
||||||
dbuf_rele(db, FTAG);
|
dbuf_rele(db, FTAG);
|
||||||
@ -2578,7 +2578,7 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
|
|||||||
}
|
}
|
||||||
error = dbuf_read(db, NULL,
|
error = dbuf_read(db, NULL,
|
||||||
DB_RF_CANFAIL | DB_RF_HAVESTRUCT |
|
DB_RF_CANFAIL | DB_RF_HAVESTRUCT |
|
||||||
DB_RF_NO_DECRYPT | DB_RF_NOPREFETCH);
|
DMU_READ_NO_PREFETCH | DMU_READ_NO_DECRYPT);
|
||||||
if (error) {
|
if (error) {
|
||||||
dbuf_rele(db, FTAG);
|
dbuf_rele(db, FTAG);
|
||||||
return (error);
|
return (error);
|
||||||
|
@ -513,6 +513,7 @@ dnode_evict_dbufs(dnode_t *dn)
|
|||||||
avl_remove(&dn->dn_dbufs, db_marker);
|
avl_remove(&dn->dn_dbufs, db_marker);
|
||||||
} else {
|
} else {
|
||||||
db->db_pending_evict = TRUE;
|
db->db_pending_evict = TRUE;
|
||||||
|
db->db_partial_read = FALSE;
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
db_next = AVL_NEXT(&dn->dn_dbufs, db);
|
db_next = AVL_NEXT(&dn->dn_dbufs, db);
|
||||||
}
|
}
|
||||||
|
@ -703,8 +703,8 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
|
|||||||
boolean_t dummy;
|
boolean_t dummy;
|
||||||
|
|
||||||
if (hdl->sa_spill == NULL) {
|
if (hdl->sa_spill == NULL) {
|
||||||
VERIFY(dmu_spill_hold_by_bonus(hdl->sa_bonus, 0, NULL,
|
VERIFY0(dmu_spill_hold_by_bonus(hdl->sa_bonus,
|
||||||
&hdl->sa_spill) == 0);
|
DB_RF_MUST_SUCCEED, NULL, &hdl->sa_spill));
|
||||||
}
|
}
|
||||||
dmu_buf_will_dirty(hdl->sa_spill, tx);
|
dmu_buf_will_dirty(hdl->sa_spill, tx);
|
||||||
|
|
||||||
|
@ -948,7 +948,8 @@ spa_iostats_trim_add(spa_t *spa, trim_type_t type,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
spa_iostats_read_add(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags)
|
spa_iostats_read_add(spa_t *spa, uint64_t size, uint64_t iops,
|
||||||
|
dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
spa_history_kstat_t *shk = &spa->spa_stats.iostats;
|
spa_history_kstat_t *shk = &spa->spa_stats.iostats;
|
||||||
kstat_t *ksp = shk->kstat;
|
kstat_t *ksp = shk->kstat;
|
||||||
@ -967,7 +968,8 @@ spa_iostats_read_add(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
spa_iostats_write_add(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags)
|
spa_iostats_write_add(spa_t *spa, uint64_t size, uint64_t iops,
|
||||||
|
dmu_flags_t flags)
|
||||||
{
|
{
|
||||||
spa_history_kstat_t *shk = &spa->spa_stats.iostats;
|
spa_history_kstat_t *shk = &spa->spa_stats.iostats;
|
||||||
kstat_t *ksp = shk->kstat;
|
kstat_t *ksp = shk->kstat;
|
||||||
|
@ -669,7 +669,8 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
|
|||||||
int err;
|
int err;
|
||||||
DB_DNODE_ENTER(db);
|
DB_DNODE_ENTER(db);
|
||||||
err = dmu_read_by_dnode(DB_DNODE(db), off, len,
|
err = dmu_read_by_dnode(DB_DNODE(db), off, len,
|
||||||
&lr->lr_data[0], DMU_READ_NO_PREFETCH);
|
&lr->lr_data[0], DMU_READ_NO_PREFETCH |
|
||||||
|
DMU_KEEP_CACHING);
|
||||||
DB_DNODE_EXIT(db);
|
DB_DNODE_EXIT(db);
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
zil_itx_destroy(itx);
|
zil_itx_destroy(itx);
|
||||||
|
@ -89,6 +89,12 @@ static int zfs_dio_enabled = 0;
|
|||||||
static int zfs_dio_enabled = 1;
|
static int zfs_dio_enabled = 1;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Strictly enforce alignment for Direct I/O requests, returning EINVAL
|
||||||
|
* if not page-aligned instead of silently falling back to uncached I/O.
|
||||||
|
*/
|
||||||
|
static int zfs_dio_strict = 0;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Maximum bytes to read per chunk in zfs_read().
|
* Maximum bytes to read per chunk in zfs_read().
|
||||||
@ -243,46 +249,54 @@ zfs_setup_direct(struct znode *zp, zfs_uio_t *uio, zfs_uio_rw_t rw,
|
|||||||
int ioflag = *ioflagp;
|
int ioflag = *ioflagp;
|
||||||
int error = 0;
|
int error = 0;
|
||||||
|
|
||||||
if (!zfs_dio_enabled || os->os_direct == ZFS_DIRECT_DISABLED ||
|
if (os->os_direct == ZFS_DIRECT_ALWAYS) {
|
||||||
zn_has_cached_data(zp, zfs_uio_offset(uio),
|
/* Force either direct or uncached I/O. */
|
||||||
zfs_uio_offset(uio) + zfs_uio_resid(uio) - 1)) {
|
|
||||||
/*
|
|
||||||
* Direct I/O is disabled or the region is mmap'ed. In either
|
|
||||||
* case the I/O request will just be directed through the ARC.
|
|
||||||
*/
|
|
||||||
ioflag &= ~O_DIRECT;
|
|
||||||
goto out;
|
|
||||||
} else if (os->os_direct == ZFS_DIRECT_ALWAYS &&
|
|
||||||
zfs_uio_page_aligned(uio) &&
|
|
||||||
zfs_uio_aligned(uio, PAGE_SIZE)) {
|
|
||||||
if ((rw == UIO_WRITE && zfs_uio_resid(uio) >= zp->z_blksz) ||
|
|
||||||
(rw == UIO_READ)) {
|
|
||||||
ioflag |= O_DIRECT;
|
ioflag |= O_DIRECT;
|
||||||
}
|
}
|
||||||
} else if (os->os_direct == ZFS_DIRECT_ALWAYS && (ioflag & O_DIRECT)) {
|
|
||||||
|
if ((ioflag & O_DIRECT) == 0)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
if (!zfs_dio_enabled || os->os_direct == ZFS_DIRECT_DISABLED) {
|
||||||
/*
|
/*
|
||||||
* Direct I/O was requested through the direct=always, but it
|
* Direct I/O is disabled. The I/O request will be directed
|
||||||
* is not properly PAGE_SIZE aligned. The request will be
|
* through the ARC as uncached I/O.
|
||||||
* directed through the ARC.
|
|
||||||
*/
|
*/
|
||||||
ioflag &= ~O_DIRECT;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ioflag & O_DIRECT) {
|
|
||||||
if (!zfs_uio_page_aligned(uio) ||
|
if (!zfs_uio_page_aligned(uio) ||
|
||||||
!zfs_uio_aligned(uio, PAGE_SIZE)) {
|
!zfs_uio_aligned(uio, PAGE_SIZE)) {
|
||||||
|
/*
|
||||||
|
* Misaligned requests can be executed through the ARC as
|
||||||
|
* uncached I/O. But if O_DIRECT was set by user and we
|
||||||
|
* were set to be strict, then it is a failure.
|
||||||
|
*/
|
||||||
|
if ((*ioflagp & O_DIRECT) && zfs_dio_strict)
|
||||||
error = SET_ERROR(EINVAL);
|
error = SET_ERROR(EINVAL);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
error = zfs_uio_get_dio_pages_alloc(uio, rw);
|
if (zn_has_cached_data(zp, zfs_uio_offset(uio),
|
||||||
if (error) {
|
zfs_uio_offset(uio) + zfs_uio_resid(uio) - 1)) {
|
||||||
|
/*
|
||||||
|
* The region is mmap'ed. The I/O request will be directed
|
||||||
|
* through the ARC as uncached I/O.
|
||||||
|
*/
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
IMPLY(ioflag & O_DIRECT, uio->uio_extflg & UIO_DIRECT);
|
/*
|
||||||
ASSERT0(error);
|
* For short writes the page mapping of Direct I/O makes no sense.
|
||||||
|
* Direct them through the ARC as uncached I/O.
|
||||||
|
*/
|
||||||
|
if (rw == UIO_WRITE && zfs_uio_resid(uio) < zp->z_blksz)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
error = zfs_uio_get_dio_pages_alloc(uio, rw);
|
||||||
|
if (error)
|
||||||
|
goto out;
|
||||||
|
ASSERT(uio->uio_extflg & UIO_DIRECT);
|
||||||
|
|
||||||
out:
|
out:
|
||||||
*ioflagp = ioflag;
|
*ioflagp = ioflag;
|
||||||
@ -392,6 +406,9 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
|
|||||||
ssize_t start_resid = n;
|
ssize_t start_resid = n;
|
||||||
ssize_t dio_remaining_resid = 0;
|
ssize_t dio_remaining_resid = 0;
|
||||||
|
|
||||||
|
dmu_flags_t dflags = DMU_READ_PREFETCH;
|
||||||
|
if (ioflag & O_DIRECT)
|
||||||
|
dflags |= DMU_UNCACHEDIO;
|
||||||
if (uio->uio_extflg & UIO_DIRECT) {
|
if (uio->uio_extflg & UIO_DIRECT) {
|
||||||
/*
|
/*
|
||||||
* All pages for an O_DIRECT request have already been mapped
|
* All pages for an O_DIRECT request have already been mapped
|
||||||
@ -414,6 +431,7 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
|
|||||||
dio_remaining_resid = n - P2ALIGN_TYPED(n, PAGE_SIZE, ssize_t);
|
dio_remaining_resid = n - P2ALIGN_TYPED(n, PAGE_SIZE, ssize_t);
|
||||||
if (dio_remaining_resid != 0)
|
if (dio_remaining_resid != 0)
|
||||||
n -= dio_remaining_resid;
|
n -= dio_remaining_resid;
|
||||||
|
dflags |= DMU_DIRECTIO;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (n > 0) {
|
while (n > 0) {
|
||||||
@ -429,7 +447,7 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
|
|||||||
error = mappedread(zp, nbytes, uio);
|
error = mappedread(zp, nbytes, uio);
|
||||||
} else {
|
} else {
|
||||||
error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
|
error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
|
||||||
uio, nbytes);
|
uio, nbytes, dflags);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (error) {
|
if (error) {
|
||||||
@ -479,15 +497,17 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
|
|||||||
* remainder of the file can be read using the ARC.
|
* remainder of the file can be read using the ARC.
|
||||||
*/
|
*/
|
||||||
uio->uio_extflg &= ~UIO_DIRECT;
|
uio->uio_extflg &= ~UIO_DIRECT;
|
||||||
|
dflags &= ~DMU_DIRECTIO;
|
||||||
|
|
||||||
if (zn_has_cached_data(zp, zfs_uio_offset(uio),
|
if (zn_has_cached_data(zp, zfs_uio_offset(uio),
|
||||||
zfs_uio_offset(uio) + dio_remaining_resid - 1)) {
|
zfs_uio_offset(uio) + dio_remaining_resid - 1)) {
|
||||||
error = mappedread(zp, dio_remaining_resid, uio);
|
error = mappedread(zp, dio_remaining_resid, uio);
|
||||||
} else {
|
} else {
|
||||||
error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), uio,
|
error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), uio,
|
||||||
dio_remaining_resid);
|
dio_remaining_resid, dflags);
|
||||||
}
|
}
|
||||||
uio->uio_extflg |= UIO_DIRECT;
|
uio->uio_extflg |= UIO_DIRECT;
|
||||||
|
dflags |= DMU_DIRECTIO;
|
||||||
|
|
||||||
if (error != 0)
|
if (error != 0)
|
||||||
n += dio_remaining_resid;
|
n += dio_remaining_resid;
|
||||||
@ -859,12 +879,18 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
|
|||||||
zfs_rangelock_reduce(lr, woff, n);
|
zfs_rangelock_reduce(lr, woff, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dmu_flags_t dflags = DMU_READ_PREFETCH;
|
||||||
|
if (ioflag & O_DIRECT)
|
||||||
|
dflags |= DMU_UNCACHEDIO;
|
||||||
|
if (uio->uio_extflg & UIO_DIRECT)
|
||||||
|
dflags |= DMU_DIRECTIO;
|
||||||
|
|
||||||
ssize_t tx_bytes;
|
ssize_t tx_bytes;
|
||||||
if (abuf == NULL) {
|
if (abuf == NULL) {
|
||||||
tx_bytes = zfs_uio_resid(uio);
|
tx_bytes = zfs_uio_resid(uio);
|
||||||
zfs_uio_fault_disable(uio, B_TRUE);
|
zfs_uio_fault_disable(uio, B_TRUE);
|
||||||
error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
|
error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
|
||||||
uio, nbytes, tx);
|
uio, nbytes, tx, dflags);
|
||||||
zfs_uio_fault_disable(uio, B_FALSE);
|
zfs_uio_fault_disable(uio, B_FALSE);
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
if (error == EFAULT) {
|
if (error == EFAULT) {
|
||||||
@ -903,7 +929,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
|
|||||||
* arc buffer to a dbuf.
|
* arc buffer to a dbuf.
|
||||||
*/
|
*/
|
||||||
error = dmu_assign_arcbuf_by_dbuf(
|
error = dmu_assign_arcbuf_by_dbuf(
|
||||||
sa_get_db(zp->z_sa_hdl), woff, abuf, tx);
|
sa_get_db(zp->z_sa_hdl), woff, abuf, tx, dflags);
|
||||||
if (error != 0) {
|
if (error != 0) {
|
||||||
/*
|
/*
|
||||||
* XXX This might not be necessary if
|
* XXX This might not be necessary if
|
||||||
@ -1329,7 +1355,7 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf,
|
|||||||
error = SET_ERROR(ENOENT);
|
error = SET_ERROR(ENOENT);
|
||||||
} else {
|
} else {
|
||||||
error = dmu_read(os, object, offset, size, buf,
|
error = dmu_read(os, object, offset, size, buf,
|
||||||
DMU_READ_NO_PREFETCH);
|
DMU_READ_NO_PREFETCH | DMU_KEEP_CACHING);
|
||||||
}
|
}
|
||||||
ASSERT(error == 0 || error == ENOENT);
|
ASSERT(error == 0 || error == ENOENT);
|
||||||
} else { /* indirect write */
|
} else { /* indirect write */
|
||||||
@ -2019,3 +2045,6 @@ ZFS_MODULE_PARAM(zfs, zfs_, bclone_wait_dirty, INT, ZMOD_RW,
|
|||||||
|
|
||||||
ZFS_MODULE_PARAM(zfs, zfs_, dio_enabled, INT, ZMOD_RW,
|
ZFS_MODULE_PARAM(zfs, zfs_, dio_enabled, INT, ZMOD_RW,
|
||||||
"Enable Direct I/O");
|
"Enable Direct I/O");
|
||||||
|
|
||||||
|
ZFS_MODULE_PARAM(zfs, zfs_, dio_strict, INT, ZMOD_RW,
|
||||||
|
"Return errors on misaligned Direct I/O");
|
||||||
|
@ -900,8 +900,9 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
|
|||||||
itx = zil_itx_create(TX_WRITE, sizeof (*lr) +
|
itx = zil_itx_create(TX_WRITE, sizeof (*lr) +
|
||||||
(wr_state == WR_COPIED ? len : 0));
|
(wr_state == WR_COPIED ? len : 0));
|
||||||
lr = (lr_write_t *)&itx->itx_lr;
|
lr = (lr_write_t *)&itx->itx_lr;
|
||||||
if (wr_state == WR_COPIED && dmu_read_by_dnode(zv->zv_dn,
|
if (wr_state == WR_COPIED &&
|
||||||
offset, len, lr+1, DMU_READ_NO_PREFETCH) != 0) {
|
dmu_read_by_dnode(zv->zv_dn, offset, len, lr + 1,
|
||||||
|
DMU_READ_NO_PREFETCH | DMU_KEEP_CACHING) != 0) {
|
||||||
zil_itx_destroy(itx);
|
zil_itx_destroy(itx);
|
||||||
itx = zil_itx_create(TX_WRITE, sizeof (*lr));
|
itx = zil_itx_create(TX_WRITE, sizeof (*lr));
|
||||||
lr = (lr_write_t *)&itx->itx_lr;
|
lr = (lr_write_t *)&itx->itx_lr;
|
||||||
@ -994,7 +995,7 @@ zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
|
|||||||
zgd->zgd_lr = zfs_rangelock_enter(&zv->zv_rangelock, offset,
|
zgd->zgd_lr = zfs_rangelock_enter(&zv->zv_rangelock, offset,
|
||||||
size, RL_READER);
|
size, RL_READER);
|
||||||
error = dmu_read_by_dnode(zv->zv_dn, offset, size, buf,
|
error = dmu_read_by_dnode(zv->zv_dn, offset, size, buf,
|
||||||
DMU_READ_NO_PREFETCH);
|
DMU_READ_NO_PREFETCH | DMU_KEEP_CACHING);
|
||||||
} else { /* indirect write */
|
} else { /* indirect write */
|
||||||
ASSERT3P(zio, !=, NULL);
|
ASSERT3P(zio, !=, NULL);
|
||||||
/*
|
/*
|
||||||
|
@ -107,6 +107,7 @@ VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq
|
|||||||
BCLONE_ENABLED bclone_enabled zfs_bclone_enabled
|
BCLONE_ENABLED bclone_enabled zfs_bclone_enabled
|
||||||
BCLONE_WAIT_DIRTY bclone_wait_dirty zfs_bclone_wait_dirty
|
BCLONE_WAIT_DIRTY bclone_wait_dirty zfs_bclone_wait_dirty
|
||||||
DIO_ENABLED dio_enabled zfs_dio_enabled
|
DIO_ENABLED dio_enabled zfs_dio_enabled
|
||||||
|
DIO_STRICT dio_strict zfs_dio_strict
|
||||||
XATTR_COMPAT xattr_compat zfs_xattr_compat
|
XATTR_COMPAT xattr_compat zfs_xattr_compat
|
||||||
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
|
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
|
||||||
ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max
|
ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max
|
||||||
|
@ -40,8 +40,10 @@
|
|||||||
|
|
||||||
verify_runnable "global"
|
verify_runnable "global"
|
||||||
|
|
||||||
|
log_must save_tunable DIO_STRICT
|
||||||
function cleanup
|
function cleanup
|
||||||
{
|
{
|
||||||
|
restore_tunable DIO_STRICT
|
||||||
zfs set recordsize=$rs $TESTPOOL/$TESTFS
|
zfs set recordsize=$rs $TESTPOOL/$TESTFS
|
||||||
zfs set direct=standard $TESTPOOL/$TESTFS
|
zfs set direct=standard $TESTPOOL/$TESTFS
|
||||||
log_must rm -f $tmp_file
|
log_must rm -f $tmp_file
|
||||||
@ -61,6 +63,13 @@ file_size=$((rs * 8))
|
|||||||
|
|
||||||
log_must stride_dd -i /dev/urandom -o $tmp_file -b $file_size -c 1
|
log_must stride_dd -i /dev/urandom -o $tmp_file -b $file_size -c 1
|
||||||
|
|
||||||
|
log_must set_tunable32 DIO_STRICT 0
|
||||||
|
log_must zfs set direct=standard $TESTPOOL/$TESTFS
|
||||||
|
# sub-pagesize direct writes/read will always pass if not strict.
|
||||||
|
log_must stride_dd -i /dev/urandom -o $tmp_file -b 512 -c 8 -D
|
||||||
|
log_must stride_dd -i $tmp_file -o /dev/null -b 512 -c 8 -d
|
||||||
|
|
||||||
|
log_must set_tunable32 DIO_STRICT 1
|
||||||
log_must zfs set direct=standard $TESTPOOL/$TESTFS
|
log_must zfs set direct=standard $TESTPOOL/$TESTFS
|
||||||
# sub-pagesize direct writes/read will always fail if direct=standard.
|
# sub-pagesize direct writes/read will always fail if direct=standard.
|
||||||
log_mustnot stride_dd -i /dev/urandom -o $tmp_file -b 512 -c 8 -D
|
log_mustnot stride_dd -i /dev/urandom -o $tmp_file -b 512 -c 8 -D
|
||||||
|
@ -48,6 +48,7 @@ TESTDS=${TESTPOOL}/${TESTFS}
|
|||||||
TESTFILE=${TESTDIR}/${TESTFILE0}
|
TESTFILE=${TESTDIR}/${TESTFILE0}
|
||||||
|
|
||||||
log_must save_tunable DIO_ENABLED
|
log_must save_tunable DIO_ENABLED
|
||||||
|
log_must save_tunable DIO_STRICT
|
||||||
typeset recordsize_saved=$(get_prop recordsize $TESTDS)
|
typeset recordsize_saved=$(get_prop recordsize $TESTDS)
|
||||||
typeset direct_saved=$(get_prop direct $TESTDS)
|
typeset direct_saved=$(get_prop direct $TESTDS)
|
||||||
|
|
||||||
@ -57,6 +58,7 @@ function cleanup
|
|||||||
zfs set recordsize=$recordsize_saved $TESTDS
|
zfs set recordsize=$recordsize_saved $TESTDS
|
||||||
zfs set direct=$direct_saved $TESTDS
|
zfs set direct=$direct_saved $TESTDS
|
||||||
restore_tunable DIO_ENABLED
|
restore_tunable DIO_ENABLED
|
||||||
|
restore_tunable DIO_STRICT
|
||||||
}
|
}
|
||||||
log_onexit cleanup
|
log_onexit cleanup
|
||||||
|
|
||||||
@ -154,6 +156,7 @@ for krs in 4 8 16 32 64 128 256 512 ; do
|
|||||||
done
|
done
|
||||||
|
|
||||||
# reset for write tests
|
# reset for write tests
|
||||||
|
log_must set_tunable32 DIO_STRICT 1
|
||||||
log_must zfs set recordsize=16K $TESTDS
|
log_must zfs set recordsize=16K $TESTDS
|
||||||
log_must zfs set direct=standard $TESTDS
|
log_must zfs set direct=standard $TESTDS
|
||||||
|
|
||||||
@ -173,4 +176,12 @@ log_must zpool sync
|
|||||||
assert_dioalign $TESTFILE $PAGE_SIZE 16384
|
assert_dioalign $TESTFILE $PAGE_SIZE 16384
|
||||||
log_mustnot dd if=/dev/urandom of=$TESTFILE bs=1024 count=256 oflag=direct
|
log_mustnot dd if=/dev/urandom of=$TESTFILE bs=1024 count=256 oflag=direct
|
||||||
|
|
||||||
|
# same again, but without strict, which should succeed.
|
||||||
|
log_must set_tunable32 DIO_STRICT 0
|
||||||
|
log_must rm -f $TESTFILE
|
||||||
|
log_must touch $TESTFILE
|
||||||
|
log_must zpool sync
|
||||||
|
assert_dioalign $TESTFILE $PAGE_SIZE 16384
|
||||||
|
log_must dd if=/dev/urandom of=$TESTFILE bs=1024 count=256 oflag=direct
|
||||||
|
|
||||||
log_pass $CLAIM
|
log_pass $CLAIM
|
||||||
|
Loading…
Reference in New Issue
Block a user