Illumos 5175 - implement dmu_read_uio_dbuf() to improve cached read performance

5175 implement dmu_read_uio_dbuf() to improve cached read performance
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Alex Reece <alex.reece@delphix.com>
Reviewed by: George Wilson <george@delphix.com>
Reviewed by: Richard Elling <richard.elling@gmail.com>
Approved by: Robert Mustacchi <rm@joyent.com>

References:
  https://www.illumos.org/issues/5175
  https://github.com/illumos/illumos-gate/commit/f8554bb

Porting notes:

This patch omits the COMSTAR (Common Multiprotocol SCSI Target)
changes, since COMSTAR is not available for ZoL.

http://thegreyblog.blogspot.co.at/2010/02/setting-up-solaris-comstar-and.html

Ported by: kernelOfTruth <kerneloftruth@gmail.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3392
This commit is contained in:
Matthew Ahrens 2015-06-16 23:06:27 +02:00 committed by Brian Behlendorf
parent d6c9ff0a6b
commit 804e050457
3 changed files with 78 additions and 11 deletions

View File

@ -714,6 +714,7 @@ int dmu_read_req(objset_t *os, uint64_t object, struct request *req);
int dmu_write_req(objset_t *os, uint64_t object, struct request *req, int dmu_write_req(objset_t *os, uint64_t object, struct request *req,
dmu_tx_t *tx); dmu_tx_t *tx);
int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size); int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size);
int dmu_read_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size);
int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size, int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size,
dmu_tx_t *tx); dmu_tx_t *tx);
int dmu_write_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size, int dmu_write_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size,

View File

@ -1199,8 +1199,8 @@ dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx)
return (err); return (err);
} }
int static int
dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size)
{ {
dmu_buf_t **dbp; dmu_buf_t **dbp;
int numbufs, i, err; int numbufs, i, err;
@ -1210,8 +1210,8 @@ dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size)
* NB: we could do this block-at-a-time, but it's nice * NB: we could do this block-at-a-time, but it's nice
* to be reading in parallel. * to be reading in parallel.
*/ */
err = dmu_buf_hold_array(os, object, uio->uio_loffset, size, TRUE, FTAG, err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size,
&numbufs, &dbp); TRUE, FTAG, &numbufs, &dbp, 0);
if (err) if (err)
return (err); return (err);
@ -1253,6 +1253,58 @@ dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size)
return (err); return (err);
} }
/*
 * Copy 'size' bytes from the object that owns 'zdb' (zdb->db_object)
 * into the uio buffer, beginning at offset uio->uio_loffset.
 *
 * A caller that already holds a dbuf belonging to the target object
 * (e.g. its bonus buffer) should prefer this over dmu_read_uio():
 * it is faster because the dnode_t for the object does not have to
 * be looked up.
 *
 * Returns 0 without touching the dbuf when 'size' is zero; otherwise
 * returns whatever dmu_read_uio_dnode() returns.
 */
int
dmu_read_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size)
{
	dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)zdb;
	int error = 0;

	if (size != 0) {
		/* The dnode is only used between DB_DNODE_ENTER and _EXIT. */
		DB_DNODE_ENTER(dbi);
		error = dmu_read_uio_dnode(DB_DNODE(dbi), uio, size);
		DB_DNODE_EXIT(dbi);
	}

	return (error);
}
/*
 * Copy 'size' bytes from object 'object' in objset 'os' into the uio
 * buffer, beginning at offset uio->uio_loffset.
 *
 * Returns 0 immediately when 'size' is zero.  Otherwise the dnode is
 * held for the duration of the read; a dnode_hold() failure is passed
 * back to the caller unchanged, and on success the result of
 * dmu_read_uio_dnode() is returned.
 */
int
dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size)
{
	dnode_t *dnode;
	int error;

	if (size == 0)
		return (0);

	error = dnode_hold(os, object, FTAG, &dnode);
	if (error == 0) {
		error = dmu_read_uio_dnode(dnode, uio, size);
		/* Drop the hold taken above, whether or not the read worked. */
		dnode_rele(dnode, FTAG);
	}

	return (error);
}
static int static int
dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx) dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
{ {
@ -1305,6 +1357,15 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
return (err); return (err);
} }
/*
* Write 'size' bytes from the uio buffer.
* To object zdb->db_object.
* Starting at offset uio->uio_loffset.
*
* If the caller already has a dbuf in the target object
* (e.g. its bonus buffer), this routine is faster than dmu_write_uio(),
* because we don't have to find the dnode_t for the object.
*/
int int
dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size, dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size,
dmu_tx_t *tx) dmu_tx_t *tx)
@ -1324,6 +1385,11 @@ dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size,
return (err); return (err);
} }
/*
* Write 'size' bytes from the uio buffer.
* To the specified object.
* Starting at offset uio->uio_loffset.
*/
int int
dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size, dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size,
dmu_tx_t *tx) dmu_tx_t *tx)

View File

@ -376,7 +376,6 @@ mappedread(struct inode *ip, int nbytes, uio_t *uio)
struct address_space *mp = ip->i_mapping; struct address_space *mp = ip->i_mapping;
struct page *pp; struct page *pp;
znode_t *zp = ITOZ(ip); znode_t *zp = ITOZ(ip);
objset_t *os = ITOZSB(ip)->z_os;
int64_t start, off; int64_t start, off;
uint64_t bytes; uint64_t bytes;
int len = nbytes; int len = nbytes;
@ -403,7 +402,8 @@ mappedread(struct inode *ip, int nbytes, uio_t *uio)
unlock_page(pp); unlock_page(pp);
page_cache_release(pp); page_cache_release(pp);
} else { } else {
error = dmu_read_uio(os, zp->z_id, uio, bytes); error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
uio, bytes);
} }
len -= bytes; len -= bytes;
@ -440,7 +440,6 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
{ {
znode_t *zp = ITOZ(ip); znode_t *zp = ITOZ(ip);
zfs_sb_t *zsb = ITOZSB(ip); zfs_sb_t *zsb = ITOZSB(ip);
objset_t *os;
ssize_t n, nbytes; ssize_t n, nbytes;
int error = 0; int error = 0;
rl_t *rl; rl_t *rl;
@ -450,7 +449,6 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
ZFS_ENTER(zsb); ZFS_ENTER(zsb);
ZFS_VERIFY_ZP(zp); ZFS_VERIFY_ZP(zp);
os = zsb->z_os;
if (zp->z_pflags & ZFS_AV_QUARANTINED) { if (zp->z_pflags & ZFS_AV_QUARANTINED) {
ZFS_EXIT(zsb); ZFS_EXIT(zsb);
@ -531,10 +529,12 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
nbytes = MIN(n, zfs_read_chunk_size - nbytes = MIN(n, zfs_read_chunk_size -
P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); P2PHASE(uio->uio_loffset, zfs_read_chunk_size));
if (zp->z_is_mapped && !(ioflag & O_DIRECT)) if (zp->z_is_mapped && !(ioflag & O_DIRECT)) {
error = mappedread(ip, nbytes, uio); error = mappedread(ip, nbytes, uio);
else } else {
error = dmu_read_uio(os, zp->z_id, uio, nbytes); error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
uio, nbytes);
}
if (error) { if (error) {
/* convert checksum errors into IO errors */ /* convert checksum errors into IO errors */