From 804e050457f1755cc37f39ef1f88786ba7e688d5 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Tue, 16 Jun 2015 23:06:27 +0200 Subject: [PATCH] Illumos 5175 - implement dmu_read_uio_dbuf() to improve cached read performance 5175 implement dmu_read_uio_dbuf() to improve cached read performance Reviewed by: Adam Leventhal Reviewed by: Alex Reece Reviewed by: George Wilson Reviewed by: Richard Elling Approved by: Robert Mustacchi References: https://www.illumos.org/issues/5175 https://github.com/illumos/illumos-gate/commit/f8554bb Porting notes: This patch doesn't include the changes for the COMSTAR (Common Multiprotocol SCSI Target) - since it's not available for ZoL. http://thegreyblog.blogspot.co.at/2010/02/setting-up-solaris-comstar-and.html Ported by: kernelOfTruth Signed-off-by: Brian Behlendorf Closes #3392 --- include/sys/dmu.h | 1 + module/zfs/dmu.c | 74 +++++++++++++++++++++++++++++++++++++++--- module/zfs/zfs_vnops.c | 14 ++++---- 3 files changed, 78 insertions(+), 11 deletions(-) diff --git a/include/sys/dmu.h b/include/sys/dmu.h index a010a68c4..4ad496ae0 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -714,6 +714,7 @@ int dmu_read_req(objset_t *os, uint64_t object, struct request *req); int dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx); int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size); +int dmu_read_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size); int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size, dmu_tx_t *tx); int dmu_write_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size, diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 81f434380..7e8328e77 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1199,8 +1199,8 @@ dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx) return (err); } -int -dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) +static int +dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size) { dmu_buf_t **dbp; int numbufs, i, err; @@ -1210,8 +1210,8 @@ dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) * NB: we could do this block-at-a-time, but it's nice * to be reading in parallel. */ - err = dmu_buf_hold_array(os, object, uio->uio_loffset, size, TRUE, FTAG, - &numbufs, &dbp); + err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size, + TRUE, FTAG, &numbufs, &dbp, 0); if (err) return (err); @@ -1253,6 +1253,58 @@ dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) return (err); } +/* + * Read 'size' bytes into the uio buffer. + * From object zdb->db_object. + * Starting at offset uio->uio_loffset. + * + * If the caller already has a dbuf in the target object + * (e.g. its bonus buffer), this routine is faster than dmu_read_uio(), + * because we don't have to find the dnode_t for the object. + */ +int +dmu_read_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb; + dnode_t *dn; + int err; + + if (size == 0) + return (0); + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + err = dmu_read_uio_dnode(dn, uio, size); + DB_DNODE_EXIT(db); + + return (err); +} + +/* + * Read 'size' bytes into the uio buffer. + * From the specified object + * Starting at offset uio->uio_loffset. + */ +int +dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) +{ + dnode_t *dn; + int err; + + if (size == 0) + return (0); + + err = dnode_hold(os, object, FTAG, &dn); + if (err) + return (err); + + err = dmu_read_uio_dnode(dn, uio, size); + + dnode_rele(dn, FTAG); + + return (err); +} + static int dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx) { @@ -1305,6 +1357,15 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx) return (err); } +/* + * Write 'size' bytes from the uio buffer. + * To object zdb->db_object. + * Starting at offset uio->uio_loffset. + * + * If the caller already has a dbuf in the target object + * (e.g. its bonus buffer), this routine is faster than dmu_write_uio(), + * because we don't have to find the dnode_t for the object. + */ int dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size, dmu_tx_t *tx) @@ -1324,6 +1385,11 @@ dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size, return (err); } +/* + * Write 'size' bytes from the uio buffer. + * To the specified object. + * Starting at offset uio->uio_loffset. + */ int dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size, dmu_tx_t *tx) diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 19a4132e4..7780fe902 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -376,7 +376,6 @@ mappedread(struct inode *ip, int nbytes, uio_t *uio) struct address_space *mp = ip->i_mapping; struct page *pp; znode_t *zp = ITOZ(ip); - objset_t *os = ITOZSB(ip)->z_os; int64_t start, off; uint64_t bytes; int len = nbytes; @@ -403,7 +402,8 @@ mappedread(struct inode *ip, int nbytes, uio_t *uio) unlock_page(pp); page_cache_release(pp); } else { - error = dmu_read_uio(os, zp->z_id, uio, bytes); + error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), + uio, bytes); } len -= bytes; @@ -440,7 +440,6 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); - objset_t *os; ssize_t n, nbytes; int error = 0; rl_t *rl; @@ -450,7 +449,6 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); - os = zsb->z_os; if (zp->z_pflags & ZFS_AV_QUARANTINED) { ZFS_EXIT(zsb); @@ -531,10 +529,12 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) nbytes = MIN(n, zfs_read_chunk_size - P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); - if (zp->z_is_mapped && !(ioflag & O_DIRECT)) + if (zp->z_is_mapped && !(ioflag & O_DIRECT)) { error = mappedread(ip, nbytes, uio); - else - error = dmu_read_uio(os, zp->z_id, uio, nbytes); + } else { + error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), + uio, nbytes); + } if (error) { /* convert checksum errors into IO errors */