Illumos 5987 - zfs prefetch code needs work

5987 zfs prefetch code needs work
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Approved by: Gordon Ross <gordon.ross@nexenta.com>

References:
  https://www.illumos.org/issues/5987 zfs prefetch code needs work
  illumos/illumos-gate@cf6106c 5987 zfs prefetch code needs work

Porting notes:
- [module/zfs/dbuf.c]
  - 5f6d0b6 Handle block pointers with a corrupt logical size
- [module/zfs/dmu_zfetch.c]
  - c65aa5b Fix gcc missing parenthesis warnings
  - 428870f Update core ZFS code from build 121 to build 141.
  - 79c76d5 Change KM_PUSHPAGE -> KM_SLEEP
  - b8d06fc Switch KM_SLEEP to KM_PUSHPAGE
  - Account for ISO C90 "mixed declarations and code" warnings
  - Module parameters (new/changed):
    - Replaced zfetch_block_cap with zfetch_max_distance
      (max bytes to prefetch per stream; default 8MB, i.e. 8 * 1024 * 1024)
    - Preserved zfs_prefetch_disable as 'int' for consistency with
      existing Linux module options.
- [include/sys/trace_arc.h]
  - Added new tracepoints
    - DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__sync__wait__for__async);
    - DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__demand__hit__predictive__prefetch);
- [man/man5/zfs-module-parameters.5]
  - Updated man page

Ported-by: kernelOfTruth <kerneloftruth@gmail.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
Matthew Ahrens
2015-12-26 22:10:31 +01:00
committed by Brian Behlendorf
parent ab5cbbd107
commit 7f60329a26
10 changed files with 285 additions and 663 deletions
+21 -14
View File
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
@@ -386,7 +386,7 @@ dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp)
*/
static int
dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
boolean_t read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
{
dmu_buf_t **dbp;
uint64_t blkid, nblks, i;
@@ -396,15 +396,19 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
ASSERT(length <= DMU_MAX_ACCESS);
dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT;
if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz)
dbuf_flags |= DB_RF_NOPREFETCH;
/*
* Note: We directly notify the prefetch code of this read, so that
* we can tell it about the multi-block read. dbuf_read() only knows
* about the one block it is accessing.
*/
dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT |
DB_RF_NOPREFETCH;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
if (dn->dn_datablkshift) {
int blkshift = dn->dn_datablkshift;
nblks = (P2ROUNDUP(offset+length, 1ULL<<blkshift) -
P2ALIGN(offset, 1ULL<<blkshift)) >> blkshift;
nblks = (P2ROUNDUP(offset + length, 1ULL << blkshift) -
P2ALIGN(offset, 1ULL << blkshift)) >> blkshift;
} else {
if (offset + length > dn->dn_datablksz) {
zfs_panic_recover("zfs: accessing past end of object "
@@ -423,19 +427,24 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL);
blkid = dbuf_whichblock(dn, 0, offset);
for (i = 0; i < nblks; i++) {
dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag);
dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag);
if (db == NULL) {
rw_exit(&dn->dn_struct_rwlock);
dmu_buf_rele_array(dbp, nblks, tag);
zio_nowait(zio);
return (SET_ERROR(EIO));
}
/* initiate async i/o */
if (read) {
if (read)
(void) dbuf_read(db, zio, dbuf_flags);
}
dbp[i] = &db->db;
}
if ((flags & DMU_READ_NO_PREFETCH) == 0 && read &&
length < zfetch_array_rd_sz) {
dmu_zfetch(&dn->dn_zfetch, blkid, nblks);
}
rw_exit(&dn->dn_struct_rwlock);
/* wait for async i/o */
@@ -489,7 +498,8 @@ dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
int
dmu_buf_hold_array_by_bonus(dmu_buf_t *db_fake, uint64_t offset,
uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp)
uint64_t length, boolean_t read, void *tag, int *numbufsp,
dmu_buf_t ***dbpp)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
dnode_t *dn;
@@ -537,9 +547,6 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
uint64_t blkid;
int nblks, err;
if (zfs_prefetch_disable)
return;
if (len == 0) { /* they're interested in the bonus buffer */
dn = DMU_META_DNODE(os);