mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-27 04:32:16 +03:00
Illumos 5987 - zfs prefetch code needs work
5987 zfs prefetch code needs work
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Approved by: Gordon Ross <gordon.ross@nexenta.com>

References:
  https://www.illumos.org/issues/5987 zfs prefetch code needs work
  illumos/illumos-gate@cf6106c 5987 zfs prefetch code needs work

Porting notes:
- [module/zfs/dbuf.c]
  - 5f6d0b6 Handle block pointers with a corrupt logical size
- [module/zfs/dmu_zfetch.c]
  - c65aa5b Fix gcc missing parenthesis warnings
  - 428870f Update core ZFS code from build 121 to build 141.
  - 79c76d5 Change KM_PUSHPAGE -> KM_SLEEP
  - b8d06fc Switch KM_SLEEP to KM_PUSHPAGE
  - Account for ISO C90 - mixed declarations and code - warnings
  - Module parameters (new/changed):
    - Replaced zfetch_block_cap with zfetch_max_distance
      (Max bytes to prefetch per stream (default 8MB; 8 * 1024 * 1024))
    - Preserved zfs_prefetch_disable as 'int' for consistency with
      existing Linux module options.
- [include/sys/trace_arc.h]
  - Added new tracepoints
    - DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__sync__wait__for__async);
    - DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__demand__hit__predictive__prefetch);
- [man/man5/zfs-module-parameters.5]
  - Updated man page

Ported-by: kernelOfTruth <kerneloftruth@gmail.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
committed by
Brian Behlendorf
parent
ab5cbbd107
commit
7f60329a26
+67
-7
@@ -474,6 +474,8 @@ typedef struct arc_stats {
|
||||
kstat_named_t arcstat_meta_limit;
|
||||
kstat_named_t arcstat_meta_max;
|
||||
kstat_named_t arcstat_meta_min;
|
||||
kstat_named_t arcstat_sync_wait_for_async;
|
||||
kstat_named_t arcstat_demand_hit_predictive_prefetch;
|
||||
kstat_named_t arcstat_need_free;
|
||||
kstat_named_t arcstat_sys_free;
|
||||
} arc_stats_t;
|
||||
@@ -568,6 +570,8 @@ static arc_stats_t arc_stats = {
|
||||
{ "arc_meta_limit", KSTAT_DATA_UINT64 },
|
||||
{ "arc_meta_max", KSTAT_DATA_UINT64 },
|
||||
{ "arc_meta_min", KSTAT_DATA_UINT64 },
|
||||
{ "sync_wait_for_async", KSTAT_DATA_UINT64 },
|
||||
{ "demand_hit_predictive_prefetch", KSTAT_DATA_UINT64 },
|
||||
{ "arc_need_free", KSTAT_DATA_UINT64 },
|
||||
{ "arc_sys_free", KSTAT_DATA_UINT64 }
|
||||
};
|
||||
@@ -4244,6 +4248,36 @@ top:
|
||||
|
||||
if (HDR_IO_IN_PROGRESS(hdr)) {
|
||||
|
||||
if ((hdr->b_flags & ARC_FLAG_PRIO_ASYNC_READ) &&
|
||||
priority == ZIO_PRIORITY_SYNC_READ) {
|
||||
/*
|
||||
* This sync read must wait for an
|
||||
* in-progress async read (e.g. a predictive
|
||||
* prefetch). Async reads are queued
|
||||
* separately at the vdev_queue layer, so
|
||||
* this is a form of priority inversion.
|
||||
* Ideally, we would "inherit" the demand
|
||||
* i/o's priority by moving the i/o from
|
||||
* the async queue to the synchronous queue,
|
||||
* but there is currently no mechanism to do
|
||||
* so. Track this so that we can evaluate
|
||||
* the magnitude of this potential performance
|
||||
* problem.
|
||||
*
|
||||
* Note that if the prefetch i/o is already
|
||||
* active (has been issued to the device),
|
||||
* the prefetch improved performance, because
|
||||
* we issued it sooner than we would have
|
||||
* without the prefetch.
|
||||
*/
|
||||
DTRACE_PROBE1(arc__sync__wait__for__async,
|
||||
arc_buf_hdr_t *, hdr);
|
||||
ARCSTAT_BUMP(arcstat_sync_wait_for_async);
|
||||
}
|
||||
if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) {
|
||||
hdr->b_flags &= ~ARC_FLAG_PREDICTIVE_PREFETCH;
|
||||
}
|
||||
|
||||
if (*arc_flags & ARC_FLAG_WAIT) {
|
||||
cv_wait(&hdr->b_l1hdr.b_cv, hash_lock);
|
||||
mutex_exit(hash_lock);
|
||||
@@ -4252,7 +4286,7 @@ top:
|
||||
ASSERT(*arc_flags & ARC_FLAG_NOWAIT);
|
||||
|
||||
if (done) {
|
||||
arc_callback_t *acb = NULL;
|
||||
arc_callback_t *acb = NULL;
|
||||
|
||||
acb = kmem_zalloc(sizeof (arc_callback_t),
|
||||
KM_SLEEP);
|
||||
@@ -4277,6 +4311,19 @@ top:
|
||||
hdr->b_l1hdr.b_state == arc_mfu);
|
||||
|
||||
if (done) {
|
||||
if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) {
|
||||
/*
|
||||
* This is a demand read which does not have to
|
||||
* wait for i/o because we did a predictive
|
||||
* prefetch i/o for it, which has completed.
|
||||
*/
|
||||
DTRACE_PROBE1(
|
||||
arc__demand__hit__predictive__prefetch,
|
||||
arc_buf_hdr_t *, hdr);
|
||||
ARCSTAT_BUMP(
|
||||
arcstat_demand_hit_predictive_prefetch);
|
||||
hdr->b_flags &= ~ARC_FLAG_PREDICTIVE_PREFETCH;
|
||||
}
|
||||
add_reference(hdr, hash_lock, private);
|
||||
/*
|
||||
* If this block is already in use, create a new
|
||||
@@ -4349,12 +4396,16 @@ top:
|
||||
goto top; /* restart the IO request */
|
||||
}
|
||||
|
||||
/* if this is a prefetch, we don't have a reference */
|
||||
if (*arc_flags & ARC_FLAG_PREFETCH) {
|
||||
/*
|
||||
* If there is a callback, we pass our reference to
|
||||
* it; otherwise we remove our reference.
|
||||
*/
|
||||
if (done == NULL) {
|
||||
(void) remove_reference(hdr, hash_lock,
|
||||
private);
|
||||
hdr->b_flags |= ARC_FLAG_PREFETCH;
|
||||
}
|
||||
if (*arc_flags & ARC_FLAG_PREFETCH)
|
||||
hdr->b_flags |= ARC_FLAG_PREFETCH;
|
||||
if (*arc_flags & ARC_FLAG_L2CACHE)
|
||||
hdr->b_flags |= ARC_FLAG_L2CACHE;
|
||||
if (*arc_flags & ARC_FLAG_L2COMPRESS)
|
||||
@@ -4377,11 +4428,13 @@ top:
|
||||
ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
|
||||
ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
|
||||
|
||||
/* if this is a prefetch, we don't have a reference */
|
||||
/*
|
||||
* If there is a callback, we pass a reference to it.
|
||||
*/
|
||||
if (done != NULL)
|
||||
add_reference(hdr, hash_lock, private);
|
||||
if (*arc_flags & ARC_FLAG_PREFETCH)
|
||||
hdr->b_flags |= ARC_FLAG_PREFETCH;
|
||||
else
|
||||
add_reference(hdr, hash_lock, private);
|
||||
if (*arc_flags & ARC_FLAG_L2CACHE)
|
||||
hdr->b_flags |= ARC_FLAG_L2CACHE;
|
||||
if (*arc_flags & ARC_FLAG_L2COMPRESS)
|
||||
@@ -4399,6 +4452,8 @@ top:
|
||||
arc_access(hdr, hash_lock);
|
||||
}
|
||||
|
||||
if (*arc_flags & ARC_FLAG_PREDICTIVE_PREFETCH)
|
||||
hdr->b_flags |= ARC_FLAG_PREDICTIVE_PREFETCH;
|
||||
ASSERT(!GHOST_STATE(hdr->b_l1hdr.b_state));
|
||||
|
||||
acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP);
|
||||
@@ -4438,6 +4493,11 @@ top:
|
||||
demand, prefetch, !HDR_ISTYPE_METADATA(hdr),
|
||||
data, metadata, misses);
|
||||
|
||||
if (priority == ZIO_PRIORITY_ASYNC_READ)
|
||||
hdr->b_flags |= ARC_FLAG_PRIO_ASYNC_READ;
|
||||
else
|
||||
hdr->b_flags &= ~ARC_FLAG_PRIO_ASYNC_READ;
|
||||
|
||||
if (vd != NULL && l2arc_ndev != 0 && !(l2arc_norw && devw)) {
|
||||
/*
|
||||
* Read from the L2ARC if the following are true:
|
||||
|
||||
Reference in New Issue
Block a user