Support re-prioritizing asynchronous prefetches

When sequential scrubs were merged, all calls to arc_read()
(including prefetch IOs) were given ZIO_PRIORITY_ASYNC_READ.
Unfortunately, this behaves badly with an existing issue where
prefetch IOs cannot be re-prioritized after the issue. The
result is that synchronous reads end up in the same vdev_queue
as the scrub IOs and can have (in some workloads) multiple
seconds of latency.

This patch incorporates 2 changes. The first ensures that all
scrub IOs are given ZIO_PRIORITY_SCRUB to allow the vdev_queue
code to differentiate between these I/Os and user prefetches.
Second, this patch introduces zio_change_priority() to provide
the missing capability to upgrade a zio's priority.

Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #6921 
Closes #6926
This commit is contained in:
Tom Caputi
2017-12-21 12:13:06 -05:00
committed by Brian Behlendorf
parent 993669a7bf
commit a8b2e30685
11 changed files with 124 additions and 43 deletions
+28 -26
View File
@@ -663,7 +663,7 @@ typedef struct arc_stats {
kstat_named_t arcstat_dnode_limit;
kstat_named_t arcstat_meta_max;
kstat_named_t arcstat_meta_min;
kstat_named_t arcstat_sync_wait_for_async;
kstat_named_t arcstat_async_upgrade_sync;
kstat_named_t arcstat_demand_hit_predictive_prefetch;
kstat_named_t arcstat_demand_hit_prescient_prefetch;
kstat_named_t arcstat_need_free;
@@ -763,7 +763,7 @@ static arc_stats_t arc_stats = {
{ "arc_dnode_limit", KSTAT_DATA_UINT64 },
{ "arc_meta_max", KSTAT_DATA_UINT64 },
{ "arc_meta_min", KSTAT_DATA_UINT64 },
{ "sync_wait_for_async", KSTAT_DATA_UINT64 },
{ "async_upgrade_sync", KSTAT_DATA_UINT64 },
{ "demand_hit_predictive_prefetch", KSTAT_DATA_UINT64 },
{ "demand_hit_prescient_prefetch", KSTAT_DATA_UINT64 },
{ "arc_need_free", KSTAT_DATA_UINT64 },
@@ -5911,32 +5911,20 @@ top:
*arc_flags |= ARC_FLAG_CACHED;
if (HDR_IO_IN_PROGRESS(hdr)) {
zio_t *head_zio = hdr->b_l1hdr.b_acb->acb_zio_head;
ASSERT3P(head_zio, !=, NULL);
if ((hdr->b_flags & ARC_FLAG_PRIO_ASYNC_READ) &&
priority == ZIO_PRIORITY_SYNC_READ) {
/*
* This sync read must wait for an
* in-progress async read (e.g. a predictive
* prefetch). Async reads are queued
* separately at the vdev_queue layer, so
* this is a form of priority inversion.
* Ideally, we would "inherit" the demand
* i/o's priority by moving the i/o from
* the async queue to the synchronous queue,
* but there is currently no mechanism to do
* so. Track this so that we can evaluate
* the magnitude of this potential performance
* problem.
*
* Note that if the prefetch i/o is already
* active (has been issued to the device),
* the prefetch improved performance, because
* we issued it sooner than we would have
* without the prefetch.
* This is a sync read that needs to wait for
* an in-flight async read. Request that the
* zio have its priority upgraded.
*/
DTRACE_PROBE1(arc__sync__wait__for__async,
zio_change_priority(head_zio, priority);
DTRACE_PROBE1(arc__async__upgrade__sync,
arc_buf_hdr_t *, hdr);
ARCSTAT_BUMP(arcstat_sync_wait_for_async);
ARCSTAT_BUMP(arcstat_async_upgrade_sync);
}
if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) {
arc_hdr_clear_flags(hdr,
@@ -5966,6 +5954,7 @@ top:
spa, NULL, NULL, NULL, zio_flags);
ASSERT3P(acb->acb_done, !=, NULL);
acb->acb_zio_head = head_zio;
acb->acb_next = hdr->b_l1hdr.b_acb;
hdr->b_l1hdr.b_acb = acb;
mutex_exit(hash_lock);
@@ -6182,14 +6171,17 @@ top:
vd = NULL;
}
if (priority == ZIO_PRIORITY_ASYNC_READ)
/*
* We count both async reads and scrub IOs as asynchronous so
* that both can be upgraded in the event of a cache hit while
* the read IO is still in-flight.
*/
if (priority == ZIO_PRIORITY_ASYNC_READ ||
priority == ZIO_PRIORITY_SCRUB)
arc_hdr_set_flags(hdr, ARC_FLAG_PRIO_ASYNC_READ);
else
arc_hdr_clear_flags(hdr, ARC_FLAG_PRIO_ASYNC_READ);
if (hash_lock != NULL)
mutex_exit(hash_lock);
/*
* At this point, we have a level 1 cache miss. Try again in
* L2ARC if possible.
@@ -6260,6 +6252,10 @@ top:
ZIO_FLAG_CANFAIL |
ZIO_FLAG_DONT_PROPAGATE |
ZIO_FLAG_DONT_RETRY, B_FALSE);
acb->acb_zio_head = rzio;
if (hash_lock != NULL)
mutex_exit(hash_lock);
DTRACE_PROBE2(l2arc__read, vdev_t *, vd,
zio_t *, rzio);
@@ -6276,6 +6272,8 @@ top:
goto out;
/* l2arc read error; goto zio_read() */
if (hash_lock != NULL)
mutex_enter(hash_lock);
} else {
DTRACE_PROBE1(l2arc__miss,
arc_buf_hdr_t *, hdr);
@@ -6296,6 +6294,10 @@ top:
rzio = zio_read(pio, spa, bp, hdr_abd, size,
arc_read_done, hdr, priority, zio_flags, zb);
acb->acb_zio_head = rzio;
if (hash_lock != NULL)
mutex_exit(hash_lock);
if (*arc_flags & ARC_FLAG_WAIT) {
rc = zio_wait(rzio);