mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-24 03:08:51 +03:00
Support re-prioritizing asynchronous prefetches
When sequential scrubs were merged, all calls to arc_read() (including prefetch IOs) were given ZIO_PRIORITY_ASYNC_READ. Unfortunately, this behaves badly with an existing issue where prefetch IOs cannot be re-prioritized after the issue. The result is that synchronous reads end up in the same vdev_queue as the scrub IOs and can have (in some workloads) multiple seconds of latency. This patch incorporates 2 changes. The first ensures that all scrub IOs are given ZIO_PRIORITY_SCRUB to allow the vdev_queue code to differentiate between these I/Os and user prefetches. Second, this patch introduces zio_change_priority() to provide the missing capability to upgrade a zio's priority. Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Tom Caputi <tcaputi@datto.com> Closes #6921 Closes #6926
This commit is contained in:
committed by
Brian Behlendorf
parent
993669a7bf
commit
a8b2e30685
+28
-26
@@ -663,7 +663,7 @@ typedef struct arc_stats {
|
||||
kstat_named_t arcstat_dnode_limit;
|
||||
kstat_named_t arcstat_meta_max;
|
||||
kstat_named_t arcstat_meta_min;
|
||||
kstat_named_t arcstat_sync_wait_for_async;
|
||||
kstat_named_t arcstat_async_upgrade_sync;
|
||||
kstat_named_t arcstat_demand_hit_predictive_prefetch;
|
||||
kstat_named_t arcstat_demand_hit_prescient_prefetch;
|
||||
kstat_named_t arcstat_need_free;
|
||||
@@ -763,7 +763,7 @@ static arc_stats_t arc_stats = {
|
||||
{ "arc_dnode_limit", KSTAT_DATA_UINT64 },
|
||||
{ "arc_meta_max", KSTAT_DATA_UINT64 },
|
||||
{ "arc_meta_min", KSTAT_DATA_UINT64 },
|
||||
{ "sync_wait_for_async", KSTAT_DATA_UINT64 },
|
||||
{ "async_upgrade_sync", KSTAT_DATA_UINT64 },
|
||||
{ "demand_hit_predictive_prefetch", KSTAT_DATA_UINT64 },
|
||||
{ "demand_hit_prescient_prefetch", KSTAT_DATA_UINT64 },
|
||||
{ "arc_need_free", KSTAT_DATA_UINT64 },
|
||||
@@ -5911,32 +5911,20 @@ top:
|
||||
*arc_flags |= ARC_FLAG_CACHED;
|
||||
|
||||
if (HDR_IO_IN_PROGRESS(hdr)) {
|
||||
zio_t *head_zio = hdr->b_l1hdr.b_acb->acb_zio_head;
|
||||
|
||||
ASSERT3P(head_zio, !=, NULL);
|
||||
if ((hdr->b_flags & ARC_FLAG_PRIO_ASYNC_READ) &&
|
||||
priority == ZIO_PRIORITY_SYNC_READ) {
|
||||
/*
|
||||
* This sync read must wait for an
|
||||
* in-progress async read (e.g. a predictive
|
||||
* prefetch). Async reads are queued
|
||||
* separately at the vdev_queue layer, so
|
||||
* this is a form of priority inversion.
|
||||
* Ideally, we would "inherit" the demand
|
||||
* i/o's priority by moving the i/o from
|
||||
* the async queue to the synchronous queue,
|
||||
* but there is currently no mechanism to do
|
||||
* so. Track this so that we can evaluate
|
||||
* the magnitude of this potential performance
|
||||
* problem.
|
||||
*
|
||||
* Note that if the prefetch i/o is already
|
||||
* active (has been issued to the device),
|
||||
* the prefetch improved performance, because
|
||||
* we issued it sooner than we would have
|
||||
* without the prefetch.
|
||||
* This is a sync read that needs to wait for
|
||||
* an in-flight async read. Request that the
|
||||
* zio have its priority upgraded.
|
||||
*/
|
||||
DTRACE_PROBE1(arc__sync__wait__for__async,
|
||||
zio_change_priority(head_zio, priority);
|
||||
DTRACE_PROBE1(arc__async__upgrade__sync,
|
||||
arc_buf_hdr_t *, hdr);
|
||||
ARCSTAT_BUMP(arcstat_sync_wait_for_async);
|
||||
ARCSTAT_BUMP(arcstat_async_upgrade_sync);
|
||||
}
|
||||
if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) {
|
||||
arc_hdr_clear_flags(hdr,
|
||||
@@ -5966,6 +5954,7 @@ top:
|
||||
spa, NULL, NULL, NULL, zio_flags);
|
||||
|
||||
ASSERT3P(acb->acb_done, !=, NULL);
|
||||
acb->acb_zio_head = head_zio;
|
||||
acb->acb_next = hdr->b_l1hdr.b_acb;
|
||||
hdr->b_l1hdr.b_acb = acb;
|
||||
mutex_exit(hash_lock);
|
||||
@@ -6182,14 +6171,17 @@ top:
|
||||
vd = NULL;
|
||||
}
|
||||
|
||||
if (priority == ZIO_PRIORITY_ASYNC_READ)
|
||||
/*
|
||||
* We count both async reads and scrub IOs as asynchronous so
|
||||
* that both can be upgraded in the event of a cache hit while
|
||||
* the read IO is still in-flight.
|
||||
*/
|
||||
if (priority == ZIO_PRIORITY_ASYNC_READ ||
|
||||
priority == ZIO_PRIORITY_SCRUB)
|
||||
arc_hdr_set_flags(hdr, ARC_FLAG_PRIO_ASYNC_READ);
|
||||
else
|
||||
arc_hdr_clear_flags(hdr, ARC_FLAG_PRIO_ASYNC_READ);
|
||||
|
||||
if (hash_lock != NULL)
|
||||
mutex_exit(hash_lock);
|
||||
|
||||
/*
|
||||
* At this point, we have a level 1 cache miss. Try again in
|
||||
* L2ARC if possible.
|
||||
@@ -6260,6 +6252,10 @@ top:
|
||||
ZIO_FLAG_CANFAIL |
|
||||
ZIO_FLAG_DONT_PROPAGATE |
|
||||
ZIO_FLAG_DONT_RETRY, B_FALSE);
|
||||
acb->acb_zio_head = rzio;
|
||||
|
||||
if (hash_lock != NULL)
|
||||
mutex_exit(hash_lock);
|
||||
|
||||
DTRACE_PROBE2(l2arc__read, vdev_t *, vd,
|
||||
zio_t *, rzio);
|
||||
@@ -6276,6 +6272,8 @@ top:
|
||||
goto out;
|
||||
|
||||
/* l2arc read error; goto zio_read() */
|
||||
if (hash_lock != NULL)
|
||||
mutex_enter(hash_lock);
|
||||
} else {
|
||||
DTRACE_PROBE1(l2arc__miss,
|
||||
arc_buf_hdr_t *, hdr);
|
||||
@@ -6296,6 +6294,10 @@ top:
|
||||
|
||||
rzio = zio_read(pio, spa, bp, hdr_abd, size,
|
||||
arc_read_done, hdr, priority, zio_flags, zb);
|
||||
acb->acb_zio_head = rzio;
|
||||
|
||||
if (hash_lock != NULL)
|
||||
mutex_exit(hash_lock);
|
||||
|
||||
if (*arc_flags & ARC_FLAG_WAIT) {
|
||||
rc = zio_wait(rzio);
|
||||
|
||||
Reference in New Issue
Block a user