mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-25 11:47:43 +03:00
Support re-prioritizing asynchronous prefetches
When sequential scrubs were merged, all calls to arc_read() (including prefetch IOs) were given ZIO_PRIORITY_ASYNC_READ. Unfortunately, this interacts badly with an existing limitation: prefetch IOs cannot be re-prioritized after they have been issued. The result is that synchronous reads end up in the same vdev_queue as the scrub IOs and can (in some workloads) see multiple seconds of latency. This patch incorporates two changes. First, it ensures that all scrub IOs are given ZIO_PRIORITY_SCRUB so that the vdev_queue code can differentiate between these I/Os and user prefetches. Second, it introduces zio_change_priority() to provide the missing capability to upgrade a zio's priority. Reviewed-by: George Wilson <george.wilson@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Tom Caputi <tcaputi@datto.com> Closes #6921 Closes #6926
This commit is contained in:
committed by
Brian Behlendorf
parent
993669a7bf
commit
a8b2e30685
+38
-5
@@ -539,6 +539,8 @@ zio_walk_children(zio_t *pio, zio_link_t **zl)
|
||||
{
|
||||
list_t *cl = &pio->io_child_list;
|
||||
|
||||
ASSERT(MUTEX_HELD(&pio->io_lock));
|
||||
|
||||
*zl = (*zl == NULL) ? list_head(cl) : list_next(cl, *zl);
|
||||
if (*zl == NULL)
|
||||
return (NULL);
|
||||
@@ -573,8 +575,8 @@ zio_add_child(zio_t *pio, zio_t *cio)
|
||||
zl->zl_parent = pio;
|
||||
zl->zl_child = cio;
|
||||
|
||||
mutex_enter(&cio->io_lock);
|
||||
mutex_enter(&pio->io_lock);
|
||||
mutex_enter(&cio->io_lock);
|
||||
|
||||
ASSERT(pio->io_state[ZIO_WAIT_DONE] == 0);
|
||||
|
||||
@@ -587,8 +589,8 @@ zio_add_child(zio_t *pio, zio_t *cio)
|
||||
pio->io_child_count++;
|
||||
cio->io_parent_count++;
|
||||
|
||||
mutex_exit(&pio->io_lock);
|
||||
mutex_exit(&cio->io_lock);
|
||||
mutex_exit(&pio->io_lock);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -597,8 +599,8 @@ zio_remove_child(zio_t *pio, zio_t *cio, zio_link_t *zl)
|
||||
ASSERT(zl->zl_parent == pio);
|
||||
ASSERT(zl->zl_child == cio);
|
||||
|
||||
mutex_enter(&cio->io_lock);
|
||||
mutex_enter(&pio->io_lock);
|
||||
mutex_enter(&cio->io_lock);
|
||||
|
||||
list_remove(&pio->io_child_list, zl);
|
||||
list_remove(&cio->io_parent_list, zl);
|
||||
@@ -606,8 +608,8 @@ zio_remove_child(zio_t *pio, zio_t *cio, zio_link_t *zl)
|
||||
pio->io_child_count--;
|
||||
cio->io_parent_count--;
|
||||
|
||||
mutex_exit(&pio->io_lock);
|
||||
mutex_exit(&cio->io_lock);
|
||||
mutex_exit(&pio->io_lock);
|
||||
kmem_cache_free(zio_link_cache, zl);
|
||||
}
|
||||
|
||||
@@ -1963,14 +1965,16 @@ zio_reexecute(zio_t *pio)
|
||||
* cannot be affected by any side effects of reexecuting 'cio'.
|
||||
*/
|
||||
zio_link_t *zl = NULL;
|
||||
mutex_enter(&pio->io_lock);
|
||||
for (cio = zio_walk_children(pio, &zl); cio != NULL; cio = cio_next) {
|
||||
cio_next = zio_walk_children(pio, &zl);
|
||||
mutex_enter(&pio->io_lock);
|
||||
for (int w = 0; w < ZIO_WAIT_TYPES; w++)
|
||||
pio->io_children[cio->io_child_type][w]++;
|
||||
mutex_exit(&pio->io_lock);
|
||||
zio_reexecute(cio);
|
||||
mutex_enter(&pio->io_lock);
|
||||
}
|
||||
mutex_exit(&pio->io_lock);
|
||||
|
||||
/*
|
||||
* Now that all children have been reexecuted, execute the parent.
|
||||
@@ -3474,6 +3478,35 @@ zio_vdev_io_done(zio_t *zio)
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is used to change the priority of an existing zio that is
|
||||
* currently in-flight. This is used by the arc to upgrade priority in the
|
||||
* event that a demand read is made for a block that is currently queued
|
||||
* as a scrub or async read IO. Otherwise, the high priority read request
|
||||
* would end up having to wait for the lower priority IO.
|
||||
*/
|
||||
void
|
||||
zio_change_priority(zio_t *pio, zio_priority_t priority)
|
||||
{
|
||||
zio_t *cio, *cio_next;
|
||||
zio_link_t *zl = NULL;
|
||||
|
||||
ASSERT3U(priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
|
||||
|
||||
if (pio->io_vd != NULL && pio->io_vd->vdev_ops->vdev_op_leaf) {
|
||||
vdev_queue_change_io_priority(pio, priority);
|
||||
} else {
|
||||
pio->io_priority = priority;
|
||||
}
|
||||
|
||||
mutex_enter(&pio->io_lock);
|
||||
for (cio = zio_walk_children(pio, &zl); cio != NULL; cio = cio_next) {
|
||||
cio_next = zio_walk_children(pio, &zl);
|
||||
zio_change_priority(cio, priority);
|
||||
}
|
||||
mutex_exit(&pio->io_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* For non-raidz ZIOs, we can just copy aside the bad data read from the
|
||||
* disk, and use that to finish the checksum ereport later.
|
||||
|
||||
Reference in New Issue
Block a user