210 lines
6.4 KiB
Diff
210 lines
6.4 KiB
Diff
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||
|
From: Gvozden Neskovic <neskovic@gmail.com>
|
||
|
Date: Fri, 4 Aug 2017 11:29:56 +0200
|
||
|
Subject: [PATCH] vdev_mirror: load balancing fixes
|
||
|
MIME-Version: 1.0
|
||
|
Content-Type: text/plain; charset=UTF-8
|
||
|
Content-Transfer-Encoding: 8bit
|
||
|
|
||
|
vdev_queue:
|
||
|
- Track the last position of each vdev, including the io size,
|
||
|
in order to detect linear access of the following zio.
|
||
|
- Remove duplicate `vq_lastoffset`
|
||
|
|
||
|
vdev_mirror:
|
||
|
- Correctly calculate the zio offset (signedness issue)
|
||
|
- Deprecate `vdev_queue_register_lastoffset()`
|
||
|
- Add `VDEV_LABEL_START_SIZE` to zio offset of leaf vdevs
|
||
|
|
||
|
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||
|
Signed-off-by: Gvozden Neskovic <neskovic@gmail.com>
|
||
|
Closes #6461
|
||
|
(cherry picked from commit 06acbbc429bfe7197e5fc3a49acfeef5c37b64c8)
|
||
|
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||
|
---
|
||
|
include/sys/vdev.h | 3 +--
|
||
|
include/sys/vdev_impl.h | 1 -
|
||
|
module/zfs/vdev_mirror.c | 36 ++++++++++++++++--------------------
|
||
|
module/zfs/vdev_queue.c | 21 +++++++--------------
|
||
|
4 files changed, 24 insertions(+), 37 deletions(-)
|
||
|
|
||
|
diff --git a/include/sys/vdev.h b/include/sys/vdev.h
|
||
|
index 7157ef43f..473d2691c 100644
|
||
|
--- a/include/sys/vdev.h
|
||
|
+++ b/include/sys/vdev.h
|
||
|
@@ -125,8 +125,7 @@ extern zio_t *vdev_queue_io(zio_t *zio);
|
||
|
extern void vdev_queue_io_done(zio_t *zio);
|
||
|
|
||
|
extern int vdev_queue_length(vdev_t *vd);
|
||
|
-extern uint64_t vdev_queue_lastoffset(vdev_t *vd);
|
||
|
-extern void vdev_queue_register_lastoffset(vdev_t *vd, zio_t *zio);
|
||
|
+extern uint64_t vdev_queue_last_offset(vdev_t *vd);
|
||
|
|
||
|
extern void vdev_config_dirty(vdev_t *vd);
|
||
|
extern void vdev_config_clean(vdev_t *vd);
|
||
|
diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
|
||
|
index 7c5e54b08..4c2e3cd2e 100644
|
||
|
--- a/include/sys/vdev_impl.h
|
||
|
+++ b/include/sys/vdev_impl.h
|
||
|
@@ -127,7 +127,6 @@ struct vdev_queue {
|
||
|
hrtime_t vq_io_delta_ts;
|
||
|
zio_t vq_io_search; /* used as local for stack reduction */
|
||
|
kmutex_t vq_lock;
|
||
|
- uint64_t vq_lastoffset;
|
||
|
};
|
||
|
|
||
|
/*
|
||
|
diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c
|
||
|
index 0439e4b46..d230b4db4 100644
|
||
|
--- a/module/zfs/vdev_mirror.c
|
||
|
+++ b/module/zfs/vdev_mirror.c
|
||
|
@@ -116,7 +116,8 @@ static const zio_vsd_ops_t vdev_mirror_vsd_ops = {
|
||
|
static int
|
||
|
vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset)
|
||
|
{
|
||
|
- uint64_t lastoffset;
|
||
|
+ uint64_t last_offset;
|
||
|
+ int64_t offset_diff;
|
||
|
int load;
|
||
|
|
||
|
/* All DVAs have equal weight at the root. */
|
||
|
@@ -129,13 +130,17 @@ vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset)
|
||
|
* worse overall when resilvering with compared to without.
|
||
|
*/
|
||
|
|
||
|
+ /* Fix zio_offset for leaf vdevs */
|
||
|
+ if (vd->vdev_ops->vdev_op_leaf)
|
||
|
+ zio_offset += VDEV_LABEL_START_SIZE;
|
||
|
+
|
||
|
/* Standard load based on pending queue length. */
|
||
|
load = vdev_queue_length(vd);
|
||
|
- lastoffset = vdev_queue_lastoffset(vd);
|
||
|
+ last_offset = vdev_queue_last_offset(vd);
|
||
|
|
||
|
if (vd->vdev_nonrot) {
|
||
|
/* Non-rotating media. */
|
||
|
- if (lastoffset == zio_offset)
|
||
|
+ if (last_offset == zio_offset)
|
||
|
return (load + zfs_vdev_mirror_non_rotating_inc);
|
||
|
|
||
|
/*
|
||
|
@@ -148,16 +153,16 @@ vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset)
|
||
|
}
|
||
|
|
||
|
/* Rotating media I/O's which directly follow the last I/O. */
|
||
|
- if (lastoffset == zio_offset)
|
||
|
+ if (last_offset == zio_offset)
|
||
|
return (load + zfs_vdev_mirror_rotating_inc);
|
||
|
|
||
|
/*
|
||
|
* Apply half the seek increment to I/O's within seek offset
|
||
|
- * of the last I/O queued to this vdev as they should incur less
|
||
|
+ * of the last I/O issued to this vdev as they should incur less
|
||
|
* of a seek increment.
|
||
|
*/
|
||
|
- if (ABS(lastoffset - zio_offset) <
|
||
|
- zfs_vdev_mirror_rotating_seek_offset)
|
||
|
+ offset_diff = (int64_t)(last_offset - zio_offset);
|
||
|
+ if (ABS(offset_diff) < zfs_vdev_mirror_rotating_seek_offset)
|
||
|
return (load + (zfs_vdev_mirror_rotating_seek_inc / 2));
|
||
|
|
||
|
/* Apply the full seek increment to all other I/O's. */
|
||
|
@@ -382,29 +387,20 @@ vdev_mirror_child_select(zio_t *zio)
|
||
|
mm->mm_preferred_cnt++;
|
||
|
}
|
||
|
|
||
|
- if (mm->mm_preferred_cnt == 1) {
|
||
|
- vdev_queue_register_lastoffset(
|
||
|
- mm->mm_child[mm->mm_preferred[0]].mc_vd, zio);
|
||
|
+ if (mm->mm_preferred_cnt == 1)
|
||
|
return (mm->mm_preferred[0]);
|
||
|
- }
|
||
|
|
||
|
- if (mm->mm_preferred_cnt > 1) {
|
||
|
- int c = vdev_mirror_preferred_child_randomize(zio);
|
||
|
|
||
|
- vdev_queue_register_lastoffset(mm->mm_child[c].mc_vd, zio);
|
||
|
- return (c);
|
||
|
- }
|
||
|
+ if (mm->mm_preferred_cnt > 1)
|
||
|
+ return (vdev_mirror_preferred_child_randomize(zio));
|
||
|
|
||
|
/*
|
||
|
* Every device is either missing or has this txg in its DTL.
|
||
|
* Look for any child we haven't already tried before giving up.
|
||
|
*/
|
||
|
for (c = 0; c < mm->mm_children; c++) {
|
||
|
- if (!mm->mm_child[c].mc_tried) {
|
||
|
- vdev_queue_register_lastoffset(mm->mm_child[c].mc_vd,
|
||
|
- zio);
|
||
|
+ if (!mm->mm_child[c].mc_tried)
|
||
|
return (c);
|
||
|
- }
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c
|
||
|
index 6b3e87291..40cba340a 100644
|
||
|
--- a/module/zfs/vdev_queue.c
|
||
|
+++ b/module/zfs/vdev_queue.c
|
||
|
@@ -393,7 +393,7 @@ vdev_queue_init(vdev_t *vd)
|
||
|
sizeof (zio_t), offsetof(struct zio, io_queue_node));
|
||
|
}
|
||
|
|
||
|
- vq->vq_lastoffset = 0;
|
||
|
+ vq->vq_last_offset = 0;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
@@ -699,9 +699,8 @@ again:
|
||
|
*/
|
||
|
tree = vdev_queue_class_tree(vq, p);
|
||
|
vq->vq_io_search.io_timestamp = 0;
|
||
|
- vq->vq_io_search.io_offset = vq->vq_last_offset + 1;
|
||
|
- VERIFY3P(avl_find(tree, &vq->vq_io_search,
|
||
|
- &idx), ==, NULL);
|
||
|
+ vq->vq_io_search.io_offset = vq->vq_last_offset - 1;
|
||
|
+ VERIFY3P(avl_find(tree, &vq->vq_io_search, &idx), ==, NULL);
|
||
|
zio = avl_nearest(tree, idx, AVL_AFTER);
|
||
|
if (zio == NULL)
|
||
|
zio = avl_first(tree);
|
||
|
@@ -728,7 +727,7 @@ again:
|
||
|
}
|
||
|
|
||
|
vdev_queue_pending_add(vq, zio);
|
||
|
- vq->vq_last_offset = zio->io_offset;
|
||
|
+ vq->vq_last_offset = zio->io_offset + zio->io_size;
|
||
|
|
||
|
return (zio);
|
||
|
}
|
||
|
@@ -806,7 +805,7 @@ vdev_queue_io_done(zio_t *zio)
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
- * As these three methods are only used for load calculations we're not
|
||
|
+ * As these two methods are only used for load calculations we're not
|
||
|
* concerned if we get an incorrect value on 32bit platforms due to lack of
|
||
|
* vq_lock mutex use here, instead we prefer to keep it lock free for
|
||
|
* performance.
|
||
|
@@ -818,15 +817,9 @@ vdev_queue_length(vdev_t *vd)
|
||
|
}
|
||
|
|
||
|
uint64_t
|
||
|
-vdev_queue_lastoffset(vdev_t *vd)
|
||
|
+vdev_queue_last_offset(vdev_t *vd)
|
||
|
{
|
||
|
- return (vd->vdev_queue.vq_lastoffset);
|
||
|
-}
|
||
|
-
|
||
|
-void
|
||
|
-vdev_queue_register_lastoffset(vdev_t *vd, zio_t *zio)
|
||
|
-{
|
||
|
- vd->vdev_queue.vq_lastoffset = zio->io_offset + zio->io_size;
|
||
|
+ return (vd->vdev_queue.vq_last_offset);
|
||
|
}
|
||
|
|
||
|
#if defined(_KERNEL) && defined(HAVE_SPL)
|
||
|
--
|
||
|
2.14.2
|
||
|
|