202 lines
6.4 KiB
Diff
202 lines
6.4 KiB
Diff
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||
|
From: Tony Hutter <hutter2@llnl.gov>
|
||
|
Date: Sun, 4 Mar 2018 17:34:51 -0800
|
||
|
Subject: [PATCH] Change checksum & IO delay ratelimit values
|
||
|
MIME-Version: 1.0
|
||
|
Content-Type: text/plain; charset=UTF-8
|
||
|
Content-Transfer-Encoding: 8bit
|
||
|
|
||
|
Change checksum & IO delay ratelimit thresholds from 5/sec to 20/sec.
|
||
|
This allows zed to actually trigger if a bunch of these events arrive in
|
||
|
a short period of time (zed has a threshold of 10 events in 10 sec).
|
||
|
Previously, if you had, say, 100 checksum errors in 1 sec, it would get
|
||
|
ratelimited to 5/sec which wouldn't trigger zed to fault the drive.
|
||
|
|
||
|
Also, convert the checksum and IO delay thresholds to module params for
|
||
|
easy testing.
|
||
|
|
||
|
Reviewed-by: loli10K <ezomori.nozomu@gmail.com>
|
||
|
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||
|
Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
|
||
|
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
|
||
|
Closes #7252
|
||
|
(cherry picked from commit 6dc40e2ada2d0d008bd314ff3525f2b0acc2bb01)
|
||
|
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||
|
---
|
||
|
include/sys/vdev_impl.h | 2 --
|
||
|
include/sys/zfs_ratelimit.h | 12 +++++++++---
|
||
|
module/zcommon/zfs_comutil.c | 4 ++--
|
||
|
module/zfs/vdev.c | 23 +++++++++++++++++++++--
|
||
|
man/man5/zfs-module-parameters.5 | 39 +++++++++++++++++++++++++++++++++++++++
|
||
|
5 files changed, 71 insertions(+), 9 deletions(-)
|
||
|
|
||
|
diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
|
||
|
index 13c495822..4f9f1a903 100644
|
||
|
--- a/include/sys/vdev_impl.h
|
||
|
+++ b/include/sys/vdev_impl.h
|
||
|
@@ -255,8 +255,6 @@ struct vdev {
|
||
|
* We rate limit ZIO delay and ZIO checksum events, since they
|
||
|
* can flood ZED with tons of events when a drive is acting up.
|
||
|
*/
|
||
|
-#define DELAYS_PER_SECOND 5
|
||
|
-#define CHECKSUMS_PER_SECOND 5
|
||
|
zfs_ratelimit_t vdev_delay_rl;
|
||
|
zfs_ratelimit_t vdev_checksum_rl;
|
||
|
};
|
||
|
diff --git a/include/sys/zfs_ratelimit.h b/include/sys/zfs_ratelimit.h
|
||
|
index f36e07841..012825fad 100644
|
||
|
--- a/include/sys/zfs_ratelimit.h
|
||
|
+++ b/include/sys/zfs_ratelimit.h
|
||
|
@@ -25,13 +25,19 @@
|
||
|
typedef struct {
|
||
|
hrtime_t start;
|
||
|
unsigned int count;
|
||
|
- unsigned int burst; /* Number to allow per interval */
|
||
|
- unsigned int interval; /* Interval length in seconds */
|
||
|
+
|
||
|
+ /*
|
||
|
+ * Pointer to number of events per interval. We do this to
|
||
|
+ * allow the burst to be a (changeable) module parameter.
|
||
|
+ */
|
||
|
+ unsigned int *burst;
|
||
|
+
|
||
|
+ unsigned int interval; /* Interval length in seconds */
|
||
|
kmutex_t lock;
|
||
|
} zfs_ratelimit_t;
|
||
|
|
||
|
int zfs_ratelimit(zfs_ratelimit_t *rl);
|
||
|
-void zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int burst,
|
||
|
+void zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int *burst,
|
||
|
unsigned int interval);
|
||
|
void zfs_ratelimit_fini(zfs_ratelimit_t *rl);
|
||
|
|
||
|
diff --git a/module/zcommon/zfs_comutil.c b/module/zcommon/zfs_comutil.c
|
||
|
index 52cb7e365..44cdc8523 100644
|
||
|
--- a/module/zcommon/zfs_comutil.c
|
||
|
+++ b/module/zcommon/zfs_comutil.c
|
||
|
@@ -215,7 +215,7 @@ const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS] = {
|
||
|
* interval: Interval time in seconds
|
||
|
*/
|
||
|
void
|
||
|
-zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int burst,
|
||
|
+zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int *burst,
|
||
|
unsigned int interval)
|
||
|
{
|
||
|
rl->count = 0;
|
||
|
@@ -270,7 +270,7 @@ zfs_ratelimit(zfs_ratelimit_t *rl)
|
||
|
rl->start = now;
|
||
|
rl->count = 0;
|
||
|
} else {
|
||
|
- if (rl->count >= rl->burst) {
|
||
|
+ if (rl->count >= *rl->burst) {
|
||
|
rc = 0; /* We're ratelimiting */
|
||
|
}
|
||
|
}
|
||
|
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
|
||
|
index df07d893d..0786fbb83 100644
|
||
|
--- a/module/zfs/vdev.c
|
||
|
+++ b/module/zfs/vdev.c
|
||
|
@@ -56,6 +56,16 @@
|
||
|
*/
|
||
|
int metaslabs_per_vdev = 200;
|
||
|
|
||
|
+/*
|
||
|
+ * Rate limit delay events to this many IO delays per second.
|
||
|
+ */
|
||
|
+unsigned int zfs_delays_per_second = 20;
|
||
|
+
|
||
|
+/*
|
||
|
+ * Rate limit checksum events after this many checksum errors per second.
|
||
|
+ */
|
||
|
+unsigned int zfs_checksums_per_second = 20;
|
||
|
+
|
||
|
/*
|
||
|
* Virtual device management.
|
||
|
*/
|
||
|
@@ -357,8 +367,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
|
||
|
* and checksum events so that we don't overwhelm ZED with thousands
|
||
|
* of events when a disk is acting up.
|
||
|
*/
|
||
|
- zfs_ratelimit_init(&vd->vdev_delay_rl, DELAYS_PER_SECOND, 1);
|
||
|
- zfs_ratelimit_init(&vd->vdev_checksum_rl, CHECKSUMS_PER_SECOND, 1);
|
||
|
+ zfs_ratelimit_init(&vd->vdev_delay_rl, &zfs_delays_per_second, 1);
|
||
|
+ zfs_ratelimit_init(&vd->vdev_checksum_rl, &zfs_checksums_per_second, 1);
|
||
|
|
||
|
list_link_init(&vd->vdev_config_dirty_node);
|
||
|
list_link_init(&vd->vdev_state_dirty_node);
|
||
|
@@ -3776,5 +3786,14 @@ module_param(metaslabs_per_vdev, int, 0644);
|
||
|
MODULE_PARM_DESC(metaslabs_per_vdev,
|
||
|
"Divide added vdev into approximately (but no more than) this number "
|
||
|
"of metaslabs");
|
||
|
+
|
||
|
+module_param(zfs_delays_per_second, uint, 0644);
|
||
|
+MODULE_PARM_DESC(zfs_delays_per_second, "Rate limit delay events to this many "
|
||
|
+ "IO delays per second");
|
||
|
+
|
||
|
+module_param(zfs_checksums_per_second, uint, 0644);
|
||
|
+ MODULE_PARM_DESC(zfs_checksums_per_second, "Rate limit checksum events "
|
||
|
+ "to this many checksum errors per second (do not set below zed"
|
||
|
+ "threshold).");
|
||
|
/* END CSTYLED */
|
||
|
#endif
|
||
|
diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5
|
||
|
index d4daffde6..8d5ac2576 100644
|
||
|
--- a/man/man5/zfs-module-parameters.5
|
||
|
+++ b/man/man5/zfs-module-parameters.5
|
||
|
@@ -739,6 +739,34 @@ Disable pool import at module load by ignoring the cache file (typically \fB/etc
|
||
|
Use \fB1\fR for yes (default) and \fB0\fR for no.
|
||
|
.RE
|
||
|
|
||
|
+.sp
|
||
|
+.ne 2
|
||
|
+.na
|
||
|
+\fBzfs_checksums_per_second\fR (uint)
|
||
|
+.ad
|
||
|
+.RS 12n
|
||
|
+Rate limit checksum events to this many per second. Note that this should
|
||
|
+not be set below the zed thresholds (currently 10 checksums over 10 sec)
|
||
|
+or else zed may not trigger any action.
|
||
|
+.sp
|
||
|
+Default value: \fB20\fR.
|
||
|
+.RE
|
||
|
+
|
||
|
+.sp
|
||
|
+.ne 2
|
||
|
+.na
|
||
|
+\fBzfs_commit_timeout_pct\fR (int)
|
||
|
+.ad
|
||
|
+.RS 12n
|
||
|
+This controls the amount of time that a ZIL block (lwb) will remain "open"
|
||
|
+when it isn't "full", and it has a thread waiting for it to be committed to
|
||
|
+stable storage. The timeout is scaled based on a percentage of the last lwb
|
||
|
+latency to avoid significantly impacting the latency of each individual
|
||
|
+transaction record (itx).
|
||
|
+.sp
|
||
|
+Default value: \fB5\fR%.
|
||
|
+.RE
|
||
|
+
|
||
|
.sp
|
||
|
.ne 2
|
||
|
.na
|
||
|
@@ -866,6 +894,17 @@ Note: \fBzfs_delay_scale\fR * \fBzfs_dirty_data_max\fR must be < 2^64.
|
||
|
Default value: \fB500,000\fR.
|
||
|
.RE
|
||
|
|
||
|
+.sp
|
||
|
+.ne 2
|
||
|
+.na
|
||
|
+\fBzfs_delays_per_second\fR (uint)
|
||
|
+.ad
|
||
|
+.RS 12n
|
||
|
+Rate limit IO delay events to this many per second.
|
||
|
+.sp
|
||
|
+Default value: \fB20\fR.
|
||
|
+.RE
|
||
|
+
|
||
|
.sp
|
||
|
.ne 2
|
||
|
.na
|
||
|
--
|
||
|
2.14.2
|
||
|
|