mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-03 23:09:35 +03:00
cbe882298e
Slow disk response times can be indicative of a failing drive. ZFS currently tracks slow I/Os (slower than zio_slow_io_ms) and generates events (ereport.fs.zfs.delay). However, no action is taken by ZED, like is done for checksum or I/O errors. This change adds slow disk diagnosis to ZED which is opt-in using new VDEV properties: VDEV_PROP_SLOW_IO_N VDEV_PROP_SLOW_IO_T If multiple VDEVs in a pool are undergoing slow I/Os, then it skips the zpool_vdev_degrade(). Sponsored-By: OpenDrives Inc. Sponsored-By: Klara Inc. Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Allan Jude <allan@klarasystems.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Co-authored-by: Rob Wing <rob.wing@klarasystems.com> Signed-off-by: Don Brady <don.brady@klarasystems.com> Closes #15469
337 lines
7.3 KiB
C
337 lines
7.3 KiB
C
/*
|
|
* CDDL HEADER START
|
|
*
|
|
* The contents of this file are subject to the terms of the
|
|
* Common Development and Distribution License, Version 1.0 only
|
|
* (the "License"). You may not use this file except in compliance
|
|
* with the License.
|
|
*
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
* or https://opensource.org/licenses/CDDL-1.0.
|
|
* See the License for the specific language governing permissions
|
|
* and limitations under the License.
|
|
*
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
*
|
|
* CDDL HEADER END
|
|
*/
|
|
/*
|
|
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
|
|
* Use is subject to license terms.
|
|
*
|
|
* Copyright (c) 2016, Intel Corporation.
|
|
*/
|
|
|
|
#include <assert.h>
|
|
#include <stddef.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <sys/list.h>
|
|
#include <sys/time.h>
|
|
|
|
#include "fmd_api.h"
|
|
#include "fmd_serd.h"
|
|
#include "../zed_log.h"
|
|
|
|
|
|
#define FMD_STR_BUCKETS 211
|
|
|
|
|
|
#ifdef SERD_ENG_DEBUG
|
|
#define serd_log_msg(fmt, ...) \
|
|
zed_log_msg(LOG_INFO, fmt, __VA_ARGS__)
|
|
#else
|
|
#define serd_log_msg(fmt, ...)
|
|
#endif
|
|
|
|
|
|
/*
|
|
* SERD Engine Backend
|
|
*/
|
|
|
|
/*
|
|
* Compute the delta between events in nanoseconds. To account for very old
|
|
* events which are replayed, we must handle the case where time is negative.
|
|
* We convert the hrtime_t's to unsigned 64-bit integers and then handle the
|
|
* case where 'old' is greater than 'new' (i.e. high-res time has wrapped).
|
|
*/
|
|
static hrtime_t
|
|
fmd_event_delta(hrtime_t t1, hrtime_t t2)
|
|
{
|
|
uint64_t old = t1;
|
|
uint64_t new = t2;
|
|
|
|
return (new >= old ? new - old : (UINT64_MAX - old) + new + 1);
|
|
}
|
|
|
|
static fmd_serd_eng_t *
|
|
fmd_serd_eng_alloc(const char *name, uint64_t n, hrtime_t t)
|
|
{
|
|
fmd_serd_eng_t *sgp;
|
|
|
|
sgp = malloc(sizeof (fmd_serd_eng_t));
|
|
if (sgp == NULL) {
|
|
perror("malloc");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
memset(sgp, 0, sizeof (fmd_serd_eng_t));
|
|
|
|
sgp->sg_name = strdup(name);
|
|
if (sgp->sg_name == NULL) {
|
|
perror("strdup");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
sgp->sg_flags = FMD_SERD_DIRTY;
|
|
sgp->sg_n = n;
|
|
sgp->sg_t = t;
|
|
|
|
list_create(&sgp->sg_list, sizeof (fmd_serd_elem_t),
|
|
offsetof(fmd_serd_elem_t, se_list));
|
|
|
|
return (sgp);
|
|
}
|
|
|
|
static void
|
|
fmd_serd_eng_free(fmd_serd_eng_t *sgp)
|
|
{
|
|
fmd_serd_eng_reset(sgp);
|
|
free(sgp->sg_name);
|
|
list_destroy(&sgp->sg_list);
|
|
free(sgp);
|
|
}
|
|
|
|
/*
|
|
* sourced from fmd_string.c
|
|
*/
|
|
static ulong_t
|
|
fmd_strhash(const char *key)
|
|
{
|
|
ulong_t g, h = 0;
|
|
const char *p;
|
|
|
|
for (p = key; *p != '\0'; p++) {
|
|
h = (h << 4) + *p;
|
|
|
|
if ((g = (h & 0xf0000000)) != 0) {
|
|
h ^= (g >> 24);
|
|
h ^= g;
|
|
}
|
|
}
|
|
|
|
return (h);
|
|
}
|
|
|
|
void
|
|
fmd_serd_hash_create(fmd_serd_hash_t *shp)
|
|
{
|
|
shp->sh_hashlen = FMD_STR_BUCKETS;
|
|
shp->sh_hash = calloc(shp->sh_hashlen, sizeof (void *));
|
|
shp->sh_count = 0;
|
|
|
|
if (shp->sh_hash == NULL) {
|
|
perror("calloc");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
}
|
|
|
|
void
|
|
fmd_serd_hash_destroy(fmd_serd_hash_t *shp)
|
|
{
|
|
fmd_serd_eng_t *sgp, *ngp;
|
|
uint_t i;
|
|
|
|
for (i = 0; i < shp->sh_hashlen; i++) {
|
|
for (sgp = shp->sh_hash[i]; sgp != NULL; sgp = ngp) {
|
|
ngp = sgp->sg_next;
|
|
fmd_serd_eng_free(sgp);
|
|
}
|
|
}
|
|
|
|
free(shp->sh_hash);
|
|
memset(shp, 0, sizeof (fmd_serd_hash_t));
|
|
}
|
|
|
|
void
|
|
fmd_serd_hash_apply(fmd_serd_hash_t *shp, fmd_serd_eng_f *func, void *arg)
|
|
{
|
|
fmd_serd_eng_t *sgp;
|
|
uint_t i;
|
|
|
|
for (i = 0; i < shp->sh_hashlen; i++) {
|
|
for (sgp = shp->sh_hash[i]; sgp != NULL; sgp = sgp->sg_next)
|
|
func(sgp, arg);
|
|
}
|
|
}
|
|
|
|
fmd_serd_eng_t *
|
|
fmd_serd_eng_insert(fmd_serd_hash_t *shp, const char *name,
|
|
uint_t n, hrtime_t t)
|
|
{
|
|
uint_t h = fmd_strhash(name) % shp->sh_hashlen;
|
|
fmd_serd_eng_t *sgp = fmd_serd_eng_alloc(name, n, t);
|
|
|
|
serd_log_msg(" SERD Engine: inserting %s N %d T %llu",
|
|
name, (int)n, (long long unsigned)t);
|
|
|
|
sgp->sg_next = shp->sh_hash[h];
|
|
shp->sh_hash[h] = sgp;
|
|
shp->sh_count++;
|
|
|
|
return (sgp);
|
|
}
|
|
|
|
fmd_serd_eng_t *
|
|
fmd_serd_eng_lookup(fmd_serd_hash_t *shp, const char *name)
|
|
{
|
|
uint_t h = fmd_strhash(name) % shp->sh_hashlen;
|
|
fmd_serd_eng_t *sgp;
|
|
|
|
for (sgp = shp->sh_hash[h]; sgp != NULL; sgp = sgp->sg_next) {
|
|
if (strcmp(name, sgp->sg_name) == 0)
|
|
return (sgp);
|
|
}
|
|
|
|
return (NULL);
|
|
}
|
|
|
|
void
|
|
fmd_serd_eng_delete(fmd_serd_hash_t *shp, const char *name)
|
|
{
|
|
uint_t h = fmd_strhash(name) % shp->sh_hashlen;
|
|
fmd_serd_eng_t *sgp, **pp = &shp->sh_hash[h];
|
|
|
|
serd_log_msg(" SERD Engine: deleting %s", name);
|
|
|
|
for (sgp = *pp; sgp != NULL; sgp = sgp->sg_next) {
|
|
if (strcmp(sgp->sg_name, name) != 0)
|
|
pp = &sgp->sg_next;
|
|
else
|
|
break;
|
|
}
|
|
|
|
if (sgp != NULL) {
|
|
*pp = sgp->sg_next;
|
|
fmd_serd_eng_free(sgp);
|
|
assert(shp->sh_count != 0);
|
|
shp->sh_count--;
|
|
}
|
|
}
|
|
|
|
static void
|
|
fmd_serd_eng_discard(fmd_serd_eng_t *sgp, fmd_serd_elem_t *sep)
|
|
{
|
|
list_remove(&sgp->sg_list, sep);
|
|
sgp->sg_count--;
|
|
|
|
serd_log_msg(" SERD Engine: discarding %s, %d remaining",
|
|
sgp->sg_name, (int)sgp->sg_count);
|
|
|
|
free(sep);
|
|
}
|
|
|
|
int
|
|
fmd_serd_eng_record(fmd_serd_eng_t *sgp, hrtime_t hrt)
|
|
{
|
|
fmd_serd_elem_t *sep, *oep;
|
|
|
|
/*
|
|
* If the fired flag is already set, return false and discard the
|
|
* event. This means that the caller will only see the engine "fire"
|
|
* once until fmd_serd_eng_reset() is called. The fmd_serd_eng_fired()
|
|
* function can also be used in combination with fmd_serd_eng_record().
|
|
*/
|
|
if (sgp->sg_flags & FMD_SERD_FIRED) {
|
|
serd_log_msg(" SERD Engine: record %s already fired!",
|
|
sgp->sg_name);
|
|
return (B_FALSE);
|
|
}
|
|
|
|
while (sgp->sg_count >= sgp->sg_n)
|
|
fmd_serd_eng_discard(sgp, list_tail(&sgp->sg_list));
|
|
|
|
sep = malloc(sizeof (fmd_serd_elem_t));
|
|
if (sep == NULL) {
|
|
perror("malloc");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
sep->se_hrt = hrt;
|
|
|
|
list_insert_head(&sgp->sg_list, sep);
|
|
sgp->sg_count++;
|
|
|
|
serd_log_msg(" SERD Engine: recording %s of %d (%llu)",
|
|
sgp->sg_name, (int)sgp->sg_count, (long long unsigned)hrt);
|
|
|
|
/*
|
|
* Pick up the oldest element pointer for comparison to 'sep'. We must
|
|
* do this after adding 'sep' because 'oep' and 'sep' can be the same.
|
|
*/
|
|
oep = list_tail(&sgp->sg_list);
|
|
|
|
if (sgp->sg_count >= sgp->sg_n &&
|
|
fmd_event_delta(oep->se_hrt, sep->se_hrt) <= sgp->sg_t) {
|
|
sgp->sg_flags |= FMD_SERD_FIRED | FMD_SERD_DIRTY;
|
|
serd_log_msg(" SERD Engine: fired %s", sgp->sg_name);
|
|
return (B_TRUE);
|
|
}
|
|
|
|
sgp->sg_flags |= FMD_SERD_DIRTY;
|
|
return (B_FALSE);
|
|
}
|
|
|
|
int
|
|
fmd_serd_eng_fired(fmd_serd_eng_t *sgp)
|
|
{
|
|
return (sgp->sg_flags & FMD_SERD_FIRED);
|
|
}
|
|
|
|
int
|
|
fmd_serd_eng_empty(fmd_serd_eng_t *sgp)
|
|
{
|
|
return (sgp->sg_count == 0);
|
|
}
|
|
|
|
void
|
|
fmd_serd_eng_reset(fmd_serd_eng_t *sgp)
|
|
{
|
|
serd_log_msg(" SERD Engine: resetting %s", sgp->sg_name);
|
|
|
|
while (sgp->sg_count != 0)
|
|
fmd_serd_eng_discard(sgp, list_head(&sgp->sg_list));
|
|
|
|
sgp->sg_flags &= ~FMD_SERD_FIRED;
|
|
sgp->sg_flags |= FMD_SERD_DIRTY;
|
|
}
|
|
|
|
void
|
|
fmd_serd_eng_gc(fmd_serd_eng_t *sgp, void *arg)
|
|
{
|
|
(void) arg;
|
|
fmd_serd_elem_t *sep, *nep;
|
|
hrtime_t hrt;
|
|
|
|
if (sgp->sg_count == 0 || (sgp->sg_flags & FMD_SERD_FIRED))
|
|
return; /* no garbage collection needed if empty or fired */
|
|
|
|
sep = list_head(&sgp->sg_list);
|
|
if (sep == NULL)
|
|
return;
|
|
|
|
hrt = sep->se_hrt - sgp->sg_t;
|
|
|
|
for (sep = list_head(&sgp->sg_list); sep != NULL; sep = nep) {
|
|
if (sep->se_hrt >= hrt)
|
|
break; /* sep and subsequent events are all within T */
|
|
|
|
nep = list_next(&sgp->sg_list, sep);
|
|
fmd_serd_eng_discard(sgp, sep);
|
|
sgp->sg_flags |= FMD_SERD_DIRTY;
|
|
}
|
|
}
|