mirror_zfs/cmd/zed/agents/zfs_agents.c
Matthew Ahrens 4b5c9d9f97 zed crashes when devid not present
zed core dumps due to a NULL pointer in zfs_agent_iter_vdev(). The
gs_devid is NULL, but the nvl has a "devid" entry.

zfs_agent_post_event() checks that ZFS_EV_VDEV_GUID or DEV_IDENTIFIER is
present in nvl, but then later it and zfs_agent_iter_vdev() assume that
DEV_IDENTIFIER is present and thus gs_devid is set.

Typically this is not a problem because usually either all vdevs have
devid's, or none of them do. Since zfs_agent_iter_vdev() first checks if
the vdev has devid before dereferencing gs_devid, the problem isn't
typically encountered. However, if some vdevs have devid's and some do
not, then the problem is easily reproduced.  This can happen if the pool
has been moved from a system that has devid's to one that does not.

The fix is for zfs_agent_iter_vdev() to only try to match the devid's if
both nvl and gsp have devid's present.

Reviewed-by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: Don Brady <don.brady@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: loli10K <ezomori.nozomu@gmail.com>
Signed-off-by: Matthew Ahrens <mahrens@delphix.com>
External-issue: DLPX-65090
Closes #9054
Closes #9060
2019-07-26 12:07:48 -07:00

423 lines
11 KiB
C

/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License Version 1.0 (CDDL-1.0).
* You can obtain a copy of the license from the top-level file
* "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
* You may not use this file except in compliance with the license.
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2016, Intel Corporation.
* Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
*/
#include <libnvpair.h>
#include <libzfs.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/list.h>
#include <sys/time.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>
#include <sys/fm/protocol.h>
#include <sys/fm/fs/zfs.h>
#include <pthread.h>
#include <unistd.h>
#include "zfs_agents.h"
#include "fmd_api.h"
#include "../zed_log.h"
/*
* agent dispatch code
*/
static pthread_mutex_t agent_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t agent_cond = PTHREAD_COND_INITIALIZER;
static list_t agent_events; /* list of pending events */
static int agent_exiting;
typedef struct agent_event {
char ae_class[64];
char ae_subclass[32];
nvlist_t *ae_nvl;
list_node_t ae_node;
} agent_event_t;
pthread_t g_agents_tid;
libzfs_handle_t *g_zfs_hdl;
/* guid search data */
typedef enum device_type {
DEVICE_TYPE_L2ARC, /* l2arc device */
DEVICE_TYPE_SPARE, /* spare device */
DEVICE_TYPE_PRIMARY /* any primary pool storage device */
} device_type_t;
typedef struct guid_search {
uint64_t gs_pool_guid;
uint64_t gs_vdev_guid;
char *gs_devid;
device_type_t gs_vdev_type;
uint64_t gs_vdev_expandtime; /* vdev expansion time */
} guid_search_t;
/*
* Walks the vdev tree recursively looking for a matching devid.
* Returns B_TRUE as soon as a matching device is found, B_FALSE otherwise.
*/
static boolean_t
zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
{
guid_search_t *gsp = arg;
char *path = NULL;
uint_t c, children;
nvlist_t **child;
/*
* First iterate over any children.
*/
if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
&child, &children) == 0) {
for (c = 0; c < children; c++) {
if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
gsp->gs_vdev_type = DEVICE_TYPE_PRIMARY;
return (B_TRUE);
}
}
}
/*
* Iterate over any spares and cache devices
*/
if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,
&child, &children) == 0) {
for (c = 0; c < children; c++) {
if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;
return (B_TRUE);
}
}
}
if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,
&child, &children) == 0) {
for (c = 0; c < children; c++) {
if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
gsp->gs_vdev_type = DEVICE_TYPE_SPARE;
return (B_TRUE);
}
}
}
/*
* On a devid match, grab the vdev guid and expansion time, if any.
*/
if (gsp->gs_devid != NULL &&
(nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) &&
(strcmp(gsp->gs_devid, path) == 0)) {
(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
&gsp->gs_vdev_guid);
(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
&gsp->gs_vdev_expandtime);
return (B_TRUE);
}
return (B_FALSE);
}
static int
zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
{
guid_search_t *gsp = arg;
nvlist_t *config, *nvl;
/*
* For each vdev in this pool, look for a match by devid
*/
if ((config = zpool_get_config(zhp, NULL)) != NULL) {
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvl) == 0) {
(void) zfs_agent_iter_vdev(zhp, nvl, gsp);
}
}
/*
* if a match was found then grab the pool guid
*/
if (gsp->gs_vdev_guid) {
(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
&gsp->gs_pool_guid);
}
zpool_close(zhp);
return (gsp->gs_vdev_guid != 0);
}
void
zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
{
agent_event_t *event;
if (subclass == NULL)
subclass = "";
event = malloc(sizeof (agent_event_t));
if (event == NULL || nvlist_dup(nvl, &event->ae_nvl, 0) != 0) {
if (event)
free(event);
return;
}
if (strcmp(class, "sysevent.fs.zfs.vdev_check") == 0) {
class = EC_ZFS;
subclass = ESC_ZFS_VDEV_CHECK;
}
/*
* On ZFS on Linux, we don't get the expected FM_RESOURCE_REMOVED
* ereport from vdev_disk layer after a hot unplug. Fortunately we
* get a EC_DEV_REMOVE from our disk monitor and it is a suitable
* proxy so we remap it here for the benefit of the diagnosis engine.
*/
if ((strcmp(class, EC_DEV_REMOVE) == 0) &&
(strcmp(subclass, ESC_DISK) == 0) &&
(nvlist_exists(nvl, ZFS_EV_VDEV_GUID) ||
nvlist_exists(nvl, DEV_IDENTIFIER))) {
nvlist_t *payload = event->ae_nvl;
struct timeval tv;
int64_t tod[2];
uint64_t pool_guid = 0, vdev_guid = 0;
guid_search_t search = { 0 };
device_type_t devtype = DEVICE_TYPE_PRIMARY;
class = "resource.fs.zfs.removed";
subclass = "";
(void) nvlist_add_string(payload, FM_CLASS, class);
(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);
(void) gettimeofday(&tv, NULL);
tod[0] = tv.tv_sec;
tod[1] = tv.tv_usec;
(void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);
/*
* For multipath, spare and l2arc devices ZFS_EV_VDEV_GUID or
* ZFS_EV_POOL_GUID may be missing so find them.
*/
(void) nvlist_lookup_string(nvl, DEV_IDENTIFIER,
&search.gs_devid);
(void) zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);
pool_guid = search.gs_pool_guid;
vdev_guid = search.gs_vdev_guid;
devtype = search.gs_vdev_type;
/*
* We want to avoid reporting "remove" events coming from
* libudev for VDEVs which were expanded recently (10s) and
* avoid activating spares in response to partitions being
* deleted and created in rapid succession.
*/
if (search.gs_vdev_expandtime != 0 &&
search.gs_vdev_expandtime + 10 > tv.tv_sec) {
zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' "
"for recently expanded device '%s'", EC_DEV_REMOVE,
search.gs_devid);
goto out;
}
(void) nvlist_add_uint64(payload,
FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid);
(void) nvlist_add_uint64(payload,
FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid);
switch (devtype) {
case DEVICE_TYPE_L2ARC:
(void) nvlist_add_string(payload,
FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
VDEV_TYPE_L2CACHE);
break;
case DEVICE_TYPE_SPARE:
(void) nvlist_add_string(payload,
FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE);
break;
case DEVICE_TYPE_PRIMARY:
(void) nvlist_add_string(payload,
FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_DISK);
break;
}
zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'",
EC_DEV_REMOVE, class);
}
(void) strlcpy(event->ae_class, class, sizeof (event->ae_class));
(void) strlcpy(event->ae_subclass, subclass,
sizeof (event->ae_subclass));
(void) pthread_mutex_lock(&agent_lock);
list_insert_tail(&agent_events, event);
(void) pthread_mutex_unlock(&agent_lock);
out:
(void) pthread_cond_signal(&agent_cond);
}
static void
zfs_agent_dispatch(const char *class, const char *subclass, nvlist_t *nvl)
{
/*
* The diagnosis engine subscribes to the following events.
* On illumos these subscriptions reside in:
* /usr/lib/fm/fmd/plugins/zfs-diagnosis.conf
*/
if (strstr(class, "ereport.fs.zfs.") != NULL ||
strstr(class, "resource.fs.zfs.") != NULL ||
strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0 ||
strcmp(class, "sysevent.fs.zfs.vdev_remove_dev") == 0 ||
strcmp(class, "sysevent.fs.zfs.pool_destroy") == 0) {
fmd_module_recv(fmd_module_hdl("zfs-diagnosis"), nvl, class);
}
/*
* The retire agent subscribes to the following events.
* On illumos these subscriptions reside in:
* /usr/lib/fm/fmd/plugins/zfs-retire.conf
*
* NOTE: faults events come directly from our diagnosis engine
* and will not pass through the zfs kernel module.
*/
if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
strcmp(class, "resource.fs.zfs.removed") == 0 ||
strcmp(class, "resource.fs.zfs.statechange") == 0 ||
strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0) {
fmd_module_recv(fmd_module_hdl("zfs-retire"), nvl, class);
}
/*
* The SLM module only consumes disk events and vdev check events
*
* NOTE: disk events come directly from disk monitor and will
* not pass through the zfs kernel module.
*/
if (strstr(class, "EC_dev_") != NULL ||
strcmp(class, EC_ZFS) == 0) {
(void) zfs_slm_event(class, subclass, nvl);
}
}
/*
* Events are consumed and dispatched from this thread
* An agent can also post an event so event list lock
* is not held when calling an agent.
* One event is consumed at a time.
*/
static void *
zfs_agent_consumer_thread(void *arg)
{
for (;;) {
agent_event_t *event;
(void) pthread_mutex_lock(&agent_lock);
/* wait for an event to show up */
while (!agent_exiting && list_is_empty(&agent_events))
(void) pthread_cond_wait(&agent_cond, &agent_lock);
if (agent_exiting) {
(void) pthread_mutex_unlock(&agent_lock);
zed_log_msg(LOG_INFO, "zfs_agent_consumer_thread: "
"exiting");
return (NULL);
}
if ((event = (list_head(&agent_events))) != NULL) {
list_remove(&agent_events, event);
(void) pthread_mutex_unlock(&agent_lock);
/* dispatch to all event subscribers */
zfs_agent_dispatch(event->ae_class, event->ae_subclass,
event->ae_nvl);
nvlist_free(event->ae_nvl);
free(event);
continue;
}
(void) pthread_mutex_unlock(&agent_lock);
}
return (NULL);
}
void
zfs_agent_init(libzfs_handle_t *zfs_hdl)
{
fmd_hdl_t *hdl;
g_zfs_hdl = zfs_hdl;
if (zfs_slm_init() != 0)
zed_log_die("Failed to initialize zfs slm");
zed_log_msg(LOG_INFO, "Add Agent: init");
hdl = fmd_module_hdl("zfs-diagnosis");
_zfs_diagnosis_init(hdl);
if (!fmd_module_initialized(hdl))
zed_log_die("Failed to initialize zfs diagnosis");
hdl = fmd_module_hdl("zfs-retire");
_zfs_retire_init(hdl);
if (!fmd_module_initialized(hdl))
zed_log_die("Failed to initialize zfs retire");
list_create(&agent_events, sizeof (agent_event_t),
offsetof(struct agent_event, ae_node));
if (pthread_create(&g_agents_tid, NULL, zfs_agent_consumer_thread,
NULL) != 0) {
list_destroy(&agent_events);
zed_log_die("Failed to initialize agents");
}
}
void
zfs_agent_fini(void)
{
fmd_hdl_t *hdl;
agent_event_t *event;
agent_exiting = 1;
(void) pthread_cond_signal(&agent_cond);
/* wait for zfs_enum_pools thread to complete */
(void) pthread_join(g_agents_tid, NULL);
/* drain any pending events */
while ((event = (list_head(&agent_events))) != NULL) {
list_remove(&agent_events, event);
nvlist_free(event->ae_nvl);
free(event);
}
list_destroy(&agent_events);
if ((hdl = fmd_module_hdl("zfs-retire")) != NULL) {
_zfs_retire_fini(hdl);
fmd_hdl_unregister(hdl);
}
if ((hdl = fmd_module_hdl("zfs-diagnosis")) != NULL) {
_zfs_diagnosis_fini(hdl);
fmd_hdl_unregister(hdl);
}
zed_log_msg(LOG_INFO, "Add Agent: fini");
zfs_slm_fini();
g_zfs_hdl = NULL;
}