diff --git a/module/zfs/fm.c b/module/zfs/fm.c new file mode 100644 index 000000000..3cc979d41 --- /dev/null +++ b/module/zfs/fm.c @@ -0,0 +1,1266 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Fault Management Architecture (FMA) Resource and Protocol Support + * + * The routines contained herein provide services to support kernel subsystems + * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089). + * + * Name-Value Pair Lists + * + * The embodiment of an FMA protocol element (event, fmri or authority) is a + * name-value pair list (nvlist_t). FMA-specific nvlist constructor and + * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used + * to create an nvpair list using custom allocators. Callers may choose to + * allocate either from the kernel memory allocator, or from a preallocated + * buffer, useful in constrained contexts like high-level interrupt routines. 
+ * + * Protocol Event and FMRI Construction + * + * Convenience routines are provided to construct nvlist events according to + * the FMA Event Protocol and Naming Schema specification for ereports and + * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes. + * + * ENA Manipulation + * + * Routines to generate ENA formats 0, 1 and 2 are available as well as + * routines to increment formats 1 and 2. Individual fields within the + * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(), + * fm_ena_format_get() and fm_ena_gen_get(). + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * URL and SUNW-MSG-ID value to display for fm_panic(), defined below. These + * values must be kept in sync with the FMA source code in usr/src/cmd/fm. + */ +static const char *fm_url = "http://www.sun.com/msg"; +static const char *fm_msgid = "SUNOS-8000-0G"; +static char *volatile fm_panicstr = NULL; + +errorq_t *ereport_errorq; +void *ereport_dumpbuf; +size_t ereport_dumplen; + +static uint_t ereport_chanlen = ERPT_EVCH_MAX; +static evchan_t *ereport_chan = NULL; +static ulong_t ereport_qlen = 0; +static size_t ereport_size = 0; +static int ereport_cols = 80; + +/* + * Common fault management kstats to record ereport generation + * failures + */ + +struct erpt_kstat { + kstat_named_t erpt_dropped; /* num erpts dropped on post */ + kstat_named_t erpt_set_failed; /* num erpt set failures */ + kstat_named_t fmri_set_failed; /* num fmri set failures */ + kstat_named_t payload_set_failed; /* num payload set failures */ +}; + +static struct erpt_kstat erpt_kstat_data = { + { "erpt-dropped", KSTAT_DATA_UINT64 }, + { "erpt-set-failed", KSTAT_DATA_UINT64 }, + { "fmri-set-failed", KSTAT_DATA_UINT64 }, + { "payload-set-failed", KSTAT_DATA_UINT64 } +}; + +/*ARGSUSED*/ +static void +fm_drain(void 
*private, void *data, errorq_elem_t *eep) +{ + nvlist_t *nvl = errorq_elem_nvl(ereport_errorq, eep); + + if (!panicstr) + (void) fm_ereport_post(nvl, EVCH_TRYHARD); + else + fm_nvprint(nvl); +} + +void +fm_init(void) +{ + kstat_t *ksp; + + (void) sysevent_evc_bind(FM_ERROR_CHAN, + &ereport_chan, EVCH_CREAT | EVCH_HOLD_PEND); + + (void) sysevent_evc_control(ereport_chan, + EVCH_SET_CHAN_LEN, &ereport_chanlen); + + if (ereport_qlen == 0) + ereport_qlen = ERPT_MAX_ERRS * MAX(max_ncpus, 4); + + if (ereport_size == 0) + ereport_size = ERPT_DATA_SZ; + + ereport_errorq = errorq_nvcreate("fm_ereport_queue", + (errorq_func_t)fm_drain, NULL, ereport_qlen, ereport_size, + FM_ERR_PIL, ERRORQ_VITAL); + if (ereport_errorq == NULL) + panic("failed to create required ereport error queue"); + + ereport_dumpbuf = kmem_alloc(ereport_size, KM_SLEEP); + ereport_dumplen = ereport_size; + + /* Initialize ereport allocation and generation kstats */ + ksp = kstat_create("unix", 0, "fm", "misc", KSTAT_TYPE_NAMED, + sizeof (struct erpt_kstat) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL); + + if (ksp != NULL) { + ksp->ks_data = &erpt_kstat_data; + kstat_install(ksp); + } else { + cmn_err(CE_NOTE, "failed to create fm/misc kstat\n"); + + } +} + +/* + * Formatting utility function for fm_nvprintr. We attempt to wrap chunks of + * output so they aren't split across console lines, and return the end column. + */ +/*PRINTFLIKE4*/ +static int +fm_printf(int depth, int c, int cols, const char *format, ...) +{ + va_list ap; + int width; + char c1; + + va_start(ap, format); + width = vsnprintf(&c1, sizeof (c1), format, ap); + va_end(ap); + + if (c + width >= cols) { + console_printf("\n\r"); + c = 0; + if (format[0] != ' ' && depth > 0) { + console_printf(" "); + c++; + } + } + + va_start(ap, format); + console_vprintf(format, ap); + va_end(ap); + + return ((c + width) % cols); +} + +/* + * Recursively print a nvlist in the specified column width and return the + * column we end up in. 
This function is called recursively by fm_nvprint(), + * below. We generically format the entire nvpair using hexadecimal + * integers and strings, and elide any integer arrays. Arrays are basically + * used for cache dumps right now, so we suppress them so as not to overwhelm + * the amount of console output we produce at panic time. This can be further + * enhanced as FMA technology grows based upon the needs of consumers. All + * FMA telemetry is logged using the dump device transport, so the console + * output serves only as a fallback in case this procedure is unsuccessful. + */ +static int +fm_nvprintr(nvlist_t *nvl, int d, int c, int cols) +{ + nvpair_t *nvp; + + for (nvp = nvlist_next_nvpair(nvl, NULL); + nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) { + + data_type_t type = nvpair_type(nvp); + const char *name = nvpair_name(nvp); + + boolean_t b; + uint8_t i8; + uint16_t i16; + uint32_t i32; + uint64_t i64; + char *str; + nvlist_t *cnv; + + if (strcmp(name, FM_CLASS) == 0) + continue; /* already printed by caller */ + + c = fm_printf(d, c, cols, " %s=", name); + + switch (type) { + case DATA_TYPE_BOOLEAN: + c = fm_printf(d + 1, c, cols, " 1"); + break; + + case DATA_TYPE_BOOLEAN_VALUE: + (void) nvpair_value_boolean_value(nvp, &b); + c = fm_printf(d + 1, c, cols, b ? 
"1" : "0"); + break; + + case DATA_TYPE_BYTE: + (void) nvpair_value_byte(nvp, &i8); + c = fm_printf(d + 1, c, cols, "%x", i8); + break; + + case DATA_TYPE_INT8: + (void) nvpair_value_int8(nvp, (void *)&i8); + c = fm_printf(d + 1, c, cols, "%x", i8); + break; + + case DATA_TYPE_UINT8: + (void) nvpair_value_uint8(nvp, &i8); + c = fm_printf(d + 1, c, cols, "%x", i8); + break; + + case DATA_TYPE_INT16: + (void) nvpair_value_int16(nvp, (void *)&i16); + c = fm_printf(d + 1, c, cols, "%x", i16); + break; + + case DATA_TYPE_UINT16: + (void) nvpair_value_uint16(nvp, &i16); + c = fm_printf(d + 1, c, cols, "%x", i16); + break; + + case DATA_TYPE_INT32: + (void) nvpair_value_int32(nvp, (void *)&i32); + c = fm_printf(d + 1, c, cols, "%x", i32); + break; + + case DATA_TYPE_UINT32: + (void) nvpair_value_uint32(nvp, &i32); + c = fm_printf(d + 1, c, cols, "%x", i32); + break; + + case DATA_TYPE_INT64: + (void) nvpair_value_int64(nvp, (void *)&i64); + c = fm_printf(d + 1, c, cols, "%llx", + (u_longlong_t)i64); + break; + + case DATA_TYPE_UINT64: + (void) nvpair_value_uint64(nvp, &i64); + c = fm_printf(d + 1, c, cols, "%llx", + (u_longlong_t)i64); + break; + + case DATA_TYPE_HRTIME: + (void) nvpair_value_hrtime(nvp, (void *)&i64); + c = fm_printf(d + 1, c, cols, "%llx", + (u_longlong_t)i64); + break; + + case DATA_TYPE_STRING: + (void) nvpair_value_string(nvp, &str); + c = fm_printf(d + 1, c, cols, "\"%s\"", + str ? 
str : ""); + break; + + case DATA_TYPE_NVLIST: + c = fm_printf(d + 1, c, cols, "["); + (void) nvpair_value_nvlist(nvp, &cnv); + c = fm_nvprintr(cnv, d + 1, c, cols); + c = fm_printf(d + 1, c, cols, " ]"); + break; + + case DATA_TYPE_NVLIST_ARRAY: { + nvlist_t **val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "["); + (void) nvpair_value_nvlist_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) { + c = fm_nvprintr(val[i], d + 1, c, cols); + } + c = fm_printf(d + 1, c, cols, " ]"); + } + break; + + case DATA_TYPE_BOOLEAN_ARRAY: + case DATA_TYPE_BYTE_ARRAY: + case DATA_TYPE_INT8_ARRAY: + case DATA_TYPE_UINT8_ARRAY: + case DATA_TYPE_INT16_ARRAY: + case DATA_TYPE_UINT16_ARRAY: + case DATA_TYPE_INT32_ARRAY: + case DATA_TYPE_UINT32_ARRAY: + case DATA_TYPE_INT64_ARRAY: + case DATA_TYPE_UINT64_ARRAY: + case DATA_TYPE_STRING_ARRAY: + c = fm_printf(d + 1, c, cols, "[...]"); + break; + case DATA_TYPE_UNKNOWN: + c = fm_printf(d + 1, c, cols, ""); + break; + } + } + + return (c); +} + +void +fm_nvprint(nvlist_t *nvl) +{ + char *class; + int c = 0; + + console_printf("\r"); + + if (nvlist_lookup_string(nvl, FM_CLASS, &class) == 0) + c = fm_printf(0, c, ereport_cols, "%s", class); + + if (fm_nvprintr(nvl, 0, c, ereport_cols) != 0) + console_printf("\n"); + + console_printf("\n"); +} + +/* + * Wrapper for panic() that first produces an FMA-style message for admins. + * Normally such messages are generated by fmd(1M)'s syslog-msgs agent: this + * is the one exception to that rule and the only error that gets messaged. + * This function is intended for use by subsystems that have detected a fatal + * error and enqueued appropriate ereports and wish to then force a panic. + */ +/*PRINTFLIKE1*/ +void +fm_panic(const char *format, ...) +{ + va_list ap; + + (void) casptr((void *)&fm_panicstr, NULL, (void *)format); + va_start(ap, format); + vpanic(format, ap); + va_end(ap); +} + +/* + * Print any appropriate FMA banner message before the panic message. 
This
+ * function is called by panicsys() and prints the message for fm_panic().
+ * We print the message here so that it comes after the system is quiesced.
+ * A one-line summary is recorded in the log only (cmn_err(9F) with "!" prefix).
+ * The rest of the message is for the console only and not needed in the log,
+ * so it is printed using console_printf().  We break it up into multiple
+ * chunks so as to avoid overflowing any small legacy prom_printf() buffers.
+ */
+void
+fm_banner(void)
+{
+	timespec_t tod;
+	hrtime_t now;
+
+	if (!fm_panicstr)
+		return; /* panic was not initiated by fm_panic(); do nothing */
+
+	/* Use the timestamps captured at panic time when they exist. */
+	if (!panicstr) {
+		gethrestime(&tod);
+		now = gethrtime_waitfree();
+	} else {
+		tod = panic_hrestime;
+		now = panic_hrtime;
+	}
+
+	cmn_err(CE_NOTE, "!SUNW-MSG-ID: %s, "
+	    "TYPE: Error, VER: 1, SEVERITY: Major\n", fm_msgid);
+
+	console_printf(
+"\n\rSUNW-MSG-ID: %s, TYPE: Error, VER: 1, SEVERITY: Major\n"
+"EVENT-TIME: 0x%lx.0x%lx (0x%llx)\n",
+	    fm_msgid, tod.tv_sec, tod.tv_nsec, (u_longlong_t)now);
+
+	console_printf(
+"PLATFORM: %s, CSN: -, HOSTNAME: %s\n"
+"SOURCE: %s, REV: %s %s\n",
+	    platform, utsname.nodename, utsname.sysname,
+	    utsname.release, utsname.version);
+
+	console_printf(
+"DESC: Errors have been detected that require a reboot to ensure system\n"
+"integrity. See %s/%s for more information.\n",
+	    fm_url, fm_msgid);
+
+	console_printf(
+"AUTO-RESPONSE: Solaris will attempt to save and diagnose the error telemetry\n"
+"IMPACT: The system will sync files, save a crash dump if needed, and reboot\n"
+"REC-ACTION: Save the error summary below in case telemetry cannot be saved\n");
+
+	console_printf("\n");
+}
+
+/*
+ * Utility function to write all of the pending ereports to the dump device.
+ * This function is called at either normal reboot or panic time, and simply
+ * iterates over the in-transit messages in the ereport sysevent channel.
+ */
+void
+fm_ereport_dump(void)
+{
+	evchanq_t *walk;
+	sysevent_t *ev;
+	erpt_dump_t hdr;
+
+	timespec_t tod;
+	hrtime_t now;
+	char *payload;
+	size_t len;
+
+	if (!panicstr) {
+		/* Not panicking: drain the error queue before walking. */
+		if (ereport_errorq != NULL)
+			errorq_drain(ereport_errorq);
+		gethrestime(&tod);
+		now = gethrtime_waitfree();
+	} else {
+		tod = panic_hrestime;
+		now = panic_hrtime;
+	}
+
+	/*
+	 * In the panic case, sysevent_evc_walk_init() will return NULL.
+	 */
+	walk = sysevent_evc_walk_init(ereport_chan, NULL);
+	if (walk == NULL && !panicstr)
+		return; /* event channel isn't initialized yet */
+
+	for (;;) {
+		ev = sysevent_evc_walk_step(walk);
+		if (ev == NULL)
+			break;
+
+		payload = sysevent_evc_event_attr(ev, &len);
+		if (payload == NULL)
+			break;
+
+		/* Each record on the dump device is a header plus payload. */
+		hdr.ed_magic = ERPT_MAGIC;
+		hdr.ed_chksum = checksum32(payload, len);
+		hdr.ed_size = (uint32_t)len;
+		hdr.ed_pad = 0;
+		hdr.ed_hrt_nsec = SE_TIME(ev);
+		hdr.ed_hrt_base = now;
+		hdr.ed_tod_base.sec = tod.tv_sec;
+		hdr.ed_tod_base.nsec = tod.tv_nsec;
+
+		dumpvp_write(&hdr, sizeof (hdr));
+		dumpvp_write(payload, len);
+	}
+
+	sysevent_evc_walk_fini(walk);
+}
+
+/*
+ * Post an error report (ereport) to the sysevent error channel.  The error
+ * channel must be established with a prior call to sysevent_evc_create()
+ * before publication may occur.
+ */ +void +fm_ereport_post(nvlist_t *ereport, int evc_flag) +{ + size_t nvl_size = 0; + evchan_t *error_chan; + + (void) nvlist_size(ereport, &nvl_size, NV_ENCODE_NATIVE); + if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) { + atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); + return; + } + + if (sysevent_evc_bind(FM_ERROR_CHAN, &error_chan, + EVCH_CREAT|EVCH_HOLD_PEND) != 0) { + atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); + return; + } + + if (sysevent_evc_publish(error_chan, EC_FM, ESC_FM_ERROR, + SUNW_VENDOR, FM_PUB, ereport, evc_flag) != 0) { + atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); + sysevent_evc_unbind(error_chan); + return; + } + sysevent_evc_unbind(error_chan); +} + +/* + * Wrapppers for FM nvlist allocators + */ +/* ARGSUSED */ +static void * +i_fm_alloc(nv_alloc_t *nva, size_t size) +{ + return (kmem_zalloc(size, KM_SLEEP)); +} + +/* ARGSUSED */ +static void +i_fm_free(nv_alloc_t *nva, void *buf, size_t size) +{ + kmem_free(buf, size); +} + +const nv_alloc_ops_t fm_mem_alloc_ops = { + NULL, + NULL, + i_fm_alloc, + i_fm_free, + NULL +}; + +/* + * Create and initialize a new nv_alloc_t for a fixed buffer, buf. A pointer + * to the newly allocated nv_alloc_t structure is returned upon success or NULL + * is returned to indicate that the nv_alloc structure could not be created. + */ +nv_alloc_t * +fm_nva_xcreate(char *buf, size_t bufsz) +{ + nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP); + + if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) { + kmem_free(nvhdl, sizeof (nv_alloc_t)); + return (NULL); + } + + return (nvhdl); +} + +/* + * Destroy a previously allocated nv_alloc structure. The fixed buffer + * associated with nva must be freed by the caller. + */ +void +fm_nva_xdestroy(nv_alloc_t *nva) +{ + nv_alloc_fini(nva); + kmem_free(nva, sizeof (nv_alloc_t)); +} + +/* + * Create a new nv list. 
A pointer to a new nv list structure is returned
+ * upon success or NULL is returned to indicate that the structure could
+ * not be created.  The newly created nv list is created and managed by the
+ * operations installed in nva.  If nva is NULL, the default FMA nva
+ * operations are installed and used.
+ *
+ * When called from the kernel and nva == NULL, this function must be called
+ * from passive kernel context with no locks held that can prevent a
+ * sleeping memory allocation from occurring.  Otherwise, this function may
+ * be called from other kernel contexts as long as a valid nva created via
+ * fm_nva_xcreate() is supplied.
+ */
+nvlist_t *
+fm_nvlist_create(nv_alloc_t *nva)
+{
+	int hdl_alloced = 0;
+	nvlist_t *nvl;
+	nv_alloc_t *nvhdl;
+
+	if (nva == NULL) {
+		/* No allocator supplied: build one on fm_mem_alloc_ops. */
+		nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);
+
+		if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
+			kmem_free(nvhdl, sizeof (nv_alloc_t));
+			return (NULL);
+		}
+		hdl_alloced = 1;
+	} else {
+		nvhdl = nva;
+	}
+
+	if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {
+		if (hdl_alloced) {
+			/*
+			 * Tear the handle down before releasing its memory;
+			 * the reverse order hands freed memory to
+			 * nv_alloc_fini().  A caller-supplied nva is left
+			 * untouched.
+			 */
+			nv_alloc_fini(nvhdl);
+			kmem_free(nvhdl, sizeof (nv_alloc_t));
+		}
+		return (NULL);
+	}
+
+	return (nvl);
+}
+
+/*
+ * Destroy a previously allocated nvlist structure.  flag indicates whether
+ * or not the associated nva structure should be freed (FM_NVA_FREE) or
+ * retained (FM_NVA_RETAIN).  Retaining the nv alloc structure allows
+ * it to be re-used for future nvlist creation operations.
+ */ +void +fm_nvlist_destroy(nvlist_t *nvl, int flag) +{ + nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl); + + nvlist_free(nvl); + + if (nva != NULL) { + if (flag == FM_NVA_FREE) + fm_nva_xdestroy(nva); + } +} + +int +i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap) +{ + int nelem, ret = 0; + data_type_t type; + + while (ret == 0 && name != NULL) { + type = va_arg(ap, data_type_t); + switch (type) { + case DATA_TYPE_BYTE: + ret = nvlist_add_byte(payload, name, + va_arg(ap, uint_t)); + break; + case DATA_TYPE_BYTE_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_byte_array(payload, name, + va_arg(ap, uchar_t *), nelem); + break; + case DATA_TYPE_BOOLEAN_VALUE: + ret = nvlist_add_boolean_value(payload, name, + va_arg(ap, boolean_t)); + break; + case DATA_TYPE_BOOLEAN_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_boolean_array(payload, name, + va_arg(ap, boolean_t *), nelem); + break; + case DATA_TYPE_INT8: + ret = nvlist_add_int8(payload, name, + va_arg(ap, int)); + break; + case DATA_TYPE_INT8_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_int8_array(payload, name, + va_arg(ap, int8_t *), nelem); + break; + case DATA_TYPE_UINT8: + ret = nvlist_add_uint8(payload, name, + va_arg(ap, uint_t)); + break; + case DATA_TYPE_UINT8_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_uint8_array(payload, name, + va_arg(ap, uint8_t *), nelem); + break; + case DATA_TYPE_INT16: + ret = nvlist_add_int16(payload, name, + va_arg(ap, int)); + break; + case DATA_TYPE_INT16_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_int16_array(payload, name, + va_arg(ap, int16_t *), nelem); + break; + case DATA_TYPE_UINT16: + ret = nvlist_add_uint16(payload, name, + va_arg(ap, uint_t)); + break; + case DATA_TYPE_UINT16_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_uint16_array(payload, name, + va_arg(ap, uint16_t *), nelem); + break; + case DATA_TYPE_INT32: + ret = nvlist_add_int32(payload, name, + va_arg(ap, int32_t)); + break; + case 
DATA_TYPE_INT32_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_int32_array(payload, name, + va_arg(ap, int32_t *), nelem); + break; + case DATA_TYPE_UINT32: + ret = nvlist_add_uint32(payload, name, + va_arg(ap, uint32_t)); + break; + case DATA_TYPE_UINT32_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_uint32_array(payload, name, + va_arg(ap, uint32_t *), nelem); + break; + case DATA_TYPE_INT64: + ret = nvlist_add_int64(payload, name, + va_arg(ap, int64_t)); + break; + case DATA_TYPE_INT64_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_int64_array(payload, name, + va_arg(ap, int64_t *), nelem); + break; + case DATA_TYPE_UINT64: + ret = nvlist_add_uint64(payload, name, + va_arg(ap, uint64_t)); + break; + case DATA_TYPE_UINT64_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_uint64_array(payload, name, + va_arg(ap, uint64_t *), nelem); + break; + case DATA_TYPE_STRING: + ret = nvlist_add_string(payload, name, + va_arg(ap, char *)); + break; + case DATA_TYPE_STRING_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_string_array(payload, name, + va_arg(ap, char **), nelem); + break; + case DATA_TYPE_NVLIST: + ret = nvlist_add_nvlist(payload, name, + va_arg(ap, nvlist_t *)); + break; + case DATA_TYPE_NVLIST_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_nvlist_array(payload, name, + va_arg(ap, nvlist_t **), nelem); + break; + default: + ret = EINVAL; + } + + name = va_arg(ap, char *); + } + return (ret); +} + +void +fm_payload_set(nvlist_t *payload, ...) 
+{ + int ret; + const char *name; + va_list ap; + + va_start(ap, payload); + name = va_arg(ap, char *); + ret = i_fm_payload_set(payload, name, ap); + va_end(ap); + + if (ret) + atomic_add_64( + &erpt_kstat_data.payload_set_failed.value.ui64, 1); +} + +/* + * Set-up and validate the members of an ereport event according to: + * + * Member name Type Value + * ==================================================== + * class string ereport + * version uint8_t 0 + * ena uint64_t + * detector nvlist_t + * ereport-payload nvlist_t + * + */ +void +fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class, + uint64_t ena, const nvlist_t *detector, ...) +{ + char ereport_class[FM_MAX_CLASS]; + const char *name; + va_list ap; + int ret; + + if (version != FM_EREPORT_VERS0) { + atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); + return; + } + + (void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s", + FM_EREPORT_CLASS, erpt_class); + if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) { + atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); + return; + } + + if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) { + atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); + } + + if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR, + (nvlist_t *)detector) != 0) { + atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); + } + + va_start(ap, detector); + name = va_arg(ap, const char *); + ret = i_fm_payload_set(ereport, name, ap); + va_end(ap); + + if (ret) + atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); +} + +/* + * Set-up and validate the members of an hc fmri according to; + * + * Member name Type Value + * =================================================== + * version uint8_t 0 + * auth nvlist_t + * hc-name string + * hc-id string + * + * Note that auth and hc-id are optional members. 
+ */ + +#define HC_MAXPAIRS 20 +#define HC_MAXNAMELEN 50 + +static int +fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth) +{ + if (version != FM_HC_SCHEME_VERSION) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return (0); + } + + if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 || + nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return (0); + } + + if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY, + (nvlist_t *)auth) != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return (0); + } + + return (1); +} + +void +fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth, + nvlist_t *snvl, int npairs, ...) +{ + nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri); + nvlist_t *pairs[HC_MAXPAIRS]; + va_list ap; + int i; + + if (!fm_fmri_hc_set_common(fmri, version, auth)) + return; + + npairs = MIN(npairs, HC_MAXPAIRS); + + va_start(ap, npairs); + for (i = 0; i < npairs; i++) { + const char *name = va_arg(ap, const char *); + uint32_t id = va_arg(ap, uint32_t); + char idstr[11]; + + (void) snprintf(idstr, sizeof (idstr), "%u", id); + + pairs[i] = fm_nvlist_create(nva); + if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 || + nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) { + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + } + } + va_end(ap); + + if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs, npairs) != 0) + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + + for (i = 0; i < npairs; i++) + fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN); + + if (snvl != NULL) { + if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) { + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + } + } +} + +/* + * Set-up and validate the members of an dev fmri according to: + * + * Member name Type Value + * 
==================================================== + * version uint8_t 0 + * auth nvlist_t + * devpath string + * devid string + * + * Note that auth and devid are optional members. + */ +void +fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth, + const char *devpath, const char *devid) +{ + if (version != DEV_SCHEME_VERSION0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + if (nvlist_add_uint8(fmri_dev, FM_VERSION, version) != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + if (nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, + FM_FMRI_SCHEME_DEV) != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + if (auth != NULL) { + if (nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY, + (nvlist_t *)auth) != 0) { + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + } + } + + if (nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath) != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + } + + if (devid != NULL) + if (nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid) != 0) + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); +} + +/* + * Set-up and validate the members of an cpu fmri according to: + * + * Member name Type Value + * ==================================================== + * version uint8_t 0 + * auth nvlist_t + * cpuid uint32_t + * cpumask uint8_t + * serial uint64_t + * + * Note that auth, cpumask, serial are optional members. 
+ * + */ +void +fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth, + uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp) +{ + uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64; + + if (version < CPU_SCHEME_VERSION1) { + atomic_add_64(failedp, 1); + return; + } + + if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) { + atomic_add_64(failedp, 1); + return; + } + + if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME, + FM_FMRI_SCHEME_CPU) != 0) { + atomic_add_64(failedp, 1); + return; + } + + if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY, + (nvlist_t *)auth) != 0) + atomic_add_64(failedp, 1); + + if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0) + atomic_add_64(failedp, 1); + + if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK, + *cpu_maskp) != 0) + atomic_add_64(failedp, 1); + + if (serial_idp == NULL || nvlist_add_string(fmri_cpu, + FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0) + atomic_add_64(failedp, 1); +} + +/* + * Set-up and validate the members of a mem according to: + * + * Member name Type Value + * ==================================================== + * version uint8_t 0 + * auth nvlist_t [optional] + * unum string + * serial string [optional*] + * offset uint64_t [optional] + * + * * serial is required if offset is present + */ +void +fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth, + const char *unum, const char *serial, uint64_t offset) +{ + if (version != MEM_SCHEME_VERSION0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + if (!serial && (offset != (uint64_t)-1)) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) { + 
atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + if (auth != NULL) { + if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY, + (nvlist_t *)auth) != 0) { + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + } + } + + if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + } + + if (serial != NULL) { + if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID, + (char **)&serial, 1) != 0) { + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + } + if (offset != (uint64_t)-1) { + if (nvlist_add_uint64(fmri, FM_FMRI_MEM_OFFSET, + offset) != 0) { + atomic_add_64(&erpt_kstat_data. + fmri_set_failed.value.ui64, 1); + } + } + } +} + +void +fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid, + uint64_t vdev_guid) +{ + if (version != ZFS_SCHEME_VERSION0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + } + + if (vdev_guid != 0) { + if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) { + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + } + } +} + +uint64_t +fm_ena_increment(uint64_t ena) +{ + uint64_t new_ena; + + switch (ENA_FORMAT(ena)) { + case FM_ENA_FMT1: + new_ena = ena + (1 << ENA_FMT1_GEN_SHFT); + break; + case FM_ENA_FMT2: + new_ena = ena + (1 << ENA_FMT2_GEN_SHFT); + break; + default: + new_ena = 0; + } + + return (new_ena); +} + +uint64_t +fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format) +{ + uint64_t ena = 0; + + 
switch (format) { + case FM_ENA_FMT1: + if (timestamp) { + ena = (uint64_t)((format & ENA_FORMAT_MASK) | + ((cpuid << ENA_FMT1_CPUID_SHFT) & + ENA_FMT1_CPUID_MASK) | + ((timestamp << ENA_FMT1_TIME_SHFT) & + ENA_FMT1_TIME_MASK)); + } else { + ena = (uint64_t)((format & ENA_FORMAT_MASK) | + ((cpuid << ENA_FMT1_CPUID_SHFT) & + ENA_FMT1_CPUID_MASK) | + ((gethrtime_waitfree() << ENA_FMT1_TIME_SHFT) & + ENA_FMT1_TIME_MASK)); + } + break; + case FM_ENA_FMT2: + ena = (uint64_t)((format & ENA_FORMAT_MASK) | + ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK)); + break; + default: + break; + } + + return (ena); +} + +uint64_t +fm_ena_generate(uint64_t timestamp, uchar_t format) +{ + return (fm_ena_generate_cpu(timestamp, CPU->cpu_id, format)); +} + +uint64_t +fm_ena_generation_get(uint64_t ena) +{ + uint64_t gen; + + switch (ENA_FORMAT(ena)) { + case FM_ENA_FMT1: + gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT; + break; + case FM_ENA_FMT2: + gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT; + break; + default: + gen = 0; + break; + } + + return (gen); +} + +uchar_t +fm_ena_format_get(uint64_t ena) +{ + + return (ENA_FORMAT(ena)); +} + +uint64_t +fm_ena_id_get(uint64_t ena) +{ + uint64_t id; + + switch (ENA_FORMAT(ena)) { + case FM_ENA_FMT1: + id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT; + break; + case FM_ENA_FMT2: + id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT; + break; + default: + id = 0; + } + + return (id); +} + +uint64_t +fm_ena_time_get(uint64_t ena) +{ + uint64_t time; + + switch (ENA_FORMAT(ena)) { + case FM_ENA_FMT1: + time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT; + break; + case FM_ENA_FMT2: + time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT; + break; + default: + time = 0; + } + + return (time); +} + +/* + * Convert a getpcstack() trace to symbolic name+offset, and add the resulting + * string array to a Fault Management ereport as FM_EREPORT_PAYLOAD_NAME_STACK. 
+ */ +void +fm_payload_stack_add(nvlist_t *payload, const pc_t *stack, int depth) +{ + int i; + char *sym; + ulong_t off; + char *stkpp[FM_STK_DEPTH]; + char buf[FM_STK_DEPTH * FM_SYM_SZ]; + char *stkp = buf; + + for (i = 0; i < depth && i != FM_STK_DEPTH; i++, stkp += FM_SYM_SZ) { + if ((sym = kobj_getsymname(stack[i], &off)) != NULL) + (void) snprintf(stkp, FM_SYM_SZ, "%s+%lx", sym, off); + else + (void) snprintf(stkp, FM_SYM_SZ, "%lx", (long)stack[i]); + stkpp[i] = stkp; + } + + fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_STACK, + DATA_TYPE_STRING_ARRAY, depth, stkpp, NULL); +} + +void +print_msg_hwerr(ctid_t ct_id, proc_t *p) +{ + uprintf("Killed process %d (%s) in contract id %d " + "due to hardware error\n", p->p_pid, p->p_user.u_comm, ct_id); +} diff --git a/module/zcommon/include/sys/fm/fs/zfs.h b/module/zfs/include/sys/fm/fs/zfs.h similarity index 100% rename from module/zcommon/include/sys/fm/fs/zfs.h rename to module/zfs/include/sys/fm/fs/zfs.h diff --git a/module/zfs/include/sys/fm/protocol.h b/module/zfs/include/sys/fm/protocol.h new file mode 100644 index 000000000..767fb07d8 --- /dev/null +++ b/module/zfs/include/sys/fm/protocol.h @@ -0,0 +1,336 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_FM_PROTOCOL_H +#define _SYS_FM_PROTOCOL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _KERNEL +#include +#include +#else +#include +#include +#endif +#include + +/* FM common member names */ +#define FM_CLASS "class" +#define FM_VERSION "version" + +/* FM event class values */ +#define FM_EREPORT_CLASS "ereport" +#define FM_FAULT_CLASS "fault" +#define FM_RSRC_CLASS "resource" +#define FM_LIST_EVENT "list" + +/* FM list.* event class values */ +#define FM_LIST_SUSPECT_CLASS FM_LIST_EVENT ".suspect" +#define FM_LIST_ISOLATED_CLASS FM_LIST_EVENT ".isolated" +#define FM_LIST_REPAIRED_CLASS FM_LIST_EVENT ".repaired" +#define FM_LIST_UPDATED_CLASS FM_LIST_EVENT ".updated" +#define FM_LIST_RESOLVED_CLASS FM_LIST_EVENT ".resolved" + +/* ereport class subcategory values */ +#define FM_ERROR_CPU "cpu" +#define FM_ERROR_IO "io" + +/* ereport version and payload member names */ +#define FM_EREPORT_VERS0 0 +#define FM_EREPORT_VERSION FM_EREPORT_VERS0 + +/* ereport payload member names */ +#define FM_EREPORT_DETECTOR "detector" +#define FM_EREPORT_ENA "ena" + +/* list.* event payload member names */ +#define FM_LIST_EVENT_SIZE "list-sz" + +/* + * list.suspect, isolated, updated, repaired and resolved + * versions/payload member names. 
+ */ +#define FM_SUSPECT_UUID "uuid" +#define FM_SUSPECT_DIAG_CODE "code" +#define FM_SUSPECT_DIAG_TIME "diag-time" +#define FM_SUSPECT_DE "de" +#define FM_SUSPECT_FAULT_LIST "fault-list" +#define FM_SUSPECT_FAULT_SZ "fault-list-sz" +#define FM_SUSPECT_FAULT_STATUS "fault-status" +#define FM_SUSPECT_MESSAGE "message" +#define FM_SUSPECT_RETIRE "retire" +#define FM_SUSPECT_RESPONSE "response" +#define FM_SUSPECT_SEVERITY "severity" + +#define FM_SUSPECT_VERS0 0 +#define FM_SUSPECT_VERSION FM_SUSPECT_VERS0 + +#define FM_SUSPECT_FAULTY 0x1 +#define FM_SUSPECT_UNUSABLE 0x2 +#define FM_SUSPECT_NOT_PRESENT 0x4 +#define FM_SUSPECT_DEGRADED 0x8 +#define FM_SUSPECT_REPAIRED 0x10 +#define FM_SUSPECT_REPLACED 0x20 +#define FM_SUSPECT_ACQUITTED 0x40 + +/* fault event versions and payload member names */ +#define FM_FAULT_VERS0 0 +#define FM_FAULT_VERSION FM_FAULT_VERS0 + +#define FM_FAULT_ASRU "asru" +#define FM_FAULT_FRU "fru" +#define FM_FAULT_FRU_LABEL "fru-label" +#define FM_FAULT_CERTAINTY "certainty" +#define FM_FAULT_RESOURCE "resource" +#define FM_FAULT_LOCATION "location" + +/* resource event versions and payload member names */ +#define FM_RSRC_VERS0 0 +#define FM_RSRC_VERSION FM_RSRC_VERS0 +#define FM_RSRC_RESOURCE "resource" + +/* resource.fm.asru.* payload member names */ +#define FM_RSRC_ASRU_UUID "uuid" +#define FM_RSRC_ASRU_CODE "code" +#define FM_RSRC_ASRU_FAULTY "faulty" +#define FM_RSRC_ASRU_REPAIRED "repaired" +#define FM_RSRC_ASRU_REPLACED "replaced" +#define FM_RSRC_ASRU_ACQUITTED "acquitted" +#define FM_RSRC_ASRU_UNUSABLE "unusable" +#define FM_RSRC_ASRU_EVENT "event" + +/* resource.fm.xprt.* versions and payload member names */ +#define FM_RSRC_XPRT_VERS0 0 +#define FM_RSRC_XPRT_VERSION FM_RSRC_XPRT_VERS0 +#define FM_RSRC_XPRT_UUID "uuid" +#define FM_RSRC_XPRT_SUBCLASS "subclass" +#define FM_RSRC_XPRT_FAULT_STATUS "fault-status" +#define FM_RSRC_XPRT_FAULT_HAS_ASRU "fault-has-asru" + +/* + * FM ENA Format Macros + */ +#define ENA_FORMAT_MASK 0x3 
+#define ENA_FORMAT(ena) ((ena) & ENA_FORMAT_MASK) + +/* ENA format types: the value held in the bits selected by ENA_FORMAT_MASK */ +#define FM_ENA_FMT0 0 +#define FM_ENA_FMT1 1 +#define FM_ENA_FMT2 2 + +/* Format 1: generation in bits 2-9, id in bits 10-63 (cpuid in bits 10-19, time in bits 20-63) */ +#define ENA_FMT1_GEN_MASK 0x00000000000003FCull +#define ENA_FMT1_ID_MASK 0xFFFFFFFFFFFFFC00ull +#define ENA_FMT1_CPUID_MASK 0x00000000000FFC00ull +#define ENA_FMT1_TIME_MASK 0xFFFFFFFFFFF00000ull +#define ENA_FMT1_GEN_SHFT 2 +#define ENA_FMT1_ID_SHFT 10 +#define ENA_FMT1_CPUID_SHFT ENA_FMT1_ID_SHFT +#define ENA_FMT1_TIME_SHFT 20 + +/* Format 2: generation in bits 2-9, id in bits 10-63; the time field is the whole id field */ +#define ENA_FMT2_GEN_MASK 0x00000000000003FCull +#define ENA_FMT2_ID_MASK 0xFFFFFFFFFFFFFC00ull +#define ENA_FMT2_TIME_MASK ENA_FMT2_ID_MASK +#define ENA_FMT2_GEN_SHFT 2 +#define ENA_FMT2_ID_SHFT 10 +#define ENA_FMT2_TIME_SHFT ENA_FMT2_ID_SHFT + +/* Common FMRI type names */ +#define FM_FMRI_AUTHORITY "authority" +#define FM_FMRI_SCHEME "scheme" +#define FM_FMRI_SVC_AUTHORITY "svc-authority" +#define FM_FMRI_FACILITY "facility" + +/* FMRI authority-type member names */ +#define FM_FMRI_AUTH_CHASSIS "chassis-id" +#define FM_FMRI_AUTH_PRODUCT "product-id" +#define FM_FMRI_AUTH_DOMAIN "domain-id" +#define FM_FMRI_AUTH_SERVER "server-id" +#define FM_FMRI_AUTH_HOST "host-id" + +#define FM_AUTH_VERS0 0 +#define FM_FMRI_AUTH_VERSION FM_AUTH_VERS0 + +/* scheme name values */ +#define FM_FMRI_SCHEME_FMD "fmd" +#define FM_FMRI_SCHEME_DEV "dev" +#define FM_FMRI_SCHEME_HC "hc" +#define FM_FMRI_SCHEME_SVC "svc" +#define FM_FMRI_SCHEME_CPU "cpu" +#define FM_FMRI_SCHEME_MEM "mem" +#define FM_FMRI_SCHEME_MOD "mod" +#define FM_FMRI_SCHEME_PKG "pkg" +#define FM_FMRI_SCHEME_LEGACY "legacy-hc" +#define FM_FMRI_SCHEME_ZFS "zfs" + +/* Scheme versions */ +#define FMD_SCHEME_VERSION0 0 +#define FM_FMD_SCHEME_VERSION FMD_SCHEME_VERSION0 +#define DEV_SCHEME_VERSION0 0 +#define FM_DEV_SCHEME_VERSION DEV_SCHEME_VERSION0 +#define FM_HC_VERS0 0 +#define FM_HC_SCHEME_VERSION FM_HC_VERS0 +#define CPU_SCHEME_VERSION0 0 +#define CPU_SCHEME_VERSION1 1 +#define FM_CPU_SCHEME_VERSION
CPU_SCHEME_VERSION1 +#define MEM_SCHEME_VERSION0 0 +#define FM_MEM_SCHEME_VERSION MEM_SCHEME_VERSION0 +#define MOD_SCHEME_VERSION0 0 +#define FM_MOD_SCHEME_VERSION MOD_SCHEME_VERSION0 +#define PKG_SCHEME_VERSION0 0 +#define FM_PKG_SCHEME_VERSION PKG_SCHEME_VERSION0 +#define LEGACY_SCHEME_VERSION0 0 +#define FM_LEGACY_SCHEME_VERSION LEGACY_SCHEME_VERSION0 +#define SVC_SCHEME_VERSION0 0 +#define FM_SVC_SCHEME_VERSION SVC_SCHEME_VERSION0 +#define ZFS_SCHEME_VERSION0 0 +#define FM_ZFS_SCHEME_VERSION ZFS_SCHEME_VERSION0 + +/* hc scheme member names */ +#define FM_FMRI_HC_SERIAL_ID "serial" +#define FM_FMRI_HC_PART "part" +#define FM_FMRI_HC_REVISION "revision" +#define FM_FMRI_HC_ROOT "hc-root" +#define FM_FMRI_HC_LIST_SZ "hc-list-sz" +#define FM_FMRI_HC_LIST "hc-list" +#define FM_FMRI_HC_SPECIFIC "hc-specific" + +/* facility member names */ +#define FM_FMRI_FACILITY_NAME "facility-name" +#define FM_FMRI_FACILITY_TYPE "facility-type" + +/* hc-list version and member names */ +#define FM_FMRI_HC_NAME "hc-name" +#define FM_FMRI_HC_ID "hc-id" + +#define HC_LIST_VERSION0 0 +#define FM_HC_LIST_VERSION HC_LIST_VERSION0 + +/* hc-specific member names */ +#define FM_FMRI_HC_SPECIFIC_OFFSET "offset" +#define FM_FMRI_HC_SPECIFIC_PHYSADDR "physaddr" + +/* fmd module scheme member names */ +#define FM_FMRI_FMD_NAME "mod-name" +#define FM_FMRI_FMD_VERSION "mod-version" + +/* dev scheme member names */ +#define FM_FMRI_DEV_ID "devid" +#define FM_FMRI_DEV_PATH "device-path" + +/* pkg scheme member names */ +#define FM_FMRI_PKG_BASEDIR "pkg-basedir" +#define FM_FMRI_PKG_INST "pkg-inst" +#define FM_FMRI_PKG_VERSION "pkg-version" + +/* svc scheme member names */ +#define FM_FMRI_SVC_NAME "svc-name" +#define FM_FMRI_SVC_INSTANCE "svc-instance" +#define FM_FMRI_SVC_CONTRACT_ID "svc-contract-id" + +/* svc-authority member names */ +#define FM_FMRI_SVC_AUTH_SCOPE "scope" +#define FM_FMRI_SVC_AUTH_SYSTEM_FQN "system-fqn" + +/* cpu scheme member names */ +#define FM_FMRI_CPU_ID "cpuid" 
+#define FM_FMRI_CPU_SERIAL_ID "serial" +#define FM_FMRI_CPU_MASK "cpumask" +#define FM_FMRI_CPU_VID "cpuvid" +#define FM_FMRI_CPU_CPUFRU "cpufru" +#define FM_FMRI_CPU_CACHE_INDEX "cacheindex" +#define FM_FMRI_CPU_CACHE_WAY "cacheway" +#define FM_FMRI_CPU_CACHE_BIT "cachebit" +#define FM_FMRI_CPU_CACHE_TYPE "cachetype" + +#define FM_FMRI_CPU_CACHE_TYPE_L2 0 +#define FM_FMRI_CPU_CACHE_TYPE_L3 1 + +/* legacy-hc scheme member names */ +#define FM_FMRI_LEGACY_HC "component" +#define FM_FMRI_LEGACY_HC_PREFIX FM_FMRI_SCHEME_HC":///" \ + FM_FMRI_LEGACY_HC"=" + +/* mem scheme member names */ +#define FM_FMRI_MEM_UNUM "unum" +#define FM_FMRI_MEM_SERIAL_ID "serial" +#define FM_FMRI_MEM_PHYSADDR "physaddr" +#define FM_FMRI_MEM_MEMCONFIG "memconfig" +#define FM_FMRI_MEM_OFFSET "offset" + +/* mod scheme member names */ +#define FM_FMRI_MOD_PKG "mod-pkg" +#define FM_FMRI_MOD_NAME "mod-name" +#define FM_FMRI_MOD_ID "mod-id" +#define FM_FMRI_MOD_DESC "mod-desc" + +/* zfs scheme member names */ +#define FM_FMRI_ZFS_POOL "pool" +#define FM_FMRI_ZFS_VDEV "vdev" + +extern nv_alloc_t *fm_nva_xcreate(char *, size_t); +extern void fm_nva_xdestroy(nv_alloc_t *); + +extern nvlist_t *fm_nvlist_create(nv_alloc_t *); +extern void fm_nvlist_destroy(nvlist_t *, int); + +#define FM_NVA_FREE 0 /* free allocator on nvlist_destroy */ +#define FM_NVA_RETAIN 1 /* keep allocator on nvlist_destroy */ + +extern void fm_ereport_set(nvlist_t *, int, const char *, uint64_t, + const nvlist_t *, ...); +extern void fm_payload_set(nvlist_t *, ...); +extern int i_fm_payload_set(nvlist_t *, const char *, va_list); +extern void fm_fmri_hc_set(nvlist_t *, int, const nvlist_t *, nvlist_t *, + int, ...); +extern void fm_fmri_dev_set(nvlist_t *, int, const nvlist_t *, const char *, + const char *); +extern void fm_fmri_de_set(nvlist_t *, int, const nvlist_t *, const char *); +extern void fm_fmri_cpu_set(nvlist_t *, int, const nvlist_t *, uint32_t, + uint8_t *, const char *); +extern void fm_fmri_mem_set(nvlist_t *, 
int, const nvlist_t *, const char *, + const char *, uint64_t); +extern void fm_authority_set(nvlist_t *, int, const char *, const char *, + const char *, const char *); +extern void fm_fmri_zfs_set(nvlist_t *, int, uint64_t, uint64_t); + +extern uint64_t fm_ena_increment(uint64_t); +extern uint64_t fm_ena_generate(uint64_t, uchar_t); +extern uint64_t fm_ena_generate_cpu(uint64_t, processorid_t, uchar_t); +extern uint64_t fm_ena_generation_get(uint64_t); +extern uchar_t fm_ena_format_get(uint64_t); +extern uint64_t fm_ena_id_get(uint64_t); +extern uint64_t fm_ena_time_get(uint64_t); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FM_PROTOCOL_H */ diff --git a/module/zfs/include/sys/fm/util.h b/module/zfs/include/sys/fm/util.h new file mode 100644 index 000000000..4934814d8 --- /dev/null +++ b/module/zfs/include/sys/fm/util.h @@ -0,0 +1,105 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_FM_UTIL_H +#define _SYS_FM_UTIL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/* + * Shared user/kernel definitions for class length, error channel name, + * and kernel event publisher string. + */ +#define FM_MAX_CLASS 100 +#define FM_ERROR_CHAN "com.sun:fm:error" +#define FM_PUB "fm" + +/* + * ereport dump device transport support + * + * Ereports are written out to the dump device at a prescribed offset from the + * end, similar to in-transit log messages. The ereports are represented as an + * erpt_dump_t header followed by ed_size bytes of packed native nvlist data. + * + * NOTE: All of these constants and the header must be defined so they have the + * same representation for *both* 32-bit and 64-bit producers and consumers. + */ +#define ERPT_MAGIC 0xf00d4eddU +#define ERPT_MAX_ERRS 16 +#define ERPT_DATA_SZ (6 * 1024) +#define ERPT_EVCH_MAX 256 +#define ERPT_HIWAT 64 + +typedef struct erpt_dump { + uint32_t ed_magic; /* ERPT_MAGIC or zero to indicate end */ + uint32_t ed_chksum; /* checksum32() of packed nvlist data */ + uint32_t ed_size; /* ereport (nvl) fixed buf size */ + uint32_t ed_pad; /* reserved for future use */ + hrtime_t ed_hrt_nsec; /* hrtime of this ereport */ + hrtime_t ed_hrt_base; /* hrtime sample corresponding to ed_tod_base */ + struct { + uint64_t sec; /* seconds since gettimeofday() Epoch */ + uint64_t nsec; /* nanoseconds past ed_tod_base.sec */ + } ed_tod_base; +} erpt_dump_t; + +#ifdef _KERNEL +#include + +#define FM_STK_DEPTH 20 /* maximum number of stack frames recorded */ +#define FM_SYM_SZ 64 /* bytes reserved per formatted frame string */ +#define FM_ERR_PIL 2 /* PIL for ereport_errorq drain processing */ + +#define FM_EREPORT_PAYLOAD_NAME_STACK "stack" + +extern errorq_t *ereport_errorq; +extern void *ereport_dumpbuf; +extern size_t ereport_dumplen; + +extern void fm_init(void); +extern void fm_nvprint(nvlist_t *); +extern void fm_panic(const char *, ...); +extern void fm_banner(void); +
+extern void fm_ereport_dump(void); +extern void fm_ereport_post(nvlist_t *, int); + +extern void fm_payload_stack_add(nvlist_t *, const pc_t *, int); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FM_UTIL_H */ diff --git a/scripts/zfs-update.sh b/scripts/zfs-update.sh index 03023eaef..83525fe03 100755 --- a/scripts/zfs-update.sh +++ b/scripts/zfs-update.sh @@ -94,17 +94,19 @@ cp ${SRC_UCM}/sys/u8_textprep_data.h ${DST_MOD}/unicode/include/sys/ echo "* module/zcommon" mkdir -p ${DST_MOD}/zcommon/include/sys/fs/ -mkdir -p ${DST_MOD}/zcommon/include/sys/fm/fs/ cp ${SRC_CM}/zfs/*.c ${DST_MOD}/zcommon/ cp ${SRC_CM}/zfs/*.h ${DST_MOD}/zcommon/include/ cp ${SRC_UCM}/sys/fs/zfs.h ${DST_MOD}/zcommon/include/sys/fs/ -cp ${SRC_UCM}/sys/fm/fs/zfs.h ${DST_MOD}/zcommon/include/sys/fm/fs/ echo "* module/zfs" -mkdir -p ${DST_MOD}/zpool/include/sys/ +mkdir -p ${DST_MOD}/zfs/include/sys/fm/fs/ cp ${SRC_UTS}/intel/zfs/spa_boot.c ${DST_MOD}/zfs/ cp ${SRC_ZLIB}/*.c ${DST_MOD}/zfs/ cp ${SRC_ZLIB}/sys/*.h ${DST_MOD}/zfs/include/sys/ +cp ${SRC_UCM}/os/fm.c ${DST_MOD}/zfs/ +cp ${SRC_UCM}/sys/fm/protocol.h ${DST_MOD}/zfs/include/sys/fm/ +cp ${SRC_UCM}/sys/fm/util.h ${DST_MOD}/zfs/include/sys/fm/ +cp ${SRC_UCM}/sys/fm/fs/zfs.h ${DST_MOD}/zfs/include/sys/fm/fs/ rm ${DST_MOD}/zfs/vdev_disk.c rm ${DST_MOD}/zfs/zvol.c rm ${DST_MOD}/zfs/include/sys/vdev_disk.h