Merge branch 'b_tracepoints'

Signed-off-by: Prakash Surya <surya1@llnl.gov>
Signed-off-by: Ned Bass <bass6@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #2874
This commit is contained in:
Brian Behlendorf 2014-11-17 11:14:24 -08:00
commit bc9f4131a1
22 changed files with 1530 additions and 285 deletions

View File

@ -0,0 +1,59 @@
dnl #
dnl # Ensure the DECLARE_EVENT_CLASS macro is available to non-GPL modules.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_DECLARE_EVENT_CLASS], [
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-I\$(src)"
AC_MSG_CHECKING([whether DECLARE_EVENT_CLASS() is available])
ZFS_LINUX_TRY_COMPILE_HEADER([
#include <linux/module.h>
MODULE_LICENSE(ZFS_META_LICENSE);
#define CREATE_TRACE_POINTS
#include "conftest.h"
],[
trace_zfs_autoconf_event_one(1UL);
trace_zfs_autoconf_event_two(2UL);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_DECLARE_EVENT_CLASS, 1,
[DECLARE_EVENT_CLASS() is available])
],[
AC_MSG_RESULT(no)
],[
#if !defined(_CONFTEST_H) || defined(TRACE_HEADER_MULTI_READ)
#define _CONFTEST_H
#undef TRACE_SYSTEM
#define TRACE_SYSTEM zfs
#include <linux/tracepoint.h>
DECLARE_EVENT_CLASS(zfs_autoconf_event_class,
TP_PROTO(unsigned long i),
TP_ARGS(i),
TP_STRUCT__entry(
__field(unsigned long, i)
),
TP_fast_assign(
__entry->i = i;
),
TP_printk("i = %lu", __entry->i)
);
#define DEFINE_AUTOCONF_EVENT(name) \
DEFINE_EVENT(zfs_autoconf_event_class, name, \
TP_PROTO(unsigned long i), \
TP_ARGS(i))
DEFINE_AUTOCONF_EVENT(zfs_autoconf_event_one);
DEFINE_AUTOCONF_EVENT(zfs_autoconf_event_two);
#endif /* _CONFTEST_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE conftest
#include <trace/define_trace.h>
])
EXTRA_KCFLAGS="$tmp_flags"
])

View File

@ -6,6 +6,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_SPL
ZFS_AC_TEST_MODULE
ZFS_AC_KERNEL_CONFIG
ZFS_AC_KERNEL_DECLARE_EVENT_CLASS
ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS
ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
ZFS_AC_KERNEL_TYPE_FMODE_T
@ -506,9 +507,18 @@ AC_DEFUN([ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC], [
])
dnl #
dnl # ZFS_LINUX_CONFTEST
dnl # ZFS_LINUX_CONFTEST_H
dnl #
AC_DEFUN([ZFS_LINUX_CONFTEST], [
AC_DEFUN([ZFS_LINUX_CONFTEST_H], [
cat - <<_ACEOF >conftest.h
$1
_ACEOF
])
dnl #
dnl # ZFS_LINUX_CONFTEST_C
dnl #
AC_DEFUN([ZFS_LINUX_CONFTEST_C], [
cat confdefs.h - <<_ACEOF >conftest.c
$1
_ACEOF
@ -534,13 +544,14 @@ dnl #
dnl # ZFS_LINUX_COMPILE_IFELSE / like AC_COMPILE_IFELSE
dnl #
AC_DEFUN([ZFS_LINUX_COMPILE_IFELSE], [
m4_ifvaln([$1], [ZFS_LINUX_CONFTEST([$1])])
m4_ifvaln([$1], [ZFS_LINUX_CONFTEST_C([$1])])
m4_ifvaln([$6], [ZFS_LINUX_CONFTEST_H([$6])], [ZFS_LINUX_CONFTEST_H([])])
rm -Rf build && mkdir -p build && touch build/conftest.mod.c
echo "obj-m := conftest.o" >build/Makefile
modpost_flag=''
test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
AS_IF(
[AC_TRY_COMMAND(cp conftest.c build && make [$2] -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag) >/dev/null && AC_TRY_COMMAND([$3])],
[AC_TRY_COMMAND(cp conftest.c conftest.h build && make [$2] -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag) >/dev/null && AC_TRY_COMMAND([$3])],
[$4],
[_AC_MSG_LOG_CONFTEST m4_ifvaln([$5],[$5])]
)
@ -627,3 +638,16 @@ AC_DEFUN([ZFS_LINUX_TRY_COMPILE_SYMBOL], [
fi
fi
])
dnl #
dnl # ZFS_LINUX_TRY_COMPILE_HEADER
dnl # like ZFS_LINUX_TRY_COMPILE, except the contents conftest.h are
dnl # provided via the fifth parameter
dnl #
AC_DEFUN([ZFS_LINUX_TRY_COMPILE_HEADER],
[ZFS_LINUX_COMPILE_IFELSE(
[AC_LANG_SOURCE([ZFS_LANG_PROGRAM([[$1]], [[$2]])])],
[modules],
[test -s build/conftest.o],
[$3], [$4], [AC_LANG_SOURCE([$5])])
])

View File

@ -2,6 +2,7 @@ SUBDIRS = fm fs
COMMON_H = \
$(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/arc_impl.h \
$(top_srcdir)/include/sys/avl.h \
$(top_srcdir)/include/sys/avl_impl.h \
$(top_srcdir)/include/sys/blkptr.h \
@ -39,11 +40,13 @@ COMMON_H = \
$(top_srcdir)/include/sys/rrwlock.h \
$(top_srcdir)/include/sys/sa.h \
$(top_srcdir)/include/sys/sa_impl.h \
$(top_srcdir)/include/sys/sdt.h \
$(top_srcdir)/include/sys/spa_boot.h \
$(top_srcdir)/include/sys/space_map.h \
$(top_srcdir)/include/sys/space_reftree.h \
$(top_srcdir)/include/sys/spa.h \
$(top_srcdir)/include/sys/spa_impl.h \
$(top_srcdir)/include/sys/trace.h \
$(top_srcdir)/include/sys/txg.h \
$(top_srcdir)/include/sys/txg_impl.h \
$(top_srcdir)/include/sys/u8_textprep_data.h \

157
include/sys/arc_impl.h Normal file
View File

@ -0,0 +1,157 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _SYS_ARC_IMPL_H
#define _SYS_ARC_IMPL_H
#include <sys/arc.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Note that buffers can be in one of 6 states:
* ARC_anon - anonymous (discussed below)
* ARC_mru - recently used, currently cached
* ARC_mru_ghost - recentely used, no longer in cache
* ARC_mfu - frequently used, currently cached
* ARC_mfu_ghost - frequently used, no longer in cache
* ARC_l2c_only - exists in L2ARC but not other states
* When there are no active references to the buffer, they are
* are linked onto a list in one of these arc states. These are
* the only buffers that can be evicted or deleted. Within each
* state there are multiple lists, one for meta-data and one for
* non-meta-data. Meta-data (indirect blocks, blocks of dnodes,
* etc.) is tracked separately so that it can be managed more
* explicitly: favored over data, limited explicitly.
*
* Anonymous buffers are buffers that are not associated with
* a DVA. These are buffers that hold dirty block copies
* before they are written to stable storage. By definition,
* they are "ref'd" and are considered part of arc_mru
* that cannot be freed. Generally, they will aquire a DVA
* as they are written and migrate onto the arc_mru list.
*
* The ARC_l2c_only state is for buffers that are in the second
* level ARC but no longer in any of the ARC_m* lists. The second
* level ARC itself may also contain buffers that are in any of
* the ARC_m* states - meaning that a buffer can exist in two
* places. The reason for the ARC_l2c_only state is to keep the
* buffer header in the hash table, so that reads that hit the
* second level ARC benefit from these fast lookups.
*/
typedef struct arc_state {
list_t arcs_list[ARC_BUFC_NUMTYPES]; /* list of evictable buffers */
uint64_t arcs_lsize[ARC_BUFC_NUMTYPES]; /* amount of evictable data */
uint64_t arcs_size; /* total amount of data in this state */
kmutex_t arcs_mtx;
arc_state_type_t arcs_state;
} arc_state_t;
typedef struct l2arc_buf_hdr l2arc_buf_hdr_t;
typedef struct arc_callback arc_callback_t;
struct arc_callback {
void *acb_private;
arc_done_func_t *acb_done;
arc_buf_t *acb_buf;
zio_t *acb_zio_dummy;
arc_callback_t *acb_next;
};
typedef struct arc_write_callback arc_write_callback_t;
struct arc_write_callback {
void *awcb_private;
arc_done_func_t *awcb_ready;
arc_done_func_t *awcb_physdone;
arc_done_func_t *awcb_done;
arc_buf_t *awcb_buf;
};
struct arc_buf_hdr {
/* protected by hash lock */
dva_t b_dva;
uint64_t b_birth;
uint64_t b_cksum0;
kmutex_t b_freeze_lock;
zio_cksum_t *b_freeze_cksum;
arc_buf_hdr_t *b_hash_next;
arc_buf_t *b_buf;
uint32_t b_flags;
uint32_t b_datacnt;
arc_callback_t *b_acb;
kcondvar_t b_cv;
/* immutable */
arc_buf_contents_t b_type;
uint64_t b_size;
uint64_t b_spa;
/* protected by arc state mutex */
arc_state_t *b_state;
list_node_t b_arc_node;
/* updated atomically */
clock_t b_arc_access;
uint32_t b_mru_hits;
uint32_t b_mru_ghost_hits;
uint32_t b_mfu_hits;
uint32_t b_mfu_ghost_hits;
uint32_t b_l2_hits;
/* self protecting */
refcount_t b_refcnt;
l2arc_buf_hdr_t *b_l2hdr;
list_node_t b_l2node;
};
typedef struct l2arc_dev {
vdev_t *l2ad_vdev; /* vdev */
spa_t *l2ad_spa; /* spa */
uint64_t l2ad_hand; /* next write location */
uint64_t l2ad_start; /* first addr on device */
uint64_t l2ad_end; /* last addr on device */
uint64_t l2ad_evict; /* last addr eviction reached */
boolean_t l2ad_first; /* first sweep through */
boolean_t l2ad_writing; /* currently writing */
list_t *l2ad_buflist; /* buffer list */
list_node_t l2ad_node; /* device list node */
} l2arc_dev_t;
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ARC_IMPL_H */

70
include/sys/sdt.h Normal file
View File

@ -0,0 +1,70 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_SDT_H
#define _SYS_SDT_H
#ifndef _KERNEL
#define ZFS_PROBE(a) ((void) 0)
#define ZFS_PROBE1(a, c) ((void) 0)
#define ZFS_PROBE2(a, c, e) ((void) 0)
#define ZFS_PROBE3(a, c, e, g) ((void) 0)
#define ZFS_PROBE4(a, c, e, g, i) ((void) 0)
#define ZFS_SET_ERROR(err) ((void) 0)
#else
#if defined(HAVE_DECLARE_EVENT_CLASS)
#include <sys/trace.h>
/*
* The set-error SDT probe is extra static, in that we declare its fake
* function literally, rather than with the DTRACE_PROBE1() macro. This is
* necessary so that SET_ERROR() can evaluate to a value, which wouldn't
* be possible if it required multiple statements (to declare the function
* and then call it).
*
* SET_ERROR() uses the comma operator so that it can be used without much
* additional code. For example, "return (EINVAL);" becomes
* "return (SET_ERROR(EINVAL));". Note that the argument will be evaluated
* twice, so it should not have side effects (e.g. something like:
* "return (SET_ERROR(log_error(EINVAL, info)));" would log the error twice).
*/
#define SET_ERROR(err) \
(trace_zfs_set__error(__FILE__, __func__, __LINE__, err), err)
#else
#undef SET_ERROR
#define SET_ERROR(err) (err)
#endif /* HAVE_DECLARE_EVENT_CLASS */
#endif /* _KERNEL */
#endif /* _SYS_SDT_H */

1038
include/sys/trace.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -66,6 +66,7 @@
#include <sys/sunddi.h>
#include <sys/ctype.h>
#include <sys/disp.h>
#include <sys/trace.h>
#include <linux/dcache_compat.h>
#include <linux/utsname_compat.h>
@ -140,15 +141,12 @@
#define CE_PANIC 3 /* panic */
#define CE_IGNORE 4 /* print nothing */
extern int aok;
/*
* ZFS debugging
*/
extern void dprintf_setup(int *argc, char **argv);
extern void __dprintf(const char *file, const char *func,
int line, const char *fmt, ...);
extern void cmn_err(int, const char *, ...);
extern void vcmn_err(int, const char *, va_list);
extern void panic(const char *, ...);
@ -156,7 +154,8 @@ extern void vpanic(const char *, va_list);
#define fm_panic panic
#ifdef __sun
extern int aok;
/*
* DTrace SDT probes have different signatures in userland than they do in
* kernel. If they're being used in kernel code, re-define them out of
@ -202,9 +201,6 @@ extern void vpanic(const char *, va_list);
* "return (SET_ERROR(log_error(EINVAL, info)));" would log the error twice).
*/
#define SET_ERROR(err) (ZFS_SET_ERROR(err), err)
#else
#define SET_ERROR(err) (err)
#endif
/*
* Threads. TS_STACK_MIN is dictated by the minimum allowed pthread stack

View File

@ -38,14 +38,6 @@ extern "C" {
#define FALSE 0
#endif
/*
* ZFS debugging - Always enabled for user space builds.
*/
#if !defined(ZFS_DEBUG) && !defined(_KERNEL)
#define ZFS_DEBUG
#endif
extern int zfs_flags;
extern int zfs_recover;
extern int zfs_free_leak_on_eio;
@ -59,29 +51,15 @@ extern int zfs_free_leak_on_eio;
#define ZFS_DEBUG_ZIO_FREE (1<<6)
#define ZFS_DEBUG_HISTOGRAM_VERIFY (1<<7)
/*
* Always log zfs debug messages to the spl debug subsystem as SS_USER1.
* When the SPL is configured with debugging enabled these messages will
* appear in the internal spl debug log, otherwise they are a no-op.
*/
#if defined(_KERNEL)
#include <spl-debug.h>
#define dprintf(...) \
if (zfs_flags & ZFS_DEBUG_DPRINTF) \
__SDEBUG(NULL, SS_USER1, SD_DPRINTF, __VA_ARGS__)
/*
* When zfs is running is user space the debugging is always enabled.
* The messages will be printed using the __dprintf() function and
* filtered based on the zfs_flags variable.
*/
#else
#if defined(HAVE_DECLARE_EVENT_CLASS) || !defined(_KERNEL)
extern void __dprintf(const char *file, const char *func,
int line, const char *fmt, ...);
#define dprintf(...) \
if (zfs_flags & ZFS_DEBUG_DPRINTF) \
__dprintf(__FILE__, __func__, __LINE__, __VA_ARGS__)
#endif /* _KERNEL */
#else
#define dprintf(...) ((void)0)
#endif /* HAVE_DECLARE_EVENT_CLASS || !_KERNEL */
extern void zfs_panic_recover(const char *fmt, ...);
@ -93,12 +71,8 @@ typedef struct zfs_dbgmsg {
extern void zfs_dbgmsg_init(void);
extern void zfs_dbgmsg_fini(void);
#if defined(_KERNEL) && defined(__linux__)
#define zfs_dbgmsg(...) dprintf(__VA_ARGS__)
#else
extern void zfs_dbgmsg(const char *fmt, ...);
extern void zfs_dbgmsg_print(const char *tag);
#endif
#ifndef _KERNEL
extern int dprintf_find_string(const char *string);

View File

@ -33,7 +33,6 @@ libspl_HEADERS = \
$(top_srcdir)/lib/libspl/include/sys/param.h \
$(top_srcdir)/lib/libspl/include/sys/priv.h \
$(top_srcdir)/lib/libspl/include/sys/processor.h \
$(top_srcdir)/lib/libspl/include/sys/sdt.h \
$(top_srcdir)/lib/libspl/include/sys/stack.h \
$(top_srcdir)/lib/libspl/include/sys/stat.h \
$(top_srcdir)/lib/libspl/include/sys/stropts.h \

View File

@ -70,6 +70,7 @@ libzpool_la_SOURCES = \
$(top_srcdir)/module/zfs/space_map.c \
$(top_srcdir)/module/zfs/space_reftree.c \
$(top_srcdir)/module/zfs/txg.c \
$(top_srcdir)/module/zfs/trace.c \
$(top_srcdir)/module/zfs/uberblock.c \
$(top_srcdir)/module/zfs/unique.c \
$(top_srcdir)/module/zfs/vdev.c \

View File

@ -52,6 +52,7 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/spa_stats.o
$(MODULE)-objs += @top_srcdir@/module/zfs/space_map.o
$(MODULE)-objs += @top_srcdir@/module/zfs/space_reftree.o
$(MODULE)-objs += @top_srcdir@/module/zfs/txg.o
$(MODULE)-objs += @top_srcdir@/module/zfs/trace.o
$(MODULE)-objs += @top_srcdir@/module/zfs/uberblock.o
$(MODULE)-objs += @top_srcdir@/module/zfs/unique.o
$(MODULE)-objs += @top_srcdir@/module/zfs/vdev.o

View File

@ -145,6 +145,7 @@
#include <sys/kstat.h>
#include <sys/dmu_tx.h>
#include <zfs_fletcher.h>
#include <sys/arc_impl.h>
#ifndef _KERNEL
/* set with ZFS_DEBUG=watch, to enable watchpoints on frozen buffers */
@ -218,46 +219,6 @@ unsigned long zfs_arc_max = 0;
unsigned long zfs_arc_min = 0;
unsigned long zfs_arc_meta_limit = 0;
/*
* Note that buffers can be in one of 6 states:
* ARC_anon - anonymous (discussed below)
* ARC_mru - recently used, currently cached
* ARC_mru_ghost - recentely used, no longer in cache
* ARC_mfu - frequently used, currently cached
* ARC_mfu_ghost - frequently used, no longer in cache
* ARC_l2c_only - exists in L2ARC but not other states
* When there are no active references to the buffer, they are
* are linked onto a list in one of these arc states. These are
* the only buffers that can be evicted or deleted. Within each
* state there are multiple lists, one for meta-data and one for
* non-meta-data. Meta-data (indirect blocks, blocks of dnodes,
* etc.) is tracked separately so that it can be managed more
* explicitly: favored over data, limited explicitly.
*
* Anonymous buffers are buffers that are not associated with
* a DVA. These are buffers that hold dirty block copies
* before they are written to stable storage. By definition,
* they are "ref'd" and are considered part of arc_mru
* that cannot be freed. Generally, they will aquire a DVA
* as they are written and migrate onto the arc_mru list.
*
* The ARC_l2c_only state is for buffers that are in the second
* level ARC but no longer in any of the ARC_m* lists. The second
* level ARC itself may also contain buffers that are in any of
* the ARC_m* states - meaning that a buffer can exist in two
* places. The reason for the ARC_l2c_only state is to keep the
* buffer header in the hash table, so that reads that hit the
* second level ARC benefit from these fast lookups.
*/
typedef struct arc_state {
list_t arcs_list[ARC_BUFC_NUMTYPES]; /* list of evictable buffers */
uint64_t arcs_lsize[ARC_BUFC_NUMTYPES]; /* amount of evictable data */
uint64_t arcs_size; /* total amount of data in this state */
kmutex_t arcs_mtx;
arc_state_type_t arcs_state;
} arc_state_t;
/* The 6 states: */
static arc_state_t ARC_anon;
static arc_state_t ARC_mru;
@ -522,69 +483,6 @@ static arc_state_t *arc_l2c_only;
#define L2ARC_IS_VALID_COMPRESS(_c_) \
((_c_) == ZIO_COMPRESS_LZ4 || (_c_) == ZIO_COMPRESS_EMPTY)
typedef struct l2arc_buf_hdr l2arc_buf_hdr_t;
typedef struct arc_callback arc_callback_t;
struct arc_callback {
void *acb_private;
arc_done_func_t *acb_done;
arc_buf_t *acb_buf;
zio_t *acb_zio_dummy;
arc_callback_t *acb_next;
};
typedef struct arc_write_callback arc_write_callback_t;
struct arc_write_callback {
void *awcb_private;
arc_done_func_t *awcb_ready;
arc_done_func_t *awcb_physdone;
arc_done_func_t *awcb_done;
arc_buf_t *awcb_buf;
};
struct arc_buf_hdr {
/* protected by hash lock */
dva_t b_dva;
uint64_t b_birth;
uint64_t b_cksum0;
kmutex_t b_freeze_lock;
zio_cksum_t *b_freeze_cksum;
arc_buf_hdr_t *b_hash_next;
arc_buf_t *b_buf;
uint32_t b_flags;
uint32_t b_datacnt;
arc_callback_t *b_acb;
kcondvar_t b_cv;
/* immutable */
arc_buf_contents_t b_type;
uint64_t b_size;
uint64_t b_spa;
/* protected by arc state mutex */
arc_state_t *b_state;
list_node_t b_arc_node;
/* updated atomically */
clock_t b_arc_access;
uint32_t b_mru_hits;
uint32_t b_mru_ghost_hits;
uint32_t b_mfu_hits;
uint32_t b_mfu_ghost_hits;
uint32_t b_l2_hits;
/* self protecting */
refcount_t b_refcnt;
l2arc_buf_hdr_t *b_l2hdr;
list_node_t b_l2node;
};
static list_t arc_prune_list;
static kmutex_t arc_prune_mtx;
static arc_buf_t *arc_eviction_list;
@ -707,19 +605,6 @@ int l2arc_norw = B_FALSE; /* no reads during writes */
/*
* L2ARC Internals
*/
typedef struct l2arc_dev {
vdev_t *l2ad_vdev; /* vdev */
spa_t *l2ad_spa; /* spa */
uint64_t l2ad_hand; /* next write location */
uint64_t l2ad_start; /* first addr on device */
uint64_t l2ad_end; /* last addr on device */
uint64_t l2ad_evict; /* last addr eviction reached */
boolean_t l2ad_first; /* first sweep through */
boolean_t l2ad_writing; /* currently writing */
list_t *l2ad_buflist; /* buffer list */
list_node_t l2ad_node; /* device list node */
} l2arc_dev_t;
static list_t L2ARC_dev_list; /* device list */
static list_t *l2arc_dev_list; /* device list pointer */
static kmutex_t l2arc_dev_mtx; /* device list mutex */
@ -2043,7 +1928,7 @@ top:
if (bytes_evicted < bytes)
dprintf("only evicted %lld bytes from %x\n",
(longlong_t)bytes_evicted, state);
(longlong_t)bytes_evicted, state->arcs_state);
if (skipped)
ARCSTAT_INCR(arcstat_evict_skip, skipped);

View File

@ -221,7 +221,7 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
if (zfs_free_leak_on_eio)
flags |= TRAVERSE_HARD;
zfs_dbgmsg("bptree index %d: traversing from min_txg=%lld "
zfs_dbgmsg("bptree index %lld: traversing from min_txg=%lld "
"bookmark %lld/%lld/%lld/%lld",
i, (longlong_t)bte.be_birth_txg,
(longlong_t)bte.be_zb.zb_objset,

View File

@ -238,6 +238,53 @@ static avl_tree_t spa_l2cache_avl;
kmem_cache_t *spa_buffer_pool;
int spa_mode_global;
#ifdef ZFS_DEBUG
/* Everything except dprintf and spa is on by default in debug builds */
int zfs_flags = ~(ZFS_DEBUG_DPRINTF | ZFS_DEBUG_SPA);
#else
int zfs_flags = 0;
#endif
/*
* zfs_recover can be set to nonzero to attempt to recover from
* otherwise-fatal errors, typically caused by on-disk corruption. When
* set, calls to zfs_panic_recover() will turn into warning messages.
* This should only be used as a last resort, as it typically results
* in leaked space, or worse.
*/
int zfs_recover = B_FALSE;
/*
* If destroy encounters an EIO while reading metadata (e.g. indirect
* blocks), space referenced by the missing metadata can not be freed.
* Normally this causes the background destroy to become "stalled", as
* it is unable to make forward progress. While in this stalled state,
* all remaining space to free from the error-encountering filesystem is
* "temporarily leaked". Set this flag to cause it to ignore the EIO,
* permanently leak the space from indirect blocks that can not be read,
* and continue to free everything else that it can.
*
* The default, "stalling" behavior is useful if the storage partially
* fails (i.e. some but not all i/os fail), and then later recovers. In
* this case, we will be able to continue pool operations while it is
* partially failed, and when it recovers, we can continue to free the
* space, with no leaks. However, note that this case is actually
* fairly rare.
*
* Typically pools either (a) fail completely (but perhaps temporarily,
* e.g. a top-level vdev going offline), or (b) have localized,
* permanent errors (e.g. disk returns the wrong data due to bit flip or
* firmware bug). In case (a), this setting does not matter because the
* pool will be suspended and the sync thread will not be able to make
* forward progress regardless. In case (b), because the error is
* permanent, the best we can do is leak the minimum amount of space,
* which is what setting this flag will do. Therefore, it is reasonable
* for this flag to normally be set, but we chose the more conservative
* approach of not setting it, so that there is no possibility of
* leaking space in the "partial temporary" failure case.
*/
int zfs_free_leak_on_eio = B_FALSE;
/*
* Expiration time in milliseconds. This value has two meanings. First it is
* used to determine when the spa_deadman() logic should fire. By default the
@ -1319,6 +1366,16 @@ spa_freeze(spa_t *spa)
txg_wait_synced(spa_get_dsl(spa), freeze_txg);
}
void
zfs_panic_recover(const char *fmt, ...)
{
va_list adx;
va_start(adx, fmt);
vcmn_err(zfs_recover ? CE_WARN : CE_PANIC, fmt, adx);
va_end(adx);
}
/*
* This is a stripped-down version of strtoull, suitable only for converting
* lowercase hexadecimal numbers that don't overflow.
@ -1923,6 +1980,16 @@ EXPORT_SYMBOL(spa_mode);
EXPORT_SYMBOL(spa_namespace_lock);
module_param(zfs_flags, int, 0644);
MODULE_PARM_DESC(zfs_flags, "Set additional debugging flags");
module_param(zfs_recover, int, 0644);
MODULE_PARM_DESC(zfs_recover, "Set to attempt to recover from fatal errors");
module_param(zfs_free_leak_on_eio, int, 0644);
MODULE_PARM_DESC(zfs_free_leak_on_eio,
"Set to ignore IO errors during free and permanently leak the space");
module_param(zfs_deadman_synctime_ms, ulong, 0644);
MODULE_PARM_DESC(zfs_deadman_synctime_ms, "Expiration time in milliseconds");

View File

@ -427,7 +427,7 @@ space_map_truncate(space_map_t *sm, dmu_tx_t *tx)
doi.doi_bonus_size != sizeof (space_map_phys_t)) ||
doi.doi_data_block_size != space_map_blksz) {
zfs_dbgmsg("txg %llu, spa %s, reallocating: "
"old bonus %u, old blocksz %u", dmu_tx_get_txg(tx),
"old bonus %llu, old blocksz %u", dmu_tx_get_txg(tx),
spa_name(spa), doi.doi_bonus_size, doi.doi_data_block_size);
space_map_free(sm, tx);

View File

@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@ -20,17 +19,21 @@
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Each Linux tracepoints subsystem must define CREATE_TRACE_POINTS in one
* (and only one) C file, so this dummy file exists for that purpose.
*/
#ifndef _LIBSPL_SYS_SDT_H
#define _LIBSPL_SYS_SDT_H
#include <sys/arc_impl.h>
#include <sys/vdev_impl.h>
#include <sys/zio.h>
#include <sys/dbuf.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dmu_tx.h>
#include <sys/dnode.h>
#include <sys/zfs_znode.h>
#include <sys/zil_impl.h>
#include <sys/zrlock.h>
#define DTRACE_PROBE(a) ((void) 0)
#define DTRACE_PROBE1(a, b, c) ((void) 0)
#define DTRACE_PROBE2(a, b, c, d, e) ((void) 0)
#define DTRACE_PROBE3(a, b, c, d, e, f, g) ((void) 0)
#define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) ((void) 0)
#endif
#define CREATE_TRACE_POINTS
#include <sys/trace.h>

View File

@ -209,7 +209,7 @@ zap_table_grow(zap_t *zap, zap_table_phys_t *tbl,
tbl->zt_nextblk = 0;
tbl->zt_blks_copied = 0;
dprintf("finished; numblocks now %llu (%lluk entries)\n",
dprintf("finished; numblocks now %llu (%uk entries)\n",
tbl->zt_numblks, 1<<(tbl->zt_shift-10));
}

View File

@ -25,99 +25,22 @@
#include <sys/zfs_context.h>
#if !defined(_KERNEL) || !defined(__linux__)
list_t zfs_dbgmsgs;
int zfs_dbgmsg_size;
kmutex_t zfs_dbgmsgs_lock;
int zfs_dbgmsg_maxsize = 4<<20; /* 4MB */
#endif
/*
* Enable various debugging features.
*/
int zfs_flags = 0;
/*
* zfs_recover can be set to nonzero to attempt to recover from
* otherwise-fatal errors, typically caused by on-disk corruption. When
* set, calls to zfs_panic_recover() will turn into warning messages.
* This should only be used as a last resort, as it typically results
* in leaked space, or worse.
*/
int zfs_recover = B_FALSE;
/*
* If destroy encounters an EIO while reading metadata (e.g. indirect
* blocks), space referenced by the missing metadata can not be freed.
* Normally this causes the background destroy to become "stalled", as
* it is unable to make forward progress. While in this stalled state,
* all remaining space to free from the error-encountering filesystem is
* "temporarily leaked". Set this flag to cause it to ignore the EIO,
* permanently leak the space from indirect blocks that can not be read,
* and continue to free everything else that it can.
*
* The default, "stalling" behavior is useful if the storage partially
* fails (i.e. some but not all i/os fail), and then later recovers. In
* this case, we will be able to continue pool operations while it is
* partially failed, and when it recovers, we can continue to free the
* space, with no leaks. However, note that this case is actually
* fairly rare.
*
* Typically pools either (a) fail completely (but perhaps temporarily,
* e.g. a top-level vdev going offline), or (b) have localized,
* permanent errors (e.g. disk returns the wrong data due to bit flip or
* firmware bug). In case (a), this setting does not matter because the
* pool will be suspended and the sync thread will not be able to make
* forward progress regardless. In case (b), because the error is
* permanent, the best we can do is leak the minimum amount of space,
* which is what setting this flag will do. Therefore, it is reasonable
* for this flag to normally be set, but we chose the more conservative
* approach of not setting it, so that there is no possibility of
* leaking space in the "partial temporary" failure case.
*/
int zfs_free_leak_on_eio = B_FALSE;
void
zfs_panic_recover(const char *fmt, ...)
{
va_list adx;
va_start(adx, fmt);
vcmn_err(zfs_recover ? CE_WARN : CE_PANIC, fmt, adx);
va_end(adx);
}
/*
* Debug logging is enabled by default for production kernel builds.
* The overhead for this is negligible and the logs can be valuable when
* debugging. For non-production user space builds all debugging except
* logging is enabled since performance is no longer a concern.
*/
void
zfs_dbgmsg_init(void)
{
#if !defined(_KERNEL) || !defined(__linux__)
list_create(&zfs_dbgmsgs, sizeof (zfs_dbgmsg_t),
offsetof(zfs_dbgmsg_t, zdm_node));
mutex_init(&zfs_dbgmsgs_lock, NULL, MUTEX_DEFAULT, NULL);
#endif
if (zfs_flags == 0) {
#if defined(_KERNEL)
zfs_flags = ZFS_DEBUG_DPRINTF;
spl_debug_set_mask(spl_debug_get_mask() | SD_DPRINTF);
spl_debug_set_subsys(spl_debug_get_subsys() | SS_USER1);
#else
zfs_flags = ~ZFS_DEBUG_DPRINTF;
#endif /* _KERNEL */
}
}
void
zfs_dbgmsg_fini(void)
{
#if !defined(_KERNEL) || !defined(__linux__)
zfs_dbgmsg_t *zdm;
while ((zdm = list_remove_head(&zfs_dbgmsgs)) != NULL) {
@ -127,25 +50,24 @@ zfs_dbgmsg_fini(void)
}
mutex_destroy(&zfs_dbgmsgs_lock);
ASSERT0(zfs_dbgmsg_size);
#endif
}
#if !defined(_KERNEL) || !defined(__linux__)
/*
* Print these messages by running:
* echo ::zfs_dbgmsg | mdb -k
* To get this data enable the zfs__dbgmsg tracepoint as shown:
*
* Monitor these messages by running:
* dtrace -qn 'zfs-dbgmsg{printf("%s\n", stringof(arg0))}'
* # Enable zfs__dbgmsg tracepoint, clear the tracepoint ring buffer
* $ echo 1 > /sys/kernel/debug/tracing/events/zfs/enable
* $ echo 0 > /sys/kernel/debug/tracing/trace
*
* When used with libzpool, monitor with:
* dtrace -qn 'zfs$pid::zfs_dbgmsg:probe1{printf("%s\n", copyinstr(arg1))}'
* # Dump the ring buffer.
* $ cat /sys/kernel/debug/tracing/trace
*/
void
zfs_dbgmsg(const char *fmt, ...)
{
int size;
va_list adx;
char *nl;
zfs_dbgmsg_t *zdm;
va_start(adx, fmt);
@ -156,13 +78,20 @@ zfs_dbgmsg(const char *fmt, ...)
* There is one byte of string in sizeof (zfs_dbgmsg_t), used
* for the terminating null.
*/
zdm = kmem_alloc(sizeof (zfs_dbgmsg_t) + size, KM_SLEEP);
zdm = kmem_alloc(sizeof (zfs_dbgmsg_t) + size, KM_PUSHPAGE);
zdm->zdm_timestamp = gethrestime_sec();
va_start(adx, fmt);
(void) vsnprintf(zdm->zdm_msg, size + 1, fmt, adx);
va_end(adx);
/*
* Get rid of trailing newline.
*/
nl = strrchr(zdm->zdm_msg, '\n');
if (nl != NULL)
*nl = '\0';
DTRACE_PROBE1(zfs__dbgmsg, char *, zdm->zdm_msg);
mutex_enter(&zfs_dbgmsgs_lock);
@ -180,6 +109,7 @@ zfs_dbgmsg(const char *fmt, ...)
void
zfs_dbgmsg_print(const char *tag)
{
#if !defined(_KERNEL)
zfs_dbgmsg_t *zdm;
(void) printf("ZFS_DBGMSG(%s):\n", tag);
@ -188,17 +118,5 @@ zfs_dbgmsg_print(const char *tag)
zdm = list_next(&zfs_dbgmsgs, zdm))
(void) printf("%s\n", zdm->zdm_msg);
mutex_exit(&zfs_dbgmsgs_lock);
#endif /* !_KERNEL */
}
#endif
#if defined(_KERNEL)
module_param(zfs_flags, int, 0644);
MODULE_PARM_DESC(zfs_flags, "Set additional debugging flags");
module_param(zfs_recover, int, 0644);
MODULE_PARM_DESC(zfs_recover, "Set to attempt to recover from fatal errors");
module_param(zfs_free_leak_on_eio, int, 0644);
MODULE_PARM_DESC(zfs_free_leak_on_eio,
"Set to ignore IO errors during free and permanently leak the space");
#endif /* _KERNEL */

View File

@ -247,6 +247,55 @@ static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
#if defined(HAVE_DECLARE_EVENT_CLASS)
void
__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
{
const char *newfile;
size_t size = 4096;
char *buf = kmem_alloc(size, KM_PUSHPAGE);
char *nl;
va_list adx;
/*
* Get rid of annoying prefix to filename.
*/
newfile = strrchr(file, '/');
if (newfile != NULL) {
newfile = newfile + 1; /* Get rid of leading / */
} else {
newfile = file;
}
va_start(adx, fmt);
(void) vsnprintf(buf, size, fmt, adx);
va_end(adx);
/*
* Get rid of trailing newline.
*/
nl = strrchr(buf, '\n');
if (nl != NULL)
*nl = '\0';
/*
* To get this data enable the zfs__dprintf trace point as shown:
*
* # Enable zfs__dprintf tracepoint, clear the tracepoint ring buffer
* $ echo 1 > /sys/module/zfs/parameters/zfs_flags
* $ echo 1 > /sys/kernel/debug/tracing/events/zfs/enable
* $ echo 0 > /sys/kernel/debug/tracing/trace
*
* # Dump the ring buffer.
* $ cat /sys/kernel/debug/tracing/trace
*/
DTRACE_PROBE4(zfs__dprintf,
char *, newfile, char *, func, int, line, char *, buf);
kmem_free(buf, size);
}
#endif /* HAVE_DECLARE_EVENT_CLASS */
static void
history_str_free(char *buf)
{

View File

@ -1436,7 +1436,7 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
gen_mask = -1ULL >> (64 - 8 * i);
dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
dprintf("getting %llu [%llu mask %llx]\n", object, fid_gen, gen_mask);
if ((err = zfs_zget(zsb, object, &zp))) {
ZFS_EXIT(zsb);
return (err);
@ -1447,7 +1447,8 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
if (zp_gen == 0)
zp_gen = 1;
if (zp->z_unlinked || zp_gen != fid_gen) {
dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
dprintf("znode gen (%llu) != fid gen (%llu)\n", zp_gen,
fid_gen);
iput(ZTOI(zp));
ZFS_EXIT(zsb);
return (SET_ERROR(EINVAL));

View File

@ -597,8 +597,9 @@ line: while (<$filehandle>) {
if (/\(\s/) {
err("whitespace after left paren");
}
# allow "for" statements to have empty "continue" clauses
if (/\s\)/ && !/^\s*for \([^;]*;[^;]*; \)/) {
# Allow "for" statements to have empty "continue" clauses.
# Allow right paren on its own line unless we're being picky (-p).
if (/\s\)/ && !/^\s*for \([^;]*;[^;]*; \)/ && ($picky || !/^\s*\)/)) {
err("whitespace before right paren");
}
if (/^\s*\(void\)[^ ]/) {

View File

@ -33,7 +33,6 @@ MODULE-OPTIONS:
$0 zfs="zfs_prefetch_disable=1"
$0 zfs="zfs_prefetch_disable=1 zfs_mdcomp_disable=1"
$0 spl="spl_debug_mask=0"
EOF
}