mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	The intent of this patch is to extend the existing deadman code
such that it's flexible enough to be used by both ztest and
on production systems.  The proposed changes include:
* Added a new `zfs_deadman_failmode` module option which is
  used to dynamically control the behavior of the deadman.  It's
  loosely modeled after, but independent from, the pool failmode
  property.  It can be set to wait, continue, or panic.
    * wait     - Wait for the "hung" I/O (default)
    * continue - Attempt to recover from a "hung" I/O
    * panic    - Panic the system
* Added a new `zfs_deadman_ziotime_ms` module option which is
  analogous to `zfs_deadman_synctime_ms` except instead of
  applying to a pool TXG sync it applies to zio_wait().  A
  default value of 300s is used to define a "hung" zio.
* The ztest deadman thread has been re-enabled by default,
  aligned with the upstream OpenZFS code, and then extended
  to terminate the process when it takes significantly longer
  to complete than expected.
* The -G option was added to ztest to print the internal debug
  log when a fatal error is encountered.  This same option was
  previously added to zdb in commit fa603f82.  Update zloop.sh
  to unconditionally pass -G to obtain additional debugging.
* The FM_EREPORT_ZFS_DELAY event which was previously posted
  when the deadman detects a "hung" pool has been replaced by
  a new dedicated FM_EREPORT_ZFS_DEADMAN event.
* The proposed recovery logic attempts to restart a "hung"
  zio by calling zio_interrupt() on any outstanding leaf zios.
  We may want to further restrict this to zios in either the
  ZIO_STAGE_VDEV_IO_START or ZIO_STAGE_VDEV_IO_DONE stages.
  Calling zio_interrupt() is expected to only be useful for
  cases when an IO has been submitted to the physical device
  but for some reason the completion callback hasn't been
  called by the lower layers.  This shouldn't be possible but
  has been observed and may be caused by kernel/driver bugs.
* The 'zfs_deadman_synctime_ms' default value was reduced from
  1000s to 600s.
* Depending on how ztest fails there may be no cache file to
  move.  This should not be considered fatal, collect the logs
  which are available and carry on.
* Add deadman test cases for spa_deadman() and zio_wait().
* Increase default zfs_deadman_checktime_ms to 60s.
Reviewed-by: Tim Chase <tim@chase2k.com>
Reviewed-by: Thomas Caputi <tcaputi@datto.com>
Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #6999
		
	
			
		
			
				
	
	
		
			121 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			121 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 * CDDL HEADER START
 | 
						|
 *
 | 
						|
 * The contents of this file are subject to the terms of the
 | 
						|
 * Common Development and Distribution License (the "License").
 | 
						|
 * You may not use this file except in compliance with the License.
 | 
						|
 *
 | 
						|
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 | 
						|
 * or http://www.opensolaris.org/os/licensing.
 | 
						|
 * See the License for the specific language governing permissions
 | 
						|
 * and limitations under the License.
 | 
						|
 *
 | 
						|
 * When distributing Covered Code, include this CDDL HEADER in each
 | 
						|
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 | 
						|
 * If applicable, add the following below this CDDL HEADER, with the
 | 
						|
 * fields enclosed by brackets "[]" replaced with your own identifying
 | 
						|
 * information: Portions Copyright [yyyy] [name of copyright owner]
 | 
						|
 *
 | 
						|
 * CDDL HEADER END
 | 
						|
 */
 | 
						|
/*
 | 
						|
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 | 
						|
 * Use is subject to license terms.
 | 
						|
 */
 | 
						|
 | 
						|
#ifndef	_SYS_FM_FS_ZFS_H
 | 
						|
#define	_SYS_FM_FS_ZFS_H
 | 
						|
 | 
						|
#ifdef	__cplusplus
 | 
						|
extern "C" {
 | 
						|
#endif
 | 
						|
 | 
						|
/*
 * Class string identifying ZFS ("fs.zfs").
 * NOTE(review): presumably combined with the FM_EREPORT_ZFS_* names in
 * this header to build a full event class -- confirm against the code
 * that posts the ereports.
 */
#define	ZFS_ERROR_CLASS				"fs.zfs"
 | 
						|
 | 
						|
/*
 * FM_EREPORT_ZFS_*: string names of the individual ZFS events, one
 * macro per event.  This first one names the "checksum" event.
 * NOTE(review): the conditions under which each event is posted are not
 * visible in this header -- see the posting code.
 */
#define	FM_EREPORT_ZFS_CHECKSUM			"checksum"
 | 
						|
#define	FM_EREPORT_ZFS_AUTHENTICATION		"authentication"
 | 
						|
#define	FM_EREPORT_ZFS_IO			"io"
 | 
						|
#define	FM_EREPORT_ZFS_DATA			"data"
 | 
						|
#define	FM_EREPORT_ZFS_DELAY			"delay"
 | 
						|
/*
 * Dedicated event for the deadman: posted when the deadman detects a
 * "hung" pool.  FM_EREPORT_ZFS_DELAY was previously posted for this
 * condition.
 */
#define	FM_EREPORT_ZFS_DEADMAN			"deadman"
 | 
						|
#define	FM_EREPORT_ZFS_POOL			"zpool"
 | 
						|
#define	FM_EREPORT_ZFS_DEVICE_UNKNOWN		"vdev.unknown"
 | 
						|
#define	FM_EREPORT_ZFS_DEVICE_OPEN_FAILED	"vdev.open_failed"
 | 
						|
#define	FM_EREPORT_ZFS_DEVICE_CORRUPT_DATA	"vdev.corrupt_data"
 | 
						|
#define	FM_EREPORT_ZFS_DEVICE_NO_REPLICAS	"vdev.no_replicas"
 | 
						|
#define	FM_EREPORT_ZFS_DEVICE_BAD_GUID_SUM	"vdev.bad_guid_sum"
 | 
						|
#define	FM_EREPORT_ZFS_DEVICE_TOO_SMALL		"vdev.too_small"
 | 
						|
#define	FM_EREPORT_ZFS_DEVICE_BAD_LABEL		"vdev.bad_label"
 | 
						|
#define	FM_EREPORT_ZFS_DEVICE_BAD_ASHIFT	"vdev.bad_ashift"
 | 
						|
#define	FM_EREPORT_ZFS_IO_FAILURE		"io_failure"
 | 
						|
#define	FM_EREPORT_ZFS_PROBE_FAILURE		"probe_failure"
 | 
						|
#define	FM_EREPORT_ZFS_LOG_REPLAY		"log_replay"
 | 
						|
#define	FM_EREPORT_ZFS_CONFIG_CACHE_WRITE	"config_cache_write"
 | 
						|
 | 
						|
/*
 * FM_EREPORT_PAYLOAD_ZFS_*: string names of the payload members, grouped
 * by prefix (POOL, VDEV, PARENT, ZIO, CKSUM, BAD).
 * NOTE(review): presumably these are the keys under which values are
 * stored in an event's payload; the value types are not visible in this
 * header -- confirm against the posting code.
 */
#define	FM_EREPORT_PAYLOAD_ZFS_POOL		"pool"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE	"pool_failmode"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_POOL_GUID	"pool_guid"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT	"pool_context"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_POOL_STATE	"pool_state"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID	"vdev_guid"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE	"vdev_type"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH	"vdev_path"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_VDEV_PHYSPATH	"vdev_physpath"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_VDEV_ENC_SYSFS_PATH	"vdev_enc_sysfs_path"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID	"vdev_devid"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU		"vdev_fru"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE	"vdev_state"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_VDEV_LASTSTATE	"vdev_laststate"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_VDEV_ASHIFT	"vdev_ashift"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_VDEV_COMP_TS	"vdev_complete_ts"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_VDEV_DELTA_TS	"vdev_delta_ts"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_VDEV_SPARE_PATHS	"vdev_spare_paths"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_VDEV_SPARE_GUIDS	"vdev_spare_guids"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_VDEV_READ_ERRORS	"vdev_read_errors"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_VDEV_WRITE_ERRORS "vdev_write_errors"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_ERRORS "vdev_cksum_errors"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID	"parent_guid"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE	"parent_type"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH	"parent_path"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_PARENT_DEVID	"parent_devid"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET	"zio_objset"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT	"zio_object"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL	"zio_level"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID	"zio_blkid"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_ZIO_ERR		"zio_err"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET	"zio_offset"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE		"zio_size"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS	"zio_flags"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_ZIO_STAGE	"zio_stage"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_ZIO_PIPELINE	"zio_pipeline"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_ZIO_DELAY	"zio_delay"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_ZIO_TIMESTAMP	"zio_timestamp"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_ZIO_DELTA	"zio_delta"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_PREV_STATE	"prev_state"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED	"cksum_expected"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL	"cksum_actual"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO	"cksum_algorithm"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP	"cksum_byteswap"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_BAD_OFFSET_RANGES "bad_ranges"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_MIN_GAP "bad_ranges_min_gap"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_SETS	"bad_range_sets"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS	"bad_range_clears"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS	"bad_set_bits"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS	"bad_cleared_bits"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM "bad_set_histogram"
 | 
						|
#define	FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM "bad_cleared_histogram"
 | 
						|
 | 
						|
/*
 * FM_EREPORT_FAILMODE_*: string forms of the three failmode settings
 * (wait, continue, panic).
 * NOTE(review): presumably the values reported under
 * FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE -- confirm against the posting
 * code.
 */
#define	FM_EREPORT_FAILMODE_WAIT		"wait"
 | 
						|
#define	FM_EREPORT_FAILMODE_CONTINUE		"continue"
 | 
						|
#define	FM_EREPORT_FAILMODE_PANIC		"panic"
 | 
						|
 | 
						|
/*
 * FM_RESOURCE_*: string names for resource notifications ("removed",
 * "autoreplace", "statechange").
 * NOTE(review): how these differ from the FM_EREPORT_ZFS_* names is not
 * visible in this header -- confirm against the code that posts them.
 */
#define	FM_RESOURCE_REMOVED			"removed"
 | 
						|
#define	FM_RESOURCE_AUTOREPLACE			"autoreplace"
 | 
						|
#define	FM_RESOURCE_STATECHANGE			"statechange"
 | 
						|
 | 
						|
#ifdef	__cplusplus
 | 
						|
}
 | 
						|
#endif
 | 
						|
 | 
						|
#endif	/* _SYS_FM_FS_ZFS_H */
 |