mirror_zfs/include/sys/zfs_debug.h
Brian Behlendorf 1b939560be
Add TRIM support
UNMAP/TRIM support is a frequently-requested feature to help
prevent performance from degrading on SSDs and on various other
SAN-like storage back-ends.  By issuing UNMAP/TRIM commands for
sectors which are no longer allocated the underlying device can
often more efficiently manage itself.

This TRIM implementation is modeled on the `zpool initialize`
feature which writes a pattern to all unallocated space in the
pool.  The new `zpool trim` command uses the same vdev_xlate()
code to calculate what sectors are unallocated, the same per-
vdev TRIM thread model and locking, and the same basic CLI for
a consistent user experience.  The core difference is that
instead of writing a pattern it will issue UNMAP/TRIM commands
for those extents.

The zio pipeline was updated to accommodate this by adding a new
ZIO_TYPE_TRIM type and associated spa taskq.  This new type makes
is straight forward to add the platform specific TRIM/UNMAP calls
to vdev_disk.c and vdev_file.c.  These new ZIO_TYPE_TRIM zios are
handled largely the same way as ZIO_TYPE_READs or ZIO_TYPE_WRITEs.
This makes it possible to largely avoid changing the pipieline,
one exception is that TRIM zio's may exceed the 16M block size
limit since they contain no data.

In addition to the manual `zpool trim` command, a background
automatic TRIM was added and is controlled by the 'autotrim'
property.  It relies on the exact same infrastructure as the
manual TRIM.  However, instead of relying on the extents in a
metaslab's ms_allocatable range tree, a ms_trim tree is kept
per metaslab.  When 'autotrim=on', ranges added back to the
ms_allocatable tree are also added to the ms_free tree.  The
ms_free tree is then periodically consumed by an autotrim
thread which systematically walks a top level vdev's metaslabs.

Since the automatic TRIM will skip ranges it considers too small
there is value in occasionally running a full `zpool trim`.  This
may occur when the freed blocks are small and not enough time
was allowed to aggregate them.  An automatic TRIM and a manual
`zpool trim` may be run concurrently, in which case the automatic
TRIM will yield to the manual TRIM.

Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Reviewed-by: Tim Chase <tim@chase2k.com>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Serapheim Dimitropoulos <serapheim@delphix.com>
Contributions-by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Contributions-by: Tim Chase <tim@chase2k.com>
Contributions-by: Chunwei Chen <tuxoko@gmail.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #8419 
Closes #598
2019-03-29 09:13:20 -07:00

110 lines
3.2 KiB
C

/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
*/
#ifndef _SYS_ZFS_DEBUG_H
#define _SYS_ZFS_DEBUG_H
#ifdef __cplusplus
extern "C" {
#endif
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
extern int zfs_flags;
extern int zfs_recover;
extern int zfs_free_leak_on_eio;
extern int zfs_dbgmsg_enable;
#define ZFS_DEBUG_DPRINTF (1 << 0)
#define ZFS_DEBUG_DBUF_VERIFY (1 << 1)
#define ZFS_DEBUG_DNODE_VERIFY (1 << 2)
#define ZFS_DEBUG_SNAPNAMES (1 << 3)
#define ZFS_DEBUG_MODIFY (1 << 4)
/* 1<<5 was previously used, try not to reuse */
#define ZFS_DEBUG_ZIO_FREE (1 << 6)
#define ZFS_DEBUG_HISTOGRAM_VERIFY (1 << 7)
#define ZFS_DEBUG_METASLAB_VERIFY (1 << 8)
#define ZFS_DEBUG_SET_ERROR (1 << 9)
#define ZFS_DEBUG_INDIRECT_REMAP (1 << 10)
#define ZFS_DEBUG_TRIM (1 << 11)
extern void __zfs_dbgmsg(char *buf);
extern void __dprintf(boolean_t dprint, const char *file, const char *func,
int line, const char *fmt, ...);
/*
* Some general principles for using zfs_dbgmsg():
* 1. We don't want to pollute the log with typically-irrelevant messages,
* so don't print too many messages in the "normal" code path - O(1)
* per txg.
* 2. We want to know for sure what happened, so make the message specific
* (e.g. *which* thing am I operating on).
* 3. Do print a message when something unusual or unexpected happens
* (e.g. error cases).
* 4. Print a message when making user-initiated on-disk changes.
*
* Note that besides principle 1, another reason that we don't want to
* use zfs_dbgmsg in high-frequency routines is the potential impact
* that it can have on performance.
*/
#define zfs_dbgmsg(...) \
if (zfs_dbgmsg_enable) \
__dprintf(B_FALSE, __FILE__, __func__, __LINE__, __VA_ARGS__)
#ifdef ZFS_DEBUG
/*
* To enable this:
*
* $ echo 1 >/sys/module/zfs/parameters/zfs_flags
*/
#define dprintf(...) \
if (zfs_flags & ZFS_DEBUG_DPRINTF) \
__dprintf(B_TRUE, __FILE__, __func__, __LINE__, __VA_ARGS__)
#else
#define dprintf(...) ((void)0)
#endif /* ZFS_DEBUG */
extern void zfs_panic_recover(const char *fmt, ...);
extern void zfs_dbgmsg_init(void);
extern void zfs_dbgmsg_fini(void);
#ifndef _KERNEL
extern int dprintf_find_string(const char *string);
extern void zfs_dbgmsg_print(const char *tag);
#endif
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZFS_DEBUG_H */