mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 18:40:43 +03:00
Add TRIM support
UNMAP/TRIM support is a frequently requested feature to help prevent performance from degrading on SSDs and on various other SAN-like storage back-ends. By issuing UNMAP/TRIM commands for sectors which are no longer allocated, the underlying device can often manage itself more efficiently. This TRIM implementation is modeled on the `zpool initialize` feature, which writes a pattern to all unallocated space in the pool. The new `zpool trim` command uses the same vdev_xlate() code to calculate what sectors are unallocated, the same per-vdev TRIM thread model and locking, and the same basic CLI for a consistent user experience. The core difference is that instead of writing a pattern it will issue UNMAP/TRIM commands for those extents. The zio pipeline was updated to accommodate this by adding a new ZIO_TYPE_TRIM type and associated spa taskq. This new type makes it straightforward to add the platform-specific TRIM/UNMAP calls to vdev_disk.c and vdev_file.c. These new ZIO_TYPE_TRIM zios are handled largely the same way as ZIO_TYPE_READs or ZIO_TYPE_WRITEs. This makes it possible to largely avoid changing the pipeline; one exception is that TRIM zios may exceed the 16M block size limit since they contain no data. In addition to the manual `zpool trim` command, a background automatic TRIM was added and is controlled by the 'autotrim' property. It relies on the exact same infrastructure as the manual TRIM. However, instead of relying on the extents in a metaslab's ms_allocatable range tree, a ms_trim tree is kept per metaslab. When 'autotrim=on', ranges added back to the ms_allocatable tree are also added to the ms_trim tree. The ms_trim tree is then periodically consumed by an autotrim thread which systematically walks a top-level vdev's metaslabs. Since the automatic TRIM will skip ranges it considers too small, there is value in occasionally running a full `zpool trim`. Ranges may be skipped when the freed blocks are small and not enough time was allowed to aggregate them. 
An automatic TRIM and a manual `zpool trim` may be run concurrently, in which case the automatic TRIM will yield to the manual TRIM. Reviewed-by: Jorgen Lundman <lundman@lundman.net> Reviewed-by: Tim Chase <tim@chase2k.com> Reviewed-by: Matt Ahrens <mahrens@delphix.com> Reviewed-by: George Wilson <george.wilson@delphix.com> Reviewed-by: Serapheim Dimitropoulos <serapheim@delphix.com> Contributions-by: Saso Kiselkov <saso.kiselkov@nexenta.com> Contributions-by: Tim Chase <tim@chase2k.com> Contributions-by: Chunwei Chen <tuxoko@gmail.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #8419 Closes #598
This commit is contained in:
+53
-9
@@ -738,6 +738,24 @@ typedef enum spa_import_type {
|
||||
SPA_IMPORT_ASSEMBLE
|
||||
} spa_import_type_t;
|
||||
|
||||
/*
|
||||
* Send TRIM commands in-line during normal pool operation while deleting.
|
||||
* OFF: no
|
||||
* ON: yes
|
||||
*/
|
||||
typedef enum {
|
||||
SPA_AUTOTRIM_OFF = 0, /* default: TRIM commands are not sent in-line */
|
||||
SPA_AUTOTRIM_ON /* TRIM commands are sent in-line during normal pool operation */
|
||||
} spa_autotrim_t;
|
||||
|
||||
/*
|
||||
* Reason TRIM command was issued, used internally for accounting purposes.
|
||||
*/
|
||||
typedef enum trim_type {
|
||||
TRIM_TYPE_MANUAL = 0, /* NOTE(review): presumably a TRIM requested via `zpool trim` -- confirm */
|
||||
TRIM_TYPE_AUTO = 1, /* NOTE(review): presumably a background TRIM driven by the 'autotrim' property -- confirm */
|
||||
} trim_type_t;
|
||||
|
||||
/* state manipulation functions */
|
||||
extern int spa_open(const char *pool, spa_t **, void *tag);
|
||||
extern int spa_open_rewind(const char *pool, spa_t **, void *tag,
|
||||
@@ -764,15 +782,17 @@ extern void spa_inject_delref(spa_t *spa);
|
||||
extern void spa_scan_stat_init(spa_t *spa);
|
||||
extern int spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps);
|
||||
|
||||
#define SPA_ASYNC_CONFIG_UPDATE 0x01
|
||||
#define SPA_ASYNC_REMOVE 0x02
|
||||
#define SPA_ASYNC_PROBE 0x04
|
||||
#define SPA_ASYNC_RESILVER_DONE 0x08
|
||||
#define SPA_ASYNC_RESILVER 0x10
|
||||
#define SPA_ASYNC_AUTOEXPAND 0x20
|
||||
#define SPA_ASYNC_REMOVE_DONE 0x40
|
||||
#define SPA_ASYNC_REMOVE_STOP 0x80
|
||||
#define SPA_ASYNC_INITIALIZE_RESTART 0x100
|
||||
#define SPA_ASYNC_CONFIG_UPDATE 0x01
|
||||
#define SPA_ASYNC_REMOVE 0x02
|
||||
#define SPA_ASYNC_PROBE 0x04
|
||||
#define SPA_ASYNC_RESILVER_DONE 0x08
|
||||
#define SPA_ASYNC_RESILVER 0x10
|
||||
#define SPA_ASYNC_AUTOEXPAND 0x20
|
||||
#define SPA_ASYNC_REMOVE_DONE 0x40
|
||||
#define SPA_ASYNC_REMOVE_STOP 0x80
|
||||
#define SPA_ASYNC_INITIALIZE_RESTART 0x100
|
||||
#define SPA_ASYNC_TRIM_RESTART 0x200
|
||||
#define SPA_ASYNC_AUTOTRIM_RESTART 0x400
|
||||
|
||||
/*
|
||||
* Controls the behavior of spa_vdev_remove().
|
||||
@@ -790,6 +810,8 @@ extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
|
||||
extern boolean_t spa_vdev_remove_active(spa_t *spa);
|
||||
extern int spa_vdev_initialize(spa_t *spa, nvlist_t *nv, uint64_t cmd_type,
|
||||
nvlist_t *vdev_errlist);
|
||||
extern int spa_vdev_trim(spa_t *spa, nvlist_t *nv, uint64_t cmd_type,
|
||||
uint64_t rate, boolean_t partial, boolean_t secure, nvlist_t *vdev_errlist);
|
||||
extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
|
||||
extern int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru);
|
||||
extern int spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
|
||||
@@ -887,6 +909,7 @@ typedef struct spa_stats {
|
||||
spa_history_kstat_t io_history;
|
||||
spa_history_list_t mmp_history;
|
||||
spa_history_kstat_t state; /* pool state */
|
||||
spa_history_kstat_t iostats;
|
||||
} spa_stats_t;
|
||||
|
||||
typedef enum txg_state {
|
||||
@@ -905,6 +928,22 @@ typedef struct txg_stat {
|
||||
uint64_t ndirty;
|
||||
} txg_stat_t;
|
||||
|
||||
/* Assorted pool IO kstats */
|
||||
typedef struct spa_iostats {
|
||||
kstat_named_t trim_extents_written; /* trim_*: NOTE(review): presumably the TRIM_TYPE_MANUAL counters -- confirm in spa_iostats_trim_add() */
|
||||
kstat_named_t trim_bytes_written;
|
||||
kstat_named_t trim_extents_skipped;
|
||||
kstat_named_t trim_bytes_skipped;
|
||||
kstat_named_t trim_extents_failed;
|
||||
kstat_named_t trim_bytes_failed;
|
||||
kstat_named_t autotrim_extents_written; /* autotrim_*: NOTE(review): presumably the TRIM_TYPE_AUTO counters -- confirm in spa_iostats_trim_add() */
|
||||
kstat_named_t autotrim_bytes_written;
|
||||
kstat_named_t autotrim_extents_skipped;
|
||||
kstat_named_t autotrim_bytes_skipped;
|
||||
kstat_named_t autotrim_extents_failed;
|
||||
kstat_named_t autotrim_bytes_failed;
|
||||
} spa_iostats_t;
|
||||
|
||||
extern void spa_stats_init(spa_t *spa);
|
||||
extern void spa_stats_destroy(spa_t *spa);
|
||||
extern void spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb,
|
||||
@@ -922,6 +961,10 @@ extern int spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error,
|
||||
extern void spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
|
||||
uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_kstat_id,
|
||||
int error);
|
||||
extern void spa_iostats_trim_add(spa_t *spa, trim_type_t type,
|
||||
uint64_t extents_written, uint64_t bytes_written,
|
||||
uint64_t extents_skipped, uint64_t bytes_skipped,
|
||||
uint64_t extents_failed, uint64_t bytes_failed);
|
||||
|
||||
/* Pool configuration locks */
|
||||
extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
|
||||
@@ -1005,6 +1048,7 @@ extern objset_t *spa_meta_objset(spa_t *spa);
|
||||
extern uint64_t spa_deadman_synctime(spa_t *spa);
|
||||
extern uint64_t spa_deadman_ziotime(spa_t *spa);
|
||||
extern uint64_t spa_dirty_data(spa_t *spa);
|
||||
extern spa_autotrim_t spa_get_autotrim(spa_t *spa);
|
||||
|
||||
/* Miscellaneous support routines */
|
||||
extern void spa_load_failed(spa_t *spa, const char *fmt, ...);
|
||||
|
||||
Reference in New Issue
Block a user