mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-06-01 19:54:10 +03:00
Add TRIM support
UNMAP/TRIM support is a frequently-requested feature to help prevent performance from degrading on SSDs and on various other SAN-like storage back-ends. By issuing UNMAP/TRIM commands for sectors which are no longer allocated the underlying device can often more efficiently manage itself. This TRIM implementation is modeled on the `zpool initialize` feature which writes a pattern to all unallocated space in the pool. The new `zpool trim` command uses the same vdev_xlate() code to calculate what sectors are unallocated, the same per-vdev TRIM thread model and locking, and the same basic CLI for a consistent user experience. The core difference is that instead of writing a pattern it will issue UNMAP/TRIM commands for those extents. The zio pipeline was updated to accommodate this by adding a new ZIO_TYPE_TRIM type and associated spa taskq. This new type makes it straightforward to add the platform-specific TRIM/UNMAP calls to vdev_disk.c and vdev_file.c. These new ZIO_TYPE_TRIM zios are handled largely the same way as ZIO_TYPE_READs or ZIO_TYPE_WRITEs. This makes it possible to largely avoid changing the pipeline; one exception is that TRIM zios may exceed the 16M block size limit since they contain no data. In addition to the manual `zpool trim` command, a background automatic TRIM was added and is controlled by the 'autotrim' property. It relies on the exact same infrastructure as the manual TRIM. However, instead of relying on the extents in a metaslab's ms_allocatable range tree, a ms_trim tree is kept per metaslab. When 'autotrim=on', ranges added back to the ms_allocatable tree are also added to the ms_trim tree. The ms_trim tree is then periodically consumed by an autotrim thread which systematically walks a top-level vdev's metaslabs. Since the automatic TRIM will skip ranges it considers too small there is value in occasionally running a full `zpool trim`. This may occur when the freed blocks are small and not enough time was allowed to aggregate them. 
An automatic TRIM and a manual `zpool trim` may be run concurrently, in which case the automatic TRIM will yield to the manual TRIM. Reviewed-by: Jorgen Lundman <lundman@lundman.net> Reviewed-by: Tim Chase <tim@chase2k.com> Reviewed-by: Matt Ahrens <mahrens@delphix.com> Reviewed-by: George Wilson <george.wilson@delphix.com> Reviewed-by: Serapheim Dimitropoulos <serapheim@delphix.com> Contributions-by: Saso Kiselkov <saso.kiselkov@nexenta.com> Contributions-by: Tim Chase <tim@chase2k.com> Contributions-by: Chunwei Chen <tuxoko@gmail.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #8419 Closes #598
This commit is contained in:
+103
@@ -0,0 +1,103 @@
|
||||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2019 by Tim Chase. All rights reserved.
|
||||
# Copyright (c) 2019 Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/trim/trim.kshlib
|
||||
. $STF_SUITE/tests/functional/trim/trim.cfg
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Check various pool geometries (stripe, mirror, raidz)
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create a pool on file vdevs to trim.
|
||||
# 2. Set 'autotrim=on' on pool.
|
||||
# 3. Fill the pool to a known percentage of capacity.
|
||||
# 4. Verify the vdevs contain 75% or more allocated blocks.
|
||||
# 5. Remove all files making it possible to trim the entire pool.
|
||||
# 6. Wait for auto trim to issue trim IOs for the free blocks.
|
||||
# 7. Verify the disks contain 30% or less allocated blocks.
|
||||
# 8. Repeat the test for striped, mirrored, and RAIDZ pools.
|
||||
|
||||
# Gate the test on the "global" environment. verify_runnable is provided
# by the sourced test library; presumably it skips the test when run
# outside the global zone -- TODO confirm against libtest.shlib.
verify_runnable "global"
|
||||
|
||||
# Announce the assertion this test is checking (log_assert comes from the
# sourced test library).
log_assert "Set 'autotrim=on' verify pool disks were trimmed"
|
||||
|
||||
#
# Tear down whatever this test left behind: destroy the test pool if it
# still exists, remove the vdev backing files, and put back the tunables
# the test overrides.
#
# Globals read: TESTPOOL, TRIM_VDEVS, trim_extent_bytes_min,
#               trim_txg_batch, vdev_min_ms_count
#
# This handler is registered with log_onexit before the original tunable
# values are captured, so it can run while some (or all) of the saved
# values are still empty.  Each restore is therefore skipped when its
# saved value is empty; otherwise set_tunable64 would be called without a
# value and the cleanup itself would fail under log_must.
#
function cleanup
{
	if poolexists $TESTPOOL; then
		destroy_pool $TESTPOOL
	fi

	# Left unquoted on purpose: TRIM_VDEVS is presumably a
	# whitespace-separated list of files (defined outside this file).
	log_must rm -f $TRIM_VDEVS

	if [[ -n "$trim_extent_bytes_min" ]]; then
		log_must set_tunable64 zfs_trim_extent_bytes_min \
		    "$trim_extent_bytes_min"
	fi

	if [[ -n "$trim_txg_batch" ]]; then
		log_must set_tunable64 zfs_trim_txg_batch "$trim_txg_batch"
	fi

	if [[ -n "$vdev_min_ms_count" ]]; then
		log_must set_tunable64 zfs_vdev_min_ms_count \
		    "$vdev_min_ms_count"
	fi
}
|
||||
# Hand the cleanup function to log_onexit (from the sourced test library)
# before any system state is modified.
log_onexit cleanup

#
# For each tunable below: remember the current value in a variable that
# cleanup reads, then install the value this test wants.
#

# Lower the minimum trim extent size so that all trim sizes are verified.
typeset trim_extent_bytes_min
trim_extent_bytes_min=$(get_tunable zfs_trim_extent_bytes_min)
log_must set_tunable64 zfs_trim_extent_bytes_min 4096

# Lower zfs_trim_txg_batch so that trimming happens more frequently.
typeset trim_txg_batch
trim_txg_batch=$(get_tunable zfs_trim_txg_batch)
log_must set_tunable64 zfs_trim_txg_batch 8

# Raise the metaslab count to better simulate larger, more realistic
# devices.
typeset vdev_min_ms_count
vdev_min_ms_count=$(get_tunable zfs_vdev_min_ms_count)
log_must set_tunable64 zfs_vdev_min_ms_count 32

#
# Size thresholds in MiB, derived from the 4 * MINVDEVSIZE vdev size used
# below: 75% of it (checked with -gt after the pool is filled) and 30% of
# it (checked with -le after the trim).
#
typeset VDEV_MAX_MB VDEV_MIN_MB
VDEV_MAX_MB=$(( floor(4 * MINVDEVSIZE * 0.75 / 1024 / 1024) ))
VDEV_MIN_MB=$(( floor(4 * MINVDEVSIZE * 0.30 / 1024 / 1024) ))
|
||||
|
||||
#
# Run the fill / free / autotrim / verify cycle once per pool geometry.
# An empty $type stands for a plain stripe; "mirror" and "raidz2" are
# passed to 'zpool create' as the vdev type keyword.
#
for type in "" "mirror" "raidz2"; do

	# Pick as many backing files as the geometry needs.
	if [[ -z "$type" ]]; then
		VDEVS="$TRIM_VDEV1"
	elif [[ "$type" = "mirror" ]]; then
		VDEVS="$TRIM_VDEV1 $TRIM_VDEV2"
	else
		VDEVS="$TRIM_VDEV1 $TRIM_VDEV2 $TRIM_VDEV3"
	fi

	log_must truncate -s $((4 * MINVDEVSIZE)) $VDEVS

	# $type must actually be handed to 'zpool create'; without it every
	# iteration builds a stripe and the mirror/raidz2 geometries are
	# never exercised.  It is deliberately unquoted so that the empty
	# (stripe) case expands to no argument at all.
	log_must zpool create -f $TESTPOOL $type $VDEVS
	log_must zpool set autotrim=on $TESTPOOL

	# Aim to write 90% of the space the pool reports as available,
	# expressed in MiB.
	typeset availspace=$(get_prop available $TESTPOOL)
	typeset fill_mb=$(( floor(availspace * 0.90 / 1024 / 1024) ))

	# Fill the pool, verify the vdevs are no longer sparse.
	# (file_write is intentionally not wrapped in log_must here, as in
	# the original.)
	file_write -o create -f /$TESTPOOL/file -b 1048576 -c $fill_mb -d R
	verify_vdevs "-gt" "$VDEV_MAX_MB" $VDEVS

	# Remove the file, wait for trim, verify the vdevs are now sparse.
	# wait_trim_io comes from trim.kshlib; presumably it blocks until
	# at least 64 "ind" trim I/Os have been issued -- TODO confirm.
	log_must rm /$TESTPOOL/file
	wait_trim_io $TESTPOOL "ind" 64
	verify_vdevs "-le" "$VDEV_MIN_MB" $VDEVS

	# Start the next geometry from a clean slate.
	log_must zpool destroy $TESTPOOL
	log_must rm -f $VDEVS
done

log_pass "Auto trim successfully shrunk vdevs"
|
||||
Reference in New Issue
Block a user