Fast Clone Deletion

Deleting a clone requires finding blocks that are clone-only, not shared
with the snapshot. This was done by traversing the entire block tree
which results in a large performance penalty for sparsely
written clones.

This new method keeps track of clone blocks when they are
modified in a "Livelist" so that, when it’s time to delete,
the clone-specific blocks are already at hand.

We see performance improvements because now deletion work is
proportional to the number of clone-modified blocks, not the size
of the original dataset.

Reviewed-by: Sean Eric Fagan <sef@ixsystems.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Serapheim Dimitropoulos <serapheim@delphix.com>
Signed-off-by: Sara Hartse <sara.hartse@delphix.com>
Closes #8416
This commit is contained in:
Sara Hartse
2019-07-26 10:54:14 -07:00
committed by Brian Behlendorf
parent d274ac5460
commit 37f03da8ba
38 changed files with 2583 additions and 205 deletions
+1 -1
View File
@@ -22,7 +22,7 @@
#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
# Copyright (c) 2012, 2017 by Delphix. All rights reserved.
# Copyright (c) 2012, 2018 by Delphix. All rights reserved.
# Copyright (c) 2017 by Tim Chase. All rights reserved.
# Copyright (c) 2017 by Nexenta Systems, Inc. All rights reserved.
# Copyright (c) 2017 Lawrence Livermore National Security, LLC.
@@ -2,6 +2,8 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zfs_destro
dist_pkgdata_SCRIPTS = \
setup.ksh \
cleanup.ksh \
zfs_clone_livelist_condense_and_disable.ksh \
zfs_clone_livelist_condense_races.ksh \
zfs_destroy_001_pos.ksh \
zfs_destroy_002_pos.ksh \
zfs_destroy_003_pos.ksh \
@@ -17,7 +19,10 @@ dist_pkgdata_SCRIPTS = \
zfs_destroy_013_neg.ksh \
zfs_destroy_014_pos.ksh \
zfs_destroy_015_pos.ksh \
zfs_destroy_016_pos.ksh
zfs_destroy_016_pos.ksh \
zfs_destroy_clone_livelist.ksh \
zfs_destroy_dev_removal.ksh \
zfs_destroy_dev_removal_condense.ksh
dist_pkgdata_DATA = \
zfs_destroy_common.kshlib \
@@ -0,0 +1,125 @@
#!/bin/ksh -p
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
# DESCRIPTION
# Verify that a clone's livelist is condensed and disabled
# (deactivated) as expected.
# STRATEGY
# 1. Clone where livelist is condensed
# - create clone, write several files, delete those files
# - check that the number of livelist entries decreases
# after the delete
# 2. Clone where livelist is deactivated
# - create clone, write files. Delete those files and the
# file in the filesystem when the snapshot was created
# so the clone and snapshot no longer share data
# - check that the livelist is destroyed
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zfs_destroy/zfs_destroy_common.kshlib
# Undo the changes made by this test: destroy $TESTPOOL/$TESTFS1 (and its
# dependents) and restore the livelist tunables saved in ORIGINAL_MAX and
# ORIGINAL_MIN.
function cleanup
{
	# Only attempt the destroy when the dataset exists, so that a test
	# which failed before creating it cannot stop (log_must presumably
	# aborts on failure) the tunables below from being restored.
	if datasetexists $TESTPOOL/$TESTFS1; then
		log_must zfs destroy -Rf $TESTPOOL/$TESTFS1
	fi
	# reset the livelist sublist size to the original value
	set_tunable64 zfs_livelist_max_entries $ORIGINAL_MAX
	# reset the minimum percent shared to the original value
	set_tunable32 zfs_livelist_min_percent_shared $ORIGINAL_MIN
}
# Check that the "Livelist" lines printed by zdb for $TESTPOOL contain a
# given string.
# $1 substring expected in the zdb livelist output (e.g. "5 entries")
# $2 message handed to log_fail when the substring is absent
# Returns 0 when the substring is found; otherwise the zdb output is
# reported with log_note and log_fail is called with $2.
function check_ll_len
{
	typeset string substring msg
	string="$(zdb -vvvvv $TESTPOOL | grep "Livelist")"
	substring="$1"
	msg="$2"
	# Stripping everything up to the first occurrence of $substring
	# changes $string only if $substring occurs in it. The inner quotes
	# make the substring literal instead of a glob pattern.
	if [[ "${string#*"$substring"}" != "$string" ]]; then
		return 0	# $substring is in $string
	fi
	log_note "$string"
	log_fail "$msg"		# $substring is not in $string
}
# Condense scenario: write to a fresh clone of $TESTFS1@snap in two passes,
# checking the livelist length reported by check_ll_len after each pass,
# then destroy the clone and confirm its livelist is gone.
function test_condense
{
	# a small cap on livelist entries makes a condense easy to trigger
	set_tunable64 zfs_livelist_max_entries 0x14
	# a low shared-percentage threshold keeps the livelist from
	# being disabled
	set_tunable32 zfs_livelist_min_percent_shared 0xa

	clone_dataset $TESTFS1 snap $TESTCLONE

	# first pass: syncing after every write forces a new entry each time
	for i in 0 1 2 3 4; do
		log_must mkfile 5m /$TESTPOOL/$TESTCLONE/testfile$i
		log_must zpool sync $TESTPOOL
	done
	check_ll_len "5 entries" "Unexpected livelist size"

	# second pass: rewrite the same files, syncing after every write so
	# the previous entry can be condensed
	for i in 0 1 2 3 4; do
		log_must mkfile 5m /$TESTPOOL/$TESTCLONE/testfile$i
		log_must zpool sync $TESTPOOL
	done
	check_ll_len "6 entries" "Condense did not occur"

	log_must zfs destroy $TESTPOOL/$TESTCLONE
	check_livelist_gone
}
# Deactivation scenario, run twice against a clone of $TESTFS1@snap with
# two different zfs_livelist_min_percent_shared thresholds.
function test_deactivated
{
	typeset f

	# Round 1: threshold of 50 percent
	set_tunable32 zfs_livelist_min_percent_shared 0x32
	clone_dataset $TESTFS1 snap $TESTCLONE
	for f in $TESTFILE0 $TESTFILE1; do
		log_must mkfile 5m /$TESTPOOL/$TESTCLONE/$f
	done
	log_must zpool sync $TESTPOOL
	# 'atestfile' is the only data shared with the snapshot: 33 percent
	check_livelist_gone
	log_must zfs destroy -R $TESTPOOL/$TESTCLONE

	# Round 2: threshold of 20 percent
	set_tunable32 zfs_livelist_min_percent_shared 0x14
	clone_dataset $TESTFS1 snap $TESTCLONE
	for f in $TESTFILE0 $TESTFILE1 $TESTFILE2; do
		log_must mkfile 5m /$TESTPOOL/$TESTCLONE/$f
	done
	log_must zpool sync $TESTPOOL
	# 'atestfile' is the only data shared with the snapshot: 25 percent
	check_livelist_exists $TESTCLONE
	log_must rm /$TESTPOOL/$TESTCLONE/atestfile
	# nothing is shared between the snapshot and the clone any more
	check_livelist_gone
	log_must zfs destroy -R $TESTPOOL/$TESTCLONE
}
# Save the current tunable values; cleanup restores them from these globals.
ORIGINAL_MAX=$(get_tunable zfs_livelist_max_entries)
ORIGINAL_MIN=$(get_tunable zfs_livelist_min_percent_shared)
# Register cleanup before anything is created so it also runs on failure.
log_onexit cleanup
# Origin filesystem with a single file, snapshotted as @snap; both test
# functions clone this snapshot.
log_must zfs create $TESTPOOL/$TESTFS1
log_must mkfile 5m /$TESTPOOL/$TESTFS1/atestfile
log_must zfs snapshot $TESTPOOL/$TESTFS1@snap
# test_condense destroys its clone, so test_deactivated can reuse the name.
test_condense
test_deactivated
log_pass "Clone's livelist condenses and disables as expected."
@@ -0,0 +1,116 @@
#!/bin/ksh -p
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
# DESCRIPTION
# Test race conditions for livelist condensing
# STRATEGY
# These tests exercise code paths that deal with a livelist being
# simultaneously condensed and deactivated (deleted, exported or disabled).
# If a variable is set, the zthr will pause until it is cancelled or waited
# and then a counter variable keeps track of whether or not the code path is
# reached.
# 1. Deletion race: repeatedly overwrite the same file to trigger condense
# and then delete the clone.
# 2. Disable race: Overwrite enough files to trigger condenses and disabling of
# the livelist.
# 3. Export race: repeatedly overwrite the same file to trigger condense and
# then export the pool.
. $STF_SUITE/include/libtest.shlib
# Undo the changes made by this test: destroy $TESTPOOL/$TESTFS1 (and its
# dependents), restore zfs_livelist_max_entries from ORIGINAL_MAX and clear
# both condense pause tunables.
function cleanup
{
	# Only attempt the destroy when the dataset exists, so that a test
	# which failed before creating it cannot stop (log_must presumably
	# aborts on failure) the tunables below from being reset.
	if datasetexists $TESTPOOL/$TESTFS1; then
		log_must zfs destroy -Rf $TESTPOOL/$TESTFS1
	fi
	# reset the livelist sublist size to the original value
	set_tunable64 zfs_livelist_max_entries $ORIGINAL_MAX
	# reset the condense tests to 0
	set_tunable32 zfs_livelist_condense_zthr_pause 0
	set_tunable32 zfs_livelist_condense_sync_pause 0
}
# Deletion race: overwrite one file in a new clone several times, destroy
# the clone, and fail unless the counter tunable reads 1 afterwards.
# $1 name of the counter tunable; it is zeroed before the scenario runs
function delete_race
{
	typeset counter="$1"

	set_tunable32 "$counter" 0
	log_must zfs clone $TESTPOOL/$TESTFS1@snap $TESTPOOL/$TESTCLONE
	for i in 1 2 3 4 5; do
		log_must zpool sync $TESTPOOL
		log_must mkfile 5m /$TESTPOOL/$TESTCLONE/out
	done
	log_must zfs destroy $TESTPOOL/$TESTCLONE
	log_must zpool sync $TESTPOOL
	if [[ "$(get_tunable "$counter")" != "1" ]]; then
		log_fail "delete/condense race test failed"
	fi
}
# Export race: overwrite one file in a new clone several times, export and
# re-import the pool, and fail unless the counter tunable reads 1
# afterwards. The clone is destroyed at the end.
# $1 name of the counter tunable; it is zeroed before the scenario runs
function export_race
{
	typeset counter="$1"

	set_tunable32 "$counter" 0
	log_must zfs clone $TESTPOOL/$TESTFS1@snap $TESTPOOL/$TESTCLONE
	for i in 1 2 3 4 5; do
		log_must zpool sync $TESTPOOL
		log_must mkfile 5m /$TESTPOOL/$TESTCLONE/out
	done
	log_must zpool export $TESTPOOL
	log_must zpool import $TESTPOOL
	if [[ "$(get_tunable "$counter")" != "1" ]]; then
		log_fail "export/condense race test failed"
	fi
	log_must zfs destroy $TESTPOOL/$TESTCLONE
}
# Disable race: overwrite one file in a new clone several times, then
# overwrite the file shared with the origin, and fail unless the counter
# tunable reads 1 afterwards. The clone is destroyed at the end.
# $1 name of the counter tunable; it is zeroed before the scenario runs
function disable_race
{
	typeset counter="$1"

	set_tunable32 "$counter" 0
	log_must zfs clone $TESTPOOL/$TESTFS1@snap $TESTPOOL/$TESTCLONE
	for i in 1 2 3 4 5; do
		log_must zpool sync $TESTPOOL
		log_must mkfile 5m /$TESTPOOL/$TESTCLONE/out
	done
	# rewriting the file inherited from the origin triggers the disable
	log_must mkfile 100m /$TESTPOOL/$TESTCLONE/atestfile
	log_must zpool sync $TESTPOOL
	if [[ "$(get_tunable "$counter")" != "1" ]]; then
		log_fail "disable/condense race test failed"
	fi
	log_must zfs destroy $TESTPOOL/$TESTCLONE
}
# Save the current value; cleanup restores it from this global.
ORIGINAL_MAX=$(get_tunable zfs_livelist_max_entries)
# Register cleanup before anything is created so it also runs on failure.
log_onexit cleanup
# Origin filesystem with one 100m file, synced and snapshotted as @snap;
# every race helper clones this snapshot.
log_must zfs create $TESTPOOL/$TESTFS1
log_must mkfile 100m /$TESTPOOL/$TESTFS1/atestfile
log_must zpool sync $TESTPOOL
log_must zfs snapshot $TESTPOOL/$TESTFS1@snap
# Reduce livelist size to trigger condense more easily
set_tunable64 zfs_livelist_max_entries 0x14
# Test cancellation path in the zthr
# Each helper zeroes the named counter tunable, runs its scenario and
# fails unless the counter reads 1 afterwards.
set_tunable32 zfs_livelist_condense_zthr_pause 1
set_tunable32 zfs_livelist_condense_sync_pause 0
disable_race "zfs_livelist_condense_zthr_cancel"
delete_race "zfs_livelist_condense_zthr_cancel"
export_race "zfs_livelist_condense_zthr_cancel"
# Test cancellation path in the synctask
# NOTE(review): export_race is not run for the synctask counter - confirm
# that this is intentional.
set_tunable32 zfs_livelist_condense_zthr_pause 0
set_tunable32 zfs_livelist_condense_sync_pause 1
disable_race "zfs_livelist_condense_sync_cancel"
delete_race "zfs_livelist_condense_sync_cancel"
log_pass "Clone livelist condense race conditions passed."
@@ -0,0 +1,140 @@
#!/bin/ksh -p
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
# DESCRIPTION
# Verify zfs destroy test for clones with the livelist feature
# enabled.
# STRATEGY
# 1. One clone with an empty livelist
# - create the clone, check that livelist exists
# - delete the clone, check that livelist is eventually
# destroyed
# 2. One clone with populated livelist
# - create the clone, check that livelist exists
# - write multiple files to the clone
# - delete the clone, check that livelist is eventually
# destroyed
# 3. Multiple clones with empty livelists
# - same as 1. but with multiple clones
# 4. Multiple clones with populated livelists
# - same as 2. but with multiple clones
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zfs_destroy/zfs_destroy_common.kshlib
# Remove $TESTPOOL/$TESTFS1 and its dependents if it is still present, then
# restore zfs_livelist_max_entries from ORIGINAL_MAX.
function cleanup
{
	if datasetexists $TESTPOOL/$TESTFS1; then
		zfs destroy -R $TESTPOOL/$TESTFS1
	fi
	# put the livelist sublist size back to what it was before the test
	set_tunable64 zfs_livelist_max_entries $ORIGINAL_MAX
}
# Write a 1m file into a dataset of $TESTPOOL and sync the pool.
# $1 dataset name (relative to $TESTPOOL)
# $2 file name
function clone_write_file
{
	typeset target=/$TESTPOOL/$1/$2

	log_must mkfile 1m $target
	log_must zpool sync $TESTPOOL
}
# Case 1: a single clone that is never written to. Create it, destroy it,
# and confirm that no livelist remains.
function test_one_empty
{
	typeset clone=$TESTCLONE

	clone_dataset $TESTFS1 snap $clone
	log_must zfs destroy $TESTPOOL/$clone
	check_livelist_gone
}
# Case 2: a single clone with a populated livelist. Write three files,
# remove two of them, confirm the livelist exists, then destroy the clone
# and confirm the livelist is gone.
function test_one
{
	typeset f

	clone_dataset $TESTFS1 snap $TESTCLONE

	for f in $TESTFILE0 $TESTFILE1 $TESTFILE2; do
		clone_write_file $TESTCLONE $f
	done
	for f in $TESTFILE0 $TESTFILE2; do
		log_must rm /$TESTPOOL/$TESTCLONE/$f
	done

	check_livelist_exists $TESTCLONE
	log_must zfs destroy $TESTPOOL/$TESTCLONE
	check_livelist_gone
}
# Case 3: three clones that are never written to. Create them all, destroy
# them all, and confirm that no livelist remains.
function test_multiple_empty
{
	typeset c

	for c in $TESTCLONE $TESTCLONE1 $TESTCLONE2; do
		clone_dataset $TESTFS1 snap $c
	done
	for c in $TESTCLONE $TESTCLONE1 $TESTCLONE2; do
		log_must zfs destroy $TESTPOOL/$c
	done
	check_livelist_gone
}
# Case 4: three clones with populated livelists. Each clone gets a
# different write pattern; all livelists must exist before the clones are
# destroyed and none may remain afterwards.
function test_multiple
{
	typeset c f

	for c in $TESTCLONE $TESTCLONE1 $TESTCLONE2; do
		clone_dataset $TESTFS1 snap $c
	done

	# first clone: a single file
	clone_write_file $TESTCLONE $TESTFILE0

	# second clone: three files
	for f in $TESTFILE0 $TESTFILE1 $TESTFILE2; do
		clone_write_file $TESTCLONE1 $f
	done

	# third clone: each file is removed right after it is written
	for f in $TESTFILE0 $TESTFILE1; do
		clone_write_file $TESTCLONE2 $f
		log_must rm /$TESTPOOL/$TESTCLONE2/$f
	done

	for c in $TESTCLONE $TESTCLONE1 $TESTCLONE2; do
		check_livelist_exists $c
	done
	for c in $TESTCLONE $TESTCLONE1 $TESTCLONE2; do
		log_must zfs destroy $TESTPOOL/$c
	done
	check_livelist_gone
}
# Promote case: create a clone, promote it, confirm that no livelist
# remains, then destroy the promoted dataset together with its dependents.
function test_promote
{
	typeset clone=$TESTCLONE

	clone_dataset $TESTFS1 snap $clone
	log_must zfs promote $TESTPOOL/$clone
	check_livelist_gone
	log_must zfs destroy -R $TESTPOOL/$clone
}
# Save the current value; cleanup restores it from this global.
ORIGINAL_MAX=$(get_tunable zfs_livelist_max_entries)
# Register cleanup before anything is created so it also runs on failure.
log_onexit cleanup
# Origin filesystem with one 20m file, snapshotted as @snap; every test
# function clones this snapshot.
log_must zfs create $TESTPOOL/$TESTFS1
log_must mkfile 20m /$TESTPOOL/$TESTFS1/atestfile
log_must zfs snapshot $TESTPOOL/$TESTFS1@snap
# set a small livelist entry size to more easily test multiple entry livelists
set_tunable64 zfs_livelist_max_entries 0x14
# test_promote runs last: it promotes the clone and then destroys it with
# -R, so later tests could not rely on $TESTFS1@snap.
# NOTE(review): the effect of promote on the origin is assumed - confirm.
test_one_empty
test_one
test_multiple_empty
test_multiple
test_promote
log_pass "Clone with the livelist feature enabled could be destroyed," \
"also could be promoted and destroyed as expected."
@@ -25,7 +25,7 @@
#
#
# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
# Copyright (c) 2012, 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
@@ -146,3 +146,43 @@ function check_dataset
done
fi
}
# Ask zdb whether the given clone has a livelist; call log_fail if the zdb
# output for the clone contains no "Livelist" line. Matching lines are
# printed to stdout.
# $1 clone name
function check_livelist_exists
{
	if ! zdb -vvvvv $TESTPOOL/$1 | grep "Livelist"; then
		log_fail "zdb could not find Livelist"
	fi
}
# Block until zdb no longer reports any "Deleted Livelist" entry for
# $TESTPOOL, i.e. until the deferred-destroy livelists have been removed.
# The last zdb match is left in the global variable 'deleted'.
function wait_for_deferred_destroy
{
	sync
	deleted=$(zdb -vvvvv $TESTPOOL | grep "Deleted Livelist")
	# keep polling zdb until the match comes back empty
	until [[ -z "$deleted" ]]; do
		deleted=$(zdb -vvvvv $TESTPOOL | grep "Deleted Livelist")
	done
}
# Check that a livelist has been removed, waiting for deferred destroy entries
# to be cleared from zdb.
# Calls log_fail if zdb still prints any "Livelist" line for $TESTPOOL (the
# matching lines are printed to stdout). Returns 0 when no livelist is
# found, so the function can also be used where a zero status is expected.
function check_livelist_gone
{
	wait_for_deferred_destroy
	# An explicit 'if' is used instead of 'cmd && log_fail': with the
	# latter the function's status is grep's non-zero "no match" status
	# on the success path.
	if zdb -vvvvv $TESTPOOL | grep "Livelist"; then
		log_fail "zdb found Livelist after the clone is deleted."
	fi
}
# Clone $TESTPOOL/<fs>@<snap> to $TESTPOOL/<clone>, then verify that the
# clone exists and that zdb reports a livelist for it.
# $1 fs name
# $2 snap name
# $3 clone name
function clone_dataset
{
	log_must zfs clone $TESTPOOL/$1@$2 $TESTPOOL/$3
	if ! datasetexists $TESTPOOL/$3; then
		log_fail "zfs clone $TESTPOOL/$3 fail."
	fi
	check_livelist_exists $3
}
@@ -0,0 +1,68 @@
#!/bin/ksh -p
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
# DESCRIPTION
# Verify that livelists tracking remapped blocks can be
# properly destroyed.
# STRATEGY
# 1. Create a pool with disk1 and create a filesystem, snapshot
# and clone. Write several files to the clone.
# 2. Add disk2 to the pool and then remove disk1, triggering a
# remap of the blkptrs tracked in the livelist.
# 3. Delete the clone
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/removal/removal.kshlib
# Destroy $TESTPOOL2 if it still exists and remove the two file-backed
# vdevs named by VIRTUAL_DISK1 and VIRTUAL_DISK2.
function cleanup
{
	if poolexists $TESTPOOL2; then
		zpool destroy $TESTPOOL2
	fi
	# 'if' blocks (rather than '[[ ... ]] && cmd') keep the function's
	# status at 0 when a file is already gone.
	if [[ -f "$VIRTUAL_DISK1" ]]; then
		log_must rm "$VIRTUAL_DISK1"
	fi
	# This previously called the non-existent 'lot_must', so the second
	# disk file was never removed.
	if [[ -f "$VIRTUAL_DISK2" ]]; then
		log_must rm "$VIRTUAL_DISK2"
	fi
}
# Register cleanup first; it only touches the pool and disk files that
# actually exist by the time it runs.
log_onexit cleanup
# File-backed vdevs; disk2 is twice the size of disk1.
VIRTUAL_DISK1=/var/tmp/disk1
VIRTUAL_DISK2=/var/tmp/disk2
log_must mkfile $(($MINVDEVSIZE * 8)) $VIRTUAL_DISK1
log_must mkfile $(($MINVDEVSIZE * 16)) $VIRTUAL_DISK2
# Step 1: pool on disk1 with a filesystem, snapshot and clone; write
# several files to the clone.
log_must zpool create $TESTPOOL2 $VIRTUAL_DISK1
log_must poolexists $TESTPOOL2
log_must zfs create $TESTPOOL2/$TESTFS
log_must mkfile 25m /$TESTPOOL2/$TESTFS/atestfile
log_must zfs snapshot $TESTPOOL2/$TESTFS@snap
log_must zfs clone $TESTPOOL2/$TESTFS@snap $TESTPOOL2/$TESTCLONE
log_must mkfile 1m /$TESTPOOL2/$TESTCLONE/$TESTFILE0
log_must mkfile 1m /$TESTPOOL2/$TESTCLONE/$TESTFILE1
log_must mkfile 1m /$TESTPOOL2/$TESTCLONE/$TESTFILE2
# Step 2: add disk2, then remove disk1 so the blkptrs tracked in the
# livelist are remapped. wait_for_removal presumably blocks until the
# removal has finished - confirm against removal.kshlib.
log_must zpool add $TESTPOOL2 $VIRTUAL_DISK2
log_must zpool remove $TESTPOOL2 $VIRTUAL_DISK1
wait_for_removal $TESTPOOL2
# Step 3: remove two of the clone's files, then delete the clone.
log_must rm /$TESTPOOL2/$TESTCLONE/$TESTFILE0
log_must rm /$TESTPOOL2/$TESTCLONE/$TESTFILE1
log_must zfs destroy $TESTPOOL2/$TESTCLONE
log_pass "Clone with the livelist feature and remapped blocks," \
"can be destroyed."
@@ -0,0 +1,93 @@
#!/bin/ksh -p
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
# DESCRIPTION
# Verify that livelists tracking remapped blocks can be
# properly condensed.
# STRATEGY
# 1. Create a pool with disk1 and create a filesystem, snapshot
# and clone. Create two files for the first livelist entry and
# pause condensing.
# 2. Add disk2 to the pool and then remove disk1, triggering a
# remap of the blkptrs tracked in the livelist.
# 3. Overwrite the first file several times to trigger a condense,
# overwrite the second file once and resume condensing, now with
# extra blkptrs added during the remap
# 4. Check that the test added new ALLOC blkptrs mid-condense using
# a variable set in that code path
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/removal/removal.kshlib
# Destroy $TESTPOOL2 if it still exists, restore the livelist tunables
# changed by this test and remove the two file-backed vdevs named by
# VIRTUAL_DISK1 and VIRTUAL_DISK2.
function cleanup
{
	if poolexists $TESTPOOL2; then
		zpool destroy $TESTPOOL2
	fi
	# reset livelist max size
	set_tunable64 zfs_livelist_max_entries $ORIGINAL_MAX
	# The test sets the sync pause to 1 while it runs; clear it here so
	# it is not left set if the test fails before resuming the condense.
	set_tunable32 zfs_livelist_condense_sync_pause 0
	# 'if' blocks (rather than '[[ ... ]] && cmd') keep the function's
	# status at 0 when a file is already gone.
	if [[ -f "$VIRTUAL_DISK1" ]]; then
		log_must rm "$VIRTUAL_DISK1"
	fi
	# This previously called the non-existent 'lot_must', so the second
	# disk file was never removed.
	if [[ -f "$VIRTUAL_DISK2" ]]; then
		log_must rm "$VIRTUAL_DISK2"
	fi
}
# Register cleanup first; ORIGINAL_MAX is saved on the next line, before
# the tunable is changed.
log_onexit cleanup
ORIGINAL_MAX=$(get_tunable zfs_livelist_max_entries)
set_tunable64 zfs_livelist_max_entries 0x14
# File-backed vdevs; disk2 is twice the size of disk1.
VIRTUAL_DISK1=/var/tmp/disk1
VIRTUAL_DISK2=/var/tmp/disk2
log_must mkfile $(($MINVDEVSIZE * 8)) $VIRTUAL_DISK1
log_must mkfile $(($MINVDEVSIZE * 16)) $VIRTUAL_DISK2
# Step 1: pool on disk1 with a filesystem, snapshot and clone.
log_must zpool create $TESTPOOL2 $VIRTUAL_DISK1
log_must poolexists $TESTPOOL2
log_must zfs create $TESTPOOL2/$TESTFS
log_must mkfile 100m /$TESTPOOL2/$TESTFS/atestfile
log_must zfs snapshot $TESTPOOL2/$TESTFS@snap
log_must zfs clone $TESTPOOL2/$TESTFS@snap $TESTPOOL2/$TESTCLONE
# Create initial files and pause condensing on its next execution
log_must mkfile 10m /$TESTPOOL2/$TESTCLONE/A
log_must mkfile 1m /$TESTPOOL2/$TESTCLONE/B
log_must zpool sync $TESTPOOL2
set_tunable32 zfs_livelist_condense_sync_pause 1
# Add a new dev and remove the old one
log_must zpool add $TESTPOOL2 $VIRTUAL_DISK2
log_must zpool remove $TESTPOOL2 $VIRTUAL_DISK1
wait_for_removal $TESTPOOL2
# Zero the counter that is checked at the end of the test
set_tunable32 zfs_livelist_condense_new_alloc 0
# Trigger a condense
log_must mkfile 10m /$TESTPOOL2/$TESTCLONE/A
log_must zpool sync $TESTPOOL2
log_must mkfile 10m /$TESTPOOL2/$TESTCLONE/A
log_must zpool sync $TESTPOOL2
# Write remapped blkptrs which will modify the livelist mid-condense
log_must mkfile 1m /$TESTPOOL2/$TESTCLONE/B
# Resume the paused condense
set_tunable32 zfs_livelist_condense_sync_pause 0
log_must zpool sync $TESTPOOL2
# Check that we've added new ALLOC blkptrs during the condense
# ([[ a < b ]] compares as strings here; "0" sorts before any positive count)
[[ "0" < "$(get_tunable zfs_livelist_condense_new_alloc)" ]] || \
log_fail "removal/condense test failed"
log_must zfs destroy $TESTPOOL2/$TESTCLONE
log_pass "Clone with the livelist feature and remapped blocks," \
"can be condensed."
@@ -93,5 +93,6 @@ if is_linux; then
"feature@allocation_classes"
"feature@resilver_defer"
"feature@bookmark_v2"
"feature@livelist"
)
fi