FDT dedup log sync -- remove incremental

This PR condenses the FDT dedup log syncing into a single sync
pass. This reduces the overhead of modifying indirect blocks for the
dedup table multiple times per txg. In addition, changes were made to
the formula for how much to sync per txg. We now also consider the
backlog we have to clear, to prevent it from growing too large, or
remaining large on an idle system.

Sponsored-by: Klara, Inc.
Sponsored-by: iXsystems, Inc.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Authored-by: Don Brady <don.brady@klarasystems.com>
Authored-by: Paul Dagnelie <paul.dagnelie@klarasystems.com>
Signed-off-by: Paul Dagnelie <paul.dagnelie@klarasystems.com>
Closes #17038
This commit is contained in:
Paul Dagnelie
2025-03-13 10:47:03 -07:00
committed by Alexander Motin
parent f9d59b579e
commit 661310ff5c
13 changed files with 366 additions and 202 deletions
+2 -2
View File
@@ -683,8 +683,8 @@ post =
tags = ['functional', 'deadman']
[tests/functional/dedup]
tests = ['dedup_fdt_create', 'dedup_fdt_import', 'dedup_legacy_create',
'dedup_legacy_import', 'dedup_legacy_fdt_upgrade',
tests = ['dedup_fdt_create', 'dedup_fdt_import', 'dedup_fdt_pacing',
'dedup_legacy_create', 'dedup_legacy_import', 'dedup_legacy_fdt_upgrade',
'dedup_legacy_fdt_mixed', 'dedup_quota', 'dedup_prune', 'dedup_zap_shrink']
pre =
post =
+2
View File
@@ -32,6 +32,8 @@ DDT_ZAP_DEFAULT_BS dedup.ddt_zap_default_bs ddt_zap_default_bs
DDT_ZAP_DEFAULT_IBS dedup.ddt_zap_default_ibs ddt_zap_default_ibs
DDT_DATA_IS_SPECIAL ddt_data_is_special zfs_ddt_data_is_special
DEDUP_LOG_TXG_MAX dedup.log_txg_max zfs_dedup_log_txg_max
DEDUP_LOG_FLUSH_ENTRIES_MAX dedup.log_flush_entries_max zfs_dedup_log_flush_entries_max
DEDUP_LOG_FLUSH_ENTRIES_MIN dedup.log_flush_entries_min zfs_dedup_log_flush_entries_min
DEADMAN_CHECKTIME_MS deadman.checktime_ms zfs_deadman_checktime_ms
DEADMAN_EVENTS_PER_SECOND deadman_events_per_second zfs_deadman_events_per_second
DEADMAN_FAILMODE deadman.failmode zfs_deadman_failmode
+1
View File
@@ -1442,6 +1442,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/dedup/setup.ksh \
functional/dedup/dedup_fdt_create.ksh \
functional/dedup/dedup_fdt_import.ksh \
functional/dedup/dedup_fdt_pacing.ksh \
functional/dedup/dedup_legacy_create.ksh \
functional/dedup/dedup_legacy_import.ksh \
functional/dedup/dedup_legacy_fdt_upgrade.ksh \
+109
View File
@@ -0,0 +1,109 @@
#!/bin/ksh -p
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2025 Klara, Inc.
#
# Ensure dedup log flushes are appropriately paced
# Source the common test library from the suite's include directory
# (presumably the provider of the log_* and tunable helpers used below).
. $STF_SUITE/include/libtest.shlib
# State the assertion this test is checking.
log_assert "dedup (FDT) paces out log entries appropriately"
#
# Print the total number of entries across all sha256 dedup log objects
# reported by 'zdb -D' for $TESTPOOL.
#
# Every output line containing "-log-sha256-" contributes the number that
# follows its last "entries=" marker. The result is always numeric: "0" is
# printed when no log line is present, rather than an empty string, so
# callers can use it directly in arithmetic comparisons and messages.
#
function get_ddt_log_entries
{
	zdb -D "$TESTPOOL" | awk '
	    /-log-sha256-/ { sub(/.*entries=/, ""); sum += $1 }
	    END { print sum + 0 }'
}
#
# Exit handler: destroy the test pool when it still exists, then put the
# DEDUP_LOG_FLUSH_ENTRIES_MAX tunable back to its saved value.
#
function cleanup
{
	# Only attempt the destroy when the pool is actually present.
	poolexists $TESTPOOL && destroy_pool $TESTPOOL

	log_must restore_tunable DEDUP_LOG_FLUSH_ENTRIES_MAX
}
# Save the tunable before the exit handler is registered. cleanup restores it
# unconditionally under log_must, so a saved value has to exist even when one
# of the setup steps below fails (restore_tunable is expected to fail if
# nothing was saved -- verify against libtest.shlib).
log_must save_tunable DEDUP_LOG_FLUSH_ENTRIES_MAX
log_onexit cleanup

# Create a pool with fast dedup enabled. We disable block cloning to ensure
# it doesn't get in the way of dedup.
log_must zpool create -f \
    -o feature@fast_dedup=enabled \
    -o feature@block_cloning=disabled \
    $TESTPOOL $DISKS

# Create a filesystem with a small recordsize so that we get more DDT entries,
# disable compression so our writes create predictable results on disk, and
# use 'xattr=sa' to prevent selinux xattrs influencing our accounting
log_must zfs create \
    -o dedup=on \
    -o compression=off \
    -o xattr=sa \
    -o checksum=sha256 \
    -o recordsize=4k $TESTPOOL/fs

# Set the dedup log to only flush a single entry per txg.
# It's hard to guarantee that exactly one flush will happen per txg, or that
# we don't miss a txg due to weird latency or anything, so we build some
# wiggle room into subsequent checks.
log_must set_tunable32 DEDUP_LOG_FLUSH_ENTRIES_MAX 1
# Create a file. This is 256 full blocks (8 x 128k written with a 4k
# recordsize), so will produce 256 entries in the dedup log.
log_must dd if=/dev/urandom of=/$TESTPOOL/fs/file1 bs=128k count=8
sync_pool

# Verify there are at least 240 entries in the dedup log. The comparison is
# inclusive so that it agrees with the failure message below.
log_entries=$(get_ddt_log_entries)
[[ "$log_entries" -ge 240 ]] || \
    log_fail "Fewer than 240 entries in dedup log: $log_entries"

# Wait for 5 TXGs to sync.
for i in $(seq 1 5); do
	sync_pool
done

# With the flush limit set to one entry per txg, the log should have shrunk
# only slightly: more than 5 entries, but fewer than 20, must have been
# flushed since the previous measurement.
log_entries2=$(get_ddt_log_entries)
flushed=$((log_entries - log_entries2))
[[ $flushed -lt 20 ]] || \
    log_fail "Too many entries pruned from dedup log: " \
    "from $log_entries to $log_entries2"
[[ $flushed -gt 5 ]] || \
    log_fail "Too few entries pruned from dedup log: " \
    "from $log_entries to $log_entries2"

# Set the log flush rate high enough to clear the whole list.
log_must set_tunable32 DEDUP_LOG_FLUSH_ENTRIES_MAX 1024
sync_pool

# Verify there are 0 entries in the dedup log.
log_entries3=$(get_ddt_log_entries)
[[ "$log_entries3" -eq 0 ]] || \
    log_fail "Entries still present in dedup log: $log_entries3"

# Verify there are 256 entries in the unique table.
log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique:.*entries=256'"

log_pass "dedup (FDT) paces out log entries appropriately"
@@ -48,13 +48,15 @@ log_assert "Verify DDT pruning correctly removes non-duplicate entries"
# entries appear in the DDT ZAP
# Remember the current log txg limit, then set it to 1.
log_must save_tunable DEDUP_LOG_TXG_MAX
log_must set_tunable32 DEDUP_LOG_TXG_MAX 1
# Also set the minimum number of log entries flushed to 100000; presumably
# large enough that the whole log is flushed each txg regardless of pacing --
# confirm against the zfs_dedup_log_flush_entries_min documentation.
log_must save_tunable DEDUP_LOG_FLUSH_ENTRIES_MIN
log_must set_tunable32 DEDUP_LOG_FLUSH_ENTRIES_MIN 100000
#
# Exit handler: destroy the test pool when it still exists, then restore
# both dedup log tunables that were overridden for this test.
#
function cleanup
{
	typeset tunable

	poolexists $TESTPOOL && destroy_pool $TESTPOOL

	# Restore in the same order the tunables were saved.
	for tunable in DEDUP_LOG_TXG_MAX DEDUP_LOG_FLUSH_ENTRIES_MIN; do
		log_must restore_tunable $tunable
	done
}
function ddt_entries
@@ -55,6 +55,8 @@ POOL="dedup_pool"
# where things appear on-disk
# Remember the current log txg limit, then set it to 1.
log_must save_tunable DEDUP_LOG_TXG_MAX
log_must set_tunable32 DEDUP_LOG_TXG_MAX 1
# Also set the minimum number of log entries flushed to 100000; presumably
# large enough that the whole log is flushed each txg regardless of pacing --
# confirm against the zfs_dedup_log_flush_entries_min documentation.
log_must save_tunable DEDUP_LOG_FLUSH_ENTRIES_MIN
log_must set_tunable32 DEDUP_LOG_FLUSH_ENTRIES_MIN 100000
function cleanup
{
@@ -63,6 +65,7 @@ function cleanup
fi
log_must rm -fd $VDEV_GENERAL $VDEV_DEDUP $MOUNTDIR
log_must restore_tunable DEDUP_LOG_TXG_MAX
log_must restore_tunable DEDUP_LOG_FLUSH_ENTRIES_MIN
}
@@ -44,6 +44,8 @@ log_assert "Create a large number of entries in the DDT. " \
# entries appear in the DDT ZAP
# Remember the current log txg limit, then set it to 1.
log_must save_tunable DEDUP_LOG_TXG_MAX
log_must set_tunable32 DEDUP_LOG_TXG_MAX 1
# Also set the minimum number of log entries flushed to 100000; presumably
# large enough that the whole log is flushed each txg regardless of pacing --
# confirm against the zfs_dedup_log_flush_entries_min documentation.
log_must save_tunable DEDUP_LOG_FLUSH_ENTRIES_MIN
log_must set_tunable32 DEDUP_LOG_FLUSH_ENTRIES_MIN 100000
function cleanup
{
@@ -51,6 +53,7 @@ function cleanup
destroy_pool $TESTPOOL
fi
log_must restore_tunable DEDUP_LOG_TXG_MAX
log_must restore_tunable DEDUP_LOG_FLUSH_ENTRIES_MIN
}
log_onexit cleanup