From 4655bdd8ab02a61c4271cd92ff14bc134b99c4a0 Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Tue, 17 Mar 2026 02:58:25 +0500 Subject: [PATCH] ZTS: Fix L2ARC test reliability Disable depth cap (L2ARC_EXT_HEADROOM_PCT=0) in DWPD and multidev tests that rely on predictable marker advancement during fill and measurement. Rework multidev_throughput to verify sustained throughput across three consecutive windows instead of asserting an absolute rate. Use larger cache devices (3GB each) to avoid frequent global marker resets (smallest_capacity/8), fill ARC before attaching caches to provide a stable evictable buffer pool, and lower write_max to 4MB/s per device (8MB/s aggregate across both cache devices) to avoid exhausting data within the measurement period. Use destroy_pool (log_must_busy) instead of log_must zpool destroy to handle transient EBUSY during teardown. Remove l2arc_multidev_throughput_pos from the expected-fail list in zts-report.py.in. Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Ameer Hamza Closes #18321 --- tests/test-runner/bin/zts-report.py.in | 1 - .../l2arc/l2arc_dwpd_ratelimit_pos.ksh | 5 +- .../l2arc/l2arc_dwpd_reimport_pos.ksh | 5 +- .../l2arc/l2arc_multidev_scaling_pos.ksh | 45 ++++---- .../l2arc/l2arc_multidev_throughput_pos.ksh | 108 ++++++++++-------- 5 files changed, 91 insertions(+), 73 deletions(-) diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in index 90d4c1277..a8251c511 100755 --- a/tests/test-runner/bin/zts-report.py.in +++ b/tests/test-runner/bin/zts-report.py.in @@ -245,7 +245,6 @@ maybe = { 'history/history_010_pos': ['SKIP', exec_reason], 'io/mmap': ['SKIP', fio_reason], 'l2arc/l2arc_l2miss_pos': ['FAIL', known_reason], - 'l2arc/l2arc_multidev_throughput_pos': ['FAIL', 18272], 'l2arc/persist_l2arc_005_pos': ['FAIL', known_reason], 'largest_pool/largest_pool_001_pos': ['FAIL', known_reason], 'mmap/mmap_sync_001_pos': ['FAIL', known_reason], diff --git 
a/tests/zfs-tests/tests/functional/l2arc/l2arc_dwpd_ratelimit_pos.ksh b/tests/zfs-tests/tests/functional/l2arc/l2arc_dwpd_ratelimit_pos.ksh index 65b2bf07a..51223790b 100755 --- a/tests/zfs-tests/tests/functional/l2arc/l2arc_dwpd_ratelimit_pos.ksh +++ b/tests/zfs-tests/tests/functional/l2arc/l2arc_dwpd_ratelimit_pos.ksh @@ -51,6 +51,7 @@ function cleanup restore_tunable L2ARC_WRITE_MAX restore_tunable L2ARC_NOPREFETCH restore_tunable L2ARC_DWPD_LIMIT + restore_tunable L2ARC_EXT_HEADROOM_PCT restore_tunable ARC_MIN restore_tunable ARC_MAX } @@ -60,6 +61,7 @@ log_onexit cleanup save_tunable L2ARC_WRITE_MAX save_tunable L2ARC_NOPREFETCH save_tunable L2ARC_DWPD_LIMIT +save_tunable L2ARC_EXT_HEADROOM_PCT save_tunable ARC_MIN save_tunable ARC_MAX @@ -73,6 +75,7 @@ log_must set_tunable64 ARC_MAX $((400 * 1024 * 1024)) log_must set_tunable64 ARC_MIN $((200 * 1024 * 1024)) log_must set_tunable32 L2ARC_NOPREFETCH 0 log_must set_tunable32 L2ARC_WRITE_MAX $((200 * 1024 * 1024)) +log_must set_tunable64 L2ARC_EXT_HEADROOM_PCT 0 # Create larger main vdev to accommodate fill data log_must truncate -s 5G $VDEV @@ -133,6 +136,6 @@ if [[ ${results[5000]} -le ${results[1800]} ]]; then log_fail "DWPD=5000 should write more than DWPD=1800" fi -log_must zpool destroy $TESTPOOL +destroy_pool $TESTPOOL log_pass "L2ARC DWPD rate limiting correctly limits write rate." 
diff --git a/tests/zfs-tests/tests/functional/l2arc/l2arc_dwpd_reimport_pos.ksh b/tests/zfs-tests/tests/functional/l2arc/l2arc_dwpd_reimport_pos.ksh index 50208bce2..1cf8e7d6e 100755 --- a/tests/zfs-tests/tests/functional/l2arc/l2arc_dwpd_reimport_pos.ksh +++ b/tests/zfs-tests/tests/functional/l2arc/l2arc_dwpd_reimport_pos.ksh @@ -50,6 +50,7 @@ function cleanup restore_tunable L2ARC_WRITE_MAX restore_tunable L2ARC_NOPREFETCH restore_tunable L2ARC_DWPD_LIMIT + restore_tunable L2ARC_EXT_HEADROOM_PCT restore_tunable L2ARC_REBUILD_BLOCKS_MIN_L2SIZE restore_tunable ARC_MIN restore_tunable ARC_MAX @@ -60,6 +61,7 @@ log_onexit cleanup save_tunable L2ARC_WRITE_MAX save_tunable L2ARC_NOPREFETCH save_tunable L2ARC_DWPD_LIMIT +save_tunable L2ARC_EXT_HEADROOM_PCT save_tunable L2ARC_REBUILD_BLOCKS_MIN_L2SIZE save_tunable ARC_MIN save_tunable ARC_MAX @@ -78,6 +80,7 @@ log_must set_tunable64 ARC_MAX $((400 * 1024 * 1024)) log_must set_tunable64 ARC_MIN $((200 * 1024 * 1024)) log_must set_tunable32 L2ARC_NOPREFETCH 0 log_must set_tunable32 L2ARC_WRITE_MAX $((200 * 1024 * 1024)) +log_must set_tunable64 L2ARC_EXT_HEADROOM_PCT 0 # Create larger main vdev to accommodate fill data log_must truncate -s 8G $VDEV @@ -164,6 +167,6 @@ if [[ $writes_after -eq 0 ]]; then log_fail "No writes after import - rate limiting may be broken" fi -log_must zpool destroy $TESTPOOL +destroy_pool $TESTPOOL log_pass "L2ARC DWPD rate limiting works after pool export/import." diff --git a/tests/zfs-tests/tests/functional/l2arc/l2arc_multidev_scaling_pos.ksh b/tests/zfs-tests/tests/functional/l2arc/l2arc_multidev_scaling_pos.ksh index 8e8c9078c..093806862 100755 --- a/tests/zfs-tests/tests/functional/l2arc/l2arc_multidev_scaling_pos.ksh +++ b/tests/zfs-tests/tests/functional/l2arc/l2arc_multidev_scaling_pos.ksh @@ -27,14 +27,14 @@ # L2ARC parallel writes scale with number of cache devices. # # STRATEGY: -# 1. Configure L2ARC write rate to 16MB/s per device. -# 2. 
Disable DWPD rate limiting to test pure parallel throughput. -# 3. Create pool with single 2100MB cache device. -# 4. Generate continuous writes, wait for L2ARC activity, measure over 25s. -# 5. Verify single-device throughput ~400MB (16MB/s × 25s). -# 6. Recreate pool with dual 2100MB cache devices. -# 7. Generate continuous writes, wait for L2ARC activity, measure over 25s. -# 8. Verify dual-device throughput ~800MB (2×16MB/s × 25s). +# 1. Configure L2ARC write rate to 4MB/s per device. +# 2. Disable DWPD rate limiting and depth cap to test pure parallel throughput. +# 3. Create pool with single 3072MB cache device. +# 4. Generate continuous writes, wait for L2ARC activity, measure over 12s. +# 5. Verify single-device throughput ~48MB (4MB/s × 12s). +# 6. Recreate pool with dual 3072MB cache devices. +# 7. Generate continuous writes, wait for L2ARC activity, measure over 12s. +# 8. Verify dual-device throughput ~96MB (2×4MB/s × 12s). # verify_runnable "global" @@ -50,6 +50,7 @@ function cleanup restore_tunable L2ARC_WRITE_MAX restore_tunable L2ARC_NOPREFETCH restore_tunable L2ARC_DWPD_LIMIT + restore_tunable L2ARC_EXT_HEADROOM_PCT restore_tunable ARC_MIN restore_tunable ARC_MAX } @@ -59,19 +60,23 @@ log_onexit cleanup save_tunable L2ARC_WRITE_MAX save_tunable L2ARC_NOPREFETCH save_tunable L2ARC_DWPD_LIMIT +save_tunable L2ARC_EXT_HEADROOM_PCT save_tunable ARC_MIN save_tunable ARC_MAX -# Test parameters -typeset cache_sz=1000 +# Test parameters — cache_sz and write_max are chosen so that total writes +# per phase stay below the global marker reset threshold +# (smallest_capacity/8) to avoid throughput disruption from marker resets. 
+typeset cache_sz=3072 typeset fill_mb=2500 # 2.5GB initial data -typeset test_time=12 # Measurement window: 16MB/s × 12s = ~200MB per device +typeset test_time=12 # Measurement window: 4MB/s × 12s = ~48MB per device -# Disable DWPD to test pure parallel throughput +# Disable DWPD and depth cap to test pure parallel throughput log_must set_tunable32 L2ARC_DWPD_LIMIT 0 +log_must set_tunable64 L2ARC_EXT_HEADROOM_PCT 0 -# Set L2ARC_WRITE_MAX to 16MB/s to test parallel scaling -log_must set_tunable32 L2ARC_WRITE_MAX $((16 * 1024 * 1024)) +# Set L2ARC_WRITE_MAX to 4MB/s to test parallel scaling +log_must set_tunable32 L2ARC_WRITE_MAX $((4 * 1024 * 1024)) log_must set_tunable32 L2ARC_NOPREFETCH 0 # Configure arc_max so L2ARC >= arc_c_max * 2 threshold for persistent markers @@ -107,12 +112,12 @@ kill $dd_pid 2>/dev/null wait $dd_pid 2>/dev/null typeset single_writes=$((end - start)) -# expected = 16MB/s * 1 device * 25s = 400MB -typeset single_expected=$((16 * 1024 * 1024 * test_time)) +# expected = 4MB/s * 1 device * 12s = 48MB +typeset single_expected=$((4 * 1024 * 1024 * test_time)) log_note "Single-device writes: $((single_writes / 1024 / 1024))MB (expected ~$((single_expected / 1024 / 1024))MB)" # Dual device test -log_must zpool destroy $TESTPOOL +destroy_pool $TESTPOOL log_must truncate -s ${cache_sz}M $VDEV_CACHE log_must truncate -s ${cache_sz}M $VDEV_CACHE2 @@ -142,8 +147,8 @@ kill $dd_pid 2>/dev/null wait $dd_pid 2>/dev/null typeset dual_writes=$((end - start)) -# expected = 16MB/s * 2 devices * 25s = 800MB -typeset dual_expected=$((16 * 1024 * 1024 * 2 * test_time)) +# expected = 4MB/s * 2 devices * 12s = 96MB +typeset dual_expected=$((4 * 1024 * 1024 * 2 * test_time)) log_note "Dual-device writes: $((dual_writes / 1024 / 1024))MB (expected ~$((dual_expected / 1024 / 1024))MB)" # Verify writes are within expected range (80-150%) @@ -157,6 +162,6 @@ if [[ $dual_writes -lt $dual_min ]]; then log_fail "Dual-device writes $((dual_writes / 1024 / 1024))MB below 
minimum $((dual_min / 1024 / 1024))MB" fi -log_must zpool destroy $TESTPOOL +destroy_pool $TESTPOOL log_pass "L2ARC parallel writes scale with number of cache devices." diff --git a/tests/zfs-tests/tests/functional/l2arc/l2arc_multidev_throughput_pos.ksh b/tests/zfs-tests/tests/functional/l2arc/l2arc_multidev_throughput_pos.ksh index abdaeb41f..fea0a5a8a 100755 --- a/tests/zfs-tests/tests/functional/l2arc/l2arc_multidev_throughput_pos.ksh +++ b/tests/zfs-tests/tests/functional/l2arc/l2arc_multidev_throughput_pos.ksh @@ -24,18 +24,20 @@ # # DESCRIPTION: -# L2ARC parallel writes scale with number of cache devices. +# L2ARC parallel writes sustain throughput over time without degradation. # # STRATEGY: -# 1. Disable DWPD rate limiting. -# 2. Create pool with 2 cache devices. -# 3. Write data and measure L2ARC throughput. -# 4. Verify throughput scales with device count (~16MB/s per device). +# 1. Disable DWPD rate limiting and depth cap. +# 2. Create pool without cache devices, fill ARC to arc_max. +# 3. Add 2 cache devices after ARC is full and stable. +# 4. Measure L2ARC throughput over 3 consecutive windows. +# 5. Verify throughput remains stable (no window drops below 50% +# of the first). # verify_runnable "global" -log_assert "L2ARC parallel writes scale with number of cache devices." +log_assert "L2ARC parallel writes sustain throughput without degradation." 
function cleanup { @@ -46,6 +48,7 @@ function cleanup restore_tunable L2ARC_WRITE_MAX restore_tunable L2ARC_NOPREFETCH restore_tunable L2ARC_DWPD_LIMIT + restore_tunable L2ARC_EXT_HEADROOM_PCT restore_tunable ARC_MIN restore_tunable ARC_MAX } @@ -55,41 +58,47 @@ log_onexit cleanup save_tunable L2ARC_WRITE_MAX save_tunable L2ARC_NOPREFETCH save_tunable L2ARC_DWPD_LIMIT +save_tunable L2ARC_EXT_HEADROOM_PCT save_tunable ARC_MIN save_tunable ARC_MAX -# Test parameters -typeset num_devs=2 -typeset cache_sz=1000 # 2000MB total > 1900MB (arc_max*2) threshold -typeset test_time=10 -typeset fill_mb=1500 -typeset expected_rate=$((32 * 1024 * 1024)) # 32 MB/s per device +# Test parameters — cache_sz and write_max are chosen so that total writes +# across all windows stay below the global marker reset threshold +# (smallest_capacity/8) to avoid throughput disruption from marker resets. +typeset cache_sz=3072 +typeset window_time=10 +typeset num_windows=3 +typeset arc_max_mb=950 +typeset fill_mb=$arc_max_mb -# Disable DWPD rate limiting +# Disable DWPD rate limiting and depth cap log_must set_tunable32 L2ARC_DWPD_LIMIT 0 +log_must set_tunable64 L2ARC_EXT_HEADROOM_PCT 0 -# Set L2ARC_WRITE_MAX to 32MB/s per device (64MB/s total with 2 devices) -log_must set_tunable32 L2ARC_WRITE_MAX $expected_rate +# Set L2ARC_WRITE_MAX to 4MB/s per device +log_must set_tunable32 L2ARC_WRITE_MAX $((4 * 1024 * 1024)) log_must set_tunable32 L2ARC_NOPREFETCH 0 -# Configure arc_max large enough to feed L2ARC -log_must set_tunable64 ARC_MAX $((950 * 1024 * 1024)) +# Configure ARC size +log_must set_tunable64 ARC_MAX $((arc_max_mb * 1024 * 1024)) log_must set_tunable64 ARC_MIN $((512 * 1024 * 1024)) -# Create cache devices (using letters e-f to follow cfg naming convention) +# Create pool without cache devices +log_must truncate -s 5G $VDEV +log_must zpool create -f $TESTPOOL $VDEV + +# Fill ARC to arc_max so eviction lists have stable evictable buffers +log_must dd if=/dev/urandom of=/$TESTPOOL/file1 
bs=1M count=$fill_mb +log_must zpool sync $TESTPOOL + +# Create and add cache devices now that ARC is full typeset cache_devs="" for letter in e f; do typeset dev="$VDIR/$letter" log_must truncate -s ${cache_sz}M $dev cache_devs="$cache_devs $dev" done - -log_must truncate -s 2G $VDEV -log_must zpool create -f $TESTPOOL $VDEV cache $cache_devs - -# Generate data in background -dd if=/dev/urandom of=/$TESTPOOL/file1 bs=1M count=$fill_mb & -typeset dd_pid=$! +log_must zpool add -f $TESTPOOL cache $cache_devs # Wait for L2ARC to start writing typeset l2_size=0 @@ -99,35 +108,34 @@ for i in {1..30}; do sleep 1 done if [[ $l2_size -eq 0 ]]; then - kill $dd_pid 2>/dev/null log_fail "L2ARC did not start writing" fi -# Measure L2ARC throughput over test window -typeset start=$(kstat arcstats.l2_write_bytes) -log_must sleep $test_time -typeset end=$(kstat arcstats.l2_write_bytes) -kill $dd_pid 2>/dev/null -wait $dd_pid 2>/dev/null +# Measure throughput over consecutive windows +typeset -a window_bytes +for w in $(seq 1 $num_windows); do + typeset start=$(kstat arcstats.l2_write_bytes) + log_must sleep $window_time + typeset end=$(kstat arcstats.l2_write_bytes) + window_bytes[$w]=$((end - start)) + log_note "Window $w: $((window_bytes[$w] / 1024 / 1024))MB" +done -typeset bytes=$((end - start)) -typeset bytes_mb=$((bytes / 1024 / 1024)) -# expected = 32MB/s * 2 devices * 10 seconds = 640MB -typeset expected=$((expected_rate * num_devs * test_time)) -typeset expected_mb=$((expected / 1024 / 1024)) - -log_note "L2ARC writes: ${bytes_mb}MB (expected ~${expected_mb}MB)" - -# Verify writes are within expected range (75-150%) -typeset min_bytes=$((expected * 75 / 100)) -typeset max_bytes=$((expected * 150 / 100)) -if [[ $bytes -lt $min_bytes ]]; then - log_fail "Writes ${bytes_mb}MB below minimum $((min_bytes/1024/1024))MB" -fi -if [[ $bytes -gt $max_bytes ]]; then - log_fail "Writes ${bytes_mb}MB above maximum $((max_bytes/1024/1024))MB" +# First window must have non-trivial 
writes +if [[ ${window_bytes[1]} -le 0 ]]; then + log_fail "No L2ARC writes in first window" fi -log_must zpool destroy $TESTPOOL +# Each subsequent window must be at least 50% of the first +typeset min_bytes=$((window_bytes[1] * 50 / 100)) +for w in $(seq 2 $num_windows); do + if [[ ${window_bytes[$w]} -lt $min_bytes ]]; then + log_fail "Window $w ($((window_bytes[$w] / 1024 / 1024))MB)" \ + "degraded below 50% of window 1" \ + "($((window_bytes[1] / 1024 / 1024))MB)" + fi +done -log_pass "L2ARC parallel writes scale with number of cache devices." +destroy_pool $TESTPOOL + +log_pass "L2ARC parallel writes sustain throughput without degradation."