Fix log vdev removal issues

When we clear the log, we should clear all the header fields, not only
zh_log.  Otherwise a remaining ZIL_REPLAY_NEEDED flag will prevent the
vdev removal.  Also handle it from the other side, when zh_log
is already cleared but zh_flags is not.

spa_vdev_remove_log() asserts that the allocated space on the removed log
device is zero.  While that should hold in a perfect world, it might
not if space leaked at any point, so drop the assertion.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Alexander Motin <alexander.motin@TrueNAS.com>
Closes #18277
This commit is contained in:
Alexander Motin 2026-03-04 09:12:14 -05:00 committed by GitHub
parent f6205fdf64
commit 1e1d64d665
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 113 additions and 6 deletions

View File

@ -2151,7 +2151,6 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg)
ASSERT0P(vd->vdev_log_mg); ASSERT0P(vd->vdev_log_mg);
return (error); return (error);
} }
ASSERT0(vd->vdev_stat.vs_alloc);
/* /*
* The evacuation succeeded. Remove any remaining MOS metadata * The evacuation succeeded. Remove any remaining MOS metadata

View File

@ -1096,7 +1096,7 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first)
zilog->zl_old_header = *zh; /* debugging aid */ zilog->zl_old_header = *zh; /* debugging aid */
if (BP_IS_HOLE(&zh->zh_log)) if (BP_IS_HOLE(&zh->zh_log) && zh->zh_flags == 0)
return (B_FALSE); return (B_FALSE);
tx = dmu_tx_create(zilog->zl_os); tx = dmu_tx_create(zilog->zl_os);
@ -1166,6 +1166,15 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg)
zilog = dmu_objset_zil(os); zilog = dmu_objset_zil(os);
zh = zil_header_in_syncing_context(zilog); zh = zil_header_in_syncing_context(zilog);
ASSERT3U(tx->tx_txg, ==, spa_first_txg(zilog->zl_spa)); ASSERT3U(tx->tx_txg, ==, spa_first_txg(zilog->zl_spa));
/*
* If the log is empty, then there is nothing to do here.
*/
if (BP_IS_HOLE(&zh->zh_log)) {
dmu_objset_disown(os, B_FALSE, FTAG);
return (0);
}
first_txg = spa_min_claim_txg(zilog->zl_spa); first_txg = spa_min_claim_txg(zilog->zl_spa);
/* /*
@ -1198,11 +1207,14 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg)
if (spa_get_log_state(zilog->zl_spa) == SPA_LOG_CLEAR || if (spa_get_log_state(zilog->zl_spa) == SPA_LOG_CLEAR ||
(zilog->zl_spa->spa_uberblock.ub_checkpoint_txg != 0 && (zilog->zl_spa->spa_uberblock.ub_checkpoint_txg != 0 &&
zh->zh_claim_txg == 0)) { zh->zh_claim_txg == 0)) {
if (!BP_IS_HOLE(&zh->zh_log)) { if (zilog->zl_spa->spa_uberblock.ub_checkpoint_txg != 0 &&
BP_GET_BIRTH(&zh->zh_log) < first_txg) {
(void) zil_parse(zilog, zil_clear_log_block, (void) zil_parse(zilog, zil_clear_log_block,
zil_noop_log_record, tx, first_txg, B_FALSE); zil_noop_log_record, tx, first_txg, B_FALSE);
} else {
zio_free(zilog->zl_spa, first_txg, &zh->zh_log);
} }
BP_ZERO(&zh->zh_log); memset(zh, 0, sizeof (zil_header_t));
if (os->os_encrypted) if (os->os_encrypted)
os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE; os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE;
dsl_dataset_dirty(dmu_objset_ds(os), tx); dsl_dataset_dirty(dmu_objset_ds(os), tx);
@ -1224,7 +1236,7 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg)
* or destroy beyond the last block we successfully claimed. * or destroy beyond the last block we successfully claimed.
*/ */
ASSERT3U(zh->zh_claim_txg, <=, first_txg); ASSERT3U(zh->zh_claim_txg, <=, first_txg);
if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) { if (zh->zh_claim_txg == 0) {
(void) zil_parse(zilog, zil_claim_log_block, (void) zil_parse(zilog, zil_claim_log_block,
zil_claim_log_record, tx, first_txg, B_FALSE); zil_claim_log_record, tx, first_txg, B_FALSE);
zh->zh_claim_txg = first_txg; zh->zh_claim_txg = first_txg;

View File

@ -947,7 +947,7 @@ tests = ['removal_all_vdev', 'removal_cancel', 'removal_check_space',
'removal_with_write', 'removal_with_zdb', 'remove_expanded', 'removal_with_write', 'removal_with_zdb', 'remove_expanded',
'remove_mirror', 'remove_mirror_sanity', 'remove_raidz', 'remove_mirror', 'remove_mirror_sanity', 'remove_raidz',
'remove_indirect', 'remove_attach_mirror', 'removal_reservation', 'remove_indirect', 'remove_attach_mirror', 'removal_reservation',
'removal_with_hole'] 'removal_with_hole', 'removal_with_missing_log']
tags = ['functional', 'removal'] tags = ['functional', 'removal']
[tests/functional/rename_dirs] [tests/functional/rename_dirs]

View File

@ -1947,6 +1947,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/removal/removal_with_faulted.ksh \ functional/removal/removal_with_faulted.ksh \
functional/removal/removal_with_ganging.ksh \ functional/removal/removal_with_ganging.ksh \
functional/removal/removal_with_hole.ksh \ functional/removal/removal_with_hole.ksh \
functional/removal/removal_with_missing_log.ksh \
functional/removal/removal_with_indirect.ksh \ functional/removal/removal_with_indirect.ksh \
functional/removal/removal_with_remove.ksh \ functional/removal/removal_with_remove.ksh \
functional/removal/removal_with_scrub.ksh \ functional/removal/removal_with_scrub.ksh \

View File

@ -0,0 +1,95 @@
#!/bin/ksh -p
# SPDX-License-Identifier: CDDL-1.0
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#
#
# Copyright (c) 2026, TrueNAS.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/removal/removal.kshlib
#
# DESCRIPTION:
# Verify that a missing SLOG device can be removed even when
# ZIL blocks exist on it.
#
# STRATEGY:
# 1. Create a pool with a SLOG device
# 2. Freeze the pool and write data to ZIL
# 3. Export the pool (ZIL blocks remain uncommitted)
# 4. Import with -N to claim logs without replay
# 5. Export and clear SLOG device labels to simulate failure
# 6. Import with -m (missing devices allowed)
# 7. Remove the missing SLOG vdev
# 8. Verify pool is healthy and space accounting is correct
#
# Declare the context this test is written for.  verify_runnable is a
# test-library helper (not shown here); presumably it stops the test when
# run outside the "global" context -- confirm against libtest.shlib.
verify_runnable "global"
# State the property this test is expected to demonstrate.
log_assert "Removal of missing SLOG with ZIL blocks succeeds"
#
# Exit handler: destroy $TESTPOOL if it still exists.
# When the pool is absent, the (non-zero) status of poolexists is
# passed through unchanged as this function's exit status.
#
function cleanup
{
	poolexists $TESTPOOL || return
	destroy_pool $TESTPOOL
}
# Register cleanup with the test library's exit hook (log_onexit is defined
# in the test library, not shown here; presumably it runs on any exit).
log_onexit cleanup
# Take the first two space-separated entries of $DISKS: VDEV1 backs the
# pool's data, VDEV2 is attached as the separate log (SLOG) device.
VDEV1="$(echo $DISKS | cut -d' ' -f1)"
VDEV2="$(echo $DISKS | cut -d' ' -f2)"
# Create pool with SLOG and dataset
log_must zpool create $TESTPOOL $VDEV1 log $VDEV2
log_must zfs create $TESTPOOL/$TESTFS
# Create initial ZIL header (required before freezing)
# A single synced one-byte write is enough for this purpose.
log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/init \
conv=fdatasync,fsync bs=1 count=1
# Freeze pool and write data to ZIL
# 128 synchronous writes of 128k each (16 MiB total) are issued after the
# freeze, so they are expected to exist only as ZIL records.
log_must zpool freeze $TESTPOOL
log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file1 \
oflag=sync bs=128k count=128
# Export with uncommitted ZIL transactions
log_must zpool export $TESTPOOL
# Import with -N to claim logs without mounting/replaying
log_must zpool import -N $TESTPOOL
# Export again so the SLOG labels are cleared while the pool is not imported.
log_must zpool export $TESTPOOL
# Clear SLOG labels to simulate device failure
log_must zpool labelclear -f $VDEV2
# Import with missing SLOG allowed
log_must zpool import -m $TESTPOOL
# The status output must contain an UNAVAIL entry (expected to be the SLOG
# whose labels were just cleared) before removal is attempted.
log_must eval "zpool status $TESTPOOL | grep UNAVAIL"
# Remove the missing SLOG - should succeed
log_must zpool remove $TESTPOOL $VDEV2
# Wait for removal activity to finish and sync the pool before inspecting it.
log_must zpool wait -t remove $TESTPOOL
sync_pool $TESTPOOL
# The removed device must no longer appear anywhere in the status output.
log_mustnot eval "zpool status -v $TESTPOOL | grep $VDEV2"
# Verify pool health
# Run a scrub to completion (-w) and require the "errors" line of the pool
# status to read "No known data errors".
log_must zpool scrub -w $TESTPOOL
log_must check_pool_status $TESTPOOL "errors" "No known data errors"
# Verify space accounting is correct
log_must zdb -c $TESTPOOL
# Every step succeeded: report the test as passed.
log_pass "Removal of missing SLOG with ZIL blocks succeeded"