mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-11-17 01:51:00 +03:00
Fix zpool status in case of unloaded keys
When scrubbing an encrypted filesystem with unloaded key still report an error in zpool status. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Alek Pinchuk <apinchuk@axcient.com> Signed-off-by: George Amanakis <gamanakis@gmail.com> Closes #13675 Closes #13717
This commit is contained in:
parent
17e212652d
commit
0c4064d9a0
@ -222,5 +222,6 @@ int spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, const zbookmark_phys_t *zb,
|
||||
dmu_object_type_t ot, boolean_t dedup, boolean_t bswap, uint8_t *salt,
|
||||
uint8_t *iv, uint8_t *mac, uint_t datalen, abd_t *pabd, abd_t *cabd,
|
||||
boolean_t *no_crypt);
|
||||
zfs_keystatus_t dsl_dataset_get_keystatus(dsl_dir_t *dd);
|
||||
|
||||
#endif
|
||||
|
@ -545,6 +545,9 @@ This feature enables the upgraded version of errlog, which required an on-disk
|
||||
error log format change.
|
||||
Now the error log of each head dataset is stored separately in the zap object
|
||||
and keyed by the head id.
|
||||
In case of encrypted filesystems with unloaded keys or unmounted encrypted
|
||||
filesystems we are unable to check their snapshots or clones for errors and
|
||||
these will not be reported.
|
||||
With this feature enabled, every dataset affected by an error block is listed
|
||||
in the output of
|
||||
.Nm zpool Cm status .
|
||||
|
@ -1137,7 +1137,7 @@ dmu_objset_check_wkey_loaded(dsl_dir_t *dd)
|
||||
return (0);
|
||||
}
|
||||
|
||||
static zfs_keystatus_t
|
||||
zfs_keystatus_t
|
||||
dsl_dataset_get_keystatus(dsl_dir_t *dd)
|
||||
{
|
||||
/* check if this dd has a has a dsl key */
|
||||
|
@ -21,8 +21,8 @@
|
||||
/*
|
||||
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013, 2014, Delphix. All rights reserved.
|
||||
* Copyright (c) 2021, George Amanakis. All rights reserved.
|
||||
* Copyright (c) 2019 Datto Inc.
|
||||
* Copyright (c) 2021, 2022, George Amanakis. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -68,6 +68,7 @@
|
||||
#include <sys/dsl_dir.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/dbuf.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
|
||||
#define NAME_MAX_LEN 64
|
||||
|
||||
@ -175,6 +176,23 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj,
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the key is not loaded dbuf_dnode_findbp() will error out with
|
||||
* EACCES. However in that case dnode_hold() will eventually call
|
||||
* dbuf_read()->zio_wait() which may call spa_log_error(). This will
|
||||
* lead to a deadlock due to us holding the mutex spa_errlist_lock.
|
||||
* Avoid this by checking here if the keys are loaded, if not return.
|
||||
* If the keys are not loaded the head_errlog feature is meaningless
|
||||
* as we cannot figure out the birth txg of the block pointer.
|
||||
*/
|
||||
if (dsl_dataset_get_keystatus(ds->ds_dir) ==
|
||||
ZFS_KEYSTATUS_UNAVAILABLE) {
|
||||
zep->zb_birth = 0;
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
dsl_pool_config_exit(dp, FTAG);
|
||||
return (0);
|
||||
}
|
||||
|
||||
dnode_t *dn;
|
||||
blkptr_t bp;
|
||||
|
||||
@ -188,11 +206,22 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj,
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
error = dbuf_dnode_findbp(dn, zep->zb_level, zep->zb_blkid, &bp, NULL,
|
||||
NULL);
|
||||
|
||||
if (error == 0 && BP_IS_HOLE(&bp))
|
||||
error = SET_ERROR(ENOENT);
|
||||
|
||||
zep->zb_birth = bp.blk_birth;
|
||||
/*
|
||||
* If the key is loaded but the encrypted filesystem is unmounted when
|
||||
* a scrub is run, then dbuf_dnode_findbp() will still error out with
|
||||
* EACCES (possibly due to the key mapping being removed upon
|
||||
* unmounting). In that case the head_errlog feature is also
|
||||
* meaningless as we cannot figure out the birth txg of the block
|
||||
* pointer.
|
||||
*/
|
||||
if (error == EACCES)
|
||||
error = 0;
|
||||
else if (!error)
|
||||
zep->zb_birth = bp.blk_birth;
|
||||
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
dnode_rele(dn, FTAG);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
@ -264,7 +293,6 @@ find_birth_txg(dsl_dataset_t *ds, zbookmark_err_phys_t *zep,
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
error = dbuf_dnode_findbp(dn, zep->zb_level, zep->zb_blkid, &bp, NULL,
|
||||
NULL);
|
||||
|
||||
if (error == 0 && BP_IS_HOLE(&bp))
|
||||
error = SET_ERROR(ENOENT);
|
||||
|
||||
@ -298,27 +326,40 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
|
||||
uint64_t txg_to_consider = spa->spa_syncing_txg;
|
||||
boolean_t check_snapshot = B_TRUE;
|
||||
error = find_birth_txg(ds, zep, &latest_txg);
|
||||
if (error == 0) {
|
||||
if (zep->zb_birth == latest_txg) {
|
||||
/* Block neither free nor rewritten. */
|
||||
if (!only_count) {
|
||||
zbookmark_phys_t zb;
|
||||
zep_to_zb(head_ds, zep, &zb);
|
||||
if (copyout(&zb, (char *)uaddr + (*count - 1)
|
||||
* sizeof (zbookmark_phys_t),
|
||||
sizeof (zbookmark_phys_t)) != 0) {
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (SET_ERROR(EFAULT));
|
||||
}
|
||||
(*count)--;
|
||||
} else {
|
||||
(*count)++;
|
||||
|
||||
/*
|
||||
* If we cannot figure out the current birth txg of the block pointer
|
||||
* error out. If the filesystem is encrypted and the key is not loaded
|
||||
* or the encrypted filesystem is not mounted the error will be EACCES.
|
||||
* In that case do not return an error.
|
||||
*/
|
||||
if (error == EACCES) {
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (0);
|
||||
}
|
||||
if (error) {
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (error);
|
||||
}
|
||||
if (zep->zb_birth == latest_txg) {
|
||||
/* Block neither free nor rewritten. */
|
||||
if (!only_count) {
|
||||
zbookmark_phys_t zb;
|
||||
zep_to_zb(head_ds, zep, &zb);
|
||||
if (copyout(&zb, (char *)uaddr + (*count - 1)
|
||||
* sizeof (zbookmark_phys_t),
|
||||
sizeof (zbookmark_phys_t)) != 0) {
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (SET_ERROR(EFAULT));
|
||||
}
|
||||
check_snapshot = B_FALSE;
|
||||
(*count)--;
|
||||
} else {
|
||||
ASSERT3U(zep->zb_birth, <, latest_txg);
|
||||
txg_to_consider = latest_txg;
|
||||
(*count)++;
|
||||
}
|
||||
check_snapshot = B_FALSE;
|
||||
} else {
|
||||
ASSERT3U(zep->zb_birth, <, latest_txg);
|
||||
txg_to_consider = latest_txg;
|
||||
}
|
||||
|
||||
/* How many snapshots reference this block. */
|
||||
@ -439,9 +480,31 @@ process_error_block(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
|
||||
uint64_t *count, void *uaddr, boolean_t only_count)
|
||||
{
|
||||
dsl_pool_t *dp = spa->spa_dsl_pool;
|
||||
dsl_pool_config_enter(dp, FTAG);
|
||||
uint64_t top_affected_fs;
|
||||
|
||||
/*
|
||||
* If the zb_birth is 0 it means we failed to retrieve the birth txg
|
||||
* of the block pointer. This happens when an encrypted filesystem is
|
||||
* not mounted or when the key is not loaded. Do not proceed to
|
||||
* check_filesystem(), instead do the accounting here.
|
||||
*/
|
||||
if (zep->zb_birth == 0) {
|
||||
if (!only_count) {
|
||||
zbookmark_phys_t zb;
|
||||
zep_to_zb(head_ds, zep, &zb);
|
||||
if (copyout(&zb, (char *)uaddr + (*count - 1)
|
||||
* sizeof (zbookmark_phys_t),
|
||||
sizeof (zbookmark_phys_t)) != 0) {
|
||||
return (SET_ERROR(EFAULT));
|
||||
}
|
||||
(*count)--;
|
||||
} else {
|
||||
(*count)++;
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
dsl_pool_config_enter(dp, FTAG);
|
||||
int error = find_top_affected_fs(spa, head_ds, zep, &top_affected_fs);
|
||||
if (error == 0)
|
||||
error = check_filesystem(spa, top_affected_fs, zep, count,
|
||||
@ -497,6 +560,7 @@ get_errlist_size(spa_t *spa, avl_tree_t *tree)
|
||||
zep.zb_object = se->se_bookmark.zb_object;
|
||||
zep.zb_level = se->se_bookmark.zb_level;
|
||||
zep.zb_blkid = se->se_bookmark.zb_blkid;
|
||||
zep.zb_birth = 0;
|
||||
|
||||
/*
|
||||
* If we cannot find out the head dataset and birth txg of
|
||||
@ -505,8 +569,10 @@ get_errlist_size(spa_t *spa, avl_tree_t *tree)
|
||||
* sync_error_list() and written to the on-disk error log.
|
||||
*/
|
||||
uint64_t head_ds_obj;
|
||||
if (get_head_and_birth_txg(spa, &zep,
|
||||
se->se_bookmark.zb_objset, &head_ds_obj) == 0)
|
||||
int error = get_head_and_birth_txg(spa, &zep,
|
||||
se->se_bookmark.zb_objset, &head_ds_obj);
|
||||
|
||||
if (!error)
|
||||
(void) process_error_block(spa, head_ds_obj, &zep,
|
||||
&total, NULL, B_TRUE);
|
||||
}
|
||||
@ -695,6 +761,7 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj,
|
||||
zep.zb_object = zb.zb_object;
|
||||
zep.zb_level = zb.zb_level;
|
||||
zep.zb_blkid = zb.zb_blkid;
|
||||
zep.zb_birth = 0;
|
||||
|
||||
/*
|
||||
* We cannot use get_head_and_birth_txg() because it will
|
||||
@ -737,8 +804,11 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj,
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
error = dbuf_dnode_findbp(dn, zep.zb_level, zep.zb_blkid, &bp,
|
||||
NULL, NULL);
|
||||
if (error == EACCES)
|
||||
error = 0;
|
||||
else if (!error)
|
||||
zep.zb_birth = bp.blk_birth;
|
||||
|
||||
zep.zb_birth = bp.blk_birth;
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
dnode_rele(dn, FTAG);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
@ -885,16 +955,16 @@ process_error_list(spa_t *spa, avl_tree_t *list, void *uaddr, uint64_t *count)
|
||||
zep.zb_object = se->se_bookmark.zb_object;
|
||||
zep.zb_level = se->se_bookmark.zb_level;
|
||||
zep.zb_blkid = se->se_bookmark.zb_blkid;
|
||||
zep.zb_birth = 0;
|
||||
|
||||
uint64_t head_ds_obj;
|
||||
int error = get_head_and_birth_txg(spa, &zep,
|
||||
se->se_bookmark.zb_objset, &head_ds_obj);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
|
||||
error = process_error_block(spa, head_ds_obj, &zep, count,
|
||||
uaddr, B_FALSE);
|
||||
if (error != 0)
|
||||
if (!error)
|
||||
error = process_error_block(spa, head_ds_obj, &zep,
|
||||
count, uaddr, B_FALSE);
|
||||
if (error)
|
||||
return (error);
|
||||
}
|
||||
return (0);
|
||||
@ -1013,6 +1083,7 @@ sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj, dmu_tx_t *tx)
|
||||
zep.zb_object = se->se_bookmark.zb_object;
|
||||
zep.zb_level = se->se_bookmark.zb_level;
|
||||
zep.zb_blkid = se->se_bookmark.zb_blkid;
|
||||
zep.zb_birth = 0;
|
||||
|
||||
/*
|
||||
* If we cannot find out the head dataset and birth txg
|
||||
@ -1024,7 +1095,7 @@ sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj, dmu_tx_t *tx)
|
||||
uint64_t head_dataset_obj;
|
||||
int error = get_head_and_birth_txg(spa, &zep,
|
||||
se->se_bookmark.zb_objset, &head_dataset_obj);
|
||||
if (error != 0)
|
||||
if (error)
|
||||
continue;
|
||||
|
||||
uint64_t err_obj;
|
||||
|
@ -494,7 +494,7 @@ tags = ['functional', 'cli_root', 'zpool_split']
|
||||
[tests/functional/cli_root/zpool_status]
|
||||
tests = ['zpool_status_001_pos', 'zpool_status_002_pos',
|
||||
'zpool_status_003_pos', 'zpool_status_004_pos',
|
||||
'zpool_status_features_001_pos']
|
||||
'zpool_status_005_pos', 'zpool_status_features_001_pos']
|
||||
tags = ['functional', 'cli_root', 'zpool_status']
|
||||
|
||||
[tests/functional/cli_root/zpool_sync]
|
||||
|
@ -1161,6 +1161,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
||||
functional/cli_root/zpool_status/zpool_status_002_pos.ksh \
|
||||
functional/cli_root/zpool_status/zpool_status_003_pos.ksh \
|
||||
functional/cli_root/zpool_status/zpool_status_004_pos.ksh \
|
||||
functional/cli_root/zpool_status/zpool_status_005_pos.ksh \
|
||||
functional/cli_root/zpool_status/zpool_status_features_001_pos.ksh \
|
||||
functional/cli_root/zpool_sync/cleanup.ksh \
|
||||
functional/cli_root/zpool_sync/setup.ksh \
|
||||
|
@ -63,6 +63,7 @@ log_must zfs snapshot $TESTPOOL2@snap
|
||||
log_must zfs clone $TESTPOOL2@snap $TESTPOOL2/clone
|
||||
|
||||
# Look to see that snapshot, clone and filesystem our files report errors
|
||||
log_must zpool status -v $TESTPOOL2
|
||||
log_must eval "zpool status -v | grep '$TESTPOOL2@snap:/10m_file'"
|
||||
log_must eval "zpool status -v | grep '$TESTPOOL2/clone/10m_file'"
|
||||
log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'"
|
||||
|
@ -74,6 +74,7 @@ log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'"
|
||||
|
||||
# Check that enabling the feature reports the error properly.
|
||||
log_must zpool set feature@head_errlog=enabled $TESTPOOL2
|
||||
log_must zpool status -v $TESTPOOL2
|
||||
log_must eval "zpool status -v | grep '$TESTPOOL2@snap:/10m_file'"
|
||||
log_must eval "zpool status -v | grep '$TESTPOOL2/clone/10m_file'"
|
||||
log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'"
|
||||
|
@ -0,0 +1,78 @@
|
||||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2022 George Amanakis. All rights reserved.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Verify correct output with 'zpool status -v' after corrupting a file
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create a pool, an ancrypted filesystem and a file
|
||||
# 2. zinject checksum errors
|
||||
# 3. Unmount the filesystem and unload the key
|
||||
# 4. Scrub the pool
|
||||
# 5. Verify we report errors in the pool in 'zpool status -v'
|
||||
|
||||
verify_runnable "both"
|
||||
|
||||
DISK=${DISKS%% *}
|
||||
|
||||
function cleanup
|
||||
{
|
||||
log_must zinject -c all
|
||||
destroy_pool $TESTPOOL2
|
||||
rm -f $TESTDIR/vdev_a
|
||||
}
|
||||
|
||||
log_assert "Verify reporting errors with unloaded keys works"
|
||||
log_onexit cleanup
|
||||
|
||||
typeset passphrase="password"
|
||||
typeset file="/$TESTPOOL2/$TESTFS1/$TESTFILE0"
|
||||
|
||||
truncate -s $MINVDEVSIZE $TESTDIR/vdev_a
|
||||
log_must zpool create -f -o feature@head_errlog=enabled $TESTPOOL2 $TESTDIR/vdev_a
|
||||
|
||||
log_must eval "echo $passphrase > /$TESTPOOL2/pwd"
|
||||
|
||||
log_must zfs create -o encryption=aes-256-ccm -o keyformat=passphrase \
|
||||
-o keylocation=file:///$TESTPOOL2/pwd -o primarycache=none \
|
||||
$TESTPOOL2/$TESTFS1
|
||||
|
||||
log_must dd if=/dev/urandom of=$file bs=1024 count=1024 oflag=sync
|
||||
log_must eval "echo 'aaaaaaaa' >> "$file
|
||||
|
||||
corrupt_blocks_at_level $file 0
|
||||
log_must zfs unmount $TESTPOOL2/$TESTFS1
|
||||
log_must zfs unload-key $TESTPOOL2/$TESTFS1
|
||||
log_must zpool sync $TESTPOOL2
|
||||
log_must zpool scrub $TESTPOOL2
|
||||
log_must zpool wait -t scrub $TESTPOOL2
|
||||
log_must zpool status -v $TESTPOOL2
|
||||
log_must eval "zpool status -v $TESTPOOL2 | \
|
||||
grep \"Permanent errors have been detected\""
|
||||
|
||||
log_pass "Verify reporting errors with unloaded keys works"
|
Loading…
Reference in New Issue
Block a user