From 0c4064d9a08ca2cf601ad1010e7cfd3f917cb991 Mon Sep 17 00:00:00 2001 From: George Amanakis Date: Tue, 23 Aug 2022 02:42:01 +0200 Subject: [PATCH] Fix zpool status in case of unloaded keys When scrubbing an encrypted filesystem with unloaded key still report an error in zpool status. Reviewed-by: Brian Behlendorf Reviewed-by: Alek Pinchuk Signed-off-by: George Amanakis Closes #13675 Closes #13717 --- include/sys/dsl_crypt.h | 1 + man/man7/zpool-features.7 | 3 + module/zfs/dsl_crypt.c | 2 +- module/zfs/spa_errlog.c | 135 +++++++++++++----- tests/runfiles/common.run | 2 +- tests/zfs-tests/tests/Makefile.am | 1 + .../zpool_status/zpool_status_003_pos.ksh | 1 + .../zpool_status/zpool_status_004_pos.ksh | 1 + .../zpool_status/zpool_status_005_pos.ksh | 78 ++++++++++ 9 files changed, 190 insertions(+), 34 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_005_pos.ksh diff --git a/include/sys/dsl_crypt.h b/include/sys/dsl_crypt.h index db594eece..72716e296 100644 --- a/include/sys/dsl_crypt.h +++ b/include/sys/dsl_crypt.h @@ -222,5 +222,6 @@ int spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, const zbookmark_phys_t *zb, dmu_object_type_t ot, boolean_t dedup, boolean_t bswap, uint8_t *salt, uint8_t *iv, uint8_t *mac, uint_t datalen, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt); +zfs_keystatus_t dsl_dataset_get_keystatus(dsl_dir_t *dd); #endif diff --git a/man/man7/zpool-features.7 b/man/man7/zpool-features.7 index 09f1e50de..0ff1a9a52 100644 --- a/man/man7/zpool-features.7 +++ b/man/man7/zpool-features.7 @@ -545,6 +545,9 @@ This feature enables the upgraded version of errlog, which required an on-disk error log format change. Now the error log of each head dataset is stored separately in the zap object and keyed by the head id. +In case of encrypted filesystems with unloaded keys or unmounted encrypted +filesystems we are unable to check their snapshots or clones for errors and +these will not be reported. With this feature enabled, every dataset affected by an error block is listed in the output of .Nm zpool Cm status . diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c index 25cb4f6ab..ce2e6ce74 100644 --- a/module/zfs/dsl_crypt.c +++ b/module/zfs/dsl_crypt.c @@ -1137,7 +1137,7 @@ dmu_objset_check_wkey_loaded(dsl_dir_t *dd) return (0); } -static zfs_keystatus_t +zfs_keystatus_t dsl_dataset_get_keystatus(dsl_dir_t *dd) { /* check if this dd has a has a dsl key */ diff --git a/module/zfs/spa_errlog.c b/module/zfs/spa_errlog.c index 4572a6e56..e682f6c69 100644 --- a/module/zfs/spa_errlog.c +++ b/module/zfs/spa_errlog.c @@ -21,8 +21,8 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013, 2014, Delphix. All rights reserved. - * Copyright (c) 2021, George Amanakis. All rights reserved. * Copyright (c) 2019 Datto Inc. + * Copyright (c) 2021, 2022, George Amanakis. All rights reserved. */ /* @@ -68,6 +68,7 @@ #include #include #include +#include #define NAME_MAX_LEN 64 @@ -175,6 +176,23 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj, return (error); } + /* + * If the key is not loaded dbuf_dnode_findbp() will error out with + * EACCES. However in that case dnode_hold() will eventually call + * dbuf_read()->zio_wait() which may call spa_log_error(). This will + * lead to a deadlock due to us holding the mutex spa_errlist_lock. + * Avoid this by checking here if the keys are loaded, if not return. + * If the keys are not loaded the head_errlog feature is meaningless + * as we cannot figure out the birth txg of the block pointer. + */ + if (dsl_dataset_get_keystatus(ds->ds_dir) == + ZFS_KEYSTATUS_UNAVAILABLE) { + zep->zb_birth = 0; + dsl_dataset_rele(ds, FTAG); + dsl_pool_config_exit(dp, FTAG); + return (0); + } + dnode_t *dn; blkptr_t bp; @@ -188,11 +206,22 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj, rw_enter(&dn->dn_struct_rwlock, RW_READER); error = dbuf_dnode_findbp(dn, zep->zb_level, zep->zb_blkid, &bp, NULL, NULL); - if (error == 0 && BP_IS_HOLE(&bp)) error = SET_ERROR(ENOENT); - zep->zb_birth = bp.blk_birth; + /* + * If the key is loaded but the encrypted filesystem is unmounted when + * a scrub is run, then dbuf_dnode_findbp() will still error out with + * EACCES (possibly due to the key mapping being removed upon + * unmounting). In that case the head_errlog feature is also + * meaningless as we cannot figure out the birth txg of the block + * pointer. + */ + if (error == EACCES) + error = 0; + else if (!error) + zep->zb_birth = bp.blk_birth; + rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); dsl_dataset_rele(ds, FTAG); @@ -264,7 +293,6 @@ find_birth_txg(dsl_dataset_t *ds, zbookmark_err_phys_t *zep, rw_enter(&dn->dn_struct_rwlock, RW_READER); error = dbuf_dnode_findbp(dn, zep->zb_level, zep->zb_blkid, &bp, NULL, NULL); - if (error == 0 && BP_IS_HOLE(&bp)) error = SET_ERROR(ENOENT); @@ -298,27 +326,40 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, uint64_t txg_to_consider = spa->spa_syncing_txg; boolean_t check_snapshot = B_TRUE; error = find_birth_txg(ds, zep, &latest_txg); - if (error == 0) { - if (zep->zb_birth == latest_txg) { - /* Block neither free nor rewritten. */ - if (!only_count) { - zbookmark_phys_t zb; - zep_to_zb(head_ds, zep, &zb); - if (copyout(&zb, (char *)uaddr + (*count - 1) - * sizeof (zbookmark_phys_t), - sizeof (zbookmark_phys_t)) != 0) { - dsl_dataset_rele(ds, FTAG); - return (SET_ERROR(EFAULT)); - } - (*count)--; - } else { - (*count)++; + + /* + * If we cannot figure out the current birth txg of the block pointer + * error out. If the filesystem is encrypted and the key is not loaded + * or the encrypted filesystem is not mounted the error will be EACCES. + * In that case do not return an error. + */ + if (error == EACCES) { + dsl_dataset_rele(ds, FTAG); + return (0); + } + if (error) { + dsl_dataset_rele(ds, FTAG); + return (error); + } + if (zep->zb_birth == latest_txg) { + /* Block neither free nor rewritten. */ + if (!only_count) { + zbookmark_phys_t zb; + zep_to_zb(head_ds, zep, &zb); + if (copyout(&zb, (char *)uaddr + (*count - 1) + * sizeof (zbookmark_phys_t), + sizeof (zbookmark_phys_t)) != 0) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EFAULT)); } - check_snapshot = B_FALSE; + (*count)--; } else { - ASSERT3U(zep->zb_birth, <, latest_txg); - txg_to_consider = latest_txg; + (*count)++; } + check_snapshot = B_FALSE; + } else { + ASSERT3U(zep->zb_birth, <, latest_txg); + txg_to_consider = latest_txg; } /* How many snapshots reference this block. */ @@ -439,9 +480,31 @@ process_error_block(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, uint64_t *count, void *uaddr, boolean_t only_count) { dsl_pool_t *dp = spa->spa_dsl_pool; - dsl_pool_config_enter(dp, FTAG); uint64_t top_affected_fs; + /* + * If the zb_birth is 0 it means we failed to retrieve the birth txg + * of the block pointer. This happens when an encrypted filesystem is + * not mounted or when the key is not loaded. Do not proceed to + * check_filesystem(), instead do the accounting here. + */ + if (zep->zb_birth == 0) { + if (!only_count) { + zbookmark_phys_t zb; + zep_to_zb(head_ds, zep, &zb); + if (copyout(&zb, (char *)uaddr + (*count - 1) + * sizeof (zbookmark_phys_t), + sizeof (zbookmark_phys_t)) != 0) { + return (SET_ERROR(EFAULT)); + } + (*count)--; + } else { + (*count)++; + } + return (0); + } + + dsl_pool_config_enter(dp, FTAG); int error = find_top_affected_fs(spa, head_ds, zep, &top_affected_fs); if (error == 0) error = check_filesystem(spa, top_affected_fs, zep, count, @@ -497,6 +560,7 @@ get_errlist_size(spa_t *spa, avl_tree_t *tree) zep.zb_object = se->se_bookmark.zb_object; zep.zb_level = se->se_bookmark.zb_level; zep.zb_blkid = se->se_bookmark.zb_blkid; + zep.zb_birth = 0; /* * If we cannot find out the head dataset and birth txg of @@ -505,8 +569,10 @@ get_errlist_size(spa_t *spa, avl_tree_t *tree) * sync_error_list() and written to the on-disk error log. */ uint64_t head_ds_obj; - if (get_head_and_birth_txg(spa, &zep, - se->se_bookmark.zb_objset, &head_ds_obj) == 0) + int error = get_head_and_birth_txg(spa, &zep, + se->se_bookmark.zb_objset, &head_ds_obj); + + if (!error) (void) process_error_block(spa, head_ds_obj, &zep, &total, NULL, B_TRUE); } @@ -695,6 +761,7 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj, zep.zb_object = zb.zb_object; zep.zb_level = zb.zb_level; zep.zb_blkid = zb.zb_blkid; + zep.zb_birth = 0; /* * We cannot use get_head_and_birth_txg() because it will @@ -737,8 +804,11 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj, rw_enter(&dn->dn_struct_rwlock, RW_READER); error = dbuf_dnode_findbp(dn, zep.zb_level, zep.zb_blkid, &bp, NULL, NULL); + if (error == EACCES) + error = 0; + else if (!error) + zep.zb_birth = bp.blk_birth; - zep.zb_birth = bp.blk_birth; rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); dsl_dataset_rele(ds, FTAG); @@ -885,16 +955,16 @@ process_error_list(spa_t *spa, avl_tree_t *list, void *uaddr, uint64_t *count) zep.zb_object = se->se_bookmark.zb_object; zep.zb_level = se->se_bookmark.zb_level; zep.zb_blkid = se->se_bookmark.zb_blkid; + zep.zb_birth = 0; uint64_t head_ds_obj; int error = get_head_and_birth_txg(spa, &zep, se->se_bookmark.zb_objset, &head_ds_obj); - if (error != 0) - return (error); - error = process_error_block(spa, head_ds_obj, &zep, count, - uaddr, B_FALSE); - if (error != 0) + if (!error) + error = process_error_block(spa, head_ds_obj, &zep, + count, uaddr, B_FALSE); + if (error) return (error); } return (0); @@ -1013,6 +1083,7 @@ sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj, dmu_tx_t *tx) zep.zb_object = se->se_bookmark.zb_object; zep.zb_level = se->se_bookmark.zb_level; zep.zb_blkid = se->se_bookmark.zb_blkid; + zep.zb_birth = 0; /* * If we cannot find out the head dataset and birth txg @@ -1024,7 +1095,7 @@ sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj, dmu_tx_t *tx) uint64_t head_dataset_obj; int error = get_head_and_birth_txg(spa, &zep, se->se_bookmark.zb_objset, &head_dataset_obj); - if (error != 0) + if (error) continue; uint64_t err_obj; diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 414316eb6..b9a9e0efc 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -494,7 +494,7 @@ tags = ['functional', 'cli_root', 'zpool_split'] [tests/functional/cli_root/zpool_status] tests = ['zpool_status_001_pos', 'zpool_status_002_pos', 'zpool_status_003_pos', 'zpool_status_004_pos', - 'zpool_status_features_001_pos'] + 'zpool_status_005_pos', 'zpool_status_features_001_pos'] tags = ['functional', 'cli_root', 'zpool_status'] [tests/functional/cli_root/zpool_sync] diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index d65788d08..4a815db8a 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -1161,6 +1161,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool_status/zpool_status_002_pos.ksh \ functional/cli_root/zpool_status/zpool_status_003_pos.ksh \ functional/cli_root/zpool_status/zpool_status_004_pos.ksh \ + functional/cli_root/zpool_status/zpool_status_005_pos.ksh \ functional/cli_root/zpool_status/zpool_status_features_001_pos.ksh \ functional/cli_root/zpool_sync/cleanup.ksh \ functional/cli_root/zpool_sync/setup.ksh \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh index 231c46d4f..a243944e4 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh @@ -63,6 +63,7 @@ log_must zfs snapshot $TESTPOOL2@snap log_must zfs clone $TESTPOOL2@snap $TESTPOOL2/clone # Look to see that snapshot, clone and filesystem our files report errors +log_must zpool status -v $TESTPOOL2 log_must eval "zpool status -v | grep '$TESTPOOL2@snap:/10m_file'" log_must eval "zpool status -v | grep '$TESTPOOL2/clone/10m_file'" log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_004_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_004_pos.ksh index 1ac3e03b7..111d598df 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_004_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_004_pos.ksh @@ -74,6 +74,7 @@ log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'" # Check that enabling the feature reports the error properly. log_must zpool set feature@head_errlog=enabled $TESTPOOL2 +log_must zpool status -v $TESTPOOL2 log_must eval "zpool status -v | grep '$TESTPOOL2@snap:/10m_file'" log_must eval "zpool status -v | grep '$TESTPOOL2/clone/10m_file'" log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_005_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_005_pos.ksh new file mode 100755 index 000000000..3eb7825ca --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_005_pos.ksh @@ -0,0 +1,78 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2022 George Amanakis. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +# +# DESCRIPTION: +# Verify correct output with 'zpool status -v' after corrupting a file +# +# STRATEGY: +# 1. Create a pool, an ancrypted filesystem and a file +# 2. zinject checksum errors +# 3. Unmount the filesystem and unload the key +# 4. Scrub the pool +# 5. Verify we report errors in the pool in 'zpool status -v' + +verify_runnable "both" + +DISK=${DISKS%% *} + +function cleanup +{ + log_must zinject -c all + destroy_pool $TESTPOOL2 + rm -f $TESTDIR/vdev_a +} + +log_assert "Verify reporting errors with unloaded keys works" +log_onexit cleanup + +typeset passphrase="password" +typeset file="/$TESTPOOL2/$TESTFS1/$TESTFILE0" + +truncate -s $MINVDEVSIZE $TESTDIR/vdev_a +log_must zpool create -f -o feature@head_errlog=enabled $TESTPOOL2 $TESTDIR/vdev_a + +log_must eval "echo $passphrase > /$TESTPOOL2/pwd" + +log_must zfs create -o encryption=aes-256-ccm -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL2/pwd -o primarycache=none \ + $TESTPOOL2/$TESTFS1 + +log_must dd if=/dev/urandom of=$file bs=1024 count=1024 oflag=sync +log_must eval "echo 'aaaaaaaa' >> "$file + +corrupt_blocks_at_level $file 0 +log_must zfs unmount $TESTPOOL2/$TESTFS1 +log_must zfs unload-key $TESTPOOL2/$TESTFS1 +log_must zpool sync $TESTPOOL2 +log_must zpool scrub $TESTPOOL2 +log_must zpool wait -t scrub $TESTPOOL2 +log_must zpool status -v $TESTPOOL2 +log_must eval "zpool status -v $TESTPOOL2 | \ + grep \"Permanent errors have been detected\"" + +log_pass "Verify reporting errors with unloaded keys works"